Full text search and indexing some keywords

Some keywords are indexed and open for full text search; please refer
to the README for more details.

CertStream, Pastebin and Shodan are running as services and can be
searched.

Next steps: building the matcher and creating edges.
This commit is contained in:
Christopher Talib 2020-05-20 10:03:28 +02:00
parent 7163147a4f
commit 9fa5d13bf6
7 changed files with 79 additions and 57 deletions

View file

@ -45,35 +45,17 @@ same subnet. Check [this](https://serverfault.com/questions/916941/configuring-d
### Example configuration:
```
certstream:
activated: true
activated: true
pastebin:
activated: true
activated: true
shodan:
activated: true
key: "SHODAN_KEY"
ports:
- 80
- 443
kafka:
activated: true
protocol: "tcp"
host: "localhost"
port: 9092
topic: "styx"
partition: 0
balboa:
# the url you tunneled to Balboa
url: http://127.0.0.1:8030
activated: true
elasticsearch:
activated: true
url: http://localhost:9200
index: "pastebin"
activated: true
key: "SHODAN_KEY"
ports:
- 80
- 443
```
## Dgraph Interface
@ -172,10 +154,43 @@ query {
```
Dgraph also supports full text search, so you can query things like:
```
query {
Node(func: allofterms(full, "code")) {
uid
created
modified
type
full
}
}
```
The following fields can be used as indexes for searches:
* id
* type
* sourceName
* cn
* serialNumber
* hostnames
* organization
* full (full text of a pastebin)
* title
* user
By design, each node has a `type` field so you know which field you should query
each time you query something.
## Datastructure
### Meta
Edges are not implemented yet. They will prove an existing relation between two
nodes of different origin.
Node --[Edge]-- Node
```go
@ -200,8 +215,7 @@ type Edge struct {
### Certstream
Node --[Edge]-- CertNode --[Edge]-- CertStreamRaw
Node(domain) --[Edge]-- CertNode
Node -- CertNode -- CertStreamRaw
```go
@ -233,7 +247,7 @@ type CertNode struct {
### Pastebin
Node --[Edge]-- PasteNode --[Edge]-- FullPaste
Node -- PasteNode -- FullPaste
```go
// PasteNode is a node from PasteBin.
@ -249,12 +263,13 @@ type PasteNode struct {
// FullPaste groups a paste's metadata (PasteMeta) with the paste's full text
// and a type string; the fields serialise to JSON as "meta", "full" and "type".
type FullPaste struct {
Meta PasteMeta `json:"meta"`
Full string `json:"full"`
Type string `json:"type"`
}
```
### Shodan
Node --[Edge]-- ShodanNode --[Edge]-- Node(s) (hostnames and domains)
Node -- ShodanNode -- Node(s) (hostnames and domains)
```go
type ShodanNode struct {
@ -266,12 +281,12 @@ type ShodanNode struct {
}
```
### Balboa
### Balboa (not in Dgraph yet)
Balboa enrichment happens on domains and hostnames extracted from Certstream
and Shodan streams and the node is created only if Balboa returns data.
Node --[Edge]-- ShodanNode --[Edge]-- Node (domain) --[Edge]-- BalboaNode
Node -- ShodanNode -- Node (domain) -- BalboaNode
```go
type BalboaNode struct {

View file

@ -18,8 +18,11 @@ var (
)
// RunIPFilters runs the battery of filters for an IP.
func RunIPFilters(InputIP string) bool {
ip := net.ParseIP(InputIP)
func RunIPFilters(inputIP string) bool {
ip := net.ParseIP(inputIP)
if ip == nil {
return false
}
if ip.To4() != nil {
path := basepath + "/data/ipv4/"
sliceIPv4, err := ioutil.ReadDir(path)
@ -46,7 +49,7 @@ func RunIPFilters(InputIP string) bool {
} else if ip.To16() != nil {
// run ipv6 filter battery
} else {
logrus.Error("filters#invalid IP format")
logrus.Error("filters#invalid IP format for", inputIP)
return false
}

View file

@ -67,12 +67,12 @@ timestamp: string
sourceName: string
}
fingerprint: string .
fingerprint: string @index(exact, term) .
notBefore: string .
notAfter: string .
cn: string .
sourceName: string .
serialNumber: string .
cn: string @index(term) .
sourceName: string @index(term) .
serialNumber: string @index(term) .
basicConstraints: string .
chain: [uid].
csdata: uid .
@ -109,13 +109,13 @@ modified: string
hostData: uid
}
product: string .
hostnames: [string] .
product: string @index(term) .
hostnames: [string] @index(term) .
version: string .
title: string .
ip: string .
title: string @index(term) .
ip: string @index(term) .
os: string .
organization: string .
organization: string @index(term) .
isp: string .
cpe: [string] .
asn: string .
@ -148,16 +148,16 @@ timestamp: string
fullPaste: uid .
meta: uid .
full: string .
full: string @index(term) .
scrape_url: string .
full_url: string .
date: string .
key: string .
size: string .
expire: string .
title: string .
title: string @index(term) .
syntax: string .
user: string .
user: string @index(term) .
type PasteMeta {
scrape_url: string
@ -174,6 +174,7 @@ user: string
type FullPaste {
meta: PasteMeta
full: string
type: string
}
type PasteNode {

View file

@ -271,6 +271,7 @@ type PasteNode struct {
// FullPaste groups a paste's metadata with the paste's full text and a type
// string. The fields serialise to JSON as "meta", "full" and "type".
//
// The tag option is spelled "omitempty": encoding/json silently ignores
// unknown options, so the previous misspelling never took effect. With the
// correct spelling, empty Full and Type strings are left out of the output.
// Meta is a struct value, which encoding/json never treats as empty, so it is
// always emitted.
type FullPaste struct {
	Meta PasteMeta `json:"meta,omitempty"`
	Full string    `json:"full,omitempty"`
	Type string    `json:"type,omitempty"`
}
// BuildPasteNode builds a node from a FullPaste data.

View file

@ -6,7 +6,6 @@ import (
"sync"
"github.com/CaliDog/certstream-go"
"github.com/christalib/structs"
"github.com/dgraph-io/dgo/v2"
"github.com/dgraph-io/dgo/v2/protos/api"
"github.com/jmoiron/jsonq"
@ -72,13 +71,13 @@ func (c *CertStreamPlugin) doRun(graphClient *dgo.Dgraph) {
// models.SaveCertStreamRaw("raw_certstream.json", rawNode)
certNode := models.BuildCertNode(rawNode)
models.SaveCertNode("cert_nodes.json", certNode)
// models.SaveCertNode("cert_nodes.json", certNode)
mainNode := models.BuildNode("node", "certstream", certNode.ID)
models.SaveNode("nodes.json", mainNode)
rawEdge := models.BuildEdge("certstream", structs.Map(rawNode), structs.Map(mainNode))
models.SaveEdge(rawEdge)
edge := models.BuildEdge("certstream", structs.Map(mainNode), structs.Map(certNode))
models.SaveEdge(edge)
// models.SaveNode("nodes.json", mainNode)
// rawEdge := models.BuildEdge("certstream", structs.Map(rawNode), structs.Map(mainNode))
// models.SaveEdge(rawEdge)
// edge := models.BuildEdge("certstream", structs.Map(mainNode), structs.Map(certNode))
// models.SaveEdge(edge)
// saveSingleValues(conn, "certstream", "domain", certNode.ID, domain)
// edge between Node and CertNode

View file

@ -66,6 +66,7 @@ func (p *PastebinPlugin) doRun(graphClient *dgo.Dgraph) {
fp := models.FullPaste{
Meta: p,
Full: paste,
Type: "fullPaste",
}
pasteNode := models.BuildPasteNode(&fp)
mainNode := models.BuildNode("node", "pastebin", pasteNode.ID)

View file

@ -80,7 +80,8 @@ func (s *ShodanPlugin) doRun(graphClient *dgo.Dgraph) {
for _, hostname := range hostnames {
hostNotInFilters = filters.RunDomainFilters(hostname)
if hostNotInFilters {
logrus.Info("host", hostname, "not in filters")
// logrus.Info("host", hostname " not in filters")
// keep track of new hostnames
// saveSingleValues(conn, "shodan_stream", "hostname", shodanNode.ID, hostname)
}
}
@ -89,14 +90,15 @@ func (s *ShodanPlugin) doRun(graphClient *dgo.Dgraph) {
if len(domains) != 0 {
for _, domain := range domains {
domainNotInFilters = filters.RunDomainFilters(domain)
logrus.Info("domain", domain, "not in filters")
// logrus.Info("domain", domain, "not in filters")
// keep track of new domains
// saveSingleValues(conn, "shodan_stream", "domain", shodanNode.ID, domain)
}
}
if domainNotInFilters && hostNotInFilters {
models.SaveShodanNode("raw_shodan.json", shodanNode)
// models.SaveShodanNode("raw_shodan.json", shodanNode)
mainNode := models.BuildNode("shodan", "shodan_stream", shodanNode.ID)
// models.SaveNode("nodes.json", mainNode)
models.SaveNode("nodes.json", mainNode)
// edge := models.BuildEdge("shodan", structs.Map(shodanNode), structs.Map(mainNode))
// models.SaveEdge(edge)
e := models.Node{