From 9fa5d13bf612bb639dec931b0ed55103485f7b43 Mon Sep 17 00:00:00 2001 From: Christopher Talib Date: Wed, 20 May 2020 10:03:28 +0200 Subject: [PATCH] Full text search and indexing some keywords Some of the keywords are indexed and open for full text search; please refer to the README for more details. CertStream, Pastebin and Shodan are running as services and can be searched. Next steps: building the matcher and creating edges. --- README.md | 77 ++++++++++++++++++++++++++----------------- filters/main.go | 9 +++-- graph/main.go | 25 +++++++------- models/main.go | 1 + plugins/certstream.go | 13 ++++---- plugins/pastebin.go | 1 + plugins/shodan.go | 10 +++--- 7 files changed, 79 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index b5a72f9..d56a4f1 100644 --- a/README.md +++ b/README.md @@ -45,35 +45,17 @@ same subnet. Check [this](https://serverfault.com/questions/916941/configuring-d ### Example configuration: ``` certstream: -activated: true + activated: true pastebin: -activated: true + activated: true shodan: -activated: true -key: "SHODAN_KEY" -ports: -- 80 -- 443 - -kafka: -activated: true -protocol: "tcp" -host: "localhost" -port: 9092 -topic: "styx" -partition: 0 - -balboa: -# the url you tunneled to Balboa -url: http://127.0.0.1:8030 -activated: true - -elasticsearch: -activated: true -url: http://localhost:9200 -index: "pastebin" + activated: true + key: "SHODAN_KEY" + ports: + - 80 + - 443 ``` ## Dgraph Interface @@ -172,10 +154,43 @@ query { ``` +Dgraph also supports full text search, so you can query things like: + +``` +query { + Node(func: allofterms(full, "code")) { + uid + created + modified + type + full + } +} +``` + +The following fields can be used as indexes for searches: + +* id +* type +* sourceName +* cn +* serialNumber +* hostnames +* organization +* full (full text of a pastebin) +* title +* user + +By design, each node has a `type` field so you know which field you should query +each time you query something. 
+ ## Datastructure ### Meta +Edges are not implemented yet. They will prove an existing relation between two +nodes of different origin. + Node --[Edge]-- Node ```go @@ -200,8 +215,7 @@ type Edge struct { ### Certstream -Node --[Edge]-- CertNode --[Edge]-- CertStreamRaw -Node(domain) --[Edge]-- CertNode +Node -- CertNode -- CertStreamRaw ```go @@ -233,7 +247,7 @@ type CertNode struct { ### Pastebin -Node --[Edge]-- PasteNode --[Edge]-- FullPaste +Node -- PasteNode -- FullPaste ```go // PasteNode is a node from PasteBin. @@ -249,12 +263,13 @@ type PasteNode struct { type FullPaste struct { Meta PasteMeta `json:"meta"` Full string `json:"full"` + Type string `json:"type"` } ``` ### Shodan -Node --[Edge]-- ShodanNode --[Edge]-- Node(s) (hostnames and domains) +Node -- ShodanNode -- Node(s) (hostnames and domains) ```go type ShodanNode struct { @@ -266,12 +281,12 @@ type ShodanNode struct { } ``` -### Balboa +### Balboa (not in Dgraph yet) Balboa enrichment happens on domains and hostnames extracted from Certstream and Shodan streams and the node is created only if Balboa returns data. -Node --[Edge]-- ShodanNode --[Edge]-- Node (domain) --[Edge]-- BalboaNode +Node -- ShodanNode -- Node (domain) -- BalboaNode ```go type BalboaNode struct { diff --git a/filters/main.go b/filters/main.go index 70cd05f..a2495d2 100644 --- a/filters/main.go +++ b/filters/main.go @@ -18,8 +18,11 @@ var ( ) // RunIPFilters runs the battery of filters for an IP. 
-func RunIPFilters(InputIP string) bool { - ip := net.ParseIP(InputIP) +func RunIPFilters(inputIP string) bool { + ip := net.ParseIP(inputIP) + if ip == nil { + return false + } if ip.To4() != nil { path := basepath + "/data/ipv4/" sliceIPv4, err := ioutil.ReadDir(path) @@ -46,7 +49,7 @@ func RunIPFilters(InputIP string) bool { } else if ip.To16() != nil { // run ipv6 filter battery } else { - logrus.Error("filters#invalid IP format") + logrus.Error("filters#invalid IP format for", inputIP) return false } diff --git a/graph/main.go b/graph/main.go index f48c86d..ec85fac 100644 --- a/graph/main.go +++ b/graph/main.go @@ -67,12 +67,12 @@ timestamp: string sourceName: string } -fingerprint: string . +fingerprint: string @index(exact, term) . notBefore: string . notAfter: string . -cn: string . -sourceName: string . -serialNumber: string . +cn: string @index(term) . +sourceName: string @index(term) . +serialNumber: string @index(term) . basicConstraints: string . chain: [uid]. csdata: uid . @@ -109,13 +109,13 @@ modified: string hostData: uid } -product: string . -hostnames: [string] . +product: string @index(term) . +hostnames: [string] @index(term) . version: string . -title: string . -ip: string . +title: string @index(term) . +ip: string @index(term) . os: string . -organization: string . +organization: string @index(term) . isp: string . cpe: [string] . asn: string . @@ -148,16 +148,16 @@ timestamp: string fullPaste: uid . meta: uid . -full: string . +full: string @index(term) . scrape_url: string . full_url: string . date: string . key: string . size: string . expire: string . -title: string . +title: string @index(term) . syntax: string . -user: string . +user: string @index(term) . 
type PasteMeta { scrape_url: string @@ -174,6 +174,7 @@ user: string type FullPaste { meta: PasteMeta full: string +type: string } type PasteNode { diff --git a/models/main.go b/models/main.go index 7fc80f3..b55bab1 100644 --- a/models/main.go +++ b/models/main.go @@ -271,6 +271,7 @@ type PasteNode struct { type FullPaste struct { Meta PasteMeta `json:"meta,omiempty"` Full string `json:"full,omiempty"` + Type string `json:"type,omiempty"` } // BuildPasteNode builds a node from a FullPaste data. diff --git a/plugins/certstream.go b/plugins/certstream.go index 0d0bd41..64a0c29 100644 --- a/plugins/certstream.go +++ b/plugins/certstream.go @@ -6,7 +6,6 @@ import ( "sync" "github.com/CaliDog/certstream-go" - "github.com/christalib/structs" "github.com/dgraph-io/dgo/v2" "github.com/dgraph-io/dgo/v2/protos/api" "github.com/jmoiron/jsonq" @@ -72,13 +71,13 @@ func (c *CertStreamPlugin) doRun(graphClient *dgo.Dgraph) { // models.SaveCertStreamRaw("raw_certstream.json", rawNode) certNode := models.BuildCertNode(rawNode) - models.SaveCertNode("cert_nodes.json", certNode) + // models.SaveCertNode("cert_nodes.json", certNode) mainNode := models.BuildNode("node", "certstream", certNode.ID) - models.SaveNode("nodes.json", mainNode) - rawEdge := models.BuildEdge("certstream", structs.Map(rawNode), structs.Map(mainNode)) - models.SaveEdge(rawEdge) - edge := models.BuildEdge("certstream", structs.Map(mainNode), structs.Map(certNode)) - models.SaveEdge(edge) + // models.SaveNode("nodes.json", mainNode) + // rawEdge := models.BuildEdge("certstream", structs.Map(rawNode), structs.Map(mainNode)) + // models.SaveEdge(rawEdge) + // edge := models.BuildEdge("certstream", structs.Map(mainNode), structs.Map(certNode)) + // models.SaveEdge(edge) // saveSingleValues(conn, "certstream", "domain", certNode.ID, domain) // edge between Node and CertNode diff --git a/plugins/pastebin.go b/plugins/pastebin.go index 80754ac..b0120ac 100644 --- a/plugins/pastebin.go +++ b/plugins/pastebin.go @@ -66,6 
+66,7 @@ func (p *PastebinPlugin) doRun(graphClient *dgo.Dgraph) { fp := models.FullPaste{ Meta: p, Full: paste, + Type: "fullPaste", } pasteNode := models.BuildPasteNode(&fp) mainNode := models.BuildNode("node", "pastebin", pasteNode.ID) diff --git a/plugins/shodan.go b/plugins/shodan.go index 8a8cea5..dc27a5a 100644 --- a/plugins/shodan.go +++ b/plugins/shodan.go @@ -80,7 +80,8 @@ func (s *ShodanPlugin) doRun(graphClient *dgo.Dgraph) { for _, hostname := range hostnames { hostNotInFilters = filters.RunDomainFilters(hostname) if hostNotInFilters { - logrus.Info("host", hostname, "not in filters") + // logrus.Info("host", hostname " not in filters") + // keep track of new hostnames // saveSingleValues(conn, "shodan_stream", "hostname", shodanNode.ID, hostname) } } @@ -89,14 +90,15 @@ func (s *ShodanPlugin) doRun(graphClient *dgo.Dgraph) { if len(domains) != 0 { for _, domain := range domains { domainNotInFilters = filters.RunDomainFilters(domain) - logrus.Info("domain", domain, "not in filters") + // logrus.Info("domain", domain, "not in filters") + // keep track of new domains // saveSingleValues(conn, "shodan_stream", "domain", shodanNode.ID, domain) } } if domainNotInFilters && hostNotInFilters { - models.SaveShodanNode("raw_shodan.json", shodanNode) + // models.SaveShodanNode("raw_shodan.json", shodanNode) mainNode := models.BuildNode("shodan", "shodan_stream", shodanNode.ID) - // models.SaveNode("nodes.json", mainNode) + models.SaveNode("nodes.json", mainNode) // edge := models.BuildEdge("shodan", structs.Map(shodanNode), structs.Map(mainNode)) // models.SaveEdge(edge) e := models.Node{