First work on Pastebin parser

This commit is contained in:
Christopher Talib 2020-01-26 17:27:20 +01:00
parent a0d2761c4e
commit e4cc92e100
4 changed files with 124 additions and 1 deletions

View file

@ -34,6 +34,7 @@ type LeafCertExtensions struct {
CertificatePolicies string `json:"certificatePolicies"`
}
// LeafCertSubject is the subject of the LeafCert object.
type LeafCertSubject struct {
Aggregated string `json:"aggregated"`
C string `json:"C"`
@ -44,6 +45,7 @@ type LeafCertSubject struct {
CN string `json:"CN"`
}
// LeafCertStruct represents the LeafCert object.
type LeafCertStruct struct {
Subject LeafCertSubject `json:"subject"`
Extensions LeafCertExtensions `json:"extensions"`
@ -55,11 +57,13 @@ type LeafCertStruct struct {
AllDomains []string `json:"all_domains"`
}
// Source is the object for the URL and its name.
type Source struct {
// URL is the log server endpoint this certificate update came from.
URL string `json:"url"`
// Name is the human-readable name of that log server.
Name string `json:"name"`
}
// CertStreamData is the data contained in a CertStream payload.
type CertStreamData struct {
UpdateType string `json:"update_type"`
LeafCert LeafCertStruct `json:"leaf_cert"`
@ -69,7 +73,37 @@ type CertStreamData struct {
Source Source `json:"source"`
}
// CertStreamStruct represents a payload received from CertStream. It has a type
// and the content is stored in Data.
type CertStreamStruct struct {
	// MessageType mirrors the "message_type" field of a CertStream payload
	// (e.g. "certificate_update"). It was previously tagged "message_data",
	// which does not exist in the feed, so the field was always empty.
	MessageType string `json:"message_type"`
	// Data holds the actual certificate information of the update.
	Data CertStreamData `json:"data"`
}
// PasteMeta is a set of descriptive information on a paste.
// Field names follow the JSON returned by the Pastebin scraping API.
type PasteMeta struct {
// ScrapeURL is the API endpoint from which the raw content can be fetched.
ScrapeURL string `json:"scrape_url"`
// FullURL is the public pastebin.com URL of the paste.
FullURL string `json:"full_url"`
// Date is the creation time as reported by the API (string-encoded).
Date string `json:"date"`
// Key is the unique paste identifier.
Key string `json:"key"`
// Size is the paste size; kept as a string as delivered by the API.
Size string `json:"size"`
// Expire is the expiration timestamp ("0" presumably means never — TODO confirm).
Expire string `json:"expire"`
Title string `json:"title"`
// Syntax is the highlighting language selected by the author.
Syntax string `json:"syntax"`
// User is the author's username (may be empty for guest pastes).
User string `json:"user"`
}
// PasteFull extends PasteMeta by the actual content.
// NOTE(review): the metadata fields duplicate PasteMeta; embedding PasteMeta
// here would avoid the duplication but would change struct-literal call sites.
type PasteFull struct {
ScrapeURL string `json:"scrape_url"`
FullURL string `json:"full_url"`
Date string `json:"date"`
Key string `json:"key"`
Size string `json:"size"`
Expire string `json:"expire"`
Title string `json:"title"`
Syntax string `json:"syntax"`
User string `json:"user"`
// Data is the raw paste body fetched from the scrape endpoint.
Data string `json:"data"`
// RFC3339 is the fetch/normalization time, serialized under the "time" key.
RFC3339 string `json:"time"`
}

View file

@ -1,5 +1,14 @@
package parser
import (
"encoding/json"
"io/ioutil"
"github.com/sirupsen/logrus"
"gitlab.dcso.lolcat/LABS/styx/models"
"gitlab.dcso.lolcat/LABS/styx/utils"
)
// read node received on kafka
// create a node in the node file
// save domains in another file with node ID
@ -14,6 +23,19 @@ const (
)
// ParseEvent reads the accumulated CertStream nodes from the node file and
// saves every domain found in each node's leaf certificate.
// NOTE(review): the domains parameter is unused in the visible body — confirm
// whether it should be persisted as well or removed.
func ParseEvent(domains []string) {
// Read errors are logged but not fatal: nodeFile stays nil and the
// Unmarshal below will simply log again and leave nodeDatas empty.
nodeFile, err := ioutil.ReadFile(NodesFilename)
if err != nil {
logrus.Error(err)
}
nodeDatas := []models.Node{}
if err := json.Unmarshal(nodeFile, &nodeDatas); err != nil {
logrus.Error(err)
}
// Persist all domains seen in each stored node's leaf certificate.
for _, node := range nodeDatas {
utils.SaveDomains(node.Data.Data.LeafCert.AllDomains)
}
// saveDomains()
// go findDomainEdges()

View file

@ -1,6 +1,11 @@
package utils
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"strconv"
"github.com/jmoiron/jsonq"
@ -187,3 +192,66 @@ func extractLeafCertStruct(input jsonq.JsonQuery) (models.LeafCertStruct, error)
}, nil
}
// Meta Information: https://pastebin.com/api_scraping.php
// Content: http://pastebin.com/api_scrape_item.php

// QueryPastes returns metadata for the last 100 public pastes.
// The Pastebin scraping API requires the caller's IP to be whitelisted;
// a non-200 answer is reported as an error instead of being fed to the
// JSON decoder.
func QueryPastes() ([]models.PasteMeta, error) {
	server := "pastebin.com"
	req, err := http.NewRequest("GET", fmt.Sprintf("https://%s/api_scraping.php?limit=100", server), nil)
	if err != nil {
		// Do not logrus.Fatal here: Fatal calls os.Exit and made the
		// original return statement unreachable.
		logrus.Errorf("could not build http request: %v", err)
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		// logrus.Error does not interpret printf verbs; Errorf does.
		logrus.Errorf("could not do request: %v", err)
		return nil, err
	}
	defer resp.Body.Close()
	// Guard against error pages (e.g. 403 when the IP is not whitelisted),
	// which would otherwise fail confusingly inside json.Unmarshal.
	if resp.StatusCode != http.StatusOK {
		err := fmt.Errorf("unexpected status %q from %s", resp.Status, server)
		logrus.Error(err)
		return nil, err
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		logrus.Errorf("could not fetch response: %v", err)
		return nil, err
	}
	var pastes []models.PasteMeta
	if err := json.Unmarshal(body, &pastes); err != nil {
		logrus.Errorf("could not decode response: %v, body %s", err, string(body))
		return nil, err
	}
	// Success: return a literal nil rather than the last err variable.
	return pastes, nil
}
// FetchPaste fetches paste contents via the web API.
// It returns the raw body of the paste addressed by paste.ScrapeURL, or an
// error when the request cannot be built, executed, or read, or when the
// server answers with a non-200 status.
func FetchPaste(paste models.PasteMeta) (string, error) {
	url := paste.ScrapeURL
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Printf("Could not build request %v due to %v", req, err)
		return "", err
	}
	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		log.Printf("Could not do request %v due to %v", req, err)
		return "", err
	}
	defer resp.Body.Close()
	// Without this check an HTML error page would be returned as if it
	// were the paste content.
	if resp.StatusCode != http.StatusOK {
		err := fmt.Errorf("unexpected status %q fetching %s", resp.Status, url)
		log.Print(err)
		return "", err
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Printf("Could not read response body %v due to %v", resp.Body, err)
		return "", err
	}
	return string(body), nil
}

View file

@ -10,6 +10,5 @@ func FileExists(filename string) error {
return err
}
}
return nil
}