First work on Pastebin parser
This commit is contained in:
parent
a0d2761c4e
commit
e4cc92e100
|
@ -34,6 +34,7 @@ type LeafCertExtensions struct {
|
|||
CertificatePolicies string `json:"certificatePolicies"`
|
||||
}
|
||||
|
||||
// LeafCertSubject is the subject of the LeafCert object.
|
||||
type LeafCertSubject struct {
|
||||
Aggregated string `json:"aggregated"`
|
||||
C string `json:"C"`
|
||||
|
@ -44,6 +45,7 @@ type LeafCertSubject struct {
|
|||
CN string `json:"CN"`
|
||||
}
|
||||
|
||||
// LeafCertStruct represents the LeafCert object.
|
||||
type LeafCertStruct struct {
|
||||
Subject LeafCertSubject `json:"subject"`
|
||||
Extensions LeafCertExtensions `json:"extensions"`
|
||||
|
@ -55,11 +57,13 @@ type LeafCertStruct struct {
|
|||
AllDomains []string `json:"all_domains"`
|
||||
}
|
||||
|
||||
// Source is the object for the URL and its name.
// It identifies where a CertStream entry originated (e.g. a specific CT log).
type Source struct {
	URL  string `json:"url"`  // endpoint the entry was observed on
	Name string `json:"name"` // human-readable name of that source
}
|
||||
|
||||
// CertStreamData is the data contained in a CertStream payload.
|
||||
type CertStreamData struct {
|
||||
UpdateType string `json:"update_type"`
|
||||
LeafCert LeafCertStruct `json:"leaf_cert"`
|
||||
|
@ -69,7 +73,37 @@ type CertStreamData struct {
|
|||
Source Source `json:"source"`
|
||||
}
|
||||
|
||||
// CertStreamStruct reprensts a payload received from CertStream. It has a type
|
||||
// and the content is stored in Data.
|
||||
type CertStreamStruct struct {
|
||||
MessageType string `json:"message_data"`
|
||||
Data CertStreamData `json:"data"`
|
||||
}
|
||||
|
||||
// PasteMeta is a set of descriptive information on a paste.
// Fields mirror the JSON objects returned by the Pastebin scraping API
// (https://pastebin.com/api_scraping.php); the API delivers every value
// as a string, including numeric-looking ones.
type PasteMeta struct {
	ScrapeURL string `json:"scrape_url"` // API URL to fetch the raw paste content
	FullURL   string `json:"full_url"`   // public web URL of the paste
	Date      string `json:"date"`       // creation time; presumably a Unix-timestamp string — confirm against API
	Key       string `json:"key"`        // unique paste identifier
	Size      string `json:"size"`       // content size; numeric string
	Expire    string `json:"expire"`     // expiry time; "0" presumably means never — confirm
	Title     string `json:"title"`      // user-supplied title, may be empty
	Syntax    string `json:"syntax"`     // syntax-highlighting language tag
	User      string `json:"user"`       // author name, may be empty for anonymous pastes
}
|
||||
|
||||
// PasteFull extends PasteMeta by the actual content.
// It deliberately repeats PasteMeta's fields (rather than embedding) so that
// existing struct literals keep working; keep the shared fields in sync.
type PasteFull struct {
	ScrapeURL string `json:"scrape_url"` // API URL the content was fetched from
	FullURL   string `json:"full_url"`   // public web URL of the paste
	Date      string `json:"date"`       // creation time; presumably a Unix-timestamp string — confirm against API
	Key       string `json:"key"`        // unique paste identifier
	Size      string `json:"size"`       // content size; numeric string
	Expire    string `json:"expire"`     // expiry time; "0" presumably means never — confirm
	Title     string `json:"title"`      // user-supplied title, may be empty
	Syntax    string `json:"syntax"`     // syntax-highlighting language tag
	User      string `json:"user"`       // author name, may be empty for anonymous pastes
	Data      string `json:"data"`       // the paste body itself
	RFC3339   string `json:"time"`       // fetch/processing time in RFC 3339 format
}
|
||||
|
|
|
@ -1,5 +1,14 @@
|
|||
package parser
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"gitlab.dcso.lolcat/LABS/styx/models"
|
||||
"gitlab.dcso.lolcat/LABS/styx/utils"
|
||||
)
|
||||
|
||||
// read node received on kafka
|
||||
// create a node in the node file
|
||||
// save domains in another file with node ID
|
||||
|
@ -14,6 +23,19 @@ const (
|
|||
)
|
||||
|
||||
func ParseEvent(domains []string) {
|
||||
nodeFile, err := ioutil.ReadFile(NodesFilename)
|
||||
if err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
nodeDatas := []models.Node{}
|
||||
|
||||
if err := json.Unmarshal(nodeFile, &nodeDatas); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
|
||||
for _, node := range nodeDatas {
|
||||
utils.SaveDomains(node.Data.Data.LeafCert.AllDomains)
|
||||
}
|
||||
|
||||
// saveDomains()
|
||||
// go findDomainEdges()
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
package utils
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/jmoiron/jsonq"
|
||||
|
@ -187,3 +192,66 @@ func extractLeafCertStruct(input jsonq.JsonQuery) (models.LeafCertStruct, error)
|
|||
}, nil
|
||||
|
||||
}
|
||||
|
||||
// Meta Information: https://pastebin.com/api_scraping.php
|
||||
// Content: http://pastebin.com/api_scrape_item.php
|
||||
|
||||
// QueryPastes returns metadata for the last 100 public pastes.
|
||||
func QueryPastes() ([]models.PasteMeta, error) {
|
||||
server := "pastebin.com"
|
||||
req, err := http.NewRequest("GET", fmt.Sprintf("https://%s/api_scraping.php?limit=100", server), nil)
|
||||
|
||||
if err != nil {
|
||||
logrus.Fatal("Could not build http request", err)
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
logrus.Error("Could not do requeest due to %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
logrus.Error("Could not fetch response due to %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pastes []models.PasteMeta
|
||||
if err := json.Unmarshal(body, &pastes); err != nil {
|
||||
logrus.Error("Could not decode response due to %v, body %s", err, string(body))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return pastes, err
|
||||
}
|
||||
|
||||
// FetchPaste fetches paste contents via the web API.
|
||||
func FetchPaste(paste models.PasteMeta) (string, error) {
|
||||
url := paste.ScrapeURL
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
log.Printf("Could build request %v due to %v", req, err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
log.Printf("Could not do request %v due to %v", req, err)
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
log.Printf("Could not read response body %v due to %v", resp.Body, err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
return string(body), nil
|
||||
}
|
||||
|
|
|
@ -10,6 +10,5 @@ func FileExists(filename string) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue