Merge branch 'master' of https://github.com/kevthehermit/PasteHunter

kevthehermit · Jan 8, 2019 · f438777 · f438777
2 parents 0b3c15a + 8100547
commit f438777
Show file tree

Hide file tree

Showing 10 changed files with 173 additions and 35 deletions.
diff --git a/.gitignore b/.gitignore
@@ -106,3 +106,4 @@ ENV/
 /.idea
 /postprocess/tester.py
 .vscode/
+logs/
diff --git a/YaraRules/api_keys.yar b/YaraRules/api_keys.yar
@@ -48,7 +48,7 @@ rule google_api
     strings:
         $a = /\bAIza.{35}\b/
     condition:
-        all of them
+        any of them
 }
 
 rule slack_api
@@ -60,8 +60,9 @@ rule slack_api
 
     strings:
         $a = /(xox(p|b|o|a)-[0-9]{9,12}-[0-9]{9,12}-[0-9]{9,12}-[a-z0-9]{32})/
+        $b = "hooks.slack.com" nocase
     condition:
-        all of them
+        any of them
 }
 
 rule github_api
@@ -74,7 +75,7 @@ rule github_api
     strings:
         $a = /[g|G][i|I][t|T][h|H][u|U][b|B].*[[\'|"]0-9a-zA-Z]{35,40}[\'|"]/
     condition:
-        all of them
+        any of them
 }
 
 rule aws_api
@@ -87,7 +88,7 @@ rule aws_api
     strings:
         $a = /AKIA[0-9A-Z]{16}/
     condition:
-        all of them
+        any of them
 }
 
 rule heroku_api
@@ -100,6 +101,5 @@ rule heroku_api
     strings:
         $a = /[h|H][e|E][r|R][o|O][k|K][u|U].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}/
     condition:
-        all of them
+        any of them
 }
-
diff --git a/YaraRules/certificates.yar b/YaraRules/certificates.yar
@@ -0,0 +1,27 @@
+/*
+    This rule will look for common encoded certificates and secrets
+*/
+
+rule certificates
+{
+    meta:
+        author = "@KevTheHermit"
+        info = "Part of PasteHunter"
+        reference = "https://github.com/kevthehermit/PasteHunter"
+
+    strings:
+        // Plain-text credential keywords
+        $enabled_sec = "enable secret" wide ascii nocase
+        $enable_pass = "enable password" wide ascii nocase
+
+        // "BEGIN ... PRIVATE KEY" markers
+        $ssh_priv = "BEGIN RSA PRIVATE KEY" wide ascii nocase
+        $openssh_priv = "BEGIN OPENSSH PRIVATE KEY" wide ascii nocase
+        $dsa_priv = "BEGIN DSA PRIVATE KEY" wide ascii nocase
+        $ec_priv = "BEGIN EC PRIVATE KEY" wide ascii nocase
+        $pgp_priv = "BEGIN PGP PRIVATE KEY" wide ascii nocase
+
+        // Certificate / PKCS7 markers
+        $pem_cert = "BEGIN CERTIFICATE" wide ascii nocase
+        // Same modifiers as every other string so UTF-16 and mixed-case
+        // pastes are matched consistently.
+        $pkcs7 = "BEGIN PKCS7" wide ascii nocase
+
+    condition:
+        // A single marker is enough to flag the paste.
+        any of them
+}
+
diff --git a/YaraRules/core_keywords.yar b/YaraRules/core_keywords.yar
@@ -12,13 +12,6 @@ rule core_keywords
     strings:
         $tango_down = "TANGO DOWN" wide ascii nocase
         $antisec = "antisec" wide ascii nocase
-        $enabled_sec = "enable secret" wide ascii nocase
-        $enable_pass = "enable password" wide ascii nocase
-        $ssh_priv = "BEGIN RSA PRIVATE KEY" wide ascii nocase
-        $openssh_priv = "BEGIN OPENSSH PRIVATE KEY" wide ascii nocase
-        $dsa_priv = "BEGIN DSA PRIVATE KEY" wide ascii nocase
-        $ec_priv = "BEGIN EC PRIVATE KEY" wide ascii nocase
-        $pgp_priv = "BEGIN PGP PRIVATE KEY" wide ascii nocase
         $hacked = "hacked by" wide ascii nocase
         $onion_url = /.*.\.onion/
     condition:

diff --git a/YaraRules/powershell.yar b/YaraRules/powershell.yar
@@ -19,6 +19,9 @@ rule powershell
         $g = "invoke" nocase
         $h = "bitsadmin" nocase
         $i = "certutil -decode" nocase
+        $j = "hidden" nocase
+        $k = "nop" nocase
+        $l = "-e" nocase
     condition:
         4 of them
 

diff --git a/inputs/pastebin.py b/inputs/pastebin.py
@@ -16,6 +16,11 @@ def recent_pastes(conf, input_history):
         # Get some pastes and convert to json
         # Get last 'paste_limit' pastes
         paste_list_request = requests.get(scrape_uri)
+
+        # Check to see if our IP is whitelisted or not. 
+        if 'DOES NOT HAVE ACCESS' in paste_list_request.text:
+            logger.error("Your IP is not whitelisted visits 'https://pastebin.com/doc_scraping_api'")
+            return [], []
         paste_list_json = paste_list_request.json()
 
         for paste in paste_list_json:

diff --git a/outputs/slack_output.py b/outputs/slack_output.py
@@ -0,0 +1,51 @@
+import os
+import datetime
+import json
+import logging
+import requests
+from common import parse_config
+
+logger = logging.getLogger('pastehunter')
+
+config = parse_config()
+
+
+class SlackOutput:
+    """Output module that posts an alert to a Slack webhook URL.
+
+    Reads ``webhook_url`` and ``rule_list`` from the
+    ``outputs -> slack_output`` section of the parsed configuration.
+
+    Attributes:
+        valid: False when no webhook URL is configured; ``store_paste``
+            then does nothing.
+        webhook_url: URL the alert JSON is POSTed to.
+        accepted_rules: Rule names that trigger an alert.
+    """
+
+    def __init__(self):
+        slack_conf = config['outputs']['slack_output']
+        self.valid = True
+        self.webhook_url = slack_conf['webhook_url']
+        self.accepted_rules = slack_conf['rule_list']
+
+        # Without a webhook there is nowhere to post, so disable the output.
+        if not self.webhook_url:
+            logger.error("Slack Webhook not configured")
+            self.valid = False
+        # An empty rule list means store_paste() can never find a match.
+        if not self.accepted_rules:
+            logger.error("No Rules configured to alert")
+
+    def store_paste(self, paste_data):
+        """Send a Slack alert for ``paste_data`` if it hit an accepted rule.
+
+        Args:
+            paste_data: Mapping that provides the keys ``YaraRule``,
+                ``pasteid`` and ``pastesite``.
+
+        Nothing is sent when the output is not valid, or when none of the
+        configured rules is contained in ``paste_data['YaraRule']``.
+        Exceptions raised by ``requests.post`` propagate to the caller.
+        """
+        if not self.valid:
+            return
+
+        if not any(rule in paste_data['YaraRule'] for rule in self.accepted_rules):
+            return
+
+        summary = "Yara Rule {0} Found on {1}".format(paste_data['YaraRule'], paste_data['pastesite'])
+        json_data = {
+            "text": "Pastehunter alert!",
+            "attachments": [
+                {
+                    # Plain-text fallback carries the real alert text
+                    # instead of the sample placeholder.
+                    "fallback": summary,
+                    "author_name": "PasteHunter",
+                    "title": "Paste ID {0}".format(paste_data['pasteid']),
+                    "text": summary
+                }
+            ]
+        }
+
+        req = requests.post(self.webhook_url, json=json_data)
+        # Success is only assumed for HTTP 200 with a body of exactly 'ok'.
+        if req.status_code == 200 and req.text == 'ok':
+            logger.debug("Paste sent to slack")
+        else:
+            logger.error(
+                "Failed to post to slack Status Code {0}".format(req.status_code))
diff --git a/pastehunter.py b/pastehunter.py
@@ -6,17 +6,18 @@
 import json
 import hashlib
 import requests
-import threading
+import multiprocessing
 import importlib
 import logging
+import time
 from time import sleep
-from queue import Queue
+#from queue import Queue
 from common import parse_config
 from postprocess import post_email
 
-VERSION = 0.1
+from multiprocessing import Queue
 
-lock = threading.Lock()
+VERSION = 0.2
 
 # Setup Default logging
 logger = logging.getLogger('pastehunter')
@@ -93,18 +94,28 @@ def paste_scanner():
     # Store the Paste
     while True:
         paste_data = q.get()
+
+        # Start a timer
+        start_time = time.time()
         logger.debug("Found New {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid']))
         # get raw paste and hash them
         raw_paste_uri = paste_data['scrape_url']
         raw_paste_data = requests.get(raw_paste_uri).text
-        # Process the paste data here
 
+        # Pastebin Cache
+        if raw_paste_data == "File is not ready for scraping yet. Try again in 1 minute.":
+            logger.info("Paste is still cached sleeping to try again")
+            sleep(45)
+            # get raw paste and hash them
+            raw_paste_uri = paste_data['scrape_url']
+            raw_paste_data = requests.get(raw_paste_uri).text
+
+        # Process the paste data here
         try:
             # Scan with yara
             matches = rules.match(data=raw_paste_data)
         except Exception as e:
             logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
-            q.task_done()
             continue
 
         results = []
@@ -137,13 +148,13 @@ def paste_scanner():
         post_results = paste_data
         for post_process, post_values in conf["post_process"].items():
             if post_values["enabled"]:
-                if any(i in results for i in post_values["rule_list"]):
+                if any(i in results for i in post_values["rule_list"]) or "ALL" in post_values["rule_list"]:
                     logger.info("Running Post Module {0} on {1}".format(post_values["module"], paste_data["pasteid"]))
                     post_module = importlib.import_module(post_values["module"])
                     post_results = post_module.run(results,
-                                                   raw_paste_data,
-                                                   paste_data
-                                                   )
+                                                    raw_paste_data,
+                                                    paste_data
+                                                    )
 
         # Throw everything back to paste_data for ease.
         paste_data = post_results
@@ -174,10 +185,14 @@ def paste_scanner():
                 try:
                     output.store_paste(paste_data)
                 except Exception as e:
-                    logger.error("Unable to store {0} to {1}".format(paste_data["pasteid"], e))
+                    logger.error("Unable to store {0} to {1} with error {2}".format(paste_data["pasteid"], output, e))
+
+        end_time = time.time()
+        logger.debug("Processing Finished for {0} in {1} seconds".format(
+            paste_data["pasteid"],
+            (end_time - start_time)
+        ))
 
-        # Mark Tasks as complete
-        q.task_done()
 
 
 if __name__ == "__main__":
@@ -197,16 +212,19 @@ def paste_scanner():
 
     # Create Queue to hold paste URI's
     q = Queue()
+    processes = []
 
     # Threads
     for i in range(5):
-        t = threading.Thread(target=paste_scanner)
-        t.daemon = True
-        t.start()
+        m = multiprocessing.Process(target=paste_scanner)
+        # Add new process to list so we can run join on them later. 
+        processes.append(m)
+        m.start()
 
     # Now Fill the Queue
     try:
         while True:
+            queue_count = 0
             # Paste History
             logger.info("Populating Queue")
             if os.path.exists('paste_history.tmp'):
@@ -227,19 +245,27 @@ def paste_scanner():
                 paste_list, history = i.recent_pastes(conf, input_history)
                 for paste in paste_list:
                     q.put(paste)
+                    queue_count += 1
                 paste_history[input_name] = history
 
             logger.debug("Writing History")
             # Write History
             with open('paste_history.tmp', 'w') as outfile:
                 json.dump(paste_history, outfile)
+            logger.info("Added {0} Items to the queue".format(queue_count))
 
-            # Flush the list
-            q.join()
+            for proc in processes:
+                proc.join(2)
 
             # Slow it down a little
             logger.info("Sleeping for " + str(conf['general']['run_frequency']) + " Seconds")
             sleep(conf['general']['run_frequency'])
+
+
 
     except KeyboardInterrupt:
-        logger.info("Stopping Threads")
+        logger.info("Stopping Processes")
+        for proc in processes:
+            proc.terminate()
+            proc.join()
+
diff --git a/postprocess/post_entropy.py b/postprocess/post_entropy.py
@@ -0,0 +1,16 @@
+import re
+import math
+from collections import Counter
+
+def shannon_entropy(s):
+    """Return the base-2 Shannon entropy of the characters in ``str(s)``.
+
+    Adapted from https://rosettacode.org/wiki/Entropy#Python
+    """
+    text = str(s)
+    total_chars = float(len(text))
+    frequencies = Counter(text)
+    # Lazily evaluated, so an empty string never divides by zero.
+    probabilities = (occurrences / total_chars for occurrences in frequencies.values())
+    return -sum(prob * math.log(prob, 2) for prob in probabilities)
+
+
+def run(results, raw_paste_data, paste_object):
+    """Post-process hook: record the raw paste's Shannon entropy.
+
+    Stores the value under the "Shannon Entropy" key of ``paste_object``
+    and returns that same object. ``results`` is accepted but not used.
+    """
+    entropy = shannon_entropy(raw_paste_data)
+    paste_object["Shannon Entropy"] = entropy
+    return paste_object
diff --git a/settings.json.sample b/settings.json.sample
@@ -9,7 +9,8 @@
       "store_all": false
     },
     "dumpz": {
-      "enabled": true,
+      "enabled": false,
+      "comment": "This api endpoint has been removed.",
       "module": "inputs.dumpz",
       "api_scrape": "https://dumpz.org/api/recent",
       "api_raw": "https://dumpz.org/api/dump",
@@ -28,7 +29,10 @@
     "slexy":{
       "enabled": true,
       "module": "inputs.slexy",
-      "store_all": false
+      "store_all": false,
+      "api_scrape": "http://slexy.org/recent",
+      "api_raw": "http://slexy.org/raw",
+      "api_view": "http://slexy.org/view"
     }
   },
   "outputs": {
@@ -94,6 +98,13 @@
           "mandatory_rule_list": ["keyword1", "keyword2"]
         }
       }
+    },
+    "slack_output": {
+      "enabled": true,
+      "module": "outputs.slack_output",
+      "classname": "SlackOutput",
+      "webhook_url": "",
+      "rule_list": ["custom_keywords"]
     }
   },
   "yara": {
@@ -124,7 +135,12 @@
         "enabled": false,
         "api_host": "127.0.0.1",
         "api_port": 8080
-      }
+      },
+    "post_entropy": {
+      "enabled": true,
+      "module": "postprocess.post_entropy",
+      "rule_list": ["ALL"]
+    }
     }
   }
 }