Skip to content

Commit

Permalink
Merge pull request #161 from emmanvg/files-endpoint-fix-dev
Browse files Browse the repository at this point in the history
Fixes for API Endpoint - DEV (#159)
  • Loading branch information
ptcNOP authored Nov 19, 2018
2 parents 984f246 + 2d68095 commit c9f46c6
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 42 deletions.
6 changes: 3 additions & 3 deletions multiscanner/common/pdf_generator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ def create_pdf_document(DIR, report):
gen_pdf.pdf_list.extend(summary)

summary_data = [
['Date Submitted', report.get('Report', {}).get('Scan Time', 'N\A')],
['Artifact ID', report.get('Report', {}).get('SHA256', 'N\A')],
['Description', pdf_components.get('summary_description', 'N\A')],
['Date Submitted', report.get('Report', {}).get('Scan Time', 'N/A')],
['Artifact ID', report.get('Report', {}).get('SHA256', 'N/A')],
['Description', pdf_components.get('summary_description', 'N/A')],
['Files Processed', '1'],
['', report.get('Report', {}).get('filename', 'NO FILENAME AVAILABLE')]
]
Expand Down
8 changes: 4 additions & 4 deletions multiscanner/common/stix2_generator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,11 @@ def parse_json_report_to_stix2_bundle(report, custom_labels=None):
cuckoo = r.get('Cuckoo Sandbox', {})

for signature in cuckoo.get('signatures', []):
if ('description' in signature and
'HTTP request' in signature.get('description', '')):
if ('description' in signature and 'HTTP request'
in signature.get('description', '')):
all_objects.extend(extract_http_requests_cuckoo(signature, custom_labels))
elif ('description' in signature and
'Potentially malicious URLs' in signature.get('description', '')):
elif ('description' in signature and 'Potentially malicious URLs'
in signature.get('description', '')):
all_objects.extend(extract_http_requests_cuckoo(signature, custom_labels))
for dropped in cuckoo.get('dropped', []):
if dropped and any(x in dropped for x in ('sha256', 'md5', 'sha1', 'ssdeep')):
Expand Down
42 changes: 23 additions & 19 deletions multiscanner/distributed/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import multiprocessing
import os
import queue
import re
import shutil
import subprocess
import time
Expand All @@ -55,7 +56,7 @@

import rarfile
import requests
from flask import Flask, abort, jsonify, make_response, request
from flask import Flask, abort, jsonify, make_response, request, safe_join
from flask.json import JSONEncoder
from flask_cors import CORS
from jinja2 import Markup
Expand Down Expand Up @@ -83,7 +84,7 @@
'upload_folder': '/mnt/samples/',
'distributed': True,
'web_loc': 'http://localhost:80',
'cors': 'https?://localhost(:\d+)?',
'cors': r'https?://localhost(:\d+)?',
'batch_size': 100,
'batch_interval': 60 # Number of seconds to wait for additional files
# submitted to the create/ API
Expand Down Expand Up @@ -555,7 +556,7 @@ def create_task():
)


@app.route('/api/v1/tasks/<task_id>/report', methods=['GET'])
@app.route('/api/v1/tasks/<int:task_id>/report', methods=['GET'])
def get_report(task_id):
'''
Return a JSON dictionary corresponding
Expand Down Expand Up @@ -642,7 +643,7 @@ def _linkify(s, url, new_tab=True):
s=s)


@app.route('/api/v1/tasks/<task_id>/file', methods=['GET'])
@app.route('/api/v1/tasks/<int:task_id>/file', methods=['GET'])
def files_get_task(task_id):
# try to get report dict
report_dict, success = get_report_dict(task_id)
Expand All @@ -659,7 +660,7 @@ def files_get_task(task_id):
return jsonify({'Error': 'sha256 not in report!'})


@app.route('/api/v1/tasks/<task_id>/maec', methods=['GET'])
@app.route('/api/v1/tasks/<int:task_id>/maec', methods=['GET'])
def get_maec_report(task_id):
# try to get report dict
report_dict, success = get_report_dict(task_id)
Expand Down Expand Up @@ -713,7 +714,7 @@ def taglist():
return jsonify({'Tags': response})


@app.route('/api/v1/tasks/<task_id>/tags', methods=['POST', 'DELETE'])
@app.route('/api/v1/tasks/<int:task_id>/tags', methods=['POST', 'DELETE'])
def tags(task_id):
'''
Add/Remove the specified tag to the specified task.
Expand All @@ -737,7 +738,7 @@ def tags(task_id):
return jsonify({'Message': 'Tag Removed'})


@app.route('/api/v1/tasks/<task_id>/notes', methods=['GET'])
@app.route('/api/v1/tasks/<int:task_id>/notes', methods=['GET'])
def get_notes(task_id):
'''
Get one or more analyst notes/comments associated with the specified task.
Expand Down Expand Up @@ -767,7 +768,7 @@ def get_notes(task_id):
return jsonify(response)


@app.route('/api/v1/tasks/<task_id>/notes', methods=['POST'])
@app.route('/api/v1/tasks/<int:task_id>/notes', methods=['POST'])
def add_note(task_id):
'''
Add an analyst note/comment to the specified task.
Expand All @@ -782,7 +783,7 @@ def add_note(task_id):
return jsonify(response)


@app.route('/api/v1/tasks/<task_id>/notes/<note_id>', methods=['PUT', 'DELETE'])
@app.route('/api/v1/tasks/<int:task_id>/notes/<int:note_id>', methods=['PUT', 'DELETE'])
def edit_note(task_id, note_id):
'''
Modify/remove the specified analyst note/comment.
Expand All @@ -802,7 +803,7 @@ def edit_note(task_id, note_id):
return jsonify(response)


@app.route('/api/v1/files/<sha256>', methods=['GET'])
@app.route('/api/v1/files/<string:sha256>', methods=['GET'])
# get raw file - /api/v1/files/get/<sha256>?raw=true
def files_get_sha256(sha256):
'''
Expand All @@ -811,18 +812,21 @@ def files_get_sha256(sha256):
# is there a robust way to just get this as a bool?
raw = request.args.get('raw', default='False', type=str)

return files_get_sha256_helper(sha256, raw)
if re.match(r'^[a-fA-F0-9]{64}$', sha256):
return files_get_sha256_helper(sha256, raw)
else:
return abort(HTTP_BAD_REQUEST)


def files_get_sha256_helper(sha256, raw=None):
'''
Returns binary from storage. Defaults to password protected zipfile.
'''
file_path = os.path.join(api_config['api']['upload_folder'], sha256)
file_path = safe_join(api_config['api']['upload_folder'], sha256)
if not os.path.exists(file_path):
abort(HTTP_NOT_FOUND)

with open(file_path, "rb") as fh:
with open(file_path, 'rb') as fh:
fh_content = fh.read()

raw = raw[0].lower()
Expand All @@ -834,13 +838,13 @@ def files_get_sha256_helper(sha256, raw=None):
else:
# ref: https://github.com/crits/crits/crits/core/data_tools.py#L122
rawname = sha256 + '.bin'
with open(os.path.join('/tmp/', rawname), 'wb') as raw_fh:
with open(safe_join('/tmp/', rawname), 'wb') as raw_fh:
raw_fh.write(fh_content)

zipname = sha256 + '.zip'
args = ['/usr/bin/zip', '-j',
os.path.join('/tmp', zipname),
os.path.join('/tmp', rawname),
safe_join('/tmp', zipname),
safe_join('/tmp', rawname),
'-P', 'infected']
proc = subprocess.Popen(args)
wait_seconds = 30
Expand All @@ -854,7 +858,7 @@ def files_get_sha256_helper(sha256, raw=None):
proc.terminate()
return make_response(jsonify({'Error': 'Process timed out'}))
else:
with open(os.path.join('/tmp', zipname), 'rb') as zip_fh:
with open(safe_join('/tmp', zipname), 'rb') as zip_fh:
zip_data = zip_fh.read()
if len(zip_data) == 0:
return make_response(jsonify({'Error': 'Zip file empty'}))
Expand Down Expand Up @@ -899,7 +903,7 @@ def run_ssdeep_group():
HTTP_BAD_REQUEST)


@app.route('/api/v1/tasks/<task_id>/pdf', methods=['GET'])
@app.route('/api/v1/tasks/<int:task_id>/pdf', methods=['GET'])
def get_pdf_report(task_id):
'''
Generates a PDF version of a JSON report.
Expand All @@ -916,7 +920,7 @@ def get_pdf_report(task_id):
return response


@app.route('/api/v1/tasks/<task_id>/stix2', methods=['GET'])
@app.route('/api/v1/tasks/<int:task_id>/stix2', methods=['GET'])
def get_stix2_bundle_from_report(task_id):
'''
Generates a STIX2 Bundle with indicators generated of a JSON report.
Expand Down
6 changes: 3 additions & 3 deletions multiscanner/modules/Antivirus/AVGScan.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def scan(filelist, conf=DEFAULTCONF):
return None
# Parse output
output = output.decode("utf-8", errors='replace')
virusresults = re.findall("(?:\([^\)]*\) )?([^\s]+) (.+)\s+$", output, re.MULTILINE)
virusresults = re.findall(r"(?:\([^\)]*\) )?([^\s]+) (.+)\s+$", output, re.MULTILINE)
results = []
for (file, result) in virusresults[:]:
if result.endswith(' '):
Expand All @@ -88,13 +88,13 @@ def scan(filelist, conf=DEFAULTCONF):
results.append((file, result))

metadata = {}
verinfo = re.search("Program version ([\d\.]+), engine ([\d\.]+)", output)
verinfo = re.search(r"Program version ([\d\.]+), engine ([\d\.]+)", output)
metadata["Name"] = NAME
metadata["Type"] = TYPE
if verinfo:
metadata["Program version"] = verinfo.group(1)
metadata["Engine version"] = verinfo.group(2)
verinfo = re.search("Virus Database: Version ([\d/]+) ([\d-]+)", output)
verinfo = re.search(r"Virus Database: Version ([\d/]+) ([\d-]+)", output)
if verinfo:
metadata["Definition version"] = verinfo.group(1)
metadata["Definition date"] = verinfo.group(2)
Expand Down
8 changes: 4 additions & 4 deletions multiscanner/modules/Antivirus/McAfeeScan.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,16 @@ def scan(filelist, conf=DEFAULTCONF):

# Parse output
output = output.decode("utf-8")
virusresults = re.findall("([^\n\r]+) ... Found: ([^\n\r]+)", output, re.MULTILINE)
virusresults = re.findall(r"([^\n\r]+) ... Found: ([^\n\r]+)", output, re.MULTILINE)
metadata = {}
verinfo = re.search("McAfee VirusScan Command Line for \S+ Version: ([\d\.]+)", output)
verinfo = re.search(r"McAfee VirusScan Command Line for \S+ Version: ([\d.]+)", output)
metadata["Name"] = NAME
metadata["Type"] = TYPE
if verinfo:
metadata["Program version"] = verinfo.group(1)
verinfo = re.search("AV Engine version: ([\d\.]+)\s", output)
verinfo = re.search(r"AV Engine version: ([\d\.]+)\s", output)
metadata["Engine version"] = verinfo.group(1)
verinfo = re.search("Dat set version: (\d+) created (\w+ (?:\d|\d\d) \d\d\d\d)", output)
verinfo = re.search(r"Dat set version: (\d+) created (\w+ (?:\d|\d\d) \d\d\d\d)", output)
metadata["Definition version"] = verinfo.group(1)
metadata["Definition date"] = verinfo.group(2)

Expand Down
4 changes: 2 additions & 2 deletions multiscanner/modules/Antivirus/VFindScan.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def scan(filelist, conf=DEFAULTCONF):
except subprocess.CalledProcessError as error:
return None

results = re.findall("^##==>>>> VIRUS POSSIBLE IN FILE: \"(.+)\"\n##==>>>> VIRUS ID: (\w+ .+)",
results = re.findall(r"^##==>>>> VIRUS POSSIBLE IN FILE: \"(.+)\"\n##==>>>> VIRUS ID: (\w+ .+)",
output,
re.MULTILINE)

vfind_version = ""
try:
vfind_version = re.search("^##==> VFind Version: (\d+), Release: (\d+), Patchlevel: (\d+) .+", output)
vfind_version = re.search(r"^##==> VFind Version: (\d+), Release: (\d+), Patchlevel: (\d+) .+", output)
vfind_version = "{}.{}.{}".format(
vfind_version.group(1),
vfind_version.group(2),
Expand Down
2 changes: 1 addition & 1 deletion multiscanner/modules/Metadata/TrID.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def scan(filelist, conf=DEFAULTCONF):
continue

if fname:
virusresults = re.findall("\s*(\d+.\d+\%) \((\.[^\)]+)\) (.+) \(\d+/", line)
virusresults = re.findall(r"\s*(\d+.\d+\%) \((\.[^\)]+)\) (.+) \(\d+/", line)
if virusresults:
confidence, exnt, ftype = virusresults[0]
fresults[fname].append([confidence, ftype, exnt])
Expand Down
4 changes: 2 additions & 2 deletions multiscanner/modules/Metadata/UADScan.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def scan(filelist, conf=DEFAULTCONF):
return None

# find and submit expanded files for scanning
components = re.findall("^(\d+): Tmpfile: (.+)$", output, re.MULTILINE)
components = re.findall(r"^(\d+): Tmpfile: (.+)$", output, re.MULTILINE)

for item in components:
# skip base level items to avoid rescanning files already expanded
Expand All @@ -61,7 +61,7 @@ def scan(filelist, conf=DEFAULTCONF):
# TODO: is there a max recursion depth?
multiscanner.scan_file(item[1], input_file) # noqa F821

results += re.findall("^0: Name: (.+)\n^\d+: Type: (.+)$", output, re.MULTILINE)
results += re.findall(r"^0: Name: (.+)\n^\d+: Type: (.+)$", output, re.MULTILINE)

metadata = {
"Type": TYPE,
Expand Down
2 changes: 1 addition & 1 deletion multiscanner/modules/Metadata/pdfinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def H(data):


def _get_pdf_version(data):
header_ver = re.compile('%PDF-([A-Za-z0-9\.]{1,3})[\r\n]', re.M)
header_ver = re.compile(r'%PDF-([A-Za-z0-9\.]{1,3})[\r\n]', re.M)
matches = header_ver.match(data.decode('UTF-8', 'replace'))
if matches:
return matches.group(1)
Expand Down
2 changes: 1 addition & 1 deletion multiscanner/storage/elasticsearch_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def search(self, query_string, search_type='default'):
if search_type == 'advanced':
query = self.build_query(query_string)
else:
es_reserved_chars_re = '([\+\-=\>\<\!\(\)\{\}\[\]\^\"\~\*\?\:\\/ ])'
es_reserved_chars_re = r'([\+\-=\>\<\!\(\)\{\}\[\]\^\"\~\*\?\:\\/ ])'
query_string = re.sub(es_reserved_chars_re, r'\\\g<1>', query_string)
if search_type == 'default':
query = self.build_query("*" + query_string + "*")
Expand Down
46 changes: 44 additions & 2 deletions multiscanner/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def test_edit_notes(self, mock_handler):

args, kwargs = mock_handler.edit_note.call_args_list[0]
self.assertEqual(args[0], '114d70ba7d04c76d8c217c970f99682025c89b1a6ffe91eb9045653b4b954eb9')
self.assertEqual(args[1], '1')
self.assertEqual(args[1], 1)
self.assertEqual(args[2], 'bar')

@mock.patch('multiscanner.distributed.api.handler')
Expand All @@ -281,4 +281,46 @@ def test_remove_notes(self, mock_handler):

args, kwargs = mock_handler.delete_note.call_args_list[0]
self.assertEqual(args[0], '114d70ba7d04c76d8c217c970f99682025c89b1a6ffe91eb9045653b4b954eb9')
self.assertEqual(args[1], '1')
self.assertEqual(args[1], 1)


class TestSHA256DownloadSampleCase(APITestCase):
    '''
    Tests for the GET /api/v1/files/<sha256> sample download endpoint.

    Covers rejection of identifiers that are not 64 hex characters, a raw
    download of a stored sample, and the not-found case.
    '''

    def setUp(self):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this test case is subclassed, because
        # self.__class__ is then the subclass and super() resolves back to
        # this very method.
        super(TestSHA256DownloadSampleCase, self).setUp()
        # populate the DB w/ a task
        post_file(self.app)
        self.sql_db.update_task(
            task_id=1,
            task_status='Complete',
        )

    @mock.patch('multiscanner.distributed.api.db')
    @mock.patch('multiscanner.distributed.api.handler')
    def test_malformed_request(self, mock_handler, mock_db):
        # A path-like identifier (not a SHA256 hex digest) must be refused.
        resp = self.app.get(r'/api/v1/files/..\opt\multiscanner\web_config.ini')

        self.assertEqual(resp.status_code, api.HTTP_BAD_REQUEST)

    @mock.patch('multiscanner.distributed.api.db')
    @mock.patch('multiscanner.distributed.api.handler')
    def test_other_hash(self, mock_handler, mock_db):
        # using MD5 instead of SHA256
        resp = self.app.get('/api/v1/files/96b47da202ddba8d7a6b91fecbf89a41')

        self.assertEqual(resp.status_code, api.HTTP_BAD_REQUEST)

    @mock.patch('multiscanner.distributed.api.db')
    @mock.patch('multiscanner.distributed.api.handler')
    def test_file_download_raw(self, mock_handler, mock_db):
        # NOTE(review): presumably this is the payload uploaded by
        # post_file() in setUp -- confirm against that helper.
        expected_response = b'my file contents'
        resp = self.app.get('/api/v1/files/114d70ba7d04c76d8c217c970f99682025c89b1a6ffe91eb9045653b4b954eb9?raw=t')

        self.assertEqual(resp.status_code, api.HTTP_OK)
        self.assertEqual(resp.get_data(), expected_response)

    @mock.patch('multiscanner.distributed.api.db')
    @mock.patch('multiscanner.distributed.api.handler')
    def test_file_not_found(self, mock_handler, mock_db):
        # Well-formed SHA256 that this test expects not to be in storage.
        resp = self.app.get('/api/v1/files/26d11f0ea5cc77a59b6e47deee859440f26d2d14440beb712dbac8550d35ef1f?raw=t')

        self.assertEqual(resp.status_code, api.HTTP_NOT_FOUND)

0 comments on commit c9f46c6

Please sign in to comment.