diff --git a/characterize.py b/characterize.py index 7b56559..dcbc3b9 100644 --- a/characterize.py +++ b/characterize.py @@ -5,6 +5,7 @@ import subprocess import traceback from typing import Dict, List, Optional, Tuple, Union +import array import hachoir.core.config as hachoir_config from hachoir.core.log import log as hachoir_logger, Log, Logger @@ -127,10 +128,19 @@ def start(self) -> None: def execute(self, request: ServiceRequest) -> None: request.result = Result() - # 1. Calculate entropy map with open(request.file_path, 'rb') as fin: + # 1. Calculate entropy map (entropy, part_entropies) = calculate_partition_entropy(fin) + # 2. Optionally get byte histogram + get_byte_histogram = self.config.get("get_entropy_histogram", False) + if get_byte_histogram: + fin.seek(0) + file_bytes = fin.read() + histogram = array.array('L', [0] * 256) + for byte in file_bytes: + histogram[byte] += 1 + entropy_graph_data = { 'type': 'colormap', 'data': { @@ -139,9 +149,18 @@ def execute(self, request: ServiceRequest) -> None: } } - ResultSection(f"File entropy: {round(entropy, 3)}", parent=request.result, body_format=BODY_FORMAT.GRAPH_DATA, + entropy_sec = ResultSection(f"File entropy: {round(entropy, 3)}", parent=request.result, body_format=BODY_FORMAT.GRAPH_DATA, body=json.dumps(entropy_graph_data)) + if get_byte_histogram: + histogram_data = { 'entropy_byte_histogram' : histogram.tolist() } + ResultSection( + 'File Byte Histogram', + body_format=BODY_FORMAT.KEY_VALUE, + body=histogram_data, + parent=entropy_sec + ) + if request.file_type == "meta/shortcut/windows": # 2. Parse windows shortcuts self.parse_link(request.result, request.file_path)