diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 00000000..9e3d0461 --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,3 @@ +codecov: + ci: + - community-tc.services.mozilla.com diff --git a/.github/scripts/start-worker.sh b/.github/scripts/start-worker.sh new file mode 100755 index 00000000..baa0c7af --- /dev/null +++ b/.github/scripts/start-worker.sh @@ -0,0 +1,58 @@ +#!/bin/sh -e + +[ -z "$TASKCLUSTER_ACCESS_TOKEN" ] && echo "Missing TASKCLUSTER_ACCESS_TOKEN" >&2 && exit 2 +[ -z "$TC_WORKER_ID" ] && echo "Missing TC_WORKER_ID" >&2 && exit 2 + +set -x + +TC_VERSION=v44.4.0 +TC_PROJECT=fuzzing +TC_WORKER_TYPE=ci-osx +TC_IDLE_TIMEOUT=300 + +TASKCLUSTER_ROOT_URL="https://community-tc.services.mozilla.com" +TASKCLUSTER_CLIENT_ID="project/$TC_PROJECT/worker-$TC_WORKER_TYPE-gh" + +set +x +cat > worker.config < no-op``` + +**Reduce** - [Grizzly Reduce](https://github.com/MozillaSecurity/grizzly/wiki/Grizzly-Reduce) can reduce a test case. + +```python3 -m grizzly.reduce ``` + +**Replay** - [Grizzly Replay](https://github.com/MozillaSecurity/grizzly/wiki/Grizzly-Replay) can replay a test case with different builds and debuggers. -Target platforms -------- -Other target platforms can be defined as [setuptools entry-points](https://setuptools.readthedocs.io/en/latest/setuptools.html#dynamic-discovery-of-services-and-plugins), -using the name "grizzly_targets". Targets must implement `grizzly.target.Target`. +```python3 -m grizzly.replay ``` diff --git a/grizzly/__main__.py b/grizzly/__main__.py index eb579e83..54151d3d 100644 --- a/grizzly/__main__.py +++ b/grizzly/__main__.py @@ -2,11 +2,6 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -from logging import basicConfig, DEBUG -from os import getenv -from sys import exit as sysexit - -from .adapters import load from .args import GrizzlyArgs from .main import main @@ -14,15 +9,4 @@ __credits__ = ["Tyson Smith", "Jesse Schwartzentruber"] -# TODO: This can go away once Adapters are loaded using -# setuptools entrypoints. It is only needed to get log output from -# load() because it is called before parse arguments (which -# is where basicConfig should be called). -if getenv("DEBUG"): - basicConfig( - format="%(levelname).1s %(name)s [%(asctime)s] %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - level=DEBUG) -# load Adapters -load() -sysexit(main(GrizzlyArgs().parse_args())) +raise SystemExit(main(GrizzlyArgs().parse_args())) diff --git a/grizzly/adapter/__init__.py b/grizzly/adapter/__init__.py new file mode 100644 index 00000000..68e44031 --- /dev/null +++ b/grizzly/adapter/__init__.py @@ -0,0 +1,10 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +from .adapter import Adapter, AdapterError + +__all__ = ( + "Adapter", + "AdapterError", +) diff --git a/grizzly/adapter/adapter.py b/grizzly/adapter/adapter.py new file mode 100644 index 00000000..473a5f1b --- /dev/null +++ b/grizzly/adapter/adapter.py @@ -0,0 +1,191 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
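The new `grizzly/adapter` package whose base class begins below is discovered via setuptools entry points (see `grizzly/common/plugins.py` later in this diff), replacing the removed directory-scanning loader in `grizzly/adapters/__init__.py`. As a hedged sketch, an out-of-tree adapter might register itself like this; the package, module, and entry-point names (`my-grizzly-adapter`, `my_pkg`, `MyFuzzerAdapter`) are illustrative, not part of this change:

```python
# Hypothetical setup.py for an external Adapter package. Grizzly's plugin
# scan (grizzly.common.plugins.scan("grizzly_adapters")) finds any entry
# point registered in the "grizzly_adapters" group.
from setuptools import setup

setup(
    name="my-grizzly-adapter",  # illustrative package name
    packages=["my_pkg"],
    entry_points={
        "grizzly_adapters": [
            # "<adapter name> = <module>:<Adapter subclass>"
            "my-fuzzer = my_pkg.my_adapter:MyFuzzerAdapter",
        ],
    },
)
```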
+from abc import ABCMeta, abstractmethod
+from pathlib import Path
+
+__all__ = ("Adapter", "AdapterError")
+__author__ = "Tyson Smith"
+__credits__ = ["Tyson Smith"]
+
+
+class AdapterError(Exception):
+    """The base class for exceptions raised by an Adapter"""
+
+
+class Adapter(metaclass=ABCMeta):
+    """An Adapter is an interface between Grizzly and a fuzzer. A subclass must
+    be created in order to add support for additional fuzzers. The Adapter is
+    responsible for handling input/output data and executing the fuzzer.
+    It is expected that any processes launched or files created on the file system
+    by the adapter will also be cleaned up by the adapter.
+    NOTE: Some methods must not be overloaded; doing so will prevent Grizzly from
+    operating correctly.
+
+    Attributes:
+        _harness (bytes): Content of the harness file that will be used. If None,
+            no harness will be used.
+        fuzz (dict): Available as a safe scratch pad for the end-user.
+        monitor (TargetMonitor): Used to provide Target status information to
+            the adapter.
+        name (str): Name of the adapter.
+        remaining (int): Can be used to indicate the number of TestCases
+            remaining to process.
+    """
+
+    HARNESS_FILE = str((Path(__file__).parent / "../common/harness.html").resolve())
+    # Only report test cases with served content.
+    IGNORE_UNSERVED = True
+    # Maximum iterations between Target relaunches (<1 use default)
+    RELAUNCH = 0
+    # Maximum execution time per test (used as minimum timeout). The iteration is
+    # expected to be complete within this time. If the test is still open the
+    # harness will attempt to close it.
+    TIME_LIMIT = 30
+
+    __slots__ = ("_harness", "fuzz", "monitor", "name", "remaining")
+
+    def __init__(self, name):
+        assert isinstance(name, str)
+        if not name:
+            raise AdapterError("name must not be empty")
+        self._harness = None
+        self.fuzz = dict()
+        self.monitor = None
+        self.name = name
+        self.remaining = None
+
+    def cleanup(self):
+        """Automatically called once at shutdown. Used internally by Grizzly.
+        *** DO NOT OVERLOAD! ***
+
+        Args:
+            None
+
+        Returns:
+            None
+        """
+        self.shutdown()
+
+    def enable_harness(self, file_path=None):
+        """Enable use of a harness during fuzzing. By default no harness is used.
+        *** DO NOT OVERLOAD! ***
+
+        Args:
+            file_path (str): Path to file to use as a harness. If None the default
+                harness is used.
+
+        Returns:
+            None
+        """
+        if file_path is None:
+            file_path = self.HARNESS_FILE
+        with open(file_path, "rb") as in_fp:
+            self._harness = in_fp.read()
+
+    def get_harness(self):
+        """Get the harness. Used internally by Grizzly.
+        *** DO NOT OVERLOAD! ***
+
+        Args:
+            None
+
+        Returns:
+            bytes: The active harness content.
+        """
+        return self._harness
+
+    @staticmethod
+    def scan_path(path, ignore=("desktop.ini", "thumbs.db"), recursive=False):
+        """Scan a path and yield the files within it. This is available as
+        a helper method.
+
+        Args:
+            path (str): Path to file or directory.
+            ignore (iterable(str)): Files to ignore.
+            recursive (bool): Scan recursively into directories.
+
+        Yields:
+            str: Absolute path to files.
+        """
+        path = Path(path).resolve()
+        if path.is_dir():
+            path_iter = path.rglob("*") if recursive else path.glob("*")
+            for entry in path_iter:
+                if not entry.is_file():
+                    continue
+                if entry.name in ignore or entry.name.startswith("."):
+                    # skip ignored and hidden system files
+                    continue
+                yield str(entry)
+        elif path.is_file():
+            yield str(path)
+
+    @abstractmethod
+    def generate(self, testcase, server_map):
+        """Automatically called. Populate testcase here.
+
+        Args:
+            testcase (TestCase): TestCase intended to be populated.
+            server_map (ServerMap): A ServerMap.
+
+        Returns:
+            None
+        """
+
+    def on_served(self, testcase, served):
+        """Optional. Automatically called after a test case is successfully served.
+
+        Args:
+            testcase (TestCase): TestCase that was served.
+            served (list(str)): Files served from testcase.
+
+        Returns:
+            None
+        """
+
+    def on_timeout(self, testcase, served):
+        """Optional. Automatically called if timeout occurs while attempting to
+        serve a test case. By default it calls `self.on_served()`.
+
+        Args:
+            testcase (TestCase): TestCase that was served.
+            served (list(str)): Files served from testcase.
+
+        Returns:
+            None
+        """
+        self.on_served(testcase, served)
+
+    def pre_launch(self):
+        """Optional. Automatically called before launching the Target.
+
+        Args:
+            None
+
+        Returns:
+            None
+        """
+
+    def setup(self, input_path, server_map):
+        """Optional. Automatically called once at startup.
+
+        Args:
+            input_path (str): Points to a file or directory passed by the user.
+                None is passed by default.
+            server_map (ServerMap): A ServerMap.
+
+        Returns:
+            None
+        """
+
+    def shutdown(self):
+        """Optional. Automatically called once at shutdown.
+
+        Args:
+            None
+
+        Returns:
+            None
+        """
diff --git a/grizzly/adapter/no_op_adapter/__init__.py b/grizzly/adapter/no_op_adapter/__init__.py
new file mode 100644
index 00000000..0634c9e2
--- /dev/null
+++ b/grizzly/adapter/no_op_adapter/__init__.py
@@ -0,0 +1,56 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from grizzly.adapter import Adapter
+
+__author__ = "Tyson Smith"
+__credits__ = ["Tyson Smith"]
+
+
+class NoOpAdapter(Adapter):
+    """This is a simple adapter that has very little overhead. It can be used
+    to help measure a baseline iteration rate for Grizzly.
+    """
+
+    NAME = "no-op"
+
+    def setup(self, _input, _server_map):
+        """Generate a static test case that calls `window.close()` when run.
+        Normally this is done in generate() but since the test is static it
+        only needs to be done once. Use the default harness to allow running
+        multiple test cases in a row without closing the browser after each one.
+
+        Args:
+            _input (str): Unused.
+            _server_map (sapphire.server_map.ServerMap): Unused.
+
+        Returns:
+            None
+        """
+        self.enable_harness()
+        self.fuzz["test"] = (
+            b"<!DOCTYPE html>\n"
+            b"<html>\n"
+            b"<head>\n"
+            b"<script>window.close()</script>\n"
+            b"</head>\n"
+            b"</html>"
+        )
+
+    def generate(self, testcase, _server_map):
+        """Since the test case has already been created, just add the data to the
+        TestCase.
+
+        All TestCases require an entry point, and the one expected by Grizzly
+        is provided in `testcase.landing_page`, so use it as the file name for
+        the test.
+
+        Args:
+            testcase (grizzly.common.storage.TestCase): TestCase to be populated.
+            _server_map (sapphire.server_map.ServerMap): Unused.
+
+        Returns:
+            None
+        """
+        testcase.add_from_bytes(self.fuzz["test"], testcase.landing_page)
diff --git a/grizzly/adapter/no_op_adapter/test_no_op.py b/grizzly/adapter/no_op_adapter/test_no_op.py
new file mode 100644
index 00000000..ba1f9b1e
--- /dev/null
+++ b/grizzly/adapter/no_op_adapter/test_no_op.py
@@ -0,0 +1,19 @@
+# coding=utf-8
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from grizzly.common.storage import TestCase
+
+from .
import NoOpAdapter + + +def test_no_op_01(): + """test a simple Adapter""" + adapter = NoOpAdapter("no-op") + adapter.setup(None, None) + test = TestCase("a", "b", adapter.name) + assert not test.data_size + assert "a" not in test.contents + adapter.generate(test, None) + assert "a" in test.contents diff --git a/grizzly/common/test_adapter.py b/grizzly/adapter/test_adapter.py similarity index 75% rename from grizzly/common/test_adapter.py rename to grizzly/adapter/test_adapter.py index fb4cd193..cc6b6473 100644 --- a/grizzly/common/test_adapter.py +++ b/grizzly/adapter/test_adapter.py @@ -8,25 +8,19 @@ class SimpleAdapter(Adapter): - NAME = "simple" - def generate(self, testcase, server_map): pass def test_adapter_01(): - """test a bad Adapter""" - class BadAdapter(SimpleAdapter): - NAME = None - with pytest.raises(AdapterError, match="BadAdapter.NAME must be a string"): - BadAdapter() - -def test_adapter_02(): """test a simple Adapter""" - adpt = SimpleAdapter() + with pytest.raises(AdapterError, match="name must not be empty"): + SimpleAdapter("") + adpt = SimpleAdapter("simple") assert isinstance(adpt.fuzz, dict) assert not adpt.fuzz assert adpt.monitor is None + assert adpt.name == "simple" assert adpt.remaining is None assert adpt.get_harness() is None adpt.setup(None, None) @@ -36,16 +30,13 @@ def test_adapter_02(): adpt.pre_launch() adpt.cleanup() -def test_adapter_03(tmp_path): + +def test_adapter_02(tmp_path): """test Adapter.enable_harness()""" - adpt = SimpleAdapter() + adpt = SimpleAdapter("a") # built-in harness - harness_file = tmp_path / "harness.html" - test_data = b"default_harness_data" - harness_file.write_bytes(test_data) - adpt.HARNESS_FILE = str(harness_file) adpt.enable_harness() - assert adpt.get_harness() == test_data + assert adpt.get_harness() # external harness ext_harness_file = tmp_path / "ext_harness.html" test_data = b"external_harness_data" @@ -53,14 +44,15 @@ def test_adapter_03(tmp_path): adpt.enable_harness(str(ext_harness_file)) assert adpt.get_harness() == test_data -def test_adapter_04(tmp_path): + +def test_adapter_03(tmp_path): """test Adapter.scan_path()""" # empty path assert not any(SimpleAdapter.scan_path(str(tmp_path))) # missing path assert not any(SimpleAdapter.scan_path(str(tmp_path / "none"))) # path to file - file1 = (tmp_path / "test1.txt") + file1 = tmp_path / "test1.txt" file1.touch() found = tuple(SimpleAdapter.scan_path(str(file1))) assert str(file1) in found @@ -69,9 +61,9 @@ def test_adapter_04(tmp_path): assert len(tuple(SimpleAdapter.scan_path(str(tmp_path)))) == 1 # path to directory (w/ ignored) (tmp_path / ".ignored").touch() - nested = (tmp_path / "nested") + nested = tmp_path / "nested" nested.mkdir() - file2 = (nested / "test2.bin") + file2 = nested / "test2.bin" file2.touch() assert len(tuple(SimpleAdapter.scan_path(str(tmp_path)))) == 1 # path to directory (recursive) diff --git a/grizzly/adapters/NoOpAdapter/__init__.py b/grizzly/adapters/NoOpAdapter/__init__.py deleted file mode 100644 index 91ab9a84..00000000 --- a/grizzly/adapters/NoOpAdapter/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -from grizzly.common import Adapter - -__author__ = "Tyson Smith" -__credits__ = ["Tyson Smith"] - - -class NoOpAdapter(Adapter): - """ - This is an simple adapter that has very little overhead. 
It can be used - to get a baseline iteration rate for Grizzly. - """ - NAME = "no-op" - - def setup(self, *_): - self.enable_harness() - self.fuzz["test"] = "\n" \ - "\n" \ - "\n" \ - "\n" \ - "\n" \ - "" - - def generate(self, testcase, _): - testcase.add_from_data(self.fuzz["test"], testcase.landing_page) diff --git a/grizzly/adapters/__init__.py b/grizzly/adapters/__init__.py deleted file mode 100644 index 20e48d5c..00000000 --- a/grizzly/adapters/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -import importlib -import logging -import os -import sys -import traceback - -from grizzly.common import Adapter - -log = logging.getLogger("grizzly") # pylint: disable=invalid-name - -__all__ = ("get", "load", "names") -__adapters__ = dict() - -def get(name): - return __adapters__.get(name.lower(), None) - -def load(path=None, skip_failures=True): - assert not __adapters__, "adapters have already been loaded" - if path is None: - path = os.path.dirname(__file__) - path = os.path.abspath(path) - log.debug("loading adapters from %r", path) - sys.path.append(path) - for sub in os.listdir(path): - if not os.path.isfile(os.path.join(path, sub, "__init__.py")): - continue - log.debug("scanning %r", sub) - try: - lib = importlib.import_module(sub) - except Exception: # pylint: disable=broad-except - if not skip_failures: - raise - exc_type, exc_obj, exc_tb = sys.exc_info() - tbinfo = traceback.extract_tb(exc_tb)[-1] - log.debug("raised %s: %s (%s:%d)", exc_type.__name__, exc_obj, tbinfo[0], tbinfo[1]) - continue - for clsname in dir(lib): - cls = getattr(lib, clsname) - if isinstance(cls, type) and issubclass(cls, Adapter): - if clsname == "Adapter": - continue - log.debug("sanity checking %r", clsname) - if not isinstance(cls.NAME, str): - raise RuntimeError( - "%s.NAME must be 'str' not %r" % (cls.__name__, type(cls.NAME).__name__)) - if cls.NAME.lower() != cls.NAME: - raise RuntimeError( - "%s.NAME %r must be lowercase" % (cls.__name__, cls.NAME)) - if cls.NAME in __adapters__: - raise RuntimeError( - "Name collision! %r is used by %r and %r" % ( - cls.NAME, - __adapters__[cls.NAME].__name__, - cls.__name__)) - __adapters__[cls.NAME] = cls - else: - log.debug("ignored %r", sub) - log.debug("%d adapters loaded", len(__adapters__)) - -def names(): - return __adapters__.keys() diff --git a/grizzly/args.py b/grizzly/args.py index d466e3e3..334f98fc 100644 --- a/grizzly/args.py +++ b/grizzly/args.py @@ -4,11 +4,14 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
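To make the Adapter API above concrete, here is a hedged sketch of a minimal subclass; the class, the `"my-adapter"` name, and the page contents are illustrative and not part of this patch:

```python
# Minimal illustrative Adapter: serves one static page per iteration.
from grizzly.adapter import Adapter

class MyAdapter(Adapter):
    NAME = "my-adapter"  # illustrative; matches the entry point name

    def setup(self, input_path, server_map):
        # one-time startup work, e.g. enable the default harness so multiple
        # test cases can run without relaunching the browser
        self.enable_harness()

    def generate(self, testcase, server_map):
        # every test case needs an entry point named testcase.landing_page
        testcase.add_from_bytes(b"<html></html>", testcase.landing_page)
```

Once registered under the `grizzly_adapters` entry-point group, such a class would appear as a choice for the positional `adapter` argument handled by `GrizzlyArgs` below.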
from argparse import ArgumentParser, HelpFormatter from logging import CRITICAL, DEBUG, ERROR, INFO, WARNING -from os import listdir -from os.path import exists, isfile, isdir +from os.path import exists, isfile +from pathlib import Path +from platform import system + +from .common.plugins import scan as scan_plugins +from .common.plugins import scan_target_assets +from .common.utils import TIMEOUT_DELAY -from .adapters import names as adapter_names -from .target import available as available_targets # ref: https://stackoverflow.com/questions/12268602/sort-argparse-help-alphabetically class SortingHelpFormatter(HelpFormatter): @@ -21,86 +24,198 @@ def __sort_key(action): def add_usage(self, usage, actions, groups, prefix=None): actions = sorted(actions, key=self.__sort_key) - super(SortingHelpFormatter, self).add_usage(usage, actions, groups, prefix) + super().add_usage(usage, actions, groups, prefix) def add_arguments(self, actions): actions = sorted(actions, key=self.__sort_key) - super(SortingHelpFormatter, self).add_arguments(actions) + super().add_arguments(actions) -class CommonArgs(object): +class CommonArgs: IGNORABLE = ("log-limit", "memory", "timeout") - IGNORE = ("log-limit", "timeout") + DEFAULT_IGNORE = ("log-limit", "timeout") def __init__(self): - super(CommonArgs, self).__init__() # log levels for console logging self._level_map = { "CRIT": CRITICAL, "ERROR": ERROR, "WARN": WARNING, "INFO": INFO, - "DEBUG": DEBUG} + "DEBUG": DEBUG, + } self._sanity_skip = set() if not hasattr(self, "parser"): self.parser = ArgumentParser( - formatter_class=SortingHelpFormatter, - conflict_handler='resolve') + formatter_class=SortingHelpFormatter, conflict_handler="resolve" + ) + targets = scan_plugins("grizzly_targets") + if not targets: + self.parser.error("No Platforms (Targets) are installed") + + self.parser.add_argument("binary", help="Firefox binary to run") self.parser.add_argument( - "binary", - help="Firefox binary to run") - self.parser.add_argument( - "--log-level", default="INFO", - help="Configure console logging. Options: %s (default: %%(default)s)" % - ", ".join(k for k, v in sorted(self._level_map.items(), key=lambda x: x[1]))) + "--log-level", + choices=sorted(self._level_map), + default="INFO", + help="Configure console logging (default: %(default)s)", + ) + + # build 'asset' help string + assets = scan_target_assets() + asset_msg = list() + for target in sorted(assets): + if assets[target]: + asset_msg.append( + "%s: %s. " % (target, ", ".join(sorted(assets[target]))) + ) self.launcher_grp = self.parser.add_argument_group("Launcher Arguments") self.launcher_grp.add_argument( - "-e", "--extension", action="append", - help="Install an extension. Specify the path to the xpi or the directory" - " containing the unpacked extension. To install multiple extensions" - " specify multiple times") + "--asset", + action="append", + default=list(), + metavar=("ASSET", "PATH"), + nargs=2, + help="Specify target specific asset files. %s" % ("".join(asset_msg),), + ) + self.launcher_grp.add_argument( + "-e", + "--extension", + help="DEPRECATED. Install an extension. 
Specify the path to the xpi or the"
+            " directory containing the unpacked extension.",
+        )
+        headless_choices = ["default"]
+        if system().startswith("Linux"):
+            headless_choices.append("xvfb")
         self.launcher_grp.add_argument(
-            "--launch-timeout", type=int, default=300,
-            help="Number of seconds to wait before LaunchError is raised (default: %(default)s)")
+            "--headless",
+            choices=headless_choices,
+            const="default",
+            default=None,
+            nargs="?",
+            help="Headless mode. 'default' uses browser's built-in headless mode.",
+        )
         self.launcher_grp.add_argument(
-            "--log-limit", type=int, default=0,
-            help="Browser log file size limit in MBs (default: 'no limit')")
+            "--launch-attempts",
+            type=int,
+            default=3,
+            help="Number of attempts to launch the browser before LaunchError is raised"
+            " (default: %(default)s)",
+        )
         self.launcher_grp.add_argument(
-            "-m", "--memory", type=int, default=0,
-            help="Browser process memory limit in MBs (default: 'no limit')")
+            "--launch-timeout",
+            type=int,
+            default=300,
+            help="Number of seconds to wait before LaunchError is raised"
+            " (default: %(default)s)",
+        )
         self.launcher_grp.add_argument(
-            "--platform", default="ffpuppet",
-            help="Platforms available: %s (default: %%(default)s)" % ", ".join(available_targets()))
+            "--log-limit",
+            type=int,
+            default=0,
+            help="Browser log file size limit in MBs (default: 'no limit')",
+        )
         self.launcher_grp.add_argument(
-            "-p", "--prefs",
-            help="prefs.js file to use")
+            "-m",
+            "--memory",
+            type=int,
+            default=0,
+            help="Browser process memory limit in MBs (default: 'no limit')",
+        )
         self.launcher_grp.add_argument(
-            "--relaunch", type=int, default=1000,
-            help="Number of iterations performed before relaunching the browser (default: %(default)s)")
+            "--platform",
+            default="ffpuppet",
+            choices=sorted(targets),
+            help="Target to use (default: %(default)s)",
+        )
         self.launcher_grp.add_argument(
-            "-t", "--timeout", type=int, default=60,
-            help="Iteration timeout in seconds (default: %(default)s)")
+            "-p", "--prefs", help="DEPRECATED. prefs.js file to use"
+        )
         self.launcher_grp.add_argument(
-            "--valgrind", action="store_true",
-            help="Use Valgrind (Linux only)")
+            "--relaunch",
+            type=int,
+            default=1000,
+            help="Number of iterations performed before relaunching the browser"
+            " (default: %(default)s)",
+        )
         self.launcher_grp.add_argument(
-            "--xvfb", action="store_true",
-            help="Use Xvfb (Linux only)")
+            "--time-limit",
+            type=int,
+            default=None,
+            help="This is the maximum amount of time that a test is expected to take."
+            " After the time has elapsed the harness will attempt to close the test."
+            " By default `Adapter.TIME_LIMIT` is used."
+            " Browser build types and debuggers can affect the amount of time"
+            " required to run a test case.",
+        )
+        self.launcher_grp.add_argument(
+            "-t",
+            "--timeout",
+            type=int,
+            default=None,
+            help="Iteration timeout in seconds. By default this is `--time-limit`+%ds."
+            " If the timeout is reached the target is assumed to be in a bad state"
+            " and will be closed. Typically this should be a few seconds greater"
+            " than the value used for `--time-limit`." % (TIMEOUT_DELAY,),
+        )
+        if system().startswith("Linux"):
+            self.launcher_grp.add_argument(
+                "--xvfb", action="store_true", help="DEPRECATED. Use Xvfb."
+            )
+        else:
+            self.parser.set_defaults(xvfb=False)

         self.reporter_grp = self.parser.add_argument_group("Reporter Arguments")
         self.reporter_grp.add_argument(
-            "--fuzzmanager", action="store_true",
-            help="Report results to FuzzManager")
+            "--fuzzmanager", action="store_true", help="Report results to FuzzManager"
+        )
+        self.reporter_grp.add_argument(
+            "--ignore",
+            nargs="*",
+            choices=self.IGNORABLE,
+            default=self.DEFAULT_IGNORE,
+            metavar="IGNORABLE",
+            help="Space-separated list of ignorable types. Pass zero args to disable."
+            " Available: %s (default: %s)"
+            % (" ".join(self.IGNORABLE), " ".join(self.DEFAULT_IGNORE)),
+        )
         self.reporter_grp.add_argument(
-            "--ignore", nargs="*", default=list(self.IGNORE),
-            help="Space separated list of issue types to ignore. Valid options: %s"
-            " (default: %s)" % (" ".join(self.IGNORABLE), " ".join(self.IGNORE)))
+            "-l",
+            "--logs",
+            default=Path.cwd(),
+            type=Path,
+            help="Location to save logs and test cases. (default: %(default)s)",
+        )
         self.reporter_grp.add_argument(
             "--tool",
-            help="Override tool name used when reporting issues to FuzzManager")
+            help="Override tool name used when reporting issues to FuzzManager",
+        )
+
+        if system().startswith("Linux"):
+            dbg_group = self.launcher_grp.add_mutually_exclusive_group()
+            dbg_group.add_argument(
+                "--pernosco",
+                action="store_true",
+                help="Use rr. Trace intended to be used with Pernosco.",
+            )
+            dbg_group.add_argument("--rr", action="store_true", help="Use rr.")
+            dbg_group.add_argument(
+                "--valgrind", action="store_true", help="Use Valgrind."
+            )
+        else:
+            self.parser.set_defaults(
+                pernosco=False,
+                rr=False,
+                valgrind=False,
+            )
+
+        self.parser.epilog = (
+            "For additional help check out the wiki:"
+            " https://github.com/MozillaSecurity/grizzly/wiki"
+        )

     def parse_args(self, argv=None):
         args = self.parser.parse_args(argv)
@@ -108,107 +223,177 @@ def parse_args(self, argv=None):
         return args

     def sanity_check(self, args):
-        if hasattr(super(CommonArgs, self), 'sanity_check'):
-            super(CommonArgs, self).sanity_check(args)
         if "binary" not in self._sanity_skip and not isfile(args.binary):
-            self.parser.error("file not found: %r" % args.binary)
-
-        # sanitize ignore list
-        args.ignore = {arg.lower() for arg in args.ignore}
-        for ignore in args.ignore:
-            if ignore not in self.IGNORABLE:
-                self.parser.error("Unrecognized ignore value: %s" % ignore)
-
-        if "input" not in self._sanity_skip and args.input:
-            if not exists(args.input):
-                self.parser.error("%r does not exist" % args.input)
-            elif isdir(args.input) and not listdir(args.input):
-                self.parser.error("%r is empty" % args.input)
-
-        # check log level
-        log_level = self._level_map.get(args.log_level.upper(), None)
-        if log_level is None:
-            self.parser.error("Invalid log-level %r" % args.log_level)
-        args.log_level = log_level
+            self.parser.error("file not found: %r" % (args.binary,))
+
+        if args.launch_attempts < 1:
+            self.parser.error("--launch-attempts must be >= 1")
+
+        args.log_level = self._level_map[args.log_level]

         if args.log_limit < 0:
             self.parser.error("--log-limit must be >= 0")
-        args.log_limit *= 1048576
+        args.log_limit *= 1_048_576
+
+        # if logs is specified, we need it to be a directory (whether existent or not)
+        if args.logs and args.logs.is_file():
+            self.parser.error("--logs cannot be a file")

         if args.memory < 0:
-            self.parser.error("-m/--memory must be >= 0")
-        args.memory *= 1048576
+            self.parser.error("--memory must be >= 0")
+        args.memory *= 1_048_576

         if args.relaunch < 1:
             self.parser.error("--relaunch must be >= 1")

-        if
args.extension: - for ext in args.extension: - if not exists(ext): - self.parser.error("%r does not exist" % ext) - if not isdir(ext) or (isfile(ext) and ext.endswith(".xpi")): - self.parser.error("Extension must be a folder or .xpi") - - if args.platform.lower() not in set(available_targets()): - self.parser.error("Unsupported platform %r" % args.platform) - - if args.prefs and not isfile(args.prefs): - self.parser.error("-p/--prefs not found %r" % args.prefs) + if args.pernosco or args.rr: + # currently we only support rr on Linux + settings = "/proc/sys/kernel/perf_event_paranoid" + value = int(Path(settings).read_text()) + if value > 1: + self.parser.error("rr needs %s <= 1, but it is %d" % (settings, value)) + + # TODO: remove deprecated 'extension' from args + if args.extension: # pragma: no cover + args.asset.append(["extension", args.extension]) + + # TODO: remove deprecated 'prefs' from args + if args.prefs: # pragma: no cover + args.asset.append(["prefs", args.prefs]) + + # check args.platform before args.asset since it is used + if args.asset: + supported_assets = scan_target_assets()[args.platform] + for asset, path in args.asset: + if not supported_assets or asset not in supported_assets: + self.parser.error( + "Asset %r not supported by target %r" % (asset, args.platform) + ) + if not exists(path): + self.parser.error( + "Failed to add asset %r cannot find %r" % (asset, path) + ) + + if args.time_limit is not None and args.time_limit < 1: + self.parser.error("--time-limit must be >= 1") + + if args.timeout is not None and args.timeout < 1: + self.parser.error("--timeout must be >= 1") if "tool" not in self._sanity_skip: if args.tool is not None and not args.fuzzmanager: self.parser.error("--tool can only be given with --fuzzmanager") + if args.xvfb: # pragma: no cover + args.headless = "xvfb" + class GrizzlyArgs(CommonArgs): def __init__(self): - super(GrizzlyArgs, self).__init__() - self._adapters = sorted(adapter_names()) + super().__init__() + + adapters = scan_plugins("grizzly_adapters") + if not adapters: + self.parser.error("No Adapters are installed") + self._sanity_skip.add("tool") self.parser.add_argument( - "adapter", - help="Available adapters: %s" % ", ".join(self._adapters)) + "adapter", choices=sorted(adapters), help="Adapter to use." + ) + self.parser.add_argument( + "--enable-profiling", + action="store_true", + help="Record profiling data. The data can be viewed by running the" + " status reporter while running Grizzly.", + ) self.parser.add_argument( - "-i", "--input", - help="Test case or directory containing test cases") + "-i", + "--input", + type=Path, + help="Test case or directory containing test cases.", + ) self.parser.add_argument( - "-v", "--verbose", action="store_true", + "--limit", + type=int, + default=0, + help="Maximum number of iterations to be performed. (default: 'no limit')", + ) + self.parser.add_argument( + "--smoke-test", + action="store_true", + help="Perform a small number of iterations to check if everything is" + " working as expected. Exit immediately if a result is found.", + ) + self.parser.add_argument( + "-v", + "--verbose", + action="store_true", help="Output console updates every iteration. By default the number" - " of iterations between console updates doubles each update." - " Updates are always printed when a result is detected or the" - " target is relaunched.") + " of iterations between console updates doubles each update." 
+ " Updates are always printed when a result is detected or the" + " target is relaunched.", + ) self.launcher_grp.add_argument( - "--coverage", action="store_true", - help="Enable coverage collection") + "--coverage", action="store_true", help="Enable coverage collection." + ) self.launcher_grp.add_argument( - "--rr", action="store_true", - help="Use RR (Linux only)") + "--runtime", + type=int, + default=0, + help="Maximum runtime in seconds. Checked after each iteration." + " (default: 'no limit')", + ) self.reporter_grp.add_argument( - "-c", "--cache", type=int, default=0, - help="Maximum number of additional test cases to include in report (default: %(default)s)") + "-c", + "--collect", + type=int, + default=1, + help="Maximum number of test cases to include in the report." + " (default: %(default)s)", + ) self.reporter_grp.add_argument( - "--s3-fuzzmanager", action="store_true", - help="Report large attachments (if any) to S3 and then the crash & S3 link to FuzzManager") - - self.parser.epilog = "For addition help check out the wiki:" \ - " https://github.com/MozillaSecurity/grizzly/wiki" + "--limit-reports", + type=int, + default=5, + help="Maximum number of times a unique result will be submitted." + " This includes results submitted by parallel and previously run" + " (within 24h) processes. This can help avoid spamming duplicate results." + " The first time a result is seen it will always be submitted." + " (default: %(default)s) - Use 0 for 'no limit'", + ) + self.reporter_grp.add_argument( + "--s3-fuzzmanager", + action="store_true", + help="Report large attachments (if any) to S3 and then the crash &" + " S3 link to FuzzManager.", + ) def sanity_check(self, args): - super(GrizzlyArgs, self).sanity_check(args) + super().sanity_check(args) - if args.adapter.lower() not in self._adapters: - msg = ["Adapter %r does not exist." % args.adapter.lower()] - if self._adapters: - msg.append("Available adapters: %s" % ", ".join(self._adapters)) - else: - msg.append("No adapters available.") - self.parser.error(" ".join(msg)) + if args.collect < 1: + self.parser.error("--collect must be greater than 0") if args.fuzzmanager and args.s3_fuzzmanager: - self.parser.error("--fuzzmanager and --s3-fuzzmanager are mutually exclusive") + self.parser.error( + "--fuzzmanager and --s3-fuzzmanager are mutually exclusive" + ) + + if args.input and not args.input.exists(): + self.parser.error("'%s' does not exist" % (args.input,)) + + if args.limit < 0: + self.parser.error("--limit must be >= 0") + + if args.limit_reports < 0: + self.parser.error("--limit-reports must be >= 0") + + if args.runtime < 0: + self.parser.error("--runtime must be >= 0") if args.tool is not None and not (args.fuzzmanager or args.s3_fuzzmanager): - self.parser.error("--tool can only be given with --fuzzmanager/--s3-fuzzmanager") + self.parser.error( + "--tool can only be given with --fuzzmanager/--s3-fuzzmanager" + ) diff --git a/grizzly/common/__init__.py b/grizzly/common/__init__.py index 0fe3d642..ce790c19 100644 --- a/grizzly/common/__init__.py +++ b/grizzly/common/__init__.py @@ -3,18 +3,5 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-from .adapter import Adapter, AdapterError -from .iomanager import IOManager, ServerMap -from .reporter import FilesystemReporter, FuzzManagerReporter, Report, Reporter, S3FuzzManagerReporter -from .runner import Runner -from .status import ReducerStats, Status -from .storage import TestCaseLoadFailure, TestCase, TestFile, TestFileExists -from .utils import grz_tmp - - -__all__ = ( - "Adapter", "AdapterError", "FilesystemReporter", "FuzzManagerReporter", "grz_tmp", "IOManager", - "ReducerStats", "Report", "Reporter", "Runner", "S3FuzzManagerReporter", "ServerMap", "Status", - "TestCase", "TestCaseLoadFailure", "TestFile", "TestFileExists") __author__ = "Jesse Schwartzentruber" __credits__ = ["Jesse Schwartzentruber", "Tyson Smith"] diff --git a/grizzly/common/adapter.py b/grizzly/common/adapter.py deleted file mode 100644 index cfc46f4f..00000000 --- a/grizzly/common/adapter.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding=utf-8 -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -import abc -import os - - -__all__ = ("Adapter", "AdapterError") -__author__ = "Tyson Smith" -__credits__ = ["Tyson Smith"] - - -class AdapterError(Exception): - """The base class for exceptions raised by an Adapter""" - - -class Adapter(metaclass=abc.ABCMeta): - HARNESS_FILE = os.path.join(os.path.dirname(__file__), "harness.html") - IGNORE_UNSERVED = True # Only report test cases with served content - NAME = None # must be a unique string - RELAUNCH = 0 # maximum iterations between Target relaunches (<1 use default) - TEST_DURATION = 30 # maximum execution time per test - - __slots__ = ("_harness", "fuzz", "monitor", "remaining") - - ############################# - # Built-ins do NOT overload! - ############################# - - def __init__(self): - if not isinstance(self.NAME, str): - raise AdapterError("%s.NAME must be a string" % (type(self).__name__,)) - self._harness = None - self.fuzz = dict() - self.monitor = None - # remaining can be used to indicate the number of TestCases remaining to process - self.remaining = None - - def cleanup(self): - """Automatically called once at shutdown. - - Args: - None - - Returns: - None - """ - self.shutdown() - - def enable_harness(self, file_path=None): - """Enable use of a harness during fuzzing. By default no harness is used. - - Args: - file_path (str): Path to file to use as a harness. If None the default harness is used. - - Returns: - None - """ - if file_path is None: - file_path = self.HARNESS_FILE - with open(file_path, "rb") as in_fp: - self._harness = in_fp.read() - - def get_harness(self): - """Get the harness. Used internally by Grizzly. - - Args: - None - - Returns: - TestFile: The current harness - """ - return self._harness - - @staticmethod - def scan_path(path, ignore=("desktop.ini", "thumbs.db"), recursive=False): - """Scan a path and yield the files within it. - - Args: - path (str): Path to file or directory. - ignore (iterable): Filenames to ignore. - recursive (bool): Scan recursively into directories. - - Yields: - str: Absolute path to files. 
- """ - full_path = os.path.abspath(path) - if os.path.isdir(full_path): - for root, _, files in os.walk(full_path): - for fname in files: - if fname in ignore or fname.startswith("."): - # skip ignored and hidden system files - continue - yield os.path.join(root, fname) - if not recursive: - break - elif os.path.isfile(full_path): - yield full_path - - ############################# - # Methods to overload - ############################# - - @abc.abstractmethod - def generate(self, testcase, server_map): - """Automatically called. Populate testcase here. - - Args: - testcase (TestCase): TestCase intended to be populated - server_map (ServerMap): A ServerMap - - Returns: - None - """ - - def on_served(self, testcase, served): - """Optional. Automatically called after a test case is successfully served. - - Args: - testcase (TestCase): TestCase that was served - served (list): A list of file names served from testcase - - Returns: - None - """ - - def on_timeout(self, testcase, served): - """Optional. Automatically called if timeout occurs attempting to serve a test case. - - Args: - testcase (TestCase): TestCase that was served - served (list): A list of file names served from testcase - - Returns: - None - """ - self.on_served(testcase, served) - - def pre_launch(self): - """Optional. Automatically called before launching the Target. - - Args: - None - - Returns: - None - """ - - def setup(self, input_path, server_map): - """Optional. Automatically called once at startup. - - Args: - input_path (str): Points to a file or directory passed by the user. - None is passed by default. - server_map (ServerMap): A ServerMap - - Returns: - None - """ - - def shutdown(self): - """Optional. Automatically called once at shutdown. - - Args: - None - - Returns: - None - """ diff --git a/grizzly/common/fuzzmanager.py b/grizzly/common/fuzzmanager.py new file mode 100644 index 00000000..952d1633 --- /dev/null +++ b/grizzly/common/fuzzmanager.py @@ -0,0 +1,319 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +"""Interface for getting Crash and Bucket data from CrashManager API""" +import json +from contextlib import contextmanager +from logging import getLogger +from os import unlink +from pathlib import Path +from shutil import rmtree +from tempfile import mkdtemp, mkstemp + +from Collector.Collector import Collector + +from .utils import grz_tmp + +LOG = getLogger(__name__) + + +class Bucket: + """Get Bucket data for a specified CrashManager bucket.""" + + def __init__(self, bucket_id): + """Initialize a Bucket instance. 
+ + Arguments: + bucket_id (int): ID of the requested bucket on the server side + """ + assert isinstance(bucket_id, int) + self._bucket_id = bucket_id + self._sig_filename = None + self._coll = Collector() + self._url = "%s://%s:%d/crashmanager/rest/buckets/%d/" % ( + self._coll.serverProtocol, + self._coll.serverHost, + self._coll.serverPort, + bucket_id, + ) + self._data = None + + @property + def bucket_id(self): + return self._bucket_id + + def __enter__(self): + return self + + def __exit__(self, *exc): + self.cleanup() + + def __getattr__(self, name): + if self._data is None: + self._data = self._coll.get(self._url).json() + if name not in self._data: + raise AttributeError( + "'%s' object has no attribute '%s' (has: %s)" + % (type(self).__name__, name, list(self._data)) + ) + return self._data[name] + + def __setattr__(self, name, value): + if name.startswith("_"): + super().__setattr__(name, value) + return + raise AttributeError("can't set attribute") + + def cleanup(self): + """Cleanup any resources held by this instance. + + Arguments: + None + + Returns: + None + """ + if self._sig_filename is not None: + rmtree(str(self._sig_filename.parent)) + + def iter_crashes(self, quality_filter=None): + """Fetch all crash IDs for this FuzzManager bucket. + Only crashes with testcases are returned. + + Arguments: + quality_filter (int): Filter crashes by quality value (None for all) + + Returns: + generator: generator of CrashEntry + """ + + def _get_results(endpoint, params=None): + """ + Function to get paginated results from FuzzManager + + Args: + endpoint (str): FuzzManager REST API to query (eg. "crashes"). + params (dict): Params to pass through to requests.get + + Returns: + generator: objects returned by FuzzManager (as dicts) + """ + LOG.debug("first request to /%s/", endpoint) + + url = "%s://%s:%d/crashmanager/rest/%s/" % ( + self._coll.serverProtocol, + self._coll.serverHost, + self._coll.serverPort, + endpoint, + ) + + response = self._coll.get(url, params=params).json() + + while True: + LOG.debug( + "got %d/%d %s", + len(response["results"]), + response["count"], + endpoint, + ) + while response["results"]: + yield response["results"].pop() + + if response["next"] is None: + break + + LOG.debug("next request to /%s/", endpoint) + response = self._coll.get(response["next"]).json() + + # Get all crashes for bucket + query_args = [ + ("op", "AND"), + ("bucket", self.bucket_id), + ] + if quality_filter is not None: + query_args.append(("testcase__quality", quality_filter)) + query = json.dumps(dict(query_args)) + + n_yielded = 0 + for crash in _get_results( + "crashes", params={"query": query, "include_raw": "0"} + ): + + if not crash["testcase"]: + LOG.warning("crash %d has no testcase, skipping", crash["id"]) + continue + + n_yielded += 1 + LOG.debug("yielding crash #%d", n_yielded) + result = CrashEntry(crash["id"]) + result._data = crash # pylint: disable=protected-access + yield result + + def signature_path(self): + """Download the bucket data from CrashManager. + + Arguments: + None + + Returns: + Path: Path on disk where signature exists. 
+ """ + if self._sig_filename is not None: + return self._sig_filename + + tmpd = Path( + mkdtemp( + prefix="bucket-%d-" % (self._bucket_id,), dir=grz_tmp("fuzzmanager") + ) + ) + try: + sig_basename = "%d.signature" % (self._bucket_id,) + sig_filename = tmpd / sig_basename + sig_filename.write_text(self.signature) + sigmeta_filename = sig_filename.with_suffix(".metadata") + sigmeta_filename.write_text( + json.dumps( + { + "size": self.size, + "frequent": self.frequent, + "shortDescription": self.shortDescription, + "testcase__quality": self.best_quality, + } + ) + ) + except: # noqa pragma: no cover pylint: disable=bare-except + rmtree(str(tmpd)) + raise + + self._sig_filename = sig_filename + return self._sig_filename + + +class CrashEntry: + """Get the CrashEntry data for the specified CrashManager crash. + + Attributes: + crash_id (int): the server ID for the crash + see crashmanager.serializers.CrashEntrySerializer + """ + + RAW_FIELDS = frozenset({"rawCrashData", "rawStderr", "rawStdout"}) + + def __init__(self, crash_id): + """Initialize CrashEntry. + + Arguments: + crash_id (int): ID of the requested crash on the server side + """ + assert isinstance(crash_id, int) + self._crash_id = crash_id + self._coll = Collector() + self._url = "%s://%s:%d/crashmanager/rest/crashes/%d/" % ( + self._coll.serverProtocol, + self._coll.serverHost, + self._coll.serverPort, + crash_id, + ) + self._data = None + self._tc_filename = None + + @property + def crash_id(self): + return self._crash_id + + def __enter__(self): + return self + + def __exit__(self, *exc): + self.cleanup() + + def __getattr__(self, name): + if self._data is None or (name in self.RAW_FIELDS and name not in self._data): + need_raw = "1" if name in self.RAW_FIELDS else "0" + # TODO: handle 403 and 404? + self._data = self._coll.get( + self._url, params={"include_raw": need_raw} + ).json() + if name not in self._data: + raise AttributeError( + "'%s' object has no attribute '%s' (has: %s)" + % (type(self).__name__, name, list(self._data)) + ) + return self._data[name] + + def __setattr__(self, name, value): + if name.startswith("_"): + super().__setattr__(name, value) + return + if name != "testcase_quality": + raise AttributeError("can't set attribute") + self._coll.patch(self._url, data={name: value}) + if self._data: + self._data[name] = value + + def cleanup(self): + """Cleanup any resources held by this instance. + + Arguments: + None + + Returns: + None + """ + if self._tc_filename is not None: + self._tc_filename.unlink() + + def testcase_path(self): + """Download the testcase data from CrashManager. + + Arguments: + None + + Returns: + Path: Path on disk where testcase exists_ + """ + if self._tc_filename is not None: + return self._tc_filename + + dlurl = self._url + "download/" + response = self._coll.get(dlurl) + + if "content-disposition" not in response.headers: + raise RuntimeError( + "Server sent malformed response: %r" % (response,) + ) # pragma: no cover + + handle, filename = mkstemp( + dir=grz_tmp("fuzzmanager"), + prefix="crash-%d-" % (self.crash_id,), + suffix=Path(self.testcase).suffix, + ) + try: + with open(handle, "wb") as output: + output.write(response.content) + except: # noqa pragma: no cover pylint: disable=bare-except + unlink(filename) + raise + self._tc_filename = Path(filename) + return self._tc_filename + + +@contextmanager +def load_fm_data(crash_id, load_bucket=False): + """Load CrashEntry including Bucket from FuzzManager. + + Arguments: + crash_id (int): Crash ID to load. 
+ load_bucket (bool): Attempt to load bucket. + + Yields: + 2-tuple(CrashEntry, Bucket): Data loaded from FuzzManager. + """ + with CrashEntry(crash_id) as crash: + # load signature if needed + if load_bucket and crash.bucket: + with Bucket(crash.bucket) as bucket: + yield crash, bucket + else: + yield crash, None diff --git a/grizzly/common/harness.html b/grizzly/common/harness.html index 7a9792ca..d7796c9a 100644 --- a/grizzly/common/harness.html +++ b/grizzly/common/harness.html @@ -2,16 +2,24 @@ -🐻 ⋅ Grizzly ⋅ 🦊 +Grizzly ⋅ Harness diff --git a/grizzly/common/iomanager.py b/grizzly/common/iomanager.py index 90053d7b..7caf6b2d 100644 --- a/grizzly/common/iomanager.py +++ b/grizzly/common/iomanager.py @@ -2,41 +2,33 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. - from collections import deque -from os import environ -from os.path import isfile from sapphire.server_map import ServerMap -from .storage import TestCase, TestFile -from ..target import sanitizer_opts +from .storage import TestCase __all__ = ("IOManager",) __author__ = "Tyson Smith" __credits__ = ["Tyson Smith"] -class IOManager(object): - TRACKED_ENVVARS = ( - "ASAN_OPTIONS", - "LSAN_OPTIONS", - "GNOME_ACCESSIBILITY", - "GRZ_FORCED_CLOSE", - "MOZ_CHAOSMODE", - "XPCOM_DEBUG_BREAK") +class IOManager: + __slots__ = ( + "_generated", + "_report_size", + "_test", + "server_map", + "tests", + ) def __init__(self, report_size=1): assert report_size > 0 - self.harness = None - self.server_map = ServerMap() # manage redirects, include directories and dynamic responses + self.server_map = ServerMap() self.tests = deque() - self._environ_files = list() # collection of files that should be added to the testcase self._generated = 0 # number of test cases generated self._report_size = report_size - # used to record environment variable that directly impact the browser - self._tracked_env = self.tracked_environ() - self._add_suppressions() + self._test = None def __enter__(self): return self @@ -44,81 +36,41 @@ def __enter__(self): def __exit__(self, *exc): self.cleanup() - def _add_suppressions(self): - # Add suppression files to environment files - for env_var in (x for x in environ if "SAN_OPTIONS" in x): - opts = sanitizer_opts(environ.get(env_var, "")) - if "suppressions" not in opts: - continue - supp_file = opts["suppressions"].strip("'\"") - if isfile(supp_file): - fname = "%s.supp" % (env_var.split("_")[0].lower(),) - self._environ_files.append(TestFile.from_file(supp_file, fname)) - def cleanup(self): - for e_file in self._environ_files: - e_file.close() - self.purge_tests() + self.purge() + + def commit(self): + assert self._test is not None + self.tests.appendleft(self._test) + self._test = None + # manage testcase cache size + if len(self.tests) > self._report_size: + self.tests.pop().cleanup() - def create_testcase(self, adapter_name): + def create_testcase(self, adapter_name, time_limit): + assert self._test is None # create testcase object and landing page names - test = TestCase( + self._test = TestCase( self.page_name(), self.page_name(offset=1), - adapter_name=adapter_name) - # add environment variable info to the test case - for e_name, e_value in self._tracked_env.items(): - test.add_environ_var(e_name, e_value) - # add environment files to the test case - for e_file in self._environ_files: - test.add_meta(e_file.clone()) + adapter_name=adapter_name, + 
time_limit=time_limit, + ) # reset redirect map - self.server_map.redirect.clear() - self.server_map.set_redirect("grz_current_test", self.page_name(), required=False) + self.server_map.set_redirect( + "grz_current_test", self.page_name(), required=False + ) self.server_map.set_redirect("grz_next_test", self.page_name(offset=1)) - if self.harness is not None: - # add harness to testcase - self.server_map.set_dynamic_response( - "grz_harness", - lambda: self.harness, - mime_type="text/html") self._generated += 1 - self.tests.append(test) - # manage testcase cache size - if len(self.tests) > self._report_size: - self.tests.popleft().cleanup() - return test + return self._test def page_name(self, offset=0): return "test_%04d.html" % (self._generated + offset,) - def purge_tests(self): + def purge(self): + if self._test is not None: + self._test.cleanup() + self._test = None for testcase in self.tests: testcase.cleanup() self.tests.clear() - - @staticmethod - def tracked_environ(): - # Scan os.environ and collect environment variables - # that are relevant to Grizzly or the test case. - env = dict() - tracked_san_opts = ("detect_leaks",) - for var in IOManager.TRACKED_ENVVARS: - if var not in environ: - continue - if var.endswith("SAN_OPTIONS"): - opts = sanitizer_opts(environ.get(var, "")) - # strip unwanted options - tracked = dict() - for opt in tracked_san_opts: - if opt in opts: - tracked[opt] = opts[opt] - # only record *SAN_OPTIONS if there are options - if tracked: - env[var] = ":".join("=".join((k, v)) for k, v in tracked.items()) - elif var == "XPCOM_DEBUG_BREAK" and environ.get(var, "").lower() == "warn": - # ignore FFPuppet default XPCOM_DEBUG_BREAK value (set in helpers.py) - continue - else: - env[var] = environ[var] - return env diff --git a/grizzly/common/plugins.py b/grizzly/common/plugins.py new file mode 100644 index 00000000..68a8f00e --- /dev/null +++ b/grizzly/common/plugins.py @@ -0,0 +1,74 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +from logging import getLogger + +from pkg_resources import iter_entry_points + +__all__ = ("load", "scan", "PluginLoadError") + + +LOG = getLogger(__name__) + + +class PluginLoadError(Exception): + """Raised if loading a plug-in fails""" + + +def load(name, group, base_type): + """Load a plug-in. + + Args: + name (str): Name of entry point to load. + group (str): Group containing entry point. + base_type (type): Used to validate loaded objects. + + Returns: + *: Python object. + """ + assert isinstance(base_type, type) + for entry in iter_entry_points(group): + if entry.name == name: + LOG.debug("loading %r (%s)", name, base_type.__name__) + plugin = entry.load() + break + else: + raise PluginLoadError("%r not found in %r" % (name, group)) + if not issubclass(plugin, base_type): + raise PluginLoadError("%r doesn't inherit from %s" % (name, base_type.__name__)) + return plugin + + +def scan(group): + """Scan for installed plug-ins. + + Args: + group (str): Entry point group to scan. + + Returns: + list: Names of installed entry points. 
+ """ + found = list() + LOG.debug("scanning %r", group) + for entry in iter_entry_points(group): + if entry.name in found: + # not sure if this can even happen + raise PluginLoadError("Duplicate entry %r in %r" % (entry.name, group)) + found.append(entry.name) + return found + + +def scan_target_assets(): + """Scan targets and load list of supported assets (minimal sanity checking). + + Args: + None + + Returns: + dict: Name of target and list of supported assets. + """ + assets = dict() + for entry in iter_entry_points("grizzly_targets"): + assets[entry.name] = entry.load().SUPPORTED_ASSETS + return assets diff --git a/grizzly/common/post_launch_delay.html b/grizzly/common/post_launch_delay.html new file mode 100644 index 00000000..a1dd6a09 --- /dev/null +++ b/grizzly/common/post_launch_delay.html @@ -0,0 +1,62 @@ + + + +Grizzly ⋅ Post Launch Delay + + + + +
Connect debugger, etc...
+
+
+
+
\ No newline at end of file
diff --git a/grizzly/common/report.py b/grizzly/common/report.py
new file mode 100644
index 00000000..569226f8
--- /dev/null
+++ b/grizzly/common/report.py
@@ -0,0 +1,419 @@
+# coding=utf-8
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+from collections import namedtuple
+from hashlib import sha1
+from logging import getLogger
+from os import SEEK_END, scandir, stat, unlink
+from pathlib import Path
+from platform import machine, system
+from re import DOTALL, VERBOSE
+from re import compile as re_compile
+from shutil import copyfileobj, move, rmtree
+from tempfile import mkstemp
+from time import strftime
+
+# import FuzzManager utilities
+from Collector.Collector import Collector
+from fasteners.process_lock import InterProcessLock
+from FTB.ProgramConfiguration import ProgramConfiguration
+from FTB.Signatures.CrashInfo import CrashInfo, CrashSignature
+
+from .stack_hasher import Stack
+from .utils import grz_tmp
+
+__all__ = ("Report",)
+__author__ = "Tyson Smith"
+__credits__ = ["Tyson Smith"]
+
+LOG = getLogger(__name__)
+
+# NOTE: order matters, aux -> stderr -> stdout
+LogMap = namedtuple("LogMap", "aux stderr stdout")
+
+
+class Report:
+    DEFAULT_MAJOR = "NO_STACK"
+    DEFAULT_MINOR = "0"
+    HANG_STACK_HEIGHT = 10
+    MAX_LOG_SIZE = 1_048_576  # 1MB
+
+    __slots__ = (
+        "_crash_info",
+        "_logs",
+        "_signature",
+        "_target_binary",
+        "is_hang",
+        "path",
+        "prefix",
+        "stack",
+    )
+
+    def __init__(self, log_path, target_binary, is_hang=False, size_limit=MAX_LOG_SIZE):
+        assert isinstance(target_binary, str)
+        self._crash_info = None
+        self._logs = self.select_logs(log_path)
+        assert self._logs is not None
+        self._signature = None
+        self._target_binary = Path(target_binary)
+        self.is_hang = is_hang
+        self.path = Path(log_path)
+        # tail files in log_path if needed
+        if size_limit < 1:
+            LOG.warning("No limit set on report log size!")
+        else:
+            for log in scandir(path=log_path):
+                if log.is_file() and log.stat().st_size > size_limit:
+                    Report.tail(log.path, size_limit)
+        # look through logs one by one until we find a stack
+        for log_file in (x for x in self._logs if x is not None):
+            with open(log_file, "rb") as log_fp:
+                stack = Stack.from_text(log_fp.read().decode("utf-8", errors="ignore"))
+            if stack.frames:
+                # limit the hash calculations to the first n frames if a hang
+                # was detected to attempt to help local bucketing
+                stack.height_limit = self.HANG_STACK_HEIGHT if is_hang else None
+                self.prefix = "%s_%s" % (stack.minor[:8], strftime("%Y-%m-%d_%H-%M-%S"))
+                self.stack = stack
+                break
+        else:
+            self.prefix = "%s_%s" % (self.DEFAULT_MINOR, strftime("%Y-%m-%d_%H-%M-%S"))
+            self.stack = None
+
+    @staticmethod
+    def calc_hash(signature):
+        """Create unique hash from a signature.
+
+        Args:
+            signature (CrashSignature): Signature to hash.
+
+        Returns:
+            str: Hash of the raw signature.
+        """
+        if signature is None:
+            return "NO_SIGNATURE"
+        return sha1(signature.rawSignature.encode("utf-8")).hexdigest()[:16]
+
+    def cleanup(self):
+        """Remove Report data from filesystem.
+
+        Args:
+            None
+
+        Returns:
+            None
+        """
+        if self.path and self.path.is_dir():
+            rmtree(str(self.path))
+        self.path = None
+
+    @property
+    def crash_hash(self):
+        """Create unique hash from a signature.
+
+        Args:
+            None
+
+        Returns:
+            str: Hash of the raw signature of the crash.
+ """ + return self.calc_hash(self.crash_signature) + + @property + def crash_info(self): + """Create CrashInfo object from logs. + + Args: + None + + Returns: + CrashInfo: CrashInfo based on log data. + """ + if self._crash_info is None: + assert self.path is not None + # read in the log files and create a CrashInfo object + if self._logs.aux is not None: + with open(self._logs.aux, "rb") as log_fp: + aux_data = ( + log_fp.read().decode("utf-8", errors="ignore").splitlines() + ) + else: + aux_data = None + # create ProgramConfiguration that can be reported to a FM server + if Path("%s.fuzzmanagerconf" % (self._target_binary,)).is_file(): + # attempt to use ".fuzzmanagerconf" + fm_cfg = ProgramConfiguration.fromBinary(self._target_binary) + else: + LOG.debug("'%s.fuzzmanagerconf' does not exist", self._target_binary) + LOG.debug("creating ProgramConfiguration") + cpu = machine().lower() + fm_cfg = ProgramConfiguration( + self._target_binary.name, + "x86_64" if cpu == "amd64" else cpu, + system(), + ) + with open(self._logs.stderr, "rb") as err_fp, open( + self._logs.stdout, "rb" + ) as out_fp: + self._crash_info = CrashInfo.fromRawCrashData( + out_fp.read().decode("utf-8", errors="ignore").splitlines(), + err_fp.read().decode("utf-8", errors="ignore").splitlines(), + fm_cfg, + auxCrashData=aux_data, + ) + return self._crash_info + + @property + def crash_signature(self): + """Create CrashSignature object from CrashInfo. + + Args: + None + + Returns: + CrashSignature: CrashSignature based on log data. + """ + if self._signature is None: + collector = Collector() + with InterProcessLock(str(Path(grz_tmp()) / "fm_sigcache.lock")): + if collector.sigCacheDir: + cache_sig, _ = collector.search(self.crash_info) + if cache_sig: + LOG.debug("signature loaded from cache file %r", cache_sig) + self._signature = CrashSignature.fromFile(cache_sig) + # if cache lookup failed generate a crash signature + if self._signature is None: + self._signature = self.crash_info.createCrashSignature( + maxFrames=self.crash_signature_max_frames(self.crash_info) + ) + if self._signature is None: + LOG.debug("failed to create FM signature") + return self._signature + + @staticmethod + def crash_signature_max_frames(crash_info, suggested_frames=8): + if set(crash_info.backtrace) & { + "std::panicking::rust_panic", + "std::panicking::rust_panic_with_hook", + }: + # rust panic adds 5-6 frames of noise at the top of the stack + suggested_frames += 6 + return suggested_frames + + @staticmethod + def _find_ffpuppet_worker(logs): + """Search through list of log files for a ffpuppet worker log. + + Args: + logs (list(str)): List of log files to search. + + Returns: + str: The full file path if a match is found otherwise None. + """ + found = None + for fname in (x for x in logs if "ffp_worker" in x): + if found is not None: + # we only expect one log here... + LOG.warning("overwriting previously selected %r", found) + found = fname + return found + + @staticmethod + def _find_minidump(logs): + """Search through list of log files for a minidump log. + + Args: + logs (list(str)): List of log files to search. + + Returns: + str: The full file path if a match is found otherwise None. 
+ """ + re_dump_req = re_compile( + r"\d+\|0\|.+?\|google_breakpad::ExceptionHandler::WriteMinidump" + ) + for fname in (x for x in logs if "minidump" in x): + with open(fname, "r") as log_fp: + data = log_fp.read(65536) + # this will select log that contains "Crash|SIGSEGV|" or + # the desired "DUMP_REQUESTED" log + # TODO: review this it may be too strict + # see mozilla-central/source/accessible/ipc/DocAccessibleParent.cpp#452 + if "Crash|DUMP_REQUESTED|" not in data or re_dump_req.search(data): + return fname + return None + + @staticmethod + def _find_sanitizer(logs): + """Search through list of log files for a sanitizer (ASan, UBSan, etc...) log. + + Args: + logs (list(str)): List of log files to search. + + Returns: + str: The full file path if a match is found otherwise None. + """ + # pattern to identify the ASan crash triggered when the parent process goes away + # TODO: this may no longer be required + re_e10s_forced = re_compile( + r""" + ==\d+==ERROR:.+?SEGV\son.+?0x[0]+\s\(.+?T2\).+? + #0\s+0x[0-9a-f]+\sin\s+mozilla::ipc::MessageChannel::OnChannelErrorFromLink + """, + DOTALL | VERBOSE, + ) + # this is a list of Sanitizer error reports to prioritize + # Sanitizer reports not included below are deprioritized + prioritize_tokens = ( + "use-after-", + "-buffer-overflow on", + ": data race ", + ": SEGV on ", + "access-violation on ", + "attempting free on ", + "negative-size-param", + "-param-overlap", + ) + fallback = None + found = None + for fname in (x for x in logs if "asan" in x): + with open(fname, "r") as log_fp: + data = log_fp.read(65536) + # look for interesting crash info in the log + if "==ERROR:" in data or "WARNING:" in data: + # check for e10s forced crash + if re_e10s_forced.search(data) is not None: + continue + # make sure there is something that looks like a stack frame + if "#0 " in data: + found = fname + if any(x in data for x in prioritize_tokens): + # this is the likely cause of the crash + break + if found is None: + # UBSan error (non-ASan builds) + if ": runtime error: " in data: + found = fname + # catch all (choose the one with info for now) + elif data: + fallback = fname + return found or fallback + + @staticmethod + def _find_valgrind(logs): + """Search through list of log files for a Valgrind worker log. + + Args: + logs (list(str)): List of log files to search. + + Returns: + str: The full file path if a match is found otherwise None. + """ + for fname in (x for x in logs if "valgrind" in x): + if stat(fname).st_size: + return fname + return None + + @property + def major(self): + """The inclusive bucketing hash based on the stack trace + data found in logs. + + Args: + None + + Returns: + str: major hash string. + """ + if self.stack and self.stack.major is not None: + return self.stack.major + return self.DEFAULT_MAJOR + + @property + def minor(self): + """The specific bucketing hash based on the stack trace + data found in logs. + + Args: + None + + Returns: + str: minor hash string. + """ + if self.stack and self.stack.minor is not None: + return self.stack.minor + return self.DEFAULT_MINOR + + @property + def preferred(self): + """Log file containing most relevant data. + + Args: + None + + Returns: + str: Name of log. + """ + return self._logs.aux or self._logs.stderr + + @classmethod + def select_logs(cls, path): + """Scan path for file containing stderr, stdout and other (aux) + data and build a LogMap. + + Args: + path (str): Path to scan for log files. + + Returns: + LogMap: A LogMap pointing to log files or None if path is empty. 
+ """ + files = (x for x in scandir(path=path) if x.is_file()) + # order by date hopefully the oldest log is the cause of the issue + to_scan = [x.path for x in sorted(files, key=lambda x: x.stat().st_mtime)] + if not to_scan: + LOG.warning("No files found in %r", path) + return None + # look for file to use as aux log + log_aux = cls._find_sanitizer(to_scan) + if log_aux is None: + log_aux = cls._find_valgrind(to_scan) + if log_aux is None: + log_aux = cls._find_minidump(to_scan) + if log_aux is None: + log_aux = cls._find_ffpuppet_worker(to_scan) + # look for stderr and stdout log files + log_err = None + log_out = None + for fname in to_scan: + if "stderr" in fname: + log_err = fname + elif "stdout" in fname: + log_out = fname + result = LogMap(log_aux, log_err, log_out) + return result if any(result) else None + + @staticmethod + def tail(in_file, size_limit): + """Tail the given file. WARNING: This is destructive! + + Args: + in_file (str): Path to file to work with. + size_limit (int): Maximum size of file after tail operation. + + Returns: + None + """ + assert size_limit > 0 + with open(in_file, "rb") as in_fp: + in_fp.seek(0, SEEK_END) + end = in_fp.tell() + if end <= size_limit: + return + dump_pos = end - size_limit + in_fp.seek(dump_pos) + out_fd, out_file = mkstemp(prefix="taillog_", dir=grz_tmp()) + with open(out_fd, "wb") as out_fp: + out_fp.write(b"[LOG TAILED]\n") + copyfileobj(in_fp, out_fp, 0x10000) # 64KB chunks + unlink(in_file) + move(out_file, in_file) diff --git a/grizzly/common/reporter.py b/grizzly/common/reporter.py index 14060dc6..c0e46a19 100644 --- a/grizzly/common/reporter.py +++ b/grizzly/common/reporter.py @@ -2,33 +2,28 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- from abc import ABCMeta, abstractmethod -from hashlib import sha1 -from json import dump -from logging import getLogger, WARNING -from platform import machine, system -from os import listdir, getcwd, getenv, makedirs, mkdir, SEEK_END, stat, unlink, walk -from os.path import basename, expanduser, isdir, isfile, join as pathjoin, realpath, relpath -from re import compile as re_compile, DOTALL, VERBOSE -from shutil import copyfile, copyfileobj, move, rmtree +from enum import IntEnum, unique +from json import dumps, loads +from logging import WARNING, getLogger +from os import getenv +from pathlib import Path +from shutil import copyfile, move, rmtree from tarfile import open as tar_open -from tempfile import mkstemp -from time import strftime -from zipfile import ZipFile, ZIP_DEFLATED - -from fasteners.process_lock import InterProcessLock -from psutil import disk_usage +from tempfile import TemporaryDirectory +from zipfile import ZIP_DEFLATED, ZipFile # import FuzzManager utilities from Collector.Collector import Collector +from fasteners.process_lock import InterProcessLock from FTB.ProgramConfiguration import ProgramConfiguration -from FTB.Signatures.CrashInfo import CrashInfo +from psutil import disk_usage # check if boto is available for S3FuzzManager reporter try: from boto3 import resource from botocore.exceptions import ClientError + _boto_import_error = None # pylint: disable=invalid-name getLogger("botocore").setLevel(WARNING) getLogger("boto3").setLevel(WARNING) @@ -36,372 +31,155 @@ except ImportError as err: _boto_import_error = err # pylint: disable=invalid-name -from .stack_hasher import Stack +from .report import Report from .utils import grz_tmp -__all__ = ("FilesystemReporter", "FuzzManagerReporter", "Report", "S3FuzzManagerReporter") +__all__ = ( + "FilesystemReporter", + "FuzzManagerReporter", + "Quality", + "S3FuzzManagerReporter", +) __author__ = "Tyson Smith" __credits__ = ["Tyson Smith"] -log = getLogger("grizzly") # pylint: disable=invalid-name - - -class Report(object): - DEFAULT_MAJOR = "NO_STACK" - DEFAULT_MINOR = "0" - MAX_LOG_SIZE = 1048576 # 1MB - - __slots__ = ("_crash_info", "log_aux", "log_err", "log_out", "path", "prefix", "stack") - - def __init__(self, log_path, log_map, size_limit=MAX_LOG_SIZE): - self._crash_info = None - self.log_aux = log_map.get("aux") if log_map is not None else None - self.log_err = log_map.get("stderr") if log_map is not None else None - self.log_out = log_map.get("stdout") if log_map is not None else None - self.path = log_path - - # tail logs if needed - if size_limit < 1: - log.warning("No limit set on report log size!") - elif isdir(log_path): - for fname in listdir(log_path): - log_file_path = pathjoin(log_path, fname) - if isfile(log_file_path): - Report.tail(log_file_path, size_limit) - - # look through logs one by one until we find a stack - # NOTE: order matters aux->stderr->stdout - for scan_log in (self.log_aux, self.log_err, self.log_out): - if scan_log is None: - continue - with open(pathjoin(log_path, scan_log), "rb") as log_fp: - stack = Stack.from_text(log_fp.read().decode("utf-8", errors="ignore")) - if stack.frames: - self.prefix = "%s_%s" % (stack.minor[:8], strftime("%Y-%m-%d_%H-%M-%S")) - self.stack = stack - break - else: - self.stack = None - self.prefix = "%s_%s" % (self.DEFAULT_MINOR, strftime("%Y-%m-%d_%H-%M-%S")) +LOG = getLogger(__name__) + + +@unique +class Quality(IntEnum): + """testcase quality values""" + + # final reduced testcase + REDUCED = 0 + # original used for reduction (a reduced version 
exists) + ORIGINAL = 1 + # used to manually mark a report as unreducible + IGNORED = 3 + # the testcase is currently being reduced + REDUCING = 4 + # haven't attempted reduction yet (1st attempt, generic reducer) + UNREDUCED = 5 + # platform specific reduction requested (2nd attempt) + REQUEST_SPECIFIC = 6 + # testcase not detected ("testcase" not a testcase?) + NO_TESTCASE = 7 + # the testcase was reproducible, but broke during reduction + REDUCER_BROKE = 8 + # reducer error + REDUCER_ERROR = 9 + # could not reproduce the testcase + NOT_REPRODUCIBLE = 10 - def cleanup(self): - if isdir(self.path): - rmtree(self.path) - - @staticmethod - def crash_hash(crash_info): - """Create CrashInfo object from logs. - - Args: - crash_info (CrashInfo): Binary file being tested. - - Returns: - str: Hash of the raw signature of the crash. - """ - max_frames = Report.crash_signature_max_frames(crash_info, 5) - sig = Report.crash_signature(crash_info, max_frames) - return sha1(sig.rawSignature.encode("utf-8")).hexdigest()[:16] - def crash_info(self, target_binary): - """Create CrashInfo object from logs. - - Args: - target_binary (str): Binary file being tested. - - Returns: - CrashInfo: CrashInfo based on Result log data. - """ - if self._crash_info is None: - # read in the log files and create a CrashInfo object - aux_data = None - if self.log_aux is not None: - with open(pathjoin(self.path, self.log_aux), "rb") as log_fp: - aux_data = log_fp.read().decode("utf-8", errors="ignore").splitlines() - stderr_file = pathjoin(self.path, self.log_err) - stdout_file = pathjoin(self.path, self.log_out) - # create ProgramConfiguration that can be reported to a FM server - if isfile("%s.fuzzmanagerconf" % (target_binary,)): - # attempt to use ".fuzzmanagerconf" - fm_cfg = ProgramConfiguration.fromBinary(target_binary) - else: - log.debug("'%s.fuzzmanagerconf' does not exist", target_binary) - fm_cfg = None - if fm_cfg is None: - log.debug("creating ProgramConfiguration") - cpu = machine().lower() - fm_cfg = ProgramConfiguration( - basename(target_binary), - "x86_64" if cpu == "amd64" else cpu, - system()) - with open(stderr_file, "rb") as err_fp, open(stdout_file, "rb") as out_fp: - self._crash_info = CrashInfo.fromRawCrashData( - out_fp.read().decode("utf-8", errors="ignore").splitlines(), - err_fp.read().decode("utf-8", errors="ignore").splitlines(), - fm_cfg, - auxCrashData=aux_data) - return self._crash_info - - @staticmethod - def crash_signature(crash_info, max_frames=5): - return crash_info.createCrashSignature( - maxFrames=Report.crash_signature_max_frames(crash_info, max_frames)) - - @staticmethod - def crash_signature_max_frames(crash_info, suggested_frames=8): - if set(crash_info.backtrace) & { - "std::panicking::rust_panic", - "std::panicking::rust_panic_with_hook", - }: - # rust panic adds 5-6 frames of noise at the top of the stack - suggested_frames += 6 - return suggested_frames - - @classmethod - def from_path(cls, path, size_limit=MAX_LOG_SIZE): - """Create Report from a directory containing logs. - - Args: - path (str): Directory containing log files. - size_limit (int): Maximum size in bytes of a log file. - - Returns: - Report: Result object based on log data. 
- """ - return cls(path, Report.select_logs(path), size_limit=size_limit) - - @property - def major(self): - if self.stack and self.stack.major is not None: - return self.stack.major - return self.DEFAULT_MAJOR - - @property - def minor(self): - if self.stack and self.stack.minor is not None: - return self.stack.minor - return self.DEFAULT_MINOR - - @property - def preferred(self): - return self.log_aux if self.log_aux is not None else self.log_err - - @staticmethod - def select_logs(log_path): - if not isdir(log_path): - raise IOError("log_path does not exist %r" % log_path) - log_files = listdir(log_path) - if not log_files: - raise IOError("No logs found in %r" % log_path) - logs = {"aux": None, "stderr": None, "stdout": None} - - # order by creation date because the oldest log is likely the cause of the issue - log_files.sort(key=lambda x: stat(pathjoin(log_path, x)).st_mtime) - - # pattern to identify the ASan crash triggered when the parent process goes away - re_e10s_forced = re_compile(r""" - ==\d+==ERROR:.+?SEGV\son.+?0x[0]+\s\(.+?T2\).+? - #0\s+0x[0-9a-f]+\sin\s+mozilla::ipc::MessageChannel::OnChannelErrorFromLink - """, DOTALL | VERBOSE) - - # this is a list of *San error reports to prioritize - # ASan reports not included below (deprioritized): - # stack-overflow, BUS, failed to allocate, detected memory leaks - interesting_sanitizer_tokens = ( - "use-after-", "-buffer-overflow on", ": SEGV on ", "access-violation on ", - "negative-size-param", "attempting free on ", "-param-overlap") - - # look for sanitizer (ASan, UBSan, etc...) logs - for fname in (log_file for log_file in log_files if "asan" in log_file): - # grab first chunk of log to help triage - with open(pathjoin(log_path, fname), "r") as log_fp: - log_data = log_fp.read(4096) - - # look for interesting crash info in the log - if "==ERROR:" in log_data: - # check for e10s forced crash - if re_e10s_forced.search(log_data) is not None: - continue - # make sure there is something that looks like a stack frame in the log - if "#0 " in log_data: - logs["aux"] = fname - if any(x in log_data for x in interesting_sanitizer_tokens): - break # this is the likely cause of the crash - continue # probably the most interesting but lets keep looking - - # UBSan error (non-ASan builds) - if ": runtime error: " in log_data: - logs["aux"] = fname - - # catch all (choose the one with info for now) - if logs["aux"] is None and stat(pathjoin(log_path, fname)).st_size: - logs["aux"] = fname - - # look for Valgrind logs - if logs["aux"] is None: - for fname in (log_file for log_file in log_files if "valgrind" in log_file): - if stat(pathjoin(log_path, fname)).st_size: - logs["aux"] = fname - break - - # prefer ASan logs over minidump logs - if logs["aux"] is None: - re_dump_req = re_compile(r"\d+\|0\|.+?\|google_breakpad::ExceptionHandler::WriteMinidump") - for fname in (log_file for log_file in log_files if "minidump" in log_file): - with open(pathjoin(log_path, fname), "r") as log_fp: - log_data = log_fp.read(4096) - # this will select log that contains "Crash|SIGSEGV|" or - # the desired "DUMP_REQUESTED" log - # TODO: review this it may be too strict - # see https://searchfox.org/mozilla-central/source/accessible/ipc/DocAccessibleParent.cpp#452 - if "Crash|DUMP_REQUESTED|" not in log_data or re_dump_req.search(log_data): - logs["aux"] = fname - break - - # look for ffpuppet worker logs, worker logs should be used if nothing else is available - if logs["aux"] is None: - for fname in (log_file for log_file in log_files if "ffp_worker" in 
log_file): - if logs["aux"] is not None: - # we only expect one log here... - log.warning("aux log previously selected: %s, overwriting!", logs["aux"]) - logs["aux"] = fname - - for fname in log_files: - if "stderr" in fname: - logs["stderr"] = fname - elif "stdout" in fname: - logs["stdout"] = fname - - return logs - - @staticmethod - def tail(in_file, size_limit): - assert size_limit > 0 - if stat(in_file).st_size <= size_limit: - return - with open(in_file, "rb") as in_fp: - in_fp.seek(0, SEEK_END) - dump_pos = max((in_fp.tell() - size_limit), 0) - in_fp.seek(dump_pos) - out_fd, out_file = mkstemp(prefix="taillog_", dir=grz_tmp()) - with open(out_fd, "wb") as out_fp: - out_fp.write(b"[LOG TAILED]\n") - copyfileobj(in_fp, out_fp, 0x10000) # 64KB chunks - unlink(in_file) - move(out_file, in_file) +class Reporter(metaclass=ABCMeta): + __slots__ = ("display_logs",) + def __init__(self): + self.display_logs = getenv("GRZ_DISPLAY_REPORT") == "1" -class Reporter(metaclass=ABCMeta): @abstractmethod - def _process_report(self, report): + def _post_submit(self): pass @abstractmethod - def _reset(self): + def _pre_submit(self, report): pass @abstractmethod def _submit_report(self, report, test_cases): pass - def submit(self, test_cases, log_path=None, report=None): - """Submit report containing results. Either `log_path` or `report` must - be specified. + def submit(self, test_cases, report): + """Submit report containing results. Args: test_cases (iterable): A collection of testcases, ordered newest to oldest, the newest being the mostly likely to trigger the result (crash, assert... etc). - log_path (str): Path to logs from the Target. A Report will - be created from this. report (Report): Report to submit. Returns: - None + *: implementation specific result indicating where the report was created """ - if log_path is not None: - assert report is None, "Only 'log_path' or 'report' can be specified!" 
- if not isdir(log_path): - raise IOError("No such directory %r" % log_path) - report = Report.from_path(log_path) - elif report is not None: - assert isinstance(report, Report) - else: - raise AssertionError("Either 'log_path' or 'report' must be specified!") - self._process_report(report) - self._submit_report(report, test_cases) + assert isinstance(report, Report) + assert report.path is not None + self._pre_submit(report) + # output report contents to console + if self.display_logs: + if not report.is_hang: + with open(report.preferred, "rb") as log_fp: + LOG.info( + "=== BEGIN REPORT ===\n%s", + log_fp.read().decode("utf-8", errors="ignore"), + ) + else: + LOG.info("=== BEGIN REPORT ===\nBrowser hang detected") + LOG.info("=== END REPORT ===") + result = self._submit_report(report, test_cases) if report is not None: report.cleanup() - self._reset() + self._post_submit() + return result class FilesystemReporter(Reporter): DISK_SPACE_ABORT = 512 * 1024 * 1024 # 512 MB - def __init__(self, report_path=None, major_bucket=True): + __slots__ = ("major_bucket", "min_space", "report_path") + + def __init__(self, report_path, major_bucket=True): + super().__init__() self.major_bucket = major_bucket - self.report_path = pathjoin(getcwd(), "results") if report_path is None else report_path + self.min_space = FilesystemReporter.DISK_SPACE_ABORT + self.report_path = Path(report_path) - def _process_report(self, report): + def _pre_submit(self, report): pass - def _reset(self): + def _post_submit(self): pass def _submit_report(self, report, test_cases): # create major bucket directory in working directory if needed if self.major_bucket: - dest_path = pathjoin(self.report_path, report.major[:16]) + dest = self.report_path / report.major[:16] else: - dest_path = self.report_path - if not isdir(dest_path): - makedirs(dest_path) + dest = self.report_path + dest.mkdir(parents=True, exist_ok=True) # dump test cases and the contained files to working directory for test_number, test_case in enumerate(test_cases): - dump_path = pathjoin(dest_path, "%s-%d" % (report.prefix, test_number)) - if not isdir(dump_path): - mkdir(dump_path) + dump_path = dest / ("%s-%d" % (report.prefix, test_number)) + dump_path.mkdir(exist_ok=True) test_case.dump(dump_path, include_details=True) # move logs into bucket directory - log_path = pathjoin(dest_path, "%s_%s" % (report.prefix, "logs")) - if isdir(log_path): - log.warning("Report log path exists %r", log_path) - move(report.path, log_path) + log_path = dest / ("%s_%s" % (report.prefix, "logs")) + if log_path.is_dir(): + LOG.warning("Report log path exists %r", str(log_path)) + move(str(report.path), str(log_path)) # avoid filling the disk - free_space = disk_usage(log_path).free - if free_space < self.DISK_SPACE_ABORT: - raise RuntimeError("Running low on disk space (%0.1fMB)" % (free_space / 1048576.0,)) + free_space = disk_usage(str(log_path)).free + if free_space < self.min_space: + raise RuntimeError( + "Running low on disk space (%0.1fMB)" % (free_space / 1048576.0,) + ) + return dest class FuzzManagerReporter(Reporter): - FM_CONFIG = pathjoin(expanduser("~"), ".fuzzmanagerconf") - # max number of times to report a non-frequent signature to FuzzManager - MAX_REPORTS = 10 - - # testcase quality values - QUAL_REDUCED_RESULT = 0 # the final reduced testcase - QUAL_REDUCED_ORIGINAL = 1 # the original used for successful reduction - QUAL_REPRODUCIBLE = 4 # the testcase was reproducible - QUAL_UNREDUCED = 5 # haven't attempted reduction yet (1st attempt, generic reducer) - 
QUAL_REQUEST_SPECIFIC = 6 # platform specific reduction requested (2nd attempt) - QUAL_NO_TESTCASE = 7 # no testcase was detected (could be the "testcase" is not a testcase) - QUAL_REDUCER_BROKE = 8 # the testcase was reproducible, but broke during reduction - QUAL_REDUCER_ERROR = 9 # reducer error - QUAL_NOT_REPRODUCIBLE = 10 # could not reproduce the testcase - - def __init__(self, target_binary, tool=None): + FM_CONFIG = Path.home() / ".fuzzmanagerconf" + + __slots__ = ("_extra_metadata", "force_report", "quality", "tool") + + def __init__(self, tool=None): + super().__init__() self._extra_metadata = {} self.force_report = False - self.quality = self.QUAL_UNREDUCED - self.target_binary = target_binary + self.quality = Quality.UNREDUCED self.tool = tool # optional tool name - @staticmethod - def create_crash_info(report, target_binary): - # TODO: this is here to preserve the old way of operation (used by reducer) - return report.crash_info(target_binary) - - def _reset(self): - self._extra_metadata = {} + def _post_submit(self): + self._extra_metadata.clear() @staticmethod def sanity_check(bin_file): @@ -413,199 +191,191 @@ def sanity_check(bin_file): Returns: None """ - if not isfile(FuzzManagerReporter.FM_CONFIG): - raise IOError("Missing: %s" % FuzzManagerReporter.FM_CONFIG) - if not isfile("".join([bin_file, ".fuzzmanagerconf"])): + if not FuzzManagerReporter.FM_CONFIG.is_file(): + raise IOError("Missing: %s" % (FuzzManagerReporter.FM_CONFIG,)) + if not Path("".join([bin_file, ".fuzzmanagerconf"])).is_file(): raise IOError("Missing: %s.fuzzmanagerconf" % (bin_file,)) ProgramConfiguration.fromBinary(bin_file) - @classmethod - def quality_name(cls, value): - for name in dir(cls): - if name.startswith("QUAL_") and getattr(cls, name) == value: - return name - return "unknown quality (%r)" % (value,) + def add_extra_metadata(self, key, value): + """Add extra metadata to be reported with any CrashEntrys reported. + + Arguments: + key (str): key for this data in the metadata dict + value (object): JSON serializable object to be included in the FM crash + metadata. The object will be deep-copied. 
+ + Returns: + None + """ + assert isinstance(key, str) + assert key not in self._extra_metadata + # deep copy and ensure that value is JSON serializable + self._extra_metadata[key] = loads(dumps(value)) - def _process_report(self, report): + def _pre_submit(self, report): self._process_rr_trace(report) def _process_rr_trace(self, report): # don't report large files to FuzzManager - trace_path = pathjoin(report.path, "rr-traces") - if isdir(trace_path): - log.info("Ignored rr trace") - self._extra_metadata["rr-trace"] = "ignored" + trace_path = report.path / "rr-traces" + if trace_path.is_dir(): + LOG.info("Ignored rr trace") + self.add_extra_metadata("rr-trace", "ignored") # remove traces so they are not uploaded to FM (because they are huge) # use S3FuzzManagerReporter instead - rmtree(trace_path) + rmtree(str(trace_path)) @staticmethod def _ignored(report): # This is here to prevent reporting stack-less crashes - # that were caused by system OOM or bogus other crashes - log_file = pathjoin(report.path, report.preferred) - with open(log_file, "rb") as log_fp: + # that were caused by system OOM + with open(report.preferred, "rb") as log_fp: log_data = log_fp.read().decode("utf-8", errors="ignore") - mem_errs = ( - "ERROR: Failed to mmap", - ": AddressSanitizer failed to allocate") - for msg in mem_errs: - if msg in log_data and "#0 " not in log_data: - return True + # ignore sanitizer OOMs missing stack + if report.stack is None: + mem_errs = ( + "ERROR: Failed to mmap", + # NOTE: max_allocation_size_mb can trigger a similar message + ": AddressSanitizer failed to allocate", + "Sanitizer: internal allocator is out of memory trying to allocate", + ) + for msg in mem_errs: + if msg in log_data: + return True + # ignore Valgrind crashes if log_data.startswith("VEX temporary storage exhausted."): - # ignore Valgrind crashes return True return False def _submit_report(self, report, test_cases): - # prepare data for submission as CrashInfo - crash_info = report.crash_info(self.target_binary) - assert crash_info is not None - - # search for a cached signature match and if the signature - # is already in the cache and marked as frequent, don't bother submitting - with InterProcessLock(pathjoin(grz_tmp(), "fm_sigcache.lock")): - collector = Collector() - cache_sig_file, cache_metadata = collector.search(crash_info) - if cache_metadata is not None: - if cache_metadata["frequent"]: - log.info("Frequent crash matched existing signature: %s", - cache_metadata["shortDescription"]) - if not self.force_report: - return - elif "bug__id" in cache_metadata: - log.info("Crash matched existing signature (bug %s): %s", - cache_metadata["bug__id"], - cache_metadata["shortDescription"]) - # we will still report this one, but no more - cache_metadata["frequent"] = True - # there is already a signature, initialize count - cache_metadata.setdefault("_grizzly_seen_count", 0) - else: - # there is no signature, create one locally so we can count - # the number of times we've seen it - max_frames = Report.crash_signature_max_frames(crash_info) - cache_sig_file = collector.generate(crash_info, numFrames=max_frames) - cache_metadata = { - "_grizzly_seen_count": 0, - "frequent": False, - "shortDescription": crash_info.createShortSignature()} - if cache_sig_file is None: - if self._ignored(report): - log.info("Report is unsupported and is in ignore list") - return - log.warning("Report is unsupported by FM, saved to %r", report.path) - # TODO: we should check if stackhasher failed too - raise RuntimeError("Failed to create 
FM signature") - # limit the number of times we report per cycle - cache_metadata["_grizzly_seen_count"] += 1 - if cache_metadata["_grizzly_seen_count"] >= self.MAX_REPORTS: - # we will still report this one, but no more - cache_metadata["frequent"] = True - metadata_file = cache_sig_file.replace(".signature", ".metadata") - with open(metadata_file, "w") as meta_fp: - dump(cache_metadata, meta_fp) + collector = Collector() + + if not self.force_report: + # search for a cached signature match + with InterProcessLock(str(Path(grz_tmp()) / "fm_sigcache.lock")): + _, cache_metadata = collector.search(report.crash_info) + + # check if signature has been marked as frequent in FM + if cache_metadata is not None and cache_metadata["frequent"]: + LOG.info( + "Frequent crash matched existing signature: %s", + cache_metadata["shortDescription"], + ) + return None + + if self._ignored(report): + LOG.info("Report is in ignore list") + return None + + if report.is_hang: + self.add_extra_metadata("is_hang", True) # dump test cases and the contained files to working directory test_case_meta = [] for test_number, test_case in enumerate(test_cases): test_case_meta.append([test_case.adapter_name, test_case.input_fname]) - dump_path = pathjoin(report.path, "%s-%d" % (report.prefix, test_number)) - if not isdir(dump_path): - mkdir(dump_path) + dump_path = report.path / ("%s-%d" % (report.prefix, test_number)) + dump_path.mkdir(exist_ok=True) test_case.dump(dump_path, include_details=True) - crash_info.configuration.addMetadata({"grizzly_input": repr(test_case_meta)}) + report.crash_info.configuration.addMetadata( + {"grizzly_input": repr(test_case_meta)} + ) if test_cases: - environ_string = " ".join("=".join(kv) for kv in test_cases[0].env_vars.items()) - crash_info.configuration.addMetadata({"recorded_envvars": environ_string}) + environ_string = " ".join( + "=".join(kv) for kv in test_cases[0].env_vars.items() + ) + report.crash_info.configuration.addMetadata( + {"recorded_envvars": environ_string} + ) else: - self.quality = self.QUAL_NO_TESTCASE - crash_info.configuration.addMetadata(self._extra_metadata) + self.quality = Quality.NO_TESTCASE + report.crash_info.configuration.addMetadata(self._extra_metadata) - # grab screen log + # TODO: this should likely move to ffpuppet + # grab screen log (used in automation) if getenv("WINDOW") is not None: - screen_log = ".".join(["screenlog", getenv("WINDOW")]) - if isfile(screen_log): - target_log = pathjoin(report.path, "screenlog.txt") - copyfile(screen_log, target_log) + screen_log = Path.cwd() / ("screenlog.%s" % (getenv("WINDOW"),)) + if screen_log.is_file(): + target_log = report.path / "screenlog.txt" + copyfile(str(screen_log), str(target_log)) Report.tail(target_log, 10240) # limit to last 10K - # add results to a zip file - zip_name = "%s.zip" % (report.prefix,) - with ZipFile(zip_name, mode="w", compression=ZIP_DEFLATED) as zip_fp: - # add test files - for dir_name, _, dir_files in walk(report.path): - arc_path = relpath(dir_name, report.path) - for file_name in dir_files: - zip_fp.write( - pathjoin(dir_name, file_name), - arcname=pathjoin(arc_path, file_name)) - - # override tool name if specified - if self.tool is not None: - collector.tool = self.tool - - # announce shortDescription if crash is not in a bucket - if cache_metadata["_grizzly_seen_count"] == 1 and not cache_metadata["frequent"]: - log.info("Submitting new crash %r", cache_metadata["shortDescription"]) - # submit results to the FuzzManager server - new_entry = collector.submit(crash_info, 
testCase=zip_name, testCaseQuality=self.quality) - log.info("Logged %d with quality %d", new_entry["id"], self.quality) - - # remove zipfile - if isfile(zip_name): - unlink(zip_name) + with TemporaryDirectory(prefix="fm-zip", dir=grz_tmp()) as tmp_dir: + # add results to a zip file + zip_name = Path(tmp_dir) / ("%s.zip" % (report.prefix,)) + with ZipFile(zip_name, mode="w", compression=ZIP_DEFLATED) as zip_fp: + # add test files + for entry in report.path.rglob("*"): + if entry.is_file(): + zip_fp.write( + str(entry), arcname=str(entry.relative_to(report.path)) + ) + # override tool name if specified + if self.tool is not None: + collector.tool = self.tool + + # submit results to the FuzzManager server + new_entry = collector.submit( + report.crash_info, testCase=zip_name, testCaseQuality=self.quality.value + ) + LOG.info("Logged %d (%s)", new_entry["id"], self.quality.name) + + return new_entry["id"] class S3FuzzManagerReporter(FuzzManagerReporter): @staticmethod def compress_rr_trace(src, dest): # resolve symlink to latest trace available - latest_trace = realpath(pathjoin(src, "latest-trace")) - assert isdir(latest_trace), "missing latest-trace directory" - rr_arc = pathjoin(dest, "rr.tar.bz2") - log.debug("creating %r from %r", rr_arc, latest_trace) + latest_trace = (src / "latest-trace").resolve(strict=True) + assert latest_trace.is_dir(), "missing latest-trace directory" + rr_arc = dest / "rr.tar.bz2" + LOG.debug("creating %r from %r", rr_arc, latest_trace) with tar_open(rr_arc, "w:bz2") as arc_fp: - arc_fp.add(latest_trace, arcname=basename(latest_trace)) + arc_fp.add(str(latest_trace), arcname=latest_trace.name) # remove path containing uncompressed traces - rmtree(src) + rmtree(str(src)) return rr_arc - def _process_report(self, report): + def _pre_submit(self, report): self._process_rr_trace(report) def _process_rr_trace(self, report): - trace_path = pathjoin(report.path, "rr-traces") - if not isdir(trace_path): + trace_path = report.path / "rr-traces" + if not trace_path.is_dir(): return None s3_bucket = getenv("GRZ_S3_BUCKET") assert s3_bucket is not None # check for existing minor hash in S3 - s3 = resource("s3") + s3_res = resource("s3") s3_key = "rr-%s.tar.bz2" % (report.minor,) s3_url = "http://%s.s3.amazonaws.com/%s" % (s3_bucket, s3_key) try: - s3.Object(s3_bucket, s3_key).load() # HEAD, doesn't fetch the whole object + # HEAD, doesn't fetch the whole object + s3_res.Object(s3_bucket, s3_key).load() except ClientError as exc: if exc.response["Error"]["Code"] == "404": # The object does not exist. pass - else: + else: # pragma: no cover # Something else has gone wrong. raise else: # The object already exists. 
- log.info("RR trace exists at %s", s3_url) - self._extra_metadata["rr-trace"] = s3_url + LOG.info("rr trace exists at %r", s3_url) + self.add_extra_metadata("rr-trace", s3_url) # remove traces so they are not reported to FM - rmtree(trace_path) + rmtree(str(trace_path)) return s3_url # Upload to S3 rr_arc = self.compress_rr_trace(trace_path, report.path) - s3.meta.client.upload_file(rr_arc, s3_bucket, s3_key, ExtraArgs={"ACL": "public-read"}) - unlink(rr_arc) - self._extra_metadata["rr-trace"] = s3_url + s3_res.meta.client.upload_file( + str(rr_arc), s3_bucket, s3_key, ExtraArgs={"ACL": "public-read"} + ) + rr_arc.unlink() + self.add_extra_metadata("rr-trace", s3_url) return s3_url @staticmethod diff --git a/grizzly/common/runner.py b/grizzly/common/runner.py index b1c20381..f5431c33 100644 --- a/grizzly/common/runner.py +++ b/grizzly/common/runner.py @@ -3,32 +3,37 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. from logging import getLogger +from pathlib import Path from time import sleep, time -from sapphire import SERVED_TIMEOUT -from ..target import TargetLaunchTimeout -from .utils import grz_tmp +from sapphire import Served, ServerMap -__all__ = ("Runner",) +from ..target import Result, TargetLaunchError, TargetLaunchTimeout +from .storage import TestCase + +__all__ = ("Runner", "RunResult") __author__ = "Tyson Smith" __credits__ = ["Tyson Smith"] -LOG = getLogger("grz_runner") +LOG = getLogger(__name__) + + +class _IdleChecker: + """_IdleChecker is used to help determine if the target is hung (actively using CPU) + or if it has not made expected the HTTP requests for other reasons (idle). + This will allow the framework to move on without interrupting execution of long + running test cases. + This is not perfect! It is to be used AFTER the test case timeout (initial_delay) + has elapsed. + """ -# _IdleChecker is used to help determine if the target is hung (actively using CPU) -# or if it has not made expected the HTTP requests for other reasons (idle). -# This will allow the framework to move on without interrupting execution of -# long running test cases. -# This is not perfect! It is to be used AFTER the test case timeout -# (initial_delay) has elapsed. 
-class _IdleChecker(object): __slots__ = ("_check_cb", "_init_delay", "_poll_delay", "_threshold", "_next_poll") def __init__(self, check_cb, threshold, initial_delay, poll_delay=1): assert callable(check_cb) assert initial_delay >= 0 assert poll_delay >= 0 - assert threshold >= 0 + assert 100 > threshold >= 0 self._check_cb = check_cb # callback used to check if target is idle self._init_delay = initial_delay # time to wait before the initial idle poll self._poll_delay = poll_delay # time to wait between subsequent polls @@ -71,32 +76,42 @@ def schedule_poll(self, initial=False, now=None): self._next_poll = now + self._poll_delay -class Runner(object): - COMPLETE = 1 - ERROR = 2 - FAILED = 3 - IGNORED = 4 - - __slots__ = ("_idle", "_server", "_target", "result", "served", "timeout") +class Runner: + __slots__ = ( + "_close_delay", + "_idle", + "_relaunch", + "_server", + "_target", + "_tests_run", + "startup_failure", + ) - def __init__(self, server, target, idle_threshold=0, idle_delay=60): + def __init__( + self, server, target, close_delay=30, idle_threshold=0, idle_delay=0, relaunch=1 + ): + self._close_delay = close_delay if idle_threshold > 0: + assert idle_delay > 0 + LOG.debug("using idle check, th %d, delay %ds", idle_threshold, idle_delay) self._idle = _IdleChecker(target.is_idle, idle_threshold, idle_delay) else: self._idle = None + assert close_delay > 0 + assert relaunch > 0 + self._relaunch = relaunch # tests to run before relaunching target self._server = server # a sapphire instance to serve the test case self._target = target # target to run test case - self.result = None - self.served = None - self.timeout = False + self._tests_run = 0 # number of tests run since target (re)launched + self.startup_failure = False # failure before first test was served - def launch(self, location, env_mod=None, max_retries=3, retry_delay=0): + def launch(self, location, max_retries=3, retry_delay=0): """Launch a target and open `location`. Args: location (str): URL to open via Target. - env_mod (dict): Environment modifications. - max_retries (int): Number of retries to preform before re-raising TargetLaunchTimeout. + max_retries (int): Number of retries to perform before re-raising + TargetLaunchTimeout. retry_delay (int): Time in seconds to wait between retries. Returns: @@ -106,30 +121,46 @@ def launch(self, location, env_mod=None, max_retries=3, retry_delay=0): assert self._target is not None assert max_retries >= 0 assert retry_delay >= 0 + self._server.clear_backlog() + self._tests_run = 0 + self.startup_failure = False + LOG.debug("launching target (timeout %ds)", self._target.launch_timeout) for retries in reversed(range(max_retries)): try: - self._target.launch(location, env_mod=env_mod) + self._target.launch(location) + except TargetLaunchError as exc: + # This is likely due to a bad build or environment configuration. + if retries: + LOG.warning("Failure detected during launch (retries %d)", retries) + exc.report.cleanup() + sleep(retry_delay) + continue + self.startup_failure = True + raise except TargetLaunchTimeout: - # likely has nothing to do with Grizzly but is seen frequently - # on machines under a high load. After multiple consecutive timeouts - # something is likely wrong so raise. + # A TargetLaunchTimeout likely has nothing to do with Grizzly but is + # seen frequently on machines under a high load. After multiple + # consecutive timeouts something is likely wrong so raise. 
if retries: - LOG.warning("Launch timeout (attempts remaining %d)", retries) + LOG.warning("Timeout detected during launch (retries %d)", retries) sleep(retry_delay) continue + self.startup_failure = True raise break @staticmethod - def location(srv_path, srv_port, close_after=None, forced_close=True, timeout=None): + def location( + srv_path, srv_port, close_after=None, post_launch_delay=None, time_limit=None + ): """Build a valid URL to pass to a browser. Args: srv_path (str): Path segment of the URL srv_port (int): Server listening port close_after (int): Harness argument. - forced_close (bool): Harness argument. - timeout (int): Harness argument. + post_launch_delay (int): Post-launch delay page argument. + time_limit (int): Harness argument. Returns: str: A valid URL. @@ -140,16 +171,70 @@ def location(srv_path, srv_port, close_after=None, forced_close=True, timeout=No if close_after is not None: assert close_after >= 0 args.append("close_after=%d" % (close_after,)) - if not forced_close: - args.append("forced_close=0") - if timeout is not None: - assert timeout >= 0 - args.append("timeout=%d" % (timeout * 1000,)) + if time_limit: + assert time_limit > 0 + args.append("time_limit=%d" % (time_limit * 1000,)) + if post_launch_delay is not None: + assert post_launch_delay >= 0 + args.append("post_launch_delay=%d" % (post_launch_delay,)) if args: - return "?".join([location, "&".join(args)]) + return "?".join((location, "&".join(args))) return location - def run(self, ignore, server_map, testcase, coverage=False, wait_for_callback=False): + @property + def initial(self): + """Check if more than one test has been run since the previous relaunch. + + Args: + None + + Returns: + bool: True if at most one test has been run. + """ + return self._tests_run < 2 + + def post_launch(self, delay=None): + """Perform actions after launching browser before loading test cases. + + Args: + post_launch_delay (int): Amount of time in seconds before the target will + redirect to test case. + + Returns: + None + """ + if delay is not None and not self.startup_failure: + assert delay >= 0 + with TestCase("post_launch_delay.html", None, "None") as content: + content.add_from_file( + Path(__file__).parent / "post_launch_delay.html", + content.landing_page, + copy=True, + ) + srv_map = ServerMap() + srv_map.set_redirect("grz_start", content.landing_page, required=False) + srv_map.set_redirect("grz_continue", "grz_start", required=True) + # temporarily disable server timeout + srv_timeout = self._server.timeout + self._server.timeout = 0 + LOG.info("Browser launched, continuing in %ds...", delay) + # serve prompt page + self._server.serve_path( + content.data_path, + continue_cb=self._target.monitor.is_healthy, + server_map=srv_map, + ) + # re-enable server timeout + self._server.timeout = srv_timeout + + def run( + self, + ignore, + server_map, + testcase, + coverage=False, + wait_for_callback=False, + ): """Serve a testcase and monitor the target for results. Args: @@ -157,61 +242,125 @@ def run(self, ignore, server_map, testcase, coverage=False, wait_for_callback=Fa server_map (sapphire.ServerMap): A ServerMap. testcase (grizzly.TestCase): The test case that will be served. coverage (bool): Trigger coverage dump. - wait_for_callback: (bool): Use `_keep_waiting()` to indicate when - framework should move on. + wait_for_callback (bool): Use `_keep_waiting()` to indicate when + framework should move on. Returns: - None + RunResult: Files served, status and timeout flag from the run. 
""" - # set initial state - self.served = None - self.result = None - self.timeout = False + self._tests_run += 1 if self._idle is not None: self._idle.schedule_poll(initial=True) + if self._tests_run == self._relaunch: + # overwrite instead of replace 'grz_next_test' for consistency + server_map.set_redirect("grz_next_test", "grz_empty", required=True) + server_map.set_dynamic_response("grz_empty", lambda _: b"", required=True) # serve the test case - server_status, self.served = self._server.serve_testcase( - testcase, + serve_start = time() + server_status, served = self._server.serve_path( + testcase.data_path, continue_cb=self._keep_waiting, forever=wait_for_callback, + optional_files=tuple(testcase.optional), server_map=server_map, - working_path=grz_tmp("serve")) + ) + duration = time() - serve_start + result = RunResult( + served, + duration, + attempted=testcase.landing_page in served, + timeout=server_status == Served.TIMEOUT, + ) + # TODO: fix calling TestCase.add_batch() for multi-test replay # add all include files that were served for url, resource in server_map.include.items(): - testcase.add_batch(resource.target, self.served, prefix=url) - self.timeout = server_status == SERVED_TIMEOUT - served_lpage = testcase.landing_page in self.served - if not served_lpage: - LOG.debug("%r not served!", testcase.landing_page) - elif coverage and not self.timeout: - # dump_coverage() should be called before detect_failure() - # to help catch any coverage related issues. - self._target.dump_coverage() - # detect failure - failure_detected = self._target.detect_failure(ignore, self.timeout) - if failure_detected == self._target.RESULT_FAILURE: - self.result = self.FAILED - elif not served_lpage: + testcase.add_batch( + resource.target, + # only pass files that appear to be in current include path + (x for x in result.served if x.startswith(resource.target)), + prefix=url, + ) + if result.timeout: + LOG.debug("timeout detected") + if self._target.handle_hang(ignore_idle=True) or "timeout" in ignore: + result.status = Result.IGNORED + server_map.dynamic.pop("grz_empty", None) + if result.attempted: + if coverage and not result.timeout: + # dump_coverage() should be called before check_result() + # to help catch any coverage related issues. + self._target.dump_coverage() + # relaunch check + if self._tests_run >= self._relaunch and not result.timeout: + assert self._tests_run == self._relaunch + server_map.dynamic.pop("grz_empty", None) + LOG.debug("relaunch/shutdown limit hit") + # ideally all browser tabs should be closed at this point + # and the browser should exit on its own + # NOTE: this will take the full duration if target.is_idle() + # is not implemented + for close_delay in range(max(int(self._close_delay / 0.5), 1)): + if not self._target.monitor.is_healthy(): + break + # wait 3 seconds (6 passes) before attempting idle exit + if close_delay > 5 and self._target.is_idle(10): + # NOTE: this will always trigger on systems where the + # browser does not exit when the last window is closed + LOG.debug("target idle") + break + # delay to help catch shutdown related crashes, LSan, etc. + # debugger and different builds can slow shutdown + sleep(0.5) + else: + LOG.debug("target.close() required") + self._target.close() + else: # something is wrong so close the target # previous iteration put target in a bad state? 
+ LOG.debug("landing page %r not served!", testcase.landing_page) self._target.close() - self.result = self.ERROR - elif failure_detected == self._target.RESULT_IGNORED: - self.result = self.IGNORED - else: - self.result = self.COMPLETE + # detect startup failures + if self.initial: + self.startup_failure = True + # detect results + if result.status == Result.NONE: + result.status = self._target.check_result(ignore) + return result def _keep_waiting(self): - """Callback used by the server to determine if should continue to wait - for the requests from the target. + """Callback used by the server to determine if it should continue to + wait for the requests from the target. Args: None Returns: - bool: Continue to serve test test case + bool: Continue to serve the test case. """ if self._idle is not None and self._idle.is_idle(): LOG.debug("idle target detected") return False return self._target.monitor.is_healthy() + + +class RunResult: + """A RunResult holds result details from a call to Runner.run(). + + Attributes: + attempted (bool): Test landing page (entry point) was requested. + duration (float): Time spent waiting for test contents to be served. + served (tuple(str)): Files that were served. + status (int): Result status of test. + timeout (bool): A timeout occurred waiting for test to complete. + """ + + __slots__ = ("attempted", "duration", "served", "status", "timeout") + + def __init__( + self, served, duration, attempted=False, status=Result.NONE, timeout=False + ): + self.attempted = attempted + self.duration = duration + self.served = served + self.status = status + self.timeout = timeout diff --git a/grizzly/common/stack_hasher.py b/grizzly/common/stack_hasher.py index 2817f500..a32acd11 100644 --- a/grizzly/common/stack_hasher.py +++ b/grizzly/common/stack_hasher.py @@ -12,45 +12,62 @@ crash id (1st hash) and a bug id (2nd hash). This is not perfect but works very well in most cases. 
""" +from enum import Enum, unique from hashlib import sha1 -from logging import basicConfig, getLogger, INFO, DEBUG +from logging import DEBUG, INFO, basicConfig, getLogger from os.path import basename -from re import compile as re_compile, match as re_match +from re import compile as re_compile +from re import match as re_match __all__ = ("Stack", "StackFrame") __author__ = "Tyson Smith" __credits__ = ["Tyson Smith"] -LOG = getLogger("stack_hasher") +LOG = getLogger(__name__) MAJOR_DEPTH = 5 MAJOR_DEPTH_RUST = 10 -class StackFrame(object): - MODE_GDB = 0 - MODE_MINIDUMP = 1 - MODE_RR = 2 - MODE_RUST = 3 - MODE_SANITIZER = 4 - MODE_TSAN = 5 - MODE_VALGRIND = 6 +@unique +class Mode(Enum): + """Parse mode for detected stack type""" + GDB = 0 + MINIDUMP = 1 + RR = 2 + RUST = 3 + SANITIZER = 4 + TSAN = 5 + VALGRIND = 6 + + +class StackFrame: _re_func_name = re_compile(r"(?P.+?)[\(|\s|\<]{1}") # regexs for supported stack trace lines _re_gdb = re_compile(r"^#(?P\d+)\s+(?P0x[0-9a-f]+\sin\s)*(?P.+)") _re_rr = re_compile(r"rr\((?P.+)\+(?P0x[0-9a-f]+)\)\[0x[0-9a-f]+\]") _re_rust_frame = re_compile(r"^\s+(?P\d+):\s+0x[0-9a-f]+\s+\-\s+(?P.+)") - _re_sanitizer = re_compile(r"^\s*#(?P\d+)\s0x[0-9a-f]+(?P\sin)?\s+(?P.+)") - _re_tsan = re_compile(r"^\s*#(?P\d+)\s(?P.+)\s\(((?P.+)\+)?(?P0x[0-9a-f]+)\)") - _re_valgrind = re_compile(r"^==\d+==\s+(at|by)\s+0x[0-9A-F]+\:\s+(?P.+?)\s+\((?P.+)\)") + _re_sanitizer = re_compile( + r"^\s*#(?P\d+)\s0x[0-9a-f]+(?P\sin)?\s+(?P.+)" + ) + _re_tsan = re_compile( + r"^\s*#(?P\d+)\s(?P.+)\s\(((?P.+)\+)?(?P0x[0-9a-f]+)\)" + ) + _re_valgrind = re_compile( + r"^==\d+==\s+(at|by)\s+0x[0-9A-F]+\:\s+(?P.+?)\s+\((?P.+)\)" + ) # TODO: add additional debugger support? - #_re_rust_file = re_compile(r"^\s+at\s+(?P.+)") - #_re_windbg = re_compile(r"^(\(Inline\)|[a-f0-9]+)\s([a-f0-9]+|-+)\s+(?P.+)\+(?P0x[a-f0-9]+)") + # _re_rust_file = re_compile(r"^\s+at\s+(?P.+)") + # _re_windbg = re_compile( + # r"^(\(Inline\)|[a-f0-9]+)\s([a-f0-9]+|-+)\s+(?P.+)\+(?P0x[a-f0-9]+)" + # ) __slots__ = ("function", "location", "mode", "offset", "stack_line") - def __init__(self, function=None, location=None, mode=None, offset=None, stack_line=None): + def __init__( + self, function=None, location=None, mode=None, offset=None, stack_line=None + ): self.function = function self.location = location self.mode = mode @@ -73,19 +90,19 @@ def __str__(self): def from_line(cls, input_line, parse_mode=None): assert "\n" not in input_line, "Input contains unexpected new line(s)" sframe = None - if parse_mode is None or parse_mode == cls.MODE_SANITIZER: + if parse_mode is None or parse_mode == Mode.SANITIZER: sframe = cls._parse_sanitizer(input_line) - if not sframe and parse_mode is None or parse_mode == cls.MODE_GDB: + if not sframe and parse_mode is None or parse_mode == Mode.GDB: sframe = cls._parse_gdb(input_line) - if not sframe and parse_mode is None or parse_mode == cls.MODE_MINIDUMP: + if not sframe and parse_mode is None or parse_mode == Mode.MINIDUMP: sframe = cls._parse_minidump(input_line) - if not sframe and parse_mode is None or parse_mode == cls.MODE_RR: + if not sframe and parse_mode is None or parse_mode == Mode.RR: sframe = cls._parse_rr(input_line) - if not sframe and parse_mode is None or parse_mode == cls.MODE_RUST: + if not sframe and parse_mode is None or parse_mode == Mode.RUST: sframe = cls._parse_rust(input_line) - if not sframe and parse_mode is None or parse_mode == cls.MODE_TSAN: + if not sframe and parse_mode is None or parse_mode == Mode.TSAN: sframe = cls._parse_tsan(input_line) - 
if not sframe and parse_mode is None or parse_mode == cls.MODE_VALGRIND: + if not sframe and parse_mode is None or parse_mode == Mode.VALGRIND: sframe = cls._parse_valgrind(input_line) return sframe @@ -93,18 +110,18 @@ def from_line(cls, input_line, parse_mode=None): def _parse_gdb(cls, input_line): if "#" not in input_line: return None - m = cls._re_gdb.match(input_line) - if m is None: + match = cls._re_gdb.match(input_line) + if match is None: return None - input_line = m.group("line").strip() + input_line = match.group("line").strip() if not input_line: return None - sframe = cls(mode=cls.MODE_GDB, stack_line=m.group("num")) - #sframe.offset = m.group("off") # ignore binary offset for now + sframe = cls(mode=Mode.GDB, stack_line=match.group("num")) + # sframe.offset = m.group("off") # ignore binary offset for now # find function/method name - m = cls._re_func_name.match(input_line) - if m is not None: - sframe.function = m.group("func") + match = cls._re_func_name.match(input_line) + if match is not None: + sframe.function = match.group("func") # find file name and line number if ") at " in input_line: input_line = input_line.split(") at ")[-1] @@ -118,12 +135,20 @@ def _parse_gdb(cls, input_line): @classmethod def _parse_minidump(cls, input_line): try: - tid, stack_line, lib_name, func_name, file_name, line_no, offset = input_line.split("|") + ( + tid, + stack_line, + lib_name, + func_name, + file_name, + line_no, + offset, + ) = input_line.split("|") if int(tid) < 0 or int(stack_line) < 0: return None except ValueError: return None - sframe = cls(mode=cls.MODE_MINIDUMP, stack_line=stack_line) + sframe = cls(mode=Mode.MINIDUMP, stack_line=stack_line) if func_name: sframe.function = func_name.strip() if file_name: @@ -144,23 +169,28 @@ def _parse_minidump(cls, input_line): def _parse_rr(cls, input_line): if "rr(" not in input_line: return None - m = cls._re_rr.match(input_line) - if m is None: + match = cls._re_rr.match(input_line) + if match is None: return None - return cls(location=m.group("loc"), mode=cls.MODE_RR, offset=m.group("off")) + return cls(location=match.group("loc"), mode=Mode.RR, offset=match.group("off")) @classmethod def _parse_rust(cls, input_line): - m = cls._re_rust_frame.match(input_line) - if m is None: + match = cls._re_rust_frame.match(input_line) + if match is None: return None - sframe = cls(mode=cls.MODE_RUST, stack_line=m.group("num")) - sframe.function = m.group("line").strip().rsplit("::h", 1)[0] + sframe = cls(mode=Mode.RUST, stack_line=match.group("num")) + sframe.function = match.group("line").strip().rsplit("::h", 1)[0] # Don't bother with the file offset stuff atm - #m = cls._re_rust_file.match(input_line) if frame is None else None - #if m is not None: - # frame = {"function":None, "mode":cls.MODE_RUST, "offset":None, "stack_line":None} - # input_line = m.group("line").strip() + # match = cls._re_rust_file.match(input_line) if frame is None else None + # if match is not None: + # frame = { + # "function": None, + # "mode": Mode.RUST, + # "offset": None, + # "stack_line": None, + # } + # input_line = match.group("line").strip() # if ":" in input_line: # frame["location"], frame["offset"] = input_line.rsplit(":", 1) # else: @@ -171,17 +201,17 @@ def _parse_rust(cls, input_line): def _parse_sanitizer(cls, input_line): if "#" not in input_line: return None - m = cls._re_sanitizer.match(input_line) - if m is None: + match = cls._re_sanitizer.match(input_line) + if match is None: return None - sframe = cls(mode=cls.MODE_SANITIZER, 
stack_line=m.group("num")) - input_line = m.group("line") + sframe = cls(mode=Mode.SANITIZER, stack_line=match.group("num")) + input_line = match.group("line") # check if line is symbolized - if m.group("in"): + if match.group("in"): # find function/method name - m = cls._re_func_name.match(input_line) - if m is not None: - sframe.function = m.group("func") + match = cls._re_func_name.match(input_line) + if match is not None: + sframe.function = match.group("func") if input_line.startswith("("): input_line = input_line.strip("()") # find location (file name or module) and offset (line # or offset) @@ -197,11 +227,11 @@ def _parse_sanitizer(cls, input_line): def _parse_tsan(cls, input_line): if "#" not in input_line: return None - m = cls._re_tsan.match(input_line) - if m is None: + match = cls._re_tsan.match(input_line) + if match is None: return None - sframe = cls(mode=cls.MODE_TSAN, stack_line=m.group("num")) - input_line = m.group("line") + sframe = cls(mode=Mode.TSAN, stack_line=match.group("num")) + input_line = match.group("line") location = basename(input_line) # try to parse file name and line number if location: @@ -212,13 +242,13 @@ def _parse_tsan(cls, input_line): sframe.offset = location.pop(0) # use module name if file name cannot be found if not sframe.location: - sframe.location = m.group("mod") + sframe.location = match.group("mod") # use module offset if line number cannot be found if not sframe.offset: - sframe.offset = m.group("off") - m = cls._re_func_name.match(input_line) - if m is not None: - function = m.group("func") + sframe.offset = match.group("off") + match = cls._re_func_name.match(input_line) + if match is not None: + function = match.group("func") if function and function != "": sframe.function = function return sframe @@ -227,15 +257,15 @@ def _parse_tsan(cls, input_line): def _parse_valgrind(cls, input_line): if "== " not in input_line: return None - m = cls._re_valgrind.match(input_line) - if m is None: + match = cls._re_valgrind.match(input_line) + if match is None: return None - input_line = m.group("line") + input_line = match.group("line") if input_line is None: # pragma: no cover # this should not happen LOG.warning("failure in _parse_valgrind()") return None - sframe = cls(function=m.group("func"), mode=cls.MODE_VALGRIND) + sframe = cls(function=match.group("func"), mode=Mode.VALGRIND) try: location, sframe.offset = input_line.split(":") sframe.location = location.strip() @@ -250,12 +280,13 @@ def _parse_valgrind(cls, input_line): return sframe -class Stack(object): - __slots__ = ("frames", "_major", "_major_depth", "_minor") +class Stack: + __slots__ = ("frames", "_height_limit", "_major", "_major_depth", "_minor") - def __init__(self, frames=None, major_depth=MAJOR_DEPTH): + def __init__(self, frames=None, hight_limit=None, major_depth=MAJOR_DEPTH): assert frames is None or isinstance(frames, list) self.frames = list() if frames is None else frames + self._height_limit = hight_limit self._major_depth = major_depth self._major = None self._minor = None @@ -266,23 +297,25 @@ def __str__(self): def _calculate_hash(self, major=False): if not self.frames or (major and self._major_depth < 1): return None - h = sha1() - current_depth = 0 - for frame in self.frames: - current_depth += 1 - if major and current_depth > self._major_depth: + shash = sha1() + if self._height_limit is None: + offset = 0 + else: + offset = max(len(self.frames) - self._height_limit, 0) + for depth, frame in enumerate(self.frames[offset:], start=1): + if major and depth > 
self._major_depth: break if frame.location is not None: - h.update(frame.location.encode("utf-8", errors="ignore")) + shash.update(frame.location.encode("utf-8", errors="ignore")) if frame.function is not None: - h.update(frame.function.encode("utf-8", errors="ignore")) - if major and current_depth > 1: + shash.update(frame.function.encode("utf-8", errors="ignore")) + if major and depth > 1: # only add the offset from the top frame when calculating # the major hash and skip the rest continue if frame.offset is not None: - h.update(frame.offset.encode("utf-8", errors="ignore")) - return h.hexdigest() + shash.update(frame.offset.encode("utf-8", errors="ignore")) + return shash.hexdigest() def from_file(self, file_name): # pragma: no cover raise NotImplementedError() # TODO @@ -311,6 +344,7 @@ def from_text(cls, input_text, major_depth=MAJOR_DEPTH, parse_mode=None): # avoid issues with mixed stack types if parse_mode is None: parse_mode = frame.mode + LOG.debug("parser mode: %s", parse_mode.name) elif parse_mode != frame.mode: # don't mix parse modes! continue @@ -333,13 +367,31 @@ def from_text(cls, input_text, major_depth=MAJOR_DEPTH, parse_mode=None): if int(frames[0].stack_line) != 0: LOG.warning("First stack line %s not 0", frames[0].stack_line) if int(frames[-1].stack_line) != len(frames) - 1: - LOG.warning("Last stack line %s not %d (frames-1)", frames[0].stack_line, len(frames) - 1) + LOG.warning( + "Last stack line %s not %d (frames-1)", + frames[-1].stack_line, + len(frames) - 1, + ) - if frames and frames[0].mode == StackFrame.MODE_RUST and major_depth < MAJOR_DEPTH_RUST: + if frames and frames[0].mode == Mode.RUST and major_depth < MAJOR_DEPTH_RUST: major_depth = MAJOR_DEPTH_RUST return cls(frames=frames, major_depth=major_depth) + @property + def height_limit(self): + return self._height_limit + + @height_limit.setter + def height_limit(self, value): + if value is not None: + assert isinstance(value, int) + assert value > 0 + self._height_limit = value + # force recalculation of hashes + self._major = None + self._minor = None + @property def major(self): if self._major is None: @@ -357,24 +409,25 @@ def minor(self): from argparse import ArgumentParser from os import getenv # pylint: disable=ungrouped-imports - parser = ArgumentParser() - parser.add_argument("input", help="") - args = parser.parse_args() - # set output verbosity if getenv("DEBUG"): - log_level = DEBUG - log_fmt = "[%(levelname).1s] %(message)s" + basicConfig( + format="[%(levelname).1s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=DEBUG, + ) else: - log_level = INFO - log_fmt = "%(message)s" - basicConfig(format=log_fmt, datefmt="%Y-%m-%d %H:%M:%S", level=log_level) - - with open(args.input, "rb") as fp: - stack = Stack.from_text(fp.read().decode("utf-8", errors="ignore")) - - for frame in stack.frames: - LOG.info(frame) - LOG.info("Minor: %s", stack.minor) - LOG.info("Major: %s", stack.major) - LOG.info("Frames: %d", len(stack.frames)) + basicConfig(format="%(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=INFO) + + def main(args): + with open(args.input, "rb") as in_fp: + stack = Stack.from_text(in_fp.read().decode("utf-8", errors="ignore")) + for frame in stack.frames: + LOG.info(frame) + LOG.info("Minor: %s", stack.minor) + LOG.info("Major: %s", stack.major) + LOG.info("Frames: %d", len(stack.frames)) + + parser = ArgumentParser() + parser.add_argument("input", help="File to scan for stack trace") + main(parser.parse_args()) diff --git a/grizzly/common/status.py b/grizzly/common/status.py index
b61d7c7b..b380835b 100644 --- a/grizzly/common/status.py +++ b/grizzly/common/status.py @@ -3,182 +3,801 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. """Manage Grizzly status reports.""" -from collections import defaultdict -from json import dump, load +from collections import defaultdict, namedtuple +from contextlib import closing, contextmanager +from copy import deepcopy +from json import dumps, loads from logging import getLogger -from os import close, listdir, unlink -from os.path import isdir, isfile, join as pathjoin -from tempfile import mkstemp +from os import getpid +from pathlib import Path +from sqlite3 import OperationalError, connect from time import time -from fasteners.process_lock import InterProcessLock +from ..common.utils import grz_tmp -from .utils import grz_tmp - -__all__ = ("ReducerStats", "Status") +__all__ = ("Status",) __author__ = "Tyson Smith" __credits__ = ["Tyson Smith"] -LOG = getLogger("status") +# time in seconds for db connection to wait before raising an exception +DB_TIMEOUT = 30 +# used to track changes to the database layout +DB_VERSION = 2 +# default expiration limit for report entries in the database (24 hours) +REPORT_EXP_LIMIT = 86400 +# default expiration limit for result entries in the database (30 days) +RESULT_EXP_LIMIT = 2592000 +LOG = getLogger(__name__) + +ProfileEntry = namedtuple("ProfileEntry", "count max min name total") + +ResultEntry = namedtuple("ResultEntry", "rid count desc") + + +def _db_version_check(con, expected=DB_VERSION): + """Perform version check and remove obsolete tables if required. + + Args: + con (sqlite3.Connection): An open database connection. + expected (int): The latest database version. + + Returns: + bool: True if database was reset otherwise False. + """ + assert expected > 0 + cur = con.cursor() + # collect db version and check if an update is required + cur.execute("PRAGMA user_version;") + version = cur.fetchone()[0] + if version < expected: + cur.execute("BEGIN EXCLUSIVE;") + # check db version again while locked to avoid race + cur.execute("PRAGMA user_version;") + version = cur.fetchone()[0] + if version < expected: + LOG.debug("db version %d < %d", version, expected) + # remove ALL tables from the database + cur.execute("SELECT name FROM sqlite_master WHERE type='table';") + with con: + for entry in cur.fetchall(): + LOG.debug("dropping table %r", entry[0]) + cur.execute("DROP TABLE IF EXISTS %s;" % (entry[0],)) + # update db version number + cur.execute("PRAGMA user_version = %d;" % (expected,)) + return True + assert version == expected, "code out of date?" + return False -class Status(object): +class Status: """Status holds status information for the Grizzly session. - There can be multiple readers of the data but only a single writer. + Read-only mode is implied if `_db_file` is None. + + Attributes: + _db_file (str): Database file containing data. None in read-only mode. + _enable_profiling (bool): Profiling support status. + _profiles (dict): Profiling data. + _results (dict): Results data. Used to count occurrences of results. + ignored (int): Ignored result count. + iteration (int): Iteration count. + log_size (int): Log size in bytes. + pid (int): Python process ID. + start_time (float): Start time of session. + test_name (str): Current test name. + timestamp (float): Last time data was saved to database. """ - PATH = grz_tmp("status") + + # database will be updated no more than every 'REPORT_FREQ' seconds. 
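The schema management above hinges on SQLite's `user_version` pragma, an application-defined integer stored in the database header. A minimal standalone sketch of the same detect-and-reset pattern (file name and version number are illustrative, not part of this patch):

```python
from contextlib import closing
from sqlite3 import connect

EXPECTED = 2  # illustrative schema version


def ensure_schema(db_file):
    with closing(connect(db_file)) as con:
        cur = con.cursor()
        cur.execute("PRAGMA user_version;")
        if cur.fetchone()[0] < EXPECTED:
            # obsolete layout: drop all tables and stamp the new version
            cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
            with con:
                for (name,) in cur.fetchall():
                    cur.execute("DROP TABLE IF EXISTS %s;" % (name,))
                # pragmas cannot be parameterized, hence the %-formatting
                cur.execute("PRAGMA user_version = %d;" % (EXPECTED,))


ensure_schema("example.db")
```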
REPORT_FREQ = 60 + STATUS_DB = str(Path(grz_tmp()) / "fuzz-status.db") + __slots__ = ( - "_lock", "_results", "data_file", "ignored", "iteration", - "log_size", "start_time", "test_name", "timestamp") - - def __init__(self, data_file, start_time=None): - assert ".json" in data_file - assert start_time is None or isinstance(start_time, float) - self._lock = InterProcessLock("%s.lock" % (data_file,)) - self._results = defaultdict(int) - # if data_file is None the status report is read only (no reporting) - self.data_file = data_file + "_db_file", + "_enable_profiling", + "_profiles", + "results", + "ignored", + "iteration", + "log_size", + "pid", + "start_time", + "test_name", + "timestamp", + ) + + def __init__( + self, + db_file=None, + enable_profiling=False, + start_time=None, + exp_limit=REPORT_EXP_LIMIT, + pid=None, + report_limit=0, + ): + if db_file is None: + # read-only mode + assert start_time is None + self._enable_profiling = False + else: + assert isinstance(start_time, float) + assert exp_limit >= 0 + assert report_limit >= 0 + assert pid >= 0 + self._enable_profiling = enable_profiling + self._profiles = dict() + self._db_file = db_file self.ignored = 0 self.iteration = 0 self.log_size = 0 + self.pid = pid + self.results = None self.start_time = start_time self.test_name = None self.timestamp = start_time - def cleanup(self): - """Remove data and lock files from disk. + # prepare database + if self._db_file: + LOG.debug("status using db %r", self._db_file) + with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con: + _db_version_check(con) + cur = con.cursor() + with con: + # create table if needed + cur.execute( + """CREATE TABLE IF NOT EXISTS status ( + _profiles TEXT NOT NULL, + ignored INTEGER NOT NULL, + iteration INTEGER NOT NULL, + log_size INTEGER NOT NULL, + pid INTEGER NOT NULL PRIMARY KEY, + start_time REAL NOT NULL, + timestamp REAL NOT NULL);""" + ) + # remove expired status data + if exp_limit > 0: + cur.execute( + """DELETE FROM status WHERE timestamp <= ?;""", + (time() - exp_limit,), + ) + # avoid (unlikely) pid reuse collision + cur.execute("""DELETE FROM status WHERE pid = ?;""", (pid,)) + + self.results = ResultCounter( + pid, + db_file=db_file, + freq_limit=report_limit, + ) + + def blockers(self, iters_per_result=100): + """Any result with an iterations-per-result ratio of less than or equal the + given limit are considered 'blockers'. Results with a count <= 1 are not + included. + + Args: + iters_per_result (int): Iterations-per-result threshold. + + Yields: + ResultEntry: ID, count and description of blocking result. + """ + assert iters_per_result > 0 + if self.results: + for entry in self.results.all(): + if entry.count > 1 and self.iteration / entry.count <= iters_per_result: + yield entry + + @classmethod + def loadall(cls, db_file=STATUS_DB, time_limit=300): + """Load all status reports found in `db_file`. + + Args: + db_file (str): Path to database containing status data. + time_limit (int): Only include entries with a timestamp that is within the + given number of seconds. + + Yields: + Status: Successfully loaded read-only status objects. 
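`blockers()` surfaces any result whose iterations-per-result ratio is at or below the given threshold, i.e. crashes that come up too often to make progress. A hedged consumer-side sketch (the database path and threshold are assumptions):

```python
# Scan a status database and flag results hit at least once per 50 iterations.
from grizzly.common.status import Status

for status in Status.loadall(db_file="fuzz-status.db", time_limit=300):
    print("PID %d: %d iterations" % (status.pid, status.iteration))
    for entry in status.blockers(iters_per_result=50):
        print("  blocker %s seen %dx: %s" % (entry.rid, entry.count, entry.desc))
```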
+ """ + assert db_file + assert time_limit >= 0 + with closing(connect(db_file, timeout=DB_TIMEOUT)) as con: + cur = con.cursor() + # collect entries + try: + if time_limit: + cur.execute( + """SELECT pid, + _profiles, + ignored, + iteration, + log_size, + start_time, + timestamp + FROM status + WHERE timestamp > ?;""", + (time() - time_limit,), + ) + else: + cur.execute( + """SELECT pid, + _profiles, + ignored, + iteration, + log_size, + start_time, + timestamp + FROM status;""" + ) + entries = cur.fetchall() + except OperationalError as exc: + if not str(exc).startswith("no such table:"): + raise # pragma: no cover + entries = () + + # Load all results + results = ResultCounter.load(db_file, 0) + for entry in entries: + status = cls(pid=entry[0]) + status._profiles = loads(entry[1]) + status.ignored = entry[2] + status.iteration = entry[3] + status.log_size = entry[4] + status.start_time = entry[5] + status.timestamp = entry[6] + for counter in results: + if counter.pid == status.pid: + status.results = counter + break + else: + # no existing ResultCounter with matching pid found + status.results = ResultCounter(status.pid) + yield status + + @contextmanager + def measure(self, name): + """Used to simplify collecting profiling data. + + Args: + name (str): Used to group the entries. + + Yields: + None + """ + if self._enable_profiling: + mark = time() + yield + self.record(name, time() - mark) + else: + yield + + def profile_entries(self): + """Used to retrieve profiling data. Args: None + Yields: + ProfileEntry: Containing recorded profiling data. + """ + for name, entry in self._profiles.items(): + yield ProfileEntry( + entry["count"], + entry["max"], + entry["min"], + name, + entry["total"], + ) + + @property + def rate(self): + """Calculate the number of iterations performed per second since start() + was called. + + Args: + None + + Returns: + float: Number of iterations performed per second. + """ + runtime = self.runtime + return self.iteration / float(runtime) if runtime else 0 + + def record(self, name, duration): + """Used to add profiling data. This is intended to be used to make rough + calculations to identify major configuration issues. + + Args: + name (str): Used to group the entries. + duration (int, float): Stored to be later used for measurements. + Returns: None """ - if self.data_file is None: - return - with self._lock: + if self._enable_profiling: + assert isinstance(duration, (float, int)) try: - unlink(self.data_file) - except OSError: # pragma: no cover - LOG.warning("Failed to delete %r", self.data_file) - try: - unlink("%s.lock" % (self.data_file,)) - except OSError: # pragma: no cover - pass - self.data_file = None - - def count_result(self, signature): - """Increment counter that matches `signature`. + self._profiles[name]["count"] += 1 + if self._profiles[name]["max"] < duration: + self._profiles[name]["max"] = duration + elif self._profiles[name]["min"] > duration: + self._profiles[name]["min"] = duration + self._profiles[name]["total"] += duration + except KeyError: + # add profile entry + self._profiles[name] = { + "count": 1, + "max": duration, + "min": duration, + "total": duration, + } + + def report(self, force=False, report_freq=REPORT_FREQ): + """Write status report to database. Reports are only written periodically. + It is limited by `report_freq`. The specified number of seconds must + elapse before another write will be performed unless `force` is True. Args: - signature (str): + force (bool): Ignore report frequently limiting. 
+ report_freq (int): Minimum number of seconds between writes. Returns: - None + bool: Returns true if the report was successful otherwise false. """ - self._results[signature] += 1 + now = time() + if not force and now < (self.timestamp + report_freq): + return False + assert self._db_file + assert self.start_time <= now + self.timestamp = now - @property - def _data(self): - return { - "_results": self._results, - "ignored": self.ignored, - "iteration": self.iteration, - "log_size": self.log_size, - "start_time": self.start_time, - "test_name": self.test_name, - "timestamp": self.timestamp} + profiles = dumps(self._profiles) + with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con: + cur = con.cursor() + with con: + cur.execute( + """UPDATE status + SET _profiles = ?, + ignored = ?, + iteration = ?, + log_size = ?, + start_time = ?, + timestamp = ? + WHERE pid = ?;""", + ( + profiles, + self.ignored, + self.iteration, + self.log_size, + self.start_time, + self.timestamp, + self.pid, + ), + ) + if cur.rowcount < 1: + cur.execute( + """INSERT INTO status( + pid, + _profiles, + ignored, + iteration, + log_size, + start_time, + timestamp) + VALUES (?, ?, ?, ?, ?, ?, ?);""", + ( + self.pid, + profiles, + self.ignored, + self.iteration, + self.log_size, + self.start_time, + self.timestamp, + ), + ) + + return True @property - def duration(self): - """Calculate the number of seconds since start() was called. + def runtime(self): + """Calculate the number of seconds since start() was called. Value is + calculated relative to 'timestamp' if status object is read-only. Args: None Returns: - int: Total runtime in seconds since start() was called + int: Total runtime in seconds. """ - return max(self.timestamp - self.start_time, 0) + if self._db_file is None: + return self.timestamp - self.start_time + return max(time() - self.start_time, 0) @classmethod - def load(cls, data_file): - """Load status report. Loading a status report from disk will create a - read only status report. + def start(cls, db_file=None, enable_profiling=False, report_limit=0): + """Create a unique Status object. Args: - data_file (str): JSON file that contains status data. + db_file (str): Path to database containing status data. + enable_profiling (bool): Record profiling data. Returns: - Status: Loaded status object or None + Status: Active status report. """ - status = cls(data_file) - data = None - try: - with status._lock: # pylint: disable=protected-access - with open(data_file, "r") as out_fp: - data = load(out_fp) - except OSError: - LOG.debug("failed to open %r", data_file) - # if data_file exists the lock will be removed by the active session - if not isfile(data_file): - # attempt to remove potentially leaked lock file - try: - unlink("%s.lock" % (data_file,)) - except OSError: # pragma: no cover - pass - except ValueError: - LOG.debug("failed to load json data from %r", data_file) - else: - LOG.debug("no such file %r", data_file) - if data is None: - return None - if "start_time" not in data: - LOG.debug("invalid status json file") - return None - for attr, value in data.items(): - setattr(status, attr, value) - # set read only - status.data_file = None + if db_file is None: + db_file = cls.STATUS_DB + status = cls( + db_file=db_file, + enable_profiling=enable_profiling, + start_time=time(), + pid=getpid(), + report_limit=report_limit, + ) + status.report(force=True) return status - @classmethod - def loadall(cls): - """Load all status reports found in cls.PATH. 
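Putting `start()`, `measure()` and `report()` together, the intended per-iteration flow looks roughly like the following sketch (the work inside the loop is a placeholder for a real fuzzing iteration):

```python
from time import sleep

from grizzly.common.status import Status

status = Status.start(enable_profiling=True)
for _ in range(10):
    status.iteration += 1
    with status.measure("iteration"):
        sleep(0.01)  # placeholder for one fuzzing iteration
    # writes to the db at most once per REPORT_FREQ seconds unless forced
    status.report()
status.report(force=True)
```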
+ +class ResultCounter: + __slots__ = ( + "_count", + "_db_file", + "_desc", + "_frequent", + "_limit", + "pid", + ) + + def __init__(self, pid, db_file=None, exp_limit=RESULT_EXP_LIMIT, freq_limit=0): + assert exp_limit >= 0 + assert freq_limit >= 0 + assert pid >= 0 + self._count = defaultdict(int) + self._desc = dict() + self._db_file = db_file + self._frequent = set() + self._limit = freq_limit + self.pid = pid + + # prepare database + if self._db_file: + LOG.debug("resultcounter using db %r", self._db_file) + with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con: + _db_version_check(con) + cur = con.cursor() + with con: + # create table if needed + cur.execute( + """CREATE TABLE IF NOT EXISTS results ( + count INTEGER NOT NULL, + description TEXT NOT NULL, + pid INTEGER NOT NULL, + result_id TEXT NOT NULL, + timestamp INTEGER NOT NULL, + PRIMARY KEY(pid, result_id));""" + ) + # remove expired entries + if exp_limit > 0: + cur.execute( + """DELETE FROM results WHERE timestamp <= ?;""", + (int(time() - exp_limit),), + ) + # avoid (unlikely) pid reuse collision + cur.execute("""DELETE FROM results WHERE pid = ?;""", (pid,)) + # remove results for jobs that have been removed + try: + cur.execute( + """DELETE FROM results + WHERE pid NOT IN (SELECT pid FROM status);""" + ) + except OperationalError as exc: + if not str(exc).startswith("no such table:"): + raise # pragma: no cover + + def all(self): + """Yield all result data. Args: None + Yields: + ResultEntry: Contains ID, count and description for each result entry. + """ + for result_id, count in self._count.items(): + if count > 0: + yield ResultEntry(result_id, count, self._desc.get(result_id, None)) + + def count(self, result_id, desc): + """ + + Args: + result_id (str): Result ID. + desc (str): User friendly description. + Returns: - Generator: Status objects stored in cls.PATH. + int: Current count for given result_id. """ - if isdir(cls.PATH): - for data_file in listdir(cls.PATH): - if not data_file.endswith(".json"): - continue - status = cls.load(pathjoin(cls.PATH, data_file)) - if status is None: - continue - yield status + assert isinstance(result_id, str) + self._count[result_id] += 1 + if result_id not in self._desc: + self._desc[result_id] = desc + if self._db_file: + with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con: + cur = con.cursor() + timestamp = int(time()) + with con: + cur.execute( + """UPDATE results + SET timestamp = ?, + count = ? + WHERE pid = ? + AND result_id = ?;""", + (timestamp, self._count[result_id], self.pid, result_id), + ) + if cur.rowcount < 1: + cur.execute( + """INSERT INTO results( + pid, + result_id, + description, + timestamp, + count) + VALUES (?, ?, ?, ?, ?);""", + ( + self.pid, + result_id, + desc, + timestamp, + self._count[result_id], + ), + ) + return self._count[result_id] + + @classmethod + def load(cls, db_file, time_limit): + """Load existing entries for database and populate a ResultCounter. + + Args: + db_file (str): Database file. + time_limit (int): Used to filter older entries. + + Returns: + list: Loaded ResultCounters. 
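A hedged sketch of the `ResultCounter` bookkeeping described above, run purely in-memory (no `db_file`, so only local counts are consulted; the bucket ID and description are invented, and `is_frequent()` is defined just below):

```python
from grizzly.common.status import ResultCounter

counter = ResultCounter(pid=1234, freq_limit=2)
for _ in range(3):
    counter.count("deadbeef", "SEGV on unknown address")
entry = counter.get("deadbeef")
print(entry.count, entry.desc)  # -> 3 SEGV on unknown address
# local count (3) exceeds freq_limit (2), so this bucket is now frequent
print(counter.is_frequent("deadbeef"))  # -> True
```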
+ """ + assert db_file + assert time_limit >= 0 + with closing(connect(db_file, timeout=DB_TIMEOUT)) as con: + cur = con.cursor() + try: + # collect entries + if time_limit: + cur.execute( + """SELECT pid, + result_id, + description, + count + FROM results + WHERE timestamp > ?;""", + (int(time()) - time_limit,), + ) + else: + cur.execute( + """SELECT pid, result_id, description, count FROM results""" + ) + entries = cur.fetchall() + except OperationalError as exc: + if not str(exc).startswith("no such table:"): + raise # pragma: no cover + entries = () + + loaded = dict() + for pid, result_id, desc, count in entries: + if pid not in loaded: + loaded[pid] = cls(pid) + loaded[pid]._desc[result_id] = desc # pylint: disable=protected-access + loaded[pid]._count[result_id] = count # pylint: disable=protected-access + + return list(loaded.values()) + + def get(self, result_id): + """Get count and description for given result id. + + Args: + result_id (str): Result ID. + + Returns: + ResultEntry: Count and description. + """ + assert isinstance(result_id, str) + return ResultEntry( + result_id, self._count.get(result_id, 0), self._desc.get(result_id, None) + ) + + def is_frequent(self, result_id): + """Scan all results including results from other running instances + to determine if the limit has been exceeded. Local count must be >1 before + limit is checked. + + Args: + result_id (str): Result ID. + + Returns: + bool: True if limit has been exceeded otherwise False. + """ + assert isinstance(result_id, str) + if self._limit < 1: + return False + if result_id in self._frequent: + return True + # get local total + total = self._count.get(result_id, 0) + # only check the db for parallel results if + # - result has been found locally more than once + # - limit has not been exceeded locally + # - a db file is given + if self._limit >= total > 1 and self._db_file: + with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con: + cur = con.cursor() + # look up total count from all processes + cur.execute( + """SELECT SUM(count) FROM results WHERE result_id = ?;""", + (result_id,), + ) + total = cur.fetchone()[0] or 0 + if total > self._limit: + self._frequent.add(result_id) + return True + return False + + def mark_frequent(self, result_id): + """Mark given results ID as frequent locally. + + Args: + result_id (str): Result ID. + + Returns: + None + """ + assert isinstance(result_id, str) + if result_id not in self._frequent: + self._frequent.add(result_id) @property - def rate(self): - """Calculate the number of iterations performed per second since start() - was called. + def total(self): + """Get total count of all results. Args: None Returns: - float: Number of iterations performed per second. + int: Total result count. """ - return self.iteration / float(self.duration) if self.duration > 0 else 0 + return sum(x for x in self._count.values()) + + +ReductionStep = namedtuple( + "ReductionStep", "name, duration, successes, attempts, size, iterations" +) + + +class ReductionStatus: + """Status for a single grizzly reduction""" + + # database will be updated no more than every 'REPORT_FREQ' seconds. + REPORT_FREQ = 60 + + STATUS_DB = str(Path(grz_tmp()) / "reduce-status.db") + + def __init__( + self, + strategies=None, + testcase_size_cb=None, + crash_id=None, + db_file=None, + pid=None, + tool=None, + exp_limit=REPORT_EXP_LIMIT, + ): + """Initialize a ReductionStatus instance. + + Arguments: + strategies (list(str)): List of strategies to be run. 
+ testcase_size_cb (callable): Callback to get testcase size + crash_id (int): CrashManager ID of original testcase + db_file (str): Database file containing data. None in read-only mode. + tool (str): The tool name used for reporting to FuzzManager. + """ + self.analysis = {} + self.attempts = 0 + self.iterations = 0 + self.run_params = {} + self.signature_info = {} + self.successes = 0 + self.current_strategy_idx = None + self._testcase_size_cb = testcase_size_cb + self.crash_id = crash_id + self.finished_steps = [] + self._in_progress_steps = [] + self.strategies = strategies + self._db_file = db_file + self.pid = pid + self.timestamp = time() + self.tool = tool + self._current_size = None + self.last_reports = [] + + # prepare database + if self._db_file: + LOG.debug("status using db %r", self._db_file) + with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con: + _db_version_check(con) + cur = con.cursor() + with con: + # create table if needed + cur.execute( + """CREATE TABLE IF NOT EXISTS reduce_status ( + pid INTEGER NOT NULL PRIMARY KEY, + analysis TEXT NOT NULL, + attempts INTEGER NOT NULL, + iterations INTEGER NOT NULL, + run_params TEXT NOT NULL, + signature_info TEXT NOT NULL, + successes INTEGER NOT NULL, + crash_id INTEGER, + finished_steps TEXT NOT NULL, + _in_progress_steps TEXT NOT NULL, + strategies TEXT NOT NULL, + _current_size INTEGER NOT NULL, + current_strategy_idx INTEGER, + timestamp REAL NOT NULL, + tool TEXT, + last_reports TEXT NOT NULL);""" + ) + # remove expired status data + if exp_limit > 0: + cur.execute( + """DELETE FROM reduce_status WHERE timestamp <= ?;""", + (time() - exp_limit,), + ) + # avoid (unlikely) pid reuse collision + cur.execute("""DELETE FROM reduce_status WHERE pid = ?;""", (pid,)) + + @classmethod + def start( + cls, + db_file=None, + strategies=None, + testcase_size_cb=None, + crash_id=None, + tool=None, + ): + """Create a unique ReductionStatus object. + + Args: + db_file (str): Path to database containing status data. + strategies (list(str)): List of strategies to be run. + testcase_size_cb (callable): Callback to get testcase size + crash_id (int): CrashManager ID of original testcase + tool (str): The tool name used for reporting to FuzzManager. + + Returns: + ReductionStatus: Active status report. + """ + if db_file is None: + db_file = cls.STATUS_DB + status = cls( + crash_id=crash_id, + db_file=db_file, + pid=getpid(), + strategies=strategies, + testcase_size_cb=testcase_size_cb, + tool=tool, + ) + status.report(force=True) + return status def report(self, force=False, report_freq=REPORT_FREQ): - """Write status report. Reports are only written when the time since the - previous report was created exceeds `report_freq` seconds. + """Write status report to database. Reports are only written periodically. + It is limited by `report_freq`. The specified number of seconds must + elapse before another write will be performed unless `force` is True. Args: force (bool): Ignore report frequently limiting. @@ -187,85 +806,377 @@ def report(self, force=False, report_freq=REPORT_FREQ): Returns: bool: Returns true if the report was successful otherwise false. 
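As with `Status`, a reduction session registers itself with `start()` and pushes rows with `report()`. A minimal sketch (the strategy names, size callback and tool label are placeholders, not values mandated by this patch):

```python
from grizzly.common.status import ReductionStatus

status = ReductionStatus.start(
    strategies=["list", "lines"],  # placeholder strategy names
    testcase_size_cb=lambda: 4096,  # stand-in for the real testcase size
    tool="grizzly-reduce-example",
)
status.report(force=True)
```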
""" - assert self.data_file is not None now = time() if not force and now < (self.timestamp + report_freq): return False + assert self._db_file self.timestamp = now - with self._lock: - with open(self.data_file, "w") as out_fp: - dump(self._data, out_fp) + + with closing(connect(self._db_file, timeout=DB_TIMEOUT)) as con: + cur = con.cursor() + with con: + analysis = dumps(self.analysis) + run_params = dumps(self.run_params) + sig_info = dumps(self.signature_info) + finished = dumps(self.finished_steps) + in_prog = dumps([step.serialize() for step in self._in_progress_steps]) + strategies = dumps(self.strategies) + last_reports = dumps(self.last_reports) + + cur.execute( + """UPDATE reduce_status + SET analysis = ?, + attempts = ?, + iterations = ?, + run_params = ?, + signature_info = ?, + successes = ?, + crash_id = ?, + finished_steps = ?, + _in_progress_steps = ?, + strategies = ?, + _current_size = ?, + current_strategy_idx = ?, + timestamp = ?, + tool = ?, + last_reports = ? + WHERE pid = ?;""", + ( + analysis, + self.attempts, + self.iterations, + run_params, + sig_info, + self.successes, + self.crash_id, + finished, + in_prog, + strategies, + self._testcase_size(), + self.current_strategy_idx, + self.timestamp, + self.tool, + last_reports, + self.pid, + ), + ) + if cur.rowcount < 1: + cur.execute( + """INSERT INTO reduce_status( + pid, + analysis, + attempts, + iterations, + run_params, + signature_info, + successes, + crash_id, + finished_steps, + _in_progress_steps, + strategies, + _current_size, + current_strategy_idx, + timestamp, + tool, + last_reports) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);""", + ( + self.pid, + analysis, + self.attempts, + self.iterations, + run_params, + sig_info, + self.successes, + self.crash_id, + finished, + in_prog, + strategies, + self._testcase_size(), + self.current_strategy_idx, + self.timestamp, + self.tool, + last_reports, + ), + ) + return True - @property - def results(self): - """Calculate the total number of results. + @classmethod + def loadall(cls, db_file=STATUS_DB, time_limit=300): + """Load all reduction status reports found in `db_file`. Args: - None + db_file (str): Path to database containing status data. + time_limit (int): Only include entries with a timestamp that is within the + given number of seconds. + + Yields: + Status: Successfully loaded read-only status objects. + """ + assert db_file + assert time_limit >= 0 + with closing(connect(db_file, timeout=DB_TIMEOUT)) as con: + cur = con.cursor() + # collect entries + try: + cur.execute( + """SELECT pid, + analysis, + attempts, + iterations, + run_params, + signature_info, + successes, + crash_id, + finished_steps, + _in_progress_steps, + strategies, + _current_size, + current_strategy_idx, + timestamp, + tool, + last_reports + FROM reduce_status + WHERE timestamp > ? 
+ ORDER BY timestamp DESC;""", + (time() - time_limit,), + ) + entries = cur.fetchall() + except OperationalError as exc: + if not str(exc).startswith("no such table:"): + raise # pragma: no cover + entries = () + + for entry in entries: + pid = entry[0] + + status = cls( + strategies=loads(entry[10]), + crash_id=entry[7], + pid=pid, + tool=entry[14], + ) + status.analysis = loads(entry[1]) + status.attempts = entry[2] + status.iterations = entry[3] + status.run_params = loads(entry[4]) + status.signature_info = loads(entry[5]) + status.successes = entry[6] + status.finished_steps = [ + ReductionStep._make(step) for step in loads(entry[8]) + ] + status._in_progress_steps = [ + status._construct_milestone(*step) for step in loads(entry[9]) + ] + status._current_size = entry[11] + status.current_strategy_idx = entry[12] + status.timestamp = entry[13] + status.last_reports = loads(entry[15]) + yield status + + def _testcase_size(self): + if self._db_file is None: + return self._current_size + return self._testcase_size_cb() + + def __deepcopy__(self, memo): + """Return a deep copy of this instance.""" + # pylint: disable=protected-access + result = type(self)( + strategies=deepcopy(self.strategies, memo), + crash_id=self.crash_id, + testcase_size_cb=self._testcase_size_cb, + pid=self.pid, + tool=self.tool, + ) + # assign after construction to avoid DB access + result._db_file = self._db_file + result.analysis = deepcopy(self.analysis, memo) + result.attempts = self.attempts + result.iterations = self.iterations + result.run_params = deepcopy(self.run_params, memo) + result.signature_info = deepcopy(self.signature_info, memo) + result.successes = self.successes + result.finished_steps = deepcopy(self.finished_steps, memo) + result.last_reports = deepcopy(self.last_reports, memo) + # finish open timers + for step in reversed(self._in_progress_steps): + result.record( + step.name, + attempts=step.attempts, + duration=step.duration, + iterations=step.iterations, + successes=step.successes, + report=False, + ) + return result + + @property + def current_strategy(self): + if self._in_progress_steps: + return self._in_progress_steps[-1] + if self.finished_steps: + return self.finished_steps[-1] + return None + + @property + def total(self): + if self._in_progress_steps: + return self._in_progress_steps[0] + if self.finished_steps: + return self.finished_steps[-1] + return None + + @property + def original(self): + if self.finished_steps: + return self.finished_steps[0] + return None + + def record( + self, + name, + duration=None, + iterations=None, + attempts=None, + successes=None, + report=True, + ): + """Record reduction status for a given point in time: + + - name of the milestone (eg. init, strategy name completed) + - elapsed time (seconds) + - # of iterations + - # of total attempts + - # of successful attempts + + Arguments: + name (str): name of milestone + duration (float or None): seconds elapsed for period recorded + iterations (int or None): # of iterations performed + attempts (int or None): # of attempts performed + successes (int or None): # of attempts successful + report (bool): Automatically force a report. Returns: - int: Total number of results. 
+ None """ - return sum(self._results.values()) + self.finished_steps.append( + ReductionStep( + name=name, + size=self._testcase_size(), + duration=duration, + iterations=iterations, + attempts=attempts, + successes=successes, + ) + ) + if report: + self.report(force=True) - @classmethod - def start(cls): - """Create a unique Status object. + def _construct_milestone(self, name, start, attempts, iterations, successes): + # pylint: disable=no-self-argument + class _MilestoneTimer: + def __init__(sub): + sub.name = name + sub._start_time = start + sub._start_attempts = attempts + sub._start_iterations = iterations + sub._start_successes = successes - Args: + @property + def size(sub): + return self._testcase_size() # pylint: disable=protected-access + + @property + def attempts(sub): + return self.attempts - sub._start_attempts + + @property + def iterations(sub): + return self.iterations - sub._start_iterations + + @property + def successes(sub): + return self.successes - sub._start_successes + + @property + def duration(sub): + if self._db_file is None: # pylint: disable=protected-access + return self.timestamp - sub._start_time + return time() - sub._start_time + + def serialize(sub): + return ( + sub.name, + sub._start_time, + sub._start_attempts, + sub._start_iterations, + sub._start_successes, + ) + + return _MilestoneTimer() + + @contextmanager + def measure(self, name, report=True): + """Time and record the period leading up to a reduction milestone. + eg. a strategy being run. + + Arguments: + name (str): name of milestone + report (bool): Automatically force a report. + + Yields: + None + """ + + tmr = self._construct_milestone( + name, time(), self.attempts, self.iterations, self.successes + ) + self._in_progress_steps.append(tmr) + yield + assert self._in_progress_steps.pop() is tmr + self.record( + name, + attempts=tmr.attempts, + duration=tmr.duration, + iterations=tmr.iterations, + successes=tmr.successes, + report=report, + ) + + def copy(self): + """Create a deep copy of this instance. + + Arguments: None Returns: - Status: Active status report. + ReductionStatus: Clone of self """ - tfd, filepath = mkstemp(dir=cls.PATH, prefix="grzstatus_", suffix=".json") - close(tfd) - status = cls(filepath, start_time=time()) - status.report(force=True) - return status + return deepcopy(self) + def add_to_reporter(self, reporter, expected=True): + """Add the reducer status to reported metadata for the given reporter. -class ReducerStats(object): - """ReducerStats holds stats for the Grizzly reducer. 
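The `measure()` context manager below wraps each strategy run so its elapsed time, attempts and successes land in `finished_steps` as a `ReductionStep`. A hedged sketch (strategy name and counters are invented):

```python
from grizzly.common.status import ReductionStatus

status = ReductionStatus.start(
    strategies=["lines"],  # placeholder strategy list
    testcase_size_cb=lambda: 4096,  # stand-in size callback
)
with status.measure("lines"):
    # placeholder for real strategy work
    status.attempts += 2
    status.successes += 1
# measure() popped its timer and recorded a ReductionStep named "lines"
step = status.finished_steps[-1]
print(step.name, step.attempts, step.successes)  # -> lines 2 1
```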
- """ - FILE = "reducer-stats.json" - PATH = grz_tmp("status") - - __slots__ = ("_file", "_lock", "error", "failed", "passed") - - def __init__(self): - self._file = pathjoin(self.PATH, self.FILE) - self._lock = None - self.error = 0 - self.failed = 0 - self.passed = 0 - - def __enter__(self): - self._lock = InterProcessLock("%s.lock" % (self._file,)) - self._lock.acquire() - try: - with open(self._file, "r") as in_fp: - data = load(in_fp) - self.error = data["error"] - self.failed = data["failed"] - self.passed = data["passed"] - except KeyError: - LOG.debug("invalid status data in %r", self._file) - except OSError: - LOG.debug("%r does not exist", self._file) - except ValueError: - LOG.debug("failed to load stats from %r", self._file) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - try: - with open(self._file, "w") as out_fp: - dump({ - "error": self.error, - "failed": self.failed, - "passed": self.passed}, out_fp) - finally: - if self._lock: - self._lock.release() - self._lock = None + Arguments: + reporter (FuzzManagerReporter): Reporter to update. + expected (bool): Add detailed stats. + + Returns: + None + """ + # only add detailed stats for expected results + if expected: + reporter.add_extra_metadata("reducer-stats", self.finished_steps) + # other parameters + if self.analysis: + reporter.add_extra_metadata("reducer-analysis", self.analysis) + if self.run_params: + reporter.add_extra_metadata("reducer-params", self.run_params) + if self.signature_info: + reporter.add_extra_metadata("reducer-sig", self.signature_info) + # if input was an existing crash-id, record the original + if self.crash_id: + reporter.add_extra_metadata("reducer-input", self.crash_id) diff --git a/grizzly/common/status_reporter.py b/grizzly/common/status_reporter.py index dba784aa..4a098718 100644 --- a/grizzly/common/status_reporter.py +++ b/grizzly/common/status_reporter.py @@ -4,217 +4,320 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. """Manage Grizzly status reports.""" - -import argparse +from argparse import ArgumentParser +from collections import defaultdict from datetime import timedelta from functools import partial -import logging -import os -import re -import sys -import time +from itertools import zip_longest +from logging import DEBUG, INFO, basicConfig + +try: + from os import getloadavg +except ImportError: # pragma: no cover + # os.getloadavg() is not available on all platforms + getloadavg = None +from os import SEEK_CUR, getenv, scandir +from os.path import isdir +from re import match +from re import sub as re_sub +from time import gmtime, localtime, strftime -import psutil +from psutil import cpu_count, cpu_percent, disk_usage, virtual_memory -from .status import ReducerStats, Status +from .status import ReductionStatus, ReductionStep, Status __all__ = ("StatusReporter",) __author__ = "Tyson Smith" __credits__ = ["Tyson Smith"] -class StatusReporter(object): + +class StatusReporter: """Read and merge Grizzly status reports, including tracebacks if found. Output is a single textual report, e.g. for submission to EC2SpotManager. 
""" + CPU_POLL_INTERVAL = 1 DISPLAY_LIMIT_LOG = 10 # don't include log results unless size exceeds 10MBs - EXP_LIMIT = 600 # expiration limit, ignore older reports READ_BUF_SIZE = 0x10000 # 64KB SUMMARY_LIMIT = 4095 # summary output must be no more than 4KB + TIME_LIMIT = 120 # ignore older reports - def __init__(self, reports, reducer=False, tracebacks=None): - self._reducer = reducer + def __init__(self, reports, tracebacks=None): self.reports = reports self.tracebacks = tracebacks - def dump_specific(self, filename): - """Write out merged reports. + @property + def has_results(self): + return any(x.results.total for x in self.reports) + + @classmethod + def load(cls, db_file, tb_path=None, time_limit=TIME_LIMIT): + """Read Grizzly status reports and create a StatusReporter object. Args: - filename (str): Path where output should be written. + path (str): Path to scan for status data files. + tb_path (str): Directory to scan for files containing Python tracebacks. + time_limit (int): Only include entries with a timestamp that is within the + given number of seconds. Returns: - None + StatusReporter: Contains available status reports and traceback reports. """ - with open(filename, "w") as ofp: - ofp.write(self._specific()) + tracebacks = None if tb_path is None else cls._tracebacks(tb_path) + return cls( + list(Status.loadall(db_file=db_file, time_limit=time_limit)), + tracebacks=tracebacks, + ) - def dump_summary(self, filename, runtime=False, sysinfo=True, timestamp=True): - """Write out summary merged reports. + @staticmethod + def format_entries(entries): + """Generate formatted output from (label, body) pairs. + Each entry must have a label and an optional body. + + Example: + entries = ( + ("Test data output", None), + ("first", "1"), + ("second", "2"), + ("third", "3.0"), + ) + Will generate... + Test data output + first : 1 + second : 2 + third : 3.0 Args: - filename (str): Path where output should be written. - runtime (bool): Include total runtime in output - sysinfo (bool): Include system info (CPU, disk, RAM... etc) in output - timestamp (bool): Include time stamp in output + entries list(2-tuple(str, str)): Data to merge. Returns: - None + str: Formatted output. """ - with open(filename, "w") as ofp: - ofp.write(self._summary(runtime=runtime, sysinfo=sysinfo, timestamp=timestamp)) + label_lengths = tuple(len(x[0]) for x in entries if x[1]) + max_len = max(label_lengths) if label_lengths else 0 + out = list() + for label, body in entries: + if body: + out.append( + "%s%s : %s" % ((" " * max(max_len - len(label), 0), label, body)) + ) + else: + out.append(label) + return "\n".join(out) - @classmethod - def load(cls, reducer=False, tb_path=None): - """Read Grizzly status reports and create a StatusReporter object + def results(self, max_len=85): + """Merged and generate formatted output from results. Args: - tb_path (str): Directory to scan for files containing Python tracebacks + max_len (int): Maximum length of result description. Returns: - StatusReporter: Contains status reports and traceback reports that were found + str: A formatted report. 
""" - if tb_path is not None and not os.path.isdir(tb_path): - raise OSError("%r is not a directory" % (tb_path,)) - tracebacks = None if tb_path is None else cls._tracebacks(tb_path) - return cls(list(Status.loadall()), reducer=reducer, tracebacks=tracebacks) - - def print_specific(self): - print(self._specific()) - - def print_summary(self, runtime=True, sysinfo=False, timestamp=False): - print(self._summary(runtime=runtime, sysinfo=sysinfo, timestamp=timestamp)) + blockers = set() + counts = defaultdict(int) + descs = dict() + # calculate totals + for report in self.reports: + for result in report.results.all(): + descs[result.rid] = result.desc + counts[result.rid] += result.count + blockers.update(x.rid for x in report.blockers()) + # generate output + entries = list() + for rid, count in sorted(counts.items(), key=lambda x: x[1], reverse=True): + desc = descs[rid] + # trim long descriptions + if len(descs[rid]) > max_len: + desc = "%s..." % (desc[: max_len - 3],) + label = "%s%d" % ("*" if rid in blockers else "", count) + entries.append((label, desc)) + if not entries: + entries.append(("No results available", None)) + elif blockers: + entries.append(("(* = Blocker)", None)) + entries.append(("", None)) + return self.format_entries(entries) @staticmethod def _scan(path, fname_pattern): - abs_path = os.path.abspath(path) - for fname in os.listdir(abs_path): - if fname_pattern.match(fname) is None: + for entry in scandir(path): + if match(fname_pattern, entry.name) is None: continue - full_path = os.path.join(abs_path, fname) - if not os.path.isfile(full_path): + if not entry.is_file(): continue - if os.path.getsize(full_path) > 0: - yield full_path + if entry.stat().st_size: + yield entry.path - def _specific(self): - """Merged and generate formatted output of status reports. + def specific(self, iters_per_result=100): + """Merged and generate formatted output from status reports. Args: - None + iters_per_result (int): Threshold for warning of potential blockers. Returns: - str: A formatted report + str: A formatted report. """ if not self.reports: return "No status reports available" - exp = int(time.time()) - self.EXP_LIMIT - self.reports.sort(key=lambda x: x.duration, reverse=True) - self.reports.sort(key=lambda x: x.timestamp < exp) - txt = list() - for num, report in enumerate(self.reports, start=1): - txt.append("#%02d" % (num,)) - if report.timestamp < exp: - txt.append(" (EXPIRED)\n") - continue - txt.append(" Runtime %s\n" % str(timedelta(seconds=int(report.duration)))) - txt.append(" * Iterations: %03d" % report.iteration) - txt.append(" - Rate: %0.2f" % report.rate) - if not self._reducer: - txt.append(" - Ignored: %02d" % report.ignored) - txt.append(" - Results: %d" % report.results) - txt.append("\n") - return "".join(txt) - - def _summary(self, runtime=True, sysinfo=False, timestamp=False): - """Merge and generate a summary of status reports. 
+ self.reports.sort(key=lambda x: x.start_time) + entries = list() + for report in self.reports: + label = "PID %d started at %s" % ( + report.pid, + strftime("%Y/%m/%d %X", localtime(report.start_time)), + ) + entries.append((label, None)) + # iterations + entries.append( + ("Iterations", "%d @ %0.2f" % (report.iteration, round(report.rate, 2))) + ) + # ignored + if report.ignored: + ignore_pct = report.ignored / float(report.iteration) * 100 + entries.append( + ( + "Ignored", + "%d @ %0.1f%%" % (report.ignored, round(ignore_pct, 1)), + ) + ) + # results + if report.results.total: + # avoid divide by zero if results are found before first update + iters = report.iteration if report.iteration else report.results.total + result_pct = report.results.total / float(iters) * 100 + if any(report.blockers(iters_per_result=iters_per_result)): + blk_str = " (Blockers detected)" + else: + blk_str = "" + entries.append( + ( + "Results", + "%d @ %0.1f%% %s" + % (report.results.total, round(result_pct, 1), blk_str), + ) + ) + else: + entries.append(("Results", "0")) + # runtime + entries.append(("Runtime", str(timedelta(seconds=int(report.runtime))))) + # add profiling data if it exists + if any(report.profile_entries()): + entries.append(("Profiling entries", None)) + for entry in sorted( + report.profile_entries(), key=lambda x: x.total, reverse=True + ): + avg = entry.total / float(entry.count) + body = list() + body.append("%dx " % (entry.count,)) + if entry.total > 300: + body.append(str(timedelta(seconds=int(entry.total)))) + else: + body.append("%0.3fs" % (round(entry.total, 3),)) + body.append( + " %0.2f%%" % (round(entry.total / report.runtime * 100, 2),) + ) + body.append(" (%0.3f avg," % (round(avg, 3),)) + body.append(" %0.3f max," % (round(entry.max, 3),)) + body.append(" %0.3f min)" % (round(entry.min, 3),)) + entries.append((entry.name, "".join(body))) + entries.append(("", None)) + return self.format_entries(entries) + + def summary( + self, runtime=True, sysinfo=False, timestamp=False, iters_per_result=100 + ): + """Merge and generate a summary from status reports. Args: - filename (str): Path where output should be written. - runtime (bool): Include total runtime in output - sysinfo (bool): Include system info (CPU, disk, RAM... etc) in output - timestamp (bool): Include time stamp in output + runtime (bool): Include total runtime in output. + sysinfo (bool): Include system info (CPU, disk, RAM... etc) in output. + timestamp (bool): Include time stamp in output. + iters_per_result (int): Threshold for warning of potential blockers. Returns: - str: A summary of merged reports + str: A summary of merged reports. 
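For completeness, the merged summary with system info and timestamp enabled, roughly as it might be submitted to a monitoring service (the 4KB `SUMMARY_LIMIT` cap above is why the output stays terse; the path is again an assumption):

```python
from grizzly.common.status_reporter import StatusReporter

reporter = StatusReporter.load("fuzz-status.db", time_limit=120)
print(reporter.summary(runtime=True, sysinfo=True, timestamp=True))
```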
""" - txt = list() - if self._reducer: - # reducer stats - with ReducerStats() as stats: - r_error = stats.error - r_failed = stats.failed - r_passed = stats.passed - txt.append("======== Stats ========\n") - # Reduced successfully - txt.append(" Reduced : %d\n" % (r_passed,)) - # Failed to reproduce - txt.append(" No Repro : %d\n" % (r_failed,)) - # Error during reduction - txt.append(" Errors : %d\n" % (r_error,)) - txt.append("======= Active ========\n") + entries = list() # Job specific status - exp = int(time.time()) - self.EXP_LIMIT - reports = tuple(x for x in self.reports if x.timestamp > exp) - if reports: + if self.reports: # calculate totals - iterations = tuple(x.iteration for x in reports) - log_sizes = tuple(x.log_size for x in reports) - rates = tuple(x.rate for x in reports) - results = tuple(x.results for x in reports) - count = len(reports) - total_ignored = sum(x.ignored for x in reports) + iterations = tuple(x.iteration for x in self.reports) + log_sizes = tuple(x.log_size for x in self.reports) + rates = tuple(x.rate for x in self.reports) + results = tuple(x.results.total for x in self.reports) + count = len(self.reports) + total_ignored = sum(x.ignored for x in self.reports) total_iters = sum(iterations) + # Iterations - txt.append("Iterations : %d" % (total_iters,)) + disp = list() + disp.append(str(total_iters)) if count > 1: - txt.append(" (%s, %s)" % (max(iterations), min(iterations))) - txt.append("\n") + disp.append(" (%d, %d)" % (max(iterations), min(iterations))) + entries.append(("Iterations", "".join(disp))) + # Rate - txt.append(" Rate : %d @ %0.2f" % (count, sum(rates))) + disp = list() + disp.append("%d @ %0.2f" % (count, round(sum(rates), 2))) if count > 1: - txt.append(" (%0.2f, %0.2f)" % (max(rates), min(rates))) - txt.append("\n") - # Results / Signature mismatch - if self._reducer: - txt.append(" Mismatch : %d" % (sum(results),)) - else: - txt.append(" Results : %d" % (sum(results),)) + disp.append( + " (%0.2f, %0.2f)" % (round(max(rates), 2), round(min(rates), 2)) + ) + entries.append(("Rate", "".join(disp))) + + # Results + if total_iters: + total_results = sum(results) + result_pct = total_results / float(total_iters) * 100 + disp = list() + disp.append("%d" % (total_results,)) + if total_results: + disp.append(" @ %0.1f%%" % (round(result_pct, 1),)) + if any( + any(x.blockers(iters_per_result=iters_per_result)) + for x in self.reports + ): + disp.append(" (Blockers)") + entries.append(("Results", "".join(disp))) + + # Ignored if total_ignored: - ignore_pct = (total_ignored / float(total_iters)) * 100 - txt.append(" (%d ignored @ %0.2f%%)" % (total_ignored, ignore_pct)) + ignore_pct = total_ignored / float(total_iters) * 100 + entries.append( + ("Ignored", "%d @ %0.1f%%" % (total_ignored, round(ignore_pct, 1))) + ) + # Runtime if runtime: - txt.append("\n") - if self._reducer: - durations = tuple(x.duration for x in reports) - if count > 1: - max_duration = str(timedelta(seconds=int(max(durations)))) - min_duration = str(timedelta(seconds=int(min(durations)))) - txt.append(" Runtime : (%s, %s)" % (max_duration, min_duration)) - else: - txt.append(" Runtime : %s" % (str(timedelta(seconds=int(durations[0]))),)) - else: - total_runtime = sum(x.duration for x in reports) - txt.append(" Runtime : %s" % (str(timedelta(seconds=int(total_runtime))),)) + total_runtime = sum(x.runtime for x in self.reports) + entries.append(("Runtime", str(timedelta(seconds=int(total_runtime))))) + # Log size - log_usage = sum(log_sizes) / 1048576.0 + log_usage = 
sum(log_sizes) / 1_048_576.0 if log_usage > self.DISPLAY_LIMIT_LOG: - txt.append("\n") - txt.append(" Logs : %0.1fMB" % (log_usage,)) + disp = list() + disp.append("%0.1fMB" % (log_usage,)) if count > 1: - txt.append(" (%0.2fMB, %0.2fMB)" % ( - max(log_sizes) / 1048576.0, - min(log_sizes) / 1048576.0)) + disp.append( + " (%0.2fMB, %0.2fMB)" + % (max(log_sizes) / 1_048_576.0, min(log_sizes) / 1_048_576.0) + ) + entries.append(("Logs", "".join(disp))) else: - txt.append("No status reports available") + entries.append(("No status reports available", None)) + + # System information if sysinfo: - txt.append("\n") - txt.append(self._sys_info()) + entries.extend(self._sys_info()) + + # Timestamp if timestamp: - txt.append("\n") - txt.append(time.strftime(" Timestamp : %Y/%m/%d %X %z", time.gmtime())) - msg = "".join(txt) + entries.append(("Timestamp", strftime("%Y/%m/%d %X %z", gmtime()))) + + # Format output + msg = self.format_entries(entries) + if self.tracebacks: txt = self._merge_tracebacks(self.tracebacks, self.SUMMARY_LIMIT - len(msg)) msg = "".join((msg, txt)) @@ -243,53 +346,71 @@ def _merge_tracebacks(tracebacks, size_limit): @staticmethod def _sys_info(): - """Collect and format system information. + """Collect system information. Args: None Returns: - str: System information formatted to match output from _summary() + list(tuple): System information in tuples (label, display data). """ - txt = list() - txt.append("CPU & Load : %d @ %0.1f%%" % ( - psutil.cpu_count(), - psutil.cpu_percent(interval=StatusReporter.CPU_POLL_INTERVAL))) - try: - txt.append(" %s\n" % (str(os.getloadavg()),)) - except AttributeError: - txt.append("\n") - mem_usage = psutil.virtual_memory() - txt.append(" Memory : ") - if mem_usage.available < 1073741824: # < 1GB - txt.append("%dMB" % (mem_usage.available / 1048576,)) + entries = list() + + # CPU and load + disp = list() + disp.append( + "%d (%d) @ %d%%" + % ( + cpu_count(logical=True), + cpu_count(logical=False), + round(cpu_percent(interval=StatusReporter.CPU_POLL_INTERVAL)), + ) + ) + if getloadavg is not None: + disp.append(" (") + # round the results of getloadavg(), precision varies across platforms + disp.append(", ".join("%0.1f" % (round(x, 1),) for x in getloadavg())) + disp.append(")") + entries.append(("CPU & Load", "".join(disp))) + + # memory usage + disp = list() + mem_usage = virtual_memory() + if mem_usage.available < 1_073_741_824: # < 1GB + disp.append("%dMB" % (mem_usage.available / 1_048_576,)) else: - txt.append("%0.1fGB" % (mem_usage.available / 1073741824.0,)) - txt.append(" of %0.1fGB free\n" % (mem_usage.total / 1073741824.0,)) - disk_usage = psutil.disk_usage("/") - txt.append(" Disk : ") - if disk_usage.free < 1073741824: # < 1GB - txt.append("%dMB" % (disk_usage.free / 1048576,)) + disp.append("%0.1fGB" % (mem_usage.available / 1_073_741_824.0,)) + disp.append(" of %0.1fGB free" % (mem_usage.total / 1_073_741_824.0,)) + entries.append(("Memory", "".join(disp))) + + # disk usage + disp = list() + usage = disk_usage("/") + if usage.free < 1_073_741_824: # < 1GB + disp.append("%dMB" % (usage.free / 1_048_576,)) else: - txt.append("%0.1fGB" % (disk_usage.free / 1073741824.0,)) - txt.append(" of %0.1fGB free" % (disk_usage.total / 1073741824.0,)) - return "".join(txt) + disp.append("%0.1fGB" % (usage.free / 1_073_741_824.0,)) + disp.append(" of %0.1fGB free" % (usage.total / 1_073_741_824.0,)) + entries.append(("Disk", "".join(disp))) + + return entries @staticmethod - def _tracebacks(path, ignore_kbi=True, max_preceeding=5): + def 
_tracebacks(path, ignore_kbi=True, max_preceding=5): """Search screen logs for tracebacks. Args: path (str): Directory containing log files. ignore_kbi (bool): Do not include KeyboardInterupts in results - max_preceeding (int): Maximum number of lines preceding traceback to include. + max_preceding (int): Maximum number of lines preceding traceback to + include. Returns: - list: A list of TracebackReports + list: A list of TracebackReports. """ tracebacks = list() - for screen_log in StatusReporter._scan(path, re.compile(r"screenlog\.\d+")): - tbr = TracebackReport.from_file(screen_log, max_preceeding=max_preceeding) + for screen_log in StatusReporter._scan(path, r"screenlog\.\d+"): + tbr = TracebackReport.from_file(screen_log, max_preceding=max_preceding) if tbr is None: continue if ignore_kbi and tbr.is_kbi: @@ -298,10 +419,11 @@ def _tracebacks(path, ignore_kbi=True, max_preceeding=5): return tracebacks -class TracebackReport(object): +class TracebackReport: """Read Python tracebacks from log files and store it in a manner that is helpful when generating reports. """ + MAX_LINES = 16 # should be no less than 6 READ_LIMIT = 0x10000 # 64KB @@ -313,13 +435,13 @@ def __init__(self, file_name, lines, is_kbi=False, prev_lines=None): self.is_kbi = is_kbi @classmethod - def from_file(cls, input_log, max_preceeding=5): + def from_file(cls, input_log, max_preceding=5): """Create TracebackReport from a text file containing a Python traceback. Only the first traceback in the file will be parsed. Args: input_log (str): File to parse. - max_preceeding (int): Number of lines to collect leading up to the traceback. + max_preceding (int): Number of lines to collect leading up to the traceback. Returns: TracebackReport: Contains data from input_log. @@ -336,7 +458,7 @@ def from_file(cls, input_log, max_preceeding=5): break if len(chunk) == cls.READ_LIMIT: # seek back to avoid missing beginning of token - in_fp.seek(len(token) * -1, os.SEEK_CUR) + in_fp.seek(len(token) * -1, SEEK_CUR) else: # no traceback here, move along return None @@ -363,14 +485,14 @@ def from_file(cls, input_log, max_preceeding=5): # stop at first empty line tb_end = min(line_num, line_count) break - if re.match(r"^\w+(\.\w+)*\:\s|^\w+(Interrupt|Error)$", log_line): + if match(r"^\w+(\.\w+)*\:\s|^\w+(Interrupt|Error)$", log_line): is_kbi = log_line.startswith("KeyboardInterrupt") # stop after error message tb_end = min(line_num + 1, line_count) break assert tb_start is not None - if max_preceeding > 0: - prev_start = max(tb_start - max_preceeding, 0) + if max_preceding > 0: + prev_start = max(tb_start - max_preceding, 0) prev_lines = data[prev_start:tb_start] else: prev_lines = None @@ -379,10 +501,10 @@ def from_file(cls, input_log, max_preceeding=5): tb_end = max(line_count, cls.MAX_LINES) if tb_end - tb_start > cls.MAX_LINES: # add first entry - lines = data[tb_start:tb_start + 3] + lines = data[tb_start : tb_start + 3] lines += ["<--- TRACEBACK TRIMMED--->"] # add end entries - lines += data[tb_end - (cls.MAX_LINES - 3):tb_end] + lines += data[tb_end - (cls.MAX_LINES - 3) : tb_end] else: lines = data[tb_start:tb_end] return cls(input_log, lines, is_kbi=is_kbi, prev_lines=prev_lines) @@ -394,6 +516,333 @@ def __str__(self): return "\n".join(["Log: %r" % self.file_name] + self.prev_lines + self.lines) +class _TableFormatter: + """Format data in a table.""" + + def __init__(self, columns, formatters, vsep=" | ", hsep="-"): + """Initialize a TableFormatter instance. 
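`_TableFormatter` is a private helper, but its intended use is straightforward: pick columns and per-column format callables, then feed rows. A hedged sketch (its parameters are documented just below; the column names and rows here are invented):

```python
from grizzly.common.status_reporter import _TableFormatter

fmt = _TableFormatter(
    columns=("name", "count"),
    formatters=(str, str),  # a None formatter would hide that column
)
for line in fmt.format_rows([("timeouts", "3"), ("crashes", "12")]):
    print(line)
# name     | count
# ----------------
# timeouts |     3
# crashes  |    12
```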
+
+        Arguments:
+            columns (iterable(str)): List of column names for the table header.
+            formatters (iterable(callable)): List of format functions for each column.
+                None will result in hiding that column.
+            vsep (str): Vertical separation between columns.
+            hsep (str): Horizontal separation between header and data.
+        """
+        assert len(columns) == len(formatters)
+        self._columns = tuple(
+            column for (column, fmt) in zip(columns, formatters) if fmt is not None
+        )
+        self._formatters = formatters
+        self._vsep = vsep
+        self._hsep = hsep
+
+    def format_rows(self, rows):
+        """Format rows as a table and return a line generator.
+
+        Arguments:
+            rows (list(list(str))): Tabular data. Each row must be the same length as
+                `columns` passed to `__init__`.
+
+        Yields:
+            str: Each line of formatted tabular data.
+        """
+        max_width = [len(col) for col in self._columns]
+        formatted = []
+        for row in rows:
+            assert len(row) == len(self._formatters)
+            formatted.append([])
+            offset = 0
+            for idx, (data, formatter) in enumerate(zip(row, self._formatters)):
+                if formatter is None:
+                    offset += 1
+                    continue
+                data = formatter(data)
+                max_width[idx - offset] = max(max_width[idx - offset], len(data))
+                formatted[-1].append(data)
+
+        # build a format_str to space out the columns with separators using `max_width`
+        # the first column is left-aligned, and other fields are right-aligned.
+        format_str = self._vsep.join(
+            field % (width,)
+            for field, width in zip_longest(["%%-%ds"], max_width, fillvalue="%%%ds")
+        )
+        yield format_str % self._columns
+        yield self._hsep * (len(self._vsep) * (len(self._columns) - 1) + sum(max_width))
+        for row in formatted:
+            yield format_str % tuple(row)
+
+
+def _format_seconds(duration):
+    # format as H:M:S, then strip leading zeros, e.g. 9042 -> "2:30:42", 42 -> "42s"
+    minutes, seconds = divmod(int(duration), 60)
+    hours, minutes = divmod(minutes, 60)
+    result = re_sub("^[0:]*", "", "%d:%02d:%02d" % (hours, minutes, seconds))
+    # if the result is all zeroes, ensure one zero is output
+    if not result:
+        result = "0"
+    # a bare number is ambiguous, so append 's' to indicate seconds
+    if ":" not in result:
+        result += "s"
+    return result
+
+
+def _format_duration(duration, total=0):
+    result = ""
+    if duration is not None:
+        if total == 0:
+            percent = 0  # pragma: no cover
+        else:
+            percent = int(100 * duration / total)
+        result = _format_seconds(duration)
+        result += " (%3d%%)" % (percent,)
+    return result
+
+
+def _format_number(number, total=0):
+    result = ""
+    if number is not None:
+        if total == 0:
+            percent = 0
+        else:
+            percent = int(100 * number / total)
+        result = "{:n} ({:3d}%)".format(number, percent)
+    return result
+
+
+class ReductionStatusReporter(StatusReporter):
+    """Create a status report for a reducer instance.
+    Merging multiple reports is not possible. This is intended for automated use only.
+    """
+
+    TIME_LIMIT = 120  # ignore older reports
+
+    # pylint: disable=super-init-not-called
+    def __init__(self, reports, tracebacks=None):
+        self.reports = reports
+        self.tracebacks = tracebacks
+
+    @property
+    def has_results(self):
+        return False  # TODO
+
+    @classmethod
+    def load(cls, db_file, tb_path=None, time_limit=TIME_LIMIT):
+        """Read Grizzly reduction status reports and create a ReductionStatusReporter
+        object.
+
+        Args:
+            db_file (str): Status data file to load reports from.
+            tb_path (str): Directory to scan for files containing Python tracebacks.
+            time_limit (int): Only include entries with a timestamp that is within the
+                given number of seconds.
+
+        Returns:
+            ReductionStatusReporter: Contains available status reports and traceback
+                reports.
+        """
+        tracebacks = None if tb_path is None else cls._tracebacks(tb_path)
+        return cls(
+            list(ReductionStatus.loadall(db_file=db_file, time_limit=time_limit)),
+            tracebacks=tracebacks,
+        )
+
+    @staticmethod
+    def _analysis_entry(report):
+        return (
+            "Analysis",
+            ", ".join(
+                ("%s: %d%%" % (desc, 100 * reliability))
+                for desc, reliability in report.analysis.items()
+            ),
+        )
+
+    @staticmethod
+    def _crash_id_entry(report):
+        crash_str = str(report.crash_id)
+        if report.tool:
+            crash_str += " (%s)" % (report.tool,)
+        return ("Crash ID", crash_str)
+
+    @staticmethod
+    def _last_reports_entry(report):
+        return ("Latest Reports", ", ".join(str(r) for r in report.last_reports))
+
+    @staticmethod
+    def _run_params_entry(report):
+        return (
+            "Run Parameters",
+            ", ".join(
+                ("%s: %r" % (desc, value)) for desc, value in report.run_params.items()
+            ),
+        )
+
+    @staticmethod
+    def _signature_info_entry(report):
+        return (
+            "Signature",
+            ", ".join(
+                ("%s: %r" % (desc, value))
+                for desc, value in report.signature_info.items()
+            ),
+        )
+
+    def specific(  # pylint: disable=arguments-differ
+        self,
+        sysinfo=False,
+        timestamp=False,
+    ):
+        """Generate formatted output from status report.
+
+        Args:
+            sysinfo (bool): Include system info (CPU, disk, RAM... etc) in output.
+            timestamp (bool): Include time stamp in output.
+
+        Returns:
+            str: A formatted report.
+        """
+        if not self.reports:
+            return "No status reports available"
+
+        reports = []
+        for report in self.reports:
+            entries = []
+            if report.crash_id:
+                entries.append(self._crash_id_entry(report))
+            if report.analysis:
+                entries.append(self._analysis_entry(report))
+            if report.run_params:
+                entries.append(self._run_params_entry(report))
+            if report.last_reports:
+                entries.append(self._last_reports_entry(report))
+            if report.current_strategy:
+                entries.append(
+                    (
+                        "Current Strategy",
+                        "%s (%r of %d)"
+                        % (
+                            report.current_strategy.name,
+                            report.current_strategy_idx,
+                            len(report.strategies),
+                        ),
+                    )
+                )
+            if report.current_strategy and report.original:
+                # TODO: lines/tokens?
+                entries.append(
+                    (
+                        "Current/Original",
+                        "%dB / %dB"
+                        % (report.current_strategy.size, report.original.size),
+                    )
+                )
+            if report.total:
+                # TODO: other results
+                entries.append(
+                    (
+                        "Results",
+                        "%d successes, %d attempts"
+                        % (report.total.successes, report.total.attempts),
+                    )
+                )
+            if report.total and report.current_strategy:
+                entries.append(
+                    (
+                        "Time Elapsed",
+                        "%s in strategy, %s total"
+                        % (
+                            _format_seconds(report.current_strategy.duration),
+                            _format_seconds(report.total.duration),
+                        ),
+                    )
+                )
+
+            # System information
+            if sysinfo:
+                entries.extend(self._sys_info())
+
+            # Timestamp
+            if timestamp:
+                entries.append(("Timestamp", strftime("%Y/%m/%d %X %z", gmtime())))
+
+            reports.append(self.format_entries(entries))
+        return "\n\n".join(reports)
+
+    def summary(
+        self,
+        runtime=False,
+        sysinfo=False,
+        timestamp=False,
+    ):  # pylint: disable=arguments-differ
+        """Merge and generate a summary from status reports.
+
+        Args:
+            runtime (bool): Ignored (compatibility).
+            sysinfo (bool): Include system info (CPU, disk, RAM... etc) in output.
+            timestamp (bool): Include time stamp in output.
+
+        Returns:
+            str: A summary of merged reports.
+ """ + if not self.reports: + return "No status reports available" + + reports = [] + for report in self.reports: + entries = [] + lines = [] + if report.crash_id: + entries.append(self._crash_id_entry(report)) + if report.analysis: + entries.append(self._analysis_entry(report)) + if report.signature_info: + entries.append(self._signature_info_entry(report)) + if report.run_params: + entries.append(self._run_params_entry(report)) + if report.last_reports: + entries.append(self._last_reports_entry(report)) + if report.total and report.original: + tabulator = _TableFormatter( + ReductionStep._fields, + ReductionStep( + name=str, + # duration and attempts are % of total/last, size % of init/1st + duration=partial(_format_duration, total=report.total.duration), + attempts=partial(_format_number, total=report.total.attempts), + successes=partial(_format_number, total=report.total.successes), + iterations=None, # hide + size=partial(_format_number, total=report.original.size), + ), + ) + lines.extend(tabulator.format_rows(report.finished_steps)) + # Format output + if entries: + lines.append(self.format_entries(entries)) + if lines: + reports.append("\n".join(lines)) + + entries = [] + + # System information + if sysinfo: + entries.extend(self._sys_info()) + + # Timestamp + if timestamp: + entries.append(("Timestamp", strftime("%Y/%m/%d %X %z", gmtime()))) + + if entries: + reports.append(self.format_entries(entries)) + + msg = "\n\n".join(reports) + + if self.tracebacks: + msg += self._merge_tracebacks( + self.tracebacks, self.SUMMARY_LIMIT - len(msg) + ) + + return msg + + def main(args=None): """Merge Grizzly status files into a single report (main entrypoint). @@ -403,48 +852,95 @@ def main(args=None): Returns: None """ - log_level = logging.INFO - log_fmt = "[%(asctime)s] %(message)s" - if bool(os.getenv("DEBUG")): # pragma: no cover - log_level = logging.DEBUG + if bool(getenv("DEBUG")): # pragma: no cover + log_level = DEBUG log_fmt = "%(levelname).1s %(name)s [%(asctime)s] %(message)s" - logging.basicConfig(format=log_fmt, datefmt="%Y-%m-%d %H:%M:%S", level=log_level) - - modes = ("reduce-status", "status") - parser = argparse.ArgumentParser(description="Grizzly status report generator") + else: + log_level = INFO + log_fmt = "[%(asctime)s] %(message)s" + basicConfig(format=log_fmt, datefmt="%Y-%m-%d %H:%M:%S", level=log_level) + + modes = { + "fuzzing": (StatusReporter, Status.STATUS_DB), + "reducing": (ReductionStatusReporter, ReductionStatus.STATUS_DB), + } + + # report types: define name and time range of scan + report_types = { + # include status reports from the last 2 minutes + "active": 120, + # include status reports from the last 8 hours + "complete": 28800, + } + + parser = ArgumentParser(description="Grizzly status report generator") + parser.add_argument( + "--dump", help="File to write report to, existing files will be overwritten." + ) parser.add_argument( - "--dump", - help="File to write report to") + "--type", + choices=report_types.keys(), + default="active", + help="Report type. active: Current snapshot of activity, complete: " + "Aggregate summary of all jobs over a longer duration (8h). " + "(default: active)", + ) parser.add_argument( - "--mode", default="status", - help="Status loading mode. Available modes: %s (default: 'status')" % (", ".join(modes),)) + "--scan-mode", + choices=modes.keys(), + default="fuzzing", + help="Report mode. 
(default: fuzzing)", + ) parser.add_argument( - "--system-report", action="store_true", - help="Output summary and system information") + "--system-report", + action="store_true", + help="Output summary and system information", + ) parser.add_argument( "--tracebacks", - help="Scan path for Python tracebacks found in screenlog.# files") + help="Scan path for Python tracebacks found in screenlog.# files", + ) args = parser.parse_args(args) + if args.tracebacks and not isdir(args.tracebacks): + parser.error("--tracebacks must be a directory") + + reporter_cls, status_db = modes.get(args.scan_mode) + reporter = reporter_cls.load( + db_file=status_db, + tb_path=args.tracebacks, + time_limit=report_types[args.type], + ) - if args.mode not in modes: - parser.error("Invalid mode %r" % args.mode) - reducer_mode = args.mode == "reduce-status" - reporter = StatusReporter.load(tb_path=args.tracebacks, reducer=reducer_mode) if args.dump: - reporter.dump_summary(args.dump, runtime=reducer_mode) + with open(args.dump, "w") as ofp: + if args.type == "active" and args.scan_mode == "fuzzing": + ofp.write(reporter.summary(runtime=False, sysinfo=True, timestamp=True)) + elif args.type == "active": + # reducer only has one instance, so show specific report while running + ofp.write(reporter.specific(sysinfo=True, timestamp=True)) + else: + ofp.write( + reporter.summary(runtime=True, sysinfo=False, timestamp=False) + ) return 0 + if not reporter.reports: - print("No status reports to display") + print("Grizzly Status - No status reports to display") return 0 - print("Grizzly Status Report") - print("---------------------") - print("Status report frequency: %ds\n" % (Status.REPORT_FREQ,)) - reporter.print_specific() - print("Summary") - print("-------") - reporter.print_summary(sysinfo=args.system_report) + + print( + "Grizzly Status - %s - Instance report frequency: %ds\n" + % (strftime("%Y/%m/%d %X"), Status.REPORT_FREQ) + ) + print("[Reports]") + print(reporter.specific()) + if reporter.has_results: + print("[Result Signatures]") + print(reporter.results()) + print("[Summary]") + print(reporter.summary(sysinfo=args.system_report)) return 0 if __name__ == "__main__": - sys.exit(main()) + raise SystemExit(main()) diff --git a/grizzly/common/storage.py b/grizzly/common/storage.py index 25e856f1..309ff58e 100644 --- a/grizzly/common/storage.py +++ b/grizzly/common/storage.py @@ -3,18 +3,21 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
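The storage rewrite below drops the old in-memory `TestFile` class in favor of plain files kept under a per-testcase directory (`data_path`), which doubles as the wwwroot served to the target. A minimal sketch of the reworked API based on this diff; the adapter name, content, and output path are illustrative:

```python
from grizzly.common.storage import TestCase

with TestCase("index.html", None, "demo-adapter") as test:
    # file data now lives on disk under test.data_path (served as wwwroot)
    test.add_from_bytes(b"<h1>hello</h1>", "index.html")
    print(list(test.contents))  # -> ["index.html"]
    # copy the files plus "test_info.json" metadata to an output directory
    test.dump("/tmp/tc_out", include_details=True)
# leaving the context manager calls cleanup() and removes the working files
```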
-from collections import namedtuple -from itertools import chain import json -import os -import shutil -from tempfile import SpooledTemporaryFile - -from ..target import sanitizer_opts +from collections import namedtuple +from itertools import chain, product +from os.path import normpath, split +from pathlib import Path +from shutil import copyfile, move, rmtree +from tempfile import NamedTemporaryFile, mkdtemp +from time import time +from zipfile import BadZipfile, ZipFile +from zlib import error as zlib_error + +from ..target import AssetError, AssetManager from .utils import grz_tmp - -__all__ = ("TestCase", "TestFile", "TestCaseLoadFailure", "TestFileExists") +__all__ = ("TestCase", "TestCaseLoadFailure", "TestFileExists") __author__ = "Tyson Smith" __credits__ = ["Tyson Smith"] @@ -24,29 +27,54 @@ class TestCaseLoadFailure(Exception): class TestFileExists(Exception): - """Raised when adding a TestFile to a TestCase that has an existing TestFile with the same name""" + """Raised when adding a TestFile to a TestCase that has an existing + TestFile with the same name""" -TestFileMap = namedtuple("TestFileMap", "meta optional required") +TestFile = namedtuple("TestFile", "file_name data_file") +TestFileMap = namedtuple("TestFileMap", "optional required") -class TestCase(object): +class TestCase: __slots__ = ( - "adapter_name", "duration", "env_vars", "input_fname", "landing_page", - "redirect_page", "_existing_paths", "_files") - - def __init__(self, landing_page, redirect_page, adapter_name, input_fname=None): + "adapter_name", + "assets", + "duration", + "env_vars", + "hang", + "input_fname", + "landing_page", + "redirect_page", + "time_limit", + "timestamp", + "_data_path", + "_files", + ) + + def __init__( + self, + landing_page, + redirect_page, + adapter_name, + input_fname=None, + time_limit=None, + timestamp=None, + ): self.adapter_name = adapter_name + self.assets = None self.duration = None - self.env_vars = dict() # environment variables + self.env_vars = dict() + self.hang = False self.input_fname = input_fname # file that was used to create the test case - self.landing_page = landing_page - self.redirect_page = redirect_page - self._existing_paths = list() # file paths in use - self._files = TestFileMap( - meta=list(), # environment files such as prefs.js, etc... - optional=list(), - required=list()) + self.landing_page = self.sanitize_path(landing_page) + if redirect_page is not None: + self.redirect_page = self.sanitize_path(redirect_page) + else: + self.redirect_page = None + self.time_limit = time_limit + self.timestamp = time() if timestamp is None else timestamp + self._files = TestFileMap(optional=list(), required=list()) + self._data_path = Path(mkdtemp(prefix="testcase_", dir=grz_tmp("storage"))) def __enter__(self): return self @@ -54,172 +82,193 @@ def __enter__(self): def __exit__(self, *exc): self.cleanup() - def _add(self, target, test_file): - """Add a test file to test case and perform sanity checks. - - Args: - target (list): Specific list of Files to append target test_file to. 
- test_file (TestFile): TestFile to append - - Returns: - None - """ - assert isinstance(test_file, TestFile), "only accepts TestFiles" - if test_file.file_name in self._existing_paths: - raise TestFileExists("%r exists in test" % (test_file.file_name,)) - self._existing_paths.append(test_file.file_name) - target.append(test_file) - - def add_batch(self, path, include_files, prefix=None): + def add_batch(self, path, include_files, prefix=None, copy=True): """Iterate over files in include_files and attach the files that are - located in path to testcase. + located in path to TestCase. Args: path (str): Path to the root of the directory that contains files. - include_files (iterable): Paths of the files to be added to the - test case if they exist in path. + include_files (iterable(str)): Paths of the files to be added to the + TestCase if they exist in path. prefix (str): Path prefix to prepend to file when adding to - test case. + the TestCase. + copy (bool): File will be copied if True otherwise the file will be moved. Returns: None """ - path = os.path.abspath(path) - for fname in (x for x in include_files if x.startswith(path)): - test_path = os.path.relpath(fname, path) - if test_path.startswith(".."): + path = Path(path) + for fname in include_files: + file = Path(fname) + try: + relative = file.relative_to(path) + except ValueError: + # cannot add files outside path continue if prefix: - test_path = "/".join((prefix, test_path)) - self.add_from_file(fname, file_name=test_path) + relative = prefix / relative + self.add_from_file(file, file_name=relative.as_posix(), copy=copy) - def add_meta(self, meta_file): - """Add a test file to test case as a meta file. + def add_from_bytes(self, data, file_name, required=False): + """Create a file and add it to the TestCase. Args: - meta_file (TestFile): TestFile to add to TestCase + data (bytes): Data to write to file. + file_name (str): Used as file path on disk and URI. Relative to wwwroot. + required (bool): Indicates whether the file must be served. Returns: None """ - self._add(self._files.meta, meta_file) + assert isinstance(data, bytes) + with NamedTemporaryFile(delete=False, dir=grz_tmp("storage")) as in_fp: + in_fp.write(data) + data_file = Path(in_fp.name) - def add_environ_var(self, name, value): - """Add environment variable to test case. + try: + self.add_from_file( + data_file, file_name=file_name, required=required, copy=False + ) + finally: + # the temporary file should have been moved to the data path of the TestCase + # unless an exception occurred so remove it if needed + if data_file.is_file(): + data_file.unlink() + + def add_from_file(self, src_file, file_name=None, required=False, copy=False): + """Add a file to the TestCase by either copying or moving an existing file. Args: - name (str): Environment variable name - value (str): Environment variable value + src_file (str): Path to existing file to use. + file_name (str): Used as file path on disk and URI. Relative to wwwroot. + If file_name is not given the name of the src_file + will be used. + required (bool): Indicates whether the file must be served. + copy (bool): File will be copied if True otherwise the file will be moved. Returns: None """ - self.env_vars[name] = value + src_file = Path(src_file) + if file_name is None: + file_name = src_file.name + file_name = self.sanitize_path(file_name) - def add_file(self, test_file, required=True): - """Add a test file to test case. 
+ test_file = TestFile(file_name, self._data_path / file_name) + if test_file.file_name in self.contents: + raise TestFileExists("%r exists in test" % (test_file.file_name,)) - Args: - meta_file (TestFile): TestFile to add to TestCase - required (bool): Indicates if test file must be served + test_file.data_file.parent.mkdir(parents=True, exist_ok=True) + if copy: + copyfile(src_file, test_file.data_file) + else: + move(src_file, test_file.data_file) - Returns: - None - """ - if required: - self._add(self._files.required, test_file) + # landing_page is always 'required' + if required or test_file.file_name == self.landing_page: + self._files.required.append(test_file) else: - self._add(self._files.optional, test_file) + self._files.optional.append(test_file) - def add_from_data(self, data, file_name, encoding="UTF-8", required=True): - """Create a TestFile and add it to the test case. + def cleanup(self): + """Remove all the test files. Args: - data (bytes): Data to write to file - file_name (str): Name for the test file - encoding (str): Encoding to be used - required (bool): Indicates if test file must be served + None Returns: None """ - tfile = TestFile.from_data(data, file_name, encoding=encoding) - try: - self.add_file(tfile, required=required) - except TestFileExists: - tfile.close() - raise + rmtree(self._data_path, ignore_errors=True) - def add_from_file(self, input_file, file_name=None, required=True): - """Create a TestFile from an existing file and add it to the test case. + def clone(self): + """Make a copy of the TestCase. Args: - input_file (str): Path to existing file to use - file_name (str): Name for the test file - required (bool): Indicates if test file must be served + None Returns: - None + TestCase: A copy of the TestCase instance. """ - tfile = TestFile.from_file(input_file, file_name=file_name) - try: - self.add_file(tfile, required=required) - except TestFileExists: - tfile.close() - raise + result = type(self)( + self.landing_page, + self.redirect_page, + self.adapter_name, + self.input_fname, + self.time_limit, + self.timestamp, + ) + result.assets = self.assets + result.duration = self.duration + result.env_vars = dict(self.env_vars) + result.hang = self.hang + + # copy test data files + for entry, required in chain( + product(self._files.required, [True]), + product(self._files.optional, [False]), + ): + result.add_from_file( + entry.data_file, file_name=entry.file_name, required=required, copy=True + ) + return result - def cleanup(self): - """Close all the test files. + @property + def contents(self): + """All files in TestCase. Args: None - Returns: - None + Yields: + str: File path (relative to wwwroot). """ - for file_group in self._files: - for test_file in file_group: - test_file.close() + for tfile in chain(self._files.required, self._files.optional): + yield tfile.file_name - def contains(self, file_name): - """Check TestCase contains the TestFile with name matching `file_name`. + @property + def data_path(self): + """Location test data is stored on disk. This is intended to be used as wwwroot. Args: - file_name (str): File name to search for in TestCase. + None Returns: - bool: True if file exists in the TestCase otherwise False + str: Path to directory containing test case files. """ - return file_name in self._existing_paths + return str(self._data_path) @property def data_size(self): - """The total amount of data used by the test case (bytes). + """Total amount of data used (bytes) by the files in the TestCase. 
Args: None Returns: - int: Total size of the test case in byte. + int: Total size of the TestCase in bytes. """ total = 0 for group in self._files: - total += sum(x.size for x in group) + total += sum(x.data_file.stat().st_size for x in group) return total - def dump(self, out_path, include_details=False): + def dump(self, dst_path, include_details=False): """Write all the test case data to the filesystem. Args: - out_path (str): Path to directory to output data - include_details (bool): Output "test_info.json" file + dst_path (str): Path to directory to output data. + include_details (bool): Output "test_info.json" file. Returns: None """ - # save test files to out_path + dst_path = Path(dst_path) + # save test files to dst_path for test_file in chain(self._files.required, self._files.optional): - test_file.dump(out_path) + dst_file = dst_path / test_file.file_name + dst_file.parent.mkdir(parents=True, exist_ok=True) + copyfile(test_file.data_file, dst_file) # save test case files and meta data including: # adapter used, input file, environment info and files if include_details: @@ -228,295 +277,276 @@ def dump(self, out_path, include_details=False): "adapter": self.adapter_name, "duration": self.duration, "env": self.env_vars, - "input": os.path.basename(self.input_fname) if self.input_fname else None, - "target": self.landing_page} - with open(os.path.join(out_path, "test_info.json"), "w") as out_fp: + "hang": self.hang, + "input": Path(self.input_fname).name if self.input_fname else None, + "target": self.landing_page, + "time_limit": self.time_limit, + "timestamp": self.timestamp, + } + # save target assets and update meta data + if self.assets and not self.assets.is_empty(): + info["assets_path"] = "_assets_" + info["assets"] = self.assets.dump( + str(dst_path), subdir=info["assets_path"] + ) + with (dst_path / "test_info.json").open("w") as out_fp: json.dump(info, out_fp, indent=2, sort_keys=True) - # save meta files - for meta_file in self._files.meta: - meta_file.dump(out_path) - - def load_environ(self, path, env_data): - # sanity check environment variable data - for name, value in env_data.items(): - if not isinstance(name, str) or not isinstance(value, str): - raise TestCaseLoadFailure("'env_data' contains invalid 'env' entries") - self.env_vars = env_data - known_suppressions = ("lsan.supp", "tsan.supp", "ubsan.supp") - for supp in os.listdir(path): - if supp.lower() in known_suppressions: - # Update *SAN_OPTIONS environment variable to use provided suppression files. - opt_key = "%s_OPTIONS" % (supp.split(".")[0].upper(),) - opts = sanitizer_opts(self.env_vars.get(opt_key, "")) - opts["suppressions"] = "'%s'" % (os.path.join(path, supp),) - self.env_vars[opt_key] = ":".join("=".join((k, v)) for k, v in opts.items()) + + def get_file(self, path): + """Lookup and return the TestFile with the specified file name. + + Args: + path (str): Path (relative to wwwroot) of TestFile to retrieve. + + Returns: + TestFile: TestFile with matching path otherwise None. + """ + for tfile in chain(self._files.optional, self._files.required): + if tfile.file_name == path: + return tfile + return None @classmethod - def load_path(cls, path, full_scan=False, prefs=True): + def load(cls, path, adjacent=False): + """Load TestCases from disk. + + Args: + path (str): Path can be: + 1) A directory containing `test_info.json` and data. + 2) A directory with one or more subdirectories of 1. + 3) A zip archive containing testcase data or + subdirectories containing testcase data. 
+                4) A single file to be used as a test case.
+            adjacent (bool): Load adjacent files as part of the test case.
+                This is always the case when loading a directory.
+                WARNING: This should be used with caution!
+
+        Returns:
+            list: TestCases successfully loaded from path.
+        """
+        path = Path(path)
+        # unpack archive if needed
+        if path.name.lower().endswith(".zip"):
+            try:
+                unpacked = mkdtemp(prefix="unpack_", dir=grz_tmp("storage"))
+                with ZipFile(path) as zip_fp:
+                    zip_fp.extractall(path=unpacked)
+            except (BadZipfile, zlib_error):
+                rmtree(unpacked, ignore_errors=True)
+                raise TestCaseLoadFailure("Testcase archive is corrupted") from None
+            path = Path(unpacked)
+        else:
+            unpacked = None
+        # load testcase data from disk
+        try:
+            if path.is_file():
+                tests = [cls.load_single(path, adjacent=adjacent)]
+            elif path.is_dir():
+                tests = list()
+                assets = None
+                for tc_path in TestCase.scan_path(path):
+                    tests.append(
+                        cls.load_single(
+                            tc_path, load_assets=assets is None, copy=unpacked is None
+                        )
+                    )
+                    # only load assets once
+                    if not assets and tests[-1].assets:
+                        assets = tests[-1].assets
+                # reuse AssetManager on all tests
+                if assets:
+                    for test in tests:
+                        if test.assets is None:
+                            test.assets = assets
+                tests.sort(key=lambda tc: tc.timestamp)
+            else:
+                raise TestCaseLoadFailure("Invalid TestCase path")
+        finally:
+            if unpacked is not None:
+                rmtree(unpacked, ignore_errors=True)
+        return tests
+
+    @classmethod
+    def load_single(cls, path, adjacent=False, load_assets=True, copy=True):
        """Load contents of a TestCase from disk. If `path` is a directory it must
-        contain a valid test_info.json file.
+        contain a valid 'test_info.json' file.

        Args:
-            path (str): Path to the directory or file to load.
-            full_scan (bool): Include all files in the directory containing the
-                test case entry point as well as the contents of
-                subdirectories. This is always the case when
-                loading a directory.
-                WARNING: This should be used with caution!
-            prefs (bool): Include prefs.js file in the test case.
+            path (Path): Path to the directory or file to load.
+            adjacent (bool): Load adjacent files as part of the TestCase.
+                This is always true when loading a directory.
+                WARNING: This should be used with caution!
+            load_assets (bool): Load asset files.
+            copy (bool): Files will be copied if True, otherwise they will be moved.

        Returns:
            TestCase: A TestCase.
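+
+        Example (a sketch; "poc" is a hypothetical directory containing
+        'test_info.json' and the files it references):
+
+            test = TestCase.load_single("poc")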
""" - path = os.path.abspath(path) - if os.path.isdir(path): - # load a directory using test_info.json + path = Path(path) + if path.is_dir(): + # load using test_info.json try: - with open(os.path.join(path, "test_info.json"), "r") as in_fp: + with (path / "test_info.json").open("r") as in_fp: info = json.load(in_fp) except IOError: - raise TestCaseLoadFailure("Missing 'test_info.json'") + raise TestCaseLoadFailure("Missing 'test_info.json'") from None except ValueError: - raise TestCaseLoadFailure("Invalid 'test_info.json'") - if "target" not in info: - raise TestCaseLoadFailure("'test_info.json' missing 'target' entry") - entry_point = os.path.basename(info["target"]) - if not os.path.isfile(os.path.join(path, entry_point)): - raise TestCaseLoadFailure("entry_point '%s' not found in '%s'" % (entry_point, path)) - adapter = info.get("adapter", None) - full_scan = True - elif os.path.isfile(path): - adapter = None - entry_point = os.path.basename(path) - path = os.path.dirname(path) - info = None - else: - raise TestCaseLoadFailure("Cannot find %r" % (path,)) - test = cls(None, None, adapter) - if full_scan: - # load all files from directory as test - for dpath, _, files in os.walk(path): - for fname in files: - if fname == "test_info.json": - continue - if dpath == path: - if fname == "prefs.js": - if prefs: - test.add_meta(TestFile.from_file(os.path.join(dpath, fname))) - continue - if fname == entry_point: - test.add_from_file(os.path.join(dpath, fname)) - # set entry point - test.landing_page = fname - continue - location = None - else: - # handle nested directories - location = "/".join((dpath.split(path, 1)[-1], fname)) - test.add_from_file( - os.path.join(dpath, fname), - file_name=location, - required=False) + raise TestCaseLoadFailure("Invalid 'test_info.json'") from None + if not isinstance(info.get("target"), str): + raise TestCaseLoadFailure("'test_info.json' has invalid 'target' entry") + entry_point = Path(path / info["target"]) + if not entry_point.is_file(): + raise TestCaseLoadFailure( + "Entry point %r not found in %r" % (info["target"], str(path)) + ) + # always load all contents of a directory if a 'test_info.json' is loaded + adjacent = True + elif path.is_file(): + entry_point = path + info = dict() else: - # load single file as test - test.add_from_file(os.path.join(path, entry_point)) - test.landing_page = entry_point - if test.landing_page is None: # pragma: no cover - # this should not be possible - test.cleanup() - raise AssertionError("Scanning for test case 'entry point' failed") - # load environment variables + raise TestCaseLoadFailure("Missing or invalid TestCase %r" % (str(path),)) + # create testcase and add data + test = cls( + entry_point.relative_to(entry_point.parent).as_posix(), + None, + info.get("adapter", None), + input_fname=info.get("input", None), + time_limit=info.get("time_limit", None), + timestamp=info.get("timestamp", 0), + ) + test.duration = info.get("duration", None) + test.hang = info.get("hang", False) + test.add_from_file( + entry_point, file_name=test.landing_page, required=True, copy=copy + ) if info: + # load assets try: - test.load_environ(path, info.get("env", {})) - except TestCaseLoadFailure: + if load_assets and info.get("assets", None): + test.assets = AssetManager.load( + info.get("assets"), + str(entry_point.parent / info.get("assets_path", "")), + ) + except (AssetError, OSError) as exc: test.cleanup() - raise + raise TestCaseLoadFailure(str(exc)) from None + # load environment variables + test.env_vars = info.get("env", 
dict()) + assert isinstance(test.env_vars, dict) + # sanity check environment variable data + for name, value in test.env_vars.items(): + if not isinstance(name, str) or not isinstance(value, str): + test.cleanup() + if test.assets: + test.assets.cleanup() + raise TestCaseLoadFailure("'env' contains invalid entries") + # load all adjacent data from directory + if adjacent: + asset_path = info.get("assets_path", None) + for entry in Path(entry_point.parent).rglob("*"): + if not entry.is_file(): + continue + location = entry.relative_to(entry_point.parent).as_posix() + # ignore asset path + if asset_path and location.startswith(asset_path): + continue + # ignore files that have been previously loaded + if location in (test.landing_page, "test_info.json"): + continue + # NOTE: when loading all files except the entry point are + # marked as `required=False` + test.add_from_file( + entry, + file_name=location, + required=False, + copy=copy, + ) return test @property def optional(self): - """Get file names of optional TestFiles + """Get file paths of optional files. Args: None - Returns: - generator: file names (str) of optional files + Yields: + str: File path of each optional file. """ for test in self._files.optional: yield test.file_name - def purge_optional(self, keep): - """Remove optional files (by name) that are not in keep. - - Args: - keep (iterable): Filenames that will not be removed. - - Returns: - None - """ - keep = set(keep) - to_remove = [] - for idx, tfile in enumerate(self._files.optional): - if tfile.file_name not in keep: - to_remove.append(idx) - for idx in reversed(to_remove): - self._files.optional.pop(idx).close() - - -class TestFile(object): - CACHE_LIMIT = 0x80000 # data cache limit per file: 512KB - XFER_BUF = 0x10000 # transfer buffer size: 64KB - - __slots__ = ("_file_name", "_fp") - - def __init__(self, file_name): - # This is a naive fix for a larger path issue. This is a simple sanity - # check and does not check if invalid characters are used. If an invalid - # file name is used an exception will be raised when trying to write - # that file to the file system. - if "\\" in file_name: - file_name = file_name.replace("\\", "/") - if file_name.startswith("/"): - file_name = file_name.lstrip("/") - if file_name.endswith("."): - file_name = file_name.rstrip(".") - if not file_name \ - or ("/" in file_name and not file_name.rsplit("/", 1)[-1]) \ - or file_name.startswith("../"): - raise TypeError("file_name is invalid %r" % (file_name,)) - self._file_name = os.path.normpath(file_name) # name including path relative to wwwroot - self._fp = SpooledTemporaryFile(max_size=self.CACHE_LIMIT, dir=grz_tmp(), prefix="grz_tf_") - - def __enter__(self): - return self - - def __exit__(self, *exc): - self.close() - - def clone(self): - """Make a copy of the TestFile. - - Args: - None - - Returns: - TestFile: A copy of the TestFile instance - """ - cloned = TestFile(self._file_name) - self._fp.seek(0) - shutil.copyfileobj(self._fp, cloned._fp, self.XFER_BUF) # pylint: disable=protected-access - return cloned - - def close(self): - """Close the TestFile. - - Args: - None - - Returns: - None TestFile instance - """ - self._fp.close() - - @property - def data(self): - """Get the data from the TestFile. Not recommenced for large files. + def pop_assets(self): + """Remove AssetManager from TestCase. Args: None Returns: - bytes: Data from the TestFile + AssetManager: AssetManager if exists otherwise None. 
""" - pos = self._fp.tell() - self._fp.seek(0) - data = self._fp.read() - self._fp.seek(pos) - return data + if self.assets is None: + assets = None + else: + assets = self.assets + self.assets = None + return assets - def dump(self, path): - """Write test file data to the filesystem. + def purge_optional(self, keep): + """Remove optional files that are not in keep. Args: - path (str): Path to output data + keep (iterable(str)): Files that will not be removed. This can contain + absolute (includes) and relative paths. Returns: None """ - target_path = os.path.join(path, os.path.dirname(self._file_name)) - if not os.path.isdir(target_path): - os.makedirs(target_path) - self._fp.seek(0) - with open(os.path.join(path, self._file_name), "wb") as dst_fp: - shutil.copyfileobj(self._fp, dst_fp, self.XFER_BUF) - - @property - def file_name(self): - return self._file_name - - @classmethod - def from_data(cls, data, file_name, encoding="UTF-8"): - """Create a TestFile and add it to the test case. - - Args: - data (bytes): Data to write to file - file_name (str): Name for the test file - encoding (str): Encoding to be used - - Returns: - TestFile: new instance - """ - t_file = cls(file_name) - if data: - if isinstance(data, bytes) or not encoding: - t_file.write(data) - else: - t_file.write(data.encode(encoding)) - return t_file - - @classmethod - def from_file(cls, input_file, file_name=None): - """Create a TestFile from an existing file. - - Args: - input_file (str): Path to existing file to use - file_name (str): Name for the test file - - Returns: - TestFile: new instance - """ - if file_name is None: - file_name = os.path.basename(input_file) - t_file = cls(file_name) - with open(input_file, "rb") as src_fp: - shutil.copyfileobj(src_fp, t_file._fp, cls.XFER_BUF) # pylint: disable=protected-access - return t_file + to_remove = list() + # iterate over optional files + for idx, opt in enumerate(self._files.optional): + # check entries in 'keep' for a match + if not any(x.endswith(opt.file_name) for x in keep): + to_remove.append(idx) + # purge + for idx in reversed(to_remove): + self._files.optional.pop(idx).data_file.unlink() - @property - def size(self): - """Size of the file in bytes. + @staticmethod + def sanitize_path(path): + """Sanitize given path for use as a URI path. Args: - None + path (str): Path to sanitize. Must be relative to wwwroot. Returns: - int: Size in bytes. + str: Sanitized path. """ - pos = self._fp.tell() - self._fp.seek(0, os.SEEK_END) - size = self._fp.tell() - self._fp.seek(pos) - return size - - def write(self, data): - """Add data to the TestFile. + assert isinstance(path, str) + # check for missing filename or path containing drive letter (Windows) + if split(path)[-1] in ("", ".", "..") or ":" in path: + raise ValueError("invalid path %r" % (path,)) + # normalize path + path = normpath(path).replace("\\", "/") + # check normalized path does not resolve to location outside of '.' + if path.startswith("../"): + raise ValueError("invalid path %r" % (path,)) + return path.lstrip("/") + + @staticmethod + def scan_path(path): + """Check path and subdirectories for potential test cases. Args: - data (bytes): Data to add to the TestFile + path (Path): Path to scan. - Returns: - None + Yields: + str: Path to what appears to be a valid testcase. 
""" - self._fp.write(data) + if "test_info.json" in (x.name for x in path.iterdir()): + yield path + else: + for entry in path.iterdir(): + if entry.is_dir() and (entry / "test_info.json").is_file(): + yield entry diff --git a/grizzly/common/test_fuzzmanager.py b/grizzly/common/test_fuzzmanager.py new file mode 100644 index 00000000..b9de8164 --- /dev/null +++ b/grizzly/common/test_fuzzmanager.py @@ -0,0 +1,220 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +"""Tests for interface for getting Crash and Bucket data from CrashManager API""" +import json + +from pytest import mark, raises + +from .fuzzmanager import Bucket, CrashEntry, load_fm_data + + +def test_bucket_1(mocker): + """bucket getattr uses data from get""" + coll = mocker.patch("grizzly.common.fuzzmanager.Collector", autospec=True) + coll.return_value.get.return_value.json.return_value = {"testcase": "data"} + coll.return_value.serverProtocol = "http" + coll.return_value.serverPort = 123 + coll.return_value.serverHost = "allizom.org" + bucket = Bucket(123) + assert coll.return_value.get.call_count == 0 + assert bucket.testcase == "data" + with raises(AttributeError): + getattr(bucket, "other") + assert coll.return_value.get.call_count == 1 + + +def test_bucket_2(mocker): + """bucket setattr raises""" + coll = mocker.patch("grizzly.common.fuzzmanager.Collector", autospec=True) + coll.return_value.serverProtocol = "http" + coll.return_value.serverPort = 123 + coll.return_value.serverHost = "allizom.org" + bucket = Bucket(123) + with raises(AttributeError): + bucket.other = "data" + assert coll.return_value.get.call_count == 0 + + +def test_bucket_3(mocker): + """bucket iter_crashes flattens across pages""" + coll = mocker.patch("grizzly.common.fuzzmanager.Collector", autospec=True) + coll.return_value.serverProtocol = "http" + coll.return_value.serverPort = 123 + coll.return_value.serverHost = "allizom.org" + coll.return_value.get.return_value.json.side_effect = [ + { + "count": 2, + "next": "url", + "results": [ + {"id": 234, "testcase": "test1"}, + {"id": 345, "testcase": None}, + ], + }, + { + "count": 1, + "next": None, + "results": [ + {"id": 456, "testcase": "test2"}, + ], + }, + ] + bucket = Bucket(123) + assert coll.return_value.get.call_count == 0 + crashes = list(bucket.iter_crashes(quality_filter=5)) + assert coll.return_value.get.call_count == 2 + assert coll.return_value.get.call_args_list[0][1]["params"]["include_raw"] == "0" + assert json.loads( + coll.return_value.get.call_args_list[0][1]["params"]["query"] + ) == { + "op": "AND", + "bucket": 123, + "testcase__quality": 5, + } + assert len(crashes) == 2 + assert crashes[0].crash_id == 234 + assert crashes[1].crash_id == 456 + + +def test_bucket_4(mocker): + """bucket signature_path writes and returns sig json and metadata""" + coll = mocker.patch("grizzly.common.fuzzmanager.Collector", autospec=True) + coll.return_value.serverProtocol = "http" + coll.return_value.serverPort = 123 + coll.return_value.serverHost = "allizom.org" + coll.return_value.get.return_value.json.return_value = { + "signature": "sigdata", + "size": 10, + "frequent": True, + "shortDescription": "sig desc", + "best_quality": 0, + } + with Bucket(123) as bucket: + assert coll.return_value.get.call_count == 0 + sig_path = bucket.signature_path() + assert sig_path.is_file() + assert sig_path.with_suffix(".metadata").is_file() + 
assert sig_path.read_text() == "sigdata" + assert json.loads(sig_path.with_suffix(".metadata").read_text()) == { + "size": 10, + "frequent": True, + "shortDescription": "sig desc", + "testcase__quality": 0, + } + assert coll.return_value.get.call_count == 1 + # second call returns same path + assert bucket.signature_path() == sig_path + assert coll.return_value.get.call_count == 1 + + +def test_crash_1(mocker): + """crash getattr uses data from get""" + coll = mocker.patch("grizzly.common.fuzzmanager.Collector", autospec=True) + coll.return_value.get.return_value.json.return_value = {"testcase": "data"} + coll.return_value.serverProtocol = "http" + coll.return_value.serverPort = 123 + coll.return_value.serverHost = "allizom.org" + crash = CrashEntry(123) + assert coll.return_value.get.call_count == 0 + assert crash.testcase == "data" + with raises(AttributeError): + getattr(crash, "other") + assert coll.return_value.get.call_count == 1 + assert coll.return_value.get.call_args[1]["params"] == {"include_raw": "0"} + + # crash getattr for raw field re-gets + coll.return_value.get.return_value.json.return_value = {"rawStderr": "stderr"} + assert crash.rawStderr == "stderr" + assert coll.return_value.get.call_count == 2 + assert coll.return_value.get.call_args[1]["params"] == {"include_raw": "1"} + + +def test_crash_2(mocker): + """crash setattr raises except testcase_quality""" + coll = mocker.patch("grizzly.common.fuzzmanager.Collector", autospec=True) + coll.return_value.get.return_value.json.return_value = {"testcase": "data"} + coll.return_value.serverProtocol = "http" + coll.return_value.serverPort = 123 + coll.return_value.serverHost = "allizom.org" + crash = CrashEntry(123) + + # crash setattr raises for other field + with raises(AttributeError): + crash.other = "data" + assert coll.return_value.get.call_count == 0 + + # crash setattr for testcase_quality works and updates data if set + assert coll.return_value.patch.call_count == 0 + crash.testcase_quality = 5 + assert coll.return_value.get.call_count == 0 + assert coll.return_value.patch.call_count == 1 + with raises(AttributeError): + getattr(crash, "testcase_quality") + assert coll.return_value.get.call_count == 1 + getattr(crash, "testcase") + assert coll.return_value.get.call_count == 1 + crash.testcase_quality = 10 + assert coll.return_value.patch.call_count == 2 + assert crash.testcase_quality == 10 + assert coll.return_value.get.call_count == 1 + + +def test_crash_3(mocker): + """crash testcase_path writes and returns testcase zip""" + coll = mocker.patch("grizzly.common.fuzzmanager.Collector", autospec=True) + coll.return_value.serverProtocol = "http" + coll.return_value.serverPort = 123 + coll.return_value.serverHost = "allizom.org" + coll.return_value.get.return_value.json.return_value = { + "id": 234, + "testcase": "test.bz2", + } + with CrashEntry(234) as crash: + assert crash.testcase == "test.bz2" # pre-load data dict so I can re-patch get + coll.return_value.get.return_value = mocker.Mock( + content=b"\x01\x02\x03", + headers={"content-disposition"}, + ) + assert coll.return_value.get.call_count == 1 + tc_path = crash.testcase_path() + assert tc_path.is_file() + assert tc_path.suffix == ".bz2" + assert tc_path.read_bytes() == b"\x01\x02\x03" + assert coll.return_value.get.call_count == 2 + # second call returns same path + assert crash.testcase_path() == tc_path + assert coll.return_value.get.call_count == 2 + + +@mark.parametrize( + "bucket_id, load_bucket", + [ + # Nothing to load, don't try + (None, False), + # 
Nothing to load, try + (None, True), + # Bucket exists, don't load it + (111, False), + # Bucket exists, load it + (111, True), + ], +) +def test_load_fm_data_1(mocker, bucket_id, load_bucket): + """test load_fm_data()""" + coll = mocker.patch("grizzly.common.fuzzmanager.Collector", autospec=True) + coll.return_value.serverProtocol = "http" + coll.return_value.serverPort = 123 + coll.return_value.serverHost = "allizom.org" + coll.return_value.get.return_value = mocker.Mock( + content=b"\x01\x02\x03", + headers={"content-disposition"}, + ) + coll.return_value.get.return_value.json.return_value = {"bucket": bucket_id} + + with load_fm_data(123, load_bucket) as (crash, bucket): + assert isinstance(crash, CrashEntry) + if load_bucket and bucket_id: + assert isinstance(bucket, Bucket) + else: + assert bucket is None diff --git a/grizzly/common/test_iomanager.py b/grizzly/common/test_iomanager.py index 4be91e89..b1863606 100644 --- a/grizzly/common/test_iomanager.py +++ b/grizzly/common/test_iomanager.py @@ -3,20 +3,52 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # pylint: disable=protected-access +from pytest import mark from .iomanager import IOManager -from .storage import TestFile def test_iomanager_01(): """test a simple IOManager""" with IOManager() as iom: assert iom.server_map is not None - assert not iom._environ_files + assert not iom.tests assert iom._generated == 0 assert iom._report_size == 1 + assert iom._test is None + + +@mark.parametrize( + "report_size, iters", + [ + (1, 1), + (1, 2), + (2, 2), + (2, 3), + ], +) +def test_iomanager_02(report_size, iters): + """test IOManager create_testcase(), commit() and purge()""" + with IOManager(report_size=report_size) as iom: + assert not iom.tests + for current in range(1, iters + 1): + tcase = iom.create_testcase("test-adapter", 10) + assert iom._generated == current + assert iom._test + precommit_size = len(iom.tests) + iom.commit() + assert iom._test is None + assert tcase == iom.tests[0] + size = len(iom.tests) + assert precommit_size <= size + assert size <= report_size + assert size == report_size + iom.purge() + assert iom._test is None + assert not iom.tests + -def test_iomanager_02(): +def test_iomanager_03(): """test IOManager.page_name()""" with IOManager() as iom: assert iom.page_name() != iom.page_name(offset=1) @@ -24,25 +56,10 @@ def test_iomanager_02(): iom._generated += 1 assert iom.page_name() == next_page -def test_iomanager_03(mocker, tmp_path): - """test IOManager._add_suppressions()""" - mocker.patch.dict("grizzly.common.iomanager.environ", values={}) - with IOManager() as iom: - assert not iom._environ_files - supp_file = tmp_path / "supp_file.txt" - supp_file.touch() - mocker.patch.dict( - "grizzly.common.iomanager.environ", - values={ - "ASAN_OPTIONS": "blah=1:suppressions='%s':foo=2" % (str(supp_file),), - "DEBUG": "1", - "LSAN_OPTIONS": "nothing=1", - "JUNK": "test"}) - iom._add_suppressions() - assert "asan.supp" in (x.file_name for x in iom._environ_files) def test_iomanager_04(): """test IOManager.create_testcase()""" + time_limit = 10 with IOManager() as iom: assert iom._generated == 0 assert iom._report_size == 1 @@ -50,54 +67,13 @@ def test_iomanager_04(): assert not iom.server_map.dynamic assert not iom.server_map.include assert not iom.server_map.redirect - iom._tracked_env = {"TEST": "1"} - iom._environ_files = [TestFile.from_data(b"data", "e.txt")] - # without a harness, no input files - tcase = 
iom.create_testcase("test-adapter") + tcase = iom.create_testcase("test-adapter", time_limit) assert tcase is not None - assert iom._generated == 1 - assert len(iom.tests) == 1 assert not any(tcase.optional) + assert tcase.time_limit == time_limit assert "grz_current_test" in iom.server_map.redirect assert iom.server_map.redirect["grz_current_test"].target == tcase.landing_page assert "grz_next_test" in iom.server_map.redirect - assert "grz_harness" not in iom.server_map.dynamic - # with a harness - iom.harness = b"harness-data" - tcase = iom.create_testcase("test-adapter") - assert tcase is not None - assert len(iom.tests) == 1 - assert iom._generated == 2 - assert "grz_current_test" in iom.server_map.redirect - assert iom.server_map.redirect["grz_current_test"].target == tcase.landing_page - assert "grz_next_test" in iom.server_map.redirect - assert "grz_harness" in iom.server_map.dynamic - -def test_iomanager_05(mocker): - """test IOManager.tracked_environ()""" - mocker.patch.dict("grizzly.common.iomanager.environ", values={}) - assert not IOManager.tracked_environ() - mocker.patch.dict( - "grizzly.common.iomanager.environ", - values={ - "ASAN_OPTIONS": "blah='z:/a':detect_leaks=1:foo=2", - "LSAN_OPTIONS": "detect_leaks='x:\\a.1':a=1", - # should be added since it is in IOManager.TRACKED_ENVVARS - "MOZ_CHAOSMODE": "1", - # this should be skipped because it uses the FFPuppet debug - "XPCOM_DEBUG_BREAK": "warn", - "TEST_BAD": "FAIL"}, - clear=True) - tracked = IOManager.tracked_environ() - assert "TEST_BAD" not in tracked - assert "XPCOM_DEBUG_BREAK" not in tracked - assert "ASAN_OPTIONS" in tracked - assert "MOZ_CHAOSMODE" in tracked - assert tracked["ASAN_OPTIONS"] == "detect_leaks=1" - assert "LSAN_OPTIONS" in tracked - assert tracked["LSAN_OPTIONS"] == "detect_leaks='x:\\a.1'" - mocker.patch.dict( - "grizzly.common.iomanager.environ", - values={"ASAN_OPTIONS": "ignored=x"}, - clear=True) - assert not IOManager.tracked_environ() + assert iom._test is not None + iom.purge() + assert iom._test is None diff --git a/grizzly/common/test_plugins.py b/grizzly/common/test_plugins.py new file mode 100644 index 00000000..da0d8c13 --- /dev/null +++ b/grizzly/common/test_plugins.py @@ -0,0 +1,103 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
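For context on what the plugin-loader tests below exercise: third-party packages expose classes to Grizzly through setuptools entry points. A sketch of a hypothetical target package's setup.py follows; the "grizzly_targets" group name follows the project's existing entry-point convention, and the module/class names are made up:

```python
# setup.py for a hypothetical out-of-tree Grizzly target plugin
from setuptools import setup

setup(
    name="grizzly-mytarget",
    version="0.1",
    py_modules=["mytarget"],
    entry_points={"grizzly_targets": ["mytarget = mytarget:MyTarget"]},
)
```

With such a package installed, `scan("grizzly_targets")` would report "mytarget" and `load("mytarget", "grizzly_targets", Target)` would return the `MyTarget` class, or raise `PluginLoadError` if it does not subclass `Target`.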
+from pkg_resources import EntryPoint +from pytest import raises + +from ..target import Target +from .plugins import PluginLoadError, load, scan, scan_target_assets + + +class FakeType1: + pass + + +class FakeType2: + pass + + +def test_load_01(mocker): + """test load() - nothing to load""" + mocker.patch( + "grizzly.common.plugins.iter_entry_points", autospec=True, return_value=[] + ) + with raises(PluginLoadError, match="'test-name' not found in 'test-group'"): + load("test-name", "test-group", FakeType1) + + +def test_load_02(mocker): + """test load() - successful load""" + # Note: Mock.name cannot be set via the constructor so spec_set cannot be used + entry = mocker.Mock(spec=EntryPoint) + entry.name = "test-name" + entry.load.return_value = FakeType1 + mocker.patch( + "grizzly.common.plugins.iter_entry_points", autospec=True, return_value=[entry] + ) + assert load("test-name", "test-group", FakeType1) + + +def test_load_03(mocker): + """test load() - invalid type""" + entry = mocker.Mock(spec=EntryPoint) + entry.name = "test-name" + entry.load.return_value = FakeType1 + mocker.patch( + "grizzly.common.plugins.iter_entry_points", autospec=True, return_value=[entry] + ) + with raises(PluginLoadError, match="'test-name' doesn't inherit from FakeType2"): + load("test-name", "test-group", FakeType2) + + +def test_scan_01(mocker): + """test scan() - no entries found""" + mocker.patch( + "grizzly.common.plugins.iter_entry_points", autospec=True, return_value=[] + ) + assert not scan("test_group") + + +def test_scan_02(mocker): + """test scan() - duplicate entry""" + entry = mocker.Mock(spec=EntryPoint) + entry.name = "test_entry" + mocker.patch( + "grizzly.common.plugins.iter_entry_points", + autospec=True, + return_value=[entry, entry], + ) + with raises(PluginLoadError, match="Duplicate entry 'test_entry' in 'test_group'"): + scan("test_group") + + +def test_scan_03(mocker): + """test scan() - success""" + entry = mocker.Mock(spec=EntryPoint) + entry.name = "test-name" + mocker.patch( + "grizzly.common.plugins.iter_entry_points", + autospec=True, + return_value=[entry], + ) + assert "test-name" in scan("test_group") + + +def test_scan_target_assets_01(mocker): + """test scan_target_assets() - success""" + targ1 = mocker.Mock(spec=EntryPoint) + targ1.name = "t1" + targ1.load.return_value = mocker.Mock(spec_set=Target, SUPPORTED_ASSETS=None) + targ2 = mocker.Mock(spec=EntryPoint) + targ2.name = "t2" + targ2.load.return_value = mocker.Mock(spec_set=Target, SUPPORTED_ASSETS=("a", "B")) + mocker.patch( + "grizzly.common.plugins.iter_entry_points", + autospec=True, + return_value=[targ1, targ2], + ) + assets = scan_target_assets() + assert "t1" in assets + assert assets["t1"] is None + assert "t2" in assets + assert "B" in assets["t2"] diff --git a/grizzly/common/test_report.py b/grizzly/common/test_report.py new file mode 100644 index 00000000..994de37e --- /dev/null +++ b/grizzly/common/test_report.py @@ -0,0 +1,351 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
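The `Report` tests below cover log selection and stack parsing. A rough usage sketch; the binary name and log directory are hypothetical:

```python
from pathlib import Path

from grizzly.common.report import Report

log_dir = Path("logs")  # a directory of log_*.txt files collected from the target
report = Report(log_dir, "firefox")  # selects the preferred log and parses the stack
print(report.major, report.minor)  # stack hashes used for bucketing results
report.cleanup()  # removes log_dir when the report is no longer needed
```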
+"""test Grizzly Report""" +# pylint: disable=protected-access + +from pathlib import Path + +from FTB.Signatures.CrashInfo import CrashInfo +from pytest import mark, raises + +from .report import Report + + +def _create_crash_log(log_path): + with log_path.open("w") as log_fp: + log_fp.write("==1==ERROR: AddressSanitizer: SEGV on unknown address 0x0") + log_fp.write(" (pc 0x0 bp 0x0 sp 0x0 T0)\n") + log_fp.write(" #0 0xbad000 in foo /file1.c:123:234\n") + log_fp.write(" #1 0x1337dd in bar /file2.c:1806:19") + + +def test_report_01(tmp_path): + """test Report() with boring logs (no stack)""" + (tmp_path / "not_a_log.txt").touch() + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + report = Report(tmp_path, "a.bin", size_limit=0) + assert report._target_binary.name == "a.bin" + assert report.path == tmp_path + assert report._logs.aux is None + assert report._logs.stderr.endswith("log_stderr.txt") + assert report._logs.stdout.endswith("log_stdout.txt") + assert report.preferred.endswith("log_stderr.txt") + assert report.stack is None + assert Report.DEFAULT_MAJOR == report.major + assert Report.DEFAULT_MINOR == report.minor + assert report.prefix is not None + report.cleanup() + assert not tmp_path.exists() + + +def test_report_02(tmp_path): + """test Report() with crash logs""" + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + _create_crash_log(tmp_path / "log_asan_blah.txt") + report = Report(tmp_path, "bin") + assert report.path == tmp_path + assert report._logs.aux.endswith("log_asan_blah.txt") + assert report._logs.stderr.endswith("log_stderr.txt") + assert report._logs.stdout.endswith("log_stdout.txt") + assert report.preferred.endswith("log_asan_blah.txt") + assert report.stack is not None + assert Report.DEFAULT_MAJOR != report.major + assert Report.DEFAULT_MINOR != report.minor + assert report.prefix is not None + report.cleanup() + + +def test_report_03(tmp_path): + """test Report.tail()""" + tmp_file = tmp_path / "file.txt" + tmp_file.write_bytes(b"blah\ntest\n123\xEF\x00FOO") + length = tmp_file.stat().st_size + # no size limit + with raises(AssertionError): + Report.tail(str(tmp_file), 0) + assert tmp_file.stat().st_size == length + Report.tail(str(tmp_file), 3) + log_data = tmp_file.read_bytes() + assert log_data.startswith(b"[LOG TAILED]\n") + assert log_data[13:] == b"FOO" + + +def test_report_04(tmp_path): + """test Report.select_logs() uninteresting data""" + # test with empty path + assert Report.select_logs(str(tmp_path)) is None + # empty file + (tmp_path / "not_a_log.txt").touch() + assert Report.select_logs(str(tmp_path)) is None + + +def test_report_05(tmp_path): + """test Report.select_logs()""" + # small log with nothing interesting + with (tmp_path / "log_asan.txt.1").open("wb") as log_fp: + log_fp.write(b"SHORT LOG\n") + log_fp.write(b"filler line") + # crash on another thread + with (tmp_path / "log_asan.txt.2").open("wb") as log_fp: + log_fp.write(b"GOOD LOG\n") + log_fp.write( + b"==70811==ERROR: AddressSanitizer: SEGV on unknown address 0x00000BADF00D" + ) + log_fp.write( + b" (pc 0x7f4c0bb54c67 bp 0x7f4c07bea380 sp 0x7f4c07bea360 T0)\n" + ) # must be 2nd line + # pad out to 6 lines + for l_no in range(4): + log_fp.write(b" #%d blah...\n" % l_no) + # child log that should be ignored (created when parent crashes) + with (tmp_path / "log_asan.txt.3").open("wb") as log_fp: + log_fp.write(b"BAD LOG\n") + log_fp.write( + 
b"==70811==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000" + ) + log_fp.write( + b" (pc 0x7f4c0bb54c67 bp 0x7f4c07bea380 sp 0x7f4c07bea360 T2)\n" + ) # must be 2nd line + # pad out to 6 lines + for l_no in range(4): + log_fp.write(b" #%d blah...\n" % l_no) + (tmp_path / "log_mindump_blah.txt").write_bytes(b"minidump log") + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + # should be ignored in favor of "GOOD LOG" + (tmp_path / "log_ffp_worker_blah.txt").write_bytes(b"worker log") + log_map = Report.select_logs(str(tmp_path)) + assert "GOOD LOG" in (tmp_path / log_map.aux).read_text() + assert "STDERR" in (tmp_path / log_map.stderr).read_text() + assert "STDOUT" in (tmp_path / log_map.stdout).read_text() + + +def test_report_06(tmp_path): + """test minidump with Report.select_logs()""" + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + with (tmp_path / "log_minidump_01.txt").open("wb") as log_fp: + log_fp.write(b"GPU|||\n") + log_fp.write(b"Crash|SIGSEGV|0x0|0\n") + log_fp.write(b"minidump log\n") + (tmp_path / "log_ffp_worker_blah.txt").write_bytes(b"worker log") + log_map = Report.select_logs(str(tmp_path)) + assert (tmp_path / log_map.stderr).is_file() + assert (tmp_path / log_map.stdout).is_file() + assert "minidump log" in (tmp_path / log_map.aux).read_text() + + +def test_report_07(tmp_path): + """test selecting preferred DUMP_REQUESTED minidump with Report.select_logs()""" + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + with (tmp_path / "log_minidump_01.txt").open("wb") as log_fp: + log_fp.write(b"GPU|||\n") + log_fp.write(b"Crash|DUMP_REQUESTED|0x7f9518665d18|0\n") + log_fp.write(b"0|0|bar.so|sadf|a.cc:739484451a63|3066|0x0\n") + log_fp.write(b"0|1|gar.so|fdsa|b.cc:739484451a63|1644|0x12\n") + with (tmp_path / "log_minidump_02.txt").open("wb") as log_fp: + log_fp.write(b"GPU|||\n") + log_fp.write(b"Crash|DUMP_REQUESTED|0x7f57ac9e2e14|0\n") + log_fp.write( + b"0|0|foo.so|google_breakpad::ExceptionHandler::WriteMinidump|" + b"bar.cc:234|674|0xc\n" + ) + log_fp.write( + b"0|1|foo.so|google_breakpad::ExceptionHandler::WriteMinidump|" + b"bar.cc:4a2|645|0x8\n" + ) + with (tmp_path / "log_minidump_03.txt").open("wb") as log_fp: + log_fp.write(b"GPU|||\n") + log_fp.write(b"Crash|DUMP_REQUESTED|0x7f9518665d18|0\n") + log_fp.write(b"0|0|bar.so|sadf|a.cc:1234|3066|0x0\n") + log_fp.write(b"0|1|gar.so|fdsa|b.cc:4323|1644|0x12\n") + log_map = Report.select_logs(str(tmp_path)) + assert (tmp_path / log_map.stderr).is_file() + assert (tmp_path / log_map.stdout).is_file() + assert ( + "google_breakpad::ExceptionHandler::WriteMinidump" + in (tmp_path / log_map.aux).read_text() + ) + + +def test_report_08(tmp_path): + """test selecting worker logs with Report.select_logs()""" + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + (tmp_path / "log_ffp_worker_1.txt").write_bytes(b"worker log") + # we should only ever see one but if we see multiple we warn, so test that. 
+ (tmp_path / "log_ffp_worker_2.txt").write_bytes(b"worker log") + log_map = Report.select_logs(str(tmp_path)) + assert (tmp_path / log_map.stderr).is_file() + assert (tmp_path / log_map.stdout).is_file() + assert "worker log" in (tmp_path / log_map.aux).read_text() + + +def test_report_09(tmp_path): + """test prioritizing sanitizer logs with Report._find_sanitizer()""" + # NOTE: ordered by selection priority in order to use previously added logs + # test empty + (tmp_path / "log_asan.txt.0").touch() + assert Report._find_sanitizer([str(x) for x in tmp_path.iterdir()]) is None + # test *San log with data + (tmp_path / "log_asan.txt.1").write_text("test") + selected = Report._find_sanitizer([str(x) for x in tmp_path.iterdir()]) + assert selected is not None + assert "test" in Path(selected).read_text() + # test UBSan log + (tmp_path / "log_asan.txt.1").write_text( + "test.cc:3:5: runtime error: signed integer overflow: ..." + ) + selected = Report._find_sanitizer([str(x) for x in tmp_path.iterdir()]) + assert selected is not None + assert "runtime error: signed integer overflow" in Path(selected).read_text() + # test selecting ASan report + with (tmp_path / "log_asan.txt.2").open("wb") as log_fp: + # missing stack + log_fp.write(b"==1184==ERROR: AddressSanitizer: BUS on ... blah\n") + with (tmp_path / "log_asan.txt.3").open("wb") as log_fp: + log_fp.write(b"==9482==ERROR: AddressSanitizer: stack-overflow on ...\n") + for l_no in range(4): + log_fp.write(b" #%d blah...\n" % (l_no,)) + selected = Report._find_sanitizer([str(x) for x in tmp_path.iterdir()]) + assert selected is not None + assert "AddressSanitizer: stack-overflow" in Path(selected).read_text() + # test selecting prioritized + with (tmp_path / "log_asan.txt.4").open("wb") as log_fp: + log_fp.write( + b"==1942==ERROR: AddressSanitizer: heap-use-after-free on ... blah\n" + ) + for l_no in range(4): + log_fp.write(b" #%d blah...\n" % (l_no,)) + with (tmp_path / "log_asan.txt.5").open("wb") as log_fp: + log_fp.write(b"==1984==ERROR: AddressSanitizer: SEGV on ... 
blah\n") + log_fp.write(b"missing trace...\n") + with (tmp_path / "log_asan.txt.6").open("wb") as log_fp: + log_fp.write(b"ERROR: Failed to mmap\n") + selected = Report._find_sanitizer([str(x) for x in tmp_path.iterdir()]) + assert selected is not None + assert "heap-use-after-free" in Path(selected).read_text() + # test selecting TSan reports + tsan_path = tmp_path / "tsan" + tsan_path.mkdir() + (tsan_path / "log_asan_benign.txt").write_text( + "==27531==WARNING: Symbolizer buffer too small\n" + "==27531==WARNING: Symbolizer buffer too small" + ) + tsan_report = tsan_path / "log_asan_report.txt" + tsan_report.write_text( + "WARNING: ThreadSanitizer: data race (pid=26919)\n" + " Write of size 8 at 0x7f0ca2fc3400 by thread T51:\n" + " #0 memcpy /sanitizer_common_interceptors.inc:810:5 (lib+0x6656e)\n" + ) + selected = Report._find_sanitizer([str(x) for x in tsan_path.iterdir()]) + assert selected is not None + assert selected == str(tsan_report) + + +def test_report_10(tmp_path): + """test Report() size_limit""" + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log\n" * 200) + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log\n" * 200) + (tmp_path / "unrelated.txt").write_bytes(b"nothing burger\n" * 200) + (tmp_path / "rr-trace").mkdir() + size_limit = len("STDERR log\n") + report = Report(tmp_path, "bin", size_limit=size_limit) + assert report.path == tmp_path + assert report._logs.aux is None + assert report._logs.stderr.endswith("log_stderr.txt") + assert report._logs.stdout.endswith("log_stdout.txt") + assert report.preferred.endswith("log_stderr.txt") + assert report.stack is None + size_limit += len("[LOG TAILED]\n") + assert (report.path / report._logs.stderr).stat().st_size == size_limit + assert (report.path / report._logs.stdout).stat().st_size == size_limit + assert (report.path / "unrelated.txt").stat().st_size == size_limit + report.cleanup() + assert not tmp_path.is_dir() + + +def test_report_11(tmp_path): + """test selecting Valgrind logs with Report.select_logs()""" + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + (tmp_path / "log_valgrind.txt").write_bytes(b"valgrind log") + log_map = Report.select_logs(str(tmp_path)) + assert (tmp_path / log_map.stderr).is_file() + assert (tmp_path / log_map.stdout).is_file() + assert "valgrind log" in (tmp_path / log_map.aux).read_text() + + +def test_report_12(tmp_path): + """test Report.crash_info""" + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + _create_crash_log(tmp_path / "log_asan_blah.txt") + # no binary.fuzzmanagerconf + report = Report(tmp_path, target_binary="fake_bin") + assert report._crash_info is None + assert report.crash_info is not None + assert report._crash_info is not None + # with binary.fuzzmanagerconf + with (tmp_path / "fake_bin.fuzzmanagerconf").open("wb") as conf: + conf.write(b"[Main]\n") + conf.write(b"platform = x86-64\n") + conf.write(b"product = mozilla-central\n") + conf.write(b"os = linux\n") + report = Report(tmp_path, target_binary=str(tmp_path / "fake_bin")) + assert report._crash_info is None + assert report.crash_info is not None + assert report._crash_info is not None + + +@mark.parametrize( + "sig_cache, has_sig", + [ + # signature exists in cache + ('{"symptoms": [{"functionNames": ["a"],"type": "stackFrames"}]}', True), + # no signature + (None, True), + # FM failed to generate signature + (None, False), + ], +) +def test_report_13(mocker, 
tmp_path, sig_cache, has_sig): + """test Report.crash_signature and Report.crash_hash""" + mocker.patch("grizzly.common.report.ProgramConfiguration", autospec=True) + collector = mocker.patch("grizzly.common.report.Collector", autospec=True) + if sig_cache: + sig_file = tmp_path / "cache.sig" + sig_file.write_text(sig_cache) + collector.return_value.search.return_value = (str(sig_file), None) + collector.return_value.sigCacheDir = str(tmp_path) + else: + collector.return_value.sigCacheDir = None + (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") + (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") + if has_sig: + _create_crash_log(tmp_path / "log_asan_blah.txt") + report = Report(tmp_path, "bin") + assert report._signature is None + if has_sig: + assert report.crash_signature + assert report.crash_info.createShortSignature() == "[@ foo]" + else: + assert not report.crash_signature + assert report.crash_hash + + +def test_report_14(mocker): + """test Report.crash_signature_max_frames()""" + info = mocker.Mock(spec=CrashInfo) + info.backtrace = ("blah",) + assert Report.crash_signature_max_frames(info) == 8 + info.backtrace = ( + "std::panicking::rust_panic", + "std::panicking::rust_panic_with_hook", + ) + assert Report.crash_signature_max_frames(info) == 14 diff --git a/grizzly/common/test_reporter.py b/grizzly/common/test_reporter.py index ce844702..809d5304 100644 --- a/grizzly/common/test_reporter.py +++ b/grizzly/common/test_reporter.py @@ -4,296 +4,63 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. """test Grizzly Reporter""" # pylint: disable=protected-access - -import os -import sys -import tarfile - -import pytest +from sys import platform +from tarfile import open as tar_open from FTB.ProgramConfiguration import ProgramConfiguration -from FTB.Signatures.CrashInfo import CrashInfo - -from .reporter import FilesystemReporter, FuzzManagerReporter, Report, Reporter, S3FuzzManagerReporter +from pytest import importorskip, mark, raises + +from .report import Report +from .reporter import ( + FilesystemReporter, + FuzzManagerReporter, + Reporter, + S3FuzzManagerReporter, +) from .storage import TestCase -def test_report_01(): - """test creating a simple Report""" - report = Report("no_dir", dict()) - assert report.path == "no_dir" - assert report.log_aux is None - assert report.log_err is None - assert report.log_out is None - assert report.stack is None - assert report.preferred is None - report.cleanup() - -def test_report_02(tmp_path): - """test from_path() with boring logs (no stack)""" - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - report = Report.from_path(str(tmp_path)) - assert report.path == str(tmp_path) - assert report.log_err.endswith("log_stderr.txt") - assert report.log_out.endswith("log_stdout.txt") - assert report.preferred.endswith("log_stderr.txt") - assert report.log_aux is None - assert report.stack is None - assert Report.DEFAULT_MAJOR == report.major - assert Report.DEFAULT_MINOR == report.minor - assert report.prefix is not None - report.cleanup() - assert not tmp_path.exists() - -def test_report_03(tmp_path): - """test from_path()""" - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - with (tmp_path / "log_asan_blah.txt").open("wb") as log_fp: - log_fp.write(b" #0 0xbad000 in foo /file1.c:123:234\n") - log_fp.write(b" #1 0x1337dd in bar /file2.c:1806:19") - report = Report.from_path(str(tmp_path)) - 
assert report.path == str(tmp_path) - assert report.log_aux.endswith("log_asan_blah.txt") - assert report.log_err.endswith("log_stderr.txt") - assert report.log_out.endswith("log_stdout.txt") - assert report.preferred.endswith("log_asan_blah.txt") - assert report.stack is not None - assert Report.DEFAULT_MAJOR != report.major - assert Report.DEFAULT_MINOR != report.minor - assert report.prefix is not None - report.cleanup() - -def test_report_04(tmp_path): - """test Report.tail()""" - tmp_file = tmp_path / "file.txt" - tmp_file.write_bytes(b"blah\ntest\n123\xEF\x00FOO") - length = tmp_file.stat().st_size - # no size limit - with pytest.raises(AssertionError): - Report.tail(str(tmp_file), 0) - assert tmp_file.stat().st_size == length - Report.tail(str(tmp_file), 3) - log_data = tmp_file.read_bytes() - assert log_data.startswith(b"[LOG TAILED]\n") - assert log_data[13:] == b"FOO" - -def test_report_05(tmp_path): - """test Report.select_logs()""" - with pytest.raises(IOError, match="log_path does not exist"): - Report.select_logs("missing_path") - # small log with nothing interesting - with (tmp_path / "log_asan.txt.1").open("wb") as log_fp: - log_fp.write(b"SHORT LOG\n") - log_fp.write(b"filler line") - # crash on another thread - with (tmp_path / "log_asan.txt.2").open("wb") as log_fp: - log_fp.write(b"GOOD LOG\n") - log_fp.write(b"==70811==ERROR: AddressSanitizer: SEGV on unknown address 0x00000BADF00D") - log_fp.write(b" (pc 0x7f4c0bb54c67 bp 0x7f4c07bea380 sp 0x7f4c07bea360 T0)\n") # must be 2nd line - # pad out to 6 lines - for l_no in range(4): - log_fp.write(b" #%d blah...\n" % l_no) - # child log that should be ignored (created when parent crashes) - with (tmp_path / "log_asan.txt.3").open("wb") as log_fp: - log_fp.write(b"BAD LOG\n") - log_fp.write(b"==70811==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000") - log_fp.write(b" (pc 0x7f4c0bb54c67 bp 0x7f4c07bea380 sp 0x7f4c07bea360 T2)\n") # must be 2nd line - # pad out to 6 lines - for l_no in range(4): - log_fp.write(b" #%d blah...\n" % l_no) - (tmp_path / "log_mindump_blah.txt").write_bytes(b"minidump log") - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - # should be ignored in favor of "GOOD LOG" - (tmp_path / "log_ffp_worker_blah.txt").write_bytes(b"worker log") - log_map = Report.select_logs(str(tmp_path)) - assert "GOOD LOG" in (tmp_path / log_map["aux"]).read_text() - assert "STDERR" in (tmp_path / log_map["stderr"]).read_text() - assert "STDOUT" in (tmp_path / log_map["stdout"]).read_text() - -def test_report_06(tmp_path): - """test minidump with Report.select_logs()""" - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - with (tmp_path / "log_minidump_01.txt").open("wb") as log_fp: - log_fp.write(b"GPU|||\n") - log_fp.write(b"Crash|SIGSEGV|0x0|0\n") - log_fp.write(b"minidump log\n") - (tmp_path / "log_ffp_worker_blah.txt").write_bytes(b"worker log") - log_map = Report.select_logs(str(tmp_path)) - assert (tmp_path / log_map["stderr"]).is_file() - assert (tmp_path / log_map["stdout"]).is_file() - assert "minidump log" in (tmp_path / log_map["aux"]).read_text() - -def test_report_07(tmp_path): - """test selecting preferred DUMP_REQUESTED minidump with Report.select_logs()""" - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - with (tmp_path / "log_minidump_01.txt").open("wb") as log_fp: - 
log_fp.write(b"GPU|||\n") - log_fp.write(b"Crash|DUMP_REQUESTED|0x7f9518665d18|0\n") - log_fp.write(b"0|0|bar.so|sadf|a.cc:739484451a63|3066|0x0\n") - log_fp.write(b"0|1|gar.so|fdsa|b.cc:739484451a63|1644|0x12\n") - with (tmp_path / "log_minidump_02.txt").open("wb") as log_fp: - log_fp.write(b"GPU|||\n") - log_fp.write(b"Crash|DUMP_REQUESTED|0x7f57ac9e2e14|0\n") - log_fp.write(b"0|0|foo.so|google_breakpad::ExceptionHandler::WriteMinidump|bar.cc:234|674|0xc\n") - log_fp.write(b"0|1|foo.so|google_breakpad::ExceptionHandler::WriteMinidump|bar.cc:4a2|645|0x8\n") - with (tmp_path / "log_minidump_03.txt").open("wb") as log_fp: - log_fp.write(b"GPU|||\n") - log_fp.write(b"Crash|DUMP_REQUESTED|0x7f9518665d18|0\n") - log_fp.write(b"0|0|bar.so|sadf|a.cc:1234|3066|0x0\n") - log_fp.write(b"0|1|gar.so|fdsa|b.cc:4323|1644|0x12\n") - log_map = Report.select_logs(str(tmp_path)) - assert (tmp_path / log_map["stderr"]).is_file() - assert (tmp_path / log_map["stdout"]).is_file() - assert "google_breakpad::ExceptionHandler::WriteMinidump" in (tmp_path / log_map["aux"]).read_text() - -def test_report_08(tmp_path): - """test selecting worker logs with Report.select_logs()""" - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - (tmp_path / "log_ffp_worker_blah.txt").write_bytes(b"worker log") - log_map = Report.select_logs(str(tmp_path)) - assert (tmp_path / log_map["stderr"]).is_file() - assert (tmp_path / log_map["stdout"]).is_file() - assert "worker log" in (tmp_path / log_map["aux"]).read_text() - -def test_report_09(tmp_path): - """test prioritizing *San logs with Report.select_logs()""" - # crash - with (tmp_path / "log_asan.txt.1").open("wb") as log_fp: - log_fp.write(b"GOOD LOG\n") - log_fp.write(b"==1942==ERROR: AddressSanitizer: heap-use-after-free on ... blah\n") # must be 2nd line - # pad out to 6 lines - for l_no in range(4): - log_fp.write(b" #%d blah...\n" % l_no) - # crash missing trace - with (tmp_path / "log_asan.txt.2").open("wb") as log_fp: - log_fp.write(b"BAD LOG\n") - log_fp.write(b"==1984==ERROR: AddressSanitizer: SEGV on ... blah\n") # must be 2nd line - log_fp.write(b"missing trace...\n") - # child log that should be ignored (created when parent crashes) - with (tmp_path / "log_asan.txt.3").open("wb") as log_fp: - log_fp.write(b"BAD LOG\n") - log_fp.write(b"==1184==ERROR: AddressSanitizer: BUS on ... 
blah\n") # must be 2nd line - # pad out to 6 lines - for l_no in range(4): - log_fp.write(b" #%d blah...\n" % l_no) - with (tmp_path / "log_asan.txt.4").open("wb") as log_fp: - log_fp.write(b"BAD LOG\n") - log_fp.write(b"==9482==ERROR: AddressSanitizer: stack-overflow on ...\n") # must be 2nd line - # pad out to 6 lines - for l_no in range(4): - log_fp.write(b" #%d blah...\n" % l_no) - with (tmp_path / "log_asan.txt.5").open("wb") as log_fp: - log_fp.write(b"BAD LOG\n") - log_fp.write(b"ERROR: Failed to mmap\n") # must be 2nd line - log_map = Report.select_logs(str(tmp_path)) - assert "GOOD LOG" in (tmp_path / log_map["aux"]).read_text() - -def test_report_10(tmp_path): - """test Report size_limit""" - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log\n" * 200) - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log\n" * 200) - (tmp_path / "unrelated.txt").write_bytes(b"nothing burger\n" * 200) - (tmp_path / "rr-trace").mkdir() - size_limit = len("STDERR log\n") - report = Report.from_path(str(tmp_path), size_limit=size_limit) - assert report.path == str(tmp_path) - assert report.log_err.endswith("log_stderr.txt") - assert report.log_out.endswith("log_stdout.txt") - assert report.preferred.endswith("log_stderr.txt") - assert report.log_aux is None - assert report.stack is None - size_limit += len("[LOG TAILED]\n") - assert os.stat(os.path.join(report.path, report.log_err)).st_size == size_limit - assert os.stat(os.path.join(report.path, report.log_out)).st_size == size_limit - assert os.stat(os.path.join(report.path, "unrelated.txt")).st_size == size_limit - report.cleanup() - assert not tmp_path.is_dir() - -def test_report_11(tmp_path): - """test selecting Valgrind logs with Report.select_logs()""" - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - (tmp_path / "log_valgrind.txt").write_bytes(b"valgrind log") - log_map = Report.select_logs(str(tmp_path)) - assert (tmp_path / log_map["stderr"]).is_file() - assert (tmp_path / log_map["stdout"]).is_file() - assert "valgrind log" in (tmp_path / log_map["aux"]).read_text() - -def test_report_12(tmp_path): - """test Report.crash_info()""" - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - with (tmp_path / "log_asan_blah.txt").open("wb") as log_fp: - log_fp.write(b" #0 0xbad000 in foo /file1.c:123:234\n") - log_fp.write(b" #1 0x1337dd in bar /file2.c:1806:19") - # no binary.fuzzmanagerconf - report = Report.from_path(str(tmp_path)) - assert report._crash_info is None - assert report.crash_info("fake_bin") is not None - assert report._crash_info is not None - # with binary.fuzzmanagerconf - with (tmp_path / "fake_bin.fuzzmanagerconf").open("wb") as conf: - conf.write(b"[Main]\n") - conf.write(b"platform = x86-64\n") - conf.write(b"product = mozilla-central\n") - conf.write(b"os = linux\n") - report = Report.from_path(str(tmp_path)) - assert report._crash_info is None - assert report.crash_info(str(tmp_path / "fake_bin")) is not None - assert report._crash_info is not None - -def test_report_13(mocker, tmp_path): - """test Report.crash_signature() and Report.crash_hash()""" - mocker.patch("grizzly.common.reporter.ProgramConfiguration", autospec=True) - (tmp_path / "log_stderr.txt").write_bytes(b"STDERR log") - (tmp_path / "log_stdout.txt").write_bytes(b"STDOUT log") - with (tmp_path / "log_asan_blah.txt").open("wb") as log_fp: - log_fp.write(b"==1==ERROR: AddressSanitizer: SEGV on unknown address 0x0 (pc 
0x0 bp 0x0 sp 0x0 T0)\n") - log_fp.write(b" #0 0xbad000 in foo /file1.c:123:234\n") - log_fp.write(b" #1 0x1337dd in bar /file2.c:1806:19") - report = Report.from_path(str(tmp_path)) - assert report._crash_info is None - info = report.crash_info("fake_bin") - sig = Report.crash_signature(info) - assert sig.symptoms - short_sig = info.createShortSignature() - assert short_sig == "[@ foo]" - assert Report.crash_hash(info) - -def test_report_14(mocker): - """test Report.crash_signature_max_frames()""" - info = mocker.Mock(spec=CrashInfo) - info.backtrace = ("blah") - assert Report.crash_signature_max_frames(info) == 8 - info.backtrace = ("std::panicking::rust_panic", "std::panicking::rust_panic_with_hook") - assert Report.crash_signature_max_frames(info) == 14 - -def test_reporter_01(mocker, tmp_path): +def _create_crash_log(log_path): + with log_path.open("w") as log_fp: + log_fp.write("==1==ERROR: AddressSanitizer: SEGV on unknown address 0x0") + log_fp.write(" (pc 0x0 bp 0x0 sp 0x0 T0)\n") + log_fp.write(" #0 0xbad000 in foo /file1.c:123:234\n") + log_fp.write(" #1 0x1337dd in bar /file2.c:1806:19") + + +@mark.parametrize( + "display_logs, is_hang", + [ + # do not display report + (False, False), + # display report + (True, False), + # display report (hang) + (True, True), + ], +) +def test_reporter_01(mocker, tmp_path, display_logs, is_hang): """test creating a simple Reporter""" + class SimpleReporter(Reporter): - def _process_report(self, report): + def _pre_submit(self, report): pass - def _reset(self): + + def _post_submit(self): pass + def _submit_report(self, report, test_cases): pass + + (tmp_path / "log_stderr.txt").write_bytes(b"log msg") + report = mocker.Mock( + spec_set=Report, is_hang=is_hang, preferred=str(tmp_path / "log_stderr.txt") + ) reporter = SimpleReporter() - with pytest.raises(AssertionError, match="Either 'log_path' or 'report' must be specified!"): - reporter.submit([]) - with pytest.raises(IOError, match="No such directory 'fake_dir'"): - reporter.submit([], log_path="fake_dir") - with pytest.raises(IOError, match="No logs found in"): - reporter.submit([], log_path=str(tmp_path)) - with pytest.raises(AssertionError, match="Only 'log_path' or 'report' can be specified!"): - reporter.submit([], log_path=str(tmp_path), report=mocker.Mock()) - # submit a report - reporter.submit([], report=mocker.Mock(spec=Report)) + reporter.display_logs = display_logs + reporter.submit([], report=report) + assert report.cleanup.call_count == 1 + def test_filesystem_reporter_01(tmp_path): """test FilesystemReporter without testcases""" @@ -301,222 +68,204 @@ def test_filesystem_reporter_01(tmp_path): log_path.mkdir() (log_path / "log_stderr.txt").write_bytes(b"STDERR log") (log_path / "log_stdout.txt").write_bytes(b"STDOUT log") - with (log_path / "log_asan_blah.txt").open("wb") as log_fp: - log_fp.write(b" #0 0xbad000 in foo /file1.c:123:234\n") - log_fp.write(b" #1 0x1337dd in bar /file2.c:1806:19") + _create_crash_log(tmp_path / "log_asan_blah.txt") report_path = tmp_path / "reports" report_path.mkdir() - reporter = FilesystemReporter(report_path=str(report_path)) - reporter.submit([], log_path=str(log_path)) - buckets = [x for x in report_path.iterdir()] + reporter = FilesystemReporter(report_path) + reporter.submit([], Report(log_path, "fake_bin")) + buckets = tuple(report_path.iterdir()) # check major bucket assert len(buckets) == 1 assert buckets[0].is_dir() # check log path exists - log_dirs = [x for x in buckets[0].iterdir()] + log_dirs = tuple(buckets[0].iterdir()) assert 
len(log_dirs) == 1 assert log_dirs[0].is_dir() assert "_logs" in str(log_dirs[0]) + def test_filesystem_reporter_02(tmp_path, mocker): """test FilesystemReporter with testcases""" log_path = tmp_path / "logs" log_path.mkdir() (log_path / "log_stderr.txt").write_bytes(b"STDERR log") (log_path / "log_stdout.txt").write_bytes(b"STDOUT log") - with (log_path / "log_asan_blah.txt").open("wb") as log_fp: - log_fp.write(b" #0 0xbad000 in foo /file1.c:123:234\n") - log_fp.write(b" #1 0x1337dd in bar /file2.c:1806:19") - testcases = list() - for _ in range(10): - testcases.append(mocker.Mock(spec=TestCase)) + _create_crash_log(log_path / "log_asan_blah.txt") + tests = list(mocker.Mock(spec_set=TestCase) for _ in range(10)) report_path = tmp_path / "reports" assert not report_path.exists() - reporter = FilesystemReporter(report_path=str(report_path)) - reporter.submit(testcases, log_path=str(log_path)) + reporter = FilesystemReporter(report_path) + reporter.submit(tests, Report(log_path, "fake_bin")) assert not log_path.exists() assert report_path.exists() - assert len(tuple(report_path.glob("*"))) == 1 - for tstc in testcases: - assert tstc.dump.call_count == 1 + assert len(tuple(report_path.iterdir())) == 1 + assert all(x.dump.call_count == 1 for x in tests) # call report a 2nd time log_path.mkdir() (log_path / "log_stderr.txt").write_bytes(b"STDERR log") (log_path / "log_stdout.txt").write_bytes(b"STDOUT log") - testcases = list() - for _ in range(2): - testcases.append(mocker.Mock(spec=TestCase)) - reporter.submit(testcases, log_path=str(log_path)) - for tstc in testcases: - assert tstc.dump.call_count == 1 - assert len(tuple(report_path.glob("*"))) == 2 + tests = list(mocker.Mock(spec_set=TestCase) for _ in range(2)) + reporter.submit(tests, Report(log_path, "fake_bin")) + assert all(x.dump.call_count == 1 for x in tests) + assert len(tuple(report_path.iterdir())) == 2 assert len(tuple(report_path.glob("NO_STACK"))) == 1 + def test_filesystem_reporter_03(tmp_path): """test FilesystemReporter disk space failsafe""" log_path = tmp_path / "logs" log_path.mkdir() (log_path / "log_stderr.txt").write_bytes(b"STDERR log") (log_path / "log_stdout.txt").write_bytes(b"STDOUT log") - report_path = tmp_path / "reports" - report_path.mkdir() - reporter = FilesystemReporter(report_path=str(report_path)) - reporter.DISK_SPACE_ABORT = 2 ** 50 - with pytest.raises(RuntimeError) as exc: - reporter.submit([], log_path=str(log_path)) - assert "Running low on disk space" in str(exc.value) + reporter = FilesystemReporter(tmp_path / "reports") + reporter.min_space = 2**50 + with raises(RuntimeError, match="Running low on disk space"): + reporter.submit([], Report(log_path, "fake_bin")) + def test_filesystem_reporter_04(mocker, tmp_path): """test FilesystemReporter w/o major bucket""" - report = mocker.Mock(spec=Report) - report_path = (tmp_path / "report") - report_path.mkdir() - report.path = str(report_path) - report.prefix = "0000_2020_01_01" - reporter = FilesystemReporter(report_path=str(tmp_path), major_bucket=False) - reporter.submit([], report=report) - assert not report_path.is_dir() + fake_report = tmp_path / "fake_report" + fake_report.mkdir() + report = mocker.Mock(spec_set=Report, path=fake_report, prefix="test_prefix") + reporter = FilesystemReporter(tmp_path / "dst", major_bucket=False) + reporter.submit([], report) + assert not fake_report.is_dir() assert not report.major.call_count + assert any((tmp_path / "dst").glob("test_prefix_logs")) -def test_fuzzmanager_reporter_01(tmp_path, mocker): + +def 
test_fuzzmanager_reporter_01(mocker, tmp_path): """test FuzzManagerReporter.sanity_check()""" fake_reporter = mocker.patch("grizzly.common.reporter.ProgramConfiguration") - fake_reporter.fromBinary.return_value = mocker.Mock(spec=ProgramConfiguration) + fake_reporter.fromBinary.return_value = mocker.Mock(spec_set=ProgramConfiguration) # missing global FM config file - FuzzManagerReporter.FM_CONFIG = "no_file" - with pytest.raises(IOError, match="Missing: no_file"): + FuzzManagerReporter.FM_CONFIG = tmp_path / "no_file" + with raises(IOError, match="no_file"): FuzzManagerReporter.sanity_check("fake") # missing binary FM config file fake_fmc = tmp_path / ".fuzzmanagerconf" fake_fmc.touch() fake_bin = tmp_path / "bin" fake_bin.touch() - FuzzManagerReporter.FM_CONFIG = str(fake_fmc) - with pytest.raises(IOError, match="bin.fuzzmanagerconf"): + FuzzManagerReporter.FM_CONFIG = fake_fmc + with raises(IOError, match="bin.fuzzmanagerconf"): FuzzManagerReporter.sanity_check(str(fake_bin)) # success (tmp_path / "bin.fuzzmanagerconf").touch() FuzzManagerReporter.sanity_check(str(fake_bin)) assert fake_reporter.fromBinary.call_count == 1 -def test_fuzzmanager_reporter_02(tmp_path): - """test FuzzManagerReporter.submit() empty path""" - reporter = FuzzManagerReporter("fake_bin") - report_path = tmp_path / "report" - report_path.mkdir() - with pytest.raises(IOError) as exc: - reporter.submit([], log_path=str(report_path)) - assert "No logs found in" in str(exc.value) -def test_fuzzmanager_reporter_03(tmp_path, mocker): +@mark.parametrize( + "tests, frequent, ignored, force", + [ + # report - without test + (False, False, False, False), + # report - with test + (True, False, False, False), + # report - frequent + (True, True, False, False), + # report - forced frequent + (True, True, False, True), + # report - ignored + (True, False, True, False), + ], +) +def test_fuzzmanager_reporter_02(mocker, tmp_path, tests, frequent, ignored, force): """test FuzzManagerReporter.submit()""" - fake_crashinfo = mocker.patch("grizzly.common.reporter.CrashInfo", autospec=True) - fake_crashinfo.fromRawCrashData.return_value.createShortSignature.return_value = "test [@ test]" - fake_collector = mocker.patch("grizzly.common.reporter.Collector", autospec=True) - fake_collector.return_value.search.return_value = (None, None) - fake_collector.return_value.generate.return_value = str(tmp_path / "fake_sig_file") - log_path = tmp_path / "log_path" - log_path.mkdir() - (log_path / "log_ffp_worker_blah.txt").touch() - (log_path / "log_stderr.txt").touch() - (log_path / "log_stdout.txt").touch() - report = Report.from_path(str(log_path)) - fake_test = mocker.Mock(spec=TestCase) - fake_test.adapter_name = "adapter" - fake_test.input_fname = "input" - fake_test.env_vars = {"TEST": "1"} - reporter = FuzzManagerReporter(str("fake_bin")) - reporter.submit([fake_test], report=report) - assert not log_path.is_dir() - assert fake_test.dump.call_count == 1 - assert fake_collector.return_value.submit.call_count == 1 - -def test_fuzzmanager_reporter_04(tmp_path, mocker): - """test FuzzManagerReporter.submit() hit frequent crash""" - mocker.patch("grizzly.common.reporter.CrashInfo", autospec=True) - fake_collector = mocker.patch("grizzly.common.reporter.Collector", autospec=True) - fake_collector.return_value.search.return_value = (None, {"frequent": True, "shortDescription": "[@ test]"}) - reporter = FuzzManagerReporter("fake_bin") - log_path = tmp_path / "log_path" - log_path.mkdir() - (log_path / "log_stderr.txt").touch() - (log_path / 
"log_stdout.txt").touch() - reporter.submit([], log_path=str(log_path)) - fake_collector.return_value.submit.assert_not_called() - -def test_fuzzmanager_reporter_05(tmp_path, mocker): - """test FuzzManagerReporter.submit() hit existing crash""" - mocker.patch("grizzly.common.reporter.CrashInfo", autospec=True) + mocker.patch( + "grizzly.common.reporter.FuzzManagerReporter._ignored", + new_callable=mocker.MagicMock, + return_value=ignored, + ) + mocker.patch("grizzly.common.reporter.Path.cwd", return_value=tmp_path) + mocker.patch("grizzly.common.reporter.getenv", autospec=True, return_value="0") fake_collector = mocker.patch("grizzly.common.reporter.Collector", autospec=True) fake_collector.return_value.search.return_value = ( - None, {"bug__id":1, "frequent": False, "shortDescription": "[@ test]"}) - reporter = FuzzManagerReporter("fake_bin") + None, + {"frequent": frequent, "shortDescription": "[@ test]"}, + ) log_path = tmp_path / "log_path" log_path.mkdir() + (log_path / "log_ffp_worker_blah.txt").touch() (log_path / "log_stderr.txt").touch() (log_path / "log_stdout.txt").touch() - reporter._ignored = lambda x: True - reporter.submit([], log_path=str(log_path)) - fake_collector.return_value.submit.assert_not_called() - -def test_fuzzmanager_reporter_06(tmp_path, mocker): - """test FuzzManagerReporter.submit() no signature""" - mocker.patch("grizzly.common.reporter.CrashInfo", autospec=True) - fake_collector = mocker.patch("grizzly.common.reporter.Collector", autospec=True) - fake_collector.return_value.search.return_value = (None, None) - fake_collector.return_value.generate.return_value = None + (log_path / "rr-traces").mkdir() + (tmp_path / "screenlog.0").touch() + test_cases = list() + if tests: + fake_test = mocker.Mock( + spec_set=TestCase, + adapter_name="adapter", + env_vars={"TEST": "1"}, + input_fname="input", + ) + test_cases.append(fake_test) reporter = FuzzManagerReporter("fake_bin") - log_path = tmp_path / "log_path" - log_path.mkdir() - (log_path / "log_stderr.txt").touch() - (log_path / "log_stdout.txt").touch() - with pytest.raises(RuntimeError) as exc: - reporter.submit([], log_path=str(log_path)) - assert "Failed to create FM signature" in str(exc.value) - fake_collector.return_value.submit.assert_not_called() - # test ignore unsymbolized crash - reporter._ignored = lambda x: True - reporter.submit([], log_path=str(log_path)) - fake_collector.return_value.submit.assert_not_called() - -def test_s3fuzzmanager_reporter_01(tmp_path, mocker): + reporter.force_report = force + reporter.submit(test_cases, Report(log_path, "fake_bin", is_hang=True)) + assert not log_path.is_dir() + if (frequent and not force) or ignored: + assert fake_collector.return_value.submit.call_count == 0 + assert fake_test.dump.call_count == 0 + else: + assert fake_collector.return_value.submit.call_count == 1 + if tests: + assert fake_test.dump.call_count == 1 + + +def test_fuzzmanager_reporter_03(mocker, tmp_path): + """test FuzzManagerReporter._ignored()""" + log_file = tmp_path / "test.log" + log_file.touch() + report = mocker.Mock( + spec_set=Report, path=tmp_path, preferred=str(log_file), stack=None + ) + # not ignored + assert not FuzzManagerReporter._ignored(report) + # ignored - sanitizer OOM missing stack + log_file.write_bytes(b"ERROR: Failed to mmap") + assert FuzzManagerReporter._ignored(report) + # ignored - Valgrind OOM + log_file.write_bytes(b"VEX temporary storage exhausted.") + assert FuzzManagerReporter._ignored(report) + + +def test_s3fuzzmanager_reporter_01(mocker, tmp_path): 
"""test S3FuzzManagerReporter.sanity_check()""" mocker.patch("grizzly.common.reporter.FuzzManagerReporter", autospec=True) fake_bin = tmp_path / "bin" - with pytest.raises(EnvironmentError) as exc: + # test GRZ_S3_BUCKET missing + with raises(EnvironmentError, match="'GRZ_S3_BUCKET' is not set in environment"): S3FuzzManagerReporter.sanity_check(str(fake_bin)) - assert "'GRZ_S3_BUCKET' is not set in environment" in str(exc.value) - pytest.importorskip("boto3") - os.environ["GRZ_S3_BUCKET"] = "test" - try: - S3FuzzManagerReporter.sanity_check(str(fake_bin)) - finally: - os.environ.pop("GRZ_S3_BUCKET", None) + # test GRZ_S3_BUCKET set + importorskip("boto3") + mocker.patch("grizzly.common.reporter.getenv", autospec=True, return_value="test") + S3FuzzManagerReporter.sanity_check(str(fake_bin)) + -def test_s3fuzzmanager_reporter_02(tmp_path, mocker): - """test S3FuzzManagerReporter._process_report()""" - pytest.importorskip("boto3") - pytest.importorskip("botocore") +def test_s3fuzzmanager_reporter_02(mocker, tmp_path): + """test S3FuzzManagerReporter._pre_submit()""" + importorskip("boto3") + importorskip("botocore") + mocker.patch("grizzly.common.reporter.getenv", autospec=True, return_value="test") fake_resource = mocker.patch("grizzly.common.reporter.resource", autospec=True) - fake_report = mocker.Mock(spec=Report) - fake_report.path = "no-path" + fake_report = mocker.Mock(spec_set=Report) + fake_report.path = tmp_path / "no-path" reporter = S3FuzzManagerReporter("fake_bin") # test will missing rr-trace - assert reporter._process_report(fake_report) is None + assert reporter._pre_submit(fake_report) is None assert not reporter._extra_metadata # test will exiting rr-trace trace_dir = tmp_path / "rr-traces" / "latest-trace" trace_dir.mkdir(parents=True) fake_report.minor = "1234abcd" - fake_report.path = str(tmp_path) - os.environ["GRZ_S3_BUCKET"] = "test" - try: - reporter._process_report(fake_report) - finally: - os.environ.pop("GRZ_S3_BUCKET", None) - assert not tuple(tmp_path.glob("*")) + fake_report.path = tmp_path + reporter._pre_submit(fake_report) + assert not any(tmp_path.iterdir()) assert "rr-trace" in reporter._extra_metadata assert fake_report.minor in reporter._extra_metadata["rr-trace"] fake_resource.return_value.meta.client.upload_file.assert_not_called() @@ -525,23 +274,24 @@ def test_s3fuzzmanager_reporter_02(tmp_path, mocker): reporter._extra_metadata.clear() trace_dir.mkdir(parents=True) (trace_dir / "trace-file").touch() + class FakeClientError(Exception): def __init__(self, message, response): - super(FakeClientError, self).__init__(message) + super().__init__(message) self.response = response + mocker.patch("grizzly.common.reporter.ClientError", new=FakeClientError) - fake_resource.return_value.Object.side_effect = FakeClientError("test", {"Error": {"Code": "404"}}) - os.environ["GRZ_S3_BUCKET"] = "test" - try: - reporter._process_report(fake_report) - finally: - os.environ.pop("GRZ_S3_BUCKET", None) - assert not tuple(tmp_path.glob("*")) + fake_resource.return_value.Object.side_effect = FakeClientError( + "test", {"Error": {"Code": "404"}} + ) + reporter._pre_submit(fake_report) + assert not any(tmp_path.iterdir()) assert "rr-trace" in reporter._extra_metadata assert fake_report.minor in reporter._extra_metadata["rr-trace"] assert fake_resource.return_value.meta.client.upload_file.call_count == 1 -@pytest.mark.skipif(not sys.platform.startswith("linux"), reason="RR only supported on Linux") + +@mark.skipif(not platform.startswith("linux"), reason="RR only supported on 
Linux") def test_s3fuzzmanager_reporter_03(tmp_path): """test S3FuzzManagerReporter.compress_rr_trace()""" # create fake trace @@ -556,17 +306,20 @@ def test_s3fuzzmanager_reporter_03(tmp_path): (src / "mmap").write_bytes(b"bar") (src / "tasks").write_bytes(b"foo") (src / "version").write_bytes(b"123") - (tmp_path / "rr-traces" / "latest-trace").symlink_to(str(src), target_is_directory=True) + (tmp_path / "rr-traces" / "latest-trace").symlink_to( + str(src), target_is_directory=True + ) src = tmp_path / "rr-traces" dest = tmp_path / "dest" dest.mkdir() - S3FuzzManagerReporter.compress_rr_trace(str(src), str(dest)) + S3FuzzManagerReporter.compress_rr_trace(src, dest) assert not src.is_dir() assert (dest / "rr.tar.bz2").is_file() - with tarfile.open(str(dest / "rr.tar.bz2"), "r:bz2") as arc_fp: + with tar_open(str(dest / "rr.tar.bz2"), "r:bz2") as arc_fp: entries = arc_fp.getnames() assert "echo-1" in entries assert "echo-0" not in entries assert "latest-trace" not in entries + # TODO: fill out tests for FuzzManagerReporter and S3FuzzManagerReporter diff --git a/grizzly/common/test_runner.py b/grizzly/common/test_runner.py index 0a369a52..187ae5ce 100644 --- a/grizzly/common/test_runner.py +++ b/grizzly/common/test_runner.py @@ -3,126 +3,239 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # pylint: disable=protected-access -from os.path import join as pathjoin +from itertools import count -from pytest import raises +from pytest import mark, raises -from sapphire import Sapphire, SERVED_ALL, SERVED_NONE, SERVED_REQUEST, SERVED_TIMEOUT, ServerMap +from sapphire import Sapphire, Served, ServerMap -from .runner import _IdleChecker, Runner +from ..target import Result, Target, TargetLaunchError, TargetLaunchTimeout +from .reporter import Report +from .runner import Runner, _IdleChecker from .storage import TestCase -from ..target import Target, TargetLaunchError, TargetLaunchTimeout + def test_runner_01(mocker): """test Runner()""" - server = mocker.Mock(spec=Sapphire) - target = mocker.Mock(spec=Target) - target.detect_failure.return_value = target.RESULT_NONE - runner = Runner(server, target) + mocker.patch("grizzly.common.runner.time", autospec=True, side_effect=count()) + server = mocker.Mock(spec_set=Sapphire) + target = mocker.Mock(spec_set=Target) + target.check_result.return_value = Result.NONE + runner = Runner(server, target, relaunch=10) + assert runner.initial + assert not runner.startup_failure assert runner._idle is None - assert runner.result is None - assert runner.served is None - assert not runner.timeout + assert runner._relaunch == 10 + assert runner._tests_run == 0 serv_files = ["a.bin", "/another/file.bin"] - testcase = mocker.Mock(spec=TestCase) - testcase.landing_page = serv_files[0] + testcase = mocker.Mock(spec_set=TestCase, landing_page=serv_files[0], optional=[]) # all files served - server.serve_testcase.return_value = (SERVED_ALL, serv_files) - runner.run([], ServerMap(), testcase) - assert runner.result == runner.COMPLETE - assert runner.served == serv_files - assert not runner.timeout + serv_map = ServerMap() + server.serve_path.return_value = (Served.ALL, serv_files) + result = runner.run([], serv_map, testcase) + assert runner.initial + assert runner._tests_run == 1 + assert result.attempted + assert result.duration == 1 + assert result.status == Result.NONE + assert result.served == serv_files + assert not result.timeout + assert not serv_map.dynamic assert 
target.close.call_count == 0 assert target.dump_coverage.call_count == 0 - # some files served - server.serve_testcase.return_value = (SERVED_REQUEST, serv_files) - runner.run([], ServerMap(), testcase, coverage=True) - assert runner.result == runner.COMPLETE - assert runner.served == serv_files - assert not runner.timeout + assert target.handle_hang.call_count == 0 + # dump coverage + serv_map = ServerMap() + server.serve_path.return_value = (Served.ALL, serv_files) + result = runner.run([], serv_map, testcase, coverage=True) + assert not runner.initial + assert runner._tests_run == 2 + assert result.attempted + assert result.status == Result.NONE + assert result.served == serv_files + assert not result.timeout + assert not serv_map.dynamic assert target.close.call_count == 0 assert target.dump_coverage.call_count == 1 + assert target.handle_hang.call_count == 0 + def test_runner_02(mocker): - """test Runner() errors""" - server = mocker.Mock(spec=Sapphire) - target = mocker.Mock(spec=Target) - testcase = mocker.Mock(spec=TestCase, landing_page="x") - runner = Runner(server, target) - # no files served - server.serve_testcase.return_value = (SERVED_NONE, []) - target.detect_failure.return_value = target.RESULT_NONE - runner.run([], ServerMap(), testcase) - assert runner.result == runner.ERROR - assert not runner.served - assert not runner.timeout + """test Runner.run() relaunch""" + mocker.patch("grizzly.common.runner.time", autospec=True, return_value=1) + mocker.patch("grizzly.common.runner.sleep", autospec=True) + server = mocker.Mock(spec_set=Sapphire) + target = mocker.Mock(spec_set=Target) + target.check_result.return_value = Result.NONE + serv_files = ["a.bin"] + server.serve_path.return_value = (Served.ALL, serv_files) + testcase = mocker.Mock(spec_set=TestCase, landing_page=serv_files[0], optional=[]) + # single run/iteration relaunch (not idle exit) + target.is_idle.return_value = False + runner = Runner(server, target, relaunch=1) + assert runner._relaunch == 1 + smap = ServerMap() + result = runner.run([], smap, testcase) + assert runner.initial + assert result.attempted assert target.close.call_count == 1 + assert target.is_idle.call_count > 0 + assert target.monitor.is_healthy.call_count > 0 + assert result.status == Result.NONE + assert result.served == serv_files + assert not smap.dynamic + assert smap.redirect.get("grz_next_test").target == "grz_empty" + assert not result.timeout target.reset_mock() - # landing page not served - server.serve_testcase.return_value = (SERVED_REQUEST, ["harness"]) - runner.run([], ServerMap(), testcase) - assert runner.result == runner.ERROR - assert runner.served + testcase.reset_mock() + # single run/iteration relaunch (idle exit) + target.is_idle.return_value = True + runner = Runner(server, target, relaunch=1) + assert runner._relaunch == 1 + result = runner.run([], ServerMap(), testcase) + assert result.attempted + assert target.close.call_count == 1 + assert target.monitor.is_healthy.call_count > 0 + target.reset_mock() + testcase.reset_mock() + # multiple runs/iterations relaunch (is_healthy exit) + runner = Runner(server, target, relaunch=3) + target.monitor.is_healthy.return_value = False + for _ in range(2): + smap = ServerMap() + result = runner.run([], smap, testcase) + assert result.attempted + assert target.close.call_count == 0 + assert target.monitor.is_healthy.call_count == 0 + assert result.status == Result.NONE + assert result.served == serv_files + assert not result.timeout + assert not smap.dynamic + assert 
"grz_next_test" not in smap.redirect + smap = ServerMap() + result = runner.run([], smap, testcase) + assert runner._tests_run == 3 + assert result.attempted + assert target.close.call_count == 1 + assert target.is_idle.call_count == 0 + assert target.monitor.is_healthy.call_count == 1 + assert result.status == Result.NONE + assert result.served == serv_files + assert not smap.dynamic + assert smap.redirect.get("grz_next_test").target == "grz_empty" + + +@mark.parametrize( + "srv_result, served", + [ + # no files served + (Served.NONE, []), + # landing page not served + (Served.REQUEST, ["harness"]), + ], +) +def test_runner_03(mocker, srv_result, served): + """test Runner() errors""" + server = mocker.Mock(spec_set=Sapphire) + server.serve_path.return_value = (srv_result, served) + target = mocker.Mock(spec_set=Target) + target.check_result.return_value = Result.NONE + testcase = mocker.Mock(spec_set=TestCase, landing_page="x", optional=[]) + runner = Runner(server, target) + result = runner.run([], ServerMap(), testcase) + assert runner.initial + assert runner.startup_failure + assert result.status == Result.NONE + assert not result.attempted + assert set(result.served) == set(served) + assert not result.timeout assert target.close.call_count == 1 -def test_runner_03(mocker): + +@mark.parametrize( + "ignore, status, idle, check_result", + [ + # detect a hang + (["memory"], Result.FOUND, False, 1), + # ignore a hang + (["timeout"], Result.IGNORED, False, 0), + # ignore idle hang + ([], Result.IGNORED, True, 0), + ], +) +def test_runner_04(mocker, ignore, status, idle, check_result): """test reporting timeout""" - server = mocker.Mock(spec=Sapphire) - target = mocker.Mock(spec=Target) + server = mocker.Mock(spec_set=Sapphire) + target = mocker.Mock(spec_set=Target) + testcase = mocker.Mock(spec_set=TestCase, landing_page="a.bin", optional=[]) serv_files = ["a.bin", "/another/file.bin"] - server.serve_testcase.return_value = (SERVED_TIMEOUT, serv_files) - runner = Runner(server, target) - target.detect_failure.return_value = target.RESULT_FAILURE - runner.run([], ServerMap(), mocker.Mock(spec=TestCase, landing_page="x")) - assert runner.result == runner.FAILED - assert runner.served == serv_files - assert runner.timeout + server.serve_path.return_value = (Served.TIMEOUT, serv_files) + target.check_result.return_value = Result.FOUND + target.handle_hang.return_value = idle + target.monitor.is_healthy.return_value = False + runner = Runner(server, target, relaunch=1) + serv_map = ServerMap() + result = runner.run(ignore, serv_map, testcase) + assert result.status == status + assert result.served == serv_files + assert result.timeout + assert "grz_empty" not in serv_map.dynamic + assert target.check_result.call_count == check_result + assert target.handle_hang.call_count == 1 + -def test_runner_04(mocker): +@mark.parametrize( + "served, attempted, target_result, status", + [ + # FAILURE + (["a.bin"], True, Result.FOUND, Result.FOUND), + # IGNORED + (["a.bin"], True, Result.IGNORED, Result.IGNORED), + # failure before serving landing page + (["harness"], False, Result.FOUND, Result.FOUND), + ], +) +def test_runner_05(mocker, served, attempted, target_result, status): """test reporting failures""" - server = mocker.Mock(spec=Sapphire) - target = mocker.Mock(spec=Target) - serv_files = ["file.bin"] - server.serve_testcase.return_value = (SERVED_REQUEST, serv_files) - testcase = mocker.Mock(spec=TestCase, landing_page=serv_files[0]) + server = mocker.Mock(spec_set=Sapphire) + 
server.serve_path.return_value = (Served.REQUEST, served) + target = mocker.Mock(spec_set=Target, launch_timeout=10) + target.check_result.return_value = target_result + target.monitor.is_healthy.return_value = False + testcase = mocker.Mock(spec_set=TestCase, landing_page="a.bin", optional=[]) runner = Runner(server, target) - # test FAILURE - target.detect_failure.return_value = target.RESULT_FAILURE - runner.run([], ServerMap(), testcase) - assert runner.result == runner.FAILED - assert runner.served == serv_files - assert not runner.timeout - # test IGNORED - target.detect_failure.return_value = target.RESULT_IGNORED - runner.run([], ServerMap(), testcase) - assert runner.result == runner.IGNORED - assert runner.served == serv_files - assert not runner.timeout - # failure before serving landing page - server.serve_testcase.return_value = (SERVED_REQUEST, ["harness"]) - target.detect_failure.return_value = target.RESULT_FAILURE - runner.run([], ServerMap(), testcase) - assert runner.result == runner.FAILED - assert runner.served - assert not runner.timeout - -def test_runner_05(mocker): + runner.launch("http://a/") + result = runner.run([], ServerMap(), testcase) + assert result.attempted == attempted + assert result.status == status + assert not result.timeout + assert target.handle_hang.call_count == 0 + assert target.close.call_count == 1 + + +def test_runner_06(mocker): """test Runner() with idle checking""" - server = mocker.Mock(spec=Sapphire) - target = mocker.Mock(spec=Target) - target.detect_failure.return_value = target.RESULT_NONE + server = mocker.Mock(spec_set=Sapphire) + target = mocker.Mock(spec_set=Target) + target.check_result.return_value = Result.NONE serv_files = ["/fake/file", "/another/file.bin"] - server.serve_testcase.return_value = (SERVED_REQUEST, serv_files) - runner = Runner(server, target, idle_threshold=0.01, idle_delay=0.01) + server.serve_path.return_value = (Served.ALL, serv_files) + runner = Runner(server, target, idle_threshold=0.01, idle_delay=0.01, relaunch=10) assert runner._idle is not None - runner.run([], ServerMap(), mocker.Mock(spec=TestCase, landing_page=serv_files[0])) - assert runner.result == runner.COMPLETE + result = runner.run( + [], + ServerMap(), + mocker.Mock(spec_set=TestCase, landing_page=serv_files[0], optional=[]), + ) + assert result.status == Result.NONE + assert result.attempted assert target.close.call_count == 0 -def test_runner_06(mocker): + +def test_runner_07(mocker): """test Runner._keep_waiting()""" - server = mocker.Mock(spec=Sapphire) - target = mocker.Mock(spec=Target) + server = mocker.Mock(spec_set=Sapphire) + target = mocker.Mock(spec_set=Target) target.monitor.is_healthy.return_value = True runner = Runner(server, target) @@ -131,7 +244,7 @@ def test_runner_06(mocker): target.monitor.is_healthy.return_value = False assert not runner._keep_waiting() - runner._idle = mocker.Mock(spec=_IdleChecker) + runner._idle = mocker.Mock(spec_set=_IdleChecker) runner._idle.is_idle.return_value = False target.monitor.is_healthy.return_value = True assert runner._keep_waiting() @@ -144,7 +257,8 @@ def test_runner_06(mocker): target.monitor.is_healthy.return_value = False assert not runner._keep_waiting() -def test_runner_07(): + +def test_runner_08(): """test Runner.location()""" result = Runner.location("a.html", 34567) assert result == "http://127.0.0.1:34567/a.html" @@ -152,70 +266,89 @@ def test_runner_07(): assert result == "http://127.0.0.1:34567/a.html" result = Runner.location("a.html", 34567, close_after=10) assert 
result == "http://127.0.0.1:34567/a.html?close_after=10" - result = Runner.location("a.html", 34567, close_after=10, forced_close=False) - assert result == "http://127.0.0.1:34567/a.html?close_after=10&forced_close=0" - result = Runner.location("a.html", 34567, forced_close=False) - assert result == "http://127.0.0.1:34567/a.html?forced_close=0" - result = Runner.location("a.html", 9999, close_after=10, forced_close=False, timeout=60) - assert result == "http://127.0.0.1:9999/a.html?close_after=10&forced_close=0&timeout=60000" - -def test_runner_08(mocker): - """test Runner.launch()""" - server = mocker.Mock(spec=Sapphire, port=0x1337) - target = mocker.Mock(spec=Target) + result = Runner.location("a.html", 9999, time_limit=60) + assert result == "http://127.0.0.1:9999/a.html?time_limit=60000" + result = Runner.location("a.html", 9999, close_after=10, time_limit=60) + assert result == "http://127.0.0.1:9999/a.html?close_after=10&time_limit=60000" + result = Runner.location("a.html", 9999, post_launch_delay=10) + assert result == "http://127.0.0.1:9999/a.html?post_launch_delay=10" + +def test_runner_09(mocker): + """test Runner.launch()""" + server = mocker.Mock(spec_set=Sapphire, port=0x1337) + target = mocker.Mock(spec_set=Target, launch_timeout=30) runner = Runner(server, target) + # successful launch + runner._tests_run = 1 runner.launch("http://a/") + assert runner._tests_run == 0 assert target.launch.call_count == 1 target.reset_mock() - - target.launch.side_effect = TargetLaunchError - with raises(TargetLaunchError): + # target launch error + target.launch.side_effect = TargetLaunchError("test", mocker.Mock(spec_set=Report)) + with raises(TargetLaunchError, match="test"): runner.launch("http://a/") - assert target.launch.call_count == 1 + assert target.launch.call_count == 3 target.reset_mock() - + # target launch timeout target.launch.side_effect = TargetLaunchTimeout with raises(TargetLaunchTimeout): runner.launch("http://a/", max_retries=3) assert target.launch.call_count == 3 -def test_runner_09(mocker, tmp_path): + +def test_runner_10(mocker, tmp_path): """test Runner.run() adding includes to testcase""" - server = mocker.Mock(spec=Sapphire) - target = mocker.Mock(spec=Target) - target.detect_failure.return_value = target.RESULT_NONE - runner = Runner(server, target) + server = mocker.Mock(spec_set=Sapphire) + target = mocker.Mock(spec_set=Target) + target.check_result.return_value = Result.NONE + runner = Runner(server, target, relaunch=10) # create test files - inc_path1 = (tmp_path / "include") + inc_path1 = tmp_path / "include" inc_path1.mkdir() - inc1 = (inc_path1 / "inc_file.bin") + inc1 = inc_path1 / "inc_file.bin" inc1.write_bytes(b"a") (inc_path1 / "nested").mkdir() - inc2 = (inc_path1 / "nested" / "nested_inc.bin") + inc2 = inc_path1 / "nested" / "nested_inc.bin" inc2.write_bytes(b"a") - inc_path2 = (tmp_path / "include2") + inc_path2 = tmp_path / "include2" inc_path2.mkdir() - inc3 = (inc_path2 / "inc_file3.txt") + inc3 = inc_path2 / "inc_file3.txt" inc3.write_bytes(b"a") # build server map smap = ServerMap() smap.set_include("/", str(inc_path1)) smap.set_include("/test", str(inc_path2)) serv_files = ["a.b", str(inc1), str(inc2), str(inc3)] - server.serve_testcase.return_value = (SERVED_ALL, serv_files) + server.serve_path.return_value = (Served.ALL, serv_files) with TestCase("a.b", "x", "x") as tcase: - runner.run([], smap, tcase) - assert runner.result == runner.COMPLETE - assert "inc_file.bin" in tcase._existing_paths - assert pathjoin("nested", "nested_inc.bin") 
in tcase._existing_paths - assert pathjoin("test", "inc_file3.txt") in tcase._existing_paths + result = runner.run([], smap, tcase) + assert result.attempted + assert result.status == Result.NONE + assert "inc_file.bin" in tcase.contents + assert "nested/nested_inc.bin" in tcase.contents + assert "test/inc_file3.txt" in tcase.contents + + +def test_runner_11(mocker): + """test Runner.post_launch()""" + server = mocker.Mock(spec_set=Sapphire, timeout=1) + target = mocker.Mock(spec_set=Target, launch_timeout=30) + runner = Runner(server, target) + # successful launch + runner.launch("http://a/") + runner.post_launch(delay=10) + assert target.launch.call_count == 1 + assert server.timeout == 1 + assert server.serve_path.call_count == 1 + def test_idle_check_01(mocker): """test simple _IdleChecker""" fake_time = mocker.patch("grizzly.common.runner.time", autospec=True) - ichk = _IdleChecker(mocker.Mock(), 100, 10, poll_delay=1) - assert ichk._threshold == 100 + ichk = _IdleChecker(mocker.Mock(), 95, 10, poll_delay=1) + assert ichk._threshold == 95 assert ichk._init_delay == 10 assert ichk._poll_delay == 1 assert ichk._next_poll is None @@ -225,13 +358,13 @@ def test_idle_check_01(mocker): ichk.schedule_poll() assert ichk._next_poll == 1 + def test_idle_check_02(mocker): """test _IdleChecker.is_idle()""" fake_time = mocker.patch("grizzly.common.runner.time", autospec=True) callbk = mocker.Mock() callbk.return_value = False - #check_cb, delay, duration, threshold - ichk = _IdleChecker(callbk, 100, 10, poll_delay=1) + ichk = _IdleChecker(callbk, 99, 10, poll_delay=1) fake_time.return_value = 0 ichk.schedule_poll() # early check diff --git a/grizzly/common/test_stack_hasher.py b/grizzly/common/test_stack_hasher.py index 5255c2a0..9085ec35 100644 --- a/grizzly/common/test_stack_hasher.py +++ b/grizzly/common/test_stack_hasher.py @@ -5,7 +5,7 @@ import pytest -from .stack_hasher import Stack, StackFrame +from .stack_hasher import Mode, Stack, StackFrame def test_stack_01(): @@ -15,6 +15,7 @@ def test_stack_01(): assert isinstance(stack.frames, list) assert stack._major_depth > 0 # pylint: disable=protected-access + def test_stack_02(): """test creating a Stack with 1 frame""" frames = [StackFrame(function="a", location="b", offset="c", stack_line="0")] @@ -30,11 +31,14 @@ def test_stack_02(): assert "location: 'b'" in output assert "offset: 'c'" in output + def test_stack_03(): """test creating a Stack with 2 frames""" frames = list() for _ in range(2): - frames.append(StackFrame(function="a", location="b", offset="c", stack_line="0")) + frames.append( + StackFrame(function="a", location="b", offset="c", stack_line="0") + ) stack = Stack(frames=frames, major_depth=2) assert stack.minor is not None assert stack.major is not None @@ -43,11 +47,14 @@ def test_stack_03(): assert stack.minor != stack.major assert len(stack.frames) == 2 + def test_stack_04(): """test creating a Stack with 2 frames with a major depth of 0""" frames = list() for line in range(2): - frames.append(StackFrame(function="a", location="b", offset="c", stack_line=str(line))) + frames.append( + StackFrame(function="a", location="b", offset="c", stack_line=str(line)) + ) stack = Stack(frames=frames, major_depth=0) assert stack.minor is not None assert stack.major is None @@ -56,11 +63,14 @@ def test_stack_04(): assert stack.minor != stack.major assert len(stack.frames) == 2 + def test_stack_05(): """test creating a Stack with 10 frames exceeding major depth""" frames = list() for line in range(10): - 
frames.append(StackFrame(function="a", location="b", offset="c", stack_line=str(line))) + frames.append( + StackFrame(function="a", location="b", offset="c", stack_line=str(line)) + ) stack = Stack(frames=frames, major_depth=5) assert stack.minor is not None assert stack.major is not None @@ -70,54 +80,71 @@ def test_stack_05(): assert len(stack.frames) == 10 assert stack.major != Stack(frames=frames, major_depth=4).major + def test_stack_06(): """test creating a Stack by calling from_text()""" - input_txt = "" \ - "=================================================================\n" \ - "==7854==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x7fcca620e0ee bp 0x7ffd946b2690 sp 0x7ffd946b25c0 T0)\n" \ - "==7854==blah.\n" \ - "==7854==Hint: blah.\n" \ - " #0 0x7fcca620e0ed in test::test::test(nsIWa*, nsICa*) /aa/bb/cc/dd/ee/ff/gg/asdf.cpp:5533:14\n" \ - " #1 0x7fcca3497201 in nsAs::TestTest(mz::dom::El*, int) /aa/bb/cc/dd/ee/ff/nsFilea.cpp:13733:3\n" \ - " #2 0x7fcca3495b9b in asdf::fasd() /aa/bb/cc/dd/ee/ff/base/nsFileb.cpp:11674:21\n" \ - " #3 0x7fcca02eb9d8 in nsAasd::PrNeEv(bool, bool*) /aa/bb/cc/dd/ee/xpcom/nsFilec.cpp:1396:14\n" \ - " #4 0x7fcca0307d40 in R_PNE(asd*, bool) /aa/bb/cc/dd/ee/xpcom/threads/asf.cpp:657:10\n" \ - " #5 0x2a780b25f65a ()\n" \ - "\n" \ - "AddressSanitizer can not provide additional info.\n" \ - "SUMMARY: AddressSanitizer: SEGV /aa/bb/cc/dd/ee/ff/asdf.cpp:5533:14 in test::test::test(nsIWa*, nsICa*)\n" \ + input_txt = ( + "=================================================================\n" + "==7854==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000" + " (pc 0x7fcca620e0ee bp 0x7ffd946b2690 sp 0x7ffd946b25c0 T0)\n" + "==7854==blah.\n" + "==7854==Hint: blah.\n" + " #0 0x7fcca620e0ed in test::test::test(nsIWa*, nsICa*)" + " /aa/bb/cc/dd/ee/ff/gg/asdf.cpp:5533:14\n" + " #1 0x7fcca3497201 in nsAs::TestTest(mz::dom::El*, int)" + " /aa/bb/cc/dd/ee/ff/nsFilea.cpp:13733:3\n" + " #2 0x7fcca3495b9b in asdf::fasd()" + " /aa/bb/cc/dd/ee/ff/base/nsFileb.cpp:11674:21\n" + " #3 0x7fcca02eb9d8 in nsAasd::PrNeEv(bool, bool*)" + " /aa/bb/cc/dd/ee/xpcom/nsFilec.cpp:1396:14\n" + " #4 0x7fcca0307d40 in R_PNE(asd*, bool)" + " /aa/bb/cc/dd/ee/xpcom/threads/asf.cpp:657:10\n" + " #5 0x2a780b25f65a ()\n" + "\n" + "AddressSanitizer can not provide additional info.\n" + "SUMMARY: AddressSanitizer: SEGV /aa/bb/cc/dd/ee/ff/asdf.cpp:5533:14" + " in test::test::test(nsIWa*, nsICa*)\n" "==7854==ABORTING\n" + ) stack = Stack.from_text(input_txt) assert len(stack.frames) == 6 assert stack.minor != stack.major - assert stack.frames[0].mode == StackFrame.MODE_SANITIZER + assert stack.frames[0].mode == Mode.SANITIZER + def test_stack_07(): """test creating a Stack by calling from_text() with mixed frames modes""" - input_txt = "" \ - " #0 0x4d2cde in a_b_c /a/lib/info.c:392:12\n" \ - " #1 0x491e82 in main /a/b/d_e.c:128:8\n" \ - " #2 0x7f090384582f in __libc_start_main /build/glibc-glibc-2.23/csu/../csu/libc-start.c:291\n" \ - "#2 0x0000000000400545 in gdb_frame ()\n" \ - " #3 0x41b228 in _start (bin_name+0x41b228)\n" + input_txt = ( + "" + " #0 0x4d2cde in a_b_c /a/lib/info.c:392:12\n" + " #1 0x491e82 in main /a/b/d_e.c:128:8\n" + " #2 0x7f090384582f in __libc_start_main /build/glibc-glibc-2.23/csu/" + "../csu/libc-start.c:291\n" + "#2 0x0000000000400545 in gdb_frame ()\n" + " #3 0x41b228 in _start (bin_name+0x41b228)\n" + ) stack = Stack.from_text(input_txt) assert len(stack.frames) == 4 assert stack.minor != stack.major - assert stack.frames[0].mode == 
StackFrame.MODE_SANITIZER + assert stack.frames[0].mode == Mode.SANITIZER + def test_stack_08(): """test creating a Stack by calling from_text() with text containing 2 stacks""" - input_txt = "" \ - " #0 0x0bad0bad in bad::frame0(nsA*, nsB*) /aa/a.cpp:12:1\n" \ - " #1 0x0bad0bad in bad::frame1(mz::d::EE*, int) /aa/a.cpp:12:1\n" \ - " #0 0x1badf00d in good::frame0(nsA*, nsB*) /aa/a.cpp:12:1\n" \ + input_txt = ( + "" + " #0 0x0bad0bad in bad::frame0(nsA*, nsB*) /aa/a.cpp:12:1\n" + " #1 0x0bad0bad in bad::frame1(mz::d::EE*, int) /aa/a.cpp:12:1\n" + " #0 0x1badf00d in good::frame0(nsA*, nsB*) /aa/a.cpp:12:1\n" " #1 0xdeadbeef in good::frame1(mz::d::EE*, int) /aa/a.cpp:12:1\n" + ) stack = Stack.from_text(input_txt) assert len(stack.frames) == 2 assert stack.frames[0].function == "good::frame0" assert stack.frames[1].function == "good::frame1" assert stack.minor != stack.major - assert stack.frames[0].mode == StackFrame.MODE_SANITIZER + assert stack.frames[0].mode == Mode.SANITIZER + def test_stack_09(): """test creating a Stack by calling from_text() with empty string""" @@ -126,27 +153,33 @@ def test_stack_09(): assert stack.minor is None assert stack.major is None + def test_stack_10(): """test creating a Stack from a Sanitizer trace with an unsymbolized lib""" - input_txt = "" \ - " #0 0x4c7702 in realloc asan/asan_malloc_linux.cc:107:3\n" \ - " #1 0x7f6d056ce7fc (/lib/x86_64-linux-gnu/libdbus-1.so.3+0x2d7fc)\n" \ + input_txt = ( + "" + " #0 0x4c7702 in realloc asan/asan_malloc_linux.cc:107:3\n" + " #1 0x7f6d056ce7fc (/lib/x86_64-linux-gnu/libdbus-1.so.3+0x2d7fc)\n" " #2 0x7ffffffff ()\n" + ) stack = Stack.from_text(input_txt) assert len(stack.frames) == 3 assert stack.frames[0].location == "asan_malloc_linux.cc" assert stack.frames[1].location == "libdbus-1.so.3" assert stack.frames[2].location == "" assert stack.minor != stack.major - assert stack.frames[0].mode == StackFrame.MODE_SANITIZER + assert stack.frames[0].mode == Mode.SANITIZER + def test_stack_11(): """test creating a Stack from a Sanitizer trace with an unsymbolized lib""" - input_txt = "" \ - " #0 0x90000223 (/usr/swr_a.so+0x231223)\n" \ - " #1 0x00000447 (/usr/as.so.1+0x42447)\n" \ - " #2 0x000098fc in fSasd /src/obj-firefox/dist/include/something.h:102:9\n" \ + input_txt = ( + "" + " #0 0x90000223 (/usr/swr_a.so+0x231223)\n" + " #1 0x00000447 (/usr/as.so.1+0x42447)\n" + " #2 0x000098fc in fSasd /src/obj-firefox/dist/include/something.h:102:9\n" " #3 0x000098fc in mz::as::asdf::SB() /src/Blah.cpp:655\n" + ) stack = Stack.from_text(input_txt) assert len(stack.frames) == 4 assert stack.frames[0].location == "swr_a.so" @@ -154,20 +187,23 @@ def test_stack_11(): assert stack.frames[2].function == "fSasd" assert stack.frames[3].function == "mz::as::asdf::SB" assert stack.minor != stack.major - assert stack.frames[0].mode == StackFrame.MODE_SANITIZER + assert stack.frames[0].mode == Mode.SANITIZER + def test_stack_12(): """test creating a Stack from a Valgrind trace""" - input_txt = "" \ - "==4754== \n" \ - "==4754== Use of uninitialised value of size 8\n" \ - "==4754== at 0x45C6C0: FooBar (decode.c:964)\n" \ - "==4754== by 0x462A20: main (test.cc:71)\n" \ - "==4754== Uninitialised value was created by a heap allocation\n" \ - "==4754== at 0x4C2AB80: malloc (in /usr/lib/test-linux.so)\n" \ - "==4754== by 0x459455: FooBar (decode.c:757)\n" \ - "==4754== by 0x462A20: main (test.cc:71)\n" \ + input_txt = ( + "" + "==4754== \n" + "==4754== Use of uninitialised value of size 8\n" + "==4754== at 0x45C6C0: FooBar (decode.c:964)\n" + 
"==4754== by 0x462A20: main (test.cc:71)\n" + "==4754== Uninitialised value was created by a heap allocation\n" + "==4754== at 0x4C2AB80: malloc (in /usr/lib/test-linux.so)\n" + "==4754== by 0x459455: FooBar (decode.c:757)\n" + "==4754== by 0x462A20: main (test.cc:71)\n" "==4754== \n" + ) stack = Stack.from_text(input_txt) assert len(stack.frames) == 5 assert stack.frames[0].location == "decode.c" @@ -176,52 +212,96 @@ def test_stack_12(): assert stack.frames[3].function == "FooBar" assert stack.frames[4].function == "main" assert stack.minor != stack.major - assert stack.frames[0].mode == StackFrame.MODE_VALGRIND + assert stack.frames[0].mode == Mode.VALGRIND + def test_stack_13(): """test creating a Stack from Rust trace""" - input_txt = "" \ - "thread '' panicked at 'Invoking Servo_Element_IsDisplayContents on unstyled element', libcore/option.rs:917:5\n" \ - "stack backtrace:\n" \ - " 0: 0x7ff1c65e93d3 - std::sys::unix::backtrace::tracing::imp::unwind_backtrace::h09c1ee131a74b1c4\n" \ - " at libstd/sys/unix/backtrace/tracing/gcc_s.rs:49\n" \ - " 1: 0x7ff1c65e81c4 - std::panicking::default_hook::{{closure}}::h945a649c9017832e\n" \ - " at libstd/sys_common/backtrace.rs:71\n" \ - " at libstd/sys_common/backtrace.rs:59\n" \ - " at libstd/panicking.rs:380\n" \ - " 2: 0x7ff1c65e7457 - std::panicking::default_hook::hcc534c2d30fbcda3\n" \ - " at libstd/panicking.rs:396\n" \ - " 3: 0x7ff1c65e6de7 - std::panicking::rust_panic_with_hook::h09a7a3a353dc2f38\n" \ - " at libstd/panicking.rs:576\n" \ - " 4: 0x7ff1c65e6c95 - std::panicking::begin_panic::h8327f16bde15df70\n" \ - " at libstd/panicking.rs:537\n" \ - " 5: 0x7ff1c65e6c29 - std::panicking::begin_panic_fmt::h42ff1d37404632d6\n" \ - " at libstd/panicking.rs:521\n" \ - " 6: 0x7ff1c65fa46a - core::panicking::panic_fmt::h0bd854df201d1baf\n" \ - " at libstd/panicking.rs:497\n" \ - " 7: 0x7ff1c65ffba8 - core::option::expect_failed::hfa0c8a51e07f7adc\n" \ - " at libcore/option.rs:917\n" \ - " 8: 0x7ff1c632d473 - Servo_Element_IsDisplayContents\n" \ - " at /checkout/src/libcore/option.rs:302\n" \ - " at servo/ports/geckolib/glue.rs:1086\n" \ - " 9: 0x7f44064ee749 - _ZNK7nsFrame24DoGetParentComputedStyleEPP8nsIFrame\n" \ - " at /builds/worker/workspace/build/src/layout/generic/nsFrame.cpp:9893\n" \ - " 10: 0x7f4406229749 - _ZN7mozilla14RestyleManager35DoReparentComputedStyleForFirstLineEP8nsIFrameRNS_13ServoStyleSetE\n" \ - " at /builds/worker/workspace/build/src/layout/base/RestyleManager.cpp:3407\n" \ - " 11: 0x7f440622a0a5 - _ZN7mozilla14RestyleManager24ReparentFrameDescendantsEP8nsIFrameS2_RNS_13ServoStyleSetE\n" \ - " at /builds/worker/workspace/build/src/layout/base/RestyleManager.cpp:3538\n" + input_txt = ( + "thread '' panicked at 'Invoking Servo_Element_IsDisplayContents" + " on unstyled element', libcore/option.rs:917:5\n" + "stack backtrace:\n" + " 0: 0x7ff1c65e93d3 - std::sys::unix::backtrace::tracing::imp::unwind_bac" + "ktrace::h09c1ee131a74b1c4\n" + " at libstd/sys/unix/backtrace/tracing/gcc_s.rs:4" + "9\n" + " 1: 0x7ff1c65e81c4 - std::panicking::default_hook::{{closure}}::h945a649" + "c9017832e\n" + " at libstd/sys_common/backtrace.rs:71\n" + " at libstd/sys_common/backtrace.rs:59\n" + " at libstd/panicking.rs:380\n" + " 2: 0x7ff1c65e7457 - std::panicking::default_hook::hcc534c2d30fbcda3\n" + " at libstd/panicking.rs:396\n" + " 3: 0x7ff1c65e6de7 - std::panicking::rust_panic_with_hook::h09a7a3a353dc" + "2f38\n" + " at libstd/panicking.rs:576\n" + " 4: 0x7ff1c65e6c95 - std::panicking::begin_panic::h8327f16bde15df70\n" + " at 
libstd/panicking.rs:537\n"
+        " 5: 0x7ff1c65e6c29 - std::panicking::begin_panic_fmt::h42ff1d37404632d6\n"
+        " at libstd/panicking.rs:521\n"
+        " 6: 0x7ff1c65fa46a - core::panicking::panic_fmt::h0bd854df201d1baf\n"
+        " at libstd/panicking.rs:497\n"
+        " 7: 0x7ff1c65ffba8 - core::option::expect_failed::hfa0c8a51e07f7adc\n"
+        " at libcore/option.rs:917\n"
+        " 8: 0x7ff1c632d473 - Servo_Element_IsDisplayContents\n"
+        " at /checkout/src/libcore/option.rs:302\n"
+        " at servo/ports/geckolib/glue.rs:1086\n"
+        " 9: 0x7f44064ee749 - _ZNK7nsFrame24DoGetParentComputedStyleEPP8nsIFrame\n"
+        " at /builds/worker/workspace/build/src/layout/g" "eneric/nsFrame.cpp:9893\n"
+        " 10: 0x7f4406229749 - _ZN7mozilla14RestyleManager35DoReparentComputedSty" "leForFirstLineEP8nsIFrameRNS_13ServoStyleSetE\n"
+        " at /builds/worker/workspace/build/src/layout/b" "ase/RestyleManager.cpp:3407\n"
+        " 11: 0x7f440622a0a5 - _ZN7mozilla14RestyleManager24ReparentFrameDescenda" "ntsEP8nsIFrameS2_RNS_13ServoStyleSetE\n"
+        " at /builds/worker/workspace/build/src/layout/b" "ase/RestyleManager.cpp:3538\n"
+    )
     stack = Stack.from_text(input_txt)
     assert len(stack.frames) == 12
-    assert stack.frames[0].function == "std::sys::unix::backtrace::tracing::imp::unwind_backtrace"
+    assert (
+        stack.frames[0].function
+        == "std::sys::unix::backtrace::tracing::imp::unwind_backtrace"
+    )
     assert stack.frames[8].function == "Servo_Element_IsDisplayContents"
     assert stack.minor != stack.major
-    assert stack.frames[0].mode == StackFrame.MODE_RUST
+    assert stack.frames[0].mode == Mode.RUST
+
+
+def test_stack_14():
+    """test Stack.height_limit"""
+    frames = list()
+    for num in range(10):
+        frames.append(
+            StackFrame(function=str(num), location="b", offset="c", stack_line=str(num))
+        )
+    stack = Stack(frames=frames, major_depth=3)
+    assert stack.height_limit is None
+    no_lim_minor = stack.minor
+    assert no_lim_minor is not None
+    no_lim_major = stack.major
+    assert no_lim_major is not None
+    # set height limit and check hash recalculations
+    stack.height_limit = 5
+    assert stack.height_limit == 5
+    assert stack.minor is not None
+    assert no_lim_minor != stack.minor
+    assert stack.major is not None
+    assert no_lim_major != stack.major
+    # remove height limit and check hash recalculations
+    stack.height_limit = None
+    assert stack.height_limit is None
+    assert no_lim_minor == stack.minor
+    assert no_lim_major == stack.major
+
+
 def test_stackframe_01():
     """test creating an empty StackFrame"""
     stack = StackFrame()
     assert not stack.__str__()
+
 def test_stackframe_02():
     """test creating a StackFrame from junk"""
     assert StackFrame.from_line("#0 ") is None
@@ -239,50 +319,66 @@ def test_stackframe_02():
     assert StackFrame.from_line("==123==") is None
     assert StackFrame.from_line("==1== by 0x0: a ()") is None
+
 def test_sanitizer_stackframe_01():
     """test creating a StackFrame from a line with symbols"""
-    frame = StackFrame.from_line(" #1 0x7f00dad60565 in Abort(char const*) /blah/base/nsDebugImpl.cpp:472")
+    frame = StackFrame.from_line(
+        " #1 0x7f00dad60565 in Abort(char const*) /blah/base/nsDebugImpl.cpp:472"
+    )
     assert frame.stack_line == "1"
     assert frame.function == "Abort"
     assert frame.location == "nsDebugImpl.cpp"
     assert frame.offset == "472"
-    assert frame.mode == StackFrame.MODE_SANITIZER
+    assert frame.mode == Mode.SANITIZER
+
+
 def test_sanitizer_stackframe_02():
     """test creating a StackFrame from a line with symbols"""
-    frame = StackFrame.from_line(" #36 0x48a6e4 in main /app/nsBrowserApp.cpp:399:11")
+    frame = StackFrame.from_line(
+        " #36 0x48a6e4
in main /app/nsBrowserApp.cpp:399:11" + ) assert frame.stack_line == "36" assert frame.function == "main" assert frame.location == "nsBrowserApp.cpp" assert frame.offset == "399" - assert frame.mode == StackFrame.MODE_SANITIZER + assert frame.mode == Mode.SANITIZER + def test_sanitizer_stackframe_03(): """test creating a StackFrame from a line without symbols""" - frame = StackFrame.from_line(" #1 0x7f00ecc1b33f (/lib/x86_64-linux-gnu/libpthread.so.0+0x1033f)") + frame = StackFrame.from_line( + " #1 0x7f00ecc1b33f (/lib/x86_64-linux-gnu/libpthread.so.0+0x1033f)" + ) assert frame.stack_line == "1" assert frame.function is None assert frame.location == "libpthread.so.0" assert frame.offset == "0x1033f" - assert frame.mode == StackFrame.MODE_SANITIZER + assert frame.mode == Mode.SANITIZER + def test_sanitizer_stackframe_04(): """test creating a StackFrame from a line with symbols""" - frame = StackFrame.from_line(" #25 0x7f0155526181 in start_thread (/l/libpthread.so.0+0x8181)") + frame = StackFrame.from_line( + " #25 0x7f0155526181 in start_thread (/l/libpthread.so.0+0x8181)" + ) assert frame.stack_line == "25" assert frame.function == "start_thread" assert frame.location == "libpthread.so.0" assert frame.offset == "0x8181" - assert frame.mode == StackFrame.MODE_SANITIZER + assert frame.mode == Mode.SANITIZER + def test_sanitizer_stackframe_05(): """test creating a StackFrame from a line with angle brackets""" - frame = StackFrame.from_line(" #123 0x7f30afea9148 in Call /a/b.cpp:356:50") + frame = StackFrame.from_line( + " #123 0x7f30afea9148 in Call /a/b.cpp:356:50" + ) assert frame.stack_line == "123" assert frame.function == "Call" assert frame.location == "b.cpp" assert frame.offset == "356" - assert frame.mode == StackFrame.MODE_SANITIZER + assert frame.mode == Mode.SANITIZER + def test_sanitizer_stackframe_06(): """test creating a StackFrame from a useless frame""" @@ -291,34 +387,44 @@ def test_sanitizer_stackframe_06(): assert frame.function is None assert frame.location == "" assert frame.offset is None - assert frame.mode == StackFrame.MODE_SANITIZER + assert frame.mode == Mode.SANITIZER + def test_sanitizer_stackframe_07(): """test creating a StackFrame from a line missing a function""" - frame = StackFrame.from_line(" #0 0x7f0d571e04bd /a/glibc-2.23/../syscall-template.S:84") + frame = StackFrame.from_line( + " #0 0x7f0d571e04bd /a/glibc-2.23/../syscall-template.S:84" + ) assert frame.stack_line == "0" assert frame.function is None assert frame.location == "syscall-template.S" assert frame.offset == "84" - assert frame.mode == StackFrame.MODE_SANITIZER + assert frame.mode == Mode.SANITIZER + def test_sanitizer_stackframe_08(): """test creating a StackFrame from a line with lots of spaces""" - frame = StackFrame.from_line(" #0 0x48a6e4 in Call /test path/file name.c:1:2") + frame = StackFrame.from_line( + " #0 0x48a6e4 in Call /test path/file name.c:1:2" + ) assert frame.stack_line == "0" assert frame.function == "Call" assert frame.location == "file name.c" assert frame.offset == "1" - assert frame.mode == StackFrame.MODE_SANITIZER + assert frame.mode == Mode.SANITIZER + def test_gdb_stackframe_01(): """test creating a StackFrame from a GDB line with symbols""" - frame = StackFrame.from_line("#0 __memmove_ssse3_back () at ../d/x86_64/a/memcpy-ssse3-back.S:1654") + frame = StackFrame.from_line( + "#0 __memmove_ssse3_back () at ../d/x86_64/a/memcpy-ssse3-back.S:1654" + ) assert frame.stack_line == "0" assert frame.function == "__memmove_ssse3_back" assert frame.location == 
"memcpy-ssse3-back.S" assert frame.offset == "1654" - assert frame.mode == StackFrame.MODE_GDB + assert frame.mode == Mode.GDB + def test_gdb_stackframe_02(): """test creating a StackFrame from a GDB line with symbols but no line numbers""" @@ -327,7 +433,8 @@ def test_gdb_stackframe_02(): assert frame.function == "main" assert frame.location is None assert frame.offset is None - assert frame.mode == StackFrame.MODE_GDB + assert frame.mode == Mode.GDB + def test_gdb_stackframe_03(): """test creating a StackFrame from a GDB line with symbols""" @@ -336,16 +443,20 @@ def test_gdb_stackframe_03(): assert frame.function == "main" assert frame.location == "test.c" assert frame.offset == "5" - assert frame.mode == StackFrame.MODE_GDB + assert frame.mode == Mode.GDB + def test_minidump_stackframe_01(): """test creating a StackFrame from a Minidump line with symbols""" - frame = StackFrame.from_line("0|2|libtest|main|hg:c.a.org/m-c:a/b/file.cpp:5bf50|114|0x3a") + frame = StackFrame.from_line( + "0|2|libtest|main|hg:c.a.org/m-c:a/b/file.cpp:5bf50|114|0x3a" + ) assert frame.stack_line == "2" assert frame.function == "main" assert frame.location == "file.cpp" assert frame.offset == "114" - assert frame.mode == StackFrame.MODE_MINIDUMP + assert frame.mode == Mode.MINIDUMP + def test_minidump_stackframe_02(): """test creating a StackFrame from a Minidump line without symbols""" @@ -354,16 +465,20 @@ def test_minidump_stackframe_02(): assert frame.function is None assert frame.location == "libpthread-2.26.so" assert frame.offset == "0x10588" - assert frame.mode == StackFrame.MODE_MINIDUMP + assert frame.mode == Mode.MINIDUMP + def test_minidump_stackframe_03(): """test creating a StackFrame from a Minidump line without hg repo info""" - frame = StackFrame.from_line("0|49|libxul.so|foo|/usr/x86_64-linux-gnu/test.h|85|0x5") + frame = StackFrame.from_line( + "0|49|libxul.so|foo|/usr/x86_64-linux-gnu/test.h|85|0x5" + ) assert frame.stack_line == "49" assert frame.function == "foo" assert frame.location == "/usr/x86_64-linux-gnu/test.h" assert frame.offset == "85" - assert frame.mode == StackFrame.MODE_MINIDUMP + assert frame.mode == Mode.MINIDUMP + def test_tsan_stackframe_01(): """test creating a StackFrame from a symbolized TSan line""" @@ -372,16 +487,20 @@ def test_tsan_stackframe_01(): assert frame.function == "main" assert frame.location == "race.c" assert frame.offset == "10" - assert frame.mode == StackFrame.MODE_TSAN + assert frame.mode == Mode.TSAN + def test_tsan_stackframe_02(): """test creating a StackFrame from a symbolized TSan line""" - frame = StackFrame.from_line(" #1 test1 test2 /a b/c.h:51:10 (libxul.so+0x18c9873)") + frame = StackFrame.from_line( + " #1 test1 test2 /a b/c.h:51:10 (libxul.so+0x18c9873)" + ) assert frame.stack_line == "1" assert frame.function == "test1" assert frame.location == "c.h" assert frame.offset == "51" - assert frame.mode == StackFrame.MODE_TSAN + assert frame.mode == Mode.TSAN + def test_tsan_stackframe_03(): """test creating a StackFrame from an unsymbolized TSan line""" @@ -390,7 +509,8 @@ def test_tsan_stackframe_03(): assert frame.function is None assert frame.location is None assert frame.offset == "0xbad" - assert frame.mode == StackFrame.MODE_TSAN + assert frame.mode == Mode.TSAN + def test_tsan_stackframe_04(): """test creating a StackFrame from a TSan line missing file""" @@ -399,7 +519,8 @@ def test_tsan_stackframe_04(): assert frame.function == "func" assert frame.location == "mod" assert frame.offset == "0x123ac" - assert frame.mode == 
StackFrame.MODE_TSAN + assert frame.mode == Mode.TSAN + def test_valgrind_stackframe_01(): frame = StackFrame.from_line("==4754== at 0x45C6C0: FuncName (decode.c:123)") @@ -407,7 +528,8 @@ def test_valgrind_stackframe_01(): assert frame.function == "FuncName" assert frame.location == "decode.c" assert frame.offset == "123" - assert frame.mode == StackFrame.MODE_VALGRIND + assert frame.mode == Mode.VALGRIND + def test_valgrind_stackframe_02(): frame = StackFrame.from_line("==4754== by 0x462A20: main (foo.cc:71)") @@ -415,31 +537,41 @@ def test_valgrind_stackframe_02(): assert frame.function == "main" assert frame.location == "foo.cc" assert frame.offset == "71" - assert frame.mode == StackFrame.MODE_VALGRIND + assert frame.mode == Mode.VALGRIND + def test_valgrind_stackframe_03(): - frame = StackFrame.from_line("==4754== at 0x4C2AB80: malloc (in /usr/lib/blah-linux.so)") + frame = StackFrame.from_line( + "==4754== at 0x4C2AB80: malloc (in /usr/lib/blah-linux.so)" + ) assert frame.stack_line is None assert frame.function == "malloc" assert frame.location == "blah-linux.so" assert frame.offset is None - assert frame.mode == StackFrame.MODE_VALGRIND + assert frame.mode == Mode.VALGRIND + def test_valgrind_stackframe_04(): - frame = StackFrame.from_line("==2342== by 0x4E3E71: (anon ns)::test(b2::a&, int) (main.cpp:49)") + frame = StackFrame.from_line( + "==2342== by 0x4E3E71: (anon ns)::test(b2::a&, int) (main.cpp:49)" + ) assert frame.stack_line is None assert frame.function == "(anon ns)::test(b2::a&, int)" assert frame.location == "main.cpp" assert frame.offset == "49" - assert frame.mode == StackFrame.MODE_VALGRIND + assert frame.mode == Mode.VALGRIND + def test_valgrind_stackframe_05(): - frame = StackFrame.from_line("==2342== at 0xF00D: Foo::Foo(char *, int, bool) (File.h:37)") + frame = StackFrame.from_line( + "==2342== at 0xF00D: Foo::Foo(char *, int, bool) (File.h:37)" + ) assert frame.stack_line is None assert frame.function == "Foo::Foo(char *, int, bool)" assert frame.location == "File.h" assert frame.offset == "37" - assert frame.mode == StackFrame.MODE_VALGRIND + assert frame.mode == Mode.VALGRIND + def test_rr_stackframe_01(): frame = StackFrame.from_line("rr(main+0x244)[0x450b74]") @@ -447,7 +579,8 @@ def test_rr_stackframe_01(): assert frame.function is None assert frame.location == "main" assert frame.offset == "0x244" - assert frame.mode == StackFrame.MODE_RR + assert frame.mode == Mode.RR + def test_rust_stackframe_01(): """test creating a Rust StackFrame from stack line""" @@ -456,13 +589,16 @@ def test_rust_stackframe_01(): assert frame.function == "__libc_start_main" assert frame.location is None assert frame.offset is None - assert frame.mode == StackFrame.MODE_RUST + assert frame.mode == Mode.RUST + def test_rust_stackframe_02(): """test creating a Rust StackFrame from stack line""" - frame = StackFrame.from_line(" 4: 0x10b715a5b - unwind::begin_unwind_fmt::h227376fe1e021a36n3d") + frame = StackFrame.from_line( + " 4: 0x10b715a5b - unwind::begin_unwind_fmt::h227376fe1e021a36n3d" + ) assert frame.stack_line == "4" assert frame.location is None assert frame.function == "unwind::begin_unwind_fmt" assert frame.offset is None - assert frame.mode == StackFrame.MODE_RUST + assert frame.mode == Mode.RUST diff --git a/grizzly/common/test_status.py b/grizzly/common/test_status.py index 07a2f935..19e0218c 100644 --- a/grizzly/common/test_status.py +++ b/grizzly/common/test_status.py @@ -6,50 +6,46 @@ # pylint: disable=protected-access from multiprocessing import Event, Process 
-from os import remove, stat -from os.path import isfile +from sqlite3 import connect from time import sleep, time -from .status import ReducerStats, Status +from pytest import mark +from .reporter import FuzzManagerReporter +from .status import ( + DB_VERSION, + ReductionStatus, + ReductionStep, + ResultCounter, + Status, + _db_version_check, +) -def test_status_01(tmp_path): + +def test_status_01(mocker, tmp_path): """test Status.start()""" - Status.PATH = str(tmp_path) - status = Status.start() + mocker.patch("grizzly.common.status.time", autospec=True, return_value=1.0) + status = Status.start(str(tmp_path / "status.db")) assert status is not None - assert status.data_file is not None - assert isfile(status.data_file) - assert stat(status.data_file).st_size > 0 + assert status._db_file is not None assert status.start_time > 0 assert status.timestamp >= status.start_time - assert int(status.duration) == 0 assert status.ignored == 0 assert status.iteration == 0 assert status.log_size == 0 assert status.rate == 0 - assert status.results == 0 + assert status.results.total == 0 + assert int(status.runtime) == 0 + assert status.pid is not None + assert not status._enable_profiling + assert not status._profiles + assert not any(status.blockers()) -def test_status_02(tmp_path): - """test Status.cleanup()""" - Status.PATH = str(tmp_path) - status = Status.start() - dfile = status.data_file - status.cleanup() - assert status.data_file is None - assert not isfile(dfile) - # call 2nd time - status.cleanup() - # missing data file - status = Status.start() - remove(status.data_file) - status.cleanup() -def test_status_03(tmp_path): +def test_status_02(tmp_path): """test Status.report()""" - Status.PATH = str(tmp_path) - status = Status.start() - status.count_result("sig1") + status = Status.start(str(tmp_path / "status.db")) + status.results.count("uid1", "sig1") # try to report before REPORT_FREQ elapses assert not status.report() # REPORT_FREQ elapses @@ -61,179 +57,677 @@ def test_status_03(tmp_path): status.timestamp = future assert status.report(force=True) assert status.timestamp < future - status.cleanup() - -def test_status_04(tmp_path): - """test Status.load() failure paths""" - Status.PATH = str(tmp_path) - # load no db - assert Status.load(str(tmp_path / "missing.json")) is None - # load empty - bad = (tmp_path / "bad.json") - bad.touch() - assert Status.load(str(bad)) is None - # load invalid/incomplete json - bad.write_bytes(b"{}") - assert Status.load(str(bad)) is None - -def test_status_05(tmp_path): - """test Status.load()""" - Status.PATH = str(tmp_path) + + +def test_status_03(tmp_path): + """test Status.loadall()""" + db_file = str(tmp_path / "status.db") + # load from empty db + assert not any(Status.loadall(db_file)) # create simple entry - status = Status.start() - status.count_result("sig1") - status.count_result("sig2") - status.count_result("sig1") + status = Status.start(db_file, enable_profiling=True) + status.results.count("uid1", "sig1") + status.record("test", 123.45) status.report(force=True) - assert status.results == 3 - loaded = Status.load(status.data_file) - assert loaded.data_file is None + assert status.results.total == 1 + loaded = next(Status.loadall(db_file)) assert status.start_time == loaded.start_time assert status.timestamp == loaded.timestamp - assert status.duration == loaded.duration + assert status.runtime >= loaded.runtime assert status.ignored == loaded.ignored assert status.iteration == loaded.iteration assert status.log_size == loaded.log_size - 
assert status.results == loaded.results - loaded.cleanup() - assert isfile(status.data_file) - data_file = status.data_file - status.cleanup() - assert not isfile(data_file) - -def test_status_06(tmp_path): - """test Status.loadall()""" - working_path = (tmp_path / "status") - Status.PATH = str(working_path) - # missing path - assert not any(Status.loadall()) - # no status data - working_path.mkdir() - assert not any(Status.loadall()) - # add more entries - for _ in range(5): - Status.start() - (working_path / "empty.json").touch() - assert len(tuple(Status.loadall())) == 5 - -def test_status_07(tmp_path): - """test Status.duration and Status.rate calculations""" - Status.PATH = str(tmp_path) - status = Status.start() - status.start_time = 1 - status.timestamp = 2 - status.iteration = 0 - assert status.duration == 1 + assert status.pid == loaded.pid + assert loaded.results.get("uid1") == ("uid1", 1, "sig1") + assert not loaded._enable_profiling + assert "test" in loaded._profiles + + +def test_status_04(mocker, tmp_path): + """test Status.loadall() - multiple entries""" + getpid = mocker.patch("grizzly.common.status.getpid", autospec=True) + db_file = str(tmp_path / "status.db") + for pid in range(5): + getpid.return_value = pid + Status.start(db_file) + assert len(tuple(Status.loadall(db_file))) == 5 + + +def test_status_05(mocker, tmp_path): + """test Status.loadall() - filter entries by time""" + fake_time = mocker.patch("grizzly.common.status.time", autospec=True) + fake_time.return_value = 1.0 + db_file = str(tmp_path / "status.db") + # create entry + status = Status.start(db_file) + status.results.count("uid1", "sig1") + assert status.results.total == 1 + status.report(force=True) + # load entry + assert any(Status.loadall(db_file, time_limit=60)) + # load with expired entry + fake_time.return_value = 1200.0 + assert not any(Status.loadall(db_file, time_limit=60)) + # load with no limit + assert any(Status.loadall(db_file, time_limit=0)) + # load long running entry with a one month old result + fake_time.return_value = 2592000.0 + status.report(force=True) + loaded = next(Status.loadall(db_file, time_limit=60)) + assert status.start_time == loaded.start_time + assert status.timestamp == loaded.timestamp + assert status.runtime >= loaded.runtime + assert status.ignored == loaded.ignored + assert status.iteration == loaded.iteration + assert status.log_size == loaded.log_size + assert status.pid == loaded.pid + assert loaded.results.get("uid1") == ("uid1", 1, "sig1") + + +def test_status_06(mocker, tmp_path): + """test Status.runtime and Status.rate calculations""" + fake_time = mocker.patch("grizzly.common.status.time", autospec=True) + fake_time.return_value = 1.0 + db_file = str(tmp_path / "status.db") + status = Status.start(db_file) + assert status.start_time == 1 + # test no iterations + fake_time.return_value = 3.0 + assert status.runtime == 2.0 assert status.rate == 0 + # test one iteration status.iteration = 1 - assert status.rate == 1 - status.timestamp += 1 - assert status.rate == 0.5 - -def _client_writer(done, working_path): - """Used by test_status_08""" - # NOTE: this must be at the top level to work on Windows - Status.PATH = working_path - status = Status.start() - try: - while not done.is_set(): - status.iteration += 1 - status.report(force=True) - sleep(0.01) - finally: - status.cleanup() + # timestamp should be ignored when calculating rate and runtime on active object + fake_time.return_value = 5.0 + status.timestamp = 100 + assert status.runtime == 4.0 + assert 
status.rate == 0.25
+    # test loaded
+    status.report(force=True)
+    loaded = next(Status.loadall(db_file))
+    assert loaded.runtime == 4.0
+    assert loaded.rate == 0.25
+    # timestamp should be used when calculating rate and runtime on loaded object
+    loaded.timestamp = 2.0
+    assert loaded.runtime == 1.0
+    assert loaded.rate == 1.0

-def test_status_08(tmp_path):
-    """test Status.loadall() with multiple active reporters"""
-    Status.PATH = str(tmp_path)
-    best_rate = 0
-    done = Event()
+
+# NOTE: this function must be at the top level to work on Windows
+def _client_writer(db_file, begin, count):
+    """Used by test_status_07"""
+    begin.wait(timeout=45)
+    status = Status.start(db_file)
+    for _ in range(count):
+        status.iteration += 1
+        status.report(force=True)
+        sleep(0.01)
+
+
+@mark.parametrize(
+    "loads_in_parallel",
+    [
+        # only test reporting in parallel
+        0,
+        # test reporting and loading in parallel
+        5,
+    ],
+)
+def test_status_07(tmp_path, loads_in_parallel):
+    """test Status.loadall() with multiple active clients in parallel"""
+    begin = Event()
+    clients = 10
+    db_file = str(tmp_path / "status.db")
+    iter_count = 5
     procs = list()
     try:
-        for _ in range(5):
-            procs.append(Process(target=_client_writer, args=(done, Status.PATH)))
+        # create and launch client processes
+        for _ in range(clients):
+            procs.append(
+                Process(target=_client_writer, args=(db_file, begin, iter_count))
+            )
             procs[-1].start()
-        deadline = time() + 60
-        while len(tuple(Status.loadall())) < len(procs):
-            sleep(0.1)
-            assert time() < deadline, "timeout waiting for processes to launch!"
-        for _ in range(20):
-            for obj in Status.loadall():
-                if obj.rate > best_rate:
-                    best_rate = obj.rate
+        # synchronize client processes (not perfect but good enough)
+        begin.set()
+        # attempt parallel loads
+        for _ in range(loads_in_parallel):
+            tuple(Status.loadall(db_file))
+        # wait for processes to report and exit
+        for proc in procs:
+            proc.join(timeout=60)
+            assert proc.exitcode == 0
+        # collect reports
+        reports = tuple(Status.loadall(db_file))
+        # check that each process created a report
+        assert len(reports) == clients
+        # check reported data
+        assert max(x.rate for x in reports) > 0
+        assert sum(x.iteration for x in reports) == iter_count * clients
     finally:
-        done.set()
         for proc in procs:
-            if proc.pid is not None:
-                proc.join()
-    assert best_rate > 0
-    assert not any(Status.loadall())
-
-def test_reducer_stats_01(tmp_path):
-    """test ReducerStats() empty"""
-    ReducerStats.PATH = str(tmp_path)
-    with ReducerStats() as stats:
-        assert stats.error == 0
-        assert stats.failed == 0
-        assert stats.passed == 0
-        stats_file = stats._file
-        assert not isfile(stats_file)
-    assert isfile(stats_file)
-
-def test_reducer_stats_02(tmp_path):
-    """test ReducerStats() simple"""
-    ReducerStats.PATH = str(tmp_path)
-    with ReducerStats() as stats:
-        stats.error += 1
-        stats.failed += 1
-        stats.passed += 1
-    with ReducerStats() as stats:
-        assert stats.error == 1
-        assert stats.failed == 1
-        assert stats.passed == 1
-
-def test_reducer_stats_03(tmp_path):
-    """test ReducerStats() empty/incomplete/invalid data file"""
-    ReducerStats.PATH = str(tmp_path)
-    stats_file = tmp_path / ReducerStats.FILE
-    # missing file
-    with ReducerStats() as stats:
-        stats.passed += 1
-    # invalid empty file
-    stats_file.write_bytes(b"")
-    with ReducerStats() as stats:
-        assert stats.passed == 0
-    # incomplete file
-    stats_file.write_bytes(b"{}")
-    with ReducerStats() as stats:
-        assert stats.passed == 0
-
-def _reducer_client(working_path, limit, unrestrict):
-    """Used
by test_reducer_stats_04""" - # NOTE: this must be at the top level to work on Windows - ReducerStats.PATH = working_path - for _ in range(50): - with ReducerStats() as stats: - stats.passed += 1 - if stats.passed == limit: - unrestrict.set() - unrestrict.wait(timeout=60) - -def test_reducer_stats_04(tmp_path): - """test ReducerStats() with multiple processes""" - ReducerStats.PATH = str(tmp_path) - procs = list() - unrestrict = Event() # used to sync client procs + if proc.exitcode is None: + proc.terminate() + proc.join() + + +def test_status_08(tmp_path): + """test Status.measure() and Status.record() - profiling support""" + db_file = str(tmp_path / "status.db") + # profiling disabled + status = Status.start(db_file, enable_profiling=False) + status.record("x", 10.1) + assert not status._profiles + with status.measure("x"): + pass + assert not status._profiles + # profiling enabled + status = Status.start(db_file, enable_profiling=True) + assert not status._profiles + # initial entry + status.record("test1", 10.1) + assert "test1" in status._profiles + assert status._profiles["test1"]["count"] == 1 + assert status._profiles["test1"]["max"] == 10.1 + assert status._profiles["test1"]["min"] == 10.1 + assert status._profiles["test1"]["total"] == 10.1 + entry = next(status.profile_entries()) + assert entry.name == "test1" + assert entry.count == 1 + assert entry.max == 10.1 + assert entry.min == 10.1 + assert entry.total == 10.1 + # new min + status.record("test1", 0.4) + entry = next(status.profile_entries()) + assert entry.count == 2 + assert entry.max == 10.1 + assert entry.min == 0.4 + assert entry.total == 10.5 + # entry + status.record("test1", 2) + entry = next(status.profile_entries()) + assert entry.count == 3 + assert entry.max == 10.1 + assert entry.min == 0.4 + assert entry.total == 12.5 + # new max + status.record("test1", 99.12) + entry = next(status.profile_entries()) + assert entry.count == 4 + assert entry.max == 99.12 + assert entry.min == 0.4 + assert entry.total == 111.62 + # new name + status.record("test2", 1) + assert "test2" in status._profiles + assert len(status._profiles) == 2 + assert status._profiles["test2"]["count"] == 1 + assert status._profiles["test2"]["max"] == 1 + assert status._profiles["test2"]["min"] == 1 + assert status._profiles["test2"]["total"] == 1 + status.record("test2", 1) + assert status._profiles["test2"]["count"] == 2 + assert status._profiles["test2"]["max"] == 1 + assert status._profiles["test2"]["min"] == 1 + assert status._profiles["test2"]["total"] == 2 + # test measure + with status.measure("no-op"): + pass + assert len(status._profiles) == 3 + assert "no-op" in status._profiles + assert len(tuple(status.profile_entries())) == 3 + + +@mark.parametrize( + "buckets, ratio, iterations, blockers", + [ + # no results + ([], 1, 1, 0), + # one result seen once (not blocker since count == 1) + ([("uid1", "sig1", 1)], 1, 1, 0), + # one result seen 10x (not blocker) + ([("uid1", "sig1", 10)], 100, 10000, 0), + # one result seen 10x (blocker) + ([("uid1", "sig1", 10)], 100, 1000, 1), + # one result seen 95x (blocker) + ([("uid1", "sig1", 95)], 100, 1000, 1), + # multiple results seen once (not blocker since count == 1) + ([("uid1", "sig1", 1), ("uid2", "sig2", 1)], 1, 1, 0), + # multiple results seen once (one blockers) + ([("uid1", "sig1", 1), ("uid2", "sig2", 10)], 1000, 100, 1), + # multiple results seen once (two blockers) + ([("uid1", "sig1", 99), ("uid2", "sig2", 10)], 1000, 100, 2), + ], +) +def test_status_09(tmp_path, buckets, ratio, 
iterations, blockers):
+    """test Status.blockers()"""
+    status = Status.start(str(tmp_path / "status.db"))
+    status.iteration = iterations
+    # populate counter
+    for report_id, desc, count in buckets:
+        for _ in range(count):
+            status.results.count(report_id, desc)
+    # check for blockers
+    assert len(tuple(status.blockers(iters_per_result=ratio))) == blockers
+
+
+def test_status_10(mocker, tmp_path):
+    """test Status() - purge expired entries"""
+    fake_time = mocker.patch("grizzly.common.status.time", autospec=True)
+    db_file = str(tmp_path / "status.db")
+    # purge due to exp_limit
+    fake_time.return_value = 1.0
+    status = Status(db_file=db_file, start_time=1.0, pid=123, exp_limit=10)
+    status.report(force=True)
+    assert any(Status.loadall(db_file, time_limit=60))
+    fake_time.return_value = 20.0
+    Status(db_file=db_file, start_time=20.0, pid=456, exp_limit=10)
+    assert not any(Status.loadall(db_file, time_limit=60))
+    # purge due to matching pid
+    fake_time.return_value = 1.0
+    status = Status(db_file=db_file, start_time=1.0, pid=123, exp_limit=10)
+    status.report(force=True)
+    assert any(Status.loadall(db_file, time_limit=60))
+    Status(db_file=db_file, start_time=1.0, pid=123, exp_limit=10)
+    assert not any(Status.loadall(db_file, time_limit=60))
+
+
+def test_reduce_status_01(mocker, tmp_path):
+    """test ReductionStatus()"""
+    mocker.patch("grizzly.common.status.time", autospec=True, return_value=1.0)
+    strategies = ["strategy_%d" % (idx,) for idx in range(5)]
+
+    def fake_tc_size():
+        return 47
+
+    status = ReductionStatus.start(
+        str(tmp_path / "status.db"),
+        strategies=strategies,
+        testcase_size_cb=fake_tc_size,
+    )
+    assert status is not None
+    assert status.analysis == {}
+    assert status.attempts == 0
+    assert status.iterations == 0
+    assert status.run_params == {}
+    assert status.signature_info == {}
+    assert status.successes == 0
+    assert status.current_strategy_idx is None
+    assert status._testcase_size_cb is fake_tc_size
+    assert status.crash_id is None
+    assert status.finished_steps == []
+    assert status._in_progress_steps == []
+    assert status.strategies == strategies
+    assert status._db_file is not None
+    assert status.pid is not None
+    assert status.timestamp > 0.0
+    assert status._current_size is None
+
+
+def test_reduce_status_02(tmp_path):
+    """test ReductionStatus.report()"""
+    status = ReductionStatus.start(
+        str(tmp_path / "status.db"),
+        testcase_size_cb=lambda: 47,
+    )
+    # try to report before REPORT_FREQ elapses
+    assert not status.report()
+    # REPORT_FREQ elapses
+    status.timestamp = 0
+    assert status.report()
+    assert status.timestamp > 0
+    # force report
+    future = int(time()) + 1000
+    status.timestamp = future
+    assert status.report(force=True)
+    assert status.timestamp < future
+
+
+def test_reduce_status_03(tmp_path):
+    """test ReductionStatus.loadall()"""
+    db_file = str(tmp_path / "status.db")
+    strategies = ["strategy_%d" % (idx,) for idx in range(5)]
+    # create simple entry
+    status = ReductionStatus.start(
+        str(tmp_path / "status.db"),
+        strategies=strategies,
+        testcase_size_cb=lambda: 47,
+    )
+    loaded = tuple(ReductionStatus.loadall(db_file))
+    assert len(loaded) == 1
+    loaded = loaded[0]
+    assert status.analysis == loaded.analysis
+    assert status.attempts == loaded.attempts
+    assert status.iterations == loaded.iterations
+    assert status.run_params == loaded.run_params
+    assert status.signature_info == loaded.signature_info
+    assert status.successes == loaded.successes
+    assert status.current_strategy_idx == loaded.current_strategy_idx
+    assert
loaded._testcase_size_cb is None
+    assert status.crash_id == loaded.crash_id
+    assert status.finished_steps == loaded.finished_steps
+    assert status._in_progress_steps == loaded._in_progress_steps
+    assert status.strategies == loaded.strategies
+    assert status._db_file is not None
+    assert status.pid == loaded.pid
+    assert status.timestamp == loaded.timestamp
+    assert loaded._current_size == 47
+    assert loaded._testcase_size() == 47
+
+
+def test_reduce_status_04(mocker, tmp_path):
+    """test ReductionStatus.loadall() - multiple entries"""
+    getpid = mocker.patch("grizzly.common.status.getpid", autospec=True)
+    db_file = str(tmp_path / "status.db")
+    for pid in range(5):
+        getpid.return_value = pid
+        ReductionStatus.start(
+            db_file,
+            testcase_size_cb=lambda: 47,
+        )
+    assert len(tuple(ReductionStatus.loadall(db_file))) == 5
+
+
+def test_reduce_status_05(mocker, tmp_path):
+    """test ReductionStatus milestone measurements"""
+    strategies = ["strategy_%d" % (idx,) for idx in range(5)]
+
+    # (time, testcase_size) steps to manually advance through
+    ticks = [
+        (0, 1000),
+        (1, 900),
+        (2, 800),
+        (3, 700),
+        (4, 600),
+        (5, 500),
+    ]
+
+    mocker.patch(
+        "grizzly.common.status.time",
+        autospec=True,
+        side_effect=lambda: ticks[0][0],
+    )
+    testcase_size_cb = mocker.Mock(side_effect=lambda: ticks[0][1])
+    status = ReductionStatus.start(
+        str(tmp_path / "status.db"),
+        strategies=strategies,
+        testcase_size_cb=testcase_size_cb,
+    )
+    status.record("begin")
+    assert status.original.name == "begin"
+    assert status.total.name == "begin"
+    assert status.current_strategy.name == "begin"
+    with status.measure("overall"):
+        assert status.original.name == "begin"
+        assert status.total.name == "overall"
+        assert status.current_strategy.name == "overall"
+        for idx in range(5):
+            with status.measure(strategies[idx]):
+                ticks.pop(0)
+                status.attempts += 2
+                status.successes += 1
+                status.iterations += 10
+                assert status.original.name == "begin"
+                assert status.total.name == "overall"
+                assert status.current_strategy.name == "strategy_%d" % (idx,)
+    assert status.finished_steps == [
+        ReductionStep("begin", None, None, None, 1000, None),
+        ReductionStep("strategy_0", 1, 1, 2, 900, 10),
+        ReductionStep("strategy_1", 1, 1, 2, 800, 10),
+        ReductionStep("strategy_2", 1, 1, 2, 700, 10),
+        ReductionStep("strategy_3", 1, 1, 2, 600, 10),
+        ReductionStep("strategy_4", 1, 1, 2, 500, 10),
+        ReductionStep("overall", 5, 5, 10, 500, 50),
+    ]
+    assert status.original.name == "begin"
+    assert status.total.name == "overall"
+    assert status.current_strategy.name == "overall"
+
+
+def test_reduce_status_06(mocker, tmp_path):
+    """test ReductionStatus in-progress milestones"""
+    mocker.patch("grizzly.common.status.time", autospec=True, return_value=1.0)
+    status = ReductionStatus.start(
+        str(tmp_path / "status.db"),
+        testcase_size_cb=lambda: 47,
+    )
+    with status.measure("milestone"):
+        assert len(status.finished_steps) == 0
+        status2 = status.copy()
+        assert len(status2.finished_steps) == 1
+
+    assert status.analysis == status2.analysis
+    assert status.attempts == status2.attempts
+    assert status.iterations == status2.iterations
+    assert status.run_params == status2.run_params
+    assert status.signature_info == status2.signature_info
+    assert status.successes == status2.successes
+    assert status.current_strategy_idx == status2.current_strategy_idx
+    assert status._testcase_size_cb is status2._testcase_size_cb
+    assert status.crash_id == status2.crash_id
+    assert status.finished_steps == status2.finished_steps
+    assert
status._in_progress_steps == status2._in_progress_steps + assert status.strategies == status2.strategies + assert status._db_file is not None + assert status2._db_file is not None + assert status.pid == status2.pid + assert status.timestamp == status2.timestamp + + with status.measure("milestone2"): + status.report(force=True) + + loaded_status = tuple(ReductionStatus.loadall(str(tmp_path / "status.db"))) + assert len(loaded_status) == 1 + loaded_status = loaded_status[0] + + assert loaded_status.finished_steps == status.finished_steps[:1] + assert len(loaded_status._in_progress_steps) == 1 + + loaded_status = loaded_status.copy() + assert len(loaded_status.finished_steps) == 2 + assert len(loaded_status._in_progress_steps) == 0 + assert loaded_status.original == status.original + for field in ReductionStep._fields: + if field == "size": + continue + assert getattr(loaded_status.total, field) == getattr(status.total, field) + assert loaded_status.total.size is None + + +def test_reduce_status_07(mocker, tmp_path): + """test ReductionStatus metadata""" + reporter = mocker.Mock(spec_set=FuzzManagerReporter) + status = ReductionStatus.start( + str(tmp_path / "status.db"), + testcase_size_cb=lambda: 47, + crash_id=123, + ) + status.analysis["thing"] = "done" + status.record("init") + status.run_params["knob"] = "turned" + status.signature_info["dumb"] = True + status.add_to_reporter(reporter) + assert reporter.add_extra_metadata.call_args_list == [ + mocker.call("reducer-stats", status.finished_steps), + mocker.call("reducer-analysis", status.analysis), + mocker.call("reducer-params", status.run_params), + mocker.call("reducer-sig", status.signature_info), + mocker.call("reducer-input", status.crash_id), + ] + + +@mark.parametrize( + "keys, counts, limit, local_only", + [ + # no records + (["a"], [0], 1, True), + (["a"], [0], 1, False), + # single record (not frequent) + (["a"], [1], 2, True), + (["a"], [1], 2, False), + # single record (frequent) + (["a"], [1], 1, True), + (["a"], [1], 1, False), + # single record no limit + (["a"], [1], 0, True), + (["a"], [1], 0, False), + # multiple records + (["a", "b", "c"], [1, 2, 10], 5, True), + (["a", "b", "c"], [1, 2, 10], 5, False), + ], +) +def test_report_counter_01(tmp_path, keys, counts, limit, local_only): + """test ResultCounter local functionality""" + db_path = None if local_only else str(tmp_path / "storage.db") + counter = ResultCounter(1, db_file=db_path, freq_limit=limit) + for report_id, count in zip(keys, counts): + assert counter.get(report_id) == (report_id, 0, None) + assert not counter.is_frequent(report_id) + # call count() with report_id 'count' times + for current in range(1, count + 1): + assert counter.count(report_id, "desc") == current + # test get() + if sum(counts) > 0: + assert counter.get(report_id) == (report_id, count, "desc") + else: + assert counter.get(report_id) == (report_id, count, None) + # test is_frequent() + if count > limit > 0: + assert counter.is_frequent(report_id) + elif limit > 0: + assert not counter.is_frequent(report_id) + # test mark_frequent() + counter.mark_frequent(report_id) + assert counter.is_frequent(report_id) + else: + assert limit == 0 + for _report_id, count, _desc in counter.all(): + assert count > 0 + assert counter.total == sum(counts) + + +def test_report_counter_02(mocker, tmp_path): + """test ResultCounter multi instance functionality""" + fake_time = mocker.patch("grizzly.common.status.time", autospec=True) + fake_time.return_value = 1 + db_path = str(tmp_path / "storage.db") + 
counter_a = ResultCounter(1, db_file=db_path, freq_limit=0) + counter_b = ResultCounter(2, db_file=db_path, freq_limit=1) + counter_c = ResultCounter(3, db_file=db_path, freq_limit=2) + # local counts are 0, global (all counters) count is 0 + assert not counter_a.is_frequent("a") + assert not counter_b.is_frequent("a") + assert not counter_c.is_frequent("a") + # local (counter_a, bucket a) count is 1, global (all counters) count is 1 + assert counter_a.count("a", "desc") == 1 + assert not counter_a.is_frequent("a") + assert not counter_b.is_frequent("a") + assert not counter_c.is_frequent("a") + # local (counter_b, bucket a) count is 1, global (all counters) count is 2 + assert counter_b.count("a", "desc") == 1 + assert not counter_a.is_frequent("a") + assert not counter_b.is_frequent("a") + assert not counter_c.is_frequent("a") + # local (counter_b, bucket a) count is 2, global (all counters) count is 3 + # locally exceeded + assert counter_b.count("a", "desc") == 2 + assert counter_b.is_frequent("a") + # local (counter_c, bucket a) count is 1, global (all counters) count is 4 + assert counter_c.count("a", "desc") == 1 + assert not counter_a.is_frequent("a") + assert counter_b.is_frequent("a") + assert not counter_c.is_frequent("a") + # local (counter_a, bucket a) count is 2, global (all counters) count is 5 + # no limit + assert counter_a.count("a", "desc") == 2 + assert not counter_a.is_frequent("a") + # local (counter_c, bucket a) count is 2, global (all counters) count is 6 + # locally not exceeded, globally exceeded + assert counter_c.count("a", "desc") == 2 + assert counter_c.is_frequent("a") + # local (counter_a, bucket x) count is 0, global (all counters) count is 0 + assert not counter_a.is_frequent("x") + # remove 'expired' reports + fake_time.return_value = 1000 + counter_d = ResultCounter(4, db_file=db_path, freq_limit=2, exp_limit=10) + # local (counter_d, bucket a) count is 0, global (all counters) count is 0 + assert not counter_d.is_frequent("a") + assert counter_a.total == 2 + assert counter_b.total == 2 + assert counter_c.total == 2 + assert counter_d.total == 0 + + +def test_report_counter_03(mocker, tmp_path): + """test ResultCounter.load()""" + fake_time = mocker.patch("grizzly.common.status.time", autospec=True) + fake_time.return_value = 1 + db_path = str(tmp_path / "storage.db") + # load empty db + assert not ResultCounter.load(db_path, 10) + # create counter + counter = ResultCounter(123, db_file=db_path, exp_limit=1) + counter.count("a", "desc_a") + fake_time.return_value = 2 + counter.count("a", "desc_a") + fake_time.return_value = 3 + counter.count("b", "desc_b") + # filter out reports by time + fake_time.return_value = 4 + # last 1 second + assert not ResultCounter.load(db_path, 1) + # last 2 seconds + loaded = ResultCounter.load(db_path, 2)[0] + assert loaded.total == 1 + assert loaded.get("b") == ("b", 1, "desc_b") + # last 3 seconds + loaded = ResultCounter.load(db_path, 3)[0] + assert loaded.get("a") == ("a", 2, "desc_a") + assert loaded.total == 3 + # increase time limit + fake_time.return_value = 4 + loaded = ResultCounter.load(db_path, 10)[0] + assert loaded.total == counter.total == 3 + assert loaded.get("a") == ("a", 2, "desc_a") + assert loaded.get("b") == ("b", 1, "desc_b") + + +def test_report_counter_04(mocker, tmp_path): + """test ResultCounter remove expired entries""" + fake_time = mocker.patch("grizzly.common.status.time", autospec=True) + fake_time.return_value = 1 + db_path = str(tmp_path / "storage.db") + counter = ResultCounter(123, 
db_file=db_path, exp_limit=0) + counter.count("a", "desc_a") + fake_time.return_value = 100 + counter.count("b", "desc_b") + loaded = ResultCounter.load(db_path, 100)[0] + assert loaded.total == 2 + # set exp_limit to zero to skip removing expired results + ResultCounter(124, db_file=db_path, exp_limit=0) + loaded = ResultCounter.load(db_path, 100)[0] + assert loaded.total == 2 + # clear expired records from database by setting exp_limit + ResultCounter(125, db_file=db_path, exp_limit=10) + loaded = ResultCounter.load(db_path, 100)[0] + assert loaded.total == 1 + # clear expired records from database by using duplicate pid + ResultCounter(123, db_file=db_path, exp_limit=1000) + assert not ResultCounter.load(db_path, 100) + + +def test_db_version_check_01(tmp_path): + """test _db_version_check()""" + db_path = str(tmp_path / "storage.db") try: - proc_count = 5 - for _ in range(proc_count): - procs.append(Process( - target=_reducer_client, args=(ReducerStats.PATH, proc_count, unrestrict))) - procs[-1].start() + con = connect(db_path) + # empty db + assert _db_version_check(con, expected=DB_VERSION) + # no update needed + assert not _db_version_check(con, expected=DB_VERSION) + # add db contents + Status.start(db_path) + # force update + assert _db_version_check(con, expected=DB_VERSION + 1) finally: - unrestrict.set() - for proc in procs: - if proc.pid is not None: - proc.join() - with ReducerStats() as stats: - assert stats.passed == 250 + con.close() diff --git a/grizzly/common/test_status_reporter.py b/grizzly/common/test_status_reporter.py index 39a514c5..1aaf57c5 100644 --- a/grizzly/common/test_status_reporter.py +++ b/grizzly/common/test_status_reporter.py @@ -5,183 +5,425 @@ """test Grizzly status reporter""" # pylint: disable=protected-access -import re +from itertools import count +from re import match +from unittest.mock import Mock -import pytest +from pytest import mark, raises + +from .status_reporter import ( + ReductionStatus, + ReductionStatusReporter, + Status, + StatusReporter, + TracebackReport, + main, +) + +GBYTES = 1_073_741_824 -from .status_reporter import main, ReducerStats, Status, StatusReporter, TracebackReport def _fake_sys_info(): - return "CPU & Load : 64 @ 93.1% (85.25, 76.21, 51.06)\n" \ - " Memory : 183.9GB of 251.9GB free\n" \ - " Disk : 22.2GB of 28.7GB free" + return [ + ("CPU & Load", "64 @ 93% (85.25, 76.21, 51.06)"), + ("Memory", "183.9GB of 251.9GB free"), + ("Disk", "22.2GB of 28.7GB free"), + ] + + +def test_reduce_status_reporter_01(): + """test basic ReductionStatusReporter""" + st_rpt = ReductionStatusReporter(None) + assert not st_rpt.has_results + st_rpt._sys_info = _fake_sys_info + assert "No status reports available" in st_rpt.specific() + assert "No status reports available" in st_rpt.summary() + + +def test_reduce_status_reporter_02(mocker, tmp_path): + """test ReductionStatusReporter.load()""" + # missing reports path + st_rpt = ReductionStatusReporter.load(str(tmp_path / "status.db")) + assert not st_rpt.reports + + # empty reports and tb paths + st_rpt = ReductionStatusReporter.load( + str(tmp_path / "status.db"), tb_path=str(tmp_path) + ) + assert not st_rpt.reports + assert isinstance(st_rpt.tracebacks, list) + assert not st_rpt.tracebacks + + # multiple reports + size_cb = mocker.Mock(side_effect=count(start=1000, step=-100)) + db_file = str(tmp_path / "status.db") + ReductionStatus.start( + db_file=db_file, + testcase_size_cb=size_cb, + ) + status2 = ReductionStatus( + db_file=db_file, + pid=1, + testcase_size_cb=size_cb, + ) + 
status2.report(force=True) + st_rpt = ReductionStatusReporter.load(db_file) + assert len(st_rpt.reports) > 1 + + +def test_reduce_status_reporter_03(mocker, tmp_path): + """test ReductionStatusReporter.summary()""" + mocker.patch("grizzly.common.status.getpid", side_effect=(1, 2)) + mocker.patch("grizzly.common.status.time", side_effect=count(start=1.0, step=1.0)) + size_cb = mocker.Mock(side_effect=count(start=1000, step=-100)) + db_file = str(tmp_path / "status.db") + # single report + status = ReductionStatus.start( + db_file=db_file, + testcase_size_cb=size_cb, + crash_id=123, + tool="fuzzmatic", + ) + status.analysis["ran"] = True + status.run_params["speed"] = 123.0 + status.signature_info["info"] = "crash" + status.last_reports.append(45678) + status.record("init") + with status.measure("total"): + with status.measure("strategy_0"): + status.attempts += 1 + status.successes += 1 + status.iterations += 1 + with status.measure("strategy_1"): + status.attempts += 3 + status.successes += 1 + status.iterations += 3 + status.report(force=True) + + rptr = ReductionStatusReporter.load(db_file) + rptr._sys_info = _fake_sys_info + assert rptr.reports + output = rptr.summary(sysinfo=True, timestamp=True) + assert "duration" in output + assert "successes" in output + assert "attempts" in output + assert "init" in output + assert "strategy_0" in output + assert "strategy_1" in output + assert "total" in output + assert "Timestamp" in output + assert len(output.splitlines()) == 16 + + +def test_reduce_status_reporter_04(mocker, tmp_path): + """test ReductionStatusReporter.specific()""" + mocker.patch("grizzly.common.status.getpid", side_effect=(1, 2)) + db_file = str(tmp_path / "status.db") + # single report + status = ReductionStatus.start( + db_file=db_file, + strategies=["strategy_0"], + testcase_size_cb=lambda: 47, + crash_id=12, + tool="fuzzmatic", + ) + assert status.original is None + rptr = ReductionStatusReporter.load(db_file) + assert rptr.reports + output = rptr.specific() + assert len(output.splitlines()) == 1 + status.analysis["ran"] = True + status.run_params["splines"] = "reticulated" + status.last_reports.append(45678) + status.record("init") + with status.measure("total"): + with status.measure("strategy_0"): + status.iterations = 1 + status.attempts = 1 + status.successes = 1 + status.report(force=True) + rptr = ReductionStatusReporter.load(db_file) + assert rptr.reports + output = rptr.specific() + assert len(output.splitlines()) == 8 + assert "Analysis" in output + assert "Run Parameters" in output + assert "Current Strategy" in output + assert "Current/Original" in output + assert "Results" in output + assert "Time Elapsed" in output + + +def test_reduce_status_reporter_05(tmp_path): + """test ReductionStatusReporter.load() with traceback""" + db_file = str(tmp_path / "status.db") + status = ReductionStatus.start( + db_file=db_file, + testcase_size_cb=lambda: 47, + ) + with status.measure("total"): + status.iteration = 1 + status.report(force=True) + # create boring screenlog + (tmp_path / "screenlog.0").write_bytes(b"boring\ntest\n123\n") + # create first screenlog + with (tmp_path / "screenlog.1").open("wb") as test_fp: + test_fp.write(b"Traceback (most recent call last):\n") + test_fp.write(b" blah\n") + test_fp.write(b"IndexError: list index out of range\n") + rptr = StatusReporter.load(db_file, tb_path=str(tmp_path)) + assert len(rptr.tracebacks) == 1 + # create second screenlog + with (tmp_path / "screenlog.1234").open("wb") as test_fp: + test_fp.write(b"Traceback (most 
recent call last):\n") + test_fp.write(b" blah\n") + test_fp.write(b"foo.bar.error: blah\n") + rptr = StatusReporter.load(db_file, tb_path=str(tmp_path)) + assert len(rptr.tracebacks) == 2 + # create third screenlog + with (tmp_path / "screenlog.3").open("wb") as test_fp: + test_fp.write(b"Traceback (most recent call last):\n") + test_fp.write(b" blah\n") + test_fp.write(b"KeyboardInterrupt\n") + rptr = ReductionStatusReporter.load(db_file, tb_path=str(tmp_path)) + assert len(rptr.tracebacks) == 2 + merged_log = rptr.summary() + assert len(merged_log.splitlines()) == 13 + assert "screenlog.1" in merged_log + assert "screenlog.1234" in merged_log + assert "IndexError" in merged_log + assert "foo.bar.error" in merged_log + assert "screenlog.3" not in merged_log -def test_status_reporter_01(tmp_path): + +def test_status_reporter_01(): """test basic StatusReporter""" st_rpt = StatusReporter(list()) + assert not st_rpt.has_results st_rpt._sys_info = _fake_sys_info - assert "No status reports available" in st_rpt._specific() - report = tmp_path / "output.txt" - st_rpt.dump_specific(str(report)) - assert report.is_file() - st_rpt.print_specific() - assert "No status reports available" in st_rpt._summary() - report.unlink() - st_rpt.dump_summary(str(report)) - assert report.is_file() - st_rpt.print_summary() + assert "No status reports available" in st_rpt.specific() + assert "No status reports available" in st_rpt.summary() + def test_status_reporter_02(tmp_path): """test StatusReporter.load()""" - Status.PATH = str(tmp_path / "missing") # missing reports path - st_rpt = StatusReporter.load() + st_rpt = StatusReporter.load(str(tmp_path / "status.db")) assert not st_rpt.reports - # missing tb path - Status.PATH = str(tmp_path) - with pytest.raises(OSError): - StatusReporter.load(tb_path="no_dir") # empty reports and tb paths - st_rpt = StatusReporter.load(tb_path=str(tmp_path)) + st_rpt = StatusReporter.load(str(tmp_path / "status.db"), tb_path=str(tmp_path)) assert isinstance(st_rpt.reports, list) assert not st_rpt.reports assert isinstance(st_rpt.tracebacks, list) assert not st_rpt.tracebacks -def test_status_reporter_03(mocker): + +@mark.parametrize( + "disk, memory, getloadavg", + [ + (Mock(free=12, total=GBYTES), Mock(available=12, total=GBYTES), None), + ( + Mock(free=10.23 * GBYTES, total=100 * GBYTES), + Mock(available=1.1 * GBYTES, total=2 * GBYTES), + None, + ), + ( + Mock(free=12, total=GBYTES), + Mock(available=12, total=GBYTES), + lambda: (0.12, 0.34, 0.56), + ), + ], +) +def test_status_reporter_03(mocker, disk, memory, getloadavg): """test StatusReporter._sys_info()""" - gbs = 1024 * 1024 * 1024 - fake_psutil = mocker.patch("grizzly.common.status_reporter.psutil", autospec=True) - fake_psutil.cpu_count.return_value = 4 - fake_psutil.cpu_percent.return_value = 10.0 - fake_psutil.virtual_memory.return_value = mocker.Mock(available=12, total=gbs) - fake_psutil.disk_usage.return_value = mocker.Mock(free=12, total=gbs) - sysinfo = StatusReporter._sys_info() - assert "MB" in sysinfo - fake_psutil.virtual_memory.return_value = mocker.Mock(available=1.1 * gbs, total=2 * gbs) - fake_psutil.disk_usage.return_value = mocker.Mock(free=10.23 * gbs, total=100 * gbs) + mocker.patch( + "grizzly.common.status_reporter.cpu_count", autospec=True, return_value=4 + ) + mocker.patch( + "grizzly.common.status_reporter.cpu_percent", autospec=True, return_value=10 + ) + mocker.patch( + "grizzly.common.status_reporter.disk_usage", autospec=True, return_value=disk + ) + mocker.patch( + 
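
The `ReductionStatus` plumbing driving the ReductionStatusReporter tests above records timing per strategy via nested `measure()` context managers. A sketch of the producer side, using only calls that appear in these tests (the db file name and size callback are placeholders):

```
from grizzly.common.status import ReductionStatus

status = ReductionStatus.start(
    db_file="reduce-status.db",
    testcase_size_cb=lambda: 1024,  # current test case size in bytes
    strategies=["strategy_0"],
)
status.record("init")
with status.measure("total"):
    with status.measure("strategy_0"):
        status.iterations += 1
        status.attempts += 1
        status.successes += 1
status.report(force=True)
```
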
"grizzly.common.status_reporter.virtual_memory", + autospec=True, + return_value=memory, + ) + if getloadavg is None: + # simulate platform that does not have os.getloadavg() + mocker.patch("grizzly.common.status_reporter.getloadavg", None) + else: + mocker.patch( + "grizzly.common.status_reporter.getloadavg", side_effect=getloadavg + ) sysinfo = StatusReporter._sys_info() - assert "MB" not in sysinfo - lines = sysinfo.split("\n") - assert len(lines) == 3 - assert "CPU & Load : " in lines[0] - assert "Memory : " in lines[1] - assert "Disk : " in lines[2] - # verify alignment - position = len(lines[0].split(":")[0]) - for line in lines: - assert line[position] == ":" + assert len(sysinfo) == 3 + assert sysinfo[0][0] == "CPU & Load" + assert sysinfo[1][0] == "Memory" + assert sysinfo[2][0] == "Disk" + if getloadavg is not None: + assert sysinfo[0][-1].endswith(" (0.1, 0.3, 0.6)") + if disk.free < GBYTES or memory.available < GBYTES: + assert "MB" in sysinfo[1][-1] + assert "MB" in sysinfo[2][-1] + else: + assert "MB" not in sysinfo[1][-1] + assert "MB" not in sysinfo[2][-1] + def test_status_reporter_04(tmp_path): """test StatusReporter._scan()""" - re_filter = re.compile("TEST_FILE") (tmp_path / "somefile.txt").touch() test_path = tmp_path / "TEST_FILE" test_path.mkdir() - assert not any(StatusReporter._scan(str(tmp_path), re_filter)) + assert not any(StatusReporter._scan(str(tmp_path), "TEST_FILE")) test_path.rmdir() test_path.touch() - assert not any(StatusReporter._scan(str(tmp_path), re_filter)) + assert not any(StatusReporter._scan(str(tmp_path), "TEST_FILE")) test_path.write_bytes(b"test") - assert tuple(StatusReporter._scan(str(tmp_path), re_filter)) + assert any(StatusReporter._scan(str(tmp_path), "TEST_FILE")) + -def test_status_reporter_05(tmp_path): - """test StatusReporter._summary()""" - Status.PATH = str(tmp_path) +def test_status_reporter_05(mocker, tmp_path): + """test StatusReporter.summary()""" + mocker.patch("grizzly.common.status.getpid", side_effect=(1, 2)) + mocker.patch("grizzly.common.status.time", side_effect=count(start=1.0, step=1.0)) + db_file = str(tmp_path / "status.db") # single report - status = Status.start() + status = Status.start(db_file=db_file) status.ignored = 0 status.iteration = 1 status.log_size = 0 status.report(force=True) - rptr = StatusReporter.load() + rptr = StatusReporter.load(db_file) rptr._sys_info = _fake_sys_info assert rptr.reports is not None assert len(rptr.reports) == 1 - output = rptr._summary(runtime=False) + output = rptr.summary(runtime=False) assert "Iteration" in output assert "Rate" in output assert "Results" in output - assert "ignored" not in output + assert "Blockers" not in output + assert "Ignored" not in output assert "Logs" not in output assert "Runtime" not in output assert "Timestamp" not in output assert len(output.split("\n")) == 3 # multiple reports - status = Status.start() - status.start_time += 66.0 + status = Status.start(db_file=db_file) status.ignored = 1 status.iteration = 8 status.log_size = 86900000 + status.results.count("test", "test") + status.results.count("test", "test") status.report(force=True) - rptr = StatusReporter.load() + rptr = StatusReporter.load(db_file) rptr._sys_info = _fake_sys_info assert len(rptr.reports) == 2 - output = rptr._summary(sysinfo=True, timestamp=True) + output = rptr.summary(sysinfo=True, timestamp=True) assert "Iteration" in output assert "Rate" in output assert "Results" in output - assert "ignored" in output + assert "Ignored" in output assert "Logs" in output assert 
"Runtime" in output assert "Timestamp" in output lines = output.split("\n") - assert len(lines) == 9 + assert len(lines) == 10 # verify alignment position = len(lines[0].split(":")[0]) for line in lines: - assert re.match(r"\S\s:\s\S", line[position - 2:]) + assert match(r"\S\s:\s\S", line[position - 2 :]) + def test_status_reporter_06(mocker, tmp_path): - """test StatusReporter._specific()""" - Status.PATH = str(tmp_path) + """test StatusReporter.specific()""" + mocker.patch("grizzly.common.status.getpid", side_effect=(1, 2)) + db_file = str(tmp_path / "status.db") # single report - status = Status.start() + status = Status.start(db_file=db_file) status.ignored = 0 status.iteration = 1 status.log_size = 0 status.report(force=True) - rptr = StatusReporter.load() + rptr = StatusReporter.load(db_file) assert rptr.reports is not None - output = rptr._specific() - assert len(output.split("\n")[:-1]) == 2 - assert "Ignored" in output - assert "Iteration" in output - assert "Rate" in output + output = rptr.specific() + assert len(output.strip().split("\n")) == 4 + assert "Ignored" not in output + assert "Iterations" in output assert "Results" in output - assert "EXPIRED" not in output + assert "(Blockers detected)" not in output + assert "Runtime" in output # multiple reports - status = Status.start() + status = Status.start(db_file=db_file, enable_profiling=True) status.ignored = 1 - status.iteration = 432422 - status._results = {"sig": 123} + status.iteration = 50 + status.results.count("uid1", "sig1") + status.results.count("uid1", "sig1") + status.results.count("uid1", "sig1") + status.record("test1", 0.91) + status.record("test1", 1.0) + status.record("test1", 1.23456) + status.record("test2", 1201.1) status.report(force=True) - rptr = StatusReporter.load() + rptr = StatusReporter.load(db_file) assert len(rptr.reports) == 2 - output = rptr._specific() - assert len(output.split("\n")[:-1]) == 4 + output = rptr.specific() + assert len(output.strip().split("\n")) == 13 assert "Ignored" in output - assert "Iteration" in output - assert "Rate" in output + assert "Iterations" in output assert "Results" in output - assert "EXPIRED" not in output - # expired report - mocker.patch("grizzly.common.status.time", return_value=1.0) - status = Status.start() - status.ignored = 1 + assert "Runtime" in output + assert "(Blockers detected)" in output + assert "Profiling entries" in output + assert "test1" in output + assert "test2" in output + + +def test_status_reporter_07(mocker, tmp_path): + """test StatusReporter.results()""" + mocker.patch("grizzly.common.status.getpid", side_effect=(1, 2, 3)) + db_file = str(tmp_path / "status.db") + # single report without results + status = Status.start(db_file=db_file) + status.ignored = 0 + status.iteration = 1 + status.log_size = 0 + status.report(force=True) + rptr = StatusReporter.load(db_file) + assert rptr.reports is not None + assert len(rptr.reports) == 1 + assert not rptr.has_results + assert rptr.results() == "No results available\n" + # multiple reports with results + status = Status.start(db_file=db_file) + status.iteration = 1 + status.results.count("uid1", "[@ test1]") + status.results.count("uid2", "[@ test2]") + status.results.count("uid1", "[@ test1]") + status.report(force=True) + status = Status.start(db_file=db_file) status.iteration = 1 + status.results.count("uid1", "[@ test1]") + status.results.count("uid3", "[@ longsignature123]") status.report(force=True) - rptr = StatusReporter.load() + rptr = StatusReporter.load(db_file) + assert 
rptr.has_results assert len(rptr.reports) == 3 - output = rptr._specific() - assert len(output.split("\n")[:-1]) == 5 - assert "EXPIRED" in output + output = rptr.results(max_len=19) + assert "3 : [@ test1]" in output + assert "1 : [@ test2]" in output + assert "1 : [@ longsignature..." in output + assert "(* = Blocker)" in output + assert len(output.strip().split("\n")) == 4 + -def test_status_reporter_07(tmp_path): +def test_status_reporter_08(tmp_path): """test StatusReporter.load() with traceback""" - (tmp_path / "status").mkdir() - Status.PATH = str(tmp_path / "status") - status = Status.start() + db_file = str(tmp_path / "status.db") + status = Status.start(db_file=db_file) status.ignored = 0 status.iteration = 1 status.log_size = 0 @@ -193,23 +435,23 @@ def test_status_reporter_07(tmp_path): test_fp.write(b"Traceback (most recent call last):\n") test_fp.write(b" blah\n") test_fp.write(b"IndexError: list index out of range\n") - rptr = StatusReporter.load(tb_path=str(tmp_path)) + rptr = StatusReporter.load(db_file, tb_path=str(tmp_path)) assert len(rptr.tracebacks) == 1 # create second screenlog with (tmp_path / "screenlog.1234").open("wb") as test_fp: test_fp.write(b"Traceback (most recent call last):\n") test_fp.write(b" blah\n") test_fp.write(b"foo.bar.error: blah\n") - rptr = StatusReporter.load(tb_path=str(tmp_path)) + rptr = StatusReporter.load(db_file, tb_path=str(tmp_path)) assert len(rptr.tracebacks) == 2 # create third screenlog with (tmp_path / "screenlog.3").open("wb") as test_fp: test_fp.write(b"Traceback (most recent call last):\n") test_fp.write(b" blah\n") test_fp.write(b"KeyboardInterrupt\n") - rptr = StatusReporter.load(tb_path=str(tmp_path)) + rptr = StatusReporter.load(db_file, tb_path=str(tmp_path)) assert len(rptr.tracebacks) == 2 - merged_log = rptr._summary() + merged_log = rptr.summary() assert len(merged_log.splitlines()) == 14 assert "screenlog.1" in merged_log assert "screenlog.1234" in merged_log @@ -217,118 +459,71 @@ def test_status_reporter_07(tmp_path): assert "foo.bar.error" in merged_log assert "screenlog.3" not in merged_log -def test_status_reporter_08(tmp_path): + +def test_status_reporter_09(tmp_path): """test StatusReporter.load() no reports with traceback""" - (tmp_path / "status").mkdir() - Status.PATH = str(tmp_path / "status") # create screenlog with tb with (tmp_path / "screenlog.1").open("wb") as test_fp: test_fp.write(b"Traceback (most recent call last):\n") test_fp.write(b" blah\n") test_fp.write(b"IndexError: list index out of range\n") - rptr = StatusReporter.load(tb_path=str(tmp_path)) + rptr = StatusReporter.load(str(tmp_path / "status.db"), tb_path=str(tmp_path)) rptr._sys_info = _fake_sys_info assert len(rptr.tracebacks) == 1 - output = rptr._summary() + output = rptr.summary() assert len(output.splitlines()) == 7 assert "No status reports available" in output assert "IndexError" in output -def test_status_reporter_09(tmp_path): + +def test_status_reporter_10(mocker, tmp_path): """test StatusReporter.summary() limit with traceback""" - (tmp_path / "status").mkdir() - Status.PATH = str(tmp_path / "status") + mocker.patch("grizzly.common.status.getpid", side_effect=(1, 2)) + db_file = str(tmp_path / "status.db") # create reports - status = Status.start() + status = Status.start(db_file=db_file) status.ignored = 100 status.iteration = 1000 status.log_size = 9999999999 - status._results = {"sig": 123} + status.results.count("uid1", "[@ sig1]") + status.results._count["uid1"] = 123 status.report(force=True) - status = Status.start() + 
status = Status.start(db_file=db_file) status.ignored = 9 status.iteration = 192938 status.log_size = 0 - status._results = {"sig": 3} + status.results.count("uid2", "[@ sig2]") + status.results._count["uid2"] = 3 status.report(force=True) # create screenlogs with tracebacks for i in range(10): with (tmp_path / ("screenlog.%d" % (i,))).open("wb") as test_fp: test_fp.write(b"Traceback (most recent call last):\n") for j in range(TracebackReport.MAX_LINES): - test_fp.write(b" File \"some/long/path/name/foobar.py\", line 5000, in <module>\n") + test_fp.write( + b' File "some/long/path/name/foobar.py", line 5000, in <module>\n' + ) test_fp.write(b" some_long_name_for_a_func_%04d()\n" % (j,)) test_fp.write(b"IndexError: list index out of range\n") - rptr = StatusReporter.load(tb_path=str(tmp_path)) + rptr = StatusReporter.load(db_file, tb_path=str(tmp_path)) rptr._sys_info = _fake_sys_info assert len(rptr.tracebacks) == 10 - merged_log = rptr._summary(runtime=True, sysinfo=True, timestamp=True) + merged_log = rptr.summary( + runtime=True, sysinfo=True, timestamp=True, iters_per_result=1 + ) assert len(merged_log) < StatusReporter.SUMMARY_LIMIT -def test_reduce_status_reporter_01(tmp_path): - """test empty StatusReporter in reducer mode""" - Status.PATH = str(tmp_path) - ReducerStats.PATH = Status.PATH - rptr = StatusReporter.load(reducer=True) - assert rptr is not None - assert not rptr.reports - output = rptr._specific() - assert "No status reports available" in output - output = rptr._summary() - assert "No status reports available" in output -def test_reduce_status_reporter_02(tmp_path): - """test StatusReporter._specific() in reducer mode""" - Status.PATH = str(tmp_path) - ReducerStats.PATH = Status.PATH - status = Status.start() - status.ignored = 12 - status.iteration = 432422 - status._results = {"sig": 123} - status.report(force=True) - rptr = StatusReporter.load(reducer=True) - assert rptr.reports - output = rptr._specific() - assert len(output.split("\n")[:-1]) == 2 - assert "Iteration" in output - assert "Rate" in output - assert "Ignored" not in output - assert "Results" not in output +def test_status_reporter_11(): + """test StatusReporter.format_entries()""" + assert StatusReporter.format_entries([]) == "" + assert StatusReporter.format_entries([("test", None)]) == "test" + assert StatusReporter.format_entries([("test", "1")]) == "test : 1" + out = StatusReporter.format_entries( + [("first", "1"), ("second", "2"), ("third", "3")] + ) + assert out == " first : 1\nsecond : 2\n third : 3"
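
Taken together, the StatusReporter tests above boil down to a small consumer-side workflow. A minimal sketch, assuming only the calls exercised here (the db file name is a placeholder; `tb_path` may additionally be passed to collect tracebacks):

```
from grizzly.common.status_reporter import StatusReporter

reporter = StatusReporter.load("status.db")
if reporter.reports:
    print(reporter.summary(runtime=True, sysinfo=True, timestamp=True))
    if reporter.has_results:
        print(reporter.results(max_len=60))
# labels are right-aligned on the ":" separator (see format_entries above)
print(StatusReporter.format_entries([("Iterations", "1000"), ("Rate", "2.5")]))
```
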
-def test_reduce_status_reporter_03(tmp_path): - """test StatusReporter._summary() in reducer mode""" - Status.PATH = str(tmp_path) - ReducerStats.PATH = Status.PATH - status = Status.start() - status.iteration = 1 - status.report(force=True) - status = Status.start() - status.iteration = 10 - status.report(force=True) - rptr = StatusReporter.load(reducer=True) - rptr._sys_info = _fake_sys_info - assert rptr.reports is not None - output = rptr._summary(sysinfo=True, timestamp=True) - assert "======== Stats ========" in output - assert "======= Active ========" in output - assert "Reduced" in output - assert "No Repro" in output - assert "Iteration" in output - assert "Results" not in output - assert "Mismatch" in output - assert "Rate" in output - assert "Runtime" in output - assert "ignored" not in output - assert "Timestamp" in output - lines = output.split("\n") - assert len(lines) == 13 - # verify alignment - position = len(lines[1].split(":")[0]) - for line in lines: - if line.startswith("="): - # skip headers - continue - assert re.match(r"\S\s:\s\S", line[position - 2:]) def test_traceback_report_01(): """test simple TracebackReport""" @@ -340,6 +535,7 @@ def test_traceback_report_01(): assert "2" in output assert "-2" in output + def test_traceback_report_02(): """test empty TracebackReport""" tbr = TracebackReport("log.txt", []) @@ -348,15 +544,16 @@ def test_traceback_report_02(): assert len(output.splitlines()) == 1 assert "log.txt" in output + def test_traceback_report_03(tmp_path): """test TracebackReport.from_file()""" test_log = tmp_path / "screenlog.0" with test_log.open("wb") as test_fp: test_fp.write(b"start junk\npre1\npre2\npre3\npre4\npre5\n") test_fp.write(b"Traceback (most recent call last):\n") - test_fp.write(b" File \"foo.py\", line 556, in <module>\n") + test_fp.write(b' File "foo.py", line 556, in <module>\n') test_fp.write(b" main(parse_args())\n") - test_fp.write(b" File \"foo.py\", line 207, in bar\n") + test_fp.write(b' File "foo.py", line 207, in bar\n') test_fp.write(b" a = b[10]\n") test_fp.write(b"IndexError: list index out of range\n") test_fp.write(b"end junk\n") @@ -374,13 +571,13 @@ def test_traceback_report_03(tmp_path): with test_log.open("wb") as test_fp: test_fp.write(b"start junk\n") test_fp.write(b"Traceback (most recent call last):\n") - test_fp.write(b" File \"foo.py\", line 556, in <module>\n") + test_fp.write(b' File "foo.py", line 556, in <module>\n') test_fp.write(b" main(parse_args())\n") - test_fp.write(b" File \"foo.py\", line 207, in bar\n") + test_fp.write(b' File "foo.py", line 207, in bar\n') test_fp.write(b" a = b[10]\n") test_fp.write(b"foo.bar.error: blah\n") test_fp.write(b"end junk\n") - tbr = TracebackReport.from_file(str(test_log), max_preceeding=0) + tbr = TracebackReport.from_file(str(test_log), max_preceding=0) assert len(tbr.lines) == 6 assert not tbr.prev_lines assert not tbr.is_kbi @@ -392,9 +589,9 @@ # kbi with test_log.open("wb") as test_fp: test_fp.write(b"Traceback (most recent call last):\n") - test_fp.write(b" File \"foo.py\", line 556, in <module>\n") + test_fp.write(b' File "foo.py", line 556, in <module>\n') test_fp.write(b" main(parse_args())\n") - test_fp.write(b" File \"foo.py\", line 207, in bar\n") + test_fp.write(b' File "foo.py", line 207, in bar\n') test_fp.write(b" a = b[10]\n") test_fp.write(b"KeyboardInterrupt\n") test_fp.write(b"end junk\n") @@ -404,17 +601,18 @@ assert len(output.splitlines()) == 7 assert "KeyboardInterrupt" in output + def test_traceback_report_04(tmp_path): """test TracebackReport.from_file() exceed size limit""" test_log = tmp_path / "screenlog.0" with test_log.open("wb") as test_fp: test_fp.write(b"Traceback (most recent call last):\n") - test_fp.write(b" File \"foo.py\", line 5, in <module>\n") + test_fp.write(b' File "foo.py", line 5, in <module>\n') test_fp.write(b" first()\n") - test_fp.write(b" File \"foo.py\", line 5, in <module>\n") + test_fp.write(b' File "foo.py", line 5, in <module>\n') test_fp.write(b" second()\n") for i in reversed(range(TracebackReport.MAX_LINES)): - test_fp.write(b" File \"foo.py\", line 5, in <module>\n") + test_fp.write(b' File "foo.py", line 5, in <module>\n') test_fp.write(b" func_%02d()\n" % i) test_fp.write(b"END_WITH_BLANK_LINE\n\n") test_fp.write(b"end junk\n") @@ -429,15 +627,16 @@ assert "func_06()" not in output assert "END_WITH_BLANK_LINE" in output
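
The TracebackReport tests above and below exercise extraction from screenlogs. A sketch of standalone use, based on the calls shown in these tests; the assumption that `from_file()` returns None when the log holds no traceback is not stated by this patch:

```
from grizzly.common.status_reporter import TracebackReport

# assumption: from_file() yields None when no traceback is found
tbr = TracebackReport.from_file("screenlog.0", max_preceding=5)
if tbr is not None and not tbr.is_kbi:
    # is_kbi flags KeyboardInterrupt reports, which summaries skip
    print(tbr)
```
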
test_fp.write(b"Traceback (most recent call last):\n") - test_fp.write(b" File \"foo.py\", line 5, in \n") + test_fp.write(b' File "foo.py", line 5, in \n') test_fp.write(b" first()\n") for i in range(TracebackReport.MAX_LINES * 2): - test_fp.write(b" File \"foo.py\", line 5, in \n") + test_fp.write(b' File "foo.py", line 5, in \n') test_fp.write(b" func_%d()\n" % i) tbr = TracebackReport.from_file(str(test_log)) assert not tbr.is_kbi @@ -446,12 +645,13 @@ def test_traceback_report_05(tmp_path): assert "first()" in output assert "func_%d" % (TracebackReport.MAX_LINES * 2 - 1) in output + def test_traceback_report_06(tmp_path): """test TracebackReport.from_file() single word error""" test_log = tmp_path / "screenlog.0" with test_log.open("wb") as test_fp: test_fp.write(b"Traceback (most recent call last):\n") - test_fp.write(b" File \"foo.py\", line 5, in \n") + test_fp.write(b' File "foo.py", line 5, in \n') test_fp.write(b" first()\n") test_fp.write(b"AssertionError\n") test_fp.write(b"end junk\n") @@ -463,12 +663,13 @@ def test_traceback_report_06(tmp_path): assert "AssertionError" in output assert "end junk" not in output + def test_traceback_report_07(tmp_path): """test TracebackReport.from_file() with binary data""" test_log = tmp_path / "screenlog.0" with test_log.open("wb") as test_fp: test_fp.write(b"Traceback (most recent call last):\n") - test_fp.write(b" File \"foo.py\", line 5, in \n") + test_fp.write(b' File "foo.py", line 5, in \n') test_fp.write(b" bin\xd8()\n") test_fp.write(b"AssertionError\n") tbr = TracebackReport.from_file(str(test_log)) @@ -477,13 +678,14 @@ def test_traceback_report_07(tmp_path): assert "bin()" in output assert "AssertionError" in output + def test_traceback_report_08(tmp_path): """test TracebackReport.from_file() locate token across chunks""" test_log = tmp_path / "screenlog.0" with test_log.open("wb") as test_fp: test_fp.write(b"A" * (TracebackReport.READ_LIMIT - 5)) test_fp.write(b"Traceback (most recent call last):\n") - test_fp.write(b" File \"foo.py\", line 5, in \n") + test_fp.write(b' File "foo.py", line 5, in \n') test_fp.write(b" first()\n") test_fp.write(b"AssertionError\n") tbr = TracebackReport.from_file(str(test_log)) @@ -493,49 +695,126 @@ def test_traceback_report_08(tmp_path): assert "first()" in output assert "AssertionError" in output -def test_main_01(tmp_path): + +@mark.usefixtures("tmp_path_status_db") +def test_main_01(mocker): """test main() with no reports""" - Status.PATH = str(tmp_path) - StatusReporter.CPU_POLL_INTERVAL = 0.01 + mocker.patch( + "grizzly.common.status_reporter.StatusReporter.CPU_POLL_INTERVAL", 0.01 + ) assert main([]) == 0 -def test_main_02(tmp_path): + +@mark.usefixtures("tmp_path_status_db") +def test_main_02(mocker): """test main() with a report""" - Status.PATH = str(tmp_path) - StatusReporter.CPU_POLL_INTERVAL = 0.01 + mocker.patch( + "grizzly.common.status_reporter.StatusReporter.CPU_POLL_INTERVAL", 0.01 + ) status = Status.start() status.iteration = 1 + status.results.count("uid", "[@ test]") status.report(force=True) assert main([]) == 0 -def test_main_03(tmp_path): + +@mark.parametrize( + "report_mode", + [ + "fuzzing", + "reducing", + ], +) +@mark.parametrize( + "report_type", + [ + "active", + "complete", + ], +) +@mark.usefixtures("tmp_path_status_db", "tmp_path_reduce_status_db") +def test_main_03(mocker, tmp_path, report_type, report_mode): """test main() --dump""" - Status.PATH = str(tmp_path) - StatusReporter.CPU_POLL_INTERVAL = 0.01 - status = Status.start() - status.iteration = 1 - 
status.report(force=True) - dump_file = tmp_path / "output.txt" - assert main(["--dump", str(dump_file)]) == 0 - assert dump_file.is_file() - assert b"Runtime" not in dump_file.read_bytes() - #assert False, dump_file.read_bytes() - dump_file.unlink() + mocker.patch( + "grizzly.common.status_reporter.StatusReporter.CPU_POLL_INTERVAL", 0.01 + ) + if report_mode == "reducing": + status = ReductionStatus.start( + testcase_size_cb=lambda: 47, + strategies=[], + ) + with status.measure("total"): + status.iteration = 1 + status.report(force=True) + else: + status = Status.start() + status.iteration = 1 + status.report(force=True) dump_file = tmp_path / "output.txt" - assert main(["--dump", str(dump_file), "--mode", "reduce-status"]) == 0 + assert ( + main( + [ + "--dump", + str(dump_file), + "--type", + report_type, + "--scan-mode", + report_mode, + ] + ) + == 0 + ) assert dump_file.is_file() - assert b"Runtime" in dump_file.read_bytes() + if report_type == "active": + assert b"Runtime" not in dump_file.read_bytes() + else: + assert b"Timestamp" not in dump_file.read_bytes() -def test_main_04(tmp_path): - """test main() with --mode reduce-status""" - Status.PATH = str(tmp_path) - StatusReporter.CPU_POLL_INTERVAL = 0.01 - status = Status.start() - status.iteration = 1 - status.report(force=True) - assert main(["--mode", "reduce-status"]) == 0 -def test_main_05(): - """test main() with invalid mode""" - with pytest.raises(SystemExit): - main(["--mode", "invalid"]) +@mark.usefixtures("tmp_path_status_db") +def test_main_04(capsys): + """test main() with invalid args""" + with raises(SystemExit): + main(["--tracebacks", "missing"]) + assert "--tracebacks must be a directory" in capsys.readouterr()[-1] + + +@mark.parametrize( + "report_type", + [ + "active", + "complete", + ], +) +@mark.usefixtures("tmp_path_reduce_status_db") +def test_main_05(mocker, tmp_path, report_type): + """test main() --dump""" + mocker.patch( + "grizzly.common.status_reporter.StatusReporter.CPU_POLL_INTERVAL", 0.01 + ) + status = ReductionStatus.start( + testcase_size_cb=lambda: 47, + strategies=[], + ) + with status.measure("total"): + status.iteration = 1 + status.report(force=True) + dump_file = tmp_path / "output.txt" + assert ( + main( + [ + "--dump", + str(dump_file), + "--type", + report_type, + "--scan-mode", + "reducing", + ] + ) + == 0 + ) + assert dump_file.is_file() + if report_type == "active": + assert b"Runtime" not in dump_file.read_bytes() + else: + assert b"Timestamp" not in dump_file.read_bytes() diff --git a/grizzly/common/test_storage.py b/grizzly/common/test_storage.py index 2bc35b35..00f9b2f6 100644 --- a/grizzly/common/test_storage.py +++ b/grizzly/common/test_storage.py @@ -4,13 +4,14 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
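
The main() tests above pin down the reporter's CLI surface: `--dump`, `--type {active,complete}`, `--scan-mode {fuzzing,reducing}` and `--tracebacks`. A minimal sketch mirroring test_main_03, assuming a reduction status database already exists at its default location (the output file name is a placeholder):

```
from grizzly.common.status_reporter import main

# write a "complete" report for a reduction session to a file
assert main(
    ["--dump", "report.txt", "--type", "complete", "--scan-mode", "reducing"]
) == 0
```
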
# pylint: disable=protected-access -import json -import re -import os +from itertools import chain +from json import dumps, loads +from zipfile import ZIP_DEFLATED, ZipFile -import pytest +from pytest import mark, raises -from .storage import TestCase, TestFile, TestCaseLoadFailure, TestFileExists +from ..target import AssetManager +from .storage import TestCase, TestCaseLoadFailure, TestFileExists def test_testcase_01(tmp_path): @@ -21,359 +22,508 @@ def test_testcase_01(tmp_path): with TestCase(l_page, r_page, adpt_name) as tcase: assert tcase.landing_page == l_page assert tcase.redirect_page == r_page + assert tcase.assets is None assert tcase.adapter_name == adpt_name assert tcase.duration is None assert tcase.data_size == 0 assert tcase.input_fname is None + assert tcase.timestamp > 0 assert not tcase.env_vars - assert not tcase._existing_paths - assert not tcase._files.meta + assert tcase.data_path assert not tcase._files.optional assert not tcase._files.required - assert not tcase.contains("no_file") + assert not any(tcase.contents) + assert tcase.pop_assets() is None assert not any(tcase.optional) - tcase.dump(str(tmp_path)) - assert not any(tmp_path.glob("*")) - tcase.dump(str(tmp_path), include_details=True) + tcase.dump(tmp_path) + assert not any(tmp_path.iterdir()) + tcase.dump(tmp_path, include_details=True) assert (tmp_path / "test_info.json").is_file() -def test_testcase_02(tmp_path): - """test TestCase with TestFiles""" - tcase = TestCase("land_page.html", "redirect.html", "test-adapter", input_fname="testinput.bin") - try: - in_file = tmp_path / "testfile1.bin" - in_file.write_bytes(b"test_req") - tcase.add_from_file(str(in_file)) - assert tcase.data_size == 8 - with pytest.raises(TestFileExists, match="'testfile1.bin' exists in test"): - tcase.add_from_file(str(in_file), file_name="testfile1.bin") - with pytest.raises(TestFileExists, match="'testfile1.bin' exists in test"): - tcase.add_from_data("test", "testfile1.bin") - tcase.add_from_data("test_nreq", "nested/testfile2.bin", required=False) - tcase.add_from_data("test_blah", "/testfile3.bin") - tcase.add_from_data("test_windows", "\\\\dir\\file.bin") - assert tcase.contains("testfile1.bin") - opt_files = list(tcase.optional) - assert os.path.join("nested", "testfile2.bin") in opt_files - assert len(opt_files) == 1 - tcase.dump(str(tmp_path), include_details=True) - assert (tmp_path / "nested").is_dir() - test_info = json.loads((tmp_path / "test_info.json").read_text()) - assert test_info["adapter"] == "test-adapter" - assert test_info["input"] == "testinput.bin" - assert test_info["target"] == "land_page.html" - assert isinstance(test_info["env"], dict) - assert in_file.read_bytes() == b"test_req" - assert (tmp_path / "nested" / "testfile2.bin").read_bytes() == b"test_nreq" - assert (tmp_path / "testfile3.bin").read_bytes() == b"test_blah" - assert (tmp_path / "dir" / "file.bin").read_bytes() == b"test_windows" - finally: - tcase.cleanup() -def test_testcase_03(tmp_path): - """test TestCase.add_meta()""" - with TestCase("land_page.html", "redirect.html", "test-adapter") as tcase: - dmp_path = tmp_path / "dmp_test" - dmp_path.mkdir() - meta_file = dmp_path / "metafile.bin" - meta_data = b"foobar" - tcase.add_meta(TestFile.from_data(meta_data, meta_file.name)) - tcase.dump(str(dmp_path), include_details=True) - assert tcase.data_size == 6 - assert meta_file.is_file() - assert meta_file.read_bytes() == meta_data - -def test_testcase_04(tmp_path): - """test TestCase.add_environ_var() and TestCase.env_vars""" - with 
TestCase("land_page.html", "redirect.html", "test-adapter") as tcase: - tcase.add_environ_var("TEST_ENV_VAR", "1") - assert len(list(tcase.env_vars)) == 1 - tcase.add_environ_var("TEST_NONE", None) - assert len(tcase.env_vars) == 2 - dmp_path = tmp_path / "dmp_test" - dmp_path.mkdir() - tcase.dump(str(dmp_path), include_details=True) - data = json.loads((dmp_path / "test_info.json").read_text()) - assert "env" in data - assert data["env"]["TEST_ENV_VAR"] == "1" - assert data["env"]["TEST_NONE"] is None - -def test_testcase_05(tmp_path): +@mark.parametrize( + "copy, required", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +def test_testcase_02(tmp_path, copy, required): + """test TestCase.add_from_file()""" + with TestCase("land_page.html", "a.html", "adpt", input_fname="in.bin") as tcase: + in_file = tmp_path / "file.bin" + in_file.write_text("data") + tcase.add_from_file(in_file, copy=copy, required=required) + assert tcase.data_size == 4 + assert "file.bin" in tcase.contents + if required: + assert in_file.name not in tcase.optional + else: + assert in_file.name in tcase.optional + assert in_file.exists() == copy + # try overwriting existing file + if copy: + with raises(TestFileExists, match="'file.bin' exists in test"): + tcase.add_from_file(in_file, copy=True) + assert in_file.exists() + else: + assert not in_file.exists() + + +@mark.parametrize( + "file_paths", + [ + ("a.bin",), + ("a/a.bin",), + ("a.bin", "b.bin"), + ("a.bin", "b/c.bin", "b/d.bin"), + ], +) +def test_testcase_03(tmp_path, file_paths): + """test TestCase.add_from_file()""" + with TestCase("land_page.html", "a.html", "adpt") as tcase: + for file_path in file_paths: + src_file = tmp_path / file_path + src_file.parent.mkdir(exist_ok=True, parents=True) + src_file.write_text("data") + tcase.add_from_file(src_file, file_name=file_path, required=True) + assert file_path in tcase.contents + assert file_path not in tcase.optional + + +def test_testcase_04(): + """test TestCase.add_from_bytes()""" + with TestCase("a.html", None, "adpt") as tcase: + tcase.add_from_bytes(b"foo", "a.html", required=True) + tcase.add_from_bytes(b"foo", "b.html", required=False) + assert "a.html" in (x.file_name for x in tcase._files.required) + assert "b.html" in (x.file_name for x in tcase._files.optional) + # add file with invalid file name + with raises(ValueError, match="invalid path ''"): + tcase.add_from_bytes(b"foo", "", required=False) + + +def test_testcase_05(): """test TestCase.purge_optional()""" with TestCase("land_page.html", "redirect.html", "test-adapter") as tcase: - tcase.add_from_data("foo", "testfile1.bin") - tcase.add_from_data("foo", "testfile2.bin", required=False) - tcase.add_from_data("foo", "testfile3.bin", required=False) - tcase.add_from_data("foo", "not_served.bin", required=False) + # no optional files + tcase.purge_optional(["foo"]) + # setup + tcase.add_from_bytes(b"foo", "testfile1.bin", required=True) + tcase.add_from_bytes(b"foo", "testfile2.bin", required=False) + tcase.add_from_bytes(b"foo", "testfile3.bin", required=False) + tcase.add_from_bytes(b"foo", "not_served.bin", required=False) + assert len(tuple(tcase.optional)) == 3 + # nothing to remove - with required + tcase.purge_optional(chain(["testfile1.bin"], tcase.optional)) assert len(tuple(tcase.optional)) == 3 - tcase.purge_optional(tcase.optional) + # nothing to remove - use relative path (forced) + tcase.purge_optional(x.file_name for x in tcase._files.optional) assert len(tuple(tcase.optional)) == 3 + # nothing to 
remove - use absolute path + tcase.purge_optional(x.data_file.as_posix() for x in tcase._files.optional) + assert len(tuple(tcase.optional)) == 3 + # remove not_served.bin tcase.purge_optional(["testfile2.bin", "testfile3.bin"]) assert len(tuple(tcase.optional)) == 2 - tcase.dump(str(tmp_path)) - assert tmp_path.glob("testfile1.bin") - assert not any(tmp_path.glob("not_served.bin")) + assert "testfile2.bin" in tcase.optional + assert "testfile3.bin" in tcase.optional + assert "not_served.bin" not in tcase.optional + # remove remaining optional + tcase.purge_optional(["testfile1.bin"]) + assert not any(tcase.optional) + def test_testcase_06(): """test TestCase.data_size""" with TestCase("land_page.html", "redirect.html", "test-adapter") as tcase: assert tcase.data_size == 0 - tcase.add_from_data("1", "testfile1.bin", required=True) + tcase.add_from_bytes(b"1", "testfile1.bin", required=True) assert tcase.data_size == 1 - tcase.add_from_data("12", "testfile2.bin", required=False) + tcase.add_from_bytes(b"12", "testfile2.bin", required=False) assert tcase.data_size == 3 - tcase.add_meta(TestFile.from_data("123", "meta.bin")) - assert tcase.data_size == 6 + def test_testcase_07(tmp_path): - """test TestCase.load_path() using a directory fail cases""" + """test TestCase.load_single() using a directory - fail cases""" # missing test_info.json - with pytest.raises(TestCaseLoadFailure, match="Missing 'test_info.json'"): - TestCase.load_path(str(tmp_path)) + with raises(TestCaseLoadFailure, match="Missing 'test_info.json'"): + TestCase.load_single(tmp_path) # invalid test_info.json (tmp_path / "test_info.json").write_bytes(b"X") - with pytest.raises(TestCaseLoadFailure, match="Invalid 'test_info.json'"): - TestCase.load_path(str(tmp_path)) + with raises(TestCaseLoadFailure, match="Invalid 'test_info.json'"): + TestCase.load_single(tmp_path) # test_info.json missing 'target' entry (tmp_path / "test_info.json").write_bytes(b"{}") - with pytest.raises(TestCaseLoadFailure, match="'test_info.json' missing 'target' entry"): - TestCase.load_path(str(tmp_path)) + with raises( + TestCaseLoadFailure, match="'test_info.json' has invalid 'target' entry" + ): + TestCase.load_single(tmp_path) # build a test case - src_dir = (tmp_path / "src") + src_dir = tmp_path / "src" src_dir.mkdir() - (src_dir / "prefs.js").touch() entry_point = src_dir / "target.bin" entry_point.touch() with TestCase("target.bin", None, "test-adapter") as src: - src.add_from_file(str(entry_point)) - src.dump(str(src_dir), include_details=True) - # bad test_info.json 'target' entry + src.add_from_file(entry_point) + src.dump(src_dir, include_details=True) + # bad 'target' entry in test_info.json entry_point.unlink() - with pytest.raises(TestCaseLoadFailure, match="entry_point 'target.bin' not found in"): - TestCase.load_path(str(src_dir)) - # bad test_info.json 'env' entry + with raises(TestCaseLoadFailure, match="Entry point 'target.bin' not found in"): + TestCase.load_single(src_dir) + # bad 'env' entry in test_info.json entry_point.touch() - with TestCase("target.bin", None, "test-adapter") as src: - src.add_environ_var("TEST_ENV_VAR", 100) - src.dump(str(src_dir), include_details=True) - with pytest.raises(TestCaseLoadFailure, match="'env_data' contains invalid 'env' entries"): - TestCase.load_path(str(src_dir)) + with AssetManager(base_path=str(tmp_path)) as assets: + (tmp_path / "example_asset").touch() + assets.add("example", str(tmp_path / "example_asset"), copy=False) + with TestCase("target.bin", None, "test-adapter") as src: + 
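
As the purge_optional() test above shows, served file names may be passed in relative or absolute form. A build-then-prune sketch, assuming only the TestCase calls these tests exercise (file names and content are placeholders):

```
from grizzly.common.storage import TestCase

with TestCase("index.html", None, "example-adapter") as tcase:
    tcase.add_from_bytes(b"<h1>demo</h1>", "index.html", required=True)
    tcase.add_from_bytes(b"// a", "extra.js", required=False)
    tcase.add_from_bytes(b"// b", "unused.js", required=False)
    # keep only the optional files that were actually served
    tcase.purge_optional(["index.html", "extra.js"])
    assert "unused.js" not in tcase.optional
```
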
src.assets = assets + src.dump(src_dir, include_details=True) + test_info = loads((src_dir / "test_info.json").read_text()) + test_info["env"] = {"bad": 1} + (src_dir / "test_info.json").write_text(dumps(test_info)) + with raises(TestCaseLoadFailure, match="'env' contains invalid entries"): + TestCase.load_single(src_dir) -def test_testcase_08(tmp_path): - """test TestCase.load_path() using a directory""" + +def test_testcase_08(mocker, tmp_path): + """test TestCase.load_single() using a directory""" # build a valid test case - src_dir = (tmp_path / "src") + src_dir = tmp_path / "src" src_dir.mkdir() - (src_dir / "prefs.js").touch() entry_point = src_dir / "target.bin" entry_point.touch() + asset_file = src_dir / "example_asset" + asset_file.touch() (src_dir / "optional.bin").touch() (src_dir / "x.bin").touch() - nested = (tmp_path / "src" / "nested") + nested = src_dir / "nested" nested.mkdir() # overlap file name in different directories (nested / "x.bin").touch() (tmp_path / "src" / "nested" / "empty").mkdir() - with TestCase("target.bin", None, "test-adapter") as src: - src.add_environ_var("TEST_ENV_VAR", "100") - src.add_from_file(str(entry_point)) - src.dump(str(src_dir), include_details=True) - # load test case from test_info.json - with TestCase.load_path(str(src_dir)) as dst: + dst_dir = tmp_path / "dst" + with AssetManager(base_path=str(tmp_path)) as assets: + assets.add("example", str(asset_file)) + with TestCase("target.bin", None, "test-adapter") as src: + src.env_vars["TEST_ENV_VAR"] = "100" + src.add_from_file(entry_point) + src.add_from_file(src_dir / "optional.bin", required=False) + src.add_from_file(src_dir / "x.bin", required=False) + src.add_from_file( + nested / "x.bin", + file_name=str((nested / "x.bin").relative_to(src_dir)), + required=False, + ) + src.assets = assets + src.dump(dst_dir, include_details=True) + # test loading test case from test_info.json + with TestCase.load_single(dst_dir) as dst: + asset = dst.pop_assets() + assert asset + with asset: + assert "example" in asset.assets assert dst.landing_page == "target.bin" - assert "prefs.js" in (x.file_name for x in dst._files.meta) assert "target.bin" in (x.file_name for x in dst._files.required) assert "optional.bin" in (x.file_name for x in dst._files.optional) assert "x.bin" in (x.file_name for x in dst._files.optional) - assert os.path.join("nested", "x.bin") in (x.file_name for x in dst._files.optional) + assert "nested/x.bin" in (x.file_name for x in dst._files.optional) assert dst.env_vars["TEST_ENV_VAR"] == "100" + assert dst.timestamp > 0 + # test load with missing asset + mocker.patch("grizzly.common.storage.AssetManager.load", side_effect=OSError) + with raises(TestCaseLoadFailure): + TestCase.load_single(dst_dir) + def test_testcase_09(tmp_path): - """test TestCase.load_path() using a file""" + """test TestCase.load_single() using a file""" # invalid entry_point specified - with pytest.raises(TestCaseLoadFailure, match="Cannot find"): - TestCase.load_path(str(tmp_path / "missing_file")) + with raises(TestCaseLoadFailure, match="Missing or invalid TestCase"): + TestCase.load_single(tmp_path / "missing_file", adjacent=False) # valid test case - src_dir = (tmp_path / "src") + src_dir = tmp_path / "src" src_dir.mkdir() - (src_dir / "prefs.js").touch() entry_point = src_dir / "target.bin" entry_point.touch() (src_dir / "optional.bin").touch() # load single file test case - with TestCase.load_path(str(entry_point)) as tcase: + with TestCase.load_single(entry_point, adjacent=False) as tcase: + assert 
tcase.assets is None + assert not tcase.env_vars assert tcase.landing_page == "target.bin" - assert "prefs.js" not in (x.file_name for x in tcase._files.meta) assert "target.bin" in (x.file_name for x in tcase._files.required) assert "optional.bin" not in (x.file_name for x in tcase._files.optional) + assert tcase.timestamp == 0 # load full test case - with TestCase.load_path(str(entry_point), full_scan=True, prefs=True) as tcase: + with TestCase.load_single(entry_point, adjacent=True) as tcase: assert tcase.landing_page == "target.bin" - assert "prefs.js" in (x.file_name for x in tcase._files.meta) assert "target.bin" in (x.file_name for x in tcase._files.required) assert "optional.bin" in (x.file_name for x in tcase._files.optional) + def test_testcase_10(tmp_path): - """test TestCase.load_environ()""" - (tmp_path / "ubsan.supp").touch() - (tmp_path / "other_file").touch() - with TestCase("a.html", "b.html", "test-adapter") as tcase: - tcase.load_environ(str(tmp_path), {}) - assert "UBSAN_OPTIONS" in tcase.env_vars - assert "ubsan.supp" in tcase.env_vars["UBSAN_OPTIONS"] - # existing *SAN_OPTIONS - tcase.load_environ(str(tmp_path), {"UBSAN_OPTIONS": "a=1:b=2"}) - assert "UBSAN_OPTIONS" in tcase.env_vars - assert "ubsan.supp" in tcase.env_vars["UBSAN_OPTIONS"] - opts = re.split(r":(?![\\|/])", tcase.env_vars["UBSAN_OPTIONS"]) - assert "a=1" in opts - assert "b=2" in opts - assert len(opts) == 3 + """test TestCase - dump, load and compare""" + working = tmp_path / "working" + working.mkdir() + with TestCase("a.html", "b.html", "adpt") as org: + # set non default values + org.duration = 1.23 + org.env_vars = {"en1": "1", "en2": "2"} + org.hang = True + org.input_fname = "infile" + org.time_limit = 10 + org.add_from_bytes(b"a", "a.html") + with AssetManager(base_path=str(tmp_path)) as assets: + fake = tmp_path / "fake_asset" + fake.touch() + assets.add("fake", str(fake)) + org.assets = assets + org.dump(working, include_details=True) + org.assets = None + with TestCase.load_single(working, adjacent=False) as loaded: + try: + for prop in TestCase.__slots__: + if prop.startswith("_") or prop in ("assets", "redirect_page"): + continue + assert getattr(loaded, prop) == getattr(org, prop) + assert not set(org.contents) ^ set(loaded.contents) + assert loaded.assets + assert "fake" in loaded.assets.assets + finally: + if loaded.assets: + loaded.assets.cleanup() + def test_testcase_11(tmp_path): + """test TestCase.load() - missing file and empty directory""" + # missing file + with raises(TestCaseLoadFailure, match="Invalid TestCase path"): + TestCase.load("missing") + # empty path + assert not TestCase.load(tmp_path, adjacent=True) + + +def test_testcase_12(tmp_path): + """test TestCase.load() - single file""" + tfile = tmp_path / "testcase.html" + tfile.touch() + testcases = TestCase.load(tfile, adjacent=False) + try: + assert len(testcases) == 1 + assert all(x.assets is None for x in testcases) + finally: + any(x.cleanup() for x in testcases) + + +def test_testcase_13(tmp_path): + """test TestCase.load() - single directory""" + with TestCase("target.bin", None, "test-adapter") as src: + src.add_from_bytes(b"test", "target.bin") + src.dump(tmp_path, include_details=True) + testcases = TestCase.load(tmp_path) + try: + assert len(testcases) == 1 + assert all(x.assets is None for x in testcases) + finally: + any(x.cleanup() for x in testcases) + + +def test_testcase_14(tmp_path): + """test TestCase.load() - multiple directories (with assets)""" + nested = tmp_path / "nested" + nested.mkdir() + 
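
TestCase.load() accepts a single file, a dumped test case directory, a directory of such directories, or a zip archive, as the surrounding tests demonstrate. A consumer-side sketch using only calls from these tests (the archive name is a placeholder):

```
from grizzly.common.storage import TestCase, TestCaseLoadFailure

try:
    testcases = TestCase.load("testcase.zip")
except TestCaseLoadFailure:
    testcases = []
for test in testcases:
    assets = test.pop_assets()
    try:
        pass  # replay or reduce the test case here
    finally:
        if assets is not None:
            assets.cleanup()
        test.cleanup()
```
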
asset_file = tmp_path / "example_asset" + asset_file.touch() + with AssetManager(base_path=str(tmp_path)) as assets: + assets.add("example", str(asset_file)) + with TestCase("target.bin", None, "test-adapter") as src: + src.assets = assets + src.add_from_bytes(b"test", "target.bin") + src.dump(nested / "test-1", include_details=True) + src.dump(nested / "test-2", include_details=True) + src.dump(nested / "test-3", include_details=True) + testcases = TestCase.load(nested) + try: + assert len(testcases) == 3 + assert all(x.assets is not None for x in testcases) + asset = testcases[-1].pop_assets() + assert asset is not None + assert "example" in asset.assets + finally: + any(x.cleanup() for x in testcases) + for test in testcases: + if test.assets: + test.assets.cleanup() + # try loading testcases that are nested too deep + assert not TestCase.load(tmp_path) + + +def test_testcase_15(tmp_path): + """test TestCase.load() - archive""" + archive = tmp_path / "testcase.zip" + # bad archive + archive.write_bytes(b"x") + with raises(TestCaseLoadFailure, match="Testcase archive is corrupted"): + TestCase.load(archive) + # build archive containing multiple testcases + with TestCase("target.bin", None, "test-adapter") as src: + src.add_from_bytes(b"test", "target.bin") + src.dump(tmp_path / "test-0", include_details=True) + src.dump(tmp_path / "test-1", include_details=True) + src.dump(tmp_path / "test-2", include_details=True) + (tmp_path / "log_dummy.txt").touch() + (tmp_path / "not_a_tc").mkdir() + (tmp_path / "not_a_tc" / "file.txt").touch() + with ZipFile(archive, mode="w", compression=ZIP_DEFLATED) as zfp: + for entry in tmp_path.rglob("*"): + if entry.is_file(): + zfp.write(str(entry), arcname=str(entry.relative_to(tmp_path))) + testcases = TestCase.load(archive) + try: + assert len(testcases) == 3 + assert all(x.assets is None for x in testcases) + assert all("target.bin" in x.contents for x in testcases) + finally: + any(x.cleanup() for x in testcases) + + +def test_testcase_16(tmp_path): """test TestCase.add_batch()""" - include = (tmp_path / "inc_path") + include = tmp_path / "inc_path" include.mkdir() - inc_1 = (include / "file.bin") + inc_1 = include / "file.bin" inc_1.write_bytes(b"a") (include / "nested").mkdir() - inc_2 = (include / "nested" / "nested.js") + inc_2 = include / "nested" / "nested.js" inc_2.write_bytes(b"a") - other_path = (tmp_path / "other_path") + other_path = tmp_path / "other_path" other_path.mkdir() (other_path / "no_include.bin").write_bytes(b"a") with TestCase("a.b", "a.b", "simple") as tcase: # missing directory tcase.add_batch("/missing/path/", tuple()) - assert not tcase._existing_paths + assert not any(tcase.contents) # missing file - with pytest.raises(IOError): - tcase.add_batch(str(tmp_path), [str(tmp_path / "missing.bin")]) - assert not tcase._existing_paths + with raises(IOError): + tcase.add_batch(tmp_path, [tmp_path / "missing.bin"]) + assert not any(tcase.contents) # relative file name - tcase.add_batch(str(include), ["file.bin"]) - assert not tcase._existing_paths + tcase.add_batch(include, ["file.bin"]) + assert not any(tcase.contents) # valid list - tcase.add_batch(str(include), [str(inc_1), str(inc_2), str(tmp_path / "inc_path2" / "extra.bin")]) - assert tcase.contains("file.bin") - assert tcase.contains(os.path.join("nested", "nested.js")) - assert len(tcase._existing_paths) == 2 + tcase.add_batch(include, [inc_1, inc_2, tmp_path / "inc_path2" / "extra.bin"]) + assert "file.bin" in tcase.contents + assert "nested/nested.js" in tcase.contents + 
assert len(list(tcase.contents)) == 2 # nested url - tcase.add_batch(str(include), [str(inc_1)], prefix="test") - assert tcase.contains(os.path.join("test", "file.bin")) - assert len(tcase._existing_paths) == 3 + tcase.add_batch(include, [inc_1], prefix="test") + assert "test/file.bin" in tcase.contents + assert len(list(tcase.contents)) == 3 # collision - with pytest.raises(TestFileExists, match="'file.bin' exists in test"): - tcase.add_batch(str(include), [str(inc_1)]) - -def test_testfile_01(): - """test simple TestFile""" - with TestFile("test_file.txt") as tfile: - assert tfile.file_name == "test_file.txt" - assert not tfile._fp.closed - assert tfile.size == 0 - tfile.close() - assert tfile._fp.closed - -def test_testfile_02(): - """test TestFile file names""" - # empty file name - with pytest.raises(TypeError, match="file_name is invalid"): - TestFile("") - # path (root) with missing file name - with pytest.raises(TypeError, match="file_name is invalid"): - TestFile("/") - # path (root) with missing file name - with pytest.raises(TypeError, match="file_name is invalid"): - TestFile("/.") - # path with missing file name - with pytest.raises(TypeError, match="file_name is invalid"): - TestFile("path/") - # invalid use of '..' - with pytest.raises(TypeError, match="file_name is invalid"): - TestFile("../test") - # path (root) with file - with TestFile("/valid.txt") as tfile: - assert tfile.file_name == "valid.txt" - # path with file - with TestFile("path\\file") as tfile: - assert os.path.split(tfile.file_name) == ("path", "file") - # with valid use of '.' and '..' - with TestFile("./a/./b/../c") as tfile: - assert os.path.split(tfile.file_name) == ("a", "c") - # filename starting with '.' - with TestFile(".file") as tfile: - assert tfile.file_name == ".file" - -def test_testfile_03(tmp_path): - """test TestFile.write() and TestFile.dump()""" - out_file = tmp_path / "test_file.txt" - with TestFile(out_file.name) as tfile: - tfile.write(b"foo") - assert not out_file.is_file() - tfile.dump(str(tmp_path)) - assert out_file.is_file() - assert out_file.read_text() == "foo" - tfile.write(b"bar") - tfile.dump(str(tmp_path)) - assert out_file.read_text() == "foobar" - -def test_testfile_04(tmp_path): - """test TestFile.dump() file with nested path""" - file_path = "test/dir/path/file.txt" - with TestFile(file_path) as tfile: - out_file = tmp_path / file_path - tfile.write(b"foo") - assert not out_file.is_file() - tfile.dump(str(tmp_path)) - assert out_file.is_file() - -def test_testfile_05(tmp_path): - """test TestFile.from_data()""" - # TODO: different encodings - with TestFile.from_data("foo", "test_file.txt") as tfile: - out_file = tmp_path / "test_file.txt" - tfile.dump(str(tmp_path)) - assert out_file.is_file() - assert out_file.read_text() == "foo" - -def test_testfile_06(tmp_path): - """test TestFile.from_file()""" - in_file = tmp_path / "infile.txt" - in_file.write_bytes(b"foobar") - # check re-using filename - with TestFile.from_file(str(in_file)) as tfile: - assert tfile.file_name == "infile.txt" - # check data - with TestFile.from_file(str(in_file), file_name="outfile.txt") as tfile: - assert tfile.file_name == "outfile.txt" - tfile.dump(str(tmp_path)) - out_file = tmp_path / "outfile.txt" - assert out_file.is_file() - assert out_file.read_text() == "foobar" - -def test_testfile_07(tmp_path): - """test TestFile.clone()""" - out_file = tmp_path / "test_file.txt" - with TestFile(out_file.name) as tf1: - tf1.write(b"foobar") - try: - tf2 = tf1.clone() - tf2.write(b"test") - assert 
tf1.file_name == tf2.file_name - assert tf1._fp != tf2._fp - tf2.dump(str(tmp_path)) - assert out_file.is_file() - assert out_file.read_text() == "foobartest" - finally: - tf2.close() - tf1.dump(str(tmp_path)) - assert out_file.is_file() - assert out_file.read_text() == "foobar" - -def test_testfile_08(tmp_path): - """test TestFile.data()""" - in_file = tmp_path / "infile.txt" - in_file.write_bytes(b"foobar") - with TestFile.from_file(str(in_file), file_name="outfile.txt") as tfile: - assert tfile.data == b"foobar" + with raises(TestFileExists, match="'file.bin' exists in test"): + tcase.add_batch(include, [inc_1]) + + +def test_testcase_17(tmp_path): + """test TestCase.scan_path()""" + # empty path + (tmp_path / "not-test").mkdir() + assert not any(TestCase.scan_path(tmp_path)) + # multiple test case directories + paths = [tmp_path / ("test-%d" % i) for i in range(3)] + with TestCase("test.htm", None, "test-adapter") as src: + src.add_from_bytes(b"test", "test.htm") + for path in paths: + src.dump(path, include_details=True) + tc_paths = list(TestCase.scan_path(tmp_path)) + assert len(tc_paths) == 3 + # single test case directory + tc_paths = list(TestCase.scan_path(paths[0])) + assert len(tc_paths) == 1 + + +def test_testcase_18(): + """test TestCase.get_file()""" + with TestCase("test.htm", None, "test-adapter") as src: + src.add_from_bytes(b"test", "test.htm") + assert src.get_file("missing") is None + assert src.get_file("test.htm") + + +def test_testcase_19(): + """test TestCase.clone()""" + with TestCase("a.htm", "b.htm", "adpt", input_fname="fn", time_limit=2) as src: + src.duration = 1.2 + src.hang = True + src.add_from_bytes(b"123", "test.htm", required=True) + src.add_from_bytes(b"456", "opt.htm", required=False) + src.env_vars["foo"] = "bar" + with src.clone() as dst: + for prop in TestCase.__slots__: + if prop.startswith("_"): + continue + assert getattr(src, prop) == getattr(dst, prop) + assert src.data_size == dst.data_size + for file, data in ( + ("test.htm", b"123"), + ("opt.htm", b"456"), + ): + src_file = src.get_file(file).data_file + dst_file = dst.get_file(file).data_file + assert src_file.read_bytes() == data + assert dst_file.read_bytes() == data + assert not dst_file.samefile(src_file) + assert dst.env_vars == {"foo": "bar"} + assert not set(src.optional) ^ set(dst.optional) + + +@mark.parametrize( + "path", + [ + # empty file name + "", + # path (root) with missing file name + "/", + # path (root) with missing file name + "/.", + # path with missing file name + "path/", + # outside of wwwroot + "../test", + # outside of wwwroot + "a/../../b", + # outside of wwwroot + "..", + # outside of wwwroot + "C:\\test", + # cwd + ".", + ], +) +def test_testcase_20(path): + """test TestCase.sanitize_path() with invalid paths""" + with raises(ValueError, match="invalid path"): + TestCase.sanitize_path(path) + + +@mark.parametrize( + "path, expected_result", + [ + ("a", "a"), + ("file.bin", "file.bin"), + ("a/file.bin", "a/file.bin"), + ("/file.bin", "file.bin"), + ("./file.bin", "file.bin"), + ("path\\file", "path/file"), + ("\\\\a\\b\\file.bin", "a/b/file.bin"), + (".file", ".file"), + ("a/../file.bin", "file.bin"), + ("./a/./b/../c", "a/c"), + ], +) +def test_testcase_21(path, expected_result): + """test TestCase.sanitize_path()""" + assert TestCase.sanitize_path(path) == expected_result diff --git a/grizzly/common/test_utils.py b/grizzly/common/test_utils.py index 8373d7c8..46d2491b 100644 --- a/grizzly/common/test_utils.py +++ b/grizzly/common/test_utils.py @@ -2,12 +2,18 
@@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +from logging import DEBUG, INFO -from .utils import grz_tmp +from pytest import mark -def test_testcase_01(mocker, tmp_path): +from .utils import configure_logging, grz_tmp + + +def test_grz_tmp_01(mocker, tmp_path): """test grz_tmp()""" - mocker.patch("grizzly.common.utils.gettempdir", autospec=True, return_value=str(tmp_path)) + mocker.patch( + "grizzly.common.utils.gettempdir", autospec=True, return_value=str(tmp_path) + ) # create temp path path = grz_tmp() assert path == str(tmp_path / "grizzly") @@ -19,3 +25,28 @@ def test_testcase_01(mocker, tmp_path): path = grz_tmp("test1", "test2") assert path == str(tmp_path / "grizzly" / "test1" / "test2") assert (tmp_path / "grizzly" / "test1" / "test2").is_dir() + + +@mark.parametrize( + "env, log_level", + [ + # default log level + ("0", INFO), + # debug log level + ("0", DEBUG), + # enable debug log level via env + ("1", INFO), + # enable debug log level via env + ("TRUE", INFO), + ], +) +def test_configure_logging_01(mocker, env, log_level): + """test configure_logging()""" + config = mocker.patch("grizzly.common.utils.basicConfig", autospec=True) + mocker.patch("grizzly.common.utils.getenv", autospec=True, return_value=env) + configure_logging(log_level) + assert config.call_count == 1 + if env != "0": + assert config.call_args[-1]["level"] == DEBUG + else: + assert config.call_args[-1]["level"] == log_level diff --git a/grizzly/common/utils.py b/grizzly/common/utils.py index a0123520..26ffb01d 100644 --- a/grizzly/common/utils.py +++ b/grizzly/common/utils.py @@ -2,18 +2,74 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. - -from os import makedirs +from enum import IntEnum, unique +from logging import DEBUG, basicConfig +from os import getenv, makedirs from os.path import join as pathjoin from tempfile import gettempdir - -__all__ = ("grz_tmp",) +__all__ = ( + "ConfigError", + "configure_logging", + "Exit", + "grz_tmp", + "TIMEOUT_DELAY", +) __author__ = "Tyson Smith" __credits__ = ["Tyson Smith"] +# TIMEOUT_DELAY is added to the test time limit to create the default timeout +TIMEOUT_DELAY = 15 + + +class ConfigError(Exception): + """Raised to indicate an invalid configuration state""" + + def __init__(self, message, exit_code): + super().__init__(message) + self.exit_code = exit_code + + +@unique +class Exit(IntEnum): + """Exit codes""" + + SUCCESS = 0 + # unexpected error occurred (invalid input, unhandled exception, etc) + ERROR = 1 + # invalid argument + ARGS = 2 + # run aborted (ctrl+c, etc) + ABORT = 3 + # unrelated Target failure (browser startup crash, etc) + LAUNCH_FAILURE = 4 + # expected results not reproduced (opposite of SUCCESS) + FAILURE = 5 + + +def configure_logging(log_level): + """Configure log output level and formatting. + + Args: + log_level (int): Set log level.
+ + Returns: + None + """ + # allow force enabling log_level via environment + if getenv("DEBUG", "0").lower() in ("1", "true"): + log_level = DEBUG + if log_level == DEBUG: + date_fmt = None + log_fmt = "%(asctime)s %(levelname).1s %(name)s | %(message)s" + else: + date_fmt = "%Y-%m-%d %H:%M:%S" + log_fmt = "[%(asctime)s] %(message)s" + basicConfig(format=log_fmt, datefmt=date_fmt, level=log_level) + + def grz_tmp(*subdir): - path = pathjoin(gettempdir(), "grizzly", *subdir) + path = pathjoin(getenv("GRZ_TMP", gettempdir()), "grizzly", *subdir) makedirs(path, exist_ok=True) return path diff --git a/grizzly/conftest.py b/grizzly/conftest.py new file mode 100644 index 00000000..ef7d76c1 --- /dev/null +++ b/grizzly/conftest.py @@ -0,0 +1,41 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +"""Common unit test fixtures for `grizzly`. +""" + +from pytest import fixture + +from .common.status import ReductionStatus, Status + + +@fixture +def patch_collector(mocker): + """Provide a mock Collector to avoid scanning for signatures on disk.""" + collector = mocker.patch("grizzly.common.report.Collector", autospec=True) + # don't search for signatures locally + collector.return_value.sigCacheDir = None + + +@fixture +def tmp_path_status_db(tmp_path, mocker): + """Use a temporary database file for testing.""" + mocker.patch.object(Status, "STATUS_DB", new=str(tmp_path / "status-tmp.db")) + + +@fixture +def tmp_path_reduce_status_db(tmp_path, mocker): + """Use a temporary database file for testing.""" + mocker.patch.object( + ReductionStatus, "STATUS_DB", new=str(tmp_path / "reduce-tmp.db") + ) + + +@fixture +def tmp_path_replay_status_db(tmp_path, mocker): + """Use a temporary database file for testing.""" + mocker.patch( + "grizzly.replay.replay.ReplayManager.STATUS_DB", + new=str(tmp_path / "replay-tmp.db"), + ) diff --git a/grizzly/main.py b/grizzly/main.py index dc418ec6..8a199275 100644 --- a/grizzly/main.py +++ b/grizzly/main.py @@ -2,147 +2,172 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
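Both helpers above are environment-sensitive, which the tests rely on; a small usage sketch (the /mnt/work path is hypothetical):

    from logging import INFO
    from os import environ

    from grizzly.common.utils import configure_logging, grz_tmp

    environ["DEBUG"] = "1"   # "1" or "true" forces DEBUG regardless of log_level
    configure_logging(INFO)  # effective level is DEBUG, with the verbose format

    environ["GRZ_TMP"] = "/mnt/work"  # hypothetical; overrides gettempdir()
    print(grz_tmp("reduce"))          # "/mnt/work/grizzly/reduce", created on demand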
-from logging import basicConfig, DEBUG, getLogger +from logging import DEBUG, getLogger +from os import getpid from sapphire import Sapphire -from .adapters import get as get_adapter -from .common import FilesystemReporter, FuzzManagerReporter, IOManager, S3FuzzManagerReporter +from .adapter import Adapter +from .common.plugins import load as load_plugin +from .common.reporter import ( + FilesystemReporter, + FuzzManagerReporter, + S3FuzzManagerReporter, +) +from .common.utils import TIMEOUT_DELAY, Exit, configure_logging from .session import Session -from .target import load as load_target, TargetLaunchError, TargetLaunchTimeout - +from .target import Target, TargetLaunchError, TargetLaunchTimeout __author__ = "Tyson Smith" __credits__ = ["Tyson Smith", "Jesse Schwartzentruber"] -log = getLogger("grizzly") # pylint: disable=invalid-name +LOG = getLogger(__name__) -def configure_logging(log_level): - if log_level == DEBUG: - log_fmt = "%(levelname).1s %(name)s [%(asctime)s] %(message)s" - else: - log_fmt = "[%(asctime)s] %(message)s" - basicConfig(format=log_fmt, datefmt="%Y-%m-%d %H:%M:%S", level=log_level) def main(args): configure_logging(args.log_level) - log.info("Starting Grizzly") + LOG.info("Starting Grizzly (%d)", getpid()) if args.fuzzmanager: FuzzManagerReporter.sanity_check(args.binary) elif args.s3_fuzzmanager: S3FuzzManagerReporter.sanity_check(args.binary) + if args.headless: + LOG.info("Running browser headless (%s)", args.headless) if args.ignore: - log.info("Ignoring: %s", ", ".join(args.ignore)) - if args.xvfb: - log.info("Running with Xvfb") - if args.valgrind: - log.info("Running with Valgrind. This will be SLOW!") - if args.rr: - log.info("Running with RR") + LOG.info("Ignoring: %s", ", ".join(args.ignore)) + if args.pernosco: + LOG.info("Running with RR (Pernosco mode)") + elif args.rr: + LOG.info("Running with RR") + elif args.valgrind: + LOG.info("Running with Valgrind. 
This will be SLOW!") adapter = None - iomanager = None session = None target = None try: - log.debug("initializing the IOManager") - # TODO: move this into Session - iomanager = IOManager(report_size=(max(args.cache, 0) + 1)) - - log.debug("initializing Adapter %r", args.adapter) - adapter = get_adapter(args.adapter)() + LOG.debug("initializing Adapter %r", args.adapter) + adapter = load_plugin(args.adapter, "grizzly_adapters", Adapter)(args.adapter) - if adapter.TEST_DURATION >= args.timeout: - raise RuntimeError("Test duration (%ds) should be less than browser timeout (%ds)" % ( - adapter.TEST_DURATION, args.timeout)) + # test case time limit and timeout sanity checking + if args.time_limit: + time_limit = args.time_limit + else: + assert adapter.TIME_LIMIT >= 1 + time_limit = adapter.TIME_LIMIT + if args.timeout: + timeout = args.timeout + else: + timeout = time_limit + TIMEOUT_DELAY + LOG.info("Using test time limit: %ds, timeout: %ds", time_limit, timeout) + if timeout < time_limit: + LOG.error("Timeout must be greater than or equal to the test time limit") + return Exit.ARGS + if adapter.HARNESS_FILE and time_limit == timeout: + LOG.warning( + "To avoid relaunches due to tests failing to close" + " themselves, use a timeout greater than the time limit" + ) if adapter.RELAUNCH > 0: - log.debug("relaunch (%d) set in Adapter", adapter.RELAUNCH) + LOG.info("Relaunch (%d) set in Adapter", adapter.RELAUNCH) relaunch = adapter.RELAUNCH else: relaunch = args.relaunch - log.debug("initializing the Target") - target = load_target(args.platform)( + LOG.debug("initializing the Target %r", args.platform) + target = load_plugin(args.platform, "grizzly_targets", Target)( args.binary, - args.extension, args.launch_timeout, args.log_limit, args.memory, - relaunch, + headless=args.headless, + pernosco=args.pernosco, rr=args.rr, valgrind=args.valgrind, - xvfb=args.xvfb) - if args.prefs: - target.prefs = args.prefs - log.info("Using prefs %r", args.prefs) + ) + # add specified assets + target.assets.add_batch(args.asset) + target.process_assets() adapter.monitor = target.monitor - if args.coverage and relaunch == 1 and target.forced_close: - # this is a workaround to avoid not dumping coverage - # GRZ_FORCED_CLOSE=0 is also an option but the browser MUST - # close itself.
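The time limit/timeout resolution added here boils down to a small pure function; a sketch using only the TIMEOUT_DELAY constant from grizzly/common/utils.py (resolve_timeouts itself is illustrative, not part of the patch):

    from grizzly.common.utils import TIMEOUT_DELAY  # 15

    def resolve_timeouts(arg_time_limit, arg_timeout, adapter_time_limit=30):
        # the CLI value wins, otherwise fall back to the Adapter's TIME_LIMIT
        time_limit = arg_time_limit or adapter_time_limit
        # the timeout defaults to the time limit plus a fixed grace period
        timeout = arg_timeout or (time_limit + TIMEOUT_DELAY)
        # a timeout below the time limit can never be satisfied
        assert timeout >= time_limit
        return (time_limit, timeout)

    resolve_timeouts(None, None)  # -> (30, 45)
    resolve_timeouts(10, None)    # -> (10, 25)
    resolve_timeouts(10, 10)      # allowed, but with a harness it logs the warning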
- raise RuntimeError("Coverage must be run with --relaunch > 1") - - log.debug("calling adapter setup()") - adapter.setup(args.input, iomanager.server_map) - log.debug("configuring harness") - iomanager.harness = adapter.get_harness() - - log.debug("initializing the Reporter") + LOG.debug("initializing the Reporter") if args.fuzzmanager: - log.info("Results will be reported via FuzzManager") - reporter = FuzzManagerReporter(args.binary, tool=args.tool) + LOG.info("Results will be reported via FuzzManager") + reporter = FuzzManagerReporter(tool=args.tool) elif args.s3_fuzzmanager: - log.info("Results will be reported via FuzzManager w/ large attachments in S3") - reporter = S3FuzzManagerReporter(args.binary, tool=args.tool) + LOG.info( + "Results will be reported via FuzzManager w/ large attachments in S3" + ) + reporter = S3FuzzManagerReporter(tool=args.tool) else: - reporter = FilesystemReporter() - log.info("Results will be stored in %r", reporter.report_path) + reporter = FilesystemReporter(args.logs / "results") + LOG.info("Results will be stored in %r", str(reporter.report_path)) + reporter.display_logs = args.smoke_test or reporter.display_logs + + # make sure an iteration limit is set if smoke_test is True + iteration_limit = (args.limit or 10) if args.smoke_test else args.limit + if iteration_limit: + LOG.info("%r iteration(s) will be attempted", iteration_limit) + if args.runtime: + LOG.info("Runtime is limited to %rs", args.runtime) # set 'auto_close=1' so the client error pages (code 4XX) will # call 'window.close()' after a second. # launch http server used to serve test cases - log.debug("starting Sapphire server") - with Sapphire(auto_close=1, timeout=args.timeout) as server: + LOG.debug("starting Sapphire server") + with Sapphire(auto_close=1, timeout=timeout) as server: target.reverse(server.port, server.port) - log.debug("initializing the Session") + LOG.debug("initializing the Session") session = Session( adapter, - iomanager, reporter, server, target, - coverage=args.coverage) + coverage=args.coverage, + enable_profiling=args.enable_profiling, + relaunch=relaunch, + report_limit=args.limit_reports, + report_size=args.collect, + ) if args.log_level == DEBUG or args.verbose: display_mode = Session.DISPLAY_VERBOSE else: display_mode = Session.DISPLAY_NORMAL - session.run(args.ignore, display_mode=display_mode) + session.run( + args.ignore, + time_limit, + input_path=str(args.input), + iteration_limit=iteration_limit, + result_limit=1 if args.smoke_test else 0, + runtime_limit=args.runtime, + display_mode=display_mode, + launch_attempts=args.launch_attempts, + ) except KeyboardInterrupt: - log.info("Ctrl+C detected.") - return Session.EXIT_ABORT + LOG.info("Ctrl+C detected.") + return Exit.ABORT - except (TargetLaunchError, TargetLaunchTimeout): - return Session.EXIT_LAUNCH_FAILURE + except (TargetLaunchError, TargetLaunchTimeout) as exc: + LOG.error(str(exc)) + return Exit.LAUNCH_FAILURE finally: - log.warning("Shutting down...") + LOG.info("Shutting down...") if session is not None: - log.debug("calling session.close()") + LOG.debug("calling session.close()") session.close() if target is not None: - log.debug("calling target.cleanup()") + LOG.debug("calling target.cleanup()") target.cleanup() if adapter is not None: - log.debug("calling adapter.cleanup()") + LOG.debug("calling adapter.cleanup()") adapter.cleanup() - if iomanager is not None: - log.debug("calling iomanager.cleanup()") - iomanager.cleanup() - log.info("Done.") + LOG.info("Done.") - return 
Session.EXIT_SUCCESS + if session and session.status.results.total > 0: + return Exit.ERROR + return Exit.SUCCESS diff --git a/grizzly/reduce/__init__.py b/grizzly/reduce/__init__.py index a7af2a63..2d45e7cc 100644 --- a/grizzly/reduce/__init__.py +++ b/grizzly/reduce/__init__.py @@ -2,26 +2,10 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -import os -import re +"""Grizzly reducer module. +""" -__all__ = ("ReductionJob",) +__all__ = ("ReduceManager", "ReduceArgs") - -def testcase_contents(path="."): - for dir_name, _, dir_files in os.walk(path): - arc_path = os.path.relpath(dir_name, path) - # skip tmp folders - if re.match(r"^tmp.+$", arc_path.split(os.sep, 1)[0]) is not None: - continue - for file_name in dir_files: - # skip core files - if re.match(r"^core.\d+$", file_name) is not None: - continue - if arc_path == ".": - yield file_name - else: - yield os.path.join(arc_path, file_name) - - -from .reduce import ReductionJob # noqa pylint: disable=wrong-import-position +from .args import ReduceArgs +from .core import ReduceManager diff --git a/grizzly/reduce/__main__.py b/grizzly/reduce/__main__.py index 3fe92b5b..b02fc9d9 100644 --- a/grizzly/reduce/__main__.py +++ b/grizzly/reduce/__main__.py @@ -2,10 +2,9 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -from sys import exit as sysexit +"""Grizzly reducer CLI. +""" +from . import ReduceManager +from .args import ReduceArgs -from .args import ReducerArgs -from .reduce import ReductionJob - - -sysexit(ReductionJob.main(ReducerArgs().parse_args())) +raise SystemExit(ReduceManager.main(ReduceArgs().parse_args())) diff --git a/grizzly/reduce/args.py b/grizzly/reduce/args.py index 4f555cdb..90486fff 100644 --- a/grizzly/reduce/args.py +++ b/grizzly/reduce/args.py @@ -2,142 +2,114 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -from os.path import isdir, isfile +"""CLI argument parsing for Grizzly reduction. +""" +from logging import getLogger +from pathlib import Path -from .reduce import ReductionJob -from .strategies import strategies_by_name -from ..args import CommonArgs +from ..common.reporter import Quality +from ..replay.args import ReplayArgs +from .strategies import DEFAULT_STRATEGIES, STRATEGIES +LOG = getLogger(__name__) -class ReducerArgs(CommonArgs): + +class ReduceArgs(ReplayArgs): + """Argument parser for `grizzly.reduce`. + + Takes all arguments defined for `grizzly.replay`, and a few specific to reduction. 
+ """ def __init__(self): - super(ReducerArgs, self).__init__() - self.parser.add_argument( - "input", - help="Test case or directory containing test cases") - - replay_args = self.parser.add_argument_group("Reduce Arguments") - replay_args.add_argument( - "--any-crash", action="store_true", - help="Any crash is interesting, not only crashes which match the original first crash") - replay_args.add_argument( - "--environ", - help="DEPRICATED: File containing line separated environment variables (VAR=value)" \ - "to be set in the firefox process.") - replay_args.add_argument( - "--idle-threshold", type=int, default=25, - help="CPU usage threshold to mark the process as idle (default: %(default)s)") - replay_args.add_argument( - "--idle-timeout", type=int, default=60, - help="Number of seconds to wait before polling testcase for idle (default: %(default)s)") - replay_args.add_argument( - "--min-crashes", type=int, default=1, - help="Require the testcase to crash n times before accepting the result. (default: %(default)sx)") - replay_args.add_argument( - "--no-analysis", action="store_true", - help="Disable analysis to auto-set --repeat/--min-crashes.") - replay_args.add_argument( - "--no-cache", action="store_true", - help="Disable testcase caching") - replay_args.add_argument( - "--no-harness", action="store_true", - help="Don't use the harness for sapphire redirection") - replay_args.add_argument( - "--reduce-file", - help="Value passed to lithium's --testcase option, needed for testcase cache " \ - "(default: input param)") - replay_args.add_argument( - "--repeat", type=int, default=1, - help="Try to run the testcase multiple times, for intermittent testcases (default: %(default)sx)") - replay_args.add_argument( - "--sig", - help="Signature (JSON) file to match.") - replay_args.add_argument( - "--skip", type=int, default=0, - help="Return interesting = False for the first n reductions (default: %(default)s)") - replay_args.add_argument( - "--static-timeout", action="store_true", dest="fixed_timeout", - help="Disable automatically updating the iteration timeout.") - replay_args.add_argument( - "--strategy", nargs="+", default=list(), metavar="STRATEGY", dest="strategies", + """Initialize argument parser.""" + super().__init__() + + # these arguments have other defaults vs how they are defined in ReplayArgs + self.parser.set_defaults(include_test=True, logs=Path.cwd()) + + reduce_args = self.parser.add_argument_group("Reduce Arguments") + reduce_args.add_argument( + "--no-analysis", + action="store_true", + help="Disable analysis to auto-set --repeat/--min-crashes.", + ) + reduce_args.add_argument( + "--report-period", + type=int, + help="Periodically report the best testcase for long-running strategies." + " (value in seconds, default: no)", + ) + reduce_args.add_argument( + "--static-timeout", + action="store_true", + help="Disable automatically updating the iteration timeout.", + ) + reduce_args.add_argument( + "--strategy", + nargs="+", + choices=STRATEGIES, + default=DEFAULT_STRATEGIES, + metavar="STRATEGY", + dest="strategies", help="One or more strategies (space-separated). 
Available: %s (default: %s)" - % (" ".join(sorted(strategies_by_name())), " ".join(ReductionJob.DEFAULT_STRATEGIES))) - - self.parser.epilog = "For addition help check out the wiki:" \ - " https://github.com/MozillaSecurity/grizzly/wiki" + % (" ".join(sorted(STRATEGIES)), " ".join(DEFAULT_STRATEGIES)), + ) def sanity_check(self, args): - super(ReducerArgs, self).sanity_check(args) - - if "input" not in self._sanity_skip: - if not (isdir(args.input) - or (isfile(args.input) and (args.input.lower().endswith(".zip") - or args.input.lower().endswith(".html")))): - self.parser.error("Testcase should be a folder, zip, or html file") - - if args.sig is not None and not isfile(args.sig): - self.parser.error("file not found: %r" % args.sig) - - if args.repeat < 1: - self.parser.error("'--repeat' value must be positive") - - if args.min_crashes < 1: - self.parser.error("'--min-crashes' value must be positive") - - if args.environ is not None and not isfile(args.environ): - self.parser.error("file not found: %r" % args.environ) - - if args.strategies: - known_strategies = set(strategies_by_name()) - for strategy in args.strategies: - if strategy not in known_strategies: - self.parser.error("invalid strategy: %s" % (strategy,)) - else: - args.strategies = None - - if args.reduce_file is None: - args.reduce_file = args.input - - -class ReducerFuzzManagerIDArgs(ReducerArgs): - + """Sanity check reducer args. + + Arguments: + args (argparse.Namespace): Result from `parse_args()`. + + Raises: + SystemExit: on error, `ArgumentParser.error()` is called, which will exit. + """ + super().sanity_check(args) + + if args.report_period is not None: + if args.report_period <= 0: + self.parser.error("Invalid --report-period (value is in seconds)") + if args.report_period < 60: + self.parser.error("Very short --report-period (value is in seconds)") + + if not args.no_analysis: + # analysis is enabled, but repeat/min_crashes specified. doesn't make sense + errors = [] + if args.repeat != self.parser.get_default("repeat"): + errors.append("'--repeat'") + if args.min_crashes != self.parser.get_default("min_crashes"): + errors.append("'--min-crashes'") + if errors: + error_str = " and ".join(errors) + LOG.warning( + "%s specified, with analysis enabled, they will be ignored", + error_str, + ) + + +class ReduceFuzzManagerIDArgs(ReduceArgs): def __init__(self): - super(ReducerFuzzManagerIDArgs, self).__init__() - - # madhax alert! - # - # We need to modify the meaning of the 'input' positional to accept an int ID instead of a - # local testcase. This is not possible with the public argparse API. - # - # refs: https://stackoverflow.com/questions/32807319/disable-remove-argument-in-argparse - # https://bugs.python.org/issue19462 - - # look up the action for the positional `input` arg - action = None - for arg in self.parser._actions: - if arg.dest == "input" and not arg.option_strings: - action = arg - break - assert action is not None - - # modify it's type and help string - action.type = int - action.help = "FuzzManager ID to reduce" - - # ... 
and Bob's your uncle - self._sanity_skip.add("input") + """Initialize argument parser.""" + super().__init__() + self.update_arg("input", int, "FuzzManager ID to reduce") + self.parser.add_argument( + "--no-repro-quality", + choices=[x.value for x in Quality], + default=Quality.NOT_REPRODUCIBLE.value, + type=int, + help="Quality value reported when issue does not reproduce " + "(default: %(default)s).", + ) -class ReducerFuzzManagerIDQualityArgs(ReducerFuzzManagerIDArgs): +class ReduceFuzzManagerIDQualityArgs(ReduceFuzzManagerIDArgs): def __init__(self): - super(ReducerFuzzManagerIDQualityArgs, self).__init__() - self.parser.add_argument("--quality", type=int, - help="Only try crashes with a given quality value") - - def sanity_check(self, args): - super(ReducerFuzzManagerIDQualityArgs, self).sanity_check(args) - - if args.quality is not None and args.quality < 0: - self.parser.error("'--quality' value must be positive or zero") + """Initialize argument parser.""" + super().__init__() + self.parser.add_argument( + "--quality", + choices=[x.value for x in Quality], + type=int, + help="Only try crashes with a given quality value.", + ) diff --git a/grizzly/reduce/bucket.py b/grizzly/reduce/bucket.py index a64ed40c..409e184e 100644 --- a/grizzly/reduce/bucket.py +++ b/grizzly/reduce/bucket.py @@ -2,135 +2,10 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -import collections -import json -import logging -import os -import sys -import tempfile - -from Collector.Collector import Collector - -from .args import ReducerFuzzManagerIDQualityArgs -from .crash import CrashReductionJob - - -LOG = logging.getLogger("grizzly.reduce.bucket") - - -def bucket_crashes(bucket_id, quality_filter): - """Fetch all crash IDs for the specified FuzzManager bucket. - Only crashes with testcases are returned. - - Args: - bucket_id (int): ID of the requested bucket on the server side - quality_filter (int): Filter crashes by quality value (None for all) - - Returns: - generator: generator of crash ID (int) - """ - coll = Collector() - - def _get_results(endpoint, params=None): - """ - Function to get paginated results from FuzzManager - - Args: - endpoint (str): FuzzManager REST API to query (eg. "crashes"). 
- params (dict): Params to pass through to requests.get - - Returns: - generator: objects returned by FuzzManager (as dicts) - """ - LOG.debug("first request to /%s/", endpoint) - - url = "%s://%s:%d/crashmanager/rest/%s/" \ - % (coll.serverProtocol, coll.serverHost, coll.serverPort, endpoint) - - response = coll.get(url, params=params).json() - - while True: - LOG.debug("got %d/%d %s", len(response["results"]), response["count"], endpoint) - while response["results"]: - yield response["results"].pop() - - if response["next"] is None: - break - - LOG.debug("next request to /%s/", endpoint) - response = coll.get(response["next"]).json() - - # Get all crashes for bucket - query_args = [ - ("op", "AND"), - ("bucket", bucket_id), - ] - if quality_filter is not None: - query_args.append(("testcase__quality", quality_filter)) - query = json.dumps(collections.OrderedDict(query_args)) - - n_yielded = 0 - for crash in _get_results("crashes", params={"query": query, "include_raw": "0"}): - - if not crash["testcase"]: - LOG.warning("crash %d has no testcase, skipping", crash["id"]) - continue - - n_yielded += 1 - LOG.debug("yielding crash #%d", n_yielded) - yield crash["id"] - - -def get_signature(bucket_id): - """ - Download the signature for the specified FuzzManager bucket. - - Args: - bucket_id (int): ID of the requested bucket on the server side - - Returns: - str: temp filename to the JSON signature. caller must remove filename when done - """ - coll = Collector() - - url = "%s://%s:%d/crashmanager/rest/buckets/%d/" \ - % (coll.serverProtocol, coll.serverHost, coll.serverPort, bucket_id) - - response = coll.get(url).json() - - sig_fd, sig_fn = tempfile.mkstemp(suffix=".json") - with os.fdopen(sig_fd, "w") as sig_fp: - sig_fp.write(response["signature"]) - - return sig_fn - - -def main(args): - LOG.info("Trying all crashes in bucket %d until one reduces", args.input) - - # if no signature specified, download the signature from FM - rm_sig = False - if not args.sig: - args.sig = get_signature(args.input) - rm_sig = True - - try: - for crash_id in bucket_crashes(args.input, args.quality): - - # reduce.main expects input to be a crash ID - args.input = crash_id - - if CrashReductionJob.main(args) == 0: - # success! - return 0 - - # none of the testcases reduced - return 1 - - finally: - if rm_sig: - os.unlink(args.sig) - if __name__ == "__main__": - sys.exit(main(ReducerFuzzManagerIDQualityArgs().parse_args())) + from ..replay.bucket import bucket_main + from .args import ReduceFuzzManagerIDQualityArgs + from .crash import main + + raise SystemExit(bucket_main(ReduceFuzzManagerIDQualityArgs().parse_args(), main)) diff --git a/grizzly/reduce/conftest.py b/grizzly/reduce/conftest.py new file mode 100644 index 00000000..2679facf --- /dev/null +++ b/grizzly/reduce/conftest.py @@ -0,0 +1,34 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# pylint: disable=protected-access +"""Common unit test fixtures for `grizzly.reduce`. +""" + +import pytest + + +@pytest.fixture +def tmp_path_fm_config(tmp_path, mocker): + """Ensure fm config is always read from tmp_path so ~/.fuzzmanagerconf + can't be used by accident. 
+ """ + mocker.patch( + "grizzly.reduce.core.FuzzManagerReporter.FM_CONFIG", + new=str(tmp_path / ".fuzzmanagerconf"), + ) + (tmp_path / ".fuzzmanagerconf").touch() + + +@pytest.fixture +def reporter_sequential_strftime(mocker): + """Make `strftime` in `FilesystemReporter` return sequential values. + This ensures ever report gets a unique folder and won't overwrite another. + """ + prefix = mocker.patch("grizzly.common.report.strftime") + + def report_prefix(_): + return "%04d" % (prefix.call_count,) + + prefix.side_effect = report_prefix diff --git a/grizzly/reduce/core.py b/grizzly/reduce/core.py new file mode 100644 index 00000000..57a295f9 --- /dev/null +++ b/grizzly/reduce/core.py @@ -0,0 +1,911 @@ +# coding=utf-8 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +"""`ReduceManager` finds the smallest testcase(s) to reproduce an issue.""" +import json +import os +from itertools import chain +from locale import LC_ALL, setlocale +from logging import getLogger +from math import ceil, log +from pathlib import Path +from time import time + +from FTB.Signatures.CrashInfo import CrashSignature + +from sapphire import Sapphire + +from ..common.fuzzmanager import CrashEntry +from ..common.plugins import load as load_plugin +from ..common.reporter import FilesystemReporter, FuzzManagerReporter, Quality +from ..common.status import ReductionStatus +from ..common.status_reporter import ReductionStatusReporter +from ..common.storage import TestCaseLoadFailure +from ..common.utils import ConfigError, Exit, configure_logging, grz_tmp +from ..replay import ReplayManager +from ..target import Target, TargetLaunchError, TargetLaunchTimeout +from .exceptions import GrizzlyReduceBaseException, NotReproducible +from .strategies import STRATEGIES + +__author__ = "Jesse Schwartzentruber" +__credits__ = ["Jesse Schwartzentruber", "Tyson Smith"] + + +LOG = getLogger(__name__) + + +class ReduceManager: + """Manage reduction of one or more testcases to find the smallest testcase + that reproduces a given issue. + + Attributes: + ignore (list(str)): Classes of results to ignore (see `--ignore`). + server (sapphire.Sapphire): Server instance to serve testcases. + strategies (list(str)): List of strategies to use for reducing + testcases (in order). + target (grizzly.target.Target): Target instance to run testcases. + testcases (list(grizzly.common.storage.TestCase)): List of one or more Grizzly + testcases to reduce. 
+ """ + + ANALYSIS_ITERATIONS = 11 # number of iterations to analyze + # --min-crashes value when analysis is used and reliability is less than perfect + ANALYSIS_MIN_CRASHES = 1 + ANALYSIS_PERFECT_MIN_CRASHES = 2 # --min-crashes when reliability is perfect + # probability that successful reduction will observe the crash + ANALYSIS_TARGET_PROBABILITY = 0.95 + # to see the worst case, run the `repeat` calculation in run_reliability_analysis + # using `crashes_percent = 1.0/ANALYSIS_ITERATIONS` + + IDLE_DELAY_MIN = 10 + IDLE_DELAY_DURATION_MULTIPLIER = 1.5 + ITER_TIMEOUT_MIN = 10 + ITER_TIMEOUT_DURATION_MULTIPLIER = 2 + + def __init__( + self, + ignore, + server, + target, + testcases, + strategies, + log_path, + any_crash=False, + expect_hang=False, + idle_delay=0, + idle_threshold=0, + reducer_crash_id=None, + relaunch=1, + report_period=None, + report_to_fuzzmanager=False, + signature=None, + signature_desc=None, + static_timeout=False, + tool=None, + use_analysis=True, + use_harness=True, + ): + """Initialize reduction manager. Many arguments are common with `ReplayManager`. + + Args: + ignore (list(str)): Value for `self.ignore` attribute. + server (sapphire.Sapphire): Value for `self.server` attribute. + target (grizzly.target.Target): Value for `self.target` attribute. + testcases (list(grizzly.common.storage.TestCase)): + Value for `self.testcases` attribute. + strategies (list(str)): Value for `self.strategies` attribute. + log_path (Path or str): Path to save results when reporting to filesystem. + any_crash (bool): Accept any crash when reducing, not just those matching + the specified or first observed signature. + expect_hang (bool): Attempt to reduce a test that triggers a hang. + idle_delay (int): Number of seconds to wait before polling for idle. + idle_threshold (int): CPU usage threshold to mark the process as idle. + relaunch (int): Maximum number of iterations performed by Runner + before Target should be relaunched. + report_period (int or None): Periodically report best results for + long-running strategies. + report_to_fuzzmanager (bool): Report to FuzzManager rather than filesystem. + signature (FTB.Signatures.CrashInfo.CrashSignature or None): + Signature for accepting crashes. + signature_desc (str): Short description of the given signature. + static_timeout (bool): Use only specified timeouts (`--timeout` and + `--idle-delay`), even if testcase appears to need + less time. + tool (str or None): Override tool when reporting to FuzzManager. + use_analysis (bool): Analyse reliability of testcase before running each + reduction strategy. + use_harness (bool): Whether to allow use of harness when navigating + between testcases. 
+ """ + self.ignore = ignore + self.server = server + self.strategies = strategies + self.target = target + self.testcases = testcases + self._any_crash = any_crash + self._expect_hang = expect_hang + self._idle_delay = idle_delay + self._idle_threshold = idle_threshold + self._log_path = Path(log_path) if isinstance(log_path, str) else log_path + # these parameters may be overwritten during analysis, so keep a copy of them + self._original_relaunch = relaunch + self._original_use_harness = use_harness + self._report_to_fuzzmanager = report_to_fuzzmanager + self._report_periodically = report_period + self._report_tool = tool + self._signature = signature + self._signature_desc = signature_desc + self._static_timeout = expect_hang or static_timeout + self._status = ReductionStatus.start( + strategies=strategies, + testcase_size_cb=self.testcase_size, + crash_id=reducer_crash_id, + tool=tool, + ) + self._use_analysis = use_analysis + self._use_harness = use_harness + + def update_timeout(self, results): + """Tune idle/server timeout values based on actual duration of expected results. + + Expected durations will be updated if the actual duration is much lower. + + Timeouts are not updated in three cases: + + - `static_timeout=True` is passed to constructor (`--static-timeout`), + - `any_crash=True` is passed to constructor (`--any-crash`), + - Target is running under valgrind (`--valgrind`). + + Arguments: + results (grizzly.replay.ReplayResult): + Observed results. Any given expected results may affect the idle delay + and sapphire timeout. + + Returns: + None + """ + # TODO: properly handle test duration and timeout + assert self._static_timeout or not self._expect_hang + if ( + self._static_timeout + or self._any_crash + or getattr(self.target, "use_valgrind", False) + ): + # the amount of time it can take to replay a test case can vary + # when under Valgrind so do not update the timeout in that case + + # when any_crash is given, crashes may be completely unrelated (all are + # expected), so lowering timeout or idle delay will only hide crashes + return + + durations = list( + chain.from_iterable( + result.durations for result in results if result.expected + ) + ) + if not durations: + # no expected results + return + run_time = max(durations) + + # If `run_time * multiplier` is less than idle poll delay, update it + LOG.debug("Run time %r", run_time) + new_idle_delay = max( + self.IDLE_DELAY_MIN, + min(run_time * self.IDLE_DELAY_DURATION_MULTIPLIER, self._idle_delay), + ) + if new_idle_delay < self._idle_delay: + LOG.info("Updating poll delay to: %r", new_idle_delay) + self._idle_delay = new_idle_delay + # If `run_time * multiplier` is less than iter_timeout, update it + # in other words, decrease the timeout if this ran in less than half the timeout + new_iter_timeout = max( + self.ITER_TIMEOUT_MIN, + min(run_time * self.ITER_TIMEOUT_DURATION_MULTIPLIER, self.server.timeout), + ) + if new_iter_timeout < self.server.timeout: + LOG.info("Updating max timeout to: %r", new_iter_timeout) + self.server.timeout = new_iter_timeout + + def _on_replay_iteration(self): + self._status.iterations += 1 + self._status.report() + + def run_reliability_analysis(self): + """Run several analysis passes of the current testcase to find `run` parameters. + + The number of repetitions and minimum number of crashes are calculated to + maximize the chances of observing the expected crash. + + Arguments: + None + + Returns: + tuple(int, int): Values for `repeat` and `min_crashes` resulting from + analysis. 
+ """ + self._status.report(force=True) + harness_last_crashes = 0 + harness_crashes = 0 + non_harness_crashes = 0 + + # Reset parameters. + # Use repeat=1 & relaunch=ITERATIONS because this is closer to how we will run + # post-analysis. + # We're only using repeat=1 instead of repeat=ITERATIONS so we can get feedback + # on every call to interesting. + + # N.B. We only use `last_test_only` if `len(self.testcases) > 1` .. + # if `self.testcases` only has 1 entry to begin with, we don't need + # `last_test_only` to trim it + for (use_harness, last_test_only) in ( + (True, True), + (True, False), + # only one of the two use_harness=False cases will run: + # input is len(self.testcases)>1 and we will only try the last testcase + (False, True), + # input is len(self.testcases)==1 already and there's no need to trim + (False, False), + ): + if use_harness and (not self._original_use_harness or harness_crashes): + # Don't test with harness again if we already found crashes with the + # harness (last_test_only) or if it was disabled by command-line. + continue + if not use_harness and harness_crashes >= self.ANALYSIS_ITERATIONS / 2: + # Don't test without harness if harness found > 50% crashes + continue + if last_test_only and len(self.testcases) == 1: + # Only set `last_test_only` if we initially have more than one testcase + continue + if not use_harness and (not last_test_only and len(self.testcases) > 1): + # Can't run without harness if we have more than one testcase + # (`last_test_only` will run) + continue + + if use_harness and (last_test_only or len(self.testcases) == 1): + relaunch = self.ANALYSIS_ITERATIONS + else: + relaunch = 1 + + with ReplayManager( + self.ignore, + self.server, + self.target, + any_crash=self._any_crash, + relaunch=relaunch, + signature=self._signature, + use_harness=use_harness, + ) as replay: + LOG.info( + "Running for %d iterations to assess reliability %s harness.", + self.ANALYSIS_ITERATIONS, + "using" if use_harness else "without", + ) + testcases = self.testcases + if last_test_only: + if use_harness: + LOG.warning("Checking reliability with only the last testcase.") + else: + LOG.warning( + "Only the last testcase of %d given will be used to " + "assess reliability without harness.", + len(testcases), + ) + testcases = [testcases[-1]] + results = replay.run( + testcases, + self.server.timeout, + repeat=self.ANALYSIS_ITERATIONS, + min_results=1, + exit_early=False, + idle_delay=self._idle_delay, + idle_threshold=self._idle_threshold, + on_iteration_cb=self._on_replay_iteration, + ) + try: + crashes = sum(x.count for x in results if x.expected) + if crashes and not self._any_crash and self._signature_desc is None: + first_expected = next( + (report for report in results if report.expected), None + ) + self._signature_desc = ( + first_expected.report.crash_info.createShortSignature() + ) + self.report( + [result for result in results if not result.expected], + testcases, + ) + if use_harness: + # set harness_crashes in both cases (last_test True/False) + # we only want to iterate through all testcases if the last + # testcase alone never reproduced (crashes == 0). 
+ harness_crashes = crashes + if last_test_only: + harness_last_crashes = crashes + else: + non_harness_crashes = crashes + finally: + for result in results: + result.report.cleanup() + reliability = crashes / self.ANALYSIS_ITERATIONS + desc = ("using" if use_harness else "without") + " harness" + if last_test_only: + desc += "/last test only" + else: + desc += "/all tests" + LOG.info( + "Testcase was interesting %0.1f%% of %d attempts %s.", + 100.0 * reliability, + self.ANALYSIS_ITERATIONS, + desc, + ) + if use_harness and last_test_only: + key = "last test" + elif use_harness: + key = "all tests" + else: + key = "no harness" + self._status.analysis[key] = reliability + # ensure same signature is always used + self._signature = replay.signature + + if not (harness_crashes or non_harness_crashes): + raise NotReproducible("Did not reproduce during analysis") + + # if harness is selected, we'll only use the last testcase + if harness_last_crashes: + harness_crashes = harness_last_crashes + + # should we use the harness? go with harness unless no-harness crashed 50% more + self._use_harness = not ( + non_harness_crashes > harness_crashes + and ( + harness_crashes == 0 + or (non_harness_crashes - harness_crashes) / harness_crashes >= 0.5 + ) + ) + + if (self._use_harness and harness_last_crashes) or ( + not self._use_harness and len(self.testcases) > 1 + ): + LOG.warning( + "Last testcase %s harness was selected, other %d " + "testcases in the original will be ignored.", + "with" if self._use_harness else "without", + len(self.testcases) - 1, + ) + while len(self.testcases) > 1: + self.testcases.pop(0).cleanup() + + crashes_percent = ( + harness_crashes if self._use_harness else non_harness_crashes + ) / self.ANALYSIS_ITERATIONS + + # adjust repeat/min-crashes depending on how reliable the testcase was + if abs(crashes_percent - 1) < 0.01: + min_crashes = self.ANALYSIS_PERFECT_MIN_CRASHES + else: + min_crashes = self.ANALYSIS_MIN_CRASHES + # crashes_percent is max 99.9% to avoid domain errors + repeat = int( + ceil( + log( + 1 - self.ANALYSIS_TARGET_PROBABILITY, + 1 - min(crashes_percent, 0.9999), + ) + ) + * min_crashes + ) + + LOG.info("Analysis results:") + if harness_crashes >= self.ANALYSIS_ITERATIONS / 2: + LOG.info( + "* testcase was better than 50% reliable with the harness " + "(--no-harness not assessed)" + ) + elif harness_crashes == non_harness_crashes: + LOG.info("* testcase was equally reliable with/without the harness") + elif not self._original_use_harness: + LOG.info("* --no-harness was already set") + else: + LOG.info( + "* testcase was %s reliable with the harness", + "more" if harness_crashes > non_harness_crashes else "less", + ) + return (repeat, min_crashes) + + def testcase_size(self): + """Calculate the current testcase size. + + Returns: + int: Current size of the testcase(s). + """ + return sum(tc.data_size for tc in self.testcases) + + def run(self, repeat=1, launch_attempts=3, min_results=1): + """Run testcase reduction. + + Args: + repeat (int): Maximum number of times to run the TestCase. + launch_attempts (int): Number of attempts to launch the browser. + min_results (int): Minimum number of results needed before run can + be considered successful. + + Returns: + int: One of the Exit enum values. 
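The repeat expression in run_reliability_analysis() above solves 1 - (1 - p)**n >= ANALYSIS_TARGET_PROBABILITY for n, where p is the observed crash rate; a worked example:

    from math import ceil, log

    p = 6 / 11                      # e.g. 6 crashes in ANALYSIS_ITERATIONS (11) runs
    n = ceil(log(1 - 0.95, 1 - p))  # log base (1 - p) of 0.05 -> 4 attempts
    repeat = n * 1                  # ANALYSIS_MIN_CRASHES == 1 when p < 1
    # a perfectly reliable testcase uses ANALYSIS_PERFECT_MIN_CRASHES == 2 instead,
    # and the min(p, 0.9999) clamp above avoids a log() domain error when p == 1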
+ """ + any_success = False + sig_given = self._signature is not None + last_tried = None + self._status.record("init") + # record total stats overall so that any time missed by individual milestones + # will still be included in the total + with self._status.measure("final"): + if self._use_analysis: + with self._status.measure("analysis"): + (repeat, min_results) = self.run_reliability_analysis() + any_success = True # analysis ran and didn't raise + # multi part test cases should always use relaunch == 1 + # since that can mean a delay is required + if self._use_harness and len(self.testcases) == 1: + relaunch = min(self._original_relaunch, repeat) + else: + relaunch = 1 + LOG.info( + "Repeat: %d, Minimum crashes: %d, Relaunch %d", + repeat, + min_results, + relaunch, + ) + self._status.run_params["harness"] = self._use_harness + self._status.run_params["min crashes"] = min_results + self._status.run_params["relaunch"] = relaunch + self._status.run_params["repeat"] = repeat + + for strategy_no, strategy in enumerate(self.strategies, start=1): + self._status.current_strategy_idx = strategy_no + LOG.info("") + LOG.info( + "Using strategy %s (%d/%d)", + strategy, + strategy_no, + len(self.strategies), + ) + replay = ReplayManager( + self.ignore, + self.server, + self.target, + any_crash=self._any_crash, + relaunch=relaunch, + signature=self._signature, + use_harness=self._use_harness, + ) + strategy = STRATEGIES[strategy](self.testcases) + if last_tried is not None: + strategy.update_tried(last_tried) + last_tried = None + + strategy_last_report = time() + strategy_stats = self._status.measure(strategy.name) + best_results = [] + other_results = {} + try: + with replay, strategy, strategy_stats: + self._status.report(force=True) + for reduction in strategy: + keep_reduction = False + results = [] + try: + # reduction is a new list of testcases to be + # replayed + results = replay.run( + reduction, + self.server.timeout, + expect_hang=self._expect_hang, + idle_delay=self._idle_delay, + idle_threshold=self._idle_threshold, + launch_attempts=launch_attempts, + min_results=min_results, + repeat=repeat, + on_iteration_cb=self._on_replay_iteration, + ) + self._status.attempts += 1 + self.update_timeout(results) + # get the first expected result (if any), + # and update the strategy + first_expected = next( + (report for report in results if report.expected), + None, + ) + success = first_expected is not None + if success: + self._status.successes += 1 + if ( + not self._any_crash + and self._signature_desc is None + ): + self._signature_desc = ( + # pylint: disable=line-too-long + first_expected.report.crash_info.createShortSignature() # noqa: E501 + ) + self._status.report() + served = None + if success and not self._any_crash: + served = first_expected.served + strategy.update(success, served=served) + if strategy.name == "check" and not success: + raise NotReproducible("Not reproducible at 'check'") + any_success = any_success or success + # if the reduction reproduced, + # update self.testcases (new best) + if success: + LOG.info("Reduction succeeded") + for testcase in self.testcases: + testcase.cleanup() + # add target assets to test cases + if not self.target.assets.is_empty(): + for test in reduction: + test.assets = self.target.assets + # add target environment variables + if self.target.filtered_environ(): + for test in reduction: + test.env_vars = ( + self.target.filtered_environ() + ) + self.testcases = reduction + keep_reduction = True + # cleanup old best results + for result in 
best_results: + result.report.cleanup() + # filter expected results out into `best_results` + best_results = [ + result for result in results if result.expected + ] + results = [ + result + for result in results + if not result.expected + ] + else: + LOG.info("Attempt failed") + + # if the reduction found other crashes, + # save those for reporting later + + # only save the smallest testcase that has found + # each result + for result in results: + other_result_exists = bool( + result.report.minor in other_results + ) + + is_smaller = None + if other_result_exists: + # we have a result already queued for this sig + # check size to see which to keep + reduction_size = sum( + tc.data_size for tc in reduction + ) + _, old_reduction = other_results[ + result.report.minor + ] + old_size = sum( + tc.data_size for tc in old_reduction + ) + is_smaller = bool(reduction_size < old_size) + + if not other_result_exists or is_smaller: + if other_result_exists: + # clean-up old result + old_result, old_reduction = other_results[ + result.report.minor + ] + old_result.report.cleanup() + for testcase in old_reduction: + testcase.cleanup() + # store this reduction for later reporting + # as the other result + other_results[result.report.minor] = ( + result, + [ + testcase.clone() + for testcase in reduction + ], + ) + + now = time() + if ( + self._report_periodically + and best_results + and now - strategy_last_report + > self._report_periodically + ): + self._status.last_reports = self.report( + best_results, + self.testcases, + ) + for result in best_results: + result.report.cleanup() + best_results = [] + strategy_last_report = now + LOG.info("Best results reported (periodic)") + + finally: + if not keep_reduction: + for testcase in reduction: + testcase.cleanup() + + # if self._signature was already set, this will do nothing + # otherwise, ensure the first found signature is used throughout + self._signature = replay.signature + + if best_results: + self._status.last_reports = self.report( + best_results, self.testcases + ) + for result, reduction in other_results.values(): + self.report([result], reduction) + + except KeyboardInterrupt: + if best_results: + self._status.last_reports = self.report( + best_results, self.testcases + ) + LOG.warning( + "Ctrl+C detected, best reduction so far reported as %r", + self._status.last_reports, + ) + raise + finally: + for result in best_results: + result.report.cleanup() + for result, reduction in other_results.values(): + result.report.cleanup() + for testcase in reduction: + testcase.cleanup() + + # store "tried" cache to pass to next strategy + last_tried = strategy.get_tried() + + # if we complete all strategies, mark the last reported crashes as reduced + if self._report_to_fuzzmanager and self._status.last_reports: + for crash_id in self._status.last_reports: + LOG.info( + "Updating crash %d to %s (Q%d)", + crash_id, + Quality.REDUCED.name, + Quality.REDUCED, + ) + CrashEntry(crash_id).testcase_quality = Quality.REDUCED.value + + # it's possible we made it this far without ever setting signature_desc. + # this is only possible if --no-analysis is given + # just give None instead of trying to format the CrashSignature + self._status.signature_info["any"] = self._any_crash + self._status.signature_info["description"] = self._signature_desc + self._status.signature_info["given"] = sig_given + + # log a summary of what was done. 
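The other_results bookkeeping in the loop above keeps exactly one (result, reduction) pair per unexpected signature, always the smallest seen; restated as a standalone helper (illustrative only):

    def record_other(other_results, result, reduction):
        # key by the minor hash of the report
        old = other_results.get(result.report.minor)
        new_size = sum(tc.data_size for tc in reduction)
        if old is None or new_size < sum(tc.data_size for tc in old[1]):
            if old is not None:
                # drop the larger, superseded entry
                old[0].report.cleanup()
                for testcase in old[1]:
                    testcase.cleanup()
            # clone because `reduction` may be cleaned up by the caller
            other_results[result.report.minor] = (
                result,
                [testcase.clone() for testcase in reduction],
            )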
+ LOG.info( + "Reduction summary:%s%s", + os.linesep, + ReductionStatusReporter([self._status]).summary(), + ) + self._status.report(force=True) + + if any_success: + return Exit.SUCCESS + return Exit.FAILURE + + def report(self, results, testcases): + """Report results, either to FuzzManager or to filesystem. + + Arguments: + results (list(ReplayResult)): Results observed during reduction. + testcases (list(TestCase)): Testcases used to trigger results. + + Returns: + list(*): List of return values from `reporter.submit()`. + """ + ret_values = [] + status = self._status.copy() # copy implicitly closes open counters + for result in results: + if self._report_to_fuzzmanager: + reporter = FuzzManagerReporter(self._report_tool) + if result.expected: + reporter.force_report = True + else: + report_dir = "reports" if result.expected else "other_reports" + reporter = FilesystemReporter( + report_path=self._log_path / report_dir, major_bucket=False + ) + # write reduction stats for expected results + if result.expected: + (Path(result.report.path) / "reduce_stats.txt").write_text( + ReductionStatusReporter([status]).summary() + ) + if self._report_to_fuzzmanager: + status.add_to_reporter(reporter, expected=result.expected) + # clone the tests so we can safely call purge_optional here for each report + # (report.served may be different for non-expected or any-crash results) + clones = [test.clone() for test in testcases] + try: + if result.served is not None: + for clone, served in zip(clones, result.served): + clone.purge_optional(served) + result = reporter.submit(clones, result.report) + if result is not None: + if isinstance(result, Path): + result = str(result) + ret_values.append(result) + finally: + for clone in clones: + clone.cleanup() + return ret_values + + @classmethod + def main(cls, args): + """CLI for `grizzly.reduce`. + + Arguments: + args (argparse.Namespace): Result from `ReduceArgs.parse_args`. + + Returns: + int: 0 for success. non-0 indicates a problem. + """ + # pylint: disable=too-many-return-statements + configure_logging(args.log_level) + setlocale(LC_ALL, "") + if args.fuzzmanager: + FuzzManagerReporter.sanity_check(args.binary) + + LOG.info("Starting Grizzly Reduce") + + if args.headless: + LOG.info("Running browser headless (%s)", args.headless) + if args.ignore: + LOG.info("Ignoring: %s", ", ".join(args.ignore)) + if args.pernosco: + LOG.info("Running with RR (Pernosco mode)") + elif args.rr: + LOG.info("Running with RR") + elif args.valgrind: + LOG.info("Running with Valgrind. This will be SLOW!") + + assets = None + signature = None + signature_desc = None + target = None + testcases = [] + try: + if args.sig: + signature = CrashSignature.fromFile(args.sig) + meta = Path(args.sig).with_suffix(".metadata") + if meta.is_file(): + meta = json.loads(meta.read_text()) + signature_desc = meta["shortDescription"] + + try: + testcases, assets, env_vars = ReplayManager.load_testcases( + str(args.input), subset=args.test_index + ) + except TestCaseLoadFailure as exc: + LOG.error("Error: %s", str(exc)) + return Exit.ERROR + + if args.tool is None and testcases[0].adapter_name is not None: + LOG.warning( + "Setting default --tool=grizzly-%s from testcase", + testcases[0].adapter_name, + ) + args.tool = "grizzly-%s" % (testcases[0].adapter_name,) + + expect_hang = ReplayManager.expect_hang(args.ignore, signature, testcases) + + if args.no_harness: + if len(testcases) > 1: + LOG.error( + "Error: '--no-harness' cannot be used with multiple " + "testcases. 
Perhaps '--test-index' can help." + ) + return Exit.ARGS + LOG.debug("--no-harness specified relaunch set to 1") + args.relaunch = 1 + + # check test time limit and timeout + # TODO: add support for test time limit, use timeout in both cases for now + _, timeout = ReplayManager.time_limits( + args.timeout, args.timeout, testcases + ) + + args.repeat = max(args.min_crashes, args.repeat) + relaunch = min(args.relaunch, args.repeat) + LOG.debug("initializing the Target") + target = load_plugin(args.platform, "grizzly_targets", Target)( + args.binary, + args.launch_timeout, + args.log_limit, + args.memory, + assets=assets, + headless=args.headless, + pernosco=args.pernosco, + rr=args.rr, + valgrind=args.valgrind, + ) + # local environ takes priority over environ loaded from test case + if env_vars is not None: + env_vars.update(target.environ) + target.environ = env_vars + env_vars = None + # TODO: support overriding existing assets + # prioritize specified assets over included + target.assets.add_batch(args.asset) + target.process_assets() + LOG.debug("starting sapphire server") + # launch HTTP server used to serve test cases + with Sapphire(auto_close=1, timeout=timeout) as server: + target.reverse(server.port, server.port) + mgr = ReduceManager( + args.ignore, + server, + target, + testcases, + args.strategies, + args.logs, + any_crash=args.any_crash, + expect_hang=expect_hang, + idle_delay=args.idle_delay, + idle_threshold=args.idle_threshold, + reducer_crash_id=args.original_crash_id, + relaunch=relaunch, + report_period=args.report_period, + report_to_fuzzmanager=args.fuzzmanager, + signature=signature, + signature_desc=signature_desc, + static_timeout=args.static_timeout, + tool=args.tool, + use_analysis=not args.no_analysis, + use_harness=not args.no_harness, + ) + return_code = mgr.run( + repeat=args.repeat, + launch_attempts=args.launch_attempts, + min_results=args.min_crashes, + ) + return return_code + + except ConfigError as exc: + LOG.error(str(exc)) + return exc.exit_code + + except KeyboardInterrupt as exc: + LOG.error("Exception: %r", exc) + return Exit.ABORT + + except (TargetLaunchError, TargetLaunchTimeout) as exc: + LOG.error("Exception: %s", exc) + if isinstance(exc, TargetLaunchError) and exc.report: + path = grz_tmp("launch_failures") + LOG.error("Logs can be found here %r", path) + reporter = FilesystemReporter(path, major_bucket=False) + reporter.submit([], exc.report) + return Exit.LAUNCH_FAILURE + + except GrizzlyReduceBaseException as exc: + LOG.error(exc.msg) + return exc.code + + except Exception: # pylint: disable=broad-except + LOG.exception("Exception during reduction!") + return Exit.ERROR + + finally: + LOG.info("Shutting down...") + if target is not None: + target.cleanup() + for testcase in testcases: + testcase.cleanup() + if assets: + assets.cleanup() + LOG.info("Done.") diff --git a/grizzly/reduce/crash.py b/grizzly/reduce/crash.py index 0acd61ca..bd894c34 100644 --- a/grizzly/reduce/crash.py +++ b/grizzly/reduce/crash.py @@ -2,181 +2,64 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
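Back in ReduceManager.main() above, the repeat/relaunch normalization deserves a concrete example (CLI values hypothetical):

    min_crashes, repeat, relaunch = 3, 1, 1000  # e.g. --min-crashes 3 --repeat 1
    repeat = max(min_crashes, repeat)   # 3: at least min_crashes runs are needed
    relaunch = min(relaunch, repeat)    # 3: relaunching less often than repeat is moot
    # and --no-harness forces relaunch = 1, since each test needs a fresh browser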
-import logging -import os -import re -import sys -import tempfile +from logging import getLogger -from Collector.Collector import Collector +from ..common.fuzzmanager import load_fm_data +from ..common.reporter import Quality +from ..common.utils import Exit +from ..main import configure_logging +from ..replay.crash import modify_args +from .args import ReduceFuzzManagerIDArgs +from .core import ReduceManager -from .args import ReducerFuzzManagerIDArgs -from .reduce import ReductionJob -from ..common import FuzzManagerReporter +LOG = getLogger(__name__) -LOG = logging.getLogger("grizzly.reduce.crash") +def main(args): + """CLI for `grizzly.reduce.crash`. - -def crashentry_data(crash_id, raw=False): - """Get the CrashEntry data for the specified FuzzManager crash - - Args: - crash_id (int): ID of the requested crash on the server side - raw (bool): include rawCrashData, rawStderr, rawStdout in result - - Returns: - dict: crash entry data (crashmanager.models.CrashEntry) - """ - coll = Collector() - - LOG.debug("crash %d, downloading metadata...", crash_id) - - url = "%s://%s:%d/crashmanager/rest/crashes/%s/" \ - % (coll.serverProtocol, coll.serverHost, coll.serverPort, crash_id) - - return coll.get(url, params={"include_raw": "1" if raw else "0"}).json() - - -def download_crash(crash_id): - """Download testcase for the specified FuzzManager crash. - - Args: - crash_id (int): ID of the requested crash on the server side - - Returns: - str: Temporary filename of the testcase. Caller must remove when finished. - """ - coll = Collector() - - LOG.debug("crash %d, downloading testcase...", crash_id) - - url = "%s://%s:%d/crashmanager/rest/crashes/%s/download/" \ - % (coll.serverProtocol, coll.serverHost, coll.serverPort, crash_id) - - response = coll.get(url) - - disp_m = re.match(r'^attachment; filename="(.*)"$', - response.headers.get("content-disposition", "")) - - if disp_m is None: - raise RuntimeError("Server sent malformed response: %r" % (response,)) - - prefix = "crash.%d." % (crash_id,) - suffix = os.path.splitext(disp_m.group(1))[1] - testcase_fd, testcase_fn = tempfile.mkstemp(prefix=prefix, suffix=suffix) - with os.fdopen(testcase_fd, "wb") as testcase_fp: - testcase_fp.write(response.content) - - return testcase_fn - - -def change_quality(crash_id, quality): - """Update a FuzzManager crash entry quality. - - Args: - crash_id (int): Crash ID on FuzzManager server - quality (int): Quality constant defined in FuzzManagerReporter.QUAL_* + Arguments: + args (argparse.Namespace): Result from `ReduceArgs.parse_args`. Returns: - None + int: 0 for success. non-0 indicates a problem. """ - LOG.info("Updating crash %d to quality %s", crash_id, FuzzManagerReporter.quality_name(quality)) - coll = Collector() - - url = "%s://%s:%d/crashmanager/rest/crashes/%d/" \ - % (coll.serverProtocol, coll.serverHost, coll.serverPort, crash_id) - try: - Collector().patch(url, data={"testcase_quality": quality}) - except RuntimeError as exc: - # let 404's go .. 
evidently the crash was deleted - if str(exc) == "Unexpected HTTP response: 404": - LOG.warning("Failed to update (404), does the crash still exist?") - else: - raise - - -class CrashReductionJob(ReductionJob): - __slots__ = ['_crash_id', '_fm_reporter', '_quality', '_testcase_path', '_tool_override', - '_was_interesting'] - - def __init__(self, *args, **kwds): - super(CrashReductionJob, self).__init__(*args, **kwds) - self._crash_id = None - self._fm_reporter = False - self._quality = None - self._testcase_path = None - self._tool_override = False - self._was_interesting = False - - def on_result(self, result_code): - # only update quality of the original crash if we are reporting to FuzzManager - if not self._fm_reporter: - return - - if result_code == FuzzManagerReporter.QUAL_REDUCED_ORIGINAL: - # reduce succeeded - change_quality(self._crash_id, result_code) - - elif result_code == FuzzManagerReporter.QUAL_NOT_REPRODUCIBLE: - if self._quality == FuzzManagerReporter.QUAL_UNREDUCED: - # override result to request platform specific reduction - result_code = FuzzManagerReporter.QUAL_REQUEST_SPECIFIC - change_quality(self._crash_id, result_code) - - # for these cases, something went wrong. a reduce log/result would be really valuable - elif result_code in {FuzzManagerReporter.QUAL_REDUCER_BROKE, - FuzzManagerReporter.QUAL_REDUCER_ERROR}: - # for now just change the quality - change_quality(self._crash_id, result_code) - - else: - LOG.error("Got unhandled quality: %s", FuzzManagerReporter.quality_name(result_code)) - - def on_interesting_crash(self, *args, **kwds): - super(CrashReductionJob, self).on_interesting_crash(*args, **kwds) - if self._was_interesting: - return - LOG.info("Crash %d reproduced!", self._crash_id) - if self._fm_reporter: - change_quality(self._crash_id, FuzzManagerReporter.QUAL_REPRODUCIBLE) - self._was_interesting = True - - def run(self, *args, **kwds): - try: - return super(CrashReductionJob, self).run(*args, **kwds) - finally: - os.unlink(self._testcase_path) - - @classmethod - def from_args(cls, args, target): - LOG.info("Trying crash %d", args.input) - - try: - crash_id = args.input - testcase = download_crash(crash_id) - tool_override = args.tool is None - crash = crashentry_data(crash_id) - quality = crash["testcase_quality"] - if tool_override: - args.tool = crash["tool"] - LOG.info("Using toolname from crash: %s", args.tool) - - # reduce.main expects input to be a path to testcase - args.input = testcase - - job = super(CrashReductionJob, cls).from_args(args, target) - job._fm_reporter = args.fuzzmanager - job._crash_id = crash_id - job._tool_override = tool_override - job._quality = quality - job._testcase_path = testcase - return job - - except: # noqa - os.unlink(testcase) - raise + configure_logging(args.log_level) + with load_fm_data(args.input, load_bucket=not args.sig) as (crash, bucket): + LOG.info( + "Loaded crash %d (%s) from FuzzManager", + crash.crash_id, + Quality(crash.testcase_quality).name, + ) + # call grizzly.reduce + result = ReduceManager.main(modify_args(args, crash, bucket)) + + # update quality + # map Exit.* -> Quality.* + # default back to UNREDUCED + # most errors will not be related to the testcase + # so they should be retried later + if args.fuzzmanager: + quality = { + Exit.ERROR: Quality.REDUCER_ERROR, + Exit.ABORT: Quality(crash.testcase_quality), + Exit.SUCCESS: Quality.ORIGINAL, + Exit.FAILURE: Quality(args.no_repro_quality), + }.get(result, Quality.UNREDUCED) + # don't ever set things back to REDUCING, default to UNREDUCED in 
that case. + # REDUCING is only used in automation, so ABORT should never happen. + if quality == Quality.REDUCING: + quality = Quality.UNREDUCED + LOG.info( + "reducer finished -> exit(%d) -> %s (Q%d)", + result, + quality.name, + quality, + ) + crash.testcase_quality = quality.value + + return result if __name__ == "__main__": - sys.exit(CrashReductionJob.main(ReducerFuzzManagerIDArgs().parse_args())) + raise SystemExit(main(ReduceFuzzManagerIDArgs().parse_args())) diff --git a/grizzly/reduce/exceptions.py b/grizzly/reduce/exceptions.py index f9ed245a..9a32c729 100644 --- a/grizzly/reduce/exceptions.py +++ b/grizzly/reduce/exceptions.py @@ -2,22 +2,21 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. +"""Grizzly Reduction exceptions.""" +from ..common.utils import Exit -__author__ = "Jesse Schwartzentruber" -__credits__ = ["Tyson Smith", "Jesse Schwartzentruber"] +class GrizzlyReduceBaseException(Exception): + """Base for other Grizzly Reducer specific exceptions.""" -class ReducerError(Exception): - pass + def __init__(self, msg, code=Exit.ERROR): + super().__init__() + self.msg = msg + self.code = code -class TestcaseError(ReducerError): - pass +class NotReproducible(GrizzlyReduceBaseException): + """Crash was not observed when expected during reduction.""" - -class NoTestcaseError(TestcaseError): - pass - - -class CorruptTestcaseError(TestcaseError): - pass + def __init__(self, msg): + super().__init__(msg, code=Exit.FAILURE) diff --git a/grizzly/reduce/reduce.py b/grizzly/reduce/reduce.py deleted file mode 100644 index 9d57de92..00000000 --- a/grizzly/reduce/reduce.py +++ /dev/null @@ -1,1242 +0,0 @@ -# coding=utf-8 -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -""" -Given a build and testcase, try to reproduce it using a set of strategies. -""" -from __future__ import absolute_import -import glob -import hashlib -import io -import json -import logging -import os -import re -import shutil -import tempfile -import time -import zipfile -import zlib - -import lithium -import sapphire -from FTB.Signatures.CrashInfo import CrashSignature - -from . import strategies as strategies_module, testcase_contents -from .exceptions import CorruptTestcaseError, NoTestcaseError, ReducerError -from ..common.reporter import FilesystemReporter, FuzzManagerReporter, Report -from ..common.runner import Runner -from ..common.status import ReducerStats, Status -from ..common.storage import TestCase, TestFile -from ..common.utils import grz_tmp -from ..main import configure_logging -from ..session import Session -from ..target import load as load_target, sanitizer_opts, TargetLaunchError, \ - TargetLaunchTimeout - - -__author__ = "Jesse Schwartzentruber" -__credits__ = ["Tyson Smith", "Jesse Schwartzentruber", "Jason Kratzer"] - - -LOG = logging.getLogger("grizzly.reduce") - - -class LithiumInterestingProxy(object): - """Proxy to use a ReductionJob object as a Lithium interestingness script object. - """ - __slots__ = ['_job'] - - def __init__(self, job): - self._job = job - - def init(self, _args): - """Lithium initialization entrypoint. - - Do any per-reduction loop setup needed. 
- - Args: - _args (unused): Command line arguments from Lithium (N/A) - - Returns: - None - """ - self._job.lithium_init() - - def interesting(self, _args, temp_prefix): - """Lithium main iteration entrypoint. - - This should try the reduction and return True or False based on whether the reduction was - good or bad. - - Args: - _args (unused): Command line arguments from Lithium (N/A) - temp_prefix (str): A unique prefix for any files written during this iteration. - - Returns: - bool: True if reduced testcase is still interesting. - """ - return self._job.lithium_interesting(temp_prefix) - - def cleanup(self, _args): - """Lithium cleanup entrypoint. - - Do any per-reduction loop cleanup needed. - - Args: - _args (unused): Command line arguments from Lithium (N/A) - - Returns: - None - """ - self._job.lithium_cleanup() - - -class IterationParamsProxy(object): - __slots__ = ['_fixed_timeout', '_job', '_use_result_cache'] - - def __init__(self, job): - self._job = job - self._use_result_cache = None - self._fixed_timeout = None - - def __enter__(self): - # disable result cache setting - self._use_result_cache = self._job._use_result_cache - self._job._use_result_cache = False - - # do not update the iteration timeout during analysis - self._fixed_timeout = self._job._fixed_timeout - self._job._fixed_timeout = True - - return self - - def __exit__(self, *_args): - # restore saved values - self._job._use_result_cache = self._use_result_cache - self._job._fixed_timeout = self._fixed_timeout - - @property - def force_no_harness(self): - return self._job._force_no_harness - - @property - def min_crashes(self): - return self._job._min_crashes - - @min_crashes.setter - def min_crashes(self, value): - self._job._min_crashes = value - - @property - def no_harness(self): - return self._job._no_harness - - @no_harness.setter - def no_harness(self, value): - self._job._no_harness = value - - @property - def relaunch(self): - return self._job._target.rl_reset - - @relaunch.setter - def relaunch(self, value): - self._job._target.rl_reset = min(self._job._original_relaunch, value) - - @property - def repeat(self): - return self._job._repeat - - @repeat.setter - def repeat(self, value): - self._job._repeat = value - - def commit(self): - # close target so new parameters take effect - self._job.close_target() - - -class TimeoutsUpdateProxy(object): - __slots__ = ['_job'] - - def __init__(self, job): - self._job = job - - @property - def idle(self): - return self._job._idle_timeout - - @idle.setter - def idle(self, value): - self._job._idle_timeout = value - - @property - def iteration(self): - return self._job._iter_timeout - - @iteration.setter - def iteration(self, value): - self._job._iter_timeout = value - - -class RunState(object): - __slots__ = ['files_to_reduce', 'original_size'] - - def __init__(self, files_to_reduce): - self.files_to_reduce = files_to_reduce - self.original_size = -1 - - def total_size(self): - return sum(os.stat(fn).st_size for fn in self.files_to_reduce) - - -class TestcaseUpdateProxy(object): - __slots__ = ['_job', '_run_state'] - - def __init__(self, job, run_state): - self._job = job - self._run_state = run_state - - @property - def cache_iter_harness_created(self): - return self._job._cache_iter_harness_created - - @property - def root(self): - return self._job._tcroot - - @root.setter - def root(self, value): - self._job._tcroot = value - - @property - def entry(self): - return self._job._testcase - - @entry.setter - def entry(self, value): - self._job._testcase = value - - 
@property - def landing_page(self): - return self._job.landing_page - - @landing_page.setter - def landing_page(self, value): - self._job.landing_page = value - - @property - def files_to_reduce(self): - return self._run_state.files_to_reduce - - @property - def original_size(self): - return self._run_state.original_size - - @original_size.setter - def original_size(self, value): - self._run_state.original_size = value - - def total_size(self): - return self._run_state.total_size() - - -class ReductionJob(object): - LOGGERS_TO_WATCH = ("ffpuppet", "grizzly", "lithium", "sapphire") - DEFAULT_STRATEGIES = ("line", "cssbeautify", "jsbeautify", "collapsebraces", "jschar") - __slots__ = [ - '_any_crash', '_best_testcase', '_cache_iter_harness_created', '_env_mod', - '_fixed_timeout', '_force_no_harness', '_idle_threshold', '_idle_timeout', '_ignore', - '_input_fname', '_interesting_prefix', '_iter_timeout', '_landing_page', '_log_handler', - '_min_crashes', '_no_harness', '_orig_sig', '_original_relaunch', '_other_crashes', - '_reduce_file', '_repeat', '_reporter', '_result_cache', '_result_code', '_server', '_server_map', - '_signature', '_skip', '_skip_analysis', '_skipped', '_status', '_target', '_tcroot', '_testcase', - '_tmpdir', '_use_result_cache', - ] - - def __init__(self, ignore, target, iter_timeout, no_harness, any_crash, skip, min_crashes, - repeat, idle_threshold, idle_timeout, testcase_cache=True, skip_analysis=False): - """Use lithium to reduce a testcase. - - Args: - target (grizzly.target.Target): Target object to use for reduction. - """ - self._any_crash = any_crash - self._best_testcase = None - self._cache_iter_harness_created = None - self._env_mod = None # environment if specified in the testcase - self._fixed_timeout = False # if True iter_timeout will not be changed - self._force_no_harness = no_harness - self._idle_threshold = idle_threshold - self._idle_timeout = idle_timeout - self._ignore = ignore # things to ignore - self._input_fname = None - self._interesting_prefix = None - self._iter_timeout = iter_timeout - self._landing_page = None # the file to point the target at - self._min_crashes = min_crashes - self._no_harness = no_harness - self._orig_sig = None # signature to reduce to (if specified) - self._original_relaunch = target.rl_reset - self._other_crashes = {} - self._reduce_file = None # the file to reduce - self._repeat = repeat - self._reporter = None - self._result_cache = {} - self._result_code = None - self._server = None # a server to serve with - self._server_map = sapphire.ServerMap() # manage dynamic requests, includes and redirects - self._signature = None - self._skip = skip - self._skip_analysis = skip_analysis - self._skipped = None - self._status = Status.start() - self._target = target # a Puppet to run with - self._testcase = None - # testcase cache remembers if we have seen this reduce_file before and if so return the same - # interesting result - self._use_result_cache = testcase_cache - self._tmpdir = tempfile.mkdtemp(prefix="grzreduce", dir=grz_tmp("reduce")) - self._tcroot = os.path.join(self._tmpdir, "tc") - self._log_handler = self._start_log_capture() - if not self._skip_analysis: - # see if any of the args tweaked by analysis were overridden - # --relaunch is regarded as a maximum, so overriding the default is not a deal-breaker for this - if self._min_crashes != 1: - LOG.warning("--min-crashes=%d was given, skipping analysis", self._min_crashes) - self._skip_analysis = True - elif self._repeat != 1: - LOG.warning("--repeat=%d was 
given, skipping analysis", self._repeat) - self._skip_analysis = True - - @property - def landing_page(self): - return os.path.basename(self._landing_page) - - @landing_page.setter - def landing_page(self, value): - # this looks pointless, but it isn't since it affects both landing_page and wwwdir getters - self._landing_page = value - - @property - def reduce_file(self): - return self._reduce_file - - @reduce_file.setter - def reduce_file(self, value): - self._reduce_file = value - # landing page should default to same value as reduce file - if self._landing_page is None: - self._landing_page = value - - @property - def result_code(self): - return self._result_code - - @property - def server(self): - return self._server - - @property - def target(self): - return self._target - - @property - def wwwdir(self): - return os.path.dirname(os.path.realpath(self._landing_page)) - - def timeouts_proxy(self): - """Return a proxy for modifying the job timeouts. - - Returns: - (object): an object used to modify the timeouts for this job - attributes: - - iteration (iteration timeout) - - idle (idle timeout) - """ - return TimeoutsUpdateProxy(self) - - def testcase_proxy(self, run_state): - """Return a proxy for modifying the testcase. - - Returns: - (object): an object used to modify the testcase for this job - attributes: - - iteration (iteration timeout) - - idle (idle timeout) - """ - return TestcaseUpdateProxy(self, run_state) - - def analysis_mode(self, min_crashes=1, relaunch=1, repeat=1): - """Set parameters for testcase analysis. This has side-effects besides being a proxy: - - - min_crashes/repeat/relaunch are preset according to the function parameters - - result cache is disabled (so every iteration runs fully) if used as a with-statement context - - times are not dynamically adjusted if used as a with-statement context - - Args: - min_crashes (int): How many crashes are needed for a success. - relaunch (int): How many iterations between relaunch. - repeat (int): How many times to repeat the testcase per iteration. - - Returns: - (context manager): an object that can be used to set new parameters - as a result of analysis: - attributes: - - min_crashes - - no_harness - - relaunch - - repeat - """ - # pylint: disable=no-self-argument,no-self-use,protected-access - - proxy = IterationParamsProxy(self) - - # Set parameters for analysis - proxy.min_crashes = min_crashes - proxy.repeat = repeat - proxy.relaunch = relaunch - - return proxy - - def close_target(self): - if not self._target.closed: - self._target.close() - - def lithium_init(self): - """Lithium initialization entrypoint. Do any per-reduction loop setup needed. - - Args: - None - - Returns: - None - """ - self._skipped = None - self._best_testcase = None - self._result_cache = {} - - def lithium_interesting(self, temp_prefix): - """Lithium main iteration entrypoint. - - This should try the reduction and return True or False based on whether the reduction was - good or bad. This is subject to a number of options (skip, repeat, cache) and so may - result in 0 or more actual runs of the target. - - Args: - temp_prefix (str): A unique prefix for any files written during this iteration. - - Returns: - bool: True if reduced testcase is still interesting. 
- """ - # ensure the target is closed so "repeat" and "relaunch" never get out of sync - self.close_target() - if self._skip: - if self._skipped is None: - self._skipped = 0 - elif self._skipped < self._skip: - self._skipped += 1 - return False - n_crashes = 0 - n_tries = max(self._repeat, self._min_crashes) - if self._use_result_cache: - with open(self.reduce_file, "rb") as test_fp: - cache_key = hashlib.sha1(test_fp.read()).hexdigest() - if cache_key in self._result_cache: - result = self._result_cache[cache_key]['result'] - if result: - LOG.info("Interesting (cached)") - cached_prefix = self._result_cache[cache_key]['prefix'] - for filename in glob.glob(r"%s_*" % cached_prefix): - suffix = os.path.basename(filename).split("_", 1) - if os.path.isfile(filename): - shutil.copy(filename, "%s_%s" % (temp_prefix, suffix[1])) - elif os.path.isdir(filename): - shutil.copytree(filename, "%s_%s" % (temp_prefix, suffix[1])) - else: - raise RuntimeError("Cannot copy non-file/non-directory: %s" - % (filename,)) - else: - LOG.info("Uninteresting (cached)") - return result - - # create the TestCase to try - testcase = TestCase(self.landing_page, None, "grizzly.reduce", input_fname=self._input_fname) - - # add testcase contents - for file_name in testcase_contents(self.wwwdir): - testcase.add_from_file(os.path.join(self.wwwdir, file_name), file_name, - required=bool(file_name == self.landing_page)) - - # add prefs - if self._target.prefs is not None: - testcase.add_meta(TestFile.from_file(self._target.prefs, "prefs.js")) - - # add environment variables - if self._env_mod is not None: - for name, value in self._env_mod.items(): - testcase.add_environ_var(name, value) - - max_duration = 0 - run_prefix = None - for try_num in range(n_tries): - if (n_tries - try_num) < (self._min_crashes - n_crashes): - break # no longer possible to get min_crashes, so stop - self._status.report() - self._status.iteration += 1 - run_prefix = "%s(%d)" % (temp_prefix, try_num) - if self._run(testcase, run_prefix): - # track the maximum duration of the successful reduction attempts - if testcase.duration > max_duration: - max_duration = testcase.duration - n_crashes += 1 - if n_crashes >= self._min_crashes: - self.on_interesting_crash(run_prefix) - if self._use_result_cache: - self._result_cache[cache_key] = { - 'result': True, - 'prefix': run_prefix - } - self._best_testcase = testcase - # the amount of time it can take to replay a test case can vary - # when under Valgrind so do not update the timeout in that case - if not self._fixed_timeout and not getattr(self._target, "use_valgrind", False): - self.update_timeout(max_duration) - return True - if self._use_result_cache: - # No need to save the temp_prefix on uninteresting testcases - # But let's do it anyway to stay consistent - self._result_cache[cache_key] = { - 'result': False, - 'prefix': run_prefix - } - return False - - def lithium_cleanup(self): - """Lithium cleanup entrypoint. Do any per-reduction loop cleanup needed. 
- - Args: - None - - Returns: - None - """ - try: - if self._server is not None: - self._server.close() - self._server = None - finally: - if self._target is not None: - self._target.close() - - def _add_san_suppressions(self, supp_file): - # Update the sanitizer *SAN_OPTIONS environment variable to use provided - # suppressions file - opt_key = '%s_OPTIONS' % os.path.basename(supp_file).split('.')[0].upper() - opts_data = self._env_mod.get(opt_key, '') - # the value matching *SAN_OPTIONS can be set to None - if opts_data is None: - opts_data = '' - opts = sanitizer_opts(opts_data) - opts['suppressions'] = '\'%s\'' % (supp_file,) - self._env_mod[opt_key] = ':'.join('='.join((k, v)) for k, v in opts.items()) - - def _start_log_capture(self): - """Add a log handler for grizzly and lithium messages generated during this job. - The handler is removed again by close() - - Args: - None - - Returns: - logging.Handler: The log handler to be removed later. - """ - formatter = logging.Formatter("%(levelname).1s %(name)s [%(asctime)s] %(message)s") - handler = logging.FileHandler(os.path.join(self._tmpdir, "reducelog.txt")) - handler.setLevel(logging.DEBUG) - handler.setFormatter(formatter) - for logname in self.LOGGERS_TO_WATCH: - logging.getLogger(logname).addHandler(handler) - - # check that DEBUG messages will actually get through - # if the root logger level is > DEBUG, messages will not get through to our log handler - # set root to DEBUG, and propagate the old root level to each root handler - root_logger = logging.getLogger() - root_level = root_logger.getEffectiveLevel() - if root_level > logging.DEBUG: - root_logger.setLevel(logging.DEBUG) - for root_handler in root_logger.handlers: - if root_handler.level < root_level: - root_handler.setLevel(root_level) - - return handler - - def update_timeout(self, run_time): - # If run_time is less than poll-time, update it - LOG.debug('Run time %r', run_time) - new_poll_timeout = max(10, min(run_time * 1.5, self._idle_timeout)) - if new_poll_timeout < self._idle_timeout: - LOG.info("Updating poll timeout to: %r", new_poll_timeout) - self._idle_timeout = new_poll_timeout - # If run_time * 2 is less than iter_timeout, update it - # in other words, decrease the timeout if this ran in less than half the timeout - # (floored at 10s) - new_iter_timeout = max(10, min(run_time * 2, self._iter_timeout)) - if new_iter_timeout < self._iter_timeout: - LOG.info("Updating max timeout to: %r", new_iter_timeout) - self._iter_timeout = new_iter_timeout - - def _run(self, testcase, temp_prefix): - """Run a single iteration against the target and determine if it is interesting. This is the - low-level iteration function used by `interesting`. - - Args: - testcase (TestCase): The testcase to serve - temp_prefix (str): A unique prefix for any files written during this iteration. - - Returns: - bool: True if reduced testcase is still interesting. 
- """ - result = False - - # if target is closed and server is alive, we should restart it or else the first request - # against /first_test will 404 - if self._target.closed and self._server is not None: - self._server.close() - self._server = None - self._server_map.dynamic.clear() - self._server_map.redirect.clear() - - # launch sapphire if needed - if self._server is None: - # have client error pages (code 4XX) call window.close() after a few seconds - self._server = sapphire.Sapphire(auto_close=2) - - if not self._no_harness: - harness = os.path.join(os.path.dirname(__file__), '..', 'common', 'harness.html') - with open(harness, 'rb') as harness_fp: - harness = harness_fp.read() - self._server_map.set_dynamic_response("grz_harness", lambda: harness, mime_type="text/html") - self._server_map.set_redirect("grz_current_test", str(self.landing_page), required=False) - - runner = Runner(self._server, self._target, self._idle_threshold, self._idle_timeout) - if self._no_harness: - self._server.timeout = self._iter_timeout - else: - # wait a few extra seconds to avoid races between the harness & sapphire timing out - self._server.timeout = self._iter_timeout + 10 - - # (re)launch Target - if self._target.closed: - if self._no_harness: - location = runner.location( - "/grz_current_test", - self._server.port) - else: - location = runner.location( - "/grz_harness", - self._server.port, - close_after=self._target.rl_reset, - forced_close=self._target.forced_close, - timeout=self._iter_timeout) - # Try to launch the browser, retry 4 times at most - runner.launch(location, env_mod=self._env_mod, max_retries=4, retry_delay=15) - self._target.step() - - if not self._no_harness: - def _dyn_resp_close(): # pragma: no cover - if self.target.monitor.is_healthy(): - # delay to help catch window close/shutdown related crashes - time.sleep(0.1) - self.target.close() - return b"
<h1>Close Browser</h1>
" - self._server_map.set_dynamic_response("grz_close_browser", _dyn_resp_close, mime_type="text/html") - self._server_map.set_redirect("grz_next_test", str(self.landing_page), required=True) - - # run test case - runner.run(self._ignore, self._server_map, testcase, wait_for_callback=self._no_harness) - - # handle failure if detected - if runner.result == Runner.FAILED: - self._target.close() - testcase.purge_optional(runner.served) - - # save logs - result_logs = temp_prefix + "_logs" - if not os.path.exists(result_logs): - os.mkdir(result_logs) - self._target.save_logs(result_logs) - - # create a CrashInfo - crash = FuzzManagerReporter.create_crash_info( - Report.from_path(result_logs), - self._target.binary) - - short_sig = crash.createShortSignature() - if short_sig == "No crash detected": - # XXX: need to change this to support reducing timeouts? - LOG.info("Uninteresting: no crash detected") - elif self._orig_sig is None or self._orig_sig.matches(crash): - result = True - LOG.info("Interesting: %s", short_sig) - if self._orig_sig is None and not self._any_crash: - self._orig_sig = Report.crash_signature(crash) - else: - LOG.info("Uninteresting: different signature: %s", short_sig) - self.on_other_crash_found(testcase, temp_prefix) - - elif runner.result == Runner.IGNORED: - LOG.info("Uninteresting: ignored") - self._target.close() - - else: - LOG.info("Uninteresting: no failure detected") - - # trigger relaunch by closing the browser if needed - self._target.check_relaunch() - - return result - - def _stop_log_capture(self): - """Stop handling reduce logs. - - Args: - None - - Returns: - None - """ - if self._log_handler is None: - return - for logname in self.LOGGERS_TO_WATCH: - logging.getLogger(logname).removeHandler(self._log_handler) - self._log_handler.flush() - self._log_handler.close() - self._log_handler = None - - def config_environ(self, environ): - with open(environ) as in_fp: - try: - self._env_mod = json.load(in_fp).get('env', {}) - except ValueError: - # TODO: remove this once switched to 'test_info.json' - # legacy support for 'env_vars.txt' - self._env_mod = {} - in_fp.seek(0) - for line in in_fp: - line = line.rstrip() - if not line: - continue - key, value = line.split('=', 1) - if not value: - value = None - self._env_mod[key] = value - # known sanitizer suppression files - known_suppressions = ('lsan.supp', 'ubsan.supp') - working_dir = os.path.dirname(environ) - for file_name in os.listdir(working_dir): - if file_name in known_suppressions: - self._add_san_suppressions(os.path.join(working_dir, file_name)) - - def config_signature(self, signature): - """Configure a signature to use for reduction. If none is given, an automatic signature is - created based on the initial repro. - - Args: - signature (str): A JSON signature to match for reduction. - - Returns: - None - """ - self._signature = CrashSignature(signature) - - @staticmethod - def _get_landing_page(testpath): - """Parse test_info.json for landing page - - Args: - testpath (str): Path to a testcase folder (containing a test_info.json from Grizzly). - - Returns: - str: Path to the landing page within testpath - """ - info_file = os.path.join(testpath, "test_info.json") - if os.path.isfile(info_file): - with open(info_file) as info: - landing_page = json.load(info).get("target", None) - if landing_page is None: - raise ReducerError("Could not find landing page in %s!" 
% (os.path.abspath(info_file),)) - landing_page = os.path.join(testpath, landing_page) - else: - LOG.warning("Using deprecated test_info.txt") - with io.open(os.path.join(testpath, "test_info.txt"), encoding="utf-8") as info: - for line in info: - if line.lower().startswith("landing page: "): - landing_page = os.path.join(testpath, - line.split(": ", 1)[1].strip()) - break - else: - raise ReducerError("Could not find landing page in %s!" - % (os.path.abspath(info.name),)) - if not os.path.isfile(landing_page): - raise ReducerError("Landing page %s does not exist in %s!" - % (landing_page, os.path.abspath(info.name))) - return landing_page - - def _http_abspath(self, path): - """Return an absolute HTTP path to `path` relative to tcroot""" - path = os.path.relpath(path, self._tcroot) - return '/' + '/'.join(path.split(os.sep)) - - def config_testcase(self, testcase): - """Prepare a user provided testcase for reduction. - - Args: - testcase (str): Path to a testcase. This should be a Grizzly testcase (zip or folder) or html - file. - - Returns: - None - """ - try: - # extract the testcase if necessary - if os.path.exists(self._tcroot): - raise ReducerError("Testcase already configured?") - if os.path.isfile(testcase): - if testcase.lower().endswith(".html"): - os.mkdir(self._tcroot) - shutil.copy(testcase, self._tcroot) - info = {"target": os.path.basename(testcase)} - with open(os.path.join(self._tcroot, "test_info.json"), "w") as info_fp: - json.dump(info, info_fp, indent=2, sort_keys=True) - elif testcase.lower().endswith(".zip"): - os.mkdir(self._tcroot) - try: - with zipfile.ZipFile(testcase) as zip_fp: - zip_fp.extractall(path=self._tcroot) - except (zlib.error, zipfile.BadZipfile): - raise CorruptTestcaseError("Testcase is corrupted") - else: - raise ReducerError("Testcase must be zip, html, or directory") - elif os.path.isdir(testcase): - shutil.copytree(testcase, self._tcroot) - else: - raise ReducerError("Testcase must be zip, html or directory") - - self._input_fname = os.path.basename(testcase) - - # get a list of all directories containing testcases (1-n, depending on how much history - # grizzly saved) - entries = set(os.listdir(self._tcroot)) - if "test_info.json" in entries: - dirs = [self._tcroot] - elif "test_info.txt" in entries: - dirs = [self._tcroot] - else: - dirs = sorted([os.path.join(self._tcroot, entry) for entry in entries - if os.path.exists(os.path.join(self._tcroot, entry, "test_info.json")) - or os.path.exists(os.path.join(self._tcroot, entry, "test_info.txt"))], - key=lambda x: -int(x.rsplit('-', 1)[1])) - if not dirs: - raise NoTestcaseError("No testcase recognized at %r" % (testcase,)) - - # check for included prefs and environment - if "prefs.js" in os.listdir(dirs[0]): - # move the file out of tcroot because we prune these non-testcase files later - os.rename(os.path.join(dirs[0], "prefs.js"), os.path.join(self._tmpdir, "prefs.js")) - self._target.prefs = os.path.abspath(os.path.join(self._tmpdir, "prefs.js")) - LOG.warning("Using prefs included in testcase: %r", self._target.prefs) - if "test_info.json" in os.listdir(dirs[0]): - self.config_environ(os.path.join(dirs[0], "test_info.json")) - elif "env_vars.txt" in os.listdir(dirs[0]): - # TODO: remove this block once move to 'test_info.json' is complete - self.config_environ(os.path.join(dirs[0], "env_vars.txt")) - if self._env_mod: - LOG.warning("Using environment included in testcase") - self._target.forced_close = self._env_mod.get("GRZ_FORCED_CLOSE") != "0" - - # if dirs is singular, we can use the 
testcase directly, otherwise we need to iterate over - # them all in order - pages = [self._get_landing_page(d) for d in dirs] - if len(pages) == 1: - self._testcase = pages[0] - self._cache_iter_harness_created = False - - else: - # create a harness to iterate over the whole history - harness_path = os.path.join(os.path.dirname(__file__), '..', 'common', 'harness.html') - with io.open(harness_path, encoding="utf-8") as harness_fp: - harness = harness_fp.read() - # change dump string so that logs can be told apart - harness = harness.replace("[grz harness]", "[cache iter]") - # change the window name so that window.open doesn't clobber self - harness = harness.replace("'GrizzlyFuzz'", "'CacheIterator'") - # insert the iteration timeout. insert it directly because we can't set a hash value - new_harness = re.sub(r"^(\s*let\s.*\btime_limit\b)", - r"\1 = %d" % (self._iter_timeout * 1000), - harness, - flags=re.MULTILINE) - if new_harness == harness: - raise ReducerError("Unable to set time_limit in harness, please update pattern " - "to match harness!") - harness = new_harness - # make first test and next test grab from the array - harness = harness.replace("'/grz_current_test'", "_reduce_next()") - harness = harness.replace("'/grz_next_test'", "_reduce_next()") - # insert the close condition. we are iterating over the array of landing pages, - # undefined means we hit the end and the harness should close - # newer harness uses conditional operator in open() call - if re.search(r'open\(.*_reduce_next\(\)\s*:\s*_reduce_next\(\)', harness) is None: - raise ReducerError("Unable to insert finish condition, please update pattern " - "to match harness!") - # insert the landing page loop - harness = harness.replace("\n", + "\n", + id="#7: test beautify js embedded in html", + ), + pytest.param( + "\n", + "\n\n", + id="#9: test DDBEGIN/END respected for js embedded in html, " + "DD outside \n", + "\n", + id="#10: test DDBEGIN/END respected for js embedded in html, " + "DD inside \n\n", + "\n\n", + id="#13: test DDBEGIN/END respected for js embedded in html, " + "DD straddle after ", + ), + pytest.param( + "try{'a';'R'}catch(e){}\n", + "try{'a';'R'}catch(e){}\n", + id="#14: test beautify js embedded in html (no \n", + "" + "\n", + id="#15: test beautify multiple js embedded in html", + ), + ], +) +def test_beautify_js_4(test_data, reduced, mocker): + _test_beautify( + JSBeautify, + lambda x: "Q" in x and "R" in x, + "test.html", + test_data, + reduced, + mocker, + ) + + +@pytest.mark.parametrize( + "test_data, reduced", + [ + pytest.param( + "*,#a{a:0;R:1}\n", + "*,\n#a {\n a: 0;\n R: 1\n}\n", + id="#0: test beautify a .css file", + ), + pytest.param( + "*,\r\n#a{a:0;R:1}\n", + "*,\n#a {\n a: 0;\n R: 1\n}\n", + id="#1: test that mixed crlf/lf gets converted to lf", + ), + pytest.param( + "*,\r#a{a:0;R:1}\n", + "*,\n#a {\n a: 0;\n R: 1\n}\n", + id="#2: test that mixed cr/lf gets converted to lf", + ), + pytest.param( + "*,#a{a:0;\n\nR:1}\n", + "*,\n#a {\n a: 0;\n R: 1\n}\n", + id="#3: test that existing newlines are not preserved", + ), + ], +) +def test_beautify_css_1(test_data, reduced, mocker): + _test_beautify( + CSSBeautify, lambda x: "R" in x, "test.css", test_data, reduced, mocker + ) + + +@pytest.mark.parametrize( + "test_data, reduced", + [ + pytest.param( + "\n", + "\n", + id="#4: test beautify css embedded in html", + ), + pytest.param( + "\n", + "\n", + id="#6: test already beautified css (beautify does nothing)", + ), + pytest.param( + "*,#a{a:0;R:1}\n", + "*,#a{a:0;R:1}\n", + id="#7: test 
beautify css embedded in html (no