diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c35820923..11677ae264 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 inputs and outputs for operation implementations. - Async helpers got an `aenter_stack` method which creates and returns and `contextlib.AsyncExitStack` after entering all the context's passed to it. +- Example of how to use Data Flow Facilitator / Orchestrator / Operations by + writing a Python meta static analysis tool, + [shouldi](https://pypi.org/project/shouldi/) ### Changed - OperationImplementation add_label and add_orig_label methods now use op.name instead of ENTRY_POINT_ORIG_LABEL and ENTRY_POINT_NAME. diff --git a/dffml/version.py b/dffml/version.py index 54420691bb..40ae17f0c5 100644 --- a/dffml/version.py +++ b/dffml/version.py @@ -7,4 +7,4 @@ Version of DFFML """ -VERSION = "0.2.0" +VERSION = "0.2.1" diff --git a/docs/usage/operations.rst b/docs/usage/operations.rst new file mode 100644 index 0000000000..b3e475dcb6 --- /dev/null +++ b/docs/usage/operations.rst @@ -0,0 +1,278 @@ +Example Data Flow Usage +======================= + +This example will show you how to generate a dataset using operations. + +Operations are the core of DFFML; they have inputs and outputs, are configurable +and are run by the Data Flow Facilitator in what amounts to a large event loop. +The events in the event loop are pieces of data entering the network. When a +piece of data which matches the data types of one of the operation's inputs +enters the network, that operation is then run. + +We're going to write a few operations which will run some Python static analysis +tools, with the goal being to create a command line utility called ``shouldi`` +which will provide us with the information we need to make the decision, should +I install Python package X? When it's done it'll look like this + +.. 
code-block:: console + + $ shouldi install insecure-package bandit + bandit is okay to install + Do not install insecure-package! {'safety_check_number_of_issues': 1} + +Creating our Package +-------------------- + +Clone a copy of DFFML and navigate to the top of the source directory. + +Create a new package using the create script. + +.. code-block:: console + + $ ./scripts/create.sh operations shouldi + +You can now move this to another directory if you wish (the copy for this +example is located under ``examples/shouldi``). + +.. code-block:: console + + $ mv operations/shouldi ../shouldi + $ cd ../shouldi + +We're going to change the name of the package to ``shouldi`` instead of the +default, ``dffml_operations_shouldi``. + +**setup.py** + +.. code-block:: python + + NAME = "shouldi" + +We need to rename the directory as well. + +.. code-block:: console + + $ mv dffml_operations_shouldi shouldi + +And the directory within the coveragerc file + +**.coveragerc** + +.. code-block:: python + + source = + shouldi + tests + +Now install your freshly renamed module! + +.. code-block:: console + + $ python3.7 -m pip install -e . + +Installing Static Analysis Tools +-------------------------------- + +For simplicity's sake, the beginning of this example will use subprocesses to +interact with command line Python static analysis tools. Let's install them all +via ``pip``. + +.. code-block:: console + + $ python3.7 -m pip install -U safety pylint bandit + +We need to make HTTP requests so let's install ``aiohttp``. + +**setup.py** + +.. code-block:: python + + INSTALL_REQUIRES = [ + "aiohttp>=3.5.4" + ] + +Our Zeroth Operation +-------------------- + +We'll write an operation to check for CVEs in a package by using ``safety``. + +Safety uses the package name and version to tell us if there are any security +issues in the package for that version. + +To use safety, we have to have the version of the package we want to check. 
+ +Let's write an operation to grab the version of a package. + +.. literalinclude:: /../examples/shouldi/shouldi/pypi.py + +Write a test for it + +.. literalinclude:: /../examples/shouldi/tests/test_pypi.py + +Run the tests + +.. code-block:: console + + $ python3.7 setup.py test -s tests.test_pypi + +Safety Operation +---------------- + +The output of the last operation will automatically be combined with the package +name to create a call to our new operation, ``SafetyCheck``. + +This is how running safety on the command line works. + +.. code-block:: console + + $ echo insecure-package==0.1.0 | safety check --stdin + ╒══════════════════════════════════════════════════════════════════════════════╕ + │ │ + │ /$$$$$$ /$$ │ + │ /$$__ $$ | $$ │ + │ /$$$$$$$ /$$$$$$ | $$ \__//$$$$$$ /$$$$$$ /$$ /$$ │ + │ /$$_____/ |____ $$| $$$$ /$$__ $$|_ $$_/ | $$ | $$ │ + │ | $$$$$$ /$$$$$$$| $$_/ | $$$$$$$$ | $$ | $$ | $$ │ + │ \____ $$ /$$__ $$| $$ | $$_____/ | $$ /$$| $$ | $$ │ + │ /$$$$$$$/| $$$$$$$| $$ | $$$$$$$ | $$$$/| $$$$$$$ │ + │ |_______/ \_______/|__/ \_______/ \___/ \____ $$ │ + │ /$$ | $$ │ + │ | $$$$$$/ │ + │ by pyup.io \______/ │ + │ │ + ╞══════════════════════════════════════════════════════════════════════════════╡ + │ REPORT │ + │ checked 1 packages, using default DB │ + ╞════════════════════════════╤═══════════╤══════════════════════════╤══════════╡ + │ package │ installed │ affected │ ID │ + ╞════════════════════════════╧═══════════╧══════════════════════════╧══════════╡ + │ insecure-package │ 0.1.0 │ <0.2.0 │ 25853 │ + ╘══════════════════════════════════════════════════════════════════════════════╛ + +We want parsable output, so let's try it with the ``--json`` flag. + +.. 
code-block:: console + + $ echo insecure-package==0.1.0 | safety check --stdin --json + [ + [ + "insecure-package", + "<0.2.0", + "0.1.0", + "This is an insecure package with lots of exploitable security vulnerabilities.", + "25853" + ] + ] + +Let's now write the operation to call ``safety`` via a subprocess. + +.. literalinclude:: /../examples/shouldi/shouldi/safety.py + +Write a test for it + +.. literalinclude:: /../examples/shouldi/tests/test_safety.py + +Run the tests + +.. code-block:: console + + $ python3.7 setup.py test -s tests.test_safety + +.. TODO Add the operations to setup.py entry_points + +.. TODO Add bandit + +.. TODO Add pylint + +CLI +--- + +Writing the CLI is as simple as importing our operations and having the memory +orchestrator run them. DFFML also provides a quick and dirty CLI abstraction +based on :py:mod:`argparse` which will speed things up. + +.. TODO explain more about writing the CLI and the orchestrator + +**shouldi/cli.py** + +.. literalinclude:: /../examples/shouldi/shouldi/cli.py + +Let's test out the code in ``shouldi.cli`` before making it accessible via the +command line. + +.. literalinclude:: /../examples/shouldi/tests/test_cli.py + +Run all the tests this time + +.. code-block:: console + + $ python3.7 setup.py test + +If you have coverage installed (``pip install coverage``) you can also check the +code coverage. + +.. code-block:: console + + $ python3.7 -m coverage run setup.py test + running test + running egg_info + writing shouldi.egg-info/PKG-INFO + writing dependency_links to shouldi.egg-info/dependency_links.txt + writing entry points to shouldi.egg-info/entry_points.txt + writing requirements to shouldi.egg-info/requires.txt + writing top-level names to shouldi.egg-info/top_level.txt + reading manifest file 'shouldi.egg-info/SOURCES.txt' + reading manifest template 'MANIFEST.in' + writing manifest file 'shouldi.egg-info/SOURCES.txt' + running build_ext + test_install (tests.test_cli.TestCLI) ... 
ok + test_run (tests.test_safety.TestSafetyCheck) ... ok + test_run (tests.test_pypi.TestPyPiLatestPackageVersion) ... ok + + ---------------------------------------------------------------------- + Ran 3 tests in 2.314s + + OK + $ python3.7 -m coverage report -m + Name Stmts Miss Branch BrPart Cover Missing + -------------------------------------------------------------------- + shouldi/__init__.py 0 0 0 0 100% + shouldi/cli.py 30 0 11 0 100% + shouldi/definitions.py 5 0 2 0 100% + shouldi/pypi.py 12 0 2 0 100% + shouldi/safety.py 18 0 0 0 100% + shouldi/version.py 1 0 0 0 100% + tests/__init__.py 0 0 0 0 100% + tests/test_cli.py 11 0 0 0 100% + tests/test_pypi.py 9 0 0 0 100% + tests/test_safety.py 9 0 0 0 100% + -------------------------------------------------------------------- + TOTAL 95 0 15 0 100% + +We want this to be usable as a command line utility. Python's +:py:mod:`setuptools` allows us to define console ``entry_points``. All we have +to do is tell :py:mod:`setuptools` what Python function we want it to call when +a user runs a given command line application. The name of our CLI is ``shouldi`` +and the function we want to run is ``main`` in the ``ShouldI`` class which is in +the ``shouldi.cli`` module. + +**setup.py** + +.. code-block:: python + + entry_points={"console_scripts": ["shouldi = shouldi.cli:ShouldI.main"]}, + +Re-install the package via pip + +.. code-block:: console + + $ python3.7 -m pip install -e . + +Now we should be able to run our new tool via the CLI! (Provided your ``$PATH`` +is set up correctly.) + +.. code-block:: console + + $ shouldi install insecure-package bandit + bandit is okay to install + Do not install insecure-package! 
{'safety_check_number_of_issues': 1} diff --git a/examples/shouldi/.coveragerc b/examples/shouldi/.coveragerc new file mode 100644 index 0000000000..4e9bcf0ef2 --- /dev/null +++ b/examples/shouldi/.coveragerc @@ -0,0 +1,13 @@ +[run] +source = + shouldi + tests +branch = True + +[report] +exclude_lines = + no cov + no qa + noqa + pragma: no cover + if __name__ == .__main__.: diff --git a/examples/shouldi/.gitignore b/examples/shouldi/.gitignore new file mode 100644 index 0000000000..070ee81c83 --- /dev/null +++ b/examples/shouldi/.gitignore @@ -0,0 +1,20 @@ +*.log +*.pyc +.cache/ +.coverage +.idea/ +.vscode/ +*.egg-info/ +build/ +dist/ +docs/build/ +venv/ +wheelhouse/ +*.egss +.mypy_cache/ +*.swp +.venv/ +.eggs/ +*.modeldir +*.db +htmlcov/ diff --git a/examples/shouldi/LICENSE b/examples/shouldi/LICENSE new file mode 100644 index 0000000000..8ce5aa9e27 --- /dev/null +++ b/examples/shouldi/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) 2017-2019 Intel + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/examples/shouldi/MANIFEST.in b/examples/shouldi/MANIFEST.in new file mode 100644 index 0000000000..04f196ac78 --- /dev/null +++ b/examples/shouldi/MANIFEST.in @@ -0,0 +1,2 @@ +include README.md +include LICENSE diff --git a/examples/shouldi/README.md b/examples/shouldi/README.md new file mode 100644 index 0000000000..6e88a0a948 --- /dev/null +++ b/examples/shouldi/README.md @@ -0,0 +1,38 @@ +# shouldi + +![shouldi](https://github.com/intel/dffml/raw/master/examples/shouldi/shouldi.jpg) + +## Usage + +```console +$ shouldi install insecure-package bandit +bandit is okay to install +Do not install insecure-package! {'safety_check_number_of_issues': 1} +``` + +## Dependencies + +`shouldi` depends on safety, pylint, and bandit being installed separately. + +```console +$ python3.7 -m pip install -U safety pylint bandit +``` + +## WTF is this + +`shouldi` is a tool that runs static analysis tools to let you know if there are +any issues in any of the python packages you were thinking of installing. + +`shouldi` is similar to things like [Go Report Card](https://goreportcard.com/). + +Right now `shouldi` runs the following static analysis tools and complains if: + +- [safety](https://pyup.io/safety/) + - Any issues are found +- TODO: [bandit](https://pypi.org/project/bandit/) +- TODO: [pylint](https://pypi.org/project/pylint/) + - TBD (something about the number of errors) + +## License + +shouldi is distributed under the [MIT License](LICENSE). 
diff --git a/examples/shouldi/pyproject.toml b/examples/shouldi/pyproject.toml new file mode 100644 index 0000000000..8b9d32fa10 --- /dev/null +++ b/examples/shouldi/pyproject.toml @@ -0,0 +1,20 @@ +[tool.black] +line-length = 79 +target-version = ['py37'] + +exclude = ''' +( + /( + \.eggs # exclude a few common directories in the + | \.git # root of the project + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + ) +) +''' diff --git a/examples/shouldi/setup.py b/examples/shouldi/setup.py new file mode 100644 index 0000000000..de5d473550 --- /dev/null +++ b/examples/shouldi/setup.py @@ -0,0 +1,53 @@ +import os +import ast +from io import open + +from setuptools import find_packages, setup + +self_path = os.path.dirname(os.path.realpath(__file__)) + +NAME = "shouldi" +AUTHOR_NAME = "John Andersen" +AUTHOR_EMAIL = "john.s.andersen@intel.com" +DESCRIPTION = "Meta static analysis runner for Python packages" + +with open(os.path.join(self_path, NAME, "version.py"), "r") as f: + for line in f: + if line.startswith("VERSION"): + version = ast.literal_eval(line.strip().split("=")[-1].strip()) + break + +with open(os.path.join(self_path, "README.md"), "r", encoding="utf-8") as f: + readme = f.read() + +INSTALL_REQUIRES = ["aiohttp>=3.5.4", "dffml>=0.2.1"] + +setup( + name=NAME, + version=version, + description=DESCRIPTION, + long_description=readme, + long_description_content_type="text/markdown", + author=AUTHOR_NAME, + author_email=AUTHOR_EMAIL, + maintainer=AUTHOR_NAME, + maintainer_email=AUTHOR_EMAIL, + url="https://github.com/intel/dffml/blob/master/examples/shouldi/README.md", + license="MIT", + keywords=[""], + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "License :: OSI Approved :: Apache Software License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.7", + "Programming 
# ``install`` command: for every package name given on the command line, run
# the operations listed in OPERATIONS and print a verdict based on the
# "get_single" results.
# NOTE(review): documented with comments rather than a class docstring in case
# the CLI helper surfaces ``__doc__`` as help text — confirm before converting.
class Install(CMD):

    arg_packages = Arg(
        "packages", nargs="+", help="Package to check if we should install"
    )

    async def run(self):
        # Instantiate every entry of OPIMPS with an empty BaseConfig and index
        # the instances by the name of the operation each one carries.
        implementations = [Imp(BaseConfig()) for Imp in OPIMPS]
        opimps = {imp.op.name: imp for imp in implementations}

        async with MemoryOrchestrator.basic_config(
            operations=OPERATIONS, opimps=opimps
        ) as orchestrator:

            definitions = Operation.definitions(*OPERATIONS)

            # One Input per distinct package name (repeated names collapse
            # onto a single dictionary key).
            packages = {}
            for package_name in self.packages:
                packages[package_name] = Input(
                    value=package_name,
                    definition=definitions["package"],
                    parents=False,
                )

            # Names the single definition whose value we want back; it is
            # attached to every input set below (presumably consumed by the
            # GetSingle output operation — confirm).
            get_single_spec = Input(
                value=["safety_check_number_of_issues"],
                definition=definitions["get_single_spec"],
                parents=False,
            )

            async with orchestrator() as octx:
                # Each package gets its own input set whose context is the
                # package name, which is how results are matched back to a
                # package when they are reported.
                for package_name, package_input in packages.items():
                    input_set = MemoryInputSet(
                        MemoryInputSetConfig(
                            ctx=StringInputSetContext(package_name),
                            inputs=[package_input, get_single_spec],
                        )
                    )
                    await octx.ictx.add(input_set)

                async for ctx, outputs in octx.run_operations(strict=True):
                    package_name = (await ctx.handle()).as_string()
                    results = outputs["get_single"]
                    # Any truthy value among the results counts as an issue.
                    if not any(results.values()):
                        print(f"{package_name} is okay to install")
                        continue
                    print(f"Do not install {package_name}! {results!r}")
@op(
    name="safety_check",
    inputs={"package": package, "version": package_version},
    outputs={"issues": safety_check_number_of_issues},
    conditions=[],
)
async def safety_check(package: str, version: str) -> Dict[str, Any]:
    """
    Count the entries ``safety`` reports for one pinned package version.

    Runs ``safety check --stdin --json`` as a subprocess, sends it a single
    ``package==version`` requirement line on stdin and parses the JSON
    document it prints on stdout.

    :param package: Name of the package to check.
    :param version: Version of ``package`` to check.
    :return: ``{"issues": n}`` where ``n`` is the number of top-level entries
        in the JSON document produced by ``safety``.
    :raises json.JSONDecodeError: If stdout is not valid JSON. The message
        includes the exit status and whatever ``safety`` wrote to stderr.
    """
    pinned = f"{package}=={version}"

    proc = await asyncio.create_subprocess_exec(
        "safety",
        "check",
        "--stdin",
        "--json",
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    # Let communicate() write, drain and close stdin while it reads the output
    # pipes, rather than writing to the pipe by hand without draining it.
    stdout, stderr = await proc.communicate(pinned.encode() + b"\n")

    # The exit status alone is not treated as a failure.
    # NOTE(review): safety is believed to exit non-zero when it finds
    # vulnerabilities, so only unparsable output is an error — confirm.
    try:
        issues = json.loads(stdout)
    except json.JSONDecodeError as error:
        details = stderr.decode(errors="replace").strip()
        # Same exception type as before, but with the context needed to
        # understand why safety printed no usable JSON.
        raise json.JSONDecodeError(
            f"safety exited with status {proc.returncode} without valid "
            f"JSON on stdout ({error.msg}); stderr: {details!r}",
            error.doc,
            error.pos,
        ) from error

    return {"issues": len(issues)}
# Runs the safety_check operation implementation directly and checks the issue
# count it reports for a known package/version pair.
# (Comments are used instead of docstrings so unittest's verbose output for
# this test stays exactly as it was.)
class TestSafetyCheck(AsyncTestCase):
    async def test_run(self):
        inputs = {"package": "insecure-package", "version": "0.1.0"}
        implementation = safety_check.imp(BaseConfig())
        # Enter the implementation, then a context created from it with no
        # arguments of substance (both positional arguments are None).
        async with implementation as safety, safety(None, None) as ctx:
            outputs = await ctx.run(inputs)
            self.assertEqual(outputs["issues"], 1)