diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30b82b35e..2d7b28008 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,7 +41,7 @@ jobs: - name: ${{ matrix.task.name }} run: ${{ matrix.task.command }} - name: Submit Coverage - run: coveralls + run: ([ -z "$COVERALLS_REPO_TOKEN" ] && echo "coveralls is skipped in forked repo tests" || coveralls) if: matrix.task.name == 'Test' env: COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} diff --git a/conf/clusters/prod.json b/conf/clusters/prod.json index ed04a32ab..f87d17db0 100644 --- a/conf/clusters/prod.json +++ b/conf/clusters/prod.json @@ -63,6 +63,9 @@ "prefix_cluster_gsuite_admin_sm-app-name_app": [ "gsuite" ], + "prefix_cluster_okta_logevents-app-name_app": [ + "okta" + ], "prefix_cluster_onelogin-events-app-name_app": [ "onelogin" ], diff --git a/conf/global.json b/conf/global.json index a4b2edb33..8fa942ed7 100644 --- a/conf/global.json +++ b/conf/global.json @@ -5,6 +5,7 @@ "region": "us-east-1" }, "general": { + "terraform_files": [], "matcher_locations": [ "matchers" ], @@ -27,6 +28,11 @@ "read_capacity": 5, "write_capacity": 5 }, + "artifact_extractor": { + "enabled": false, + "firehose_buffer_size": 128, + "firehose_buffer_interval": 900 + }, "firehose": { "use_prefix": true, "buffer_interval": 900, diff --git a/conf/schemas/carbonblack.json b/conf/schemas/carbonblack.json index c9d4e88de..683465bc4 100644 --- a/conf/schemas/carbonblack.json +++ b/conf/schemas/carbonblack.json @@ -1043,7 +1043,31 @@ "uid", "username", "sha256" - ] + ], + "normalization": { + "command": [ + { + "path": ["command_line"], + "function": "Command line" + } + ], + "path": [ + { + "path": ["path"], + "function": "Process path" + }, + { + "path": ["parent_path"], + "function": "Process parent path", + "send_to_artifacts": false + }, + { + "path": ["process_path"], + "function": "Process parent path", + "send_to_artifacts": false + } + ] + } } }, "carbonblack:ingress.event.regmod": { @@ -1445,4 +1469,4 @@ } } } -} \ No newline at end of file +} diff --git a/conf/schemas/cloudwatch.json b/conf/schemas/cloudwatch.json index 536581639..24eba791b 100644 --- a/conf/schemas/cloudwatch.json +++ b/conf/schemas/cloudwatch.json @@ -88,7 +88,55 @@ "time": "string", "version": "string" }, - "parser": "json" + "parser": "json", + "configuration": { + "normalization": { + "event_name": ["detail", "eventName"], + "account": [ + { + "path": [ + "account" + ], + "function": "Destination account ID" + }, + { + "path": [ + "detail", + "userIdentity", + "principalId" + ], + "function": "Source account ID" + } + ], + "ip_address": [ + { + "path": [ + "detail", + "sourceIPAddress" + ], + "function": "Source IP addresses" + } + ], + "user_agent": [ + "detail", + "userAgent" + ], + "user_identity": [ + { + "path": ["detail", "userIdentity", "type"], + "function": "User identity type" + }, + { + "path": ["detail", "userIdentity", "arn"], + "function": "User identity arn" + }, + { + "path": ["detail", "userIdentity", "userName"], + "function": "User identity username" + } + ] + } + } }, "cloudwatch:flow_logs": { "schema": { @@ -144,4 +192,4 @@ "quotechar": "'" } } -} \ No newline at end of file +} diff --git a/conf/schemas/okta.json b/conf/schemas/okta.json new file mode 100644 index 000000000..7eeae8848 --- /dev/null +++ b/conf/schemas/okta.json @@ -0,0 +1,35 @@ +{ + "okta:logevents": { + "schema": { + "uuid": "string", + "published": "string", + "eventType": "string", + "version": "string", + "severity": "string", + 
"legacyEventType": "string", + "displayMessage": "string", + "actor": {}, + "client": {}, + "outcome": {}, + "target": [], + "transaction": {}, + "debugContext": {}, + "authenticationContext": {}, + "securityContext": {}, + "request": {} + }, + "parser": "json", + "configuration": { + "optional_top_level_keys": [ + "legacyEventType", + "displayMessage", + "client", + "outcome", + "transaction", + "debugContext", + "authenticationContext", + "request" + ] + } + } +} diff --git a/conf/schemas/osquery.json b/conf/schemas/osquery.json index 5b4f8041e..637f426d4 100644 --- a/conf/schemas/osquery.json +++ b/conf/schemas/osquery.json @@ -48,7 +48,21 @@ "log_type", "logNumericsAsNumbers", "numerics" - ] + ], + "normalization": { + "command": [ + { + "path": ["columns", "command"], + "function": "Command line from shell history" + } + ], + "file_path": [ + { + "path": ["columns", "history_file"], + "function": "Shell history file path" + } + ] + } } }, "osquery:snapshot": { diff --git a/constraints.txt b/constraints.txt new file mode 100644 index 000000000..c25059ceb --- /dev/null +++ b/constraints.txt @@ -0,0 +1,2 @@ +# botocore requires a version of docutils < 0.16, but sphinx-rtd-theme's requirement of >=0.12 breaks this +docutils<0.16 diff --git a/docs/images/artifacts.png b/docs/images/artifacts.png new file mode 100644 index 000000000..7e89a41d5 Binary files /dev/null and b/docs/images/artifacts.png differ diff --git a/docs/images/cloudwatch_events.png b/docs/images/cloudwatch_events.png new file mode 100644 index 000000000..8d3c62811 Binary files /dev/null and b/docs/images/cloudwatch_events.png differ diff --git a/docs/images/join_search.png b/docs/images/join_search.png new file mode 100644 index 000000000..a607fdb63 Binary files /dev/null and b/docs/images/join_search.png differ diff --git a/docs/images/normalization-arch.png b/docs/images/normalization-arch.png new file mode 100644 index 000000000..e41dbb752 Binary files /dev/null and b/docs/images/normalization-arch.png differ diff --git a/docs/source/config-clusters.rst b/docs/source/config-clusters.rst index bab8b4a4d..29e728dae 100644 --- a/docs/source/config-clusters.rst +++ b/docs/source/config-clusters.rst @@ -210,7 +210,9 @@ Example: CloudTrail via S3 Events }, "modules": { "cloudtrail": { - "enable_s3_events": true + "s3_settings": { + "enable_events": true + } } } } @@ -242,8 +244,10 @@ Example: CloudTrail via CloudWatch Logs }, "modules": { "cloudtrail": { - "send_to_cloudwatch": true, - "enable_s3_events": false, + "s3_settings": { + "enable_events": true + }, + "send_to_cloudwatch": true }, "kinesis": { "streams": { @@ -269,18 +273,30 @@ Options ============================== =================================================== =============== **Key** **Default** **Description** ------------------------------ --------------------------------------------------- --------------- -``s3_cross_account_ids`` ``[]`` Grant write access to the CloudTrail S3 bucket for these account IDs. The primary, aka deployment account ID, will be added to this list. +``enabled`` ``true`` Toggle the ``cloudtrail`` module ``enable_logging`` ``true`` Toggle to ``false`` to pause logging to the CloudTrail ``exclude_home_region_events`` ``false`` Ignore events from the StreamAlert deployment region. This only has an effect if ``send_to_cloudwatch`` is set to ``true`` ``is_global_trail`` ``true`` If ``true``, the CloudTrail is applied to all regions ``send_to_cloudwatch`` ``false`` Enable CloudTrail delivery to CloudWatch Logs. 
Logs sent to CloudWatch Logs are forwarded to this cluster's Kinesis stream for processing. If this is enabled, the ``enable_s3_events`` option should be disabled to avoid duplicative processing. -``cloudwatch_destination_arn`` (Computed from CloudWatch Logs Destination module) CloudWatch Destination ARN used for forwarding data to this cluster's Kinesis stream. This has a default value but can be overriden here with a different CloudWatch Logs Destination ARN +``cloudwatch_destination_arn`` (Computed from CloudWatch Logs Destination module) CloudWatch Destination ARN used for forwarding data to this cluster's Kinesis stream. This has a default value but can be overridden here with a different CloudWatch Logs Destination ARN ``send_to_sns`` ``false`` Create an SNS topic to which notifications should be sent when CloudTrail puts a new object in the S3 bucket. The topic name will be the same as the S3 bucket name -``enable_s3_events`` ``false`` Enable S3 events for the logs sent to the S3 bucket. These will invoke this cluster's classifier for every new object in the CloudTrail S3 bucket -``s3_bucket_name`` ``prefix-cluster-streamalert-cloudtrail`` Name of the S3 bucket to be used for the CloudTrail logs. This can be overriden, but defaults to ``prefix-cluster-streamalert-cloudtrail`` -``s3_event_selector_type`` ``""`` An S3 event selector to enable object level logging for the account's S3 buckets. Choices are: "ReadOnly", "WriteOnly", "All", or "", where "" disables object level logging for S3 +``allow_cross_account_sns`` ``false`` Allow account IDs specified in the ``cross_account_ids`` array within the ``s3_settings`` (see below) to also send SNS notifications to the created SNS Topic +``s3_settings`` ``None`` Configuration options for CloudTrail related to S3. See the `S3 Options`_ section below for details. ============================== =================================================== =============== +S3 Options +---------- +The ``cloudtrail`` module has a subsection of ``s3_settings``, which contains options related to S3. + +======================== =================================================== =============== +**Key** **Default** **Description** +------------------------ --------------------------------------------------- --------------- +``cross_account_ids`` ``[]`` Grant write access to the CloudTrail S3 bucket for these account IDs. The primary, aka deployment account ID, will be added to this list. +``enable_events`` ``false`` Enable S3 events for the logs sent to the S3 bucket. These will invoke this cluster's classifier for every new object in the CloudTrail S3 bucket +``ignore_digest`` ``true`` If ``enable_events`` is enabled, setting ``ignore_digest`` to ``false`` will also process S3 files that are created within the ``AWSLogs//CloudTrail-Digest``. Defaults to ``true``. +``bucket_name`` ``prefix-cluster-streamalert-cloudtrail`` Name of the S3 bucket to be used for the CloudTrail logs. This can be overridden, but defaults to ``prefix-cluster-streamalert-cloudtrail`` +``event_selector_type`` ``""`` An S3 event selector to enable object level logging for the account's S3 buckets. Choices are: "ReadOnly", "WriteOnly", "All", or "", where "" disables object level logging for S3 +======================== =================================================== =============== .. 
_cloudwatch_events: @@ -325,6 +341,18 @@ Example "EC2 Instance Terminate Successful", "EC2 Instance Terminate Unsuccessful" ] + }, + "cross_account": { + "accounts": { + "123456789012": [ + "us-east-1" + ] + }, + "organizations": { + "o-aabbccddee": [ + "us-east-1" + ] + } } }, "kinesis": { @@ -341,7 +369,7 @@ Example } This creates a CloudWatch Events Rule that will publish all events that match the provided -``event_pattern`` to the Kinesis stream for this cluster. Note in the example above that a custom +``event_pattern`` to the Kinesis Stream for this cluster. Note in the example above that a custom ``event_pattern`` is supplied, but may be omitted entirely. To override the default ``event_patten`` (shown below), a value of ``None`` or ``{}`` may also be supplied to capture all events, regardless of which account the logs came from. In this case, rules should be written against @@ -353,9 +381,21 @@ Options ===================== =================================== =============== **Key** **Default** **Description** --------------------- ----------------------------------- --------------- -``event_pattern`` ``{"account": [""]}`` The `CloudWatch Events pattern `_ to control what is sent to Kinesis +``event_pattern`` ``{"account": [""]}`` The `CloudWatch Events pattern `_ to control what is sent to Kinesis +``cross_account`` ``None`` Configuration options to enable cross account access for specific AWS Accounts and Organizations. See the `Cross Account Options`_ section below for details. ===================== =================================== =============== +Cross Account Options +--------------------- +The ``cross_account`` section of the ``cloudwatch_events`` module has two subsections, outlined here. Usage of these is also shown in the example above. + +===================== =========== =============== +**Key** **Default** **Description** +--------------------- ----------- --------------- +``accounts`` ``None`` A mapping of *account IDs* and regions for which cross account access should be enabled. Example: ``{"123456789012": ["us-east-1"], "234567890123": ["us-west-2"]}`` +``organizations`` ``None`` A mapping of *organization IDs* and regions for which cross account access should be enabled. Example: ``{"o-aabbccddee": ["us-west-2"]}`` +===================== =========== =============== + .. 
_cloudwatch_logs: @@ -432,8 +472,8 @@ Options ===================== =========== =============== **Key** **Default** **Description** --------------------- ----------- --------------- +``enabled`` ``true`` Toggle the ``cloudwatch_logs_destination`` module ``cross_account_ids`` ``[]`` Authorize StreamAlert to gather logs from these accounts -``enabled`` ``true`` Toggle the CloudWatch Logs module ``excluded_regions`` ``[]`` Do not create CloudWatch Log destinations in these regions ===================== =========== =============== @@ -497,7 +537,7 @@ Options ========================== =========== =============== **Key** **Default** **Description** -------------------------- ----------- --------------- -``enabled`` ``false`` Toggle the CloudWatch Monitoring module +``enabled`` ``false`` Toggle the ``cloudwatch_monitoring`` module ``kinesis_alarms_enabled`` ``true`` Toggle the Kinesis-specific metric alarms ``lambda_alarms_enabled`` ``true`` Toggle the Lambda-specific metric alarms ``settings`` ``{}`` Alarm-specific settings (see below) @@ -748,7 +788,7 @@ Options ===================== ============================================================================================================================================= =============== **Key** **Default** **Description** --------------------- --------------------------------------------------------------------------------------------------------------------------------------------- --------------- -``enabled`` --- Toggle flow log creation +``enabled`` ``true`` Toggle the ``flow_logs`` module ``flow_log_filter`` ``[version, account, eni, source, destination, srcport, destport, protocol, packets, bytes, windowstart, windowend, action, flowlogstatus]`` Toggle flow log creation ``log_retention`` ``7`` Day for which logs should be retained in the log group ``enis`` ``[]`` Add flow logs for these ENIs diff --git a/docs/source/config-global.rst b/docs/source/config-global.rst index 496eac927..9f647bca6 100644 --- a/docs/source/config-global.rst +++ b/docs/source/config-global.rst @@ -61,6 +61,9 @@ Configuration { "general": { + "terraform_files": [ + "/absolute/path/to/extra/terraform/file.tf" + ], "matcher_locations": [ "matchers" ], @@ -90,6 +93,7 @@ Options ``scheduled_query_locations`` Yes ``["scheduled_queries"]`` List of local paths where ``scheduled_queries`` are defined ``publisher_locations`` Yes ``["publishers"]`` List of local paths where ``publishers`` are defined ``third_party_libraries`` No ``["pathlib2==2.3.5"]`` List of third party dependencies that should be installed via ``pip`` at deployment time. These are libraries needed in rules, custom code, etc that are defined in one of the above settings. +``terraform_files`` No ``[]`` List of local paths to Terraform files that should be included as part of this StreamAlert deployment ============================= ============= ========================= =============== diff --git a/docs/source/getting-started.rst b/docs/source/getting-started.rst index 1ed8a4ca3..f8183c853 100644 --- a/docs/source/getting-started.rst +++ b/docs/source/getting-started.rst @@ -208,7 +208,7 @@ Open ``conf/clusters/prod.json`` and change the ``data_sources`` section to look .. 
code-block:: bash - python manage.py output aws-sns + python manage.py output set aws-sns Please supply a short and unique descriptor for this SNS topic: test-email @@ -217,7 +217,7 @@ Open ``conf/clusters/prod.json`` and change the ``data_sources`` section to look If you look at ``conf/outputs.json``, you'll notice that the SNS topic was automatically added. 7. Configure a rule to send to the alerts topic. -We will use ``rules/community/cloudtrail/cloudtrail_root_account_usage.py`` as an example, which +We will use ``rules/community/cloudwatch_events/cloudtrail_root_account_usage.py`` as an example, which alerts on any usage of the root AWS account. Change the rule decorator to: .. code-block:: python diff --git a/docs/source/index.rst b/docs/source/index.rst index 45059978c..bd725c83a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -82,6 +82,7 @@ Table of Contents rule-promotion historical-search scheduled-queries + normalization conf-schemas-examples troubleshooting faq diff --git a/docs/source/normalization.rst b/docs/source/normalization.rst new file mode 100644 index 000000000..7f1f5b2ac --- /dev/null +++ b/docs/source/normalization.rst @@ -0,0 +1,446 @@ +############# +Normalization +############# + +StreamAlert has an unannounced Data Normalization feature. In its current implementation, it extracts recognized field names from classified records and saves them to a top-level key on the same record. + +This is useful for rules, as they can be written to compare data fields against IoCs, such as an IP address, instead of writing one rule for each incoming data type. However, we have identified a couple of limitations after using Normalization internally for a while. + +************************** +Normalization 2.0 (Reboot) +************************** + +In Normalization 2.0, we introduce a new Lambda function, ``Artifact Extractor``, which leverages the `Amazon Kinesis Data Firehose Data Transformation `_ feature to extract interesting artifacts from records processed by classifiers. The artifacts will be stored in the same S3 bucket that the StreamAlert `Historical Search `_ feature uses, and the artifacts will be searchable via Athena as well. + + +Artifacts Inventory +=================== + +An artifact is any field or subset of data within a record that bears meaning beyond the record itself, and is of interest in computer security. For example, a “carbonblack_version” would not be an artifact, as it is meaningless outside of the context of Carbon Black data. However, an ``ip_address`` would be an artifact. + +The ``Artifact Extractor`` Lambda function will build an artifacts inventory based on the S3 and Athena services. It enables users to search for all artifacts across the whole infrastructure from a single Athena table. + +Architecture +============ + +.. figure:: ../images/normalization-arch.png + :alt: Normalization V2 Architecture + :align: center + :target: _images/normalization-arch.png + + (click to enlarge) + +Configuration +============= +In Normalization v1, the normalized types are based on log source (e.g. ``osquery``, ``cloudwatch``, etc.) and defined in the ``conf/normalized_types.json`` file. + +In Normalization v2, the normalized types will be based on log type (e.g. ``osquery:differential``, ``cloudwatch:cloudtrail``, ``cloudwatch:events``, etc.) and defined in ``conf/schemas/*.json``. Please note that ``conf/normalized_types.json`` is deprecated.
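+Rules consume these normalized values through the ``Normalizer`` helper, regardless of which version defined them. Below is a minimal, illustrative sketch; the rule name, watchlist value, and ``ip_address`` type are hypothetical, while the ``datatypes`` decorator argument and the ``get_values_for_normalized_type`` helper are the same ones used by the community rules elsewhere in this change:
+
+.. code-block:: python
+
+    from streamalert.shared.rule import rule
+    from streamalert.shared.normalize import Normalizer
+
+
+    @rule(datatypes=['ip_address'])
+    def watchlisted_ip_usage(rec):
+        """Example only: fire when any normalized ip_address matches a watchlisted IP"""
+        watchlist = {'50.50.50.50'}
+        # get_values_for_normalized_type returns every value extracted for the given type
+        ips = Normalizer.get_values_for_normalized_type(rec, 'ip_address')
+        return any(ip in watchlist for ip in ips)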
+ +All normalized types are arbitrary, but only lower case alphabetic characters and underscores should be used for names in order to be compatible with Athena. + +Supported normalization configure syntax: + + .. code-block:: + + "cloudwatch:events": { + "schema": { + "field1": "string", + "field2": "string", + "field3": "string" + }, + "parser": "json", + "configuration": { + "normalization": { + "normalized_key_name1": [ + { + "path": ["path", "to", "original", "key"], + "function": "The purpose of normalized_key_name1", + "condition": { + "path": ["path", "to", "other", "key"], + "is|is_not|in|not_in|contains|not_contains": "string or a list" + }, + "send_to_artifacts": true|false + } + ] + } + } + } + +* ``normalized_key_name1``: An arbitrary string to name the normalized key, e.g. ``ip_address``, ``hostname``, ``command`` etc. +* ``path``: A list contains a json path to the original key which will be normalized. +* ``function``: Describe the purpose of the normalizer. +* ``condition``: An optional block that is executed first. If the condition is not met, then this normalizer is skipped. + + * ``path``: A list contains a json path to the condition key. + * ``is|is_not|in|not_in|contains|not_contains``: Exactly one of these fields must be provided. This is the value that the conditional field that is compared against. E.g + + .. code-block:: + + "condition": { + "path": ["account"], + "is": "123456" + } + + "condition": { + "path": ["detail", "userIdentity", "userName"], + "in": ["root", "test_username"] + } + + .. note:: + + Use all lowercases string a list of strings in the conditional field. The value from the record will be converted to all lowercases. + +* ``send_to_artifacts``: A boolean flag indicates should normalized information sent to ``artifacts`` table. This field is optional and it is default to ``true``. It thinks all normalized information are artifacts unless set this flag to ``false`` explicitly. + +Below are some example configurations for normalization v2. + +* Normalize all ip addresses (``ip_address``) and user identities (``user_identity``) for ``cloudwatch:events`` logs + + ``conf/schemas/cloudwatch.json`` + + .. code-block:: + + "cloudwatch:events": { + "schema": { + "account": "string", + "detail": {}, + "detail-type": "string", + "id": "string", + "region": "string", + "resources": [], + "source": "string", + "time": "string", + "version": "string" + }, + "parser": "json", + "configuration": { + "normalization": { + "ip_address": [ + { + "path": [ + "detail", + "sourceIPAddress" + ], + "function": "Source IP addresses" + } + ], + "user_identity": [ + { + "path": ["detail", "userIdentity", "type"], + "function": "User identity type", + "send_to_artifacts": false + }, + { + "path": ["detail", "userIdentity", "arn"], + "function": "User identity arn" + }, + { + "path": ["detail", "userIdentity", "userName"], + "function": "User identity username" + } + ] + } + } + } + +* Normalize all commands (``command``) and file paths (``file_path``) for ``osquery:differential`` logs + + ``conf/schemas/osquery.json`` + + .. 
code-block:: + + "osquery:differential": { + "schema": { + "action": "string", + "calendarTime": "string", + "columns": {}, + "counter": "integer", + "decorations": {}, + "epoch": "integer", + "hostIdentifier": "string", + "log_type": "string", + "name": "string", + "unixTime": "integer", + "logNumericsAsNumbers": "string", + "numerics": "string" + }, + "parser": "json", + "configuration": { + "optional_top_level_keys": [ + "counter", + "decorations", + "epoch", + "log_type", + "logNumericsAsNumbers", + "numerics" + ], + "normalization": { + "command": [ + { + "path": ["columns", "command"], + "function": "Command line from shell history" + } + ], + "file_path": [ + { + "path": ["columns", "history_file"], + "function": "Shell history file path" + } + ] + } + } + } + +* Normalize username (``user_identity``) for ``cloudwatch:events`` logs when certain condition is met. In the following example, it will only normalize username related to AWS accounts ``11111111`` and ``22222222``. + + ``conf/schemas/cloudwatch.json`` + + .. code-block:: + + "cloudwatch:events": { + "schema": { + "account": "string", + "detail": {}, + "detail-type": "string", + "id": "string", + "region": "string", + "resources": [], + "source": "string", + "time": "string", + "version": "string" + }, + "parser": "json", + "configuration": { + "normalization": { + "user_identity": [ + { + "path": ["detail", "userIdentity", "userName"], + "function": "User identity username", + "condition": { + "path": ["account"], + "in": ["11111111", "22222222"] + } + } + ] + } + } + } + +Deployment +========== + +* Artifact Extractor will only work if Firehose and Historical Search are enabled in ``conf/global.json`` + + .. code-block:: + + "infrastructure": { + ... + "firehose": { + "use_prefix": true, + "buffer_interval": 60, + "buffer_size": 128, + "enabled": true, + "enabled_logs": { + "cloudwatch": {}, + "osquery": {} + } + } + ... + } + +* Enable Artifact Extractor feature in ``conf/global.json`` + + .. code-block:: + + "infrastructure": { + "artifact_extractor": { + "enabled": true, + "firehose_buffer_size": 128, + "firehose_buffer_interval": 900 + }, + "firehose": { + "use_prefix": true, + "buffer_interval": 60, + "buffer_size": 128, + "enabled": true, + "enabled_logs": { + "cloudwatch": {}, + "osquery": {} + } + } + ... + } + +* Artifact Extractor feature will add few more resources by running ``build`` CLI + + It will add following resources. + + * A new Glue catalog table ``artifacts`` for Historical Search via Athena + * A new Firehose to deliver artifacts to S3 bucket + * New permissions + + .. code-block:: bash + + python manage.py build --target artifact_extractor + +* Then we can deploy ``classifier`` to enable Artifact Extractor feature. + + .. code-block:: bash + + python manage.py deploy --function classifier + + .. note:: + + If the normalization configuration has changed in ``conf/schemas/*.json``, make sure to deploy the classifier Lambda function to take effect. + +Custom Metrics +============== + +Add additional three custom metrics to Classifier for artifacts statistics. + +#. ``ExtractedArtifacts``: Log the number of artifacts extracted from the records +#. ``FirehoseFailedArtifats``: Log the number of records (artifacts) failed sent to Firehose +#. ``FirehoseArtifactsSent``: Log the number of records (artifacts) sent to Firehose + +By default, the custom metrics should be enabled in the Classifier, for example in ``conf/clusters/prod.json`` + + .. 
code-block:: + + { + "id": "prod", + "classifier_config": { + "enable_custom_metrics": true, + ... + } + } + + .. code-block:: + + python manage.py build --target "metric_filters_*" + + +Artifacts +========= + +1. Artifacts will be searchable within the Athena ``artifacts`` table while original logs are still searchable within dedicated table. + + Search ``cloudwatch:events`` logs: + + .. code-block:: + + SELECT * + FROM PREFIX_streamalert.cloudwatch_events + WHERE dt='2020-06-22-23' + + .. figure:: ../images/cloudwatch_events.png + :alt: Testing Results from cloudwatch_events Table + :align: center + :target: _images/cloudwatch_events.png + + (click to enlarge) + +2. All artifacts, including artifacts extracted from ``cloudwatch:events``, will live in ``artifacts`` table. + + .. code-block:: + + SELECT * + FROM PREFIX_streamalert.artifacts + WHERE dt='2020-06-22-23' + + .. figure:: ../images/artifacts.png + :alt: Artifacts from artifacts Table + :align: center + :target: _images/artifacts.png + + (click to enlarge) + +3. (Advanced) Use join search to find original record associated to the artifacts by ``streamalert_record_id`` + + .. code-block:: + + SELECT artifacts.*, + cloudwatch.* + FROM + (SELECT streamalert_record_id AS record_id, + type, + value + FROM PREFIX_streamalert.artifacts + WHERE dt ='2020-06-22-23' + AND type='user_identity' + AND LOWER(value)='root' LIMIT 10) AS artifacts + LEFT JOIN + (SELECT streamalert_normalization['streamalert_record_id'] AS record_id, detail + FROM PREFIX_streamalert.cloudwatch_events + WHERE dt ='2020-06-22-23' LIMIT 10) AS cloudwatch + ON artifacts.record_id = cloudwatch.record_id + + .. figure:: ../images/join_search.png + :alt: JOIN Search Result + :align: center + :target: _images/join_search.png + + (click to enlarge) + + .. note:: + + Instead issue two searches, we can use JOIN statement to search once across two tables to find the original record(s) associated with the interesting artifacts. This requires ``streamalert_normalization`` field where contains ``streamalert_record_id`` searchable in the original table. Current process is add ``streamalert_normalization`` field as a top level optional key to the schema. + + * Update schema ``conf/schemas/cloudwatch.json`` + + .. code-block:: + + "cloudwatch:events": { + "schema": { + "account": "string", + "detail": {}, + "detail-type": "string", + "id": "string", + "region": "string", + "resources": [], + "source": "string", + "streamalert_normalization": {}, + "time": "string", + "version": "string" + }, + "parser": "json", + "configuration": { + "optional_top_level_keys": [ + "streamalert_normalization" + ], + "normalization": { + "user_identity": [ + { + "path": ["detail", "userIdentity", "type"], + "function": "User identity type" + }, + { + "path": ["detail", "userIdentity", "arn"], + "function": "User identity arn" + }, + { + "path": ["detail", "userIdentity", "userName"], + "function": "User identity username" + } + ] + } + } + } + + * Apply the change by running + + .. code-block:: + + python manage.py build --target "kinesis_firehose_*" + + +************** +Considerations +************** + +The Normalization Reboot will bring us good value in terms of how easy will be to search for artifacts across entire infrastructure in the organization. It will also make it possible to write more efficient scheduled queries to have correlated alerting in place. But, it is worth to mention that there may have some tradeoffs on requiring additional resources, adding additional data delay. + +#. 
Increase in Data Footprint: Each individual original record has the chance to add many artifacts. In practice, this will likely not be a huge issue, as each artifact is very small and only contains a few fields. + +#. Additional Delay: Firehose data transformation adds up to 900 additional seconds of delay before the data is available for historical search. The 900 seconds is a configurable setting on the Firehose from which the artifacts are extracted; reduce the Firehose ``buffer_interval`` value if you want to reduce the delay. + +#. High memory usage: The Artifact Extractor Lambda function may need memory of at least 3x the maximum buffer size of the Firehoses from which the artifacts are extracted, because the function currently copies a lot of data. This may be improved by writing more efficient code in the Artifact Extractor Lambda function. diff --git a/docs/source/scheduled-queries.rst b/docs/source/scheduled-queries.rst index 071258416..d18facaa2 100644 --- a/docs/source/scheduled-queries.rst +++ b/docs/source/scheduled-queries.rst @@ -129,9 +129,43 @@ All scheduled queries are located in the ``scheduled_queries/`` directory, locat * ``name`` - (str) The name of this query. This name is published in the final result, and is useful when writing rules. * ``description`` - (str) Description of this query. This is published in the final result. * ``query`` - (str) A template SQL statement sent to Athena, with query parameters identified ``{like_this}``. -* ``params`` - (list[str]) A list of query parameters to pass to the query string. These have special values that are calculated at runtime, and are interpolated into the template SQL string. +* ``params`` - (list[str]|dict[str,callable]) Read on below... * ``tags`` - (list[str]) Tags required by this query to be run. The simplest way to use this is to put the **Query pack name** into this array. +params +`````` +The "params" option specifies how to calculate special query parameters. It supports two formats. + +The first format is a list of strings from a predefined set of strings. These have special values that are calculated at runtime, +and are interpolated into the template SQL string. Here is a list of the supported strings: + + + +The second format is a dictionary mapping parameter names to functions, like so: + +.. code-block:: python + + def func1(date): + return date.timestamp() + + def func2(date): + return LookupTables.get('aaaa', 'bbbb') + + QueryPackConfiguration( + ...
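+        # Note (illustrative): each callable configured under 'params' is invoked with the
+        # scheduled-query clock's current datetime, and its return value is interpolated
+        # into the SQL template below (see generate_advanced() in query_packs/parameters.py
+        # elsewhere in this change).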
+ query=""" + SELECT * + FROM stuff + WHERE + dt = '{my_param_1}' + AND p2 = '{my_param_2}' + """, + params={ + 'my_param_1': func1, + 'my_param_2': func2, + } + ) + Writing Rules for StreamQuery diff --git a/manage.py b/manage.py index fe6c5a16f..94155ea78 100755 --- a/manage.py +++ b/manage.py @@ -23,13 +23,17 @@ terraform """ -from argparse import ArgumentParser, RawDescriptionHelpFormatter +from argparse import ArgumentParser, FileType, RawDescriptionHelpFormatter import sys from streamalert import __version__ as version from streamalert_cli.config import DEFAULT_CONFIG_PATH from streamalert_cli.runner import cli_runner, StreamAlertCLICommandRepository -from streamalert_cli.utils import DirectoryType, generate_subparser +from streamalert_cli.utils import ( + DirectoryType, + generate_subparser, + UniqueSortedFileListAppendAction, +) def build_parser(): @@ -79,6 +83,29 @@ def build_parser(): type=DirectoryType() ) + parser.add_argument( + '-t', + '--terraform-file', + dest='terraform_files', + help=( + 'Path to one or more additional Terraform configuration ' + 'files to include in this deployment' + ), + action=UniqueSortedFileListAppendAction, + type=FileType('r'), + default=[] + ) + + parser.add_argument( + '-b', + '--build-directory', + help=( + 'Path to directory to use for building StreamAlert and its infrastructure. ' + 'If no path is provided, a temporary directory will be used.' + ), + type=str + ) + # Dynamically generate subparsers, and create a 'commands' block for the prog description command_block = [] subparsers = parser.add_subparsers(dest='command', required=True) diff --git a/publishers/community/generic.py b/publishers/community/generic.py index 4d52eccc6..b388374ed 100644 --- a/publishers/community/generic.py +++ b/publishers/community/generic.py @@ -107,7 +107,7 @@ def remove_fields(alert, publication): @Register def remove_streamalert_normalization(_, publication): - """This publisher removes the super heavyweight 'streamalert:normalization' fields""" + """This publisher removes the super heavyweight 'streamalert_normalization' fields""" return _delete_dictionary_fields(publication, Normalizer.NORMALIZATION_KEY) diff --git a/publishers/sample/__init__.py b/publishers/sample/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/publishers/sample/sample_demisto.py b/publishers/sample/sample_demisto.py new file mode 100644 index 000000000..c8f83d958 --- /dev/null +++ b/publishers/sample/sample_demisto.py @@ -0,0 +1,55 @@ +""" +Batch of example publishers usable with Demisto. +""" +from streamalert.shared.publisher import Register + + +@Register +def demisto_classification(alert, publication): + """ + This publisher appropriately sets the demisto incident type and playbook. + + It first looks into the alert's context for the "demisto" key, where individual rules can + explcitly specify the desired classification traits of the output alert. 
+ """ + + # If a rule explicitly states Demisto information with the alert context, obey that + # The convention to follow is any key in this dict (example, "incident_type") is mapped + # directly onto the Demisto output magic keys (example, @demisto.incident_type) + if 'demisto' in alert.context: + for key, value in alert.context['demisto'].items(): + output_key = '@demisto.{}'.format(key) + publication[output_key] = value + + return publication + + # If no context was explicitly declared, then we default to our global rules + for code in GLOBAL_CLASSIFIERS: + payload = code(alert) + if payload: + for key, value in payload: + output_key = '@demisto.{}'.format(key) + publication[output_key] = value + + return publication + + # Else, nothing + return publication + + +def _any_rule_with_demisto(alert): + if alert.rule_name.contains('sample'): + return { + 'incident_type': 'Sample Alert', + 'playbook': 'Sample Playbook', + } + + return False + + +# The GLOBAL_CLASSIFIERS is an array of functions. Any function that returns a truthy value is +# considered to be a "match". This value must be a dict, and the keys on the dict map directly +# onto the Demisto output magic keys (e.g. "incident_type" -> "@demisto.incident_type") +GLOBAL_CLASSIFIERS = [ + _any_rule_with_demisto +] diff --git a/requirements-top-level.txt b/requirements-top-level.txt index 7755e21d6..7995ba90e 100644 --- a/requirements-top-level.txt +++ b/requirements-top-level.txt @@ -1,3 +1,4 @@ +-c constraints.txt # remove if using pip freeze aliyun-python-sdk-core==2.13.5 aliyun-python-sdk-actiontrail==2.0.0 autoflake diff --git a/requirements.txt b/requirements.txt index 1d866b16f..33fc294c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,120 +1,119 @@ aliyun-python-sdk-core==2.13.5 aliyun-python-sdk-actiontrail==2.0.0 autoflake==1.3.1 -autopep8==1.4.4 -backoff==1.8.1 +autopep8==1.5.3 +backoff==1.10.0 bandit==1.6.2 -boto3==1.10.7 -boxsdk==2.6.1 -cbapi==1.5.4 -coverage==4.5.4 -coveralls==1.11.1 -google-api-python-client==1.7.11 -jmespath==0.9.4 +boto3==1.14.29 +boxsdk==2.9.0 +cbapi==1.7.1 +coverage==5.2.1 +coveralls==2.1.1 +google-api-python-client==1.10.0 +jmespath==0.10.0 jsonlines==1.2.0 -mock==3.0.5 -moto==1.3.13 -netaddr==0.7.19 +mock==4.0.2 +moto==1.3.14 +netaddr==0.8.0 nose==1.3.7 -nose-timer==0.7.5 +nose-timer==1.0.0 pathlib2==2.3.5 -policyuniverse==1.3.2.1 -pyfakefs==3.6.1 +policyuniverse==1.3.2.3 +pyfakefs==4.1.0 pylint==2.3.1 -requests==2.22.0 -Sphinx==2.2.1 -sphinx-rtd-theme==0.4.3 -yapf==0.28.0 +pymsteams==0.1.13 +requests==2.24.0 +Sphinx==3.1.2 +sphinx-rtd-theme==0.5.0 +yapf==0.30.0 ## The following requirements were added by pip freeze: alabaster==0.7.12 -aliyun-python-sdk-core-v3==2.13.10 -astroid==2.3.2 -atomicwrites==1.3.0 +aliyun-python-sdk-core-v3==2.13.11 +astroid==2.4.2 attrdict==2.0.1 attrs==19.3.0 -aws-sam-translator==1.15.1 -aws-xray-sdk==2.4.2 -Babel==2.7.0 +aws-sam-translator==1.25.0 +aws-xray-sdk==2.6.0 +Babel==2.8.0 boto==2.49.0 -botocore==1.13.7 -cachetools==3.1.1 -certifi==2019.9.11 -cffi==1.13.1 -cfn-lint==0.24.6 +botocore==1.17.29 +cachetools==4.1.1 +certifi==2020.6.20 +cffi==1.14.1 +cfn-lint==0.34.0 chardet==3.0.4 -cryptography==2.8 -DateTime==4.3 -decorator==4.4.1 -docker==4.1.0 +cryptography==3.0 +decorator==4.4.2 +docker==4.2.2 docopt==0.6.2 docutils==0.15.2 -ecdsa==0.13.3 +ecdsa==0.15 future==0.18.2 -gitdb2==2.0.6 -GitPython==3.0.4 -google-auth==1.6.3 -google-auth-httplib2==0.0.3 -httplib2==0.14.0 +gitdb==4.0.5 +GitPython==3.1.7 +google-api-core==1.22.0 +google-auth==1.19.2 
+google-auth-httplib2==0.0.4 +googleapis-common-protos==1.52.0 +httplib2==0.18.1 idna==2.8 -imagesize==1.1.0 -importlib-metadata==0.23 +imagesize==1.2.0 +importlib-metadata==1.7.0 isort==4.3.21 -Jinja2==2.10.3 +Jinja2==2.11.2 jsondiff==1.1.2 -jsonpatch==1.24 -jsonpickle==1.2 +jsonpatch==1.26 +jsonpickle==1.4.1 jsonpointer==2.0 -jsonschema==3.1.1 +jsonschema==3.2.0 +junit-xml==1.9 lazy-object-proxy==1.4.3 MarkupSafe==1.1.1 mccabe==0.6.1 -more-itertools==7.2.0 -packaging==19.2 -pbr==5.4.3 +networkx==2.4 +packaging==20.4 +pbr==5.4.5 pika==1.1.0 -pluggy==0.13.0 -prompt-toolkit==2.0.10 -protobuf==3.10.0 -py==1.8.0 -pyasn1==0.4.7 -pyasn1-modules==0.2.7 -pycodestyle==2.5.0 -pycparser==2.19 -pyflakes==2.1.1 -Pygments==2.4.2 +prompt-toolkit==3.0.5 +protobuf==3.12.2 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycodestyle==2.6.0 +pycparser==2.20 +pycryptodome==3.9.8 +pyflakes==2.2.0 +Pygments==2.6.1 PyJWT==1.7.1 -pymsteams==0.1.12 -pyparsing==2.4.2 -pyrsistent==0.15.5 -pytest==5.0.0 -python-dateutil==2.8.0 -python-jose==3.0.1 -pytz==2019.3 -PyYAML==5.1.2 +pyparsing==2.4.7 +pyrsistent==0.16.0 +python-dateutil==2.8.1 +python-jose==3.1.0 +pytz==2020.1 +PyYAML==5.3.1 requests-toolbelt==0.9.1 -responses==0.10.6 -rsa==4.0 -s3transfer==0.2.1 -six==1.12.0 -smmap2==2.0.5 +responses==0.10.15 +rsa==4.6 +s3transfer==0.3.3 +six==1.15.0 +smmap==3.0.4 snowballstemmer==2.0.0 solrq==1.1.1 -sphinxcontrib-applehelp==1.0.1 -sphinxcontrib-devhelp==1.0.1 -sphinxcontrib-htmlhelp==1.0.2 +sphinxcontrib-applehelp==1.0.2 +sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-htmlhelp==1.0.3 sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.2 -sphinxcontrib-serializinghtml==1.1.3 +sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-serializinghtml==1.1.4 sshpubkeys==3.1.0 -stevedore==1.31.0 -typed-ast==1.4.0 -uritemplate==3.0.0 -urllib3==1.25.6 -validators==0.14.0 -wcwidth==0.1.7 -websocket-client==0.56.0 -Werkzeug==0.16.0 -wrapt==1.11.2 +stevedore==3.2.0 +toml==0.10.1 +typed-ast==1.4.1 +uritemplate==3.0.1 +urllib3==1.25.10 +validators==0.16.0 +wcwidth==0.2.5 +websocket-client==0.57.0 +Werkzeug==1.0.1 +wrapt==1.12.1 xmltodict==0.12.0 -zipp==0.6.0 -zope.interface==4.6.0 +zipp==3.1.0 diff --git a/rules/community/mitre_attack/defense_evasion/multi/obfuscated_files_or_information/right_to_left_character.py b/rules/community/mitre_attack/defense_evasion/multi/obfuscated_files_or_information/right_to_left_character.py index b373d908b..e25d07183 100644 --- a/rules/community/mitre_attack/defense_evasion/multi/obfuscated_files_or_information/right_to_left_character.py +++ b/rules/community/mitre_attack/defense_evasion/multi/obfuscated_files_or_information/right_to_left_character.py @@ -1,9 +1,9 @@ -"""Detection of the right to left override unicode character U+202E in filename or process name.""" +"""Detection of the right to left override unicode character U+202E in file_name or process name.""" from streamalert.shared.rule import rule from streamalert.shared.normalize import Normalizer -@rule(datatypes=['command', 'filePath', 'processPath', 'fileName']) +@rule(datatypes=['command', 'path', 'file_name']) def right_to_left_character(rec): """ author: @javutin @@ -27,17 +27,12 @@ def right_to_left_character(rec): if isinstance(command, str) and rlo in command: return True - file_paths = Normalizer.get_values_for_normalized_type(rec, 'filePath') - for file_path in file_paths: - if isinstance(file_path, str) and rlo in file_path: + paths = Normalizer.get_values_for_normalized_type(rec, 'path') + for path in paths: + if isinstance(path, str) and rlo in path: 
return True - process_paths = Normalizer.get_values_for_normalized_type(rec, 'processPath') - for process_path in process_paths: - if isinstance(process_path, str) and rlo in process_path: - return True - - file_names = Normalizer.get_values_for_normalized_type(rec, 'fileName') + file_names = Normalizer.get_values_for_normalized_type(rec, 'file_name') for file_name in file_names: if isinstance(file_name, str) and rlo in file_name: return True diff --git a/rules/community/okta/__init__.py b/rules/community/okta/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/rules/community/okta/okta_new_login.json b/rules/community/okta/okta_new_login.json new file mode 100644 index 000000000..31ff54438 --- /dev/null +++ b/rules/community/okta/okta_new_login.json @@ -0,0 +1,109 @@ +[ + { + "data": { + "actor": { + "id": "...", + "type": "User", + "alternateId": "user@domain.com", + "displayName": "User Name", + "detailEntry": null + }, + "client": { + "userAgent": { + "rawUserAgent": "...", + "os": "Mac OS X", + "browser": "CHROME" + }, + "zone": "null", + "device": "Computer", + "id": null, + "ipAddress": "192.168.1.1", + "geographicalContext": { + "city": "San Francisco", + "state": "California", + "country": "United States", + "postalCode": "94103", + "geolocation": { + "lat": 37.7717748, + "lon": -122.4072223 + } + } + }, + "authenticationContext": { + "authenticationProvider": null, + "credentialProvider": null, + "credentialType": null, + "issuer": null, + "interface": null, + "authenticationStep": 0, + "externalSessionId": "..." + }, + "displayMessage": "Send user new device notification email", + "eventType": "system.email.new_device_notification.sent_message", + "outcome": { + "result": "SUCCESS", + "reason": null + }, + "published": "2020-01-01T00:00:00.000Z", + "securityContext": { + "asNumber": 1234, + "asOrg": "google", + "isp": "google llc", + "domain": "google.com", + "isProxy": false + }, + "severity": "INFO", + "debugContext": { + "debugData": { + "deviceFingerprint": "...", + "requestId": "...", + "requestUri": "/api/v1/authn", + "threatSuspected": "false", + "url": "/api/v1/authn?" 
+ } + }, + "legacyEventType": "core.user.email.message_sent.new_device_notification", + "transaction": { + "type": "WEB", + "id": "...", + "detail": {} + }, + "uuid": "...", + "version": "0", + "request": { + "ipChain": [ + { + "ip": "192.168.1.1", + "geographicalContext": { + "city": "San Francisco", + "state": "California", + "country": "United States", + "postalCode": "94103", + "geolocation": { + "lat": 37.7717748, + "lon": -122.4072223 + } + }, + "version": "V4", + "source": null + } + ] + }, + "target": [ + { + "id": "...", + "type": "User", + "alternateId": "user@domain.com", + "displayName": "User Name", + "detailEntry": null + } + ] + }, + "description": "An Okta user logging in from a new device triggers an alert.", + "log": "okta:logevents", + "service": "streamalert_app", + "source": "prefix_cluster_okta_logevents-app-name_app", + "trigger_rules": [ + ] + } +] diff --git a/rules/sample/__init__.py b/rules/sample/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/rules/sample/sample_demisto.json b/rules/sample/sample_demisto.json new file mode 100644 index 000000000..d2f969bfe --- /dev/null +++ b/rules/sample/sample_demisto.json @@ -0,0 +1,41 @@ +[ + { + "data": { + "action": "added", + "calendarTime": "Wed Feb 12 21:38:11 2020 UTC", + "columns": { + "host": "10.0.2.2", + "pid": 12345, + "time": 1581542540, + "tty": "ttys001", + "type": "8", + "username": "runlevel" + }, + "decorations": { + "envIdentifier": "fake-environment", + "roleIdentifier": "fake-role" + }, + "epoch": "0", + "hostIdentifier": "sample_demisto", + "log_type": "result", + "name": "pack_incident-response_last", + "unixTime": "1581543491" + }, + "description": "Just shows how to do Demisto stuff", + "log": "osquery:differential", + "service": "kinesis", + "source": "prefix_cluster1_streamalert", + "trigger_rules": [ + "sample_demisto" + ], + "publisher_tests": { + "demisto:sample-integration": [ + { + "jmespath_expression": "\"@demisto.incident_type\"", + "condition": "is", + "value": "My sample type" + } + ] + } + } +] \ No newline at end of file diff --git a/rules/sample/sample_demisto.py b/rules/sample/sample_demisto.py new file mode 100644 index 000000000..09d4daf4a --- /dev/null +++ b/rules/sample/sample_demisto.py @@ -0,0 +1,25 @@ +""" +Example for writing a Demisto role +""" +from publishers.sample.sample_demisto import demisto_classification +from streamalert.shared.rule import rule + + +@rule( + logs=['osquery:differential'], + outputs=['demisto:sample-integration'], + publishers=[demisto_classification], + context={ + 'demisto': { + 'incident_type': 'My sample type', + 'playbook': 'A Playbook', + 'severity': 'informational' + }, + } +) +def sample_demisto(record, _): + """ + author: Derek Wang + description: An example of how to write a Demisto alert using publishers to classify + """ + return record.get('hostIdentifier', '') == 'sample_demisto' diff --git a/streamalert/__init__.py b/streamalert/__init__.py index a4a2fa499..622cc27fb 100644 --- a/streamalert/__init__.py +++ b/streamalert/__init__.py @@ -1,2 +1,2 @@ """StreamAlert version.""" -__version__ = '3.2.1' +__version__ = '3.3.0' diff --git a/streamalert/alert_processor/outputs/demisto.py b/streamalert/alert_processor/outputs/demisto.py index 63df65101..320ed0cdc 100644 --- a/streamalert/alert_processor/outputs/demisto.py +++ b/streamalert/alert_processor/outputs/demisto.py @@ -137,6 +137,7 @@ def send(self, request): 'type': request.incident_type, 'name': request.incident_name, 'owner': request.owner, + 'playbook': 
request.playbook, 'severity': request.severity, 'labels': request.labels, 'customFields': request.custom_fields, @@ -173,6 +174,7 @@ class DemistoCreateIncidentRequest: def __init__(self, incident_name='Unnamed StreamAlert Alert', incident_type='Unclassified', + playbook='', severity=SEVERITY_UNKNOWN, owner='StreamAlert', details='Details not specified.', @@ -186,6 +188,9 @@ def __init__(self, # "Unclassified". self._incident_type = str(incident_type) + # The playbook to assign to the case. + self._playbook = playbook + # Severity is an integer. Use the constants above. self._severity = severity @@ -222,6 +227,10 @@ def incident_name(self): def incident_type(self): return self._incident_type + @property + def playbook(self): + return self._playbook + @property def severity(self): return self._severity @@ -282,6 +291,7 @@ def assemble(alert, alert_publication): # Default presentation values default_incident_name = alert.rule_name default_incident_type = 'Unclassified' + default_playbook = 'Unknown' default_severity = 'unknown' default_owner = 'StreamAlert' default_details = alert.rule_description @@ -289,6 +299,7 @@ def assemble(alert, alert_publication): # Special keys that publishers can use to modify default presentation incident_type = alert_publication.get('@demisto.incident_type', default_incident_type) + playbook = alert_publication.get('@demisto.playbook', default_playbook) severity = DemistoCreateIncidentRequest.map_severity_string_to_severity_value( alert_publication.get('@demisto.severity', default_severity) ) @@ -303,6 +314,7 @@ def assemble(alert, alert_publication): severity=severity, owner=owner, details=details, + playbook=playbook, create_investigation=True # Important: Trigger workbooks automatically ) diff --git a/streamalert/apps/_apps/aliyun.py b/streamalert/apps/_apps/aliyun.py index 331d055db..34f5d411d 100644 --- a/streamalert/apps/_apps/aliyun.py +++ b/streamalert/apps/_apps/aliyun.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ +from datetime import datetime import json import re @@ -50,6 +51,13 @@ class AliyunApp(AppIntegration): # The maximum number of results to be returned. Valid values: 0 to 50. _MAX_RESULTS = 50 + # In aliyun sdk PR https://github.com/aliyun/aliyun-openapi-python-sdk/pull/216, it separates + # timeout to connection and read timeout and also lower the default connection timeout time + # from 10 to 5 seconds. We notice the connection to server gets timed out more often recently, + # increase default timeout will be helpful. 
+ _CONNECT_TIMEOUT = 15 + _READ_TIMEOUT = 15 + def __init__(self, event, context): super(AliyunApp, self).__init__(event, context) auth = self._config.auth @@ -59,6 +67,14 @@ def __init__(self, event, context): self.request.set_MaxResults(self._MAX_RESULTS) self.request.set_StartTime(self._config.last_timestamp) + # Source code can be found here https://github.com/aliyun/aliyun-openapi-python-sdk/ + # blob/master/aliyun-python-sdk-actiontrail/aliyunsdkactiontrail/request/v20171204/ + # LookupEventsRequest.py + self.request.set_EndTime(datetime.utcnow().strftime(self.date_formatter())) + + self.request.set_connect_timeout(self._CONNECT_TIMEOUT) + self.request.set_read_timeout(self._READ_TIMEOUT) + @classmethod def _type(cls): return 'actiontrail' @@ -135,12 +151,11 @@ def _gather_logs(self): self.request.set_NextToken(json_response['NextToken']) else: self._more_to_poll = False - return json_response['Events'] except (ServerException, ClientException) as e: LOGGER.exception("%s error occurred", e.get_error_type()) - return False + raise @classmethod def _required_auth_info(cls): diff --git a/streamalert/apps/_apps/gsuite.py b/streamalert/apps/_apps/gsuite.py index cd50cce5c..bc82a0ec7 100644 --- a/streamalert/apps/_apps/gsuite.py +++ b/streamalert/apps/_apps/gsuite.py @@ -276,6 +276,13 @@ class GSuiteDriveReports(GSuiteReportsApp): def _type(cls): return 'drive' +@StreamAlertApp +class GSuiteUserGCPReports(GSuiteReportsApp): + """G Suite GCP Accounts Activity Report app integration""" + + @classmethod + def _type(cls): + return 'gcp' @StreamAlertApp class GSuiteGroupReports(GSuiteReportsApp): @@ -312,6 +319,13 @@ class GSuiteLoginReports(GSuiteReportsApp): def _type(cls): return 'login' +@StreamAlertApp +class GSuiteMeetGCPReports(GSuiteReportsApp): + """G Suite meet Accounts Activity Report app integration""" + + @classmethod + def _type(cls): + return 'meet' @StreamAlertApp class GSuiteMobileReports(GSuiteReportsApp): @@ -330,6 +344,14 @@ class GSuiteRulesReports(GSuiteReportsApp): def _type(cls): return 'rules' +@StreamAlertApp +class GSuiteSAMLGCPReports(GSuiteReportsApp): + """G Suite SAML Accounts Activity Report app integration""" + + @classmethod + def _type(cls): + return 'saml' + @StreamAlertApp class GSuiteTokenReports(GSuiteReportsApp): diff --git a/streamalert/classifier/classifier.py b/streamalert/classifier/classifier.py index a694259bc..67c2c7a59 100644 --- a/streamalert/classifier/classifier.py +++ b/streamalert/classifier/classifier.py @@ -17,10 +17,12 @@ import os import logging -from streamalert.classifier.clients import FirehoseClient, SQSClient +from streamalert.classifier.clients import SQSClient +from streamalert.shared.firehose import FirehoseClient from streamalert.classifier.parsers import get_parser from streamalert.classifier.payload.payload_base import StreamPayload from streamalert.shared import config, CLASSIFIER_FUNCTION_NAME as FUNCTION_NAME +from streamalert.shared.artifact_extractor import ArtifactExtractor from streamalert.shared.exceptions import ConfigError from streamalert.shared.logger import get_logger from streamalert.shared.metrics import MetricLogger @@ -187,7 +189,13 @@ def _classify_payload(self, payload): self._log_bad_records(record, len(record.invalid_records)) for parsed_rec in record.parsed_records: - Normalizer.normalize(parsed_rec, record.log_type) + # + # In Normalization v1, the normalized types are defined based on log source + # (e.g. osquery, cloudwatch etc) and this will be deprecated. 
+ # In Normalization v2, the normalized types are defined based on log type + # (e.g. osquery:differential, cloudwatch:cloudtrail, cloudwatch:events etc) + # + Normalizer.normalize(parsed_rec, record.log_schema_type) self._payloads.append(record) @@ -256,6 +264,12 @@ def run(self, records): # Send the data to firehose for historical retention if self.data_retention_enabled: - self.firehose.send(self._payloads) + categorized_records = self.firehose.send(self._payloads) + + # Extract artifacts if it is enabled + if config.artifact_extractor_enabled(self._config): + ArtifactExtractor( + self.firehose.artifacts_firehose_stream_name(self._config) + ).run(categorized_records) return self._payloads diff --git a/streamalert/classifier/clients/__init__.py b/streamalert/classifier/clients/__init__.py index 8f1e3347d..fe53c0ed2 100644 --- a/streamalert/classifier/clients/__init__.py +++ b/streamalert/classifier/clients/__init__.py @@ -1,3 +1,2 @@ """Imports from submodules to make higher up imports easier""" -from .firehose import FirehoseClient from .sqs import SQSClient diff --git a/streamalert/classifier/payload/s3.py b/streamalert/classifier/payload/s3.py index cacdc8b14..8d20aa6c9 100644 --- a/streamalert/classifier/payload/s3.py +++ b/streamalert/classifier/payload/s3.py @@ -50,6 +50,8 @@ class S3PayloadError(Exception): class S3Payload(StreamPayload): """S3Payload class""" + MAX_S3_SIZE = 128 * 1024 * 1024 + @property def bucket(self): return self.raw_record['s3']['bucket']['name'] @@ -89,11 +91,18 @@ def _check_size(self): Returns: bool: True if the file is smaller than 128 MB, False otherwise """ - # size == 0 or greater than 128MB - if self.size == 0 or (self.size > 128 * 1024 * 1024): - raise S3PayloadError('S3 object {}/{} has an invalid size and cannot be downloaded' + # Ignore 0 size files + if self.size == 0: + LOGGER.warning('S3 file size is 0 bytes, skipping: %s/%s', self.bucket, self.key) + return False + + # size greater than 128MB + if self.size > self.MAX_S3_SIZE: + raise S3PayloadError('S3 object {}/{} is too large and cannot be downloaded ' 'from S3: {}'.format(self.bucket, self.key, self.display_size)) + return True + @staticmethod def _cleanup(): """Cleanup method to remove all objects in the Lambda container's temp directory""" @@ -184,7 +193,7 @@ def _read_file(self): client.download_fileobj(key, download) except (IOError, ClientError): LOGGER.exception('Failed to download object from S3') - return + raise total_time = time.time() - start_time LOGGER.info('Completed download in %s seconds', round(total_time, 2)) @@ -209,7 +218,8 @@ def _pre_parse(self): Yields: Instances of PayloadRecord back to the caller containing the current log data """ - self._check_size() + if not self._check_size(): + return # _check_size can raise an exception as well line_num = 0 for line_num, data in self._read_file(): diff --git a/streamalert/scheduled_queries/query_packs/configuration.py b/streamalert/scheduled_queries/query_packs/configuration.py index 5088af0e9..7939d3ca7 100644 --- a/streamalert/scheduled_queries/query_packs/configuration.py +++ b/streamalert/scheduled_queries/query_packs/configuration.py @@ -52,7 +52,6 @@ def generate_query(self, **kwargs): '''.strip().format(name=self.name, error=e, kwargs=kwargs) raise KeyError(msg) - @property def query_template(self): return self._query_template diff --git a/streamalert/scheduled_queries/query_packs/manager.py b/streamalert/scheduled_queries/query_packs/manager.py index 0f3e2c12c..74fa927d1 100644 --- 
a/streamalert/scheduled_queries/query_packs/manager.py +++ b/streamalert/scheduled_queries/query_packs/manager.py @@ -76,10 +76,22 @@ def __init__(self, query_pack_configuration, execution_context): self._query_execution_id = None self._query_result = None - self._query_parameters = { - param: self._execution_context.parameter_generator.generate(param) - for param in self._configuration.query_parameters - } + if isinstance(self._configuration.query_parameters, dict): + self._query_parameters = { + param: self._execution_context.parameter_generator.generate_advanced( + param, configuration + ) + for param, configuration in self._configuration.query_parameters.items() + } + elif isinstance(self._configuration.query_parameters, list): + self._query_parameters = { + param: self._execution_context.parameter_generator.generate(param) + for param in self._configuration.query_parameters + } + else: + # not intended to be reached + self._query_parameters = {} + self._query_string = None @property diff --git a/streamalert/scheduled_queries/query_packs/parameters.py b/streamalert/scheduled_queries/query_packs/parameters.py index d014e734d..875e3e42f 100644 --- a/streamalert/scheduled_queries/query_packs/parameters.py +++ b/streamalert/scheduled_queries/query_packs/parameters.py @@ -60,7 +60,17 @@ def generate(self, parameter): if parameter == 'utctimestamp': return str(round(self._clock.now.timestamp())) + if parameter == 'utcisotime': + return str(round(self._clock.now.timestamp())) + self._logger.error( 'Parameter generator does not know how to handle "{}"'.format(parameter) ) return None + + def generate_advanced(self, key, configuration): + if callable(configuration): + return configuration(self._clock.now) + + # else, default to whatever generate returns + return self.generate(key) diff --git a/streamalert/shared/artifact_extractor.py b/streamalert/shared/artifact_extractor.py new file mode 100644 index 000000000..cccdbaac5 --- /dev/null +++ b/streamalert/shared/artifact_extractor.py @@ -0,0 +1,188 @@ +""" +Copyright 2017-present Airbnb, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import re +import uuid + +from streamalert.shared.firehose import FirehoseClient +from streamalert.shared import CLASSIFIER_FUNCTION_NAME, config +from streamalert.shared.metrics import MetricLogger +from streamalert.shared.normalize import Normalizer, CONST_ARTIFACTS_FLAG +from streamalert.shared.logger import get_logger + + +LOGGER = get_logger(__name__) + + +class Artifact: + """Encapsulation of a single Artifact that is extracted from an input record.""" + + def __init__(self, function, record_id, source_type, normalized_type, value): + """Create a new Artifact based on normalized information + + Args: + function (str): Describes how this field is used in the record, or what it means. + record_id (str): Currently it is reserved for future support. It will come from the + record processed by classifier. This field is very useful for cross reference back + to the original record in the future. 
It will be "None" if no "record_id" + information when searching artifacts in Athena. + source_type (str): The original source of the artifact(s) extracted from a record. + e.g. osquery_differential, cloudwatch_cloudtrail + normalized_type (str): Normalized types in a record processed by classifier. + value (str): This is the true value of the type. E.g, a record of type “ip_address” + could have a value of “50.50.50.50” + """ + # Enforce all fields are strings in a Artifact to prevent type corruption in Parquet format + self._function = str(function) + self._record_id = str(record_id) + self._source_type = str(source_type) + self._type = str(normalized_type) + self._value = str(value) + + @property + def artifact(self): + """Generate an artifact + + Returns: + dict: A dictionary contains artifact information. + """ + return { + 'function': self._function, + Normalizer.RECORD_ID_KEY: self._record_id, + 'source_type': self._source_type, + 'type': self._type, + 'value': self._value, + } + + +class ArtifactExtractor: + """ArtifactExtractor class will extract artifacts from "streamalert_normalization" field in the + records. The extracted artfiacts will be saved in the S3 bucket via a dedicated Firehose + delivery stream and searchable from "artifacts" table in Athena. + """ + + STREAM_ARN_REGEX = re.compile(r".*streamalert_(?P.*)") + + _config = None + _firehose_client = None + + def __init__(self, artifacts_fh_stream_name): + self._dst_firehose_stream_name = artifacts_fh_stream_name + self._artifacts = list() + + ArtifactExtractor._config = ArtifactExtractor._config or config.load_config(validate=True) + + ArtifactExtractor._firehose_client = ( + ArtifactExtractor._firehose_client or FirehoseClient.get_client( + prefix=self.config['global']['account']['prefix'], + artifact_extractor_config=self.config['global'].get( + 'infrastructure', {} + ).get('artifact_extractor', {}) + ) + ) + + @property + def config(self): + return ArtifactExtractor._config + + @property + def firehose(self): + return ArtifactExtractor._firehose_client + + @staticmethod + def _extract_artifacts(source_type, records): + """Extract all artifacts from a record + + Returns: + list: A list of Artifacts from a normalized record. + + normalized information in the record will be similar to + { + 'record': { + 'region': 'us-east-1', + 'detail': { + 'awsRegion': 'us-west-2' + } + }, + 'streamalert_normalization': { + 'region': [ + { + 'values': ['region_name'], + 'function': 'AWS region' + }, + { + 'values': ['region_name'], + 'function': 'AWS region', + 'send_to_artifacts': False + } + ] + } + } + """ + artifacts = [] + + for record in records: + if not record.get(Normalizer.NORMALIZATION_KEY): + continue + + record_id = (record[Normalizer.NORMALIZATION_KEY].get(Normalizer.RECORD_ID_KEY) + or str(uuid.uuid4())) + for key, values in record[Normalizer.NORMALIZATION_KEY].items(): + if key == Normalizer.RECORD_ID_KEY: + continue + + for value in values: + # Skip the normalized value is SNED_TO_ARTIFACTS_FLAG set to "false", which is + # default to "true". 
+ if not value.get(CONST_ARTIFACTS_FLAG, True): + continue + + for val in value.get('values', []): + artifacts.append(Artifact( + function=value.get('function'), + record_id=record_id, + # source_type=self._source_type, + source_type=source_type, + normalized_type=key, + value=val + )) + + return artifacts + + def run(self, categorized_records): + """Run extract artifacts logic and send artifacts to the Firehose for retention + + Args: + categorized_records (dict): A dictionary contains log source type and records with + following format + { + 'log_type_01_sub_type_01': [{'key': 'value'}], + 'log_type_01_sub_type_02': [{'key': 'value'}], + 'log_type_02_sub_type_01': [{'key': 'value'}], + 'log_type_02_sub_type_02': [{'key': 'value'}] + } + """ + + for source_type, records in categorized_records.items(): + LOGGER.debug('Extracting artifacts from %d %s logs', len(records), source_type) + for artifact in self._extract_artifacts(source_type, records): + self._artifacts.append(artifact.artifact) + + LOGGER.debug('Extracted %d artifact(s)', len(self._artifacts)) + + MetricLogger.log_metric( + CLASSIFIER_FUNCTION_NAME, + MetricLogger.EXTRACTED_ARTIFACTS, + len(self._artifacts) + ) + + self.firehose.send_artifacts(self._artifacts, self._dst_firehose_stream_name) diff --git a/streamalert/shared/config.py b/streamalert/shared/config.py index 8cc632264..25838eef9 100644 --- a/streamalert/shared/config.py +++ b/streamalert/shared/config.py @@ -443,3 +443,21 @@ def _validate_sources(cluster_name, data_sources, existing_sources): existing_sources.add(source) # FIXME (derek.wang) write a configuration validator for lookuptables (new one) + +def artifact_extractor_enabled(config): + """Check if Artifactor Extractor enabled. + Args: + config (dict): The loaded config from the 'conf/' directory + + Returns: + bool: return True is "artifact_extract" is enabled in conf/global.json + """ + if not config['global']['infrastructure'].get('artifact_extractor', {}).get('enabled', False): + return False + + # Artifact Extractor is enabled once when firehose is enabled. 
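For reference, a minimal sketch of the two toggles this check reads, assuming the `conf/global.json` layout introduced in this change; both the `artifact_extractor` and `firehose` blocks must be enabled before any artifacts are extracted.

```python
from streamalert.shared.config import artifact_extractor_enabled

# Hypothetical, hand-built config for illustration; a real config is loaded from conf/
config = {
    'global': {
        'infrastructure': {
            'artifact_extractor': {
                'enabled': True,
                'firehose_buffer_size': 128,
                'firehose_buffer_interval': 900,
            },
            'firehose': {'enabled': True},
        }
    }
}

assert artifact_extractor_enabled(config)        # both toggles on -> True

config['global']['infrastructure']['firehose']['enabled'] = False
assert not artifact_extractor_enabled(config)    # artifact extraction requires firehose
```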
+ if not config['global']['infrastructure'].get('firehose', {}).get('enabled', False): + return False + + return True + \ No newline at end of file diff --git a/streamalert/classifier/clients/firehose.py b/streamalert/shared/firehose.py similarity index 81% rename from streamalert/classifier/clients/firehose.py rename to streamalert/shared/firehose.py index f6c3c2443..7e07de399 100644 --- a/streamalert/classifier/clients/firehose.py +++ b/streamalert/shared/firehose.py @@ -23,7 +23,7 @@ from botocore.exceptions import ClientError, HTTPClientError from botocore.exceptions import ConnectionError as BotocoreConnectionError -from streamalert.shared import CLASSIFIER_FUNCTION_NAME as FUNCTION_NAME +from streamalert.shared import CLASSIFIER_FUNCTION_NAME import streamalert.shared.helpers.boto as boto_helpers from streamalert.shared.logger import get_logger from streamalert.shared.metrics import MetricLogger @@ -69,7 +69,7 @@ class FirehoseClient: FIREHOSE_NAME_MIN_HASH_LEN = 8 def __init__(self, prefix, firehose_config=None, log_sources=None): - self._prefix = prefix if firehose_config.get('use_prefix', True) else '' + self._prefix = prefix if firehose_config and firehose_config.get('use_prefix', True) else '' self._client = boto3.client('firehose', config=boto_helpers.default_config()) self.load_enabled_log_sources(firehose_config, log_sources, force_load=True) @@ -89,7 +89,7 @@ def _records_to_json_list(cls, records): ] @classmethod - def _record_batches(cls, records): + def _record_batches(cls, records, function_name): """Segment the records into batches that conform to Firehose restrictions This will log any single record that is too large to send, and skip it. @@ -116,7 +116,7 @@ def _record_batches(cls, records): if line_len > cls.MAX_RECORD_SIZE: LOGGER.error('Record too large (%d) to send to Firehose:\n%s', line_len, record) - cls._log_failed(1) + cls._log_failed(1, function_name) continue # Add the record to the batch @@ -206,7 +206,7 @@ def _categorize_records(self, payloads): return categorized_records @classmethod - def _finalize(cls, response, stream_name, size): + def _finalize(cls, response, stream_name, size, function_name): """Perform any final operations for this response, such as metric logging, etc Args: @@ -223,7 +223,7 @@ def _finalize(cls, response, stream_name, size): failed for failed in response['RequestResponses'] if failed.get('ErrorCode') ] - cls._log_failed(response['FailedPutCount']) + cls._log_failed(response['FailedPutCount'], function_name) # Only print the first 100 failed records to Cloudwatch logs LOGGER.error( @@ -233,7 +233,7 @@ def _finalize(cls, response, stream_name, size): ) return - MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FIREHOSE_RECORDS_SENT, size) + MetricLogger.log_metric(function_name, MetricLogger.FIREHOSE_RECORDS_SENT, size) LOGGER.info( 'Successfully sent %d message(s) to firehose %s with RequestId \'%s\'', size, @@ -242,15 +242,15 @@ def _finalize(cls, response, stream_name, size): ) @classmethod - def _log_failed(cls, count): + def _log_failed(cls, count, function_name): """Helper to log the failed Firehose records metric Args: count (int): Number of failed records """ - MetricLogger.log_metric(FUNCTION_NAME, MetricLogger.FIREHOSE_FAILED_RECORDS, count) + MetricLogger.log_metric(function_name, MetricLogger.FIREHOSE_FAILED_RECORDS, count) - def _send_batch(self, stream_name, record_batch): + def _send_batch(self, stream_name, record_batch, function_name): """Send record batches to Firehose Args: @@ -299,7 +299,7 @@ def 
_firehose_request_helper(data): LOGGER.exception('Firehose request failed') # Use the current length of the records_data in case some records were # successful but others were not - self._log_failed(len(records_data)) + self._log_failed(len(records_data), function_name) @classmethod def generate_firehose_name(cls, prefix, log_stream_name): @@ -334,6 +334,30 @@ def generate_firehose_name(cls, prefix, log_stream_name): base_name, hashlib.md5(stream_name.encode()).hexdigest() # nosec )[:cls.AWS_FIREHOSE_NAME_MAX_LEN] + @classmethod + def artifacts_firehose_stream_name(cls, config): + """Return Artifacts Firehose Stream Name + + Args: + config (dict): The loaded config from the 'conf/' directory + + Returns: + str: Artifacts Firehose Stream Name + """ + # support custom firehose stream name of Artifacts. User should make sure the length of + # the custom firehose name is no longer than 64 chars, otherwise the firehose will be + # failed to create. StreamAlert is not responsible for checking for custom firehose name + # since it should not change custom settings. + stream_name = config.get('lambda', {}).get( + 'artifact_extractor_config', {} + ).get('firehose_stream_name') + + return stream_name or cls.generate_firehose_name( + prefix=config['global']['account']['prefix'], + log_stream_name='artifacts' + ) + + @classmethod def enabled_log_source(cls, log_source_name): """Check that the incoming record is an enabled log source for Firehose @@ -414,23 +438,68 @@ def load_from_config(cls, prefix, firehose_config, log_sources): return return cls(prefix=prefix, firehose_config=firehose_config, log_sources=log_sources) + @classmethod + def get_client(cls, prefix, artifact_extractor_config): + """Get a Firehose client for sending artifacts + + Args: + prefix (str): Account prefix from global.json + artifact_extractor_config (dict): Loaded Artifact Extractor config from lambda.json + + Returns: + FirehoseClient or None: If disabled, this returns None, otherwise it returns an + instanec of FirehoseClient + """ + if not artifact_extractor_config.get('enabled'): + return + return cls(prefix=prefix) + def send(self, payloads): """Send all classified records to a respective Firehose Delivery Stream Args: payloads (list): List of PayloadRecord items that include parsed records """ - records = self._categorize_records(payloads) + categorized_records = self._categorize_records(payloads) # Iterate through each set of categorized payloads. # Each batch will be processed to their specific Firehose, which lands the data # in a specific prefix in S3. 
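As a rough sketch of how the artifacts delivery stream name is resolved (the values below are hypothetical), the generated default is only used when no custom name is configured under `lambda.artifact_extractor_config`:

```python
from streamalert.shared.firehose import FirehoseClient

# Hypothetical config values for illustration only
config = {
    'global': {'account': {'prefix': 'acme'}},
    'lambda': {},
}

# No custom name configured: falls back to generate_firehose_name(prefix, 'artifacts')
default_name = FirehoseClient.artifacts_firehose_stream_name(config)

# A custom name is returned as-is; keeping it under the 64-char limit is the user's job
config['lambda'] = {
    'artifact_extractor_config': {'firehose_stream_name': 'acme_custom_artifacts'}
}
assert FirehoseClient.artifacts_firehose_stream_name(config) == 'acme_custom_artifacts'
```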
- for log_type, records in records.items(): + for log_type, records in categorized_records.items(): # firehose stream name has the length limit, no longer than 64 characters formatted_stream_name = self.generate_firehose_name(self._prefix, log_type) # Process each record batch in the categorized payload set - for record_batch in self._record_batches(records): + for record_batch in self._record_batches(records, CLASSIFIER_FUNCTION_NAME): batch_size = len(record_batch) - response = self._send_batch(formatted_stream_name, record_batch) - self._finalize(response, formatted_stream_name, batch_size) + response = self._send_batch( + formatted_stream_name, + record_batch, + CLASSIFIER_FUNCTION_NAME + ) + + self._finalize( + response, + formatted_stream_name, + batch_size, + CLASSIFIER_FUNCTION_NAME + ) + + # return categorized records for extracting artifacts if the feature is enabled + return categorized_records + + def send_artifacts(self, artifacts, stream_name): + """Send artifacts to artifacts Firehose delievery stream + Args: + artifacts (list(dict)): A list of artifacts extracted from normalized records. + stream_name (str): Stream name of destination Firehose. + """ + for artifact_batch in self._record_batches(artifacts, CLASSIFIER_FUNCTION_NAME): + batch_size = len(artifact_batch) + response = self._send_batch(stream_name, artifact_batch, CLASSIFIER_FUNCTION_NAME) + self._finalize( + response, + stream_name, + batch_size, + CLASSIFIER_FUNCTION_NAME + ) diff --git a/streamalert/shared/helpers/aws_api_client.py b/streamalert/shared/helpers/aws_api_client.py index 6007fa4dd..423d24108 100644 --- a/streamalert/shared/helpers/aws_api_client.py +++ b/streamalert/shared/helpers/aws_api_client.py @@ -44,9 +44,10 @@ def encrypt(plaintext_data, region, key_alias): ClientError """ try: - key_id = 'alias/{}'.format(key_alias) + if not key_alias.startswith('alias/'): + key_alias = 'alias/{}'.format(key_alias) client = boto3.client('kms', config=default_config(region=region)) - response = client.encrypt(KeyId=key_id, Plaintext=plaintext_data) + response = client.encrypt(KeyId=key_alias, Plaintext=plaintext_data) return response['CiphertextBlob'] except ClientError: LOGGER.error('An error occurred during KMS encryption') diff --git a/streamalert/shared/metrics.py b/streamalert/shared/metrics.py index 65ea17b02..07da43260 100644 --- a/streamalert/shared/metrics.py +++ b/streamalert/shared/metrics.py @@ -75,6 +75,11 @@ class MetricLogger: # Alert Merger metric names ALERT_ATTEMPTS = 'AlertAttempts' + # Artifact Extractor metric names + EXTRACTED_ARTIFACTS = 'ExtractedArtifacts' + FIREHOSE_FAILED_ARTIFACTS = 'FirehoseFailedArtifacts' + FIREHOSE_ARTIFACTS_SENT = 'FirehoseArtifactsSent' + _default_filter = '{{ $.metric_name = "{}" }}' _default_value_lookup = '$.metric_value' @@ -91,6 +96,12 @@ class MetricLogger: ALERT_PROCESSOR_NAME: {}, # Placeholder for future alert processor metrics ATHENA_PARTITIONER_NAME: {}, # Placeholder for future athena processor metrics CLASSIFIER_FUNCTION_NAME: { + EXTRACTED_ARTIFACTS: (_default_filter.format(EXTRACTED_ARTIFACTS), + _default_value_lookup), + FIREHOSE_FAILED_ARTIFACTS: (_default_filter.format(FIREHOSE_FAILED_ARTIFACTS), + _default_value_lookup), + FIREHOSE_ARTIFACTS_SENT: (_default_filter.format(FIREHOSE_ARTIFACTS_SENT), + _default_value_lookup), FAILED_PARSES: (_default_filter.format(FAILED_PARSES), _default_value_lookup), FIREHOSE_FAILED_RECORDS: (_default_filter.format(FIREHOSE_FAILED_RECORDS), diff --git a/streamalert/shared/normalize.py 
b/streamalert/shared/normalize.py index c43544e31..c06cefc74 100644 --- a/streamalert/shared/normalize.py +++ b/streamalert/shared/normalize.py @@ -13,22 +13,172 @@ See the License for the specific language governing permissions and limitations under the License. """ +from collections import defaultdict import logging +import itertools +import uuid from streamalert.shared.config import TopLevelConfigKeys +from streamalert.shared.exceptions import ConfigError from streamalert.shared.logger import get_logger LOGGER = get_logger(__name__) LOGGER_DEBUG_ENABLED = LOGGER.isEnabledFor(logging.DEBUG) +CONST_FUNCTION = 'function' +CONST_PATH = 'path' +CONST_CONDITION = 'condition' +CONST_VALUES = 'values' +CONST_ARTIFACTS_FLAG = 'send_to_artifacts' + +class NormalizedType: + """The class encapsulates normalization information for each normalized type""" + + VALID_KEYS = {CONST_PATH, CONST_FUNCTION, CONST_CONDITION, CONST_ARTIFACTS_FLAG} + CONST_STR = 'str' + CONST_DICT = 'dict' + + def __init__(self, log_type, normalized_type, params): + """Init NormalizatedType + Args: + log_type (str): log type name, e.g. osquery:differential + normalized_type (str): Normalized type name defined in conf/, e.g. 'sourceAddress', + 'destination_ip' may be normalized to 'ip_address'. + params (list): a list of str or dict contains normalization configuration read from + conf/schemas/*.json. The params can be a list of str or a list of dict to specify + the path to the keys which will be normalized. + e.g. + ['path', 'to', 'the', 'key'] + or + [ + { + 'path': ['detail', 'sourceIPAddress'], + 'function': 'source ip address' + }, + { + 'path': ['path', 'to', 'the', 'key'], + 'function': 'destination ip address' + } + ] + """ + self._log_type = log_type + self._log_source = log_type.split(':')[0] + self._normalized_type = normalized_type + self._parsed_params = self._parse_params(params) + + def __eq__(self, other): + """Compare two NormalizedType instances and it is very helpful in unit test when use + assert_equal + """ + if not (self._log_type == other.log_type + and self._log_source == other.log_source + and self._normalized_type == other.normalized_type): + return False + + if len(self._parsed_params) != len(other.parsed_params): + return False + + for idx in range(len(self._parsed_params)): + if self._parsed_params[idx][CONST_PATH] == other.parsed_params[idx][CONST_PATH]: + continue + + return False + + return True + + @property + def log_type(self): + """Return the log type name, e.g. 'osquery:differential'""" + return self._log_type + + @property + def log_source(self): + """Return the log source name, e.g. 'osquery'""" + return self._log_source + + @property + def normalized_type(self): + """Return the normalized type, e.g. 'ip_address'""" + return self._normalized_type + + @property + def parsed_params(self): + """Return the normalization configuration which is a list of dict, e.g. + [ + { + 'path': ['path', 'to', 'the', 'key'], + 'function': None + } + ] + + or + [ + { + 'path': ['detail', 'sourceIPAddress'], + 'function': 'source ip address', + 'send_to_artifacts': False + }, + { + 'path': ['path', 'to', 'the', 'destination', 'ip'], + 'function': 'destination ip address' + } + ] + """ + return self._parsed_params + + def _parse_params(self, params): + """Extract path and function information from params argument + + Args: + params (list): a list of str or dict contains normalization configuration. 
+ """ + param_type = self._parse_param_type(params) + + if param_type == self.CONST_STR: + # Format params to include 'function' field which is set to None. + return [ + { + CONST_PATH: params, + CONST_FUNCTION: None + } + ] + + return params + + def _parse_param_type(self, params): + """Parse all param type in params + + Args: + params (list): a list of str or dict contains normalization configuration. + """ + if not isinstance(params, list): + raise ConfigError( + 'Unsupported params {} for normalization. Convert params to a list'.format(params) + ) + + if all(isinstance(param, str) for param in params): + return self.CONST_STR + + if all(isinstance(param, dict) and set(param.keys()).issubset(self.VALID_KEYS) + for param in params + ): + return self.CONST_DICT + + # FIXME: should we raise exception here? Or may just return False and log a warming message + raise ConfigError( + ('Unsupported type(s) used in {} or missing keys. Valid types are str or dict and ' + 'valid keys are {}').format(params, self.VALID_KEYS) + ) + class Normalizer: """Normalizer class to handle log key normalization in payloads""" - NORMALIZATION_KEY = 'streamalert:normalization' + NORMALIZATION_KEY = 'streamalert_normalization' + RECORD_ID_KEY = 'streamalert_record_id' - # Store the normalized CEF types mapping to original keys from the records + # Store the normalized types mapping to original keys from the records _types_config = dict() @classmethod @@ -43,64 +193,129 @@ def match_types(cls, record, normalized_types): dict: A dict of normalized keys with a list of values Example: - record={ - 'region': 'us-east-1', - 'detail': { - 'awsRegion': 'us-west-2' - } - } - normalized_types={ - 'region': ['region', 'awsRegion'] - } - - return={ - 'region': ['us-east-1', 'us-west-2'] + return + { + 'region': [ + { + 'values': ['us-east-1'] + 'function': 'AWS region' + }, + { + 'values': ['us-west-2'] + 'function': 'AWS region' + } + ] } """ - result = {} - for key, keys_to_normalize in normalized_types.items(): - values = set() - for value in cls._extract_values(record, set(keys_to_normalize)): - # Skip emtpy values - if value is None or value == '': - continue + results = {} + for type_name, type_info in normalized_types.items(): + result = list(cls._extract_values(record, type_info)) - values.add(value) + if result: + results[type_name] = result - if not values: - continue + if results: + results[cls.RECORD_ID_KEY] = str(uuid.uuid4()) + return results - result[key] = sorted(values, key=str) - - return result + @classmethod + def _find_value(cls, record, path): + """Retrieve value from a record based on a json path""" + found_value = False + value = record + for key in path: + value = value.get(key) + if not value: + found_value = False + break + found_value = True + + if not found_value: + return False, None + + return True, value @classmethod - def _extract_values(cls, record, keys_to_normalize): + def _extract_values(cls, record, paths_to_normalize): """Recursively extract lists of path parts from a dictionary Args: record (dict): Parsed payload of log - keys_to_normalize (set): Normalized keys for which to extract paths + paths_to_normalize (set): Normalized keys for which to extract paths path (list=None): Parts of current path for which keys are being extracted Yields: - list: Parts of path in dictionary that contain normalized keys + dict: A dict contians the values of normalized types. 
For example, + { + 'values': ['1.1.1.2'] + 'function': 'Source ip address' + } """ - for key, value in record.items(): - if isinstance(value, dict): # If this is a dict, look for nested - for nested_value in cls._extract_values(value, keys_to_normalize): - yield nested_value + for param in paths_to_normalize.parsed_params: + if param.get(CONST_CONDITION) and not cls._match_condition( + record, param[CONST_CONDITION] + ): + # If optional 'condition' block is configured, it will only extract values if + # condition is matched. continue - if key not in keys_to_normalize: - continue + found_value, value = cls._find_value(record, param.get(CONST_PATH)) - if isinstance(value, list): # If this is a list of values, return all of them - for item in value: - yield item - continue + if found_value: + result = { + CONST_FUNCTION: param.get(CONST_FUNCTION) or None, + # if value not a list, it will be cast to a str even it is a dict or other + # types + CONST_VALUES: value if isinstance(value, list) else [str(value)] + } + + # Add "send_to_artifacts" flag to the normalized field when it explicitly sets the + # flag to "false" in the normalizer in conf/schemas/*.json + if not param.get(CONST_ARTIFACTS_FLAG, True): + result[CONST_ARTIFACTS_FLAG] = False - yield value + yield result + + @classmethod + def _match_condition(cls, record, condition): + """Apply condition to a record before normalization kicked in. + + Returns: + bool: Return True if the value of the condition path matches to the condition, otherwise + return False. It is False if the path doesn't exist. + """ + if not condition.get(CONST_PATH): + return False + + found_value, value = cls._find_value(record, condition[CONST_PATH]) + if not found_value: + return False + + # cast value to a str in all lowercases + value = str(value).lower() + + # Only support extract one condition. The result is not quaranteed if multiple conditions + # configured. + # FIXME: log a warning if more than one condition configured. + if condition.get('is'): + return value == condition['is'] + + if condition.get('is_not'): + return value != condition['is_not'] + + if condition.get('in'): + return value in condition['in'] + + if condition.get('not_in'): + return value not in condition['not_in'] + + if condition.get('contains'): + return condition['contains'] in value + + if condition.get('not_contains'): + return condition['not_contains'] not in value + + return False @classmethod def normalize(cls, record, log_type): @@ -110,7 +325,7 @@ def normalize(cls, record, log_type): record (dict): The parsed log without data normalization log_type (str): Type of log for which to apply normalizaiton """ - log_normalized_types = cls._types_config.get(log_type) + log_normalized_types = cls._types_config.get(log_type) if cls._types_config else None if not log_normalized_types: LOGGER.debug('No normalized types defined for log type: %s', log_type) return @@ -129,7 +344,12 @@ def get_values_for_normalized_type(cls, record, datatype): Returns: set: The values for the normalized type specified """ - return set(record.get(cls.NORMALIZATION_KEY, {}).get(datatype, set())) + normalization_results = record.get(cls.NORMALIZATION_KEY, {}).get(datatype) + if not normalization_results: + # Return an empty set to be compatible existing rules calling this method which doesn't + # check if the return value is None or empty set. 
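For example, given a hypothetical record that has already passed through `Normalizer.normalize()`, existing rules can keep calling `get_values_for_normalized_type` and still receive a flat set of values from the nested v2 structure:

```python
from streamalert.shared.normalize import Normalizer

# Hypothetical, already-normalized record for illustration
record = {
    'detail': {'sourceIPAddress': '1.1.1.2'},
    'streamalert_normalization': {
        'streamalert_record_id': '9a1d4b6c-0000-0000-0000-000000000000',
        'ip_address': [
            {'values': ['1.1.1.2'], 'function': 'Source IP addresses'},
        ],
    },
}

assert Normalizer.get_values_for_normalized_type(record, 'ip_address') == {'1.1.1.2'}

# Missing or unknown types still return an empty set, as before
assert Normalizer.get_values_for_normalized_type(record, 'user_agent') == set()
```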
+ return set() + return set(itertools.chain(*[result.get(CONST_VALUES) for result in normalization_results])) @classmethod def load_from_config(cls, config): @@ -144,9 +364,63 @@ def load_from_config(cls, config): if cls._types_config: return cls # config is already populated - if TopLevelConfigKeys.NORMALIZED_TYPES not in config: - return cls # nothing to do - - cls._types_config = config[TopLevelConfigKeys.NORMALIZED_TYPES] + cls._types_config = cls._parse_normalization(config) return cls # there are no instance methods, so just return the class + + @classmethod + def _parse_normalization(cls, config): + """Load and parse normalization config from conf/schemas/*.json. Normalization will be + configured along with log schema and a path will be provided to find the original key. + + For example: conf/schemas/cloudwatch.json looks like + 'cloudwatch:events': { + 'schema': { + 'account': 'string', + 'source': 'string', + 'other_key': 'string' + }, + 'configuration': { + 'normalization': { + 'region': ['path', 'to', 'original', 'key'], + 'ip_address': [ + { + 'path': ['detail', 'sourceIPAddress'], + 'function': 'source ip address' + }, + { + 'path': ['path', 'to', 'original', 'key'], + 'function': 'destination ip address' + } + ] + } + } + } + + Args: + config (dict): Config read from 'conf/' directory + + Returns: + dict: return a dict contains normalization information per log type basis. + { + 'cloudwatch:events': { + 'region': NormalizedType(), + 'ip_address': NormalizedType() + } + } + """ + normalized_config = defaultdict(dict) + for log_type, val in config.get(TopLevelConfigKeys.LOGS, {}).items(): + result = defaultdict(dict) + + log_type_normalization = val.get('configuration', {}).get('normalization', {}) + + for normalized_type, params in log_type_normalization.items(): + # add normalization info if it is defined in log type configuration field + result[normalized_type] = NormalizedType(log_type, normalized_type, params) + + if result: + normalized_config[log_type] = result + + # return None is normalized_config is an empty defaultdict. 
+ return normalized_config or None diff --git a/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/iam.tf b/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/iam.tf new file mode 100644 index 000000000..fa27f8e78 --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/iam.tf @@ -0,0 +1,85 @@ +// IAM Role: Artifacts Firehose Delivery Stream permissions +resource "aws_iam_role" "streamalert_kinesis_firehose" { + name = "${var.prefix}_firehose_artifacts_delivery" + path = "/streamalert/" + assume_role_policy = data.aws_iam_policy_document.firehose_assume_role_policy.json + + tags = { + Name = "StreamAlert" + } +} + +// IAM Policy: Service AssumeRole +data "aws_iam_policy_document" "firehose_assume_role_policy" { + statement { + effect = "Allow" + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["firehose.amazonaws.com"] + } + } +} + +// IAM Policy: Write data to S3 +resource "aws_iam_role_policy" "streamalert_firehose_s3" { + name = "WriteArtifactsToS3" + role = aws_iam_role.streamalert_kinesis_firehose.id + policy = data.aws_iam_policy_document.firehose_s3.json +} + +// IAM Policy Document: Write data to S3 +data "aws_iam_policy_document" "firehose_s3" { + statement { + effect = "Allow" + + # Ref: http://amzn.to/2u5t0hS + actions = [ + "s3:AbortMultipartUpload", + "s3:GetBucketLocation", + "s3:GetObject", + "s3:ListBucket", + "s3:ListBucketMultipartUploads", + "s3:PutObject", + ] + + resources = [ + "arn:aws:s3:::${var.s3_bucket_name}", + "arn:aws:s3:::${var.s3_bucket_name}/*", + ] + } + + statement { + effect = "Allow" + + actions = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:GenerateDataKey*", + ] + + resources = [var.kms_key_arn] + } +} + +// IAM Policy: Interact with the Glue Catalog +resource "aws_iam_role_policy" "streamalert_firehose_glue" { + name = "FirehoseReadGlueCatalog" + role = "${aws_iam_role.streamalert_kinesis_firehose.id}" + + policy = "${data.aws_iam_policy_document.firehose_glue_catalog.json}" +} + +// IAM Policy Document: Interact with the Glue Catalog +data "aws_iam_policy_document" "firehose_glue_catalog" { + statement { + effect = "Allow" + + actions = [ + "glue:GetTableVersions" + ] + + resources = ["*"] + } +} diff --git a/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/main.tf b/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/main.tf new file mode 100644 index 000000000..45e5e8076 --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/main.tf @@ -0,0 +1,89 @@ +// AWS Firehose Stream dedicated to deliver Artifacts +// This firehose will only convert and save Artifacts in Parquet format in the S3 bucket to take the +// performance gain from Parquet format. 
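The rows landing in that table follow the flat, all-string shape produced by `Artifact.artifact`; a small sketch with hypothetical values:

```python
from streamalert.shared.artifact_extractor import Artifact

# Every field is coerced to str so the Parquet/Athena schema stays flat
artifact = Artifact(
    function='Source IP addresses',
    record_id='9a1d4b6c-0000-0000-0000-000000000000',  # hypothetical record id
    source_type='cloudwatch_events',
    normalized_type='ip_address',
    value='1.1.1.2',
)

assert artifact.artifact == {
    'function': 'Source IP addresses',
    'streamalert_record_id': '9a1d4b6c-0000-0000-0000-000000000000',
    'source_type': 'cloudwatch_events',
    'type': 'ip_address',
    'value': '1.1.1.2',
}
```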
+locals { + s3_path_prefix = "parquet/${var.glue_catalog_table_name}" +} + +locals { + data_location = "s3://${var.s3_bucket_name}/${local.s3_path_prefix}" +} + +resource "aws_kinesis_firehose_delivery_stream" "streamalert_artifacts" { + name = var.stream_name + destination = "extended_s3" + + // AWS Firehose Stream for Artifacts will only support Parquet format + extended_s3_configuration { + role_arn = aws_iam_role.streamalert_kinesis_firehose.arn + bucket_arn = "arn:aws:s3:::${var.s3_bucket_name}" + prefix = "${local.s3_path_prefix}/dt=!{timestamp:yyyy-MM-dd-HH}/" + error_output_prefix = "${local.s3_path_prefix}/!{firehose:error-output-type}/" + buffer_size = var.buffer_size + buffer_interval = var.buffer_interval + + # The S3 destination's compression format must be set to UNCOMPRESSED + # when data format conversion is enabled. + compression_format = "UNCOMPRESSED" + kms_key_arn = var.kms_key_arn + + data_format_conversion_configuration { + input_format_configuration { + deserializer { + # # more resilient with log schemas that have nested JSON comparing to hive_json_ser_de + open_x_json_ser_de {} + } + } + output_format_configuration { + serializer { + parquet_ser_de {} + } + } + schema_configuration { + database_name = aws_glue_catalog_table.artifacts.database_name + role_arn = aws_iam_role.streamalert_kinesis_firehose.arn + table_name = aws_glue_catalog_table.artifacts.name + } + } + } + + tags = { + Name = "StreamAlert" + } +} + +// Artifacts Athena table +resource "aws_glue_catalog_table" "artifacts" { + name = var.glue_catalog_table_name + database_name = var.glue_catalog_db_name + + table_type = "EXTERNAL_TABLE" + + partition_keys { + name = "dt" + type = "string" + } + + storage_descriptor { + location = local.data_location + input_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat" + output_format = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat" + + ser_de_info { + name = "parque_ser_de" + serialization_library = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" + parameters = { + ser_de_params_key = "serialization.format" + ser_de_params_value = "1" + } + } + + dynamic "columns" { + for_each = var.schema + content { + name = columns.value[0] + type = columns.value[1] + } + } + } +} diff --git a/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/variables.tf b/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/variables.tf new file mode 100644 index 000000000..58c0f66a4 --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_artifact_extractor/variables.tf @@ -0,0 +1,47 @@ +variable "account_id" { + type = string +} + +variable "region" { + type = string +} + +variable "prefix" { + type = string +} + +variable "glue_catalog_db_name" { + type = string + description = "Athena Database name" +} + +variable "glue_catalog_table_name" { + type = string + description = "Athena table name for Artifacts" +} + +variable "s3_bucket_name" { + type = string + description = "StreamAlert data bucket name" +} + +variable "stream_name" { + type = string + description = "Fully qualified name to use for delivery stream" +} + +variable "buffer_size" { + default = 5 +} + +variable "buffer_interval" { + default = 300 +} + +variable "kms_key_arn" { + type = string +} + +variable "schema" { + type = list(tuple([string, string])) +} diff --git a/streamalert_cli/_infrastructure/modules/tf_cloudtrail/main.tf b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/main.tf index 931562579..fd324b916 100644 --- 
a/streamalert_cli/_infrastructure/modules/tf_cloudtrail/main.tf +++ b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/main.tf @@ -257,6 +257,14 @@ data "aws_iam_policy_document" "cloudtrail" { identifiers = ["cloudtrail.amazonaws.com"] } + dynamic "principals" { + for_each = var.allow_cross_account_sns ? [1] : [] + content { + type = "AWS" + identifiers = formatlist("arn:aws:iam::%s:root", var.s3_cross_account_ids) + } + } + actions = ["SNS:Publish"] resources = [ diff --git a/streamalert_cli/_infrastructure/modules/tf_cloudtrail/variables.tf b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/variables.tf index e16fbe550..c9656776b 100644 --- a/streamalert_cli/_infrastructure/modules/tf_cloudtrail/variables.tf +++ b/streamalert_cli/_infrastructure/modules/tf_cloudtrail/variables.tf @@ -55,6 +55,12 @@ variable "send_to_sns" { description = "Whether or not events should be sent to SNS when objects are created in S3. This creates an SNS topic when set to true" } +variable "allow_cross_account_sns" { + type = bool + default = false + description = "Allow account IDs specified in the s3_cross_account_ids variable to also send SNS notifications to the created SNS Topic" +} + variable "cloudwatch_logs_role_arn" { type = string default = null diff --git a/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/README.md b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/README.md new file mode 100644 index 000000000..677e0b670 --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/README.md @@ -0,0 +1,39 @@ +# StreamAlert CloudWatch Events Cross Account Terraform Module +Configure the necessary resources to allow for cross account CloudWatch Events via EventBridge Events Bus + +## Components +* Configures CloudWatch Event Permissions to allow external accounts or organizations to send events to the main account + +## Example +```hcl +module "cloudwatch_events_cross_account" { + source = "./modules/tf_cloudwatch_events/cross_account" + accounts = ["123456789012"] + organizations = ["o-aabbccddee"] + region = "us-east-1" +} +``` + +## Inputs + + + + + + + + + + + + + + + + + + + + + +
+| Property | Description | Default (None=Required) |
+| --- | --- | --- |
+| accounts | AWS Account IDs for which to enable cross account CloudWatch Events | None |
+| organizations | AWS Organization IDs for which to enable cross account CloudWatch Events | None |
+| region | AWS region in which this permission is being added | None |
diff --git a/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/main.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/main.tf new file mode 100644 index 000000000..78a411ea3 --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/main.tf @@ -0,0 +1,19 @@ +// CloudWatch Event Permission for Individual AWS Accounts +resource "aws_cloudwatch_event_permission" "account_access" { + count = length(var.accounts) + principal = element(var.accounts, count.index) + statement_id = "account_${element(var.accounts, count.index)}_${var.region}" +} + +// CloudWatch Event Permission for AWS Orgs +resource "aws_cloudwatch_event_permission" "organization_access" { + count = length(var.organizations) + principal = "*" + statement_id = "organization_${element(var.organizations, count.index)}_${var.region}" + + condition { + key = "aws:PrincipalOrgID" + type = "StringEquals" + value = element(var.organizations, count.index) + } +} diff --git a/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/variables.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/variables.tf new file mode 100644 index 000000000..147cbed7b --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/cross_account/variables.tf @@ -0,0 +1,11 @@ +variable "accounts" { + type = list(string) +} + +variable "organizations" { + type = list(string) +} + +variable "region" { + type = string +} diff --git a/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/main.tf b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/main.tf index b6062c441..27b18f3d4 100644 --- a/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/main.tf +++ b/streamalert_cli/_infrastructure/modules/tf_cloudwatch_events/main.tf @@ -1,4 +1,4 @@ -// Cloudwatch event to capture Cloudtrail API calls +// Cloudwatch Event Rule: Capture CloudWatch Events resource "aws_cloudwatch_event_rule" "capture_events" { name = "${var.prefix}_${var.cluster}_streamalert_all_events" description = "Capture CloudWatch events" @@ -13,8 +13,10 @@ resource "aws_cloudwatch_event_rule" "capture_events" { // The Kinesis destination for Cloudwatch events resource "aws_cloudwatch_event_target" "kinesis" { - rule = aws_cloudwatch_event_rule.capture_events.name - arn = var.kinesis_arn + target_id = "${var.prefix}_${var.cluster}_streamalert_kinesis" + rule = aws_cloudwatch_event_rule.capture_events.name + role_arn = aws_iam_role.cloudwatch_events_role.arn + arn = var.kinesis_arn } // IAM Role: CloudWatch Events diff --git a/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/README.rst b/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/README.rst index bcc987b7f..4355e0e36 100644 --- a/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/README.rst +++ b/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/README.rst @@ -109,7 +109,7 @@ SSH and Build Dependencies $ pip install --upgrade pip setuptools $ mkdir -p $HOME/build_temp $HOME/pip_temp/python - $ pip install boxsdk[jwt]==2.6.1 --build $HOME/build_temp/ --target $HOME/pip_temp/python + $ pip install boxsdk[jwt]==2.9.0 --build $HOME/build_temp/ --target $HOME/pip_temp/python # Replace the `boxsdk[jwt]==2.6.1` below with the desired package & version # For example, the following would update the aliyun dependencies: diff --git 
a/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/boxsdk[jwt]==2.6.1_dependencies.zip b/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/boxsdk[jwt]==2.6.1_dependencies.zip deleted file mode 100644 index 6bc44b940..000000000 Binary files a/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/boxsdk[jwt]==2.6.1_dependencies.zip and /dev/null differ diff --git a/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/boxsdk[jwt]==2.9.0_dependencies.zip b/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/boxsdk[jwt]==2.9.0_dependencies.zip new file mode 100644 index 000000000..902daacbe Binary files /dev/null and b/streamalert_cli/_infrastructure/modules/tf_globals/lambda_layers/boxsdk[jwt]==2.9.0_dependencies.zip differ diff --git a/streamalert_cli/_infrastructure/modules/tf_globals/main.tf b/streamalert_cli/_infrastructure/modules/tf_globals/main.tf index 5d84e04be..2e51e2a6d 100644 --- a/streamalert_cli/_infrastructure/modules/tf_globals/main.tf +++ b/streamalert_cli/_infrastructure/modules/tf_globals/main.tf @@ -79,7 +79,7 @@ resource "aws_lambda_layer_version" "aliyun_dependencies" { } resource "aws_lambda_layer_version" "box_dependencies" { - filename = "${path.module}/lambda_layers/boxsdk[jwt]==2.6.1_dependencies.zip" + filename = "${path.module}/lambda_layers/boxsdk[jwt]==2.9.0_dependencies.zip" layer_name = "box" compatible_runtimes = ["python3.7"] } diff --git a/streamalert_cli/_infrastructure/modules/tf_scheduled_queries/outputs.tf b/streamalert_cli/_infrastructure/modules/tf_scheduled_queries/outputs.tf new file mode 100644 index 000000000..95e5205aa --- /dev/null +++ b/streamalert_cli/_infrastructure/modules/tf_scheduled_queries/outputs.tf @@ -0,0 +1,4 @@ +# Role id of the lambda function that runs scheduled queries +output "lambda_function_role_id" { + value = module.scheduled_queries_lambda.role_id +} \ No newline at end of file diff --git a/streamalert_cli/apps/handler.py b/streamalert_cli/apps/handler.py index 243c11ae2..b00097a2e 100644 --- a/streamalert_cli/apps/handler.py +++ b/streamalert_cli/apps/handler.py @@ -142,9 +142,9 @@ def _add_default_app_args(app_parser): # Validate the name being used to make sure it does not contain specific characters def _validate_name(val): """Validate acceptable inputs for the name of the function""" - acceptable_chars = ''.join([string.digits, string.ascii_letters, '_-']) + acceptable_chars = ''.join([string.digits, string.ascii_lowercase, '_-']) if not set(str(val)).issubset(acceptable_chars): - raise app_parser.error('Name must contain only letters, numbers, ' + raise app_parser.error('Name must contain only lowercase letters, numbers, ' 'hyphens, or underscores.') return val diff --git a/streamalert_cli/athena/handler.py b/streamalert_cli/athena/handler.py index 691798e36..f91083057 100644 --- a/streamalert_cli/athena/handler.py +++ b/streamalert_cli/athena/handler.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
""" -from streamalert.classifier.clients import FirehoseClient +from streamalert.shared.firehose import FirehoseClient from streamalert.shared.utils import get_database_name, get_data_file_format from streamalert.shared.alert import Alert from streamalert.shared.athena import AthenaClient diff --git a/streamalert_cli/athena/helpers.py b/streamalert_cli/athena/helpers.py index 5c87937a6..cf43c6452 100644 --- a/streamalert_cli/athena/helpers.py +++ b/streamalert_cli/athena/helpers.py @@ -15,7 +15,8 @@ """ import re -from streamalert.classifier.clients import FirehoseClient +from streamalert.shared.artifact_extractor import Artifact +from streamalert.shared.firehose import FirehoseClient from streamalert.shared.logger import get_logger from streamalert.shared.alert import Alert from streamalert_cli.helpers import record_to_schema @@ -247,3 +248,21 @@ def generate_data_table_schema(config, table, schema_override=None): ) return format_schema_tf(athena_schema) + +def generate_artifacts_table_schema(): + """Generate the schema for artifacts table in terraform by using a test artifact instance + + Returns: + athena_schema (dict): Equivalent Athena schema used for generating create table statement + """ + artifact = artifact = Artifact( + normalized_type='test_normalized_type', + value='test_value', + source_type='test_source_type', + record_id='test_record_id', + function=None + ) + schema = record_to_schema(artifact.artifact) + athena_schema = logs_schema_to_athena_schema(schema, False) + + return format_schema_tf(athena_schema) diff --git a/streamalert_cli/config.py b/streamalert_cli/config.py index 74617e9aa..12d732489 100644 --- a/streamalert_cli/config.py +++ b/streamalert_cli/config.py @@ -16,7 +16,9 @@ import json import os import re +import shutil import string +import tempfile from streamalert.apps import StreamAlertApp from streamalert.shared import CLUSTERED_FUNCTIONS, config, metrics @@ -24,16 +26,19 @@ from streamalert_cli.helpers import continue_prompt from streamalert_cli.apps.helpers import save_app_auth_info -LOGGER = get_logger(__name__) DEFAULT_CONFIG_PATH = 'conf' +LOGGER = get_logger(__name__) + class CLIConfig: """A class to load, modify, and display the StreamAlertCLI Config""" - def __init__(self, config_path): + def __init__(self, config_path, extra_terraform_files=None, build_directory=None): self.config_path = config_path self.config = config.load_config(config_path) + self._terraform_files = extra_terraform_files or [] + self.build_directory = self._setup_build_directory(build_directory) def __repr__(self): return str(self.config) @@ -57,15 +62,44 @@ def clusters(self): """Return list of cluster configuration keys""" return list(self.config['clusters'].keys()) + @property + def terraform_files(self): + """Return set of terraform files to include with this deployment""" + return set(self._terraform_files).union( + self.config['global']['general'].get('terraform_files', []) + ) + + @staticmethod + def _setup_build_directory(directory): + """Create the directory to be used for building infrastructure + + Args: + directory (str): Optional path to directory to create + + Returns: + str: Path to directory that will be used + """ + if not directory: + temp_dir = tempfile.TemporaryDirectory(prefix='streamalert_build-') + directory = temp_dir.name + # Calling cleanup here to remove this directory so shutil can recreate it + # Without calling this here, an exception is raised when tempfile garbage collects + temp_dir.cleanup() + + if os.path.exists(directory): + 
shutil.rmtree(directory) + + return directory + def set_prefix(self, prefix): """Set the Org Prefix in Global settings""" if not isinstance(prefix, str): LOGGER.error('Invalid prefix type, must be string') return False - acceptable_chars = set([*string.digits, *string.ascii_letters]) + acceptable_chars = set([*string.digits, *string.ascii_lowercase]) if not set(prefix).issubset(acceptable_chars): - LOGGER.error('Prefix must contain only letters and numbers') + LOGGER.error('Prefix must contain only lowercase letters and numbers') return False self.config['global']['account']['prefix'] = prefix diff --git a/streamalert_cli/helpers.py b/streamalert_cli/helpers.py index 32f2f429c..839178c13 100644 --- a/streamalert_cli/helpers.py +++ b/streamalert_cli/helpers.py @@ -24,8 +24,6 @@ from streamalert.shared.logger import get_logger -from streamalert_cli.terraform import TERRAFORM_FILES_PATH - LOGGER = get_logger(__name__) @@ -39,7 +37,7 @@ } -def run_command(runner_args, **kwargs): +def run_command(runner_args, cwd='./', **kwargs): """Helper function to run commands with error handling. Args: @@ -52,7 +50,6 @@ def run_command(runner_args, **kwargs): """ default_error_message = "An error occurred while running: {}".format(' '.join(runner_args)) error_message = kwargs.get('error_message', default_error_message) - cwd = kwargs.get('cwd', TERRAFORM_FILES_PATH) # Add the -force-copy flag for s3 state copying to suppress dialogs that # the user must type 'yes' into. @@ -98,41 +95,6 @@ def continue_prompt(message=None): return response == 'yes' -def tf_runner(action='apply', refresh=True, auto_approve=False, targets=None): - """Terraform wrapper to build StreamAlert infrastructure. - - Resolves modules with `terraform get` before continuing. - - Args: - action (str): Terraform action ('apply' or 'destroy'). - refresh (bool): If True, Terraform will refresh its state before applying the change. - auto_approve (bool): If True, Terraform will *not* prompt the user for approval. - targets (list): Optional list of affected targets. - If not specified, Terraform will run against all of its resources. - - Returns: - bool: True if the terraform command was successful - """ - LOGGER.debug('Resolving Terraform modules') - if not run_command(['terraform', 'get'], quiet=True): - return False - - tf_command = ['terraform', action, '-refresh={}'.format(str(refresh).lower())] - - if action == 'destroy': - # Terraform destroy has a '-force' flag instead of '-auto-approve' - LOGGER.info('Destroying infrastructure') - tf_command.append('-force={}'.format(str(auto_approve).lower())) - else: - LOGGER.info('%s changes', 'Applying' if auto_approve else 'Planning') - tf_command.append('-auto-approve={}'.format(str(auto_approve).lower())) - - if targets: - tf_command.extend('-target={}'.format(x) for x in targets) - - return run_command(tf_command) - - def check_credentials(): """Check for valid AWS credentials in environment variables diff --git a/streamalert_cli/kinesis/handler.py b/streamalert_cli/kinesis/handler.py index 357a0ffe9..5324e95a4 100644 --- a/streamalert_cli/kinesis/handler.py +++ b/streamalert_cli/kinesis/handler.py @@ -14,8 +14,8 @@ limitations under the License. 
""" from streamalert.shared.logger import get_logger -from streamalert_cli.helpers import tf_runner from streamalert_cli.terraform.generate import terraform_generate_handler +from streamalert_cli.terraform.helpers import terraform_runner from streamalert_cli.utils import CLICommand, set_parser_epilog, add_clusters_arg LOGGER = get_logger(__name__) @@ -82,8 +82,8 @@ def handler(cls, options, config): if not terraform_generate_handler(config): return False - return tf_runner( - action='apply', + return terraform_runner( + config, targets=[ 'module.{}_{}'.format('kinesis_events', cluster) for cluster in config.clusters() ] diff --git a/streamalert_cli/manage_lambda/deploy.py b/streamalert_cli/manage_lambda/deploy.py index 5472efa8b..d78e2a74e 100644 --- a/streamalert_cli/manage_lambda/deploy.py +++ b/streamalert_cli/manage_lambda/deploy.py @@ -15,9 +15,8 @@ """ from streamalert.shared import rule_table from streamalert.shared.logger import get_logger -from streamalert_cli import helpers -from streamalert_cli.manage_lambda import package from streamalert_cli.terraform.generate import terraform_generate_handler +from streamalert_cli.terraform.helpers import terraform_runner from streamalert_cli.utils import ( add_default_lambda_args, CLICommand, @@ -111,11 +110,6 @@ def deploy(config, functions, clusters=None): """ LOGGER.info('Deploying: %s', ', '.join(sorted(functions))) - deployment_package = package.LambdaPackage(config) - package_path = deployment_package.create() - if not package_path: - return False - # Terraform apply only to the module which contains our lambda functions clusters = clusters or config.clusters() @@ -124,7 +118,7 @@ def deploy(config, functions, clusters=None): LOGGER.debug('Applying terraform targets: %s', ', '.join(sorted(deploy_targets))) # Terraform applies the new package and publishes a new version - return helpers.tf_runner(targets=deploy_targets) + return terraform_runner(config, targets=deploy_targets) def _update_rule_table(options, config): diff --git a/streamalert_cli/manage_lambda/package.py b/streamalert_cli/manage_lambda/package.py index 16d56bb61..fcc32fd3f 100644 --- a/streamalert_cli/manage_lambda/package.py +++ b/streamalert_cli/manage_lambda/package.py @@ -19,21 +19,17 @@ from streamalert.shared.logger import get_logger from streamalert_cli.helpers import run_command -from streamalert_cli.terraform import TERRAFORM_FILES_PATH LOGGER = get_logger(__name__) class LambdaPackage: """Build the deployment package for StreamAlert Lambdas""" - package_name = 'streamalert' # The basename of the generated .zip file + # The name of the directory to package and basename of the generated .zip file + PACKAGE_NAME = 'streamalert' - DEFAULT_PACKAGE_FILES = { # The default folders to zip into the Lambda package - 'conf', - 'streamalert', - } - - CONFIG_EXTRAS = { # The configurable items for user specified files + # The configurable items for user specified files to include in deployment pacakge + CONFIG_EXTRAS = { 'matcher_locations', 'rule_locations', 'scheduled_query_locations', @@ -42,37 +38,42 @@ class LambdaPackage: # Define a package dict to support pinning versions across all subclasses REQUIRED_LIBS = { - 'backoff==1.8.1', - 'boto3==1.10.6', - 'cbapi==1.5.4', - 'google-api-python-client==1.7.11', - 'jmespath==0.9.4', + 'backoff==1.10.0', + 'boto3==1.14.29', + 'cbapi==1.7.1', + 'google-api-python-client==1.10.0', + 'jmespath==0.10.0', 'jsonlines==1.2.0', - 'netaddr==0.7.19', - 'requests==2.22.0', - 'pymsteams==0.1.12', + 'netaddr==0.8.0', + 'requests==2.24.0', 
+ 'pymsteams==0.1.13', } def __init__(self, config): self.config = config - self.temp_package_path = os.path.join(tempfile.gettempdir(), self.package_name) + self.temp_package_path = os.path.join(tempfile.gettempdir(), self.PACKAGE_NAME) def _copy_user_config_files(self): - paths = set() for location in self.CONFIG_EXTRAS: - paths.update(self.config['global']['general'].get(location, set())) - - self._copy_files(paths, ignores={'*.json'}) + paths = self.config['global']['general'].get(location, set()) + if not paths: + continue + for path in paths: + self._copy_directory(path, ignores={'*.json'}) def create(self): """Create a Lambda deployment package .zip file.""" - LOGGER.info('Creating package for %s', self.package_name) + LOGGER.info('Creating package for %s', self.PACKAGE_NAME) if os.path.exists(self.temp_package_path): shutil.rmtree(self.temp_package_path) - # Copy all of the default package files - self._copy_files(self.DEFAULT_PACKAGE_FILES) + # Copy the default package directory + self._copy_directory(self.PACKAGE_NAME) + + # Copy the user-specified config directory + # Ensure this is copied to the 'conf' destination directory + self._copy_directory(self.config.config_path, destination='conf') # Copy in any user-specified files self._copy_user_config_files() @@ -84,7 +85,7 @@ def create(self): # Zip it all up # Build these in the top-level of the terraform directory as streamalert.zip result = shutil.make_archive( - os.path.join(TERRAFORM_FILES_PATH, self.package_name), + os.path.join(self.config.build_directory, self.PACKAGE_NAME), 'zip', self.temp_package_path ) @@ -96,17 +97,18 @@ def create(self): return result - def _copy_files(self, paths, ignores=None): + def _copy_directory(self, path, ignores=None, destination=None): """Copy all files and folders into temporary package path Args: - paths (list): Paths of folders to be copied into the Lambda package - ignores (set=None): File globs to be ignored during the copying of files in paths + path (str): Path of directory to be copied into the Lambda package + ignores (set=None): File globs to be ignored during the copying of the directory """ - for path in paths: - # Copy the directory, skipping any files explicitly ignored - kwargs = {'ignore': shutil.ignore_patterns(*ignores)} if ignores else dict() - shutil.copytree(path, os.path.join(self.temp_package_path, path), **kwargs) + # Copy the directory, skipping any files explicitly ignored + kwargs = {'ignore': shutil.ignore_patterns(*ignores)} if ignores else dict() + destination = destination or path + destination = os.path.join(self.temp_package_path, destination) + shutil.copytree(path, destination, **kwargs) def _resolve_libraries(self): """Install all libraries into the deployment package folder diff --git a/streamalert_cli/runner.py b/streamalert_cli/runner.py index f13dd88f9..0219c5e18 100644 --- a/streamalert_cli/runner.py +++ b/streamalert_cli/runner.py @@ -34,7 +34,6 @@ from streamalert_cli.terraform.generate import TerraformGenerateCommand from streamalert_cli.terraform.handlers import ( TerraformBuildCommand, - TerraformCleanCommand, TerraformDestroyCommand, TerraformInitCommand, TerraformListTargetsCommand, @@ -59,7 +58,7 @@ def cli_runner(args): Returns: bool: False if errors occurred, True otherwise """ - config = CLIConfig(args.config_dir) + config = CLIConfig(args.config_dir, args.terraform_files, args.build_directory) set_logger_levels(args.debug) @@ -93,7 +92,6 @@ def register_all(cls): 'app': AppCommand, 'athena': AthenaCommand, 'build': TerraformBuildCommand, 
- 'clean': TerraformCleanCommand, 'configure': ConfigureCommand, 'create-alarm': CreateMetricAlarmCommand, 'create-cluster-alarm': CreateClusterMetricAlarmCommand, diff --git a/streamalert_cli/terraform/artifact_extractor.py b/streamalert_cli/terraform/artifact_extractor.py new file mode 100644 index 000000000..6091991d1 --- /dev/null +++ b/streamalert_cli/terraform/artifact_extractor.py @@ -0,0 +1,55 @@ + +""" +Copyright 2017-present Airbnb, Inc. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +# from streamalert.shared import ARTIFACT_EXTRACTOR_NAME +from streamalert.shared.config import artifact_extractor_enabled, firehose_data_bucket +from streamalert.shared.firehose import FirehoseClient +from streamalert.shared.utils import get_database_name +from streamalert_cli.athena.helpers import generate_artifacts_table_schema +from streamalert_cli.terraform.common import infinitedict + +# FIXME: Should we provide custom artifacs table name? +DEFAULT_ARTIFACTS_TABLE_NAME = 'artifacts' + +def generate_artifact_extractor(config): + """Generate Terraform for the Artifact Extractor Lambda function + Args: + config (dict): The loaded config from the 'conf/' directory + Returns: + dict: Artifact Extractor Terraform definition to be marshaled to JSON + """ + result = infinitedict() + + if not artifact_extractor_enabled(config): + return + + ae_config = config['global']['infrastructure']['artifact_extractor'] + stream_name = FirehoseClient.artifacts_firehose_stream_name(config) + + # Set variables for the artifact extractor module + result['module']['artifact_extractor'] = { + 'source': './modules/tf_artifact_extractor', + 'account_id': config['global']['account']['aws_account_id'], + 'prefix': config['global']['account']['prefix'], + 'region': config['global']['account']['region'], + 'glue_catalog_db_name': get_database_name(config), + 'glue_catalog_table_name': ae_config.get('table_name', DEFAULT_ARTIFACTS_TABLE_NAME), + 's3_bucket_name': firehose_data_bucket(config), + 'stream_name': stream_name, + 'buffer_size': ae_config.get('firehose_buffer_size', 128), + 'buffer_interval': ae_config.get('firehose_buffer_interval', 900), + 'kms_key_arn': '${aws_kms_key.server_side_encryption.arn}', + 'schema': generate_artifacts_table_schema() + } + + return result diff --git a/streamalert_cli/terraform/cloudtrail.py b/streamalert_cli/terraform/cloudtrail.py index 5cb9ab320..c6b44c115 100644 --- a/streamalert_cli/terraform/cloudtrail.py +++ b/streamalert_cli/terraform/cloudtrail.py @@ -43,15 +43,16 @@ def generate_cloudtrail(cluster_name, cluster_dict, config): region = config['global']['account']['region'] prefix = config['global']['account']['prefix'] send_to_cloudwatch = settings.get('send_to_cloudwatch', False) - enable_s3_events = settings.get('enable_s3_events', True) + s3_settings = settings.get('s3_settings', {}) + enable_s3_events = s3_settings.get('enable_events', False) - s3_bucket_name = settings.get( - 's3_bucket_name', + s3_bucket_name = s3_settings.get( + 'bucket_name', '{}-{}-streamalert-cloudtrail'.format(prefix, 
cluster_name) ) primary_account_id = config['global']['account']['aws_account_id'] - account_ids = set(settings.get('s3_cross_account_ids', [])) + account_ids = set(s3_settings.get('cross_account_ids', [])) account_ids.add(primary_account_id) account_ids = sorted(account_ids) @@ -72,13 +73,16 @@ def generate_cloudtrail(cluster_name, cluster_dict, config): settings_with_defaults = { 'enable_logging', 'is_global_trail', - 's3_event_selector_type', 'send_to_sns', + 'allow_cross_account_sns', } for value in settings_with_defaults: if value in settings: module_info[value] = settings[value] + if 'event_selector_type' in s3_settings: + module_info['s3_event_selector_type'] = s3_settings.get('event_selector_type') + if send_to_cloudwatch: if not generate_cloudtrail_cloudwatch( cluster_name, @@ -102,6 +106,7 @@ def generate_cloudtrail(cluster_name, cluster_dict, config): cluster_dict['module']['cloudtrail_{}'.format(cluster_name)] = module_info if enable_s3_events: + ignore_digest = s3_settings.get('ignore_digest', True) s3_event_account_ids = account_ids # Omit the primary account ID from the event notifications to avoid duplicative processing if send_to_cloudwatch: @@ -113,7 +118,11 @@ def generate_cloudtrail(cluster_name, cluster_dict, config): bucket_info = { s3_bucket_name: [ { - 'filter_prefix': 'AWSLogs/{}/'.format(account_id) + 'filter_prefix': ( + 'AWSLogs/{}/CloudTrail/'.format(account_id) + if ignore_digest else + 'AWSLogs/{}/'.format(account_id) + ) } for account_id in s3_event_account_ids ] } diff --git a/streamalert_cli/terraform/cloudwatch_events.py b/streamalert_cli/terraform/cloudwatch_events.py index bc1b8a063..2fd258dca 100644 --- a/streamalert_cli/terraform/cloudwatch_events.py +++ b/streamalert_cli/terraform/cloudwatch_events.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
""" +from collections import defaultdict import json from streamalert.shared.logger import get_logger @@ -71,4 +72,60 @@ def generate_cloudwatch_events(cluster_name, cluster_dict, config): 'event_pattern': json.dumps(event_pattern) if event_pattern is not None else event_pattern } + cross_account_settings = settings.get('cross_account') + if not cross_account_settings: + return True + + region_map = _map_regions(cross_account_settings) + for region, values in region_map.items(): + tf_module_name = 'cloudwatch_events_cross_account_{}_{}'.format(cluster_name, region) + cluster_dict['module'][tf_module_name] = { + 'source': './modules/tf_cloudwatch_events/cross_account', + 'region': region, + 'accounts': sorted(values.get('accounts', [])), + 'organizations': sorted(values.get('organizations', [])), + 'providers': { + # use the aliased provider for this region from providers.tf + 'aws': 'aws.{}'.format(region) + } + } + return True + + +def _map_regions(settings): + """Reverse the mapping of accounts/orgs <> regions to make it nicer for terraform to use + + Args: + settings (dict): Mapping or accounts/orgs to regions + Example: + { + 'accounts': { + '123456789012': ['us-east-1'], + '234567890123': ['us-east-1'] + }, + 'organizations': { + 'o-aabbccddee': ['us-west-1'] + } + } + + Returns: + dict: An inverse mapping of regions <> accounts/orgs + Example: + { + 'us-east-1': { + 'accounts': ['123456789012', '234567890123'], + }, + 'us-west-1': { + 'organizations': ['o-aabbccddee'] + } + } + """ + region_map = defaultdict(dict) + for scope in ['accounts', 'organizations']: + for aws_id, regions in settings.get(scope, {}).items(): + for region in regions: + region_map[region] = region_map.get(region, defaultdict(list)) + region_map[region][scope].append(aws_id) + + return region_map diff --git a/streamalert_cli/terraform/firehose.py b/streamalert_cli/terraform/firehose.py index 95eac27bb..f81310322 100644 --- a/streamalert_cli/terraform/firehose.py +++ b/streamalert_cli/terraform/firehose.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from streamalert.classifier.clients import FirehoseClient +from streamalert.shared.firehose import FirehoseClient from streamalert.shared.config import firehose_data_bucket from streamalert.shared.utils import get_database_name, get_data_file_format from streamalert_cli.athena.helpers import generate_data_table_schema diff --git a/streamalert_cli/terraform/flow_logs.py b/streamalert_cli/terraform/flow_logs.py index 064fc210e..2e1f3e709 100644 --- a/streamalert_cli/terraform/flow_logs.py +++ b/streamalert_cli/terraform/flow_logs.py @@ -35,7 +35,7 @@ def generate_flow_logs(cluster_name, cluster_dict, config): bool: Result of applying the flow_logs module """ modules = config['clusters'][cluster_name]['modules'] - if not modules['flow_logs']['enabled']: + if not modules['flow_logs'].get('enabled', True): LOGGER.debug('Flow logs disabled, nothing to do') return True # not an error diff --git a/streamalert_cli/terraform/generate.py b/streamalert_cli/terraform/generate.py index 1268db9b0..1634c5c45 100644 --- a/streamalert_cli/terraform/generate.py +++ b/streamalert_cli/terraform/generate.py @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
""" -from fnmatch import fnmatch import json import os +import shutil from streamalert.shared.config import ConfigError, firehose_alerts_bucket from streamalert.shared.logger import get_logger @@ -30,6 +30,7 @@ s3_access_logging_bucket, terraform_state_bucket, ) +from streamalert_cli.terraform.artifact_extractor import generate_artifact_extractor from streamalert_cli.terraform.alert_merger import generate_alert_merger from streamalert_cli.terraform.alert_processor import generate_alert_processor from streamalert_cli.terraform.apps import generate_apps @@ -63,7 +64,7 @@ RESTRICTED_CLUSTER_NAMES = ('main', 'athena') TERRAFORM_VERSION = '~> 0.12.9' -TERRAFORM_PROVIDER_VERSION = '~> 2.28.1' +TERRAFORM_PROVIDER_VERSION = '~> 2.48.0' LOGGER = get_logger(__name__) @@ -372,14 +373,6 @@ def generate_cluster(config, cluster_name): return cluster_dict -def cleanup_old_tf_files(): - """ - Cleanup old *.tf.json files - """ - for terraform_file in os.listdir(TERRAFORM_FILES_PATH): - if fnmatch(terraform_file, '*.tf.json'): - os.remove(os.path.join(TERRAFORM_FILES_PATH, terraform_file)) - class TerraformGenerateCommand(CLICommand): description = 'Generate Terraform files from JSON cluster files' @@ -393,6 +386,28 @@ def handler(cls, options, config): return terraform_generate_handler(config, check_creds=False) +def _copy_terraform_files(config): + """Copy all packaged terraform files and terraform files provided by the user to temp + + Args: + config (CLIConfig): Loaded StreamAlert config + """ + # Copy the packaged terraform files to temp + # Currently this ignores *.tf.json, in the instance that these + # exist in current deployments. This can be removed in a future release. + shutil.copytree( + TERRAFORM_FILES_PATH, + config.build_directory, + ignore=shutil.ignore_patterns('*.tf.json') # TODO: remove this eventually + ) + + # Copy any additional user provided terraform files to temp + for item in config.terraform_files: + shutil.copy2(item, config.build_directory) + + LOGGER.info('Copied Terraform configuration to \'%s\'', config.build_directory) + + def terraform_generate_handler(config, init=False, check_tf=True, check_creds=True): """Generate all Terraform plans for the configured clusters. 
@@ -411,13 +426,13 @@ def terraform_generate_handler(config, init=False, check_tf=True, check_creds=Tr if check_tf and not terraform_check(): return False - cleanup_old_tf_files() + _copy_terraform_files(config) # Setup the main.tf.json file LOGGER.debug('Generating cluster file: main.tf.json') _create_terraform_module_file( generate_main(config, init=init), - os.path.join(TERRAFORM_FILES_PATH, 'main.tf.json') + os.path.join(config.build_directory, 'main.tf.json') ) # Return early during the init process, clusters are not needed yet @@ -440,21 +455,21 @@ def terraform_generate_handler(config, init=False, check_tf=True, check_creds=Tr file_name = '{}.tf.json'.format(cluster) _create_terraform_module_file( cluster_dict, - os.path.join(TERRAFORM_FILES_PATH, file_name), + os.path.join(config.build_directory, file_name), ) metric_filters = generate_aggregate_cloudwatch_metric_filters(config) if metric_filters: _create_terraform_module_file( metric_filters, - os.path.join(TERRAFORM_FILES_PATH, 'metric_filters.tf.json') + os.path.join(config.build_directory, 'metric_filters.tf.json') ) metric_alarms = generate_aggregate_cloudwatch_metric_alarms(config) if metric_alarms: _create_terraform_module_file( metric_alarms, - os.path.join(TERRAFORM_FILES_PATH, 'metric_alarms.tf.json') + os.path.join(config.build_directory, 'metric_alarms.tf.json') ) # Setup Threat Intel Downloader Lambda function if it is enabled @@ -513,6 +528,10 @@ def terraform_generate_handler(config, init=False, check_tf=True, check_creds=Tr # Setup StreamQuery _generate_streamquery_module(config) + # FIXME: make sure test 'python manage.py destroy' artifact_extractor case + # Setup artifact_extractor + _generate_artifact_extractor_module(config) + return True @@ -520,7 +539,7 @@ def _generate_lookup_tables_settings(config): """ Generates .tf.json file for LookupTables """ - tf_file_name = os.path.join(TERRAFORM_FILES_PATH, 'lookup_tables.tf.json') + tf_file_name = os.path.join(config.build_directory, 'lookup_tables.tf.json') if not config['lookup_tables'].get('enabled', False): remove_temp_terraform_file(tf_file_name) @@ -548,6 +567,7 @@ def _generate_lookup_tables_settings(config): '${module.alert_processor_lambda.role_id}', '${module.alert_merger_lambda.role_id}', '${module.rules_engine_lambda.role_id}', + '${module.scheduled_queries.lambda_function_role_id}', } for cluster in config.clusters(): @@ -582,7 +602,7 @@ def _generate_streamquery_module(config): """ Generates .tf.json file for scheduled queries """ - tf_file_name = os.path.join(TERRAFORM_FILES_PATH, 'scheduled_queries.tf.json') + tf_file_name = os.path.join(config.build_directory, 'scheduled_queries.tf.json') if not config.get('scheduled_queries', {}).get('enabled', False): remove_temp_terraform_file(tf_file_name) return @@ -592,6 +612,14 @@ def _generate_streamquery_module(config): tf_file_name ) +def _generate_artifact_extractor_module(config): + tf_file_name = os.path.join(config.build_directory, 'artifact_extractor.tf.json') + if 'artifact_extractor' in config['global']['infrastructure']: + if config['global']['infrastructure']['artifact_extractor'].get('enabled'): + _create_terraform_module_file(generate_artifact_extractor(config), tf_file_name) + return + + remove_temp_terraform_file(tf_file_name) def generate_global_lambda_settings( config, @@ -625,7 +653,7 @@ def generate_global_lambda_settings( ) raise ConfigError(message) - tf_tmp_file = os.path.join(TERRAFORM_FILES_PATH, '{}.tf.json'.format(tf_tmp_file_name)) + tf_tmp_file = 
os.path.join(config.build_directory, '{}.tf.json'.format(tf_tmp_file_name)) if required and conf_name not in config['lambda']: message = 'Required configuration missing in lambda.json: {}'.format(conf_name) diff --git a/streamalert_cli/terraform/handlers.py b/streamalert_cli/terraform/handlers.py index ba91777a6..c5a15331b 100644 --- a/streamalert_cli/terraform/handlers.py +++ b/streamalert_cli/terraform/handlers.py @@ -16,17 +16,15 @@ from fnmatch import fnmatch import json import os -import shutil from streamalert.shared.config import firehose_alerts_bucket from streamalert.shared.logger import get_logger from streamalert.shared.utils import get_data_file_format from streamalert_cli.athena.handler import create_table, create_log_tables -from streamalert_cli.helpers import check_credentials, continue_prompt, run_command, tf_runner +from streamalert_cli.helpers import check_credentials, continue_prompt, run_command from streamalert_cli.manage_lambda.deploy import deploy -from streamalert_cli.terraform import TERRAFORM_FILES_PATH from streamalert_cli.terraform.generate import terraform_generate_handler -from streamalert_cli.terraform.helpers import terraform_check +from streamalert_cli.terraform.helpers import terraform_check, terraform_runner from streamalert_cli.utils import ( add_clusters_arg, CLICommand, @@ -42,16 +40,7 @@ class TerraformInitCommand(CLICommand): @classmethod def setup_subparser(cls, subparser): - """Add init subparser: manage.py init [options]""" - subparser.add_argument( - '-b', - '--backend', - action='store_true', - help=( - 'Initialize the Terraform backend (S3). ' - 'Useful for refreshing a pre-existing deployment' - ) - ) + """Manage.py init takes no arguments""" @classmethod def handler(cls, options, config): @@ -63,11 +52,6 @@ def handler(cls, options, config): Returns: bool: False if errors occurred, True otherwise """ - - # Stop here if only initializing the backend - if options.backend: - return cls._terraform_init_backend(config) - LOGGER.info('Initializing StreamAlert') # generate init Terraform files @@ -75,7 +59,7 @@ def handler(cls, options, config): return False LOGGER.info('Initializing Terraform') - if not run_command(['terraform', 'init']): + if not run_command(['terraform', 'init'], cwd=config.build_directory): return False # build init infrastructure @@ -95,7 +79,7 @@ def handler(cls, options, config): if config['global']['infrastructure'].get('firehose', {}).get('enabled'): init_targets.append('aws_s3_bucket.streamalert_data') - if not tf_runner(targets=init_targets): + if not terraform_runner(config, targets=init_targets): LOGGER.error('An error occurred while running StreamAlert init') return False @@ -104,7 +88,7 @@ def handler(cls, options, config): if not terraform_generate_handler(config=config, check_tf=False, check_creds=False): return False - if not run_command(['terraform', 'init']): + if not run_command(['terraform', 'init'], cwd=config.build_directory): return False LOGGER.info('Deploying Lambda Functions') @@ -131,29 +115,7 @@ def handler(cls, options, config): return LOGGER.info('Building remaining infrastructure') - return tf_runner(refresh=False) - - @staticmethod - def _terraform_init_backend(config): - """Initialize the infrastructure backend (S3) using Terraform - - Returns: - bool: False if errors occurred, True otherwise - """ - # Check for valid credentials - if not check_credentials(): - return False - - # Verify terraform is installed - if not terraform_check(): - return False - - # See generate_main() for how it uses the 
`init` kwarg for the local/remote backend - if not terraform_generate_handler(config=config, init=False): - return False - - LOGGER.info('Initializing StreamAlert backend') - return run_command(['terraform', 'init']) + return terraform_runner(config, refresh=False) class TerraformBuildCommand(CLICommand): @@ -199,7 +161,7 @@ def handler(cls, options, config): if not valid: return False - return tf_runner(targets=target_modules if target_modules else None) + return terraform_runner(config, targets=target_modules if target_modules else None) class TerraformDestroyCommand(CLICommand): @@ -249,8 +211,9 @@ def handler(cls, options, config): if not valid: return False - return tf_runner( - action='destroy', + return terraform_runner( + config, + destroy=True, auto_approve=True, targets=target_modules if target_modules else None ) @@ -262,58 +225,11 @@ def handler(cls, options, config): check_creds=False): return False - if not run_command(['terraform', 'init']): + if not run_command(['terraform', 'init'], cwd=config.build_directory): return False # Destroy all of the infrastructure - if not tf_runner(action='destroy', auto_approve=True): - return False - - # Remove old Terraform files - return TerraformCleanCommand.handler(options, config) - - -class TerraformCleanCommand(CLICommand): - description = 'Remove current Terraform files' - - @classmethod - def setup_subparser(cls, subparser): - """Manage.py clean takes no arguments""" - - @classmethod - def handler(cls, options, config): - """Remove leftover Terraform statefiles and main/cluster files - - Args: - config (CLIConfig): Loaded StreamAlert config - - Returns: - bool: False if errors occurred, True otherwise - """ - LOGGER.info('Cleaning Terraform files') - - def _rm_file(path): - if not os.path.isfile(path): - return - print('Removing terraform file: {}'.format(path)) - os.remove(path) - - for root, _, files in os.walk(TERRAFORM_FILES_PATH): - for file_name in files: - path = os.path.join(root, file_name) - if path.endswith('.tf.json'): - _rm_file(path) - - for tf_file in ['terraform.tfstate', 'terraform.tfstate.backup']: - path = os.path.join(TERRAFORM_FILES_PATH, tf_file) - _rm_file(path) - - # Finally, delete the Terraform directory - tf_path = os.path.join(TERRAFORM_FILES_PATH, '.terraform') - if os.path.isdir(tf_path): - shutil.rmtree(tf_path) - - return True + return terraform_runner(config, destroy=True, auto_approve=True) class TerraformListTargetsCommand(CLICommand): @@ -409,7 +325,7 @@ def get_tf_modules(config, generate=False): modules = set() resources = set() - for root, _, files in os.walk(TERRAFORM_FILES_PATH): + for root, _, files in os.walk(config.build_directory): for file_name in files: path = os.path.join(root, file_name) if path.endswith('.tf.json'): diff --git a/streamalert_cli/terraform/helpers.py b/streamalert_cli/terraform/helpers.py index c22752e75..4db2ccff6 100644 --- a/streamalert_cli/terraform/helpers.py +++ b/streamalert_cli/terraform/helpers.py @@ -14,16 +14,74 @@ """ from streamalert.shared.logger import get_logger from streamalert_cli.helpers import run_command +from streamalert_cli.manage_lambda import package LOGGER = get_logger(__name__) +def terraform_runner(config, refresh=True, auto_approve=False, targets=None, destroy=False): + """Terraform wrapper to build StreamAlert infrastructure. + + Resolves modules with `terraform get` before continuing. + + Args: + config (CLIConfig): Loaded StreamAlert config + action (str): Terraform action ('apply' or 'destroy'). 
+ refresh (bool): If True, Terraform will refresh its state before applying the change. + auto_approve (bool): If True, Terraform will *not* prompt the user for approval. + targets (list): Optional list of affected targets. + If not specified, Terraform will run against all of its resources. + + Returns: + bool: True if the terraform command was successful + """ + LOGGER.info('Initializing StreamAlert') + if not run_command(['terraform', 'init'], cwd=config.build_directory): + return False + + LOGGER.debug('Resolving Terraform modules') + if not run_command(['terraform', 'get'], cwd=config.build_directory, quiet=True): + return False + + tf_command = ['terraform'] + + if destroy: + tf_command.append('destroy') + # Terraform destroy has a '-force' flag instead of '-auto-approve' + LOGGER.info('Destroying infrastructure') + tf_command.append('-force={}'.format(str(auto_approve).lower())) + else: + tf_command.append('apply') + LOGGER.info('%s changes', 'Applying' if auto_approve else 'Planning') + tf_command.append('-auto-approve={}'.format(str(auto_approve).lower())) + + tf_command.append('-refresh={}'.format(str(refresh).lower())) + + if targets: + tf_command.extend('-target={}'.format(x) for x in targets) + + # Build the deployment package so the Lambda does not produce an error + # TODO: maybe remove this as packaging improvements progress + deployment_package = package.LambdaPackage(config) + package_path = deployment_package.create() + if not package_path: + return False + + return run_command(tf_command, cwd=config.build_directory) + + def terraform_check(): """Verify that Terraform is configured correctly Returns: - bool: Success or failure of the command ran""" - prereqs_message = ('Terraform not found! Please install and add to ' - 'your $PATH:\n' - '\t$ export PATH=$PATH:/usr/local/terraform/bin') - return run_command(['terraform', 'version'], error_message=prereqs_message, quiet=True) + bool: Success or failure of the command ran + """ + error_message = ( + 'Terraform not found! 
Please install and add to your $PATH:\n' + '\texport PATH=$PATH:/usr/local/terraform/bin' + ) + return run_command( + ['terraform', 'version'], + error_message=error_message, + quiet=True, + ) diff --git a/streamalert_cli/test/event_file.py b/streamalert_cli/test/event_file.py index cc0564d48..a9475bbc2 100644 --- a/streamalert_cli/test/event_file.py +++ b/streamalert_cli/test/event_file.py @@ -44,6 +44,10 @@ def __str__(self): return '\n'.join(str(item) for item in output) + @property + def path(self): + return self._full_path + @property def should_print(self): return any(not result.suppressed for result in self._results) diff --git a/streamalert_cli/test/handler.py b/streamalert_cli/test/handler.py index 34b9bd95e..545daae18 100644 --- a/streamalert_cli/test/handler.py +++ b/streamalert_cli/test/handler.py @@ -25,6 +25,7 @@ from streamalert.classifier import classifier from streamalert.rules_engine import rules_engine from streamalert.shared import rule +from streamalert.shared.config import ConfigError from streamalert.shared.logger import get_logger from streamalert.shared.stats import RuleStatisticTracker from streamalert_cli.helpers import check_credentials @@ -134,9 +135,8 @@ def _add_default_test_args(test_parser): '-f', '--test-files', dest='files', - metavar='FILENAMES', nargs='+', - help='One or more file to test, separated by spaces', + help='Full path to one or more file(s) to test, separated by spaces', action=UniqueSortedFileListAction, type=argparse.FileType('r'), default=[] @@ -230,7 +230,6 @@ def __init__(self, options, config): self._failed = 0 prefix = self._config['global']['account']['prefix'] env = { - 'CLUSTER': 'prod', 'STREAMALERT_PREFIX': prefix, 'AWS_ACCOUNT_ID': self._config['global']['account']['aws_account_id'], 'ALERTS_TABLE': '{}_streamalert_alerts'.format(prefix), @@ -239,10 +238,7 @@ def __init__(self, options, config): if 'stats' in options and options.stats: env['STREAMALERT_TRACK_RULE_STATS'] = '1' - patch.dict( - os.environ, - env - ).start() + patch.dict(os.environ, env).start() @staticmethod def _run_classification(record): @@ -329,13 +325,43 @@ def _process_test_file(self, test_file_path): # Iterate over the individual test events in the file event_file = TestEventFile(test_file_path) for event in event_file.process_file(self._config, self._verbose, self._testing_rules): - # Set the cluster in the env since this is used from within the - # classifier to load the proper cluster config + # Each test event should be tied to a cluster, via the configured data_sources + # Reset the CLUSTER env var for each test, since it could differ between each event + # This env var is used from within the classifier to load the proper cluster config + if 'CLUSTER' in os.environ: + del os.environ['CLUSTER'] + for cluster_name, cluster_value in self._config['clusters'].items(): - for service in cluster_value['data_sources'].values(): - if event.source in service: - os.environ['CLUSTER'] = cluster_name - break + if event.service not in cluster_value['data_sources']: + LOGGER.debug( + 'Cluster "%s" does not have service "%s" configured as a data source', + cluster_name, + event.service + ) + continue + + sources = set(cluster_value['data_sources'][event.service]) + if event.source not in sources: + LOGGER.debug( + 'Cluster "%s" does not have the source "%s" configured as a data source ' + 'for service "%s"', + cluster_name, + event.source, + event.service + ) + continue + + # If we got here, then this cluster is actually configured for this data source + 
os.environ['CLUSTER'] = cluster_name + break + + # A misconfigured test event and/or cluster config can cause this to be unset + if 'CLUSTER' not in os.environ: + error = ( + 'Test event\'s "service" ({}) and "source" ({}) are not defined within ' + 'the "data_sources" of any configured clusters: {}:{}' + ).format(event.service, event.source, event_file.path, event.index) + raise ConfigError(error) classifier_result = self._run_classification(event.record) diff --git a/streamalert_cli/test/results.py b/streamalert_cli/test/results.py index bda1ca67a..58f9f8ec3 100644 --- a/streamalert_cli/test/results.py +++ b/streamalert_cli/test/results.py @@ -173,6 +173,10 @@ def __str__(self): __repr__ = __str__ + @property + def index(self): + return self._index + @property def _disabled_rules(self): return sorted(set(self.trigger_rules).intersection( diff --git a/streamalert_cli/utils.py b/streamalert_cli/utils.py index c2dd0e45d..08b4e1b78 100644 --- a/streamalert_cli/utils.py +++ b/streamalert_cli/utils.py @@ -24,7 +24,7 @@ terraform """ from abc import abstractmethod -from argparse import Action, ArgumentTypeError, RawDescriptionHelpFormatter +from argparse import _AppendAction, Action, ArgumentTypeError, RawDescriptionHelpFormatter import os import textwrap from streamalert.apps.config import AWS_RATE_RE, AWS_RATE_HELPER @@ -92,6 +92,18 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, sorted(unique_items)) # We want this to be consistent +class UniqueSortedFileListAppendAction(_AppendAction): + """Subclass of argparse._AppendAction to avoid multiple of the same choice from a list of files + + This is meant to augment the 'append' argparse action + """ + + def __call__(self, parser, namespace, value, option_string=None): + unique_items = set(getattr(namespace, self.dest, set())) + unique_items.add(value.name) + setattr(namespace, self.dest, sorted(unique_items)) # We want this to be consistent + + class MutuallyExclusiveStagingAction(Action): """Subclass of argparse.Action to avoid staging and unstaging the same rules""" diff --git a/tests/scripts/update_reqs.sh b/tests/scripts/update_reqs.sh index b02f2c424..da5cd2283 100755 --- a/tests/scripts/update_reqs.sh +++ b/tests/scripts/update_reqs.sh @@ -1,5 +1,5 @@ #! 
/bin/bash -pip install -r requirements-top-level.txt --upgrade --force-reinstall +pip install -r requirements-top-level.txt --upgrade --force-reinstall --no-cache-dir pip freeze -r requirements-top-level.txt > requirements.txt echo "Please also update library versions in streamalert_cli/manage_lambda/package.py" diff --git a/tests/unit/conf/global.json b/tests/unit/conf/global.json index cedda3d39..df375c40d 100644 --- a/tests/unit/conf/global.json +++ b/tests/unit/conf/global.json @@ -6,6 +6,7 @@ "region": "us-west-1" }, "general": { + "terraform_files": [], "matcher_locations": [], "rule_locations": [], "scheduled_query_locations": [], diff --git a/tests/unit/helpers/config.py b/tests/unit/helpers/config.py index 9e7bf0362..2538a8e8a 100644 --- a/tests/unit/helpers/config.py +++ b/tests/unit/helpers/config.py @@ -21,6 +21,7 @@ class MockCLIConfig: def __init__(self, config): self.config = config + self.build_directory = 'streamalert_terraform_unit_test' def __repr__(self): return json.dumps(self.config) @@ -50,6 +51,11 @@ def basic_streamalert_config(): 'prefix': 'unit-test', 'region': 'us-west-2' }, + 'general': { + 'terraform_files': [ + '/test/terraform/file.tf' + ] + }, 'infrastructure': { 'monitoring': {}, 's3_access_logging': { diff --git a/tests/unit/streamalert/alert_processor/helpers.py b/tests/unit/streamalert/alert_processor/helpers.py index d8312855d..685c44e12 100644 --- a/tests/unit/streamalert/alert_processor/helpers.py +++ b/tests/unit/streamalert/alert_processor/helpers.py @@ -15,6 +15,8 @@ """ import random +import boto3 + from streamalert.alert_processor.outputs.credentials.provider import LocalFileDriver from streamalert.shared.alert import Alert from streamalert.shared.helpers.aws_api_client import AwsKms, AwsSsm @@ -81,6 +83,15 @@ def remove_temp_secrets(): LocalFileDriver.clear() +def setup_mock_kms(region, alias): + client = boto3.client('kms', region_name=region) + response = client.create_key() + client.create_alias( + AliasName=alias, + TargetKeyId=response['KeyMetadata']['KeyId'] + ) + + def encrypt_with_kms(data, region, alias): """Encrypt the given data with KMS.""" return AwsKms.encrypt(data, region=region, key_alias=alias) diff --git a/tests/unit/streamalert/alert_processor/outputs/credentials/test_provider.py b/tests/unit/streamalert/alert_processor/outputs/credentials/test_provider.py index 8048adf92..48c0c6045 100644 --- a/tests/unit/streamalert/alert_processor/outputs/credentials/test_provider.py +++ b/tests/unit/streamalert/alert_processor/outputs/credentials/test_provider.py @@ -22,12 +22,13 @@ from mock import patch, MagicMock from moto import mock_kms, mock_ssm from nose.tools import ( - assert_true, assert_equal, - assert_is_instance, - assert_is_not_none, assert_false, + assert_is_instance, assert_is_none, + assert_is_not_none, + assert_not_equal, + assert_true, ) from streamalert.alert_processor.outputs.output_base import OutputProperty @@ -45,7 +46,8 @@ MOCK_ENV ) from tests.unit.streamalert.alert_processor.helpers import ( - encrypt_with_kms + encrypt_with_kms, + setup_mock_kms ) @@ -55,12 +57,18 @@ class TestCredentialsEncrypted: - @mock_kms + def setup(self): + self.kms_mocker = mock_kms() + self.kms_mocker.start() + setup_mock_kms(REGION, KMS_ALIAS) self._plaintext_payload = 'plaintext credentials' self._encrypted_payload = encrypt_with_kms(self._plaintext_payload, REGION, KMS_ALIAS) self._credentials = Credentials(self._encrypted_payload, is_encrypted=True, region=REGION) + def teardown(self): + self.kms_mocker.stop() + def 
test_is_encrypted(self): """Credentials - Encrypted Credentials - Is Encrypted""" assert_true(self._credentials.is_encrypted()) @@ -69,7 +77,6 @@ def test_is_data(self): """Credentials - Encrypted Credentials - Data""" assert_equal(self._credentials.data(), self._encrypted_payload) - @mock_kms def test_get_data_kms_decrypted(self): """Credentials - Encrypted Credentials - KMS Decrypt""" decrypted = self._credentials.get_data_kms_decrypted() @@ -100,10 +107,17 @@ def test_decrypt_kms_error(self, logging_exception, boto3): class TestCredentialsUnencrypted: + def setup(self): + self.kms_mocker = mock_kms() + self.kms_mocker.start() + setup_mock_kms(REGION, KMS_ALIAS) self._plaintext_payload = 'plaintext credentials' self._credentials = Credentials(self._plaintext_payload, is_encrypted=False) + def teardown(self): + self.kms_mocker.stop() + def test_is_encrypted(self): """Credentials - Plaintext Credentials - Is Encrypted""" assert_false(self._credentials.is_encrypted()) @@ -118,7 +132,6 @@ def test_get_data_kms_decrypted(self, logging_error): assert_is_none(self._credentials.get_data_kms_decrypted()) logging_error.assert_called_with('Cannot decrypt Credentials as they are already decrypted') - @mock_kms def test_encrypt(self): """Credentials - Plaintext Credentials - Encrypt @@ -127,7 +140,11 @@ def test_encrypt(self): self._credentials.encrypt(REGION, KMS_ALIAS) assert_true(self._credentials.is_encrypted()) - assert_equal(self._credentials.data(), 'InBsYWludGV4dCBjcmVkZW50aWFscyI='.encode()) + + # moto changed from simply base64 encoding data to actually + # doing proper encryption/decryption. See here: + # https://github.com/earlrob/moto/commit/98581b9196768ad8d5eaa1e02ca744c0c3b2098e + assert_not_equal(self._credentials.data(), 'plaintext credentials') class TestCredentialsEmpty: @@ -135,7 +152,6 @@ def setup(self): self._plaintext_payload = '' self._credentials = Credentials(self._plaintext_payload, is_encrypted=False) - @mock_kms def test_encrypt(self): """Credentials - Empty Credentials - Encrypt - Does nothing when payload is empty""" self._credentials.encrypt(REGION, KMS_ALIAS) @@ -343,11 +359,14 @@ def test_get_formatted_output_credentials_name_no_descriptor(): #pylint: disable class TestLocalFileDriver: def setup(self): + self.kms_mocker = mock_kms() + self.kms_mocker.start() + setup_mock_kms(REGION, KMS_ALIAS) LocalFileDriver.clear() self._fs_driver = LocalFileDriver(REGION, 'service') - @staticmethod - def teardown(): + def teardown(self): + self.kms_mocker.stop() LocalFileDriver.clear() def test_save_and_has_credentials(self): @@ -359,7 +378,6 @@ def test_save_and_has_credentials(self): assert_true(self._fs_driver.has_credentials('descriptor')) - @mock_kms def test_save_and_load_credentials(self): """LocalFileDriver - Save and Load Credentials""" raw_credentials = 'aaaa' @@ -376,9 +394,8 @@ def test_save_and_load_credentials(self): assert_true(loaded_credentials.is_encrypted()) assert_equal(loaded_credentials.get_data_kms_decrypted(), raw_credentials.encode()) - @mock_kms def test_save_and_load_credentials_persists_statically(self): - """LocalFileDriver - Save and Load Credentials""" + """LocalFileDriver - Save and Load Credentials, Static""" raw_credentials = 'aaaa' descriptor = 'descriptor' @@ -428,11 +445,14 @@ def test_clear(self): class TestSpooledTempfileDriver: def setup(self): + self.kms_mocker = mock_kms() + self.kms_mocker.start() + setup_mock_kms(REGION, KMS_ALIAS) SpooledTempfileDriver.clear() self._sp_driver = SpooledTempfileDriver('service', REGION) - @staticmethod 
- def teardown(): + def teardown(self): + self.kms_mocker.start() SpooledTempfileDriver.clear() def test_save_and_has_credentials(self): @@ -444,7 +464,6 @@ def test_save_and_has_credentials(self): assert_true(self._sp_driver.has_credentials('descriptor')) - @mock_kms def test_save_and_load_credentials(self): """SpooledTempfileDriver - Save and Load Credentials""" raw_credentials = 'aaaa' @@ -460,9 +479,8 @@ def test_save_and_load_credentials(self): assert_true(loaded_credentials.is_encrypted()) assert_equal(loaded_credentials.get_data_kms_decrypted(), raw_credentials.encode()) - @mock_kms def test_save_and_load_credentials_persists_statically(self): - """SpooledTempfileDriver - Save and Load Credentials""" + """SpooledTempfileDriver - Save and Load Credentials, Static""" raw_credentials_dict = { 'python': 'is very difficult', 'someone': 'save meeeee', @@ -521,11 +539,14 @@ def test_clear(self): class TestEphemeralUnencryptedDriver: def setup(self): + self.kms_mocker = mock_kms() + self.kms_mocker.start() + setup_mock_kms(REGION, KMS_ALIAS) EphemeralUnencryptedDriver.clear() self._ep_driver = EphemeralUnencryptedDriver('service') - @staticmethod - def teardown(): + def teardown(self): + self.kms_mocker.stop() EphemeralUnencryptedDriver.clear() def test_save_and_has_credentials(self): @@ -550,7 +571,7 @@ def test_save_and_load_credentials(self): assert_equal(loaded_credentials.data(), 'aaaa') def test_save_and_load_credentials_persists_statically(self): - """EphemeralUnencryptedDriver - Save and Load Credentials""" + """EphemeralUnencryptedDriver - Save and Load Credentials, Static""" descriptor = 'descriptor' credentials = Credentials('aaaa', False) @@ -563,7 +584,6 @@ def test_save_and_load_credentials_persists_statically(self): assert_false(loaded_credentials.is_encrypted()) assert_equal(loaded_credentials.data(), 'aaaa') - @mock_kms def test_save_automatically_decrypts(self): """EphemeralUnencryptedDriver - Save Automatically Decrypts""" raw_credentials_dict = { diff --git a/tests/unit/streamalert/alert_processor/outputs/test_demisto.py b/tests/unit/streamalert/alert_processor/outputs/test_demisto.py index 8ba255f34..1d948a639 100644 --- a/tests/unit/streamalert/alert_processor/outputs/test_demisto.py +++ b/tests/unit/streamalert/alert_processor/outputs/test_demisto.py @@ -122,6 +122,7 @@ def test_dispatch(self, request_mock): 'type': 'Unclassified', 'name': 'cb_binarystore_file_added', 'owner': 'StreamAlert', + 'playbook': 'Unknown', 'severity': 0, 'labels': EXPECTED_LABELS_FOR_SAMPLE_ALERT, 'customFields': {}, diff --git a/tests/unit/streamalert/apps/test_apps/test_aliyun.py b/tests/unit/streamalert/apps/test_apps/test_aliyun.py index 0c68ec316..bc333c521 100644 --- a/tests/unit/streamalert/apps/test_apps/test_aliyun.py +++ b/tests/unit/streamalert/apps/test_apps/test_aliyun.py @@ -18,7 +18,7 @@ from mock import patch from moto import mock_ssm -from nose.tools import assert_count_equal, assert_equal, assert_false, assert_true +from nose.tools import assert_count_equal, assert_equal, assert_false, assert_true, raises from aliyunsdkcore.acs_exception.exceptions import ServerException @@ -66,12 +66,13 @@ def test_region_validator_failure(self): validation_function = self._app.required_auth_info()['region_id']['format'] assert_equal(validation_function('ap-northeast'), False) + @raises(ServerException) @patch('aliyunsdkcore.client.AcsClient.do_action_with_exception') @patch('logging.Logger.exception') def test_server_exception(self, log_mock, client_mock): """AliyunApp - Gather Logs, 
Exception""" client_mock.side_effect = ServerException("error", "bad server response") - assert_false(self._app._gather_logs()) + self._app._gather_logs() log_mock.assert_called_with("%s error occurred", "Server") def test_gather_logs_last_timestamp_set(self): diff --git a/tests/unit/streamalert/apps/test_apps/test_app_base.py b/tests/unit/streamalert/apps/test_apps/test_app_base.py index 60e3956a7..0cdf5bf99 100644 --- a/tests/unit/streamalert/apps/test_apps/test_app_base.py +++ b/tests/unit/streamalert/apps/test_apps/test_app_base.py @@ -52,12 +52,15 @@ def test_get_all_apps(self): 'gsuite_admin', 'gsuite_calendar', 'gsuite_drive', + 'gsuite_gcp', 'gsuite_gplus', 'gsuite_groups', 'gsuite_groups_enterprise', 'gsuite_login', + 'gsuite_meet', 'gsuite_mobile', 'gsuite_rules', + 'gsuite_saml', 'gsuite_token', 'gsuite_user_accounts', 'intercom_admin_activity_logs', diff --git a/tests/unit/streamalert/classifier/payload/test_payload_s3.py b/tests/unit/streamalert/classifier/payload/test_payload_s3.py index 8081a64f3..ae6534831 100644 --- a/tests/unit/streamalert/classifier/payload/test_payload_s3.py +++ b/tests/unit/streamalert/classifier/payload/test_payload_s3.py @@ -19,6 +19,7 @@ import tempfile import boto3 +from botocore.exceptions import ClientError from mock import patch from moto import mock_s3 @@ -80,10 +81,10 @@ def test_check_size_exception_large(self): self._payload.raw_record['s3']['object']['size'] = 1024 * 1024 * 129 # 129 MB assert_raises(S3PayloadError, self._payload._check_size) - def test_check_size_exception_zero(self): - """S3Payload - Check Size, Zero Raises Exception""" + def test_check_size_zero(self): + """S3Payload - Check Size, Zero""" self._payload.raw_record['s3']['object']['size'] = 0 - assert_raises(S3PayloadError, self._payload._check_size) + assert_equal(self._payload._check_size(), False) def test_gz_reader(self): """S3Payload - GZ Reader""" @@ -168,12 +169,12 @@ def test_read_file(self): assert_equal(read_lines, [(1, value)]) @mock_s3 - @patch('logging.Logger.exception') - def test_read_file_error(self, log_mock): + def test_read_file_error(self): """S3Payload - Read File, Exception""" boto3.resource('s3').Bucket(self._bucket).create() - list(S3Payload(None, self._record)._read_file()) - log_mock.assert_called_with('Failed to download object from S3') + payload = S3Payload(None, self._record) + result = payload._read_file() + assert_raises(ClientError, list, result) def test_pre_parse(self): """S3Payload - Pre Parse""" diff --git a/tests/unit/streamalert/classifier/test_classifier.py b/tests/unit/streamalert/classifier/test_classifier.py index 0d0e1ae10..8de12af96 100644 --- a/tests/unit/streamalert/classifier/test_classifier.py +++ b/tests/unit/streamalert/classifier/test_classifier.py @@ -246,7 +246,7 @@ def test_classify_payload(self, process_mock): ]) ) normalizer_mock.normalize.assert_called_with( - payload_record.parsed_records[-1], 'foo' + payload_record.parsed_records[-1], 'foo:bar' ) assert_equal(self._classifier._payloads, [payload_record]) log_mock.assert_called_with(payload_record, 1) @@ -325,3 +325,30 @@ def test_run_no_payloads(self, classifiy_mock): load_mock.return_value = False self._classifier.run([Mock()]) classifiy_mock.assert_not_called() + + @patch('streamalert.shared.artifact_extractor.ArtifactExtractor.run') + @patch.object(Classifier, '_classify_payload') + def test_run_artifact_extractor_disabled(self, classifiy_mock, artifact_extractor_mock): + """Classifier - Test run method when artifact_extractor disabled""" + with 
patch.object(classifier_module.StreamPayload, 'load_from_raw_record') as load_mock: + payload = self._mock_payload([self._mock_payload_record()]) + load_mock.return_value = payload + self._classifier.run([Mock()]) + classifiy_mock.assert_called_with(payload) + artifact_extractor_mock.assert_not_called() + + @patch('streamalert.shared.artifact_extractor.ArtifactExtractor.run') + @patch.object(Classifier, '_classify_payload') + def test_run_artifact_extractor_enabled(self, classifiy_mock, artifact_extractor_mock): + """Classifier - Test run method when artifact_extractor enabled""" + Classifier._config['global']['infrastructure']['artifact_extractor'] = { + 'enabled': True, + 'firehose_buffer_size': 128, + 'firehose_buffer_interval': 60 + } + with patch.object(classifier_module.StreamPayload, 'load_from_raw_record') as load_mock: + payload = self._mock_payload([self._mock_payload_record()]) + load_mock.return_value = payload + self._classifier.run([Mock()]) + classifiy_mock.assert_called_with(payload) + artifact_extractor_mock.assert_called_once() diff --git a/tests/unit/streamalert/rules_engine/test_threat_intel.py b/tests/unit/streamalert/rules_engine/test_threat_intel.py index 3318dbcb9..91f4c44ee 100644 --- a/tests/unit/streamalert/rules_engine/test_threat_intel.py +++ b/tests/unit/streamalert/rules_engine/test_threat_intel.py @@ -85,7 +85,7 @@ def _sample_payload(self): 'recipientAccountId': '12345' }, 'source': '1.1.1.2', - 'streamalert:normalization': { + 'streamalert_normalization': { 'sourceAddress': {'1.1.1.2'}, 'userName': {'alice'} } @@ -118,7 +118,7 @@ def test_threat_detection(self): 'recipientAccountId': '12345' }, 'source': '1.1.1.2', - 'streamalert:normalization': { + 'streamalert_normalization': { 'sourceAddress': {'1.1.1.2'}, 'userName': {'alice'} }, @@ -149,7 +149,7 @@ def test_threat_detection_no_iocs(self): 'recipientAccountId': '12345' }, 'source': '1.1.1.2', - 'streamalert:normalization': { + 'streamalert_normalization': { 'sourceAddress': {'1.1.1.2'}, 'userName': {'alice'} } diff --git a/tests/unit/streamalert/scheduled_queries/query_packs/test_manager.py b/tests/unit/streamalert/scheduled_queries/query_packs/test_manager.py index 0a983bd4b..424149b6b 100644 --- a/tests/unit/streamalert/scheduled_queries/query_packs/test_manager.py +++ b/tests/unit/streamalert/scheduled_queries/query_packs/test_manager.py @@ -261,6 +261,16 @@ def test_generate_unsupported(self): 'Parameter generator does not know how to handle "unsupported"' ) + def test_generate_advanced_function(self): + """StreamQuery - QueryParameterGenerator - generate_advanced - Function""" + def thing(date): + return date.strftime('%Y-%m-%d-%H-%I-%S') + assert_equals(self._generator.generate_advanced('thing', thing), '2019-01-01-01-01-01') + + def test_generate_advanced_nothing(self): + """StreamQuery - QueryParameterGenerator - generate_advanced - Nothing""" + assert_equals(self._generator.generate_advanced('utctimestamp', None), '1546304461') + @patch('streamalert.scheduled_queries.query_packs.manager.QueryPacksManager') def test_new_manager(constructor_spy): diff --git a/tests/unit/streamalert/shared/lookup_tables/test_core.py b/tests/unit/streamalert/shared/lookup_tables/test_core.py index 0d34ecb8d..7a5686d8a 100644 --- a/tests/unit/streamalert/shared/lookup_tables/test_core.py +++ b/tests/unit/streamalert/shared/lookup_tables/test_core.py @@ -74,10 +74,6 @@ def _put_mock_data(self): { 'AttributeName': 'MySortKey', 'AttributeType': 'S' - }, - { - 'AttributeName': 'MyValueKey', - 'AttributeType': 'S' } ], 
'KeySchema': [ diff --git a/tests/unit/streamalert/shared/lookup_tables/test_driver_dynamodb.py b/tests/unit/streamalert/shared/lookup_tables/test_driver_dynamodb.py index 4b1a9eadf..dc404cad2 100644 --- a/tests/unit/streamalert/shared/lookup_tables/test_driver_dynamodb.py +++ b/tests/unit/streamalert/shared/lookup_tables/test_driver_dynamodb.py @@ -66,10 +66,6 @@ def _put_mock_tables(self): { 'AttributeName': 'MySortKey', 'AttributeType': 'S' - }, - { - 'AttributeName': 'MyValueKey', - 'AttributeType': 'S' } ], 'KeySchema': [ @@ -269,19 +265,7 @@ def _put_mock_tables(self): { 'AttributeName': 'Pkey', 'AttributeType': 'S' - }, - { - 'AttributeName': 'IntegerField', - 'AttributeType': 'N' - }, - { - 'AttributeName': 'StringField', - 'AttributeType': 'S' - }, - { - 'AttributeName': 'DictField', - 'AttributeType': 'M' - }, + } ], 'KeySchema': [ { diff --git a/tests/unit/streamalert/shared/test_artifact_extractor.py b/tests/unit/streamalert/shared/test_artifact_extractor.py new file mode 100644 index 000000000..8f514a469 --- /dev/null +++ b/tests/unit/streamalert/shared/test_artifact_extractor.py @@ -0,0 +1,99 @@ +""" +Copyright 2017-present Airbnb, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from mock import call, patch +from nose.tools import assert_equal + +from streamalert.shared.artifact_extractor import ( + Artifact, + ArtifactExtractor +) +from streamalert.shared.firehose import FirehoseClient +from tests.unit.streamalert.shared.test_utils import ( + generate_artifacts, + generate_categorized_records, + MOCK_RECORD_ID, +) + + +class TestArtifact: + """Test Artifact class""" + + def test_record(self): # pylint: disable=no-self-use + """Artifact - Test record property in the Artifact class""" + artifact = Artifact( + normalized_type='test_normalized_type', + value='test_value', + source_type='test_source_type', + record_id='test_record_id', + function=None + ) + expected_result = { + 'function': 'None', + 'streamalert_record_id': 'test_record_id', + 'source_type': 'test_source_type', + 'type': 'test_normalized_type', + 'value': 'test_value' + } + + assert_equal(artifact.artifact, expected_result) + + +class TestArtifactExtractor: + """Test ArtifactExtractor class """ + # pylint: disable=attribute-defined-outside-init,protected-access,no-self-use + + def setup(self): + """Setup before each method""" + with patch('boto3.client'): + ArtifactExtractor._firehose_client = FirehoseClient(prefix='unit-test') + + self._artifact_extractor = ArtifactExtractor('unit_test_dst_fh_arn') + + def teardown(self): + """Teardown after each method""" + ArtifactExtractor._firehose_client = None + + @patch('streamalert.shared.artifact_extractor.LOGGER') + def test_run_zero_artifact(self, logger_mock): + """ArtifactExtractor - Test run method extract zero artifact""" + self._artifact_extractor.run(generate_categorized_records()) + logger_mock.assert_has_calls([ + call.debug('Extracting artifacts from %d %s logs', 2, 'log_type_01_sub_type_01'), + call.debug('Extracted %d artifact(s)', 0) + ]) + + 
assert_equal(self._artifact_extractor._artifacts, list()) + + @patch('uuid.uuid4') + @patch.object(FirehoseClient, '_send_batch') + @patch('streamalert.shared.artifact_extractor.LOGGER') + def test_run(self, logger_mock, send_batch_mock, uuid_mock): + """ArtifactExtractor - Test run method extract artifacts""" + uuid_mock.return_value = MOCK_RECORD_ID + self._artifact_extractor.run(generate_categorized_records(normalized=True)) + + logger_mock.assert_has_calls([ + call.debug('Extracting artifacts from %d %s logs', 2, 'log_type_01_sub_type_01'), + call.debug('Extracted %d artifact(s)', 6) + ]) + + send_batch_mock.assert_called_with( + 'unit_test_dst_fh_arn', + generate_artifacts(firehose_records=True), + 'classifier' + ) + + assert_equal(self._artifact_extractor._artifacts, generate_artifacts()) diff --git a/tests/unit/streamalert/shared/test_aws_api_client.py b/tests/unit/streamalert/shared/test_aws_api_client.py index 788b2459a..fe2d5be84 100644 --- a/tests/unit/streamalert/shared/test_aws_api_client.py +++ b/tests/unit/streamalert/shared/test_aws_api_client.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ +import boto3 from botocore.exceptions import ClientError from mock import patch from moto import mock_kms @@ -30,6 +31,13 @@ def test_encrypt_decrypt(): """AwsApiClient - AwsKms - encrypt/decrypt - Encrypt and push creds, then pull them down""" secret = 'shhhhhh'.encode() # nosec + client = boto3.client('kms', region_name=REGION) + response = client.create_key() + client.create_alias( + AliasName=KMS_ALIAS, + TargetKeyId=response['KeyMetadata']['KeyId'] + ) + ciphertext = AwsKms.encrypt(secret, region=REGION, key_alias=KMS_ALIAS) response = AwsKms.decrypt(ciphertext, region=REGION) diff --git a/tests/unit/streamalert/shared/test_config.py b/tests/unit/streamalert/shared/test_config.py index 7df73de53..5b3e4cbc8 100644 --- a/tests/unit/streamalert/shared/test_config.py +++ b/tests/unit/streamalert/shared/test_config.py @@ -19,11 +19,14 @@ from nose.tools import ( assert_equal, assert_count_equal, + assert_false, assert_raises, + assert_true, ) from pyfakefs import fake_filesystem_unittest from streamalert.shared.config import ( + artifact_extractor_enabled, _validate_config, load_config, parse_lambda_arn, @@ -292,3 +295,49 @@ def test_config_duplicate_sources(self): config = basic_streamalert_config() config['clusters']['dev'] = config['clusters']['prod'] assert_raises(ConfigError, _validate_config, config) + + +class TestConfigArtifactExtractor(): + """Shared - Test Artifact Extractor configuration with mocked config files""" + + def __init__(self): + self.default_conf_data = {} + + def setup(self): + self.default_conf_data = { + 'global': { + 'infrastructure': { + 'firehose': { + 'enabled': False, + 'enabled_logs': {} + }, + 'artifact_extractor': { + 'enabled': False + } + } + }, + 'logs': { + 'test_log:type_1': { + 'schema': {}, + 'configuration': { + 'normalization': {} + } + }, + 'test_log:type_2': { + 'schema': {}, + } + } + } + + def test_artifact_extractor_disabled_by_default(self): + """Shared - artifact extractor is disabled with default config""" + assert_false(artifact_extractor_enabled(self.default_conf_data)) + + + def test_artifact_extractor(self): + """Shared - test artifact_extractor_enabled helper""" + self.default_conf_data['global']['infrastructure']['artifact_extractor']['enabled'] = True + assert_false(artifact_extractor_enabled(self.default_conf_data)) + + 
self.default_conf_data['global']['infrastructure']['firehose']['enabled'] = True + assert_true(artifact_extractor_enabled(self.default_conf_data)) diff --git a/tests/unit/streamalert/classifier/clients/test_firehose.py b/tests/unit/streamalert/shared/test_firehose.py similarity index 90% rename from tests/unit/streamalert/classifier/clients/test_firehose.py rename to tests/unit/streamalert/shared/test_firehose.py index 2b551f799..4816ad041 100644 --- a/tests/unit/streamalert/classifier/clients/test_firehose.py +++ b/tests/unit/streamalert/shared/test_firehose.py @@ -17,7 +17,7 @@ from mock import Mock, patch from nose.tools import assert_equal -from streamalert.classifier.clients.firehose import FirehoseClient +from streamalert.shared.firehose import FirehoseClient class TestFirehoseClient: @@ -97,7 +97,7 @@ def test_record_batches(self): ] ] - result = list(FirehoseClient._record_batches(records)) + result = list(FirehoseClient._record_batches(records, 'test_function_name')) assert_equal(result, expected_result) @patch.object(FirehoseClient, '_log_failed') @@ -107,15 +107,15 @@ def test_record_batches_rec_too_large(self, failure_mock): {'key': 'test' * 1000 * 1000} ] - result = list(FirehoseClient._record_batches(records)) + result = list(FirehoseClient._record_batches(records, 'test_function_name')) assert_equal(result, []) - failure_mock.assert_called_with(1) + failure_mock.assert_called_with(1, 'test_function_name') def test_record_batches_max_batch_count(self): """FirehoseClient - Record Batches, Max Batch Count""" records = self._sample_raw_records(count=501) - result = list(FirehoseClient._record_batches(records)) + result = list(FirehoseClient._record_batches(records, 'test_function_name')) assert_equal(len(result), 2) assert_equal(len(result[0]), 500) assert_equal(len(result[1]), 1) @@ -126,7 +126,7 @@ def test_record_batches_max_batch_size(self): {'key_{}'.format(i): 'test' * 100000} for i in range(10) ] - result = list(FirehoseClient._record_batches(records)) + result = list(FirehoseClient._record_batches(records, 'test_function_name')) assert_equal(len(result), 2) assert_equal(len(result[0]), 9) assert_equal(len(result[1]), 1) @@ -229,8 +229,8 @@ def test_finalize_failures(self, failure_mock): ] } - FirehoseClient._finalize(response, 'stream_name', 3) - failure_mock.assert_called_with(1) + FirehoseClient._finalize(response, 'stream_name', 3, 'test_function_name') + failure_mock.assert_called_with(1, 'test_function_name') @patch('logging.Logger.info') def test_finalize_success(self, log_mock): @@ -244,7 +244,7 @@ def test_finalize_success(self, log_mock): } } - FirehoseClient._finalize(response, stream_name, count) + FirehoseClient._finalize(response, stream_name, count, 'test_function_name') log_mock.assert_called_with( 'Successfully sent %d message(s) to firehose %s with RequestId \'%s\'', count, @@ -280,7 +280,7 @@ def test_send_batch(self): } ] - self._client._send_batch(stream_name, records) + self._client._send_batch(stream_name, records, 'test_function_name') boto_mock.put_record_batch.assert_called_with( DeliveryStreamName=stream_name, @@ -296,7 +296,7 @@ def test_send_batch_error(self, log_mock): error = ClientError({'Error': {'Code': 10}}, 'InvalidRequestException') boto_mock.put_record_batch.side_effect = error - self._client._send_batch(stream_name, ['data']) + self._client._send_batch(stream_name, ['data'], 'test_function_name') log_mock.assert_called_with('Firehose request failed') @@ -412,7 +412,7 @@ def test_send(self, send_batch_mock): ] 
self._client.send(self._sample_payloads) send_batch_mock.assert_called_with( - 'unit_test_streamalert_log_type_01_sub_type_01', expected_batch + 'unit_test_streamalert_log_type_01_sub_type_01', expected_batch, 'classifier' ) @patch.object(FirehoseClient, '_send_batch') @@ -434,7 +434,7 @@ def test_send_no_prefixing(self, send_batch_mock): client.send(self._sample_payloads) send_batch_mock.assert_called_with( - 'streamalert_log_type_01_sub_type_01', expected_batch + 'streamalert_log_type_01_sub_type_01', expected_batch, 'classifier' ) @property @@ -476,7 +476,9 @@ def test_send_long_log_name(self, send_batch_mock): client.send(self._sample_payloads_long_log_name) send_batch_mock.assert_called_with( - 'streamalert_very_very_very_long_log_stream_name_abcdefg_7c88167b', expected_batch + 'streamalert_very_very_very_long_log_stream_name_abcdefg_7c88167b', + expected_batch, + 'classifier' ) def test_generate_firehose_name(self): @@ -522,3 +524,30 @@ def test_generate_firehose_name_prefix(self): ] assert_equal(expected_results, results) + + def test_artifacts_firehose_stream_name(self): + """FirehoseClient - Test generate artifacts firehose stream name""" + config_data = { + 'global': { + 'account': { + 'prefix': 'unittest' + } + }, + 'lambda': { + 'artifact_extractor_config': {} + } + } + + assert_equal( + self._client.artifacts_firehose_stream_name(config_data), + 'unittest_streamalert_artifacts' + ) + + config_data['lambda']['artifact_extractor_config']['firehose_stream_name'] = ( + 'test_artifacts_fh_name' + ) + + assert_equal( + self._client.artifacts_firehose_stream_name(config_data), + 'test_artifacts_fh_name' + ) diff --git a/tests/unit/streamalert/shared/test_normalizer.py b/tests/unit/streamalert/shared/test_normalizer.py index ba78c1875..5344d5115 100644 --- a/tests/unit/streamalert/shared/test_normalizer.py +++ b/tests/unit/streamalert/shared/test_normalizer.py @@ -13,10 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
""" -from mock import patch -from nose.tools import assert_equal +from mock import Mock, patch +from nose.tools import assert_equal, assert_false, assert_raises, assert_true -from streamalert.shared.normalize import Normalizer +from streamalert.shared.exceptions import ConfigError +from streamalert.shared.normalize import Normalizer, NormalizedType +from tests.unit.streamalert.shared.test_utils import MOCK_RECORD_ID class TestNormalizer: @@ -42,70 +44,162 @@ def _test_record(cls): 'sourceIPAddress': '1.1.1.3' } + @classmethod + def _normalized_type_ip(cls): + return NormalizedType( + 'test_log_type', + 'ip_address', + [ + { + 'path': ['sourceIPAddress'], + 'function': 'source ip address' + }, + { + 'path': ['detail', 'source'], + 'function': 'source ip address' + } + ] + ) + + @classmethod + def _normalized_type_region(cls): + return NormalizedType( + 'test_log_type', + 'region', + [ + { + 'path': ['region'], + 'function': 'AWS region' + }, + { + 'path': ['detail', 'awsRegion'], + 'function': 'AWS region' + } + ] + ) + + @classmethod + def _normalized_type_account(cls): + return NormalizedType('test_log_type', 'account', ['account']) + + @classmethod + def _normalized_type_user_identity(cls): + return NormalizedType( + 'test_log_type', + 'user_identity', + [ + { + 'path': ['detail', 'userIdentity', 'userName'], + 'function': 'User name' + }, + { + 'path': ['detail', 'userIdentity', 'invokedBy'], + 'function': 'Service name' + } + ] + ) + + @patch('uuid.uuid4', Mock(return_value=MOCK_RECORD_ID)) def test_match_types(self): """Normalizer - Match Types""" normalized_types = { - 'region': ['region', 'awsRegion'], - 'sourceAccount': ['account', 'accountId'], - 'ipv4': ['destination', 'source', 'sourceIPAddress'] + 'region': self._normalized_type_region(), + 'account': self._normalized_type_account(), + 'ipv4': self._normalized_type_ip() } expected_results = { - 'sourceAccount': [123456], - 'ipv4': ['1.1.1.2', '1.1.1.3'], - 'region': ['region_name'] + 'streamalert_record_id': MOCK_RECORD_ID, + 'account': [ + { + 'values': ['123456'], + 'function': None + } + ], + 'ipv4': [ + { + 'values': ['1.1.1.3'], + 'function': 'source ip address' + }, + { + 'values': ['1.1.1.2'], + 'function': 'source ip address' + } + ], + 'region': [ + { + 'values': ['region_name'], + 'function': 'AWS region' + }, + { + 'values': ['region_name'], + 'function': 'AWS region' + } + ] } results = Normalizer.match_types(self._test_record(), normalized_types) assert_equal(results, expected_results) + @patch('uuid.uuid4', Mock(return_value=MOCK_RECORD_ID)) def test_match_types_multiple(self): """Normalizer - Match Types, Mutiple Sub-keys""" normalized_types = { - 'account': ['account'], - 'region': ['region', 'awsRegion'], - 'ipv4': ['destination', 'source', 'sourceIPAddress'], - 'userName': ['userName', 'owner', 'invokedBy'] + 'account': self._normalized_type_account(), + 'ipv4': self._normalized_type_ip(), + 'region': self._normalized_type_region(), + 'user_identity': self._normalized_type_user_identity() } expected_results = { - 'account': [123456], - 'ipv4': ['1.1.1.2', '1.1.1.3'], - 'region': ['region_name'], - 'userName': ['Alice', 'signin.amazonaws.com'] + 'streamalert_record_id': MOCK_RECORD_ID, + 'account': [ + { + 'values': ['123456'], + 'function': None + } + ], + 'ipv4': [ + { + 'values': ['1.1.1.3'], + 'function': 'source ip address' + }, + { + 'values': ['1.1.1.2'], + 'function': 'source ip address' + } + ], + 'region': [ + { + 'values': ['region_name'], + 'function': 'AWS region' + }, + { + 'values': 
['region_name'], + 'function': 'AWS region' + } + ], + 'user_identity': [ + { + 'values': ['Alice'], + 'function': 'User name' + }, + { + 'values': ['signin.amazonaws.com'], + 'function': 'Service name' + } + ] } results = Normalizer.match_types(self._test_record(), normalized_types) assert_equal(results, expected_results) - def test_match_types_list(self): - """Normalizer - Match Types, List of Values""" - normalized_types = { - 'ipv4': ['sourceIPAddress'], - } - expected_results = { - 'ipv4': ['1.1.1.2', '1.1.1.3'] - } - - test_record = { - 'account': 123456, - 'sourceIPAddress': ['1.1.1.2', '1.1.1.3'] - } - - results = Normalizer.match_types(test_record, normalized_types) - assert_equal(results, expected_results) - + @patch('uuid.uuid4', Mock(return_value=MOCK_RECORD_ID)) def test_normalize(self): """Normalizer - Normalize""" log_type = 'cloudtrail' Normalizer._types_config = { log_type: { - 'region': { - 'region', - 'awsRegion' - }, - 'sourceAccount': { - 'account', - 'accountId' - } + 'region': self._normalized_type_region(), + 'ipv4': self._normalized_type_ip() } } record = self._test_record() @@ -123,27 +217,45 @@ def test_normalize(self): } }, 'sourceIPAddress': '1.1.1.3', - 'streamalert:normalization': { - 'region': ['region_name'], - 'sourceAccount': [123456] + 'streamalert_normalization': { + 'streamalert_record_id': MOCK_RECORD_ID, + 'region': [ + { + 'values': ['region_name'], + 'function': 'AWS region' + }, + { + 'values': ['region_name'], + 'function': 'AWS region' + } + ], + 'ipv4': [ + { + 'values': ['1.1.1.3'], + 'function': 'source ip address' + }, + { + 'values': ['1.1.1.2'], + 'function': 'source ip address' + } + ] } } assert_equal(record, expected_record) + @patch('uuid.uuid4', Mock(return_value=MOCK_RECORD_ID)) def test_normalize_corner_case(self): """Normalizer - Normalize - Corner Case""" log_type = 'cloudtrail' Normalizer._types_config = { log_type: { - 'normalized_key': { + 'normalized_key': NormalizedType( + log_type, 'normalized_key', - 'original_key' - }, - 'sourceAccount': { - 'account', - 'accountId' - } + ['original_key', 'original_key'] + ), + 'account': self._normalized_type_account() } } record = { @@ -159,8 +271,14 @@ def test_normalize_corner_case(self): 'original_key': { 'original_key': 'fizzbuzz', }, - 'streamalert:normalization': { - 'normalized_key': ['fizzbuzz'] + 'streamalert_normalization': { + 'streamalert_record_id': MOCK_RECORD_ID, + 'normalized_key': [ + { + 'values': ['fizzbuzz'], + 'function': None + } + ] } } @@ -174,6 +292,7 @@ def test_normalize_none_defined(self, log_mock): Normalizer.normalize(self._test_record(), log_type) log_mock.assert_called_with('No normalized types defined for log type: %s', log_type) + @patch('uuid.uuid4', Mock(return_value=MOCK_RECORD_ID)) def test_key_does_not_exist(self): """Normalizer - Normalize, Key Does Not Exist""" test_record = { @@ -182,19 +301,31 @@ def test_key_does_not_exist(self): } normalized_types = { - 'region': ['region', 'awsRegion'], - 'sourceAccount': ['account', 'accountId'], + 'region': self._normalized_type_region(), + 'account': NormalizedType('test_log_type', 'account', ['accountId']), # There is no IP value in record, so normalization should not include this - 'ipv4': ['sourceIPAddress'] + 'ipv4': self._normalized_type_ip() } expected_results = { - 'sourceAccount': [123456], - 'region': ['region_name'] + 'streamalert_record_id': MOCK_RECORD_ID, + 'account': [ + { + 'values': ['123456'], + 'function': None + } + ], + 'region': [ + { + 'values': ['region_name'], + 'function': 'AWS 
region' + } + ] } results = Normalizer.match_types(test_record, normalized_types) assert_equal(results, expected_results) + @patch('uuid.uuid4', Mock(return_value=MOCK_RECORD_ID)) def test_empty_value(self): """Normalizer - Normalize, Empty Value""" test_record = { @@ -203,12 +334,18 @@ def test_empty_value(self): } normalized_types = { - 'region': ['region', 'awsRegion'], - 'sourceAccount': ['account', 'accountId'], - 'ipv4': ['sourceIPAddress'] + 'region': self._normalized_type_region(), + 'account': self._normalized_type_account(), + 'ipv4': self._normalized_type_ip() } expected_results = { - 'sourceAccount': [123456] + 'streamalert_record_id': MOCK_RECORD_ID, + 'account': [ + { + 'values': ['123456'], + 'function': None + } + ] } results = Normalizer.match_types(test_record, normalized_types) @@ -219,8 +356,13 @@ def test_get_values_for_normalized_type(self): expected_result = {'1.1.1.3'} record = { 'sourceIPAddress': '1.1.1.3', - 'streamalert:normalization': { - 'ip_v4': expected_result, + 'streamalert_normalization': { + 'ip_v4': [ + { + 'values': expected_result, + 'function': None + } + ], } } @@ -230,38 +372,38 @@ def test_get_values_for_normalized_type_none(self): """Normalizer - Get Values for Normalized Type, None""" record = { 'sourceIPAddress': '1.1.1.3', - 'streamalert:normalization': {} + 'streamalert_normalization': {} } assert_equal(Normalizer.get_values_for_normalized_type(record, 'ip_v4'), set()) + def test_load_from_config_exist_types_config(self): + """Normalizer - Load normalized_types from conf when it was loaded previously""" + Normalizer._types_config = {'normalized_type1': {}} + assert_equal(Normalizer.load_from_config({'foo': 'bar'}), Normalizer) + def test_load_from_config(self): """Normalizer - Load From Config""" config = { - 'normalized_types': { + 'logs': { 'cloudtrail': { - 'region': [ - 'region', - 'awsRegion' - ], - 'sourceAccount': [ - 'account', - 'accountId' - ] + 'schema': {}, + 'configuration': { + 'normalization': { + 'region': ['path', 'to', 'awsRegion'], + 'sourceAccount': ['path', 'to', 'accountId'] + } + } } } } normalizer = Normalizer.load_from_config(config) expected_config = { 'cloudtrail': { - 'region': [ - 'region', - 'awsRegion' - ], - 'sourceAccount': [ - 'account', - 'accountId' - ] + 'region': NormalizedType('cloudtrail', 'region', ['path', 'to', 'awsRegion']), + 'sourceAccount': NormalizedType( + 'cloudtrail', 'sourceAccount', ['path', 'to', 'accountId'] + ) } } assert_equal(normalizer, Normalizer) @@ -272,3 +414,378 @@ def test_load_from_config_empty(self): normalizer = Normalizer.load_from_config({}) assert_equal(normalizer, Normalizer) assert_equal(normalizer._types_config, None) + + def test_load_from_config_from_log_conf(self): + """Normalizer - Load normalization config from "logs" field in the config""" + config = { + 'logs': { + 'cloudwatch:events': { + 'schema': { + 'account': 'string', + 'source': 'string', + 'key': 'string' + }, + 'parser': 'json', + 'configuration': { + 'normalization': { + 'event_name': ['detail', 'eventName'], + 'region': [ + { + 'path': ['region'], + 'function': 'aws region information' + }, + { + 'path': ['detail', 'awsRegion'], + 'function': 'aws region information' + } + ], + 'ip_address': [ + { + 'path': ['detail', 'sourceIPAddress'], + 'function': 'source ip address' + } + ] + } + } + } + } + } + + expected_config = { + 'cloudwatch:events': { + 'event_name': NormalizedType( + 'cloudwatch:events', 'event_name', ['detail', 'eventName'] + ), + 'region': NormalizedType( + 'cloudwatch:events', + 
'region', + [ + { + 'path': ['region'], + 'function': 'aws region information' + }, + { + 'path': ['detail', 'awsRegion'], + 'function': 'aws region information' + } + ] + ), + 'ip_address': NormalizedType( + 'cloudwatch:events', + 'ip_address', + [ + { + 'path': ['detail', 'sourceIPAddress'], + 'function': 'source ip address' + } + ] + ) + } + } + + normalizer = Normalizer.load_from_config(config) + assert_equal(normalizer, Normalizer) + assert_equal(normalizer._types_config, expected_config) + + def test_load_from_config_deprecate_normalized_types(self): + """Normalizer - Load normalization config and deprecate conf/normalized_types.json + """ + config = { + 'logs': { + 'cloudwatch:events': { + 'schema': { + 'account': 'string', + 'source': 'string', + 'key': 'string' + }, + 'parser': 'json', + 'configuration': { + 'normalization': { + 'ip_address': [ + { + 'path': ['path', 'to', 'sourceIPAddress'], + 'function': 'source ip address' + } + ] + } + } + }, + 'other_log_type': {} + }, + 'normalized_types': { + 'cloudwatch': { + 'region': ['region', 'awsRegion'], + 'sourceAccount': ['account', 'accountId'] + } + } + } + expected_config = { + 'cloudwatch:events': { + 'ip_address': NormalizedType( + 'cloudwatch:events', + 'ip_address', + [ + { + 'path': ['path', 'to', 'sourceIPAddress'], + 'function': 'source ip address' + } + ] + ) + } + } + + normalizer = Normalizer.load_from_config(config) + assert_equal(normalizer, Normalizer) + assert_equal(normalizer._types_config, expected_config) + + def test_load_from_config_error(self): + """Normalizer - Load normalization config raises ConfigError + """ + config = { + 'logs': { + 'cloudwatch:events': { + 'schema': { + 'account': 'string', + 'source': 'string', + 'key': 'string' + }, + 'parser': 'json', + 'configuration': { + 'normalization': { + 'foo': 'bar' + } + } + } + } + } + assert_raises(ConfigError, Normalizer.load_from_config, config) + + config = { + 'logs': { + 'cloudwatch:events': { + 'schema': { + 'account': 'string', + 'source': 'string', + 'key': 'string' + }, + 'parser': 'json', + 'configuration': { + 'normalization': { + 'ip_address':{ + 'path': ['detail', 'sourceIPAddress'], + 'function': 'source ip address' + } + } + } + }, + 'other_log_type': {} + } + } + assert_raises(ConfigError, Normalizer.load_from_config, config) + + @patch('uuid.uuid4', Mock(return_value=MOCK_RECORD_ID)) + def test_load_from_config_with_flag(self): + """Normalizer - Load From Config with send_to_artifacts flag""" + config = { + 'logs': { + 'cloudwatch:flow_logs': { + 'schema': { + 'source': 'string', + 'destination': 'string', + 'destport': 'string' + }, + 'configuration': { + 'normalization': { + 'ip_address': [ + { + 'path': ['destination'], + 'function': 'Destination IP addresses' + } + ], + 'port': [ + { + 'path': ['destport'], + 'function': 'Destination port number', + 'send_to_artifacts': False + } + ] + } + } + } + } + } + normalizer = Normalizer.load_from_config(config) + + record = { + 'source': '1.1.1.2', + 'destination': '2.2.2.2', + 'destport': '54321' + } + + normalizer.normalize(record, 'cloudwatch:flow_logs') + + expect_result = { + 'source': '1.1.1.2', + 'destination': '2.2.2.2', + 'destport': '54321', + 'streamalert_normalization': { + 'streamalert_record_id': MOCK_RECORD_ID, + 'ip_address': [ + { + 'values': ['2.2.2.2'], + 'function': 'Destination IP addresses' + } + ], + 'port': [ + { + 'values': ['54321'], + 'function': 'Destination port number', + 'send_to_artifacts': False + } + ] + } + } + + assert_equal(record, expect_result) + + 
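The `send_to_artifacts` flag verified just above decides whether a normalized value is later turned into an artifact; values default to being sent unless the schema explicitly opts them out. A hedged sketch of how a consumer such as the Artifact Extractor could honor the flag (field names follow the expected records in these tests, but the real extractor may be structured differently):

```python
def iter_artifacts(normalization, source_type):
    """Yield artifact dicts for normalized values that should be sent downstream."""
    record_id = normalization.get('streamalert_record_id')
    for normalized_type, entries in normalization.items():
        if normalized_type == 'streamalert_record_id':
            continue
        for entry in entries:
            # Omitting the flag means the value is sent; only an explicit False opts out
            if not entry.get('send_to_artifacts', True):
                continue
            for value in entry['values']:
                yield {
                    'function': str(entry.get('function')),
                    'streamalert_record_id': record_id,
                    'source_type': source_type,
                    'type': normalized_type,
                    'value': value,
                }
```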
@patch('uuid.uuid4', Mock(return_value=MOCK_RECORD_ID)) + def test_normalize_condition(self): + """Normalizer - Test normalization when condition applied""" + log_type = 'cloudtrail' + + region = NormalizedType( + 'test_log_type', + 'region', + [ + { + 'path': ['region'], + 'function': 'AWS region' + }, + { + 'path': ['detail', 'awsRegion'], + 'function': 'AWS region', + 'condition': { + 'path': ['detail', 'userIdentity', 'userName'], + 'not_in': ['alice', 'bob'] + } + } + ] + ) + + ipv4 = NormalizedType( + 'test_log_type', + 'ip_address', + [ + { + 'path': ['sourceIPAddress'], + 'function': 'source ip address', + 'condition': { + 'path': ['account'], + 'is': '123456' + } + }, + { + 'path': ['detail', 'source'], + 'function': 'source ip address', + 'condition': { + 'path': ['account'], + 'is_not': '123456' + } + } + ] + ) + + Normalizer._types_config = { + log_type: { + 'region': region, + 'ipv4': ipv4 + } + } + record = self._test_record() + Normalizer.normalize(record, log_type) + + expected_record = { + 'account': 123456, + 'region': 'region_name', + 'detail': { + 'awsRegion': 'region_name', + 'source': '1.1.1.2', + 'userIdentity': { + "userName": "Alice", + "invokedBy": "signin.amazonaws.com" + } + }, + 'sourceIPAddress': '1.1.1.3', + 'streamalert_normalization': { + 'streamalert_record_id': MOCK_RECORD_ID, + 'region': [ + { + 'values': ['region_name'], + 'function': 'AWS region' + } + ], + 'ipv4': [ + { + 'values': ['1.1.1.3'], + 'function': 'source ip address' + } + ] + } + } + assert_equal(record, expected_record) + + def test_match_condition(self): + """Normalizer - Test match condition with different conditions""" + record = self._test_record() + + condition = { + 'path': ['account'], + 'is': '123456' + } + assert_true(Normalizer._match_condition(record, condition)) + + condition = { + 'path': ['account'], + 'is_not': '123456' + } + assert_false(Normalizer._match_condition(record, condition)) + + condition = { + 'path': ['detail', 'awsRegion'], + 'contains': 'region' + } + assert_true(Normalizer._match_condition(record, condition)) + + condition = { + 'path': ['detail', 'awsRegion'], + 'contains': 'not_region' + } + assert_false(Normalizer._match_condition(record, condition)) + + condition = { + 'path': ['detail', 'userIdentity', 'userName'], + 'not_contains': 'alice' + } + assert_false(Normalizer._match_condition(record, condition)) + + condition = { + 'path': ['sourceIPAddress'], + 'in': ['1.1.1.2', '1.1.1.3'] + } + assert_true(Normalizer._match_condition(record, condition)) + + condition = { + 'path': ['sourceIPAddress'], + 'not_in': ['1.1.1.2', '1.1.1.3'] + } + assert_false(Normalizer._match_condition(record, condition)) + + # Only support extract one condition. The result is not quaranteed if multiple conditions + # configured. 
In this test case, it is because 'not_in' condition is checked before + # 'contains' + condition = { + 'path': ['detail', 'userIdentity', 'invokedBy'], + 'contains': 'amazonaws.com', + 'not_in': ['signin.amazonaws.com', 's3.amazonaws.com'] + } + assert_false(Normalizer._match_condition(record, condition)) diff --git a/tests/unit/streamalert/shared/test_utils.py b/tests/unit/streamalert/shared/test_utils.py index 62138144b..09cd689c7 100644 --- a/tests/unit/streamalert/shared/test_utils.py +++ b/tests/unit/streamalert/shared/test_utils.py @@ -1,8 +1,12 @@ """Tests for streamalert/shared/utils.py""" +import json + from nose.tools import assert_equal, assert_false from streamalert.shared import utils +from streamalert.shared.normalize import Normalizer +MOCK_RECORD_ID = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa' def test_valid_ip(): """Utils - Valid IP""" @@ -87,3 +91,66 @@ def test_get_keys(): assert_equal({'ABC', 'DEF', 'GHI'}, set(utils.get_keys(data, 'path'))) assert_equal(2, len(utils.get_keys(data, 'path', max_matches=2))) assert_equal([], utils.get_keys({}, 'path')) + +def generate_categorized_records(normalized=False, count=2): + """Generate categorized records by source types""" + json_data = [ + {'key_{}'.format(cnt): 'value_{}'.format(cnt)} for cnt in range(count) + ] + + if normalized: + for data in json_data: + data[Normalizer.NORMALIZATION_KEY] = { + 'normalized_type1': [ + { + 'values': ['value1'], + 'function': None + } + ], + 'normalized_type2': [ + { + 'values': ['value2', 'value3'], + 'function': None, + 'send_to_artifacts': True + } + ], + 'normalized_type3': [ + { + 'values': ['value2', 'value3'], + 'function': None, + 'send_to_artifacts': False + } + ] + } + + return { + 'log_type_01_sub_type_01': json_data + } + +def generate_artifacts(firehose_records=False): + """Generate sample artifacts for unit tests""" + + normalized_values = [ + ('normalized_type1', 'value1'), + ('normalized_type2', 'value2'), + ('normalized_type2', 'value3'), + ('normalized_type1', 'value1'), + ('normalized_type2', 'value2'), + ('normalized_type2', 'value3') + ] + artifacts = [ + { + 'function': 'None', + 'streamalert_record_id': MOCK_RECORD_ID, + 'source_type': 'log_type_01_sub_type_01', + 'type': type, + 'value': value + } for type, value in normalized_values + ] + + if firehose_records: + return [ + json.dumps(artifact, separators=(',', ':')) + '\n' for artifact in artifacts + ] + + return artifacts diff --git a/tests/unit/streamalert_cli/athena/test_handler.py b/tests/unit/streamalert_cli/athena/test_handler.py index 3d09d72ed..0354f8778 100644 --- a/tests/unit/streamalert_cli/athena/test_handler.py +++ b/tests/unit/streamalert_cli/athena/test_handler.py @@ -17,7 +17,7 @@ from mock import Mock, patch from nose.tools import assert_equal, assert_true -from streamalert.classifier.clients import FirehoseClient +from streamalert.shared.firehose import FirehoseClient from streamalert_cli.athena import handler from streamalert_cli.config import CLIConfig diff --git a/tests/unit/streamalert_cli/athena/test_helpers.py b/tests/unit/streamalert_cli/athena/test_helpers.py index 61294b4f8..8d55e03a2 100644 --- a/tests/unit/streamalert_cli/athena/test_helpers.py +++ b/tests/unit/streamalert_cli/athena/test_helpers.py @@ -18,7 +18,7 @@ from streamalert_cli.athena import helpers from streamalert_cli.config import CLIConfig -from streamalert.classifier.clients import FirehoseClient +from streamalert.shared.firehose import FirehoseClient CONFIG = CLIConfig(config_path='tests/unit/conf') @@ -150,3 +150,17 @@ def 
test_generate_data_table_schema_2(): assert_true(helpers.generate_data_table_schema(config, 'cloudwatch:test_match_types')) FirehoseClient._ENABLED_LOGS.clear() + +def test_generate_artifact_table_schema(): + """CLI - Athena test generate_artifact_table_schema helper""" + result = helpers.generate_artifacts_table_schema() + + expected_result = [ + ('function', 'string'), + ('source_type', 'string'), + ('streamalert_record_id', 'string'), + ('type', 'string'), + ('value', 'string') + ] + + assert_equal(result, expected_result) diff --git a/tests/unit/streamalert_cli/manage_lambda/test_package.py b/tests/unit/streamalert_cli/manage_lambda/test_package.py new file mode 100644 index 000000000..d1d1bf2b3 --- /dev/null +++ b/tests/unit/streamalert_cli/manage_lambda/test_package.py @@ -0,0 +1,47 @@ +""" +Copyright 2017-present Airbnb, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +# pylint: disable=no-self-use,protected-access +import os + +from mock import patch +from pyfakefs import fake_filesystem_unittest + +from streamalert_cli.config import CLIConfig +from streamalert_cli.manage_lambda import package + + +class PackageTest(fake_filesystem_unittest.TestCase): + """Test the packaging logic for the Lambda package""" + TEST_CONFIG_PATH = 'tests/unit/conf' + MOCK_TEMP_PATH = '/tmp/test_packaging' + + def setUp(self): + self.setUpPyfakefs() + self.fs.add_real_directory(self.TEST_CONFIG_PATH) + + config = CLIConfig(self.TEST_CONFIG_PATH) + + with patch('tempfile.gettempdir') as temp_dir_mock: + temp_dir_mock.return_value = self.MOCK_TEMP_PATH + self.packager = package.LambdaPackage(config) + + def test_copy_directory_destination(self): + """CLI - LambdaPackage copy directory using destination""" + self.packager._copy_directory(self.TEST_CONFIG_PATH, destination='conf_test') + + # Ensure the specified destination exists and not the default + self.assertTrue(os.path.exists(self.MOCK_TEMP_PATH + '/streamalert/conf_test')) + self.assertFalse(os.path.exists(self.MOCK_TEMP_PATH + '/streamalert/conf')) diff --git a/tests/unit/streamalert_cli/terraform/test_artifact_extractor.py b/tests/unit/streamalert_cli/terraform/test_artifact_extractor.py new file mode 100644 index 000000000..2c19f7f1e --- /dev/null +++ b/tests/unit/streamalert_cli/terraform/test_artifact_extractor.py @@ -0,0 +1,86 @@ +""" +Copyright 2017-present Airbnb, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import json + +from nose.tools import assert_equal, assert_is_none + +from streamalert_cli.config import CLIConfig +from streamalert_cli.terraform import artifact_extractor + +class TestTerraformArtifactExtractor: + """Test class for test generating Artifact Extractor terrform modules""" + + def __init__(self): + """Init config for the test cases""" + self.config = CLIConfig(config_path='tests/unit/conf') + + def test_generate_artifact_extractor(self): + """CLI - Terraform generate artifact extractor""" + result = artifact_extractor.generate_artifact_extractor(self.config) + assert_is_none(result) + + self.config['global']['infrastructure']['artifact_extractor'] = { + 'enabled': True, + 'firehose_buffer_size': 128, + 'firehose_buffer_interval': 900 + } + + self.config['global']['infrastructure']['firehose']['enabled_logs'] = { + 'unit_test:type_1', + 'unit_test:type_2' + } + + self.config['logs']['unit_test:type_1'] = { + 'schema': {}, + 'configuration': { + 'normalization': { + 'normalized_type': ['original_key1', 'original_key2'] + } + } + } + self.config['logs']['unit_test:type_2'] = { + 'schema': {} + } + + result = artifact_extractor.generate_artifact_extractor(self.config) + expected_result = { + 'module': { + 'artifact_extractor': { + 'source': './modules/tf_artifact_extractor', + 'account_id': '12345678910', + 'prefix': 'unit-test', + 'region': 'us-west-1', + 'glue_catalog_db_name': 'unit-test_streamalert', + 'glue_catalog_table_name': 'artifacts', + 's3_bucket_name': 'unit-test-streamalert-data', + 'stream_name': 'unit_test_streamalert_artifacts', + 'buffer_size': 128, + 'buffer_interval': 900, + 'kms_key_arn': '${aws_kms_key.server_side_encryption.arn}', + 'schema': [ + ['function', 'string'], + ['source_type', 'string'], + ['streamalert_record_id', 'string'], + ['type', 'string'], + ['value', 'string'] + ] + } + } + } + + # FIMME: not sure why assert_equal between result (defaultdict) and expected_result (dict) + # fails. 
+ assert_equal(json.dumps(result), json.dumps(expected_result)) diff --git a/tests/unit/streamalert_cli/terraform/test_generate.py b/tests/unit/streamalert_cli/terraform/test_generate.py index da6c07a1f..23809e97c 100644 --- a/tests/unit/streamalert_cli/terraform/test_generate.py +++ b/tests/unit/streamalert_cli/terraform/test_generate.py @@ -93,7 +93,7 @@ def test_generate_main(self): tf_main_expected = { 'provider': { 'aws': { - 'version': '~> 2.28.1', # Changes to this should require unit test update + 'version': '~> 2.48.0', # Changes to this should require unit test update 'region': 'us-west-1' } }, @@ -358,9 +358,11 @@ def test_generate_cloudtrail_minimal(self): """CLI - Terraform Generate CloudTrail Module, Minimal Settings""" cluster_name = 'advanced' self.config['clusters']['advanced']['modules']['cloudtrail'] = { + 's3_settings': { + 'cross_account_ids': ['456789012345'], + 'enable_events': False, + }, 'send_to_cloudwatch': False, - 'enable_s3_events': False, - 's3_cross_account_ids': ['456789012345'], } cloudtrail.generate_cloudtrail( cluster_name, @@ -387,10 +389,12 @@ def test_generate_cloudtrail_with_s3_events(self): """CLI - Terraform Generate CloudTrail Module, With S3 Events""" cluster_name = 'advanced' self.config['clusters']['advanced']['modules']['cloudtrail'] = { + 's3_settings':{ + 'bucket_name': 'unit-test-bucket', + 'cross_account_ids': ['456789012345'], + 'enable_events': True, + }, 'send_to_cloudwatch': False, - 'enable_s3_events': True, - 's3_cross_account_ids': ['456789012345'], - 's3_bucket_name': 'unit-test-bucket' } cloudtrail.generate_cloudtrail( cluster_name, @@ -420,10 +424,10 @@ def test_generate_cloudtrail_with_s3_events(self): 'bucket_name': 'unit-test-bucket', 'filters': [ { - 'filter_prefix': 'AWSLogs/12345678910/' + 'filter_prefix': 'AWSLogs/12345678910/CloudTrail/' }, { - 'filter_prefix': 'AWSLogs/456789012345/' + 'filter_prefix': 'AWSLogs/456789012345/CloudTrail/' } ] } @@ -435,8 +439,10 @@ def test_generate_cloudtrail_with_cloudwatch_logs(self): """CLI - Terraform Generate CloudTrail Module, With CloudWatch Logs""" cluster_name = 'advanced' self.config['clusters']['advanced']['modules']['cloudtrail'] = { + 's3_settings': { + 'enable_events': False, + }, 'send_to_cloudwatch': True, - 'enable_s3_events': False, } cloudtrail.generate_cloudtrail( cluster_name, @@ -504,9 +510,11 @@ def test_generate_cloudtrail_cloudwatch_logs_and_s3(self): """CLI - Terraform Generate CloudTrail Module, With S3 and CloudWatch Logs""" cluster_name = 'advanced' self.config['clusters']['advanced']['modules']['cloudtrail'] = { + 's3_settings': { + 'cross_account_ids': ['456789012345'], + 'enable_events': True, + }, 'send_to_cloudwatch': True, - 's3_cross_account_ids': ['456789012345'], - 'enable_s3_events': True, } cloudtrail.generate_cloudtrail( cluster_name, @@ -577,7 +585,7 @@ def test_generate_cloudtrail_cloudwatch_logs_and_s3(self): 'bucket_name': 'unit-test-advanced-streamalert-cloudtrail', 'filters': [ { - 'filter_prefix': 'AWSLogs/456789012345/' + 'filter_prefix': 'AWSLogs/456789012345/CloudTrail/' } ] }, @@ -697,6 +705,79 @@ def test_generate_cloudwatch_events_invalid_pattern(self, log_mock): assert_true(log_mock.called) + def test_generate_cwe_cross_acct_map_regions(self): + """CLI - Terraform Generate CloudWatch Events Cross Account Region Map""" + # pylint: disable=protected-access + settings = { + 'accounts': { + '123456789012': ['us-east-1'], + '234567890123': ['us-east-1'] + }, + 'organizations': { + 'o-aabbccddee': ['us-west-1'] + } + } + + result = 
cloudwatch_events._map_regions(settings) + + expected = { + 'us-east-1': { + 'accounts': ['123456789012', '234567890123'], + }, + 'us-west-1': { + 'organizations': ['o-aabbccddee'] + } + } + + assert_equal(expected, result) + + def test_generate_cloudwatch_events_cross_account(self): + """CLI - Terraform Generate CloudWatch Events Cross Account""" + self.config['clusters']['advanced']['modules']['cloudwatch_events']['cross_account'] = { + 'accounts': { + '123456789012': ['us-east-1'], + '234567890123': ['us-east-1'] + }, + 'organizations': { + 'o-aabbccddee': ['us-west-1'] + } + } + cloudwatch_events.generate_cloudwatch_events( + 'advanced', + self.cluster_dict, + self.config + ) + + expected = { + 'cloudwatch_events_advanced': { + 'source': './modules/tf_cloudwatch_events', + 'prefix': 'unit-test', + 'cluster': 'advanced', + 'kinesis_arn': '${module.kinesis_advanced.arn}', + 'event_pattern': '{"account": ["12345678910"]}', + }, + 'cloudwatch_events_cross_account_advanced_us-east-1': { + 'source': './modules/tf_cloudwatch_events/cross_account', + 'region': 'us-east-1', + 'accounts': ['123456789012', '234567890123'], + 'organizations': [], + 'providers': { + 'aws': 'aws.us-east-1' + } + }, + 'cloudwatch_events_cross_account_advanced_us-west-1': { + 'source': './modules/tf_cloudwatch_events/cross_account', + 'region': 'us-west-1', + 'accounts': [], + 'organizations': ['o-aabbccddee'], + 'providers': { + 'aws': 'aws.us-west-1' + } + }, + } + + assert_equal(expected, self.cluster_dict['module']) + def test_generate_cluster_test(self): """CLI - Terraform Generate Test Cluster""" @@ -747,7 +828,6 @@ def test_generate_cluster_advanced(self): 'kinesis_events_advanced', 'flow_logs_advanced', 'cloudtrail_advanced', - 'cloudtrail_s3_events_unit-test_advanced_unit-test-advanced-streamalert-cloudtrail', 'cloudwatch_events_advanced', 's3_events_unit-test_advanced_unit-test-bucket_data', 's3_events_unit-test_advanced_unit-test_cloudtrail_data' diff --git a/tests/unit/streamalert_cli/terraform/test_handlers.py b/tests/unit/streamalert_cli/terraform/test_handlers.py index 2bd382f8c..c529e46aa 100644 --- a/tests/unit/streamalert_cli/terraform/test_handlers.py +++ b/tests/unit/streamalert_cli/terraform/test_handlers.py @@ -20,7 +20,6 @@ from nose.tools import assert_equal, assert_false from pyfakefs import fake_filesystem_unittest -from streamalert_cli.terraform import TERRAFORM_FILES_PATH from streamalert_cli.terraform.handlers import get_tf_modules class TestTerraformHandlers(fake_filesystem_unittest.TestCase): @@ -30,6 +29,7 @@ class TestTerraformHandlers(fake_filesystem_unittest.TestCase): def setUp(self): """Setup before each method""" self.setUpPyfakefs() + self._build_directory = 'unit_test_terraform_path' mock_main_tf_json = { 'module': { @@ -52,18 +52,18 @@ def setUp(self): } # fake *.tf.json files self.fs.create_file( - os.path.join(TERRAFORM_FILES_PATH, 'main.tf.json'), + os.path.join(self._build_directory, 'main.tf.json'), contents=json.dumps(mock_main_tf_json) ) self.fs.create_file( - os.path.join(TERRAFORM_FILES_PATH, 'prod.tf.json'), + os.path.join(self._build_directory, 'prod.tf.json'), contents=json.dumps(mock_prod_tf_json) ) @patch('streamalert_cli.terraform.handlers.terraform_generate_handler', Mock(return_value=True)) def test_get_tf_modules_read_tf_json_files(self): """CLI - Terraform handler function get tf modules read all *.tf.json files""" - config = {} + config = Mock(return_value={}, build_directory=self._build_directory) result = get_tf_modules(config) expected_result = { @@ -78,4 
+78,5 @@ def test_get_tf_modules_read_tf_json_files(self): ) def test_get_tf_modules_early_return(self): """CLI - Terraform handler function get tf modules return early""" - assert_false(get_tf_modules(config={}, generate=True)) + config = Mock(return_value={}, build_directory=self._build_directory) + assert_false(get_tf_modules(config, generate=True)) diff --git a/tests/unit/streamalert_cli/test/helpers.py b/tests/unit/streamalert_cli/test/helpers.py index da6162fd9..6541f4976 100644 --- a/tests/unit/streamalert_cli/test/helpers.py +++ b/tests/unit/streamalert_cli/test/helpers.py @@ -19,8 +19,8 @@ @nottest -def basic_test_file_json(): - return json.dumps([basic_test_event_data()]) +def basic_test_file_json(**kwargs): + return json.dumps([basic_test_event_data(**kwargs)]) @nottest diff --git a/tests/unit/streamalert_cli/test/test_handler.py b/tests/unit/streamalert_cli/test/test_handler.py new file mode 100644 index 000000000..d00ebe883 --- /dev/null +++ b/tests/unit/streamalert_cli/test/test_handler.py @@ -0,0 +1,130 @@ +""" +Copyright 2017-present Airbnb, Inc. + +Licensed under the Apache License, Version 2.0 (the 'License'); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an 'AS IS' BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from io import StringIO +import os + +import mock +from mock import patch, MagicMock, Mock +from nose.tools import assert_equal, assert_raises, nottest +from pyfakefs import fake_filesystem_unittest + +from streamalert.shared.config import load_config +from streamalert.shared.exceptions import ConfigError +from streamalert_cli.config import CLIConfig +from streamalert_cli.test.handler import TestRunner +from tests.unit.streamalert_cli.test.helpers import basic_test_file_json + +# Keep nose from trying to treat this as a test +TestRunner = nottest(TestRunner) + + +class TestTestRunner(fake_filesystem_unittest.TestCase): + """Test the TestEventFile class""" + # pylint: disable=protected-access + + TEST_CONFIG_PATH = 'tests/unit/conf' + _DEFAULT_EVENT_PATH = 'rules/community/unit_test/file.json' + + def setUp(self): + cli_config = CLIConfig(config_path='tests/unit/conf') + with patch('streamalert.rules_engine.rules_engine.load_config', + Mock(return_value=load_config(self.TEST_CONFIG_PATH))): + self.runner = TestRunner(MagicMock(), cli_config) + + self.setUpPyfakefs() + + @patch('logging.Logger.debug') + def test_process_test_file_bad_service(self, log_mock): + """StreamAlert CLI - TestRunner Process Test File, Misconfigured Service""" + self.fs.create_file( + self._DEFAULT_EVENT_PATH, + contents=basic_test_file_json( + log='unit_test_simple_log', + source='unit_test_default_stream', + service='s3' # s3 here is a misconfiguration, should be kinesis + ) + ) + + assert_raises(ConfigError, self.runner._process_test_file, self._DEFAULT_EVENT_PATH) + log_mock.assert_has_calls([ + mock.call( + 'Cluster "%s" does not have service "%s" configured as a data source', + 'advanced', + 's3' + ), + mock.call( + 'Cluster "%s" does not have service "%s" configured as a data source', + 'test', + 's3' + ), + mock.call( + 'Cluster "%s" does not have service "%s" configured as a data source', + 'trusted', + 's3' + ) + ], 
any_order=True) + + @patch('logging.Logger.debug') + def test_process_test_file_bad_source(self, log_mock): + """StreamAlert CLI - TestRunner Process Test File, Misconfigured Source""" + self.fs.create_file( + self._DEFAULT_EVENT_PATH, + contents=basic_test_file_json( + log='unit_test_simple_log', + source='nonexistent_source', # invalid source here + service='kinesis' + ) + ) + + assert_raises(ConfigError, self.runner._process_test_file, self._DEFAULT_EVENT_PATH) + log_mock.assert_has_calls([ + mock.call( + 'Cluster "%s" does not have service "%s" configured as a data source', + 'advanced', + 'kinesis' + ), + mock.call( + 'Cluster "%s" does not have service "%s" configured as a data source', + 'trusted', + 'kinesis' + ), + mock.call( + 'Cluster "%s" does not have the source "%s" configured as a data source ' + 'for service "%s"', + 'test', + 'nonexistent_source', + 'kinesis' + ), + ], any_order=True) + + @patch('sys.stdout', new=StringIO()) # patch stdout to suppress integration test result + def test_process_test_file(self): + """StreamAlert CLI - TestRunner Process Test File""" + self.fs.create_file( + self._DEFAULT_EVENT_PATH, + contents=basic_test_file_json( + log='unit_test_simple_log', + source='unit_test_default_stream', # valid source + service='kinesis' # valid service + ) + ) + self.fs.add_real_directory(self.TEST_CONFIG_PATH) + with patch('streamalert.classifier.classifier.config.load_config', + Mock(return_value=load_config(self.TEST_CONFIG_PATH))): + self.runner._process_test_file(self._DEFAULT_EVENT_PATH) + + # The CLUSTER env var should be properly deduced and set now + assert_equal(os.environ['CLUSTER'], 'test') diff --git a/tests/unit/streamalert_cli/test_cli_config.py b/tests/unit/streamalert_cli/test_cli_config.py index 50cfc8edc..aa9662711 100644 --- a/tests/unit/streamalert_cli/test_cli_config.py +++ b/tests/unit/streamalert_cli/test_cli_config.py @@ -61,6 +61,10 @@ def test_load_config(self): """CLI - Load config""" assert_equal(self.config['global']['account']['prefix'], 'unit-test') + def test_terraform_files(self): + """CLI - Terraform Files""" + assert_equal(self.config.terraform_files, {'/test/terraform/file.tf'}) + def test_toggle_metric(self): """CLI - Metric toggling""" self.config.toggle_metrics('athena_partitioner', enabled=True) diff --git a/vagrant/cli/python-virtualenvwrapper/install.sh b/vagrant/cli/python-virtualenvwrapper/install.sh index dfbb68c10..53d34ae17 100644 --- a/vagrant/cli/python-virtualenvwrapper/install.sh +++ b/vagrant/cli/python-virtualenvwrapper/install.sh @@ -1,5 +1,5 @@ # Install python dependencies -apt-get install python-pip virtualenvwrapper -y +apt-get install python-pip python3.7-venv virtualenvwrapper -y # Install Python with the version specified from the deadsnakes ppa apt-get install software-properties-common -y
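The misconfiguration tests above hinge on matching each test event's `service`/`source` pair against every cluster's configured data sources; the mocked debug calls show the two failure modes (service missing from a cluster, then source missing under that service). A hedged sketch of that lookup, assuming a per-cluster `data_sources` mapping of service to source names (the CLI's real structures may differ):

```python
import logging

LOGGER = logging.getLogger(__name__)

def find_cluster_for_event(clusters, service, source):
    """Return the first cluster name that has `source` under `service`, else None."""
    for cluster_name, cluster in clusters.items():
        configured = cluster.get('data_sources', {}).get(service)
        if configured is None:
            LOGGER.debug(
                'Cluster "%s" does not have service "%s" configured as a data source',
                cluster_name, service
            )
            continue
        if source not in configured:
            LOGGER.debug(
                'Cluster "%s" does not have the source "%s" configured as a data source '
                'for service "%s"', cluster_name, source, service
            )
            continue
        return cluster_name  # e.g. used to set the CLUSTER env var before running tests
    return None  # the CLI raises a ConfigError in this case
```

With the fixtures above, the valid `unit_test_default_stream`/`kinesis` pair resolves to the `test` cluster, which is why the final test asserts that `os.environ['CLUSTER']` ends up set to `'test'`.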