diff --git a/kobo/apps/reports/constants.py b/kobo/apps/reports/constants.py index 61c67b4091..866ac4e636 100644 --- a/kobo/apps/reports/constants.py +++ b/kobo/apps/reports/constants.py @@ -9,4 +9,5 @@ DEFAULT_REPORTS_KEY = 'default' FUZZY_VERSION_ID_KEY = '_version_' -INFERRED_VERSION_ID_KEY = '__inferred_version__' \ No newline at end of file +FUZZY_VERSION_PATTERN = r'^__?version__?(\d{3})?$' +INFERRED_VERSION_ID_KEY = '__inferred_version__' diff --git a/kpi/models/paired_data.py b/kpi/models/paired_data.py index 029a1791ad..a49bcf7eb8 100644 --- a/kpi/models/paired_data.py +++ b/kpi/models/paired_data.py @@ -66,6 +66,7 @@ def __init__( self.paired_data_uid = paired_data_uid self._asset_file = None + self._source_asset = None def __str__(self): return f'' @@ -156,7 +157,11 @@ def get_download_url(self, request): args=(self.asset.uid, self.paired_data_uid, 'xml'), request=request) - def get_source(self) -> Union['Asset', None]: + def get_source(self, force: bool = False) -> Union['Asset', None]: + # if `self._source_asset` has been already set once, use the cache + # object instead of fetching it from DB again. + if not force and self._source_asset: + return self._source_asset # Avoid circular import Asset = self.asset.__class__ # noqa @@ -183,7 +188,9 @@ def get_source(self) -> Union['Asset', None]: ): return None - return source_asset + self._source_asset = source_asset + + return self._source_asset @property def md5_hash(self): diff --git a/kpi/serializers/v2/asset.py b/kpi/serializers/v2/asset.py index e633987c91..34394ee8df 100644 --- a/kpi/serializers/v2/asset.py +++ b/kpi/serializers/v2/asset.py @@ -1,5 +1,6 @@ # coding: utf-8 import json +import re from django.conf import settings from django.utils.translation import gettext as t @@ -8,6 +9,7 @@ from rest_framework.reverse import reverse from rest_framework.utils.serializer_helpers import ReturnList +from kobo.apps.reports.constants import FUZZY_VERSION_PATTERN from kobo.apps.reports.report_data import build_formpack from kpi.constants import ( ASSET_STATUS_DISCOVERABLE, @@ -498,11 +500,20 @@ def validate_data_sharing(self, data_sharing: dict) -> dict: else: asset = self.instance fields = data_sharing['fields'] - form_pack, _unused = build_formpack(asset, submission_stream=[]) + # We used to get all fields for every version for valid fields, + # but the UI shows the latest version only, so only its fields + # can be picked up. It is easier then to compare valid fields with + # user's choice. + form_pack, _unused = build_formpack( + asset, submission_stream=[], use_all_form_versions=False + ) + # We do not want to include the version field. + # See `_infer_version_id()` in `kobo.apps.reports.report_data.build_formpack` + # for field name alternatives. valid_fields = [ f.path for f in form_pack.get_fields_for_versions( form_pack.versions.keys() - ) + ) if not re.match(FUZZY_VERSION_PATTERN, f.path) ] unknown_fields = set(fields) - set(valid_fields) if unknown_fields and valid_fields: @@ -510,6 +521,11 @@ def validate_data_sharing(self, data_sharing: dict) -> dict: 'Some fields are invalid, ' 'choices are: `{valid_fields}`' ).format(valid_fields='`,`'.join(valid_fields)) + + # Force `fields` to be an empty list to avoid useless parsing when + # fetching external xml endpoint (i.e.: /api/v2/assets//paired-data//external.xml) + if sorted(valid_fields) == sorted(fields): + data_sharing['fields'] = [] else: data_sharing['fields'] = [] diff --git a/kpi/serializers/v2/paired_data.py b/kpi/serializers/v2/paired_data.py index 711dcb152b..de24083703 100644 --- a/kpi/serializers/v2/paired_data.py +++ b/kpi/serializers/v2/paired_data.py @@ -6,6 +6,7 @@ from rest_framework import serializers from rest_framework.reverse import reverse +from kobo.apps.reports.constants import FUZZY_VERSION_PATTERN from kobo.apps.reports.report_data import build_formpack from kpi.constants import ( ASSET_TYPE_SURVEY, @@ -124,11 +125,20 @@ def _validate_fields(self, attrs: dict): return source = attrs['source'] - form_pack, _unused = build_formpack(source, submission_stream=[]) + # We used to get all fields for every version for valid fields, + # but the UI shows the latest version only, so only its fields + # can be picked up. It is easier then to compare valid fields with + # user's choice. + form_pack, _unused = build_formpack( + source, submission_stream=[], use_all_form_versions=False + ) + # We do not want to include the version field. + # See `_infer_version_id()` in `kobo.apps.reports.report_data.build_formpack` + # for field name alternatives. valid_fields = [ f.path for f in form_pack.get_fields_for_versions( form_pack.versions.keys() - ) + ) if not re.match(FUZZY_VERSION_PATTERN, f.path) ] source_fields = source.data_sharing.get('fields') or valid_fields @@ -145,6 +155,11 @@ def _validate_fields(self, attrs: dict): } ) + # Force `posted_fields` to be an empty list to avoid useless parsing when + # fetching external xml endpoint (i.e.: /api/v2/assets//paired-data//external.xml) + if sorted(valid_fields) == sorted(posted_fields): + posted_fields = [] + attrs['fields'] = posted_fields def _validate_filename(self, attrs: dict): diff --git a/kpi/tests/test_paired_data.py b/kpi/tests/test_paired_data.py index 9a6889e9b7..385cd9ce6f 100644 --- a/kpi/tests/test_paired_data.py +++ b/kpi/tests/test_paired_data.py @@ -125,7 +125,7 @@ def test_cannot_retrieve_source_if_source_stop_data_sharing(self): self.assertEqual(source, self.source_asset) self.source_asset.data_sharing['enabled'] = False self.source_asset.save() - source = self.paired_data.get_source() + source = self.paired_data.get_source(force=True) self.assertEqual(source, None) def test_cannot_retrieve_source_if_source_remove_perm(self): @@ -134,5 +134,5 @@ def test_cannot_retrieve_source_if_source_remove_perm(self): self.source_asset.remove_perm( self.paired_data.asset.owner, PERM_VIEW_SUBMISSIONS ) - source = self.paired_data.get_source() + source = self.paired_data.get_source(force=True) self.assertEqual(source, None) diff --git a/kpi/utils/xml.py b/kpi/utils/xml.py index 6bc2e7b780..fe99ab0cc1 100644 --- a/kpi/utils/xml.py +++ b/kpi/utils/xml.py @@ -3,6 +3,9 @@ from typing import Optional, Union, List from lxml import etree +from django_request_cache import cache_for_request +from shortuuid import ShortUUID + def strip_nodes( source: Union[str, bytes], @@ -10,12 +13,16 @@ def strip_nodes( use_xpath: bool = False, xml_declaration: bool = False, rename_root_node_to: Optional[str] = None, + bulk_action_cache_key: str = None, ) -> str: """ Returns a stripped version of `source`. It keeps only nodes provided in `nodes_to_keep`. If `rename_root_node_to` is provided, the root node will be renamed to the value of that parameter in the returned XML string. + + A random string can be passed to `bulk_action_cache_key` to get the + XPaths only once if calling `strip_nodes()` several times in a loop. """ # Force `source` to be bytes in case it contains an XML declaration # `etree` does not support strings with xml declarations. @@ -28,7 +35,13 @@ def strip_nodes( root_element = tree.getroot() root_path = tree.getpath(root_element) - def get_xpath_matches(): + # `@cache_for_request` uses the parameters of the function it decorates + # to generate the key under which the returned value of the function is + # stored for cache purpose. + # `cache_key` is only there to serve that purpose and ensure + # `@cache_for_request` uniqueness. + @cache_for_request + def get_xpath_matches(cache_key: str): if use_xpath: xpaths_ = [] for xpath_ in nodes_to_keep: @@ -118,7 +131,14 @@ def remove_root_path(path_: str) -> str: return path_.replace(root_path, '') if len(nodes_to_keep): - xpath_matches = get_xpath_matches() + # Always sends an unique string to `get_xpath_matches()` + # See comments above the function + if bulk_action_cache_key is None: + cache_key = ShortUUID().random(24) + else: + cache_key = bulk_action_cache_key + + xpath_matches = get_xpath_matches(cache_key=cache_key) process_node(root_element, xpath_matches) if rename_root_node_to: diff --git a/kpi/views/v2/paired_data.py b/kpi/views/v2/paired_data.py index e92fc33abc..5b67b653cd 100644 --- a/kpi/views/v2/paired_data.py +++ b/kpi/views/v2/paired_data.py @@ -7,6 +7,7 @@ from rest_framework.decorators import action from rest_framework.response import Response from rest_framework_extensions.mixins import NestedViewSetMixin +from shortuuid import ShortUUID from kpi.constants import SUBMISSION_FORMAT_TYPE_XML from kpi.models import Asset, AssetFile, PairedData @@ -252,6 +253,8 @@ def external(self, request, paired_data_uid, **kwargs): format_type=SUBMISSION_FORMAT_TYPE_XML ) parsed_submissions = [] + allowed_fields = paired_data.allowed_fields + random_uuid = ShortUUID().random(24) for submission in submissions: # Use `rename_root_node_to='data'` to rename the root node of each @@ -263,9 +266,10 @@ def external(self, request, paired_data_uid, **kwargs): parsed_submissions.append( strip_nodes( submission, - paired_data.allowed_fields, + allowed_fields, use_xpath=True, rename_root_node_to='data', + bulk_action_cache_key=random_uuid, ) )