diff --git a/run.py b/run.py
index 4f4ae52..fca498a 100755
--- a/run.py
+++ b/run.py
@@ -22,6 +22,7 @@ def main(
     quiet=False,
     validate=True,
     config_file=None,
+    filter_file=None,
 ):
     config = Config(config_file=config_file, validate=validate)
     config_src = config.src
@@ -40,7 +41,7 @@ def main(
         print('📨 Transferring submission data')
 
     def transfer(all_results, url=None):
-        parsed_xml = get_src_submissions_xml(xml_url=url)
+        parsed_xml = get_src_submissions_xml(xml_url=url, filter_file=filter_file)
         submissions = parsed_xml.findall(f'results/{config_src["asset_uid"]}')
         next_ = parsed_xml.find('next').text
         results = transfer_submissions(
@@ -115,8 +116,14 @@ def transfer(all_results, url=None):
         action='store_true',
         help='Suppress stdout',
     )
+    parser.add_argument(
+        '--filter-uuids',
+        '-F',
+        type=str,
+        help='Location of the text file with specific uuids to transfer (one uuid per line).',
+    )
     args = parser.parse_args()
 
     try:
         main(
             limit=args.limit,
@@ -126,6 +133,7 @@ def transfer(all_results, url=None):
             quiet=args.quiet,
             validate=not args.no_validate,
             config_file=args.config_file,
+            filter_file=args.filter_uuids,
         )
     except KeyboardInterrupt:
         print('🛑 Stopping run')
diff --git a/transfer/xml.py b/transfer/xml.py
index 5509d9e..9b2e9ab 100644
--- a/transfer/xml.py
+++ b/transfer/xml.py
@@ -45,14 +45,56 @@ def get_xml_value_media_mapping(values):
     return {get_valid_filename(v):v for v in values}
 
 
-def get_src_submissions_xml(xml_url):
+def get_src_submissions_xml(xml_url, filter_file=None):
+    """Fetch the source submissions XML at `xml_url`, optionally filtered.
+
+    When `filter_file` is given, it must be a text file with one uuid per
+    line (blank lines are ignored); submissions whose `meta/instanceID` is
+    not listed are removed from the `results` element before returning.
+
+    Raises an `Exception` if the filter file cannot be read or if the
+    request does not return HTTP 200.
+    """
+    wanted_uuids = None
+    if filter_file is not None:
+        # Fail loudly, and before any request is made: silently ignoring
+        # an unreadable filter would transfer every submission instead of
+        # only the requested ones.
+        try:
+            with open(filter_file, 'r') as file:
+                wanted_uuids = {
+                    line.strip() for line in file if line.strip()
+                }
+        except (OSError, UnicodeError) as err:
+            raise Exception(
+                f'Could not read uuid filter file: {filter_file}'
+            ) from err
+
     config = Config().src
     res = requests.get(
         url=xml_url, headers=config['headers'], params=config['params']
     )
     if not res.status_code == 200:
         raise Exception('Something went wrong')
-    return ET.fromstring(res.text)
+
+    root = ET.fromstring(res.text)
+    if wanted_uuids is None:
+        return root
+
+    results_element = root.find('.//results')
+    if results_element is None:
+        return root
+
+    # `findall` returns a list, so removing children while looping is safe.
+    for element in results_element.findall(config['asset_uid']):
+        instance_id = element.findtext('./meta/instanceID')
+        if not instance_id:
+            # No instanceID to compare against: leave the element as is.
+            continue
+        # instanceID looks like 'uuid:<uuid>'; compare the bare uuid only.
+        if instance_id.strip().replace('uuid:', '') not in wanted_uuids:
+            results_element.remove(element)
+    return root
 
 
 def submit_data(xml_sub, _uuid, original_uuid, xml_value_media_map):
diff --git a/uuids.txt b/uuids.txt
new file mode 100644
index 0000000..0a39378
--- /dev/null
+++ b/uuids.txt
@@ -0,0 +1 @@
+52f76bc9-f702-447a-acb1-ff715b148e23
\ No newline at end of file