Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SFR-2438: Fix Fulfill URL Manifest Process #495

Merged
merged 8 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class APIUtils():
'muse': 4,
'met': 5,
'isac': 6,
'UofMichigan': 7,
'UofM': 7,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we don't have any files ingested in production under UofM, do we need both?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we have both in QA, I think we will need both!

'UofSC': 8,
'hathitrust': 9,
Expand Down
4 changes: 2 additions & 2 deletions mappings/publisher_backlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ def format_rights(self):
rights_status = rights_elements[0]

if rights_status == 'in copyright':
return '{}|{}||{}|'.format('self.record.source', 'in_copyright', 'In Copyright')
return '{}|{}||{}|'.format(self.record.source, 'in_copyright', 'In Copyright')
elif rights_status == 'public domain':
return '{}|{}||{}|'.format('self.record.source', 'public_domain', 'Public Domain')
return '{}|{}||{}|'.format(self.record.source, 'public_domain', 'Public Domain')

return None
16 changes: 11 additions & 5 deletions processes/file/fulfill_url_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,25 @@ def runProcess(self):

def fetch_and_update_manifests(self, start_timestamp=None):

batches = self.s3_manager.load_batches(self.prefix, self.s3Bucket)
batches = self.s3_manager.load_batches(self.prefix, self.s3_bucket)
if start_timestamp:
#Using JMESPath to extract keys from the JSON batches
filtered_batch_keys = batches.search(f"Contents[?to_string(LastModified) > '\"{start_timestamp}\"'].Key")
for key in filtered_batch_keys:
metadata_object = self.s3Client.get_object(Bucket=self.s3Bucket, Key= f'{key}')
self.update_metadata_object(metadata_object, self.s3Bucket, key)
if not key:
continue

metadata_object = self.s3_manager.s3Client.get_object(Bucket=self.s3_bucket, Key=f'{key}')
self.update_metadata_object(metadata_object, self.s3_bucket, key)
else:
for batch in batches:
if 'Contents' not in batch:
continue

for content in batch['Contents']:
key = content['Key']
metadata_object = self.s3Client.get_object(Bucket=self.s3Bucket, Key= f'{key}')
self.update_metadata_object(metadata_object, self.s3Bucket, key)
metadata_object = self.s3_manager.s3Client.get_object(Bucket=self.s3_bucket, Key=f'{key}')
self.update_metadata_object(metadata_object, self.s3_bucket, key)

def update_metadata_object(self, metadata_object, bucket_name, curr_key):

Expand Down
2 changes: 1 addition & 1 deletion tests/unit/processes/file/test_fulfill_manifest_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def teardown_class(cls):
def test_process(self, mocker):
class TestFulfill(FulfillURLManifestProcess):
def __init__(self):
self.s3Bucket = 'test_aws_bucket'
self.s3_bucket = 'test_aws_bucket'
self.s3_manager = mocker.MagicMock(s3Client=mocker.MagicMock())
self.session = mocker.MagicMock(session='testSession')
self.records = mocker.MagicMock(record='testRecord')
Expand Down
Loading