Skip to content

Commit

Permalink
Merge pull request #1121 from bcgov/dev-NK-FOIMOD-3433
Browse files (browse the repository at this point in the history)
Clear metadata during dedupe and save a copy of the original file in S3
  • Loading branch information
nkan-aot2 authored Aug 29, 2024
2 parents 506b06e + b0385d3 commit 54702d3
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
25 changes: 24 additions & 1 deletion computingservices/DedupeServices/services/s3documentservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from html import escape
import hashlib
import uuid
import boto3
from botocore.config import Config
from re import sub
import fitz
from utils import (
Expand Down Expand Up @@ -174,8 +176,29 @@ def gets3documenthashcode(producermessage):
}
)
saveresponse.raise_for_status()
fitz_reader.close()

# clear metadata
writer = fitz.open()
writer.insert_pdf(fitz_reader)
client = boto3.client('s3',config=Config(signature_version='s3v4'),
endpoint_url='https://{0}/'.format(dedupe_s3_host),
aws_access_key_id= s3_access_key_id,
aws_secret_access_key= s3_secret_access_key,
region_name= dedupe_s3_region
)
response = client.copy_object(
CopySource="/" + "/".join(filepath.split("/")[3:]), # /Bucket-name/path/filename
Bucket=filepath.split("/")[3], # Destination bucket
Key= "/".join(filepath.split("/")[3:])[:-4] + 'ORIGINAL' + '.pdf' # Destination path/filename
)
response = requests.put(
filepath,
data=writer.tobytes(),
auth=auth
)

fitz_reader.close()

elif extension.lower() in file_conversion_types:
# "Extension different {0}, so need to download pdf here for pagecount!!".format(extension))
pdfresponseofconverted = requests.get(
Expand Down
1 change: 0 additions & 1 deletion computingservices/DedupeServices/utils/foidedupeconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
dedupe_db_user = os.getenv("DEDUPE_DB_USER")
dedupe_db_password = os.getenv("DEDUPE_DB_PASSWORD")

dedupe_s3_host = os.getenv("DEDUPE_S3_HOST")
dedupe_s3_host = os.getenv("DEDUPE_S3_HOST")
dedupe_s3_region = os.getenv("DEDUPE_S3_REGION")
dedupe_s3_service = os.getenv("DEDUPE_S3_SERVICE")
Expand Down

0 comments on commit 54702d3

Please sign in to comment.