Skip to content

Commit

Permalink
Merge pull request #1148 from bcgov/test-marshal-AH-FOIMOD-2646-main
Browse files Browse the repository at this point in the history
Main <> Test-Marshal Sept 12 2024
  • Loading branch information
Aman-Hundal authored Sep 13, 2024
2 parents b9de45f + f16d2a5 commit 8252581
Show file tree
Hide file tree
Showing 19 changed files with 216 additions and 63 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,5 @@ bld/
MCS.FOI.S3FileConversion/MCS.FOI.S3FileConversion/QtBinariesWindows/
computingservices/ZippingServices/env/*
openshift/templates/zippingservice/zipper.env
*.locenv

2 changes: 1 addition & 1 deletion api/reviewer_api/resources/redaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ def post():
try:
requestjson = request.get_json()
print("\nrequestjson:",requestjson)
if(requestjson['bcgovcode'] == "mcf"):
if(requestjson['bcgovcode'] == "mcf" and requestjson['requesttype'] == "personal"):
finalpackageschema = MCFFinalPackageSchema().load(requestjson)
else:
finalpackageschema = FinalPackageSchema().load(requestjson)
Expand Down
2 changes: 2 additions & 0 deletions api/reviewer_api/schemas/finalpackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class FinalPackageSchema(Schema):
summarydocuments = fields.Nested(SummarySchema, allow_none=True)
redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
pdfstitchjobattributes = fields.Nested(FeeOverrideSchema, allow_none=True, many=False)
requesttype = fields.Str(data_key="requesttype", allow_none=False)

class SummaryRecordSchema(Schema):
recordname = fields.Str(data_key="recordname", allow_none=True)
Expand All @@ -58,3 +59,4 @@ class MCFFinalPackageSchema(Schema):
summarydocuments = fields.Nested(MCFSummarySchema, allow_none=True)
redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
pdfstitchjobattributes = fields.Nested(FeeOverrideSchema, allow_none=True, many=False)
requesttype = fields.Str(data_key="requesttype", allow_none=False)
3 changes: 2 additions & 1 deletion api/reviewer_api/schemas/redline.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ class RedlineSchema(Schema):
AttributeSchema, many=True, required=True, allow_none=False
)
summarydocuments = fields.Nested(SummarySchema, allow_none=True)
redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
requesttype = fields.Str(data_key="requesttype", allow_none=False)
3 changes: 2 additions & 1 deletion api/reviewer_api/services/radactionservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ def __preparemessageforsummaryservice(self, messageschema, userinfo, job):
"attributes": to_json(messageschema["attributes"]),
"summarydocuments": json.dumps(messageschema["summarydocuments"]),
"redactionlayerid": json.dumps(messageschema["redactionlayerid"]),
"feeoverridereason":feeoverridereason
"feeoverridereason":feeoverridereason,
"requesttype": messageschema["requesttype"],
}
return _message

Expand Down
1 change: 1 addition & 0 deletions api/reviewer_api/utils/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class MinistryTeamWithKeycloackGroup(Enum):
AGR = "AGR Ministry Team"
AG = "AG Ministry Team"
BRD = "BRD Ministry Team"
CAF = "CAF Ministry Team"
CAS = "CAS Ministry Team"
CITZ = "CITZ Ministry Team"
CLB = "CLB Ministry Team"
Expand Down
Binary file modified computingservices/DedupeServices/requirements.txt
Binary file not shown.
36 changes: 35 additions & 1 deletion computingservices/DedupeServices/services/s3documentservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
from html import escape
import hashlib
import uuid
import boto3
from botocore.config import Config
from re import sub
import fitz
import PyPDF2
from utils import (
gets3credentialsobject,
getdedupeproducermessage,
Expand Down Expand Up @@ -173,9 +176,40 @@ def gets3documenthashcode(producermessage):
"Content-Type": "application/json",
}
)
saveresponse.raise_for_status()
saveresponse.raise_for_status()
fitz_reader.close()

# clear metadata
reader2 = PyPDF2.PdfReader(BytesIO(response.content))
# Check if metadata exists.
if reader2.metadata is not None:
# Create a new PDF file without metadata.
writer = PyPDF2.PdfWriter()
# Copy pages from the original PDF to the new PDF.
for page_num in range(len(reader.pages)):
page = reader2.pages[page_num]
writer.add_page(page)
#writer.remove_links() # to remove comments.
buffer = BytesIO()
writer.write(buffer)
client = boto3.client('s3',config=Config(signature_version='s3v4'),
endpoint_url='https://{0}/'.format(dedupe_s3_host),
aws_access_key_id= s3_access_key_id,
aws_secret_access_key= s3_secret_access_key,
region_name= dedupe_s3_region
)
copyresponse = client.copy_object(
CopySource="/" + "/".join(filepath.split("/")[3:]), # /Bucket-name/path/filename
Bucket=filepath.split("/")[3], # Destination bucket
Key= "/".join(filepath.split("/")[4:])[:-4] + 'ORIGINAL' + '.pdf' # Destination path/filename
)
uploadresponse = requests.put(
filepath,
data=buffer.getvalue(),
auth=auth
)
uploadresponse.raise_for_status()

elif extension.lower() in file_conversion_types:
# "Extension different {0}, so need to download pdf here for pagecount!!".format(extension))
pdfresponseofconverted = requests.get(
Expand Down
1 change: 0 additions & 1 deletion computingservices/DedupeServices/utils/foidedupeconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
dedupe_db_user = os.getenv("DEDUPE_DB_USER")
dedupe_db_password = os.getenv("DEDUPE_DB_PASSWORD")

dedupe_s3_host = os.getenv("DEDUPE_S3_HOST")
dedupe_s3_host = os.getenv("DEDUPE_S3_HOST")
dedupe_s3_region = os.getenv("DEDUPE_S3_REGION")
dedupe_s3_service = os.getenv("DEDUPE_S3_SERVICE")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __init__(self, sorteddocuments, pkgdocuments) -> None:

class RedactionSummaryMessage(object):
def __init__(self, jobid, requestid, ministryrequestid, category, requestnumber,
bcgovcode, createdby, filestozip, finaloutput, attributes, summarydocuments ,redactionlayerid, feeoverridereason) -> None:
bcgovcode, createdby, filestozip, finaloutput, attributes, summarydocuments ,redactionlayerid, feeoverridereason,requesttype) -> None:
self.jobid = jobid
self.requestid = requestid
self.ministryrequestid = ministryrequestid
Expand All @@ -44,6 +44,7 @@ def __init__(self, jobid, requestid, ministryrequestid, category, requestnumber,
self.summarydocuments = summarydocuments
self.redactionlayerid = redactionlayerid
self.feeoverridereason = feeoverridereason
self.requesttype = requesttype


def get_in_redactionsummary_msg(producer_json):
Expand Down
65 changes: 38 additions & 27 deletions computingservices/DocumentServices/services/dts/redactionsummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
from rstreamio.message.schemas.redactionsummary import get_in_summary_object,get_in_summarypackage_object
import json
from collections import defaultdict
import traceback

class redactionsummary():

def prepareredactionsummary(self, message, documentids, pageflags, programareas):
if message.bcgovcode == 'mcf' and message.category == "responsepackage":
_ismcfpersonalrequest = True if message.bcgovcode == 'mcf' and message.requesttype == 'personal' else False
if _ismcfpersonalrequest and message.category == "responsepackage":
redactionsummary = self.__packagesummaryforcfdrequests(message, documentids)
else:
redactionsummary = self.__packaggesummary(message, documentids, pageflags, programareas)
if message.category == "responsepackage" and message.bcgovcode != 'mcf':
if message.category == "responsepackage" and _ismcfpersonalrequest == False:
consolidated_redactions = []
for entry in redactionsummary['data']:
consolidated_redactions += entry['sections']
Expand All @@ -25,48 +27,57 @@ def __getrangenumber(self, rangeval):

def __packaggesummary(self, message, documentids, pageflags, programareas):
try:
# print("\nInside __packaggesummary")
print("\nInside __packaggesummary")
redactionlayerid = self.__getredactionlayerid(message)
summarymsg = message.summarydocuments
summaryobject = get_in_summary_object(summarymsg)
ordereddocids = summaryobject.sorteddocuments
stitchedpagedata = documentpageflag().getpagecount_by_documentid(message.ministryrequestid, ordereddocids)
totalpagecount = self.__calculate_totalpages(stitchedpagedata)
# print("\ntotalpagecount",totalpagecount)
print("\n __packaggesummary stitchedpagedata",stitchedpagedata)
print("\n __packaggesummary totalpagecount",totalpagecount)

if totalpagecount <=0:
return
_pageflags = self.__transformpageflags(pageflags)
# print("\n_pageflags",_pageflags)
print("\n_pageflags",_pageflags)
summarydata = []
docpageflags = documentpageflag().get_documentpageflag(message.ministryrequestid, redactionlayerid, ordereddocids)
print("\n docpageflags",docpageflags)
deletedpages = self.__getdeletedpages(message.ministryrequestid, ordereddocids)
skippages= []
pagecount = 0
for docid in ordereddocids:
if docid in documentids:
docdeletedpages = deletedpages[docid] if docid in deletedpages else []
docpageflag = docpageflags[docid]
for pageflag in _pageflags:
filteredpages = self.__get_pages_by_flagid(docpageflag["pageflag"], docdeletedpages, pagecount, pageflag["pageflagid"], message.category)
if len(filteredpages) > 0:
originalpagenos = [pg['originalpageno'] for pg in filteredpages]
docpagesections = documentpageflag().getsections_by_documentid_pageno(redactionlayerid, docid, originalpagenos)
docpageconsults = self.__get_consults_by_pageno(programareas, docpageflag["pageflag"], filteredpages)
pageflag['docpageflags'] = pageflag['docpageflags'] + self.__get_pagesection_mapping(filteredpages, docpagesections, docpageconsults)
skippages = self.__get_skippagenos(docpageflag['pageflag'], message.category)
pagecount = (pagecount+stitchedpagedata[docid]["pagecount"])-len(skippages)
# print("\n_pageflags1",_pageflags)
for pageflag in _pageflags:
_data = {}
if len(pageflag['docpageflags']) > 0:
try:
for docid in ordereddocids:
if docid in documentids:
docdeletedpages = deletedpages[docid] if docid in deletedpages else []
if docpageflags is not None and docid in docpageflags.keys():
docpageflag = docpageflags[docid]
for pageflag in _pageflags:
filteredpages = self.__get_pages_by_flagid(docpageflag["pageflag"], docdeletedpages, pagecount, pageflag["pageflagid"], message.category)
if len(filteredpages) > 0:
originalpagenos = [pg['originalpageno'] for pg in filteredpages]
docpagesections = documentpageflag().getsections_by_documentid_pageno(redactionlayerid, docid, originalpagenos)
docpageconsults = self.__get_consults_by_pageno(programareas, docpageflag["pageflag"], filteredpages)
pageflag['docpageflags'] = pageflag['docpageflags'] + self.__get_pagesection_mapping(filteredpages, docpagesections, docpageconsults)
skippages = self.__get_skippagenos(docpageflag['pageflag'], message.category)
if stitchedpagedata is not None:
pagecount = (pagecount+stitchedpagedata[docid]["pagecount"])-len(skippages)
print("\n_pageflags1",_pageflags)
for pageflag in _pageflags:
_data = {}
_data["flagname"] = pageflag["header"].upper()
_data["pagecount"] = len(pageflag['docpageflags'])
_data["sections"] = self.__format_redaction_summary(pageflag["description"], pageflag['docpageflags'], message.category)
summarydata.append(_data)
if len(pageflag['docpageflags']) > 0:
_data = {}
_data["flagname"] = pageflag["header"].upper()
_data["pagecount"] = len(pageflag['docpageflags'])
_data["sections"] = self.__format_redaction_summary(pageflag["description"], pageflag['docpageflags'], message.category)
summarydata.append(_data)
except (Exception) as err:
traceback.print_exc()
print('error occured in __packaggesummary redaction dts service: ', err)
return {"requestnumber": message.requestnumber, "data": summarydata}
except (Exception) as error:
traceback.print_exc()
print('error occured in redaction dts service: ', error)


Expand Down Expand Up @@ -117,7 +128,7 @@ def __packagesummaryforcfdrequests(self, message, documentids):
return {"requestnumber": message.requestnumber, "data": summarydata}

except Exception as error:
print('Error occurred in redaction dts service: ', error)
print('CFD Error occurred in redaction dts service: ', error)


def __calculate_range(self, mapped_flags, docids):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,35 +13,37 @@ class redactionsummaryservice():
def processmessage(self,incomingmessage):
summaryfilestozip = []
message = get_in_redactionsummary_msg(incomingmessage)
#Condition to handle consult packages (no summary files to be created)
if message.category == "consultpackage":
return summaryfilestozip
print('\n 1. get_in_redactionsummary_msg is : {0}'.format(message))
try:
category = message.category
#Condition to handle consult packages (no summary files to be created)
if category == "consultpackage":
return summaryfilestozip
pdfstitchjobactivity().recordjobstatus(message,3,"redactionsummarystarted")
summarymsg = message.summarydocuments
#Condition for handling oipcredline category
bcgovcode= message.bcgovcode
category = message.category
if bcgovcode == 'mcf' and category == 'responsepackage':
requesttype = message.requesttype
if bcgovcode == 'mcf' and requesttype == 'personal' and category == 'responsepackage':
documenttypename= 'CFD_responsepackage_redaction_summary'
else:
documenttypename= category+"_redaction_summary" if category == 'responsepackage' else "redline_redaction_summary"
#print('documenttypename', documenttypename)
print('\n 2. documenttypename', documenttypename)
upload_responses=[]
pageflags = self.__get_pageflags(category)
programareas = documentpageflag().get_all_programareas()
messageattributes= json.loads(message.attributes)
#print("\nmessageattributes:",messageattributes)
print("\n 3. messageattributes:",messageattributes)
divisiondocuments = get_in_summary_object(summarymsg).pkgdocuments
#print("\n divisiondocuments:",divisiondocuments)
print("\n 4. divisiondocuments:",divisiondocuments)
for entry in divisiondocuments:
#print("\n entry:",entry)
if 'documentids' in entry and len(entry['documentids']) > 0 :
# print("\n entry['divisionid']:",entry['divisionid'])
print("\n 5. entry['divisionid']:",entry['divisionid'])
divisionid = entry['divisionid']
documentids = entry['documentids']
formattedsummary = redactionsummary().prepareredactionsummary(message, documentids, pageflags, programareas)
#print("formattedsummary", formattedsummary)
print("\n 6. formattedsummary", formattedsummary)
template_path='templates/'+documenttypename+'.docx'
redaction_summary= documentgenerationservice().generate_pdf(formattedsummary, documenttypename,template_path)
divisioname = None
Expand All @@ -61,7 +63,7 @@ def processmessage(self,incomingmessage):
s3uricategoryfolder = category
s3uri = stitcheddocs3uri.split(s3uricategoryfolder+"/")[0] + s3uricategoryfolder+"/"
filename =self.__get_summaryfilename(message.requestnumber, category, divisioname, stitcheddocfilename)
# print("\n filename:",filename)
print("\n redaction_summary.content length: {0}".format(len(redaction_summary.content)))
uploadobj= uploadbytes(filename,redaction_summary.content,s3uri)
upload_responses.append(uploadobj)
if uploadobj["uploadresponse"].status_code == 200:
Expand All @@ -75,6 +77,7 @@ def processmessage(self,incomingmessage):
summaryfilestozip.append({"filename": uploadobj["filename"], "s3uripath":uploadobj["documentpath"]})
return summaryfilestozip
except (Exception) as error:
traceback.print_exc()
print('error occured in redaction summary service: ', error)
pdfstitchjobactivity().recordjobstatus(message,4,"redactionsummaryfailed",str(error),"summary generation failed")
return summaryfilestozip
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def sendtozipper(self, summaryfiles, message):
def preparemessageforzipperservice(self,summaryfiles, message):
try:
msgjson= json.loads(message)
msgjson.pop('requesttype', None)
if summaryfiles and len(summaryfiles) > 0:
filestozip_list = json.loads(msgjson['filestozip'])+summaryfiles
else:
Expand Down
28 changes: 22 additions & 6 deletions computingservices/ZippingServices/services/zipperservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,14 @@ def __zipfilesandupload(_message, s3credentials):
for fileobj in _jsonfiles:
filename = fileobj["filename"]
print("\nfilename:",filename)

_docbytes = __getdocumentbytearray(fileobj, s3credentials)
_formattedbytes = None
if(filename == "{0}.pdf".format(_message.requestnumber)):
try:
_formattedbytes = __removesensitivecontent(_docbytes)
except Exception:
print(traceback.format_exc())

try:
_formattedbytes = __removesensitivecontent(_docbytes)
except Exception:
print(traceback.format_exc())
zip.writestr(
filename, _docbytes if _formattedbytes is None else _formattedbytes
)
Expand All @@ -144,6 +144,22 @@ def __zipfilesandupload(_message, s3credentials):
finally:
zipped_bytes = None

def __removesensitivecontent(documentbytes):
    """Rebuild a PDF from its pages only, leaving the source metadata behind.

    Args:
        documentbytes: Raw bytes of the PDF document to process.

    Returns:
        The bytes of a newly written PDF containing the same pages when the
        source document exposes metadata; ``None`` when the source has no
        metadata, in which case nothing is rewritten.
    """
    source_pdf = PyPDF2.PdfReader(BytesIO(documentbytes))

    # Nothing to strip: signal "no rewritten document" with None.
    if source_pdf.metadata is None:
        return None

    # Only the pages are copied into the fresh writer, so the source
    # document's metadata is not carried over to the output.
    cleaned_pdf = PyPDF2.PdfWriter()
    for page in source_pdf.pages:
        cleaned_pdf.add_page(page)
    # NOTE: link removal (writer.remove_links()) is currently disabled.

    output = BytesIO()
    cleaned_pdf.write(output)
    return output.getvalue()


def __getzipfilepath(foldername, filename):
return (
Expand Down
1 change: 1 addition & 0 deletions web/src/components/FOI/App.scss
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@

li.modal-message-list-item {
margin: 6px 0;
font-size: 15px;
}

#state-change-dialog-title .MuiIconButton-root {
Expand Down
Loading

0 comments on commit 8252581

Please sign in to comment.