diff --git a/.gitignore b/.gitignore
index 3e5f06c34..f9395bcd2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -100,3 +100,5 @@ bld/
 MCS.FOI.S3FileConversion/MCS.FOI.S3FileConversion/QtBinariesWindows/
 computingservices/ZippingServices/env/*
 openshift/templates/zippingservice/zipper.env
+*.locenv
+
diff --git a/api/reviewer_api/resources/redaction.py b/api/reviewer_api/resources/redaction.py
index 4b633b1f3..3552fb3d7 100644
--- a/api/reviewer_api/resources/redaction.py
+++ b/api/reviewer_api/resources/redaction.py
@@ -342,7 +342,7 @@ def post():
         try:
             requestjson = request.get_json()
             print("\nrequestjson:",requestjson)
-            if(requestjson['bcgovcode'] == "mcf"):
+            if(requestjson['bcgovcode'] == "mcf" and requestjson['requesttype'] == "personal"):
                 finalpackageschema = MCFFinalPackageSchema().load(requestjson)
             else:
                 finalpackageschema = FinalPackageSchema().load(requestjson)
diff --git a/api/reviewer_api/schemas/finalpackage.py b/api/reviewer_api/schemas/finalpackage.py
index 94f5f0b4b..0cf5ee155 100644
--- a/api/reviewer_api/schemas/finalpackage.py
+++ b/api/reviewer_api/schemas/finalpackage.py
@@ -30,6 +30,7 @@ class FinalPackageSchema(Schema):
     )
     summarydocuments = fields.Nested(SummarySchema, allow_none=True)
     redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
+    requesttype = fields.Str(data_key="requesttype", allow_none=False)
 
 class SummaryRecordSchema(Schema):
     recordname = fields.Str(data_key="recordname", allow_none=True)
@@ -53,4 +54,5 @@ class MCFFinalPackageSchema(Schema):
         AttributeSchema, many=True, required=True, allow_none=False
     )
     summarydocuments = fields.Nested(MCFSummarySchema, allow_none=True)
-    redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
\ No newline at end of file
+    redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
+    requesttype = fields.Str(data_key="requesttype", allow_none=False)
\ No newline at end of file
diff --git a/api/reviewer_api/schemas/redline.py b/api/reviewer_api/schemas/redline.py
index 27db2aa60..bdd484b3d 100644
--- a/api/reviewer_api/schemas/redline.py
+++ b/api/reviewer_api/schemas/redline.py
@@ -29,4 +29,5 @@ class RedlineSchema(Schema):
         AttributeSchema, many=True, required=True, allow_none=False
     )
     summarydocuments = fields.Nested(SummarySchema, allow_none=True)
-    redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
\ No newline at end of file
+    redactionlayerid = fields.Int(data_key="redactionlayerid", allow_none=False)
+    requesttype = fields.Str(data_key="requesttype", allow_none=False)
\ No newline at end of file
diff --git a/api/reviewer_api/services/radactionservice.py b/api/reviewer_api/services/radactionservice.py
index f12b53969..cd63eab93 100644
--- a/api/reviewer_api/services/radactionservice.py
+++ b/api/reviewer_api/services/radactionservice.py
@@ -145,7 +145,8 @@ def __preparemessageforsummaryservice(self, messageschema, userinfo, job):
             "finaloutput": to_json(""),
             "attributes": to_json(messageschema["attributes"]),
             "summarydocuments": json.dumps(messageschema["summarydocuments"]),
-            "redactionlayerid": json.dumps(messageschema["redactionlayerid"])
+            "redactionlayerid": json.dumps(messageschema["redactionlayerid"]),
+            "requesttype": messageschema["requesttype"],
         }
         return _message
diff --git a/computingservices/DedupeServices/requirements.txt b/computingservices/DedupeServices/requirements.txt
index 8128a986f..e51e508cd 100644
Binary files a/computingservices/DedupeServices/requirements.txt and b/computingservices/DedupeServices/requirements.txt differ
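Reviewer note on the new requesttype field: a minimal sketch of how it behaves under marshmallow 3.x validation (schema and payload below are trimmed-down stand-ins, not the real schemas). Because the field is declared with allow_none=False but not required=True, an explicit null is rejected while a payload that omits the key entirely still loads cleanly, which matters for callers that predate this change.

    from marshmallow import Schema, fields, ValidationError

    class FinalPackageSketchSchema(Schema):
        bcgovcode = fields.Str(allow_none=False)
        requesttype = fields.Str(allow_none=False)  # no required=True: a missing key still loads

    try:
        FinalPackageSketchSchema().load({"bcgovcode": "mcf", "requesttype": None})
    except ValidationError as err:
        print(err.messages)  # {'requesttype': ['Field may not be null.']}

    # Omitting the key does not raise; the loaded dict simply lacks 'requesttype'.
    print(FinalPackageSketchSchema().load({"bcgovcode": "mcf"}))  # {'bcgovcode': 'mcf'}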
diff --git a/computingservices/DedupeServices/services/s3documentservice.py b/computingservices/DedupeServices/services/s3documentservice.py
index ff99843c9..485f2ffa3 100644
--- a/computingservices/DedupeServices/services/s3documentservice.py
+++ b/computingservices/DedupeServices/services/s3documentservice.py
@@ -12,8 +12,11 @@
 from html import escape
 import hashlib
 import uuid
+import boto3
+from botocore.config import Config
 from re import sub
 import fitz
+import PyPDF2
 from utils import (
     gets3credentialsobject,
     getdedupeproducermessage,
@@ -24,6 +27,22 @@
     request_management_api,
     file_conversion_types
 )
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.pdfbase import pdfmetrics
+import os
+
+# Get the directory of the current Python file (inside the 'services' folder)
+service_folder_path = os.path.dirname(os.path.abspath(__file__))
+# Navigate to the parent directory (common folder)
+common_folder_path = os.path.dirname(service_folder_path)
+# Construct the path to the 'utils' folder & get the path to the 'BCSans-Regular_2f.ttf' font file inside it
+utils_folder_path = os.path.join(common_folder_path, "utils")
+font_path = os.path.join(utils_folder_path, "fonts", "BCSans-Regular_2f.ttf")
+pdfmetrics.registerFont(TTFont('BC-Sans', font_path))
+
 
 def __getcredentialsbybcgovcode(bcgovcode):
     _conn = getdbconnection()
@@ -93,6 +112,190 @@ def _generate_file_attachments(producermessage, reader, auth):
     #     file_attachments.append(attachment)
     return file_attachments
 
+# New function to split comments into pages
+def split_comments_to_pages(comments,font,font_size, canvas, lines_per_page=50):
+    pages = []
+    current_page = []
+    for comment in comments:
+        print("\n Each comment:",comment)
+        if 'text' in comment:
+            comment_text = f"{comment['text']}"
+            #comment_text = f"Page {comment['page']}: {comment['text']}\n"
+            # Wrap the text to fit within the page width
+            wrapped_lines = wrap_text(comment_text, width=500, font=font, font_size=font_size, canvas=canvas)
+            for line in wrapped_lines:
+                current_page.append(line)
+                if len(current_page) >= lines_per_page:
+                    pages.append(current_page)
+                    current_page = []
+    if current_page:  # Add any remaining comments to the last page
+        pages.append(current_page)
+    print("pages-split_comments_to_pages:",pages)
+    return pages
+
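Reviewer note on the measurement primitive that the pagination above (and wrap_text below) relies on: reportlab's Canvas.stringWidth returns the rendered width of a string in points for a given font name and size, which is what makes a greedy, width-driven word wrap possible. A minimal standalone sketch, using the built-in Helvetica font so it runs without the BC-Sans TTF registered above:

    from io import BytesIO
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter

    c = canvas.Canvas(BytesIO(), pagesize=letter)
    text = "How wide is this string in 12pt Helvetica?"
    print(c.stringWidth(text, "Helvetica", 12))  # width in points (a float)
    # A greedy wrapper keeps appending words while the measured width stays under the limit,
    # which is exactly the loop wrap_text implements in the next hunk.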
+ """ + wrapped_lines = [] + line = "" + text_width = canvas.stringWidth(text, font, font_size) + if text_width <= width: + line = text + else: + words = text.split(" ") + for word in words: + # Check the width of the line with the current word + line_width = canvas.stringWidth(line + word + " ", font, font_size) + if line_width <= width: + line += word + " " + else: + #If the word doesn't fit, append the current line and start a new one + if line: + print("line::",line) + wrapped_lines.append(line) # Append current line + line = "" # Reset line + # Handle long words that need to be broken up + while canvas.stringWidth(word, font, font_size) > width: + # Find the largest part of the word that fits within the width + for i in range(1, len(word) + 1): + part = word[:i] + if canvas.stringWidth(part, font, font_size) > width: + # Append the part that fits and continue with the remaining part + wrapped_lines.append(word[:i - 1]) # Add the part that fits + word = word[i - 1:] # Remaining part of the word + break + # Add the remaining part of the word to the line + line = word + " " + # Append the last line + if line: + wrapped_lines.append(line) + return wrapped_lines + +def _clearmetadata(response, pagecount, reader, s3_access_key_id,s3_secret_access_key,filepath,auth): + # clear metadata + reader2 = PyPDF2.PdfReader(BytesIO(response.content)) + # Check if metadata exists. + if reader2.metadata is not None: + # Create a new PDF file without metadata. + writer = PyPDF2.PdfWriter() + # Copy pages from the original PDF to the new PDF. + for page_num in range(len(reader.pages)): + page = reader2.pages[page_num] + try: + #Function to get all comments type annotations & copy it to a new page + pagecount, writer= createpagesforcomments(page, page_num, writer, reader2, pagecount) + except Exception as e: + print(f"Error in creating new page with comment annotations: {e}") + buffer = BytesIO() + writer.write(buffer) + try: + # Now, flatten the PDF content using the __flattenfitz function + flattened_buffer = __flattenfitz(buffer.getvalue()) + except Exception as e: + print(f"Error in flatenning pdf: {e}") + + client = boto3.client('s3',config=Config(signature_version='s3v4'), + endpoint_url='https://{0}/'.format(dedupe_s3_host), + aws_access_key_id= s3_access_key_id, + aws_secret_access_key= s3_secret_access_key, + region_name= dedupe_s3_region + ) + copyresponse = client.copy_object( + CopySource="/" + "/".join(filepath.split("/")[3:]), # /Bucket-name/path/filename + Bucket=filepath.split("/")[3], # Destination bucket + Key= "/".join(filepath.split("/")[4:])[:-4] + 'ORIGINAL' + '.pdf' # Destination path/filename + ) + uploadresponse = requests.put( + filepath, + data= flattened_buffer.getvalue(), #buffer.getvalue(), + auth=auth + ) + uploadresponse.raise_for_status() + return pagecount + +def __flattenfitz(docbytesarr): + doc = fitz.open(stream=BytesIO(docbytesarr)) + out = fitz.open() # output PDF + for page in doc: + w, h = page.rect.br # page width / height taken from bottom right point coords + outpage = out.new_page(width=w, height=h) # out page has same dimensions + pix = page.get_pixmap(dpi=150) # set desired resolution + outpage.insert_image(page.rect, pixmap=pix) + # Saving the flattened PDF to a buffer + buffer = BytesIO() + out.save(buffer, garbage=3, deflate=True) + buffer.seek(0) # Reset the buffer to the beginning + return buffer + +def __rendercommentsonnewpage(comments,pagecount,writer,parameters): + try: + comments_pdf = BytesIO() + c = canvas.Canvas(comments_pdf, pagesize=letter) + 
+def __rendercommentsonnewpage(comments,pagecount,writer,parameters):
+    try:
+        comments_pdf = BytesIO()
+        c = canvas.Canvas(comments_pdf, pagesize=letter)
+        font = parameters.get("font")
+        font_size = parameters.get("fontsize")
+        width = parameters.get("width")
+        height = parameters.get("height")
+        currentpagesize = (width, height)
+        comment_pages = split_comments_to_pages(comments, font, font_size, c, lines_per_page=50)
+        for comment_page in comment_pages:
+            text = c.beginText(40, 750)
+            text.setFont(font, font_size)
+            for line in comment_page:
+                text.textLine(line)
+            c.setPageSize(currentpagesize)
+            c.drawText(text)
+            c.showPage()
+            pagecount += 1
+        c.save()
+        comments_pdf.seek(0)
+        comments_pdf_reader = PyPDF2.PdfReader(comments_pdf)
+        writer.add_page(comments_pdf_reader.pages[0])  # Add comments as a new page
+        return pagecount,writer
+    except Exception as e:
+        print(f"Error in rendering comments on new page in pdf: {e}")
+
+def createpagesforcomments(page, page_num, writer, reader2, pagecount):
+    # Check if the page contains annotations
+    if "/Annots" in page:
+        comments = []
+        annotations = page["/Annots"]
+        # Create a new PDF overlay with reportlab to draw annotation content
+        annotation_overlay = BytesIO()
+        c = canvas.Canvas(annotation_overlay, pagesize=letter)
+        pagenum = page_num + 1
+        for annot in annotations:
+            annotation_obj = annot.get_object()
+            subtype = annotation_obj["/Subtype"]
+            #print("\nAnnotation Object:", annotation_obj)
+            # Flatten comments - collect all the annots
+            if subtype == "/Text" and "/Contents" in annotation_obj:
+                comment = annotation_obj["/Contents"]
+                comments.append({
+                    'page': pagenum,  #page_num + 1,
+                    'text': comment
+                })
+        # Finalize annotation overlay for the page
+        c.save()
+        annotation_overlay.seek(0)
+        # Merge the overlay (annotations rendered as static) onto the original PDF page
+        overlay_pdf = PyPDF2.PdfReader(annotation_overlay)
+        if len(overlay_pdf.pages) > 0:
+            overlay_page = overlay_pdf.pages[0]
+            page.merge_page(overlay_page)
+        writer.add_page(page)
+        if comments:
+            try:
+                parameters = get_page_properties(reader2, page_num)
+                # If there are comments, create an additional page for them
+                pagecount,writer = __rendercommentsonnewpage(comments,pagecount,writer,parameters)
+            except Exception as e:
+                print(f"Error in rendering comments on new page in pdf: {e}")
+    else:
+        writer.add_page(page)
+    return pagecount, writer
+
 
 def gets3documenthashcode(producermessage):
     s3credentials = __getcredentialsbybcgovcode(producermessage.bcgovcode)
     s3_access_key_id = s3credentials.s3accesskey
@@ -173,9 +376,14 @@ def gets3documenthashcode(producermessage):
                 "Content-Type": "application/json",
             }
         )
-        saveresponse.raise_for_status()
+        saveresponse.raise_for_status()
         fitz_reader.close()
-
+        # clear metadata
+        try:
+            pagecount = _clearmetadata(response, pagecount, reader, s3_access_key_id,s3_secret_access_key,filepath,auth)
+        except Exception as e:
+            print(f"Exception while clearing metadata/flattening: {e}")
+
     elif extension.lower() in file_conversion_types:
         # "Extension different {0}, so need to download pdf here for pagecount!!".format(extension))
         pdfresponseofconverted = requests.get(
@@ -193,3 +401,22 @@ def gets3documenthashcode(producermessage):
             sig.update(line)
 
     return (sig.hexdigest(), pagecount)
+
+
+def get_page_properties(original_pdf, pagenum, font="BC-Sans") -> dict:
+    """Getting parameters of the previous page for the new page"""
+    width = original_pdf.pages[pagenum].mediabox.width
+    height = original_pdf.pages[pagenum].mediabox.height
+    if height < 450:
+        fontsize = 10
+    else:
+        fontsize = 12
+    return {
+        "width": width,
+        "height": height,
+        "fontsize": fontsize,
+        "font": font,
+        "numberofpages": len(original_pdf.pages),
+    }
+
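Reviewer note as background for createpagesforcomments above: in PyPDF2 3.x, sticky-note comments are /Text annotations reachable through a page's /Annots array, and each entry is an indirect object that must be resolved with get_object() before reading /Subtype and /Contents. A minimal standalone sketch ("input.pdf" is a placeholder path):

    import PyPDF2

    reader = PyPDF2.PdfReader("input.pdf")  # placeholder path
    for page_num, page in enumerate(reader.pages, start=1):
        for annot in page.get("/Annots") or []:   # /Annots may be absent on a page
            obj = annot.get_object()              # resolve the indirect reference
            if obj.get("/Subtype") == "/Text" and "/Contents" in obj:
                print(f"page {page_num}: {obj['/Contents']}")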
diff --git a/computingservices/DedupeServices/utils/foidedupeconfig.py b/computingservices/DedupeServices/utils/foidedupeconfig.py
index 6e648af28..a51710bc2 100644
--- a/computingservices/DedupeServices/utils/foidedupeconfig.py
+++ b/computingservices/DedupeServices/utils/foidedupeconfig.py
@@ -18,7 +18,6 @@
 dedupe_db_user = os.getenv("DEDUPE_DB_USER")
 dedupe_db_password = os.getenv("DEDUPE_DB_PASSWORD")
 
-dedupe_s3_host = os.getenv("DEDUPE_S3_HOST")
 dedupe_s3_host = os.getenv("DEDUPE_S3_HOST")
 dedupe_s3_region = os.getenv("DEDUPE_S3_REGION")
 dedupe_s3_service = os.getenv("DEDUPE_S3_SERVICE")
diff --git a/computingservices/DedupeServices/utils/fonts/BCSans-Regular_2f.ttf b/computingservices/DedupeServices/utils/fonts/BCSans-Regular_2f.ttf
new file mode 100644
index 000000000..3c0a838de
Binary files /dev/null and b/computingservices/DedupeServices/utils/fonts/BCSans-Regular_2f.ttf differ
diff --git a/computingservices/DocumentServices/rstreamio/message/schemas/redactionsummary.py b/computingservices/DocumentServices/rstreamio/message/schemas/redactionsummary.py
index 979e4d18f..c981780ec 100644
--- a/computingservices/DocumentServices/rstreamio/message/schemas/redactionsummary.py
+++ b/computingservices/DocumentServices/rstreamio/message/schemas/redactionsummary.py
@@ -30,7 +30,7 @@ def __init__(self, sorteddocuments, pkgdocuments) -> None:
 
 class RedactionSummaryMessage(object):
     def __init__(self, jobid, requestid, ministryrequestid, category, requestnumber,
-                 bcgovcode, createdby, filestozip, finaloutput, attributes, summarydocuments ,redactionlayerid) -> None:
+                 bcgovcode, createdby, filestozip, finaloutput, attributes, summarydocuments ,redactionlayerid,requesttype) -> None:
         self.jobid = jobid
         self.requestid = requestid
         self.ministryrequestid = ministryrequestid
@@ -43,6 +43,7 @@ def __init__(self, jobid, requestid, ministryrequestid, category, requestnumber,
         self.attributes = attributes
         self.summarydocuments = summarydocuments
         self.redactionlayerid = redactionlayerid
+        self.requesttype = requesttype
 
 
 def get_in_redactionsummary_msg(producer_json):
diff --git a/computingservices/DocumentServices/services/dts/redactionsummary.py b/computingservices/DocumentServices/services/dts/redactionsummary.py
index a70208a3f..89fded2ef 100644
--- a/computingservices/DocumentServices/services/dts/redactionsummary.py
+++ b/computingservices/DocumentServices/services/dts/redactionsummary.py
@@ -2,15 +2,17 @@
 from rstreamio.message.schemas.redactionsummary import get_in_summary_object,get_in_summarypackage_object
 import json
 from collections import defaultdict
+import traceback
 
 class redactionsummary():
 
     def prepareredactionsummary(self, message, documentids, pageflags, programareas):
-        if message.bcgovcode == 'mcf' and message.category == "responsepackage":
+        _ismcfpersonalrequest = True if message.bcgovcode == 'mcf' and message.requesttype == 'personal' else False
+        if _ismcfpersonalrequest and message.category == "responsepackage":
             redactionsummary = self.__packagesummaryforcfdrequests(message, documentids)
         else:
             redactionsummary = self.__packaggesummary(message, documentids, pageflags, programareas)
-        if message.category == "responsepackage" and message.bcgovcode != 'mcf':
+        if message.category == "responsepackage" and _ismcfpersonalrequest == False:
             consolidated_redactions = []
             for entry in redactionsummary['data']:
                 consolidated_redactions += entry['sections']
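A small readability note on the flag introduced above, offered as a hypothetical simplification rather than part of the patch: both comparisons already yield booleans, so the conditional expression reduces to the bare conjunction, and the equality test against False reads more naturally as a negation.

    # Equivalent, shorter form of the flag:
    _ismcfpersonalrequest = message.bcgovcode == 'mcf' and message.requesttype == 'personal'
    # And the second branch condition:
    # if message.category == "responsepackage" and not _ismcfpersonalrequest: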
@@ -25,48 +27,57 @@ def __getrangenumber(self, rangeval):
 
     def __packaggesummary(self, message, documentids, pageflags, programareas):
         try:
-            # print("\nInside __packaggesummary")
+            print("\nInside __packaggesummary")
             redactionlayerid = self.__getredactionlayerid(message)
             summarymsg = message.summarydocuments
             summaryobject = get_in_summary_object(summarymsg)
             ordereddocids = summaryobject.sorteddocuments
             stitchedpagedata = documentpageflag().getpagecount_by_documentid(message.ministryrequestid, ordereddocids)
             totalpagecount = self.__calculate_totalpages(stitchedpagedata)
-            # print("\ntotalpagecount",totalpagecount)
+            print("\n __packaggesummary stitchedpagedata",stitchedpagedata)
+            print("\n __packaggesummary totalpagecount",totalpagecount)
             if totalpagecount <=0:
                 return
             _pageflags = self.__transformpageflags(pageflags)
-            # print("\n_pageflags",_pageflags)
+            print("\n_pageflags",_pageflags)
             summarydata = []
             docpageflags = documentpageflag().get_documentpageflag(message.ministryrequestid, redactionlayerid, ordereddocids)
+            print("\n docpageflags",docpageflags)
             deletedpages = self.__getdeletedpages(message.ministryrequestid, ordereddocids)
             skippages= []
             pagecount = 0
-            for docid in ordereddocids:
-                if docid in documentids:
-                    docdeletedpages = deletedpages[docid] if docid in deletedpages else []
-                    docpageflag = docpageflags[docid]
-                    for pageflag in _pageflags:
-                        filteredpages = self.__get_pages_by_flagid(docpageflag["pageflag"], docdeletedpages, pagecount, pageflag["pageflagid"], message.category)
-                        if len(filteredpages) > 0:
-                            originalpagenos = [pg['originalpageno'] for pg in filteredpages]
-                            docpagesections = documentpageflag().getsections_by_documentid_pageno(redactionlayerid, docid, originalpagenos)
-                            docpageconsults = self.__get_consults_by_pageno(programareas, docpageflag["pageflag"], filteredpages)
-                            pageflag['docpageflags'] = pageflag['docpageflags'] + self.__get_pagesection_mapping(filteredpages, docpagesections, docpageconsults)
-                    skippages = self.__get_skippagenos(docpageflag['pageflag'], message.category)
-                    pagecount = (pagecount+stitchedpagedata[docid]["pagecount"])-len(skippages)
-            # print("\n_pageflags1",_pageflags)
-            for pageflag in _pageflags:
-                _data = {}
-                if len(pageflag['docpageflags']) > 0:
-                    _data["flagname"] = pageflag["header"].upper()
-                    _data["pagecount"] = len(pageflag['docpageflags'])
-                    _data["sections"] = self.__format_redaction_summary(pageflag["description"], pageflag['docpageflags'], message.category)
-                    summarydata.append(_data)
+            try:
+                for docid in ordereddocids:
+                    if docid in documentids:
+                        docdeletedpages = deletedpages[docid] if docid in deletedpages else []
+                        if docpageflags is not None and docid in docpageflags.keys():
+                            docpageflag = docpageflags[docid]
+                            for pageflag in _pageflags:
+                                filteredpages = self.__get_pages_by_flagid(docpageflag["pageflag"], docdeletedpages, pagecount, pageflag["pageflagid"], message.category)
+                                if len(filteredpages) > 0:
+                                    originalpagenos = [pg['originalpageno'] for pg in filteredpages]
+                                    docpagesections = documentpageflag().getsections_by_documentid_pageno(redactionlayerid, docid, originalpagenos)
+                                    docpageconsults = self.__get_consults_by_pageno(programareas, docpageflag["pageflag"], filteredpages)
+                                    pageflag['docpageflags'] = pageflag['docpageflags'] + self.__get_pagesection_mapping(filteredpages, docpagesections, docpageconsults)
+                            skippages = self.__get_skippagenos(docpageflag['pageflag'], message.category)
+                        if stitchedpagedata is not None:
+                            pagecount = (pagecount+stitchedpagedata[docid]["pagecount"])-len(skippages)
+                print("\n_pageflags1",_pageflags)
+                for pageflag in _pageflags:
+                    _data = {}
+                    if len(pageflag['docpageflags']) > 0:
+                        _data = {}
+                        _data["flagname"] = pageflag["header"].upper()
+                        _data["pagecount"] = len(pageflag['docpageflags'])
+                        _data["sections"] = self.__format_redaction_summary(pageflag["description"], pageflag['docpageflags'], message.category)
+                        summarydata.append(_data)
+            except (Exception) as err:
+                traceback.print_exc()
+                print('error occurred in __packaggesummary redaction dts service: ', err)
             return {"requestnumber": message.requestnumber, "data": summarydata}
         except (Exception) as error:
+            traceback.print_exc()
             print('error occured in redaction dts service: ', error)
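One observation on the defensive lookups added above, again as a hypothetical simplification rather than part of the patch: dict.get collapses the membership test and the lookup into one call, and .keys() is redundant for membership checks on a dict.

    # Equivalent to: docdeletedpages = deletedpages[docid] if docid in deletedpages else []
    docdeletedpages = deletedpages.get(docid, [])

    # Equivalent to: if docpageflags is not None and docid in docpageflags.keys():
    # (an empty dict is falsy, so both forms skip the body when there is nothing to look up)
    if docpageflags and docid in docpageflags:
        pass  # process docpageflags[docid]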
@@ -117,7 +128,7 @@ def __packagesummaryforcfdrequests(self, message, documentids):
 
             return {"requestnumber": message.requestnumber, "data": summarydata}
         except Exception as error:
-            print('Error occurred in redaction dts service: ', error)
+            print('CFD Error occurred in redaction dts service: ', error)
 
 
     def __calculate_range(self, mapped_flags, docids):
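To summarize the branching that prepareredactionsummary now implements, here is a standalone restatement for review purposes (the helper name is hypothetical; the logic mirrors the hunks above): only MCF *personal* response packages take the CFD path, while MCF general requests fall through to the standard summary like every other ministry.

    def choose_summary_path(bcgovcode: str, requesttype: str, category: str) -> str:
        """Restates the routing above for review; not part of the patch."""
        ismcfpersonal = bcgovcode == 'mcf' and requesttype == 'personal'
        if ismcfpersonal and category == "responsepackage":
            return "CFD package summary"           # __packagesummaryforcfdrequests
        return "standard package/redline summary"  # __packaggesummary

    assert choose_summary_path('mcf', 'personal', 'responsepackage') == "CFD package summary"
    assert choose_summary_path('mcf', 'general', 'responsepackage') == "standard package/redline summary"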
formattedsummary", formattedsummary) template_path='templates/'+documenttypename+'.docx' redaction_summary= documentgenerationservice().generate_pdf(formattedsummary, documenttypename,template_path) divisioname = None @@ -58,7 +60,7 @@ def processmessage(self,incomingmessage): s3uricategoryfolder = category s3uri = stitcheddocs3uri.split(s3uricategoryfolder+"/")[0] + s3uricategoryfolder+"/" filename =self.__get_summaryfilename(message.requestnumber, category, divisioname, stitcheddocfilename) - # print("\n filename:",filename) + print("\n redaction_summary.content length: {0}".format(len(redaction_summary.content))) uploadobj= uploadbytes(filename,redaction_summary.content,s3uri) upload_responses.append(uploadobj) if uploadobj["uploadresponse"].status_code == 200: @@ -72,6 +74,7 @@ def processmessage(self,incomingmessage): summaryfilestozip.append({"filename": uploadobj["filename"], "s3uripath":uploadobj["documentpath"]}) return summaryfilestozip except (Exception) as error: + traceback.print_exc() print('error occured in redaction summary service: ', error) pdfstitchjobactivity().recordjobstatus(message,4,"redactionsummaryfailed",str(error),"summary generation failed") return summaryfilestozip diff --git a/computingservices/DocumentServices/services/zippingservice.py b/computingservices/DocumentServices/services/zippingservice.py index c997af363..49def91ae 100644 --- a/computingservices/DocumentServices/services/zippingservice.py +++ b/computingservices/DocumentServices/services/zippingservice.py @@ -11,6 +11,7 @@ def sendtozipper(self, summaryfiles, message): def preparemessageforzipperservice(self,summaryfiles, message): try: msgjson= json.loads(message) + msgjson.pop('requesttype', None) if summaryfiles and len(summaryfiles) > 0: filestozip_list = json.loads(msgjson['filestozip'])+summaryfiles else: diff --git a/computingservices/DocumentServices/templates/CFD_responsepackage_redaction_summary.docx b/computingservices/DocumentServices/templates/CFD_responsepackage_redaction_summary.docx index a4f656097..fa9a6034a 100644 Binary files a/computingservices/DocumentServices/templates/CFD_responsepackage_redaction_summary.docx and b/computingservices/DocumentServices/templates/CFD_responsepackage_redaction_summary.docx differ diff --git a/computingservices/ZippingServices/services/zipperservice.py b/computingservices/ZippingServices/services/zipperservice.py index 85bf9a5e6..a1eb283ea 100644 --- a/computingservices/ZippingServices/services/zipperservice.py +++ b/computingservices/ZippingServices/services/zipperservice.py @@ -16,6 +16,7 @@ import traceback import PyPDF2 + def processmessage(message): try: s3credentials = getcredentialsbybcgovcode(message.bcgovcode) @@ -144,6 +145,22 @@ def __zipfilesandupload(_message, s3credentials): finally: zipped_bytes = None +def __removesensitivecontent(documentbytes): + # clear metadata + reader2 = PyPDF2.PdfReader(BytesIO(documentbytes)) + # Check if metadata exists. + if reader2.metadata is not None: + # Create a new PDF file without metadata. + writer = PyPDF2.PdfWriter() + # Copy pages from the original PDF to the new PDF. + for page_num in range(len(reader2.pages)): + page = reader2.pages[page_num] + writer.add_page(page) + #writer.remove_links() # to remove comments. 
diff --git a/computingservices/ZippingServices/services/zipperservice.py b/computingservices/ZippingServices/services/zipperservice.py
index 85bf9a5e6..a1eb283ea 100644
--- a/computingservices/ZippingServices/services/zipperservice.py
+++ b/computingservices/ZippingServices/services/zipperservice.py
@@ -16,6 +16,7 @@
 import traceback
 import PyPDF2
 
+
 def processmessage(message):
     try:
         s3credentials = getcredentialsbybcgovcode(message.bcgovcode)
@@ -144,6 +145,22 @@ def __zipfilesandupload(_message, s3credentials):
     finally:
         zipped_bytes = None
 
+def __removesensitivecontent(documentbytes):
+    # clear metadata
+    reader2 = PyPDF2.PdfReader(BytesIO(documentbytes))
+    # Check if metadata exists.
+    if reader2.metadata is not None:
+        # Create a new PDF file without metadata.
+        writer = PyPDF2.PdfWriter()
+        # Copy pages from the original PDF to the new PDF.
+        for page_num in range(len(reader2.pages)):
+            page = reader2.pages[page_num]
+            writer.add_page(page)
+        #writer.remove_links()  # to remove comments.
+        buffer = BytesIO()
+        writer.write(buffer)
+        return buffer.getvalue()
+
 
 def __getzipfilepath(foldername, filename):
     return (
diff --git a/docker-compose.yml b/docker-compose.yml
index 4cb79291a..d3c57fd9a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -17,6 +17,7 @@ services:
       - REACT_APP_PAGE_SELECT_LIMIT=${PAGE_SELECT_LIMIT}
       - REACT_APP_REDACTION_SELECT_LIMIT=${REDACTION_SELECT_LIMIT}
      - REACT_APP_BIG_HTTP_GET_TIMEOUT=${BIG_HTTP_GET_TIMEOUT}
+      - REACT_APP_SESSION_SECURITY_KEY=${REACT_APP_SESSION_SECURITY_KEY}
     volumes:
       - ".:/web"
       - "/web/node_modules"
diff --git a/web/Dockerfile b/web/Dockerfile
index fe5156b67..67fad4772 100644
--- a/web/Dockerfile
+++ b/web/Dockerfile
@@ -14,6 +14,7 @@ ARG FOI_DOCREVIEWER_BASE_API_URL
 ARG REACT_APP_ANNOTATION_PAGE_SIZE
 ARG REACT_APP_PAGE_SELECT_LIMIT
 ARG REACT_APP_REDACTION_SELECT_LIMIT
+ARG REACT_APP_SESSION_SECURITY_KEY
 
 ENV NODE_ENV ${NODE_ENV}
 
@@ -25,6 +26,7 @@ ENV FOI_DOCREVIEWER_BASE_API_URL ${FOI_DOCREVIEWER_BASE_API_URL}
 ENV REACT_APP_ANNOTATION_PAGE_SIZE ${REACT_APP_ANNOTATION_PAGE_SIZE}
 ENV REACT_APP_PAGE_SELECT_LIMIT ${REACT_APP_PAGE_SELECT_LIMIT}
 ENV REACT_APP_REDACTION_SELECT_LIMIT ${REACT_APP_REDACTION_SELECT_LIMIT}
+ENV REACT_APP_SESSION_SECURITY_KEY ${REACT_APP_SESSION_SECURITY_KEY}
 
 # add `/app/node_modules/.bin` to $PATH
 ENV PATH /web/node_modules/.bin:$PATH
diff --git a/web/Dockerfile.local b/web/Dockerfile.local
index 6d136c50e..728ec0f76 100644
--- a/web/Dockerfile.local
+++ b/web/Dockerfile.local
@@ -16,6 +16,7 @@ ARG REACT_APP_ANNOTATION_PAGE_SIZE
 ARG REACT_APP_PAGE_SELECT_LIMIT
 ARG REACT_APP_REDACTION_SELECT_LIMIT
 ARG REACT_APP_BIG_HTTP_GET_TIMEOUT
+ARG REACT_APP_SESSION_SECURITY_KEY
 
 ENV NODE_ENV ${NODE_ENV}
 ENV GENERATE_SOURCEMAP ${GENERATE_SOURCEMAP}
@@ -27,6 +28,7 @@ ENV REACT_APP_ANNOTATION_PAGE_SIZE ${REACT_APP_ANNOTATION_PAGE_SIZE}
 ENV REACT_APP_PAGE_SELECT_LIMIT ${REACT_APP_PAGE_SELECT_LIMIT}
 ENV REACT_APP_REDACTION_SELECT_LIMIT ${REACT_APP_REDACTION_SELECT_LIMIT}
 ENV BIG_HTTP_GET_TIMEOUT ${REACT_APP_BIG_HTTP_GET_TIMEOUT}
+ENV REACT_APP_SESSION_SECURITY_KEY ${REACT_APP_SESSION_SECURITY_KEY}
 
 # add `/app/node_modules/.bin` to $PATH
 ENV PATH /web/node_modules/.bin:$PATH
diff --git a/web/src/components/FOI/Home/ContextMenu.tsx b/web/src/components/FOI/Home/ContextMenu.tsx
index c69fcf869..35f923fcb 100644
--- a/web/src/components/FOI/Home/ContextMenu.tsx
+++ b/web/src/components/FOI/Home/ContextMenu.tsx
@@ -266,8 +266,9 @@ const ContextMenu = ({
         >