diff --git a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs index cf69e7e07..51c0d1188 100644 --- a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs +++ b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs @@ -1,111 +1,146 @@ - -using Serilog; -using Syncfusion.DocIO.DLS; -using Syncfusion.DocIORenderer; -using Syncfusion.Pdf; - - - -namespace MCS.FOI.DocToPDF -{ - public class DocFileProcessor : IDocFileProcessor, IDisposable - { - - - public DocFileProcessor() { } - - public DocFileProcessor(Stream SourceStream) - { - this.SourceStream = SourceStream; - } - - public Stream SourceStream { get; set; } - - public int FailureAttemptCount { get; set; } - - public int WaitTimeinMilliSeconds { get; set; } - - public bool IsSinglePDFOutput { get; set; } - - - private MemoryStream? output = null; - public (bool, Stream) ConvertToPDF() - { - bool converted = false; - string message = string.Empty; - bool _isSinglePDFOutput = IsSinglePDFOutput; - output = new MemoryStream(); - try - { - for (int attempt = 1; attempt <= FailureAttemptCount && !converted; attempt++) - { - try - { - using (WordDocument wordDocument = new WordDocument(SourceStream, Syncfusion.DocIO.FormatType.Automatic)) - { - - wordDocument.RevisionOptions.CommentDisplayMode = CommentDisplayMode.ShowInBalloons; - wordDocument.RevisionOptions.CommentColor = RevisionColor.Blue; - wordDocument.RevisionOptions.ShowMarkup = RevisionType.Deletions | RevisionType.Insertions; - - using (DocIORenderer renderer = new DocIORenderer()) - { - using PdfDocument pdfDocument = renderer.ConvertToPDF(wordDocument); - //Save the PDF file - //Close the instance of document objects - pdfDocument.Save(output); - pdfDocument.Close(true); - converted = true; - - } - - } - } - catch (Exception e) - { - string errorMessage = $"Exception occured while coverting a document file, exception : {e.Message}"; - message = $"Exception happened 
while accessing File, re-attempting count : {attempt} , Error Message : {e.Message} , Stack trace : {e.StackTrace}"; - Log.Error(message); - Console.WriteLine(message); - if (attempt == FailureAttemptCount) - { - throw new Exception(errorMessage); - } - Thread.Sleep(WaitTimeinMilliSeconds); - } - } - } - catch (Exception ex) - { - converted = false; - string error = $"Exception occured while coverting Doc file, exception : {ex.Message} , stacktrace : {ex.StackTrace}"; - Log.Error(error); - Console.WriteLine(error); - throw; - } - return (converted, output); - } - - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - protected virtual void Dispose(bool disposing) - { - if (disposing) - { - if (this.SourceStream != null) - { - this.SourceStream.Close(); - this.SourceStream.Dispose(); - } - - if (output != null) output.Dispose(); - // free managed resources - } - - } - } -} \ No newline at end of file + +using Serilog; +using Syncfusion.DocIO.DLS; +using Syncfusion.DocIO; +using Syncfusion.DocIORenderer; +using Syncfusion.Pdf; +using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Wordprocessing; + + + +namespace MCS.FOI.DocToPDF +{ + public class DocFileProcessor : IDocFileProcessor, IDisposable + { + + + public DocFileProcessor() { } + + public DocFileProcessor(Stream SourceStream) + { + this.SourceStream = SourceStream; + } + + public Stream SourceStream { get; set; } + + public int FailureAttemptCount { get; set; } + + public int WaitTimeinMilliSeconds { get; set; } + + public bool IsSinglePDFOutput { get; set; } + + + private MemoryStream? 
output = null; + public (bool, Stream) ConvertToPDF() + { + bool converted = false; + string message = string.Empty; + bool _isSinglePDFOutput = IsSinglePDFOutput; + output = new MemoryStream(); + try + { + for (int attempt = 1; attempt <= FailureAttemptCount && !converted; attempt++) + { + try + { + using (WordDocument wordDocument = new WordDocument(SourceStream, Syncfusion.DocIO.FormatType.Automatic)) + { + SourceStream.Position = 0; + + using (var docXML = WordprocessingDocument.Open(SourceStream, false)) + { + + DocumentFormat.OpenXml.Wordprocessing.Body body = docXML.MainDocumentPart.Document.Body; + List<string> originalDates = new List<string>(); + foreach (var textItem in body.Descendants<DocumentFormat.OpenXml.Wordprocessing.Text>().Where(textItem => textItem.Text.Contains("DATE"))) + { + var datetext = textItem.Parent.NextSibling().NextSibling(); + originalDates.Add(datetext.InnerText); + } + + List<Entity> datefields = wordDocument.FindAllItemsByProperty(EntityType.Field, "FieldType", FieldType.FieldDate.ToString()); + if (datefields != null) + { + foreach (var (datefield, i) in datefields.Select((datefield, i) => (datefield, i))) + { + var dateField = datefield as WField; + //Takes the owner paragraph. + WParagraph ownerPara = dateField.OwnerParagraph; + int dateFieldIndex = ownerPara.ChildEntities.IndexOf(dateField); + //Removes the date field. + ownerPara.ChildEntities.Remove(dateField); + //Creating a new text range with required date. + WTextRange textRange = new WTextRange(ownerPara.Document); + textRange.Text = originalDates[i];//"February 12, 2023"; + //Inserting the date field with the created text range.
+ ownerPara.ChildEntities.Insert(dateFieldIndex, textRange); + } + } + } + + wordDocument.RevisionOptions.CommentDisplayMode = CommentDisplayMode.ShowInBalloons; + wordDocument.RevisionOptions.CommentColor = RevisionColor.Blue; + wordDocument.RevisionOptions.ShowMarkup = RevisionType.Deletions | RevisionType.Insertions; + + using (DocIORenderer renderer = new DocIORenderer()) + { + using PdfDocument pdfDocument = renderer.ConvertToPDF(wordDocument); + //Save the PDF file + //Close the instance of document objects + pdfDocument.Save(output); + pdfDocument.Close(true); + converted = true; + + } + + } + } + catch (Exception e) + { + string errorMessage = $"Exception occured while coverting a document file, exception : {e.Message}"; + message = $"Exception happened while accessing File, re-attempting count : {attempt} , Error Message : {e.Message} , Stack trace : {e.StackTrace}"; + Log.Error(message); + Console.WriteLine(message); + if (attempt == FailureAttemptCount) + { + throw new Exception(errorMessage); + } + Thread.Sleep(WaitTimeinMilliSeconds); + } + } + } + catch (Exception ex) + { + converted = false; + string error = $"Exception occured while coverting Doc file, exception : {ex.Message} , stacktrace : {ex.StackTrace}"; + Log.Error(error); + Console.WriteLine(error); + throw; + } + return (converted, output); + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + if (this.SourceStream != null) + { + this.SourceStream.Close(); + this.SourceStream.Dispose(); + } + + if (output != null) output.Dispose(); + // free managed resources + } + + } + } +} diff --git a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj index 2c4b45afd..676023de9 100644 --- a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj +++ 
b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj @@ -17,6 +17,7 @@ + diff --git a/api/reviewer_api/models/AnnotationSections.py b/api/reviewer_api/models/AnnotationSections.py index f97588a82..d96fe30ac 100644 --- a/api/reviewer_api/models/AnnotationSections.py +++ b/api/reviewer_api/models/AnnotationSections.py @@ -229,13 +229,9 @@ def __bulknewsections(cls, annots, _pkvannots, redactionlayerid, _foiministryreq "section": annot["sectionsschema"], "createdby": userinfo, "isactive": True, - "version": pkkey["version"] + 1, - "redactionlayerid": redactionlayerid - if pkkey is not None and "version" in pkkey - else 1, - "id": pkkey["id"] - if pkkey is not None and "id" in pkkey - else None, + "redactionlayerid": redactionlayerid, + "version": pkkey["version"] + 1 if pkkey is not None and "version" in pkkey else 1, + "id": pkkey["id"] if pkkey is not None and "id" in pkkey else None } ) idxannots.append(annot["name"]) @@ -401,7 +397,20 @@ def get_by_ministryid(cls, ministryrequestid, redactionlayerid): @classmethod def getredactedsectionsbyrequest(cls, ministryrequestid, redactionlayerid): try: - sql = """select section from public."Sections" where sectionid in + sql = """ + select unnest(xpath('//contents/text()', annotation::xml))::text as section + from "Annotations" a + join public."Documents" d on d.documentid = a.documentid and d.foiministryrequestid = :ministryrequestid + join public."DocumentMaster" dm on dm.documentmasterid = d.documentmasterid and dm.ministryrequestid = :ministryrequestid + left join public."DocumentDeleted" dd on dm.filepath ilike dd.filepath || '%' and dd.ministryrequestid = :ministryrequestid + where a.annotation like '%%freetext%%' + and a.redactionlayerid = :redactionlayerid + and (dd.deleted is null or dd.deleted is false) + and a.isactive = true; + + """ + """ + sql = select section from public."Sections" where sectionid in (select distinct (json_array_elements((as1.section::json->>'ids')::json)->>'id')::integer from 
public."AnnotationSections" as1 join public."Annotations" a on a.annotationname = as1.annotationname @@ -414,13 +423,17 @@ def getredactedsectionsbyrequest(cls, ministryrequestid, redactionlayerid): and (dd.deleted is null or dd.deleted is false) and a.isactive = true) and sectionid != 25 - order by sortorder""" + order by sortorder + """ rs = db.session.execute(text(sql), {"ministryrequestid": ministryrequestid, "redactionlayerid": redactionlayerid}) - sectionstring = "" + sections = [] for row in rs: - sectionstring = sectionstring + row["section"] + ", " - sectionstring = sectionstring[:-2] - return sectionstring + sections += [x.strip() for x in row['section'].split(",")] + if len(sections) > 0: + distinctsections = list(set(sections)) + distinctsections.sort() + return ", ".join(distinctsections) + return None except Exception as ex: logging.error(ex) finally: diff --git a/api/reviewer_api/models/Annotations.py b/api/reviewer_api/models/Annotations.py index 74ea45dc5..f7c9f0285 100644 --- a/api/reviewer_api/models/Annotations.py +++ b/api/reviewer_api/models/Annotations.py @@ -375,12 +375,8 @@ def __bulknewannotations(cls, annots, _pkvannots, redactionlayerid, userinfo): "createdby": userinfo, "isactive": True, "redactionlayerid": redactionlayerid, - "version": pkkey["version"] + 1 - if pkkey is not None and "version" in pkkey - else 1, - "annotationid": pkkey["annotationid"] - if pkkey is not None and "annotationid" in pkkey - else None, + "version": pkkey["version"] + 1 if pkkey is not None and "version" in pkkey else 1, + "annotationid": pkkey["annotationid"] if pkkey is not None and "annotationid" in pkkey else None } ) idxannots.append(annot["name"]) diff --git a/computingservices/DocumentServices/rstreamio/reader/documentservicestreamreader.py b/computingservices/DocumentServices/rstreamio/reader/documentservicestreamreader.py index 74b9edebe..54e00e1ae 100644 --- a/computingservices/DocumentServices/rstreamio/reader/documentservicestreamreader.py +++ 
b/computingservices/DocumentServices/rstreamio/reader/documentservicestreamreader.py @@ -33,10 +33,7 @@ def start(consumer_id: str, start_from: StartFrom = StartFrom.latest): print(f"Starting from {start_from.name}") while True: - print("Reading stream...") messages = stream.read(last_id=last_id, block=BLOCK_TIME) - print("*********** Messages ***********") - print(messages) if messages: for _message in messages: # message_id is the random id created to identify the message @@ -58,4 +55,4 @@ def start(consumer_id: str, start_from: StartFrom = StartFrom.latest): rdb.set(LAST_ID_KEY.format(consumer_id=consumer_id), last_id) print(f"finished processing {message_id}") else: - print(f"No new messages after ID: {last_id}") \ No newline at end of file + logging.info(f"No new messages after ID: {last_id}") \ No newline at end of file diff --git a/computingservices/PDFStitchServices/.sampleenv b/computingservices/PDFStitchServices/.sampleenv index a2a4d558f..839f6cd9a 100644 --- a/computingservices/PDFStitchServices/.sampleenv +++ b/computingservices/PDFStitchServices/.sampleenv @@ -15,6 +15,7 @@ PDFSTITCH_S3_SERVICE= DIVISION_PDF_STITCH_STREAM_KEY=DIVISION-PDF-STITCH DIVISION_BLOB_STITCH_STREAM_KEY=DIVISION-PDF-STITCH +DIVISION_STITCH_FOLDER_PATH=Clean/divisionname ZIPPER_REDIS_HOST= ZIPPER_REDIS_PASSWORD= diff --git a/computingservices/PDFStitchServices/models/zipperproducer.py b/computingservices/PDFStitchServices/models/zipperproducer.py index 527882261..b250a99ea 100644 --- a/computingservices/PDFStitchServices/models/zipperproducer.py +++ b/computingservices/PDFStitchServices/models/zipperproducer.py @@ -2,7 +2,7 @@ class zipperproducer(object): - def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes) -> None: + def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,foldername) -> None: self.jobid = jobid self.requestid = 
requestid self.category=category @@ -12,4 +12,5 @@ def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,min self.ministryrequestid = ministryrequestid self.filestozip = filestozip self.finaloutput = finaloutput - self.attributes = attributes \ No newline at end of file + self.attributes = attributes + self.foldername = foldername \ No newline at end of file diff --git a/computingservices/PDFStitchServices/services/pdfstichservice.py b/computingservices/PDFStitchServices/services/pdfstichservice.py index deeb3c7af..08cfebf0e 100644 --- a/computingservices/PDFStitchServices/services/pdfstichservice.py +++ b/computingservices/PDFStitchServices/services/pdfstichservice.py @@ -14,6 +14,7 @@ import fitz from utils.basicutils import to_json from .zipperproducerservice import zipperproducerservice as zipperservice +from config import division_stitch_folder_path class pdfstitchservice(basestitchservice): @@ -114,7 +115,7 @@ def pdfstitchbasedondivision(self, requestnumber, s3credentials, bcgovcode, cate fitz.TOOLS.store_shrink(100) del writer print(f"save stitched doc to the bytes_stream completed: {datetime.now()}") - filename = f"{requestnumber} - {category} - {division.divisionname}" + filename = f"{requestnumber} - {self.__getfolderfordivisionfiles()} - {division.divisionname}" if numbering_enabled == "True": paginationtext = add_spacing_around_special_character("-",requestnumber) + " | page [x] of [totalpages]" @@ -201,6 +202,10 @@ def __getfinalmessage(self, _message, results=None): "stitchedoutput": stitchedoutput, "filestozip": filestozip } + setattr(_message, "foldername", self.__getfolderfordivisionfiles()) setattr(_message, "finaloutput", finaloutput) setattr(_message, "outputdocumentpath", filestozip) return _message + + def __getfolderfordivisionfiles(self): + return division_stitch_folder_path.split("/")[0] diff --git a/computingservices/PDFStitchServices/services/zipperproducerservice.py 
b/computingservices/PDFStitchServices/services/zipperproducerservice.py index d8480b4d8..82bc6b667 100644 --- a/computingservices/PDFStitchServices/services/zipperproducerservice.py +++ b/computingservices/PDFStitchServices/services/zipperproducerservice.py @@ -14,7 +14,7 @@ def producezipevent(self,finalmessage): try: _zipperrequest = zipperproducer(jobid=finalmessage.jobid,requestid=finalmessage.requestid,category=finalmessage.category,requestnumber=finalmessage.requestnumber, bcgovcode=finalmessage.bcgovcode,createdby=finalmessage.createdby,ministryrequestid=finalmessage.ministryrequestid, - filestozip=to_json(finalmessage.outputdocumentpath),finaloutput=to_json(finalmessage.finaloutput),attributes=to_json(finalmessage.attributes)) + filestozip=to_json(finalmessage.outputdocumentpath),finaloutput=to_json(finalmessage.finaloutput),attributes=to_json(finalmessage.attributes),foldername=finalmessage.foldername) _zipperredisstream = self.zipperredisstream if _zipperredisstream is not None: return _zipperredisstream.add(_zipperrequest.__dict__,id="*") diff --git a/computingservices/ZippingServices/models/zipperproducermessage.py b/computingservices/ZippingServices/models/zipperproducermessage.py index 068642334..deee421e1 100644 --- a/computingservices/ZippingServices/models/zipperproducermessage.py +++ b/computingservices/ZippingServices/models/zipperproducermessage.py @@ -1,5 +1,5 @@ class zipperproducermessage(object): - def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,summarydocuments=None,redactionlayerid=None) -> None: + def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,summarydocuments=None,redactionlayerid=None,foldername=None) -> None: self.jobid = jobid self.requestid = requestid self.category=category @@ -10,5 +10,6 @@ def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,min 
self.filestozip = filestozip self.finaloutput = finaloutput self.attributes = attributes + self.foldername = foldername self.summarydocuments = summarydocuments self.redactionlayerid = redactionlayerid diff --git a/computingservices/ZippingServices/services/zipperservice.py b/computingservices/ZippingServices/services/zipperservice.py index 8f7a2f3d7..dddcc827d 100644 --- a/computingservices/ZippingServices/services/zipperservice.py +++ b/computingservices/ZippingServices/services/zipperservice.py @@ -115,9 +115,12 @@ def __zipfilesandupload(_message, s3credentials): ) tp.seek(0) - zipped_bytes = tp.read() - filepath = __getzipfilepath(_message.category, _message.requestnumber) - print("zipfilename = %s", filepath) + zipped_bytes = tp.read() + if _message.foldername: + filepath = __getzipfilepath(_message.foldername, _message.requestnumber) + else: + filepath = __getzipfilepath(_message.category, _message.requestnumber) + logging.info("zipfilename = %s", filepath) docobj = uploadbytes( filepath, zipped_bytes, @@ -134,9 +137,9 @@ def __zipfilesandupload(_message, s3credentials): zipped_bytes = None -def __getzipfilepath(category, filename): +def __getzipfilepath(foldername, filename): return ( - category.capitalize() + "/" + filename + ".zip" - if category is not None + foldername.capitalize() + "/" + filename + ".zip" + if foldername is not None else filename + ".zip" ) diff --git a/openshift/templates/cronjob-restart-pods.yml b/openshift/templates/cronjob-restart-pods.yml index 75db9040b..c17320476 100644 --- a/openshift/templates/cronjob-restart-pods.yml +++ b/openshift/templates/cronjob-restart-pods.yml @@ -60,5 +60,20 @@ spec: oc rollout latest reviewer-pdfstitch-largefiles sleep 30s oc patch dc/reviewer-pdfstitch-largefiles -p "{\"spec\":{\"replicas\": 1}}" + sleep 30s + + oc patch dc/reviewer-zippingservice -p "{\"spec\":{\"replicas\": 0}}" + sleep 30s + oc rollout latest reviewer-zippingservice + sleep 30s + oc patch dc/reviewer-zippingservice -p 
"{\"spec\":{\"replicas\": 1}}" + sleep 30s + + oc patch dc/reviewer-documentservice-test -p "{\"spec\":{\"replicas\": 0}}" + sleep 30s + oc rollout latest reviewer-documentservice-test + sleep 30s + oc patch dc/reviewer-documentservice-test -p "{\"spec\":{\"replicas\": 1}}" + sleep 30s restartPolicy: OnFailure diff --git a/sample.env b/sample.env index da1d80e94..658eb4fb1 100644 --- a/sample.env +++ b/sample.env @@ -107,6 +107,7 @@ PDFSTITCH_S3_REGION= PDFSTITCH_S3_SERVICE= DIVISION_PDF_STITCH_STREAM_KEY=DIVISION-PDF-STITCH-{add name or initial here} +DIVISION_STITCH_FOLDER_PATH=Clean/divisionname FOI_RECORD_FORMATS= FILE_CONVERSION_SYNCFUSIONKEY=