From 2fbae698c5f7da3078d0a00bb0302e9f10053618 Mon Sep 17 00:00:00 2001 From: "sumathi.thirumani" Date: Thu, 22 Feb 2024 14:58:07 -0800 Subject: [PATCH 01/10] handle exception in annotation section. --- api/reviewer_api/models/AnnotationSections.py | 11 ++++------- api/reviewer_api/models/Annotations.py | 8 ++------ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/api/reviewer_api/models/AnnotationSections.py b/api/reviewer_api/models/AnnotationSections.py index f97588a82..05abe352c 100644 --- a/api/reviewer_api/models/AnnotationSections.py +++ b/api/reviewer_api/models/AnnotationSections.py @@ -229,13 +229,9 @@ def __bulknewsections(cls, annots, _pkvannots, redactionlayerid, _foiministryreq "section": annot["sectionsschema"], "createdby": userinfo, "isactive": True, - "version": pkkey["version"] + 1, - "redactionlayerid": redactionlayerid - if pkkey is not None and "version" in pkkey - else 1, - "id": pkkey["id"] - if pkkey is not None and "id" in pkkey - else None, + "redactionlayerid": redactionlayerid, + "version": pkkey["version"] + 1 if pkkey is not None and "version" in pkkey else 1, + "id": pkkey["id"] if pkkey is not None and "id" in pkkey else None } ) idxannots.append(annot["name"]) @@ -243,6 +239,7 @@ def __bulknewsections(cls, annots, _pkvannots, redactionlayerid, _foiministryreq db.session.commit() return idxannots except Exception as ex: + print(ex) logging.error(ex) finally: db.session.close() diff --git a/api/reviewer_api/models/Annotations.py b/api/reviewer_api/models/Annotations.py index 74ea45dc5..f7c9f0285 100644 --- a/api/reviewer_api/models/Annotations.py +++ b/api/reviewer_api/models/Annotations.py @@ -375,12 +375,8 @@ def __bulknewannotations(cls, annots, _pkvannots, redactionlayerid, userinfo): "createdby": userinfo, "isactive": True, "redactionlayerid": redactionlayerid, - "version": pkkey["version"] + 1 - if pkkey is not None and "version" in pkkey - else 1, - "annotationid": pkkey["annotationid"] - if pkkey is not None and "annotationid" in pkkey - else None, + "version": pkkey["version"] + 1 if pkkey is not None and "version" in pkkey else 1, + "annotationid": pkkey["annotationid"] if pkkey is not None and "annotationid" in pkkey else None } ) idxannots.append(annot["name"]) From 2204f1d5b6d6d27d5fb8cc4d151dfbf3c465d687 Mon Sep 17 00:00:00 2001 From: "sumathi.thirumani" Date: Thu, 22 Feb 2024 14:58:49 -0800 Subject: [PATCH 02/10] remove print statements. --- api/reviewer_api/models/AnnotationSections.py | 1 - 1 file changed, 1 deletion(-) diff --git a/api/reviewer_api/models/AnnotationSections.py b/api/reviewer_api/models/AnnotationSections.py index 05abe352c..59b297efc 100644 --- a/api/reviewer_api/models/AnnotationSections.py +++ b/api/reviewer_api/models/AnnotationSections.py @@ -239,7 +239,6 @@ def __bulknewsections(cls, annots, _pkvannots, redactionlayerid, _foiministryreq db.session.commit() return idxannots except Exception as ex: - print(ex) logging.error(ex) finally: db.session.close() From a36be712daaf7d1cbddbc3429569d8fe69bba6f5 Mon Sep 17 00:00:00 2001 From: "sumathi.thirumani" Date: Thu, 22 Feb 2024 15:45:14 -0800 Subject: [PATCH 03/10] changes to handle gap in data for redacted sections in request. --- api/reviewer_api/models/AnnotationSections.py | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/api/reviewer_api/models/AnnotationSections.py b/api/reviewer_api/models/AnnotationSections.py index 59b297efc..d96fe30ac 100644 --- a/api/reviewer_api/models/AnnotationSections.py +++ b/api/reviewer_api/models/AnnotationSections.py @@ -397,7 +397,20 @@ def get_by_ministryid(cls, ministryrequestid, redactionlayerid): @classmethod def getredactedsectionsbyrequest(cls, ministryrequestid, redactionlayerid): try: - sql = """select section from public."Sections" where sectionid in + sql = """ + select unnest(xpath('//contents/text()', annotation::xml))::text as section + from "Annotations" a + join public."Documents" d on d.documentid = a.documentid and d.foiministryrequestid = :ministryrequestid + join public."DocumentMaster" dm on dm.documentmasterid = d.documentmasterid and dm.ministryrequestid = :ministryrequestid + left join public."DocumentDeleted" dd on dm.filepath ilike dd.filepath || '%' and dd.ministryrequestid = :ministryrequestid + where a.annotation like '%%freetext%%' + and a.redactionlayerid = :redactionlayerid + and (dd.deleted is null or dd.deleted is false) + and a.isactive = true; + + """ + """ + sql = select section from public."Sections" where sectionid in (select distinct (json_array_elements((as1.section::json->>'ids')::json)->>'id')::integer from public."AnnotationSections" as1 join public."Annotations" a on a.annotationname = as1.annotationname @@ -410,13 +423,17 @@ def getredactedsectionsbyrequest(cls, ministryrequestid, redactionlayerid): and (dd.deleted is null or dd.deleted is false) and a.isactive = true) and sectionid != 25 - order by sortorder""" + order by sortorder + """ rs = db.session.execute(text(sql), {"ministryrequestid": ministryrequestid, "redactionlayerid": redactionlayerid}) - sectionstring = "" + sections = [] for row in rs: - sectionstring = sectionstring + row["section"] + ", " - sectionstring = sectionstring[:-2] - return sectionstring + sections += [x.strip() for x in row['section'].split(",")] + if len(sections) > 0: + distinctsections = list(set(sections)) + distinctsections.sort() + return ", ".join(distinctsections) + return None except Exception as ex: logging.error(ex) finally: From 248afbc1e4ba8a84c96c2546376328307e1f3973 Mon Sep 17 00:00:00 2001 From: divyav-aot Date: Mon, 26 Feb 2024 10:45:28 -0500 Subject: [PATCH 04/10] Text changes from Harms to Clean --- computingservices/PDFStitchServices/.sampleenv | 1 + .../PDFStitchServices/models/zipperproducer.py | 5 +++-- .../PDFStitchServices/services/pdfstichservice.py | 7 ++++++- .../PDFStitchServices/services/zipperproducerservice.py | 2 +- .../ZippingServices/models/zipperproducermessage.py | 3 ++- .../ZippingServices/services/zipperservice.py | 8 ++++---- sample.env | 1 + 7 files changed, 18 insertions(+), 9 deletions(-) diff --git a/computingservices/PDFStitchServices/.sampleenv b/computingservices/PDFStitchServices/.sampleenv index a2a4d558f..839f6cd9a 100644 --- a/computingservices/PDFStitchServices/.sampleenv +++ b/computingservices/PDFStitchServices/.sampleenv @@ -15,6 +15,7 @@ PDFSTITCH_S3_SERVICE= DIVISION_PDF_STITCH_STREAM_KEY=DIVISION-PDF-STITCH DIVISION_BLOB_STITCH_STREAM_KEY=DIVISION-PDF-STITCH +DIVISION_STITCH_FOLDER_PATH=Clean/divisionname ZIPPER_REDIS_HOST= ZIPPER_REDIS_PASSWORD= diff --git a/computingservices/PDFStitchServices/models/zipperproducer.py b/computingservices/PDFStitchServices/models/zipperproducer.py index 527882261..b250a99ea 100644 --- a/computingservices/PDFStitchServices/models/zipperproducer.py +++ b/computingservices/PDFStitchServices/models/zipperproducer.py @@ -2,7 +2,7 @@ class zipperproducer(object): - def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes) -> None: + def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,foldername) -> None: self.jobid = jobid self.requestid = requestid self.category=category @@ -12,4 +12,5 @@ def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,min self.ministryrequestid = ministryrequestid self.filestozip = filestozip self.finaloutput = finaloutput - self.attributes = attributes \ No newline at end of file + self.attributes = attributes + self.foldername = foldername \ No newline at end of file diff --git a/computingservices/PDFStitchServices/services/pdfstichservice.py b/computingservices/PDFStitchServices/services/pdfstichservice.py index deeb3c7af..08cfebf0e 100644 --- a/computingservices/PDFStitchServices/services/pdfstichservice.py +++ b/computingservices/PDFStitchServices/services/pdfstichservice.py @@ -14,6 +14,7 @@ import fitz from utils.basicutils import to_json from .zipperproducerservice import zipperproducerservice as zipperservice +from config import division_stitch_folder_path class pdfstitchservice(basestitchservice): @@ -114,7 +115,7 @@ def pdfstitchbasedondivision(self, requestnumber, s3credentials, bcgovcode, cate fitz.TOOLS.store_shrink(100) del writer print(f"save stitched doc to the bytes_stream completed: {datetime.now()}") - filename = f"{requestnumber} - {category} - {division.divisionname}" + filename = f"{requestnumber} - {self.__getfolderfordivisionfiles()} - {division.divisionname}" if numbering_enabled == "True": paginationtext = add_spacing_around_special_character("-",requestnumber) + " | page [x] of [totalpages]" @@ -201,6 +202,10 @@ def __getfinalmessage(self, _message, results=None): "stitchedoutput": stitchedoutput, "filestozip": filestozip } + setattr(_message, "foldername", self.__getfolderfordivisionfiles()) setattr(_message, "finaloutput", finaloutput) setattr(_message, "outputdocumentpath", filestozip) return _message + + def __getfolderfordivisionfiles(self): + return division_stitch_folder_path.split("/")[0] diff --git a/computingservices/PDFStitchServices/services/zipperproducerservice.py b/computingservices/PDFStitchServices/services/zipperproducerservice.py index d8480b4d8..82bc6b667 100644 --- a/computingservices/PDFStitchServices/services/zipperproducerservice.py +++ b/computingservices/PDFStitchServices/services/zipperproducerservice.py @@ -14,7 +14,7 @@ def producezipevent(self,finalmessage): try: _zipperrequest = zipperproducer(jobid=finalmessage.jobid,requestid=finalmessage.requestid,category=finalmessage.category,requestnumber=finalmessage.requestnumber, bcgovcode=finalmessage.bcgovcode,createdby=finalmessage.createdby,ministryrequestid=finalmessage.ministryrequestid, - filestozip=to_json(finalmessage.outputdocumentpath),finaloutput=to_json(finalmessage.finaloutput),attributes=to_json(finalmessage.attributes)) + filestozip=to_json(finalmessage.outputdocumentpath),finaloutput=to_json(finalmessage.finaloutput),attributes=to_json(finalmessage.attributes),foldername=finalmessage.foldername) _zipperredisstream = self.zipperredisstream if _zipperredisstream is not None: return _zipperredisstream.add(_zipperrequest.__dict__,id="*") diff --git a/computingservices/ZippingServices/models/zipperproducermessage.py b/computingservices/ZippingServices/models/zipperproducermessage.py index 55f7aac2a..fc11e8c0d 100644 --- a/computingservices/ZippingServices/models/zipperproducermessage.py +++ b/computingservices/ZippingServices/models/zipperproducermessage.py @@ -1,5 +1,5 @@ class zipperproducermessage(object): - def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes) -> None: + def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,foldername) -> None: self.jobid = jobid self.requestid = requestid self.category=category @@ -10,3 +10,4 @@ def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,min self.filestozip = filestozip self.finaloutput = finaloutput self.attributes = attributes + self.foldername = foldername diff --git a/computingservices/ZippingServices/services/zipperservice.py b/computingservices/ZippingServices/services/zipperservice.py index dda549e79..e61ba79e1 100644 --- a/computingservices/ZippingServices/services/zipperservice.py +++ b/computingservices/ZippingServices/services/zipperservice.py @@ -115,7 +115,7 @@ def __zipfilesandupload(_message, s3credentials): tp.seek(0) zipped_bytes = tp.read() - filepath = __getzipfilepath(_message.category, _message.requestnumber) + filepath = __getzipfilepath(_message.foldername, _message.requestnumber) logging.info("zipfilename = %s", filepath) docobj = uploadbytes( filepath, @@ -133,9 +133,9 @@ def __zipfilesandupload(_message, s3credentials): zipped_bytes = None -def __getzipfilepath(category, filename): +def __getzipfilepath(foldername, filename): return ( - category.capitalize() + "/" + filename + ".zip" - if category is not None + foldername.capitalize() + "/" + filename + ".zip" + if foldername is not None else filename + ".zip" ) diff --git a/sample.env b/sample.env index da1d80e94..658eb4fb1 100644 --- a/sample.env +++ b/sample.env @@ -107,6 +107,7 @@ PDFSTITCH_S3_REGION= PDFSTITCH_S3_SERVICE= DIVISION_PDF_STITCH_STREAM_KEY=DIVISION-PDF-STITCH-{add name or initial here} +DIVISION_STITCH_FOLDER_PATH=Clean/divisionname FOI_RECORD_FORMATS= FILE_CONVERSION_SYNCFUSIONKEY= From 2684d86d09948faed9f397747cda57ffa311e9ca Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 26 Feb 2024 16:46:27 -0800 Subject: [PATCH 05/10] document conversion date updated fix --- .../MCS.FOI.DocToPDF/DocFileProcessor.cs | 38 +++++++++++++++++-- .../MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj | 1 + 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs index cf69e7e07..1b03c7bf1 100644 --- a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs +++ b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs @@ -1,8 +1,11 @@  using Serilog; -using Syncfusion.DocIO.DLS; +using Syncfusion.DocIO.DLS; +using Syncfusion.DocIO; using Syncfusion.DocIORenderer; -using Syncfusion.Pdf; +using Syncfusion.Pdf; +using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Wordprocessing; @@ -43,7 +46,36 @@ public DocFileProcessor(Stream SourceStream) { using (WordDocument wordDocument = new WordDocument(SourceStream, Syncfusion.DocIO.FormatType.Automatic)) { - + SourceStream.Position = 0; + + using (var docXML = WordprocessingDocument.Open(SourceStream, false)) + { + + DocumentFormat.OpenXml.Wordprocessing.Body body = docXML.MainDocumentPart.Document.Body; + List originalDates = new List(); + foreach (var textItem in body.Descendants().Where(textItem => textItem.Text.Contains("DATE"))) + { + var datetext = textItem.Parent.NextSibling().NextSibling(); + originalDates.Add(datetext.InnerText); + } + + List datefields = wordDocument.FindAllItemsByProperty(EntityType.Field, "FieldType", FieldType.FieldDate.ToString()); + foreach (var (datefield, i) in datefields.Select((datefield, i) => (datefield, i))) + { + var dateField = datefield as WField; + //Takes the owner paragraph. + WParagraph ownerPara = dateField.OwnerParagraph; + int dateFieldIndex = ownerPara.ChildEntities.IndexOf(dateField); + //Removes the date field. + ownerPara.ChildEntities.Remove(dateField); + //Creating a new text range with required date. + WTextRange textRange = new WTextRange(ownerPara.Document); + textRange.Text = originalDates[i];//"February 12, 2023"; + //Inserting the date field with the created text range. + ownerPara.ChildEntities.Insert(dateFieldIndex, textRange); + } + } + wordDocument.RevisionOptions.CommentDisplayMode = CommentDisplayMode.ShowInBalloons; wordDocument.RevisionOptions.CommentColor = RevisionColor.Blue; wordDocument.RevisionOptions.ShowMarkup = RevisionType.Deletions | RevisionType.Insertions; diff --git a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj index 2c4b45afd..676023de9 100644 --- a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj +++ b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/MCS.FOI.DocToPDF.csproj @@ -17,6 +17,7 @@ + From a9da10da9a72d1a981a7d0ae0785750f60c5e7f1 Mon Sep 17 00:00:00 2001 From: divyav-aot Date: Tue, 27 Feb 2024 12:29:08 -0500 Subject: [PATCH 06/10] code conflict fixed --- .../ZippingServices/models/zipperproducermessage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/computingservices/ZippingServices/models/zipperproducermessage.py b/computingservices/ZippingServices/models/zipperproducermessage.py index 03d8211ca..c082dd685 100644 --- a/computingservices/ZippingServices/models/zipperproducermessage.py +++ b/computingservices/ZippingServices/models/zipperproducermessage.py @@ -1,5 +1,5 @@ class zipperproducermessage(object): - def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,summarydocuments,redactionlayerid,foldername) -> None: + def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,summarydocuments,redactionlayerid,foldername="") -> None: self.jobid = jobid self.requestid = requestid self.category=category From de550726100b76989ae80582b61d920feab608d2 Mon Sep 17 00:00:00 2001 From: divyav-aot Date: Tue, 27 Feb 2024 13:40:44 -0500 Subject: [PATCH 07/10] code conflict fixed --- .../ZippingServices/models/zipperproducermessage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/computingservices/ZippingServices/models/zipperproducermessage.py b/computingservices/ZippingServices/models/zipperproducermessage.py index c082dd685..deee421e1 100644 --- a/computingservices/ZippingServices/models/zipperproducermessage.py +++ b/computingservices/ZippingServices/models/zipperproducermessage.py @@ -1,5 +1,5 @@ class zipperproducermessage(object): - def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,summarydocuments,redactionlayerid,foldername="") -> None: + def __init__(self,jobid,requestid,category,requestnumber,bcgovcode,createdby,ministryrequestid,filestozip,finaloutput,attributes,summarydocuments=None,redactionlayerid=None,foldername=None) -> None: self.jobid = jobid self.requestid = requestid self.category=category From 3b9fc305e0cdad61bd96fd6d1e4edf1e19d1de2c Mon Sep 17 00:00:00 2001 From: divyav-aot Date: Fri, 1 Mar 2024 14:26:40 -0500 Subject: [PATCH 08/10] cronjob updated --- openshift/templates/cronjob-restart-pods.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/openshift/templates/cronjob-restart-pods.yml b/openshift/templates/cronjob-restart-pods.yml index 75db9040b..c17320476 100644 --- a/openshift/templates/cronjob-restart-pods.yml +++ b/openshift/templates/cronjob-restart-pods.yml @@ -60,5 +60,20 @@ spec: oc rollout latest reviewer-pdfstitch-largefiles sleep 30s oc patch dc/reviewer-pdfstitch-largefiles -p "{\"spec\":{\"replicas\": 1}}" + sleep 30s + + oc patch dc/reviewer-zippingservice -p "{\"spec\":{\"replicas\": 0}}" + sleep 30s + oc rollout latest reviewer-zippingservice + sleep 30s + oc patch dc/reviewer-zippingservice -p "{\"spec\":{\"replicas\": 1}}" + sleep 30s + + oc patch dc/reviewer-documentservice-test -p "{\"spec\":{\"replicas\": 0}}" + sleep 30s + oc rollout latest reviewer-documentservice-test + sleep 30s + oc patch dc/reviewer-documentservice-test -p "{\"spec\":{\"replicas\": 1}}" + sleep 30s restartPolicy: OnFailure From 7781db768866f2312c4469cac22b3d42134f19d7 Mon Sep 17 00:00:00 2001 From: divyav-aot Date: Fri, 1 Mar 2024 14:30:54 -0500 Subject: [PATCH 09/10] removed print statements --- .../rstreamio/reader/documentservicestreamreader.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/computingservices/DocumentServices/rstreamio/reader/documentservicestreamreader.py b/computingservices/DocumentServices/rstreamio/reader/documentservicestreamreader.py index 74b9edebe..54e00e1ae 100644 --- a/computingservices/DocumentServices/rstreamio/reader/documentservicestreamreader.py +++ b/computingservices/DocumentServices/rstreamio/reader/documentservicestreamreader.py @@ -33,10 +33,7 @@ def start(consumer_id: str, start_from: StartFrom = StartFrom.latest): print(f"Starting from {start_from.name}") while True: - print("Reading stream...") messages = stream.read(last_id=last_id, block=BLOCK_TIME) - print("*********** Messages ***********") - print(messages) if messages: for _message in messages: # message_id is the random id created to identify the message @@ -58,4 +55,4 @@ def start(consumer_id: str, start_from: StartFrom = StartFrom.latest): rdb.set(LAST_ID_KEY.format(consumer_id=consumer_id), last_id) print(f"finished processing {message_id}") else: - print(f"No new messages after ID: {last_id}") \ No newline at end of file + logging.info(f"No new messages after ID: {last_id}") \ No newline at end of file From 27844a1e12ee8f61f1e16140e3314d3f463c9d66 Mon Sep 17 00:00:00 2001 From: nkan-aot2 <156717133+nkan-aot2@users.noreply.github.com> Date: Wed, 6 Mar 2024 10:39:09 -0800 Subject: [PATCH 10/10] include null check for date fields --- .../MCS.FOI.DocToPDF/DocFileProcessor.cs | 251 +++++++++--------- 1 file changed, 127 insertions(+), 124 deletions(-) diff --git a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs index 1b03c7bf1..51c0d1188 100644 --- a/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs +++ b/MCS.FOI.S3FileConversion/MCS.FOI.DocToPDF/DocFileProcessor.cs @@ -1,53 +1,53 @@ - -using Serilog; + +using Serilog; using Syncfusion.DocIO.DLS; -using Syncfusion.DocIO; -using Syncfusion.DocIORenderer; +using Syncfusion.DocIO; +using Syncfusion.DocIORenderer; using Syncfusion.Pdf; using DocumentFormat.OpenXml.Packaging; -using DocumentFormat.OpenXml.Wordprocessing; - - - -namespace MCS.FOI.DocToPDF -{ - public class DocFileProcessor : IDocFileProcessor, IDisposable - { - - - public DocFileProcessor() { } - - public DocFileProcessor(Stream SourceStream) - { - this.SourceStream = SourceStream; - } - - public Stream SourceStream { get; set; } - - public int FailureAttemptCount { get; set; } - - public int WaitTimeinMilliSeconds { get; set; } - - public bool IsSinglePDFOutput { get; set; } - - - private MemoryStream? output = null; - public (bool, Stream) ConvertToPDF() - { - bool converted = false; - string message = string.Empty; - bool _isSinglePDFOutput = IsSinglePDFOutput; - output = new MemoryStream(); - try - { - for (int attempt = 1; attempt <= FailureAttemptCount && !converted; attempt++) - { - try - { - using (WordDocument wordDocument = new WordDocument(SourceStream, Syncfusion.DocIO.FormatType.Automatic)) - { - SourceStream.Position = 0; - +using DocumentFormat.OpenXml.Wordprocessing; + + + +namespace MCS.FOI.DocToPDF +{ + public class DocFileProcessor : IDocFileProcessor, IDisposable + { + + + public DocFileProcessor() { } + + public DocFileProcessor(Stream SourceStream) + { + this.SourceStream = SourceStream; + } + + public Stream SourceStream { get; set; } + + public int FailureAttemptCount { get; set; } + + public int WaitTimeinMilliSeconds { get; set; } + + public bool IsSinglePDFOutput { get; set; } + + + private MemoryStream? output = null; + public (bool, Stream) ConvertToPDF() + { + bool converted = false; + string message = string.Empty; + bool _isSinglePDFOutput = IsSinglePDFOutput; + output = new MemoryStream(); + try + { + for (int attempt = 1; attempt <= FailureAttemptCount && !converted; attempt++) + { + try + { + using (WordDocument wordDocument = new WordDocument(SourceStream, Syncfusion.DocIO.FormatType.Automatic)) + { + SourceStream.Position = 0; + using (var docXML = WordprocessingDocument.Open(SourceStream, false)) { @@ -60,84 +60,87 @@ public DocFileProcessor(Stream SourceStream) } List datefields = wordDocument.FindAllItemsByProperty(EntityType.Field, "FieldType", FieldType.FieldDate.ToString()); - foreach (var (datefield, i) in datefields.Select((datefield, i) => (datefield, i))) + if (datefields != null) { - var dateField = datefield as WField; - //Takes the owner paragraph. - WParagraph ownerPara = dateField.OwnerParagraph; - int dateFieldIndex = ownerPara.ChildEntities.IndexOf(dateField); - //Removes the date field. - ownerPara.ChildEntities.Remove(dateField); - //Creating a new text range with required date. - WTextRange textRange = new WTextRange(ownerPara.Document); - textRange.Text = originalDates[i];//"February 12, 2023"; - //Inserting the date field with the created text range. - ownerPara.ChildEntities.Insert(dateFieldIndex, textRange); + foreach (var (datefield, i) in datefields.Select((datefield, i) => (datefield, i))) + { + var dateField = datefield as WField; + //Takes the owner paragraph. + WParagraph ownerPara = dateField.OwnerParagraph; + int dateFieldIndex = ownerPara.ChildEntities.IndexOf(dateField); + //Removes the date field. + ownerPara.ChildEntities.Remove(dateField); + //Creating a new text range with required date. + WTextRange textRange = new WTextRange(ownerPara.Document); + textRange.Text = originalDates[i];//"February 12, 2023"; + //Inserting the date field with the created text range. + ownerPara.ChildEntities.Insert(dateFieldIndex, textRange); + } } } - wordDocument.RevisionOptions.CommentDisplayMode = CommentDisplayMode.ShowInBalloons; - wordDocument.RevisionOptions.CommentColor = RevisionColor.Blue; - wordDocument.RevisionOptions.ShowMarkup = RevisionType.Deletions | RevisionType.Insertions; - - using (DocIORenderer renderer = new DocIORenderer()) - { - using PdfDocument pdfDocument = renderer.ConvertToPDF(wordDocument); - //Save the PDF file - //Close the instance of document objects - pdfDocument.Save(output); - pdfDocument.Close(true); - converted = true; - - } - - } - } - catch (Exception e) - { - string errorMessage = $"Exception occured while coverting a document file, exception : {e.Message}"; - message = $"Exception happened while accessing File, re-attempting count : {attempt} , Error Message : {e.Message} , Stack trace : {e.StackTrace}"; - Log.Error(message); - Console.WriteLine(message); - if (attempt == FailureAttemptCount) - { - throw new Exception(errorMessage); - } - Thread.Sleep(WaitTimeinMilliSeconds); - } - } - } - catch (Exception ex) - { - converted = false; - string error = $"Exception occured while coverting Doc file, exception : {ex.Message} , stacktrace : {ex.StackTrace}"; - Log.Error(error); - Console.WriteLine(error); - throw; - } - return (converted, output); - } - - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - protected virtual void Dispose(bool disposing) - { - if (disposing) - { - if (this.SourceStream != null) - { - this.SourceStream.Close(); - this.SourceStream.Dispose(); - } - - if (output != null) output.Dispose(); - // free managed resources - } - - } - } -} \ No newline at end of file + wordDocument.RevisionOptions.CommentDisplayMode = CommentDisplayMode.ShowInBalloons; + wordDocument.RevisionOptions.CommentColor = RevisionColor.Blue; + wordDocument.RevisionOptions.ShowMarkup = RevisionType.Deletions | RevisionType.Insertions; + + using (DocIORenderer renderer = new DocIORenderer()) + { + using PdfDocument pdfDocument = renderer.ConvertToPDF(wordDocument); + //Save the PDF file + //Close the instance of document objects + pdfDocument.Save(output); + pdfDocument.Close(true); + converted = true; + + } + + } + } + catch (Exception e) + { + string errorMessage = $"Exception occured while coverting a document file, exception : {e.Message}"; + message = $"Exception happened while accessing File, re-attempting count : {attempt} , Error Message : {e.Message} , Stack trace : {e.StackTrace}"; + Log.Error(message); + Console.WriteLine(message); + if (attempt == FailureAttemptCount) + { + throw new Exception(errorMessage); + } + Thread.Sleep(WaitTimeinMilliSeconds); + } + } + } + catch (Exception ex) + { + converted = false; + string error = $"Exception occured while coverting Doc file, exception : {ex.Message} , stacktrace : {ex.StackTrace}"; + Log.Error(error); + Console.WriteLine(error); + throw; + } + return (converted, output); + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + if (this.SourceStream != null) + { + this.SourceStream.Close(); + this.SourceStream.Dispose(); + } + + if (output != null) output.Dispose(); + // free managed resources + } + + } + } +}