From 5a8890f97bda5238f830f179092e0746368f77d8 Mon Sep 17 00:00:00 2001 From: Abin Antony Date: Thu, 19 Dec 2024 15:46:59 -0800 Subject: [PATCH 1/5] #FOIMOD-3636 struct for JSON message --- .../src/types/QueueMessageStruct.go | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/computingservices/documentextractservices/azuredocextractservice/src/types/QueueMessageStruct.go b/computingservices/documentextractservices/azuredocextractservice/src/types/QueueMessageStruct.go index dd3c49a..e72cc54 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/types/QueueMessageStruct.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/types/QueueMessageStruct.go @@ -1,11 +1,30 @@ package types +type Divisions struct { + DivisionID int64 `json:"DivisionID"` + Name string `json:"Name"` +} + +type Documents struct { + DocumentID int64 `json:"DocumentID"` + DocumentName string `json:"DocumentName"` + DocumentType string `json:"DocumentType"` + CreatedDate string `json:"CreatedDate"` + DocumentS3URL string `json:"DocumentS3URL"` + Divisions []Divisions `json:"Documents"` +} + +type Requests struct { + MinistryRequestID string `json:"MinistryRequestID"` + RequestNumber string `json:"RequestNumber"` + RequestType string `json:"RequestType"` + MinistryCode string `json:"MinistryCode"` + ReceivedDate string `json:"ReceivedDate"` + Documents []Documents `json:"Documents"` +} + type QueueMessage struct { - MinistryRequestId int64 `json:"ministryRequestId"` - RequestNumber string `json:"requestNumber"` - MinistryCode string `json:"ministryCode"` - DivisionName string `json:"divisionName"` - //ModifiedDate string `json:"modifiedDate"` - DocumentHashCode string `json:"documentHashCode"` - S3Uri string `json:"s3Uri"` + BatchID string `json:"BatchID"` + Date string `json:"Date"` + Requests []Requests `json:"Requests"` } From 952d88715ddd3fd04224a3386dcdb4daf3eca761 Mon Sep 17 00:00:00 2001 From: Abin Antony Date: Mon, 23 Dec 2024 12:08:27 -0800 Subject: [PATCH 2/5] #FOIMOD-3636 Updates to Azure Processing and ActiveMQ dequeuing --- .gitignore | 3 +- .../azuredocextractionservice.go | 8 +-- .../src/azureservices/azuredocumentservice.go | 12 ++--- .../src/httpservices/messagedequeue.go | 13 +++-- .../src/messageprocessor.go | 53 +++++++++++++------ 5 files changed, 57 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 3fc4703..53cfe73 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ computingservices/documentextractservices/azuredocextractservice/src/foiazuredocextractservice -.vscode/* \ No newline at end of file +.vscode/* +computingservices/documentextractservices/azuredocextractservice/src/.env diff --git a/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocextractionservice.go b/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocextractionservice.go index ddbcfed..884bcd3 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocextractionservice.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocextractionservice.go @@ -28,20 +28,20 @@ func NewAzureService(subscriptionKey string, baseURL string) *AzureService { } // CallAzureDocument initiates the document analysis request -func (a *AzureService) AnalyzeAndExtractDocument(jsonPayload []byte) error { +func (a *AzureService) AnalyzeAndExtractDocument(jsonPayload []byte) (map[string]interface{}, error) { requestURL := fmt.Sprintf("%s/formrecognizer/documentModels/prebuilt-read:analyze?api-version=2023-07-31&stringIndexType=utf16CodeUnit", a.BaseURL) // Send the POST request apimRequestID, err := a.createAnalysisRequest(requestURL, jsonPayload) if err != nil { - return fmt.Errorf("failed to initiate document analysis: %w", err) + return nil, fmt.Errorf("failed to initiate document analysis: %w", err) } results, err := a.getAnalysisResults(apimRequestID) if err != nil { - return fmt.Errorf("failed to fetch analysis results: %w", err) + return nil, fmt.Errorf("failed to fetch analysis results: %w", err) } //Print extracted data form document fmt.Printf("Analysis Results: %v\n", results) - return nil + return results, err } // sendAnalyzeRequest sends the initial analysis request to the Azure API diff --git a/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocumentservice.go b/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocumentservice.go index f01517e..2c20216 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocumentservice.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocumentservice.go @@ -1,20 +1,20 @@ package azureservices import ( + "azuredocextractservice/utils" "log" ) -func CallAzureDocument(jsonPayload []byte) { - subscriptionKey := "abcd" +func CallAzureDocument(jsonPayload []byte) (map[string]interface{}, error) { + subscriptionKey := utils.ViperEnvVariable("azuresubcriptionkey") baseURL := "https://foidocintelservice.cognitiveservices.azure.com" service := NewAzureService(subscriptionKey, baseURL) - // Example JSON payload - //jsonPayload := []byte(`{"url": "https://example.com/sample.pdf"}`) - - err := service.AnalyzeAndExtractDocument(jsonPayload) + result, err := service.AnalyzeAndExtractDocument(jsonPayload) if err != nil { log.Fatalf("Error calling Azure Document API: %v", err) } + + return result, err } diff --git a/computingservices/documentextractservices/azuredocextractservice/src/httpservices/messagedequeue.go b/computingservices/documentextractservices/azuredocextractservice/src/httpservices/messagedequeue.go index 757ee78..042e985 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/httpservices/messagedequeue.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/httpservices/messagedequeue.go @@ -17,12 +17,15 @@ import ( var activeMQBaseURL = utils.ViperEnvVariable("activeMQBaseURL") var username = utils.ViperEnvVariable("activeMQUserName") var password = utils.ViperEnvVariable("activeMQPassword") +var queuename = utils.ViperEnvVariable("focdocextractqueue") +var activemqclientid = utils.ViperEnvVariable("activemqclientid") // ProcessMessage fetches messages from the ActiveMQ queue using HTTP func ProcessMessage() ([]types.QueueMessage, error) { - queueName := "queuetest" + queueName := queuename + clientid := activemqclientid // Construct the URL to fetch messages from the queue - url := fmt.Sprintf("%s/%s?type=queue", activeMQBaseURL, queueName) + url := fmt.Sprintf("%s://%s&clientId=%s", activeMQBaseURL, queueName, clientid) messages := []types.QueueMessage{} timeoutCounter := 0 maxTimeouts := 1 @@ -45,7 +48,7 @@ func ProcessMessage() ([]types.QueueMessage, error) { fmt.Println("No more messages in the queue. Exiting...") break } - fmt.Printf("Extracted s3uri: %s\n", message.S3Uri) + fmt.Printf("Extracted s3uri: %s\n", message.BatchID) messages = append(messages, *message) } fmt.Println("All messages processed. Exiting.") @@ -55,8 +58,10 @@ func ProcessMessage() ([]types.QueueMessage, error) { // Fetches a message from the queue func fetchMessageFromQueue(url string) (*types.QueueMessage, error) { fmt.Println("URL:", url) + //url = "https://activemq-fc7a67-dev.apps.gold.devops.gov.bc.ca/api/message?destination=queue://foidocextract&clientId=AZSLFDocExtract" client := &http.Client{Timeout: 30 * time.Second} req, err := http.NewRequest(http.MethodGet, url, nil) + //resp, err := http.Get("https://admin:F0!@activemq@dm1n@activemq-fc7a67-dev.apps.gold.devops.gov.bc.ca/api/message?destination=queue://foidocextract&clientId=AZSLFDocExtract") if err != nil { return nil, fmt.Errorf("failed to create HTTP request: %w", err) } @@ -86,7 +91,7 @@ func fetchMessageFromQueue(url string) (*types.QueueMessage, error) { } fmt.Printf("Response Body: %s\n", string(body)) var message types.QueueMessage - if err := json.Unmarshal(body, &message); err != nil { + if err := json.Unmarshal([]byte(body), &message); err != nil { return nil, fmt.Errorf("failed to unmarshal message: %w", err) } return &message, nil diff --git a/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go b/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go index e43d93a..4d79615 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go @@ -4,6 +4,7 @@ import ( "azuredocextractservice/azureservices" "azuredocextractservice/httpservices" "azuredocextractservice/s3services" + "azuredocextractservice/types" "fmt" "log" "net/url" @@ -28,25 +29,18 @@ func main() { for _, message := range dequeuedmessages { fmt.Printf("Received message: %+v\n", message) - parsedURL, err := url.Parse(message.S3Uri) - if err != nil { - fmt.Printf("Error parsing URL: %v\n", err) - return + var requests []types.Requests = message.Requests + + for _, request := range requests { + for _, document := range request.Documents { + var parsedURL = document.DocumentS3URL + var jsonStrbytes []byte = getBytesfromDocumentPath(parsedURL) + azureservices.CallAzureDocument(jsonStrbytes) + } } + // Get the path after the hostname - path := strings.TrimPrefix(parsedURL.Path, "/") - bucketName, relativePath, found := strings.Cut(path, "/") - if !found { - fmt.Println("Invalid URL format") - return - } - fmt.Printf("Bucket: %s, Key: %s\n", bucketName, relativePath) - var s3url = s3services.GetFilefroms3(relativePath, bucketName) - jsonStr := `{ - "urlSource": "` + s3url + `" - }` - var jsonStrbytes = []byte(jsonStr) - azureservices.CallAzureDocument(jsonStrbytes) + fmt.Printf("################-------------------------------####################") } end := time.Now() @@ -54,3 +48,28 @@ func main() { total := end.Sub(start) fmt.Println("Total time:" + total.String()) } + +func getBytesfromDocumentPath(documenturlpath string) []byte { + //path := strings.TrimPrefix(documenturlpath, "/") + //bucketName, relativePath, found := strings.Cut(path, "/") + parsedURL, err := url.Parse(documenturlpath) + if err != nil { + fmt.Println("Error is parsing URL") + return nil + } + relativePath := parsedURL.Path + relativePath = strings.TrimPrefix(relativePath, "/") + bucketName, relativePath, found := strings.Cut(relativePath, "/") + if !found { + fmt.Println("Invalid URL format") + return nil + } + fmt.Printf("Bucket: %s, Key: %s\n", bucketName, relativePath) + var s3url = s3services.GetFilefroms3(relativePath, bucketName) + jsonStr := `{ + "urlSource": "` + s3url + `" + }` + var jsonStrbytes = []byte(jsonStr) + + return jsonStrbytes +} From 5702a566b04e1e4946f0a9f728b05c95ab6181ce Mon Sep 17 00:00:00 2001 From: abin-aot Date: Tue, 24 Dec 2024 20:35:49 +0000 Subject: [PATCH 3/5] #FOIMOD-3636 --- .../azuredocextractionservice.go | 60 +++++----- .../src/azureservices/azuredocumentservice.go | 3 +- .../src/messageprocessor.go | 6 +- .../src/types/AzureAnalyzeResultStruct.go | 113 ++++++++++++++++++ 4 files changed, 151 insertions(+), 31 deletions(-) create mode 100644 computingservices/documentextractservices/azuredocextractservice/src/types/AzureAnalyzeResultStruct.go diff --git a/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocextractionservice.go b/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocextractionservice.go index 884bcd3..e55e11c 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocextractionservice.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocextractionservice.go @@ -1,6 +1,7 @@ package azureservices import ( + "azuredocextractservice/types" "bytes" "encoding/json" "fmt" @@ -28,16 +29,17 @@ func NewAzureService(subscriptionKey string, baseURL string) *AzureService { } // CallAzureDocument initiates the document analysis request -func (a *AzureService) AnalyzeAndExtractDocument(jsonPayload []byte) (map[string]interface{}, error) { +func (a *AzureService) AnalyzeAndExtractDocument(jsonPayload []byte) (types.AnalyzeResults, error) { + var results types.AnalyzeResults requestURL := fmt.Sprintf("%s/formrecognizer/documentModels/prebuilt-read:analyze?api-version=2023-07-31&stringIndexType=utf16CodeUnit", a.BaseURL) // Send the POST request apimRequestID, err := a.createAnalysisRequest(requestURL, jsonPayload) if err != nil { - return nil, fmt.Errorf("failed to initiate document analysis: %w", err) + return results, fmt.Errorf("failed to initiate document analysis: %w", err) } - results, err := a.getAnalysisResults(apimRequestID) + results, err = a.getAnalysisResults(apimRequestID) if err != nil { - return nil, fmt.Errorf("failed to fetch analysis results: %w", err) + return results, fmt.Errorf("failed to fetch analysis results: %w", err) } //Print extracted data form document fmt.Printf("Analysis Results: %v\n", results) @@ -46,10 +48,8 @@ func (a *AzureService) AnalyzeAndExtractDocument(jsonPayload []byte) (map[string // sendAnalyzeRequest sends the initial analysis request to the Azure API func (a *AzureService) createAnalysisRequest(requestURL string, jsonPayload []byte) (string, error) { - req, err := http.NewRequest(http.MethodPost, requestURL, bytes.NewBuffer(jsonPayload)) - if err != nil { - return "", fmt.Errorf("failed to create POST request: %w", err) - } + + req, _ := http.NewRequest(http.MethodPost, requestURL, bytes.NewBuffer(jsonPayload)) req.Header.Set("Content-Type", "application/json") req.Header.Set("Ocp-Apim-Subscription-Key", a.SubscriptionKey) res, err := a.Client.Do(req) @@ -57,66 +57,68 @@ func (a *AzureService) createAnalysisRequest(requestURL string, jsonPayload []by return "", fmt.Errorf("error making HTTP request: %w", err) } defer res.Body.Close() - if res.StatusCode != http.StatusOK { - return "", fmt.Errorf("unexpected response status: %s", res.Status) - } apimRequestID := res.Header.Get("Apim-Request-Id") + if apimRequestID == "" { return "", fmt.Errorf("missing Apim-Request-Id in response header") } return apimRequestID, nil } -func (a *AzureService) getAnalysisResults(apimRequestID string) (map[string]interface{}, error) { +func (a *AzureService) getAnalysisResults(apimRequestID string) (types.AnalyzeResults, error) { extractReqURL := fmt.Sprintf( "%s/formrecognizer/documentModels/prebuilt-read/analyzeResults/%s?api-version=2023-07-31", a.BaseURL, apimRequestID, ) + for { time.Sleep(1 * time.Second) - jsonResponse, err := a.getExtractedResults(extractReqURL) + result, err := a.getExtractedResults(extractReqURL) if err != nil { - return nil, err + return result, err } - status, ok := jsonResponse["status"].(string) - if !ok { - return nil, fmt.Errorf("missing or invalid 'status' in response") - } - fmt.Printf("Current status: %s\n", status) + + status := result.Status + fmt.Printf("Current status: %s\n", result.Status) switch status { case "succeeded": - return jsonResponse, nil + return result, nil case "running": continue default: - return nil, fmt.Errorf("analysis failed with status: %s", status) + return result, fmt.Errorf("analysis failed with status: %s", status) } } } // Helper function to perform the HTTP GET request and parse the JSON response -func (a *AzureService) getExtractedResults(url string) (map[string]interface{}, error) { +func (a *AzureService) getExtractedResults(url string) (types.AnalyzeResults, error) { + var result types.AnalyzeResults req, err := http.NewRequest(http.MethodGet, url, nil) if err != nil { - return nil, fmt.Errorf("failed to create GET request: %w", err) + return result, fmt.Errorf("failed to create GET request: %w", err) } req.Header.Set("Ocp-Apim-Subscription-Key", a.SubscriptionKey) res, err := a.Client.Do(req) if err != nil { - return nil, fmt.Errorf("error making HTTP request: %w", err) + return result, fmt.Errorf("error making HTTP request: %w", err) } defer res.Body.Close() if res.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected response status: %s", res.Status) + return result, fmt.Errorf("unexpected response status: %s", res.Status) } bodyBytes, err := io.ReadAll(res.Body) if err != nil { - return nil, fmt.Errorf("error reading response body: %w", err) + return result, fmt.Errorf("error reading response body: %w", err) } + fmt.Println("Response Body starts here:") + fmt.Println(string(bodyBytes)) + fmt.Println("Response Body ends here:") var jsonResponse map[string]interface{} - err = json.Unmarshal(bodyBytes, &jsonResponse) + json.Unmarshal(bodyBytes, &jsonResponse) + err = json.Unmarshal(bodyBytes, &result) if err != nil { - return nil, fmt.Errorf("error unmarshaling response body: %w", err) + return result, fmt.Errorf("error unmarshaling response body: %w", err) } - return jsonResponse, nil + return result, nil } diff --git a/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocumentservice.go b/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocumentservice.go index 2c20216..049b8b1 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocumentservice.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/azureservices/azuredocumentservice.go @@ -1,11 +1,12 @@ package azureservices import ( + "azuredocextractservice/types" "azuredocextractservice/utils" "log" ) -func CallAzureDocument(jsonPayload []byte) (map[string]interface{}, error) { +func CallAzureDocument(jsonPayload []byte) (types.AnalyzeResults, error) { subscriptionKey := utils.ViperEnvVariable("azuresubcriptionkey") baseURL := "https://foidocintelservice.cognitiveservices.azure.com" diff --git a/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go b/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go index 4d79615..6540a02 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go @@ -35,7 +35,11 @@ func main() { for _, document := range request.Documents { var parsedURL = document.DocumentS3URL var jsonStrbytes []byte = getBytesfromDocumentPath(parsedURL) - azureservices.CallAzureDocument(jsonStrbytes) + analysisResults, _analyzeerr := azureservices.CallAzureDocument(jsonStrbytes) + if _analyzeerr != nil { + //pUSH to solr. + //analysisResults.AnalyzeResult.Pages + } } } diff --git a/computingservices/documentextractservices/azuredocextractservice/src/types/AzureAnalyzeResultStruct.go b/computingservices/documentextractservices/azuredocextractservice/src/types/AzureAnalyzeResultStruct.go new file mode 100644 index 0000000..32e68aa --- /dev/null +++ b/computingservices/documentextractservices/azuredocextractservice/src/types/AzureAnalyzeResultStruct.go @@ -0,0 +1,113 @@ +package types + +// Root struct for the response +type AnalyzeResults struct { + Status string `json:"status"` + CreatedDateTime string `json:"createdDateTime"` + LastUpdatedDateTime string `json:"lastUpdatedDateTime"` + AnalyzeResult AnalyzeResult `json:"analyzeResult"` +} + +// AnalyzeResult contains the main analysis output +type AnalyzeResult struct { + APIVersion string `json:"apiVersion"` + ModelID string `json:"modelId"` + Content string `json:"content"` + Pages []Page `json:"pages"` + Tables []Table `json:"tables"` + KeyValuePairs []KeyValuePair `json:"keyValuePairs"` + Entities []Entity `json:"entities"` + Styles []Style `json:"styles"` + Paragraphs []Paragraph `json:"paragraphs"` +} + +// Page represents details of a single page +type Page struct { + PageNumber int `json:"pageNumber"` + Angle float64 `json:"angle"` + Width float64 `json:"width"` + Height float64 `json:"height"` + Unit string `json:"unit"` + Lines []Line `json:"lines"` + Words []Word `json:"words"` +} + +// Line represents a line of text +type Line struct { + Content string `json:"content"` + BoundingRegions []BoundingRegion `json:"boundingRegions"` + Spans []Span `json:"spans"` +} + +// Word represents an individual word +type Word struct { + Content string `json:"content"` + BoundingBox []float64 `json:"boundingBox"` + Confidence float64 `json:"confidence"` +} + +// Table represents a detected table +type Table struct { + RowCount int `json:"rowCount"` + ColumnCount int `json:"columnCount"` + BoundingRegions []BoundingRegion `json:"boundingRegions"` + Cells []Cell `json:"cells"` +} + +// Cell represents a cell in a table +type Cell struct { + Kind string `json:"kind"` + RowIndex int `json:"rowIndex"` + ColumnIndex int `json:"columnIndex"` + Content string `json:"content"` + BoundingRegions []BoundingRegion `json:"boundingRegions"` + Spans []Span `json:"spans"` +} + +// KeyValuePair represents a key-value pair extracted from the document +type KeyValuePair struct { + Key KVElement `json:"key"` + Value KVElement `json:"value"` +} + +// KVElement represents the key or value in a key-value pair +type KVElement struct { + Content string `json:"content"` + BoundingRegions []BoundingRegion `json:"boundingRegions"` + Spans []Span `json:"spans"` +} + +// Entity represents a named entity +type Entity struct { + Category string `json:"category"` + Content string `json:"content"` + Confidence float64 `json:"confidence"` + BoundingRegions []BoundingRegion `json:"boundingRegions"` + Spans []Span `json:"spans"` +} + +// Style represents text styles +type Style struct { + IsHandwritten bool `json:"isHandwritten"` + Spans []Span `json:"spans"` + Confidence float64 `json:"confidence"` +} + +// Paragraph represents a paragraph of text +type Paragraph struct { + Content string `json:"content"` + BoundingRegions []BoundingRegion `json:"boundingRegions"` + Spans []Span `json:"spans"` +} + +// BoundingRegion represents the location of an element on a page +type BoundingRegion struct { + PageNumber int `json:"pageNumber"` + BoundingBox []float64 `json:"boundingBox"` +} + +// Span represents a range of text offsets +type Span struct { + Offset int `json:"offset"` + Length int `json:"length"` +} From 02d8edabc0e64d82586421df38b211c1ee8acbdf Mon Sep 17 00:00:00 2001 From: abin-aot Date: Tue, 24 Dec 2024 20:47:39 +0000 Subject: [PATCH 4/5] #FOIMOD-3636 removing comments --- .../azuredocextractservice/src/httpservices/messagedequeue.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/computingservices/documentextractservices/azuredocextractservice/src/httpservices/messagedequeue.go b/computingservices/documentextractservices/azuredocextractservice/src/httpservices/messagedequeue.go index 042e985..77c9d57 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/httpservices/messagedequeue.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/httpservices/messagedequeue.go @@ -58,10 +58,8 @@ func ProcessMessage() ([]types.QueueMessage, error) { // Fetches a message from the queue func fetchMessageFromQueue(url string) (*types.QueueMessage, error) { fmt.Println("URL:", url) - //url = "https://activemq-fc7a67-dev.apps.gold.devops.gov.bc.ca/api/message?destination=queue://foidocextract&clientId=AZSLFDocExtract" client := &http.Client{Timeout: 30 * time.Second} req, err := http.NewRequest(http.MethodGet, url, nil) - //resp, err := http.Get("https://admin:F0!@activemq@dm1n@activemq-fc7a67-dev.apps.gold.devops.gov.bc.ca/api/message?destination=queue://foidocextract&clientId=AZSLFDocExtract") if err != nil { return nil, fmt.Errorf("failed to create HTTP request: %w", err) } From 3bd05280f86d48490953d1f1db380d411bd5e3be Mon Sep 17 00:00:00 2001 From: abin-aot Date: Wed, 25 Dec 2024 00:47:56 +0000 Subject: [PATCH 5/5] #FOIMOD-3636 PUSH TO SOLR updates --- .../src/.vscode/launch.json | 15 ++++++ .../src/messageprocessor.go | 27 +++++++++- .../solrsearchservices/solrsearchservice.go | 54 +++++++++++++++++++ .../src/types/FOISolrSearchStruct.go | 13 +++++ 4 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 computingservices/documentextractservices/azuredocextractservice/src/.vscode/launch.json create mode 100644 computingservices/documentextractservices/azuredocextractservice/src/solrsearchservices/solrsearchservice.go create mode 100644 computingservices/documentextractservices/azuredocextractservice/src/types/FOISolrSearchStruct.go diff --git a/computingservices/documentextractservices/azuredocextractservice/src/.vscode/launch.json b/computingservices/documentextractservices/azuredocextractservice/src/.vscode/launch.json new file mode 100644 index 0000000..8655150 --- /dev/null +++ b/computingservices/documentextractservices/azuredocextractservice/src/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Launch Package", + "type": "go", + "request": "launch", + "mode": "debug", + "program": "${fileDirname}" + } + ] +} \ No newline at end of file diff --git a/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go b/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go index 6540a02..f17a482 100644 --- a/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go +++ b/computingservices/documentextractservices/azuredocextractservice/src/messageprocessor.go @@ -4,10 +4,12 @@ import ( "azuredocextractservice/azureservices" "azuredocextractservice/httpservices" "azuredocextractservice/s3services" + "azuredocextractservice/solrsearchservices" "azuredocextractservice/types" "fmt" "log" "net/url" + "strconv" "strings" "time" ) @@ -36,9 +38,30 @@ func main() { var parsedURL = document.DocumentS3URL var jsonStrbytes []byte = getBytesfromDocumentPath(parsedURL) analysisResults, _analyzeerr := azureservices.CallAzureDocument(jsonStrbytes) - if _analyzeerr != nil { + if _analyzeerr == nil && analysisResults.Status == "succeeded" { + + searchdocumentpagelines := []types.SOLRSearchDocument{} //pUSH to solr. - //analysisResults.AnalyzeResult.Pages + for _, page := range analysisResults.AnalyzeResult.Pages { + for _, line := range page.Lines { + _solrsearchdocuemnt := types.SOLRSearchDocument{ + FoiDocumentID: strconv.Itoa(int(document.DocumentID)), + FoiRequestNumber: request.RequestNumber, + FoiMinistryRequestID: request.MinistryRequestID, + FoiMinistryCode: request.MinistryCode, + FoiDocumentFileName: document.DocumentName, + FoiDocumentPageNumber: page.PageNumber, + FoiDocumentSentence: line.Content, + FoiRequestMiscInfo: document.DocumentS3URL, + } + searchdocumentpagelines = append(searchdocumentpagelines, _solrsearchdocuemnt) + fmt.Println(_solrsearchdocuemnt.FoiDocumentFileName) + } + + } + + solrsearchservices.PushtoSolr(searchdocumentpagelines) + } } } diff --git a/computingservices/documentextractservices/azuredocextractservice/src/solrsearchservices/solrsearchservice.go b/computingservices/documentextractservices/azuredocextractservice/src/solrsearchservices/solrsearchservice.go new file mode 100644 index 0000000..b03627c --- /dev/null +++ b/computingservices/documentextractservices/azuredocextractservice/src/solrsearchservices/solrsearchservice.go @@ -0,0 +1,54 @@ +package solrsearchservices + +import ( + "azuredocextractservice/types" + "azuredocextractservice/utils" + "bytes" + "encoding/json" + "fmt" + "log" + "net/http" +) + +func PushtoSolr(searchdocs []types.SOLRSearchDocument) bool { + + // Convert the struct to JSON + jsonData, err := json.Marshal(searchdocs) + fmt.Println("SOLR Search Data starts here") + fmt.Println(string(jsonData)) + fmt.Println("SOLR Search Data ends here") + if err != nil { + log.Fatal("Error marshaling JSON:", err) + } + + // Solr endpoint URL (Replace with your Solr endpoint) + url := utils.ViperEnvVariable("foisearchsolrpostendpoint") + // Create a POST request with JSON data + req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData)) + if err != nil { + log.Fatal("Error creating request:", err) + } + + // Set the appropriate headers for JSON content + req.Header.Set("Content-Type", "application/json") + username := utils.ViperEnvVariable("solradmin") + password := utils.ViperEnvVariable("solrpassword") + req.SetBasicAuth(username, password) + // Send the request using the http client + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + log.Fatal("Error sending request:", err) + } + defer resp.Body.Close() + + // Handle the response + if resp.StatusCode == http.StatusOK { + fmt.Println("Successfully posted to Solr") + return true + } else { + fmt.Printf("Failed to post to Solr. Status: %s\n", resp.Status) + return false + } + +} diff --git a/computingservices/documentextractservices/azuredocextractservice/src/types/FOISolrSearchStruct.go b/computingservices/documentextractservices/azuredocextractservice/src/types/FOISolrSearchStruct.go new file mode 100644 index 0000000..4f779e2 --- /dev/null +++ b/computingservices/documentextractservices/azuredocextractservice/src/types/FOISolrSearchStruct.go @@ -0,0 +1,13 @@ +package types + +type SOLRSearchDocument struct { + FoiDocumentID string `json:"foidocumentid"` + FoiRequestNumber string `json:"foirequestnumber"` + FoiMinistryRequestID string `json:"foiministryrequestid"` + FoiMinistryCode string `json:"foiministrycode"` + FoiDocumentFileName string `json:"foidocumentfilename"` + FoiDocumentPageNumber int `json:"foidocumentpagenumber"` + FoiDocumentSentence string `json:"foidocumentsentence"` + FoiDocumentSentenceID int `json:"foidocumentsentenceId"` + FoiRequestMiscInfo string `json:"foirequestmiscinfo"` +}