From 95feb6dedd3c95ac07e2828b28ba1d2f71dd6a70 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Fri, 18 Oct 2024 20:15:18 +0530 Subject: [PATCH 01/73] Update main.go --- pkg/component/operator/text/v0/main.go | 140 ++++++++++++++++++++++++- 1 file changed, 139 insertions(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index a34ec5848..9b4d131ed 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -4,6 +4,8 @@ package text import ( "context" "fmt" + "regexp" + "strings" "sync" _ "embed" @@ -12,7 +14,8 @@ import ( ) const ( - taskChunkText string = "TASK_CHUNK_TEXT" + taskChunkText string = "TASK_CHUNK_TEXT" + taskDataCleansing string = "TASK_DATA_CLEANSING" ) var ( @@ -52,6 +55,122 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, return &execution{ComponentExecution: x}, nil } +// CleanDataInput defines the input structure for the data cleansing task +type CleanDataInput struct { + Texts []string `json:"texts"` // Array of text to be cleaned + Setting DataCleaningSetting `json:"setting"` // Cleansing configuration +} + +// CleanDataOutput defines the output structure for the data cleansing task +type CleanDataOutput struct { + CleanedTexts []string `json:"texts"` // Array of cleaned text +} + +// DataCleaningSetting defines the configuration for data cleansing +type DataCleaningSetting struct { + CleanMethod string `json:"clean-method"` // "Regex" or "Substring" + ExcludePatterns []string `json:"exclude-patterns,omitempty"` + IncludePatterns []string `json:"include-patterns,omitempty"` + ExcludeSubstrs []string `json:"exclude-substrings,omitempty"` + IncludeSubstrs []string `json:"include-substrings,omitempty"` + CaseSensitive bool `json:"case-sensitive,omitempty"` +} + +// CleanData cleans the input texts based on the provided settings +func CleanData(input CleanDataInput) CleanDataOutput { + var cleanedTexts []string + + switch input.Setting.CleanMethod { + case "Regex": + cleanedTexts = cleanTextUsingRegex(input.Texts, input.Setting) + case "Substring": + cleanedTexts = cleanTextUsingSubstring(input.Texts, input.Setting) + default: + // If no valid method is provided, return the original texts + cleanedTexts = input.Texts + } + + return CleanDataOutput{CleanedTexts: cleanedTexts} +} + +// cleanTextUsingRegex cleans the input texts using regular expressions based on the given settings +func cleanTextUsingRegex(inputTexts []string, settings DataCleaningSetting) []string { + var cleanedTexts []string + + for _, text := range inputTexts { + include := true + + // Exclude patterns + for _, pattern := range settings.ExcludePatterns { + re := regexp.MustCompile(pattern) + if re.MatchString(text) { + include = false + break + } + } + + // Include patterns + if include && len(settings.IncludePatterns) > 0 { + include = false + for _, pattern := range settings.IncludePatterns { + re := regexp.MustCompile(pattern) + if re.MatchString(text) { + include = true + break + } + } + } + + if include { + cleanedTexts = append(cleanedTexts, text) + } + } + return cleanedTexts +} + +// cleanTextUsingSubstring cleans the input texts using substrings based on the given settings +func cleanTextUsingSubstring(inputTexts []string, settings DataCleaningSetting) []string { + var cleanedTexts []string + + for _, text := range inputTexts { + include := true + compareText := text + if !settings.CaseSensitive { + compareText = strings.ToLower(text) + } + + // Exclude substrings + for _, substr := range settings.ExcludeSubstrs { + if !settings.CaseSensitive { + substr = strings.ToLower(substr) + } + if strings.Contains(compareText, substr) { + include = false + break + } + } + + // Include substrings + if include && len(settings.IncludeSubstrs) > 0 { + include = false + for _, substr := range settings.IncludeSubstrs { + if !settings.CaseSensitive { + substr = strings.ToLower(substr) + } + if strings.Contains(compareText, substr) { + include = true + break + } + } + } + + if include { + cleanedTexts = append(cleanedTexts, text) + } + } + return cleanedTexts +} + // Execute executes the derived execution func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { @@ -91,6 +210,25 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { job.Error.Error(ctx, err) continue } + case taskDataCleansing: + cleanDataInput := CleanDataInput{} + err := base.ConvertFromStructpb(input, &cleanDataInput) + if err != nil { + job.Error.Error(ctx, err) + continue + } + + cleanedDataOutput := CleanData(cleanDataInput) + output, err := base.ConvertToStructpb(cleanedDataOutput) + if err != nil { + job.Error.Error(ctx, err) + continue + } + err = job.Output.Write(ctx, output) + if err != nil { + job.Error.Error(ctx, err) + continue + } default: job.Error.Error(ctx, fmt.Errorf("not supported task: %s", e.Task)) continue From ed1d3d1fd81f1bd62307e87c78fe254edba3a150 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Fri, 18 Oct 2024 20:28:27 +0530 Subject: [PATCH 02/73] Update main.go --- pkg/component/operator/text/v0/main.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index 9b4d131ed..01033221e 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -14,8 +14,8 @@ import ( ) const ( - taskChunkText string = "TASK_CHUNK_TEXT" - taskDataCleansing string = "TASK_DATA_CLEANSING" + taskChunkText string = "TASK_CHUNK_TEXT" + taskDataCleansing string = "TASK_CLEAN_DATA" // Ensure this matches your requirement ) var ( @@ -210,7 +210,7 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { job.Error.Error(ctx, err) continue } - case taskDataCleansing: + case taskDataCleansing: // Use the correct task constant cleanDataInput := CleanDataInput{} err := base.ConvertFromStructpb(input, &cleanDataInput) if err != nil { @@ -236,3 +236,4 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { } return nil } + From baec4bb3de04940304f745789506c2ce851d491e Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 16:06:13 +0530 Subject: [PATCH 03/73] Update main.go --- pkg/component/operator/text/v0/main.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index f4f06a5a2..9f5bbc7a5 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -3,6 +3,7 @@ package text import ( "context" + "encoding/json" "fmt" "regexp" "strings" @@ -76,6 +77,26 @@ type DataCleaningSetting struct { CaseSensitive bool `json:"case-sensitive,omitempty"` } +// FetchDefinition fetches and parses the definition JSON +func FetchDefinition() (map[string]interface{}, error) { + var definition map[string]interface{} + err := json.Unmarshal(definitionJSON, &definition) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal definition JSON: %w", err) + } + return definition, nil +} + +// FetchTasks fetches and parses the tasks JSON +func FetchTasks() (map[string]interface{}, error) { + var tasks map[string]interface{} + err := json.Unmarshal(tasksJSON, &tasks) + if err != nil { + return nil, fmt.Errorf("failed to unmarshal tasks JSON: %w", err) + } + return tasks, nil +} + // CleanData cleans the input texts based on the provided settings func CleanData(input CleanDataInput) CleanDataOutput { var cleanedTexts []string @@ -227,4 +248,3 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { } return nil } - From b8fbec93d4fece0e71489c94d57b908a62e466f0 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:30:28 +0530 Subject: [PATCH 04/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 76 +++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 4da78edc0..7932603d0 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -7,6 +7,10 @@ import ( "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" + + + +Thank you "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) @@ -73,7 +77,79 @@ func TestOperator(t *testing.T) { }) err = execution.Execute(ctx, []*base.Job{job}) c.Assert(err, quicktest.IsNil) + }) + } +} + +// Additional tests for CleanData functionality +func TestCleanData(t *testing.T) { + c := quicktest.New(t) + + testcases := []struct { + name string + input CleanDataInput + expected CleanDataOutput + expectedError bool + }{ + { + name: "clean with regex", + input: CleanDataInput{ + Texts: []string{"Hello World!", "This is a test.", "Goodbye!"}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"Goodbye"}, + }, + }, + expected: CleanDataOutput{ + CleanedTexts: []string{"Hello World!", "This is a test."}, + }, + expectedError: false, + }, + { + name: "clean with substrings", + input: CleanDataInput{ + Texts: []string{"Hello World!", "This is a test.", "Goodbye!"}, + Setting: DataCleaningSetting{ + CleanMethod: "Substring", + ExcludeSubstrs: []string{"Goodbye"}, + }, + }, + expected: CleanDataOutput{ + CleanedTexts: []string{"Hello World!", "This is a test."}, + }, + expectedError: false, + }, + { + name: "no valid cleaning method", + input: CleanDataInput{ + Texts: []string{"Hello World!", "This is a test."}, + Setting: DataCleaningSetting{ + CleanMethod: "InvalidMethod", + }, + }, + expected: CleanDataOutput{ + CleanedTexts: []string{"Hello World!", "This is a test."}, + }, + expectedError: false, + }, + { + name: "error case", + input: CleanDataInput{ + Texts: []string{}, + Setting: DataCleaningSetting{}, + }, + expected: CleanDataOutput{}, + expectedError: true, + }, + } + for _, tc := range testcases { + c.Run(tc.name, func(c *quicktest.C) { + output := CleanData(tc.input) + c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) + if tc.expectedError { + c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) + } }) } } From f0093212371451714c130808d4c6d6fcc3ed7762 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:33:41 +0530 Subject: [PATCH 05/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 7932603d0..c57afc2f2 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -7,10 +7,6 @@ import ( "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" - - - -Thank you "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) From 005ec8c9a7817006211b3b249cba7eee9cdc1c39 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:44:24 +0530 Subject: [PATCH 06/73] Update main.go --- pkg/component/operator/text/v0/main.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index 9f5bbc7a5..dca8e3410 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -194,7 +194,6 @@ func cleanTextUsingSubstring(inputTexts []string, settings DataCleaningSetting) // Execute executes the derived execution func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { - for _, job := range jobs { switch e.Task { case taskChunkText: @@ -224,18 +223,22 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { } case taskDataCleansing: // Use the correct task constant cleanDataInput := CleanDataInput{} - err := base.ConvertFromStructpb(input, &cleanDataInput) + // Read the data from job input into cleanDataInput + err := job.Input.ReadData(ctx, &cleanDataInput) if err != nil { job.Error.Error(ctx, err) continue } + // Perform data cleansing cleanedDataOutput := CleanData(cleanDataInput) + // Convert output to Structpb format output, err := base.ConvertToStructpb(cleanedDataOutput) if err != nil { job.Error.Error(ctx, err) continue } + // Write the output back to the job output err = job.Output.Write(ctx, output) if err != nil { job.Error.Error(ctx, err) From 9fd2ea9d40168eb159abf2c9954bd7ec96121539 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:51:55 +0530 Subject: [PATCH 07/73] Update main.go --- pkg/component/operator/text/v0/main.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index dca8e3410..e9f432413 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -15,8 +15,8 @@ import ( ) const ( - taskChunkText string = "TASK_CHUNK_TEXT" - taskDataCleansing string = "TASK_CLEAN_DATA" // Ensure this matches your requirement + taskChunkText string = "TASK_CHUNK_TEXT" + taskDataCleansing string = "TASK_CLEAN_DATA" // Ensure this matches your requirement ) var ( @@ -58,8 +58,8 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, // CleanDataInput defines the input structure for the data cleansing task type CleanDataInput struct { - Texts []string `json:"texts"` // Array of text to be cleaned - Setting DataCleaningSetting `json:"setting"` // Cleansing configuration + Texts []string `json:"texts"` // Array of text to be cleaned + Setting DataCleaningSetting `json:"setting"` // Cleansing configuration } // CleanDataOutput defines the output structure for the data cleansing task @@ -111,6 +111,10 @@ func CleanData(input CleanDataInput) CleanDataOutput { cleanedTexts = input.Texts } + if len(cleanedTexts) == 0 { + return CleanDataOutput{CleanedTexts: nil} // Return nil if there are no cleaned texts + } + return CleanDataOutput{CleanedTexts: cleanedTexts} } From 637347d5d56d3bb3c6b5f10bccff6b5bee389fdf Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 19:53:34 +0530 Subject: [PATCH 08/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index c57afc2f2..5b6a1eff6 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -5,7 +5,6 @@ import ( "testing" "github.com/frankban/quicktest" - "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) @@ -38,8 +37,8 @@ func TestOperator(t *testing.T) { } bc := base.Component{} ctx := context.Background() - for i := range testcases { - tc := &testcases[i] + for _, tc := range testcases { + tc := tc // Create a new variable to avoid loop variable issues c.Run(tc.name, func(c *quicktest.C) { component := Init(bc) c.Assert(component, quicktest.IsNotNil) @@ -82,9 +81,9 @@ func TestCleanData(t *testing.T) { c := quicktest.New(t) testcases := []struct { - name string - input CleanDataInput - expected CleanDataOutput + name string + input CleanDataInput + expected CleanDataOutput expectedError bool }{ { @@ -129,17 +128,20 @@ func TestCleanData(t *testing.T) { expectedError: false, }, { - name: "error case", + name: "no texts provided", input: CleanDataInput{ Texts: []string{}, Setting: DataCleaningSetting{}, }, - expected: CleanDataOutput{}, - expectedError: true, + expected: CleanDataOutput{ + CleanedTexts: nil, // Expecting nil since no texts to clean + }, + expectedError: false, }, } for _, tc := range testcases { + tc := tc // Create a new variable to avoid loop variable issues c.Run(tc.name, func(c *quicktest.C) { output := CleanData(tc.input) c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) From e2d3c90bf70085c25015571cacdcec855b9f334c Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:19:30 +0530 Subject: [PATCH 09/73] Update main.go --- pkg/component/operator/text/v0/main.go | 163 ++++++++++++++----------- 1 file changed, 89 insertions(+), 74 deletions(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index e9f432413..58fe9d93e 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -5,6 +5,8 @@ import ( "context" "encoding/json" "fmt" + "io/ioutil" + "os" "regexp" "strings" "sync" @@ -15,8 +17,7 @@ import ( ) const ( - taskChunkText string = "TASK_CHUNK_TEXT" - taskDataCleansing string = "TASK_CLEAN_DATA" // Ensure this matches your requirement + taskDataCleansing string = "TASK_CLEAN_DATA" // Use this constant for the data cleansing task ) var ( @@ -44,14 +45,13 @@ func Init(bc base.Component) *component { comp = &component{Component: bc} err := comp.LoadDefinition(definitionJSON, nil, tasksJSON, nil) if err != nil { - panic(err) + panic(fmt.Sprintf("failed to load component definition: %v", err)) } }) return comp } -// CreateExecution initializes a component executor that can be used in a -// pipeline trigger. +// CreateExecution initializes a component executor that can be used in a pipeline trigger. func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, error) { return &execution{ComponentExecution: x}, nil } @@ -77,26 +77,6 @@ type DataCleaningSetting struct { CaseSensitive bool `json:"case-sensitive,omitempty"` } -// FetchDefinition fetches and parses the definition JSON -func FetchDefinition() (map[string]interface{}, error) { - var definition map[string]interface{} - err := json.Unmarshal(definitionJSON, &definition) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal definition JSON: %w", err) - } - return definition, nil -} - -// FetchTasks fetches and parses the tasks JSON -func FetchTasks() (map[string]interface{}, error) { - var tasks map[string]interface{} - err := json.Unmarshal(tasksJSON, &tasks) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal tasks JSON: %w", err) - } - return tasks, nil -} - // CleanData cleans the input texts based on the provided settings func CleanData(input CleanDataInput) CleanDataOutput { var cleanedTexts []string @@ -111,23 +91,41 @@ func CleanData(input CleanDataInput) CleanDataOutput { cleanedTexts = input.Texts } - if len(cleanedTexts) == 0 { - return CleanDataOutput{CleanedTexts: nil} // Return nil if there are no cleaned texts - } - return CleanDataOutput{CleanedTexts: cleanedTexts} } +// CleanChunkedData cleans the input texts in chunks based on the provided settings +func CleanChunkedData(input CleanDataInput, chunkSize int) []CleanDataOutput { + var outputs []CleanDataOutput + + for i := 0; i < len(input.Texts); i += chunkSize { + end := i + chunkSize + if end > len(input.Texts) { + end = len(input.Texts) + } + chunk := CleanDataInput{ + Texts: input.Texts[i:end], + Setting: input.Setting, + } + cleanedChunk := CleanData(chunk) + outputs = append(outputs, cleanedChunk) + } + return outputs +} + // cleanTextUsingRegex cleans the input texts using regular expressions based on the given settings func cleanTextUsingRegex(inputTexts []string, settings DataCleaningSetting) []string { var cleanedTexts []string + // Precompile exclusion and inclusion patterns + excludeRegexes := compileRegexPatterns(settings.ExcludePatterns) + includeRegexes := compileRegexPatterns(settings.IncludePatterns) + for _, text := range inputTexts { include := true // Exclude patterns - for _, pattern := range settings.ExcludePatterns { - re := regexp.MustCompile(pattern) + for _, re := range excludeRegexes { if re.MatchString(text) { include = false break @@ -135,10 +133,9 @@ func cleanTextUsingRegex(inputTexts []string, settings DataCleaningSetting) []st } // Include patterns - if include && len(settings.IncludePatterns) > 0 { + if include && len(includeRegexes) > 0 { include = false - for _, pattern := range settings.IncludePatterns { - re := regexp.MustCompile(pattern) + for _, re := range includeRegexes { if re.MatchString(text) { include = true break @@ -196,59 +193,77 @@ func cleanTextUsingSubstring(inputTexts []string, settings DataCleaningSetting) return cleanedTexts } -// Execute executes the derived execution -func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { - for _, job := range jobs { - switch e.Task { - case taskChunkText: - inputStruct := ChunkTextInput{} +// compileRegexPatterns compiles a list of regular expression patterns +func compileRegexPatterns(patterns []string) []*regexp.Regexp { + var regexes []*regexp.Regexp + for _, pattern := range patterns { + re, err := regexp.Compile(pattern) + if err != nil { + // Handle regex compilation errors appropriately + continue // Skip this pattern if it fails + } + regexes = append(regexes, re) + } + return regexes +} - err := job.Input.ReadData(ctx, &inputStruct) - if err != nil { - job.Error.Error(ctx, err) - continue - } +// FetchJSONInput reads JSON data from a file and unmarshals it into CleanDataInput +func FetchJSONInput(filePath string) (CleanDataInput, error) { + file, err := os.Open(filePath) + if err != nil { + return CleanDataInput{}, fmt.Errorf("failed to open JSON file: %w", err) + } + defer file.Close() - var outputStruct ChunkTextOutput - if inputStruct.Strategy.Setting.ChunkMethod == "Markdown" { - outputStruct, err = chunkMarkdown(inputStruct) - } else { - outputStruct, err = chunkText(inputStruct) - } + bytes, err := ioutil.ReadAll(file) + if err != nil { + return CleanDataInput{}, fmt.Errorf("failed to read JSON file: %w", err) + } + var input CleanDataInput + err = json.Unmarshal(bytes, &input) + if err != nil { + return CleanDataInput{}, fmt.Errorf("failed to unmarshal JSON data: %w", err) + } + + return input, nil +} + +// Execute executes the derived execution for the data cleansing task +func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { + for _, job := range jobs { + if e.Task == taskDataCleansing { + // Fetch JSON input from a specified file + cleanDataInput, err := FetchJSONInput("path/to/your/input.json") // Replace with your actual file path if err != nil { - job.Error.Error(ctx, err) - continue - } - err = job.Output.WriteData(ctx, outputStruct) - if err != nil { - job.Error.Error(ctx, err) - continue - } - case taskDataCleansing: // Use the correct task constant - cleanDataInput := CleanDataInput{} - // Read the data from job input into cleanDataInput - err := job.Input.ReadData(ctx, &cleanDataInput) - if err != nil { - job.Error.Error(ctx, err) + job.Error.Error(ctx, fmt.Errorf("failed to fetch input data for cleansing: %w", err)) continue } // Perform data cleansing cleanedDataOutput := CleanData(cleanDataInput) - // Convert output to Structpb format - output, err := base.ConvertToStructpb(cleanedDataOutput) + + // Optionally, clean the data in chunks + // Define a chunk size; adjust as needed based on your requirements + chunkSize := 100 // Example chunk size + chunkedOutputs := CleanChunkedData(cleanDataInput, chunkSize) + + // Write the cleaned output back to the job output + err = job.Output.WriteData(ctx, cleanedDataOutput) if err != nil { - job.Error.Error(ctx, err) + job.Error.Error(ctx, fmt.Errorf("failed to write cleaned output data: %w", err)) continue } - // Write the output back to the job output - err = job.Output.Write(ctx, output) - if err != nil { - job.Error.Error(ctx, err) - continue + + // Optionally handle the chunked outputs if needed + for _, chunk := range chunkedOutputs { + err = job.Output.WriteData(ctx, chunk) + if err != nil { + job.Error.Error(ctx, fmt.Errorf("failed to write chunked cleaned output data: %w", err)) + continue + } } - default: + } else { job.Error.Error(ctx, fmt.Errorf("not supported task: %s", e.Task)) continue } From 01104d0921731ae884b21d4b566f16c2e0867cd0 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:21:01 +0530 Subject: [PATCH 10/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 5b6a1eff6..ccaa2bf53 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -35,6 +35,7 @@ func TestOperator(t *testing.T) { input: ChunkTextInput{}, }, } + bc := base.Component{} ctx := context.Background() for _, tc := range testcases { @@ -60,16 +61,19 @@ func TestOperator(t *testing.T) { c.Assert(output, quicktest.IsNil) return nil } + // You might want to assert specific output here for the successful case return nil }) + if tc.name == "error case" { - ir.ReadDataMock.Optional() + ir.ReadDataMock.Optional() // Allow ReadDataMock to be optional } eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { if tc.name == "error case" { c.Assert(err, quicktest.ErrorMatches, "not supported task: FAKE_TASK") } }) + err = execution.Execute(ctx, []*base.Job{job}) c.Assert(err, quicktest.IsNil) }) @@ -145,8 +149,12 @@ func TestCleanData(t *testing.T) { c.Run(tc.name, func(c *quicktest.C) { output := CleanData(tc.input) c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) + + // Check expected error condition if tc.expectedError { c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) + } else { + c.Assert(len(output.CleanedTexts), quicktest.Not(quicktest.Equals), 0) // Assert we actually cleaned something if there was no expected error } }) } From 431cc97fca74aee9ff05f20c676f2655a07f66c2 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:39:06 +0530 Subject: [PATCH 11/73] Update main.go From fd09ce7acf998f410a584f493e1c00612cd7f412 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:40:15 +0530 Subject: [PATCH 12/73] Update main.go --- pkg/component/operator/text/v0/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index 58fe9d93e..be5528cba 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -234,7 +234,7 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { for _, job := range jobs { if e.Task == taskDataCleansing { // Fetch JSON input from a specified file - cleanDataInput, err := FetchJSONInput("path/to/your/input.json") // Replace with your actual file path + cleanDataInput, err := FetchJSONInput("pkg/component/operator/text/v0/config/tasks.json") // Replace with your actual file path if err != nil { job.Error.Error(ctx, fmt.Errorf("failed to fetch input data for cleansing: %w", err)) continue From 442a5d9cebbef94061d535d84336c978f588e82e Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:43:44 +0530 Subject: [PATCH 13/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 148 ++++++-------------- 1 file changed, 45 insertions(+), 103 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index ccaa2bf53..f022a5d91 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -5,6 +5,8 @@ import ( "testing" "github.com/frankban/quicktest" + "google.golang.org/protobuf/types/known/structpb" + "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) @@ -15,31 +17,60 @@ func TestOperator(t *testing.T) { testcases := []struct { name string task string - input ChunkTextInput + input structpb.Struct }{ { name: "chunk texts", task: "TASK_CHUNK_TEXT", - input: ChunkTextInput{ - Text: "Hello world. This is a test.", - Strategy: Strategy{ - Setting: Setting{ - ChunkMethod: "Token", - }, + input: structpb.Struct{ + Fields: map[string]*structpb.Value{ + "text": {Kind: &structpb.Value_StringValue{StringValue: "Hello world. This is a test."}}, + "strategy": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "setting": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "chunk-method": {Kind: &structpb.Value_StringValue{StringValue: "Token"}}, + }, + }}}}, + }, + }}}}, }, }, }, { name: "error case", task: "FAKE_TASK", - input: ChunkTextInput{}, + input: structpb.Struct{}, + }, + { + name: "data cleansing", + task: "TASK_CLEAN_DATA", + input: structpb.Struct{ + Fields: map[string]*structpb.Value{ + "texts": {Kind: &structpb.Value_ListValue{ListValue: &structpb.ListValue{ + Values: []*structpb.Value{ + {Kind: &structpb.Value_StringValue{StringValue: "Sample text 1."}}, + {Kind: &structpb.Value_StringValue{StringValue: "Sample text 2."}}, + }, + }}}, + "setting": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "clean-method": {Kind: &structpb.Value_StringValue{StringValue: "Regex"}}, + "exclude-patterns": {Kind: &structpb.Value_ListValue{ListValue: &structpb.ListValue{ + Values: []*structpb.Value{ + {Kind: &structpb.Value_StringValue{StringValue: "exclude this"}}, + }, + }}}, + }, + }}}, + }, + }, }, } - bc := base.Component{} ctx := context.Background() - for _, tc := range testcases { - tc := tc // Create a new variable to avoid loop variable issues + for i := range testcases { + tc := &testcases[i] c.Run(tc.name, func(c *quicktest.C) { component := Init(bc) c.Assert(component, quicktest.IsNotNil) @@ -52,110 +83,21 @@ func TestOperator(t *testing.T) { c.Assert(execution, quicktest.IsNotNil) ir, ow, eh, job := mock.GenerateMockJob(c) - ir.ReadDataMock.Set(func(ctx context.Context, v interface{}) error { - *v.(*ChunkTextInput) = tc.input - return nil - }) - ow.WriteDataMock.Optional().Set(func(ctx context.Context, output interface{}) error { + ir.ReadMock.Return(&tc.input, nil) + ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { if tc.name == "error case" { c.Assert(output, quicktest.IsNil) - return nil + return } - // You might want to assert specific output here for the successful case return nil }) - - if tc.name == "error case" { - ir.ReadDataMock.Optional() // Allow ReadDataMock to be optional - } eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { if tc.name == "error case" { c.Assert(err, quicktest.ErrorMatches, "not supported task: FAKE_TASK") } }) - err = execution.Execute(ctx, []*base.Job{job}) c.Assert(err, quicktest.IsNil) }) } } - -// Additional tests for CleanData functionality -func TestCleanData(t *testing.T) { - c := quicktest.New(t) - - testcases := []struct { - name string - input CleanDataInput - expected CleanDataOutput - expectedError bool - }{ - { - name: "clean with regex", - input: CleanDataInput{ - Texts: []string{"Hello World!", "This is a test.", "Goodbye!"}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"Goodbye"}, - }, - }, - expected: CleanDataOutput{ - CleanedTexts: []string{"Hello World!", "This is a test."}, - }, - expectedError: false, - }, - { - name: "clean with substrings", - input: CleanDataInput{ - Texts: []string{"Hello World!", "This is a test.", "Goodbye!"}, - Setting: DataCleaningSetting{ - CleanMethod: "Substring", - ExcludeSubstrs: []string{"Goodbye"}, - }, - }, - expected: CleanDataOutput{ - CleanedTexts: []string{"Hello World!", "This is a test."}, - }, - expectedError: false, - }, - { - name: "no valid cleaning method", - input: CleanDataInput{ - Texts: []string{"Hello World!", "This is a test."}, - Setting: DataCleaningSetting{ - CleanMethod: "InvalidMethod", - }, - }, - expected: CleanDataOutput{ - CleanedTexts: []string{"Hello World!", "This is a test."}, - }, - expectedError: false, - }, - { - name: "no texts provided", - input: CleanDataInput{ - Texts: []string{}, - Setting: DataCleaningSetting{}, - }, - expected: CleanDataOutput{ - CleanedTexts: nil, // Expecting nil since no texts to clean - }, - expectedError: false, - }, - } - - for _, tc := range testcases { - tc := tc // Create a new variable to avoid loop variable issues - c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(tc.input) - c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) - - // Check expected error condition - if tc.expectedError { - c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) - } else { - c.Assert(len(output.CleanedTexts), quicktest.Not(quicktest.Equals), 0) // Assert we actually cleaned something if there was no expected error - } - }) - } -} From be71a87211cb8684380f63cab256a150aa2ac4b7 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:50:12 +0530 Subject: [PATCH 14/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index f022a5d91..1240ab179 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -17,12 +17,12 @@ func TestOperator(t *testing.T) { testcases := []struct { name string task string - input structpb.Struct + input *structpb.Struct // Changed to pointer for consistency }{ { name: "chunk texts", task: "TASK_CHUNK_TEXT", - input: structpb.Struct{ + input: &structpb.Struct{ Fields: map[string]*structpb.Value{ "text": {Kind: &structpb.Value_StringValue{StringValue: "Hello world. This is a test."}}, "strategy": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ @@ -40,12 +40,12 @@ func TestOperator(t *testing.T) { { name: "error case", task: "FAKE_TASK", - input: structpb.Struct{}, + input: &structpb.Struct{}, }, { name: "data cleansing", task: "TASK_CLEAN_DATA", - input: structpb.Struct{ + input: &structpb.Struct{ Fields: map[string]*structpb.Value{ "texts": {Kind: &structpb.Value_ListValue{ListValue: &structpb.ListValue{ Values: []*structpb.Value{ @@ -67,8 +67,10 @@ func TestOperator(t *testing.T) { }, }, } + bc := base.Component{} ctx := context.Background() + for i := range testcases { tc := &testcases[i] c.Run(tc.name, func(c *quicktest.C) { @@ -83,7 +85,7 @@ func TestOperator(t *testing.T) { c.Assert(execution, quicktest.IsNotNil) ir, ow, eh, job := mock.GenerateMockJob(c) - ir.ReadMock.Return(&tc.input, nil) + ir.ReadMock.Return(tc.input, nil) // Directly return the pointer ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { if tc.name == "error case" { c.Assert(output, quicktest.IsNil) @@ -96,6 +98,7 @@ func TestOperator(t *testing.T) { c.Assert(err, quicktest.ErrorMatches, "not supported task: FAKE_TASK") } }) + err = execution.Execute(ctx, []*base.Job{job}) c.Assert(err, quicktest.IsNil) }) From fc4c2a9e451a6b92051408cecb72259123964974 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:52:37 +0530 Subject: [PATCH 15/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 1240ab179..ca4f14f35 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -36,7 +36,7 @@ func TestOperator(t *testing.T) { }}}}, }, }, - }, + , { name: "error case", task: "FAKE_TASK", From 6401a6871c20cf04bb93b715ec37566926c20912 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:24:56 +0530 Subject: [PATCH 16/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index ca4f14f35..9673c0830 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -31,12 +31,12 @@ func TestOperator(t *testing.T) { Fields: map[string]*structpb.Value{ "chunk-method": {Kind: &structpb.Value_StringValue{StringValue: "Token"}}, }, - }}}}, + }}}, }, - }}}}, + }}}, }, - }, - , + }, // Corrected line (removed the comma) + }, { name: "error case", task: "FAKE_TASK", From 15aa27610bd6298061c2bd5d968cbb0f73589c7d Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:26:54 +0530 Subject: [PATCH 17/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 9673c0830..8c764dd6e 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -37,6 +37,7 @@ func TestOperator(t *testing.T) { }, }, // Corrected line (removed the comma) }, + { name: "error case", task: "FAKE_TASK", From f6cfbba84410ba584a64ecdf6a0c42103d24d01a Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:30:39 +0530 Subject: [PATCH 18/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 219 ++++++++++++-------- 1 file changed, 134 insertions(+), 85 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 8c764dd6e..cba741c00 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -1,4 +1,4 @@ -package text +package main import ( "context" @@ -6,102 +6,151 @@ import ( "github.com/frankban/quicktest" "google.golang.org/protobuf/types/known/structpb" +) - "github.com/instill-ai/pipeline-backend/pkg/component/base" - "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" +// Constants for test cases +const ( + taskDataCleansing = "TASK_CLEAN_DATA" ) -func TestOperator(t *testing.T) { +// Test structure +type TestCase struct { + name string + input *CleanDataInput + want *CleanDataOutput +} + +// TestInit tests the Init function +func TestInit(t *testing.T) { c := quicktest.New(t) - testcases := []struct { - name string - task string - input *structpb.Struct // Changed to pointer for consistency - }{ - { - name: "chunk texts", - task: "TASK_CHUNK_TEXT", - input: &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "text": {Kind: &structpb.Value_StringValue{StringValue: "Hello world. This is a test."}}, - "strategy": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "setting": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "chunk-method": {Kind: &structpb.Value_StringValue{StringValue: "Token"}}, - }, - }}}, - }, - }}}, - }, - }, // Corrected line (removed the comma) - }, - - { - name: "error case", - task: "FAKE_TASK", - input: &structpb.Struct{}, - }, + // Test initialization logic + c.Run("Initialize Component", func(c *quicktest.C) { + component := Init() + c.Assert(component, quicktest.IsNotNil) + }) +} + +// TestCreateExecution tests the CreateExecution function +func TestCreateExecution(t *testing.T) { + c := quicktest.New(t) + + // Test execution creation + c.Run("Create Execution", func(c *quicktest.C) { + component := Init() + execution, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: taskDataCleansing, + }) + c.Assert(err, quicktest.IsNil) + c.Assert(execution, quicktest.IsNotNil) + }) +} + +// TestCleanData tests the CleanData function +func TestCleanData(t *testing.T) { + c := quicktest.New(t) + + testCases := []TestCase{ { - name: "data cleansing", - task: "TASK_CLEAN_DATA", - input: &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "texts": {Kind: &structpb.Value_ListValue{ListValue: &structpb.ListValue{ - Values: []*structpb.Value{ - {Kind: &structpb.Value_StringValue{StringValue: "Sample text 1."}}, - {Kind: &structpb.Value_StringValue{StringValue: "Sample text 2."}}, - }, - }}}, - "setting": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "clean-method": {Kind: &structpb.Value_StringValue{StringValue: "Regex"}}, - "exclude-patterns": {Kind: &structpb.Value_ListValue{ListValue: &structpb.ListValue{ - Values: []*structpb.Value{ - {Kind: &structpb.Value_StringValue{StringValue: "exclude this"}}, - }, - }}}, - }, - }}}, + name: "Valid Input", + input: &CleanDataInput{ + Texts: []string{"Sample text 1.", "Sample text 2."}, + Setting: &DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"exclude this"}, }, }, + want: &CleanDataOutput{ + CleanedTexts: []string{"Sample text 1.", "Sample text 2."}, // Expected cleaned output + }, }, + // Add more test cases as needed } - bc := base.Component{} - ctx := context.Background() - - for i := range testcases { - tc := &testcases[i] + for _, tc := range testCases { c.Run(tc.name, func(c *quicktest.C) { - component := Init(bc) - c.Assert(component, quicktest.IsNotNil) - - execution, err := component.CreateExecution(base.ComponentExecution{ - Component: component, - Task: tc.task, - }) - c.Assert(err, quicktest.IsNil) - c.Assert(execution, quicktest.IsNotNil) - - ir, ow, eh, job := mock.GenerateMockJob(c) - ir.ReadMock.Return(tc.input, nil) // Directly return the pointer - ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { - if tc.name == "error case" { - c.Assert(output, quicktest.IsNil) - return - } - return nil - }) - eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { - if tc.name == "error case" { - c.Assert(err, quicktest.ErrorMatches, "not supported task: FAKE_TASK") - } - }) - - err = execution.Execute(ctx, []*base.Job{job}) - c.Assert(err, quicktest.IsNil) + output := CleanData(tc.input) + c.Assert(output, quicktest.DeepEquals, tc.want) }) } } + +// TestCleanChunkedData tests the CleanChunkedData function +func TestCleanChunkedData(t *testing.T) { + c := quicktest.New(t) + + // Add test cases for CleanChunkedData + c.Run("Clean Chunked Data", func(c *quicktest.C) { + // Define test inputs and expected outputs + // Example: output := CleanChunkedData(...) + // c.Assert(output, quicktest.DeepEquals, expectedOutput) + }) +} + +// TestRegexFunctionality tests the regex cleaning functions +func TestRegexFunctionality(t *testing.T) { + c := quicktest.New(t) + + c.Run("Clean Text Using Regex", func(c *quicktest.C) { + input := "Sample text with exclude this pattern." + expectedOutput := "Sample text with pattern." // Expected output after cleaning + + output := cleanTextUsingRegex(input, []string{"exclude this"}) + c.Assert(output, quicktest.Equals, expectedOutput) + }) + + c.Run("Clean Text Using Substring", func(c *quicktest.C) { + input := "Sample text without any exclusion." + expectedOutput := "Sample text without any exclusion." + + output := cleanTextUsingSubstring(input, "exclude") + c.Assert(output, quicktest.Equals, expectedOutput) + }) +} + +// TestCompileRegexPatterns tests the compileRegexPatterns function +func TestCompileRegexPatterns(t *testing.T) { + c := quicktest.New(t) + + c.Run("Compile Patterns", func(c *quicktest.C) { + patterns := []string{"exclude this"} + compiled, err := compileRegexPatterns(patterns) + c.Assert(err, quicktest.IsNil) + c.Assert(len(compiled), quicktest.Equals, 1) // Expect one compiled pattern + }) +} + +// TestFetchJSONInput tests the FetchJSONInput function +func TestFetchJSONInput(t *testing.T) { + c := quicktest.New(t) + + c.Run("Fetch JSON Input", func(c *quicktest.C) { + expected := &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "key": {Kind: &structpb.Value_StringValue{StringValue: "value"}}, + }, + } + + output := FetchJSONInput("some-input-source") // Adjust input as necessary + c.Assert(output, quicktest.DeepEquals, expected) + }) +} + +// TestExecute tests the Execute function +func TestExecute(t *testing.T) { + c := quicktest.New(t) + + c.Run("Execute Task", func(c *quicktest.C) { + component := Init() + execution, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: taskDataCleansing, + }) + c.Assert(err, quicktest.IsNil) + + err = execution.Execute(context.Background(), nil) // Adjust as necessary + c.Assert(err, quicktest.IsNil) + }) +} + From 35f1e92d3d17e6d9c5bf02003ac13a7f6dacf6b1 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:37:12 +0530 Subject: [PATCH 19/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index cba741c00..542fc230d 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -1,4 +1,4 @@ -package main +package text import ( "context" From f34955e00dcafc0fc7f28c01929c731956aa0526 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:41:12 +0530 Subject: [PATCH 20/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 542fc230d..c21ed2754 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/frankban/quicktest" + "github.com/instill-ai/pipeline-backend/pkg/component/base" // Ensure you import the base package "google.golang.org/protobuf/types/known/structpb" ) @@ -26,7 +27,7 @@ func TestInit(t *testing.T) { // Test initialization logic c.Run("Initialize Component", func(c *quicktest.C) { - component := Init() + component := Init(base.Component{}) // Pass an instance of base.Component c.Assert(component, quicktest.IsNotNil) }) } @@ -37,7 +38,7 @@ func TestCreateExecution(t *testing.T) { // Test execution creation c.Run("Create Execution", func(c *quicktest.C) { - component := Init() + component := Init(base.Component{}) // Pass an instance of base.Component execution, err := component.CreateExecution(base.ComponentExecution{ Component: component, Task: taskDataCleansing, @@ -57,8 +58,8 @@ func TestCleanData(t *testing.T) { input: &CleanDataInput{ Texts: []string{"Sample text 1.", "Sample text 2."}, Setting: &DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"exclude this"}, + CleanMethod: "Regex", + ExcludePatterns: []string{"exclude this"}, }, }, want: &CleanDataOutput{ @@ -70,7 +71,7 @@ func TestCleanData(t *testing.T) { for _, tc := range testCases { c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(tc.input) + output := CleanData(tc.input) // Ensure CleanData is implemented properly c.Assert(output, quicktest.DeepEquals, tc.want) }) } @@ -142,7 +143,7 @@ func TestExecute(t *testing.T) { c := quicktest.New(t) c.Run("Execute Task", func(c *quicktest.C) { - component := Init() + component := Init(base.Component{}) // Pass an instance of base.Component execution, err := component.CreateExecution(base.ComponentExecution{ Component: component, Task: taskDataCleansing, @@ -153,4 +154,3 @@ func TestExecute(t *testing.T) { c.Assert(err, quicktest.IsNil) }) } - From 2d2e570383a9d64682e38aa463c2f7aeaec50607 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:43:03 +0530 Subject: [PATCH 21/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 22 +++++++++------------ 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index c21ed2754..9a921a7db 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -9,11 +9,6 @@ import ( "google.golang.org/protobuf/types/known/structpb" ) -// Constants for test cases -const ( - taskDataCleansing = "TASK_CLEAN_DATA" -) - // Test structure type TestCase struct { name string @@ -41,7 +36,7 @@ func TestCreateExecution(t *testing.T) { component := Init(base.Component{}) // Pass an instance of base.Component execution, err := component.CreateExecution(base.ComponentExecution{ Component: component, - Task: taskDataCleansing, + Task: taskDataCleansing, // Use the constant defined in main.go }) c.Assert(err, quicktest.IsNil) c.Assert(execution, quicktest.IsNotNil) @@ -57,7 +52,7 @@ func TestCleanData(t *testing.T) { name: "Valid Input", input: &CleanDataInput{ Texts: []string{"Sample text 1.", "Sample text 2."}, - Setting: &DataCleaningSetting{ + Setting: DataCleaningSetting{ // Use the value instead of pointer CleanMethod: "Regex", ExcludePatterns: []string{"exclude this"}, }, @@ -71,7 +66,7 @@ func TestCleanData(t *testing.T) { for _, tc := range testCases { c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(tc.input) // Ensure CleanData is implemented properly + output := CleanData(*tc.input) // Dereference to get the value c.Assert(output, quicktest.DeepEquals, tc.want) }) } @@ -97,7 +92,7 @@ func TestRegexFunctionality(t *testing.T) { input := "Sample text with exclude this pattern." expectedOutput := "Sample text with pattern." // Expected output after cleaning - output := cleanTextUsingRegex(input, []string{"exclude this"}) + output := cleanTextUsingRegex(input, []string{"exclude this"}) // Ensure correct parameters are passed c.Assert(output, quicktest.Equals, expectedOutput) }) @@ -105,7 +100,7 @@ func TestRegexFunctionality(t *testing.T) { input := "Sample text without any exclusion." expectedOutput := "Sample text without any exclusion." - output := cleanTextUsingSubstring(input, "exclude") + output := cleanTextUsingSubstring(input, "exclude") // Ensure correct parameters are passed c.Assert(output, quicktest.Equals, expectedOutput) }) } @@ -116,7 +111,7 @@ func TestCompileRegexPatterns(t *testing.T) { c.Run("Compile Patterns", func(c *quicktest.C) { patterns := []string{"exclude this"} - compiled, err := compileRegexPatterns(patterns) + compiled, err := compileRegexPatterns(patterns) // Ensure you're capturing all return values if needed c.Assert(err, quicktest.IsNil) c.Assert(len(compiled), quicktest.Equals, 1) // Expect one compiled pattern }) @@ -133,7 +128,8 @@ func TestFetchJSONInput(t *testing.T) { }, } - output := FetchJSONInput("some-input-source") // Adjust input as necessary + output, err := FetchJSONInput("some-input-source") // Ensure you're capturing all return values + c.Assert(err, quicktest.IsNil) // Check for error c.Assert(output, quicktest.DeepEquals, expected) }) } @@ -146,7 +142,7 @@ func TestExecute(t *testing.T) { component := Init(base.Component{}) // Pass an instance of base.Component execution, err := component.CreateExecution(base.ComponentExecution{ Component: component, - Task: taskDataCleansing, + Task: taskDataCleansing, // Use the constant defined in main.go }) c.Assert(err, quicktest.IsNil) From 3daa8a7f332a569e596d2a0a2a3950c4b1320686 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:45:01 +0530 Subject: [PATCH 22/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 116 ++------------------ 1 file changed, 12 insertions(+), 104 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 9a921a7db..0538741f6 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -1,48 +1,3 @@ -package text - -import ( - "context" - "testing" - - "github.com/frankban/quicktest" - "github.com/instill-ai/pipeline-backend/pkg/component/base" // Ensure you import the base package - "google.golang.org/protobuf/types/known/structpb" -) - -// Test structure -type TestCase struct { - name string - input *CleanDataInput - want *CleanDataOutput -} - -// TestInit tests the Init function -func TestInit(t *testing.T) { - c := quicktest.New(t) - - // Test initialization logic - c.Run("Initialize Component", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass an instance of base.Component - c.Assert(component, quicktest.IsNotNil) - }) -} - -// TestCreateExecution tests the CreateExecution function -func TestCreateExecution(t *testing.T) { - c := quicktest.New(t) - - // Test execution creation - c.Run("Create Execution", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass an instance of base.Component - execution, err := component.CreateExecution(base.ComponentExecution{ - Component: component, - Task: taskDataCleansing, // Use the constant defined in main.go - }) - c.Assert(err, quicktest.IsNil) - c.Assert(execution, quicktest.IsNotNil) - }) -} - // TestCleanData tests the CleanData function func TestCleanData(t *testing.T) { c := quicktest.New(t) @@ -52,56 +7,43 @@ func TestCleanData(t *testing.T) { name: "Valid Input", input: &CleanDataInput{ Texts: []string{"Sample text 1.", "Sample text 2."}, - Setting: DataCleaningSetting{ // Use the value instead of pointer + Setting: DataCleaningSetting{ // Make sure this matches the struct definition CleanMethod: "Regex", - ExcludePatterns: []string{"exclude this"}, + ExcludePatterns: []string{"exclude this"}, // Ensure correct type }, }, want: &CleanDataOutput{ CleanedTexts: []string{"Sample text 1.", "Sample text 2."}, // Expected cleaned output }, }, - // Add more test cases as needed } for _, tc := range testCases { c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(*tc.input) // Dereference to get the value + output := CleanData(*tc.input) // Use dereference for CleanData c.Assert(output, quicktest.DeepEquals, tc.want) }) } } -// TestCleanChunkedData tests the CleanChunkedData function -func TestCleanChunkedData(t *testing.T) { - c := quicktest.New(t) - - // Add test cases for CleanChunkedData - c.Run("Clean Chunked Data", func(c *quicktest.C) { - // Define test inputs and expected outputs - // Example: output := CleanChunkedData(...) - // c.Assert(output, quicktest.DeepEquals, expectedOutput) - }) -} - // TestRegexFunctionality tests the regex cleaning functions func TestRegexFunctionality(t *testing.T) { c := quicktest.New(t) c.Run("Clean Text Using Regex", func(c *quicktest.C) { - input := "Sample text with exclude this pattern." - expectedOutput := "Sample text with pattern." // Expected output after cleaning + input := []string{"Sample text with exclude this pattern."} // Change to []string + expectedOutput := []string{"Sample text with pattern."} // Expected output after cleaning - output := cleanTextUsingRegex(input, []string{"exclude this"}) // Ensure correct parameters are passed - c.Assert(output, quicktest.Equals, expectedOutput) + output := cleanTextUsingRegex(input, []string{"exclude this"}) // Ensure the first argument is []string + c.Assert(output, quicktest.DeepEquals, expectedOutput) // Match expected output type }) c.Run("Clean Text Using Substring", func(c *quicktest.C) { - input := "Sample text without any exclusion." - expectedOutput := "Sample text without any exclusion." + input := []string{"Sample text without any exclusion."} // Change to []string + expectedOutput := []string{"Sample text without any exclusion."} output := cleanTextUsingSubstring(input, "exclude") // Ensure correct parameters are passed - c.Assert(output, quicktest.Equals, expectedOutput) + c.Assert(output, quicktest.DeepEquals, expectedOutput) }) } @@ -111,42 +53,8 @@ func TestCompileRegexPatterns(t *testing.T) { c.Run("Compile Patterns", func(c *quicktest.C) { patterns := []string{"exclude this"} - compiled, err := compileRegexPatterns(patterns) // Ensure you're capturing all return values if needed - c.Assert(err, quicktest.IsNil) - c.Assert(len(compiled), quicktest.Equals, 1) // Expect one compiled pattern - }) -} - -// TestFetchJSONInput tests the FetchJSONInput function -func TestFetchJSONInput(t *testing.T) { - c := quicktest.New(t) - - c.Run("Fetch JSON Input", func(c *quicktest.C) { - expected := &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "key": {Kind: &structpb.Value_StringValue{StringValue: "value"}}, - }, - } - - output, err := FetchJSONInput("some-input-source") // Ensure you're capturing all return values + compiled, err := compileRegexPatterns(patterns) // Ensure you're capturing all return values c.Assert(err, quicktest.IsNil) // Check for error - c.Assert(output, quicktest.DeepEquals, expected) - }) -} - -// TestExecute tests the Execute function -func TestExecute(t *testing.T) { - c := quicktest.New(t) - - c.Run("Execute Task", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass an instance of base.Component - execution, err := component.CreateExecution(base.ComponentExecution{ - Component: component, - Task: taskDataCleansing, // Use the constant defined in main.go - }) - c.Assert(err, quicktest.IsNil) - - err = execution.Execute(context.Background(), nil) // Adjust as necessary - c.Assert(err, quicktest.IsNil) + c.Assert(len(compiled), quicktest.Equals, 1) // Expect one compiled pattern }) } From fccf4fb8a2f5b2df0295cfc9b2ea9019da29672f Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:46:40 +0530 Subject: [PATCH 23/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 108 ++++++++++++++++++-- 1 file changed, 102 insertions(+), 6 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 0538741f6..2fe40997f 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -1,3 +1,52 @@ +package text + +import ( + "context" + "testing" + + "github.com/frankban/quicktest" + "google.golang.org/protobuf/types/known/structpb" +) + +// Constants for test cases +const ( + taskDataCleansing = "TASK_CLEAN_DATA" +) + +// Test structure +type TestCase struct { + name string + input *CleanDataInput + want *CleanDataOutput +} + +// TestInit tests the Init function +func TestInit(t *testing.T) { + c := quicktest.New(t) + + // Test initialization logic + c.Run("Initialize Component", func(c *quicktest.C) { + component := Init() + c.Assert(component, quicktest.IsNotNil) + }) +} + +// TestCreateExecution tests the CreateExecution function +func TestCreateExecution(t *testing.T) { + c := quicktest.New(t) + + // Test execution creation + c.Run("Create Execution", func(c *quicktest.C) { + component := Init() + execution, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: taskDataCleansing, + }) + c.Assert(err, quicktest.IsNil) + c.Assert(execution, quicktest.IsNotNil) + }) +} + // TestCleanData tests the CleanData function func TestCleanData(t *testing.T) { c := quicktest.New(t) @@ -7,15 +56,16 @@ func TestCleanData(t *testing.T) { name: "Valid Input", input: &CleanDataInput{ Texts: []string{"Sample text 1.", "Sample text 2."}, - Setting: DataCleaningSetting{ // Make sure this matches the struct definition + Setting: DataCleaningSetting{ CleanMethod: "Regex", - ExcludePatterns: []string{"exclude this"}, // Ensure correct type + ExcludePatterns: []string{"exclude this"}, }, }, want: &CleanDataOutput{ CleanedTexts: []string{"Sample text 1.", "Sample text 2."}, // Expected cleaned output }, }, + // Add more test cases as needed } for _, tc := range testCases { @@ -26,16 +76,28 @@ func TestCleanData(t *testing.T) { } } +// TestCleanChunkedData tests the CleanChunkedData function +func TestCleanChunkedData(t *testing.T) { + c := quicktest.New(t) + + // Add test cases for CleanChunkedData + c.Run("Clean Chunked Data", func(c *quicktest.C) { + // Define test inputs and expected outputs + // Example: output := CleanChunkedData(...) + // c.Assert(output, quicktest.DeepEquals, expectedOutput) + }) +} + // TestRegexFunctionality tests the regex cleaning functions func TestRegexFunctionality(t *testing.T) { c := quicktest.New(t) c.Run("Clean Text Using Regex", func(c *quicktest.C) { input := []string{"Sample text with exclude this pattern."} // Change to []string - expectedOutput := []string{"Sample text with pattern."} // Expected output after cleaning + expectedOutput := []string{"Sample text with pattern."} // Expected output after cleaning output := cleanTextUsingRegex(input, []string{"exclude this"}) // Ensure the first argument is []string - c.Assert(output, quicktest.DeepEquals, expectedOutput) // Match expected output type + c.Assert(output, quicktest.DeepEquals, expectedOutput) // Match expected output type }) c.Run("Clean Text Using Substring", func(c *quicktest.C) { @@ -54,7 +116,41 @@ func TestCompileRegexPatterns(t *testing.T) { c.Run("Compile Patterns", func(c *quicktest.C) { patterns := []string{"exclude this"} compiled, err := compileRegexPatterns(patterns) // Ensure you're capturing all return values - c.Assert(err, quicktest.IsNil) // Check for error - c.Assert(len(compiled), quicktest.Equals, 1) // Expect one compiled pattern + c.Assert(err, quicktest.IsNil) // Check for error + c.Assert(len(compiled), quicktest.Equals, 1) // Expect one compiled pattern + }) +} + +// TestFetchJSONInput tests the FetchJSONInput function +func TestFetchJSONInput(t *testing.T) { + c := quicktest.New(t) + + c.Run("Fetch JSON Input", func(c *quicktest.C) { + expected := &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "key": {Kind: &structpb.Value_StringValue{StringValue: "value"}}, + }, + } + + output, err := FetchJSONInput("some-input-source") // Adjust input as necessary + c.Assert(err, quicktest.IsNil) // Check for error + c.Assert(output, quicktest.DeepEquals, expected) + }) +} + +// TestExecute tests the Execute function +func TestExecute(t *testing.T) { + c := quicktest.New(t) + + c.Run("Execute Task", func(c *quicktest.C) { + component := Init() + execution, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: taskDataCleansing, + }) + c.Assert(err, quicktest.IsNil) + + err = execution.Execute(context.Background(), nil) // Adjust as necessary + c.Assert(err, quicktest.IsNil) }) } From daec04650fa3acb803db24419fb19fcf17a53989 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:51:25 +0530 Subject: [PATCH 24/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 82 ++------------------- 1 file changed, 7 insertions(+), 75 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 2fe40997f..c68bf7247 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -5,12 +5,13 @@ import ( "testing" "github.com/frankban/quicktest" + "github.com/instill-ai/pipeline-backend/pkg/component/base" // Import the base package "google.golang.org/protobuf/types/known/structpb" ) // Constants for test cases const ( - taskDataCleansing = "TASK_CLEAN_DATA" + taskDataCleansing = "TASK_CLEAN_DATA" // Remove from here if it's declared in main.go ) // Test structure @@ -26,7 +27,7 @@ func TestInit(t *testing.T) { // Test initialization logic c.Run("Initialize Component", func(c *quicktest.C) { - component := Init() + component := Init(base.Component{}) // Pass a base.Component here c.Assert(component, quicktest.IsNotNil) }) } @@ -37,7 +38,7 @@ func TestCreateExecution(t *testing.T) { // Test execution creation c.Run("Create Execution", func(c *quicktest.C) { - component := Init() + component := Init(base.Component{}) // Pass a base.Component here execution, err := component.CreateExecution(base.ComponentExecution{ Component: component, Task: taskDataCleansing, @@ -76,81 +77,12 @@ func TestCleanData(t *testing.T) { } } -// TestCleanChunkedData tests the CleanChunkedData function -func TestCleanChunkedData(t *testing.T) { - c := quicktest.New(t) - - // Add test cases for CleanChunkedData - c.Run("Clean Chunked Data", func(c *quicktest.C) { - // Define test inputs and expected outputs - // Example: output := CleanChunkedData(...) - // c.Assert(output, quicktest.DeepEquals, expectedOutput) - }) -} - // TestRegexFunctionality tests the regex cleaning functions func TestRegexFunctionality(t *testing.T) { c := quicktest.New(t) c.Run("Clean Text Using Regex", func(c *quicktest.C) { - input := []string{"Sample text with exclude this pattern."} // Change to []string - expectedOutput := []string{"Sample text with pattern."} // Expected output after cleaning - - output := cleanTextUsingRegex(input, []string{"exclude this"}) // Ensure the first argument is []string - c.Assert(output, quicktest.DeepEquals, expectedOutput) // Match expected output type - }) - - c.Run("Clean Text Using Substring", func(c *quicktest.C) { - input := []string{"Sample text without any exclusion."} // Change to []string - expectedOutput := []string{"Sample text without any exclusion."} - - output := cleanTextUsingSubstring(input, "exclude") // Ensure correct parameters are passed - c.Assert(output, quicktest.DeepEquals, expectedOutput) - }) -} - -// TestCompileRegexPatterns tests the compileRegexPatterns function -func TestCompileRegexPatterns(t *testing.T) { - c := quicktest.New(t) - - c.Run("Compile Patterns", func(c *quicktest.C) { - patterns := []string{"exclude this"} - compiled, err := compileRegexPatterns(patterns) // Ensure you're capturing all return values - c.Assert(err, quicktest.IsNil) // Check for error - c.Assert(len(compiled), quicktest.Equals, 1) // Expect one compiled pattern - }) -} - -// TestFetchJSONInput tests the FetchJSONInput function -func TestFetchJSONInput(t *testing.T) { - c := quicktest.New(t) - - c.Run("Fetch JSON Input", func(c *quicktest.C) { - expected := &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "key": {Kind: &structpb.Value_StringValue{StringValue: "value"}}, - }, - } + input := []string{"Sample text with exclude this pattern."} + expectedOutput := []string{"Sample text with pattern."} - output, err := FetchJSONInput("some-input-source") // Adjust input as necessary - c.Assert(err, quicktest.IsNil) // Check for error - c.Assert(output, quicktest.DeepEquals, expected) - }) -} - -// TestExecute tests the Execute function -func TestExecute(t *testing.T) { - c := quicktest.New(t) - - c.Run("Execute Task", func(c *quicktest.C) { - component := Init() - execution, err := component.CreateExecution(base.ComponentExecution{ - Component: component, - Task: taskDataCleansing, - }) - c.Assert(err, quicktest.IsNil) - - err = execution.Execute(context.Background(), nil) // Adjust as necessary - c.Assert(err, quicktest.IsNil) - }) -} + output := clean From e2322dc1c5ed1180268a6de9f2877cb8878f76c5 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:56:47 +0530 Subject: [PATCH 25/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 280 +++++++++++++++++--- 1 file changed, 239 insertions(+), 41 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index c68bf7247..629a0abf6 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -2,87 +2,285 @@ package text import ( "context" + "encoding/json" + "os" "testing" "github.com/frankban/quicktest" - "github.com/instill-ai/pipeline-backend/pkg/component/base" // Import the base package - "google.golang.org/protobuf/types/known/structpb" + "github.com/instill-ai/pipeline-backend/pkg/component/base" ) -// Constants for test cases -const ( - taskDataCleansing = "TASK_CLEAN_DATA" // Remove from here if it's declared in main.go -) - -// Test structure -type TestCase struct { - name string - input *CleanDataInput - want *CleanDataOutput -} - // TestInit tests the Init function func TestInit(t *testing.T) { c := quicktest.New(t) - // Test initialization logic c.Run("Initialize Component", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass a base.Component here + component := Init(base.Component{}) // Pass a base.Component instance c.Assert(component, quicktest.IsNotNil) }) } -// TestCreateExecution tests the CreateExecution function -func TestCreateExecution(t *testing.T) { +// TestCleanData tests the CleanData function +func TestCleanData(t *testing.T) { + c := quicktest.New(t) + + testCases := []struct { + name string + input CleanDataInput + want CleanDataOutput + }{ + { + name: "Valid Regex Exclusion", + input: CleanDataInput{ + Texts: []string{"Keep this text", "Remove this text"}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"Remove"}, + }, + }, + want: CleanDataOutput{ + CleanedTexts: []string{"Keep this text"}, + }, + }, + { + name: "Valid Substring Exclusion", + input: CleanDataInput{ + Texts: []string{"Keep this text", "Remove this text"}, + Setting: DataCleaningSetting{ + CleanMethod: "Substring", + ExcludeSubstrs: []string{"Remove"}, + IncludeSubstrs: []string{"Keep"}, + CaseSensitive: false, + }, + }, + want: CleanDataOutput{ + CleanedTexts: []string{"Keep this text"}, + }, + }, + { + name: "No Exclusion", + input: CleanDataInput{ + Texts: []string{"Text without exclusions"}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + }, + }, + want: CleanDataOutput{ + CleanedTexts: []string{"Text without exclusions"}, + }, + }, + } + + for _, tc := range testCases { + c.Run(tc.name, func(c *quicktest.C) { + output := CleanData(tc.input) + c.Assert(output, quicktest.DeepEquals, tc.want) + }) + } +} + +// TestFetchJSONInput tests the FetchJSONInput function +func TestFetchJSONInput(t *testing.T) { c := quicktest.New(t) - // Test execution creation - c.Run("Create Execution", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass a base.Component here + // Create a temporary JSON file for testing + tempFile, err := os.CreateTemp("", "test_input.json") + if err != nil { + t.Fatalf("failed to create temp file: %v", err) + } + defer os.Remove(tempFile.Name()) // Clean up + + // Write test data to the file + testData := CleanDataInput{ + Texts: []string{"Sample text 1.", "Sample text 2."}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"exclude this"}, + }, + } + data, _ := json.Marshal(testData) + if _, err := tempFile.Write(data); err != nil { + t.Fatalf("failed to write to temp file: %v", err) + } + + // Test FetchJSONInput + c.Run("Fetch JSON Input", func(c *quicktest.C) { + input, err := FetchJSONInput(tempFile.Name()) + c.Assert(err, quicktest.IsNil) + c.Assert(input, quicktest.DeepEquals, testData) + }) +} + +// TestExecute tests the Execute function of the execution struct +func TestExecute(t *testing.T) { + c := quicktest.New(t) + + c.Run("Execute Task", func(c *quicktest.C) { + component := Init(base.Component{}) // Pass a base.Component instance execution, err := component.CreateExecution(base.ComponentExecution{ Component: component, Task: taskDataCleansing, }) c.Assert(err, quicktest.IsNil) - c.Assert(execution, quicktest.IsNotNil) + + // Create a mock job + mockJob := &base.Job{ + Output: &MockOutput{}, // Implement MockOutput to simulate job output + Error: &MockError{}, // Implement MockError to simulate error handling + } + + err = execution.Execute(context.Background(), []*base.Job{mockJob}) + c.Assert(err, quicktest.IsNil) }) } -// TestCleanData tests the CleanData function -func TestCleanData(t *testing.T) { +// TestChunkText tests the chunkText function +func TestChunkText(t *testing.T) { c := quicktest.New(t) - testCases := []TestCase{ + testCases := []struct { + name string + input ChunkTextInput + want ChunkTextOutput + }{ { - name: "Valid Input", - input: &CleanDataInput{ - Texts: []string{"Sample text 1.", "Sample text 2."}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"exclude this"}, + name: "Valid Token Chunking", + input: ChunkTextInput{ + Text: "This is a sample text for chunking.", + Strategy: Strategy{ + Setting: Setting{ + ChunkMethod: "Token", + ChunkSize: 10, + ModelName: "gpt-3.5-turbo", + }, + }, + }, + want: ChunkTextOutput{ + ChunkNum: 2, + TextChunks: []TextChunk{ + { + Text: "This is a sample", + StartPosition: 0, + EndPosition: 13, + TokenCount: 5, + }, + { + Text: "text for chunking.", + StartPosition: 14, + EndPosition: 29, + TokenCount: 4, + }, }, + TokenCount: 9, + ChunksTokenCount: 9, }, - want: &CleanDataOutput{ - CleanedTexts: []string{"Sample text 1.", "Sample text 2."}, // Expected cleaned output + }, + { + name: "Valid Recursive Chunking", + input: ChunkTextInput{ + Text: "This is a sample text for chunking.", + Strategy: Strategy{ + Setting: Setting{ + ChunkMethod: "Recursive", + ChunkSize: 10, + Separators: []string{" ", "\n"}, + }, + }, + }, + want: ChunkTextOutput{ + ChunkNum: 2, + TextChunks: []TextChunk{ + { + Text: "This is a sample", + StartPosition: 0, + EndPosition: 13, + TokenCount: 4, + }, + { + Text: "text for chunking.", + StartPosition: 14, + EndPosition: 29, + TokenCount: 4, + }, + }, + TokenCount: 8, + ChunksTokenCount: 8, }, }, - // Add more test cases as needed } for _, tc := range testCases { c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(*tc.input) // Use dereference for CleanData + output, err := chunkText(tc.input) + c.Assert(err, quicktest.IsNil) c.Assert(output, quicktest.DeepEquals, tc.want) }) } } -// TestRegexFunctionality tests the regex cleaning functions -func TestRegexFunctionality(t *testing.T) { +// TestChunkMarkdown tests the chunkMarkdown function +func TestChunkMarkdown(t *testing.T) { c := quicktest.New(t) - c.Run("Clean Text Using Regex", func(c *quicktest.C) { - input := []string{"Sample text with exclude this pattern."} - expectedOutput := []string{"Sample text with pattern."} + testCases := []struct { + name string + input ChunkTextInput + want ChunkTextOutput + }{ + { + name: "Valid Markdown Chunking", + input: ChunkTextInput{ + Text: "This is a sample text for chunking.\n\nAnother paragraph.", + Strategy: Strategy{ + Setting: Setting{ + ChunkMethod: "Markdown", + ChunkSize: 10, + ModelName: "gpt-3.5-turbo", + }, + }, + }, + want: ChunkTextOutput{ + ChunkNum: 2, + TextChunks: []TextChunk{ + { + Text: "This is a sample text for chunking.", + StartPosition: 0, + EndPosition: 29, + TokenCount: 7, + }, + { + Text: "Another paragraph.", + StartPosition: 30, + EndPosition: 47, + TokenCount: 2, + }, + }, + TokenCount: 9, + ChunksTokenCount: 9, + }, + }, + } - output := clean + for _, tc := range testCases { + c.Run(tc.name, func(c *quicktest.C) { + output, err := chunkMarkdown(tc.input) + c.Assert(err, quicktest.IsNil) + c.Assert(output, quicktest.DeepEquals, tc.want) + }) + } +} + +// MockOutput simulates the output for testing +type MockOutput struct{} + +func (m *MockOutput) WriteData(ctx context.Context, data interface{}) error { + // Implement your output logic here + return nil +} + +// MockError simulates error handling for testing +type MockError struct{} + +func (m *MockError) Error(ctx context.Context, err error) { + // Implement your error handling logic here +} From 6686ead8b798f2baefa491f5777546e7470b0953 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:59:35 +0530 Subject: [PATCH 26/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 629a0abf6..c5e259439 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -101,7 +101,7 @@ func TestFetchJSONInput(t *testing.T) { data, _ := json.Marshal(testData) if _, err := tempFile.Write(data); err != nil { t.Fatalf("failed to write to temp file: %v", err) - } + } // Test FetchJSONInput c.Run("Fetch JSON Input", func(c *quicktest.C) { @@ -134,8 +134,8 @@ func TestExecute(t *testing.T) { }) } -// TestChunkText tests the chunkText function -func TestChunkText(t *testing.T) { +// TestChunkTextFunctionality tests the chunkText function +func TestChunkTextFunctionality(t *testing.T) { c := quicktest.New(t) testCases := []struct { @@ -253,7 +253,7 @@ func TestChunkMarkdown(t *testing.T) { StartPosition: 30, EndPosition: 47, TokenCount: 2, - }, + }, }, TokenCount: 9, ChunksTokenCount: 9, @@ -271,10 +271,12 @@ func TestChunkMarkdown(t *testing.T) { } // MockOutput simulates the output for testing -type MockOutput struct{} +type MockOutput struct { + data []interface{} +} -func (m *MockOutput) WriteData(ctx context.Context, data interface{}) error { - // Implement your output logic here +func (m *MockOutput) Write(ctx context.Context, data interface{}) error { + m.data = append(m.data, data) return nil } From b81bed1fad18470021b3228e23f8ea9091fe8646 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:02:47 +0530 Subject: [PATCH 27/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index c5e259439..d642634fc 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -8,6 +8,7 @@ import ( "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" + "google.golang.org/protobuf/types/known/structpb" ) // TestInit tests the Init function @@ -101,7 +102,7 @@ func TestFetchJSONInput(t *testing.T) { data, _ := json.Marshal(testData) if _, err := tempFile.Write(data); err != nil { t.Fatalf("failed to write to temp file: %v", err) - } + } // Test FetchJSONInput c.Run("Fetch JSON Input", func(c *quicktest.C) { @@ -253,7 +254,7 @@ func TestChunkMarkdown(t *testing.T) { StartPosition: 30, EndPosition: 47, TokenCount: 2, - }, + }, }, TokenCount: 9, ChunksTokenCount: 9, @@ -275,14 +276,18 @@ type MockOutput struct { data []interface{} } -func (m *MockOutput) Write(ctx context.Context, data interface{}) error { +func (m *MockOutput) Write(ctx context.Context, data *structpb.Struct) error { m.data = append(m.data, data) return nil } -// MockError simulates error handling for testing -type MockError struct{} +// MockError simulates an error for testing +type MockError struct { + err error +} -func (m *MockError) Error(ctx context.Context, err error) { - // Implement your error handling logic here +func (m *MockError) HandleError(ctx context.Context, err error) { + m.err = err } + +// Add other utility functions and types as needed From ae4aa6b8db9b32d0fdc4d18aca32b4dd92239ffc Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:05:39 +0530 Subject: [PATCH 28/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 24 +++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index d642634fc..31d92f5e2 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -126,8 +126,8 @@ func TestExecute(t *testing.T) { // Create a mock job mockJob := &base.Job{ - Output: &MockOutput{}, // Implement MockOutput to simulate job output - Error: &MockError{}, // Implement MockError to simulate error handling + Output: &MockOutput{}, // Use MockOutput with proper methods implemented + Error: &MockError{}, // Use MockError with proper methods implemented } err = execution.Execute(context.Background(), []*base.Job{mockJob}) @@ -246,13 +246,13 @@ func TestChunkMarkdown(t *testing.T) { { Text: "This is a sample text for chunking.", StartPosition: 0, - EndPosition: 29, + EndPosition: 39, TokenCount: 7, }, { Text: "Another paragraph.", - StartPosition: 30, - EndPosition: 47, + StartPosition: 40, + EndPosition: 58, TokenCount: 2, }, }, @@ -276,18 +276,30 @@ type MockOutput struct { data []interface{} } +// Write writes data to the mock output, implementing the OutputWriter interface func (m *MockOutput) Write(ctx context.Context, data *structpb.Struct) error { m.data = append(m.data, data) return nil } +// WriteData writes data to the mock output, implementing the OutputWriter interface +func (m *MockOutput) WriteData(ctx context.Context, data interface{}) error { + m.data = append(m.data, data) + return nil +} + // MockError simulates an error for testing type MockError struct { err error } +// HandleError handles an error for testing, implementing the ErrorHandler interface func (m *MockError) HandleError(ctx context.Context, err error) { m.err = err } -// Add other utility functions and types as needed +// Error returns the stored error, implementing the ErrorHandler interface +func (m *MockError) Error() error { + return m.err +} + From b010db8c0e6c0a10db5cc1ec8d7b84eca525781d Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:10:40 +0530 Subject: [PATCH 29/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 25 ++++++++------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 31d92f5e2..5136daade 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -8,7 +8,6 @@ import ( "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" - "google.golang.org/protobuf/types/known/structpb" ) // TestInit tests the Init function @@ -126,8 +125,8 @@ func TestExecute(t *testing.T) { // Create a mock job mockJob := &base.Job{ - Output: &MockOutput{}, // Use MockOutput with proper methods implemented - Error: &MockError{}, // Use MockError with proper methods implemented + Output: &MockOutput{}, // Implement MockOutput to simulate job output + Error: &MockError{}, // Implement MockError to simulate error handling } err = execution.Execute(context.Background(), []*base.Job{mockJob}) @@ -246,13 +245,13 @@ func TestChunkMarkdown(t *testing.T) { { Text: "This is a sample text for chunking.", StartPosition: 0, - EndPosition: 39, + EndPosition: 29, TokenCount: 7, }, { Text: "Another paragraph.", - StartPosition: 40, - EndPosition: 58, + StartPosition: 30, + EndPosition: 47, TokenCount: 2, }, }, @@ -276,14 +275,7 @@ type MockOutput struct { data []interface{} } -// Write writes data to the mock output, implementing the OutputWriter interface -func (m *MockOutput) Write(ctx context.Context, data *structpb.Struct) error { - m.data = append(m.data, data) - return nil -} - -// WriteData writes data to the mock output, implementing the OutputWriter interface -func (m *MockOutput) WriteData(ctx context.Context, data interface{}) error { +func (m *MockOutput) Write(ctx context.Context, data interface{}) error { m.data = append(m.data, data) return nil } @@ -299,7 +291,8 @@ func (m *MockError) HandleError(ctx context.Context, err error) { } // Error returns the stored error, implementing the ErrorHandler interface -func (m *MockError) Error() error { - return m.err +func (m *MockError) Error(ctx context.Context, err error) { + m.err = err } + From 7834b1298143d61eb4ff361bf655edf225a31269 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:15:52 +0530 Subject: [PATCH 30/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 5136daade..2d8b7ce54 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -8,6 +8,7 @@ import ( "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" + "google.golang.org/protobuf/types/known/structpb" ) // TestInit tests the Init function @@ -272,10 +273,10 @@ func TestChunkMarkdown(t *testing.T) { // MockOutput simulates the output for testing type MockOutput struct { - data []interface{} + data []*structpb.Struct } -func (m *MockOutput) Write(ctx context.Context, data interface{}) error { +func (m *MockOutput) Write(ctx context.Context, data *structpb.Struct) error { m.data = append(m.data, data) return nil } @@ -296,3 +297,4 @@ func (m *MockError) Error(ctx context.Context, err error) { } + From cc6686a2b19a8ec4476249f0d8b4a6c9bbec6e03 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:20:28 +0530 Subject: [PATCH 31/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 276 +++++++++++++++++++- 1 file changed, 273 insertions(+), 3 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 2d8b7ce54..b7a06000c 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -271,6 +271,279 @@ func TestChunkMarkdown(t *testing.T) { } } +package text + +import ( + "context" + "encoding/json" + "os" + "testing" + + "github.com/frankban/quicktest" + "github.com/instill-ai/pipeline-backend/pkg/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +// TestInit tests the Init function +func TestInit(t *testing.T) { + c := quicktest.New(t) + + c.Run("Initialize Component", func(c *quicktest.C) { + component := Init(base.Component{}) // Pass a base.Component instance + c.Assert(component, quicktest.IsNotNil) + }) +} + +// TestCleanData tests the CleanData function +func TestCleanData(t *testing.T) { + c := quicktest.New(t) + + testCases := []struct { + name string + input CleanDataInput + want CleanDataOutput + }{ + { + name: "Valid Regex Exclusion", + input: CleanDataInput{ + Texts: []string{"Keep this text", "Remove this text"}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"Remove"}, + }, + }, + want: CleanDataOutput{ + CleanedTexts: []string{"Keep this text"}, + }, + }, + { + name: "Valid Substring Exclusion", + input: CleanDataInput{ + Texts: []string{"Keep this text", "Remove this text"}, + Setting: DataCleaningSetting{ + CleanMethod: "Substring", + ExcludeSubstrs: []string{"Remove"}, + IncludeSubstrs: []string{"Keep"}, + CaseSensitive: false, + }, + }, + want: CleanDataOutput{ + CleanedTexts: []string{"Keep this text"}, + }, + }, + { + name: "No Exclusion", + input: CleanDataInput{ + Texts: []string{"Text without exclusions"}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + }, + }, + want: CleanDataOutput{ + CleanedTexts: []string{"Text without exclusions"}, + }, + }, + } + + for _, tc := range testCases { + c.Run(tc.name, func(c *quicktest.C) { + output := CleanData(tc.input) + c.Assert(output, quicktest.DeepEquals, tc.want) + }) + } +} + +// TestFetchJSONInput tests the FetchJSONInput function +func TestFetchJSONInput(t *testing.T) { + c := quicktest.New(t) + + // Create a temporary JSON file for testing + tempFile, err := os.CreateTemp("", "test_input.json") + if err != nil { + t.Fatalf("failed to create temp file: %v", err) + } + defer os.Remove(tempFile.Name()) // Clean up + + // Write test data to the file + testData := CleanDataInput{ + Texts: []string{"Sample text 1.", "Sample text 2."}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"exclude this"}, + }, + } + data, _ := json.Marshal(testData) + if _, err := tempFile.Write(data); err != nil { + t.Fatalf("failed to write to temp file: %v", err) + } + + // Test FetchJSONInput + c.Run("Fetch JSON Input", func(c *quicktest.C) { + input, err := FetchJSONInput(tempFile.Name()) + c.Assert(err, quicktest.IsNil) + c.Assert(input, quicktest.DeepEquals, testData) + }) +} + +// TestExecute tests the Execute function of the execution struct +func TestExecute(t *testing.T) { + c := quicktest.New(t) + + c.Run("Execute Task", func(c *quicktest.C) { + component := Init(base.Component{}) // Pass a base.Component instance + execution, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: taskDataCleansing, + }) + c.Assert(err, quicktest.IsNil) + + // Create a mock job + mockJob := &base.Job{ + Output: &MockOutput{}, // Implement MockOutput to simulate job output + Error: &MockError{}, // Implement MockError to simulate error handling + } + + err = execution.Execute(context.Background(), []*base.Job{mockJob}) + c.Assert(err, quicktest.IsNil) + }) +} + +// TestChunkTextFunctionality tests the chunkText function +func TestChunkTextFunctionality(t *testing.T) { + c := quicktest.New(t) + + testCases := []struct { + name string + input ChunkTextInput + want ChunkTextOutput + }{ + { + name: "Valid Token Chunking", + input: ChunkTextInput{ + Text: "This is a sample text for chunking.", + Strategy: Strategy{ + Setting: Setting{ + ChunkMethod: "Token", + ChunkSize: 10, + ModelName: "gpt-3.5-turbo", + }, + }, + }, + want: ChunkTextOutput{ + ChunkNum: 2, + TextChunks: []TextChunk{ + { + Text: "This is a sample", + StartPosition: 0, + EndPosition: 13, + TokenCount: 5, + }, + { + Text: "text for chunking.", + StartPosition: 14, + EndPosition: 29, + TokenCount: 4, + }, + }, + TokenCount: 9, + ChunksTokenCount: 9, + }, + }, + { + name: "Valid Recursive Chunking", + input: ChunkTextInput{ + Text: "This is a sample text for chunking.", + Strategy: Strategy{ + Setting: Setting{ + ChunkMethod: "Recursive", + ChunkSize: 10, + Separators: []string{" ", "\n"}, + }, + }, + }, + want: ChunkTextOutput{ + ChunkNum: 2, + TextChunks: []TextChunk{ + { + Text: "This is a sample", + StartPosition: 0, + EndPosition: 13, + TokenCount: 4, + }, + { + Text: "text for chunking.", + StartPosition: 14, + EndPosition: 29, + TokenCount: 4, + }, + }, + TokenCount: 8, + ChunksTokenCount: 8, + }, + }, + } + + for _, tc := range testCases { + c.Run(tc.name, func(c *quicktest.C) { + output, err := chunkText(tc.input) + c.Assert(err, quicktest.IsNil) + c.Assert(output, quicktest.DeepEquals, tc.want) + }) + } +} + +// TestChunkMarkdown tests the chunkMarkdown function +func TestChunkMarkdown(t *testing.T) { + c := quicktest.New(t) + + testCases := []struct { + name string + input ChunkTextInput + want ChunkTextOutput + }{ + { + name: "Valid Markdown Chunking", + input: ChunkTextInput{ + Text: "This is a sample text for chunking.\n\nAnother paragraph.", + Strategy: Strategy{ + Setting: Setting{ + ChunkMethod: "Markdown", + ChunkSize: 10, + ModelName: "gpt-3.5-turbo", + }, + }, + }, + want: ChunkTextOutput{ + ChunkNum: 2, + TextChunks: []TextChunk{ + { + Text: "This is a sample text for chunking.", + StartPosition: 0, + EndPosition: 29, + TokenCount: 7, + }, + { + Text: "Another paragraph.", + StartPosition: 30, + EndPosition: 47, + TokenCount: 2, + }, + }, + TokenCount: 9, + ChunksTokenCount: 9, + }, + }, + } + + for _, tc := range testCases { + c.Run(tc.name, func(c *quicktest.C) { + output, err := chunkMarkdown(tc.input) + c.Assert(err, quicktest.IsNil) + c.Assert(output, quicktest.DeepEquals, tc.want) + }) + } +} + // MockOutput simulates the output for testing type MockOutput struct { data []*structpb.Struct @@ -295,6 +568,3 @@ func (m *MockError) HandleError(ctx context.Context, err error) { func (m *MockError) Error(ctx context.Context, err error) { m.err = err } - - - From 17c2513ca476b8271fbbe00d0de28837ba35cb59 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:22:39 +0530 Subject: [PATCH 32/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 273 -------------------- 1 file changed, 273 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index b7a06000c..d08d99a4b 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -271,279 +271,6 @@ func TestChunkMarkdown(t *testing.T) { } } -package text - -import ( - "context" - "encoding/json" - "os" - "testing" - - "github.com/frankban/quicktest" - "github.com/instill-ai/pipeline-backend/pkg/component/base" - "google.golang.org/protobuf/types/known/structpb" -) - -// TestInit tests the Init function -func TestInit(t *testing.T) { - c := quicktest.New(t) - - c.Run("Initialize Component", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass a base.Component instance - c.Assert(component, quicktest.IsNotNil) - }) -} - -// TestCleanData tests the CleanData function -func TestCleanData(t *testing.T) { - c := quicktest.New(t) - - testCases := []struct { - name string - input CleanDataInput - want CleanDataOutput - }{ - { - name: "Valid Regex Exclusion", - input: CleanDataInput{ - Texts: []string{"Keep this text", "Remove this text"}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"Remove"}, - }, - }, - want: CleanDataOutput{ - CleanedTexts: []string{"Keep this text"}, - }, - }, - { - name: "Valid Substring Exclusion", - input: CleanDataInput{ - Texts: []string{"Keep this text", "Remove this text"}, - Setting: DataCleaningSetting{ - CleanMethod: "Substring", - ExcludeSubstrs: []string{"Remove"}, - IncludeSubstrs: []string{"Keep"}, - CaseSensitive: false, - }, - }, - want: CleanDataOutput{ - CleanedTexts: []string{"Keep this text"}, - }, - }, - { - name: "No Exclusion", - input: CleanDataInput{ - Texts: []string{"Text without exclusions"}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - }, - }, - want: CleanDataOutput{ - CleanedTexts: []string{"Text without exclusions"}, - }, - }, - } - - for _, tc := range testCases { - c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(tc.input) - c.Assert(output, quicktest.DeepEquals, tc.want) - }) - } -} - -// TestFetchJSONInput tests the FetchJSONInput function -func TestFetchJSONInput(t *testing.T) { - c := quicktest.New(t) - - // Create a temporary JSON file for testing - tempFile, err := os.CreateTemp("", "test_input.json") - if err != nil { - t.Fatalf("failed to create temp file: %v", err) - } - defer os.Remove(tempFile.Name()) // Clean up - - // Write test data to the file - testData := CleanDataInput{ - Texts: []string{"Sample text 1.", "Sample text 2."}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"exclude this"}, - }, - } - data, _ := json.Marshal(testData) - if _, err := tempFile.Write(data); err != nil { - t.Fatalf("failed to write to temp file: %v", err) - } - - // Test FetchJSONInput - c.Run("Fetch JSON Input", func(c *quicktest.C) { - input, err := FetchJSONInput(tempFile.Name()) - c.Assert(err, quicktest.IsNil) - c.Assert(input, quicktest.DeepEquals, testData) - }) -} - -// TestExecute tests the Execute function of the execution struct -func TestExecute(t *testing.T) { - c := quicktest.New(t) - - c.Run("Execute Task", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass a base.Component instance - execution, err := component.CreateExecution(base.ComponentExecution{ - Component: component, - Task: taskDataCleansing, - }) - c.Assert(err, quicktest.IsNil) - - // Create a mock job - mockJob := &base.Job{ - Output: &MockOutput{}, // Implement MockOutput to simulate job output - Error: &MockError{}, // Implement MockError to simulate error handling - } - - err = execution.Execute(context.Background(), []*base.Job{mockJob}) - c.Assert(err, quicktest.IsNil) - }) -} - -// TestChunkTextFunctionality tests the chunkText function -func TestChunkTextFunctionality(t *testing.T) { - c := quicktest.New(t) - - testCases := []struct { - name string - input ChunkTextInput - want ChunkTextOutput - }{ - { - name: "Valid Token Chunking", - input: ChunkTextInput{ - Text: "This is a sample text for chunking.", - Strategy: Strategy{ - Setting: Setting{ - ChunkMethod: "Token", - ChunkSize: 10, - ModelName: "gpt-3.5-turbo", - }, - }, - }, - want: ChunkTextOutput{ - ChunkNum: 2, - TextChunks: []TextChunk{ - { - Text: "This is a sample", - StartPosition: 0, - EndPosition: 13, - TokenCount: 5, - }, - { - Text: "text for chunking.", - StartPosition: 14, - EndPosition: 29, - TokenCount: 4, - }, - }, - TokenCount: 9, - ChunksTokenCount: 9, - }, - }, - { - name: "Valid Recursive Chunking", - input: ChunkTextInput{ - Text: "This is a sample text for chunking.", - Strategy: Strategy{ - Setting: Setting{ - ChunkMethod: "Recursive", - ChunkSize: 10, - Separators: []string{" ", "\n"}, - }, - }, - }, - want: ChunkTextOutput{ - ChunkNum: 2, - TextChunks: []TextChunk{ - { - Text: "This is a sample", - StartPosition: 0, - EndPosition: 13, - TokenCount: 4, - }, - { - Text: "text for chunking.", - StartPosition: 14, - EndPosition: 29, - TokenCount: 4, - }, - }, - TokenCount: 8, - ChunksTokenCount: 8, - }, - }, - } - - for _, tc := range testCases { - c.Run(tc.name, func(c *quicktest.C) { - output, err := chunkText(tc.input) - c.Assert(err, quicktest.IsNil) - c.Assert(output, quicktest.DeepEquals, tc.want) - }) - } -} - -// TestChunkMarkdown tests the chunkMarkdown function -func TestChunkMarkdown(t *testing.T) { - c := quicktest.New(t) - - testCases := []struct { - name string - input ChunkTextInput - want ChunkTextOutput - }{ - { - name: "Valid Markdown Chunking", - input: ChunkTextInput{ - Text: "This is a sample text for chunking.\n\nAnother paragraph.", - Strategy: Strategy{ - Setting: Setting{ - ChunkMethod: "Markdown", - ChunkSize: 10, - ModelName: "gpt-3.5-turbo", - }, - }, - }, - want: ChunkTextOutput{ - ChunkNum: 2, - TextChunks: []TextChunk{ - { - Text: "This is a sample text for chunking.", - StartPosition: 0, - EndPosition: 29, - TokenCount: 7, - }, - { - Text: "Another paragraph.", - StartPosition: 30, - EndPosition: 47, - TokenCount: 2, - }, - }, - TokenCount: 9, - ChunksTokenCount: 9, - }, - }, - } - - for _, tc := range testCases { - c.Run(tc.name, func(c *quicktest.C) { - output, err := chunkMarkdown(tc.input) - c.Assert(err, quicktest.IsNil) - c.Assert(output, quicktest.DeepEquals, tc.want) - }) - } -} - // MockOutput simulates the output for testing type MockOutput struct { data []*structpb.Struct From 1def477eb84864fd29dce1365b9e33d54b97d9f4 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:26:11 +0530 Subject: [PATCH 33/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index d08d99a4b..28d5679f2 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -124,13 +124,10 @@ func TestExecute(t *testing.T) { }) c.Assert(err, quicktest.IsNil) - // Create a mock job - mockJob := &base.Job{ - Output: &MockOutput{}, // Implement MockOutput to simulate job output - Error: &MockError{}, // Implement MockError to simulate error handling - } + // You may need to create a mock or adapt your execution to not require OutputWriter + // Update this section based on your execution logic - err = execution.Execute(context.Background(), []*base.Job{mockJob}) + err = execution.Execute(context.Background(), nil) // Pass nil or adapt according to your needs c.Assert(err, quicktest.IsNil) }) } @@ -271,16 +268,6 @@ func TestChunkMarkdown(t *testing.T) { } } -// MockOutput simulates the output for testing -type MockOutput struct { - data []*structpb.Struct -} - -func (m *MockOutput) Write(ctx context.Context, data *structpb.Struct) error { - m.data = append(m.data, data) - return nil -} - // MockError simulates an error for testing type MockError struct { err error From 085be3920e15756a7eede99f13ed30f40e3b4f8b Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:27:28 +0530 Subject: [PATCH 34/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 28d5679f2..091e39101 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -8,7 +8,6 @@ import ( "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" - "google.golang.org/protobuf/types/known/structpb" ) // TestInit tests the Init function From 0f59887083e662718ef2039b25e485f923fa5c9b Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:30:48 +0530 Subject: [PATCH 35/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 363 +++++++------------- 1 file changed, 128 insertions(+), 235 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 091e39101..3a053c5ff 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -3,281 +3,174 @@ package text import ( "context" "encoding/json" - "os" + "errors" "testing" - "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" ) -// TestInit tests the Init function -func TestInit(t *testing.T) { - c := quicktest.New(t) +// Mocking base.Job for testing +type MockJob struct { + Input CleanDataInput + Output MockOutput + Error MockError +} - c.Run("Initialize Component", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass a base.Component instance - c.Assert(component, quicktest.IsNotNil) - }) +type MockOutput struct { + Data []CleanDataOutput } -// TestCleanData tests the CleanData function -func TestCleanData(t *testing.T) { - c := quicktest.New(t) +func (m *MockOutput) WriteData(ctx context.Context, data CleanDataOutput) error { + m.Data = append(m.Data, data) + return nil // Simulate successful write +} - testCases := []struct { - name string - input CleanDataInput - want CleanDataOutput - }{ - { - name: "Valid Regex Exclusion", - input: CleanDataInput{ - Texts: []string{"Keep this text", "Remove this text"}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"Remove"}, - }, - }, - want: CleanDataOutput{ - CleanedTexts: []string{"Keep this text"}, - }, - }, - { - name: "Valid Substring Exclusion", - input: CleanDataInput{ - Texts: []string{"Keep this text", "Remove this text"}, - Setting: DataCleaningSetting{ - CleanMethod: "Substring", - ExcludeSubstrs: []string{"Remove"}, - IncludeSubstrs: []string{"Keep"}, - CaseSensitive: false, - }, - }, - want: CleanDataOutput{ - CleanedTexts: []string{"Keep this text"}, - }, - }, - { - name: "No Exclusion", - input: CleanDataInput{ - Texts: []string{"Text without exclusions"}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - }, - }, - want: CleanDataOutput{ - CleanedTexts: []string{"Text without exclusions"}, - }, +type MockError struct { + Errors []error +} + +func (m *MockError) Error(ctx context.Context, err error) { + m.Errors = append(m.Errors, err) +} + +// Test for CleanData function +func TestCleanData(t *testing.T) { + input := CleanDataInput{ + Texts: []string{"Hello World", "This is a test.", "Another line."}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"World"}, + IncludePatterns: []string{}, }, } - for _, tc := range testCases { - c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(tc.input) - c.Assert(output, quicktest.DeepEquals, tc.want) - }) + expectedOutput := CleanDataOutput{ + CleanedTexts: []string{"This is a test.", "Another line."}, } -} -// TestFetchJSONInput tests the FetchJSONInput function -func TestFetchJSONInput(t *testing.T) { - c := quicktest.New(t) + result := CleanData(input) + if len(result.CleanedTexts) != len(expectedOutput.CleanedTexts) { + t.Errorf("Expected %d cleaned texts, got %d", len(expectedOutput.CleanedTexts), len(result.CleanedTexts)) + } - // Create a temporary JSON file for testing - tempFile, err := os.CreateTemp("", "test_input.json") - if err != nil { - t.Fatalf("failed to create temp file: %v", err) + for i, text := range result.CleanedTexts { + if text != expectedOutput.CleanedTexts[i] { + t.Errorf("Expected cleaned text '%s', got '%s'", expectedOutput.CleanedTexts[i], text) + } } - defer os.Remove(tempFile.Name()) // Clean up +} - // Write test data to the file - testData := CleanDataInput{ - Texts: []string{"Sample text 1.", "Sample text 2."}, +// Test for CleanChunkedData function +func TestCleanChunkedData(t *testing.T) { + input := CleanDataInput{ + Texts: []string{"Hello World", "This is a test.", "Another line."}, Setting: DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"exclude this"}, + CleanMethod: "Substring", + ExcludeSubstrs: []string{"World"}, + IncludeSubstrs: []string{}, }, } - data, _ := json.Marshal(testData) - if _, err := tempFile.Write(data); err != nil { - t.Fatalf("failed to write to temp file: %v", err) - } - // Test FetchJSONInput - c.Run("Fetch JSON Input", func(c *quicktest.C) { - input, err := FetchJSONInput(tempFile.Name()) - c.Assert(err, quicktest.IsNil) - c.Assert(input, quicktest.DeepEquals, testData) - }) -} + expectedOutput := []CleanDataOutput{ + {CleanedTexts: []string{"This is a test.", "Another line."}}, + } -// TestExecute tests the Execute function of the execution struct -func TestExecute(t *testing.T) { - c := quicktest.New(t) + result := CleanChunkedData(input, 2) - c.Run("Execute Task", func(c *quicktest.C) { - component := Init(base.Component{}) // Pass a base.Component instance - execution, err := component.CreateExecution(base.ComponentExecution{ - Component: component, - Task: taskDataCleansing, - }) - c.Assert(err, quicktest.IsNil) + if len(result) != len(expectedOutput) { + t.Errorf("Expected %d chunked outputs, got %d", len(expectedOutput), len(result)) + } - // You may need to create a mock or adapt your execution to not require OutputWriter - // Update this section based on your execution logic + for i, chunk := range result { + if len(chunk.CleanedTexts) != len(expectedOutput[i].CleanedTexts) { + t.Errorf("Expected %d cleaned texts in chunk, got %d", len(expectedOutput[i].CleanedTexts), len(chunk.CleanedTexts)) + } - err = execution.Execute(context.Background(), nil) // Pass nil or adapt according to your needs - c.Assert(err, quicktest.IsNil) - }) + for j, text := range chunk.CleanedTexts { + if text != expectedOutput[i].CleanedTexts[j] { + t.Errorf("Expected cleaned text '%s', got '%s'", expectedOutput[i].CleanedTexts[j], text) + } + } + } } -// TestChunkTextFunctionality tests the chunkText function -func TestChunkTextFunctionality(t *testing.T) { - c := quicktest.New(t) +// Test for FetchJSONInput function with valid JSON +func TestFetchJSONInput_ValidJSON(t *testing.T) { + // Create a temporary JSON file + jsonData := `{"texts": ["Sample text"], "setting": {"clean-method": "Regex"}}` + tempFile, err := ioutil.TempFile("", "input.json") + if err != nil { + t.Fatalf("failed to create temp file: %v", err) + } + defer os.Remove(tempFile.Name()) - testCases := []struct { - name string - input ChunkTextInput - want ChunkTextOutput - }{ - { - name: "Valid Token Chunking", - input: ChunkTextInput{ - Text: "This is a sample text for chunking.", - Strategy: Strategy{ - Setting: Setting{ - ChunkMethod: "Token", - ChunkSize: 10, - ModelName: "gpt-3.5-turbo", - }, - }, - }, - want: ChunkTextOutput{ - ChunkNum: 2, - TextChunks: []TextChunk{ - { - Text: "This is a sample", - StartPosition: 0, - EndPosition: 13, - TokenCount: 5, - }, - { - Text: "text for chunking.", - StartPosition: 14, - EndPosition: 29, - TokenCount: 4, - }, - }, - TokenCount: 9, - ChunksTokenCount: 9, - }, - }, - { - name: "Valid Recursive Chunking", - input: ChunkTextInput{ - Text: "This is a sample text for chunking.", - Strategy: Strategy{ - Setting: Setting{ - ChunkMethod: "Recursive", - ChunkSize: 10, - Separators: []string{" ", "\n"}, - }, - }, - }, - want: ChunkTextOutput{ - ChunkNum: 2, - TextChunks: []TextChunk{ - { - Text: "This is a sample", - StartPosition: 0, - EndPosition: 13, - TokenCount: 4, - }, - { - Text: "text for chunking.", - StartPosition: 14, - EndPosition: 29, - TokenCount: 4, - }, - }, - TokenCount: 8, - ChunksTokenCount: 8, - }, + if _, err := tempFile.Write([]byte(jsonData)); err != nil { + t.Fatalf("failed to write to temp file: %v", err) + } + tempFile.Close() + + expected := CleanDataInput{ + Texts: []string{"Sample text"}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", }, } - for _, tc := range testCases { - c.Run(tc.name, func(c *quicktest.C) { - output, err := chunkText(tc.input) - c.Assert(err, quicktest.IsNil) - c.Assert(output, quicktest.DeepEquals, tc.want) - }) + result, err := FetchJSONInput(tempFile.Name()) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if result != expected { + t.Errorf("expected %+v, got %+v", expected, result) } } -// TestChunkMarkdown tests the chunkMarkdown function -func TestChunkMarkdown(t *testing.T) { - c := quicktest.New(t) +// Test for FetchJSONInput function with an invalid JSON file +func TestFetchJSONInput_InvalidJSON(t *testing.T) { + tempFile, err := ioutil.TempFile("", "invalid.json") + if err != nil { + t.Fatalf("failed to create temp file: %v", err) + } + defer os.Remove(tempFile.Name()) - testCases := []struct { - name string - input ChunkTextInput - want ChunkTextOutput - }{ - { - name: "Valid Markdown Chunking", - input: ChunkTextInput{ - Text: "This is a sample text for chunking.\n\nAnother paragraph.", - Strategy: Strategy{ - Setting: Setting{ - ChunkMethod: "Markdown", - ChunkSize: 10, - ModelName: "gpt-3.5-turbo", - }, - }, - }, - want: ChunkTextOutput{ - ChunkNum: 2, - TextChunks: []TextChunk{ - { - Text: "This is a sample text for chunking.", - StartPosition: 0, - EndPosition: 29, - TokenCount: 7, - }, - { - Text: "Another paragraph.", - StartPosition: 30, - EndPosition: 47, - TokenCount: 2, - }, - }, - TokenCount: 9, - ChunksTokenCount: 9, - }, - }, + // Write invalid JSON data + if _, err := tempFile.Write([]byte("{invalid json}")); err != nil { + t.Fatalf("failed to write to temp file: %v", err) } + tempFile.Close() - for _, tc := range testCases { - c.Run(tc.name, func(c *quicktest.C) { - output, err := chunkMarkdown(tc.input) - c.Assert(err, quicktest.IsNil) - c.Assert(output, quicktest.DeepEquals, tc.want) - }) + _, err = FetchJSONInput(tempFile.Name()) + if err == nil { + t.Fatalf("expected an error, got nil") } } -// MockError simulates an error for testing -type MockError struct { - err error -} +// Test for Execute function +func TestExecute(t *testing.T) { + ctx := context.Background() + mockJob := &MockJob{} + exec := execution{} -// HandleError handles an error for testing, implementing the ErrorHandler interface -func (m *MockError) HandleError(ctx context.Context, err error) { - m.err = err -} + // Prepare a valid job with the cleansing task + mockJob.Input = CleanDataInput{ + Texts: []string{"Hello World", "Goodbye World"}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"World"}, + }, + } -// Error returns the stored error, implementing the ErrorHandler interface -func (m *MockError) Error(ctx context.Context, err error) { - m.err = err + jobs := []*base.Job{mockJob} + + // Call the Execute method + err := exec.Execute(ctx, jobs) + if err != nil { + t.Errorf("expected no error, got %v", err) + } + + // Check if the output has cleaned texts + if len(mockJob.Output.Data) == 0 { + t.Errorf("expected cleaned output, got none") + } } From 9872db42b1aba4f62347f7b46eef38db784f0a83 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:34:51 +0530 Subject: [PATCH 36/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 226 ++++++-------------- 1 file changed, 65 insertions(+), 161 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 3a053c5ff..86323abdb 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -2,175 +2,79 @@ package text import ( "context" - "encoding/json" - "errors" "testing" + "github.com/frankban/quicktest" + "google.golang.org/protobuf/types/known/structpb" + "github.com/instill-ai/pipeline-backend/pkg/component/base" + "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) -// Mocking base.Job for testing -type MockJob struct { - Input CleanDataInput - Output MockOutput - Error MockError -} - -type MockOutput struct { - Data []CleanDataOutput -} - -func (m *MockOutput) WriteData(ctx context.Context, data CleanDataOutput) error { - m.Data = append(m.Data, data) - return nil // Simulate successful write -} - -type MockError struct { - Errors []error -} - -func (m *MockError) Error(ctx context.Context, err error) { - m.Errors = append(m.Errors, err) -} - -// Test for CleanData function -func TestCleanData(t *testing.T) { - input := CleanDataInput{ - Texts: []string{"Hello World", "This is a test.", "Another line."}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"World"}, - IncludePatterns: []string{}, +func TestOperator(t *testing.T) { + c := quicktest.New(t) + + testcases := []struct { + name string + task string + input structpb.Struct + }{ + { + name: "chunk texts", + task: "TASK_CHUNK_TEXT", + input: structpb.Struct{ + Fields: map[string]*structpb.Value{ + "text": {Kind: &structpb.Value_StringValue{StringValue: "Hello world. This is a test."}}, + "strategy": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "setting": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ + Fields: map[string]*structpb.Value{ + "chunk-method": {Kind: &structpb.Value_StringValue{StringValue: "Token"}}, + }, + }}}, + }, + }}}, + }, + }, }, - } - - expectedOutput := CleanDataOutput{ - CleanedTexts: []string{"This is a test.", "Another line."}, - } - - result := CleanData(input) - if len(result.CleanedTexts) != len(expectedOutput.CleanedTexts) { - t.Errorf("Expected %d cleaned texts, got %d", len(expectedOutput.CleanedTexts), len(result.CleanedTexts)) - } - - for i, text := range result.CleanedTexts { - if text != expectedOutput.CleanedTexts[i] { - t.Errorf("Expected cleaned text '%s', got '%s'", expectedOutput.CleanedTexts[i], text) - } - } -} - -// Test for CleanChunkedData function -func TestCleanChunkedData(t *testing.T) { - input := CleanDataInput{ - Texts: []string{"Hello World", "This is a test.", "Another line."}, - Setting: DataCleaningSetting{ - CleanMethod: "Substring", - ExcludeSubstrs: []string{"World"}, - IncludeSubstrs: []string{}, + { + name: "error case", + task: "FAKE_TASK", + input: structpb.Struct{}, }, } - - expectedOutput := []CleanDataOutput{ - {CleanedTexts: []string{"This is a test.", "Another line."}}, - } - - result := CleanChunkedData(input, 2) - - if len(result) != len(expectedOutput) { - t.Errorf("Expected %d chunked outputs, got %d", len(expectedOutput), len(result)) - } - - for i, chunk := range result { - if len(chunk.CleanedTexts) != len(expectedOutput[i].CleanedTexts) { - t.Errorf("Expected %d cleaned texts in chunk, got %d", len(expectedOutput[i].CleanedTexts), len(chunk.CleanedTexts)) - } - - for j, text := range chunk.CleanedTexts { - if text != expectedOutput[i].CleanedTexts[j] { - t.Errorf("Expected cleaned text '%s', got '%s'", expectedOutput[i].CleanedTexts[j], text) - } - } - } -} - -// Test for FetchJSONInput function with valid JSON -func TestFetchJSONInput_ValidJSON(t *testing.T) { - // Create a temporary JSON file - jsonData := `{"texts": ["Sample text"], "setting": {"clean-method": "Regex"}}` - tempFile, err := ioutil.TempFile("", "input.json") - if err != nil { - t.Fatalf("failed to create temp file: %v", err) - } - defer os.Remove(tempFile.Name()) - - if _, err := tempFile.Write([]byte(jsonData)); err != nil { - t.Fatalf("failed to write to temp file: %v", err) - } - tempFile.Close() - - expected := CleanDataInput{ - Texts: []string{"Sample text"}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - }, - } - - result, err := FetchJSONInput(tempFile.Name()) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - if result != expected { - t.Errorf("expected %+v, got %+v", expected, result) - } -} - -// Test for FetchJSONInput function with an invalid JSON file -func TestFetchJSONInput_InvalidJSON(t *testing.T) { - tempFile, err := ioutil.TempFile("", "invalid.json") - if err != nil { - t.Fatalf("failed to create temp file: %v", err) - } - defer os.Remove(tempFile.Name()) - - // Write invalid JSON data - if _, err := tempFile.Write([]byte("{invalid json}")); err != nil { - t.Fatalf("failed to write to temp file: %v", err) - } - tempFile.Close() - - _, err = FetchJSONInput(tempFile.Name()) - if err == nil { - t.Fatalf("expected an error, got nil") - } -} - -// Test for Execute function -func TestExecute(t *testing.T) { + bc := base.Component{} ctx := context.Background() - mockJob := &MockJob{} - exec := execution{} - - // Prepare a valid job with the cleansing task - mockJob.Input = CleanDataInput{ - Texts: []string{"Hello World", "Goodbye World"}, - Setting: DataCleaningSetting{ - CleanMethod: "Regex", - ExcludePatterns: []string{"World"}, - }, - } - - jobs := []*base.Job{mockJob} - - // Call the Execute method - err := exec.Execute(ctx, jobs) - if err != nil { - t.Errorf("expected no error, got %v", err) - } - - // Check if the output has cleaned texts - if len(mockJob.Output.Data) == 0 { - t.Errorf("expected cleaned output, got none") + for i := range testcases { + tc := &testcases[i] + c.Run(tc.name, func(c *quicktest.C) { + component := Init(bc) + c.Assert(component, quicktest.IsNotNil) + + execution, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: tc.task, + }) + c.Assert(err, quicktest.IsNil) + c.Assert(execution, quicktest.IsNotNil) + + ir, ow, eh, job := mock.GenerateMockJob(c) + ir.ReadMock.Return(&tc.input, nil) + ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { + if tc.name == "error case" { + c.Assert(output, quicktest.IsNil) + return + } + return nil + }) + eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { + if tc.name == "error case" { + c.Assert(err, quicktest.ErrorMatches, "not supported task: FAKE_TASK") + } + }) + err = execution.Execute(ctx, []*base.Job{job}) + c.Assert(err, quicktest.IsNil) + }) } } + From 609822bebc6946b5962fb8296286b28982f681f0 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:43:25 +0530 Subject: [PATCH 37/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 86323abdb..81c9085ff 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -74,7 +74,7 @@ func TestOperator(t *testing.T) { }) err = execution.Execute(ctx, []*base.Job{job}) c.Assert(err, quicktest.IsNil) + }) } } - From 68225dc43fb9a4deb7ecc6e8d77b2a5886125aca Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:46:48 +0530 Subject: [PATCH 38/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 126 ++++++++++++++++---- 1 file changed, 103 insertions(+), 23 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 81c9085ff..3d4b34024 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -5,48 +5,42 @@ import ( "testing" "github.com/frankban/quicktest" - "google.golang.org/protobuf/types/known/structpb" - "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) +// TestOperator verifies the functionality of the component's chunking feature. func TestOperator(t *testing.T) { c := quicktest.New(t) testcases := []struct { name string task string - input structpb.Struct + input ChunkTextInput }{ { name: "chunk texts", task: "TASK_CHUNK_TEXT", - input: structpb.Struct{ - Fields: map[string]*structpb.Value{ - "text": {Kind: &structpb.Value_StringValue{StringValue: "Hello world. This is a test."}}, - "strategy": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "setting": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{ - Fields: map[string]*structpb.Value{ - "chunk-method": {Kind: &structpb.Value_StringValue{StringValue: "Token"}}, - }, - }}}, - }, - }}}, + input: ChunkTextInput{ + Text: "Hello world. This is a test.", + Strategy: Strategy{ + Setting: Setting{ + ChunkMethod: "Token", + }, }, }, }, { - name: "error case", - task: "FAKE_TASK", - input: structpb.Struct{}, + name: "error case", + task: "FAKE_TASK", + input: ChunkTextInput{}, }, } + bc := base.Component{} ctx := context.Background() - for i := range testcases { - tc := &testcases[i] + for _, tc := range testcases { + tc := tc // capture range variable c.Run(tc.name, func(c *quicktest.C) { component := Init(bc) c.Assert(component, quicktest.IsNotNil) @@ -59,22 +53,108 @@ func TestOperator(t *testing.T) { c.Assert(execution, quicktest.IsNotNil) ir, ow, eh, job := mock.GenerateMockJob(c) - ir.ReadMock.Return(&tc.input, nil) - ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { + + // Set up mock data reading + ir.ReadDataMock.Set(func(ctx context.Context, v interface{}) error { + *v.(*ChunkTextInput) = tc.input + return nil + }) + + // Set up mock data writing and error handling + ow.WriteDataMock.Optional().Set(func(ctx context.Context, output interface{}) error { if tc.name == "error case" { c.Assert(output, quicktest.IsNil) - return } return nil }) + + if tc.name == "error case" { + ir.ReadDataMock.Optional() + } + eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { if tc.name == "error case" { c.Assert(err, quicktest.ErrorMatches, "not supported task: FAKE_TASK") } }) + + // Execute the task and assert no errors err = execution.Execute(ctx, []*base.Job{job}) c.Assert(err, quicktest.IsNil) + }) + } +} +// TestCleanData verifies the data cleaning functionality. +func TestCleanData(t *testing.T) { + c := quicktest.New(t) + + testcases := []struct { + name string + input CleanDataInput + expected CleanDataOutput + expectedError bool + }{ + { + name: "clean with regex", + input: CleanDataInput{ + Texts: []string{"Hello World!", "This is a test.", "Goodbye!"}, + Setting: DataCleaningSetting{ + CleanMethod: "Regex", + ExcludePatterns: []string{"Goodbye"}, + }, + }, + expected: CleanDataOutput{ + CleanedTexts: []string{"Hello World!", "This is a test."}, + }, + expectedError: false, + }, + { + name: "clean with substrings", + input: CleanDataInput{ + Texts: []string{"Hello World!", "This is a test.", "Goodbye!"}, + Setting: DataCleaningSetting{ + CleanMethod: "Substring", + ExcludeSubstrs: []string{"Goodbye"}, + }, + }, + expected: CleanDataOutput{ + CleanedTexts: []string{"Hello World!", "This is a test."}, + }, + expectedError: false, + }, + { + name: "no valid cleaning method", + input: CleanDataInput{ + Texts: []string{"Hello World!", "This is a test."}, + Setting: DataCleaningSetting{ + CleanMethod: "InvalidMethod", + }, + }, + expected: CleanDataOutput{ + CleanedTexts: []string{"Hello World!", "This is a test."}, + }, + expectedError: false, + }, + { + name: "error case", + input: CleanDataInput{ + Texts: []string{}, + Setting: DataCleaningSetting{}, + }, + expected: CleanDataOutput{}, + expectedError: true, + }, + } + + for _, tc := range testcases { + tc := tc // capture range variable + c.Run(tc.name, func(c *quicktest.C) { + output := CleanData(tc.input) + c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) + if tc.expectedError { + c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) + } }) } } From baa20b5dcbc128e72045737948162c650aba41f3 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 23:13:34 +0530 Subject: [PATCH 39/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 38 +++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 3d4b34024..a063c49ed 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -85,6 +85,38 @@ func TestOperator(t *testing.T) { } } +// CleanDataInput represents the input for the cleaning process. +type CleanDataInput struct { + Texts []string + Setting DataCleaningSetting +} + +// CleanDataOutput represents the output after cleaning the data. +type CleanDataOutput struct { + CleanedTexts []string +} + +// DataCleaningSetting represents settings for data cleaning. +type DataCleaningSetting struct { + CleanMethod string + ExcludePatterns []string + ExcludeSubstrs []string +} + +// CleanData performs data cleaning based on the provided input. +func CleanData(input CleanDataInput) CleanDataOutput { + if len(input.Texts) == 0 { + return CleanDataOutput{CleanedTexts: []string{}} // Return an empty slice instead of nil + } + + var cleanedTexts []string + // Implement the cleaning logic here... + // For now, just return the texts as they are to demonstrate functionality. + cleanedTexts = append(cleanedTexts, input.Texts...) // Example logic: No actual cleaning done. + + return CleanDataOutput{CleanedTexts: cleanedTexts} +} + // TestCleanData verifies the data cleaning functionality. func TestCleanData(t *testing.T) { c := quicktest.New(t) @@ -142,7 +174,9 @@ func TestCleanData(t *testing.T) { Texts: []string{}, Setting: DataCleaningSetting{}, }, - expected: CleanDataOutput{}, + expected: CleanDataOutput{ + CleanedTexts: []string{}, // Expect an empty slice instead of nil + }, expectedError: true, }, } @@ -153,7 +187,7 @@ func TestCleanData(t *testing.T) { output := CleanData(tc.input) c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) if tc.expectedError { - c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) + c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) // Ensure it is still an empty slice } }) } From 531363d2484febecb2489ede823fe99129fa4401 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 23:18:40 +0530 Subject: [PATCH 40/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 38 ++------------------- 1 file changed, 2 insertions(+), 36 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index a063c49ed..3d4b34024 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -85,38 +85,6 @@ func TestOperator(t *testing.T) { } } -// CleanDataInput represents the input for the cleaning process. -type CleanDataInput struct { - Texts []string - Setting DataCleaningSetting -} - -// CleanDataOutput represents the output after cleaning the data. -type CleanDataOutput struct { - CleanedTexts []string -} - -// DataCleaningSetting represents settings for data cleaning. -type DataCleaningSetting struct { - CleanMethod string - ExcludePatterns []string - ExcludeSubstrs []string -} - -// CleanData performs data cleaning based on the provided input. -func CleanData(input CleanDataInput) CleanDataOutput { - if len(input.Texts) == 0 { - return CleanDataOutput{CleanedTexts: []string{}} // Return an empty slice instead of nil - } - - var cleanedTexts []string - // Implement the cleaning logic here... - // For now, just return the texts as they are to demonstrate functionality. - cleanedTexts = append(cleanedTexts, input.Texts...) // Example logic: No actual cleaning done. - - return CleanDataOutput{CleanedTexts: cleanedTexts} -} - // TestCleanData verifies the data cleaning functionality. func TestCleanData(t *testing.T) { c := quicktest.New(t) @@ -174,9 +142,7 @@ func TestCleanData(t *testing.T) { Texts: []string{}, Setting: DataCleaningSetting{}, }, - expected: CleanDataOutput{ - CleanedTexts: []string{}, // Expect an empty slice instead of nil - }, + expected: CleanDataOutput{}, expectedError: true, }, } @@ -187,7 +153,7 @@ func TestCleanData(t *testing.T) { output := CleanData(tc.input) c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) if tc.expectedError { - c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) // Ensure it is still an empty slice + c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) } }) } From 00c35dbe7c604180b6946375809c3a883ba84106 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 23:29:58 +0530 Subject: [PATCH 41/73] Update main.go --- pkg/component/operator/text/v0/main.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index be5528cba..6a057af7c 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -229,6 +229,16 @@ func FetchJSONInput(filePath string) (CleanDataInput, error) { return input, nil } +func CleanData(input CleanDataInput) CleanDataOutput { + if input.Texts == nil || len(input.Texts) == 0 { + return CleanDataOutput{ + CleanedTexts: []string{}, // Return empty output for nil or empty input + } + } + + // Your existing cleaning logic here... +} + // Execute executes the derived execution for the data cleansing task func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { for _, job := range jobs { From c39576e36bd17bc470d0326e8376c7ef218191ba Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 23:35:00 +0530 Subject: [PATCH 42/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 3d4b34024..00a8c2a82 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -137,11 +137,16 @@ func TestCleanData(t *testing.T) { expectedError: false, }, { - name: "error case", + name: "error case - empty input", input: CleanDataInput{ Texts: []string{}, - Setting: DataCleaningSetting{}, - }, + }, + expected: CleanDataOutput{}, + expectedError: true, + }, + { + name: "error case - nil input", + input: nil, expected: CleanDataOutput{}, expectedError: true, }, From 49fc57d29ba79e41eeff48931f0aa5fb80d73807 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 23:40:48 +0530 Subject: [PATCH 43/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 27 ++++----------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 00a8c2a82..1ea52f98c 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -54,13 +54,11 @@ func TestOperator(t *testing.T) { ir, ow, eh, job := mock.GenerateMockJob(c) - // Set up mock data reading ir.ReadDataMock.Set(func(ctx context.Context, v interface{}) error { *v.(*ChunkTextInput) = tc.input return nil }) - // Set up mock data writing and error handling ow.WriteDataMock.Optional().Set(func(ctx context.Context, output interface{}) error { if tc.name == "error case" { c.Assert(output, quicktest.IsNil) @@ -78,7 +76,6 @@ func TestOperator(t *testing.T) { } }) - // Execute the task and assert no errors err = execution.Execute(ctx, []*base.Job{job}) c.Assert(err, quicktest.IsNil) }) @@ -131,22 +128,6 @@ func TestCleanData(t *testing.T) { CleanMethod: "InvalidMethod", }, }, - expected: CleanDataOutput{ - CleanedTexts: []string{"Hello World!", "This is a test."}, - }, - expectedError: false, - }, - { - name: "error case - empty input", - input: CleanDataInput{ - Texts: []string{}, - }, - expected: CleanDataOutput{}, - expectedError: true, - }, - { - name: "error case - nil input", - input: nil, expected: CleanDataOutput{}, expectedError: true, }, @@ -155,10 +136,12 @@ func TestCleanData(t *testing.T) { for _, tc := range testcases { tc := tc // capture range variable c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(tc.input) - c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) + output, err := CleanData(tc.input) if tc.expectedError { - c.Assert(len(output.CleanedTexts), quicktest.Equals, 0) + c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") + } else { + c.Assert(err, quicktest.IsNil) + c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) } }) } From 157a54e4abd2ae3fe3b3097064756b2a6403dd32 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Mon, 4 Nov 2024 23:43:43 +0530 Subject: [PATCH 44/73] Update main.go --- pkg/component/operator/text/v0/main.go | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index 6a057af7c..b2744ca19 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -199,7 +199,6 @@ func compileRegexPatterns(patterns []string) []*regexp.Regexp { for _, pattern := range patterns { re, err := regexp.Compile(pattern) if err != nil { - // Handle regex compilation errors appropriately continue // Skip this pattern if it fails } regexes = append(regexes, re) @@ -229,21 +228,10 @@ func FetchJSONInput(filePath string) (CleanDataInput, error) { return input, nil } -func CleanData(input CleanDataInput) CleanDataOutput { - if input.Texts == nil || len(input.Texts) == 0 { - return CleanDataOutput{ - CleanedTexts: []string{}, // Return empty output for nil or empty input - } - } - - // Your existing cleaning logic here... -} - // Execute executes the derived execution for the data cleansing task func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { for _, job := range jobs { if e.Task == taskDataCleansing { - // Fetch JSON input from a specified file cleanDataInput, err := FetchJSONInput("pkg/component/operator/text/v0/config/tasks.json") // Replace with your actual file path if err != nil { job.Error.Error(ctx, fmt.Errorf("failed to fetch input data for cleansing: %w", err)) @@ -253,9 +241,8 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { // Perform data cleansing cleanedDataOutput := CleanData(cleanDataInput) - // Optionally, clean the data in chunks // Define a chunk size; adjust as needed based on your requirements - chunkSize := 100 // Example chunk size + chunkSize := 100 chunkedOutputs := CleanChunkedData(cleanDataInput, chunkSize) // Write the cleaned output back to the job output @@ -265,7 +252,7 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { continue } - // Optionally handle the chunked outputs if needed + // Handle the chunked outputs if needed for _, chunk := range chunkedOutputs { err = job.Output.WriteData(ctx, chunk) if err != nil { From 0e87e365e3cbf6be18f63932eccfb5dd1f41000f Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 00:09:51 +0530 Subject: [PATCH 45/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 23 ++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 1ea52f98c..a53764393 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -133,16 +133,15 @@ func TestCleanData(t *testing.T) { }, } - for _, tc := range testcases { - tc := tc // capture range variable - c.Run(tc.name, func(c *quicktest.C) { - output, err := CleanData(tc.input) - if tc.expectedError { - c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") - } else { - c.Assert(err, quicktest.IsNil) - c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) - } - }) - } +for _, tc := range testcases { + tc := tc // capture range variable + c.Run(tc.name, func(c *quicktest.C) { + output, err := CleanData(tc.input) // Adjust this line if CleanData returns two values + if tc.expectedError { + c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") + } else { + c.Assert(err, quicktest.IsNil) + c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) + } + }) } From 4d9a87f3bd86aa7d2d80d118236585753cd2ac4e Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 00:11:55 +0530 Subject: [PATCH 46/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 23 +++++++++++---------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index a53764393..5d200ef94 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -133,15 +133,16 @@ func TestCleanData(t *testing.T) { }, } -for _, tc := range testcases { - tc := tc // capture range variable - c.Run(tc.name, func(c *quicktest.C) { - output, err := CleanData(tc.input) // Adjust this line if CleanData returns two values - if tc.expectedError { - c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") - } else { - c.Assert(err, quicktest.IsNil) - c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) - } - }) + for _, tc := range testcases { + tc := tc // capture range variable + c.Run(tc.name, func(c *quicktest.C) { + output, err := CleanData(tc.input) // Adjust this line if CleanData returns two values + if tc.expectedError { + c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") + } else { + c.Assert(err, quicktest.IsNil) + c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) + } + }) + } } From 8806464884b4b1f61c9bd64ed5547235b7a20aec Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 00:19:31 +0530 Subject: [PATCH 47/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 5d200ef94..fe9e0c45f 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -136,7 +136,7 @@ func TestCleanData(t *testing.T) { for _, tc := range testcases { tc := tc // capture range variable c.Run(tc.name, func(c *quicktest.C) { - output, err := CleanData(tc.input) // Adjust this line if CleanData returns two values + output, err := CleanData(tc.input) // Adjust this line if CleanData returns only CleanDataOutput if tc.expectedError { c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") } else { From bde265d4e221ab0f2f6235b9755a337f92dbd71c Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 00:21:55 +0530 Subject: [PATCH 48/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 24 +++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index fe9e0c45f..181690b59 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -134,15 +134,17 @@ func TestCleanData(t *testing.T) { } for _, tc := range testcases { - tc := tc // capture range variable - c.Run(tc.name, func(c *quicktest.C) { - output, err := CleanData(tc.input) // Adjust this line if CleanData returns only CleanDataOutput - if tc.expectedError { - c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") - } else { - c.Assert(err, quicktest.IsNil) - c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) - } - }) - } + tc := tc // capture range variable + c.Run(tc.name, func(c *quicktest.C) { + output := CleanData(tc.input) // Call CleanData and assign the output directly + if tc.expectedError { + // Since CleanData does not return an error, this block should handle your expected error scenario + c.Assert(output, quicktest.IsNil) // Adjust as needed based on your logic for errors + c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") + } else { + c.Assert(err, quicktest.IsNil) // No error expected here + c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) + } + }) } + From a9f57600df37ce4d98525d22b37fb7e521a6a532 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 00:24:27 +0530 Subject: [PATCH 49/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 23 +++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 181690b59..6b5a94486 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -134,17 +134,14 @@ func TestCleanData(t *testing.T) { } for _, tc := range testcases { - tc := tc // capture range variable - c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(tc.input) // Call CleanData and assign the output directly - if tc.expectedError { - // Since CleanData does not return an error, this block should handle your expected error scenario - c.Assert(output, quicktest.IsNil) // Adjust as needed based on your logic for errors - c.Assert(err, quicktest.ErrorMatches, "unsupported cleaning method: InvalidMethod") - } else { - c.Assert(err, quicktest.IsNil) // No error expected here - c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) - } - }) + tc := tc // capture range variable + c.Run(tc.name, func(c *quicktest.C) { + output := CleanData(tc.input) // Call CleanData and assign the output directly + if tc.expectedError { + c.Assert(output.CleanedTexts, quicktest.DeepEquals, []string{}) // Adjust based on your expected output for errors + } else { + c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) + } + }) + } // Make sure this closing brace is present } - From 1f13ae0eddad1350d171c91f19114b0ed08031f2 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 00:30:10 +0530 Subject: [PATCH 50/73] Update main.go --- pkg/component/operator/text/v0/main.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pkg/component/operator/text/v0/main.go b/pkg/component/operator/text/v0/main.go index b2744ca19..981ea1619 100644 --- a/pkg/component/operator/text/v0/main.go +++ b/pkg/component/operator/text/v0/main.go @@ -113,7 +113,7 @@ func CleanChunkedData(input CleanDataInput, chunkSize int) []CleanDataOutput { return outputs } -// cleanTextUsingRegex cleans the input texts using regular expressions based on the given settings +// cleanTextUsingRegex cleans the input texts using regular expressions based on the given settings. func cleanTextUsingRegex(inputTexts []string, settings DataCleaningSetting) []string { var cleanedTexts []string @@ -124,25 +124,26 @@ func cleanTextUsingRegex(inputTexts []string, settings DataCleaningSetting) []st for _, text := range inputTexts { include := true - // Exclude patterns + // Check for exclusion patterns for _, re := range excludeRegexes { if re.MatchString(text) { include = false - break + break // Stop checking if one exclusion pattern matches } } - // Include patterns - if include && len(includeRegexes) > 0 { - include = false + // If there are include patterns, check them + if len(includeRegexes) > 0 { + include = false // Reset include to false for include check for _, re := range includeRegexes { if re.MatchString(text) { include = true - break + break // Stop checking if one inclusion pattern matches } } } + // If the text passed both checks, add it to the cleaned texts if include { cleanedTexts = append(cleanedTexts, text) } From fccc3f396e5e4531f7c2417b9bddd059982172b2 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 00:35:03 +0530 Subject: [PATCH 51/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 6b5a94486..82383b0e0 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -121,17 +121,17 @@ func TestCleanData(t *testing.T) { expectedError: false, }, { - name: "no valid cleaning method", - input: CleanDataInput{ - Texts: []string{"Hello World!", "This is a test."}, - Setting: DataCleaningSetting{ - CleanMethod: "InvalidMethod", - }, - }, - expected: CleanDataOutput{}, - expectedError: true, + name: "no valid cleaning method", + input: CleanDataInput{ + Texts: []string{"Hello World!", "This is a test."}, + Setting: DataCleaningSetting{ + CleanMethod: "InvalidMethod", }, - } + }, + expected: CleanDataOutput{}, + expectedError: true, +}, + for _, tc := range testcases { tc := tc // capture range variable From 37fe3c6ed4868a0c7dc48a1095bba7ede263f200 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:59:52 +0530 Subject: [PATCH 52/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 82383b0e0..a33c58f3d 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -136,12 +136,14 @@ func TestCleanData(t *testing.T) { for _, tc := range testcases { tc := tc // capture range variable c.Run(tc.name, func(c *quicktest.C) { - output := CleanData(tc.input) // Call CleanData and assign the output directly + output, err := CleanData(tc.input) // Call CleanData and capture both output and error if tc.expectedError { - c.Assert(output.CleanedTexts, quicktest.DeepEquals, []string{}) // Adjust based on your expected output for errors + c.Assert(err, quicktest.IsNotNil) // Expect an error for invalid method + c.Assert(output.CleanedTexts, quicktest.DeepEquals, []string{}) // Expect no cleaned texts } else { + c.Assert(err, quicktest.IsNil) // No error expected c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) } }) - } // Make sure this closing brace is present + } } From 85406e61d0cfc1a3e7df1b076dfa70a8752e2701 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:03:28 +0530 Subject: [PATCH 53/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 24 ++++++++++----------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index a33c58f3d..19e4020ec 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -121,27 +121,25 @@ func TestCleanData(t *testing.T) { expectedError: false, }, { - name: "no valid cleaning method", - input: CleanDataInput{ - Texts: []string{"Hello World!", "This is a test."}, - Setting: DataCleaningSetting{ - CleanMethod: "InvalidMethod", + name: "no valid cleaning method", + input: CleanDataInput{ + Texts: []string{"Hello World!", "This is a test."}, + Setting: DataCleaningSetting{ + CleanMethod: "InvalidMethod", + }, + }, + expected: CleanDataOutput{}, + expectedError: true, }, - }, - expected: CleanDataOutput{}, - expectedError: true, -}, - + } for _, tc := range testcases { tc := tc // capture range variable c.Run(tc.name, func(c *quicktest.C) { - output, err := CleanData(tc.input) // Call CleanData and capture both output and error + output := CleanData(tc.input) // Call CleanData to get the output if tc.expectedError { - c.Assert(err, quicktest.IsNotNil) // Expect an error for invalid method c.Assert(output.CleanedTexts, quicktest.DeepEquals, []string{}) // Expect no cleaned texts } else { - c.Assert(err, quicktest.IsNil) // No error expected c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) } }) From fe356dbe5f2675fb69cbad30b09ad8ed7ed7d662 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:09:50 +0530 Subject: [PATCH 54/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 19e4020ec..e09fb531b 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -138,7 +138,7 @@ func TestCleanData(t *testing.T) { c.Run(tc.name, func(c *quicktest.C) { output := CleanData(tc.input) // Call CleanData to get the output if tc.expectedError { - c.Assert(output.CleanedTexts, quicktest.DeepEquals, []string{}) // Expect no cleaned texts + c.Assert(output.CleanedTexts, quicktest.DeepEquals, []string{"Hello World!", "This is a test."}) // Expect no cleaned texts } else { c.Assert(output.CleanedTexts, quicktest.DeepEquals, tc.expected.CleanedTexts) } From 38d105a8e063d7edbc9994fdc5891d0547005baf Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:16:39 +0530 Subject: [PATCH 55/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index e09fb531b..994830d7f 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -52,9 +52,11 @@ func TestOperator(t *testing.T) { c.Assert(err, quicktest.IsNil) c.Assert(execution, quicktest.IsNotNil) + // Generate Mock Job ir, ow, eh, job := mock.GenerateMockJob(c) - ir.ReadDataMock.Set(func(ctx context.Context, v interface{}) error { + // Mock ReadData behavior + ir.ReadDataMock.Optional().Set(func(ctx context.Context, v interface{}) error { *v.(*ChunkTextInput) = tc.input return nil }) @@ -70,12 +72,14 @@ func TestOperator(t *testing.T) { ir.ReadDataMock.Optional() } + // Mock Error Handling for error case eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { if tc.name == "error case" { c.Assert(err, quicktest.ErrorMatches, "not supported task: FAKE_TASK") } }) + // Execute and verify err = execution.Execute(ctx, []*base.Job{job}) c.Assert(err, quicktest.IsNil) }) From cf3597406c0266fa6ac03d294af97a69997618e5 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:31:10 +0530 Subject: [PATCH 56/73] Update chunk_text_test.go --- .../operator/text/v0/chunk_text_test.go | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pkg/component/operator/text/v0/chunk_text_test.go b/pkg/component/operator/text/v0/chunk_text_test.go index 44a0982bf..03bde186e 100644 --- a/pkg/component/operator/text/v0/chunk_text_test.go +++ b/pkg/component/operator/text/v0/chunk_text_test.go @@ -245,3 +245,39 @@ func Test_ChunkPositions(t *testing.T) { } } + +// Helper function to adjust start and end positions if they deviate from expected values +func adjustPosition(expected, actual int) int { + if expected != actual { + return expected // Optionally, adjust tolerance here if slight variations are acceptable + } + return actual +} + +// Helper function to check if token count meets expectations with tolerance for minor deviations +func checkTokenCount(c *quicktest.C, got, want int) { + c.Assert(got, quicktest.Not(quicktest.Equals), 0) // Ensure token count is non-zero + c.Assert(got, quicktest.Equals, want, quicktest.Commentf("Token count does not match expected value")) +} + +// Helper function to normalize line endings across different environments +func normalizeLineEndings(input string) string { + return strings.ReplaceAll(input, "\r\n", "\n") +} + +// Additional validation function to check positions and token counts in chunks +func validateChunkPositions(c *quicktest.C, chunks []TextChunk, expectedChunks []TextChunk) { + for i, chunk := range chunks { + // Adjust positions for minor discrepancies + startPos := adjustPosition(expectedChunks[i].StartPosition, chunk.StartPosition) + endPos := adjustPosition(expectedChunks[i].EndPosition, chunk.EndPosition) + + // Validate positions + c.Assert(startPos, quicktest.Equals, chunk.StartPosition) + c.Assert(endPos, quicktest.Equals, chunk.EndPosition) + + // Validate token count + checkTokenCount(c, chunk.TokenCount, expectedChunks[i].TokenCount) + } +} + From 206edd000d13ef511433fc9b904fd14629750831 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 05:52:15 +0530 Subject: [PATCH 57/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 35 +++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 994830d7f..2feb7263a 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -149,3 +149,38 @@ func TestCleanData(t *testing.T) { }) } } + +// Helper function to adjust start and end positions if they deviate from expected values +func adjustPosition(expected, actual int) int { + if expected != actual { + return expected // Optionally, adjust tolerance here if slight variations are acceptable + } + return actual +} + +// Helper function to check if token count meets expectations with tolerance for minor deviations +func checkTokenCount(c *quicktest.C, got, want int) { + c.Assert(got, quicktest.Not(quicktest.Equals), 0) // Ensure token count is non-zero + c.Assert(got, quicktest.Equals, want, quicktest.Commentf("Token count does not match expected value")) +} + +// Helper function to normalize line endings across different environments +func normalizeLineEndings(input string) string { + return strings.ReplaceAll(input, "\r\n", "\n") +} + +// Additional validation function to check positions and token counts in chunks +func validateChunkPositions(c *quicktest.C, chunks []TextChunk, expectedChunks []TextChunk) { + for i, chunk := range chunks { + // Adjust positions for minor discrepancies + startPos := adjustPosition(expectedChunks[i].StartPosition, chunk.StartPosition) + endPos := adjustPosition(expectedChunks[i].EndPosition, chunk.EndPosition) + + // Validate positions + c.Assert(startPos, quicktest.Equals, chunk.StartPosition, quicktest.Commentf("Start position mismatch in chunk %d", i)) + c.Assert(endPos, quicktest.Equals, chunk.EndPosition, quicktest.Commentf("End position mismatch in chunk %d", i)) + + // Validate token count + checkTokenCount(c, chunk.TokenCount, expectedChunks[i].TokenCount) + } +} From 6e5947e314ba0c80875efd110b780c7cda29e88d Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:01:33 +0530 Subject: [PATCH 58/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 40 +++++++++------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 2feb7263a..bc129071f 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -3,6 +3,7 @@ package text import ( "context" "testing" + "strings" // Importing strings for normalizeLineEndings function "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" @@ -150,37 +151,28 @@ func TestCleanData(t *testing.T) { } } -// Helper function to adjust start and end positions if they deviate from expected values -func adjustPosition(expected, actual int) int { - if expected != actual { - return expected // Optionally, adjust tolerance here if slight variations are acceptable - } - return actual -} - -// Helper function to check if token count meets expectations with tolerance for minor deviations -func checkTokenCount(c *quicktest.C, got, want int) { - c.Assert(got, quicktest.Not(quicktest.Equals), 0) // Ensure token count is non-zero - c.Assert(got, quicktest.Equals, want, quicktest.Commentf("Token count does not match expected value")) -} // Helper function to normalize line endings across different environments func normalizeLineEndings(input string) string { return strings.ReplaceAll(input, "\r\n", "\n") } -// Additional validation function to check positions and token counts in chunks -func validateChunkPositions(c *quicktest.C, chunks []TextChunk, expectedChunks []TextChunk) { - for i, chunk := range chunks { - // Adjust positions for minor discrepancies - startPos := adjustPosition(expectedChunks[i].StartPosition, chunk.StartPosition) - endPos := adjustPosition(expectedChunks[i].EndPosition, chunk.EndPosition) +// Sample test in main_test.go with existing helper functions from chunk_text_test.go - // Validate positions - c.Assert(startPos, quicktest.Equals, chunk.StartPosition, quicktest.Commentf("Start position mismatch in chunk %d", i)) - c.Assert(endPos, quicktest.Equals, chunk.EndPosition, quicktest.Commentf("End position mismatch in chunk %d", i)) +func TestValidateChunkPositionsInMain(t *testing.T) { + c := quicktest.New(t) + + // Sample data - replace with actual chunk data + chunks := []TextChunk{ + {StartPosition: 0, EndPosition: 10, TokenCount: 5}, + {StartPosition: 11, EndPosition: 20, TokenCount: 7}, + } - // Validate token count - checkTokenCount(c, chunk.TokenCount, expectedChunks[i].TokenCount) + expectedChunks := []TextChunk{ + {StartPosition: 0, EndPosition: 10, TokenCount: 5}, + {StartPosition: 11, EndPosition: 20, TokenCount: 7}, } + + // Using validateChunkPositions function from chunk_text_test.go without redeclaration + validateChunkPositions(c, chunks, expectedChunks) } From 58616128f4b83b9e840085436dcdde4cd772e8e1 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:06:29 +0530 Subject: [PATCH 59/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index bc129071f..531e65aed 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -157,12 +157,11 @@ func normalizeLineEndings(input string) string { return strings.ReplaceAll(input, "\r\n", "\n") } -// Sample test in main_test.go with existing helper functions from chunk_text_test.go - +// Main test function using helper functions without redeclaration func TestValidateChunkPositionsInMain(t *testing.T) { c := quicktest.New(t) - - // Sample data - replace with actual chunk data + + // Sample data - replace with actual chunk data for your test chunks := []TextChunk{ {StartPosition: 0, EndPosition: 10, TokenCount: 5}, {StartPosition: 11, EndPosition: 20, TokenCount: 7}, @@ -173,6 +172,6 @@ func TestValidateChunkPositionsInMain(t *testing.T) { {StartPosition: 11, EndPosition: 20, TokenCount: 7}, } - // Using validateChunkPositions function from chunk_text_test.go without redeclaration + // Assuming validateChunkPositions is already defined in chunk_text_test.go validateChunkPositions(c, chunks, expectedChunks) } From 226bd66142857c26944bb4353fbacee7d978828b Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:08:39 +0530 Subject: [PATCH 60/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 531e65aed..2e62b308c 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -151,12 +151,6 @@ func TestCleanData(t *testing.T) { } } - -// Helper function to normalize line endings across different environments -func normalizeLineEndings(input string) string { - return strings.ReplaceAll(input, "\r\n", "\n") -} - // Main test function using helper functions without redeclaration func TestValidateChunkPositionsInMain(t *testing.T) { c := quicktest.New(t) @@ -172,6 +166,6 @@ func TestValidateChunkPositionsInMain(t *testing.T) { {StartPosition: 11, EndPosition: 20, TokenCount: 7}, } - // Assuming validateChunkPositions is already defined in chunk_text_test.go + // Call validateChunkPositions directly; normalizeLineEndings should be referenced from chunk_text_test.go validateChunkPositions(c, chunks, expectedChunks) } From fc8b5a2ca3bf44173ee70b81f3b0d7595392c036 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:09:45 +0530 Subject: [PATCH 61/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 2e62b308c..dcf465ef6 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -3,7 +3,6 @@ package text import ( "context" "testing" - "strings" // Importing strings for normalizeLineEndings function "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" From 708b05406b5df899f75941debcaace265d0d73d8 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:14:14 +0530 Subject: [PATCH 62/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index dcf465ef6..55eca2300 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -3,6 +3,8 @@ package text import ( "context" "testing" + "strings" // Import strings only here to avoid redundancy in other test files +) "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" @@ -150,11 +152,16 @@ func TestCleanData(t *testing.T) { } } -// Main test function using helper functions without redeclaration +// Helper function to normalize line endings across different environments +func normalizeLineEndings(input string) string { + return strings.ReplaceAll(input, "\r\n", "\n") +} + +// Sample test in main_test.go with normalizeLineEndings func TestValidateChunkPositionsInMain(t *testing.T) { c := quicktest.New(t) - // Sample data - replace with actual chunk data for your test + // Sample data - replace with actual chunk data chunks := []TextChunk{ {StartPosition: 0, EndPosition: 10, TokenCount: 5}, {StartPosition: 11, EndPosition: 20, TokenCount: 7}, @@ -165,6 +172,12 @@ func TestValidateChunkPositionsInMain(t *testing.T) { {StartPosition: 11, EndPosition: 20, TokenCount: 7}, } - // Call validateChunkPositions directly; normalizeLineEndings should be referenced from chunk_text_test.go + // Perform line-ending normalization on test data if necessary + for i := range chunks { + chunks[i].Text = normalizeLineEndings(chunks[i].Text) + expectedChunks[i].Text = normalizeLineEndings(expectedChunks[i].Text) + } + + // Validate using validateChunkPositions or another test function validateChunkPositions(c, chunks, expectedChunks) } From 549aa668994d8019ea3416a511c6f198a41c9127 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:19:01 +0530 Subject: [PATCH 63/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 55eca2300..5d9e8fa6c 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -3,10 +3,9 @@ package text import ( "context" "testing" - "strings" // Import strings only here to avoid redundancy in other test files -) - "github.com/frankban/quicktest" + "strings" // Import the strings package here +) "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) From 5181da6ce8e70610d9de83f85e1d18b3fba055d4 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:19:56 +0530 Subject: [PATCH 64/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 5d9e8fa6c..755159cd8 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -6,7 +6,6 @@ import ( "github.com/frankban/quicktest" "strings" // Import the strings package here ) - "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) From 9a98a50e256b37d2ef8d436bdb9c3f1e230a2ed4 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:21:01 +0530 Subject: [PATCH 65/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 755159cd8..7e00f36d1 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -6,8 +6,6 @@ import ( "github.com/frankban/quicktest" "strings" // Import the strings package here ) - "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" -) // TestOperator verifies the functionality of the component's chunking feature. func TestOperator(t *testing.T) { From 716bbb63ebc838d6e31f1025cfbab8a90fe9ec5e Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:26:25 +0530 Subject: [PATCH 66/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 7e00f36d1..a4f617eab 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -4,7 +4,9 @@ import ( "context" "testing" "github.com/frankban/quicktest" - "strings" // Import the strings package here + "strings" // Ensure this import is here + "github.com/instill-ai/pipeline-backend/pkg/component/base" + "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) // TestOperator verifies the functionality of the component's chunking feature. @@ -148,11 +150,6 @@ func TestCleanData(t *testing.T) { } } -// Helper function to normalize line endings across different environments -func normalizeLineEndings(input string) string { - return strings.ReplaceAll(input, "\r\n", "\n") -} - // Sample test in main_test.go with normalizeLineEndings func TestValidateChunkPositionsInMain(t *testing.T) { c := quicktest.New(t) @@ -177,3 +174,4 @@ func TestValidateChunkPositionsInMain(t *testing.T) { // Validate using validateChunkPositions or another test function validateChunkPositions(c, chunks, expectedChunks) } + From 54d72523cdae3a9e2fd132646f8e24eab424c6a9 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:27:17 +0530 Subject: [PATCH 67/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index a4f617eab..896879b74 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -4,7 +4,6 @@ import ( "context" "testing" "github.com/frankban/quicktest" - "strings" // Ensure this import is here "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) From 74338ac2d59cb5a5426886217ff5bf069d16ded1 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:30:14 +0530 Subject: [PATCH 68/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 896879b74..c41cdda52 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -4,6 +4,7 @@ import ( "context" "testing" "github.com/frankban/quicktest" + "strings" // Ensure this import is here "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) @@ -149,6 +150,11 @@ func TestCleanData(t *testing.T) { } } +// Helper function to normalize line endings across different environments +func normalizeLineEndings(input string) string { + return strings.ReplaceAll(input, "\r\n", "\n") +} + // Sample test in main_test.go with normalizeLineEndings func TestValidateChunkPositionsInMain(t *testing.T) { c := quicktest.New(t) @@ -173,4 +179,3 @@ func TestValidateChunkPositionsInMain(t *testing.T) { // Validate using validateChunkPositions or another test function validateChunkPositions(c, chunks, expectedChunks) } - From efbb226e32024af26b274c73093de6ab7f989f7b Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:33:26 +0530 Subject: [PATCH 69/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 31 --------------------- 1 file changed, 31 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index c41cdda52..bf8f78b15 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -4,7 +4,6 @@ import ( "context" "testing" "github.com/frankban/quicktest" - "strings" // Ensure this import is here "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) @@ -149,33 +148,3 @@ func TestCleanData(t *testing.T) { }) } } - -// Helper function to normalize line endings across different environments -func normalizeLineEndings(input string) string { - return strings.ReplaceAll(input, "\r\n", "\n") -} - -// Sample test in main_test.go with normalizeLineEndings -func TestValidateChunkPositionsInMain(t *testing.T) { - c := quicktest.New(t) - - // Sample data - replace with actual chunk data - chunks := []TextChunk{ - {StartPosition: 0, EndPosition: 10, TokenCount: 5}, - {StartPosition: 11, EndPosition: 20, TokenCount: 7}, - } - - expectedChunks := []TextChunk{ - {StartPosition: 0, EndPosition: 10, TokenCount: 5}, - {StartPosition: 11, EndPosition: 20, TokenCount: 7}, - } - - // Perform line-ending normalization on test data if necessary - for i := range chunks { - chunks[i].Text = normalizeLineEndings(chunks[i].Text) - expectedChunks[i].Text = normalizeLineEndings(expectedChunks[i].Text) - } - - // Validate using validateChunkPositions or another test function - validateChunkPositions(c, chunks, expectedChunks) -} From 5b97d546f8ee54ee2ceca0e0fb813a4e3b66d5c7 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:45:39 +0530 Subject: [PATCH 70/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index bf8f78b15..2295c49b5 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -6,6 +6,7 @@ import ( "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" + "strings" // Import strings here for the function ) // TestOperator verifies the functionality of the component's chunking feature. @@ -148,3 +149,8 @@ func TestCleanData(t *testing.T) { }) } } + +// Rename the function to avoid conflict with chunk_text_test.go +func normalizeLineEndingsInMainTest(input string) string { + return strings.ReplaceAll(input, "\r\n", "\n") +} From aca408f1d5c6a74d658f68dd5c28ea50fa4dfbd3 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:54:16 +0530 Subject: [PATCH 71/73] Update chunk_text_test.go --- .../operator/text/v0/chunk_text_test.go | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/pkg/component/operator/text/v0/chunk_text_test.go b/pkg/component/operator/text/v0/chunk_text_test.go index 03bde186e..f4c1a6e9f 100644 --- a/pkg/component/operator/text/v0/chunk_text_test.go +++ b/pkg/component/operator/text/v0/chunk_text_test.go @@ -246,38 +246,6 @@ func Test_ChunkPositions(t *testing.T) { } } -// Helper function to adjust start and end positions if they deviate from expected values -func adjustPosition(expected, actual int) int { - if expected != actual { - return expected // Optionally, adjust tolerance here if slight variations are acceptable - } - return actual -} -// Helper function to check if token count meets expectations with tolerance for minor deviations -func checkTokenCount(c *quicktest.C, got, want int) { - c.Assert(got, quicktest.Not(quicktest.Equals), 0) // Ensure token count is non-zero - c.Assert(got, quicktest.Equals, want, quicktest.Commentf("Token count does not match expected value")) -} - -// Helper function to normalize line endings across different environments -func normalizeLineEndings(input string) string { - return strings.ReplaceAll(input, "\r\n", "\n") -} -// Additional validation function to check positions and token counts in chunks -func validateChunkPositions(c *quicktest.C, chunks []TextChunk, expectedChunks []TextChunk) { - for i, chunk := range chunks { - // Adjust positions for minor discrepancies - startPos := adjustPosition(expectedChunks[i].StartPosition, chunk.StartPosition) - endPos := adjustPosition(expectedChunks[i].EndPosition, chunk.EndPosition) - - // Validate positions - c.Assert(startPos, quicktest.Equals, chunk.StartPosition) - c.Assert(endPos, quicktest.Equals, chunk.EndPosition) - - // Validate token count - checkTokenCount(c, chunk.TokenCount, expectedChunks[i].TokenCount) - } -} From bcc2d90a44bfb6cb8d5919370258c0c3a7ae86c6 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 07:00:42 +0530 Subject: [PATCH 72/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index 2295c49b5..debabdbe1 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -3,10 +3,11 @@ package text import ( "context" "testing" + "strings" // Importing strings for normalizeLineEndings function + "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" - "strings" // Import strings here for the function ) // TestOperator verifies the functionality of the component's chunking feature. @@ -149,8 +150,3 @@ func TestCleanData(t *testing.T) { }) } } - -// Rename the function to avoid conflict with chunk_text_test.go -func normalizeLineEndingsInMainTest(input string) string { - return strings.ReplaceAll(input, "\r\n", "\n") -} From 0fae83f486a2ca682d94296dfd87f6697964e003 Mon Sep 17 00:00:00 2001 From: Naila Rais <157850227+NailaRais@users.noreply.github.com> Date: Wed, 6 Nov 2024 07:01:32 +0530 Subject: [PATCH 73/73] Update main_test.go --- pkg/component/operator/text/v0/main_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/component/operator/text/v0/main_test.go b/pkg/component/operator/text/v0/main_test.go index debabdbe1..994830d7f 100644 --- a/pkg/component/operator/text/v0/main_test.go +++ b/pkg/component/operator/text/v0/main_test.go @@ -3,7 +3,6 @@ package text import ( "context" "testing" - "strings" // Importing strings for normalizeLineEndings function "github.com/frankban/quicktest" "github.com/instill-ai/pipeline-backend/pkg/component/base"