From be665d27bd67219d7c72a167b9330caa72ce45de Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 16 May 2024 13:43:20 -0400 Subject: [PATCH 01/17] Implemented reclassification for bugs originally classified as `Fenix::General` using the `fenixcomponent` model. --- bugbot/rules/component.py | 55 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index da61e3b2f..353cc0bc2 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -91,6 +91,9 @@ def get_bugs(self, date="today", bug_ids=[]): results = {} + # List to store IDs of bugs classified as Fenix::General + fenix_general_bug_ids = [] + for bug_id in sorted(bugs.keys()): bug_data = bugs[bug_id] @@ -172,6 +175,58 @@ def get_bugs(self, date="today", bug_ids=[]): if self.frequency == "hourly": results[bug_id] = result + # Collect bugs that were classified as Fenix::General + if bug["product"] == "Fenix" and bug["component"] == "General": + fenix_general_bug_ids.append(bug_id) + + # Reclassify Fenix::General bugs using the Fenix-specific model + if fenix_general_bug_ids: + # TODO: Use the correct name for the Fenix-specific model + fenix_bugs = get_bug_ids_classification( + "fenixcomponent", fenix_general_bug_ids + ) + for fenix_bug_id in sorted(fenix_bugs.keys()): + fenix_bug_data = fenix_bugs[fenix_bug_id] + + bug = raw_bugs[fenix_bug_id] + prob = fenix_bug_data["prob"] + index = fenix_bug_data["index"] + suggestion = fenix_bug_data["class"] + + i = suggestion.index("::") + suggested_product = suggestion[:i] + suggested_component = suggestion[i + 2 :] + + result = { + "id": fenix_bug_id, + "summary": bug["summary"], + "component": suggestion, + "confidence": nice_round(prob[index]), + "autofixed": False, + } + + # In daily mode, we send an email with all results. + if self.frequency == "daily": + results[fenix_bug_id] = result + + confidence_threshold_conf = ( + "confidence_threshold" + if bug["component"] != "General" + else "general_confidence_threshold" + ) + + if prob[index] >= self.get_config(confidence_threshold_conf): + self.autofix_component[fenix_bug_id] = { + "product": suggested_product, + "component": suggested_component, + } + + result["autofixed"] = True + + # In hourly mode, we send an email with only the bugs we acted upon. + if self.frequency == "hourly": + results[fenix_bug_id] = result + # Don't move bugs back into components they were moved out of. # TODO: Use the component suggestion from the service with the second highest confidence instead. def history_handler(bug): From 38980fc1b603e1c4375b85af04ce2df92230c3ee Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 17 May 2024 13:04:14 -0400 Subject: [PATCH 02/17] Removed duplicate code. --- bugbot/rules/component.py | 205 ++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 119 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 353cc0bc2..bdc9af1ed 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -77,128 +77,62 @@ def get_bz_params(self, date): } def get_bugs(self, date="today", bug_ids=[]): - # Retrieve the bugs with the fields defined in get_bz_params - raw_bugs = super().get_bugs(date=date, bug_ids=bug_ids, chunk_size=7000) - - if len(raw_bugs) == 0: - return {} - - # Extract the bug ids - bug_ids = list(raw_bugs.keys()) - - # Classify those bugs - bugs = get_bug_ids_classification("component", bug_ids) - - results = {} - - # List to store IDs of bugs classified as Fenix::General - fenix_general_bug_ids = [] - - for bug_id in sorted(bugs.keys()): - bug_data = bugs[bug_id] - - if not bug_data.get("available", True): - # The bug was not available, it was either removed or is a - # security bug - continue - - if not {"prob", "index", "class", "extra_data"}.issubset(bug_data.keys()): - raise Exception(f"Invalid bug response {bug_id}: {bug_data!r}") - - bug = raw_bugs[bug_id] - prob = bug_data["prob"] - index = bug_data["index"] - suggestion = bug_data["class"] - conflated_components_mapping = bug_data["extra_data"][ - "conflated_components_mapping" - ] - - # Skip product-only suggestions that are not useful. - if "::" not in suggestion and bug["product"] == suggestion: - continue - - suggestion = conflated_components_mapping.get(suggestion, suggestion) - - if "::" not in suggestion: - logger.error( - f"There is something wrong with this component suggestion! {suggestion}" - ) - continue - - i = suggestion.index("::") - suggested_product = suggestion[:i] - suggested_component = suggestion[i + 2 :] - - # When moving bugs out of the 'General' component, we don't want to change the product (unless it is Firefox). - if bug["component"] == "General" and bug["product"] not in { - suggested_product, - "Firefox", - }: - continue - - # Don't move bugs from Firefox::General to Core::Internationalization. - if ( - bug["product"] == "Firefox" - and bug["component"] == "General" - and suggested_product == "Core" - and suggested_component == "Internationalization" - ): - continue - - result = { - "id": bug_id, - "summary": bug["summary"], - "component": suggestion, - "confidence": nice_round(prob[index]), - "autofixed": False, - } - - # In daily mode, we send an email with all results. - if self.frequency == "daily": - results[bug_id] = result - - confidence_threshold_conf = ( - "confidence_threshold" - if bug["component"] != "General" - else "general_confidence_threshold" - ) - - if prob[index] >= self.get_config(confidence_threshold_conf): - self.autofix_component[bug_id] = { - "product": suggested_product, - "component": suggested_component, - } - - result["autofixed"] = True - - # In hourly mode, we send an email with only the bugs we acted upon. - if self.frequency == "hourly": - results[bug_id] = result - - # Collect bugs that were classified as Fenix::General - if bug["product"] == "Fenix" and bug["component"] == "General": - fenix_general_bug_ids.append(bug_id) - - # Reclassify Fenix::General bugs using the Fenix-specific model - if fenix_general_bug_ids: - # TODO: Use the correct name for the Fenix-specific model - fenix_bugs = get_bug_ids_classification( - "fenixcomponent", fenix_general_bug_ids - ) - for fenix_bug_id in sorted(fenix_bugs.keys()): - fenix_bug_data = fenix_bugs[fenix_bug_id] - - bug = raw_bugs[fenix_bug_id] - prob = fenix_bug_data["prob"] - index = fenix_bug_data["index"] - suggestion = fenix_bug_data["class"] + def process_bugs(bugs, raw_bugs, results, reclassify_fenix): + for bug_id in sorted(bugs.keys()): + bug_data = bugs[bug_id] + + if not bug_data.get("available", True): + # The bug was not available, it was either removed or is a + # security bug + continue + + if not {"prob", "index", "class", "extra_data"}.issubset( + bug_data.keys() + ): + raise Exception(f"Invalid bug response {bug_id}: {bug_data!r}") + + bug = raw_bugs[bug_id] + prob = bug_data["prob"] + index = bug_data["index"] + suggestion = bug_data["class"] + conflated_components_mapping = bug_data["extra_data"][ + "conflated_components_mapping" + ] + + # Skip product-only suggestions that are not useful. + if "::" not in suggestion and bug["product"] == suggestion: + continue + + suggestion = conflated_components_mapping.get(suggestion, suggestion) + + if "::" not in suggestion: + logger.error( + f"There is something wrong with this component suggestion! {suggestion}" + ) + continue i = suggestion.index("::") suggested_product = suggestion[:i] suggested_component = suggestion[i + 2 :] + # When moving bugs out of the 'General' component, we don't want to change the product (unless it is Firefox). + if bug["component"] == "General" and bug["product"] not in { + suggested_product, + "Firefox", + }: + continue + + # Don't move bugs from Firefox::General to Core::Internationalization. + if ( + bug["product"] == "Firefox" + and bug["component"] == "General" + and suggested_product == "Core" + and suggested_component == "Internationalization" + ): + continue + result = { - "id": fenix_bug_id, + "id": bug_id, "summary": bug["summary"], "component": suggestion, "confidence": nice_round(prob[index]), @@ -207,7 +141,7 @@ def get_bugs(self, date="today", bug_ids=[]): # In daily mode, we send an email with all results. if self.frequency == "daily": - results[fenix_bug_id] = result + results[bug_id] = result confidence_threshold_conf = ( "confidence_threshold" @@ -216,7 +150,7 @@ def get_bugs(self, date="today", bug_ids=[]): ) if prob[index] >= self.get_config(confidence_threshold_conf): - self.autofix_component[fenix_bug_id] = { + self.autofix_component[bug_id] = { "product": suggested_product, "component": suggested_component, } @@ -225,7 +159,40 @@ def get_bugs(self, date="today", bug_ids=[]): # In hourly mode, we send an email with only the bugs we acted upon. if self.frequency == "hourly": - results[fenix_bug_id] = result + results[bug_id] = result + + # Collect bugs that were classified as Fenix::General + if bug["product"] == "Fenix" and bug["component"] == "General": + fenix_general_bug_ids.append(bug_id) + + return fenix_general_bug_ids + + # Retrieve the bugs with the fields defined in get_bz_params + raw_bugs = super().get_bugs(date=date, bug_ids=bug_ids, chunk_size=7000) + + if len(raw_bugs) == 0: + return {} + + # Extract the bug ids + bug_ids = list(raw_bugs.keys()) + + # Classify those bugs + bugs = get_bug_ids_classification("component", bug_ids) + + results = {} + + # List to store IDs of bugs classified as Fenix::General + fenix_general_bug_ids = [] + + fenix_general_bug_ids = process_bugs( + bugs, raw_bugs, results, fenix_general_bug_ids + ) + + if fenix_general_bug_ids: + fenix_bugs = get_bug_ids_classification( + "fenixcomponent", fenix_general_bug_ids + ) + process_bugs(fenix_bugs, raw_bugs, results) # Don't move bugs back into components they were moved out of. # TODO: Use the component suggestion from the service with the second highest confidence instead. From aa18d50c9a5635fc36a2ee15a0ab4d7b38adb945 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 21 May 2024 11:46:20 -0400 Subject: [PATCH 03/17] Added model logic before the main loop to reduce code duplicates and large diffs. --- bugbot/rules/component.py | 192 ++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 99 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index bdc9af1ed..32fc69146 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -77,96 +77,6 @@ def get_bz_params(self, date): } def get_bugs(self, date="today", bug_ids=[]): - def process_bugs(bugs, raw_bugs, results, reclassify_fenix): - for bug_id in sorted(bugs.keys()): - bug_data = bugs[bug_id] - - if not bug_data.get("available", True): - # The bug was not available, it was either removed or is a - # security bug - continue - - if not {"prob", "index", "class", "extra_data"}.issubset( - bug_data.keys() - ): - raise Exception(f"Invalid bug response {bug_id}: {bug_data!r}") - - bug = raw_bugs[bug_id] - prob = bug_data["prob"] - index = bug_data["index"] - suggestion = bug_data["class"] - conflated_components_mapping = bug_data["extra_data"][ - "conflated_components_mapping" - ] - - # Skip product-only suggestions that are not useful. - if "::" not in suggestion and bug["product"] == suggestion: - continue - - suggestion = conflated_components_mapping.get(suggestion, suggestion) - - if "::" not in suggestion: - logger.error( - f"There is something wrong with this component suggestion! {suggestion}" - ) - continue - - i = suggestion.index("::") - suggested_product = suggestion[:i] - suggested_component = suggestion[i + 2 :] - - # When moving bugs out of the 'General' component, we don't want to change the product (unless it is Firefox). - if bug["component"] == "General" and bug["product"] not in { - suggested_product, - "Firefox", - }: - continue - - # Don't move bugs from Firefox::General to Core::Internationalization. - if ( - bug["product"] == "Firefox" - and bug["component"] == "General" - and suggested_product == "Core" - and suggested_component == "Internationalization" - ): - continue - - result = { - "id": bug_id, - "summary": bug["summary"], - "component": suggestion, - "confidence": nice_round(prob[index]), - "autofixed": False, - } - - # In daily mode, we send an email with all results. - if self.frequency == "daily": - results[bug_id] = result - - confidence_threshold_conf = ( - "confidence_threshold" - if bug["component"] != "General" - else "general_confidence_threshold" - ) - - if prob[index] >= self.get_config(confidence_threshold_conf): - self.autofix_component[bug_id] = { - "product": suggested_product, - "component": suggested_component, - } - - result["autofixed"] = True - - # In hourly mode, we send an email with only the bugs we acted upon. - if self.frequency == "hourly": - results[bug_id] = result - - # Collect bugs that were classified as Fenix::General - if bug["product"] == "Fenix" and bug["component"] == "General": - fenix_general_bug_ids.append(bug_id) - - return fenix_general_bug_ids - # Retrieve the bugs with the fields defined in get_bz_params raw_bugs = super().get_bugs(date=date, bug_ids=bug_ids, chunk_size=7000) @@ -179,20 +89,104 @@ def process_bugs(bugs, raw_bugs, results, reclassify_fenix): # Classify those bugs bugs = get_bug_ids_classification("component", bug_ids) + # Collect bugs classified as Fenix:General + fenix_general_bug_ids = [ + bug_id + for bug_id, bug_data in bugs.items() + if bugs[bug_id]["product"] == "Fenix" + and bugs[bug_id]["component"] == "General" + ] + + # Reclassify the Fenix:General bugs using the fenixcomponent model + if fenix_general_bug_ids: + fenix_general_classification = get_bug_ids_classification( + "fenixcomponent", fenix_general_bug_ids + ) + for bug_id in fenix_general_classification: + bugs[bug_id] = fenix_general_classification[bug_id] + results = {} - # List to store IDs of bugs classified as Fenix::General - fenix_general_bug_ids = [] + for bug_id in sorted(bugs.keys()): + bug_data = bugs[bug_id] - fenix_general_bug_ids = process_bugs( - bugs, raw_bugs, results, fenix_general_bug_ids - ) + if not bug_data.get("available", True): + # The bug was not available, it was either removed or is a + # security bug + continue - if fenix_general_bug_ids: - fenix_bugs = get_bug_ids_classification( - "fenixcomponent", fenix_general_bug_ids + if not {"prob", "index", "class", "extra_data"}.issubset(bug_data.keys()): + raise Exception(f"Invalid bug response {bug_id}: {bug_data!r}") + + bug = raw_bugs[bug_id] + prob = bug_data["prob"] + index = bug_data["index"] + suggestion = bug_data["class"] + conflated_components_mapping = bug_data["extra_data"][ + "conflated_components_mapping" + ] + + # Skip product-only suggestions that are not useful. + if "::" not in suggestion and bug["product"] == suggestion: + continue + + suggestion = conflated_components_mapping.get(suggestion, suggestion) + + if "::" not in suggestion: + logger.error( + f"There is something wrong with this component suggestion! {suggestion}" + ) + continue + + i = suggestion.index("::") + suggested_product = suggestion[:i] + suggested_component = suggestion[i + 2 :] + + # When moving bugs out of the 'General' component, we don't want to change the product (unless it is Firefox). + if bug["component"] == "General" and bug["product"] not in { + suggested_product, + "Firefox", + }: + continue + + # Don't move bugs from Firefox::General to Core::Internationalization. + if ( + bug["product"] == "Firefox" + and bug["component"] == "General" + and suggested_product == "Core" + and suggested_component == "Internationalization" + ): + continue + + result = { + "id": bug_id, + "summary": bug["summary"], + "component": suggestion, + "confidence": nice_round(prob[index]), + "autofixed": False, + } + + # In daily mode, we send an email with all results. + if self.frequency == "daily": + results[bug_id] = result + + confidence_threshold_conf = ( + "confidence_threshold" + if bug["component"] != "General" + else "general_confidence_threshold" ) - process_bugs(fenix_bugs, raw_bugs, results) + + if prob[index] >= self.get_config(confidence_threshold_conf): + self.autofix_component[bug_id] = { + "product": suggested_product, + "component": suggested_component, + } + + result["autofixed"] = True + + # In hourly mode, we send an email with only the bugs we acted upon. + if self.frequency == "hourly": + results[bug_id] = result # Don't move bugs back into components they were moved out of. # TODO: Use the component suggestion from the service with the second highest confidence instead. From bf4ec274761ec40b1ef2f0abcd80189a3cbf8ee8 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 21 May 2024 11:59:30 -0400 Subject: [PATCH 04/17] Fixed key error --- bugbot/rules/component.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 32fc69146..115c25c2b 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -93,8 +93,7 @@ def get_bugs(self, date="today", bug_ids=[]): fenix_general_bug_ids = [ bug_id for bug_id, bug_data in bugs.items() - if bugs[bug_id]["product"] == "Fenix" - and bugs[bug_id]["component"] == "General" + if "class" in bug_data and bug_data["class"] == "Fenix::General" ] # Reclassify the Fenix:General bugs using the fenixcomponent model From 19cb23361f4fe468c73241f20941cf31cb8c30d4 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 21 May 2024 12:01:08 -0400 Subject: [PATCH 05/17] Comment changes --- bugbot/rules/component.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 115c25c2b..6144844a8 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -89,14 +89,14 @@ def get_bugs(self, date="today", bug_ids=[]): # Classify those bugs bugs = get_bug_ids_classification("component", bug_ids) - # Collect bugs classified as Fenix:General + # Collect bugs classified as Fenix::General fenix_general_bug_ids = [ bug_id for bug_id, bug_data in bugs.items() if "class" in bug_data and bug_data["class"] == "Fenix::General" ] - # Reclassify the Fenix:General bugs using the fenixcomponent model + # Reclassify the Fenix::General bugs using the fenixcomponent model if fenix_general_bug_ids: fenix_general_classification = get_bug_ids_classification( "fenixcomponent", fenix_general_bug_ids From 246a34614bbe11b6aa3783164a10ce6336f20d09 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 21 May 2024 13:31:42 -0400 Subject: [PATCH 06/17] Used `get()` and `items()` --- bugbot/rules/component.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 6144844a8..438393f46 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -93,7 +93,7 @@ def get_bugs(self, date="today", bug_ids=[]): fenix_general_bug_ids = [ bug_id for bug_id, bug_data in bugs.items() - if "class" in bug_data and bug_data["class"] == "Fenix::General" + if bug_data.get("class") == "Fenix::General" ] # Reclassify the Fenix::General bugs using the fenixcomponent model @@ -101,8 +101,8 @@ def get_bugs(self, date="today", bug_ids=[]): fenix_general_classification = get_bug_ids_classification( "fenixcomponent", fenix_general_bug_ids ) - for bug_id in fenix_general_classification: - bugs[bug_id] = fenix_general_classification[bug_id] + for bug_id, data in fenix_general_classification.items(): + bugs[bug_id] = data results = {} From ae5f471484317064592b92a655a8b6c59841fb41 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 21 May 2024 16:01:16 -0400 Subject: [PATCH 07/17] Keep old classification if the new classification does not meet threshold but old classification does. --- bugbot/rules/component.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 438393f46..571d1c0b4 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -101,8 +101,23 @@ def get_bugs(self, date="today", bug_ids=[]): fenix_general_classification = get_bug_ids_classification( "fenixcomponent", fenix_general_bug_ids ) + + confidence_threshold = self.get_config("confidence_threshold") + general_confidence_threshold = self.get_config( + "general_confidence_threshold" + ) + for bug_id, data in fenix_general_classification.items(): - bugs[bug_id] = data + original_data = bugs[bug_id] + original_confidence = original_data["prob"][original_data["index"]] + new_confidence = data["prob"][data["index"]] + + # If the original confidence for Fenix::General met the threshold and the new classification does not, keep the old classification. + if not ( + new_confidence < confidence_threshold + and original_confidence > general_confidence_threshold + ): + bugs[bug_id] = data results = {} From 1af5b0ab6b9c86c82c39142aa33404447e673e9e Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 30 May 2024 16:11:44 -0400 Subject: [PATCH 08/17] Changed `Fenix::General` to `Fenix` --- bugbot/rules/component.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 571d1c0b4..ea3a51fc7 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -93,7 +93,7 @@ def get_bugs(self, date="today", bug_ids=[]): fenix_general_bug_ids = [ bug_id for bug_id, bug_data in bugs.items() - if bug_data.get("class") == "Fenix::General" + if bug_data.get("class") == "Fenix" ] # Reclassify the Fenix::General bugs using the fenixcomponent model From db0adb583d5e336d7dd41cf945d37ab1c606dea6 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 4 Jun 2024 12:28:20 -0400 Subject: [PATCH 09/17] Introduced Fenix-specific component confidence threshold --- bugbot/rules/component.py | 14 ++++---------- configs/rules.json | 1 + 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index ea3a51fc7..c8616f070 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -102,21 +102,15 @@ def get_bugs(self, date="today", bug_ids=[]): "fenixcomponent", fenix_general_bug_ids ) - confidence_threshold = self.get_config("confidence_threshold") - general_confidence_threshold = self.get_config( - "general_confidence_threshold" + fenix_confidence_threshold = self.get_config( + name="component", entry="fenix_confidence_threshold" ) for bug_id, data in fenix_general_classification.items(): - original_data = bugs[bug_id] - original_confidence = original_data["prob"][original_data["index"]] new_confidence = data["prob"][data["index"]] - # If the original confidence for Fenix::General met the threshold and the new classification does not, keep the old classification. - if not ( - new_confidence < confidence_threshold - and original_confidence > general_confidence_threshold - ): + # Only reclassify if the new confidence meets the Fenix component confidence threshold + if new_confidence > fenix_confidence_threshold: bugs[bug_id] = data results = {} diff --git a/configs/rules.json b/configs/rules.json index 3fca4a8c1..c6ecad9af 100644 --- a/configs/rules.json +++ b/configs/rules.json @@ -349,6 +349,7 @@ }, "component": { "confidence_threshold": 0.35, + "fenix_confidence_threshold": 0.6, "general_confidence_threshold": 0.8, "days_lookup": 365, "max_days_in_cache": 7, From 137c4a198c81e7426ae0f057199eba5f12f296e9 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 6 Jun 2024 14:45:04 -0400 Subject: [PATCH 10/17] Ensured smooth dry-run with Fenix model --- bugbot/rules/component.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index c8616f070..f14e1938e 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -102,9 +102,7 @@ def get_bugs(self, date="today", bug_ids=[]): "fenixcomponent", fenix_general_bug_ids ) - fenix_confidence_threshold = self.get_config( - name="component", entry="fenix_confidence_threshold" - ) + fenix_confidence_threshold = self.get_config("fenix_confidence_threshold") for bug_id, data in fenix_general_classification.items(): new_confidence = data["prob"][data["index"]] @@ -130,15 +128,17 @@ def get_bugs(self, date="today", bug_ids=[]): prob = bug_data["prob"] index = bug_data["index"] suggestion = bug_data["class"] - conflated_components_mapping = bug_data["extra_data"][ - "conflated_components_mapping" - ] + + conflated_components_mapping = bug_data["extra_data"].get( + "conflated_components_mapping", {} + ) # Skip product-only suggestions that are not useful. if "::" not in suggestion and bug["product"] == suggestion: continue - suggestion = conflated_components_mapping.get(suggestion, suggestion) + if "Fenix" not in suggestion: + suggestion = conflated_components_mapping.get(suggestion, suggestion) if "::" not in suggestion: logger.error( From 07ed27946303a28b4f013c59157af8ea4551f282 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 7 Jun 2024 15:45:45 -0400 Subject: [PATCH 11/17] Included product for reclassified Fenix bugs --- bugbot/rules/component.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index f14e1938e..2bfe0475e 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -109,6 +109,7 @@ def get_bugs(self, date="today", bug_ids=[]): # Only reclassify if the new confidence meets the Fenix component confidence threshold if new_confidence > fenix_confidence_threshold: + data["class"] = f"Fenix::{data['class']}" bugs[bug_id] = data results = {} From 26cefc8225c5269bdb1e0362c52c78606e029606 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 12 Jun 2024 16:34:10 -0400 Subject: [PATCH 12/17] Added bugs that were original `Fenix::General` that were reclassified with low confidence and removed bugs that were reclassified with low confidence --- bugbot/rules/component.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 2bfe0475e..a9437b023 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -2,6 +2,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. + from libmozdata.bugzilla import Bugzilla from bugbot import logger @@ -94,8 +95,34 @@ def get_bugs(self, date="today", bug_ids=[]): bug_id for bug_id, bug_data in bugs.items() if bug_data.get("class") == "Fenix" + and bug_data["prob"][bug_data["index"]] + >= self.get_config("general_confidence_threshold") + ] + + def get_confidence_threshold( + bug_data, general_confidence_threshold, confidence_threshold + ): + if bug_data["class"] == "General": + return general_confidence_threshold + return confidence_threshold + + # Collection bugs that were originally Fenix::General but reclassified to another product with low confidence + originally_fenix_general_bug_ids = [ + bug_id + for bug_id, bug_data in bugs.items() + if raw_bugs[bug_id]["product"] == "Fenix" + and raw_bugs[bug_id]["component"] == "General" + and bug_data["prob"][bug_data["index"]] + <= get_confidence_threshold( + bug_data, + self.get_config("general_confidence_threshold"), + self.get_config("confidence_threshold"), + ) ] + fenix_general_bug_ids.extend(originally_fenix_general_bug_ids) + fenix_general_bug_ids = set(fenix_general_bug_ids) + # Reclassify the Fenix::General bugs using the fenixcomponent model if fenix_general_bug_ids: fenix_general_classification = get_bug_ids_classification( From bdc76b2b68f66417d99cbb9d2368b5dfee34cf4d Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 4 Jul 2024 11:57:58 -0400 Subject: [PATCH 13/17] Removed whitespace --- bugbot/rules/component.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index a9437b023..40f4063d4 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -2,7 +2,6 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. - from libmozdata.bugzilla import Bugzilla from bugbot import logger From bc68d06132884591efb2653977567238bd0ea681 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 4 Jul 2024 17:50:26 -0400 Subject: [PATCH 14/17] Added new function to check threshold --- bugbot/rules/component.py | 44 ++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 40f4063d4..9a9aaf5d0 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -15,6 +15,11 @@ def __init__(self): super().__init__() self.autofix_component = {} self.frequency = "daily" + self.general_confidence_threshold = self.get_config( + "general_confidence_threshold" + ) + self.component_confidence_threshold = self.get_config("confidence_threshold") + self.fenix_confidence_threshold = self.get_config("fenix_confidence_threshold") def add_custom_arguments(self, parser): parser.add_argument( @@ -76,6 +81,9 @@ def get_bz_params(self, date): "f9": "CP", } + def meets_threshold(self, bug_data, threshold): + return bug_data["prob"][bug_data["index"]] >= threshold + def get_bugs(self, date="today", bug_ids=[]): # Retrieve the bugs with the fields defined in get_bz_params raw_bugs = super().get_bugs(date=date, bug_ids=bug_ids, chunk_size=7000) @@ -90,37 +98,28 @@ def get_bugs(self, date="today", bug_ids=[]): bugs = get_bug_ids_classification("component", bug_ids) # Collect bugs classified as Fenix::General - fenix_general_bug_ids = [ + fenix_general_bug_ids = { bug_id for bug_id, bug_data in bugs.items() if bug_data.get("class") == "Fenix" - and bug_data["prob"][bug_data["index"]] - >= self.get_config("general_confidence_threshold") - ] - - def get_confidence_threshold( - bug_data, general_confidence_threshold, confidence_threshold - ): - if bug_data["class"] == "General": - return general_confidence_threshold - return confidence_threshold + and self.meets_threshold(bug_data, self.general_confidence_threshold) + } # Collection bugs that were originally Fenix::General but reclassified to another product with low confidence - originally_fenix_general_bug_ids = [ + originally_fenix_general_bug_ids = { bug_id for bug_id, bug_data in bugs.items() if raw_bugs[bug_id]["product"] == "Fenix" and raw_bugs[bug_id]["component"] == "General" - and bug_data["prob"][bug_data["index"]] - <= get_confidence_threshold( + and not self.meets_threshold( bug_data, - self.get_config("general_confidence_threshold"), - self.get_config("confidence_threshold"), + self.general_confidence_threshold + if bug_data["class"] == "General" + else self.component_confidence_threshold, ) - ] + } - fenix_general_bug_ids.extend(originally_fenix_general_bug_ids) - fenix_general_bug_ids = set(fenix_general_bug_ids) + fenix_general_bug_ids.update(originally_fenix_general_bug_ids) # Reclassify the Fenix::General bugs using the fenixcomponent model if fenix_general_bug_ids: @@ -128,13 +127,11 @@ def get_confidence_threshold( "fenixcomponent", fenix_general_bug_ids ) - fenix_confidence_threshold = self.get_config("fenix_confidence_threshold") - for bug_id, data in fenix_general_classification.items(): new_confidence = data["prob"][data["index"]] # Only reclassify if the new confidence meets the Fenix component confidence threshold - if new_confidence > fenix_confidence_threshold: + if new_confidence > self.fenix_confidence_threshold: data["class"] = f"Fenix::{data['class']}" bugs[bug_id] = data @@ -164,8 +161,7 @@ def get_confidence_threshold( if "::" not in suggestion and bug["product"] == suggestion: continue - if "Fenix" not in suggestion: - suggestion = conflated_components_mapping.get(suggestion, suggestion) + suggestion = conflated_components_mapping.get(suggestion, suggestion) if "::" not in suggestion: logger.error( From ff5b63363ce48bf29f2ba730efd91ff7de42a0d5 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 8 Jul 2024 09:28:14 -0400 Subject: [PATCH 15/17] Select threshold in `meets_threshold()` --- bugbot/rules/component.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 9a9aaf5d0..921bec5f6 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -81,10 +81,17 @@ def get_bz_params(self, date): "f9": "CP", } - def meets_threshold(self, bug_data, threshold): - return bug_data["prob"][bug_data["index"]] >= threshold - def get_bugs(self, date="today", bug_ids=[]): + def select_threshold(bug_data): + if bug_data["class"] == "Fenix" or bug_data["class"] == "General": + return self.general_confidence_threshold + else: + return self.component_confidence_threshold + + def meets_threshold(bug_data): + threshold = select_threshold(bug_data) + return bug_data["prob"][bug_data["index"]] >= threshold + # Retrieve the bugs with the fields defined in get_bz_params raw_bugs = super().get_bugs(date=date, bug_ids=bug_ids, chunk_size=7000) @@ -101,8 +108,7 @@ def get_bugs(self, date="today", bug_ids=[]): fenix_general_bug_ids = { bug_id for bug_id, bug_data in bugs.items() - if bug_data.get("class") == "Fenix" - and self.meets_threshold(bug_data, self.general_confidence_threshold) + if bug_data.get("class") == "Fenix" and meets_threshold(bug_data) } # Collection bugs that were originally Fenix::General but reclassified to another product with low confidence @@ -111,12 +117,7 @@ def get_bugs(self, date="today", bug_ids=[]): for bug_id, bug_data in bugs.items() if raw_bugs[bug_id]["product"] == "Fenix" and raw_bugs[bug_id]["component"] == "General" - and not self.meets_threshold( - bug_data, - self.general_confidence_threshold - if bug_data["class"] == "General" - else self.component_confidence_threshold, - ) + and not meets_threshold(bug_data) } fenix_general_bug_ids.update(originally_fenix_general_bug_ids) From 805066fecf101b890c0aa47115e8b339e8ccac1e Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 8 Jul 2024 12:13:38 -0400 Subject: [PATCH 16/17] Moved threshold selection to `meets_threshold()` --- bugbot/rules/component.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 921bec5f6..9cc95d981 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -82,14 +82,11 @@ def get_bz_params(self, date): } def get_bugs(self, date="today", bug_ids=[]): - def select_threshold(bug_data): + def meets_threshold(bug_data): if bug_data["class"] == "Fenix" or bug_data["class"] == "General": - return self.general_confidence_threshold + threshold = self.general_confidence_threshold else: - return self.component_confidence_threshold - - def meets_threshold(bug_data): - threshold = select_threshold(bug_data) + threshold = self.component_confidence_threshold return bug_data["prob"][bug_data["index"]] >= threshold # Retrieve the bugs with the fields defined in get_bz_params From be25e479c1ce3c954cf85d323a9dcbf6ffc1f4f2 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 8 Jul 2024 14:09:03 -0400 Subject: [PATCH 17/17] Removed comments --- bugbot/rules/component.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/bugbot/rules/component.py b/bugbot/rules/component.py index 9cc95d981..679c9c4d5 100644 --- a/bugbot/rules/component.py +++ b/bugbot/rules/component.py @@ -83,10 +83,11 @@ def get_bz_params(self, date): def get_bugs(self, date="today", bug_ids=[]): def meets_threshold(bug_data): - if bug_data["class"] == "Fenix" or bug_data["class"] == "General": - threshold = self.general_confidence_threshold - else: - threshold = self.component_confidence_threshold + threshold = ( + self.general_confidence_threshold + if bug_data["class"] == "Fenix" or bug_data["class"] == "General" + else self.component_confidence_threshold + ) return bug_data["prob"][bug_data["index"]] >= threshold # Retrieve the bugs with the fields defined in get_bz_params @@ -101,14 +102,12 @@ def meets_threshold(bug_data): # Classify those bugs bugs = get_bug_ids_classification("component", bug_ids) - # Collect bugs classified as Fenix::General fenix_general_bug_ids = { bug_id for bug_id, bug_data in bugs.items() if bug_data.get("class") == "Fenix" and meets_threshold(bug_data) } - # Collection bugs that were originally Fenix::General but reclassified to another product with low confidence originally_fenix_general_bug_ids = { bug_id for bug_id, bug_data in bugs.items() @@ -119,17 +118,15 @@ def meets_threshold(bug_data): fenix_general_bug_ids.update(originally_fenix_general_bug_ids) - # Reclassify the Fenix::General bugs using the fenixcomponent model if fenix_general_bug_ids: fenix_general_classification = get_bug_ids_classification( "fenixcomponent", fenix_general_bug_ids ) for bug_id, data in fenix_general_classification.items(): - new_confidence = data["prob"][data["index"]] + confidence = data["prob"][data["index"]] - # Only reclassify if the new confidence meets the Fenix component confidence threshold - if new_confidence > self.fenix_confidence_threshold: + if confidence > self.fenix_confidence_threshold: data["class"] = f"Fenix::{data['class']}" bugs[bug_id] = data