From 2baf59e3f22956dd6641b65bd50a6a8380936705 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=B5=B5=E8=8B=B1=E8=B6=85?= <randeng.zhao@tuya.com>
Date: Tue, 21 May 2024 17:55:11 +0800
Subject: [PATCH 1/7] feat: support updating DDL by engine and table

---
 src/vanna/base/base.py                    | 14 +++++++++---
 src/vanna/opensearch/opensearch_vector.py | 27 +++++++++++++++++++----
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/src/vanna/base/base.py b/src/vanna/base/base.py
index c13489b7..b7fe00e2 100644
--- a/src/vanna/base/base.py
+++ b/src/vanna/base/base.py
@@ -395,12 +395,14 @@ def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
         pass
 
     @abstractmethod
-    def add_ddl(self, ddl: str, **kwargs) -> str:
+    def add_ddl(self, ddl: str, table: str = None, engine: str = None, **kwargs) -> str:
         """
         This method is used to add a DDL statement to the training data.
 
         Args:
             ddl (str): The DDL statement to add.
+            table (str): The table name.table (str): The table that the DDL statement applies to.
+            engine (str): The database engine that the DDL statement applies to.
 
         Returns:
             str: The ID of the training data that was added.
@@ -1708,6 +1710,8 @@ def train(
         question: str = None,
         sql: str = None,
         ddl: str = None,
+        table: str = None,
+        engine: str = None,
         documentation: str = None,
         plan: TrainingPlan = None,
     ) -> str:
@@ -1728,8 +1732,12 @@ def train(
             question (str): The question to train on.
             sql (str): The SQL query to train on.
             ddl (str):  The DDL statement.
+            table (str): The table name.
+            engine (str): The database engine.
             documentation (str): The documentation to train on.
             plan (TrainingPlan): The training plan to train on.
+        Returns:
+            str: The ID of the training data that was added.
         """
 
         if question and not sql:
@@ -1747,12 +1755,12 @@ def train(
 
         if ddl:
             print("Adding ddl:", ddl)
-            return self.add_ddl(ddl)
+            return self.add_ddl(ddl=ddl, table=table, engine=engine)
 
         if plan:
             for item in plan._plan:
                 if item.item_type == TrainingPlanItem.ITEM_TYPE_DDL:
-                    self.add_ddl(item.item_value)
+                    self.add_ddl(ddl=item.item_value)
                 elif item.item_type == TrainingPlanItem.ITEM_TYPE_IS:
                     self.add_documentation(item.item_value)
                 elif item.item_type == TrainingPlanItem.ITEM_TYPE_SQL:
diff --git a/src/vanna/opensearch/opensearch_vector.py b/src/vanna/opensearch/opensearch_vector.py
index 7fab1ecd..0990be4a 100644
--- a/src/vanna/opensearch/opensearch_vector.py
+++ b/src/vanna/opensearch/opensearch_vector.py
@@ -6,7 +6,7 @@
 from opensearchpy import OpenSearch
 
 from ..base import VannaBase
-
+from ..utils import deterministic_uuid
 
 class OpenSearch_VectorStore(VannaBase):
   def __init__(self, config=None):
@@ -56,6 +56,12 @@ def __init__(self, config=None):
       },
       "mappings": {
         "properties": {
+          "engine": {
+            "type": "keyword",
+          },
+          "table": {
+            "type": "keyword",
+          },
           "ddl": {
             "type": "text",
           },
@@ -231,10 +237,24 @@ def create_index_if_not_exists(self, index_name: str,
       print(f"Error creating index: {index_name} ", e)
       return False
 
-  def add_ddl(self, ddl: str, **kwargs) -> str:
+  def calculate_md5(self, string: str) -> str:
+    """Return the hexadecimal MD5 digest of `string` encoded as UTF-8."""
+    import hashlib  # imported here in case the module does not import it
+    # Hash the argument; `self` is the vector store and has no encode().
+    string_bytes = string.encode('utf-8')
+    md5_hash = hashlib.md5(string_bytes)
+    md5_hex = md5_hash.hexdigest()
+    return md5_hex
+
+  def add_ddl(self, ddl: str, table: str = None, engine: str = None, **kwargs) -> str:
     # Assuming that you have a DDL index in your OpenSearch
-    id = str(uuid.uuid4()) + "-ddl"
+    if table is not None and engine is not None:
+      id = deterministic_uuid(engine + "-" + table) + "-ddl"
+    else:
+      id = str(uuid.uuid4()) + "-ddl"
     ddl_dict = {
+      "engine": engine,
+      "table": table,
       "ddl": ddl
     }
     response = self.client.index(index=self.ddl_index, body=ddl_dict, id=id,
@@ -315,7 +335,6 @@ def get_training_data(self, **kwargs) -> pd.DataFrame:
       body={"query": {"match_all": {}}},
       size=1000
     )
-    print(query)
     # records = [hit['_source'] for hit in response['hits']['hits']]
     for hit in response['hits']['hits']:
       data.append(

From 6f483693791433bd0ff1d071dc890e9acd460163 Mon Sep 17 00:00:00 2001
From: zhaoyingchao <randeng.zhao@tuya.com>
Date: Thu, 30 May 2024 17:53:50 +0800
Subject: [PATCH 2/7] feat:add get_similar_tables_metadata

---
 src/vanna/base/base.py                    | 16 +++++++++
 src/vanna/opensearch/opensearch_vector.py | 43 ++++++++++++++++++++---
 2 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/src/vanna/base/base.py b/src/vanna/base/base.py
index e77359cb..299a1709 100644
--- a/src/vanna/base/base.py
+++ b/src/vanna/base/base.py
@@ -367,6 +367,22 @@ def get_related_ddl(self, question: str, **kwargs) -> list:
         """
         pass
 
+    @abstractmethod
+    def get_similar_tables_metadata(self, table: str = None, ddl: str = None, engine: str = None, size: int = 10, **kwargs) -> list:
+        """
+        This method is used to get similar tables metadata.
+
+        Args:
+            table (str): The table to get similar tables metadata for.
+            ddl (str): The DDL statement of the table.
+            engine (str): The database engine of the table.
+            size (int): The number of similar tables metadata to return.
+
+        Returns:
+            list: A list of similar tables metadata.
+        """
+        pass
+
     @abstractmethod
     def get_related_documentation(self, question: str, **kwargs) -> list:
         """
diff --git a/src/vanna/opensearch/opensearch_vector.py b/src/vanna/opensearch/opensearch_vector.py
index 0990be4a..98026727 100644
--- a/src/vanna/opensearch/opensearch_vector.py
+++ b/src/vanna/opensearch/opensearch_vector.py
@@ -8,6 +8,7 @@
 from ..base import VannaBase
 from ..utils import deterministic_uuid
 
+
 class OpenSearch_VectorStore(VannaBase):
   def __init__(self, config=None):
     VannaBase.__init__(self, config=config)
@@ -98,6 +99,8 @@ def __init__(self, config=None):
     if config is not None and "es_question_sql_index_settings" in config:
       question_sql_index_settings = config["es_question_sql_index_settings"]
 
+    self.n_results = 10 if config is None else config.get("n_results", 10)  # config defaults to None
+
     self.document_index_settings = document_index_settings
     self.ddl_index_settings = ddl_index_settings
     self.question_sql_index_settings = question_sql_index_settings
@@ -246,7 +249,8 @@ def calculate_md5(self, string: str) -> str:
     md5_hex = md5_hash.hexdigest()
     return md5_hex
 
-  def add_ddl(self, ddl: str, table: str = None, engine: str = None, **kwargs) -> str:
+  def add_ddl(self, ddl: str, table: str = None, engine: str = None,
+              **kwargs) -> str:
     # Assuming that you have a DDL index in your OpenSearch
     if table is not None and engine is not None:
       id = deterministic_uuid(engine + "-" + table) + "-ddl"
@@ -290,7 +294,8 @@ def get_related_ddl(self, question: str, **kwargs) -> List[str]:
         "match": {
           "ddl": question
         }
-      }
+      },
+      "size": self.n_results
     }
     print(query)
     response = self.client.search(index=self.ddl_index, body=query,
@@ -303,7 +308,8 @@ def get_related_documentation(self, question: str, **kwargs) -> List[str]:
         "match": {
           "doc": question
         }
-      }
+      },
+      "size": self.n_results
     }
     print(query)
     response = self.client.search(index=self.document_index,
@@ -317,7 +323,8 @@ def get_similar_question_sql(self, question: str, **kwargs) -> List[str]:
         "match": {
           "question": question
         }
-      }
+      },
+      "size": self.n_results
     }
     print(query)
     response = self.client.search(index=self.question_sql_index,
@@ -326,6 +333,34 @@ def get_similar_question_sql(self, question: str, **kwargs) -> List[str]:
     return [(hit['_source']['question'], hit['_source']['sql']) for hit in
             response['hits']['hits']]
 
+  def get_similar_tables_metadata(self, table: str = None, ddl: str = None,
+                                  engine: str = None, size: int = 10,
+                                  **kwargs) -> list:
+    # Assume you have some vector search mechanism associated with your data
+    if table is None and ddl is None and engine is None:
+      query = {
+        "query": {
+          "match_all": {}
+        },
+        "size": size
+      }
+    else:
+      query = {
+        "size": size
+      }
+
+      if table is not None:
+        query["query"]["match"]["table"] = table
+
+      if ddl is not None:
+        query["query"]["match"]["ddl"] = ddl
+
+      if engine is not None:
+        query["query"]["match"]["engine"] = engine
+    print(query)
+    response = self.client.search(index=self.ddl_index, body=query, **kwargs)
+    return [hit['_source'] for hit in response['hits']['hits']]
+
   def get_training_data(self, **kwargs) -> pd.DataFrame:
     # This will be a simple example pulling all data from an index
     # WARNING: Do not use this approach in production for large indices!

From 6bbb14cb79cf4f9c6c27f6509b5692d740d9c4a8 Mon Sep 17 00:00:00 2001
From: zhaoyingchao <randeng.zhao@tuya.com>
Date: Fri, 31 May 2024 10:26:29 +0800
Subject: [PATCH 3/7] fix: restructure query building in get_similar_tables_metadata

---
 src/vanna/opensearch/opensearch_vector.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/vanna/opensearch/opensearch_vector.py b/src/vanna/opensearch/opensearch_vector.py
index 98026727..f6294955 100644
--- a/src/vanna/opensearch/opensearch_vector.py
+++ b/src/vanna/opensearch/opensearch_vector.py
@@ -337,18 +337,16 @@ def get_similar_tables_metadata(self, table: str = None, ddl: str = None,
                                   engine: str = None, size: int = 10,
                                   **kwargs) -> list:
     # Assume you have some vector search mechanism associated with your data
+    query = {
+
+    }
     if table is None and ddl is None and engine is None:
       query = {
         "query": {
           "match_all": {}
-        },
-        "size": size
+        }
       }
     else:
-      query = {
-        "size": size
-      }
-
       if table is not None:
         query["query"]["match"]["table"] = table
 
@@ -357,6 +355,9 @@ def get_similar_tables_metadata(self, table: str = None, ddl: str = None,
 
       if engine is not None:
         query["query"]["match"]["engine"] = engine
+    if size is not None:
+      query["size"] = size
+
     print(query)
     response = self.client.search(index=self.ddl_index, body=query, **kwargs)
     return [hit['_source'] for hit in response['hits']['hits']]

From abf84bb09cf1b0e5b4b7f120f3db6257f0079b06 Mon Sep 17 00:00:00 2001
From: zhaoyingchao <randeng.zhao@tuya.com>
Date: Fri, 31 May 2024 14:34:27 +0800
Subject: [PATCH 4/7] feat: add TableMetadata management

---
 src/vanna/base/base.py                    | 63 +++++++++++++++++++----
 src/vanna/opensearch/opensearch_vector.py | 39 +++++++++-----
 src/vanna/types/__init__.py               | 26 ++++++++++
 3 files changed, 107 insertions(+), 21 deletions(-)

diff --git a/src/vanna/base/base.py b/src/vanna/base/base.py
index 299a1709..e37253bd 100644
--- a/src/vanna/base/base.py
+++ b/src/vanna/base/base.py
@@ -65,7 +65,7 @@
 import sqlparse
 
 from ..exceptions import DependencyError, ImproperlyConfigured, ValidationError
-from ..types import TrainingPlan, TrainingPlanItem
+from ..types import TrainingPlan, TrainingPlanItem, TableMetadata
 from ..utils import validate_config_path
 
 
@@ -209,6 +209,54 @@ def extract_sql(self, llm_response: str) -> str:
 
         return llm_response
 
+    @staticmethod
+    def extract_table_metadata(ddl: str) -> TableMetadata:
+        """
+        Extracts the table metadata from a DDL statement. This is useful in case the DDL statement contains other information besides the table metadata.
+        Override this function if your DDL statements need custom extraction logic.
+
+        Example:
+        ```python
+        vn.extract_table_metadata("CREATE TABLE hive.bi_ads.customers (id INT, name TEXT, sales DECIMAL)")
+        ```
+
+        This is a static method, so it can be called on an instance or on the class;
+        it does not read any instance state.
+
+        Table names are matched case-insensitively after "CREATE TABLE" (optionally
+        followed by "IF NOT EXISTS"); only unquoted identifiers made of word
+        characters are recognized.
+
+        Args:
+            ddl (str): The DDL statement.
+
+        Returns:
+            TableMetadata: The extracted table metadata. Parts that are not present
+            in the statement are None; an empty TableMetadata means no match.
+        """
+        # An optional IF NOT EXISTS clause may precede the table name.
+        create_prefix = r'CREATE TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?'
+        # Most qualified form first: catalog.schema.table, schema.table, table.
+        name_patterns = (
+            r'(\w+)\.(\w+)\.(\w+)\s*\(',
+            r'(\w+)\.(\w+)\s*\(',
+            r'(\w+)\s*\(',
+        )
+
+        for name_pattern in name_patterns:
+            match = re.search(create_prefix + name_pattern, ddl, re.IGNORECASE)
+            if match is None:
+                continue
+            names = match.groups()
+            # Left-pad with None so the last group is always the table name,
+            # e.g. ("s", "t") becomes (None, "s", "t").
+            padding = (None,) * (3 - len(names))
+            catalog, schema, table_name = padding + names
+            return TableMetadata(catalog, schema, table_name)
+
+        # No CREATE TABLE clause with a recognizable name was found.
+        return TableMetadata()
+
     def is_sql_valid(self, sql: str) -> bool:
         """
         Example:
@@ -368,12 +416,12 @@ def get_related_ddl(self, question: str, **kwargs) -> list:
         pass
 
     @abstractmethod
-    def get_similar_tables_metadata(self, table: str = None, ddl: str = None, engine: str = None, size: int = 10, **kwargs) -> list:
+    def get_similar_tables_metadata(self, table_metadata: TableMetadata = None, ddl: str = None, engine: str = None, size: int = 10, **kwargs) -> list:
         """
         This method is used to get similar tables metadata.
 
         Args:
-            table (str): The table to get similar tables metadata for.
+            table_metadata (TableMetadata): The table to get similar tables metadata for.
             ddl (str): The DDL statement of the table.
             engine (str): The database engine of the table.
             size (int): The number of similar tables metadata to return.
@@ -411,13 +459,12 @@ def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
         pass
 
     @abstractmethod
-    def add_ddl(self, ddl: str, table: str = None, engine: str = None, **kwargs) -> str:
+    def add_ddl(self, ddl: str, engine: str = None, **kwargs) -> str:
         """
         This method is used to add a DDL statement to the training data.
 
         Args:
             ddl (str): The DDL statement to add.
-            table (str): The table name.table (str): The table that the DDL statement applies to.
             engine (str): The database engine that the DDL statement applies to.
 
         Returns:
@@ -1726,7 +1773,6 @@ def train(
         question: str = None,
         sql: str = None,
         ddl: str = None,
-        table: str = None,
         engine: str = None,
         documentation: str = None,
         plan: TrainingPlan = None,
@@ -1748,7 +1794,6 @@ def train(
             question (str): The question to train on.
             sql (str): The SQL query to train on.
             ddl (str):  The DDL statement.
-            table (str): The table name.
             engine (str): The database engine.
             documentation (str): The documentation to train on.
             plan (TrainingPlan): The training plan to train on.
@@ -1771,12 +1816,12 @@ def train(
 
         if ddl:
             print("Adding ddl:", ddl)
-            return self.add_ddl(ddl=ddl, table=table, engine=engine)
+            return self.add_ddl(ddl=ddl, engine=engine)
 
         if plan:
             for item in plan._plan:
                 if item.item_type == TrainingPlanItem.ITEM_TYPE_DDL:
-                    self.add_ddl(ddl=item.item_value)
+                    self.add_ddl(ddl=item.item_value, engine=engine)
                 elif item.item_type == TrainingPlanItem.ITEM_TYPE_IS:
                     self.add_documentation(item.item_value)
                 elif item.item_type == TrainingPlanItem.ITEM_TYPE_SQL:
diff --git a/src/vanna/opensearch/opensearch_vector.py b/src/vanna/opensearch/opensearch_vector.py
index f6294955..9143a25c 100644
--- a/src/vanna/opensearch/opensearch_vector.py
+++ b/src/vanna/opensearch/opensearch_vector.py
@@ -4,6 +4,7 @@
 
 import pandas as pd
 from opensearchpy import OpenSearch
+from ..types import TableMetadata
 
 from ..base import VannaBase
 from ..utils import deterministic_uuid
@@ -60,7 +61,13 @@ def __init__(self, config=None):
           "engine": {
             "type": "keyword",
           },
-          "table": {
+          "catalog": {
+            "type": "keyword",
+          },
+          "schema": {
+            "type": "keyword",
+          },
+          "table_name": {
             "type": "keyword",
           },
           "ddl": {
@@ -249,16 +256,20 @@ def calculate_md5(self, string: str) -> str:
     md5_hex = md5_hash.hexdigest()
     return md5_hex
 
-  def add_ddl(self, ddl: str, table: str = None, engine: str = None,
+  def add_ddl(self, ddl: str, engine: str = None,
               **kwargs) -> str:
     # Assuming that you have a DDL index in your OpenSearch
-    if table is not None and engine is not None:
-      id = deterministic_uuid(engine + "-" + table) + "-ddl"
+    table_metadata = self.extract_table_metadata(ddl)
+    full_table_name = table_metadata.getfulltablename()
+    if full_table_name is not None and engine is not None:
+      id = deterministic_uuid(engine + "-" + full_table_name) + "-ddl"
     else:
       id = str(uuid.uuid4()) + "-ddl"
     ddl_dict = {
       "engine": engine,
-      "table": table,
+      "catalog": table_metadata.catalog,
+      "schema": table_metadata.schema,
+      "table_name": table_metadata.table_name,
       "ddl": ddl
     }
     response = self.client.index(index=self.ddl_index, body=ddl_dict, id=id,
@@ -333,22 +344,26 @@ def get_similar_question_sql(self, question: str, **kwargs) -> List[str]:
     return [(hit['_source']['question'], hit['_source']['sql']) for hit in
             response['hits']['hits']]
 
-  def get_similar_tables_metadata(self, table: str = None, ddl: str = None,
+  def get_similar_tables_metadata(self, table_metadata: TableMetadata = None, ddl: str = None,
                                   engine: str = None, size: int = 10,
                                   **kwargs) -> list:
     # Assume you have some vector search mechanism associated with your data
-    query = {
-
-    }
-    if table is None and ddl is None and engine is None:
+    query = {}
+    if table_metadata is None and ddl is None and engine is None:
       query = {
         "query": {
           "match_all": {}
         }
       }
     else:
-      if table is not None:
-        query["query"]["match"]["table"] = table
+      query["query"] = {"match": {}}
+      if table_metadata is not None:
+        if table_metadata.catalog is not None:
+          query["query"]["match"]["catalog"] = table_metadata.catalog
+        if table_metadata.schema is not None:
+          query["query"]["match"]["schema"] = table_metadata.schema
+        if table_metadata.table_name is not None:
+          query["query"]["match"]["table_name"] = table_metadata.table_name
 
       if ddl is not None:
         query["query"]["match"]["ddl"] = ddl
diff --git a/src/vanna/types/__init__.py b/src/vanna/types/__init__.py
index f3841c88..c1904c3e 100644
--- a/src/vanna/types/__init__.py
+++ b/src/vanna/types/__init__.py
@@ -290,3 +290,29 @@ def remove_item(self, item: str):
             if str(plan_item) == item:
                 self._plan.remove(plan_item)
                 break
+
+
+class TableMetadata:
+  """Catalog, schema and table name identifying one table; each may be None."""
+
+  def __init__(self, catalog=None, schema=None, table_name=None):
+    self.catalog = catalog
+    self.schema = schema
+    self.table_name = table_name
+
+  def __str__(self):
+    labelled = (("Catalog", self.catalog), ("Schema", self.schema),
+                ("Table", self.table_name))
+    parts = [f"{label}: {value}" for label, value in labelled if value]
+    return "\n".join(parts) if parts else "No match found"
+
+  def get_full_table_name(self):
+    """Return the dotted table name, or None when no table name is set."""
+    if not self.table_name:
+      # Callers test `is not None`; never hand back the string "None".
+      return None
+    if self.schema:
+      prefix = f"{self.catalog}." if self.catalog else ""
+      return f"{prefix}{self.schema}.{self.table_name}"
+    return f"{self.table_name}"
+

From cf603f5d4ad5b8bf9e4da74d31243996bed40c27 Mon Sep 17 00:00:00 2001
From: zhaoyingchao <randeng.zhao@tuya.com>
Date: Fri, 31 May 2024 14:54:29 +0800
Subject: [PATCH 5/7] feat: optimize search_tables_metadata

---
 src/vanna/base/base.py                    | 21 ++++++++++-----
 src/vanna/opensearch/opensearch_vector.py | 33 ++++++++++++++---------
 2 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/src/vanna/base/base.py b/src/vanna/base/base.py
index e37253bd..a62d611f 100644
--- a/src/vanna/base/base.py
+++ b/src/vanna/base/base.py
@@ -416,18 +416,27 @@ def get_related_ddl(self, question: str, **kwargs) -> list:
         pass
 
     @abstractmethod
-    def get_similar_tables_metadata(self, table_metadata: TableMetadata = None, ddl: str = None, engine: str = None, size: int = 10, **kwargs) -> list:
+    def search_tables_metadata(self,
+                              engine: str = None,
+                              catalog: str = None,
+                              schema: str = None,
+                              table_name: str = None,
+                              ddl: str = None,
+                              size: int = 10,
+                              **kwargs) -> list:
         """
         This method is used to get similar tables metadata.
 
         Args:
-            table_metadata (TableMetadata): The table to get similar tables metadata for.
-            ddl (str): The DDL statement of the table.
-            engine (str): The database engine of the table.
-            size (int): The number of similar tables metadata to return.
+            engine (str): The database engine.
+            catalog (str): The catalog.
+            schema (str): The schema.
+            table_name (str): The table name.
+            ddl (str): The DDL statement.
+            size (int): The number of tables to return.
 
         Returns:
-            list: A list of similar tables metadata.
+            list: A list of tables metadata.
         """
         pass
 
diff --git a/src/vanna/opensearch/opensearch_vector.py b/src/vanna/opensearch/opensearch_vector.py
index 9143a25c..e90b1440 100644
--- a/src/vanna/opensearch/opensearch_vector.py
+++ b/src/vanna/opensearch/opensearch_vector.py
@@ -344,12 +344,17 @@ def get_similar_question_sql(self, question: str, **kwargs) -> List[str]:
     return [(hit['_source']['question'], hit['_source']['sql']) for hit in
             response['hits']['hits']]
 
-  def get_similar_tables_metadata(self, table_metadata: TableMetadata = None, ddl: str = None,
-                                  engine: str = None, size: int = 10,
-                                  **kwargs) -> list:
+  def search_tables_metadata(self,
+                            engine: str = None,
+                            catalog: str = None,
+                            schema: str = None,
+                            table_name: str = None,
+                            ddl: str = None,
+                            size: int = 10,
+                            **kwargs) -> list:
     # Assume you have some vector search mechanism associated with your data
     query = {}
-    if table_metadata is None and ddl is None and engine is None:
+    if engine is None and catalog is None and schema is None and table_name is None and ddl is None:
       query = {
         "query": {
           "match_all": {}
@@ -357,19 +362,21 @@ def get_similar_tables_metadata(self, table_metadata: TableMetadata = None, ddl:
       }
     else:
       query["query"] = {"match": {}}
-      if table_metadata is not None:
-        if table_metadata.catalog is not None:
-          query["query"]["match"]["catalog"] = table_metadata.catalog
-        if table_metadata.schema is not None:
-          query["query"]["match"]["schema"] = table_metadata.schema
-        if table_metadata.table_name is not None:
-          query["query"]["match"]["table_name"] = table_metadata.table_name
+      if engine is not None:
+        query["query"]["match"]["engine"] = engine
+
+      if catalog is not None:
+        query["query"]["match"]["catalog"] = catalog
+
+      if schema is not None:
+        query["query"]["match"]["schema"] = schema
+
+      if table_name is not None:
+        query["query"]["match"]["table_name"] = table_name
 
       if ddl is not None:
         query["query"]["match"]["ddl"] = ddl
 
-      if engine is not None:
-        query["query"]["match"]["engine"] = engine
     if size is not None:
       query["size"] = size
 

From d9bd555025ef51634e4f34f08bbae43568cd99e9 Mon Sep 17 00:00:00 2001
From: zhaoyingchao <randeng.zhao@tuya.com>
Date: Tue, 4 Jun 2024 13:33:22 +0800
Subject: [PATCH 6/7] fix: only set the query size when size is positive

---
 src/vanna/opensearch/opensearch_vector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/vanna/opensearch/opensearch_vector.py b/src/vanna/opensearch/opensearch_vector.py
index e90b1440..3e9b2595 100644
--- a/src/vanna/opensearch/opensearch_vector.py
+++ b/src/vanna/opensearch/opensearch_vector.py
@@ -377,7 +377,7 @@ def search_tables_metadata(self,
       if ddl is not None:
         query["query"]["match"]["ddl"] = ddl
 
-    if size is not None:
+    if size > 0:
       query["size"] = size
 
     print(query)

From 6dcff9bca3c171a015c0e1cadc8ed78040a13f64 Mon Sep 17 00:00:00 2001
From: zhaoyingchao <randeng.zhao@tuya.com>
Date: Tue, 4 Jun 2024 20:02:04 +0800
Subject: [PATCH 7/7] fix: use get_full_table_name and combine search filters with bool/should

---
 src/vanna/opensearch/opensearch_vector.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/vanna/opensearch/opensearch_vector.py b/src/vanna/opensearch/opensearch_vector.py
index 3e9b2595..a6047a96 100644
--- a/src/vanna/opensearch/opensearch_vector.py
+++ b/src/vanna/opensearch/opensearch_vector.py
@@ -259,8 +259,8 @@ def calculate_md5(self, string: str) -> str:
   def add_ddl(self, ddl: str, engine: str = None,
               **kwargs) -> str:
     # Assuming that you have a DDL index in your OpenSearch
-    table_metadata = self.extract_table_metadata(ddl)
-    full_table_name = table_metadata.getfulltablename()
+    table_metadata = VannaBase.extract_table_metadata(ddl)
+    full_table_name = table_metadata.get_full_table_name()
     if full_table_name is not None and engine is not None:
       id = deterministic_uuid(engine + "-" + full_table_name) + "-ddl"
     else:
@@ -361,21 +361,25 @@ def search_tables_metadata(self,
         }
       }
     else:
-      query["query"] = {"match": {}}
+      query["query"] = {
+        "bool": {
+          "should": [
+          ]
+        }
+      }
       if engine is not None:
-        query["query"]["match"]["engine"] = engine
+        query["query"]["bool"]["should"].append({"match": {"engine": engine}})
 
       if catalog is not None:
-        query["query"]["match"]["catalog"] = catalog
+        query["query"]["bool"]["should"].append({"match": {"catalog": catalog}})
 
       if schema is not None:
-        query["query"]["match"]["schema"] = schema
-
+        query["query"]["bool"]["should"].append({"match": {"schema": schema}})
       if table_name is not None:
-        query["query"]["match"]["table_name"] = table_name
+        query["query"]["bool"]["should"].append({"match": {"table_name": table_name}})
 
       if ddl is not None:
-        query["query"]["match"]["ddl"] = ddl
+        query["query"]["bool"]["should"].append({"match": {"ddl": ddl}})
 
     if size > 0:
       query["size"] = size