migrating code from community (#94)

langchain-ai · Mar 26, 2024 · 437cdc8 · 437cdc8
1 parent be5dc2b
commit 437cdc8
Show file tree

Hide file tree

Showing 20 changed files with 1,357 additions and 12 deletions.
diff --git a/libs/community/langchain_google_community/__init__.py b/libs/community/langchain_google_community/__init__.py
@@ -1,18 +1,31 @@
+from langchain_google_community.bigquery import BigQueryLoader
 from langchain_google_community.bigquery_vector_search import BigQueryVectorSearch
 from langchain_google_community.docai import DocAIParser, DocAIParsingResults
 from langchain_google_community.documentai_warehouse import DocumentAIWarehouseRetriever
+from langchain_google_community.drive import GoogleDriveLoader
 from langchain_google_community.gcs_directory import GCSDirectoryLoader
 from langchain_google_community.gcs_file import GCSFileLoader
 from langchain_google_community.gmail.loader import GMailLoader
 from langchain_google_community.gmail.toolkit import GmailToolkit
-from langchain_google_community.google_speech_to_text import GoogleSpeechToTextLoader
-from langchain_google_community.googledrive import GoogleDriveLoader
+from langchain_google_community.google_speech_to_text import SpeechToTextLoader
+from langchain_google_community.places_api import (
+    GooglePlacesAPIWrapper,
+    GooglePlacesTool,
+)
+from langchain_google_community.search import (
+    GoogleSearchAPIWrapper,
+    GoogleSearchResults,
+    GoogleSearchRun,
+)
+from langchain_google_community.texttospeech import TextToSpeechTool
+from langchain_google_community.translate import GoogleTranslateTransformer
 from langchain_google_community.vertex_ai_search import (
     VertexAIMultiTurnSearchRetriever,
     VertexAISearchRetriever,
 )
 
 __all__ = [
+    "BigQueryLoader",
     "BigQueryVectorSearch",
     "DocAIParser",
     "DocAIParsingResults",
@@ -22,7 +35,14 @@
     "GMailLoader",
     "GmailToolkit",
     "GoogleDriveLoader",
-    "GoogleSpeechToTextLoader",
+    "GooglePlacesAPIWrapper",
+    "GooglePlacesTool",
+    "GoogleSearchAPIWrapper",
+    "GoogleSearchResults",
+    "GoogleSearchRun",
+    "GoogleTranslateTransformer",
+    "SpeechToTextLoader",
+    "TextToSpeechTool",
     "VertexAIMultiTurnSearchRetriever",
     "VertexAISearchRetriever",
 ]
diff --git a/libs/community/langchain_google_community/bigquery.py b/libs/community/langchain_google_community/bigquery.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, List, Optional
+
+from langchain_core.document_loaders import BaseLoader
+from langchain_core.documents import Document
+
+from langchain_google_community._utils import get_client_info
+
+if TYPE_CHECKING:
+    from google.auth.credentials import Credentials  # type: ignore[import]
+
+
+class BigQueryLoader(BaseLoader):
+    """Load from the Google Cloud Platform `BigQuery`.
+
+    Each document represents one row of the result. The `page_content_columns`
+    are written into the `page_content` of the document. The `metadata_columns`
+    are written into the `metadata` of the document. By default, all columns
+    are written into the `page_content` and none into the `metadata`.
+
+    """
+
+    def __init__(
+        self,
+        query: str,
+        project: Optional[str] = None,
+        page_content_columns: Optional[List[str]] = None,
+        metadata_columns: Optional[List[str]] = None,
+        credentials: Optional[Credentials] = None,
+    ):
+        """Initialize BigQuery document loader.
+
+        Args:
+            query: The query to run in BigQuery.
+            project: Optional. The project to run the query in.
+            page_content_columns: Optional. The columns to write into the `page_content`
+                of the document.
+            metadata_columns: Optional. The columns to write into the `metadata` of the
+                document.
+            credentials : google.auth.credentials.Credentials, optional
+              Credentials for accessing Google APIs. Use this parameter to override
+                default credentials, such as to use Compute Engine
+                (`google.auth.compute_engine.Credentials`) or Service Account
+                (`google.oauth2.service_account.Credentials`) credentials directly.
+        """
+        self.query = query
+        self.project = project
+        self.page_content_columns = page_content_columns
+        self.metadata_columns = metadata_columns
+        self.credentials = credentials
+
+    def load(self) -> List[Document]:
+        try:
+            from google.cloud import bigquery  # type: ignore[attr-defined]
+        except ImportError as ex:
+            raise ImportError(
+                "Could not import google-cloud-bigquery python package. "
+                "Please install it with `pip install google-cloud-bigquery`."
+            ) from ex
+
+        bq_client = bigquery.Client(
+            credentials=self.credentials,
+            project=self.project,
+            client_info=get_client_info(module="bigquery"),
+        )
+        if not bq_client.project:
+            error_desc = (
+                "GCP project for Big Query is not set! Either provide a "
+                "`project` argument during BigQueryLoader instantiation, "
+                "or set a default project with `gcloud config set project` "
+                "command."
+            )
+            raise ValueError(error_desc)
+        query_result = bq_client.query(self.query).result()
+        docs: List[Document] = []
+
+        page_content_columns = self.page_content_columns
+        metadata_columns = self.metadata_columns
+
+        if page_content_columns is None:
+            page_content_columns = [column.name for column in query_result.schema]
+        if metadata_columns is None:
+            metadata_columns = []
+
+        for row in query_result:
+            page_content = "\n".join(
+                f"{k}: {v}" for k, v in row.items() if k in page_content_columns
+            )
+            metadata = {k: v for k, v in row.items() if k in metadata_columns}
+            doc = Document(page_content=page_content, metadata=metadata)
+            docs.append(doc)
+
+        return docs
diff --git a/...langchain_google_community/googledrive.py → ...unity/langchain_google_community/drive.py b/...langchain_google_community/googledrive.py → ...unity/langchain_google_community/drive.py
diff --git a/libs/community/langchain_google_community/gmail/base.py b/libs/community/langchain_google_community/gmail/base.py
@@ -0,0 +1,37 @@
+"""Base class for Gmail tools."""
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from langchain_core.pydantic_v1 import Field
+from langchain_core.tools import BaseTool
+
+from langchain_google_community.gmail.utils import build_resource_service
+
+if TYPE_CHECKING:
+    # This is for linting and IDE typehints
+    from googleapiclient.discovery import Resource  # type: ignore[import]
+else:
+    try:
+        # We do this so pydantic can resolve the types when instantiating
+        from googleapiclient.discovery import Resource
+    except ImportError:
+        pass
+
+
+class GmailBaseTool(BaseTool):
+    """Base class for Gmail tools."""
+
+    api_resource: Resource = Field(default_factory=build_resource_service)
+
+    @classmethod
+    def from_api_resource(cls, api_resource: Resource) -> "GmailBaseTool":
+        """Create a tool from an api resource.
+
+        Args:
+            api_resource: The api resource to use.
+
+        Returns:
+            A tool.
+        """
+        return cls(service=api_resource)
diff --git a/libs/community/langchain_google_community/gmail/create_draft.py b/libs/community/langchain_google_community/gmail/create_draft.py
@@ -0,0 +1,87 @@
+import base64
+from email.message import EmailMessage
+from typing import List, Optional, Type
+
+from langchain_core.callbacks import CallbackManagerForToolRun
+from langchain_core.pydantic_v1 import BaseModel, Field
+
+from langchain_google_community.gmail.base import GmailBaseTool
+
+
+class CreateDraftSchema(BaseModel):
+    """Input for CreateDraftTool."""
+
+    message: str = Field(
+        ...,
+        description="The message to include in the draft.",
+    )
+    to: List[str] = Field(
+        ...,
+        description="The list of recipients.",
+    )
+    subject: str = Field(
+        ...,
+        description="The subject of the message.",
+    )
+    cc: Optional[List[str]] = Field(
+        None,
+        description="The list of CC recipients.",
+    )
+    bcc: Optional[List[str]] = Field(
+        None,
+        description="The list of BCC recipients.",
+    )
+
+
+class GmailCreateDraft(GmailBaseTool):
+    """Tool that creates a draft email for Gmail."""
+
+    name: str = "create_gmail_draft"
+    description: str = (
+        "Use this tool to create a draft email with the provided message fields."
+    )
+    args_schema: Type[CreateDraftSchema] = CreateDraftSchema
+
+    def _prepare_draft_message(
+        self,
+        message: str,
+        to: List[str],
+        subject: str,
+        cc: Optional[List[str]] = None,
+        bcc: Optional[List[str]] = None,
+    ) -> dict:
+        draft_message = EmailMessage()
+        draft_message.set_content(message)
+
+        draft_message["To"] = ", ".join(to)
+        draft_message["Subject"] = subject
+        if cc is not None:
+            draft_message["Cc"] = ", ".join(cc)
+
+        if bcc is not None:
+            draft_message["Bcc"] = ", ".join(bcc)
+
+        encoded_message = base64.urlsafe_b64encode(draft_message.as_bytes()).decode()
+        return {"message": {"raw": encoded_message}}
+
+    def _run(
+        self,
+        message: str,
+        to: List[str],
+        subject: str,
+        cc: Optional[List[str]] = None,
+        bcc: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> str:
+        try:
+            create_message = self._prepare_draft_message(message, to, subject, cc, bcc)
+            draft = (
+                self.api_resource.users()
+                .drafts()
+                .create(userId="me", body=create_message)
+                .execute()
+            )
+            output = f'Draft created. Draft Id: {draft["id"]}'
+            return output
+        except Exception as e:
+            raise Exception(f"An error occurred: {e}")
diff --git a/libs/community/langchain_google_community/gmail/get_message.py b/libs/community/langchain_google_community/gmail/get_message.py
@@ -0,0 +1,70 @@
+import base64
+import email
+from typing import Dict, Optional, Type
+
+from langchain_core.callbacks import CallbackManagerForToolRun
+from langchain_core.pydantic_v1 import BaseModel, Field
+
+from langchain_google_community.gmail.base import GmailBaseTool
+from langchain_google_community.gmail.utils import clean_email_body
+
+
+class SearchArgsSchema(BaseModel):
+    """Input for GetMessageTool."""
+
+    message_id: str = Field(
+        ...,
+        description="The unique ID of the email message, retrieved from a search.",
+    )
+
+
+class GmailGetMessage(GmailBaseTool):
+    """Tool that gets a message by ID from Gmail."""
+
+    name: str = "get_gmail_message"
+    description: str = (
+        "Use this tool to fetch an email by message ID."
+        " Returns the thread ID, snippet, body, subject, and sender."
+    )
+    args_schema: Type[SearchArgsSchema] = SearchArgsSchema
+
+    def _run(
+        self,
+        message_id: str,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> Dict:
+        """Run the tool."""
+        query = (
+            self.api_resource.users()
+            .messages()
+            .get(userId="me", format="raw", id=message_id)
+        )
+        message_data = query.execute()
+        raw_message = base64.urlsafe_b64decode(message_data["raw"])
+
+        email_msg = email.message_from_bytes(raw_message)
+
+        subject = email_msg["Subject"]
+        sender = email_msg["From"]
+
+        message_body = ""
+        if email_msg.is_multipart():
+            for part in email_msg.walk():
+                ctype = part.get_content_type()
+                cdispo = str(part.get("Content-Disposition"))
+                if ctype == "text/plain" and "attachment" not in cdispo:
+                    message_body = part.get_payload(decode=True).decode("utf-8")
+                    break
+        else:
+            message_body = email_msg.get_payload(decode=True).decode("utf-8")
+
+        body = clean_email_body(message_body)
+
+        return {
+            "id": message_id,
+            "threadId": message_data["threadId"],
+            "snippet": message_data["snippet"],
+            "body": body,
+            "subject": subject,
+            "sender": sender,
+        }
diff --git a/libs/community/langchain_google_community/gmail/get_thread.py b/libs/community/langchain_google_community/gmail/get_thread.py
@@ -0,0 +1,48 @@
+from typing import Dict, Optional, Type
+
+from langchain_core.callbacks import CallbackManagerForToolRun
+from langchain_core.pydantic_v1 import BaseModel, Field
+
+from langchain_google_community.gmail.base import GmailBaseTool
+
+
+class GetThreadSchema(BaseModel):
+    """Input for GetMessageTool."""
+
+    # From https://support.google.com/mail/answer/7190?hl=en
+    thread_id: str = Field(
+        ...,
+        description="The thread ID.",
+    )
+
+
+class GmailGetThread(GmailBaseTool):
+    """Tool that gets a thread by ID from Gmail."""
+
+    name: str = "get_gmail_thread"
+    description: str = (
+        "Use this tool to search for email messages."
+        " The input must be a valid Gmail query."
+        " The output is a JSON list of messages."
+    )
+    args_schema: Type[GetThreadSchema] = GetThreadSchema
+
+    def _run(
+        self,
+        thread_id: str,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> Dict:
+        """Run the tool."""
+        query = self.api_resource.users().threads().get(userId="me", id=thread_id)
+        thread_data = query.execute()
+        if not isinstance(thread_data, dict):
+            raise ValueError("The output of the query must be a list.")
+        messages = thread_data["messages"]
+        thread_data["messages"] = []
+        keys_to_keep = ["id", "snippet", "snippet"]
+        # TODO: Parse body.
+        for message in messages:
+            thread_data["messages"].append(
+                {k: message[k] for k in keys_to_keep if k in message}
+            )
+        return thread_data