From 0a5807db24e6936b727b6bdb572ff1d2029da679 Mon Sep 17 00:00:00 2001 From: lu-ny Date: Tue, 10 Dec 2024 17:57:19 -0500 Subject: [PATCH 1/3] feat: enforce consistent code formatting config- Add Black/Ruff config to pyproject.toml- Update Makefile format commands to use config- Enhance pre-commit hooks with Ruff format- Removed circular dependencies- Standardize exclude patterns across tools- Fixed various redefinitions and commented out unused variables --- .pre-commit-config.yaml | 27 +- Makefile | 7 +- adalflow/adalflow/components/agent/react.py | 1 - .../components/model_client/cohere_client.py | 1 - .../components/model_client/google_client.py | 1 - .../components/model_client/ollama_client.py | 4 +- .../model_client/transformers_client.py | 13 +- .../adalflow/components/model_client/utils.py | 1 + .../components/output_parsers/outputs.py | 2 - .../components/retriever/bm25_retriever.py | 6 +- adalflow/adalflow/core/base_data_class.py | 2 - adalflow/adalflow/core/component.py | 5 +- adalflow/adalflow/core/embedder.py | 1 - adalflow/adalflow/core/func_tool.py | 1 - adalflow/adalflow/core/functional.py | 3 - adalflow/adalflow/core/generator.py | 4 - adalflow/adalflow/core/prompt_builder.py | 2 - adalflow/adalflow/core/tool_manager.py | 1 - .../sqlalchemy/pipeline/inject_data.py | 1 - adalflow/adalflow/datasets/big_bench_hard.py | 2 - adalflow/adalflow/datasets/trec.py | 2 - adalflow/adalflow/eval/g_eval.py | 1 - adalflow/adalflow/optim/_llm_optimizer.py | 7 +- .../optim/few_shot/bootstrap_optimizer.py | 6 +- adalflow/adalflow/optim/optimizer.py | 1 - adalflow/adalflow/optim/parameter.py | 9 +- .../adalflow/optim/text_grad/llm_text_loss.py | 1 - adalflow/adalflow/optim/text_grad/ops.py | 2 - .../optim/text_grad/text_loss_with_eval_fn.py | 1 - .../adalflow/optim/text_grad/tgd_optimizer.py | 4 +- adalflow/adalflow/optim/trainer/adal.py | 4 - adalflow/adalflow/optim/trainer/trainer.py | 11 +- adalflow/adalflow/tracing/decorators.py | 2 - 
.../tracing/generator_state_logger.py | 8 +- adalflow/tests/_test_optimizer.py | 1 - adalflow/tests/test_AzureClient.py | 1 - adalflow/tests/test_base_data_class.py | 2 - adalflow/tests/test_component.py | 1 - adalflow/tests/test_data_class_parser.py | 1 - .../tests/test_dataclass_object_functions.py | 3 - adalflow/tests/test_evaluators.py | 1 - adalflow/tests/test_faiss_retriever.py | 1 - adalflow/tests/test_grad_component.py | 1 - adalflow/tests/test_ollama_client.py | 1 - adalflow/tests/test_output_parser.py | 1 - adalflow/tests/test_random_sample.py | 1 - adalflow/tests/test_sequential.py | 2 - adalflow/tests/test_string_parser.py | 1 - adalflow/tests/test_text_splitter.py | 1 - adalflow/tests/test_transformer_client.py | 1 - benchmarks/BHH_object_count/dspy_count.py | 1 - .../text-grad/text_grad_train.py | 2 - benchmarks/BHH_object_count/train.py | 1 - benchmarks/hotpot_qa/adal_exp/build.py | 6 - .../hotpot_qa/adal_exp/build_vanilla_rag.py | 3 - .../hotpot_qa/adal_exp/train_vanilla.py | 1 - benchmarks/hotpot_qa/adal_train.py | 6 - .../dspy_train_few_shot_boostrap.py | 1 - notebooks/adalflow_colab_template.ipynb | 4 +- notebooks/evaluation/adalflow_llm_eval.ipynb | 14 +- ...lflow_object_count_auto_optimization.ipynb | 144 +- notebooks/tutorials/adalflow_component.ipynb | 1864 +++++++++-------- .../tutorials/adalflow_dataclasses.ipynb | 152 +- .../tutorials/adalflow_modelclient.ipynb | 29 +- .../tutorials/adalflow_text_splitter.ipynb | 24 +- poetry.lock | 22 +- pyproject.toml | 23 +- tutorials/database.ipynb | 128 +- tutorials/dataclass.ipynb | 49 +- tutorials/embedder.ipynb | 6 +- tutorials/generator.ipynb | 10 +- tutorials/generator_note.py | 2 - tutorials/model_client.ipynb | 19 +- tutorials/parser_note.py | 2 - tutorials/prompt_note.py | 2 - tutorials/rag.ipynb | 13 +- tutorials/react_note.ipynb | 6 +- tutorials/retriever.ipynb | 227 +- tutorials/tools.ipynb | 498 +++-- use_cases/agent/react_agent.ipynb | 138 +- .../classification/trec_task_string_output.py 
| 2 - .../trec_task_structured_output.py | 2 - use_cases/generator/intermediate.ipynb | 18 +- .../bbh/object_count/task.py | 1 - .../bbh/object_count/train_new.py | 2 - .../bbh/word_sorting/diagnose.py | 2 - .../bbh/word_sorting/task.py | 1 - .../bbh/word_sorting/train.py | 1 - .../bbh/word_sorting/train_paper.py | 1 - use_cases/question_answering/chatbot.ipynb | 15 +- use_cases/question_answering/simple_qa.ipynb | 11 +- use_cases/rag/build/rag.py | 1 - use_cases/rag/rag_with_eval.py | 1 - use_cases/unsorted/rag_optimized.py | 1 - use_cases/unsorted/rag_yaml_config.py | 1 - use_cases/unsorted/simple_qa.py | 3 - use_cases/unsorted/simple_rag_bm_25.py | 1 - 97 files changed, 1866 insertions(+), 1759 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6f85a6c8..02e712d6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -10,28 +10,35 @@ repos: - id: detect-private-key - repo: https://github.com/psf/black - rev: 24.4.2 + rev: 24.10.0 hooks: - id: black - args: ['--line-length=88'] + args: ['--config=pyproject.toml'] exclude: ^docs/|.*\.(json|yaml|md|txt)$ + + - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.2 + rev: v0.8.2 hooks: # Run the linter. 
- id: ruff - args: ['--fix'] + args: ['--fix', '--config=pyproject.toml'] + exclude: ^docs/|.*\.(json|yaml|md|txt)$ + - id: ruff-format + args: ['--config=pyproject.toml'] exclude: ^docs/|.*\.(json|yaml|md|txt)$ - # Add local hooks to run custom commands + # stage files after ruff - repo: local hooks: - - id: run-make-format - name: Run Make Format - entry: make format + - id: git-add + name: git-add + entry: git add language: system - pass_filenames: false + stages: [commit] + pass_filenames: true + # - repo: https://github.com/pycqa/flake8 # rev: 4.0.1 # hooks: diff --git a/Makefile b/Makefile index 3670e02f..01f5c64a 100644 --- a/Makefile +++ b/Makefile @@ -22,12 +22,15 @@ setup: # Format code using Black and Ruff .PHONY: format format: - $(PYTHON) black $(SRC_DIR) - git ls-files | xargs pre-commit run black --files + $(PYTHON) black $(SRC_DIR) --config pyproject.toml + $(PYTHON) ruff check --fix $(SRC_DIR) + $(PYTHON) ruff format $(SRC_DIR) +# remove git ls-files | xargs pre-commit run black --files, causes a circular dependency # Run lint checks using Ruff .PHONY: lint lint: + $(PYTHON) black --check $(SRC_DIR) --config pyproject.toml $(PYTHON) ruff check $(SRC_DIR) # Run all pre-commit hooks on all files diff --git a/adalflow/adalflow/components/agent/react.py b/adalflow/adalflow/components/agent/react.py index 92428e53..c2710553 100644 --- a/adalflow/adalflow/components/agent/react.py +++ b/adalflow/adalflow/components/agent/react.py @@ -224,7 +224,6 @@ def _execute_action(self, action_step: StepOutput) -> Optional[StepOutput]: """Parse the action string to a function call and execute it. 
Update the action_step with the result.""" action = action_step.action try: - fun: Function = self.tool_manager.parse_func_expr(action) result: FunctionOutput = self.tool_manager.execute_func(fun) # TODO: optimize the action_step diff --git a/adalflow/adalflow/components/model_client/cohere_client.py b/adalflow/adalflow/components/model_client/cohere_client.py index 2b902c51..6c6ea215 100644 --- a/adalflow/adalflow/components/model_client/cohere_client.py +++ b/adalflow/adalflow/components/model_client/cohere_client.py @@ -105,7 +105,6 @@ def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINE if ( model_type == ModelType.RERANKER ): # query -> # scores for top_k documents, index for the top_k documents, return as tuple - response = self.sync_client.rerank(**api_kwargs) top_k_scores = [result.relevance_score for result in response.results] top_k_indices = [result.index for result in response.results] diff --git a/adalflow/adalflow/components/model_client/google_client.py b/adalflow/adalflow/components/model_client/google_client.py index b7e431c8..27c89971 100644 --- a/adalflow/adalflow/components/model_client/google_client.py +++ b/adalflow/adalflow/components/model_client/google_client.py @@ -117,7 +117,6 @@ def convert_inputs_to_api_kwargs( raise TypeError("input must be a sequence of text") final_model_kwargs["input"] = input elif model_type == ModelType.LLM: - final_model_kwargs["prompt"] = input else: raise ValueError(f"model_type {model_type} is not supported") diff --git a/adalflow/adalflow/components/model_client/ollama_client.py b/adalflow/adalflow/components/model_client/ollama_client.py index 65af3858..48af6ba5 100644 --- a/adalflow/adalflow/components/model_client/ollama_client.py +++ b/adalflow/adalflow/components/model_client/ollama_client.py @@ -16,9 +16,9 @@ import warnings from adalflow.core.types import ModelType, GeneratorOutput -from adalflow.utils.lazy_import import safe_import, OptionalPackages -ollama = 
safe_import(OptionalPackages.OLLAMA.value[0], OptionalPackages.OLLAMA.value[1]) +# need to pick either safe or regular import +# ollama = safe_import(OptionalPackages.OLLAMA.value[0], OptionalPackages.OLLAMA.value[1]) import ollama from ollama import RequestError, ResponseError, GenerateResponse diff --git a/adalflow/adalflow/components/model_client/transformers_client.py b/adalflow/adalflow/components/model_client/transformers_client.py index f681f23f..b71d2b83 100644 --- a/adalflow/adalflow/components/model_client/transformers_client.py +++ b/adalflow/adalflow/components/model_client/transformers_client.py @@ -12,13 +12,10 @@ from adalflow.core.functional import get_top_k_indices_scores # optional import -from adalflow.utils.lazy_import import safe_import, OptionalPackages - -transformers = safe_import( - OptionalPackages.TRANSFORMERS.value[0], OptionalPackages.TRANSFORMERS.value[1] -) -torch = safe_import(OptionalPackages.TORCH.value[0], OptionalPackages.TORCH.value[1]) +# need to pick either safe or regular import +# transformers = safe_import(OptionalPackages.TRANSFORMERS.value[0], OptionalPackages.TRANSFORMERS.value[1]) +# torch = safe_import(OptionalPackages.TORCH.value[0], OptionalPackages.TORCH.value[1]) import torch @@ -201,7 +198,6 @@ def infer_bge_reranker_base( input = [(query, doc) for doc in documents] with torch.no_grad(): - inputs = self.tokenizer( input, padding=True, @@ -358,7 +354,6 @@ def init_model(self, model_name: str): raise ValueError(f"Model {model_name} is not supported") def _parse_chat_completion_from_pipeline(self, completion: Any) -> str: - text = completion[0]["generated_text"] pattern = r"(?<=\|assistant\|>).*" @@ -407,7 +402,6 @@ def _infer_from_pipeline( ) if model == "HuggingFaceH4/zephyr-7b-beta": - prompt = model_to_use.tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) @@ -728,7 +722,6 @@ def convert_inputs_to_api_kwargs( } class CustomizeLLM: - def __init__(self) -> None: pass diff --git 
a/adalflow/adalflow/components/model_client/utils.py b/adalflow/adalflow/components/model_client/utils.py index 3c0e21fd..24085553 100644 --- a/adalflow/adalflow/components/model_client/utils.py +++ b/adalflow/adalflow/components/model_client/utils.py @@ -1,4 +1,5 @@ "Helpers for model client for integrating models and parsing the output." + from adalflow.core.types import EmbedderOutput, Embedding, Usage diff --git a/adalflow/adalflow/components/output_parsers/outputs.py b/adalflow/adalflow/components/output_parsers/outputs.py index 1f4ff652..95fd3caf 100644 --- a/adalflow/adalflow/components/output_parsers/outputs.py +++ b/adalflow/adalflow/components/output_parsers/outputs.py @@ -145,7 +145,6 @@ def __init__( exclude_fields: ExcludeType = None, return_data_class: bool = False, ): - super().__init__() if not is_dataclass(data_class): raise TypeError(f"Provided class is not a dataclass: {data_class}") @@ -349,7 +348,6 @@ def format_instructions(self) -> str: return "The output should be a boolean value. True or False." def call(self, input: str) -> bool: - input = input.strip() output = None # evaluate the expression to get the boolean value diff --git a/adalflow/adalflow/components/retriever/bm25_retriever.py b/adalflow/adalflow/components/retriever/bm25_retriever.py index 012c7ab1..5e4badb1 100644 --- a/adalflow/adalflow/components/retriever/bm25_retriever.py +++ b/adalflow/adalflow/components/retriever/bm25_retriever.py @@ -1,4 +1,4 @@ -"""BM25 retriever implementation. 
""" +"""BM25 retriever implementation.""" from typing import List, Dict, Optional, Callable, Any, Sequence import numpy as np @@ -232,9 +232,7 @@ def _initialize(self, corpus: List[List[str]]): def _calc_idf(self): idf_sum = 0 - negative_idf = ( - [] - ) # idf can be negative if word is too common: more than half of the documents + negative_idf = [] # idf can be negative if word is too common: more than half of the documents self.idf: Dict[str, float] = {} for token, freq in self.nd.items(): idf = math.log(self.total_documents - freq + 0.5) - math.log(freq + 0.5) diff --git a/adalflow/adalflow/core/base_data_class.py b/adalflow/adalflow/core/base_data_class.py index daac546d..3c4d86ad 100644 --- a/adalflow/adalflow/core/base_data_class.py +++ b/adalflow/adalflow/core/base_data_class.py @@ -189,10 +189,8 @@ class MyOutputs(DataClass): __output_fields__: List[str] = [] def __post_init__(self): - for f in fields(self): if "desc" not in f.metadata and "description" not in f.metadata: - logger.debug( f"Class { self.__class__.__name__} Field {f.name} is missing 'desc' in metadata" ) diff --git a/adalflow/adalflow/core/component.py b/adalflow/adalflow/core/component.py index 28bb794e..49c75c6c 100644 --- a/adalflow/adalflow/core/component.py +++ b/adalflow/adalflow/core/component.py @@ -138,9 +138,7 @@ def call(self, query: str) -> str: training: bool teacher_mode: bool = False tracing: bool = False - name: str = ( - "Component" # name will help with GradComponent output naming as "{name}_output" - ) + name: str = "Component" # name will help with GradComponent output naming as "{name}_output" _component_type = "base" # def _generate_unique_name(self): @@ -840,7 +838,6 @@ def remove_from(*dicts_or_sets): ) self.register_parameter(name, value) else: # set component - components = self.__dict__.get("_components") if isinstance(value, Component): if components is None: diff --git a/adalflow/adalflow/core/embedder.py b/adalflow/adalflow/core/embedder.py index 
ca6d5cac..4260c255 100644 --- a/adalflow/adalflow/core/embedder.py +++ b/adalflow/adalflow/core/embedder.py @@ -48,7 +48,6 @@ def __init__( model_kwargs: Dict[str, Any] = {}, output_processors: Optional[Component] = None, ) -> None: - super().__init__(model_kwargs=model_kwargs) if not isinstance(model_kwargs, Dict): raise TypeError( diff --git a/adalflow/adalflow/core/func_tool.py b/adalflow/adalflow/core/func_tool.py index 62d4f3fe..2fcdfaa4 100644 --- a/adalflow/adalflow/core/func_tool.py +++ b/adalflow/adalflow/core/func_tool.py @@ -259,7 +259,6 @@ def _extra_repr(self) -> str: if __name__ == "__main__": - import asyncio import time diff --git a/adalflow/adalflow/core/functional.py b/adalflow/adalflow/core/functional.py index 14157535..d246674f 100644 --- a/adalflow/adalflow/core/functional.py +++ b/adalflow/adalflow/core/functional.py @@ -252,7 +252,6 @@ class TrecDataList: if is_dataclass(cls) or is_potential_dataclass( cls ): # Optional[Address] will be false, and true for each check - log.debug( f"{is_dataclass(cls)} of {cls}, {is_potential_dataclass(cls)} of {cls}" ) @@ -922,7 +921,6 @@ def get_top_k_indices_scores( def generate_readable_key_for_function(fn: Callable) -> str: - module_name = fn.__module__ function_name = fn.__name__ return f"{module_name}.{function_name}" @@ -1236,7 +1234,6 @@ def parse_json_str_to_obj(json_str: str) -> Union[Dict[str, Any], List[Any]]: except json.JSONDecodeError: # 3rd attemp using yaml try: - # NOTE: parsing again with pyyaml # pyyaml is less strict, and allows for trailing commas # right now we rely on this since guidance program generates diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py index 309f954f..18e89a7e 100644 --- a/adalflow/adalflow/core/generator.py +++ b/adalflow/adalflow/core/generator.py @@ -188,7 +188,6 @@ def get_cache_path(self) -> str: def _get_default_mapping( output: "GeneratorOutput" = None, ) -> Tuple[Dict[str, Callable], List[str]]: - if ( output.data and 
isinstance(output.data, DataClass) @@ -546,7 +545,6 @@ def backward( backward_engine: Optional["Generator"] = None, id: Optional[str] = None, # the id of the input ) -> Parameter: - log.info(f"Generator: Backward: {response}") children_params = response.predecessors @@ -678,7 +676,6 @@ def _backward_through_one_predecessor( data=manual_response, raw_response=manual_response ) else: - gradient_output: GeneratorOutput = backward_engine( prompt_kwargs=backward_engine_prompt_kwargs ) @@ -881,7 +878,6 @@ def failure_message_to_backward_engine( class BackwardEngine(Generator): # it is a generator with defaule template - __doc__ = """The backward engine is a Generator with a default template for the backward pass. If you want to customize the template, you can create your own backward engine""" diff --git a/adalflow/adalflow/core/prompt_builder.py b/adalflow/adalflow/core/prompt_builder.py index 03a61959..948d2a89 100644 --- a/adalflow/adalflow/core/prompt_builder.py +++ b/adalflow/adalflow/core/prompt_builder.py @@ -169,9 +169,7 @@ def _convert_prompt_kwargs_to_str(prompt_kwargs: Dict) -> Dict[str, str]: prompt_kwargs_str: Dict[str, str] = {} for key, p in prompt_kwargs.items(): - if isinstance(p, Parameter): - prompt_kwargs_str[key] = p.data else: prompt_kwargs_str[key] = p diff --git a/adalflow/adalflow/core/tool_manager.py b/adalflow/adalflow/core/tool_manager.py index 3538762a..fa9630ec 100644 --- a/adalflow/adalflow/core/tool_manager.py +++ b/adalflow/adalflow/core/tool_manager.py @@ -129,7 +129,6 @@ def execute_func_expr(self, expr: FunctionExpression) -> FunctionOutput: r"""Execute the function expression. 
Support both sync and async functions.""" func: Function = self.parse_func_expr(expr) try: - return self.execute_func(func) except Exception as e: # NOTE: if the function expression is not a function call, try to execute it as a function expression diff --git a/adalflow/adalflow/database/sqlalchemy/pipeline/inject_data.py b/adalflow/adalflow/database/sqlalchemy/pipeline/inject_data.py index 40b9df68..c34448fd 100644 --- a/adalflow/adalflow/database/sqlalchemy/pipeline/inject_data.py +++ b/adalflow/adalflow/database/sqlalchemy/pipeline/inject_data.py @@ -83,7 +83,6 @@ def process_batch(self, documents: List[Document]): def __call__(self, documents: List[Document]): batch_size = self.batch_size for i in range(0, len(documents), batch_size): - List = documents[i : i + batch_size] print(i, len(List)) self.process_batch(List) diff --git a/adalflow/adalflow/datasets/big_bench_hard.py b/adalflow/adalflow/datasets/big_bench_hard.py index f98f2517..c08a840c 100644 --- a/adalflow/adalflow/datasets/big_bench_hard.py +++ b/adalflow/adalflow/datasets/big_bench_hard.py @@ -41,7 +41,6 @@ def __init__( *args, **kwargs, ): - if split not in ["train", "val", "test"]: raise ValueError("Split must be one of 'train', 'val', 'test'") @@ -65,7 +64,6 @@ def __init__( ) # dont use a tuple, use a dict {"x": ..., "y": ...} def _check_or_download_dataset(self, data_path: str = None, split: str = "train"): - if data_path is None: raise ValueError("data_path must be specified") json_path = os.path.join(data_path, f"{self.task_name}.json") diff --git a/adalflow/adalflow/datasets/trec.py b/adalflow/adalflow/datasets/trec.py index 75267609..0973f9d9 100644 --- a/adalflow/adalflow/datasets/trec.py +++ b/adalflow/adalflow/datasets/trec.py @@ -28,7 +28,6 @@ def calculate_class_weights(labels: torch.Tensor) -> torch.Tensor: def sample_subset_dataset(dataset, num_samples: int, sample_weights): - # Create a WeightedRandomSampler to get 400 samples sampler = WeightedRandomSampler( weights=sample_weights, 
num_samples=num_samples, replacement=False @@ -171,7 +170,6 @@ def __init__( ) def _check_or_download_dataset(self, data_path: str = None, split: str = "train"): - if data_path is None: raise ValueError("data_path must be specified") split_csv_path = os.path.join(data_path, f"{split}.csv") diff --git a/adalflow/adalflow/eval/g_eval.py b/adalflow/adalflow/eval/g_eval.py index 5bdda219..67d07fc0 100644 --- a/adalflow/adalflow/eval/g_eval.py +++ b/adalflow/adalflow/eval/g_eval.py @@ -18,7 +18,6 @@ class GEvalMetric(Enum): - RELEVANCE = "Relevance" # range [1, 5] FLUENCY = "Fluency" # range [1, 3] CONSISTENCY = "Consistency" # range [1, 5] diff --git a/adalflow/adalflow/optim/_llm_optimizer.py b/adalflow/adalflow/optim/_llm_optimizer.py index 7b9f9b8f..0ba093b1 100644 --- a/adalflow/adalflow/optim/_llm_optimizer.py +++ b/adalflow/adalflow/optim/_llm_optimizer.py @@ -10,7 +10,6 @@ from adalflow.core.base_data_class import DataClass if TYPE_CHECKING: - from adalflow.core.model_client import ModelClient @@ -115,9 +114,9 @@ def __init__( # Ensure the temperature is at least 1 model_kwargs["temperature"] = max(1, model_kwargs.get("temperature", 1)) - self.instruction_history: List[Instruction] = ( - [] - ) # trace the history of the instructions + self.instruction_history: List[ + Instruction + ] = [] # trace the history of the instructions self.starter_instruction: Optional[str] = None if self.instruction_parameter.data is not None: self.starter_instruction = self.instruction_parameter.data diff --git a/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py b/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py index c61c3649..a088f535 100644 --- a/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py +++ b/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py @@ -71,7 +71,6 @@ def add_scores(self, ids: List[str], scores: List[float], is_teacher: bool = Tru ) for score in scores: - if not isinstance(score, float): raise ValueError( f"score must be a float, got 
{type(score)}, score: {score}" @@ -140,8 +139,7 @@ def sample( ) # if demo.id in demos and demos[demo.id].score is not None: w = ( - w - - student_demo_score + w - student_demo_score # w - demos[demo.id].score ) # assign higher weights to failed demos but successful in augmented if w < 0: @@ -198,7 +196,6 @@ def samples_to_str( sample_strs = [] for sample in samples: try: - # process the input fields if augmented: exclude_fields = ["id", "score"] @@ -239,7 +236,6 @@ def propose(self): demo_str = "" if len(sampled_augmented_demos) > 0: - demo_str = self.samples_to_str( samples=sampled_augmented_demos, augmented=True, diff --git a/adalflow/adalflow/optim/optimizer.py b/adalflow/adalflow/optim/optimizer.py index c6fad814..29daacf9 100644 --- a/adalflow/adalflow/optim/optimizer.py +++ b/adalflow/adalflow/optim/optimizer.py @@ -47,7 +47,6 @@ def zero_grad(self): class DemoOptimizer(Optimizer): - __doc__ = r"""Base class for all demo optimizers. Demo optimizer are few-shot optimization, where it will sample raw examples from train dataset or bootstrap examples from the model's output. 
diff --git a/adalflow/adalflow/optim/parameter.py b/adalflow/adalflow/optim/parameter.py index 85fe25ea..a12fe00a 100644 --- a/adalflow/adalflow/optim/parameter.py +++ b/adalflow/adalflow/optim/parameter.py @@ -170,9 +170,9 @@ def __init__( self._score: float = score # end to end evaluation score self._student_traces: Dict[str, DataClass] = {} # id - self._demos: List[DataClass] = ( - [] - ) # used for the optimizer to save the proposed demos + self._demos: List[ + DataClass + ] = [] # used for the optimizer to save the proposed demos self._previous_demos: List[DataClass] = [] self.eval_input = eval_input @@ -419,7 +419,6 @@ def build_graph(node: "Parameter"): def backward( self, ): # engine should be the llm or customized backwards function to pass feedback - # topological sort of all the predecessors of the current parameter in the graph log.debug(f"Backward pass for {self.data}, backward function: {self.grad_fn}") topo: List[Parameter] = [] @@ -577,7 +576,6 @@ def wrap_and_escape(text, width=40): log.info(f"Node: {n.name}, {n.to_dict()}") # track gradients for g in n.gradients: - log.info(f"Gradient: {g.name}, {g.to_dict()}") log.info(f"Gradient prompt: {g.gradient_prompt}") for n1, n2 in edges: @@ -685,7 +683,6 @@ def __repr__(self): def _check_and_reduce_gradients(variable: Parameter) -> Set[Parameter]: - if variable.get_gradient_and_context_text() == "": log.debug(f"No gradients detected for {variable.data}") return variable.gradients diff --git a/adalflow/adalflow/optim/text_grad/llm_text_loss.py b/adalflow/adalflow/optim/text_grad/llm_text_loss.py index 46cf7a0f..8e7827f6 100644 --- a/adalflow/adalflow/optim/text_grad/llm_text_loss.py +++ b/adalflow/adalflow/optim/text_grad/llm_text_loss.py @@ -68,5 +68,4 @@ def __init__( # return self.forward(*args, **kwargs) def forward(self, *args, **kwargs) -> "Parameter": - return self.loss_llm.forward(*args, **kwargs) diff --git a/adalflow/adalflow/optim/text_grad/ops.py b/adalflow/adalflow/optim/text_grad/ops.py 
index da2b438f..74f86f15 100644 --- a/adalflow/adalflow/optim/text_grad/ops.py +++ b/adalflow/adalflow/optim/text_grad/ops.py @@ -86,7 +86,6 @@ def backward(self, summation: Parameter): pred_params = summation.predecessors # losses summation_gradients = summation.get_gradient_and_context_text().strip() for param in pred_params: - if param.check_if_already_computed_gradient_respect_to(summation.id): log.info( f"Gradient already computed for {param.role_desc} with respect to {summation.role_desc}" @@ -102,7 +101,6 @@ def backward(self, summation: Parameter): ): # as loss sum to be the base, it simply allows gradients computations on multiple losses param_gradient_value = "" else: # as a mid layer, it will have a combined feedback - param_gradient_value = f"Here is the combined feedback we got for this specific {param.role_desc} and other parameters: {summation_gradients}." extra = { diff --git a/adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py b/adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py index c8654d4a..5f554b6a 100644 --- a/adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py +++ b/adalflow/adalflow/optim/text_grad/text_loss_with_eval_fn.py @@ -116,7 +116,6 @@ def __init__( "EvalFnToTextLoss: No backward engine provided. Creating one using model_client and model_kwargs." 
) if model_client and model_kwargs: - self.set_backward_engine(backward_engine, model_client, model_kwargs) else: if not isinstance(backward_engine, BackwardEngine): diff --git a/adalflow/adalflow/optim/text_grad/tgd_optimizer.py b/adalflow/adalflow/optim/text_grad/tgd_optimizer.py index f2d5b918..25e72fb2 100644 --- a/adalflow/adalflow/optim/text_grad/tgd_optimizer.py +++ b/adalflow/adalflow/optim/text_grad/tgd_optimizer.py @@ -253,9 +253,9 @@ def get_gradient_memory_text(self, param: Parameter) -> str: return grad_memory def _get_user_prompt_kwargs(self, param: Parameter) -> Dict[str, str]: - variable_and_peer_info = self.variable_and_peers_info.call( - variable=param.get_param_info(), peers=param.peers # param.peers + variable=param.get_param_info(), + peers=param.peers, # param.peers ) user_prompt_kwargs = { diff --git a/adalflow/adalflow/optim/trainer/adal.py b/adalflow/adalflow/optim/trainer/adal.py index f9bcfc10..38ca52ba 100644 --- a/adalflow/adalflow/optim/trainer/adal.py +++ b/adalflow/adalflow/optim/trainer/adal.py @@ -222,12 +222,10 @@ def evaluate_samples( acc_list = [None] * len(samples) # Initialize accuracy list to hold results with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: - # 1. 
submit all the tasks futures = {} for i, (sample, y_pred) in enumerate(zip(samples, y_preds)): - if metadata is None: eval_fn, kwargs = self.prepare_eval(sample, y_pred) future = executor.submit(eval_fn, **kwargs) @@ -486,7 +484,6 @@ def validation_step(self, batch, batch_idx, num_workers: int = 2) -> List: avg_score=avg_score, per_item_scores=acc_list ) else: - eval_results = self.evaluate_samples( samples=completed_samples, y_preds=completed_y_preds, @@ -516,7 +513,6 @@ def loss_step( desc="Calculating Loss", ) for future, i, sample in futures: - loss = future.result() if not isinstance(loss, Parameter): raise ValueError(f"Loss is not a Parameter: {loss}") diff --git a/adalflow/adalflow/optim/trainer/trainer.py b/adalflow/adalflow/optim/trainer/trainer.py index ae17b064..343a973d 100644 --- a/adalflow/adalflow/optim/trainer/trainer.py +++ b/adalflow/adalflow/optim/trainer/trainer.py @@ -369,7 +369,6 @@ def fit( # config optimizers if len(self._get_trainable_demo_params()) > 0: - for opt in self.demo_optimizers: opt.config_shots(raw_shots=raw_shots, bootstrap_shots=bootstrap_shots) opt.use_weighted_sampling(weighted=self.weighted_sampling) @@ -443,7 +442,6 @@ def fit( and len(self.text_optimizers) > 0 ): if self.strategy == "random": - self._fit_text_grad_demo_mix_random( train_loader, train_dataset, @@ -507,7 +505,6 @@ def _estimate_num_epochs(train_loader: Any, max_steps: int): return max_steps // num_samples + 1 def initial_validation(self, val_dataset: Any, test_dataset: Any): - val_output = self.adaltask.validation_step(val_dataset, 0, self.num_workers) val_score = val_output.avg_score test_score = None @@ -628,7 +625,6 @@ def _pre_fit(self, val_dataset: Any, test_dataset: Any) -> TrainerResult: def _fit_demos_one_step_for_debug( self, train_loader, train_dataset: Any, val_dataset: Any, test_dataset: Any ) -> str: - # get_logger(level="DEBUG") print("Fitting using Random Demo Optimizer") self.prep_ckpt_file_path() @@ -767,7 +763,6 @@ def 
_fit_demos_one_step_for_debug( opt_params.extend(opt.params) print(f"Opt params: {opt_params}") for name, param in self.adaltask.named_parameters(): - if param.param_type == ParameterType.DEMOS: print(f"Demo param: {name}, value: {param.data}, param: {param}") if param.data is None: @@ -1344,7 +1339,6 @@ def _fit_demos_random( if self.save_traces: for i, demo_opt in enumerate(self.demo_optimizers): for param in demo_opt.params: - teacher_traces = param._traces student_traces = param._student_traces @@ -1488,7 +1482,6 @@ def _add_one_step_in_trainer_results( step: int, attempted_val_score: Optional[float] = None, ): - step_results = TrainerStepResult( step=step, val_score=val_score, @@ -1524,8 +1517,7 @@ def _downsample_move_batch( error_indices = [i for i, score in enumerate(acc_score_list) if score <= 0.5] if ( - len(error_indices) + len(correct_indices) - <= max_moving_batch_size + len(error_indices) + len(correct_indices) <= max_moving_batch_size # and len(correct_indices) <= max_moving_batch_size ): return all_samples, all_losses, all_y_preds, acc_score_list @@ -1653,7 +1645,6 @@ def _text_grad_constraint_propose_step( # TODO: make this a step tdqm_loader = tqdm(range(self.max_proposals_per_step), desc="Proposing") for i in tdqm_loader: - # print(f"Proposing step: {i}") # self.optimizer.propose() self._propose_text_optimizers() # new prompts diff --git a/adalflow/adalflow/tracing/decorators.py b/adalflow/adalflow/tracing/decorators.py index 1882369a..e78d8bf6 100644 --- a/adalflow/adalflow/tracing/decorators.py +++ b/adalflow/adalflow/tracing/decorators.py @@ -49,7 +49,6 @@ def __init__(self): """ def decorator(cls): - original_init = cls.__init__ class_name = cls.__name__ logger_project_name = project_name or class_name @@ -221,7 +220,6 @@ def new_init(self, *args, **kwargs): # handle the file registration if attr_name not in generator_names_to_files: - self.generator_call_logger.register_generator(attr_name) filename = 
self.generator_call_logger.get_log_location(attr_name) log.info(f"Registered generator {attr_name} with file {filename}") diff --git a/adalflow/adalflow/tracing/generator_state_logger.py b/adalflow/adalflow/tracing/generator_state_logger.py index 056fc6e3..4e64086a 100644 --- a/adalflow/adalflow/tracing/generator_state_logger.py +++ b/adalflow/adalflow/tracing/generator_state_logger.py @@ -59,9 +59,9 @@ def __init__( self.filename = filename or "generator_state_trace.json" self.filepath = os.path.join(self.filepath, self.filename) - self._trace_map: Dict[str, List[GeneratorStatesRecord]] = ( - {} # generator_name: [prompt_states] - ) + self._trace_map: Dict[ + str, List[GeneratorStatesRecord] + ] = {} # generator_name: [prompt_states] # load previous records if the file exists if os.path.exists(self.filepath): self.load(self.filepath) @@ -82,7 +82,6 @@ def log_prompt(self, generator: "Generator", name: str): ) # TODO: log all states of the generator instead of just the prompt try: - if name not in self._trace_map: self._trace_map[name] = [ GeneratorStatesRecord( @@ -110,7 +109,6 @@ def save(self, filepath: str): f.write(serialized_obj) def load(self, filepath: str): - if os.stat(filepath).st_size == 0: logging.info(f"File {filepath} is empty.") return diff --git a/adalflow/tests/_test_optimizer.py b/adalflow/tests/_test_optimizer.py index 5b24e0bb..adbd280f 100644 --- a/adalflow/tests/_test_optimizer.py +++ b/adalflow/tests/_test_optimizer.py @@ -8,7 +8,6 @@ class TestBootstrapFewShotRandomSampler: - def setup_method(self): r""" Test the optimizer before the output processor which converts result to a string diff --git a/adalflow/tests/test_AzureClient.py b/adalflow/tests/test_AzureClient.py index 14b7cf4a..e6d8ad56 100644 --- a/adalflow/tests/test_AzureClient.py +++ b/adalflow/tests/test_AzureClient.py @@ -5,7 +5,6 @@ class TestAzureAIClient(unittest.TestCase): - @patch("adalflow.components.model_client.azureai_client.AzureOpenAI") 
@patch("adalflow.components.model_client.azureai_client.DefaultAzureCredential") def setUp(self, MockDefaultAzureCredential, MockAzureOpenAI): diff --git a/adalflow/tests/test_base_data_class.py b/adalflow/tests/test_base_data_class.py index 1049aa31..6dcf5f0a 100644 --- a/adalflow/tests/test_base_data_class.py +++ b/adalflow/tests/test_base_data_class.py @@ -247,7 +247,6 @@ def test_error_non_dataclass(self): class TestGetTypeSchema(unittest.TestCase): - def test_enum_schema(self): result = get_type_schema(Label) expected = "Enum[Label(SPAM=spam, NOT_SPAM=not_spam)]" @@ -383,5 +382,4 @@ class SetDataclass(DataClass): if __name__ == "__main__": - unittest.main() diff --git a/adalflow/tests/test_component.py b/adalflow/tests/test_component.py index f4745109..f5c41da0 100644 --- a/adalflow/tests/test_component.py +++ b/adalflow/tests/test_component.py @@ -42,7 +42,6 @@ def call(self, query: str) -> str: class TestComponent(TestCase): def test_component_missing_super_init(self): - with pytest.raises(AttributeError): a = ComponentMissSuperInit("John", 30) # noqa: F841 diff --git a/adalflow/tests/test_data_class_parser.py b/adalflow/tests/test_data_class_parser.py index b87fde4c..e8c7746d 100644 --- a/adalflow/tests/test_data_class_parser.py +++ b/adalflow/tests/test_data_class_parser.py @@ -25,7 +25,6 @@ class NestedOutput(DataClass): class TestDataClassParser(unittest.TestCase): - def setUp(self): self.basic_data_class = BasicOutput self.nested_data_class = NestedOutput diff --git a/adalflow/tests/test_dataclass_object_functions.py b/adalflow/tests/test_dataclass_object_functions.py index 3113c217..e51cbfd0 100644 --- a/adalflow/tests/test_dataclass_object_functions.py +++ b/adalflow/tests/test_dataclass_object_functions.py @@ -49,7 +49,6 @@ class ComplexData(DataClass): # Define the test class class TestDataclassFuncConversion(unittest.TestCase): - def test_simple_data(self): simple = SimpleData(name="John", age=30, score=95.5) simple_dict = custom_asdict(simple) @@ 
-191,7 +190,6 @@ def test_exclude(self): class TestDataClassBaseClassConversion(unittest.TestCase): - def test_dict_data(self): simple1 = SimpleData(name="John", age=30, score=95.5) simple2 = SimpleData(name="Jane", age=25, score=88.0) @@ -330,7 +328,6 @@ class ComplexData2(DataClass): # Define the test class class TestDataClassYamlJsonConversion(unittest.TestCase): - def test_simple_data(self): simple = SimpleData(name="John", age=30, score=95.5) simple_dict = simple.to_dict() diff --git a/adalflow/tests/test_evaluators.py b/adalflow/tests/test_evaluators.py index a62eee10..6cff7067 100644 --- a/adalflow/tests/test_evaluators.py +++ b/adalflow/tests/test_evaluators.py @@ -68,7 +68,6 @@ def test_retriever_recall(): # This test is skipped by default. To run this test locally, set the environment variable RUN_LOCAL_TESTS to True (export RUN_LOCAL_TESTS=true). @pytest.mark.skipif(not os.getenv("RUN_LOCAL_TESTS"), reason="Skip unless on local") def test_llm_as_judge(): - questions = [ "Is Beijing in China?", "Is Apple founded before Google?", diff --git a/adalflow/tests/test_faiss_retriever.py b/adalflow/tests/test_faiss_retriever.py index a26b764b..fc2e3b23 100644 --- a/adalflow/tests/test_faiss_retriever.py +++ b/adalflow/tests/test_faiss_retriever.py @@ -20,7 +20,6 @@ def create_dummy_embeddings(num_embeddings, dim, normalize=True): class TestFAISSRetriever(unittest.TestCase): - def setUp(self): self.dimensions = 128 self.num_embeddings = 10 diff --git a/adalflow/tests/test_grad_component.py b/adalflow/tests/test_grad_component.py index 5ea68a1a..06e5e643 100644 --- a/adalflow/tests/test_grad_component.py +++ b/adalflow/tests/test_grad_component.py @@ -6,7 +6,6 @@ class TestGradComponent(unittest.TestCase): - def setUp(self): self.component = GradComponent() self.component.name = "test_component" diff --git a/adalflow/tests/test_ollama_client.py b/adalflow/tests/test_ollama_client.py index 64fe60cd..b0b9ee5a 100644 --- a/adalflow/tests/test_ollama_client.py +++ 
b/adalflow/tests/test_ollama_client.py @@ -15,7 +15,6 @@ class TestOllamaModelClient(unittest.TestCase): - def test_ollama_llm_client(self): ollama_client = Mock(spec=OllamaClient()) print("Testing ollama LLM client") diff --git a/adalflow/tests/test_output_parser.py b/adalflow/tests/test_output_parser.py index a2b529dc..554ea143 100644 --- a/adalflow/tests/test_output_parser.py +++ b/adalflow/tests/test_output_parser.py @@ -17,7 +17,6 @@ class User(DataClass): class TestOutputParsers(unittest.TestCase): - def setUp(self): self.user_example = User(id=1, name="John") self.json_user_to_parse = '{"id": 2, "name": "Jane"}' diff --git a/adalflow/tests/test_random_sample.py b/adalflow/tests/test_random_sample.py index e6abfb29..2ffde2f5 100644 --- a/adalflow/tests/test_random_sample.py +++ b/adalflow/tests/test_random_sample.py @@ -8,7 +8,6 @@ class TestRandomSample(unittest.TestCase): - def setUp(self): """Set up a common dataset for testing.""" self.dataset = [1, 2, 3, 4, 5] diff --git a/adalflow/tests/test_sequential.py b/adalflow/tests/test_sequential.py index 9db9d2ae..c787efed 100644 --- a/adalflow/tests/test_sequential.py +++ b/adalflow/tests/test_sequential.py @@ -16,7 +16,6 @@ def call(self, input: int) -> int: class SequentialTests(unittest.TestCase): - def setUp(self): self.add_one = AddOne() self.multiply_by_two = MultiplyByTwo() @@ -96,7 +95,6 @@ def call(self, x: int, subtractor: int = 0) -> int: class TestSequential: - @pytest.fixture def setup_advanced_components(self): add = Add() diff --git a/adalflow/tests/test_string_parser.py b/adalflow/tests/test_string_parser.py index 17ba264c..96277e0a 100644 --- a/adalflow/tests/test_string_parser.py +++ b/adalflow/tests/test_string_parser.py @@ -179,7 +179,6 @@ def test_json_parser_escape_single_quotes(): class TestExtractYamlStr(unittest.TestCase): - def test_extract_yaml_with_triple_backticks(self): text = """```yaml name: John diff --git a/adalflow/tests/test_text_splitter.py 
b/adalflow/tests/test_text_splitter.py index 75d6f227..10bb47c5 100644 --- a/adalflow/tests/test_text_splitter.py +++ b/adalflow/tests/test_text_splitter.py @@ -4,7 +4,6 @@ class TestTextSplitter(unittest.TestCase): - def setUp(self): # Set up a TextSplitter instance before each test self.splitter = TextSplitter(split_by="word", chunk_size=5, chunk_overlap=2) diff --git a/adalflow/tests/test_transformer_client.py b/adalflow/tests/test_transformer_client.py index d8562454..c79ada9f 100644 --- a/adalflow/tests/test_transformer_client.py +++ b/adalflow/tests/test_transformer_client.py @@ -9,7 +9,6 @@ class TestTransformerModelClient(unittest.TestCase): def setUp(self) -> None: - self.query = "what is panda?" self.documents = [ "The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China.", diff --git a/benchmarks/BHH_object_count/dspy_count.py b/benchmarks/BHH_object_count/dspy_count.py index 63d1bd7f..5ff4b33a 100644 --- a/benchmarks/BHH_object_count/dspy_count.py +++ b/benchmarks/BHH_object_count/dspy_count.py @@ -25,7 +25,6 @@ def __init__(self): self.generate_answer = dspy.ChainOfThought(GenerateAnswer) def forward(self, question): - pred = self.generate_answer(question=question) answer = _parse_integer_answer(pred.answer, only_first_line=False) answer = str(answer) # dspy will assume these fields are strings not integers diff --git a/benchmarks/BHH_object_count/text-grad/text_grad_train.py b/benchmarks/BHH_object_count/text-grad/text_grad_train.py index 7dcede3e..3561620e 100644 --- a/benchmarks/BHH_object_count/text-grad/text_grad_train.py +++ b/benchmarks/BHH_object_count/text-grad/text_grad_train.py @@ -53,7 +53,6 @@ def eval_dataset(test_set, eval_fn, model, max_samples: int = None): with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: futures = [] for _, sample in enumerate(test_set): - future = executor.submit(eval_sample, sample, eval_fn, model) futures.append(future) if 
len(futures) >= max_samples: @@ -85,7 +84,6 @@ def run_validation_revert(system_prompt: tg.Variable, results, model, eval_fn, v if __name__ == "__main__": - from benchmarks.config import text_grad_save_path import os import time diff --git a/benchmarks/BHH_object_count/train.py b/benchmarks/BHH_object_count/train.py index 7d3e357e..716bd975 100644 --- a/benchmarks/BHH_object_count/train.py +++ b/benchmarks/BHH_object_count/train.py @@ -33,7 +33,6 @@ def load_datasets(max_samples=10): def train(dspy_trainset=None): - print("Training on", len(dspy_trainset), "samples", dspy_trainset[0]) teleprompter = BootstrapFewShot(metric=validate_exact_match) diff --git a/benchmarks/hotpot_qa/adal_exp/build.py b/benchmarks/hotpot_qa/adal_exp/build.py index 9f1d078c..63415368 100644 --- a/benchmarks/hotpot_qa/adal_exp/build.py +++ b/benchmarks/hotpot_qa/adal_exp/build.py @@ -23,7 +23,6 @@ def load_datasets(): - trainset = HotPotQA(split="train", size=20) valset = HotPotQA(split="val", size=50) testset = HotPotQA(split="test", size=50) @@ -388,7 +387,6 @@ def __init__(self, model_client, model_kwargs, passages_per_hop=3, max_hops=2): # TODO: the error will be a context # a component wont handle training, forward or backward, just passing everything through def call(self, question: str, id: str = None) -> Union[Parameter, str]: - # normal component, will be called when in inference mode question_param = Parameter( @@ -438,7 +436,6 @@ def call(self, question: str, id: str = None) -> Union[Parameter, str]: if ( not self.training ): # if users want to customize the output, ensure to use if not self.training - # convert the generator output to a normal data format print(f"converting output: {output}") @@ -492,7 +489,6 @@ def handle_one_loss_sample( } def configure_optimizers(self, *args, **kwargs): - # TODO: simplify this, make it accept generator parameters = [] for name, param in self.task.named_parameters(): @@ -504,7 +500,6 @@ def configure_optimizers(self, *args, **kwargs): def 
evaluate_one_sample( self, sample: Any, y_pred: Any, metadata: Dict[str, Any] ) -> Any: - # we need "context" be passed as metadata # print(f"sample: {sample}, y_pred: {y_pred}") # convert pred to Dspy structure @@ -545,7 +540,6 @@ def validate_dspy_demos( def test_multi_hop_retriever(): - from use_cases.config import ( gpt_3_model, ) diff --git a/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py b/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py index 7e66ca9b..78ef6355 100644 --- a/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py +++ b/benchmarks/hotpot_qa/adal_exp/build_vanilla_rag.py @@ -21,7 +21,6 @@ def load_datasets(): - trainset = HotPotQA(split="train", size=20) valset = HotPotQA(split="val", size=50) testset = HotPotQA(split="test", size=50) @@ -109,7 +108,6 @@ def __init__(self, top_k: int = 3): self.dspy_retriever = dspy.Retrieve(k=top_k) def call(self, input: str, top_k: Optional[int] = None) -> List[RetrieverOutput]: - k = top_k or self.top_k output = self.dspy_retriever(query_or_queries=input, k=k) @@ -255,7 +253,6 @@ def test_retriever(): def test_vailla_rag(): - from use_cases.config import ( gpt_3_model, ) diff --git a/benchmarks/hotpot_qa/adal_exp/train_vanilla.py b/benchmarks/hotpot_qa/adal_exp/train_vanilla.py index b6cfe9e6..82dc23da 100644 --- a/benchmarks/hotpot_qa/adal_exp/train_vanilla.py +++ b/benchmarks/hotpot_qa/adal_exp/train_vanilla.py @@ -82,7 +82,6 @@ def train_diagnose( model_client: adal.ModelClient, model_kwargs: Dict, ) -> Dict: - trainset, valset, testset = load_datasets() adal_component = VallinaRAGAdal( diff --git a/benchmarks/hotpot_qa/adal_train.py b/benchmarks/hotpot_qa/adal_train.py index 4162bc98..425632b1 100644 --- a/benchmarks/hotpot_qa/adal_train.py +++ b/benchmarks/hotpot_qa/adal_train.py @@ -21,7 +21,6 @@ def load_datasets(): - trainset = HotPotQA(split="train", size=20) valset = HotPotQA(split="val", size=50) testset = HotPotQA(split="test", size=50) # to keep the same as the dspy @@ -420,7 +419,6 @@ def 
__init__(self, model_client, model_kwargs, passages_per_hop=3, max_hops=2): # TODO: the error will be a context # a component wont handle training, forward or backward, just passing everything through def call(self, question: str, id: str = None) -> Union[Parameter, str]: - # normal component, will be called when in inference mode question_param = Parameter( @@ -470,7 +468,6 @@ def call(self, question: str, id: str = None) -> Union[Parameter, str]: if ( not self.training ): # if users want to customize the output, ensure to use if not self.training - # convert the generator output to a normal data format print(f"converting output: {output}") @@ -525,7 +522,6 @@ def handle_one_loss_sample( } def configure_optimizers(self, *args, **kwargs): - # TODO: simplify this, make it accept generator parameters = [] for name, param in self.task.named_parameters(): @@ -537,7 +533,6 @@ def configure_optimizers(self, *args, **kwargs): def evaluate_one_sample( self, sample: Any, y_pred: Any, metadata: Dict[str, Any] ) -> Any: - # we need "context" be passed as metadata # print(f"sample: {sample}, y_pred: {y_pred}") # convert pred to Dspy structure @@ -578,7 +573,6 @@ def validate_dspy_demos( def test_multi_hop_retriever(): - from use_cases.config import ( gpt_3_model, ) diff --git a/benchmarks/trec_classification/dspy_train_few_shot_boostrap.py b/benchmarks/trec_classification/dspy_train_few_shot_boostrap.py index 28c7c484..8f2e41ba 100644 --- a/benchmarks/trec_classification/dspy_train_few_shot_boostrap.py +++ b/benchmarks/trec_classification/dspy_train_few_shot_boostrap.py @@ -37,7 +37,6 @@ def __init__(self, passages_per_hop=3, max_hops=2): self.max_hops = max_hops def forward(self, question): - pred = self.generate_answer(question=question) return dspy.Prediction(answer=pred.answer) diff --git a/notebooks/adalflow_colab_template.ipynb b/notebooks/adalflow_colab_template.ipynb index 191bbf08..39715816 100644 --- a/notebooks/adalflow_colab_template.ipynb +++ 
b/notebooks/adalflow_colab_template.ipynb @@ -87,8 +87,8 @@ "\n", "\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", "\n", "print(\"API keys have been set.\")" ] diff --git a/notebooks/evaluation/adalflow_llm_eval.ipynb b/notebooks/evaluation/adalflow_llm_eval.ipynb index 5e903978..448215e3 100644 --- a/notebooks/evaluation/adalflow_llm_eval.ipynb +++ b/notebooks/evaluation/adalflow_llm_eval.ipynb @@ -95,7 +95,7 @@ "\n", "\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", "\n", "print(\"API keys have been set.\")" ] @@ -301,11 +301,11 @@ "source": [ "# without questions, and with customized judgement query\n", "\n", + "\n", "def compute_llm_as_judge_wo_questions():\n", " from adalflow.eval.llm_as_judge import LLMasJudge, DefaultLLMJudge\n", " from adalflow.components.model_client import OpenAIClient\n", "\n", - "\n", " llm_judge = DefaultLLMJudge(\n", " model_client=OpenAIClient(),\n", " model_kwargs={\n", @@ -413,7 +413,7 @@ " eval_rslt = llm_evaluator.compute(\n", " questions=questions, gt_answers=gt_answers, pred_answers=pred_answers\n", " )\n", - " print(eval_rslt)\n" + " print(eval_rslt)" ] }, { @@ -569,8 +569,12 @@ } ], "source": [ - "source=\"Paul Merson has restarted his row with Andros Townsend after the Tottenham midfielder was brought on with only seven minutes remaining in his team 's 0-0 draw with Burnley on Sunday . 'Just been watching the game , did you miss the coach ? # RubberDub # 7minutes , ' Merson put on Twitter . Merson initially angered Townsend for writing in his Sky Sports column that 'if Andros Townsend can get in ( the England team ) then it opens it up to anybody . 
' Paul Merson had another dig at Andros Townsend after his appearance for Tottenham against Burnley Townsend was brought on in the 83rd minute for Tottenham as they drew 0-0 against Burnley Andros Townsend scores England 's equaliser in their 1-1 friendly draw with Italy in Turin on Tuesday night The former Arsenal man was proven wrong when Townsend hit a stunning equaliser for England against Italy and he duly admitted his mistake . 'It 's not as though I was watching hoping he would n't score for England , I 'm genuinely pleased for him and fair play to him \\u00e2\\u20ac\\u201c it was a great goal , ' Merson said . 'It 's just a matter of opinion , and my opinion was that he got pulled off after half an hour at Manchester United in front of Roy Hodgson , so he should n't have been in the squad . 'When I 'm wrong , I hold my hands up . I do n't have a problem with doing that - I 'll always be the first to admit when I 'm wrong . ' Townsend hit back at Merson on Twitter after scoring for England against Italy Sky Sports pundit Merson ( centre ) criticised Townsend 's call-up to the England squad last week Townsend hit back at Merson after netting for England in Turin on Wednesday , saying 'Not bad for a player that should be 'nowhere near the squad ' ay @ PaulMerse ? ' Any bad feeling between the pair seemed to have passed but Merson was unable to resist having another dig at Townsend after Tottenham drew at Turf Moor .\",\n", - "summary=\"Paul merson was brought on with only seven minutes remaining in his team 's 0-0 draw with burnley . Andros townsend scored the tottenham midfielder in the 89th minute . Paul merson had another dig at andros townsend after his appearance . The midfielder had been brought on to the england squad last week . 
Click here for all the latest arsenal news news .\",\n", + "source = (\n", + " \"Paul Merson has restarted his row with Andros Townsend after the Tottenham midfielder was brought on with only seven minutes remaining in his team 's 0-0 draw with Burnley on Sunday . 'Just been watching the game , did you miss the coach ? # RubberDub # 7minutes , ' Merson put on Twitter . Merson initially angered Townsend for writing in his Sky Sports column that 'if Andros Townsend can get in ( the England team ) then it opens it up to anybody . ' Paul Merson had another dig at Andros Townsend after his appearance for Tottenham against Burnley Townsend was brought on in the 83rd minute for Tottenham as they drew 0-0 against Burnley Andros Townsend scores England 's equaliser in their 1-1 friendly draw with Italy in Turin on Tuesday night The former Arsenal man was proven wrong when Townsend hit a stunning equaliser for England against Italy and he duly admitted his mistake . 'It 's not as though I was watching hoping he would n't score for England , I 'm genuinely pleased for him and fair play to him \\u00e2\\u20ac\\u201c it was a great goal , ' Merson said . 'It 's just a matter of opinion , and my opinion was that he got pulled off after half an hour at Manchester United in front of Roy Hodgson , so he should n't have been in the squad . 'When I 'm wrong , I hold my hands up . I do n't have a problem with doing that - I 'll always be the first to admit when I 'm wrong . ' Townsend hit back at Merson on Twitter after scoring for England against Italy Sky Sports pundit Merson ( centre ) criticised Townsend 's call-up to the England squad last week Townsend hit back at Merson after netting for England in Turin on Wednesday , saying 'Not bad for a player that should be 'nowhere near the squad ' ay @ PaulMerse ? 
' Any bad feeling between the pair seemed to have passed but Merson was unable to resist having another dig at Townsend after Tottenham drew at Turf Moor .\",\n", + ")\n", + "summary = (\n", + " \"Paul merson was brought on with only seven minutes remaining in his team 's 0-0 draw with burnley . Andros townsend scored the tottenham midfielder in the 89th minute . Paul merson had another dig at andros townsend after his appearance . The midfielder had been brought on to the england squad last week . Click here for all the latest arsenal news news .\",\n", + ")\n", "\n", "compute_g_eval_summarization(source=source, summary=summary)" ] diff --git a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb index ac7e3cbf..1cfd3956 100644 --- a/notebooks/qas/adalflow_object_count_auto_optimization.ipynb +++ b/notebooks/qas/adalflow_object_count_auto_optimization.ipynb @@ -137,12 +137,14 @@ "\n", "# Prompt user to enter their API keys securely\n", "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "groq_api_key = getpass(\"Please enter your GROQ API key, simplly press Enter if you don't have one: \")\n", + "groq_api_key = getpass(\n", + " \"Please enter your GROQ API key, simplly press Enter if you don't have one: \"\n", + ")\n", "\n", "\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", "\n", "print(\"API keys have been set.\")" ] @@ -209,6 +211,7 @@ "\n", "\"\"\"\n", "\n", + "\n", "class ObjectCountTaskPipeline(adal.Component):\n", " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", " super().__init__()\n", @@ -242,9 +245,7 @@ " self, question: str, id: str = None\n", " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n", " output = 
self.llm_counter(prompt_kwargs={\"input_str\": question}, id=id)\n", - " return output\n", - "\n", - "\n" + " return output" ] }, { @@ -329,44 +330,42 @@ "from adalflow.components.model_client.groq_client import GroqAPIClient\n", "\n", "\n", - "if len(os.environ['OPENAI_API_KEY']) > 1:\n", - " gpt_3_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-3.5-turbo\",\n", - " \"max_tokens\": 2000,\n", - " \"temperature\": 0.0,\n", - " \"top_p\": 0.99,\n", - " \"frequency_penalty\": 0,\n", - " \"presence_penalty\": 0,\n", - " \"stop\": None,\n", - " },\n", - " }\n", - " gpt_4o_model = {\n", - " \"model_client\": OpenAIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"gpt-4o\",\n", - " \"max_tokens\": 4000,\n", - " \"temperature\": 0.0,\n", - " \"top_p\": 0.99,\n", - " \"frequency_penalty\": 0,\n", - " \"presence_penalty\": 0,\n", - " \"stop\": None,\n", - " },\n", - " }\n", + "if len(os.environ[\"OPENAI_API_KEY\"]) > 1:\n", + " gpt_3_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"max_tokens\": 2000,\n", + " \"temperature\": 0.0,\n", + " \"top_p\": 0.99,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " \"stop\": None,\n", + " },\n", + " }\n", + " gpt_4o_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-4o\",\n", + " \"max_tokens\": 4000,\n", + " \"temperature\": 0.0,\n", + " \"top_p\": 0.99,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0,\n", + " \"stop\": None,\n", + " },\n", + " }\n", "\n", - "if len(os.environ['GROQ_API_KEY']) > 1:\n", - " llama_3_1_model ={\n", - " \"model_client\": GroqAPIClient(),\n", - " \"model_kwargs\": {\n", - " \"model\": \"llama-3.1-8b-instant\"\n", - " }\n", - " }\n", + "if len(os.environ[\"GROQ_API_KEY\"]) > 1:\n", + " llama_3_1_model = {\n", + " \"model_client\": GroqAPIClient(),\n", + " 
\"model_kwargs\": {\"model\": \"llama-3.1-8b-instant\"},\n", + " }\n", "\n", "\n", "question = \"I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?\"\n", "task_pipeline = ObjectCountTaskPipeline(**gpt_3_model)\n", - "print(task_pipeline)\n" + "print(task_pipeline)" ] }, { @@ -467,6 +466,7 @@ "from adalflow.datasets.big_bench_hard import BigBenchHard\n", "from adalflow.utils.data import subset_dataset\n", "\n", + "\n", "def load_datasets(max_samples: int = None):\n", " \"\"\"Load the dataset\"\"\"\n", " train_data = BigBenchHard(split=\"train\")\n", @@ -479,7 +479,7 @@ " val_data = subset_dataset(val_data, max_samples)\n", " test_data = subset_dataset(test_data, max_samples)\n", "\n", - " return train_data, val_data, test_data\n" + " return train_data, val_data, test_data" ] }, { @@ -583,11 +583,11 @@ " def prepare_task(self, sample: Example):\n", " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", "\n", - " def prepare_eval(\n", - " self, sample: Example, y_pred: adal.GeneratorOutput\n", - " ) -> float:\n", + " def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float:\n", " y_label = -1\n", - " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", + " if (\n", + " y_pred is not None and y_pred.data is not None\n", + " ): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", " y_label = y_pred.data\n", " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}" ] @@ -604,7 +604,6 @@ " model_client: adal.ModelClient,\n", " model_kwargs: Dict,\n", ") -> Dict:\n", - "\n", " trainset, valset, testset = load_datasets()\n", " # use max_samples=10 to test the code\n", "\n", @@ -820,7 +819,7 @@ "from adalflow.datasets.types import Example\n", "\n", "\n", - "class ObjectCountAdalComponent(adal.AdalComponent):# noqa: F811\n", 
+ "class ObjectCountAdalComponent(adal.AdalComponent): # noqa: F811\n", " def __init__(\n", " self,\n", " model_client: adal.ModelClient,\n", @@ -844,12 +843,11 @@ " def prepare_task(self, sample: Example):\n", " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", "\n", - "\n", - " def prepare_eval(\n", - " self, sample: Example, y_pred: adal.GeneratorOutput\n", - " ) -> float:\n", + " def prepare_eval(self, sample: Example, y_pred: adal.GeneratorOutput) -> float:\n", " y_label = -1\n", - " if (y_pred is not None and y_pred.data is not None): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", + " if (\n", + " y_pred is not None and y_pred.data is not None\n", + " ): # if y_pred and y_pred.data: might introduce bug when the data is 0\n", " y_label = y_pred.data\n", " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.answer}\n", "\n", @@ -891,7 +889,7 @@ " **gpt_3_model,\n", " teacher_model_config=gpt_4o_model,\n", " text_optimizer_model_config=gpt_4o_model,\n", - " backward_engine_model_config=gpt_4o_model\n", + " backward_engine_model_config=gpt_4o_model,\n", " )\n", " print(adal_component)\n", " trainer = adal.Trainer(\n", @@ -916,7 +914,7 @@ " test_dataset=test_dataset,\n", " debug=debug,\n", " resume_from_ckpt=resume_from_ckpt,\n", - " )\n" + " )" ] }, { @@ -3255,10 +3253,14 @@ } ], "source": [ - "train(debug=False, max_steps=12, strategy=\"constrained\",\n", - " raw_shots=0, bootstrap_shots=1,\n", - " exclude_input_fields_from_bootstrap_demos=True\n", - " )" + "train(\n", + " debug=False,\n", + " max_steps=12,\n", + " strategy=\"constrained\",\n", + " raw_shots=0,\n", + " bootstrap_shots=1,\n", + " exclude_input_fields_from_bootstrap_demos=True,\n", + ")" ] }, { @@ -6015,13 +6017,17 @@ } ], "source": [ - "\n", "ckpt_path = \"/content/adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_4e8a1_run_1.json\"\n", "\n", - "train(debug=False, max_steps=12, strategy=\"constrained\",\n", - " raw_shots=0, 
bootstrap_shots=1,\n", - " resume_from_ckpt=ckpt_path,\n", - " exclude_input_fields_from_bootstrap_demos=True)" + "train(\n", + " debug=False,\n", + " max_steps=12,\n", + " strategy=\"constrained\",\n", + " raw_shots=0,\n", + " bootstrap_shots=1,\n", + " resume_from_ckpt=ckpt_path,\n", + " exclude_input_fields_from_bootstrap_demos=True,\n", + ")" ] }, { @@ -8038,11 +8044,15 @@ } ], "source": [ - "\n", - "train(debug=False, max_steps=12, strategy=\"random\",\n", - " raw_shots=0, bootstrap_shots=1,\n", - " resume_from_ckpt=ckpt_path,\n", - " exclude_input_fields_from_bootstrap_demos=False)" + "train(\n", + " debug=False,\n", + " max_steps=12,\n", + " strategy=\"random\",\n", + " raw_shots=0,\n", + " bootstrap_shots=1,\n", + " resume_from_ckpt=ckpt_path,\n", + " exclude_input_fields_from_bootstrap_demos=False,\n", + ")" ] }, { diff --git a/notebooks/tutorials/adalflow_component.ipynb b/notebooks/tutorials/adalflow_component.ipynb index 2da8aa78..dbc0183d 100644 --- a/notebooks/tutorials/adalflow_component.ipynb +++ b/notebooks/tutorials/adalflow_component.ipynb @@ -1,985 +1,989 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 🤗 Welcome to AdalFlow!\n", - "## The library to build & auto-optimize any LLM task pipelines\n", - "\n", - "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! 
⭐ Star us on Github ⭐\n", - "\n", - "\n", - "# Quick Links\n", - "\n", - "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", - "\n", - "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", - "\n", - "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", - "\n", - "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", - "\n", - "# Author\n", - "\n", - "This notebook was created by community contributor [Ajith](https://github.com/ajithvcoder).\n", - "\n", - "# Outline\n", - "\n", - "This is a quick introduction of what AdalFlow is capable of. We will cover:\n", - "\n", - "* How to use `DataClass` with `DataClassParser`.\n", - "* How to do nested dataclass, we will test both one and two levels of nesting.\n", - "\n", - "**Next: Try our [auto-optimization](https://colab.research.google.com/drive/1n3mHUWekTEYHiBdYBTw43TKlPN41A9za?usp=sharing)**\n", - "\n", - "\n", - "# Installation\n", - "\n", - "1. Use `pip` to install the `adalflow` Python package. We will need `openai` and `groq`from the extra packages.\n", - "\n", - " ```bash\n", - " pip install adalflow[openai,groq]\n", - " ```\n", - "2. 
Setup `openai` and `groq` API key in the environment variables" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "Ab_OmE6XTl4h" - }, - "outputs": [], - "source": [ - "from IPython.display import clear_output\n", - "\n", - "!pip install -U adalflow[openai,groq,datasets]\n", - "\n", - "clear_output()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "PbAIsBeeTQUk" - }, - "outputs": [], - "source": [ - "import re\n", - "from adalflow.core import Component, Generator\n", - "from adalflow.components.model_client import OpenAIClient\n", - "from adalflow.components.model_client import GroqAPIClient\n", - "from adalflow.utils import setup_env # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kRymwpwHTQUm", - "outputId": "6a992f52-1661-4002-ef74-ed26938c6baa" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Please enter your OpenAI API key: ··········\n", - "API keys have been set.\n" - ] - } - ], - "source": [ - "from getpass import getpass\n", - "import os\n", - "\n", - "# Prompt user to enter their API keys securely\n", - "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", - "\n", - "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", - "\n", - "print(\"API keys have been set.\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "czGDvnVUTQUm" - }, - "outputs": [], - "source": [ - "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PPs3gHqeTQUn" - }, - "source": [ - "Let's turn on the library log to help with debugging." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "98QNsOcSTQUn", - "outputId": "d63cba1b-6087-4b04-bb2b-0a9d9d4500a5" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from adalflow.utils import get_logger\n", - "get_logger()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "b3ey1lozTQUo" - }, - "outputs": [], - "source": [ - "#Toy example\n", - "\n", - "class DocQA(Component):\n", - " def __init__(self):\n", - " super(DocQA, self).__init__()\n", - " self.doc = Generator(\n", - " template=template_doc,\n", - " model_client=OpenAIClient(),\n", - " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", - " )\n", - "\n", - " def call(self, query: str) -> str:\n", - " return self.doc(prompt_kwargs={\"input_str\": query}).data\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "TZAHSrbUTQUo", - "outputId": "66e81fb3-17f9-4570-dbbd-681cad1afc65" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:40:52 - prompt_builder - INFO - [prompt_builder.py:65:__init__] - Prompt has variables: ['input_str']\n", - "2024-11-11 17:40:52 - generator - INFO - [generator.py:144:__init__] - Generator Generator initialized.\n" - ] - } - ], - "source": [ - "doc = DocQA()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "f-y6l44PTQUp", - "outputId": "e24aabd5-d758-4700-fa0d-46b66a88c412" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'type': 'DocQA', 'data': {'_components': {'_ordered_dict': True, 'data': [('doc', {'type': 'Generator', 'data': {'model_str': 
'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 
'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'DocQA', '_init_args': {}}}\n" - ] - }, - { - "data": { - "text/plain": [ - "{'_components': OrderedDict([('doc',\n", - " Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " ))]),\n", - " '_parameters': OrderedDict(),\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'DocQA',\n", - " '_init_args': {}}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# states\n", - "states = doc.to_dict()\n", - "print(states)\n", - "doc.__dict__" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🤗 Welcome to AdalFlow!\n", + "## The library to build & auto-optimize any LLM task pipelines\n", + "\n", + "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! 
⭐ Star us on GitHub ⭐\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "GitHub repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "# Author\n", + "\n", + "This notebook was created by community contributor [Ajith](https://github.com/ajithvcoder).\n", + "\n", + "# Outline\n", + "\n", + "This is a quick introduction of what AdalFlow is capable of. We will cover:\n", + "\n", + "* How to use `DataClass` with `DataClassParser`.\n", + "* How to do nested dataclasses; we will test both one and two levels of nesting.\n", + "\n", + "**Next: Try our [auto-optimization](https://colab.research.google.com/drive/1n3mHUWekTEYHiBdYBTw43TKlPN41A9za?usp=sharing)**\n", + "\n", + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. We will need `openai` and `groq` from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq]\n", + " ```\n", + "2. 
Set up the `openai` and `groq` API keys in the environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "Ab_OmE6XTl4h" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,datasets]\n", + "\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "PbAIsBeeTQUk" + }, + "outputs": [], + "source": [ + "from adalflow.core import Component, Generator\n", + "from adalflow.components.model_client import OpenAIClient" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "kRymwpwHTQUm", + "outputId": "6a992f52-1661-4002-ef74-ed26938c6baa" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "z_sH59_bTQUp" - }, - "source": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "API keys have been set.\n" + ] + } + ], + "source": [ + "from getpass import getpass\n", + "import os\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "czGDvnVUTQUm" + }, + "outputs": [], + "source": [ + "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PPs3gHqeTQUn" + }, + "source": [ + "Let's turn on the library log to help with debugging." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "98QNsOcSTQUn", + "outputId": "d63cba1b-6087-4b04-bb2b-0a9d9d4500a5" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "P81kIS2qTQUp", - "outputId": "d8e0e398-d704-4a85-8692-66a8c570b910" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Generator, {'type': 'Generator', 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 
'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}}\n", - "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Prompt, {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}\n", - "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n", - "2024-11-11 17:40:58 - component - 
INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n" - ] - }, - { - "data": { - "text/plain": [ - "{'_components': OrderedDict([('doc',\n", - " Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " ))]),\n", - " '_parameters': OrderedDict(),\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'DocQA',\n", - " '_init_args': {}}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# restore the states\n", - "doc2 = DocQA.from_dict(states)\n", - "# print(doc2.call(\"What is the capital of France?\"))\n", - "doc2.__dict__\n", - "# doc2.to_dict()" + "data": { + "text/plain": [ + "" ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from adalflow.utils import get_logger\n", + "\n", + "get_logger()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "b3ey1lozTQUo" + }, + "outputs": [], + "source": [ + "# Toy example\n", + "\n", + "\n", + "class DocQA(Component):\n", + " def __init__(self):\n", + " super(DocQA, self).__init__()\n", + " self.doc = Generator(\n", + " template=template_doc,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", + " )\n", + "\n", + " def call(self, query: str) -> str:\n", + " return 
self.doc(prompt_kwargs={\"input_str\": query}).data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "TZAHSrbUTQUo", + "outputId": "66e81fb3-17f9-4570-dbbd-681cad1afc65" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "198xYpLGTQUp", - "outputId": "ffd33d12-6db0-45c2-dfb1-3d57460ad4c9" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'type': 'DocQA',\n", - " 'data': {'_components': {'_ordered_dict': True,\n", - " 'data': [('doc',\n", - " {'type': 'Generator',\n", - " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", - " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", - " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", - " 'cache': ,\n", - " '_components': {'_ordered_dict': True,\n", - " 'data': [('prompt',\n", - " {'type': 'Prompt',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'Prompt',\n", - " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_variables': ['input_str'],\n", - " 'prompt_kwargs': {}}}),\n", - " ('model_client',\n", - " {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'OpenAIClient',\n", - " '_init_args': {'api_key': None,\n", - " 'chat_completion_parser': None,\n", - " 'input_type': 'text'},\n", - " '_api_key': None,\n", - " 'chat_completion_parser': str>,\n", - " '_input_type': 'text'}})]},\n", - " '_parameters': 
{'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'Generator',\n", - " '_init_args': {'model_client': None,\n", - " 'model_kwargs': {},\n", - " 'template': None,\n", - " 'prompt_kwargs': {},\n", - " 'output_processors': None,\n", - " 'name': None,\n", - " 'cache_path': None,\n", - " 'use_cache': False},\n", - " 'backward_engine': None,\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_kwargs': {},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'output_processors': None,\n", - " 'mock_output': False,\n", - " 'mock_output_data': 'mock data',\n", - " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", - " '_use_cache': False,\n", - " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'OpenAIClient',\n", - " '_init_args': {'api_key': None,\n", - " 'chat_completion_parser': None,\n", - " 'input_type': 'text'},\n", - " '_api_key': None,\n", - " 'chat_completion_parser': str>,\n", - " '_input_type': 'text'}},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_kwargs': {},\n", - " 'output_processors': None,\n", - " 'name': None,\n", - " 'cache_path': None,\n", - " 'use_cache': False},\n", - " '_teacher': None}})]},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'DocQA',\n", - " '_init_args': {}}}" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc2.to_dict() == doc.to_dict()\n", - "doc2.to_dict()" - ] + "name": "stdout", + "output_type": 
"stream", + "text": [ + "2024-11-11 17:40:52 - prompt_builder - INFO - [prompt_builder.py:65:__init__] - Prompt has variables: ['input_str']\n", + "2024-11-11 17:40:52 - generator - INFO - [generator.py:144:__init__] - Generator Generator initialized.\n" + ] + } + ], + "source": [ + "doc = DocQA()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "f-y6l44PTQUp", + "outputId": "e24aabd5-d758-4700-fa0d-46b66a88c412" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Ulb1OWxxTQUq", - "outputId": "99972fcd-ed52-43b4-e461-a76c19bd9522" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:41:29 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", - "2024-11-11 17:41:30 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-11-11 17:41:30 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. 
In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', metadata=None)\n", - "As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.\n" - ] - } - ], - "source": [ - "print(doc(\"What is the best treatment for headache?\"))" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "{'type': 'DocQA', 'data': {'_components': {'_ordered_dict': True, 'data': [('doc', {'type': 'Generator', 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', 
'_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'DocQA', '_init_args': {}}}\n" + ] }, { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "POVal8CgTQUq", - "outputId": "2fadb1d6-b858-4964-9045-8ea7454178e3" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:41:35 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 
'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", - "2024-11-11 17:41:36 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-11-11 17:41:36 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', metadata=None)\n", - "As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. 
If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.\n" - ] - } - ], - "source": [ - "print(doc2(\"What is the best treatment for headache?\"))" + "data": { + "text/plain": [ + "{'_components': OrderedDict([('doc',\n", + " Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " ))]),\n", + " '_parameters': OrderedDict(),\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}" ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# states\n", + "states = doc.to_dict()\n", + "print(states)\n", + "doc.__dict__" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z_sH59_bTQUp" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "P81kIS2qTQUp", + "outputId": "d8e0e398-d704-4a85-8692-66a8c570b910" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "R5gTO1-8TQUr" - }, - "source": [] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Generator, {'type': 'Generator', 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo', 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'), 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []}, 'cache': , '_components': {'_ordered_dict': True, 'data': [('prompt', {'type': 'Prompt', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 
'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}), ('model_client', {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}})]}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Generator', '_init_args': {'model_client': None, 'model_kwargs': {}, 'template': None, 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, 'backward_engine': None, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'output_processors': None, 'mock_output': False, 'mock_output_data': 'mock data', 'data_map_func': .default_map_func at 0x7b8d471c97e0>, '_use_cache': False, '_kwargs': {'model_client': {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}, 'model_kwargs': {'model': 'gpt-3.5-turbo'}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_kwargs': {}, 'output_processors': None, 'name': None, 'cache_path': None, 'use_cache': False}, '_teacher': None}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict Prompt, {'type': 'Prompt', 'data': {'_components': 
{'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'Prompt', '_init_args': {'template': None, 'prompt_kwargs': {}}, 'template': ' You are a doctor User: {{input_str}}', 'prompt_variables': ['input_str'], 'prompt_kwargs': {}}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n", + "2024-11-11 17:40:58 - component - INFO - [component.py:350:_restore_value] - Restoring class using from_dict OpenAIClient, {'type': 'OpenAIClient', 'data': {'_components': {'_ordered_dict': True, 'data': []}, '_parameters': {'_ordered_dict': True, 'data': []}, 'training': False, 'teacher_mode': False, 'tracing': False, 'name': 'OpenAIClient', '_init_args': {'api_key': None, 'chat_completion_parser': None, 'input_type': 'text'}, '_api_key': None, 'chat_completion_parser': , '_input_type': 'text'}}\n" + ] }, { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jhgSpKrMTQUr", - "outputId": "15615bf7-2b72-4ac7-d1fe-f436a7304734" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('', DocQA(\n", - " (doc): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " )\n", - "))\n", - "('doc', Generator(\n", - " model_kwargs={'model': 
'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - "))\n", - "('doc.prompt', Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str']))\n", - "('doc.model_client', OpenAIClient())\n" - ] - } - ], - "source": [ - "# list other subcomponents\n", - "\n", - "for subcomponent in doc.named_components():\n", - " print(subcomponent)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XjIHAY6bTQUr" - }, - "source": [ - "Let's add a parameter" + "data": { + "text/plain": [ + "{'_components': OrderedDict([('doc',\n", + " Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " ))]),\n", + " '_parameters': OrderedDict(),\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}" ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# restore the states\n", + "doc2 = DocQA.from_dict(states)\n", + "# print(doc2.call(\"What is the capital of France?\"))\n", + "doc2.__dict__\n", + "# doc2.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "198xYpLGTQUp", + "outputId": "ffd33d12-6db0-45c2-dfb1-3d57460ad4c9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "id": "vxgjAUiFTQUr" - }, - "outputs": [], - "source": [ - "from adalflow.optim.parameter import Parameter\n", - "\n", - "doc.register_parameter(\"demo\", param=Parameter(data=\"demo\"))" + "data": { + "text/plain": [ + "{'type': 'DocQA',\n", + " 'data': {'_components': {'_ordered_dict': True,\n", 
+ " 'data': [('doc',\n", + " {'type': 'Generator',\n", + " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", + " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", + " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", + " 'cache': ,\n", + " '_components': {'_ordered_dict': True,\n", + " 'data': [('prompt',\n", + " {'type': 'Prompt',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Prompt',\n", + " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_variables': ['input_str'],\n", + " 'prompt_kwargs': {}}}),\n", + " ('model_client',\n", + " {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Generator',\n", + " '_init_args': {'model_client': None,\n", + " 'model_kwargs': {},\n", + " 'template': None,\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " 'backward_engine': None,\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'output_processors': None,\n", + " 'mock_output': 
False,\n", + " 'mock_output_data': 'mock data',\n", + " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", + " '_use_cache': False,\n", + " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " '_teacher': None}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}}" ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc2.to_dict() == doc.to_dict()\n", + "doc2.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "Ulb1OWxxTQUq", + "outputId": "99972fcd-ed52-43b4-e461-a76c19bd9522" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "86C-h1e1TQUr", - "outputId": "57cab4d0-eddf-433d-e364-5d7f07072fbf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "('demo', Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), 
gradients=[], raw_response=None, input_args=None, traces={}))\n" - ] - } - ], - "source": [ - "# list all parameters\n", - "for param in doc.named_parameters():\n", - " print(param)" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:41:29 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", + "2024-11-11 17:41:30 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:41:30 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache depends on the underlying cause of the headache. In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.', metadata=None)\n", + "As a doctor, the best treatment for a headache depends on the underlying cause of the headache. 
In general, for tension headaches or migraines, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help alleviate symptoms. It is also important to rest in a quiet, dark room and stay hydrated. If headaches are frequent or severe, it is important to consult with a healthcare provider for further evaluation and treatment options.\n" + ] + } + ], + "source": [ + "print(doc(\"What is the best treatment for headache?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "POVal8CgTQUq", + "outputId": "2fadb1d6-b858-4964-9045-8ea7454178e3" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_s2MPukiTQUr", - "outputId": "b51c7d09-fb52-42d9-b2d5-4f44f5d22dc9" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'type': 'DocQA',\n", - " 'data': {'_components': {'_ordered_dict': True,\n", - " 'data': [('doc',\n", - " {'type': 'Generator',\n", - " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", - " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", - " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", - " 'cache': ,\n", - " '_components': {'_ordered_dict': True,\n", - " 'data': [('prompt',\n", - " {'type': 'Prompt',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'Prompt',\n", - " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_variables': ['input_str'],\n", - " 'prompt_kwargs': {}}}),\n", - " ('model_client',\n", - " {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " 
'_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'OpenAIClient',\n", - " '_init_args': {'api_key': None,\n", - " 'chat_completion_parser': None,\n", - " 'input_type': 'text'},\n", - " '_api_key': None,\n", - " 'chat_completion_parser': str>,\n", - " '_input_type': 'text'}})]},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'Generator',\n", - " '_init_args': {'model_client': None,\n", - " 'model_kwargs': {},\n", - " 'template': None,\n", - " 'prompt_kwargs': {},\n", - " 'output_processors': None,\n", - " 'name': None,\n", - " 'cache_path': None,\n", - " 'use_cache': False},\n", - " 'backward_engine': None,\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_kwargs': {},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'output_processors': None,\n", - " 'mock_output': False,\n", - " 'mock_output_data': 'mock data',\n", - " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", - " '_use_cache': False,\n", - " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", - " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", - " '_parameters': {'_ordered_dict': True, 'data': []},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'OpenAIClient',\n", - " '_init_args': {'api_key': None,\n", - " 'chat_completion_parser': None,\n", - " 'input_type': 'text'},\n", - " '_api_key': None,\n", - " 'chat_completion_parser': str>,\n", - " '_input_type': 'text'}},\n", - " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", - " 'template': ' You are a doctor User: {{input_str}}',\n", - " 'prompt_kwargs': {},\n", - " 'output_processors': None,\n", - " 'name': None,\n", - " 'cache_path': None,\n", - " 'use_cache': False},\n", - " '_teacher': None}})]},\n", - " 
'_parameters': {'_ordered_dict': True,\n", - " 'data': [('demo',\n", - " {'name': 'param_313f196d-3c48-4eb3-8138-b7bd74298fbd',\n", - " 'role_desc': '',\n", - " 'data': 'demo',\n", - " 'requires_opt': True,\n", - " 'param_type': 'none ()',\n", - " 'predecessors': [],\n", - " 'gradients': [],\n", - " 'previous_data': None,\n", - " 'gradients_context': [],\n", - " 'grad_fn': 'None',\n", - " 'gradient_prompt': 'None',\n", - " 'raw_response': None,\n", - " 'score': None,\n", - " 'traces': {},\n", - " 'input_args': None,\n", - " 'demos': []})]},\n", - " 'training': False,\n", - " 'teacher_mode': False,\n", - " 'tracing': False,\n", - " 'name': 'DocQA',\n", - " '_init_args': {}}}" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc.to_dict()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:41:35 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?'}]}\n", + "2024-11-11 17:41:36 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:41:36 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. 
If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=92, prompt_tokens=27, total_tokens=119), raw_response='As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.', metadata=None)\n", + "As a doctor, the best treatment for a headache will depend on the underlying cause of the headache. In general, over-the-counter pain medications such as acetaminophen, ibuprofen, or aspirin can help relieve mild to moderate headaches. It is also important to stay hydrated, get adequate rest, manage stress, and practice good posture. 
If the headache persists or is severe, it is important to see a healthcare provider for further evaluation and treatment.\n" + ] + } + ], + "source": [ + "print(doc2(\"What is the best treatment for headache?\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5gTO1-8TQUr" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "jhgSpKrMTQUr", + "outputId": "15615bf7-2b72-4ac7-d1fe-f436a7304734" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "id": "mcIO1DuVTQUr" - }, - "outputs": [], - "source": [ - "from adalflow.utils.file_io import save_json\n", - "\n", - "save_json(doc.to_dict(), \"doc.json\")" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "('', DocQA(\n", + " (doc): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " )\n", + "))\n", + "('doc', Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + "))\n", + "('doc.prompt', Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str']))\n", + "('doc.model_client', OpenAIClient())\n" + ] + } + ], + "source": [ + "# list other subcomponents\n", + "\n", + "for subcomponent in doc.named_components():\n", + " print(subcomponent)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XjIHAY6bTQUr" + }, + "source": [ + "Let's add a parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "vxgjAUiFTQUr" + }, + "outputs": [], + "source": [ + "from adalflow.optim.parameter import Parameter\n", + "\n", + 
"doc.register_parameter(\"demo\", param=Parameter(data=\"demo\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "86C-h1e1TQUr", + "outputId": "57cab4d0-eddf-433d-e364-5d7f07072fbf" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "0vvO0nogTQUr", - "outputId": "59131d9e-a996-4c8b-f32c-9a6a623d3db6" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "OrderedDict([('demo',\n", - " Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}))])" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc.state_dict()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "('demo', Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}))\n" + ] + } + ], + "source": [ + "# list all parameters\n", + "for param in doc.named_parameters():\n", + " print(param)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "_s2MPukiTQUr", + "outputId": "b51c7d09-fb52-42d9-b2d5-4f44f5d22dc9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 125 - }, - "id": "uroqi93tTQUs", - "outputId": "8a3e4ecc-1368-475b-dc4d-2ff38821b8ac" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:42:18 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 
'content': ' You are a doctor User: What is the best treatment for a cold?'}]}\n", - "2024-11-11 17:42:19 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-11-11 17:42:19 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=85, prompt_tokens=28, total_tokens=113), raw_response='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', metadata=None)\n" - ] - }, - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - }, - "text/plain": [ - "'As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. 
If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.'" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "doc.call(\"What is the best treatment for a cold?\")" + "data": { + "text/plain": [ + "{'type': 'DocQA',\n", + " 'data': {'_components': {'_ordered_dict': True,\n", + " 'data': [('doc',\n", + " {'type': 'Generator',\n", + " 'data': {'model_str': 'OpenAIClient_gpt-3_5-turbo',\n", + " 'cache_path': PosixPath('/root/.adalflow/cache_OpenAIClient_gpt-3_5-turbo.db'),\n", + " 'callbacks': {'on_success': [], 'on_failure': [], 'on_complete': []},\n", + " 'cache': ,\n", + " '_components': {'_ordered_dict': True,\n", + " 'data': [('prompt',\n", + " {'type': 'Prompt',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Prompt',\n", + " '_init_args': {'template': None, 'prompt_kwargs': {}},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_variables': ['input_str'],\n", + " 'prompt_kwargs': {}}}),\n", + " ('model_client',\n", + " {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}})]},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'Generator',\n", + " '_init_args': {'model_client': None,\n", + " 'model_kwargs': 
{},\n", + " 'template': None,\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " 'backward_engine': None,\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'output_processors': None,\n", + " 'mock_output': False,\n", + " 'mock_output_data': 'mock data',\n", + " 'data_map_func': .default_map_func(data: 'GeneratorOutputType') -> str>,\n", + " '_use_cache': False,\n", + " '_kwargs': {'model_client': {'type': 'OpenAIClient',\n", + " 'data': {'_components': {'_ordered_dict': True, 'data': []},\n", + " '_parameters': {'_ordered_dict': True, 'data': []},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': False,\n", + " 'name': 'OpenAIClient',\n", + " '_init_args': {'api_key': None,\n", + " 'chat_completion_parser': None,\n", + " 'input_type': 'text'},\n", + " '_api_key': None,\n", + " 'chat_completion_parser': str>,\n", + " '_input_type': 'text'}},\n", + " 'model_kwargs': {'model': 'gpt-3.5-turbo'},\n", + " 'template': ' You are a doctor User: {{input_str}}',\n", + " 'prompt_kwargs': {},\n", + " 'output_processors': None,\n", + " 'name': None,\n", + " 'cache_path': None,\n", + " 'use_cache': False},\n", + " '_teacher': None}})]},\n", + " '_parameters': {'_ordered_dict': True,\n", + " 'data': [('demo',\n", + " {'name': 'param_313f196d-3c48-4eb3-8138-b7bd74298fbd',\n", + " 'role_desc': '',\n", + " 'data': 'demo',\n", + " 'requires_opt': True,\n", + " 'param_type': 'none ()',\n", + " 'predecessors': [],\n", + " 'gradients': [],\n", + " 'previous_data': None,\n", + " 'gradients_context': [],\n", + " 'grad_fn': 'None',\n", + " 'gradient_prompt': 'None',\n", + " 'raw_response': None,\n", + " 'score': None,\n", + " 'traces': {},\n", + " 'input_args': None,\n", + " 'demos': []})]},\n", + " 'training': False,\n", + " 'teacher_mode': False,\n", + " 'tracing': 
False,\n", + " 'name': 'DocQA',\n", + " '_init_args': {}}}" ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "mcIO1DuVTQUr" + }, + "outputs": [], + "source": [ + "from adalflow.utils.file_io import save_json\n", + "\n", + "save_json(doc.to_dict(), \"doc.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "0vvO0nogTQUr", + "outputId": "59131d9e-a996-4c8b-f32c-9a6a623d3db6" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mYSDr462TQUs", - "outputId": "82414c82-8feb-4667-90ed-91c594cc6a73" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "\n" - ] - } - ], - "source": [ - "from adalflow.core.component import FunComponent\n", - "\n", - "def add_one(x):\n", - " return x + 1\n", - "\n", - "fun_component = FunComponent(add_one)\n", - "print(fun_component(1))\n", - "print(type(fun_component))\n", - "\n", - "# output:\n", - "# 2\n", - "# " + "data": { + "text/plain": [ + "OrderedDict([('demo',\n", + " Parameter(name=param_313f196d-3c48-4eb3-8138-b7bd74298fbd, requires_opt=True, param_type=none (), role_desc=, data=demo, predecessors=set(), gradients=[], raw_response=None, input_args=None, traces={}))])" ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.state_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 125 }, + "id": "uroqi93tTQUs", + "outputId": "8a3e4ecc-1368-475b-dc4d-2ff38821b8ac" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": { - "base_uri": 
"https://localhost:8080/" - }, - "id": "3MW1tpzRTQUs", - "outputId": "351b8922-1423-434a-f470-ff435a1962d2" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "\n" - ] - } - ], - "source": [ - "from adalflow.core.component import fun_to_component\n", - "\n", - "fun_component = fun_to_component(add_one)\n", - "print(fun_component(1))\n", - "print(type(fun_component))\n", - "\n", - "# output:\n", - "# 2\n", - "# " - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:42:18 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for a cold?'}]}\n", + "2024-11-11 17:42:19 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:42:19 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', error=None, usage=CompletionUsage(completion_tokens=85, prompt_tokens=28, total_tokens=113), raw_response='As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. 
If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.', metadata=None)\n" + ] }, { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "dxAoGrnQTQUs", - "outputId": "38c462a3-5abf-41f4-9231-746c8d0ffcb3" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2\n", - "\n" - ] - } - ], - "source": [ - "# use it as a decorator\n", - "@fun_to_component\n", - "def add_one(x):\n", - " return x + 1\n", - "\n", - "print(add_one(1))\n", - "print(type(add_one))\n", - "\n", - "# output:\n", - "# 2\n", - "# " + "text/plain": [ + "'As a doctor, I recommend getting plenty of rest, staying hydrated, and taking over-the-counter medications like ibuprofen or acetaminophen to help relieve symptoms such as fever and congestion. Additionally, you can try using saline nasal sprays or lozenges to help soothe a sore throat. 
If your symptoms persist or worsen, it is best to consult with a healthcare provider for further evaluation and treatment.'" ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "doc.call(\"What is the best treatment for a cold?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "mYSDr462TQUs", + "outputId": "82414c82-8feb-4667-90ed-91c594cc6a73" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "7BvJEP_mTQUs", - "outputId": "066281b8-a650-4c48-c786-312022198015" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2024-11-11 17:42:39 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?Please be concise and only list the top treatments.'}]}\n", - "2024-11-11 17:42:40 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "2024-11-11 17:42:40 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', error=None, usage=CompletionUsage(completion_tokens=37, prompt_tokens=37, total_tokens=74), raw_response='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', metadata=None)\n", - "The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and 
relaxation techniques such as deep breathing or meditation.\n" - ] - } - ], - "source": [ - "from adalflow.core import Sequential\n", - "\n", - "@fun_to_component\n", - "def enhance_query(query:str) -> str:\n", - " return query + \"Please be concise and only list the top treatments.\"\n", - "\n", - "seq = Sequential(enhance_query, doc)\n", - "\n", - "query = \"What is the best treatment for headache?\"\n", - "print(seq(query))" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "\n" + ] + } + ], + "source": [ + "from adalflow.core.component import FunComponent\n", + "\n", + "\n", + "def add_one(x):\n", + " return x + 1\n", + "\n", + "\n", + "fun_component = FunComponent(add_one)\n", + "print(fun_component(1))\n", + "print(type(fun_component))\n", + "\n", + "# output:\n", + "# 2\n", + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "3MW1tpzRTQUs", + "outputId": "351b8922-1423-434a-f470-ff435a1962d2" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "aoZ2w8RUTQUt", - "outputId": "115d0ccf-33d1-4464-a951-cf9f5476284b" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Sequential(\n", - " (0): EnhanceQueryComponent(fun_name=enhance_query)\n", - " (1): DocQA(\n", - " (doc): Generator(\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", - " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", - " (model_client): OpenAIClient()\n", - " )\n", - " )\n", - ")" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seq" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "\n" + ] + } + ], + "source": [ + "from adalflow.core.component import fun_to_component\n", + "\n", + "fun_component = 
fun_to_component(add_one)\n", + "print(fun_component(1))\n", + "print(type(fun_component))\n", + "\n", + "# output:\n", + "# 2\n", + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "dxAoGrnQTQUs", + "outputId": "38c462a3-5abf-41f4-9231-746c8d0ffcb3" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "F-ffAlC6TQUt" - }, - "source": [ - "# TODO: LLM for single choices" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "\n" + ] + } + ], + "source": [ + "# use it as a decorator\n", + "@fun_to_component\n", + "def add_one(x):\n", + " return x + 1\n", + "\n", + "\n", + "print(add_one(1))\n", + "print(type(add_one))\n", + "\n", + "# output:\n", + "# 2\n", + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "7BvJEP_mTQUs", + "outputId": "066281b8-a650-4c48-c786-312022198015" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Issues and feedback\n", - "\n", - "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", - "\n", - "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." 
- ] + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-11-11 17:42:39 - openai_client - INFO - [openai_client.py:279:call] - api_kwargs: {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'system', 'content': ' You are a doctor User: What is the best treatment for headache?Please be concise and only list the top treatments.'}]}\n", + "2024-11-11 17:42:40 - _client - INFO - [_client.py:1038:_send_single_request] - HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "2024-11-11 17:42:40 - generator - INFO - [generator.py:798:call] - output: GeneratorOutput(id=None, data='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', error=None, usage=CompletionUsage(completion_tokens=37, prompt_tokens=37, total_tokens=74), raw_response='The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.', metadata=None)\n", + "The top treatments for headache are rest, hydration, over-the-counter pain relievers such as ibuprofen or acetaminophen, and relaxation techniques such as deep breathing or meditation.\n" + ] } - ], - "metadata": { + ], + "source": [ + "from adalflow.core import Sequential\n", + "\n", + "\n", + "@fun_to_component\n", + "def enhance_query(query: str) -> str:\n", + " return query + \"Please be concise and only list the top treatments.\"\n", + "\n", + "\n", + "seq = Sequential(enhance_query, doc)\n", + "\n", + "query = \"What is the best treatment for headache?\"\n", + "print(seq(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" + "id": "aoZ2w8RUTQUt", + "outputId": "115d0ccf-33d1-4464-a951-cf9f5476284b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Sequential(\n", + " (0): EnhanceQueryComponent(fun_name=enhance_query)\n", + " (1): DocQA(\n", + " (doc): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo'}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(template: You are a doctor User: {{input_str}}, prompt_variables: ['input_str'])\n", + " (model_client): OpenAIClient()\n", + " )\n", + " )\n", + ")" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" } + ], + "source": [ + "seq" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F-ffAlC6TQUt" + }, + "source": [ + "# TODO: LLM for single choices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." 
+ ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/notebooks/tutorials/adalflow_dataclasses.ipynb b/notebooks/tutorials/adalflow_dataclasses.ipynb index 3c96ffe5..7ae08f63 100644 --- a/notebooks/tutorials/adalflow_dataclasses.ipynb +++ b/notebooks/tutorials/adalflow_dataclasses.ipynb @@ -145,8 +145,8 @@ "\n", "\n", "# Set environment variables\n", - "os.environ['GROQ_API_KEY'] = groq_api_key\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ[\"GROQ_API_KEY\"] = groq_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", "\n", "print(\"API keys have been set.\")" ] @@ -231,12 +231,11 @@ " explanation: str = field(\n", " metadata={\"desc\": \"A brief explanation of the concept in one sentence.\"}\n", " )\n", - " example: str = field(\n", - " metadata={\"desc\": \"An example of the concept in a sentence.\"}\n", - " )\n", + " example: str = field(metadata={\"desc\": \"An example of the concept in a sentence.\"})\n", " # Control output fields order\n", " __output_fields__ = [\"explanation\", \"example\"]\n", "\n", + "\n", "# Define the template using jinja2 syntax\n", "qa_template = r\"\"\"\n", "You are a helpful assistant.\n", @@ -278,7 +277,7 @@ "\n", " async def acall(self, query: str):\n", " \"\"\"Asynchronous call to generate response\"\"\"\n", - " return await self.generator.acall({\"input_str\": query})\n" + " return await self.generator.acall({\"input_str\": query})" ] }, { @@ -380,26 +379,20 @@ "# 1. 
Basic DataClass with different field types\n", "@dataclass\n", "class MovieReview(adal.DataClass):\n", - " title: str = field(\n", - " metadata={\"desc\": \"The title of the movie\"}\n", - " )\n", + " title: str = field(metadata={\"desc\": \"The title of the movie\"})\n", " rating: float = field(\n", - " metadata={\n", - " \"desc\": \"Rating from 1.0 to 10.0\",\n", - " \"min\": 1.0,\n", - " \"max\": 10.0\n", - " }\n", + " metadata={\"desc\": \"Rating from 1.0 to 10.0\", \"min\": 1.0, \"max\": 10.0}\n", " )\n", " pros: List[str] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of positive points about the movie\"}\n", + " metadata={\"desc\": \"List of positive points about the movie\"},\n", " )\n", " cons: List[str] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of negative points about the movie\"}\n", + " metadata={\"desc\": \"List of negative points about the movie\"},\n", " )\n", "\n", - " __output_fields__ = [\"title\", \"rating\", \"pros\", \"cons\"]\n" + " __output_fields__ = [\"title\", \"rating\", \"pros\", \"cons\"]" ] }, { @@ -410,7 +403,6 @@ }, "outputs": [], "source": [ - "\n", "@dataclass\n", "class Actor(adal.DataClass):\n", " name: str = field(metadata={\"desc\": \"Actor's full name\"})\n", @@ -429,20 +421,18 @@ "\n", "# Have both MovieReview and Actor nested in DetailedMovieReview\n", "\n", + "\n", "@dataclass\n", "class DetailedMovieReview(adal.DataClass):\n", " basic_review: MovieReview\n", " cast: List[Actor] = field(\n", - " default_factory=list,\n", - " metadata={\"desc\": \"List of main actors in the movie\"}\n", + " default_factory=list, metadata={\"desc\": \"List of main actors in the movie\"}\n", " )\n", " genre: List[str] = field(\n", - " default_factory=list,\n", - " metadata={\"desc\": \"List of genres for the movie\"}\n", + " default_factory=list, metadata={\"desc\": \"List of genres for the movie\"}\n", " )\n", " recommend: bool = field(\n", - " default_factory=str,\n", - " 
metadata={\"desc\": \"Whether you would recommend this movie\"}\n", + " default_factory=str, metadata={\"desc\": \"Whether you would recommend this movie\"}\n", " )\n", "\n", " __output_fields__ = [\"basic_review\", \"cast\", \"genre\", \"recommend\"]" @@ -472,18 +462,25 @@ "source": [ "# Create the MovieReviewer component with MovieAnalysis data class\n", "class MovieReviewer(adal.Component):\n", - " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict, data_class: adal.DataClass):\n", + " def __init__(\n", + " self,\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " data_class: adal.DataClass,\n", + " ):\n", " super().__init__()\n", - " self.additional_structure_prompt = \"Dont use 'type' and 'properties' in output directly give as dict\"\n", - " parser = adal.DataClassParser(\n", - " data_class=data_class,\n", - " return_data_class=True\n", + " self.additional_structure_prompt = (\n", + " \"Dont use 'type' and 'properties' in output directly give as dict\"\n", " )\n", + " parser = adal.DataClassParser(data_class=data_class, return_data_class=True)\n", " self.generator = adal.Generator(\n", " model_client=model_client,\n", " model_kwargs=model_kwargs,\n", " template=movie_review_template,\n", - " prompt_kwargs={\"output_format_str\": parser.get_output_format_str() + self.additional_structure_prompt},\n", + " prompt_kwargs={\n", + " \"output_format_str\": parser.get_output_format_str()\n", + " + self.additional_structure_prompt\n", + " },\n", " output_processors=parser,\n", " )\n", "\n", @@ -512,7 +509,7 @@ "reviewer = MovieReviewer(\n", " model_client=GroqAPIClient(),\n", " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", - " data_class=DetailedMovieReview\n", + " data_class=DetailedMovieReview,\n", ")\n", "\n", "response = reviewer(\"The Matrix\")\n", @@ -541,7 +538,7 @@ "reviewer = MovieReviewer(\n", " model_client=adal.OpenAIClient(),\n", " model_kwargs={\"model\": \"gpt-4o\"},\n", - " 
data_class=DetailedMovieReview\n", + " data_class=DetailedMovieReview,\n", ")\n", "response = reviewer(\"The Matrix\")\n", "print(f\"DetailedMovieReview: {response.data}\")\n", @@ -566,16 +563,16 @@ "source": [ "# 3. second level nested dataclass\n", "\n", + "\n", "@dataclass\n", "class MovieAnalysis(adal.DataClass):\n", " review: DetailedMovieReview\n", " box_office: float = field(\n", - " default=None,\n", - " metadata={\"desc\": \"Box office earnings in millions of dollars\"}\n", + " default=None, metadata={\"desc\": \"Box office earnings in millions of dollars\"}\n", " )\n", " awards: Dict[str, int] = field(\n", " default=None,\n", - " metadata={\"desc\": \"Dictionary of award categories and number of wins\"}\n", + " metadata={\"desc\": \"Dictionary of award categories and number of wins\"},\n", " )\n", "\n", " __output_fields__ = [\"review\", \"box_office\", \"awards\"]" @@ -605,7 +602,7 @@ "analysis = MovieReviewer(\n", " model_client=adal.OpenAIClient(),\n", " model_kwargs={\"model\": \"gpt-3.5-turbo\"},\n", - " data_class=MovieAnalysis\n", + " data_class=MovieAnalysis,\n", ")\n", "\n", "response = analysis(\"The Matrix\")\n", @@ -637,7 +634,7 @@ "analysis = MovieReviewer(\n", " model_client=GroqAPIClient(),\n", " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", - " data_class=MovieAnalysis\n", + " data_class=MovieAnalysis,\n", ")\n", "\n", "response = analysis(\"The Matrix\")\n", @@ -668,33 +665,27 @@ "# 1. 
Basic DataClass with different field types\n", "@dataclass\n", "class SongReview(adal.DataClass):\n", - " title: str = field(\n", - " metadata={\"desc\": \"The title of the song\"}\n", - " )\n", - " album: str = field(\n", - " metadata={\"desc\": \"The album of the song\"}\n", - " )\n", + " title: str = field(metadata={\"desc\": \"The title of the song\"})\n", + " album: str = field(metadata={\"desc\": \"The album of the song\"})\n", " ranking: int = field(\n", - " metadata={\n", - " \"desc\": \"Billboard peak ranking from 1 to 200\",\n", - " \"min\": 1,\n", - " \"max\": 200\n", - " }\n", + " metadata={\"desc\": \"Billboard peak ranking from 1 to 200\", \"min\": 1, \"max\": 200}\n", " )\n", " streaming: Dict[str, int] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"Dict of lastest approximate streaming count in spotify and in youtube. Gives the count in millions\"}\n", + " metadata={\n", + " \"desc\": \"Dict of lastest approximate streaming count in spotify and in youtube. Gives the count in millions\"\n", + " },\n", " )\n", " pros: List[str] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of positive points about the song\"}\n", + " metadata={\"desc\": \"List of positive points about the song\"},\n", " )\n", " cons: List[str] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of negative points about the song\"}\n", + " metadata={\"desc\": \"List of negative points about the song\"},\n", " )\n", "\n", - " __output_fields__ = [\"title\", \"rating\", \"streaming\", \"pros\", \"cons\"]\n" + " __output_fields__ = [\"title\", \"rating\", \"streaming\", \"pros\", \"cons\"]" ] }, { @@ -705,7 +696,6 @@ }, "outputs": [], "source": [ - "\n", "@dataclass\n", "class Artist(adal.DataClass):\n", " name: str = field(metadata={\"desc\": \"Artist's full name\"})\n", @@ -722,6 +712,7 @@ "source": [ "# 2. 
Nested DataClass example\n", "\n", + "\n", "@dataclass\n", "class DetailedSongReview(adal.DataClass):\n", " basic_review: SongReview = field(\n", @@ -729,15 +720,13 @@ " )\n", " cast: List[Artist] = field(\n", " default_factory=list,\n", - " metadata={\"desc\": \"List of main singer, lyrisist and musicians in the song\"}\n", + " metadata={\"desc\": \"List of main singer, lyrisist and musicians in the song\"},\n", " )\n", " genre: List[str] = field(\n", - " default_factory=list,\n", - " metadata={\"desc\": \"List of genres for the song\"}\n", + " default_factory=list, metadata={\"desc\": \"List of genres for the song\"}\n", " )\n", " recommend: bool = field(\n", - " default_factory=str,\n", - " metadata={\"desc\": \"Whether you would recommend this song\"}\n", + " default_factory=str, metadata={\"desc\": \"Whether you would recommend this song\"}\n", " )\n", "\n", " __output_fields__ = [\"basic_review\", \"cast\", \"genre\", \"recommend\"]" @@ -753,21 +742,19 @@ "source": [ "# 3. two levels of nesting dataclass\n", "\n", - "# all these fields as we use default, it is optional, so \n", + "# all these fields as we use default, it is optional, so\n", "# llm might not output that field if they dont have information\n", "\n", + "\n", "@dataclass\n", "class SongAnalysis(adal.DataClass):\n", " review: DetailedSongReview = field(\n", " default=DetailedSongReview, metadata={\"desc\": \"Song review details\"}\n", " )\n", - " duration: float = field(\n", - " default=None,\n", - " metadata={\"desc\": \"Duration of the song\"}\n", - " )\n", + " duration: float = field(default=None, metadata={\"desc\": \"Duration of the song\"})\n", " awards: Dict[str, int] = field(\n", " default=None,\n", - " metadata={\"desc\": \"Dictionary of award categories and number of wins\"}\n", + " metadata={\"desc\": \"Dictionary of award categories and number of wins\"},\n", " )\n", "\n", " __output_fields__ = [\"review\", \"duration\", \"awards\"]" @@ -788,7 +775,7 @@ "{{output_format_str}}\n", "\n", 
"\n", - " Review this song: {{song_title}} \"\"\"\n" + " Review this song: {{song_title}} \"\"\"" ] }, { @@ -803,17 +790,20 @@ "class SongReviewer(adal.Component):\n", " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", " super().__init__()\n", - " self.additional_structure_prompt = \"Dont use 'type' and 'properties' in output directly give as dict\"\n", + " self.additional_structure_prompt = (\n", + " \"Dont use 'type' and 'properties' in output directly give as dict\"\n", + " )\n", " parser = adal.DataClassParser(\n", - " data_class=SongAnalysis,\n", - " return_data_class=False,\n", - " format_type=\"json\"\n", + " data_class=SongAnalysis, return_data_class=False, format_type=\"json\"\n", " )\n", " self.generator = adal.Generator(\n", " model_client=model_client,\n", " model_kwargs=model_kwargs,\n", " template=song_review_template,\n", - " prompt_kwargs={\"output_format_str\": parser.get_output_format_str() + self.additional_structure_prompt },\n", + " prompt_kwargs={\n", + " \"output_format_str\": parser.get_output_format_str()\n", + " + self.additional_structure_prompt\n", + " },\n", " output_processors=parser,\n", " )\n", "\n", @@ -836,8 +826,8 @@ ], "source": [ "analysis = SongReviewer(\n", - " model_client=GroqAPIClient(),\n", - " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", + " model_client=GroqAPIClient(),\n", + " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", ")\n", "\n", "response = analysis(\"Shape of you\")\n", @@ -886,27 +876,27 @@ "print(f\"Album: {analysis['review']['basic_review']['album']}\")\n", "print(f\"Ranking: {analysis['review']['basic_review']['ranking']}\")\n", "\n", - "for platform, views in analysis['review']['basic_review']['streaming'].items():\n", + "for platform, views in analysis[\"review\"][\"basic_review\"][\"streaming\"].items():\n", " print(f\"- {platform} - {views} million views\")\n", "print(\"\\nPros:\")\n", - "for pro in analysis['review'][\"basic_review\"][\"pros\"]:\n", + "for pro in 
analysis[\"review\"][\"basic_review\"][\"pros\"]:\n", " print(f\"- {pro}\")\n", "\n", "print(\"\\nArtist's:\")\n", - "for actor in analysis['review'][\"cast\"]:\n", - " print(f\"- {actor['name']} as {actor['role']}\")\n", + "for actor in analysis[\"review\"][\"cast\"]:\n", + " print(f\"- {actor['name']} as {actor['role']}\")\n", "\n", - "if analysis['review']['genre']:\n", + "if analysis[\"review\"][\"genre\"]:\n", " print(\"\\nGenere: \")\n", - " for genre in analysis['review']['genre']:\n", + " for genre in analysis[\"review\"][\"genre\"]:\n", " print(f\" {genre} \")\n", "\n", - "if analysis['duration']:\n", + "if analysis[\"duration\"]:\n", " print(f\"\\nDuration: {analysis['duration']} minutes\")\n", "\n", - "if hasattr(analysis, 'awards') and analysis['awards']:\n", + "if hasattr(analysis, \"awards\") and analysis[\"awards\"]:\n", " print(\"\\nAwards:\")\n", - " for category, count in analysis['awards'].items():\n", + " for category, count in analysis[\"awards\"].items():\n", " print(f\"- {category}: {count}\")" ] }, diff --git a/notebooks/tutorials/adalflow_modelclient.ipynb b/notebooks/tutorials/adalflow_modelclient.ipynb index 1674c69a..4d740a01 100644 --- a/notebooks/tutorials/adalflow_modelclient.ipynb +++ b/notebooks/tutorials/adalflow_modelclient.ipynb @@ -294,7 +294,6 @@ "from adalflow.core.types import ModelType, EmbedderOutput\n", "from adalflow.components.model_client import OpenAIClient\n", "from dataclasses import dataclass\n", - "from enum import Enum\n", "from numpy.linalg import norm" ] }, @@ -662,7 +661,7 @@ "from adalflow.components.model_client import OpenAIClient\n", "from adalflow.core.types import ModelType\n", "from adalflow.utils import setup_env\n", - "from typing import List, Dict" + "from typing import List" ] }, { @@ -954,7 +953,6 @@ }, "outputs": [], "source": [ - "import asyncio\n", "import time\n", "from adalflow.components.model_client import (\n", " OpenAIClient,\n", @@ -1173,13 +1171,13 @@ " openai_client = 
OpenAIClient(chat_completion_parser=func)\n", "\n", " # Define a sample query (user question)\n", - " query = \"What is the capital of France?\"\n", + " # query = \"What is the capital of France?\" # Local variable `query` is assigned to but never used\n", "\n", " # Set the model type to LLM (Large Language Model)\n", - " model_type = ModelType.LLM\n", + " # model_type = ModelType.LLM # Local variable `model_type` is assigned to but never used\n", "\n", " # Create the prompt by formatting the user query as a conversation\n", - " prompt = f\"User: {query}\\n\"\n", + " # prompt = f\"User: {query}\\n\" # Local variable `prompt` is assigned to but never used\n", "\n", " # Define any additional parameters needed for the model (e.g., the input string)\n", " prompt_kwargs = {\n", @@ -1301,7 +1299,7 @@ "from adalflow.components.model_client import GroqAPIClient\n", "from adalflow.core.types import ModelType\n", "from adalflow.utils import setup_env\n", - "from typing import List, Dict" + "from typing import List" ] }, { @@ -1312,6 +1310,10 @@ }, "outputs": [], "source": [ + "# need to delete past ChatConversation calss to avoid redefinition error\n", + "del ChatConversation\n", + "\n", + "\n", "class ChatConversation:\n", " def __init__(self):\n", " \"\"\"\n", @@ -1484,8 +1486,6 @@ }, "outputs": [], "source": [ - "import asyncio\n", - "from adalflow.components.model_client import GroqAPIClient\n", "from adalflow.core.types import ModelType\n", "from typing import List" ] @@ -1498,6 +1498,10 @@ }, "outputs": [], "source": [ + "# need to delete past ChatConversation calss to avoid redefinition error\n", + "del ChatConversation\n", + "\n", + "\n", "class ChatConversation:\n", " def __init__(self):\n", " # Using an asynchronous client for communication with GroqAPI\n", @@ -1616,11 +1620,6 @@ }, "outputs": [], "source": [ - "import asyncio\n", - "import time\n", - "from adalflow.components.model_client import (\n", - " GroqAPIClient,\n", - ") # Assuming GroqAPI with .call() and 
.acall() is available\n", "from adalflow.core.types import ModelType" ] }, @@ -1791,7 +1790,7 @@ "\n", "import openai\n", "from adalflow.core.model_client import ModelClient\n", - "from adalflow.core.types import ModelType, GeneratorOutput, EmbedderOutput\n", + "from adalflow.core.types import ModelType, GeneratorOutput\n", "from openai.types import (\n", " CreateEmbeddingResponse,\n", ")\n", diff --git a/notebooks/tutorials/adalflow_text_splitter.ipynb b/notebooks/tutorials/adalflow_text_splitter.ipynb index 66fb81c7..4008f45a 100644 --- a/notebooks/tutorials/adalflow_text_splitter.ipynb +++ b/notebooks/tutorials/adalflow_text_splitter.ipynb @@ -31,7 +31,7 @@ "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", "\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", "\n", "print(\"API keys have been set.\")" ] @@ -76,16 +76,11 @@ "from adalflow.core.types import Document\n", "\n", "# Configure the splitter settings\n", - "text_splitter = TextSplitter(\n", - " split_by=\"word\",\n", - " chunk_size=5,\n", - " chunk_overlap=1\n", - ")\n", + "text_splitter = TextSplitter(split_by=\"word\", chunk_size=5, chunk_overlap=1)\n", "\n", "# Example document\n", "doc = Document(\n", - " text=\"Example text. More example text. Even more text to illustrate.\",\n", - " id=\"doc1\"\n", + " text=\"Example text. More example text. Even more text to illustrate.\", id=\"doc1\"\n", ")\n", "\n", "# Execute the splitting\n", @@ -135,18 +130,13 @@ "from adalflow.core.types import Document\n", "\n", "# Configure the splitter settings\n", - "text_splitter = TextSplitter(\n", - " split_by=\"token\",\n", - " chunk_size=5,\n", - " chunk_overlap=0\n", - ")\n", + "text_splitter = TextSplitter(split_by=\"token\", chunk_size=5, chunk_overlap=0)\n", "\n", "doc = Document(\n", - " text=\"Example text. More example text. 
Even more text to illustrate.\",\n", - " id = \"doc1\"\n", - " )\n", + " text=\"Example text. More example text. Even more text to illustrate.\", id=\"doc1\"\n", + ")\n", "\n", - "splitted_docs = (text_splitter.call(documents=[doc]))\n", + "splitted_docs = text_splitter.call(documents=[doc])\n", "\n", "for doc in splitted_docs:\n", " print(doc)" diff --git a/poetry.lock b/poetry.lock index edc2b949..713cbd90 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "absl-py" @@ -503,10 +503,12 @@ files = [ [package.dependencies] click = ">=8.0.0" +ipython = {version = ">=7.8.0", optional = true, markers = "extra == \"jupyter\""} mypy-extensions = ">=0.4.3" packaging = ">=22.0" pathspec = ">=0.9.0" platformdirs = ">=2" +tokenize-rt = {version = ">=3.2.0", optional = true, markers = "extra == \"jupyter\""} [package.extras] colorama = ["colorama (>=0.4.3)"] @@ -5600,6 +5602,17 @@ webencodings = ">=0.4" doc = ["sphinx", "sphinx_rtd_theme"] test = ["pytest", "ruff"] +[[package]] +name = "tokenize-rt" +version = "6.1.0" +description = "A wrapper around the stdlib `tokenize` which roundtrips." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "tokenize_rt-6.1.0-py2.py3-none-any.whl", hash = "sha256:d706141cdec4aa5f358945abe36b911b8cbdc844545da99e811250c0cee9b6fc"}, + {file = "tokenize_rt-6.1.0.tar.gz", hash = "sha256:e8ee836616c0877ab7c7b54776d2fefcc3bde714449a206762425ae114b53c86"}, +] + [[package]] name = "tokenizers" version = "0.19.1" @@ -5932,11 +5945,6 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, - {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, - {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, - {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, - {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, - {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -6462,4 +6470,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = ">=3.11, <4.0" -content-hash = "df5b3eaad85fc2f943506d095b2e3f7094982d55d461f40a7be13d9bb742fc6f" +content-hash = 
"4cfeaba8b2e4b0191ee52668351e689bee2a69ef93c57bb9d84a319c294ed12d" diff --git a/pyproject.toml b/pyproject.toml index c064d819..1ecc0b9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ faiss-cpu = "^1.8.0.post1" nltk = "^3.9.1" ragas = "^0.1.16" colorama = "^0.4.6" -black = "^24.10.0" +black = {extras = ["jupyter"], version = "^24.10.0"} ruff = "^0.8.0" google-generativeai = "^0.8.3" @@ -62,11 +62,26 @@ build-backend = "poetry.core.masonry.api" [tool.black] line-length = 88 target-version = ["py311"] +include = '\.pyi?$' +extend-exclude = ''' +# A regex preceded with ^/ will apply only to files and directories +# in the root of the project. +^/docs/ +''' [tool.ruff] +line-length = 88 lint.extend-ignore = [ - "E402", # Ignore module-level import issues - "E731", + "E203", # Ignore whitespaces before : in slices (conflicts with Black) + "E402", # Ignore module-level import issues + "E501", # Ignore line too long so that Black handles line length + "E731", # Lambda usage for specific cases "UP007", # Wants | over Union, which breaks 3.8 ] -line-length = 88 +exclude = ["docs/*"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = 'auto' diff --git a/tutorials/database.ipynb b/tutorials/database.ipynb index 8744b1b0..2db749f7 100644 --- a/tutorials/database.ipynb +++ b/tutorials/database.ipynb @@ -15,26 +15,26 @@ "outputs": [], "source": [ "# setup data needed for the notes\n", - "query_1 = \"What are the benefits of renewable energy?\" # gt is [0, 3]\n", - "query_2 = \"How do solar panels impact the environment?\" # gt is [1, 2]\n", + "query_1 = \"What are the benefits of renewable energy?\" # gt is [0, 3]\n", + "query_2 = \"How do solar panels impact the environment?\" # gt is [1, 2]\n", "\n", - "org_documents =[\n", + "org_documents = [\n", " {\n", " \"title\": \"The Impact of Renewable Energy on the Economy\",\n", - " \"content\": \"Renewable energy technologies not only help in 
reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\"\n", + " \"content\": \"Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\",\n", " },\n", " {\n", " \"title\": \"Understanding Solar Panels\",\n", - " \"content\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\"\n", + " \"content\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\",\n", " },\n", " {\n", " \"title\": \"Pros and Cons of Solar Energy\",\n", - " \"content\": \"While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.\"\n", + " \"content\": \"While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. 
The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.\",\n", " },\n", " {\n", - " \"title\": \"Renewable Energy and Its Effects\",\n", - " \"content\": \"Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. However, the initial setup and material sourcing for these technologies can still have environmental impacts.\"\n", - " }\n", + " \"title\": \"Renewable Energy and Its Effects\",\n", + " \"content\": \"Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. However, the initial setup and material sourcing for these technologies can still have environmental impacts.\",\n", + " },\n", "]\n", "\n", "turns = [\n", @@ -42,14 +42,14 @@ " \"user\": \"What are the benefits of renewable energy?\",\n", " \"system\": \"I can see you are interested in renewable energy. Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\",\n", " \"user_time\": \"2021-09-01T12:00:00Z\",\n", - " \"system_time\": \"2021-09-01T12:00:01Z\"\n", + " \"system_time\": \"2021-09-01T12:00:01Z\",\n", " },\n", " {\n", " \"user\": \"How do solar panels impact the environment?\",\n", " \"system\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. 
Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\",\n", " \"user_time\": \"2021-09-01T12:00:02Z\",\n", - " \"system_time\": \"2021-09-01T12:00:03Z\"\n", - " }\n", + " \"system_time\": \"2021-09-01T12:00:03Z\",\n", + " },\n", "]" ] }, @@ -78,7 +78,10 @@ "from adalflow.core.types import Document\n", "\n", "# we will save the content to text and title in the meta_data\n", - "documents = [Document(text=doc['content'], meta_data={'title': doc['title']}) for doc in org_documents]\n", + "documents = [\n", + " Document(text=doc[\"content\"], meta_data={\"title\": doc[\"title\"]})\n", + " for doc in org_documents\n", + "]\n", "print(documents)" ] }, @@ -151,7 +154,7 @@ "source": [ "# prepare the data pipeline\n", "\n", - "from adalflow.core.embedder import Embedder \n", + "from adalflow.core.embedder import Embedder\n", "from adalflow.core.types import ModelClientType\n", "from adalflow.components.data_process import DocumentSplitter, ToEmbeddings\n", "from adalflow.core.component import Sequential\n", @@ -163,14 +166,10 @@ " \"encoding_format\": \"float\",\n", "}\n", "\n", - "splitter_config = {\n", - " \"split_by\": \"word\",\n", - " \"split_length\": 50,\n", - " \"split_overlap\": 10\n", - "}\n", + "splitter_config = {\"split_by\": \"word\", \"split_length\": 50, \"split_overlap\": 10}\n", "\n", "splitter = DocumentSplitter(**splitter_config)\n", - "embedder = Embedder(model_client =ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", + "embedder = Embedder(model_client=ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", "embedder_transformer = ToEmbeddings(embedder, batch_size=2)\n", "data_transformer = Sequential(splitter, embedder_transformer)\n", "print(data_transformer)" @@ -185,13 +184,19 @@ "# prepare mapping functions to map the data to Document object for the pipeline\n", "\n", "from typing import Dict\n", + "\n", + "\n", "# 
mapping function for org_documents\n", "def map_to_document(doc: Dict) -> Document:\n", - " return Document(text=doc['content'], meta_data={'title': doc['title']})\n", + " return Document(text=doc[\"content\"], meta_data={\"title\": doc[\"title\"]})\n", + "\n", "\n", "def map_dialogturn_to_document(turn: DialogTurn) -> Document:\n", " # it can be important to keep the original data's id\n", - " return Document(id=turn.id, text=turn.user_query.query_str + ' ' + turn.assistant_response.response_str)" + " return Document(\n", + " id=turn.id,\n", + " text=turn.user_query.query_str + \" \" + turn.assistant_response.response_str,\n", + " )" ] }, { @@ -304,7 +309,7 @@ "# create a db for the dialog_turns\n", "from adalflow.core.db import LocalDB\n", "\n", - "dialog_turn_db = LocalDB('dialog_turns')\n", + "dialog_turn_db = LocalDB(\"dialog_turns\")\n", "print(dialog_turn_db)\n", "\n", "dialog_turn_db.load(dialog_turns)\n", @@ -397,7 +402,7 @@ ], "source": [ "# save the state of the dialog_turn_db\n", - "dialog_turn_db.save_state('dialog_turn_db_state.pkl')\n", + "dialog_turn_db.save_state(\"dialog_turn_db_state.pkl\")\n", "\n", "print(dialog_turn_db)" ] @@ -432,8 +437,8 @@ ], "source": [ "# restore the state of the restored_dialog_turn_db\n", - "restored_dialog_turn_db = LocalDB.load_state('dialog_turn_db_state.pkl')\n", - "print(restored_dialog_turn_db)\n" + "restored_dialog_turn_db = LocalDB.load_state(\"dialog_turn_db_state.pkl\")\n", + "print(restored_dialog_turn_db)" ] }, { @@ -537,18 +542,15 @@ } ], "source": [ - "# prepare the generator for the dialog turns \n", + "# prepare the generator for the dialog turns\n", "\n", "from adalflow.core import Generator\n", "\n", - "llm_kwargs = {\n", - " \"model\": \"gpt-3.5-turbo\"\n", - "}\n", + "llm_kwargs = {\"model\": \"gpt-3.5-turbo\"}\n", "\n", "# we will use the default prompt, and using input_str and chat_history_str for the final prompt\n", - "generator = Generator(model_client = ModelClientType.OPENAI(), 
model_kwargs=llm_kwargs)\n", - "print(generator)\n", - "\n" + "generator = Generator(model_client=ModelClientType.OPENAI(), model_kwargs=llm_kwargs)\n", + "print(generator)" ] }, { @@ -614,16 +616,18 @@ ], "source": [ "# lets see how the prompt will be if we pass the input_str and chat_history_str\n", - "input_str = \"What are the benefits of renewable energy? Did I ask this before?\" \n", + "input_str = \"What are the benefits of renewable energy? Did I ask this before?\"\n", + "\n", "\n", "def format_chat_history_str(turns: list) -> str:\n", " chat_history_str = []\n", " for turn in turns:\n", - " chat_history_str.append(turn.to_yaml()) # format as yaml\n", + " chat_history_str.append(turn.to_yaml()) # format as yaml\n", " # join with newline\n", - " chat_history_str = '\\n_________\\n'.join(chat_history_str)\n", + " chat_history_str = \"\\n_________\\n\".join(chat_history_str)\n", " return chat_history_str\n", "\n", + "\n", "chat_history_str = format_chat_history_str(dialog_turns)\n", "print(generator.print_prompt(input_str=input_str, chat_history_str=chat_history_str))" ] @@ -663,31 +667,33 @@ } ], "source": [ - "# as we have quite a bit of empty fields, lets exclude them \n", + "# as we have quite a bit of empty fields, lets exclude them\n", "from typing import List\n", "\n", - "input_str = \"What are the benefits of renewable energy? Did I ask this before?\" \n", + "input_str = \"What are the benefits of renewable energy? 
Did I ask this before?\"\n", + "\n", "\n", "def format_chat_history_str(turns: List[DialogTurn]) -> str:\n", " chat_history_str = []\n", " for turn in turns:\n", " chat_history_str.append(\n", - " turn.to_yaml(\n", - " exclude=[\n", - " \"id\",\n", - " \"user_id\",\n", - " \"session_id\",\n", - " \"user_query_timestamp\",\n", - " \"assistant_response_timestamp\",\n", - " \"order\",\n", - " \"metadata\",\n", - " \"vector\",\n", - " ],\n", - " )\n", - " ) \n", - " chat_history_str = '\\n_________\\n'.join(chat_history_str)\n", + " turn.to_yaml(\n", + " exclude=[\n", + " \"id\",\n", + " \"user_id\",\n", + " \"session_id\",\n", + " \"user_query_timestamp\",\n", + " \"assistant_response_timestamp\",\n", + " \"order\",\n", + " \"metadata\",\n", + " \"vector\",\n", + " ],\n", + " )\n", + " )\n", + " chat_history_str = \"\\n_________\\n\".join(chat_history_str)\n", " return chat_history_str\n", "\n", + "\n", "chat_history_str = format_chat_history_str(dialog_turn_db.items[0:1])\n", "print(generator.print_prompt(input_str=input_str, chat_history_str=chat_history_str))" ] @@ -861,6 +867,7 @@ "source": [ "# we will use the retriever to find top_k chunked documents, from its partent_doc_id, we will find the initial dialog_turn, and feed that to the generator\n", "from adalflow.utils.logger import get_logger\n", + "\n", "get_logger()\n", "\n", "embeddings = [item.vector for item in dialog_turn_db.transformed_items[key]]\n", @@ -868,7 +875,7 @@ "retriever.build_index_from_documents(documents=embeddings)\n", "\n", "# top_k_documents = retriever(input=input_str)\n", - "# print(top_k_documents)\n" + "# print(top_k_documents)" ] }, { @@ -907,7 +914,12 @@ "source": [ "# get the parent_doc_id from the top_k_documents\n", "\n", - "parent_doc_ids = set([dialog_turn_db.transformed_items[key][doc_index].parent_doc_id for doc_index in top_k_documents[0].doc_indices])\n", + "parent_doc_ids = set(\n", + " [\n", + " dialog_turn_db.transformed_items[key][doc_index].parent_doc_id\n", + " for 
doc_index in top_k_documents[0].doc_indices\n", + " ]\n", + ")\n", "print(parent_doc_ids)" ] }, @@ -917,7 +929,9 @@ "metadata": {}, "outputs": [], "source": [ - "fetched_dialog_turns=dialog_turn_db.fetch_items(condition=lambda x: x.id in parent_doc_ids)" + "fetched_dialog_turns = dialog_turn_db.fetch_items(\n", + " condition=lambda x: x.id in parent_doc_ids\n", + ")" ] }, { @@ -941,7 +955,9 @@ "source": [ "chat_history_str = format_chat_history_str(fetched_dialog_turns)\n", "\n", - "output = generator(prompt_kwargs={\"input_str\": input_str, \"chat_history_str\": chat_history_str})\n", + "output = generator(\n", + " prompt_kwargs={\"input_str\": input_str, \"chat_history_str\": chat_history_str}\n", + ")\n", "print(output)" ] } diff --git a/tutorials/dataclass.ipynb b/tutorials/dataclass.ipynb index e2631c2b..06be8204 100644 --- a/tutorials/dataclass.ipynb +++ b/tutorials/dataclass.ipynb @@ -8,23 +8,23 @@ "source": [ "from dataclasses import dataclass, field\n", "\n", + "\n", "@dataclass\n", "class Question:\n", - " question: str = field(\n", - " metadata={\"desc\": \"The question asked by the user\"}\n", - " )\n", + " question: str = field(metadata={\"desc\": \"The question asked by the user\"})\n", " metadata: dict = field(\n", " metadata={\"desc\": \"The metadata of the question\"}, default_factory=dict\n", " )\n", "\n", + "\n", "@dataclass\n", "class TrecData:\n", " question: Question = field(\n", " metadata={\"desc\": \"The question asked by the user\"}\n", - " ) # Required field, you have to provide the question field at the instantiation\n", + " ) # Required field, you have to provide the question field at the instantiation\n", " label: int = field(\n", " metadata={\"desc\": \"The label of the question\"}, default=0\n", - " ) # Optional field" + " ) # Optional field" ] }, { @@ -49,6 +49,7 @@ "print(example)\n", "\n", "from dataclasses import asdict\n", + "\n", "print(asdict(example))\n", "reconstructed = TrecData(**asdict(example))\n", 
"print(reconstructed)\n", @@ -97,22 +98,24 @@ "metadata": {}, "outputs": [], "source": [ - "# lets see what DataClass can do \n", + "# lets see what DataClass can do\n", "# 1. allow required field after optional field using required_field on default_factory\n", "\n", "from adalflow.core import DataClass, required_field\n", "\n", + "\n", "@dataclass\n", "class TrecData2(DataClass):\n", " question: Question = field(\n", " metadata={\"desc\": \"The question asked by the user\"}\n", - " ) # Required field, you have to provide the question field at the instantiation\n", + " ) # Required field, you have to provide the question field at the instantiation\n", " label: int = field(\n", " metadata={\"desc\": \"The label of the question\"}, default=0\n", - " ) # Optional field\n", + " ) # Optional field\n", " metadata: dict = field(\n", - " metadata={\"desc\": \"The metadata of the question\"}, default_factory=required_field()\n", - " ) # required field" + " metadata={\"desc\": \"The metadata of the question\"},\n", + " default_factory=required_field(),\n", + " ) # required field" ] }, { @@ -140,10 +143,10 @@ "example = TrecData2(Question(\"What is the capital of France?\"), 1, {\"key\": \"value\"})\n", "print(example)\n", "\n", - "dict_example = TrecData2.to_dict(example) # use as if its a class method\n", + "dict_example = TrecData2.to_dict(example) # use as if its a class method\n", "print(dict_example)\n", "\n", - "dict_example_2 = example.to_dict() # use it as instance method\n", + "dict_example_2 = example.to_dict() # use it as instance method\n", "print(dict_example)\n", "\n", "reconstructed = TrecData2.from_dict(dict_example)\n", @@ -178,7 +181,9 @@ "print(dict_exclude)\n", "\n", "# exclude field of the parent and child class\n", - "dict_exclude = example.to_dict(exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]})\n", + "dict_exclude = example.to_dict(\n", + " exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]}\n", + ")\n", 
"print(dict_exclude)" ] }, @@ -271,8 +276,7 @@ "print(example_str)\n", "\n", "example_str = example.format_example_str(DataClassFormatType.EXAMPLE_YAML)\n", - "print(example_str)\n", - "\n" + "print(example_str)" ] }, { @@ -298,7 +302,7 @@ ], "source": [ "# Now, lets check the data format using class method without instance\n", - "# schema, you can choose to only use properties \n", + "# schema, you can choose to only use properties\n", "\n", "schema = TrecData2.to_schema()\n", "schema" @@ -326,7 +330,9 @@ ], "source": [ "# schema with exclude\n", - "schema_exclude = TrecData2.to_schema(exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]})\n", + "schema_exclude = TrecData2.to_schema(\n", + " exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]}\n", + ")\n", "schema_exclude" ] }, @@ -373,7 +379,9 @@ "source": [ "# exclude field of the parent and child class\n", "\n", - "json_signature_exclude = TrecData2.to_json_signature(exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]})\n", + "json_signature_exclude = TrecData2.to_json_signature(\n", + " exclude={\"TrecData2\": [\"metadata\"], \"Question\": [\"metadata\"]}\n", + ")\n", "print(json_signature_exclude)" ] }, @@ -464,7 +472,7 @@ } ], "source": [ - "# use the DataClassFormatType to control it \n", + "# use the DataClassFormatType to control it\n", "\n", "from adalflow.core import DataClassFormatType\n", "\n", @@ -494,6 +502,8 @@ "source": [ "# load with customizd from dict\n", "from typing import Dict\n", + "\n", + "\n", "@dataclass\n", "class OutputFormat(DataClass):\n", " thought: str = field(\n", @@ -521,6 +531,7 @@ " }\n", " return super().from_dict(data)\n", "\n", + "\n", "data = OutputFormat.from_dict({\"coarse_label\": 1})\n", "print(data)" ] diff --git a/tutorials/embedder.ipynb b/tutorials/embedder.ipynb index 29625454..335e7e6d 100644 --- a/tutorials/embedder.ipynb +++ b/tutorials/embedder.ipynb @@ -508,8 +508,10 @@ "from typing import List\n", "from 
adalflow.core.component import Component\n", "from copy import deepcopy\n", + "\n", + "\n", "class DecreaseEmbeddingDim(Component):\n", - " def __init__(self, old_dim: int, new_dim: int, normalize: bool = True):\n", + " def __init__(self, old_dim: int, new_dim: int, normalize: bool = True):\n", " super().__init__()\n", " self.old_dim = old_dim\n", " self.new_dim = new_dim\n", @@ -525,7 +527,7 @@ " new_embedding = normalize_vector(new_embedding)\n", " embedding.embedding = new_embedding\n", " return output\n", - " \n", + "\n", " def _extra_repr(self) -> str:\n", " repr_str = f\"old_dim={self.old_dim}, new_dim={self.new_dim}, normalize={self.normalize}\"\n", " return repr_str" diff --git a/tutorials/generator.ipynb b/tutorials/generator.ipynb index e8a3fac2..bc93b021 100644 --- a/tutorials/generator.ipynb +++ b/tutorials/generator.ipynb @@ -49,10 +49,10 @@ "\n", "enable_library_logging(level=\"DEBUG\")\n", "\n", - "model_kwargs={\n", + "model_kwargs = {\n", " \"model\": \"gpt-3.5-turbo\",\n", " \"logprobs\": True,\n", - " \"n\": 2, # the number of chat completion choices\n", + " \"n\": 2, # the number of chat completion choices\n", "}\n", "model_client = OpenAIClient(chat_completion_parser=get_probabilities)\n", "generator = Generator(model_client=model_client, model_kwargs=model_kwargs)\n", @@ -80,7 +80,7 @@ "source": [ "from adalflow.core import Component, Generator\n", "from adalflow.components.model_client import GroqAPIClient\n", - "from adalflow.utils import setup_env # noqa\n", + "from adalflow.utils import setup_env # noqa\n", "\n", "\n", "class SimpleQA(Component):\n", @@ -93,7 +93,9 @@ " You:\n", " \"\"\"\n", " self.generator = Generator(\n", - " model_client=GroqAPIClient(), model_kwargs={\"model\": \"llama3-8b-8192\"}, template=template\n", + " model_client=GroqAPIClient(),\n", + " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", + " template=template,\n", " )\n", "\n", " def call(self, query):\n", diff --git a/tutorials/generator_note.py 
b/tutorials/generator_note.py index 644f5cfb..5d4a86cd 100644 --- a/tutorials/generator_note.py +++ b/tutorials/generator_note.py @@ -164,7 +164,6 @@ def use_model_client_enum_to_switch_client(): def create_purely_from_config(): - from adalflow.utils.config import new_component from adalflow.core import Generator @@ -195,7 +194,6 @@ def create_purely_from_config(): def create_purely_from_config_2(): - from adalflow.core import Generator config = { diff --git a/tutorials/model_client.ipynb b/tutorials/model_client.ipynb index 3e5b7b06..b61e7ec7 100644 --- a/tutorials/model_client.ipynb +++ b/tutorials/model_client.ipynb @@ -36,9 +36,9 @@ "\n", "prompt = f\"User: {query}\\n\"\n", "model_kwargs = {\"model\": \"gpt-3.5-turbo\", \"temperature\": 0.5, \"max_tokens\": 100}\n", - "api_kwargs = openai_client.convert_inputs_to_api_kwargs(input=prompt, \n", - " model_kwargs=model_kwargs, \n", - " model_type=model_type)\n", + "api_kwargs = openai_client.convert_inputs_to_api_kwargs(\n", + " input=prompt, model_kwargs=model_kwargs, model_type=model_type\n", + ")\n", "print(f\"api_kwargs: {api_kwargs}\")\n", "\n", "response = openai_client.call(api_kwargs=api_kwargs, model_type=model_type)\n", @@ -49,15 +49,20 @@ "model_type = ModelType.EMBEDDER\n", "# do batch embedding\n", "input = [query] * 2\n", - "model_kwargs = {\"model\": \"text-embedding-3-small\", \"dimensions\": 8, \"encoding_format\": \"float\"}\n", - "api_kwargs = openai_client.convert_inputs_to_api_kwargs(input=input, model_kwargs=model_kwargs, model_type=model_type)\n", + "model_kwargs = {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 8,\n", + " \"encoding_format\": \"float\",\n", + "}\n", + "api_kwargs = openai_client.convert_inputs_to_api_kwargs(\n", + " input=input, model_kwargs=model_kwargs, model_type=model_type\n", + ")\n", "print(f\"api_kwargs: {api_kwargs}\")\n", "\n", "\n", - "\n", "response = openai_client.call(api_kwargs=api_kwargs, model_type=model_type)\n", 
"reponse_embedder_output = openai_client.parse_embedding_response(response)\n", - "print(f\"reponse_embedder_output: {reponse_embedder_output}\")\n" + "print(f\"reponse_embedder_output: {reponse_embedder_output}\")" ] }, { diff --git a/tutorials/parser_note.py b/tutorials/parser_note.py index 80c2c009..ee88a028 100644 --- a/tutorials/parser_note.py +++ b/tutorials/parser_note.py @@ -1,5 +1,4 @@ def examples_of_different_ways_to_parse_string(): - int_str = "42" float_str = "42.0" boolean_str = "True" # json works with true/false @@ -178,7 +177,6 @@ def bool_parser(): def list_parser(): - from adalflow.core.string_parser import ListParser list_str = '["key", "value"]' diff --git a/tutorials/prompt_note.py b/tutorials/prompt_note.py index 8796ec1f..a4a645bd 100644 --- a/tutorials/prompt_note.py +++ b/tutorials/prompt_note.py @@ -1,5 +1,4 @@ def python_str_format_example(task_desc_str: str, input_str: str): - # percent(%) formatting print("%s User: %s" % (task_desc_str, input_str)) @@ -58,7 +57,6 @@ def adalflow_default_prompt(): if __name__ == "__main__": - task_desc_str = "You are a helpful assitant" input_str = "What is the capital of France?" 
tools = ["google", "wikipedia", "wikidata"] diff --git a/tutorials/rag.ipynb b/tutorials/rag.ipynb index 8892f0a2..b5163e51 100644 --- a/tutorials/rag.ipynb +++ b/tutorials/rag.ipynb @@ -16,11 +16,12 @@ "outputs": [], "source": [ "# the data pipeline and the backend data processing\n", - "from adalflow.core.embedder import Embedder \n", + "from adalflow.core.embedder import Embedder\n", "from adalflow.core.types import ModelClientType\n", "from adalflow.components.data_process import TextSplitter, ToEmbeddings\n", "from adalflow.core.container import Sequential\n", "\n", + "\n", "def prepare_data_pipeline():\n", " model_kwargs = {\n", " \"model\": \"text-embedding-3-small\",\n", @@ -28,14 +29,12 @@ " \"encoding_format\": \"float\",\n", " }\n", "\n", - " splitter_config = {\n", - " \"split_by\": \"word\",\n", - " \"split_length\": 50,\n", - " \"split_overlap\": 10\n", - " }\n", + " splitter_config = {\"split_by\": \"word\", \"split_length\": 50, \"split_overlap\": 10}\n", "\n", " splitter = TextSplitter(**splitter_config)\n", - " embedder = Embedder(model_client =ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", + " embedder = Embedder(\n", + " model_client=ModelClientType.OPENAI(), model_kwargs=model_kwargs\n", + " )\n", " embedder_transformer = ToEmbeddings(embedder, batch_size=2)\n", " data_transformer = Sequential(splitter, embedder_transformer)\n", " print(data_transformer)" diff --git a/tutorials/react_note.ipynb b/tutorials/react_note.ipynb index 0b647a4b..b1cc8bba 100644 --- a/tutorials/react_note.ipynb +++ b/tutorials/react_note.ipynb @@ -120,7 +120,7 @@ " test_react_agent(ModelClientType.OPENAI(), gpt_model_kwargs)\n", " print(\"Done\")\n", "\n", - " test_react_agent_use_examples(ModelClientType.GROQ(), llama3_model_kwargs)\n" + " test_react_agent_use_examples(ModelClientType.GROQ(), llama3_model_kwargs)" ] }, { @@ -134,6 +134,7 @@ "import asyncio\n", "import time\n", "\n", + "\n", "def is_running_in_event_loop() -> bool:\n", " try:\n", " loop = 
asyncio.get_running_loop()\n", @@ -143,7 +144,8 @@ " return False\n", " except RuntimeError:\n", " return False\n", - " \n", + "\n", + "\n", "def sync_func():\n", " time.sleep(1)\n", " print(\"Sync function\")\n", diff --git a/tutorials/retriever.ipynb b/tutorials/retriever.ipynb index c464f46b..859a6de8 100644 --- a/tutorials/retriever.ipynb +++ b/tutorials/retriever.ipynb @@ -23,26 +23,26 @@ "outputs": [], "source": [ "# decide a meaningful query and a list of documents\n", - "query_1 = \"What are the benefits of renewable energy?\" # gt is [0, 3]\n", - "query_2 = \"How do solar panels impact the environment?\" # gt is [1, 2]\n", + "query_1 = \"What are the benefits of renewable energy?\" # gt is [0, 3]\n", + "query_2 = \"How do solar panels impact the environment?\" # gt is [1, 2]\n", "\n", - "documents =[\n", + "documents = [\n", " {\n", " \"title\": \"The Impact of Renewable Energy on the Economy\",\n", - " \"content\": \"Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\"\n", + " \"content\": \"Renewable energy technologies not only help in reducing greenhouse gas emissions but also contribute significantly to the economy by creating jobs in the manufacturing and installation sectors. The growth in renewable energy usage boosts local economies through increased investment in technology and infrastructure.\",\n", " },\n", " {\n", " \"title\": \"Understanding Solar Panels\",\n", - " \"content\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. 
Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\"\n", + " \"content\": \"Solar panels convert sunlight into electricity by allowing photons, or light particles, to knock electrons free from atoms, generating a flow of electricity. Solar panels are a type of renewable energy technology that has been found to have a significant positive effect on the environment by reducing the reliance on fossil fuels.\",\n", " },\n", " {\n", " \"title\": \"Pros and Cons of Solar Energy\",\n", - " \"content\": \"While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.\"\n", + " \"content\": \"While solar energy offers substantial environmental benefits, such as reducing carbon footprints and pollution, it also has downsides. The production of solar panels can lead to hazardous waste, and large solar farms require significant land, which can disrupt local ecosystems.\",\n", " },\n", " {\n", - " \"title\": \"Renewable Energy and Its Effects\",\n", - " \"content\": \"Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. However, the initial setup and material sourcing for these technologies can still have environmental impacts.\"\n", - " }\n", + " \"title\": \"Renewable Energy and Its Effects\",\n", + " \"content\": \"Renewable energy sources like wind, solar, and hydro power play a crucial role in combating climate change. They do not produce greenhouse gases during operation, making them essential for sustainable development. 
However, the initial setup and material sourcing for these technologies can still have environmental impacts.\",\n", + " },\n", "]" ] }, @@ -67,7 +67,7 @@ ], "source": [ "# create an embedder\n", - "from adalflow.core.embedder import Embedder \n", + "from adalflow.core.embedder import Embedder\n", "from adalflow.core.types import ModelClientType\n", "\n", "\n", @@ -77,7 +77,7 @@ " \"encoding_format\": \"float\",\n", "}\n", "\n", - "embedder = Embedder(model_client =ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", + "embedder = Embedder(model_client=ModelClientType.OPENAI(), model_kwargs=model_kwargs)\n", "embedder" ] }, @@ -128,7 +128,7 @@ "\n", "from adalflow.components.retriever import FAISSRetriever\n", "\n", - "# pass the documents in the initialization \n", + "# pass the documents in the initialization\n", "documents_embeddings = [x.embedding for x in output.data]\n", "retriever = FAISSRetriever(top_k=2, embedder=embedder, documents=documents_embeddings)\n", "retriever" @@ -153,7 +153,7 @@ "# execute the retriever\n", "output_1 = retriever(input=query_1)\n", "output_2 = retriever(input=query_2)\n", - "output_3 = retriever(input = [query_1, query_2])\n", + "output_3 = retriever(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -198,7 +198,7 @@ "\n", "output_1 = retriever_1(input=query_1)\n", "output_2 = retriever_1(input=query_2)\n", - "output_3 = retriever_1(input = [query_1, query_2])\n", + "output_3 = retriever_1(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -229,7 +229,9 @@ "\n", "document_map_func = lambda x: x[\"content\"]\n", "\n", - "bm25_retriever = BM25Retriever(top_k=2, documents=documents, document_map_func=document_map_func)\n", + "bm25_retriever = BM25Retriever(\n", + " top_k=2, documents=documents, document_map_func=document_map_func\n", + ")\n", "print(bm25_retriever)" ] }, @@ -250,7 +252,10 @@ "source": [ "# show how a word splitter and a token 
splitter differs\n", "\n", - "from adalflow.components.retriever.bm25_retriever import split_text_by_word_fn_then_lower_tokenized, split_text_by_word_fn\n", + "from adalflow.components.retriever.bm25_retriever import (\n", + " split_text_by_word_fn_then_lower_tokenized,\n", + " split_text_by_word_fn,\n", + ")\n", "\n", "query_1_words = split_text_by_word_fn(query_1)\n", "query_1_tokens = split_text_by_word_fn_then_lower_tokenized(query_1)\n", @@ -277,7 +282,7 @@ "source": [ "output_1 = bm25_retriever(input=query_1)\n", "output_2 = bm25_retriever(input=query_2)\n", - "output_3 = bm25_retriever(input = [query_1, query_2])\n", + "output_3 = bm25_retriever(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -324,7 +329,7 @@ "\n", "output_1 = bm25_retriever(input=query_1_short)\n", "output_2 = bm25_retriever(input=query_2_short)\n", - "output_3 = bm25_retriever(input = [query_1_short, query_2_short])\n", + "output_3 = bm25_retriever(input=[query_1_short, query_2_short])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -351,11 +356,13 @@ "document_map_func = lambda x: x[\"title\"] + \" \" + x[\"content\"]\n", "\n", "print(documents)\n", - "bm25_retriever.build_index_from_documents(documents=documents, document_map_func=document_map_func)\n", + "bm25_retriever.build_index_from_documents(\n", + " documents=documents, document_map_func=document_map_func\n", + ")\n", "\n", "output_1 = bm25_retriever(input=query_1_short)\n", "output_2 = bm25_retriever(input=query_2_short)\n", - "output_3 = bm25_retriever(input = [query_1_short, query_2_short])\n", + "output_3 = bm25_retriever(input=[query_1_short, query_2_short])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -425,7 +432,9 @@ "source": [ "# build index and run queries\n", "document_map_func = lambda x: x[\"content\"]\n", - "reranker.build_index_from_documents(documents=documents, document_map_func=document_map_func)\n", + 
"reranker.build_index_from_documents(\n", + " documents=documents, document_map_func=document_map_func\n", + ")\n", "\n", "print(reranker)" ] @@ -449,7 +458,7 @@ "# run queries\n", "output_1 = reranker(input=query_1)\n", "output_2 = reranker(input=query_2)\n", - "output_3 = reranker(input = [query_1, query_2])\n", + "output_3 = reranker(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -484,7 +493,7 @@ " documents=documents,\n", " document_map_func=document_map_func,\n", ")\n", - "print(reranker)\n" + "print(reranker)" ] }, { @@ -495,6 +504,7 @@ "source": [ "# run queries\n", "import torch\n", + "\n", "# Set the number of threads for PyTorch, avoid segementation fault\n", "torch.set_num_threads(1)\n", "torch.set_num_interop_threads(1)" @@ -516,11 +526,9 @@ } ], "source": [ - "\n", - "\n", "output_1 = reranker(input=query_1)\n", "output_2 = reranker(input=query_2)\n", - "output_3 = reranker(input = [query_1, query_2])\n", + "output_3 = reranker(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -551,17 +559,19 @@ ], "source": [ "# try to use title this time\n", - "document_map_func = lambda x: x[\"title\"] + \" \" + x[\"content\"] # no \n", + "document_map_func = lambda x: x[\"title\"] + \" \" + x[\"content\"] # no\n", "\n", - "reranker.build_index_from_documents(documents=documents, document_map_func=document_map_func)\n", + "reranker.build_index_from_documents(\n", + " documents=documents, document_map_func=document_map_func\n", + ")\n", "\n", "# run queries\n", "output_1 = reranker(input=query_1)\n", "output_2 = reranker(input=query_2)\n", - "output_3 = reranker(input = [query_1, query_2])\n", + "output_3 = reranker(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", - "print(output_3)\n" + "print(output_3)" ] }, { @@ -617,12 +627,12 @@ "}\n", "document_map_func = lambda x: x[\"content\"]\n", "llm_retriever = LLMRetriever(\n", - " top_k=2, \n", - " 
model_client=model_client, \n", - " model_kwargs=model_kwargs, \n", - " documents=documents, \n", - " document_map_func=document_map_func\n", - " )\n", + " top_k=2,\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " documents=documents,\n", + " document_map_func=document_map_func,\n", + ")\n", "print(llm_retriever)" ] }, @@ -645,7 +655,7 @@ "# run queries\n", "output_1 = llm_retriever(input=query_1)\n", "output_2 = llm_retriever(input=query_2)\n", - "output_3 = llm_retriever(input = [query_1, query_2])\n", + "output_3 = llm_retriever(input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -674,7 +684,7 @@ "}\n", "output_1 = llm_retriever(model_kwargs=model_kwargs, input=query_1)\n", "output_2 = llm_retriever(model_kwargs=model_kwargs, input=query_2)\n", - "output_3 = llm_retriever(model_kwargs=model_kwargs, input = [query_1, query_2])\n", + "output_3 = llm_retriever(model_kwargs=model_kwargs, input=[query_1, query_2])\n", "print(output_1)\n", "print(output_2)\n", "print(output_3)" @@ -739,12 +749,14 @@ "import fsspec\n", "import os\n", "import time\n", + "\n", + "\n", "def get_local_file_metadata(file_path: str):\n", " stat = os.stat(file_path)\n", " return {\n", - " 'size': stat.st_size, # File size in bytes\n", - " 'creation_date': time.ctime(stat.st_ctime), # Creation time\n", - " 'last_modified_date': time.ctime(stat.st_mtime) # Last modification time\n", + " \"size\": stat.st_size, # File size in bytes\n", + " \"creation_date\": time.ctime(stat.st_ctime), # Creation time\n", + " \"last_modified_date\": time.ctime(stat.st_mtime), # Last modification time\n", " }\n", "\n", "\n", @@ -774,9 +786,9 @@ " Returns:\n", " str: The content of the text file.\n", " \"\"\"\n", - " with fsspec.open(file_path, 'r') as file:\n", + " with fsspec.open(file_path, \"r\") as file:\n", " content = file.read()\n", - " return content\n" + " return content" ] }, { @@ -804,8 +816,8 @@ } ], "source": [ - "text = 
load_text_file('paul_graham/paul_graham_essay.txt')\n", - "file_metadata = get_local_file_metadata('paul_graham/paul_graham_essay.txt')\n", + "text = load_text_file(\"paul_graham/paul_graham_essay.txt\")\n", + "file_metadata = get_local_file_metadata(\"paul_graham/paul_graham_essay.txt\")\n", "print(text[:1000])\n", "print(file_metadata)" ] @@ -839,9 +851,12 @@ "from adalflow.core.types import Document\n", "\n", "# sentence splitting is confusing, the length needs to be smaller\n", - "metadata = {\"title\": \"Paul Graham's essay\", \"path\": \"data/paul_graham/paul_graham_essay.txt\"}\n", + "metadata = {\n", + " \"title\": \"Paul Graham's essay\",\n", + " \"path\": \"data/paul_graham/paul_graham_essay.txt\",\n", + "}\n", "metadata.update(file_metadata)\n", - "documents = [Document(text = text, meta_data = metadata)]\n", + "documents = [Document(text=text, meta_data=metadata)]\n", "splitter = DocumentSplitter(split_by=\"word\", split_length=800, split_overlap=200)\n", "\n", "print(documents)\n", @@ -925,7 +940,7 @@ ], "source": [ "# split the document\n", - "splitted_documents = splitter(documents = documents)\n", + "splitted_documents = splitter(documents=documents)\n", "print(splitted_documents[0], len(splitted_documents))" ] }, @@ -1303,15 +1318,20 @@ "\n", "# 1. 
set up the tracing for failed call as the retriever has generator attribute\n", "\n", + "\n", "@trace_generator_call(save_dir=\"tutorials/traces\")\n", "class LoggedLLMRetriever(LLMRetriever):\n", " pass\n", + "\n", + "\n", "top_k = 2\n", "retriever = LoggedLLMRetriever(\n", - " top_k = top_k, model_client=OpenAIClient(), model_kwargs={\"model\": \"gpt-3.5-turbo\"}\n", + " top_k=top_k, model_client=OpenAIClient(), model_kwargs={\"model\": \"gpt-3.5-turbo\"}\n", ")\n", "\n", - "retriever.build_index_from_documents(documents=[doc.text for doc in splitted_documents[0:16]])\n", + "retriever.build_index_from_documents(\n", + " documents=[doc.text for doc in splitted_documents[0:16]]\n", + ")\n", "\n", "print(retriever)\n", "retriever.generator.print_prompt()" @@ -1373,7 +1393,9 @@ "source": [ "# output[0].documents = [splitted_documents[idx] for idx in output[0].doc_indices]\n", "for per_query_output in output:\n", - " per_query_output.documents = [splitted_documents[idx] for idx in per_query_output.doc_indices]\n", + " per_query_output.documents = [\n", + " splitted_documents[idx] for idx in per_query_output.doc_indices\n", + " ]\n", "print(\"output.documents\", output[0].documents)\n", "len(output)" ] @@ -1537,51 +1559,51 @@ "source": [ "# create data transformer\n", "data_transformer_config = { # attribute and its config to recreate the component\n", - " \"embedder\":{\n", - " \"component_name\": \"Embedder\",\n", - " \"component_config\": {\n", - " \"model_client\": {\n", - " \"component_name\": \"OpenAIClient\",\n", - " \"component_config\": {},\n", - " },\n", - " \"model_kwargs\": {\n", - " \"model\": \"text-embedding-3-small\",\n", - " \"dimensions\": 256,\n", - " \"encoding_format\": \"float\",\n", - " },\n", + " \"embedder\": {\n", + " \"component_name\": \"Embedder\",\n", + " \"component_config\": {\n", + " \"model_client\": {\n", + " \"component_name\": \"OpenAIClient\",\n", + " \"component_config\": {},\n", " },\n", - " },\n", - " \"document_splitter\": 
{\n", - " \"component_name\": \"DocumentSplitter\",\n", - " \"component_config\": {\n", - " \"split_by\": \"word\",\n", - " \"split_length\": 400,\n", - " \"split_overlap\": 200,\n", + " \"model_kwargs\": {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 256,\n", + " \"encoding_format\": \"float\",\n", " },\n", " },\n", - " \"to_embeddings\": {\n", - " \"component_name\": \"ToEmbeddings\",\n", - " \"component_config\": {\n", - " \"vectorizer\": {\n", - " \"component_name\": \"Embedder\",\n", - " \"component_config\": {\n", - " \"model_client\": {\n", - " \"component_name\": \"OpenAIClient\",\n", - " \"component_config\": {},\n", - " },\n", - " \"model_kwargs\": {\n", - " \"model\": \"text-embedding-3-small\",\n", - " \"dimensions\": 256,\n", - " \"encoding_format\": \"float\",\n", - " },\n", + " },\n", + " \"document_splitter\": {\n", + " \"component_name\": \"DocumentSplitter\",\n", + " \"component_config\": {\n", + " \"split_by\": \"word\",\n", + " \"split_length\": 400,\n", + " \"split_overlap\": 200,\n", + " },\n", + " },\n", + " \"to_embeddings\": {\n", + " \"component_name\": \"ToEmbeddings\",\n", + " \"component_config\": {\n", + " \"vectorizer\": {\n", + " \"component_name\": \"Embedder\",\n", + " \"component_config\": {\n", + " \"model_client\": {\n", + " \"component_name\": \"OpenAIClient\",\n", + " \"component_config\": {},\n", + " },\n", + " \"model_kwargs\": {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 256,\n", + " \"encoding_format\": \"float\",\n", " },\n", - " # the other config is to instantiate the entity (class and function) with the given config as arguments\n", - " # \"entity_state\": \"storage/embedder.pkl\", # this will load back the state of the entity\n", " },\n", - " \"batch_size\": 100,\n", + " # the other config is to instantiate the entity (class and function) with the given config as arguments\n", + " # \"entity_state\": \"storage/embedder.pkl\", # this will load back the state of the 
entity\n", " },\n", + " \"batch_size\": 100,\n", " },\n", - " }" + " },\n", + "}" ] }, { @@ -1653,7 +1675,9 @@ "source": [ "from adalflow.core.component import Sequential\n", "\n", - "data_transformer = Sequential(components[\"document_splitter\"], components[\"to_embeddings\"])\n", + "data_transformer = Sequential(\n", + " components[\"document_splitter\"], components[\"to_embeddings\"]\n", + ")\n", "data_transformer" ] }, @@ -1861,7 +1885,7 @@ "source": [ "# test using only the document splitter\n", "text_split = components[\"document_splitter\"](documents)\n", - "print(text_split)\n" + "print(text_split)" ] }, { @@ -2132,7 +2156,7 @@ } ], "source": [ - "# check the length of all documents,text \n", + "# check the length of all documents,text\n", "lengths = set([doc.estimated_num_tokens for doc in documents])\n", "print(lengths)" ] @@ -2155,7 +2179,7 @@ "for doc in documents:\n", " if len(doc.vector) != 256:\n", " print(doc)\n", - " total+=1\n", + " total += 1\n", "print(total)" ] }, @@ -2334,9 +2358,9 @@ } ], "source": [ - "len_documents=len(restored_db.documents)\n", + "len_documents = len(restored_db.documents)\n", "keys = list(restored_db.transformed_documents.keys())\n", - "len_transformed_documents=len(restored_db.transformed_documents[keys[0]])\n", + "len_transformed_documents = len(restored_db.transformed_documents[keys[0]])\n", "print(len_documents, len_transformed_documents, keys)" ] }, @@ -2367,7 +2391,7 @@ ], "source": [ "# lets' print out part of the vector\n", - "restored_db.transformed_documents[keys[0]][0].vector[0:10]\n" + "restored_db.transformed_documents[keys[0]][0].vector[0:10]" ] }, { @@ -2397,11 +2421,9 @@ } ], "source": [ - "\n", "from adalflow.components.retriever import FAISSRetriever\n", "\n", "\n", - "\n", "retriever = FAISSRetriever(embedder=components[\"embedder\"], top_k=5)\n", "print(retriever)" ] @@ -2447,6 +2469,7 @@ "source": [ "# convert vectors to numpy array\n", "import numpy as np\n", + "\n", "vectors_np = np.array(vectors, 
dtype=np.float32)" ] }, @@ -2521,7 +2544,9 @@ "source": [ "# get initial documents\n", "for per_query_output in output:\n", - " per_query_output.documents = [documents[idx] for idx in per_query_output.doc_indices]\n", + " per_query_output.documents = [\n", + " documents[idx] for idx in per_query_output.doc_indices\n", + " ]\n", "\n", "output" ] @@ -2591,7 +2616,9 @@ "outputs": [], "source": [ "retriever = BM25Retriever(top_k=1)\n", - "retriever.build_index_from_documents([\"hello world\", \"world is beautiful\", \"today is a good day\"])\n", + "retriever.build_index_from_documents(\n", + " [\"hello world\", \"world is beautiful\", \"today is a good day\"]\n", + ")\n", "output = retriever.retrieve(\"hello\")\n", "output" ] diff --git a/tutorials/tools.ipynb b/tutorials/tools.ipynb index c32b9420..3b962782 100644 --- a/tutorials/tools.ipynb +++ b/tutorials/tools.ipynb @@ -20,6 +20,7 @@ "\n", "client = OpenAI()\n", "\n", + "\n", "# Example dummy function hard coded to return the same weather\n", "# In production, this could be your backend API or an external API\n", "def get_current_weather(location, unit=\"fahrenheit\"):\n", @@ -27,15 +28,23 @@ " if \"tokyo\" in location.lower():\n", " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": unit})\n", " elif \"san francisco\" in location.lower():\n", - " return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit})\n", + " return json.dumps(\n", + " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit}\n", + " )\n", " elif \"paris\" in location.lower():\n", " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": unit})\n", " else:\n", " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", "\n", + "\n", "def run_conversation():\n", " # Step 1: send the conversation and available functions to the model\n", - " messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San 
Francisco, Tokyo, and Paris?\"}]\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\",\n", + " }\n", + " ]\n", " tools = [\n", " {\n", " \"type\": \"function\",\n", @@ -95,6 +104,8 @@ " messages=messages,\n", " ) # get a new response from the model where it can see the function response\n", " return second_response\n", + "\n", + "\n", "print(run_conversation())" ] }, @@ -110,16 +121,19 @@ "import time\n", "import asyncio\n", "\n", + "\n", "def multiply(a: int, b: int) -> int:\n", " \"\"\"Multiply two numbers.\"\"\"\n", " time.sleep(1)\n", " return a * b\n", "\n", + "\n", "def add(a: int, b: int) -> int:\n", " \"\"\"Add two numbers.\"\"\"\n", " time.sleep(1)\n", " return a + b\n", "\n", + "\n", "async def divide(a: float, b: float) -> float:\n", " \"\"\"Divide two numbers.\"\"\"\n", " await asyncio.sleep(1)\n", @@ -136,15 +150,20 @@ " \"\"\"Sum the elements of an array.\"\"\"\n", " return np.sum(arr)\n", "\n", + "\n", "x = 2\n", + "\n", + "\n", "@dataclass\n", "class Point:\n", " x: int\n", " y: int\n", "\n", + "\n", "def add_points(p1: Point, p2: Point) -> Point:\n", " return Point(p1.x + p2.x, p1.y + p2.y)\n", "\n", + "\n", "all_functions = [multiply, add, divide, search, numpy_sum, add_points]\n", "\n", "all_functions_dict = {f.__name__: f for f in all_functions}" @@ -173,10 +192,8 @@ "\n", "from adalflow.core.func_tool import FunctionTool\n", "\n", - "functions =[multiply, add, divide, search, numpy_sum, add_points]\n", - "tools = [\n", - " FunctionTool(fn=fn) for fn in functions\n", - "]\n", + "functions = [multiply, add, divide, search, numpy_sum, add_points]\n", + "tools = [FunctionTool(fn=fn) for fn in functions]\n", "for tool in tools:\n", " print(tool)" ] @@ -188,7 +205,7 @@ "outputs": [], "source": [ "# create a context map\n", - "context_map = {tool.definition.func_name: tool for tool in tools}\n" + "context_map = {tool.definition.func_name: tool for tool in 
tools}" ] }, { @@ -295,7 +312,7 @@ } ], "source": [ - "# execute get_current_weather using function call \n", + "# execute get_current_weather using function call\n", "\n", "ft.call(**{\"location\": \"San Francisco\", \"unit\": \"celsius\"})" ] @@ -344,8 +361,7 @@ "print(tools[2].execute(**{\"a\": 10, \"b\": 2}))\n", "\n", "display(await tools[2].acall(**{\"a\": 10, \"b\": 2}))\n", - "display(await tools[2].execute(**{\"a\": 10, \"b\": 2}))\n", - "\n" + "display(await tools[2].execute(**{\"a\": 10, \"b\": 2}))" ] }, { @@ -442,34 +458,38 @@ } ], "source": [ - "# call all the above functions \n", + "# call all the above functions\n", "import nest_asyncio\n", "\n", "nest_asyncio.apply()\n", "\n", "\n", - "\n", "async def async_function_1():\n", " await asyncio.sleep(1)\n", " return \"Function 1 completed\"\n", "\n", + "\n", "def sync_function_1():\n", " time.sleep(1)\n", " return \"Function 1 completed\"\n", "\n", + "\n", "async def async_function_2():\n", " await asyncio.sleep(2)\n", " return \"Function 2 completed\"\n", "\n", + "\n", "def sync_function_2():\n", " time.sleep(2)\n", " return \"Function 2 completed\"\n", "\n", + "\n", "async_tool_1 = FunctionTool(async_function_1)\n", "sync_tool_1 = FunctionTool(sync_function_2)\n", "async_tool_2 = FunctionTool(async_function_2)\n", "sync_tool_2 = FunctionTool(sync_function_2)\n", "\n", + "\n", "def run_sync_and_async_mix_without_wait():\n", " # both sync and async tool can use execute\n", " # sync tool can also use call\n", @@ -484,6 +504,7 @@ " print(f\"run_sync_and_async_mix_without_wait time: {end_time - start_time}\")\n", " return results\n", "\n", + "\n", "async def run_sync_and_async_mix():\n", " # both sync and async tool can use execute&to_thread\n", " # async tool can also use acall without to_thread\n", @@ -492,13 +513,13 @@ " results = await asyncio.gather(\n", " async_tool_1.execute(),\n", " sync_tool_1.execute(),\n", - " \n", " async_tool_2.acall(),\n", " )\n", " end_time = time.time()\n", " 
print(f\"run_sync_and_async_mix time: {end_time - start_time}\")\n", " return results\n", "\n", + "\n", "# Execute functions\n", "results_without_wait = run_sync_and_async_mix_without_wait()\n", "display(results_without_wait)\n", @@ -675,7 +696,7 @@ "small_tool_manager = ToolManager(tools=tools[:2])\n", "\n", "renered_prompt = prompt(tools=tool_manager.yaml_definitions)\n", - "print(renered_prompt)\n" + "print(renered_prompt)" ] }, { @@ -703,16 +724,16 @@ } ], "source": [ - "# let's render the output format using Function class \n", + "# let's render the output format using Function class\n", "\n", "from adalflow.core.types import Function\n", "\n", "\n", - "output_data_class = Function \n", + "output_data_class = Function\n", "output_format_str = output_data_class.to_json_signature(exclude=[\"thought\"])\n", "\n", - "renered_prompt= prompt(output_format_str=output_format_str)\n", - "print(renered_prompt)\n" + "renered_prompt = prompt(output_format_str=output_format_str)\n", + "print(renered_prompt)" ] }, { @@ -776,7 +797,7 @@ "\n", "func_parser = JsonOutputParser(data_class=Function)\n", "instructions = func_parser.format_instructions(exclude=[\"thought\"])\n", - "print(instructions)\n" + "print(instructions)" ] }, { @@ -844,9 +865,7 @@ "model_kwargs = {\"model\": \"gpt-3.5-turbo\"}\n", "prompt_kwargs = {\n", " \"tools\": tool_manager.yaml_definitions,\n", - " \"output_format_str\": func_parser.format_instructions(\n", - " exclude=[\"thought\", \"args\"]\n", - " ),\n", + " \"output_format_str\": func_parser.format_instructions(exclude=[\"thought\", \"args\"]),\n", "}\n", "generator = Generator(\n", " model_client=ModelClientType.OPENAI(),\n", @@ -887,14 +906,14 @@ "source": [ "# call queries\n", "queries = [\n", - " \"add 2 and 3\",\n", - " \"search for something\",\n", - " \"add points (1, 2) and (3, 4)\",\n", - " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", - " \"multiply 2 with local variable x\",\n", - " \"divide 2 by 3\",\n", - " \"Add 5 to 
variable y\",\n", - " ]" + " \"add 2 and 3\",\n", + " \"search for something\",\n", + " \"add points (1, 2) and (3, 4)\",\n", + " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", + " \"multiply 2 with local variable x\",\n", + " \"divide 2 by 3\",\n", + " \"Add 5 to variable y\",\n", + "]" ] }, { @@ -1046,7 +1065,6 @@ } ], "source": [ - "\n", "for idx, query in enumerate(queries):\n", " prompt_kwargs = {\"input_str\": query}\n", " print(f\"\\n{idx} Query: {query}\")\n", @@ -1056,10 +1074,12 @@ " # print(f\"LLM raw output: {result.raw_response}\")\n", " func = Function.from_dict(result.data)\n", " print(f\"Function: {func}\")\n", - " func_output= tool_manager.execute_func(func)\n", + " func_output = tool_manager.execute_func(func)\n", " display(f\"Function output: {func_output}\")\n", " except Exception as e:\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")" + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )" ] }, { @@ -1100,7 +1120,7 @@ } ], "source": [ - "# let's use FunctionExpression to call the function instead \n", + "# let's use FunctionExpression to call the function instead\n", "\n", "from adalflow.core.types import FunctionExpression\n", "\n", @@ -1109,7 +1129,9 @@ "print(output_format_str)\n", "\n", "# lets' add one example to be more robust that they should call it with function call expression\n", - "example = FunctionExpression.from_function(thought=None, func=add_points, **{\"p1\": Point(1, 2), \"p2\": Point(3, 4)})\n", + "example = FunctionExpression.from_function(\n", + " thought=None, func=add_points, **{\"p1\": Point(1, 2), \"p2\": Point(3, 4)}\n", + ")\n", "print(example)" ] }, @@ -1258,15 +1280,15 @@ "instructions = parser.format_instructions(exclude=[\"thought\"])\n", "\n", "prompt_kwargs = {\n", - " \"tools\": [tool.definition.to_yaml() for tool in tools],\n", - " \"output_format_str\": 
parser.format_instructions(exclude=[\"thought\"]),\n", - " }\n", + " \"tools\": [tool.definition.to_yaml() for tool in tools],\n", + " \"output_format_str\": parser.format_instructions(exclude=[\"thought\"]),\n", + "}\n", "generator = Generator(\n", " model_client=ModelClientType.OPENAI(),\n", " model_kwargs=model_kwargs,\n", " template=template,\n", " prompt_kwargs=prompt_kwargs,\n", - " output_processors=parser\n", + " output_processors=parser,\n", ")\n", "\n", "generator.print_prompt(**prompt_kwargs)" @@ -1291,67 +1313,64 @@ "\n", "# Define a list of safe built-ins\n", "SAFE_BUILTINS = {\n", - " 'abs': abs,\n", - " 'all': all,\n", - " 'any': any,\n", - " 'bin': bin,\n", - " 'bool': bool,\n", - " 'bytearray': bytearray,\n", - " 'bytes': bytes,\n", - " 'callable': callable,\n", - " 'chr': chr,\n", - " 'complex': complex,\n", - " 'dict': dict,\n", - " 'divmod': divmod,\n", - " 'enumerate': enumerate,\n", - " 'filter': filter,\n", - " 'float': float,\n", - " 'format': format,\n", - " 'frozenset': frozenset,\n", - " 'getattr': getattr,\n", - " 'hasattr': hasattr,\n", - " 'hash': hash,\n", - " 'hex': hex,\n", - " 'int': int,\n", - " 'isinstance': isinstance,\n", - " 'issubclass': issubclass,\n", - " 'iter': iter,\n", - " 'len': len,\n", - " 'list': list,\n", - " 'map': map,\n", - " 'max': max,\n", - " 'min': min,\n", - " 'next': next,\n", - " 'object': object,\n", - " 'oct': oct,\n", - " 'ord': ord,\n", - " 'pow': pow,\n", - " 'range': range,\n", - " 'repr': repr,\n", - " 'reversed': reversed,\n", - " 'round': round,\n", - " 'set': set,\n", - " 'slice': slice,\n", - " 'sorted': sorted,\n", - " 'str': str,\n", - " 'sum': sum,\n", - " 'tuple': tuple,\n", - " 'type': type,\n", - " 'zip': zip,\n", + " \"abs\": abs,\n", + " \"all\": all,\n", + " \"any\": any,\n", + " \"bin\": bin,\n", + " \"bool\": bool,\n", + " \"bytearray\": bytearray,\n", + " \"bytes\": bytes,\n", + " \"callable\": callable,\n", + " \"chr\": chr,\n", + " \"complex\": complex,\n", + " \"dict\": dict,\n", 
+ " \"divmod\": divmod,\n", + " \"enumerate\": enumerate,\n", + " \"filter\": filter,\n", + " \"float\": float,\n", + " \"format\": format,\n", + " \"frozenset\": frozenset,\n", + " \"getattr\": getattr,\n", + " \"hasattr\": hasattr,\n", + " \"hash\": hash,\n", + " \"hex\": hex,\n", + " \"int\": int,\n", + " \"isinstance\": isinstance,\n", + " \"issubclass\": issubclass,\n", + " \"iter\": iter,\n", + " \"len\": len,\n", + " \"list\": list,\n", + " \"map\": map,\n", + " \"max\": max,\n", + " \"min\": min,\n", + " \"next\": next,\n", + " \"object\": object,\n", + " \"oct\": oct,\n", + " \"ord\": ord,\n", + " \"pow\": pow,\n", + " \"range\": range,\n", + " \"repr\": repr,\n", + " \"reversed\": reversed,\n", + " \"round\": round,\n", + " \"set\": set,\n", + " \"slice\": slice,\n", + " \"sorted\": sorted,\n", + " \"str\": str,\n", + " \"sum\": sum,\n", + " \"tuple\": tuple,\n", + " \"type\": type,\n", + " \"zip\": zip,\n", "}\n", "\n", + "\n", "# Define a context manager to limit execution time\n", "# Create a sandbox execution function\n", "def sandbox_exec(code, context=SAFE_BUILTINS, timeout=5):\n", - "\n", " try:\n", - " compiled_code = compile(code, '', 'exec')\n", + " compiled_code = compile(code, \"\", \"exec\")\n", "\n", " # Result dictionary to store execution results\n", - " result = {\n", - " \"output\" : None,\n", - " \"error\" : None\n", - " }\n", + " result = {\"output\": None, \"error\": None}\n", "\n", " # Define a target function for the thread\n", " def target():\n", @@ -1360,7 +1379,6 @@ " exec(compiled_code, context, result)\n", " except Exception as e:\n", " result[\"error\"] = e\n", - " \n", "\n", " # Create a thread to execute the code\n", " thread = threading.Thread(target=target)\n", @@ -1377,6 +1395,7 @@ "\n", " return result\n", "\n", + "\n", "# Example usage\n", "code = \"\"\"\n", "def add(a, b+5):\n", @@ -1391,7 +1410,7 @@ "except TimeoutError as e:\n", " print(e)\n", "except Exception as e:\n", - " print(\"Sandbox error:\", e)\n" + " 
print(\"Sandbox error:\", e)" ] }, { @@ -1510,24 +1529,23 @@ } ], "source": [ - "# run the generator but we will use FunctionTool.parse_function_call_expr and have a context map \n", + "# run the generator but we will use FunctionTool.parse_function_call_expr and have a context map\n", "\n", "all_functions_dict.update(\n", " {\n", - " \"Point\": Point,\n", - " # support numpy\n", - " \"np\": np,\n", - " \"np.ndarray\": np.ndarray,\n", - " \"array\": np.array,\n", - " \"arr\": arr,\n", - " \"np.array\": np.array,\n", - " \"x\": x\n", + " \"Point\": Point,\n", + " # support numpy\n", + " \"np\": np,\n", + " \"np.ndarray\": np.ndarray,\n", + " \"array\": np.array,\n", + " \"arr\": arr,\n", + " \"np.array\": np.array,\n", + " \"x\": x,\n", " }\n", ")\n", - "y=4\n", + "y = 4\n", "print(all_functions_dict)\n", - "for query in queries+[\"Add 5 to variable y\"]:\n", - "\n", + "for query in queries + [\"Add 5 to variable y\"]:\n", " try:\n", " print(f\"Query: {query}\")\n", " prompt_kwargs = {\"input_str\": query}\n", @@ -1537,10 +1555,14 @@ " func_expr = FunctionExpression.from_dict(result.data)\n", "\n", " print(func_expr)\n", - " assert isinstance(func_expr, FunctionExpression), f\"Expected FunctionExpression, got {type(result.data)}\"\n", + " assert isinstance(\n", + " func_expr, FunctionExpression\n", + " ), f\"Expected FunctionExpression, got {type(result.data)}\"\n", "\n", " # more secure way to handle function call\n", - " func: Function = FunctionTool.parse_function_call_expr(expr=func_expr.action, context_map=all_functions_dict)\n", + " func: Function = FunctionTool.parse_function_call_expr(\n", + " expr=func_expr.action, context_map=all_functions_dict\n", + " )\n", " print(func)\n", " fun_output = all_functions_dict[func.name](*func.args, **func.kwargs)\n", " print(\"func output:\", fun_output)\n", @@ -1558,18 +1580,24 @@ " print(\"sandbox output:\", result)\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: 
{query}, func: {result.data}, error: {e}\")\n", + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", " try:\n", " fun_output = eval(func_expr.action)\n", " print(\"func output:\", fun_output)\n", "\n", - " #sandbox_exec\n", + " # sandbox_exec\n", " action = \"output=\" + func_expr.action\n", - " result = sandbox_exec(action, context={**SAFE_BUILTINS, **all_functions_dict})\n", + " result = sandbox_exec(\n", + " action, context={**SAFE_BUILTINS, **all_functions_dict}\n", + " )\n", " print(\"sandbox output:\", result)\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")" + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )" ] }, { @@ -1776,20 +1804,27 @@ } ], "source": [ - "queries = [\"add 2 and 3\", \"search for something\", \"add points (1, 2) and (3, 4)\", \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\", \"multiply 2 with local variable x\", \"divide 2 by 3\"]\n", + "queries = [\n", + " \"add 2 and 3\",\n", + " \"search for something\",\n", + " \"add points (1, 2) and (3, 4)\",\n", + " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", + " \"multiply 2 with local variable x\",\n", + " \"divide 2 by 3\",\n", + "]\n", "\n", - "from adalflow.core.string_parser import JsonParser # improve a list of json\n", + "from adalflow.core.string_parser import JsonParser # improve a list of json\n", "\n", "preset_prompt_kwargs = {\n", - " \"tools\": [tool.definition.to_yaml() for tool in tools],\n", - " \"output_format_str\": parser.format_instructions(exclude=[\"thought\"])\n", - " }\n", + " \"tools\": [tool.definition.to_yaml() for tool in tools],\n", + " \"output_format_str\": parser.format_instructions(exclude=[\"thought\"]),\n", + "}\n", "multi_call_gen = Generator(\n", " model_client=ModelClientType.OPENAI(),\n", " 
model_kwargs=model_kwargs,\n", " template=multple_function_call_template,\n", " prompt_kwargs=preset_prompt_kwargs,\n", - " output_processors=JsonParser()\n", + " output_processors=JsonParser(),\n", ")\n", "print(multi_call_gen)\n", "multi_call_gen.print_prompt()" @@ -1882,8 +1917,12 @@ } ], "source": [ - "def execute_function_by_parsing(func_expr: FunctionExpression, all_functions_dict: Dict[str, Any]) -> Any:\n", - " func: Function = FunctionTool.parse_function_call_expr(expr=func_expr.action, context_map=all_functions_dict)\n", + "def execute_function_by_parsing(\n", + " func_expr: FunctionExpression, all_functions_dict: Dict[str, Any]\n", + ") -> Any:\n", + " func: Function = FunctionTool.parse_function_call_expr(\n", + " expr=func_expr.action, context_map=all_functions_dict\n", + " )\n", " print(func)\n", " fun_output = all_functions_dict[func.name](*func.args, **func.kwargs)\n", " print(\"func output:\", fun_output)\n", @@ -1891,7 +1930,6 @@ "\n", "\n", "def execute_function_by_eval(func_expr: FunctionExpression) -> Any:\n", - "\n", " print(f\"func expr: {func_expr.action}\")\n", "\n", " # eval without security check by using eval directly\n", @@ -1900,7 +1938,10 @@ " print(\"func output:\", fun_output)\n", " return fun_output\n", "\n", - "def execute_function_by_sandbox(func_expr: FunctionExpression, all_functions_dict: Dict[str, Any]) -> Any:\n", + "\n", + "def execute_function_by_sandbox(\n", + " func_expr: FunctionExpression, all_functions_dict: Dict[str, Any]\n", + ") -> Any:\n", " # sandbox_exec\n", " action = \"output=\" + func_expr.action\n", " result = sandbox_exec(action, context={**SAFE_BUILTINS, **all_functions_dict})\n", @@ -1909,48 +1950,57 @@ " return result\n", "\n", "\n", - "\n", - "\n", "for i in range(0, len(queries), 2):\n", - " query = \" and \".join(queries[i:i+2])\n", + " query = \" and \".join(queries[i : i + 2])\n", " print(f\"Query: {query}\\n_________________________\\n\")\n", " prompt_kwargs = {\"input_str\": query}\n", " result = 
multi_call_gen(prompt_kwargs=prompt_kwargs)\n", " print(result)\n", "\n", " try:\n", - "\n", " func_exprs = [FunctionExpression.from_dict(item) for item in result.data]\n", "\n", " print(func_exprs)\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to parse the function for query: {query}, func: {result.data}, error: {e}\")\n", + " print(\n", + " f\"Failed to parse the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", " continue\n", " try:\n", - " func_outputs_1 = [execute_function_by_parsing(func_expr, all_functions_dict) for func_expr in func_exprs]\n", + " func_outputs_1 = [\n", + " execute_function_by_parsing(func_expr, all_functions_dict)\n", + " for func_expr in func_exprs\n", + " ]\n", " print(f\"fun_output by parsing: {func_outputs_1}\\n_________________________\\n\")\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")\n", + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", "\n", " try:\n", - "\n", - " func_outputs_2 = [execute_function_by_eval(func_expr) for func_expr in func_exprs]\n", + " func_outputs_2 = [\n", + " execute_function_by_eval(func_expr) for func_expr in func_exprs\n", + " ]\n", " print(f\"fun_output by eval: {func_outputs_2}\\n_________________________\\n\")\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")\n", + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", "\n", " try:\n", - "\n", - " func_outputs_3 = [execute_function_by_sandbox(func_expr, all_functions_dict) for func_expr in func_exprs]\n", + " func_outputs_3 = [\n", + " execute_function_by_sandbox(func_expr, all_functions_dict)\n", + " for func_expr in func_exprs\n", + " ]\n", " print(f\"fun_output by sandbox: 
{func_outputs_3}\\n_________________________\\n\")\n", " except Exception as e:\n", " print(e)\n", - " print(f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\")\n", - "\n", - " \n" + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )" ] }, { @@ -1975,6 +2025,7 @@ "\n", "client = OpenAI()\n", "\n", + "\n", "# Example dummy function hard coded to return the same weather\n", "# In production, this could be your backend API or an external API\n", "def get_current_weather(location, unit=\"fahrenheit\"):\n", @@ -1982,15 +2033,23 @@ " if \"tokyo\" in location.lower():\n", " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": unit})\n", " elif \"san francisco\" in location.lower():\n", - " return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit})\n", + " return json.dumps(\n", + " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit}\n", + " )\n", " elif \"paris\" in location.lower():\n", " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": unit})\n", " else:\n", " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", "\n", + "\n", "def run_conversation():\n", " # Step 1: send the conversation and available functions to the model\n", - " messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris in celsius?\"}]\n", + " messages = [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris in celsius?\",\n", + " }\n", + " ]\n", " tools = [\n", " {\n", " \"type\": \"function\",\n", @@ -2034,11 +2093,13 @@ " for tool_call in tool_calls:\n", " function_name = tool_call.function.name\n", " function_to_call = available_functions[function_name]\n", - " function_args = json.loads(tool_call.function.arguments)# use json.loads to convert a 
string to a dictionary\n", + " function_args = json.loads(\n", + " tool_call.function.arguments\n", + " ) # use json.loads to convert a string to a dictionary\n", " # function_response = function_to_call(\n", " # location=function_args.get(\"location\"),\n", " # unit=function_args.get(\"unit\"),\n", - " # ) \n", + " # )\n", " # you have to exactly know the arguments, this does not make sense. How would i know its arguments. **function_args (makes more sense)\n", " function_response = function_to_call(**function_args)\n", " messages.append(\n", @@ -2054,6 +2115,8 @@ " messages=messages,\n", " ) # get a new response from the model where it can see the function response\n", " return second_response\n", + "\n", + "\n", "print(run_conversation())" ] }, @@ -2109,18 +2172,17 @@ "outputs": [], "source": [ "def get_current_weather(location: str, unit: str = \"fahrenheit\"):\n", - " \"\"\"Get the current weather in a given location\"\"\"\n", - " if \"tokyo\" in location.lower():\n", - " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": unit})\n", - " elif \"san francisco\" in location.lower():\n", - " return json.dumps(\n", - " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit}\n", - " )\n", - " elif \"paris\" in location.lower():\n", - " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": unit})\n", - " else:\n", - " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", - "\n" + " \"\"\"Get the current weather in a given location\"\"\"\n", + " if \"tokyo\" in location.lower():\n", + " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": unit})\n", + " elif \"san francisco\" in location.lower():\n", + " return json.dumps(\n", + " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": unit}\n", + " )\n", + " elif \"paris\" in location.lower():\n", + " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": 
unit})\n", + " else:\n", + " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})" ] }, { @@ -2134,21 +2196,29 @@ "from adalflow.core.base_data_class import DataClass\n", "from dataclasses import dataclass, field\n", "\n", + "\n", "@dataclass\n", "class Weather(DataClass):\n", - " location: str = field(metadata={\"description\": \"The city and state, e.g. San Francisco, CA\"})\n", + " location: str = field(\n", + " metadata={\"description\": \"The city and state, e.g. San Francisco, CA\"}\n", + " )\n", " unit: str = field(metadata={\"enum\": [\"celsius\", \"fahrenheit\"]})\n", "\n", + "\n", "def get_current_weather_2(weather: Weather):\n", " \"\"\"Get the current weather in a given location\"\"\"\n", " if \"tokyo\" in weather.location.lower():\n", - " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": weather.unit})\n", + " return json.dumps(\n", + " {\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": weather.unit}\n", + " )\n", " elif \"san francisco\" in weather.location.lower():\n", " return json.dumps(\n", " {\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": weather.unit}\n", " )\n", " elif \"paris\" in weather.location.lower():\n", - " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": weather.unit})\n", + " return json.dumps(\n", + " {\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": weather.unit}\n", + " )\n", " else:\n", " return json.dumps({\"location\": weather.location, \"temperature\": \"unknown\"})" ] @@ -2211,8 +2281,7 @@ "\n", "tool_2 = FunctionTool.from_defaults(fn=get_current_weather_2)\n", "\n", - "print(tool_2.metadata.to_json())\n", - "\n" + "print(tool_2.metadata.to_json())" ] }, { @@ -2229,38 +2298,23 @@ "metadata": {}, "outputs": [], "source": [ - "adalflow_fn_schema ={\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"weather\": {\n", - " \"type\": \"Weather\",\n", - " \"desc\": \"The city and state, e.g. 
San Francisco, CA\",\n", - " \"enum\": [\n", - " \"celsius\",\n", - " \"fahrenheit\"\n", - " ]\n", - " }\n", - " },\n", - " \"required\": [\n", - " \"weather\"\n", - " ],\n", - " \"definitions\": {\n", - " \"weather\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {\n", - " \"location\": {\n", - " \"type\": \"str\"\n", - " },\n", - " \"unit\": {\n", - " \"type\": \"str\"\n", - " }\n", - " },\n", - " \"required\": [\n", - " \"location\",\n", - " \"unit\"\n", - " ]\n", - " }\n", + "adalflow_fn_schema = {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"weather\": {\n", + " \"type\": \"Weather\",\n", + " \"desc\": \"The city and state, e.g. San Francisco, CA\",\n", + " \"enum\": [\"celsius\", \"fahrenheit\"],\n", + " }\n", + " },\n", + " \"required\": [\"weather\"],\n", + " \"definitions\": {\n", + " \"weather\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\"location\": {\"type\": \"str\"}, \"unit\": {\"type\": \"str\"}},\n", + " \"required\": [\"location\", \"unit\"],\n", " }\n", + " },\n", "}" ] }, @@ -2333,7 +2387,7 @@ } ], "source": [ - "# prepare function tool \n", + "# prepare function tool\n", "weather_tool = FunctionTool.from_defaults(fn=_get_current_weather)\n", "print(weather_tool)" ] @@ -2395,7 +2449,7 @@ } ], "source": [ - "# prepare a minimal function calling template \n", + "# prepare a minimal function calling template\n", "template = r\"\"\"You have these tools available:\n", " \n", " {% for tool in tools %}\n", @@ -2434,11 +2488,13 @@ "\n", "model_kwargs = {\"model\": \"gpt-3.5-turbo\", \"temperature\": 0.3, \"stream\": False}\n", "\n", + "\n", "@dataclass\n", "class Function(DataClass):\n", " name: str = field(metadata={\"desc\": \"The name of the function\"})\n", " args: Dict[str, Any] = field(metadata={\"desc\": \"The arguments of the function\"})\n", "\n", + "\n", "generator = Generator(\n", " model_client=ModelClientType.OPENAI(),\n", " model_kwargs=model_kwargs,\n", @@ -2535,9 +2591,7 @@ "source": [ "# 
call the function\n", "\n", - "function_map = {\n", - " \"_get_current_weather\": weather_tool\n", - "}\n", + "function_map = {\"_get_current_weather\": weather_tool}\n", "\n", "function_name = structured_output.name\n", "function_args = structured_output.args\n", @@ -2695,22 +2749,30 @@ "from dataclasses import dataclass, field\n", "from typing import Any, Dict\n", "\n", + "\n", "@dataclass\n", "class Address:\n", " street: str\n", " city: str\n", " zipcode: str\n", "\n", + "\n", "@dataclass\n", "class Person:\n", " name: str\n", " age: int\n", " address: Address\n", "\n", + "\n", "# Example instance of the nested dataclasses\n", - "person = Person(name=\"John Doe\", age=30, address=Address(street=\"123 Main St\", city=\"Anytown\", zipcode=\"12345\"))\n", + "person = Person(\n", + " name=\"John Doe\",\n", + " age=30,\n", + " address=Address(street=\"123 Main St\", city=\"Anytown\", zipcode=\"12345\"),\n", + ")\n", "print(person)\n", "\n", + "\n", "def to_dict(obj: Any) -> Dict[str, Any]:\n", " if hasattr(obj, \"__dataclass_fields__\"):\n", " return {key: to_dict(value) for key, value in obj.__dict__.items()}\n", @@ -2721,6 +2783,7 @@ " else:\n", " return obj\n", "\n", + "\n", "# Convert the person instance to a dictionary\n", "person_dict = to_dict(person)\n", "print(person_dict)" @@ -2741,20 +2804,31 @@ ], "source": [ "from typing import List\n", + "\n", + "\n", "@dataclass\n", "class Address:\n", " street: str\n", " city: str\n", " zipcode: str\n", "\n", + "\n", "@dataclass\n", "class Person:\n", " name: str\n", " age: int\n", " addresses: List[Address]\n", "\n", + "\n", "# Example instance of the nested dataclasses\n", - "person = Person(name=\"John Doe\", age=30, addresses=[Address(street=\"123 Main St\", city=\"Anytown\", zipcode=\"12345\"), Address(street=\"456 Elm St\", city=\"Othertown\", zipcode=\"67890\")])\n", + "person = Person(\n", + " name=\"John Doe\",\n", + " age=30,\n", + " addresses=[\n", + " Address(street=\"123 Main St\", city=\"Anytown\", 
zipcode=\"12345\"),\n", + " Address(street=\"456 Elm St\", city=\"Othertown\", zipcode=\"67890\"),\n", + " ],\n", + ")\n", "print(person)" ] }, @@ -2795,6 +2869,8 @@ ], "source": [ "from typing import List, Dict, Optional\n", + "\n", + "\n", "def dataclass_obj_to_dict(\n", " obj: Any, exclude: Optional[Dict[str, List[str]]] = None, parent_key: str = \"\"\n", ") -> Dict[str, Any]:\n", @@ -2851,24 +2927,29 @@ " else:\n", " return obj\n", "\n", + "\n", "from dataclasses import dataclass\n", "from typing import List\n", "\n", + "\n", "@dataclass\n", "class TrecData:\n", " question: str\n", " label: int\n", "\n", + "\n", "@dataclass\n", "class TrecDataList:\n", - "\n", " data: List[TrecData]\n", " name: str\n", "\n", + "\n", "trec_data = TrecData(question=\"What is the capital of France?\", label=0)\n", "trec_data_list = TrecDataList(data=[trec_data], name=\"trec_data_list\")\n", "\n", - "dataclass_obj_to_dict(trec_data_list, exclude={\"TrecData\": [\"label\"], \"TrecDataList\": [\"name\"]})" + "dataclass_obj_to_dict(\n", + " trec_data_list, exclude={\"TrecData\": [\"label\"], \"TrecDataList\": [\"name\"]}\n", + ")" ] }, { @@ -2878,14 +2959,24 @@ "outputs": [], "source": [ "from typing import Type\n", + "\n", + "\n", "def dataclass_obj_from_dict(cls: Type[Any], data: Dict[str, Any]) -> Any:\n", " if hasattr(cls, \"__dataclass_fields__\"):\n", " fieldtypes = {f.name: f.type for f in cls.__dataclass_fields__.values()}\n", - " return cls(**{key: dataclass_obj_from_dict(fieldtypes[key], value) for key, value in data.items()})\n", + " return cls(\n", + " **{\n", + " key: dataclass_obj_from_dict(fieldtypes[key], value)\n", + " for key, value in data.items()\n", + " }\n", + " )\n", " elif isinstance(data, list):\n", " return [dataclass_obj_from_dict(cls.__args__[0], item) for item in data]\n", " elif isinstance(data, dict):\n", - " return {key: dataclass_obj_from_dict(cls.__args__[1], value) for key, value in data.items()}\n", + " return {\n", + " key: 
dataclass_obj_from_dict(cls.__args__[1], value)\n", + " for key, value in data.items()\n", + " }\n", " else:\n", " return data" ] @@ -2933,7 +3024,12 @@ } ], "source": [ - "dataclass_obj_from_dict(TrecDataList, dataclass_obj_to_dict(trec_data_list, exclude={\"TrecData\": [\"label\"], \"TrecDataList\": [\"name\"]}))" + "dataclass_obj_from_dict(\n", + " TrecDataList,\n", + " dataclass_obj_to_dict(\n", + " trec_data_list, exclude={\"TrecData\": [\"label\"], \"TrecDataList\": [\"name\"]}\n", + " ),\n", + ")" ] } ], diff --git a/use_cases/agent/react_agent.ipynb b/use_cases/agent/react_agent.ipynb index cdc199fe..baa2ea81 100644 --- a/use_cases/agent/react_agent.ipynb +++ b/use_cases/agent/react_agent.ipynb @@ -43,6 +43,7 @@ "source": [ "# load the dataset\n", "from datasets import load_dataset\n", + "\n", "dataset = load_dataset(path=\"hotpot_qa\", name=\"fullwiki\")" ] }, @@ -114,7 +115,6 @@ } ], "source": [ - "\n", "import dotenv\n", "from adalflow.components.model_client import OpenAIClient\n", "from adalflow.components.agent.react_agent import ReActAgent\n", @@ -150,15 +150,17 @@ "import re\n", "import string\n", "\n", + "\n", "# copy code from the paper\n", "def clean_str(p):\n", - " return p.encode().decode(\"unicode-escape\").encode(\"latin1\").decode(\"utf-8\")\n", + " return p.encode().decode(\"unicode-escape\").encode(\"latin1\").decode(\"utf-8\")\n", + "\n", "\n", "# normalization copied from the paper's code\n", "def normalize_answer(s):\n", " def remove_articles(text):\n", " return re.sub(r\"\\b(a|an|the)\\b\", \" \", text)\n", - " \n", + "\n", " def white_space_fix(text):\n", " return \" \".join(text.split())\n", "\n", @@ -171,6 +173,7 @@ "\n", " return white_space_fix(remove_articles(remove_punc(lower(s))))\n", "\n", + "\n", "def search(entity: str) -> str:\n", " \"\"\"\n", " searches the exact entity on Wikipedia and returns the first paragraph if it exists. 
If not, it will return some similar entities to search.\n", @@ -178,29 +181,31 @@ " # Format the entity for URL encoding\n", " entity_formatted = entity.replace(\" \", \"+\")\n", " url = f\"https://en.wikipedia.org/w/index.php?search={entity_formatted}\"\n", - " \n", + "\n", " # Fetch the page\n", " response = requests.get(url)\n", - " soup = BeautifulSoup(response.text, 'html.parser')\n", - " \n", + " soup = BeautifulSoup(response.text, \"html.parser\")\n", + "\n", " # Check if the exact page was found or suggest similar items\n", " # when
is detected, it means the entity page is not found on wikipedia\n", " result_divs = soup.find_all(\"div\", {\"class\": \"mw-search-result-heading\"})\n", - " \n", - " if result_divs: # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities\n", + "\n", + " if result_divs: # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities\n", " # get Similar results\n", " similar_titles = [div.a.get_text() for div in result_divs]\n", - " return f\"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}\" # return the top 5 similar titles\n", + " return f\"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}\" # return the top 5 similar titles\n", " else:\n", " # the paper uses page to represent content in

\n", " # Extract xontent\n", - " page_list = [p.get_text().strip() for p in soup.find_all(\"p\") + soup.find_all(\"ul\")]\n", + " page_list = [\n", + " p.get_text().strip() for p in soup.find_all(\"p\") + soup.find_all(\"ul\")\n", + " ]\n", " # TODO: Recursive search, if find any concept that needs more search then call search again\n", " # if any(\"may refer to:\" in p for p in page_list):\n", " # search(entity)\n", "\n", " # restructure & clean the page content following the paper's logic\n", - " page = ''\n", + " page = \"\"\n", " for p in page_list:\n", " if len(p.split(\" \")) > 2:\n", " page += clean_str(p)\n", @@ -208,31 +213,39 @@ " page += \"\\n\"\n", " paragraphs = page.split(\"\\n\")\n", " paragraphs = [p.strip() for p in paragraphs if p.strip()]\n", - " \n", + "\n", " sentences = []\n", " for p in paragraphs:\n", - " sentences += p.split('. ')\n", - " sentences = [s.strip() + '.' for s in sentences if s.strip()]\n", - " \n", + " sentences += p.split(\". \")\n", + " sentences = [s.strip() + \".\" for s in sentences if s.strip()]\n", + "\n", " # return the first 5 sentences\n", " if sentences:\n", - " return ' '.join(sentences[:5]) if len(sentences)>=5 else ' '.join(sentences)\n", + " return (\n", + " \" \".join(sentences[:5]) if len(sentences) >= 5 else \" \".join(sentences)\n", + " )\n", " else:\n", " return \"No content found on this page.\"\n", - " \n", + "\n", " # TODO: clean the paragraphs and return the searched content\n", "\n", "\n", "def lookup(text: str, keyword: str) -> str:\n", " \"\"\"\n", - " returns the sentences containing keyword in the current passage.\n", + " returns the sentences containing keyword in the current passage.\n", " \"\"\"\n", - " sentences = text.split('.')\n", - " matching_sentences = [sentence.strip() + '.' 
for sentence in sentences if keyword.lower() in sentence.lower()]\n", + " sentences = text.split(\".\")\n", + " matching_sentences = [\n", + " sentence.strip() + \".\"\n", + " for sentence in sentences\n", + " if keyword.lower() in sentence.lower()\n", + " ]\n", " if not matching_sentences:\n", " return \"No sentences found with the keyword.\"\n", " else:\n", - " return ' '.join(matching_sentences) # Join all matching sentences into a single string" + " return \" \".join(\n", + " matching_sentences\n", + " ) # Join all matching sentences into a single string" ] }, { @@ -262,7 +275,7 @@ "outputs": [], "source": [ "examples = [\n", - "\"\"\"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\n", + " \"\"\"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\n", "Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\n", "Action 1: search(\"Colorado orogeny\")\n", "Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\n", @@ -277,7 +290,7 @@ "Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\n", "Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\n", "Action 5: finish(\"1,800 to 7,000 ft\")\"\"\",\n", - "\"\"\"Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\n", + " \"\"\"Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\n", "Thought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. 
I only need to search Milhouse and find who it is named after.\n", "Action 1: search(\"Milhouse\")\n", "Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\n", @@ -286,7 +299,7 @@ "Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \n", "Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\n", "Action 3: finish(\"Richard Nixon\")\"\"\",\n", - "\"\"\"Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\n", + " \"\"\"Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\n", "Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\n", "Action 1: search(\"Adam Clayton Powell\")\n", "Observation 1: Could not find [\"Adam Clayton Powell\"]. Similar: ['Adam Clayton Powell III', 'Seventh Avenue (Manhattan)', 'Adam Clayton Powell Jr. State Office Building', 'Isabel Washington Powell', 'Adam Powell', 'Adam Clayton Powell (film)', 'Giancarlo Esposito'].\n", @@ -296,7 +309,7 @@ "The film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\n", "Thought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. 
So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\n", "Action 3: finish(\"The Saimaa Gesture\")\"\"\",\n", - "\"\"\"Question: What profession does Nicholas Ray and Elia Kazan have in common?\n", + " \"\"\"Question: What profession does Nicholas Ray and Elia Kazan have in common?\n", "Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\n", "Action 1: search(\"Nicholas Ray\")\n", "Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 – June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\n", @@ -305,7 +318,7 @@ "Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\n", "Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\n", "Action 3: finish(\"director, screenwriter, actor\")\"\"\",\n", - "\"\"\"Question: Which magazine was started first Arthur's Magazine or First for Women?\n", + " \"\"\"Question: Which magazine was started first Arthur's Magazine or First for Women?\n", "Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.\n", "Action 1: search(\"Arthur's Magazine\")\n", "Observation 1: Arthur's Magazine (1844-€“1846) was an American literary periodical published in Philadelphia in the 19th century. \n", @@ -314,7 +327,7 @@ "Observation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \n", "Thought 3: First for Women was started in 1989. 
1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\n", "Action 3: finish(\"Arthur's Magazine\")\"\"\",\n", - "\"\"\"Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\n", + " \"\"\"Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\n", "Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\n", "Action 1: search(\"Pavel Urysohn\")\n", "Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 – August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\n", @@ -322,7 +335,7 @@ "Action 2: search(\"Leonid Levin\")\n", "Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \n", "Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. \n", - "Action 3: finish(\"yes\")\"\"\"\n", + "Action 3: finish(\"yes\")\"\"\",\n", "]" ] }, @@ -334,7 +347,7 @@ "source": [ "# preset up the examples as prompt_kwargs, the examples will be included in the system prompt\n", "\n", - "preset_prompt_kwargs = {\"examples\": examples} " + "preset_prompt_kwargs = {\"examples\": examples}" ] }, { @@ -353,8 +366,8 @@ "outputs": [], "source": [ "gpt_model_kwargs = {\n", - " \"model\": \"gpt-3.5-turbo\",\n", - " \"temperature\": 0.0,\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"temperature\": 0.0,\n", "}" ] }, @@ -517,8 +530,11 @@ "# max_steps refers to how many thought-action round we allow the model to perform\n", "# to save resources, let's use 3 here\n", "agent = ReActAgent(\n", - " tools=tools, max_steps=3, model_client=OpenAIClient(),\n", - " model_kwargs=gpt_model_kwargs, preset_prompt_kwargs=preset_prompt_kwargs\n", + " tools=tools,\n", + " max_steps=3,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs=gpt_model_kwargs,\n", + " 
preset_prompt_kwargs=preset_prompt_kwargs,\n", ")\n", "agent" ] @@ -542,7 +558,8 @@ "source": [ "import importlib\n", "import adalflow\n", - "importlib.reload(adalflow)\n" + "\n", + "importlib.reload(adalflow)" ] }, { @@ -681,13 +698,18 @@ "num_questions = 5\n", "for i in range(num_questions):\n", " question = val_dataset[i][\"question\"]\n", - " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", - " \n", + " gt_answer = normalize_answer(\n", + " val_dataset[i][\"answer\"]\n", + " ) # normalize the ground truth answer\n", + "\n", " # get the agent's response\n", " pred_answer = agent(question)\n", " pred_answer = normalize_answer(pred_answer)\n", - " \n", - " printc(f\"question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n" + "\n", + " printc(\n", + " f\"question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n", + " color=\"yellow\",\n", + " )" ] }, { @@ -995,8 +1017,11 @@ "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n", "\n", "agent = ReActAgent(\n", - " tools=tools, max_steps=7, model_client=OpenAIClient(),\n", - " model_kwargs=gpt_model_kwargs, preset_prompt_kwargs=preset_prompt_kwargs\n", + " tools=tools,\n", + " max_steps=7,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs=gpt_model_kwargs,\n", + " preset_prompt_kwargs=preset_prompt_kwargs,\n", ")\n", "\n", "num_questions = 10\n", @@ -1005,18 +1030,23 @@ "start_time = time.time()\n", "for i in range(num_questions):\n", " question = val_dataset[i][\"question\"]\n", - " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", + " gt_answer = normalize_answer(\n", + " val_dataset[i][\"answer\"]\n", + " ) # normalize the ground truth answer\n", " gt_answers.append(gt_answer)\n", - " \n", + "\n", " # get the agent's response\n", " pred_answer = agent(question)\n", " pred_answer = normalize_answer(pred_answer)\n", " 
pred_answers.append(pred_answer)\n", - " \n", - " printc(f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n", + "\n", + " printc(\n", + " f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n", + " color=\"yellow\",\n", + " )\n", "\n", "end_time = time.time()\n", - " \n", + "\n", "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", "avg_time = (end_time - start_time) / num_questions\n", @@ -1262,8 +1292,7 @@ "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n", "\n", "agent = ReActAgent(\n", - " max_steps=7, model_client=OpenAIClient(),\n", - " model_kwargs=gpt_model_kwargs\n", + " max_steps=7, model_client=OpenAIClient(), model_kwargs=gpt_model_kwargs\n", ")\n", "\n", "num_questions = 10\n", @@ -1272,18 +1301,23 @@ "start_time = time.time()\n", "for i in range(num_questions):\n", " question = val_dataset[i][\"question\"]\n", - " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", + " gt_answer = normalize_answer(\n", + " val_dataset[i][\"answer\"]\n", + " ) # normalize the ground truth answer\n", " gt_answers.append(gt_answer)\n", - " \n", + "\n", " # get the agent's response\n", " pred_answer = agent(question)\n", " pred_answer = normalize_answer(pred_answer)\n", " pred_answers.append(pred_answer)\n", - " \n", - " printc(f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n", + "\n", + " printc(\n", + " f\"No. 
{i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\",\n", + " color=\"yellow\",\n", + " )\n", "\n", "end_time = time.time()\n", - " \n", + "\n", "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", "avg_time = (end_time - start_time) / num_questions\n", diff --git a/use_cases/classification/trec_task_string_output.py b/use_cases/classification/trec_task_string_output.py index 98a42f91..48186f4f 100644 --- a/use_cases/classification/trec_task_string_output.py +++ b/use_cases/classification/trec_task_string_output.py @@ -34,7 +34,6 @@ def extract_class_index_value(text: str, get_feedback=False): class TRECClassifierStringOutput(adal.Component): - def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict): super().__init__() @@ -91,7 +90,6 @@ def call( if __name__ == "__main__": - from benchmarks.config import gpt_3_model, load_model from use_cases.classification.data import load_datasets diff --git a/use_cases/classification/trec_task_structured_output.py b/use_cases/classification/trec_task_structured_output.py index eb5333cd..6b9d751c 100644 --- a/use_cases/classification/trec_task_structured_output.py +++ b/use_cases/classification/trec_task_structured_output.py @@ -30,7 +30,6 @@ class TRECClassifierStructuredOutput(adal.Component): - def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict): super().__init__() @@ -112,7 +111,6 @@ def call( if __name__ == "__main__": - from benchmarks.config import gpt_3_model, load_model from use_cases.classification.data import load_datasets diff --git a/use_cases/generator/intermediate.ipynb b/use_cases/generator/intermediate.ipynb index 80f8cff8..fb4dc8a8 100644 --- a/use_cases/generator/intermediate.ipynb +++ b/use_cases/generator/intermediate.ipynb @@ -30,7 +30,10 @@ "from adalflow.core import Component, Generator, Sequential\n", "from 
adalflow.components.model_client import OpenAIClient\n", "from adalflow.components.model_client import GroqAPIClient\n", - "from adalflow.utils import setup_env # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY\n", + "from adalflow.utils import (\n", + " setup_env,\n", + ") # make sure you have a .env file with OPENAI_API_KEY and GROQ_API_KEY\n", + "\n", "setup_env(\".env\")" ] }, @@ -72,6 +75,7 @@ "source": [ "# Let's turn on the library log to help with debugging.\n", "from adalflow.utils import get_logger\n", + "\n", "get_logger()" ] }, @@ -248,6 +252,8 @@ "# Router component\n", "\n", "from typing import Dict\n", + "\n", + "\n", "class Router(Component):\n", " def __init__(self, choices: Dict[str, str] = {}):\n", " super().__init__()\n", @@ -260,9 +266,9 @@ "\n", " def call(self, query: str) -> str:\n", " prompt_kwargs = {\"input_str\": query, \"choices\": self.choices}\n", - " choice = self.router(prompt_kwargs=prompt_kwargs).data\n", + " choice = self.router(prompt_kwargs=prompt_kwargs).data\n", " return {\"choice\": choice, \"query\": query}\n", - " \n", + "\n", " def _extra_repr(self):\n", " return f\"Choices: {self.choices}, \"" ] @@ -329,6 +335,7 @@ "source": [ "# the second chat component with two generators\n", "\n", + "\n", "class Chat(Component):\n", " def __init__(self):\n", " super().__init__()\n", @@ -342,6 +349,7 @@ " model_client=GroqAPIClient(),\n", " model_kwargs={\"model\": \"llama3-8b-8192\"},\n", " )\n", + "\n", " # to chain together just to make sure the output can be directly passed to the next as input\n", " def call(self, input: Dict[str, str]) -> Dict[str, str]:\n", " choice = input.get(\"choice\", None)\n", @@ -412,7 +420,9 @@ "class QAWithRouter(Component):\n", " def __init__(self):\n", " super().__init__()\n", - " self.router = Router(choices={\"doctor\": \"Doctor\", \"lawyer\": \"Lawyer\", \"other\": \"Other\"})\n", + " self.router = Router(\n", + " choices={\"doctor\": \"Doctor\", \"lawyer\": \"Lawyer\", 
\"other\": \"Other\"}\n", + " )\n", " self.chat = Chat()\n", " self.pipeline = Sequential(self.router, self.chat)\n", "\n", diff --git a/use_cases/question_answering/bbh/object_count/task.py b/use_cases/question_answering/bbh/object_count/task.py index 5aebb47b..d0e3a1c7 100644 --- a/use_cases/question_answering/bbh/object_count/task.py +++ b/use_cases/question_answering/bbh/object_count/task.py @@ -82,7 +82,6 @@ def test_object_count_task(): if __name__ == "__main__": - # task = ObjectCountTask(**gpt_3_model) # task_original = ObjectCountTaskOriginal(**gpt_3_model) diff --git a/use_cases/question_answering/bbh/object_count/train_new.py b/use_cases/question_answering/bbh/object_count/train_new.py index 280f7c1a..8f5b7a38 100644 --- a/use_cases/question_answering/bbh/object_count/train_new.py +++ b/use_cases/question_answering/bbh/object_count/train_new.py @@ -83,7 +83,6 @@ def train_diagnose_teacher( model_client: adal.ModelClient, model_kwargs: Dict, ) -> Dict: - trainset, valset, testset = load_datasets() adal_component = ObjectCountAdalComponent(model_client, model_kwargs) @@ -139,7 +138,6 @@ def train( if __name__ == "__main__": - train( debug=True, max_steps=12, diff --git a/use_cases/question_answering/bbh/word_sorting/diagnose.py b/use_cases/question_answering/bbh/word_sorting/diagnose.py index 2faa9e76..df251392 100644 --- a/use_cases/question_answering/bbh/word_sorting/diagnose.py +++ b/use_cases/question_answering/bbh/word_sorting/diagnose.py @@ -55,7 +55,6 @@ def evaluate_one_sample( def evaluate_one_sample(): - trainset, valset, testset = load_datasets(task_name="BBH_word_sorting") adal_component = WordSortingAdalComponent( **gpt_3_model, llm_judge_model_config=gpt_3_model @@ -73,7 +72,6 @@ def diagnose( model_client: adal.ModelClient, model_kwargs: Dict, ) -> Dict: - trainset, valset, testset = load_datasets(task_name="BBH_word_sorting") adal_component = WordSortingAdalComponent( model_client, model_kwargs, llm_judge_model_config=gpt_3_model diff --git 
a/use_cases/question_answering/bbh/word_sorting/task.py b/use_cases/question_answering/bbh/word_sorting/task.py index ace7d1e6..13ceb5c3 100644 --- a/use_cases/question_answering/bbh/word_sorting/task.py +++ b/use_cases/question_answering/bbh/word_sorting/task.py @@ -88,7 +88,6 @@ def test_word_sorting_task(): if __name__ == "__main__": - # task = ObjectCountTask(**gpt_3_model) # task_original = ObjectCountTaskOriginal(**gpt_3_model) diff --git a/use_cases/question_answering/bbh/word_sorting/train.py b/use_cases/question_answering/bbh/word_sorting/train.py index 4d1af9e3..d560fbe2 100644 --- a/use_cases/question_answering/bbh/word_sorting/train.py +++ b/use_cases/question_answering/bbh/word_sorting/train.py @@ -159,7 +159,6 @@ def train( if __name__ == "__main__": - train( debug=False, max_steps=10, diff --git a/use_cases/question_answering/bbh/word_sorting/train_paper.py b/use_cases/question_answering/bbh/word_sorting/train_paper.py index 00a84830..206f86c0 100644 --- a/use_cases/question_answering/bbh/word_sorting/train_paper.py +++ b/use_cases/question_answering/bbh/word_sorting/train_paper.py @@ -153,7 +153,6 @@ def train( if __name__ == "__main__": - train( debug=False, max_steps=10, diff --git a/use_cases/question_answering/chatbot.ipynb b/use_cases/question_answering/chatbot.ipynb index 3db858a4..7ed71347 100644 --- a/use_cases/question_answering/chatbot.ipynb +++ b/use_cases/question_answering/chatbot.ipynb @@ -21,6 +21,7 @@ "outputs": [], "source": [ "from IPython.display import clear_output\n", + "\n", "!pip install -U adalflow[openai,groq,faiss-cpu]\n", "clear_output()" ] @@ -37,7 +38,9 @@ "from adalflow.core.component import Component\n", "from adalflow.core.generator import Generator\n", "from adalflow.components.memory.memory import Memory\n", - "from adalflow.components.model_client import OpenAIClient # Here, we use the OpenAIClient as an example, but you can use any other clients (with the corresponding API Key as needed), such as 
AnthropicAPIClient" + "from adalflow.components.model_client import (\n", + " OpenAIClient,\n", + ") # Here, we use the OpenAIClient as an example, but you can use any other clients (with the corresponding API Key as needed), such as AnthropicAPIClient" ] }, { @@ -49,7 +52,7 @@ "# Prompt user to enter their API keys securely\n", "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", "# Set environment variables\n", - "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ[\"OPENAI_API_KEY\"] = openai_api_key\n", "# Replace with your OpenAI API Key, or you can put it in a .env file" ] }, @@ -64,11 +67,10 @@ " def __init__(self):\n", " super().__init__()\n", " self.generator = Generator(\n", - " model_client=OpenAIClient(),\n", - " model_kwargs={'model': 'gpt-4o-mini'}\n", + " model_client=OpenAIClient(), model_kwargs={\"model\": \"gpt-4o-mini\"}\n", " )\n", - " self.chat_history = Memory() # Memory to store the chat history\n", - " \n", + " self.chat_history = Memory() # Memory to store the chat history\n", + "\n", " def call(self) -> str:\n", " print(\"Welcome to the ChatBot. Type anything to chat. 
Type 'exit' to end.\")\n", " while True:\n", @@ -90,6 +92,7 @@ " )\n", " print(f\"ChatBot: {response}\")\n", "\n", + "\n", "chatbot = ChatBot()\n", "print(chatbot)" ] diff --git a/use_cases/question_answering/simple_qa.ipynb b/use_cases/question_answering/simple_qa.ipynb index 67dc9b04..cec10d76 100644 --- a/use_cases/question_answering/simple_qa.ipynb +++ b/use_cases/question_answering/simple_qa.ipynb @@ -32,7 +32,10 @@ "outputs": [], "source": [ "# Here, we use the OpenAIClient as an example, but you can use any other clients (with the corresponding API Key as needed), such as AnthropicAPIClient\n", - "from adalflow.utils import setup_env # make sure you have a .env file with OPENAI_API_KEY or any other key mentioned with respect to your usage\n", + "from adalflow.utils import (\n", + " setup_env,\n", + ") # make sure you have a .env file with OPENAI_API_KEY or any other key mentioned with respect to your usage\n", + "\n", "setup_env(\".env\")\n", "from adalflow.components.model_client import OpenAIClient" ] @@ -119,12 +122,12 @@ " def __init__(self):\n", " super().__init__()\n", " self.generator = Generator(\n", - " model_client=OpenAIClient(),\n", - " model_kwargs={'model': 'gpt-3.5-turbo'}\n", + " model_client=OpenAIClient(), model_kwargs={\"model\": \"gpt-3.5-turbo\"}\n", " )\n", "\n", " def call(self, query: str):\n", - " return self.generator.call(prompt_kwargs={'input_str': query})\n", + " return self.generator.call(prompt_kwargs={\"input_str\": query})\n", + "\n", "\n", "simple_qa = SimpleQA()\n", "print(simple_qa)" diff --git a/use_cases/rag/build/rag.py b/use_cases/rag/build/rag.py index 03e2b58f..2c16e9bb 100644 --- a/use_cases/rag/build/rag.py +++ b/use_cases/rag/build/rag.py @@ -99,7 +99,6 @@ def prepare_database_with_index( class RAG(Component): - def __init__( self, index_file: str = "index.faiss", diff --git a/use_cases/rag/rag_with_eval.py b/use_cases/rag/rag_with_eval.py index 1e05a116..45ba2ce2 100644 --- a/use_cases/rag/rag_with_eval.py +++ 
b/use_cases/rag/rag_with_eval.py @@ -60,7 +60,6 @@ def add_all_documents_to_rag_db(rag): if __name__ == "__main__": - rag = RAG(index_file="hotpot_qa_index.faiss") # add_all_documents_to_rag_db(rag) print(rag.transformed_docs) diff --git a/use_cases/unsorted/rag_optimized.py b/use_cases/unsorted/rag_optimized.py index 25f11fc0..197711b9 100644 --- a/use_cases/unsorted/rag_optimized.py +++ b/use_cases/unsorted/rag_optimized.py @@ -31,7 +31,6 @@ # TODO: RAG can potentially be a component itsefl and be provided to the users class RAG(Component): - def __init__(self, settings: dict): super().__init__() self.vectorizer_settings = settings["vectorizer"] diff --git a/use_cases/unsorted/rag_yaml_config.py b/use_cases/unsorted/rag_yaml_config.py index 6b465ce1..cdcd0767 100644 --- a/use_cases/unsorted/rag_yaml_config.py +++ b/use_cases/unsorted/rag_yaml_config.py @@ -32,7 +32,6 @@ # TODO: RAG can potentially be a component itsefl and be provided to the users class RAG(Component): - def __init__(self, settings: dict): super().__init__() self.vectorizer_settings = settings["vectorizer"] diff --git a/use_cases/unsorted/simple_qa.py b/use_cases/unsorted/simple_qa.py index a6816ec3..086290b4 100644 --- a/use_cases/unsorted/simple_qa.py +++ b/use_cases/unsorted/simple_qa.py @@ -18,13 +18,10 @@ def __init__( ): super().__init__() if provider == "openai": - model_client = OpenAIClient() elif provider == "groq": - model_client = GroqAPIClient() elif provider == "anthropic": - model_client = AnthropicAPIClient() else: raise ValueError(f"Unknown provider: {provider}") diff --git a/use_cases/unsorted/simple_rag_bm_25.py b/use_cases/unsorted/simple_rag_bm_25.py index 570c9a08..460bd94f 100644 --- a/use_cases/unsorted/simple_rag_bm_25.py +++ b/use_cases/unsorted/simple_rag_bm_25.py @@ -16,7 +16,6 @@ # TODO: RAG can potentially be a component itsefl and be provided to the users class RAG(Component): - def __init__(self): super().__init__() From d561a9cac3b2adf588a88338d4812866a4a0eb32 Mon 
Sep 17 00:00:00 2001 From: lu-ny Date: Tue, 10 Dec 2024 18:06:01 -0500 Subject: [PATCH 2/3] removed a block of whitespace in precommit yaml --- .pre-commit-config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02e712d6..fa2222b1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,8 +16,6 @@ repos: args: ['--config=pyproject.toml'] exclude: ^docs/|.*\.(json|yaml|md|txt)$ - - - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.8.2 hooks: From 7fc648183a18d82ce02737790c5df760bd216f5e Mon Sep 17 00:00:00 2001 From: lu-ny Date: Tue, 10 Dec 2024 19:02:55 -0500 Subject: [PATCH 3/3] removed formatting tools from make format to avoid formatting redundancy, we should have one source-of-truth for formatting, which is the pre-commit --- .pre-commit-config.yaml | 11 ++++------- Makefile | 1 - .../adalflow/components/retriever/bm25_retriever.py | 4 +++- adalflow/adalflow/core/component.py | 4 +++- adalflow/adalflow/optim/_llm_optimizer.py | 6 +++--- .../adalflow/optim/few_shot/bootstrap_optimizer.py | 3 ++- adalflow/adalflow/optim/parameter.py | 6 +++--- adalflow/adalflow/optim/trainer/trainer.py | 3 ++- adalflow/adalflow/tracing/generator_state_logger.py | 6 +++--- pyproject.toml | 6 ------ 10 files changed, 23 insertions(+), 27 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fa2222b1..bf4be365 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,19 +23,16 @@ repos: - id: ruff args: ['--fix', '--config=pyproject.toml'] exclude: ^docs/|.*\.(json|yaml|md|txt)$ - - id: ruff-format - args: ['--config=pyproject.toml'] - exclude: ^docs/|.*\.(json|yaml|md|txt)$ # stage files after ruff - repo: local hooks: - - id: git-add - name: git-add - entry: git add + - id: run-make-format + name: Run Make Format + entry: make format language: system stages: [commit] - pass_filenames: true + pass_filenames: false # - repo: 
https://github.com/pycqa/flake8 # rev: 4.0.1 diff --git a/Makefile b/Makefile index 01f5c64a..c95043b8 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,6 @@ setup: format: $(PYTHON) black $(SRC_DIR) --config pyproject.toml $(PYTHON) ruff check --fix $(SRC_DIR) - $(PYTHON) ruff format $(SRC_DIR) # remove git ls-files | xargs pre-commit run black --files, causes a circular dependency # Run lint checks using Ruff diff --git a/adalflow/adalflow/components/retriever/bm25_retriever.py b/adalflow/adalflow/components/retriever/bm25_retriever.py index 5e4badb1..ab7f3fea 100644 --- a/adalflow/adalflow/components/retriever/bm25_retriever.py +++ b/adalflow/adalflow/components/retriever/bm25_retriever.py @@ -232,7 +232,9 @@ def _initialize(self, corpus: List[List[str]]): def _calc_idf(self): idf_sum = 0 - negative_idf = [] # idf can be negative if word is too common: more than half of the documents + negative_idf = ( + [] + ) # idf can be negative if word is too common: more than half of the documents self.idf: Dict[str, float] = {} for token, freq in self.nd.items(): idf = math.log(self.total_documents - freq + 0.5) - math.log(freq + 0.5) diff --git a/adalflow/adalflow/core/component.py b/adalflow/adalflow/core/component.py index 49c75c6c..3d3f689d 100644 --- a/adalflow/adalflow/core/component.py +++ b/adalflow/adalflow/core/component.py @@ -138,7 +138,9 @@ def call(self, query: str) -> str: training: bool teacher_mode: bool = False tracing: bool = False - name: str = "Component" # name will help with GradComponent output naming as "{name}_output" + name: str = ( + "Component" # name will help with GradComponent output naming as "{name}_output" + ) _component_type = "base" # def _generate_unique_name(self): diff --git a/adalflow/adalflow/optim/_llm_optimizer.py b/adalflow/adalflow/optim/_llm_optimizer.py index 0ba093b1..3f321e92 100644 --- a/adalflow/adalflow/optim/_llm_optimizer.py +++ b/adalflow/adalflow/optim/_llm_optimizer.py @@ -114,9 +114,9 @@ def __init__( # Ensure the 
temperature is at least 1 model_kwargs["temperature"] = max(1, model_kwargs.get("temperature", 1)) - self.instruction_history: List[ - Instruction - ] = [] # trace the history of the instructions + self.instruction_history: List[Instruction] = ( + [] + ) # trace the history of the instructions self.starter_instruction: Optional[str] = None if self.instruction_parameter.data is not None: self.starter_instruction = self.instruction_parameter.data diff --git a/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py b/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py index a088f535..eeec61c8 100644 --- a/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py +++ b/adalflow/adalflow/optim/few_shot/bootstrap_optimizer.py @@ -139,7 +139,8 @@ def sample( ) # if demo.id in demos and demos[demo.id].score is not None: w = ( - w - student_demo_score + w + - student_demo_score # w - demos[demo.id].score ) # assign higher weights to failed demos but successful in augmented if w < 0: diff --git a/adalflow/adalflow/optim/parameter.py b/adalflow/adalflow/optim/parameter.py index a12fe00a..e5241939 100644 --- a/adalflow/adalflow/optim/parameter.py +++ b/adalflow/adalflow/optim/parameter.py @@ -170,9 +170,9 @@ def __init__( self._score: float = score # end to end evaluation score self._student_traces: Dict[str, DataClass] = {} # id - self._demos: List[ - DataClass - ] = [] # used for the optimizer to save the proposed demos + self._demos: List[DataClass] = ( + [] + ) # used for the optimizer to save the proposed demos self._previous_demos: List[DataClass] = [] self.eval_input = eval_input diff --git a/adalflow/adalflow/optim/trainer/trainer.py b/adalflow/adalflow/optim/trainer/trainer.py index 343a973d..03127131 100644 --- a/adalflow/adalflow/optim/trainer/trainer.py +++ b/adalflow/adalflow/optim/trainer/trainer.py @@ -1517,7 +1517,8 @@ def _downsample_move_batch( error_indices = [i for i, score in enumerate(acc_score_list) if score <= 0.5] if ( - len(error_indices) + 
len(correct_indices) <= max_moving_batch_size + len(error_indices) + len(correct_indices) + <= max_moving_batch_size # and len(correct_indices) <= max_moving_batch_size ): return all_samples, all_losses, all_y_preds, acc_score_list diff --git a/adalflow/adalflow/tracing/generator_state_logger.py b/adalflow/adalflow/tracing/generator_state_logger.py index 4e64086a..6a86cd0c 100644 --- a/adalflow/adalflow/tracing/generator_state_logger.py +++ b/adalflow/adalflow/tracing/generator_state_logger.py @@ -59,9 +59,9 @@ def __init__( self.filename = filename or "generator_state_trace.json" self.filepath = os.path.join(self.filepath, self.filename) - self._trace_map: Dict[ - str, List[GeneratorStatesRecord] - ] = {} # generator_name: [prompt_states] + self._trace_map: Dict[str, List[GeneratorStatesRecord]] = ( + {} + ) # generator_name: [prompt_states] # load previous records if the file exists if os.path.exists(self.filepath): self.load(self.filepath) diff --git a/pyproject.toml b/pyproject.toml index 1ecc0b9c..02f0cba2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,9 +79,3 @@ lint.extend-ignore = [ "UP007", # Wants | over Union, which breaks 3.8 ] exclude = ["docs/*"] - -[tool.ruff.format] -quote-style = "double" -indent-style = "space" -skip-magic-trailing-comma = false -line-ending = 'auto'