From f65c6c5290a7968e1e4c6473be63debce9ea2a48 Mon Sep 17 00:00:00 2001 From: Li Yin Date: Mon, 20 May 2024 01:00:41 -0700 Subject: [PATCH] the intro page for the library and the structure of all documentations --- docs/.gitignore | 0 docs/requirements.txt | 0 docs/source/_static/custom.css | 17 +++ docs/source/conf.py | 6 +- docs/source/developer_notes/prompt.rst | 2 +- docs/source/index.rst | 145 +++++++++++++++++++++---- pyproject.toml | 1 + 7 files changed, 149 insertions(+), 22 deletions(-) create mode 100644 docs/.gitignore create mode 100644 docs/requirements.txt diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 00000000..e69de29b diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css index 79d120a6..6d264fe7 100644 --- a/docs/source/_static/custom.css +++ b/docs/source/_static/custom.css @@ -6,4 +6,21 @@ background-color: #f0f0f0 !important; /* Light gray color */ color: #000000 !important; /* Sets text color to black */ +} +.half-width { + width: 47%; + float: left; + padding: 2%; + box-sizing: border-box; +} + +.code-comparison { + /* display: flex; */ + width: 100%; + justify-content: space-between; +} + +.caption-text { + font-size: 14px; /* Sets the smallest font size */ + color: gray; /* Sets the color to gray */ } \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index c9ad7d0a..d916e65d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,6 +13,7 @@ import os import sys + # sys.path.insert(0, os.path.abspath(".")) # sys.path.insert(0, os.path.abspath("..")) # sys.path.insert(0, os.path.abspath("../..")) @@ -50,6 +51,7 @@ # 'recommonmark', # 'myst_parser' ] + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -65,9 +67,9 @@ # The theme to use for HTML and HTML Help pages. 
See the documentation for # a list of builtin themes. # -# html_theme = 'alabaster' html_theme = "sphinx_rtd_theme" + # These options are for the sphinx_rtd_theme html_theme_options = { "collapse_navigation": False, @@ -76,6 +78,8 @@ "includehidden": True, "titles_only": False, } + + # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". diff --git a/docs/source/developer_notes/prompt.rst b/docs/source/developer_notes/prompt.rst index e420ebaa..0768d926 100644 --- a/docs/source/developer_notes/prompt.rst +++ b/docs/source/developer_notes/prompt.rst @@ -77,7 +77,7 @@ For example, your task is to instruct the llm to choose `top_k` from the given c :linenos: from core.prompt_builder import Prompt - + task_desc_template = r""" Choose the top {{top_k}} from the following choices: {{choices}} """ diff --git a/docs/source/index.rst b/docs/source/index.rst index 0ff6c09e..80d364b1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,13 +9,114 @@ LightRAG documentation .. .. image:: ../../images/lightrag_structure.png .. :width: 60% -LightRAG is the "PyTorch" library for building large langage model(LLM) applications. It is light, modular and robust like "PyTorch", with essential components for `Retriever`-`Agent`-`Generator` (RAG). +LightRAG is the "PyTorch" library for building large language model (LLM) applications. It is super light, modular and robust like "PyTorch", and offers essential components for `Retriever`-`Agent`-`Generator` (RAG). -.. LightRAG: 1000 lines of code are all you need. No lock-in to vendors and frameworks, only the best practices of productionable RAG and Agent. +You have a similar coding experience to PyTorch. Here is a side-by-side comparison of writing a PyTorch module and a LightRAG component: -What is LightRAG?
-================= -LightRAG comes from the best of the AI research and engineering. Fundamentally, we ask ourselves: what kind of system that combines the best of research(such as LLM), engineering (such as 'jinja') to build the best applications? We are not a framework. We do not want you to directly install the package. We want you to carefully decide to take modules and structures from here to build your own library and applications. This is a cookbook organized uniquely for easy understanding: you can read the 1000 lines of code to see a typical RAG end-to-end without jumping between files and going through multi-level class inheritance. If we build our system expanding from light_rag.py, we as a community will share the same RAG languages, and share other building blocks and use cases easily without depending on a complex framework. +#TODO: make it side to side comparison + +**PyTorch:** + +.. code-block:: python + + import torch + import torch.nn as nn + + class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.conv1 = nn.Conv2d(1, 32, 3, 1) + self.conv2 = nn.Conv2d(32, 64, 3, 1) + self.dropout1 = nn.Dropout2d(0.25) + self.dropout2 = nn.Dropout2d(0.5) + self.fc1 = nn.Linear(9216, 128) + self.fc2 = nn.Linear(128, 10) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = self.dropout1(x) + x = self.dropout2(x) + x = self.fc1(x) + return self.fc2(x) + + my_nn = Net() + print(my_nn) + +**LightRAG:** + +.. 
code-block:: python + + from core.component import Component + from core.generator import Generator + from components.api_client import OpenAIClient + + class SimpleQA(Component): + def __init__(self): + super().__init__() + self.generator = Generator( + model_client=OpenAIClient, + model_kwargs={'model_name': 'gpt-3.5-turbo'} + ) + + def call(self, query): + return self.generator.call(query) + + async def acall(self, query): + return await self.generator.acall(query) + + qa = SimpleQA() + print(qa) + + +**Why LightRAG?** + + +1. **Clarity and Simplicity** + + We understand that developers building real-world Large Language Model (LLM) applications are the real heroes. Just like AI researchers and engineers who build models on top of PyTorch, developers require **Maximum Flexibility and Customizability**: Each developer has unique data needs to build their own models/components, experiment with In-context Learning (ICL) or model finetuning, and deploy the LLM applications to production. This means the library must provide fundamental lower-level building blocks and strive for clarity and simplicity: + + - We maintain no more than two levels of subclasses. + - Each core abstract class is designed to be robust and flexible. + - We use 10X less code than other libraries to achieve 10X more robustness and flexibility. + + +2. **Control and Transparency** + + Coming from a deep AI research background, we understand that the more control and transparency developers have over their prompts, the better. By default: + + - LightRAG simplifies what developers need to send to LLM proprietary APIs to just two messages each time: a `system message` and a `user message`. This minimizes reliance on and manipulation by API providers. + - LightRAG provides advanced tooling for developers to build `agents`, `tools/function calls`, etc., without relying on any proprietary API provider's 'advanced' features such as `OpenAI` assistant, tools, and JSON format. + +3.
**Suited for Both Researchers and Production Engineers** + + We start with understanding the frontier of AI research and LLM productionalization, and our design is grounded in our understanding of all dataflow in LLM applications, and + the process of moving them from research to production. + + +**LightRAG vs other LLM libraries:** + + +**The LightRAG library is structured as follows:** + +#TODO: One diagram to make people understand lightrag faster + +* `core` - Base abstractions, core functions, and core components like `Generator` and `Embedder` to support more advanced components. +* `components` - Components that are built on top of the core directive. Users will install relevant dependencies on their own for some components. + + +**LightRAG documentation is divided into two parts:** + +* **Developer Documentation**: This documentation explains how LightRAG is designed in more depth and is especially useful + for developers who want to contribute to LightRAG. + +* **User Documentation**: This documentation is for users who want to use LightRAG to build their applications. + +We encourage all users to at least skim through the developer documentation. Different from "PyTorch" where a normal user does not have to customize a building module for a neural network, +LLM applications have a much bigger scope and vary even more across different product environments, so developers customizing components on their own is much more common. + +Developer documentation +======================= .. toctree:: :glob: @@ -42,36 +143,40 @@ LightRAG comes from the best of the AI research and engineering. Fundamentally, developer_notes/retriever developer_notes/data_classes + .. toctree:: - :glob: :maxdepth: 1 - :caption: Get Started - - get_started/installation - get_started/introduction + :caption: API Reference + apis/core/core apis/components/components ..
toctree:: + :glob: :maxdepth: 1 - :caption: Tutorials + :caption: Resources + + resources/resources + resources/contributing - tutorials/simpleQA +User documentation +======================= .. toctree:: + :glob: :maxdepth: 1 - :caption: API Reference + :caption: Get Started + + get_started/installation + get_started/introduction - apis/core/core - apis/components/components .. toctree:: - :glob: :maxdepth: 1 - :caption: Resources - - resources/resources - resources/contributing + :caption: Tutorials + + tutorials/simpleQA diff --git a/pyproject.toml b/pyproject.toml index 1db2c216..b8166fe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ datasets = "^2.19.1" sphinx = "^7.3.7" sphinx-rtd-theme = "^2.0.0" + [tool.poetry.group.dev.dependencies] langchain = "^0.1.16" llama-index = "^0.10.30"