Merge branch 'main' into transformers_cache
hudson-ai authored Jan 13, 2025
2 parents 8d17370 + 71f1a68 commit 2976cbd
Showing 146 changed files with 25,098 additions and 2,636 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/action_gpu_basic_tests.yml
@@ -62,7 +62,7 @@ jobs:
pip install accelerate
echo "=============================="
pip uninstall -y llama-cpp-python
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
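The `!=` pins above exclude specific llama-cpp-python releases from the install. A minimal sketch (using the third-party `packaging` library, not part of this commit) of how such an exclusion specifier behaves:

```python
# Sketch only: check that excluded llama-cpp-python releases do not satisfy
# the pip specifier used in the workflow above.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6")

print("0.3.6" in spec)  # False, the newly excluded release is rejected
print("0.3.5" in spec)  # True, any non-excluded version is accepted
```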
2 changes: 1 addition & 1 deletion .github/workflows/action_plain_basic_tests.yml
@@ -40,7 +40,7 @@ jobs:
pip install sentencepiece
echo "=============================="
pip uninstall -y llama-cpp-python
pip install "llama-cpp-python!=0.2.58,!=0.2.79,!=0.2.84"
pip install "llama-cpp-python!=0.2.58,!=0.2.79,!=0.2.84,!=0.3.6"
echo "=============================="
pip uninstall -y transformers
pip install "transformers!=4.43.0,!=4.43.1,!=4.43.2,!=4.43.3" # Issue 965
4 changes: 2 additions & 2 deletions .github/workflows/ci_tests.yml
@@ -57,7 +57,7 @@ jobs:
- name: GPU pip installs
run: |
pip install accelerate
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
@@ -153,7 +153,7 @@ jobs:
echo "======================"
nvcc --version
echo "======================"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.3.6"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
2 changes: 1 addition & 1 deletion .github/workflows/notebook_tests.yml
@@ -60,7 +60,7 @@ jobs:
- name: GPU pip installs
run: |
pip install accelerate
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
4 changes: 2 additions & 2 deletions .github/workflows/pypi_upload.yml
@@ -32,7 +32,7 @@ jobs:
cibuildwheel --print-build-identifiers --platform linux --archs x86_64 \
| jq -nRc '{"only": inputs, "os": "ubuntu-latest"}' \
&& cibuildwheel --print-build-identifiers --platform macos --archs x86_64 \
| jq -nRc '{"only": inputs, "os": "macos-12"}' \
| jq -nRc '{"only": inputs, "os": "macos-14"}' \
&& cibuildwheel --print-build-identifiers --platform macos --archs arm64 \
| jq -nRc '{"only": inputs, "os": "macos-latest"}' \
&& cibuildwheel --print-build-identifiers --platform windows --archs auto64 \
@@ -143,7 +143,7 @@ jobs:
needs: [assemble_wheels]
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-12, macos-latest]
os: [ubuntu-latest, windows-latest, macos-14, macos-latest]
python-version: ["3.11", "3.12"]
runs-on: ${{ matrix.os }}
steps:
32 changes: 2 additions & 30 deletions .github/workflows/workflow-pr-gate.yml
@@ -11,36 +11,10 @@ jobs:
# First Stage =======================================================================
# Linting and basic CPU-based tests

linting-black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# https://black.readthedocs.io/en/stable/integrations/github_actions.html
- uses: psf/black@stable
with:
options: "--diff" # Remove this to start enforcement

linting-mypy:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install .[all,test]
- name: Run mypy
run: |
python -m mypy guidance
bare-install:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-12]
os: [ubuntu-latest, windows-latest, macos-14]
python-version: ["3.9", "3.10", "3.11", "3.12"]
runs-on: ${{ matrix.os }}
steps:
@@ -77,8 +51,6 @@ jobs:

end-stage-1:
needs:
- linting-black
- linting-mypy
- bare-install
- basic-tests-linux-python-latest
name: End Stage 1
@@ -158,7 +130,7 @@ jobs:
- "llamacpp_phi3_mini_4k_instruct_cpu"
uses: ./.github/workflows/action_plain_basic_tests.yml
with:
os: macos-12
os: macos-14
python-version: ${{ matrix.python-version }}
model: ${{ matrix.model }}

2 changes: 1 addition & 1 deletion .gitignore
@@ -2,13 +2,13 @@ notebooks/local_scratch
__pycache__/
.vscode
.vs
.idea/
/build
/dist
*.egg-info
*.diskcache
.ipynb_checkpoints
node_modules
/client
.eggs/
.env
.DS_Store
11 changes: 5 additions & 6 deletions CONTRIBUTING.md
@@ -12,7 +12,7 @@ We welcome contributions to `guidance`, and this document exists to provide usef

The quickest way to get started is to run (in a fresh environment):
```bash
pip install -e .[all,test]
pip install -e .[all,test,bench]
```
which should bring in all of the basic required dependencies.
Note that if you want to use GPU acceleration, then you will need to do whatever is required to allow `torch` and `llama-cpp` to access your GPU too.
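As a quick sanity check (a sketch, mirroring the CUDA check the workflows above run), you can confirm that `torch` can see the GPU before running GPU-marked tests:

```python
# Minimal GPU visibility check; assumes a CUDA-enabled build of torch is installed.
import torch

assert torch.cuda.is_available(), "CUDA device not visible to torch"
print(torch.cuda.get_device_name(0))
```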
@@ -32,16 +32,16 @@ However, if you have your own means of installing Rust and CUDA, you should be a

## Running Tests

Because we run tests on GPU-equipped machines and also tests which call LLM endpoints, approval is required before our GitHub workflows will run on external Pull Requests.
To run a basic test suite locally, we suggest:
To run a basic test suite locally:
```bash
python -m pytest -m "not (needs_credentials or use_gpu or server)" ./tests/
python -m pytest ./tests/
```
which runs our basic test suite.
Where an LLM is required, this will default to using GPT2 on the CPU.

To change that default, run
```bash
python -m pytest -m "not (needs_credentials or use_gpu or server)" --selected_model <MODELNAME> ./tests/
python -m pytest --selected_model <MODELNAME> ./tests/
```
where `<MODELNAME>` is taken from the `AVAILABLE_MODELS` dictionary defined in `_llms_for_testing.py`.
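For orientation, a custom pytest option like `--selected_model` is typically registered in `conftest.py`; the following is a hypothetical sketch (names, defaults, and dictionary contents assumed, not taken from this repository):

```python
# Hypothetical conftest.py sketch: register a --selected_model option and
# expose the chosen AVAILABLE_MODELS entry as a session fixture.
import pytest

AVAILABLE_MODELS = {  # assumed contents, for illustration only
    "gpt2cpu": "transformers:gpt2",
    "llamacpp_phi3_mini_4k_instruct_cpu": "llama_cpp:phi-3-mini-4k-instruct",
}

def pytest_addoption(parser):
    parser.addoption(
        "--selected_model",
        action="store",
        default="gpt2cpu",
        choices=sorted(AVAILABLE_MODELS),
        help="Model used by LLM-backed tests",
    )

@pytest.fixture(scope="session")
def selected_model_name(pytestconfig):
    return AVAILABLE_MODELS[pytestconfig.getoption("--selected_model")]
```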

Expand All @@ -68,7 +68,6 @@ If your model requires credentials, then those will need to be added to our GitH
The endpoint itself (and any other required information) should be configured as environment variables too.
When the test runs, the environment variables will be set, and can then be used to configure the model as required.
See `test_azureai_openai.py` for examples of this being done.
The tests should also be marked as `needs_credentials` - if this is needed for the entire module, then `pytestmark` can be used - see `test_azureai_openai.py` again for this.

The environment variables and secrets will also need to be configured in the `ci_tests.yml` file.

1 change: 1 addition & 0 deletions MANIFEST.in
@@ -0,0 +1 @@
include resources/graphpaper-inline.html
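Including the built widget in the source distribution suggests it is read back as a package resource at runtime. A hedged sketch of one way to do that, with the path assumed from `build-to-guidance.sh` below rather than verified:

```python
# Sketch: read the bundled HTML from the installed guidance package.
from importlib.resources import files

html = files("guidance").joinpath("resources/graphpaper-inline.html").read_text(encoding="utf-8")
print(f"{len(html)} characters of widget HTML loaded")
```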
14 changes: 7 additions & 7 deletions README.md
@@ -21,11 +21,11 @@ Guidance is available through PyPI and supports a variety of backends (Transform
pip install guidance
```

_Note: To use Guidance on Phi models in Azure AI, or to use the new accelerated Rust-based parser, please install the release-candidate v0.2.0 guidance package_:
_Note: To use Guidance with our new accelerated Rust-based parser, please install the release-candidate v0.2.0 guidance package_:
```bash
pip install guidance --pre
```
For a detailed walkthrough of using Guidance on hosted Phi models, check the [Azure AI specific loading instructions.](#azure-ai) and the [Phi-3 + Guidance cookbook](https://github.com/microsoft/Phi-3CookBook/blob/main/code/01.Introduce/guidance.ipynb).
<!-- For a detailed walkthrough of using Guidance on hosted Phi models, check the [Azure AI specific loading instructions.](#azure-ai) and the [Phi-3 + Guidance cookbook](https://github.com/microsoft/Phi-3CookBook/blob/main/code/01.Introduce/guidance.ipynb). -->

<!-- <a href="https://www.youtube.com/watch?v=9oXjP5IIMzQ" aria-label="Watch demo"><img alt="Watch demo" src="docs/figures/watch_demo_button.png" width="120"></a> <a href="#get-started" aria-label="Get started"><img alt="Watch demo" src="docs/figures/get_started_button.png" width="120"></a> -->

@@ -477,7 +477,7 @@ def calculator_call(lm):
@guidance
def calculator(lm):
expression = lm['tool_args']
# You typically don't want to run eval directly for save reasons
# You typically don't want to run eval directly for security reasons
# Here we are guaranteed to only have mathematical expressions
lm += f' = {eval(expression)}'
return lm
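As an aside on the `eval` caveat in the comment above, a common alternative for untrusted arithmetic input is a small AST-based evaluator that whitelists operators; a sketch (not part of the README) follows:

```python
# Sketch: evaluate simple arithmetic without handing arbitrary code to eval().
import ast
import operator

_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.Pow: operator.pow,
    ast.USub: operator.neg,
}

def safe_arith(expr: str):
    def _eval(node):
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.operand))
        raise ValueError(f"disallowed expression: {expr!r}")
    return _eval(ast.parse(expr, mode="eval"))

print(safe_arith("1 + 2 * 3"))  # 7
```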
@@ -489,7 +489,7 @@ lm += gen(max_tokens=30, tools=[calculator_tool], stop='\n\n')


### Gsm8k example
Notice that the calculator is just called seamlessly during generation. Here is a more realistic exampe of the model solving a gsm8k question:
Notice that the calculator is just called seamlessly during generation. Here is a more realistic example of the model solving a gsm8k question:

```python
@guidance
@@ -669,7 +669,7 @@ from guidance import models
lm = models.Transformers(model_name_or_path)
```

### Azure AI
<!-- ### Azure AI
Azure AI is experimenting with a serverside Guidance integration, first available on the Phi-3.5-mini model. To use Guidance with AzureAI, you need to run the pre-release candidate of the `guidance` library (v0.2.0rc1).
```bash
@@ -689,7 +689,7 @@ phi3_api_key = os.getenv("AZURE_PHI3_KEY")
lm = AzureGuidance(f"{phi3_url}/guidance#auth={phi3_api_key}") # note the URL structure using the new /guidance endpoint
```
Pull the deployment URL and Key from the Azure deployment to instantiate the class. You can now attach _any_ stateless guidance function to the `AzureGuidance` lm, and have it execute in a single API call. Stateless guidance functions executing in the cloud benefit from many key guidance features the same way local models do, including token healing, guidance acceleration, and fine-grained model control. Considerable effort and resources went into preparing this experimental pre-release, so please let us know if you encounter any bugs or have helpful feedback!
Pull the deployment URL and Key from the Azure deployment to instantiate the class. You can now attach _any_ stateless guidance function to the `AzureGuidance` lm, and have it execute in a single API call. Stateless guidance functions executing in the cloud benefit from many key guidance features the same way local models do, including token healing, guidance acceleration, and fine-grained model control. Considerable effort and resources went into preparing this experimental pre-release, so please let us know if you encounter any bugs or have helpful feedback! -->

```python
@guidance(stateless=True) # Note the stateless=True flag in the decorator -- this enables maximal efficiency on the guidance program execution
@@ -714,7 +714,7 @@ character_lm = lm + character_maker(1, 'A nimble fighter', ['axe', 'sword', 'bow
```

### Vertex AI
Remote endpoints that don't have explicit guidance integration are run "optimistically". This means that all the text that can be forced is given to the model as a prompt (or chat context) and then the model is run in streaming mode without hard constrants (since the remote API doesn't support them). If the model ever violates the contraints then the model stream is stopped and we optionally try it again at that point. This means that all the API-supported control work as expected, and more complex controls/parsing that is not supported by the API work if the model stays consistent with the program.
Remote endpoints that don't have explicit guidance integration are run "optimistically". This means that all the text that can be forced is given to the model as a prompt (or chat context) and then the model is run in streaming mode without hard constraints (since the remote API doesn't support them). If the model ever violates the contraints then the model stream is stopped and we optionally try it again at that point. This means that all the API-supported control work as expected, and more complex controls/parsing that is not supported by the API work if the model stays consistent with the program.
```python
palm2 = models.VertexAI("text-bison@001")

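A hedged usage sketch of the "optimistic" mode described above, with a stateless guidance constraint attached to a remote model and enforced client-side by checking the stream (model name and prompt are illustrative):

```python
# Sketch: the remote API only sees prompts and basic parameters; guidance
# checks the streamed text against the select() constraint and stops or
# retries if the model drifts from it.
from guidance import models, select

palm2 = models.VertexAI("text-bison@001")
lm = palm2 + "Q: Is Paris the capital of France?\nA (yes or no): " + select(["yes", "no"], name="answer")
print(lm["answer"])
```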
3 changes: 3 additions & 0 deletions client/graphpaper-inline/.gitignore
@@ -0,0 +1,3 @@
node_modules/
build/
.DS_Store
3 changes: 3 additions & 0 deletions client/graphpaper-inline/TODO.txt
@@ -0,0 +1,3 @@
- Remove CDN font links (googlefonts)
- Image integration
- Testing
5 changes: 5 additions & 0 deletions client/graphpaper-inline/build-to-guidance.sh
@@ -0,0 +1,5 @@
#!/bin/bash
set -x

npm run build
cp dist/index.html ../../guidance/resources/graphpaper-inline.html
2 changes: 2 additions & 0 deletions client/graphpaper-inline/dist/.gitignore
@@ -0,0 +1,2 @@
*
!.gitignore
40 changes: 40 additions & 0 deletions client/graphpaper-inline/package.json
@@ -0,0 +1,40 @@
{
"name": "graphpaper",
"version": "0.0.1",
"scripts": {
"build": "rollup -c",
"dev": "rollup -c -w",
"start": "sirv dist"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^26.0.1",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-terser": "^0.4.4",
"@rollup/plugin-typescript": "^11.1.6",
"@types/d3-scale": "^4.0.8",
"@types/d3-scale-chromatic": "^3.0.3",
"@types/dompurify": "^3.0.5",
"autoprefixer": "^10.4.20",
"cssnano": "^7.0.5",
"postcss": "^8.4.41",
"rollup": "^4.21.0",
"rollup-plugin-copy": "^3.5.0",
"rollup-plugin-html-bundle": "^0.0.3",
"rollup-plugin-livereload": "^2.0.5",
"rollup-plugin-postcss": "^4.0.2",
"rollup-plugin-serve": "^1.1.1",
"rollup-plugin-svelte": "^7.2.2",
"sirv-cli": "^2.0.2",
"svelte": "^4.2.18",
"svelte-preprocess": "^6.0.2",
"tailwindcss": "^3.4.10",
"tslib": "^2.6.3",
"typescript": "^5.5.4"
},
"dependencies": {
"d3-interpolate": "^3.0.1",
"d3-scale": "^4.0.2",
"d3-scale-chromatic": "^3.1.0",
"dompurify": "^3.1.7"
}
}