Merge branch 'main' into transformers_cache
hudson-ai authored Jan 13, 2025
2 parents 8d17370 + 71f1a68 commit 2976cbd
Showing 146 changed files with 25,098 additions and 2,636 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/action_gpu_basic_tests.yml
@@ -62,7 +62,7 @@ jobs:
pip install accelerate
echo "=============================="
pip uninstall -y llama-cpp-python
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
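The `!=` pins above exclude specific llama-cpp-python releases from the install. A minimal sketch (using the third-party `packaging` library, not part of this commit) of how such an exclusion specifier behaves:

```python
# Sketch only: check that excluded llama-cpp-python releases do not satisfy
# the pip specifier used in the workflow above.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6")

print("0.3.6" in spec)  # False, the newly excluded release is rejected
print("0.3.5" in spec)  # True, any non-excluded version is accepted
```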
2 changes: 1 addition & 1 deletion .github/workflows/action_plain_basic_tests.yml
@@ -40,7 +40,7 @@ jobs:
pip install sentencepiece
echo "=============================="
pip uninstall -y llama-cpp-python
pip install "llama-cpp-python!=0.2.58,!=0.2.79,!=0.2.84"
pip install "llama-cpp-python!=0.2.58,!=0.2.79,!=0.2.84,!=0.3.6"
echo "=============================="
pip uninstall -y transformers
pip install "transformers!=4.43.0,!=4.43.1,!=4.43.2,!=4.43.3" # Issue 965
4 changes: 2 additions & 2 deletions .github/workflows/ci_tests.yml
@@ -57,7 +57,7 @@ jobs:
- name: GPU pip installs
run: |
pip install accelerate
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
@@ -153,7 +153,7 @@ jobs:
echo "======================"
nvcc --version
echo "======================"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.3.6"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
2 changes: 1 addition & 1 deletion .github/workflows/notebook_tests.yml
@@ -60,7 +60,7 @@ jobs:
- name: GPU pip installs
run: |
pip install accelerate
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84"
CMAKE_ARGS="-DGGML_CUDA=on" pip install "llama-cpp-python!=0.2.58,!=0.2.75,!=0.2.84,!=0.3.6"
- name: Check GPU available
run: |
python -c "import torch; assert torch.cuda.is_available()"
4 changes: 2 additions & 2 deletions .github/workflows/pypi_upload.yml
@@ -32,7 +32,7 @@ jobs:
cibuildwheel --print-build-identifiers --platform linux --archs x86_64 \
| jq -nRc '{"only": inputs, "os": "ubuntu-latest"}' \
&& cibuildwheel --print-build-identifiers --platform macos --archs x86_64 \
| jq -nRc '{"only": inputs, "os": "macos-12"}' \
| jq -nRc '{"only": inputs, "os": "macos-14"}' \
&& cibuildwheel --print-build-identifiers --platform macos --archs arm64 \
| jq -nRc '{"only": inputs, "os": "macos-latest"}' \
&& cibuildwheel --print-build-identifiers --platform windows --archs auto64 \
@@ -143,7 +143,7 @@ jobs:
needs: [assemble_wheels]
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-12, macos-latest]
os: [ubuntu-latest, windows-latest, macos-14, macos-latest]
python-version: ["3.11", "3.12"]
runs-on: ${{ matrix.os }}
steps:
32 changes: 2 additions & 30 deletions .github/workflows/workflow-pr-gate.yml
@@ -11,36 +11,10 @@ jobs:
# First Stage =======================================================================
# Linting and basic CPU-based tests

linting-black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# https://black.readthedocs.io/en/stable/integrations/github_actions.html
- uses: psf/black@stable
with:
options: "--diff" # Remove this to start enforcement

linting-mypy:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install .[all,test]
- name: Run mypy
run: |
python -m mypy guidance
bare-install:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-12]
os: [ubuntu-latest, windows-latest, macos-14]
python-version: ["3.9", "3.10", "3.11", "3.12"]
runs-on: ${{ matrix.os }}
steps:
@@ -77,8 +51,6 @@ jobs:

end-stage-1:
needs:
- linting-black
- linting-mypy
- bare-install
- basic-tests-linux-python-latest
name: End Stage 1
@@ -158,7 +130,7 @@ jobs:
- "llamacpp_phi3_mini_4k_instruct_cpu"
uses: ./.github/workflows/action_plain_basic_tests.yml
with:
os: macos-12
os: macos-14
python-version: ${{ matrix.python-version }}
model: ${{ matrix.model }}

2 changes: 1 addition & 1 deletion .gitignore
@@ -2,13 +2,13 @@ notebooks/local_scratch
__pycache__/
.vscode
.vs
.idea/
/build
/dist
*.egg-info
*.diskcache
.ipynb_checkpoints
node_modules
/client
.eggs/
.env
.DS_Store
11 changes: 5 additions & 6 deletions CONTRIBUTING.md
@@ -12,7 +12,7 @@ We welcome contributions to `guidance`, and this document exists to provide usef

The quickest way to get started is to run (in a fresh environment):
```bash
pip install -e .[all,test]
pip install -e .[all,test,bench]
```
which should bring in all of the basic required dependencies.
Note that if you want to use GPU acceleration, then you will need to do whatever is required to allow `torch` and `llama-cpp` to access your GPU too.
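As a quick sanity check (a sketch, mirroring the CUDA check the workflows above run), you can confirm that `torch` can see the GPU before running GPU-marked tests:

```python
# Minimal GPU visibility check; assumes a CUDA-enabled build of torch is installed.
import torch

assert torch.cuda.is_available(), "CUDA device not visible to torch"
print(torch.cuda.get_device_name(0))
```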
@@ -32,16 +32,16 @@ However, if you have your own means of installing Rust and CUDA, you should be a

## Running Tests

Because we run tests on GPU-equipped machines and also tests which call LLM endpoints, approval is required before our GitHub workflows will run on external Pull Requests.
To run a basic test suite locally, we suggest:
To run a basic test suite locally:
```bash
python -m pytest -m "not (needs_credentials or use_gpu or server)" ./tests/
python -m pytest ./tests/
```
which runs our basic test suite.
Where an LLM is required, this will default to using GPT2 on the CPU.

To change that default, run
```bash
python -m pytest -m "not (needs_credentials or use_gpu or server)" --selected_model <MODELNAME> ./tests/
python -m pytest --selected_model <MODELNAME> ./tests/
```
where `<MODELNAME>` is taken from the `AVAILABLE_MODELS` dictionary defined in `_llms_for_testing.py`.
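For orientation, a custom pytest option like `--selected_model` is typically registered in `conftest.py`; the following is a hypothetical sketch (names, defaults, and dictionary contents assumed, not taken from this repository):

```python
# Hypothetical conftest.py sketch: register a --selected_model option and
# expose the chosen AVAILABLE_MODELS entry as a session fixture.
import pytest

AVAILABLE_MODELS = {  # assumed contents, for illustration only
    "gpt2cpu": "transformers:gpt2",
    "llamacpp_phi3_mini_4k_instruct_cpu": "llama_cpp:phi-3-mini-4k-instruct",
}

def pytest_addoption(parser):
    parser.addoption(
        "--selected_model",
        action="store",
        default="gpt2cpu",
        choices=sorted(AVAILABLE_MODELS),
        help="Model used by LLM-backed tests",
    )

@pytest.fixture(scope="session")
def selected_model_name(pytestconfig):
    return AVAILABLE_MODELS[pytestconfig.getoption("--selected_model")]
```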

Expand All @@ -68,7 +68,6 @@ If your model requires credentials, then those will need to be added to our GitH
The endpoint itself (and any other required information) should be configured as environment variables too.
When the test runs, the environment variables will be set, and can then be used to configure the model as required.
See `test_azureai_openai.py` for examples of this being done.
The tests should also be marked as `needs_credentials` - if this is needed for the entire module, then `pytestmark` can be used - see `test_azureai_openai.py` again for this.

The environment variables and secrets will also need to be configured in the `ci_tests.yml` file.

1 change: 1 addition & 0 deletions MANIFEST.in
@@ -0,0 +1 @@
include resources/graphpaper-inline.html
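Including the built widget in the source distribution suggests it is read back as a package resource at runtime. A hedged sketch of one way to do that, with the path assumed from `build-to-guidance.sh` below rather than verified:

```python
# Sketch: read the bundled HTML from the installed guidance package.
from importlib.resources import files

html = files("guidance").joinpath("resources/graphpaper-inline.html").read_text(encoding="utf-8")
print(f"{len(html)} characters of widget HTML loaded")
```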
14 changes: 7 additions & 7 deletions README.md
@@ -21,11 +21,11 @@ Guidance is available through PyPI and supports a variety of backends (Transform
pip install guidance
```

_Note: To use Guidance on Phi models in Azure AI, or to use the new accelerated Rust-based parser, please install the release-candidate v0.2.0 guidance package_:
_Note: To use Guidance with our new accelerated Rust-based parser, please install the release-candidate v0.2.0 guidance package_:
```bash
pip install guidance --pre
```
For a detailed walkthrough of using Guidance on hosted Phi models, check the [Azure AI specific loading instructions.](#azure-ai) and the [Phi-3 + Guidance cookbook](https://github.com/microsoft/Phi-3CookBook/blob/main/code/01.Introduce/guidance.ipynb).
<!-- For a detailed walkthrough of using Guidance on hosted Phi models, check the [Azure AI specific loading instructions.](#azure-ai) and the [Phi-3 + Guidance cookbook](https://github.com/microsoft/Phi-3CookBook/blob/main/code/01.Introduce/guidance.ipynb). -->

<!-- <a href="https://www.youtube.com/watch?v=9oXjP5IIMzQ" aria-label="Watch demo"><img alt="Watch demo" src="docs/figures/watch_demo_button.png" width="120"></a> <a href="#get-started" aria-label="Get started"><img alt="Watch demo" src="docs/figures/get_started_button.png" width="120"></a> -->

@@ -477,7 +477,7 @@ def calculator_call(lm):
@guidance
def calculator(lm):
expression = lm['tool_args']
# You typically don't want to run eval directly for save reasons
# You typically don't want to run eval directly for security reasons
# Here we are guaranteed to only have mathematical expressions
lm += f' = {eval(expression)}'
return lm
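As an aside on the `eval` caveat in the comment above, a common alternative for untrusted arithmetic input is a small AST-based evaluator that whitelists operators; a sketch (not part of the README) follows:

```python
# Sketch: evaluate simple arithmetic without handing arbitrary code to eval().
import ast
import operator

_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.Pow: operator.pow,
    ast.USub: operator.neg,
}

def safe_arith(expr: str):
    def _eval(node):
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.operand))
        raise ValueError(f"disallowed expression: {expr!r}")
    return _eval(ast.parse(expr, mode="eval"))

print(safe_arith("1 + 2 * 3"))  # 7
```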
@@ -489,7 +489,7 @@ lm += gen(max_tokens=30, tools=[calculator_tool], stop='\n\n')


### Gsm8k example
Notice that the calculator is just called seamlessly during generation. Here is a more realistic exampe of the model solving a gsm8k question:
Notice that the calculator is just called seamlessly during generation. Here is a more realistic example of the model solving a gsm8k question:

```python
@guidance
@@ -669,7 +669,7 @@ from guidance import models
lm = models.Transformers(model_name_or_path)
```

### Azure AI
<!-- ### Azure AI
Azure AI is experimenting with a serverside Guidance integration, first available on the Phi-3.5-mini model. To use Guidance with AzureAI, you need to run the pre-release candidate of the `guidance` library (v0.2.0rc1).
```bash
@@ -689,7 +689,7 @@ phi3_api_key = os.getenv("AZURE_PHI3_KEY")
lm = AzureGuidance(f"{phi3_url}/guidance#auth={phi3_api_key}") # note the URL structure using the new /guidance endpoint
```
Pull the deployment URL and Key from the Azure deployment to instantiate the class. You can now attach _any_ stateless guidance function to the `AzureGuidance` lm, and have it execute in a single API call. Stateless guidance functions executing in the cloud benefit from many key guidance features the same way local models do, including token healing, guidance acceleration, and fine-grained model control. Considerable effort and resources went into preparing this experimental pre-release, so please let us know if you encounter any bugs or have helpful feedback!
Pull the deployment URL and Key from the Azure deployment to instantiate the class. You can now attach _any_ stateless guidance function to the `AzureGuidance` lm, and have it execute in a single API call. Stateless guidance functions executing in the cloud benefit from many key guidance features the same way local models do, including token healing, guidance acceleration, and fine-grained model control. Considerable effort and resources went into preparing this experimental pre-release, so please let us know if you encounter any bugs or have helpful feedback! -->

```python
@guidance(stateless=True) # Note the stateless=True flag in the decorator -- this enables maximal efficiency on the guidance program execution
@@ -714,7 +714,7 @@ character_lm = lm + character_maker(1, 'A nimble fighter', ['axe', 'sword', 'bow
```

### Vertex AI
Remote endpoints that don't have explicit guidance integration are run "optimistically". This means that all the text that can be forced is given to the model as a prompt (or chat context) and then the model is run in streaming mode without hard constrants (since the remote API doesn't support them). If the model ever violates the contraints then the model stream is stopped and we optionally try it again at that point. This means that all the API-supported control work as expected, and more complex controls/parsing that is not supported by the API work if the model stays consistent with the program.
Remote endpoints that don't have explicit guidance integration are run "optimistically". This means that all the text that can be forced is given to the model as a prompt (or chat context) and then the model is run in streaming mode without hard constraints (since the remote API doesn't support them). If the model ever violates the contraints then the model stream is stopped and we optionally try it again at that point. This means that all the API-supported control work as expected, and more complex controls/parsing that is not supported by the API work if the model stays consistent with the program.
```python
palm2 = models.VertexAI("text-bison@001")

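A hedged usage sketch of the "optimistic" mode described above, with a stateless guidance constraint attached to a remote model and enforced client-side by checking the stream (model name and prompt are illustrative):

```python
# Sketch: the remote API only sees prompts and basic parameters; guidance
# checks the streamed text against the select() constraint and stops or
# retries if the model drifts from it.
from guidance import models, select

palm2 = models.VertexAI("text-bison@001")
lm = palm2 + "Q: Is Paris the capital of France?\nA (yes or no): " + select(["yes", "no"], name="answer")
print(lm["answer"])
```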
3 changes: 3 additions & 0 deletions client/graphpaper-inline/.gitignore
@@ -0,0 +1,3 @@
node_modules/
build/
.DS_Store
3 changes: 3 additions & 0 deletions client/graphpaper-inline/TODO.txt
@@ -0,0 +1,3 @@
- Remove CDN font links (googlefonts)
- Image integration
- Testing
5 changes: 5 additions & 0 deletions client/graphpaper-inline/build-to-guidance.sh
@@ -0,0 +1,5 @@
#!/bin/bash
set -x

npm run build
cp dist/index.html ../../guidance/resources/graphpaper-inline.html
2 changes: 2 additions & 0 deletions client/graphpaper-inline/dist/.gitignore
@@ -0,0 +1,2 @@
*
!.gitignore
40 changes: 40 additions & 0 deletions client/graphpaper-inline/package.json
@@ -0,0 +1,40 @@
{
"name": "graphpaper",
"version": "0.0.1",
"scripts": {
"build": "rollup -c",
"dev": "rollup -c -w",
"start": "sirv dist"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^26.0.1",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-terser": "^0.4.4",
"@rollup/plugin-typescript": "^11.1.6",
"@types/d3-scale": "^4.0.8",
"@types/d3-scale-chromatic": "^3.0.3",
"@types/dompurify": "^3.0.5",
"autoprefixer": "^10.4.20",
"cssnano": "^7.0.5",
"postcss": "^8.4.41",
"rollup": "^4.21.0",
"rollup-plugin-copy": "^3.5.0",
"rollup-plugin-html-bundle": "^0.0.3",
"rollup-plugin-livereload": "^2.0.5",
"rollup-plugin-postcss": "^4.0.2",
"rollup-plugin-serve": "^1.1.1",
"rollup-plugin-svelte": "^7.2.2",
"sirv-cli": "^2.0.2",
"svelte": "^4.2.18",
"svelte-preprocess": "^6.0.2",
"tailwindcss": "^3.4.10",
"tslib": "^2.6.3",
"typescript": "^5.5.4"
},
"dependencies": {
"d3-interpolate": "^3.0.1",
"d3-scale": "^4.0.2",
"d3-scale-chromatic": "^3.1.0",
"dompurify": "^3.1.7"
}
}