lifeomic · epeters3 · Dec 17, 2024 · Dec 17, 2024
diff --git a/.github/workflows/pr-branch-build.yml b/.github/workflows/pr-branch-build.yml
@@ -11,16 +11,16 @@ jobs:
 
     strategy:
       matrix:
-        python_version: ["3.8", "3.9", "3.10", "3.11"]
+        python_version: ["3.9", "3.10", "3.11"]
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Install poetry
-        run: pipx install poetry==1.5.1
+        run: pipx install poetry==1.8.5
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python_version }}
           cache: "poetry"
@@ -32,5 +32,5 @@ jobs:
           poetry build -f sdist
           poetry build -f wheel
 
-      - if: ${{ matrix.python_version == '3.8' }}
+      - if: ${{ matrix.python_version == '3.9' }}
         run: poetry run poe doc
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -10,15 +10,15 @@ jobs:
     runs-on: ubuntu-latest
     environment: pypi
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Install poetry
-        run: pipx install poetry==1.5.1
+        run: pipx install poetry==1.8.5
 
-      - name: Set up Python 3.8
-        uses: actions/setup-python@v4
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.8" # pdoc works under this version
+          python-version: "3.9" # pdoc works under this version
           cache: "poetry"
 
       - run: poetry install

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,19 @@ and this project adheres to
 
 _(NOTE: All examples use fictitious data or freely available data sets.)_
 
+## [1.0.0] - 2024-12-17
+
+### Removed
+
+- Support for Python 3.8 has been dropped, since 3.8 has reached end-of-life:
+  https://devguide.python.org/versions/
+
+### Changed
+
+- This project now uses `pydantic` v2 instead of v1.
+- **BREAKING**: `PagingApiOptions.dict()` has been renamed to
+  `PagingApiOptions.model_dump()`.
+
 ## [0.36.0] - 2024-11-15
 
 ### Added

diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
 # PHC SDK for Python
 
-The phc-sdk-py is a developer kit for interfacing with the [PHC API](https://api.docs.lifeomic.com/) on Python 3.7 and above.
+The phc-sdk-py is a developer kit for interfacing with the
+[PHC API](https://api.docs.lifeomic.com/) on Python 3.9 and above.
 
 ## Project Status
 
@@ -15,23 +16,27 @@ The phc-sdk-py is a developer kit for interfacing with the [PHC API](https://api
 
 ### Dependencies
 
-- [Python 3](https://www.python.org/download/releases/3.0/) version >= 3.8
+- [Python 3](https://www.python.org/download/releases/3.0/) version >= 3.9
 
 ### Getting the Source
 
 This project is [hosted on GitHub](https://github.com/lifeomic/phc-sdk-py).
 
 ### Usage
 
-A `Session` needs to be created first that stores the token and account information needed to access the PHC API. One can currently using API Key tokens generated from the PHC Account, or OAuth tokens generated using the [CLI](https://github.com/lifeomic/cli).
+A `Session` needs to be created first that stores the token and account
+information needed to access the PHC API. One can currently use API Key tokens
+generated from the PHC Account, or OAuth tokens generated using the
+[CLI](https://github.com/lifeomic/cli).
 
 ```python
 from phc import Session
 
 session = Session(token=<TOKEN VALUE>, account="myaccount")
 ```
 
-Once a `Session` is created, you can then access the different parts of the platform.
+Once a `Session` is created, you can then access the different parts of the
+platform.
 
 ```python
 from phc.services import Accounts
@@ -42,18 +47,24 @@ myaccounts = accounts.get_list()
 
 ## Contributing
 
-We encourage public contributions! Please review [CONTRIBUTING.md](CONTRIBUTING.md) and [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) for details on our code of conduct and development process.
+We encourage public contributions! Please review
+[CONTRIBUTING.md](CONTRIBUTING.md) and [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)
+for details on our code of conduct and development process.
 
 ## License
 
-This project is licensed under the MIT License - see [LICENSE](LICENSE) file for details.
+This project is licensed under the MIT License - see [LICENSE](LICENSE) file for
+details.
 
 ## Authors
 
-See the list of [contributors](https://github.com/lifeomic/phc-sdk-py/contributors) who participate in this project.
+See the list of
+[contributors](https://github.com/lifeomic/phc-sdk-py/contributors) who
+participate in this project.
 
 ## Acknowledgements
 
 This project is built with the following:
 
-- [aiohttp](https://aiohttp.readthedocs.io/en/stable/) - Asynchronous HTTP Client/Server for asyncio and Python.
+- [aiohttp](https://aiohttp.readthedocs.io/en/stable/) - Asynchronous HTTP
+  Client/Server for asyncio and Python.
diff --git a/bin/one-schema.py b/bin/one-schema.py
@@ -8,13 +8,13 @@
 import boto3
 from fire import Fire
 from datamodel_code_generator import (
+    DataModelType,
     InputFileType,
     LiteralType,
     OpenAPIScope,
     PythonVersion,
     generate,
 )
-from datamodel_code_generator import DataModelType
 
 
 def fetch_remote_schema(*, source: str, output: str):
@@ -65,7 +65,7 @@ def _generate_data_models(schema_path: str) -> str:
                 OpenAPIScope.Parameters,
             ],
             # Format of the types generated.
-            output_model_type=DataModelType.PydanticBaseModel,
+            output_model_type=DataModelType.PydanticV2BaseModel,
             target_python_version=PythonVersion.PY_38,
             # Copy doc strings into the source code.
             use_schema_description=True,

diff --git a/phc/__init__.py b/phc/__init__.py
@@ -1,6 +1,7 @@
 """
 .. include:: ../README.md
 """
+
 import nest_asyncio
 from phc.session import Session
 from phc.api_response import ApiResponse

diff --git a/phc/base_client.py b/phc/base_client.py
@@ -1,4 +1,5 @@
 """A Python module for a base PHC web client."""
+
 import asyncio
 import platform
 import sys

diff --git a/phc/easy/abstract/paging_api_item.py b/phc/easy/abstract/paging_api_item.py
@@ -17,8 +17,8 @@ class PagingApiOptions(BaseModel):
     def transform(key, value):
         return (key, value)
 
-    def dict(self):
-        raw = super().dict()
+    def model_dump(self):
+        raw = super().model_dump()
 
         def preprocess_value(v):
             if isinstance(v, Enum):
@@ -68,7 +68,7 @@ def params_class() -> type:
     @classmethod
     def process_params(cls, params: dict) -> dict:
         "Validates and transforms the API query parameters"
-        return cls.params_class()(**params).dict()
+        return cls.params_class()(**params).model_dump()
 
     @staticmethod
     def transform_results(data_frame: pd.DataFrame, **expand_args):

diff --git a/phc/easy/ocr/options/ocr_config_types.py b/phc/easy/ocr/options/ocr_config_types.py
@@ -125,12 +125,10 @@ class Config(BaseModel):
 
 
 class OcrConfigPayload(BaseModel):
-    project: constr(
-        regex=r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-4[0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$",
+    project: str = Field(
+        pattern=r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-4[0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$",
         min_length=36,
         max_length=36,
-    ) = Field(
-        ...,
         examples=["e97e27d3-f553-432a-bde1-7ae3d3ba5078"],
         title="Project Id",
     )

diff --git a/phc/easy/ocr/suggestion.py b/phc/easy/ocr/suggestion.py
@@ -197,16 +197,19 @@ def expand_nested_array_column(df: pd.DataFrame, key: str, lprefix=""):
 
     expanded = pd.concat(
         df.apply(
-            lambda x: pd.concat(
-                [
-                    pd.DataFrame(
-                        [{"index": x.name, "_item": i, **v} for v in array]
-                    )
-                    for i, array in enumerate(x[key])
-                ]
-            )
-            # pd.concat does not like an empty array so we avoid that situation
-            if x[key] != [] else pd.DataFrame(),
+            lambda x: (
+                pd.concat(
+                    [
+                        pd.DataFrame(
+                            [{"index": x.name, "_item": i, **v} for v in array]
+                        )
+                        for i, array in enumerate(x[key])
+                    ]
+                )
+                # pd.concat does not like an empty array so we avoid that situation
+                if x[key] != []
+                else pd.DataFrame()
+            ),
             axis=1,
         ).values
     ).add_prefix(lprefix)

diff --git a/phc/easy/omics/gene.py b/phc/easy/omics/gene.py
@@ -19,9 +19,9 @@ def get_data_frame(search: str = "", auth_args: Auth = Auth.shared()):
 
         if "alias" in frame.columns:
             frame["alias"] = frame.alias.apply(
-                lambda aliases: ",".join(aliases)
-                if isinstance(aliases, list)
-                else None
+                lambda aliases: (
+                    ",".join(aliases) if isinstance(aliases, list) else None
+                )
             )
 
         # We choose to not expand topCancerDrivers and cancerDrivers since it

diff --git a/phc/easy/omics/options/genomic_copy_number_variant.py b/phc/easy/omics/options/genomic_copy_number_variant.py
@@ -16,7 +16,7 @@
 class GenomicCopyNumberVariantOptions(PagingApiOptions):
     """Options to pass to `/v1/genomics/copy-numbers`"""
 
-    variant_set_ids: List[str] = Field(..., min_items=1)
+    variant_set_ids: List[str] = Field(..., min_length=1)
     include: List[GenomicVariantInclude] = []
     gene: List[str] = []
     interpretation: List[str] = []

diff --git a/phc/easy/omics/options/genomic_expression.py b/phc/easy/omics/options/genomic_expression.py
@@ -5,12 +5,6 @@
 from phc.easy.abstract.paging_api_item import PagingApiOptions
 from phc.easy.omics.options.common import GenomicVariantInclude
 
-EXPRESSION = constr(
-    regex=r"^(\d+(\.\d+)?\-\d+(\.\d+)?|[\>\<]\=\s?\d+(\.\d+)?|\d+(\.\d+)?:(lte|gte))$"
-)
-
-ORDER_BY = constr(regex=r"^expression(:desc)?$")
-
 MAPPINGS = {
     "variant_set_ids": "rnaQuantificationSetIds",
     "outlier_std_dev": "outlierStdDev",
@@ -22,11 +16,11 @@
 class GenomicExpressionOptions(PagingApiOptions):
     """Options to pass to `/v1/genomics/expressions`"""
 
-    variant_set_ids: List[str] = Field(..., min_items=1)
+    variant_set_ids: List[str] = Field(..., min_length=1)
     include: List[GenomicVariantInclude] = []
     gene: List[str] = []
-    expression: Optional[EXPRESSION] = None
-    order_by: Optional[ORDER_BY] = None
+    expression: Optional[str] = Field(None, pattern=r"^(\d+(\.\d+)?-\d+(\.\d+)?|[><]=\s?\d+(\.\d+)?|\d+(\.\d+)?:(lte|gte))$")
+    order_by: Optional[str] = Field(None, pattern=r"^expression(:desc)?$")
     in_ckb: Optional[bool] = None
     # TODO: Fill out allowed options for this parameter
     outlier_std_dev: Optional[str] = None

diff --git a/phc/easy/omics/options/genomic_short_variant.py b/phc/easy/omics/options/genomic_short_variant.py
@@ -12,8 +12,8 @@
 from pydantic import Field, constr
 
 RS_ID = r"^rs(\d+)$"
-NUM_DECIMAL_RANGE = constr(regex=r"^\d+(\.\d+)?\-\d+(\.\d+)?$")
-NUM_RANGE = constr(regex=r"^(\d+\-\d+|\d+)$")
+NUM_DECIMAL_RANGE = constr(pattern=r"^\d+(\.\d+)?\-\d+(\.\d+)?$")
+NUM_RANGE = constr(pattern=r"^(\d+\-\d+|\d+)$")
 
 MAPPINGS = {
     "variant_set_ids": "variantSetIds",
@@ -68,10 +68,10 @@ class GenomicShortVariantOptions(PagingApiOptions):
     # - Combined In Silico Prediction
     # - Individual In Silico Predictors
 
-    variant_set_ids: List[str] = Field(..., min_items=1)
+    variant_set_ids: List[str] = Field(..., min_length=1)
     include: List[GenomicVariantInclude] = ["vcf"]
     gene: List[str] = []
-    rs_id: List[constr(regex=RS_ID)] = []
+    rs_id: List[constr(pattern=RS_ID)] = []
     chromosome: List[Chromosome] = []
     clinvar_allele_id: List[str] = []
     clinvar_disease: List[str] = []
@@ -114,7 +114,7 @@ class GenomicShortVariantOptions(PagingApiOptions):
     alt_read_depth: List[str] = []
     ref_read_depth: List[str] = []
     variant_filter: List[str] = []
-    in_ckb: Optional[bool]
+    in_ckb: Optional[bool] = None
 
     @staticmethod
     def transform(key, value):

diff --git a/phc/easy/omics/options/genomic_structural_variant.py b/phc/easy/omics/options/genomic_structural_variant.py
@@ -14,7 +14,7 @@
 
 
 class GenomicStructuralVariantOptions(PagingApiOptions):
-    variant_set_ids: List[str] = Field(..., min_items=1)
+    variant_set_ids: List[str] = Field(..., min_length=1)
     gene: List[str] = []
     effect: List[StructuralType] = []
     interpretation: List[str] = []

diff --git a/phc/easy/query/__init__.py b/phc/easy/query/__init__.py
@@ -297,9 +297,11 @@ def execute_paging_api(
         )
 
         results = with_progress(
-            lambda: (progress if progress is not None else tqdm())
-            if show_progress
-            else None,
+            lambda: (
+                (progress if progress is not None else tqdm())
+                if show_progress
+                else None
+            ),
             lambda progress: recursive_paging_api_call(
                 path,
                 params=params,
@@ -499,21 +501,25 @@ def agg_composite_to_frame(prefix: str, data: dict):
                 ]
             )
             .pipe(
-                lambda df: df
-                if len(df) == 0 or display_query is None
-                # Poor man's way to filter only matching codes (since Elasticsearch
-                # returns records which will include other codes)
-                else df[
-                    df["display"]
-                    .str.lower()
-                    .str.contains(display_query.lower())
-                ]
+                lambda df: (
+                    df
+                    if len(df) == 0 or display_query is None
+                    # Poor man's way to filter only matching codes (since Elasticsearch
+                    # returns records which will include other codes)
+                    else df[
+                        df["display"]
+                        .str.lower()
+                        .str.contains(display_query.lower())
+                    ]
+                )
             )
             .pipe(
-                lambda df: pd.DataFrame()
-                if len(df) == 0
-                else df.sort_values("doc_count", ascending=False).reset_index(
-                    drop=True
+                lambda df: (
+                    pd.DataFrame()
+                    if len(df) == 0
+                    else df.sort_values(
+                        "doc_count", ascending=False
+                    ).reset_index(drop=True)
                 )
             )
         )