Merge branch 'master' into microphone-selection

AssemblyAI · Sep 16, 2024 · 9324e15 · 9324e15
2 parents 8f8509d + 712a78b
commit 9324e15
Show file tree

Hide file tree

Showing 5 changed files with 139 additions and 45 deletions.
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,92 @@
+name: Code Quality
+
+# Controls when the workflow will run
+on:
+  # Triggers the workflow on pushes to the "master" branch and on every pull request (pull_request has no branch filter)
+  push:
+    branches: ["master"]
+  pull_request:
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+concurrency:
+    # Cancel in-progress runs of the same PR or branch, except on 'master': there the run_id is appended to the group, so every run gets its own group and cancel-in-progress is false.
+    # See https://docs.github.com/en/actions/using-jobs/using-concurrency and https://docs.github.com/en/actions/learn-github-actions/contexts for more info.
+    group: concurrency-group::${{ github.workflow }}::${{ github.event.pull_request.number > 0 && format('pr-{0}', github.event.pull_request.number) || github.ref_name }}${{ github.ref_name == 'master' && format('::{0}', github.run_id) || ''}}
+    cancel-in-progress: ${{ github.ref_name != 'master' }}
+
+jobs:
+  ruff:
+    needs: []
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        # Collect the added or modified Python files under assemblyai/ (outputs `python` and, in `shell` list format, `python_files`).
+      - uses: dorny/paths-filter@v2
+        id: filter
+        with:
+          list-files: shell
+          filters: |
+            python:
+              - added|modified: 'assemblyai/**/*.py'
+        # Count the filtered files and publish the number as the `count` step output; skipped entirely when the filter matched nothing.
+      - run: |
+          if [ '${{ steps.filter.outputs.python_files }}' != '' ]; then
+            echo count=$(ls ${{ steps.filter.outputs.python_files }} | wc -l) >> "$GITHUB_OUTPUT"
+          else
+            echo count=0 >> "$GITHUB_OUTPUT"
+          fi
+        id: counter
+        if: ${{ steps.filter.outputs.python == 'true' }}
+        shell: bash
+        name: Run count files
+        # Run the ruff format check and then the ruff linter on the filtered files, only when `count` is greater than 0.
+      - uses: chartboost/ruff-action@v1
+        name: Run 'ruff format --check --config ./ruff.toml'
+        if: ${{ steps.counter.outputs.count > 0 }}
+        with:
+          version: 0.3.5
+          args: 'format --check --config ./ruff.toml'
+          src: ${{ steps.filter.outputs.python_files }}
+      - uses: chartboost/ruff-action@v1
+        name: Run 'ruff'
+        if: ${{ steps.counter.outputs.count > 0 }}
+        with:
+          version: 0.3.5
+          args: '--config ./ruff.toml'
+          src: ${{ steps.filter.outputs.python_files }}
+
+  mypy:
+    needs: []
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      # Collect the added or modified Python files under assemblyai/ (outputs `python` and, in `shell` list format, `python_files`).
+      - uses: dorny/paths-filter@v2
+        id: filter
+        with:
+          list-files: shell
+          filters: |
+            python:
+              - added|modified: 'assemblyai/**/*.py'
+        # Count the filtered files and publish the number as the `count` step output; skipped entirely when the filter matched nothing.
+      - run: |
+          if [ '${{ steps.filter.outputs.python_files }}' != '' ]; then
+            echo count=$(ls ${{ steps.filter.outputs.python_files }} | wc -l) >> "$GITHUB_OUTPUT"
+          else
+            echo count=0 >> "$GITHUB_OUTPUT"
+          fi
+        id: counter
+        if: ${{ steps.filter.outputs.python == 'true' }}
+        shell: bash
+        name: Run count files
+        # Set up Python 3.9, install the pinned mypy and type-check only the filtered files; each step runs only when `count` is greater than 0.
+      - uses: actions/setup-python@v4
+        if: ${{ steps.counter.outputs.count > 0 }}
+        with:
+          python-version: '3.9'
+      - run: pip install mypy==1.5.1
+        if: ${{ steps.counter.outputs.count > 0 }}
+      - run: mypy ${{ steps.filter.outputs.python_files }} --follow-imports=silent --ignore-missing-imports
+        if: ${{ steps.counter.outputs.count > 0 }}
diff --git a/assemblyai/__version__.py b/assemblyai/__version__.py
@@ -1 +1 @@
-__version__ = "0.32.0"
+__version__ = "0.33.0"
diff --git a/assemblyai/types.py b/assemblyai/types.py
@@ -550,19 +550,17 @@ class RawTranscriptionConfig(BaseModel):
     """
     Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
 
-    Automatic Language Detection is supported for the following languages:
+    See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
+    """
 
-        - English
-        - Spanish
-        - French
-        - German
-        - Italian
-        - Portuguese
-        - Dutch
+    language_confidence_threshold: Optional[float]
+    """
+    The confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
+    if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
     """
 
     speech_threshold: Optional[float]
-    "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
+    "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
 
     speech_model: Optional[SpeechModel]
     """
@@ -608,6 +606,7 @@ def __init__(
         summary_type: Optional[SummarizationType] = None,
         auto_highlights: Optional[bool] = None,
         language_detection: Optional[bool] = None,
+        language_confidence_threshold: Optional[float] = None,
         speech_threshold: Optional[float] = None,
         raw_transcription_config: Optional[RawTranscriptionConfig] = None,
         speech_model: Optional[SpeechModel] = None,
@@ -644,8 +643,10 @@ def __init__(
             summary_model: The summarization model to use in case `summarization` is enabled
             summary_type: The summarization type to use in case `summarization` is enabled
             auto_highlights: Detect important phrases and words in your transcription text.
-            language_detection: Identify the dominant language that’s spoken in an audio file, and route the file to the appropriate model for the detected language.
-            speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive
+            language_detection: Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
+            language_confidence_threshold: The confidence threshold that must be reached if `language_detection` is enabled.
+                An error will be returned if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
+            speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive.
             raw_transcription_config: Create the config from a `RawTranscriptionConfig`
         """
         self._raw_transcription_config = raw_transcription_config
@@ -691,6 +692,7 @@ def __init__(
         )
         self.auto_highlights = auto_highlights
         self.language_detection = language_detection
+        self.language_confidence_threshold = language_confidence_threshold
         self.speech_threshold = speech_threshold
         self.speech_model = speech_model
 
@@ -1021,19 +1023,26 @@ def language_detection(self, enable: Optional[bool]) -> None:
         """
         Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
 
-        Automatic Language Detection is supported for the following languages:
-
-            - English
-            - Spanish
-            - French
-            - German
-            - Italian
-            - Portuguese
-            - Dutch
+        See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
         """
 
         self._raw_transcription_config.language_detection = enable
 
+    @property
+    def language_confidence_threshold(self) -> Optional[float]:
+        "Returns the confidence threshold that must be reached for automatic language detection."
+
+        return self._raw_transcription_config.language_confidence_threshold
+
+    @language_confidence_threshold.setter
+    def language_confidence_threshold(self, threshold: Optional[float]) -> None:
+        """
+        Set the confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
+        if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
+        """
+
+        self._raw_transcription_config.language_confidence_threshold = threshold
+
     @property
     def speech_threshold(self) -> Optional[float]:
         "Returns the current speech threshold."
@@ -1042,10 +1051,7 @@ def speech_threshold(self) -> Optional[float]:
 
     @speech_threshold.setter
     def speech_threshold(self, threshold: Optional[float]) -> None:
-        "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
-
-        if threshold is not None and (threshold < 0 or threshold > 1):
-            raise ValueError("speech_threshold must be between 0 and 1 (inclusive).")
+        "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
 
         self._raw_transcription_config.speech_threshold = threshold
 
@@ -1638,17 +1644,15 @@ class BaseTranscript(BaseModel):
     """
     Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
 
-    Automatic Language Detection is supported for the following languages:
-
-        - English
-        - Spanish
-        - French
-        - German
-        - Italian
-        - Portuguese
-        - Dutch
+    See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
     """
 
+    language_confidence_threshold: Optional[float]
+    "The confidence threshold that must be reached if `language_detection` is enabled."
+
+    language_confidence: Optional[float]
+    "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)."
+
     speech_threshold: Optional[float]
     "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
 

diff --git a/ruff.toml b/ruff.toml
@@ -0,0 +1,9 @@
+[lint]
+# Enable default rules plus I (isort) and S101 (check for asserts).
+select = ["I", "E4", "E7", "E9", "F", "S101"]
+
+[lint.per-file-ignores]
+# Ignore E402 (module-level import not at top of file) in all __init__.py files.
+"__init__.py" = ["E402"]
+# Ignore assert checks in all test files.
+"**/*test*.py" = ["S101"]
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
@@ -18,14 +18,3 @@ def test_configuration_are_none_by_default():
             pytest.fail(
                 f"Configuration field {name} is {value} and not None by default."
             )
-
-
-def test_speech_threshold_fails_if_outside_range():
-    """
-    Tests that an exception is raised if the value for speech_threshold is outside the range of [0, 1].
-    """
-
-    with pytest.raises(ValueError, match="speech_threshold"):
-        aai.TranscriptionConfig(speech_threshold=1.5)
-    with pytest.raises(ValueError, match="speech_threshold"):
-        aai.TranscriptionConfig(speech_threshold=-0.5)