Skip to content

Commit

Permalink
Merge branch 'master' into microphone-selection
Browse files Browse the repository at this point in the history
  • Loading branch information
ploeber authored Sep 16, 2024
2 parents 8f8509d + 712a78b commit 9324e15
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 45 deletions.
92 changes: 92 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
name: Code Quality

# Controls when the workflow will run.
on:
  # Run on pushes to the "master" branch only.
  push:
    branches: ["master"]
  # Run on every pull request (no branch filter is applied here).
  pull_request:

  # Allows you to run this workflow manually from the Actions tab.
  workflow_dispatch:

concurrency:
  # Cancel previous runs from the same PR or branch, except on the 'master' branch.
  # The group key is "pr-<number>" for pull requests and the ref name otherwise;
  # on 'master' the run id is appended so every run gets its own group and is
  # never cancelled or queued behind another one.
  # See https://docs.github.com/en/actions/using-jobs/using-concurrency and
  # https://docs.github.com/en/actions/learn-github-actions/contexts for more info.
  group: concurrency-group::${{ github.workflow }}::${{ github.event.pull_request.number > 0 && format('pr-{0}', github.event.pull_request.number) || github.ref_name }}${{ github.ref_name == 'master' && format('::{0}', github.run_id) || ''}}
  cancel-in-progress: ${{ github.ref_name != 'master' }}

jobs:
  # Formatting and lint checks with ruff, limited to the Python files under
  # assemblyai/ that were added or modified by the change.
  ruff:
    needs: []
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # Get all added and modified files.
      - uses: dorny/paths-filter@v2
        id: filter
        with:
          list-files: shell
          filters: |
            python:
              - added|modified: 'assemblyai/**/*.py'
      # Get count of filtered files.
      - name: Run count files
        id: counter
        if: ${{ steps.filter.outputs.python == 'true' }}
        shell: bash
        env:
          # Passed through the environment so the emptiness test never has to
          # embed the (already shell-quoted) file list inside another pair of quotes.
          PYTHON_FILES: ${{ steps.filter.outputs.python_files }}
        run: |
          if [ -n "$PYTHON_FILES" ]; then
            # The list is shell-escaped by paths-filter (list-files: shell),
            # so it is expanded directly as the argument list of ls.
            echo "count=$(ls ${{ steps.filter.outputs.python_files }} | wc -l)" >> "$GITHUB_OUTPUT"
          else
            echo "count=0" >> "$GITHUB_OUTPUT"
          fi
      # Run ruff on filtered files if there are any.
      - name: Run 'ruff format --check --config ./ruff.toml'
        uses: chartboost/ruff-action@v1
        if: ${{ steps.counter.outputs.count > 0 }}
        with:
          version: '0.3.5'
          args: 'format --check --config ./ruff.toml'
          src: ${{ steps.filter.outputs.python_files }}
      - name: Run 'ruff'
        uses: chartboost/ruff-action@v1
        if: ${{ steps.counter.outputs.count > 0 }}
        with:
          version: '0.3.5'
          args: '--config ./ruff.toml'
          src: ${{ steps.filter.outputs.python_files }}

  # Static type checks with mypy, limited to the Python files under
  # assemblyai/ that were added or modified by the change.
  mypy:
    needs: []
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # Get all added and modified files.
      - uses: dorny/paths-filter@v2
        id: filter
        with:
          list-files: shell
          filters: |
            python:
              - added|modified: 'assemblyai/**/*.py'
      # Get count of filtered files.
      - name: Run count files
        id: counter
        if: ${{ steps.filter.outputs.python == 'true' }}
        shell: bash
        env:
          # Passed through the environment so the emptiness test never has to
          # embed the (already shell-quoted) file list inside another pair of quotes.
          PYTHON_FILES: ${{ steps.filter.outputs.python_files }}
        run: |
          if [ -n "$PYTHON_FILES" ]; then
            # The list is shell-escaped by paths-filter (list-files: shell),
            # so it is expanded directly as the argument list of ls.
            echo "count=$(ls ${{ steps.filter.outputs.python_files }} | wc -l)" >> "$GITHUB_OUTPUT"
          else
            echo "count=0" >> "$GITHUB_OUTPUT"
          fi
      # Run mypy on filtered files if there are any.
      - uses: actions/setup-python@v4
        if: ${{ steps.counter.outputs.count > 0 }}
        with:
          python-version: '3.9'
      - run: pip install mypy==1.5.1
        if: ${{ steps.counter.outputs.count > 0 }}
      - run: mypy ${{ steps.filter.outputs.python_files }} --follow-imports=silent --ignore-missing-imports
        if: ${{ steps.counter.outputs.count > 0 }}
2 changes: 1 addition & 1 deletion assemblyai/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.32.0"
__version__ = "0.33.0"
70 changes: 37 additions & 33 deletions assemblyai/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,19 +550,17 @@ class RawTranscriptionConfig(BaseModel):
"""
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
Automatic Language Detection is supported for the following languages:
See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
"""

- English
- Spanish
- French
- German
- Italian
- Portuguese
- Dutch
language_confidence_threshold: Optional[float]
"""
The confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
"""

speech_threshold: Optional[float]
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."

speech_model: Optional[SpeechModel]
"""
Expand Down Expand Up @@ -608,6 +606,7 @@ def __init__(
summary_type: Optional[SummarizationType] = None,
auto_highlights: Optional[bool] = None,
language_detection: Optional[bool] = None,
language_confidence_threshold: Optional[float] = None,
speech_threshold: Optional[float] = None,
raw_transcription_config: Optional[RawTranscriptionConfig] = None,
speech_model: Optional[SpeechModel] = None,
Expand Down Expand Up @@ -644,8 +643,10 @@ def __init__(
summary_model: The summarization model to use in case `summarization` is enabled
summary_type: The summarization type to use in case `summarization` is enabled
auto_highlights: Detect important phrases and words in your transcription text.
language_detection: Identify the dominant language that’s spoken in an audio file, and route the file to the appropriate model for the detected language.
speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive
language_detection: Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
language_confidence_threshold: The confidence threshold that must be reached if `language_detection` is enabled.
An error will be returned if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive.
raw_transcription_config: Create the config from a `RawTranscriptionConfig`
"""
self._raw_transcription_config = raw_transcription_config
Expand Down Expand Up @@ -691,6 +692,7 @@ def __init__(
)
self.auto_highlights = auto_highlights
self.language_detection = language_detection
self.language_confidence_threshold = language_confidence_threshold
self.speech_threshold = speech_threshold
self.speech_model = speech_model

Expand Down Expand Up @@ -1021,19 +1023,26 @@ def language_detection(self, enable: Optional[bool]) -> None:
"""
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
Automatic Language Detection is supported for the following languages:
- English
- Spanish
- French
- German
- Italian
- Portuguese
- Dutch
See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
"""

self._raw_transcription_config.language_detection = enable

@property
def language_confidence_threshold(self) -> Optional[float]:
    "The language-detection confidence threshold stored on the raw transcription config (may be `None`)."

    raw_config = self._raw_transcription_config
    return raw_config.language_confidence_threshold

@language_confidence_threshold.setter
def language_confidence_threshold(self, threshold: Optional[float]) -> None:
    """
    Store the confidence threshold that must be reached if `language_detection` is enabled.

    An error will be returned if the language confidence is below this threshold.
    Valid values are in the range [0,1] inclusive; the value is stored as given,
    without any range check in this setter.
    """

    raw_config = self._raw_transcription_config
    raw_config.language_confidence_threshold = threshold

@property
def speech_threshold(self) -> Optional[float]:
"Returns the current speech threshold."
Expand All @@ -1042,10 +1051,7 @@ def speech_threshold(self) -> Optional[float]:

@speech_threshold.setter
def speech_threshold(self, threshold: Optional[float]) -> None:
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"

if threshold is not None and (threshold < 0 or threshold > 1):
raise ValueError("speech_threshold must be between 0 and 1 (inclusive).")
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."

self._raw_transcription_config.speech_threshold = threshold

Expand Down Expand Up @@ -1638,17 +1644,15 @@ class BaseTranscript(BaseModel):
"""
Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
Automatic Language Detection is supported for the following languages:
- English
- Spanish
- French
- German
- Italian
- Portuguese
- Dutch
See the docs for supported languages: https://www.assemblyai.com/docs/getting-started/supported-languages
"""

language_confidence_threshold: Optional[float]
"The confidence threshold that must be reached if `language_detection` is enabled."

language_confidence: Optional[float]
"The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)."

speech_threshold: Optional[float]
"Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive"

Expand Down
9 changes: 9 additions & 0 deletions ruff.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[lint]
# Rule selection: the default rule set (E4, E7, E9, F) plus import sorting (I)
# and the assert check (S101).
select = [
    "I",
    "E4",
    "E7",
    "E9",
    "F",
    "S101",
]

[lint.per-file-ignores]
# Package init files are exempt from the import check E402.
"__init__.py" = ["E402"]
# Test files are allowed to use assert statements (S101).
"**/*test*.py" = ["S101"]
11 changes: 0 additions & 11 deletions tests/unit/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,3 @@ def test_configuration_are_none_by_default():
pytest.fail(
f"Configuration field {name} is {value} and not None by default."
)


def test_speech_threshold_fails_if_outside_range():
"""
Tests that an exception is raised if the value for speech_threshold is outside the range of [0, 1].
"""

with pytest.raises(ValueError, match="speech_threshold"):
aai.TranscriptionConfig(speech_threshold=1.5)
with pytest.raises(ValueError, match="speech_threshold"):
aai.TranscriptionConfig(speech_threshold=-0.5)

0 comments on commit 9324e15

Please sign in to comment.