Skip to content

Commit

Permalink
Add more transforms
Browse files Browse the repository at this point in the history
  • Loading branch information
pprobst committed Oct 19, 2023
1 parent 7f9c796 commit dfeb78d
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 14 deletions.
34 changes: 27 additions & 7 deletions audio/aug.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,32 @@
from typing import List

# Default parameters for each supported augmentation, keyed by the transform's
# name as listed in the audiomentations documentation:
# https://iver56.github.io/audiomentations/
#
# Every entry carries "p", the probability of applying the transform.
# NOTE(review): the values are presumably forwarded as keyword arguments to the
# matching audiomentations transform -- confirm against apply_augmentation.
#
# Each key must appear exactly once: a repeated key in a dict literal is not an
# error in Python, the last value silently wins. The superseded "PitchShift"
# (-4..4 semitones) and "Shift" (min_fraction/max_fraction) entries were
# therefore dropped; the resulting mapping is unchanged.
AUG_PARAMS = {
    "AddGaussianNoise": {"min_amplitude": 0.001, "max_amplitude": 0.015, "p": 1.0},
    "AddGaussianSNR": {"min_snr_db": 5.0, "max_snr_db": 40.0, "p": 0.5},
    "ClippingDistortion": {
        "min_percentile_threshold": 0,
        "max_percentile_threshold": 40,
        "p": 0.5,
    },
    "Gain": {
        "min_gain_db": -12.0,
        "max_gain_db": 12.0,
        "p": 0.5,
    },
    "GainTransition": {
        "min_gain_db": -24.0,
        "max_gain_db": 10.0,
        # min == max pins the transition length to a quarter of the clip,
        # expressed as a fraction of the total duration.
        "min_duration": 0.25,
        "max_duration": 0.25,
        "duration_unit": "fraction",
        "p": 0.5,
    },
    "Normalize": {"p": 0.5},
    "TimeStretch": {"min_rate": 0.8, "max_rate": 1.25, "p": 0.5},
    "PitchShift": {"min_semitones": -0.5, "max_semitones": 0.5, "p": 0.5},
    "Shift": {"min_shift": -0.5, "max_shift": 0.5, "p": 0.5},
}


Expand All @@ -30,12 +52,10 @@ def apply_augmentation(
print(f"Invalid augmentation technique: {augmentation_name}")
exit(1)

print("Augmentations: ", augmentation_list)
augment = Compose(augmentation_list)

# for transform in augment.transforms:
# print(f"{transform.__class__.__name__}: {transform.parameters}")

augmented_samples = augment(samples=samples, sample_rate=sample_rate)

for transform in augment.transforms:
print(f"{transform.__class__.__name__}: {transform.parameters}")

return augmented_samples
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ nltk
num2words
tqdm
gensim
soundfile
librosa
pydub
audiomentations
15 changes: 11 additions & 4 deletions run_audio_aug.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import argparse
import os
import audiomentations as AA
import random
import numpy as np

from audio.aug import apply_augmentation
from utils.files import load_audio, save_audio
Expand Down Expand Up @@ -32,12 +33,18 @@
nargs="+",
help="Audiomentation techniques (e.g., AddGaussianNoise, PitchShift, TimeStretch)",
)
parser.add_argument(
"--seed",
type=int,
default=None,
help="Random seed for reproducible augmentations (default: None, generates a random seed)",
)

args = parser.parse_args()

if not os.path.exists(args.input_file):
print(f"Input file '{args.input_file}' does not exist.")
exit(1)
if args.seed is not None:
random.seed(args.seed)
np.random.seed(args.seed)

audio, sr = load_audio(args.input_file)

Expand Down
6 changes: 3 additions & 3 deletions utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from pathlib import Path
from pydub import AudioSegment
from typing import List, Optional, Union
from typing import List, Optional, Union, Tuple
from .text import pre_process_sentences


Expand Down Expand Up @@ -90,7 +90,7 @@ def download_and_extract(url: str, target_file: str) -> None:
)


def load_audio(audio_file: str) -> Union[tuple, None]:
def load_audio(audio_file: str) -> Tuple[np.ndarray, float]:
"""
Read an audio file using Librosa. Convert to WAV if not in WAV format.
Expand All @@ -115,7 +115,7 @@ def load_audio(audio_file: str) -> Union[tuple, None]:
return y, sr
except Exception as e:
print(f"Error loading or converting the audio file: {e}")
return None
raise e


def save_audio(
Expand Down

0 comments on commit dfeb78d

Please sign in to comment.