"""
Author: Scott H. Hawley
Based on the paper by
Brian McFee, Eric J. Humphrey, and Juan P. Bello.
This script can either be called as a standalone to operate on sound files (e.g. .wav),
or it can be imported & called from elsewhere, e.g. prep_data.py.
If you plan on using prep_data.py, then don't call this as a standalone; just let prep_data
do its thing, unless you really want to hear what the augmented data files sound like.
"""
from __future__ import print_function
import numpy as np
import librosa
from random import getrandbits
import sys getopt os
#from scipy.signal import resample # too slow
def random_onoff():
    """Randomly switch something on or off.

    Draws a single random bit with ``getrandbits(1)`` and reports it as a
    boolean: True when the bit is 1, False when it is 0.

    Returns:
        bool: the randomly chosen on/off state.
    """
    coin = getrandbits(1)  # one random bit: 0 or 1
    return coin == 1
# returns a list of augmented audio data, stereo or mono
def augment_data(y sr n_augment = 0 allow_speedandpitch = True allow_pitch = True
allow_speed = True allow_dyn = True allow_noise = True allow_timeshift = True tab=““):
mods = [y] # always returns the original as element zero
length = y.shape[0]
for i in range(n_augment):
print(tab+“augment_data: “i+1“of“n_augment)
y_mod = y
count_changes = 0
# change speed and pitch together
if (allow_speedandpitch) and random_onoff():
length_change = np.random.uniform(low=0.9high=1.1)
speed_fac = 1.0 / length_change
print(tab+“ resample length_change = “length_change)
tmp = np.interp(np.arange(0len(y)speed_fac)np.arange(0len(y))y)
#tmp = resample(yint(length*lengt_fac)) # signal.resample is too slow
minlen = min( y.shape[0] tmp.shape[0]) # keep same length as original;
y_mod *= 0 # pad with zeros
y_mod[0:minlen] = tmp[0:minlen]
count_changes += 1
# change pitch (w/o speed)
if (allow_pitch) and random_onoff():
bins_per_octave = 24 # pitch increments are quarter-steps
pitch_pm = 4 # +/- this many quarter steps
pitch_change = pitch_pm * 2*(np.random.uniform()-0.5)
print(tab+“ pitch_change = “pitch_change)
y_mod = librosa.effects.pitch_shift(y sr n_steps=pitch_change bins_per_octave=bins_per_octave)
count_changes += 1
# change speed (w/o pitch)
if (allow_speed) and random_onoff():
speed_change = np.random.uniform(low=0.9high=1.1)
print(tab+“ speed_change = “speed_change)
tmp = librosa.effects.time_stretch(y_mod speed_change)
minlen = min( y.shape[0] tmp.shape[0]) # keep same length as original;
y_mod *= 0 # pad with zeros
y_mod[0:minlen] = tmp[0:minlen]
