@@ -174,7 +174,8 @@ <h1>Source code for data.text2speech.speech_utils</h1><div class="highlight"><pr
174174 < span class ="n "> mean</ span > < span class ="o "> =</ span > < span class ="mf "> 0.</ span > < span class ="p "> ,</ span >
175175 < span class ="n "> std</ span > < span class ="o "> =</ span > < span class ="mf "> 1.</ span > < span class ="p "> ,</ span >
176176 < span class ="n "> trim</ span > < span class ="o "> =</ span > < span class ="kc "> False</ span > < span class ="p "> ,</ span >
177- < span class ="n "> data_min</ span > < span class ="o "> =</ span > < span class ="mf "> 1e-5</ span >
177+ < span class ="n "> data_min</ span > < span class ="o "> =</ span > < span class ="mf "> 1e-5</ span > < span class ="p "> ,</ span >
178+ < span class ="n "> mel_basis</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span >
178179< span class ="p "> ):</ span >
179180 < span class ="sd "> """ Helper function to retrieve spectrograms from wav files</ span >
180181
@@ -210,7 +211,7 @@ <h1>Source code for data.text2speech.speech_utils</h1><div class="highlight"><pr
210211 < span class ="p "> )</ span >
211212 < span class ="k "> return</ span > < span class ="n "> get_speech_features</ span > < span class ="p "> (</ span >
212213 < span class ="n "> signal</ span > < span class ="p "> ,</ span > < span class ="n "> fs</ span > < span class ="p "> ,</ span > < span class ="n "> num_features</ span > < span class ="p "> ,</ span > < span class ="n "> features_type</ span > < span class ="p "> ,</ span > < span class ="n "> n_fft</ span > < span class ="p "> ,</ span >
213- < span class ="n "> hop_length</ span > < span class ="p "> ,</ span > < span class ="n "> mag_power</ span > < span class ="p "> ,</ span > < span class ="n "> feature_normalize</ span > < span class ="p "> ,</ span > < span class ="n "> mean</ span > < span class ="p "> ,</ span > < span class ="n "> std</ span > < span class ="p "> ,</ span > < span class ="n "> data_min</ span >
214+ < span class ="n "> hop_length</ span > < span class ="p "> ,</ span > < span class ="n "> mag_power</ span > < span class ="p "> ,</ span > < span class ="n "> feature_normalize</ span > < span class ="p "> ,</ span > < span class ="n "> mean</ span > < span class ="p "> ,</ span > < span class ="n "> std</ span > < span class ="p "> ,</ span > < span class ="n "> data_min</ span > < span class =" p " > , </ span > < span class =" n " > mel_basis </ span >
214215 < span class ="p "> )</ span > </ div >
215216
216217
@@ -225,7 +226,8 @@ <h1>Source code for data.text2speech.speech_utils</h1><div class="highlight"><pr
225226 < span class ="n "> feature_normalize</ span > < span class ="o "> =</ span > < span class ="kc "> False</ span > < span class ="p "> ,</ span >
226227 < span class ="n "> mean</ span > < span class ="o "> =</ span > < span class ="mf "> 0.</ span > < span class ="p "> ,</ span >
227228 < span class ="n "> std</ span > < span class ="o "> =</ span > < span class ="mf "> 1.</ span > < span class ="p "> ,</ span >
228- < span class ="n "> data_min</ span > < span class ="o "> =</ span > < span class ="mf "> 1e-5</ span >
229+ < span class ="n "> data_min</ span > < span class ="o "> =</ span > < span class ="mf "> 1e-5</ span > < span class ="p "> ,</ span >
230+ < span class ="n "> mel_basis</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span >
229231< span class ="p "> ):</ span >
230232 < span class ="sd "> """ Helper function to retrieve spectrograms from loaded wav</ span >
231233
@@ -249,38 +251,55 @@ <h1>Source code for data.text2speech.speech_utils</h1><div class="highlight"><pr
249251< span class ="sd "> np.array: np.array of audio features with shape=[num_time_steps,</ span >
250252< span class ="sd "> num_features].</ span >
251253< span class ="sd "> """</ span >
252- < span class ="k "> if</ span > < span class ="n "> features_type</ span > < span class ="o "> ==</ span > < span class ="s1 "> 'magnitude'</ span > < span class ="p "> :</ span >
253- < span class ="n "> complex_spec</ span > < span class ="o "> =</ span > < span class ="n "> librosa</ span > < span class ="o "> .</ span > < span class ="n "> stft</ span > < span class ="p "> (</ span > < span class ="n "> y</ span > < span class ="o "> =</ span > < span class ="n "> signal</ span > < span class ="p "> ,</ span > < span class ="n "> n_fft</ span > < span class ="o "> =</ span > < span class ="n "> n_fft</ span > < span class ="p "> )</ span >
254- < span class ="n "> mag</ span > < span class ="p "> ,</ span > < span class ="n "> _</ span > < span class ="o "> =</ span > < span class ="n "> librosa</ span > < span class ="o "> .</ span > < span class ="n "> magphase</ span > < span class ="p "> (</ span > < span class ="n "> complex_spec</ span > < span class ="p "> ,</ span > < span class ="n "> power</ span > < span class ="o "> =</ span > < span class ="n "> mag_power</ span > < span class ="p "> )</ span >
255- < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> log</ span > < span class ="p "> (</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> clip</ span > < span class ="p "> (</ span > < span class ="n "> mag</ span > < span class ="p "> ,</ span > < span class ="n "> a_min</ span > < span class ="o "> =</ span > < span class ="n "> data_min</ span > < span class ="p "> ,</ span > < span class ="n "> a_max</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ))</ span > < span class ="o "> .</ span > < span class ="n "> T</ span >
256- < span class ="k "> assert</ span > < span class ="n "> num_features</ span > < span class ="o "> <=</ span > < span class ="n "> n_fft</ span > < span class ="o "> //</ span > < span class ="mi "> 2</ span > < span class ="o "> +</ span > < span class ="mi "> 1</ span > < span class ="p "> ,</ span > \
254+ < span class ="k "> if</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> data_min</ span > < span class ="p "> ,</ span > < span class ="nb "> dict</ span > < span class ="p "> ):</ span >
255+ < span class ="n "> data_min_mel</ span > < span class ="o "> =</ span > < span class ="n "> data_min</ span > < span class ="p "> [</ span > < span class ="s2 "> "mel"</ span > < span class ="p "> ]</ span >
256+ < span class ="n "> data_min_mag</ span > < span class ="o "> =</ span > < span class ="n "> data_min</ span > < span class ="p "> [</ span > < span class ="s2 "> "magnitude"</ span > < span class ="p "> ]</ span >
257+ < span class ="k "> else</ span > < span class ="p "> :</ span >
258+ < span class ="n "> data_min_mel</ span > < span class ="o "> =</ span > < span class ="n "> data_min_mag</ span > < span class ="o "> =</ span > < span class ="n "> data_min</ span >
259+
260+ < span class ="k "> if</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> num_features</ span > < span class ="p "> ,</ span > < span class ="nb "> dict</ span > < span class ="p "> ):</ span >
261+ < span class ="n "> num_features_mel</ span > < span class ="o "> =</ span > < span class ="n "> num_features</ span > < span class ="p "> [</ span > < span class ="s2 "> "mel"</ span > < span class ="p "> ]</ span >
262+ < span class ="n "> num_features_mag</ span > < span class ="o "> =</ span > < span class ="n "> num_features</ span > < span class ="p "> [</ span > < span class ="s2 "> "magnitude"</ span > < span class ="p "> ]</ span >
263+ < span class ="k "> else</ span > < span class ="p "> :</ span >
264+ < span class ="n "> num_features_mel</ span > < span class ="o "> =</ span > < span class ="n "> num_features_mag</ span > < span class ="o "> =</ span > < span class ="n "> num_features</ span >
265+
266+ < span class ="n "> complex_spec</ span > < span class ="o "> =</ span > < span class ="n "> librosa</ span > < span class ="o "> .</ span > < span class ="n "> stft</ span > < span class ="p "> (</ span > < span class ="n "> y</ span > < span class ="o "> =</ span > < span class ="n "> signal</ span > < span class ="p "> ,</ span > < span class ="n "> n_fft</ span > < span class ="o "> =</ span > < span class ="n "> n_fft</ span > < span class ="p "> )</ span >
267+ < span class ="n "> mag</ span > < span class ="p "> ,</ span > < span class ="n "> _</ span > < span class ="o "> =</ span > < span class ="n "> librosa</ span > < span class ="o "> .</ span > < span class ="n "> magphase</ span > < span class ="p "> (</ span > < span class ="n "> complex_spec</ span > < span class ="p "> ,</ span > < span class ="n "> power</ span > < span class ="o "> =</ span > < span class ="n "> mag_power</ span > < span class ="p "> )</ span >
268+
269+ < span class ="k "> if</ span > < span class ="n "> features_type</ span > < span class ="o "> ==</ span > < span class ="s1 "> 'magnitude'</ span > < span class ="ow "> or</ span > < span class ="n "> features_type</ span > < span class ="o "> ==</ span > < span class ="s2 "> "both"</ span > < span class ="p "> :</ span >
270+ < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> log</ span > < span class ="p "> (</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> clip</ span > < span class ="p "> (</ span > < span class ="n "> mag</ span > < span class ="p "> ,</ span > < span class ="n "> a_min</ span > < span class ="o "> =</ span > < span class ="n "> data_min_mag</ span > < span class ="p "> ,</ span > < span class ="n "> a_max</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ))</ span > < span class ="o "> .</ span > < span class ="n "> T</ span >
271+ < span class ="k "> assert</ span > < span class ="n "> num_features_mag</ span > < span class ="o "> <=</ span > < span class ="n "> n_fft</ span > < span class ="o "> //</ span > < span class ="mi "> 2</ span > < span class ="o "> +</ span > < span class ="mi "> 1</ span > < span class ="p "> ,</ span > \
257272 < span class ="s2 "> "num_features for spectrogram should be <= (fs * window_size // 2 + 1)"</ span >
258273
259274 < span class ="c1 "> # cut high frequency part</ span >
260- < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> features</ span > < span class ="p "> [:,</ span > < span class ="p "> :</ span > < span class ="n "> num_features</ span > < span class ="p "> ]</ span >
261- < span class ="k "> if</ span > < span class ="s1 "> 'mel'</ span > < span class ="ow "> in</ span > < span class ="n "> features_type</ span > < span class ="p "> :</ span >
262- < span class ="n "> htk</ span > < span class ="o "> =</ span > < span class ="kc "> True</ span >
263- < span class ="n "> norm</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span >
264- < span class ="k "> if</ span > < span class ="s1 "> 'slaney'</ span > < span class ="ow "> in</ span > < span class ="n "> features_type</ span > < span class ="p "> :</ span >
265- < span class ="n "> htk</ span > < span class ="o "> =</ span > < span class ="kc "> False</ span >
266- < span class ="n "> norm</ span > < span class ="o "> =</ span > < span class ="mi "> 1</ span >
267- < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> librosa</ span > < span class ="o "> .</ span > < span class ="n "> feature</ span > < span class ="o "> .</ span > < span class ="n "> melspectrogram</ span > < span class ="p "> (</ span >
268- < span class ="n "> y</ span > < span class ="o "> =</ span > < span class ="n "> signal</ span > < span class ="p "> ,</ span >
269- < span class ="n "> sr</ span > < span class ="o "> =</ span > < span class ="n "> fs</ span > < span class ="p "> ,</ span >
270- < span class ="n "> n_fft</ span > < span class ="o "> =</ span > < span class ="n "> n_fft</ span > < span class ="p "> ,</ span >
271- < span class ="n "> hop_length</ span > < span class ="o "> =</ span > < span class ="n "> hop_length</ span > < span class ="p "> ,</ span >
272- < span class ="n "> n_mels</ span > < span class ="o "> =</ span > < span class ="n "> num_features</ span > < span class ="p "> ,</ span >
273- < span class ="n "> power</ span > < span class ="o "> =</ span > < span class ="n "> mag_power</ span > < span class ="p "> ,</ span >
274- < span class ="n "> htk</ span > < span class ="o "> =</ span > < span class ="n "> htk</ span > < span class ="p "> ,</ span >
275- < span class ="n "> norm</ span > < span class ="o "> =</ span > < span class ="n "> norm</ span >
276- < span class ="p "> )</ span >
277- < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> log</ span > < span class ="p "> (</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> clip</ span > < span class ="p "> (</ span > < span class ="n "> features</ span > < span class ="p "> ,</ span > < span class ="n "> a_min</ span > < span class ="o "> =</ span > < span class ="n "> data_min</ span > < span class ="p "> ,</ span > < span class ="n "> a_max</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ))</ span > < span class ="o "> .</ span > < span class ="n "> T</ span >
275+ < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> features</ span > < span class ="p "> [:,</ span > < span class ="p "> :</ span > < span class ="n "> num_features_mag</ span > < span class ="p "> ]</ span >
276+
277+ < span class ="k "> if</ span > < span class ="s1 "> 'mel'</ span > < span class ="ow "> in</ span > < span class ="n "> features_type</ span > < span class ="ow "> or</ span > < span class ="n "> features_type</ span > < span class ="o "> ==</ span > < span class ="s2 "> "both"</ span > < span class ="p "> :</ span >
278+ < span class ="k "> if</ span > < span class ="n "> features_type</ span > < span class ="o "> ==</ span > < span class ="s2 "> "both"</ span > < span class ="p "> :</ span >
279+ < span class ="n "> mag_features</ span > < span class ="o "> =</ span > < span class ="n "> features</ span >
280+ < span class ="k "> if</ span > < span class ="n "> mel_basis</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="p "> :</ span >
281+ < span class ="n "> htk</ span > < span class ="o "> =</ span > < span class ="kc "> True</ span >
282+ < span class ="n "> norm</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span >
283+ < span class ="k "> if</ span > < span class ="s1 "> 'slaney'</ span > < span class ="ow "> in</ span > < span class ="n "> features_type</ span > < span class ="p "> :</ span >
284+ < span class ="n "> htk</ span > < span class ="o "> =</ span > < span class ="kc "> False</ span >
285+ < span class ="n "> norm</ span > < span class ="o "> =</ span > < span class ="mi "> 1</ span >
286+ < span class ="n "> mel_basis</ span > < span class ="o "> =</ span > < span class ="n "> librosa</ span > < span class ="o "> .</ span > < span class ="n "> filters</ span > < span class ="o "> .</ span > < span class ="n "> mel</ span > < span class ="p "> (</ span >
287+ < span class ="n "> sr</ span > < span class ="o "> =</ span > < span class ="n "> fs</ span > < span class ="p "> ,</ span >
288+ < span class ="n "> n_fft</ span > < span class ="o "> =</ span > < span class ="n "> n_fft</ span > < span class ="p "> ,</ span >
289+ < span class ="n "> n_mels</ span > < span class ="o "> =</ span > < span class ="n "> num_features_mel</ span > < span class ="p "> ,</ span >
290+ < span class ="n "> htk</ span > < span class ="o "> =</ span > < span class ="n "> htk</ span > < span class ="p "> ,</ span >
291+ < span class ="n "> norm</ span > < span class ="o "> =</ span > < span class ="n "> norm</ span >
292+ < span class ="p "> )</ span >
293+ < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> dot</ span > < span class ="p "> (</ span > < span class ="n "> mel_basis</ span > < span class ="p "> ,</ span > < span class ="n "> mag</ span > < span class ="p "> )</ span >
294+ < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> log</ span > < span class ="p "> (</ span > < span class ="n "> np</ span > < span class ="o "> .</ span > < span class ="n "> clip</ span > < span class ="p "> (</ span > < span class ="n "> features</ span > < span class ="p "> ,</ span > < span class ="n "> a_min</ span > < span class ="o "> =</ span > < span class ="n "> data_min_mel</ span > < span class ="p "> ,</ span > < span class ="n "> a_max</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ))</ span > < span class ="o "> .</ span > < span class ="n "> T</ span >
278295
279296 < span class ="k "> if</ span > < span class ="n "> feature_normalize</ span > < span class ="p "> :</ span >
280297 < span class ="n "> features</ span > < span class ="o "> =</ span > < span class ="n "> normalize</ span > < span class ="p "> (</ span > < span class ="n "> features</ span > < span class ="p "> ,</ span > < span class ="n "> mean</ span > < span class ="p "> ,</ span > < span class ="n "> std</ span > < span class ="p "> )</ span >
281298
282- < span class ="k "> return</ span > < span class ="n "> features</ span > </ div >
299+ < span class ="k "> if</ span > < span class ="n "> features_type</ span > < span class ="o "> ==</ span > < span class ="s2 "> "both"</ span > < span class ="p "> :</ span >
300+ < span class ="k "> return</ span > < span class ="p "> [</ span > < span class ="n "> features</ span > < span class ="p "> ,</ span > < span class ="n "> mag_features</ span > < span class ="p "> ]</ span >
283301
302+ < span class ="k "> return</ span > < span class ="n "> features</ span > </ div >
284303
285304< div class ="viewcode-block " id ="get_mel "> < a class ="viewcode-back " href ="../../../api-docs/data.text2speech.html#data.text2speech.speech_utils.get_mel "> [docs]</ a > < span class ="k "> def</ span > < span class ="nf "> get_mel</ span > < span class ="p "> (</ span >
286305 < span class ="n "> log_mag_spec</ span > < span class ="p "> ,</ span >
0 commit comments