Skip to content
This repository was archived by the owner on Aug 3, 2021. It is now read-only.

Commit 655eb65

Browse files
committed
Updated docs
1 parent 8460dd1 commit 655eb65

37 files changed

+1552
-290
lines changed

docs/html/_modules/data/speech2text/speech2text.html

Lines changed: 60 additions & 22 deletions
Large diffs are not rendered by default.

docs/html/_modules/data/text2speech/speech_utils.html

Lines changed: 46 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,8 @@ <h1>Source code for data.text2speech.speech_utils</h1><div class="highlight"><pr
174174
<span class="n">mean</span><span class="o">=</span><span class="mf">0.</span><span class="p">,</span>
175175
<span class="n">std</span><span class="o">=</span><span class="mf">1.</span><span class="p">,</span>
176176
<span class="n">trim</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
177-
<span class="n">data_min</span><span class="o">=</span><span class="mf">1e-5</span>
177+
<span class="n">data_min</span><span class="o">=</span><span class="mf">1e-5</span><span class="p">,</span>
178+
<span class="n">mel_basis</span><span class="o">=</span><span class="kc">None</span>
178179
<span class="p">):</span>
179180
<span class="sd">&quot;&quot;&quot; Helper function to retrieve spectrograms from wav files</span>
180181

@@ -210,7 +211,7 @@ <h1>Source code for data.text2speech.speech_utils</h1><div class="highlight"><pr
210211
<span class="p">)</span>
211212
<span class="k">return</span> <span class="n">get_speech_features</span><span class="p">(</span>
212213
<span class="n">signal</span><span class="p">,</span> <span class="n">fs</span><span class="p">,</span> <span class="n">num_features</span><span class="p">,</span> <span class="n">features_type</span><span class="p">,</span> <span class="n">n_fft</span><span class="p">,</span>
213-
<span class="n">hop_length</span><span class="p">,</span> <span class="n">mag_power</span><span class="p">,</span> <span class="n">feature_normalize</span><span class="p">,</span> <span class="n">mean</span><span class="p">,</span> <span class="n">std</span><span class="p">,</span> <span class="n">data_min</span>
214+
<span class="n">hop_length</span><span class="p">,</span> <span class="n">mag_power</span><span class="p">,</span> <span class="n">feature_normalize</span><span class="p">,</span> <span class="n">mean</span><span class="p">,</span> <span class="n">std</span><span class="p">,</span> <span class="n">data_min</span><span class="p">,</span> <span class="n">mel_basis</span>
214215
<span class="p">)</span></div>
215216

216217

@@ -225,7 +226,8 @@ <h1>Source code for data.text2speech.speech_utils</h1><div class="highlight"><pr
225226
<span class="n">feature_normalize</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
226227
<span class="n">mean</span><span class="o">=</span><span class="mf">0.</span><span class="p">,</span>
227228
<span class="n">std</span><span class="o">=</span><span class="mf">1.</span><span class="p">,</span>
228-
<span class="n">data_min</span><span class="o">=</span><span class="mf">1e-5</span>
229+
<span class="n">data_min</span><span class="o">=</span><span class="mf">1e-5</span><span class="p">,</span>
230+
<span class="n">mel_basis</span><span class="o">=</span><span class="kc">None</span>
229231
<span class="p">):</span>
230232
<span class="sd">&quot;&quot;&quot; Helper function to retrieve spectrograms from loaded wav</span>
231233

@@ -249,38 +251,55 @@ <h1>Source code for data.text2speech.speech_utils</h1><div class="highlight"><pr
249251
<span class="sd"> np.array: np.array of audio features with shape=[num_time_steps,</span>
250252
<span class="sd"> num_features].</span>
251253
<span class="sd"> &quot;&quot;&quot;</span>
252-
<span class="k">if</span> <span class="n">features_type</span> <span class="o">==</span> <span class="s1">&#39;magnitude&#39;</span><span class="p">:</span>
253-
<span class="n">complex_spec</span> <span class="o">=</span> <span class="n">librosa</span><span class="o">.</span><span class="n">stft</span><span class="p">(</span><span class="n">y</span><span class="o">=</span><span class="n">signal</span><span class="p">,</span> <span class="n">n_fft</span><span class="o">=</span><span class="n">n_fft</span><span class="p">)</span>
254-
<span class="n">mag</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">librosa</span><span class="o">.</span><span class="n">magphase</span><span class="p">(</span><span class="n">complex_spec</span><span class="p">,</span> <span class="n">power</span><span class="o">=</span><span class="n">mag_power</span><span class="p">)</span>
255-
<span class="n">features</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">mag</span><span class="p">,</span> <span class="n">a_min</span><span class="o">=</span><span class="n">data_min</span><span class="p">,</span> <span class="n">a_max</span><span class="o">=</span><span class="kc">None</span><span class="p">))</span><span class="o">.</span><span class="n">T</span>
256-
<span class="k">assert</span> <span class="n">num_features</span> <span class="o">&lt;=</span> <span class="n">n_fft</span> <span class="o">//</span> <span class="mi">2</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> \
254+
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data_min</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
255+
<span class="n">data_min_mel</span> <span class="o">=</span> <span class="n">data_min</span><span class="p">[</span><span class="s2">&quot;mel&quot;</span><span class="p">]</span>
256+
<span class="n">data_min_mag</span> <span class="o">=</span> <span class="n">data_min</span><span class="p">[</span><span class="s2">&quot;magnitude&quot;</span><span class="p">]</span>
257+
<span class="k">else</span><span class="p">:</span>
258+
<span class="n">data_min_mel</span> <span class="o">=</span> <span class="n">data_min_mag</span> <span class="o">=</span> <span class="n">data_min</span>
259+
260+
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">num_features</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
261+
<span class="n">num_features_mel</span> <span class="o">=</span> <span class="n">num_features</span><span class="p">[</span><span class="s2">&quot;mel&quot;</span><span class="p">]</span>
262+
<span class="n">num_features_mag</span> <span class="o">=</span> <span class="n">num_features</span><span class="p">[</span><span class="s2">&quot;magnitude&quot;</span><span class="p">]</span>
263+
<span class="k">else</span><span class="p">:</span>
264+
<span class="n">num_features_mel</span> <span class="o">=</span> <span class="n">num_features_mag</span> <span class="o">=</span> <span class="n">num_features</span>
265+
266+
<span class="n">complex_spec</span> <span class="o">=</span> <span class="n">librosa</span><span class="o">.</span><span class="n">stft</span><span class="p">(</span><span class="n">y</span><span class="o">=</span><span class="n">signal</span><span class="p">,</span> <span class="n">n_fft</span><span class="o">=</span><span class="n">n_fft</span><span class="p">)</span>
267+
<span class="n">mag</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">librosa</span><span class="o">.</span><span class="n">magphase</span><span class="p">(</span><span class="n">complex_spec</span><span class="p">,</span> <span class="n">power</span><span class="o">=</span><span class="n">mag_power</span><span class="p">)</span>
268+
269+
<span class="k">if</span> <span class="n">features_type</span> <span class="o">==</span> <span class="s1">&#39;magnitude&#39;</span> <span class="ow">or</span> <span class="n">features_type</span> <span class="o">==</span> <span class="s2">&quot;both&quot;</span><span class="p">:</span>
270+
<span class="n">features</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">mag</span><span class="p">,</span> <span class="n">a_min</span><span class="o">=</span><span class="n">data_min_mag</span><span class="p">,</span> <span class="n">a_max</span><span class="o">=</span><span class="kc">None</span><span class="p">))</span><span class="o">.</span><span class="n">T</span>
271+
<span class="k">assert</span> <span class="n">num_features_mag</span> <span class="o">&lt;=</span> <span class="n">n_fft</span> <span class="o">//</span> <span class="mi">2</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> \
257272
<span class="s2">&quot;num_features for spectrogram should be &lt;= (fs * window_size // 2 + 1)&quot;</span>
258273

259274
<span class="c1"># cut high frequency part</span>
260-
<span class="n">features</span> <span class="o">=</span> <span class="n">features</span><span class="p">[:,</span> <span class="p">:</span><span class="n">num_features</span><span class="p">]</span>
261-
<span class="k">if</span> <span class="s1">&#39;mel&#39;</span> <span class="ow">in</span> <span class="n">features_type</span><span class="p">:</span>
262-
<span class="n">htk</span> <span class="o">=</span> <span class="kc">True</span>
263-
<span class="n">norm</span> <span class="o">=</span> <span class="kc">None</span>
264-
<span class="k">if</span> <span class="s1">&#39;slaney&#39;</span> <span class="ow">in</span> <span class="n">features_type</span><span class="p">:</span>
265-
<span class="n">htk</span> <span class="o">=</span> <span class="kc">False</span>
266-
<span class="n">norm</span> <span class="o">=</span> <span class="mi">1</span>
267-
<span class="n">features</span> <span class="o">=</span> <span class="n">librosa</span><span class="o">.</span><span class="n">feature</span><span class="o">.</span><span class="n">melspectrogram</span><span class="p">(</span>
268-
<span class="n">y</span><span class="o">=</span><span class="n">signal</span><span class="p">,</span>
269-
<span class="n">sr</span><span class="o">=</span><span class="n">fs</span><span class="p">,</span>
270-
<span class="n">n_fft</span><span class="o">=</span><span class="n">n_fft</span><span class="p">,</span>
271-
<span class="n">hop_length</span><span class="o">=</span><span class="n">hop_length</span><span class="p">,</span>
272-
<span class="n">n_mels</span><span class="o">=</span><span class="n">num_features</span><span class="p">,</span>
273-
<span class="n">power</span><span class="o">=</span><span class="n">mag_power</span><span class="p">,</span>
274-
<span class="n">htk</span><span class="o">=</span><span class="n">htk</span><span class="p">,</span>
275-
<span class="n">norm</span><span class="o">=</span><span class="n">norm</span>
276-
<span class="p">)</span>
277-
<span class="n">features</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">features</span><span class="p">,</span> <span class="n">a_min</span><span class="o">=</span><span class="n">data_min</span><span class="p">,</span> <span class="n">a_max</span><span class="o">=</span><span class="kc">None</span><span class="p">))</span><span class="o">.</span><span class="n">T</span>
275+
<span class="n">features</span> <span class="o">=</span> <span class="n">features</span><span class="p">[:,</span> <span class="p">:</span><span class="n">num_features_mag</span><span class="p">]</span>
276+
277+
<span class="k">if</span> <span class="s1">&#39;mel&#39;</span> <span class="ow">in</span> <span class="n">features_type</span> <span class="ow">or</span> <span class="n">features_type</span> <span class="o">==</span> <span class="s2">&quot;both&quot;</span><span class="p">:</span>
278+
<span class="k">if</span> <span class="n">features_type</span> <span class="o">==</span> <span class="s2">&quot;both&quot;</span><span class="p">:</span>
279+
<span class="n">mag_features</span> <span class="o">=</span> <span class="n">features</span>
280+
<span class="k">if</span> <span class="n">mel_basis</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
281+
<span class="n">htk</span> <span class="o">=</span> <span class="kc">True</span>
282+
<span class="n">norm</span> <span class="o">=</span> <span class="kc">None</span>
283+
<span class="k">if</span> <span class="s1">&#39;slaney&#39;</span> <span class="ow">in</span> <span class="n">features_type</span><span class="p">:</span>
284+
<span class="n">htk</span> <span class="o">=</span> <span class="kc">False</span>
285+
<span class="n">norm</span> <span class="o">=</span> <span class="mi">1</span>
286+
<span class="n">mel_basis</span> <span class="o">=</span> <span class="n">librosa</span><span class="o">.</span><span class="n">filters</span><span class="o">.</span><span class="n">mel</span><span class="p">(</span>
287+
<span class="n">sr</span><span class="o">=</span><span class="n">fs</span><span class="p">,</span>
288+
<span class="n">n_fft</span><span class="o">=</span><span class="n">n_fft</span><span class="p">,</span>
289+
<span class="n">n_mels</span><span class="o">=</span><span class="n">num_features_mel</span><span class="p">,</span>
290+
<span class="n">htk</span><span class="o">=</span><span class="n">htk</span><span class="p">,</span>
291+
<span class="n">norm</span><span class="o">=</span><span class="n">norm</span>
292+
<span class="p">)</span>
293+
<span class="n">features</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">mel_basis</span><span class="p">,</span> <span class="n">mag</span><span class="p">)</span>
294+
<span class="n">features</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">features</span><span class="p">,</span> <span class="n">a_min</span><span class="o">=</span><span class="n">data_min_mel</span><span class="p">,</span> <span class="n">a_max</span><span class="o">=</span><span class="kc">None</span><span class="p">))</span><span class="o">.</span><span class="n">T</span>
278295

279296
<span class="k">if</span> <span class="n">feature_normalize</span><span class="p">:</span>
280297
<span class="n">features</span> <span class="o">=</span> <span class="n">normalize</span><span class="p">(</span><span class="n">features</span><span class="p">,</span> <span class="n">mean</span><span class="p">,</span> <span class="n">std</span><span class="p">)</span>
281298

282-
<span class="k">return</span> <span class="n">features</span></div>
299+
<span class="k">if</span> <span class="n">features_type</span> <span class="o">==</span> <span class="s2">&quot;both&quot;</span><span class="p">:</span>
300+
<span class="k">return</span> <span class="p">[</span><span class="n">features</span><span class="p">,</span> <span class="n">mag_features</span><span class="p">]</span>
283301

302+
<span class="k">return</span> <span class="n">features</span></div>
284303

285304
<div class="viewcode-block" id="get_mel"><a class="viewcode-back" href="../../../api-docs/data.text2speech.html#data.text2speech.speech_utils.get_mel">[docs]</a><span class="k">def</span> <span class="nf">get_mel</span><span class="p">(</span>
286305
<span class="n">log_mag_spec</span><span class="p">,</span>

0 commit comments

Comments
 (0)