|
1 | | -from libfmp.c3 import compute_freq_distribution, tuning_similarity |
| 1 | +import librosa |
2 | 2 | import numpy as np |
3 | 3 | from scipy import signal |
| 4 | +from scipy.interpolate import interp1d |
4 | 5 | from typing import Tuple |
5 | 6 |
|
6 | 7 |
|
@@ -97,8 +98,7 @@ def estimate_tuning(x: np.ndarray, |
97 | 98 | local: bool = True, |
98 | 99 | filt: bool = True, |
99 | 100 | filt_len: int = 101) -> int: |
100 | | - """Compute tuning deviation in cents for an audio signal. Convenience wrapper around |
101 | | - 'compute_freq_distribution' and 'tuning_similarity' from libfmp. |
| 101 | + """Compute tuning deviation in cents for an audio signal. |
102 | 102 |
|
103 | 103 | Parameters |
104 | 104 | ---------- |
@@ -129,11 +129,77 @@ def estimate_tuning(x: np.ndarray, |
129 | 129 | Estimated tuning deviation for ``x`` (in cents) |
130 | 130 | """ |
131 | 131 | # TODO supply N in seconds and compute window size in frames via Fs |
132 | | - v, _ = compute_freq_distribution(x, Fs, N, gamma, local, filt, filt_len) |
133 | | - _, _, _, tuning, _ = tuning_similarity(v) |
| 132 | + v, _ = __compute_freq_distribution(x, Fs, N, gamma, local, filt, filt_len) |
| 133 | + _, _, _, tuning, _ = __tuning_similarity(v) |
134 | 134 | return tuning |
135 | 135 |
|
136 | 136 |
|
def __compute_freq_distribution(x, Fs, N=16384, gamma=100.0, local=True, filt=True, filt_len=101):
    """Compute an overall frequency distribution for tuning estimation.

    The magnitude spectrum of ``x`` is (optionally) log-compressed, resampled
    onto a logarithmic frequency axis with 1-cent resolution and normalized so
    that its maximum is 1.

    Parameters
    ----------
    x : np.ndarray
        Audio signal
    Fs : scalar
        Sampling rate of ``x``
    N : int
        Window length of the STFT (only used if ``local`` is True)
    gamma : float
        Constant for logarithmic compression ``log(1 + gamma * Y)``;
        compression is skipped if ``gamma <= 0``
    local : bool
        If True, compute an STFT and sum the frames over time;
        otherwise compute a single DFT of the whole signal
    filt : bool
        If True, subtract a moving average and half-wave rectify the result
    filt_len : int
        Length (in bins) of the moving-average filter

    Returns
    -------
    v : np.ndarray
        Normalized frequency distribution on the logarithmic axis
    F_coef_cents : np.ndarray
        Frequency axis in cents relative to MIDI pitch 24

    Raises
    ------
    ValueError
        If ``local`` is True and ``N`` exceeds half of the signal length
    """
    def midi_to_hz(p):
        # Center frequency of MIDI pitch p (A4 = pitch 69 = 440 Hz)
        return 440 * 2 ** ((p - 69) / 12)

    def normalize_by_max(values):
        # An all-zero distribution (e.g. digital silence) would yield 0 / 0 = NaN;
        # return zeros instead so downstream code receives finite numbers.
        peak = np.max(values)
        if peak == 0:
            return np.zeros_like(values)
        return values / peak

    if local:
        # Power spectrogram with half-overlapping Hann windows, summed over time
        if N > len(x) // 2:
            raise ValueError('The signal length (%d) should be twice as long as the window length (%d)' % (len(x), N))
        Y = np.abs(librosa.stft(x, n_fft=N, hop_length=N // 2, win_length=N,
                                window='hann', pad_mode='constant', center=True)) ** 2
        if gamma > 0:
            Y = np.log(1 + gamma * Y)
        Y = np.sum(Y, axis=1)
        F_coef = librosa.fft_frequencies(sr=Fs, n_fft=N)
    else:
        # Single DFT over the entire signal, keeping the non-negative frequencies
        N = len(x)
        Y = np.abs(np.fft.fft(x)) / Fs
        Y = Y[:N // 2 + 1]
        # Same guard as in the STFT branch: gamma <= 0 means "no compression"
        if gamma > 0:
            Y = np.log(1 + gamma * Y)
        F_coef = np.arange(N // 2 + 1).astype(float) * Fs / N

    # Resample the linear frequency axis onto a logarithmic axis (1-cent steps)
    # spanning MIDI pitches 24 to 108; F_min corresponds to 0 cents.
    F_min = midi_to_hz(24)
    F_max = midi_to_hz(108)
    F_coef_log, F_coef_cents = __compute_f_coef_log(R=1, F_min=F_min, F_max=F_max)
    Y_int = interp1d(F_coef, Y, kind='cubic', fill_value='extrapolate')(F_coef_log)

    if filt:
        # Subtract the local average (moving-average filter) ...
        filt_kernel = np.ones(filt_len)
        Y_smooth = signal.convolve(Y_int, filt_kernel, mode='same') / filt_len
        # ... and keep only the positive part (half-wave rectification)
        Y_rectified = Y_int - Y_smooth
        Y_rectified[Y_rectified < 0] = 0
        v = normalize_by_max(Y_rectified)
    else:
        v = normalize_by_max(Y_int)

    return v, F_coef_cents
| 170 | + |
| 171 | + |
def __compute_f_coef_log(R, F_min, F_max):
    """Compute a logarithmically spaced frequency axis.

    Parameters
    ----------
    R : scalar
        Resolution of the axis in cents (must be positive)
    F_min : scalar
        Lowest frequency of the axis; corresponds to 0 cents (must be positive)
    F_max : scalar
        Upper frequency bound of the axis (must be positive)

    Returns
    -------
    F_coef_log : np.ndarray
        Frequencies ``F_min * 2 ** (k * R / 1200)`` for ``k = 0, ..., n_bins - 1``
    F_coef_cents : np.ndarray
        The same axis expressed in cents relative to ``F_min``

    Raises
    ------
    ValueError
        If ``R``, ``F_min`` or ``F_max`` is not positive
    """
    if R <= 0:
        raise ValueError('R must be positive, got %r' % (R,))
    if F_min <= 0 or F_max <= 0:
        raise ValueError('F_min and F_max must be positive, got %r and %r' % (F_min, F_max))

    n_bins = int(np.ceil(1200 * np.log2(F_max / F_min) / R))
    # Build the cent axis directly instead of recovering it from the frequencies
    # via log2, which avoids a needless exp/log round trip and its rounding error.
    F_coef_cents = np.arange(n_bins, dtype=float) * R
    F_coef_log = 2 ** (F_coef_cents / 1200) * F_min
    return F_coef_log, F_coef_cents
| 177 | + |
| 178 | + |
def __tuning_similarity(v):
    """Compare a frequency distribution with comb templates of varying shift.

    For every candidate shift ``theta`` in ``-50, ..., 49`` a comb template is
    created and its inner product with ``v`` is taken as similarity value.

    Parameters
    ----------
    v : np.ndarray
        Frequency distribution (one value per bin of a logarithmic axis)

    Returns
    -------
    theta_axis : np.ndarray
        Candidate shifts ``-50, ..., 49`` (in bins of ``v``)
    sim : np.ndarray
        Similarity per candidate shift, normalized to a maximum of 1
        (left unnormalized if the maximum is 0)
    ind_max : int
        Index of the maximal similarity value
    theta_max : int
        Shift with maximal similarity
    template_max : np.ndarray
        Comb template belonging to ``theta_max``
    """
    theta_axis = np.arange(-50, 50)
    sim = np.zeros(len(theta_axis))
    M = len(v)
    for i, theta in enumerate(theta_axis):
        sim[i] = np.inner(__template_comb(M=M, theta=theta), v)

    # Avoid 0 / 0 = NaN (and the accompanying RuntimeWarning) for an all-zero input.
    peak = np.max(sim)
    if peak != 0:
        sim = sim / peak

    ind_max = np.argmax(sim)
    theta_max = theta_axis[ind_max]
    template_max = __template_comb(M=M, theta=theta_max)
    return theta_axis, sim, ind_max, theta_max, template_max
| 193 | + |
| 194 | + |
def __template_comb(M, theta=0):
    """Create a comb template with one peak every 100 bins.

    Parameters
    ----------
    M : int
        Length of the template
    theta : scalar
        Shift of the comb in bins; peaks are placed at ``k * 100 + theta``

    Returns
    -------
    template : np.ndarray
        Array of length ``M`` that is 1 at the peak positions and 0 elsewhere
    """
    template = np.zeros(M)
    peak_positions = np.arange(0, M, 100) + theta
    # NOTE(review): candidates are generated from k * 100 < M before shifting, so
    # for negative theta a last peak at k * 100 + theta < M is never produced.
    # Kept as is, since changing it would alter the resulting tuning estimates.
    # Keep only positions that are valid integer indices; a plain bounds mask
    # replaces the set intersection with np.arange(M) without changing the result.
    inside = (peak_positions >= 0) & (peak_positions < M)
    integral = np.mod(peak_positions, 1) == 0
    peak_positions = peak_positions[inside & integral].astype(int)
    template[peak_positions] = 1
    return template
| 201 | + |
| 202 | + |
137 | 203 | def shift_chroma_vectors(chroma: np.ndarray, |
138 | 204 | chroma_shift: int) -> np.ndarray: |
139 | 205 | """Shift chroma representation by the given number of semitones. |
|
0 commit comments