Skip to content

Commit a6ffb97

Browse files
committed
Port tuning estimation off libfmp
1 parent c02090a commit a6ffb97

2 files changed

Lines changed: 72 additions & 7 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ dependencies = [
3232
"pandas >= 2.3.0, < 3.0.0",
3333
"pretty_midi >= 0.2.11, < 1.0.0",
3434
"soundfile >= 0.13.0, < 1.0.0",
35-
"scipy >= 1.15.0, < 2.0.0",
36-
"libfmp >= 1.3.0, < 2.0.0"
35+
"scipy >= 1.15.0, < 2.0.0"
3736
]
3837

3938
[project.optional-dependencies]

synctoolbox/feature/utils.py

Lines changed: 71 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
from libfmp.c3 import compute_freq_distribution, tuning_similarity
1+
import librosa
22
import numpy as np
33
from scipy import signal
4+
from scipy.interpolate import interp1d
45
from typing import Tuple
56

67

@@ -97,8 +98,7 @@ def estimate_tuning(x: np.ndarray,
9798
local: bool = True,
9899
filt: bool = True,
99100
filt_len: int = 101) -> int:
100-
"""Compute tuning deviation in cents for an audio signal. Convenience wrapper around
101-
'compute_freq_distribution' and 'tuning_similarity' from libfmp.
101+
"""Compute tuning deviation in cents for an audio signal.
102102
103103
Parameters
104104
----------
@@ -129,11 +129,77 @@ def estimate_tuning(x: np.ndarray,
129129
Estimated tuning deviation for ``x`` (in cents)
130130
"""
131131
# TODO supply N in seconds and compute window size in frames via Fs
132-
v, _ = compute_freq_distribution(x, Fs, N, gamma, local, filt, filt_len)
133-
_, _, _, tuning, _ = tuning_similarity(v)
132+
v, _ = __compute_freq_distribution(x, Fs, N, gamma, local, filt, filt_len)
133+
_, _, _, tuning, _ = __tuning_similarity(v)
134134
return tuning
135135

136136

137+
def __compute_freq_distribution(x, Fs, N=16384, gamma=100.0, local=True, filt=True, filt_len=101):
    """Compute an overall frequency distribution for tuning estimation.

    The magnitude spectrum of ``x`` is resampled onto a logarithmic frequency
    axis, optionally high-pass filtered by subtracting a moving average, and
    normalised to a maximum of 1.

    Parameters
    ----------
    x : np.ndarray
        Audio signal.
    Fs : scalar
        Sampling rate of ``x``.
    N : int
        Window size used when ``local`` is true. Ignored (replaced by
        ``len(x)``) otherwise.
    gamma : float
        Constant for logarithmic compression ``log(1 + gamma * Y)``. Values
        ``<= 0`` disable the compression in both the local and global mode.
    local : bool
        If true, sum the spectra of half-overlapping Hann windows of size
        ``N``; if false, use a single DFT over the whole signal.
    filt : bool
        If true, subtract a moving average of length ``filt_len`` and clip
        negative values to zero before normalising.
    filt_len : int
        Length of the moving-average kernel (in bins of the log axis).

    Returns
    -------
    v : np.ndarray
        Normalised frequency distribution on the logarithmic axis. If the
        distribution has no positive peak (e.g. silent input) it is returned
        un-normalised instead of being divided by zero.
    F_coef_cents : np.ndarray
        Axis of ``v`` in cents relative to the lowest analysed frequency.

    Raises
    ------
    ValueError
        If ``local`` is true and ``N`` exceeds half the signal length.
    """
    if local:
        if N > len(x) // 2:
            raise ValueError('The signal length (%d) should be twice as long as the window length (%d)' % (len(x), N))
        # Power spectrogram of half-overlapping Hann windows, summed over time.
        Y = np.abs(librosa.stft(x, n_fft=N, hop_length=N // 2, win_length=N,
                                window='hann', pad_mode='constant', center=True)) ** 2
        if gamma > 0:
            Y = np.log(1 + gamma * Y)
        Y = np.sum(Y, axis=1)
        F_coef = librosa.fft_frequencies(sr=Fs, n_fft=N)
    else:
        # Single DFT over the whole signal; keep the non-negative frequencies.
        N = len(x)
        Y = np.abs(np.fft.fft(x)) / Fs
        Y = Y[:N // 2 + 1]
        # Same guard as in the local branch: gamma <= 0 means "no compression"
        # rather than producing an all-zero (gamma == 0) or invalid spectrum.
        if gamma > 0:
            Y = np.log(1 + gamma * Y)
        F_coef = np.arange(N // 2 + 1).astype(float) * Fs / N

    # Analysis range: pitch numbers 24 to 108, with pitch 69 tuned to 440 Hz.
    F_min = 440 * 2 ** ((24 - 69) / 12)
    F_max = 440 * 2 ** ((108 - 69) / 12)
    F_coef_log, F_coef_cents = __compute_f_coef_log(R=1, F_min=F_min, F_max=F_max)
    Y_int = interp1d(F_coef, Y, kind='cubic', fill_value='extrapolate')(F_coef_log)

    if filt:
        # Remove the local average so that only spectral peaks remain.
        filt_kernel = np.ones(filt_len)
        Y_smooth = signal.convolve(Y_int, filt_kernel, mode='same') / filt_len
        Y_out = np.maximum(Y_int - Y_smooth, 0)
    else:
        Y_out = Y_int

    # Normalise to a maximum of 1, but never divide by zero.
    peak = np.max(Y_out)
    v = Y_out / peak if peak != 0 else Y_out

    return v, F_coef_cents
170+
171+
172+
def __compute_f_coef_log(R, F_min, F_max):
    """Build a logarithmically spaced frequency axis.

    Parameters
    ----------
    R : scalar
        Resolution of the axis in cents per bin.
    F_min : scalar
        Frequency of the first bin.
    F_max : scalar
        Upper frequency bound; the axis stops before reaching it.

    Returns
    -------
    F_coef_log : np.ndarray
        Bin frequencies, spaced ``R`` cents apart starting at ``F_min``.
    F_coef_cents : np.ndarray
        The same bins expressed in cents relative to ``F_min``.
    """
    # Total width of the analysed range in cents, rounded up to whole bins.
    range_in_cents = 1200 * np.log2(F_max / F_min)
    bin_count = np.ceil(range_in_cents / R).astype(int)

    # Offset of every bin from F_min in cents, converted back to frequency.
    offsets_in_cents = np.arange(0, bin_count) * R
    F_coef_log = F_min * 2 ** (offsets_in_cents / 1200)

    F_coef_cents = 1200 * np.log2(F_coef_log / F_min)
    return F_coef_log, F_coef_cents
177+
178+
179+
def __tuning_similarity(v):
    """Find the tuning deviation whose comb template best matches ``v``.

    For every candidate deviation ``theta`` in ``-50, ..., 49`` a comb
    template of the same length as ``v`` is built and compared with ``v``
    via an inner product.

    Parameters
    ----------
    v : np.ndarray
        Frequency distribution to analyse.

    Returns
    -------
    theta_axis : np.ndarray
        Candidate deviations ``-50, ..., 49``.
    sim : np.ndarray
        Similarity for every candidate, normalised to a maximum of 1. If no
        candidate has a positive similarity (e.g. ``v`` is all zeros) the
        values are returned un-normalised instead of dividing by zero.
    ind_max : int
        Index of the best candidate in ``theta_axis``; on ties the first
        one wins.
    theta_max : int
        Deviation of the best candidate.
    template_max : np.ndarray
        Comb template belonging to ``theta_max``.
    """
    theta_axis = np.arange(-50, 50)
    M = len(v)

    sim = np.array(
        [np.inner(__template_comb(M=M, theta=theta), v) for theta in theta_axis],
        dtype=float,
    )

    # Guard the normalisation: a silent or empty distribution gives max == 0,
    # and 0 / 0 would turn the whole similarity curve into NaN.
    peak = np.max(sim)
    if peak > 0:
        sim = sim / peak

    ind_max = np.argmax(sim)
    theta_max = theta_axis[ind_max]
    template_max = __template_comb(M=M, theta=theta_max)
    return theta_axis, sim, ind_max, theta_max, template_max
193+
194+
195+
def __template_comb(M, theta=0):
    """Create a comb template with a peak every 100 bins.

    Parameters
    ----------
    M : int
        Length of the template.
    theta : int
        Shift applied to all peak positions.

    Returns
    -------
    np.ndarray
        Array of length ``M`` that is 1 at the shifted peak positions that
        fall on a valid index and 0 everywhere else.
    """
    # Candidate peaks before checking that they fall inside the template.
    candidates = theta + np.arange(0, M, 100)
    valid_bins = np.arange(M)
    # Keep only candidates that coincide with an existing bin index.
    hits = np.intersect1d(candidates, valid_bins).astype(int)

    comb = np.zeros(M)
    comb[hits] = 1
    return comb
201+
202+
137203
def shift_chroma_vectors(chroma: np.ndarray,
138204
chroma_shift: int) -> np.ndarray:
139205
"""Shift chroma representation by the given number of semitones.

0 commit comments

Comments
 (0)