-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathspectrogram.py
More file actions
101 lines (80 loc) · 3.69 KB
/
spectrogram.py
File metadata and controls
101 lines (80 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import matplotlib.pyplot as plt
import matplotlib.ticker
import numpy as np
from progressbar import ProgressBar
import argparse
import warnings
import sys
from .wavwrapper import wavfile, monowrapper
from .windowing import overlapped_window
def main():
parser = argparse.ArgumentParser()
parser.add_argument("input_file")
parser.add_argument("-w", dest="window_size", default=1024, type=int, help='window size')
parser.add_argument("-s", dest="scale", default="log", help="scale (log|linear)")
args = parser.parse_args()
args.scale = args.scale.lower()
if not args.scale in ['log', 'linear']:
sys.stderr.write("error: '{:s}' is not a valid scale, choose 'log' or 'linear'.\n".format(args.scale))
return 1
# Open wave file and load frame rate, number of channels, sample width, and number of frames.
w = wavfile(args.input_file)
# Catch case where there are more than 2 channels.
if w.get_param('nchannels') > 2:
sys.stderr.write("error: only mono and stereo tracks are supported\n")
return 1
# Catch case where there is less than one window of audio.
if w.get_param('nframes') < args.window_size:
sys.stderr.write("error: audio file is shorter than configured window size\n")
return 1
# Hann window function coefficients.
hann = 0.5 - 0.5 * np.cos(2.0 * np.pi * (np.arange(args.window_size)) / args.window_size)
# Hann window must have 4x overlap for good results.
overlap = 4
# Y will hold the DFT of each window. We use acc and bar for displaying progress.
Y = []
acc = 0
bar = ProgressBar(max_value=w.get_param('nframes') * overlap)
# Process each window of audio.
for x in overlapped_window(monowrapper(w), args.window_size, overlap):
y = np.fft.rfft(x * hann)[:args.window_size//2]
Y.append(y)
acc += args.window_size
bar.update(acc)
# Inform progress bar that the computation is complete.
bar.finish()
# Normalize data and convert to dB.
Y = np.column_stack(Y)
Y = np.absolute(Y) * 2.0 / np.sum(hann)
Y = Y / np.power(2.0, (8 * w.get_param('sampwidth') - 1))
Y = (20.0 * np.log10(Y)).clip(-120)
# Time domain: We have Y.shape[1] windows, so convert to seconds by multiplying
# by window size, dividing by sample rate, and dividing by the overlap rate.
t = np.arange(0, Y.shape[1], dtype=np.float) * args.window_size / w.get_param('framerate') / overlap
# Frequency domain: There are window_size/2 frequencies represented, and we scale
# by dividing by window size and multiplying by sample frequency.
f = np.arange(0, args.window_size / 2, dtype=np.float) * w.get_param('framerate') / args.window_size
# Plot the spectrogram.
ax = plt.subplot(111)
plt.pcolormesh(t, f, Y, vmin=-120, vmax=0)
# Use log scale above 100 Hz, linear below.
if args.scale == 'log':
yscale = 0.25
# Mitigation for issue 2 (https://github.com/le1ca/spectrogram/issues/2)
if matplotlib.__version__[0:3] == '1.3':
yscale = 1
warnings.warn('You are using matplotlib 1.3.* (and not >= 1.4.0). Therefore linscaley must equal 1, not 0.25')
plt.yscale('symlog', linthresh=100, linscale=yscale)
ax.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
# Set x/y limits by using the maximums from the time/frequency arrays.
plt.xlim(0, t[-1])
plt.ylim(0, f[-1])
# Set axis labels.
plt.xlabel("Time (s)")
plt.ylabel("Frequency (Hz)")
# Show legend and set label.
cbar = plt.colorbar()
cbar.set_label("Intensity (dB)")
# Display spectrogram.
plt.show()
return 0