-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNMF_multilayer.py
207 lines (150 loc) · 5.37 KB
/
NMF_multilayer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from os import listdir
from utils.audio.audio_utils import *
import matplotlib.pyplot as plt
def create_mel_spectrogram(wav_file):
    """Build a normalized magnitude mel spectrogram for one wav file.

    :wav_file: path of the wav file to read
    :returns: result of ``normalize`` applied to the absolute value of the
              mel spectrogram (computed with ``n_fft=1024``)
    """
    # read_wav, mel_spectrogram and normalize are not defined in this file;
    # presumably they come from the utils.audio.audio_utils star import.
    # The file is read with read_wav's default settings (a resample=8000
    # variant was tried previously and is not used).
    signal, rate = read_wav(wav_file)
    magnitude = np.abs(mel_spectrogram(signal, rate, n_fft=1024))
    return normalize(magnitude)
def generate_random_2D_matrix(n, m):
    """Return a random (n x m) matrix.

    :n: n dimension (vertical, number of rows)
    :m: m dimension (horizontal, number of columns)
    :returns: 2D array of shape (n, m) drawn with ``np.random.random``
    """
    shape = [n, m]
    return np.random.random(shape)
def left_shifted(matrix, t):
    """Shift the columns of a 2D matrix ``t`` positions to the left.

    Columns moved past the left edge are dropped and the vacated columns on
    the right are zero. When ``t`` is at least the number of columns the
    result is all zeros.

    :matrix: 2D numpy array to shift
    :t: number of columns to shift by; expected to be >= 0 (negative values
        are not treated as a right shift)
    :returns: a new array with the same shape as ``matrix``; the input
              object itself is never returned
    """
    if t == 0:
        # Return a copy rather than the input object: the result must not
        # alias the caller's matrix, otherwise an in-place change to the
        # "shifted" array would silently modify the original.
        return matrix.copy()

    shifted = np.zeros(matrix.shape)
    # Column j of the result takes column j + t of the input.
    shifted[:, 0:-t] = matrix[:, t:]
    return shifted
#@print_matrix_shapes
def calc_predicted_matrix(V, Wperc, Hperc, Wharm, verbose=False):
    """Calculate predicted matrix.

    The prediction is the sum of two parts:

    * percussive: for every shift ``t`` along the first axis of ``Wperc``,
      ``Wperc[t] @ left_shifted(Hperc, t)``
    * harmonic: ``Wharm @ (Wharm.T @ V)``

    :V: input matrix
    :Wperc: W percussive (dictionary), indexed by shift on its first axis
    :Hperc: H percussive
    :Wharm: W harmonic
    :verbose: when True, print the predicted matrix (debugging aid). Off by
              default so repeated calls do not flood stdout.
    :returns: updated V_p, same shape as V
    """
    harmonic_part = np.matmul(Wharm, np.matmul(Wharm.T, V))

    V_p = np.zeros(V.shape)
    # NOTE(review): NMF deconvolution models are usually written with the
    # activations shifted to the RIGHT in the reconstruction; a left shift
    # is used here -- confirm this is intended.
    for t in range(Wperc.shape[0]):
        V_p += np.matmul(Wperc[t], left_shifted(Hperc, t))
    V_p += harmonic_part

    if verbose:
        print("V predicted: ", V_p)
    return V_p
#@print_matrix_shapes
def update_Wharm_Hperc(V, Wperc, Hperc, Wharm):
    """Apply one multiplicative update to W harmonic and H percussive.

    :V: input matrix
    :Wperc: W percussive (dictionary), indexed by shift on its first axis
    :Hperc: H percussive
    :Wharm: W harmonic
    :returns: tuple (Wharm, Hperc) holding the updated matrices
    """
    n_shifts = Wperc.shape[0]

    # Element-wise reciprocal of the current prediction.
    # NOTE(review): nothing guards against zeros in the prediction or in
    # the denominators below; a zero there yields inf/nan -- confirm the
    # inputs are strictly positive.
    V_p_inv = np.power(calc_predicted_matrix(V, Wperc, Hperc, Wharm), -1)

    # Z is V divided element-wise by the prediction; phi is a matrix of
    # ones times the reciprocal, i.e. the reciprocal itself.
    Z = V * V_p_inv
    phi = V_p_inv

    # ---- W harmonic: numerator and denominator share V.T @ Wharm ----
    Vt_Wharm = np.matmul(V.T, Wharm)
    W_numerator = (np.matmul(Z, Vt_Wharm)
                   + np.matmul(V, np.matmul(Z.T, Wharm)))
    W_denominator = (np.matmul(phi, Vt_Wharm)
                     + np.matmul(V, np.matmul(phi.T, Wharm)))

    # ---- H percussive: accumulate over every shift of the dictionary ----
    # The ones matrix has to match the transposed Wperc slices:
    # (rows of Wperc[0]) x (columns of Hperc).
    ones = np.ones((Wperc[0].shape[0], Hperc.shape[1]))
    H_numerator_sum = np.zeros(Hperc.shape)
    H_denominator_sum = np.zeros(Hperc.shape)
    for t in range(n_shifts):
        Wperc_t_T = Wperc[t].T
        H_numerator_sum += np.matmul(Wperc_t_T, left_shifted(Z, t))
        H_denominator_sum += np.matmul(Wperc_t_T, left_shifted(ones, t))

    new_Wharm = Wharm * (W_numerator / W_denominator)
    new_Hperc = Hperc * (H_numerator_sum / H_denominator_sum)
    return new_Wharm, new_Hperc
def get_files(directory):
    """
    Create dictionary of files in a directory with file name as key and file
    path as value.

    :directory: directory whose entries are listed
    :returns: dict mapping each entry's name up to its first '.' to the
              path of that entry inside ``directory``; if two entries share
              a key, the one listed later wins
    """
    from os.path import join  # local import: the file only imports listdir

    # Build every path from ``directory`` itself instead of a fixed
    # './audio/' prefix, so the result is correct for any directory.
    return {name.split('.')[0]: join(directory, name)
            for name in listdir(directory)}
def generate_sample_dict(samples):
    """
    Generate drum dictionary of samples.

    Every sample spectrogram is transposed and stored as one slice along
    the last axis, so ``result[t][n][i] == samples[i][n][t]``.

    :samples: non-empty list of 2D spectrograms for individual drum
              samples; the first one defines the dictionary size, and only
              the leading region of that size is read from each sample
    :returns: drum dictionary Wperc, 3D matrix (t x n x m) where t is the
              spectrogram width, n its height and m the number of samples
    """
    m_length = len(samples)  # defined by number of samples
    # height and width of the first sample spectrogram
    n_length, t_length = samples[0].shape[0], samples[0].shape[1]

    target = np.zeros((t_length, n_length, m_length))
    for index, sample in enumerate(samples):
        # One transposed slice assignment per sample replaces the
        # element-by-element Python loops.
        target[:, :, index] = np.asarray(sample)[:n_length, :t_length].T
    return target
def plot_matrix(m):
    """Display matrix ``m`` with ``plt.matshow`` and show the figure.

    :m: matrix to display
    """
    plt.matshow(m)
    plt.show()
###############################################################################
d = get_files('./audio')  # create file dict

# generate spectrogram V for all samples combined
# order: bd - hh - sd
V = create_mel_spectrogram(d['all'])  # create spectrogram for all sounds

# Create spectrograms for all individual samples and collect them in the
# sample list, in the same bd - hh - sd order as the dictionary.
# (Previously sd_spec was appended three times by mistake.)
sd_spec = create_mel_spectrogram(d['sd'])
bd_spec = create_mel_spectrogram(d['bd'])
hh_spec = create_mel_spectrogram(d['hh'])
sample_list = [bd_spec, hh_spec, sd_spec]

# Generate Wperc (sample dictionary)
Wperc = generate_sample_dict(sample_list)

# Generate initial random matrices Hperc, Wharm
n, p = V.shape
Hperc = generate_random_2D_matrix(len(sample_list), p)  # create H matrix
Wharm = generate_random_2D_matrix(n, len(sample_list))  # create W matrix

for i in range(100):
    Wharm, Hperc = update_Wharm_Hperc(V, Wperc, Hperc, Wharm)

# plot the model's prediction V_p. Wharm x Hperc is not the prediction:
# it mixes the harmonic dictionary with the percussive activations, so the
# prediction is taken from calc_predicted_matrix instead.
plot_matrix(calc_predicted_matrix(V, Wperc, Hperc, Wharm))

# plot original V
plot_matrix(V)