-
Notifications
You must be signed in to change notification settings - Fork 1
/
encode.asv
169 lines (158 loc) · 7.41 KB
/
encode.asv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
% Where we are reading our audio from.
read_path = 'snippet.flac';
% Where we are writing our encoded audio to.
write_path = 'snippet-output-1.flac';
% Number of overlapping that happens between each window.
window_overlap_count = 2;
% Range of octaves to analyse for a given frequency.
octave_analysis_range = 1;
% How soft something has to be, relative to the loudest nearby frequency,
% to be removed from the audio.
softer_factor_threshold = 10;
% The further apart an amplitude spike is from other frequencies, the less
% likely it is to drown out said frequency. This variable specifies how
% much 'softer' said frequency's amplitude has to be to be considered
% masked by the other frequency by this equation:
% Drowned out = softness * (1/(octave_distance_weighting * octave_difference + 1)) >
% softer_factor_threshold.
octave_distance_weighting = 4;
% How much of the audio we are analysing, in milliseconds, at any given
% amount of time.
window_time_width = 4;
% Read the audio.
[o_y, o_fs] = audioread(read_path);
audio_sample_len = size(o_y, 1);
n_y = zeros(size(o_y));
audio_info = audioinfo(read_path);
disp(audio_info);
encoding_results = ...
psychoacoustic_encoding(...
o_y, n_y, o_fs, window_overlap_count, window_time_width, ...
octave_analysis_range, softer_factor_threshold, ...
octave_distance_weighting...
);
[n_y, total_components, total_windows, total_components_removed
figure;
plot_fft(o_y, o_fs, audio_sample_len);
title(sprintf('Original %s - Amplitude spectrum', read_path));
figure;
plot_fft(n_y, o_fs, audio_sample_len);
title(sprintf('Encoded %s - Amplitude spectrum', write_path));
sound(n_y, o_fs);
audiowrite(write_path, n_y, o_fs, 'BitsPerSample', audio_info.BitsPerSample);
average_components_removed_per_window = total_components_removed / total_windows;
fprintf('Average components removed per window: %f\n', average_components_removed_per_window);
ratio_average_components_removed_vs_total_components = total_components_removed / total_components;
fprintf('Percent average components removed per window: %f%%\n', ratio_average_components_removed_vs_total_components * 100);
fprintf('Sum squared error: %f\n', sum(sum_squared_error(o_y, n_y)));
function result = psychoacoustic_encoding(o_y, n_y, o_fs, window_overlap_count, window_time_width, octave_analysis_range, softer_factor_threshold, octave_distance_weighting)
total_components_removed = 0;
total_windows = 0;
% Number of elements per window.
window_sample_width = round(o_fs * (window_time_width / 1000));
audio_sample_len = size(o_y, 1);
channel_len = size(o_y, 2);
total_components = 0;
% This loop analyses audio in intervals of <window_sample_width> samples
% and removes all frequencies that are considered to be masked by other
% frequencies.
w = 1;
while w <= audio_sample_len
end_window_index = min(w + (window_sample_width - 1), audio_sample_len);
for c = 1:channel_len
% Grab the fast fourier transform so that we can determine what
% frequencies are being overpowered by other frequencies in the
% sample.
fft_y = fft(o_y(w:end_window_index, c));
fft_y_len = size(fft_y, 1);
% Abs the FFT so we can look at soley the amplitude of the frequencies.
% The FFT, otherwise, would be covered in complex numbers.
fft_y_abs = abs(fft_y);
% Ignore the 0Hz
f = 2;
while f <= fft_y_len
amplitude = fft_y_abs(f);
frequency = fft_index_to_hz(f, o_fs, fft_y_len);
if amplitude ~= 0 && frequency ~= 0
other_f = max(2, floor(octave_to_fft_index(-octave_analysis_range, frequency, o_fs, fft_y_len)));
other_frequency = fft_index_to_hz(f, o_fs, fft_y_len);
other_f_end_index = min(fft_y_len, ceil(octave_to_fft_index(octave_analysis_range, frequency, o_fs, fft_y_len)));
% The maximum amount of 'softness' out of all the frequencies
% that we compared the frequency we're look at with.
max_softer_factor = 0;
while other_f <= other_f_end_index
if other_frequency ~= 0 && f ~= other_f
% How disimilar two wavelengths are to each other in.
octave_difference = abs(octaves(frequency, other_frequency));
other_amplitude = fft_y_abs(other_f);
% How soft the analysed frequency is to the other
% frequency.
softness = other_amplitude/amplitude;
% The weighted softness (we prefer not to remove
% the component if the other frequency is too
% distant).
relative_softness = softness * (1/(octave_distance_weighting * octave_difference + 1));
max_softer_factor = max(relative_softness, max_softer_factor);
end
other_f = other_f + 1;
end
if max_softer_factor >= softer_factor_threshold
fft_y(f) = 0;
total_components_removed = total_components_removed + 1;
end
end
f = f + 1;
end
% disp(total_windows);
ifft_y = ifft(fft_y, 'symmetric');
n_y(w:end_window_index, c) = ifft_y;
% Add 1 window per channel
total_windows = total_windows + 1;
total_components = total_components + fft_y_len;
end
if total_windows == 100
figure;
hold on;
plot_fft(o_y(w:end_window_index, c), o_fs, fft_y_len);
plot_fft_2(fft_y, o_fs, fft_y_len);
hold off;
legend(sprintf('Original %s', read_path), sprintf('Encoded %s', write_path));
title(sprintf('Sample window (%d-%d, channel %d) - Amplitude spectrum', w, end_window_index, c));
end
% Ideally we would slide over the samples but that would take too long,
% instead, increase the window starting position by half of the window
% width.
w = w + ceil(window_sample_width / window_overlap_count);
end
result = [n_y, total_components, total_windows, total_components_removed];
end
function result = sum_squared_error(original, new)
result = sum((original - new).^2);
end
function hz = fft_index_to_hz(index, fs, fft_len)
hz = index * hz_per_fft_index(fs, fft_len);
end
function index = octave_to_fft_index(octaves, original_hz, fs, fft_len)
index = octave_frequency(octaves, original_hz) / hz_per_fft_index(fs, fft_len);
end
function hz = hz_per_fft_index(fs, fft_len)
hz = fs/(fft_len + 1);
end
function f = octave_frequency(octaves, hz)
f = (2^octaves) * hz;
end
function o = octaves(frequency_1, frequency_2)
o = log2(frequency_1 / frequency_2);
end
function plot_fft_2(fft_y, sample_rate, total_samples)
P2 = abs(fft_y/total_samples);
P1 = P2(1:floor(total_samples/2)+1);
P1(2:end-1) = 2*P1(2:end-1);
Q = sample_rate*(0:(total_samples/2))/total_samples;
plot(Q,P1);
xlabel('Frequency (Hz)');
ylabel('Amplitude');
end
function plot_fft(samples, sample_rate, total_samples)
plot_fft_2(fft(samples), sample_rate, total_samples);
end