-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstipa.m
266 lines (209 loc) · 9.62 KB
/
stipa.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
function [STI, mk] = stipa(signal, fs, varargin)
%STIPA computes the Speech Transmission Index according to IEC 60268-16:2020
%standard using the direct STIPA (Speech Transmission Index for Public
%Addressing Systems) method.
%
% [STI, MK] = STIPA(SIGNAL, FS) computes the Speech Transmission Index
% from the test signal SIGNAL and its sampling frequency FS in Hz,
% returning the Speech Transmission index STI and a 2-by-7 matrix of the
% respective modulation transfer values MK for each octave band and
% modulation frequency.
%
% [STI, MK] = STIPA(SIGNAL, FS, REFERENCE) computes the Speech
% Transmission Index using the REFERENCE signal instead of the default
% value 0.55, which results from the STIPA test signal. If the sampling
% frequency of the REFERENCE is not specified, it is assumed that the
% sampling frequency is the same as the sampling frequency of the test
% SIGNAL FS.
%
% [STI, MK] = STIPA(SIGNAL, FS, REFERENCE, FSREF) computes the Speech
% Transmission Index using the REFERENCE signal and its sampling
% frequency FSREF.
%
% [STI, MK] = STIPA(SIGNAL, FS, 'Lsk', LSK) computes the Speech
% Transmission Index with adjustment of the MTF for auditory masking and
% threshold effects.
%
% [STI, MK] = STIPA(SIGNAL, FS, 'Lsk', LSK, 'Lnk', LNK) computes the
% Speech Transmission Index with adjustments of the MTF for ambient
% noise, and auditory masking and threshold effects.
%
% Copyright Pavel Záviška, Brno University of Technology, 2023-2024
% Check number of input arguments
narginchk(2, 8);
% Parse input arguments:
p = inputParser;
validScalarPosNum = @(x) isnumeric(x) && isscalar(x) && (x > 0);
validColumnVector = @(x) isnumeric(x) && iscolumn(x);
valid7PosVector = @(x) isnumeric(x) && length(x) == 7 && all(x > 0);
addRequired(p, 'signal', validColumnVector);
addRequired(p, 'fs', validScalarPosNum);
addOptional(p, 'reference', NaN, validColumnVector);
addOptional(p, 'fsRef', fs, validScalarPosNum);
addParameter(p, 'Lsk', NaN, valid7PosVector);
addParameter(p, 'Lnk', NaN, valid7PosVector);
parse(p, signal, fs, varargin{:});
% Parse vectors of input levels Lsk and Lnk
Lsk = p.Results.Lsk(:).';
Lnk = p.Results.Lnk(:).';
adjustAmbientNoiseFlag = false;
adjustAuditoryMaskingFlag = false;
if any(~isnan(Lsk)) && any(~isnan(Lnk)) % Both Lsk and Lnk given
adjustAmbientNoiseFlag = true;
adjustAuditoryMaskingFlag = true;
Isk = 10 .^ (Lsk / 10);
Ink = 10 .^ (Lnk / 10);
elseif any(~isnan(Lsk)) && any(isnan(Lnk)) % Only Lsk given
adjustAuditoryMaskingFlag = true;
Isk = 10 .^ (Lsk / 10);
Ink = zeros(1, 7);
elseif any(isnan(Lsk)) && any(~isnan(Lnk)) % Only Lnk given
warning("Ambient noise levels alone (Lnk), without signal levels" + ...
" (Lsk), are insufficient for calculating the ambient noise" + ...
" adjustment. Therefore, the adjustment step will be omitted.")
end
% Band-filter the input signal and cut the first 200 ms to suppress the
% transient effects of the used IIR octave filters
signalFiltered = bandFiltering(signal, fs);
signalFiltered = signalFiltered(0.2 * fs : end, :);
% Detect the envelope
signalEnvelope = envelopeDetection(signalFiltered, fs);
% Compute modulation depths of the input signal
mk_o = MTF(signalEnvelope, fs);
% Compute modulation depths of the reference signal if it was passed to the STIPA function
if ~isnan(p.Results.reference)
reference = p.Results.reference;
fsRef = p.Results.fsRef;
referenceFiltered = bandFiltering(reference, fsRef);
referenceEnvelope = envelopeDetection(referenceFiltered, fsRef);
mk_i = MTF(referenceEnvelope, fsRef);
mk = mk_o ./ mk_i;
else % Use the default modulation depth 0.55
mk = mk_o ./ 0.55;
end
% Check for any value in 'mk' exceeding the threshold of 1.3
if any(mk(:) > 1.3)
warning(['One or more m-values are higher than 1.3, which is very ' ...
'unlikely and suggests non-sinusoidal fluctuations or impulsive' ...
' noises. This indicates an invalid measurement!'])
end
% Adjust mk values to ambient noise
if adjustAmbientNoiseFlag == true
mk = adjustAmbientNoise(mk, Isk, Ink);
end
% Adjust mk values to auditory masking and threshold effects
if adjustAuditoryMaskingFlag == true
mk = adjustAuditoryMasking(mk, Lsk, Isk, Ink);
end
% Limit the value of modulation transfer values mk to avoid complex values in SNR
% (Note that, in contrast to the paper, the limitation is applied after the adjustment steps)
mk(mk > 1) = 1;
% Calculate SNR from the modulation transfer values and limit the range to [-15; 15] dB
SNR = computeSNR(mk);
SNR = clipSNR(SNR);
% Calculate Trasmission Index
TI = computeTI(SNR);
% Calculate Modulation Transmission Index
MTI = computeMTI(TI);
% Calculate the final value of Speech Transmission index
STI = computeSTI(MTI);
function y = bandFiltering(x, fs)
% Filter input signal using a octave filter of 18th order to achieve a
% minimum of 42 dB attenuation at the center frequency of each adjacent
% band.
filterOrder = 18;
octaveBands = [125, 250, 500, 1000, 2000, 4000, 8000];
y = NaN(length(x), length(octaveBands));
for bandIdx = 1:length(octaveBands)
octFilt = octaveFilter(octaveBands(bandIdx), '1 octave', ...
'SampleRate', fs, 'FilterOrder', filterOrder);
y(:, bandIdx) = octFilt(x);
end
end
function envelope = envelopeDetection(x, fs)
% Compute the intensity envelope by squaring the outputs of the
% bandpass filters and applying a low pass filter at a cut-off
% frequency of 100 Hz.
envelope = x .* x;
envelope = lowpass(envelope, 100, fs);
end
function mk = MTF(signalEnvelope, fs)
% Compute the modulation depths of the received signal's envelope for
% each octave band k.
fm = [1.6, 1, 0.63, 2, 1.25, 0.8, 2.5; ... % modulation frequencies in Hz
8, 5, 3.15, 10, 6.25, 4, 12.5];
seconds = length(signalEnvelope) / fs; % duration of the signal in seconds
mk = NaN(2, 7);
for k = 1:7 % iterate over octave bands
Ik = signalEnvelope(:, k); % signal envelope of k-th octave band
for n = 1:2 % iterate over each modulation frequency in k-th octave band
% Calculate the duration and index of the signal for a whole number of periods
secondsWholePeriod = floor(fm(n, k) * seconds) / fm(n, k);
indexWholePeriod = round(secondsWholePeriod * fs);
t = linspace(0, secondsWholePeriod, indexWholePeriod).';
% Calculate the modulation depths using a whole number of
% periods for each specific modulation frequency
mk(n, k) = 2 * sqrt(sum(Ik(1:indexWholePeriod) .* sin(2 * pi * fm(n, k) * t)) ^ 2 + ...
sum(Ik(1:indexWholePeriod) .* cos(2 * pi * fm(n, k) * t)) ^ 2) / sum(Ik(1:indexWholePeriod));
end
end
end
function mk_ = adjustAmbientNoise(mk, Isk, Ink)
% Adjust the m-values for ambient noise
mk_ = mk .* (Isk ./ (Isk + Ink));
end
function mk_ = adjustAuditoryMasking(mk, Lsk, Isk, Ink)
% Adjust the m-values for auditory masking and threshold effects
Ik = Isk + Ink; % total acoustic intensity
% Auditory masking as a function of the octave band level
La = NaN(1,6);
for k = 1:6
L = Lsk(k);
if L < 63
La(k) = 0.5 * L - 65;
elseif (L >= 63) && (L < 67)
La(k) = 1.8 * L - 146.9;
elseif (L >= 67) && (L < 100)
La(k) = 0.5 * L - 59.8;
else
La(k) = -10;
end
end
a = 10 .^ (La / 10);
% Total acoustic intensity for the level-dependent auditory masking effect
Iamk = [0, Isk(1:end-1) .* a];
% Absolute speech reception threshold
Ak = [46, 27, 12, 6.5, 7.5, 8, 12];
% Acoustic intensity level of the reception threshold
Irtk = 10 .^ (Ak / 10);
mk_ = mk .* (Ik ./ (Ik + Iamk + Irtk));
end
function SNR = computeSNR(mk)
% Compute the Signal-to-Noise Ratio (SNR) from the MTF matrix
% consisting of modulation ratios mk.
SNR = 10 * log10(mk ./ (1 - mk));
end
function SNR_clipped = clipSNR(SNR)
% Limit the SNR values to fit the range from -15 to 15 dB.
SNR_clipped = SNR;
SNR_clipped(SNR_clipped > 15) = 15;
SNR_clipped(SNR_clipped < -15) = -15;
end
function TI = computeTI(SNR)
% Compute the Transmission index from the SNR.
TI = (SNR + 15) / 30;
end
function MTI = computeMTI(TI)
% Compute the Modulation Transfer index (MTI) from the Transmission index TI.
MTI = mean(TI);
end
function STI = computeSTI(MTI)
% Compute the final Speech Transmission Index (STI) from the Modulation
% transfer indices MTI.
% weighting factors for male speech according to the standard
alpha_k = [0.085, 0.127, 0.230, 0.233, 0.309, 0.224, 0.173];
% redundancy factors for male speech according to the standard
beta_k = [0.085, 0.078, 0.065, 0.011, 0.047, 0.095];
STI = min(sum(alpha_k .* MTI) - sum(beta_k .* sqrt(MTI(1:end - 1) .* MTI(2:end))), 1);
end
end