speech_silence_demo.m: ms_per_s = 1000; % milliseconds per second % Read in the
ID: 3562312 • Letter: S
Question
speech_silence_demo.m:
ms_per_s = 1000; % milliseconds per second
% Read in the audio
[x, Fs] = wavread('talking.wav');
% x contains one column for each channel we recorded
% Let's assume we only have a single channel
if size(x, 2) > 1 % size(matrix, d) --> # elements along dim d
error('Single channel recordings only')
end
samples_N = size(x, 1);
% Create a time axis
t = (0:samples_N-1)/Fs;
figure('Name', 'time domain waveform');
plot(t, x);
xlabel 'Time (s)';
ylabel 'Amplitude (counts)';
% Determine intensity every N ms with no overlap
framelength_ms = 10;
intensity_dB = pcmToFramedIntensity(Fs, framelength_ms, framelength_ms, x);
% plot the intensity
figure('Name', 'Intensity')
framelength_s = framelength_ms / 1000; % framelength in seconds
t2 = (0:length(intensity_dB)-1)* framelength_s; % time axis for intensity
plot(t2, intensity_dB);
xlabel('Time (s)')
% relative as our microphone is not calibrated
ylabel('dB rel.')
figure('Name', 'Intensity Histogram')
hist(intensity_dB, 50) % hist(data, N) --> histrogram with N bins
intensityax = gca; % Save current axis for later
xlabel('dB rel.')
ylabel('Counts');
% Noise estimate
% taken from silent section at the beginning
silencestop_s = 1.3; % start to this time
silencestop_N = floor(silencestop_s / framelength_s);
silence_train = intensity_dB(1:silencestop_N);
figure('Name', 'Silence intensity estimate histogram')
hist(silence_train, 50);
silenceax = gca; % Save axis of silence distribution
xlabel('dB rel.')
ylabel('Counts');
% Make the noise intensity histogram have the same scale as the
% noise and speech intensity histogram
xrange = get(intensityax, 'XLim');
yrange = get(intensityax, 'YLim');
set(silenceax, 'XLim', xrange);
set(silenceax, 'YLim', yrange);
mu_noise = mean(silence_train);
var_noise = var(silence_train);
fprintf('Silence estimate: mean %f variance %f ', mu_noise, var_noise);
% A Normal "bell curve" probability density function (pdf)
% is defined by its mean and variance.
% Let's see how well it fits our noise histogram
noiselikelihoodax = add_normplot(silenceax, xrange, mu_noise, var_noise);
% --------------------------------------------------------------------
% Complete according to assignment instructions...
% Speech estimate
% taken fromt time 1.3 to 2.8 s
speechstart_s = 1.3; % start time
speechstop_s = 2.8; % stop time
addnormplot.m:
function pdfax = add_normplot(PeerAx, xrange, mu, sigma2)
% pdfax = add_normplot(PeerAx, xrange, mu, sigma2)
% Given an axis, an xrange [min, max] over which to plot
% it, add a second axis and plot a normal distribution over
% the xrange.
% mu and sigma2 are the means and variances which must be either
% scalars or row vectors. For row vectors, multiple distributions
% are plotted.
%
% Returns a handle to the new axis object
% We will get fancy and overlay a second axis on our first one
% This is somewhat complex Matlab code (not beginner stuff), but
% will produce a nice overlay of the distribution on our noise
% histogram.
% Find the parent of the peer axis so that we may put the new axis
% in the same container.
parent = get(PeerAx, 'Parent');
% Position a second axis on top of our current one.
% Make it transparent so that we can see what's underneath
% turn on the tick marks on the X axis and set up an alternative
% Y axis in another color on the right side.
pdfax = axes('Position', get(PeerAx, 'Position'), ... % put over
'Color', 'none', ... % don't fill it in
'YAxisLocation', 'right', ... % Y label/axis location
'XTick', [], ... % No labels on x axis
'YColor', 'b', ... % blue
'Parent', parent);
X = linspace(xrange(1), xrange(2), 100)'; % N points across xrange
% Preallocate fx assuming one column per pdf
sigma = sqrt(sigma2); % standard deviation
fx = zeros(length(X), length(mu));
for idx = 1:length(mu)
fx(:,idx) = normpdf(X, mu(idx), sigma(idx));
end
if length(mu) > 1
% Replicate Xs for each row so that we have one set of Xs for
% each one to plot.
X = repmat(X, [1, length(mu)]);
end
% hold is used to prevent an axis from being reset the next time
% that we plot something
hold(pdfax, 'on');
plot(pdfax, X, fx);
function dB = pcmToFramedIntensity(Fs, frameLength_ms, frameAdvance_ms, waveform)
% Frame the given signal waveform according to the length and advance
% specified in milliseconds (frameLength_ms and frameAdvance_ms).
% Compute the intesity of each frame in decibels and return the framed
% intensities as vector dB.
% Determine how many samples correspond to the requested duration.
frameLength_n = msTosamples(frameLength_ms, Fs);
frameAdvance_n = msTosamples(frameAdvance_ms, Fs);
samples_n = length(waveform);
% We can either compute the frames in a while loop and grow the results
% or compute the number of frames and use a for loop. The advantage of
% the latter approach is that one can preallocate the result vector
% to the needed length which will make it faster.
% We compute the number of *complete* frames.
% This can almost be done by dividing the number of samples by
% the frame advance, but this will identify the number of frame starts.
% the last frame(s) may not be complete.
% We can compute the number of complete frames by subtracting off one
% frame and adding one advance.
frames_n = floor((samples_n - frameLength_n + frameAdvance_n) ...
/ frameAdvance_n);
dB = zeros(frames_n, 1); % preallocate the matrix for speed
for fidx = 1:frames_n
% Compute beginning and end samples of frame
startidx = (fidx-1) * frameAdvance_n + 1;
endidx = startidx + frameLength_n - 1;
dB(fidx) = pcmToIntensity(waveform(startidx:endidx));
end
end
function dB = pcmToIntensity(pcm)
% dB = pcmToItensity(pcm)
% Return the intensity in decibels for the given signal pcm.
% Intensity is proportional to pressure squared.
% We square each pressure sample and take the mean
% (we could ahve used mean(pcm .^ 2)
% and multiply by 10 for decibels (10ths of bels)
dB = 10 * log10(sum(pcm .^ 2) / length(pcm));
end
function samples = msTosamples(time_ms, Fs)
% samples = msToSamples(time_ms, Fs)
% Given a time in milliseconds (time_ms) and a sample rate (Fs),
% return the number of samples corresponding to the specified
% time.
ms_per_s = 1000; % # milliseconds per second
time_s = time_ms / ms_per_s; % convert ms to s
% Note that we round here, but floor or ceil would also be okay.
samples = round(time_s * Fs); % Number of samples in specified duration
end
Explanation / Answer
NEW CODE BY REMOVING ERRORS
speech_silence_demo.m:
ms_per_s = 1000; % milliseconds per second
% Read in the audio
[x, Fs] = wavread('talking.wav');
% x contains one column for each channel we recorded
% Let's assume we only have a single channel
if size(x, 2) > 1 % size(matrix, d) --> # elements along dim d
error('Single channel recordings only')
end
samples_N = size(x, 1);
% Create a time axis
t = (0:samples_N-1)/Fs;
figure('Name', 'time domain waveform');
plot(t, x);
xlabel 'Time (s)';
ylabel 'Amplitude (counts)';
% Determine intensity every N ms with no overlap
framelength_ms = 10;
intensity_dB = pcmToFramedIntensity(Fs, framelength_ms, framelength_ms, x);
% plot the intensity
figure('Name', 'Intensity')
framelength_s = framelength_ms / 1000; % framelength in seconds
t2 = (0:length(intensity_dB)-1)* framelength_s; % time axis for intensity
plot(t2, intensity_dB);
xlabel('Time (s)')
% relative as our microphone is not calibrated
ylabel('dB rel.')
figure('Name', 'Intensity Histogram')
hist(intensity_dB, 50) % hist(data, N) --> histrogram with N bins
intensityax = gca; % Save current axis for later
xlabel('dB rel.')
ylabel('Counts');
% Noise estimate
% taken from silent section at the beginning
silencestop_s = 1.3; % start to this time
silencestop_N = floor(silencestop_s / framelength_s);
silence_train = intensity_dB(1:silencestop_N);
figure('Name', 'Silence intensity estimate histogram')
hist(silence_train, 50);
silenceax = gca; % Save axis of silence distribution
xlabel('dB rel.')
ylabel('Counts');
% Make the noise intensity histogram have the same scale as the
% noise and speech intensity histogram
xrange = get(intensityax, 'XLim');
yrange = get(intensityax, 'YLim');
set(silenceax, 'XLim', xrange);
set(silenceax, 'YLim', yrange);
mu_noise = mean(silence_train);
var_noise = var(silence_train);
fprintf('Silence estimate: mean %f variance %f ', mu_noise, var_noise);
% A Normal "bell curve" probability density function (pdf)
% is defined by its mean and variance.
% Let's see how well it fits our noise histogram
noiselikelihoodax = add_normplot(silenceax, xrange, mu_noise, var_noise);
% Speech estimate
% taken fromt time 1.3 to 2.8 s
speechstart_s = 1.3; % start time
speechstop_s = 2.8; % stop time
addnormplot.m:
function pdfax = add_normplot(PeerAx, xrange, mu, sigma2)
% pdfax = add_normplot(PeerAx, xrange, mu, sigma2)
% Given an axis, an xrange [min, max] over which to plot
% it, add a second axis and plot a normal distribution over
% the xrange.
% mu and sigma2 are the means and variances which must be either
% scalars or row vectors. For row vectors, multiple distributions
% are plotted.
%
% Returns a handle to the new axis object
% Find the parent of the peer axis so that we may put the new axis
% in the same container.
parent = get(PeerAx, 'Parent');
% Position a second axis on top of our current one.
% Make it transparent so that we can see what's underneath
% turn on the tick marks on the X axis and set up an alternative
% Y axis in another color on the right side.
pdfax = axes('Position', get(PeerAx, 'Position'), ... % put over
'Color', 'none', ... % don't fill it in
'YAxisLocation', 'right', ... % Y label/axis location
'XTick', [], ... % No labels on x axis
'YColor', 'b', ... % blue
'Parent', parent);
X = linspace(xrange(1), xrange(2), 100)'; % N points across xrange
% Preallocate fx assuming one column per pdf
sigma = sqrt(sigma2); % standard deviation
fx = zeros(length(X), length(mu));
for idx = 1:length(mu)
fx(:,idx) = normpdf(X, mu(idx), sigma(idx));
end
if length(mu) > 1
% Replicate Xs for each row so that we have one set of Xs for
% each one to plot.
X = repmat(X, [1, length(mu)]);
end
% hold is used to prevent an axis from being reset the next time
% that we plot something
hold(pdfax, 'on');
plot(pdfax, X, fx);
function dB = pcmToFramedIntensity(Fs, frameLength_ms, frameAdvance_ms, waveform)
% Frame the given signal waveform according to the length and advance
% specified in milliseconds (frameLength_ms and frameAdvance_ms).
% Compute the intesity of each frame in decibels and return the framed
% intensities as vector dB.
% Determine how many samples correspond to the requested duration.
frameLength_n = msTosamples(frameLength_ms, Fs);
frameAdvance_n = msTosamples(frameAdvance_ms, Fs);
samples_n = length(waveform);
frames_n = floor((samples_n - frameLength_n + frameAdvance_n) ...
/ frameAdvance_n);
dB = zeros(frames_n, 1); % preallocate the matrix for speed
for fidx = 1:frames_n
% Compute beginning and end samples of frame
startidx = (fidx-1) * frameAdvance_n + 1;
endidx = startidx + frameLength_n - 1;
dB(fidx) = pcmToIntensity(waveform(startidx:endidx));
end
end
function dB = pcmToIntensity(pcm)
% dB = pcmToItensity(pcm)
% Return the intensity in decibels for the given signal pcm.
% (we could ahve used mean(pcm .^ 2)
% and multiply by 10 for decibels (10ths of bels)
dB = 10 * log10(sum(pcm .^ 2) / length(pcm));
end
function samples = msTosamples(time_ms, Fs)
% samples = msToSamples(time_ms, Fs)
% Given a time in milliseconds (time_ms) and a sample rate (Fs),
% return the number of samples corresponding to the specified
% time.
ms_per_s = 1000; % # milliseconds per second
time_s = time_ms / ms_per_s; % convert ms to s
% Note that we round here, but floor or ceil would also be okay.
samples = round(time_s * Fs); % Number of samples in specified duration
end
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.