speech_silence_demo.m: ms_per_s = 1000; % milliseconds per second % Read in the
ID: 3562311 • Letter: S
Question
speech_silence_demo.m:
ms_per_s = 1000; % milliseconds per second
% Read in the audio
[x, Fs] = wavread('talking.wav');
% x contains one column for each channel we recorded
% Let's assume we only have a single channel
if size(x, 2) > 1 % size(matrix, d) --> # elements along dim d
error('Single channel recordings only')
end
samples_N = size(x, 1);
% Create a time axis
t = (0:samples_N-1)/Fs;
figure('Name', 'time domain waveform');
plot(t, x);
xlabel 'Time (s)';
ylabel 'Amplitude (counts)';
% Determine intensity every N ms with no overlap
framelength_ms = 10;
intensity_dB = pcmToFramedIntensity(Fs, framelength_ms, framelength_ms, x);
% plot the intensity
figure('Name', 'Intensity')
framelength_s = framelength_ms / 1000; % framelength in seconds
t2 = (0:length(intensity_dB)-1)* framelength_s; % time axis for intensity
plot(t2, intensity_dB);
xlabel('Time (s)')
% relative as our microphone is not calibrated
ylabel('dB rel.')
figure('Name', 'Intensity Histogram')
hist(intensity_dB, 50) % hist(data, N) --> histrogram with N bins
intensityax = gca; % Save current axis for later
xlabel('dB rel.')
ylabel('Counts');
% Noise estimate
% taken from silent section at the beginning
silencestop_s = 1.3; % start to this time
silencestop_N = floor(silencestop_s / framelength_s);
silence_train = intensity_dB(1:silencestop_N);
figure('Name', 'Silence intensity estimate histogram')
hist(silence_train, 50);
silenceax = gca; % Save axis of silence distribution
xlabel('dB rel.')
ylabel('Counts');
% Make the noise intensity histogram have the same scale as the
% noise and speech intensity histogram
xrange = get(intensityax, 'XLim');
yrange = get(intensityax, 'YLim');
set(silenceax, 'XLim', xrange);
set(silenceax, 'YLim', yrange);
mu_noise = mean(silence_train);
var_noise = var(silence_train);
fprintf('Silence estimate: mean %f variance %f ', mu_noise, var_noise);
% A Normal "bell curve" probability density function (pdf)
% is defined by its mean and variance.
% Let's see how well it fits our noise histogram
noiselikelihoodax = add_normplot(silenceax, xrange, mu_noise, var_noise);
% --------------------------------------------------------------------
% Complete according to assignment instructions...
% Speech estimate
% taken fromt time 1.3 to 2.8 s
speechstart_s = 1.3; % start time
speechstop_s = 2.8; % stop time
addnormplot.m:
function pdfax = add_normplot(PeerAx, xrange, mu, sigma2)
% pdfax = add_normplot(PeerAx, xrange, mu, sigma2)
% Given an axis, an xrange [min, max] over which to plot
% it, add a second axis and plot a normal distribution over
% the xrange.
% mu and sigma2 are the means and variances which must be either
% scalars or row vectors. For row vectors, multiple distributions
% are plotted.
%
% Returns a handle to the new axis object
% We will get fancy and overlay a second axis on our first one
% This is somewhat complex Matlab code (not beginner stuff), but
% will produce a nice overlay of the distribution on our noise
% histogram.
% Find the parent of the peer axis so that we may put the new axis
% in the same container.
parent = get(PeerAx, 'Parent');
% Position a second axis on top of our current one.
% Make it transparent so that we can see what's underneath
% turn on the tick marks on the X axis and set up an alternative
% Y axis in another color on the right side.
pdfax = axes('Position', get(PeerAx, 'Position'), ... % put over
'Color', 'none', ... % don't fill it in
'YAxisLocation', 'right', ... % Y label/axis location
'XTick', [], ... % No labels on x axis
'YColor', 'b', ... % blue
'Parent', parent);
X = linspace(xrange(1), xrange(2), 100)'; % N points across xrange
% Preallocate fx assuming one column per pdf
sigma = sqrt(sigma2); % standard deviation
fx = zeros(length(X), length(mu));
for idx = 1:length(mu)
fx(:,idx) = normpdf(X, mu(idx), sigma(idx));
end
if length(mu) > 1
% Replicate Xs for each row so that we have one set of Xs for
% each one to plot.
X = repmat(X, [1, length(mu)]);
end
% hold is used to prevent an axis from being reset the next time
% that we plot something
hold(pdfax, 'on');
plot(pdfax, X, fx);
function dB = pcmToFramedIntensity(Fs, frameLength_ms, frameAdvance_ms, waveform)
% Frame the given signal waveform according to the length and advance
% specified in milliseconds (frameLength_ms and frameAdvance_ms).
% Compute the intesity of each frame in decibels and return the framed
% intensities as vector dB.
% Determine how many samples correspond to the requested duration.
frameLength_n = msTosamples(frameLength_ms, Fs);
frameAdvance_n = msTosamples(frameAdvance_ms, Fs);
samples_n = length(waveform);
% We can either compute the frames in a while loop and grow the results
% or compute the number of frames and use a for loop. The advantage of
% the latter approach is that one can preallocate the result vector
% to the needed length which will make it faster.
% We compute the number of *complete* frames.
% This can almost be done by dividing the number of samples by
% the frame advance, but this will identify the number of frame starts.
% the last frame(s) may not be complete.
% We can compute the number of complete frames by subtracting off one
% frame and adding one advance.
frames_n = floor((samples_n - frameLength_n + frameAdvance_n) ...
/ frameAdvance_n);
dB = zeros(frames_n, 1); % preallocate the matrix for speed
for fidx = 1:frames_n
% Compute beginning and end samples of frame
startidx = (fidx-1) * frameAdvance_n + 1;
endidx = startidx + frameLength_n - 1;
dB(fidx) = pcmToIntensity(waveform(startidx:endidx));
end
end
function dB = pcmToIntensity(pcm)
% dB = pcmToItensity(pcm)
% Return the intensity in decibels for the given signal pcm.
% Intensity is proportional to pressure squared.
% We square each pressure sample and take the mean
% (we could ahve used mean(pcm .^ 2)
% and multiply by 10 for decibels (10ths of bels)
dB = 10 * log10(sum(pcm .^ 2) / length(pcm));
end
function samples = msTosamples(time_ms, Fs)
% samples = msToSamples(time_ms, Fs)
% Given a time in milliseconds (time_ms) and a sample rate (Fs),
% return the number of samples corresponding to the specified
% time.
ms_per_s = 1000; % # milliseconds per second
time_s = time_ms / ms_per_s; % convert ms to s
% Note that we round here, but floor or ceil would also be okay.
samples = round(time_s * Fs); % Number of samples in specified duration
end
.
4. (60 points) Calculate (in your code) the point at which the speech and noise distributions intersect and display it using fprintf (hint: set the two pdfs to be equal and find the roots of x, do not use Matlab's symbolic solver, without care the rounding errors will make the result inaccurate. Either use the quadratic equation' or Matlab's root solver (figure out on your own, do not use Matlab's symbolic solver, without transforming this problem it will not provide a very accurate solution). Plot the following figures: a. Histograms of the speech and noise distributions with the noise and speech pdfs overlaid. Draw a line (plot or line command) on the histograms at the point where the two distributions intersect (must be driven by your calculation, do not insert one by hand). b. Plot the original time-domain samples of speech showing which portions are speech and which are silence according to your classifier. Each figure must have appropriate axis labels. Write a caption to accompany each plot. Good captions are important in scientific papers as readers frequently thumb through a publication looking at the figures before they read the article. Hence, the caption should contain enough information to give a reader familiar with the field a sense of what the figure represents. This typically requires one or two sentences.Explanation / Answer
NEW CODE BY REMOVING ERRORS
speech_silence_demo.m:
ms_per_s = 1000; % milliseconds per second
% Read in the audio
[x, Fs] = wavread('talking.wav');
% x contains one column for each channel we recorded
% Let's assume we only have a single channel
if size(x, 2) > 1 % size(matrix, d) --> # elements along dim d
error('Single channel recordings only')
end
samples_N = size(x, 1);
% Create a time axis
t = (0:samples_N-1)/Fs;
figure('Name', 'time domain waveform');
plot(t, x);
xlabel 'Time (s)';
ylabel 'Amplitude (counts)';
% Determine intensity every N ms with no overlap
framelength_ms = 10;
intensity_dB = pcmToFramedIntensity(Fs, framelength_ms, framelength_ms, x);
% plot the intensity
figure('Name', 'Intensity')
framelength_s = framelength_ms / 1000; % framelength in seconds
t2 = (0:length(intensity_dB)-1)* framelength_s; % time axis for intensity
plot(t2, intensity_dB);
xlabel('Time (s)')
% relative as our microphone is not calibrated
ylabel('dB rel.')
figure('Name', 'Intensity Histogram')
hist(intensity_dB, 50) % hist(data, N) --> histrogram with N bins
intensityax = gca; % Save current axis for later
xlabel('dB rel.')
ylabel('Counts');
% Noise estimate
% taken from silent section at the beginning
silencestop_s = 1.3; % start to this time
silencestop_N = floor(silencestop_s / framelength_s);
silence_train = intensity_dB(1:silencestop_N);
figure('Name', 'Silence intensity estimate histogram')
hist(silence_train, 50);
silenceax = gca; % Save axis of silence distribution
xlabel('dB rel.')
ylabel('Counts');
% Make the noise intensity histogram have the same scale as the
% noise and speech intensity histogram
xrange = get(intensityax, 'XLim');
yrange = get(intensityax, 'YLim');
set(silenceax, 'XLim', xrange);
set(silenceax, 'YLim', yrange);
mu_noise = mean(silence_train);
var_noise = var(silence_train);
fprintf('Silence estimate: mean %f variance %f ', mu_noise, var_noise);
% A Normal "bell curve" probability density function (pdf)
% is defined by its mean and variance.
% Let's see how well it fits our noise histogram
noiselikelihoodax = add_normplot(silenceax, xrange, mu_noise, var_noise);
% Speech estimate
% taken fromt time 1.3 to 2.8 s
speechstart_s = 1.3; % start time
speechstop_s = 2.8; % stop time
addnormplot.m:
function pdfax = add_normplot(PeerAx, xrange, mu, sigma2)
% pdfax = add_normplot(PeerAx, xrange, mu, sigma2)
% Given an axis, an xrange [min, max] over which to plot
% it, add a second axis and plot a normal distribution over
% the xrange.
% mu and sigma2 are the means and variances which must be either
% scalars or row vectors. For row vectors, multiple distributions
% are plotted.
%
% Returns a handle to the new axis object
% Find the parent of the peer axis so that we may put the new axis
% in the same container.
parent = get(PeerAx, 'Parent');
% Position a second axis on top of our current one.
% Make it transparent so that we can see what's underneath
% turn on the tick marks on the X axis and set up an alternative
% Y axis in another color on the right side.
pdfax = axes('Position', get(PeerAx, 'Position'), ... % put over
'Color', 'none', ... % don't fill it in
'YAxisLocation', 'right', ... % Y label/axis location
'XTick', [], ... % No labels on x axis
'YColor', 'b', ... % blue
'Parent', parent);
X = linspace(xrange(1), xrange(2), 100)'; % N points across xrange
% Preallocate fx assuming one column per pdf
sigma = sqrt(sigma2); % standard deviation
fx = zeros(length(X), length(mu));
for idx = 1:length(mu)
fx(:,idx) = normpdf(X, mu(idx), sigma(idx));
end
if length(mu) > 1
% Replicate Xs for each row so that we have one set of Xs for
% each one to plot.
X = repmat(X, [1, length(mu)]);
end
% hold is used to prevent an axis from being reset the next time
% that we plot something
hold(pdfax, 'on');
plot(pdfax, X, fx);
function dB = pcmToFramedIntensity(Fs, frameLength_ms, frameAdvance_ms, waveform)
% Frame the given signal waveform according to the length and advance
% specified in milliseconds (frameLength_ms and frameAdvance_ms).
% Compute the intesity of each frame in decibels and return the framed
% intensities as vector dB.
% Determine how many samples correspond to the requested duration.
frameLength_n = msTosamples(frameLength_ms, Fs);
frameAdvance_n = msTosamples(frameAdvance_ms, Fs);
samples_n = length(waveform);
frames_n = floor((samples_n - frameLength_n + frameAdvance_n) ...
/ frameAdvance_n);
dB = zeros(frames_n, 1); % preallocate the matrix for speed
for fidx = 1:frames_n
% Compute beginning and end samples of frame
startidx = (fidx-1) * frameAdvance_n + 1;
endidx = startidx + frameLength_n - 1;
dB(fidx) = pcmToIntensity(waveform(startidx:endidx));
end
end
function dB = pcmToIntensity(pcm)
% dB = pcmToItensity(pcm)
% Return the intensity in decibels for the given signal pcm.
% (we could ahve used mean(pcm .^ 2)
% and multiply by 10 for decibels (10ths of bels)
dB = 10 * log10(sum(pcm .^ 2) / length(pcm));
end
function samples = msTosamples(time_ms, Fs)
% samples = msToSamples(time_ms, Fs)
% Given a time in milliseconds (time_ms) and a sample rate (Fs),
% return the number of samples corresponding to the specified
% time.
ms_per_s = 1000; % # milliseconds per second
time_s = time_ms / ms_per_s; % convert ms to s
% Note that we round here, but floor or ceil would also be okay.
samples = round(time_s * Fs); % Number of samples in specified duration
end
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.