function [ ret_hat, ret_orig ] = Speaker( seconds )
%SPEAKER Sample, encode and transmit speech using LPC10.
%   [RET_HAT, RET_ORIG] = SPEAKER(SECONDS) records about SECONDS seconds of
%   mono audio at 8 kHz, runs an order-10 LPC analysis on overlapping
%   Hamming-windowed frames, resynthesizes every non-silent frame from a
%   fixed 100 Hz pulse train, then plays back and plots both signals.
%
%   SECONDS   recording length in seconds; 5 is used when the argument is
%             omitted or is <= 0.
%   RET_HAT   synthesized signal, column vector of ceil((SECONDS+1)*8000)
%             samples (not normalized).
%   RET_ORIG  recorded signal, column vector of the same length.
%
%   Calls helpers that are not defined in this function and must be on the
%   path: count_zerocrosses, autocorr, durbin_method, quant1_2, quant3_4,
%   quant5_9, quant10.
%
% Coded by Brian Timmons with some portions influenced
% by LPC Vocoder GUI ver. 1.0.

%default recording length of 5 seconds
if (nargin < 1) || (seconds <= 0)
    seconds = 5;
end

%Java is used for the networking portion
%java imports
import java.net.ServerSocket
import java.io.DataOutputStream

%silence_threshold found through experimentation and could vary based
% on background noise and mic input level.
silence_threshold = 0.0005;
fs = 8000;
order = 10;
fps = 40;
%input_frame_size = how much new (non-overlapping) data will be in each
% frame. Needs to keep up with fs and fps and is basically the shift amount.
input_frame_size = floor(fs/fps);
%How much the window will overlap:
overlap_percent = 50;
%How many samples are used in each of the 40 frames per second. Rounded
% because it is used as a window length and as an index range, so it must
% be an integer even for overlap percentages that do not divide evenly.
working_frame_size = round(input_frame_size/(1 - overlap_percent/100));

% Make 100 Hz impulse train: one period is a 1 followed by zeros. Two
% frames' worth of periods are generated so that a frame-length slice can
% start at any offset inside the first period.
pulsetrain_part = [1; zeros(((fs/100) - 1), 1)];
plen = length(pulsetrain_part);
pulsetrain = repmat(pulsetrain_part, ceil(working_frame_size/plen)*2, 1);

voice = audiorecorder(fs, 16, 1);
% Give an extra second of space for reading too much in the while loop.
% Note: preallocating for a predetermined number of seconds so the entire
% signal can be buffered and played back. ceil keeps the size an integer
% when a fractional number of seconds is requested.
buffer_len = ceil((seconds + 1)*fs);
signal = zeros(buffer_len, 1);
Y_HAT = zeros(buffer_len, 1);

%Initialize windows:
ham_win = hamming(working_frame_size);
w_hat = hamming(1024);

%initialize counters/pacers
working_spot = 0;   % samples of 'signal' already consumed by the analysis
frame_count = 0;    % frames handled so far, silent ones included
old_spot = get(voice, 'CurrentSample');
spot = old_spot;
amount_new = spot - old_spot;   % buffered samples not yet consumed
% 0 = unvoiced, 1 = voiced
u_or_v = 0;

%When splitting into client/server uncomment below
%Start up the socket and wait for the client to connect
%server_socket = ServerSocket(0);
%server_socket.getLocalPort()
%output_socket = server_socket.accept();
%d_output_stream = DataOutputStream(output_socket.getOutputStream());

record(voice);
disp('Recording has started!');

%Pause here at startup to allow the first frame to fill
pause(0.1)
while ((old_spot <= seconds*fs) || (amount_new > working_frame_size))
    % Only interrupt recording when we need more samples.
    % Hopefully allows more recording while processing old samples.
    if (amount_new < working_frame_size)
        pause(voice);
        spot = get(voice, 'CurrentSample');
        if (spot <= old_spot + 1)
            %No new samples (shouldn't happen but prevents error if it does)
            resume(voice);
            continue;
        end
        % With no dataType argument getaudiodata returns double samples.
        plain_speech = getaudiodata(voice);
        resume(voice);

        % Copy only what fits in the preallocated buffer. Writing past the
        % end would silently grow 'signal' and leave it a different length
        % from Y_HAT, which breaks the plot at the end of the function.
        last_spot = min(spot - 1, buffer_len);
        if (last_spot >= old_spot)
            signal(old_spot:last_spot) = plain_speech(old_spot:last_spot);
            amount_new = amount_new + (last_spot - old_spot + 1);
        end
        old_spot = spot;
        continue;
    end

    windowed_data = signal(working_spot + 1:working_spot + working_frame_size).*ham_win;
    working_spot = working_spot + input_frame_size;
    amount_new = amount_new - input_frame_size;

    energy = sum(windowed_data.^2);
    if (energy < silence_threshold)
        %in the future, send silence packet to listener
        frame_count = frame_count + 1;
        continue;
    end

    % Voiced/unvoiced decision from the zero-crossing count. The result is
    % not used by the synthesis below yet (excitation is always the pulse
    % train).
    num_zerocrosses = count_zerocrosses(windowed_data);
    if (num_zerocrosses > 150)
        u_or_v = 0;
    else
        u_or_v = 1;
    end

    %Perform autocorrelation method on each frame. Determine alphas and
    %gains for each frame of speech.
    % -- Heavily influenced by LPC Vocoder GUI ver. 1.0
    Rn = zeros(order + 1, 1);
    alpha = zeros(order, 1);
    for k = 0:order
        Rn(k+1, 1) = autocorr(k, windowed_data); %find autocorrelations
    end
    [alpha(:,1), G] = durbin_method(Rn, order);
    % -- End influence
    % ^ is the same as LPC but with gain output
    %[a,error] = lpc(windowed_data,order);

    % transport gain (5 bits), alphas (50 bits total, each one represented
    % by a different amount of bits dependent on importance).
    %Quantize LPC Coeffs (44 bits total which is off-spec)
    % The quantized values are placeholders until the transport TODOs below
    % are implemented; synthesis currently uses the unquantized alphas.
    k1 = quant1_2(alpha(1));
    k2 = quant1_2(alpha(2));
    k3 = quant3_4(alpha(3));
    k4 = quant3_4(alpha(4));
    k5 = quant5_9(alpha(5));
    k6 = quant5_9(alpha(6));
    k7 = quant5_9(alpha(7));
    k8 = quant5_9(alpha(8));
    k9 = quant5_9(alpha(9));
    k10 = quant10(alpha(10));

    %TODO
    %Quantize gain and pitch
    %Bundle all 56 bits (7 bytes) and send on output stream

    % Listener program would contain this later portion of the while loop
    %TODO
    %Receive message from server (Speaker)
    %Split and unquantize info

    %Generate pulsetrain using pitch received but for now use static.
    % The offset into the train comes from the frame's absolute start so
    % that pulses from overlapping frames land on the same sample grid.
    % Frames advance by input_frame_size, not working_frame_size, and
    % silent frames advance the position too.
    frame_start = frame_count*input_frame_size;
    start_train = mod(frame_start, plen);
    pulse = pulsetrain(start_train + 1:start_train + working_frame_size);

    %Synthesize the voice with the LPC coeffs and gain.
    % -- Heavily influenced by LPC Vocoder GUI ver. 1.0
    LPC_Spec = 1./fft([1; -alpha], 1024);
    gain = G*ones(working_frame_size, 1);
    G_PULSE_DATA = gain.*pulse;
    PULSE_SPECTRUM = fft(G_PULSE_DATA, 1024);
    Y_HAT_DATA = PULSE_SPECTRUM.*LPC_Spec;
    A = real(ifft(Y_HAT_DATA, 1024));
    A_WINDOW_DATA = A.*w_hat;
    % -- End influence

    % Overlap-add the synthesized frame in place. Anything that would fall
    % past the end of the output buffer is dropped instead of raising a
    % size-mismatch error.
    n_fit = min(length(A_WINDOW_DATA), length(Y_HAT) - frame_start);
    if (n_fit > 0)
        dest = frame_start + (1:n_fit);
        Y_HAT(dest) = Y_HAT(dest) + A_WINDOW_DATA(1:n_fit);
    end
    frame_count = frame_count + 1;

    %Could try playing back after buffered enough overlapping frames, but
    %sounds extra choppy so just buffer it all before playing back.
end
stop(voice);

disp('Press a key to hear the original sound');
pause;
soundsc(signal, fs);

disp('Press a key to hear the LPC10 coded speech (without quantization noise and pitch)');
pause;
% Normalize to unit peak, unless every frame was silent: then the peak is 0
% and dividing would turn the whole signal into NaN.
peak = max(abs(Y_HAT));
if (peak > 0)
    sound(Y_HAT/peak, fs);
else
    sound(Y_HAT, fs);
end

t = (1:length(Y_HAT))';
figure(1);
plot(t, signal, 'b', t, Y_HAT, 'g');

ret_hat = Y_HAT;
ret_orig = signal;