# **File: COMMIT_09_VOICE_CLIENT.md**

## **Commit 9: Real-time Voice: Client Integration**

### **Objective**

Integrate client-side microphone recording. This component will:

1. Call the /api/voice-token route (from Commit 08) to get a temporary key (the expected response shape is sketched below).
2. Use navigator.mediaDevices.getUserMedia to access the microphone.
3. Open a *direct* WebSocket to Deepgram using the temporary key.
4. Use MediaRecorder to capture audio chunks.
5. Stream the audio chunks to Deepgram and receive transcripts back in real time.
6. Feed the received transcripts into the useChat input.
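The recorder component destructures `{ key, error }` from the /api/voice-token response, so it assumes a JSON payload roughly shaped like the type below. This is an assumption for illustration; adjust it if Commit 08 returns a different structure.

```ts
// Assumed response shape of POST /api/voice-token (Commit 08).
// Only these two fields are read by the recorder component.
type VoiceTokenResponse = {
  key?: string;   // short-lived Deepgram key on success
  error?: string; // error message on failure
};
```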
### **Implementation Specification**

**1. Create components/MicrophoneRecorder.tsx**

Create a new component at /components/MicrophoneRecorder.tsx:

```tsx
'use client';

import { ActionIcon, Tooltip } from '@mantine/core';
import { IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { useRef, useState } from 'react';

// Shape of the Deepgram live transcription message (only the fields we read).
interface DeepgramTranscript {
  channel: {
    alternatives: {
      transcript: string;
    }[];
  };
  is_final: boolean;
  speech_final: boolean;
}

type Props = {
  /**
   * Callback to update the chat input with the latest transcript.
   * @param transcript The full, combined transcript.
   */
  onTranscriptUpdate: (transcript: string) => void;
  /**
   * Callback to signal the final transcript for this "thought".
   * @param transcript The final, punctuated transcript.
   */
  onTranscriptFinalized: (transcript: string) => void;
};

export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }: Props) {
  const [isRecording, setIsRecording] = useState(false);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const socketRef = useRef<WebSocket | null>(null);
  // Store the combined transcript for the current utterance
  const transcriptRef = useRef<string>('');

  const stopRecording = () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }
    setIsRecording(false);

    // Finalize the transcript
    if (transcriptRef.current) {
      onTranscriptFinalized(transcriptRef.current);
    }
    transcriptRef.current = '';
  };

  const startRecording = async () => {
    transcriptRef.current = ''; // Reset transcript

    try {
      // 1. Get the temporary Deepgram key
      const response = await fetch('/api/voice-token', { method: 'POST' });
      const { key, error } = await response.json();
      if (error) {
        throw new Error(error);
      }

      // 2. Access the microphone
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // 3. Open a direct WebSocket to Deepgram, authenticating via the
      //    'token' subprotocol (browsers cannot set an Authorization header)
      const socket = new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true',
        ['token', key]
      );
      socketRef.current = socket;

      socket.onopen = () => {
        // 4. Create the MediaRecorder
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });
        mediaRecorderRef.current = mediaRecorder;

        // 5. Send audio chunks as they become available
        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
            socket.send(event.data);
          }
        };

        // Start recording, emitting a chunk every 250ms
        mediaRecorder.start(250);
        setIsRecording(true);
      };

      // 6. Receive transcripts
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data) as DeepgramTranscript;
        const transcript = data.channel.alternatives[0]?.transcript;

        if (transcript) {
          transcriptRef.current = transcript;
          onTranscriptUpdate(transcript);
        }

        // If this is a "speech final" event, the current utterance is done.
        if (data.speech_final) {
          stopRecording();
        }
      };

      socket.onclose = () => {
        // Clean up the media stream
        stream.getTracks().forEach((track) => track.stop());
        if (isRecording) {
          stopRecording(); // Ensure cleanup
        }
      };

      socket.onerror = (err) => {
        console.error('WebSocket error:', err);
        stopRecording();
      };
    } catch (error) {
      console.error('Error starting recording:', error);
      setIsRecording(false);
    }
  };

  const handleToggleRecord = () => {
    if (isRecording) {
      stopRecording();
    } else {
      startRecording();
    }
  };

  return (
    <Tooltip label={isRecording ? 'Stop recording' : 'Start voice input'}>
      <ActionIcon
        onClick={handleToggleRecord}
        variant={isRecording ? 'filled' : 'default'}
        color={isRecording ? 'red' : 'gray'}
        size="lg"
        aria-label="Toggle microphone"
      >
        {isRecording ? <IconMicrophoneOff /> : <IconMicrophone />}
      </ActionIcon>
    </Tooltip>
  );
}
```
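For reference, a live transcription message from Deepgram looks roughly like the object below; the handler only reads `channel.alternatives[0].transcript`, `is_final`, and `speech_final`, and real messages carry additional metadata fields not modeled here. The `['token', key]` subprotocol is used for authentication because browser WebSockets cannot attach custom headers.

```ts
// Simplified example of one message received in socket.onmessage.
// DeepgramTranscript is the interface defined in the component above;
// actual Deepgram responses include extra fields (confidence, timing, etc.).
const exampleMessage: DeepgramTranscript = {
  channel: {
    alternatives: [{ transcript: 'Hello, how are you today?' }],
  },
  is_final: true,
  speech_final: true,
};
```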
**2. Update Chat UI (app/chat/page.tsx)**

Update /app/chat/page.tsx to include the new component:

```tsx
'use client';

// ... (other imports)
import { MicrophoneRecorder } from '@/components/MicrophoneRecorder';

export default function ChatPage() {
  // ... (other hooks: router, viewport)
  const {
    messages,
    input,
    handleInputChange,
    handleSubmit,
    setInput, // Get the setInput setter from useChat
    data,
    isLoading,
  } = useChat({
    // ... (rest of useChat config)
  });

  // ... (useEffect for auto-scroll)

  return (
    <>
      {/* ... (Title and ScrollArea, unchanged) ... */}
      {/* ... (existing text input and send button, unchanged) ... */}

      {/* Add the Microphone Recorder here */}
      <MicrophoneRecorder
        onTranscriptUpdate={(transcript) => {
          // Update the input field in real time
          setInput(transcript);
        }}
        onTranscriptFinalized={(transcript) => {
          // Automatically submit the chat when speech is final.
          // The final transcript is passed along in the request options.
          // (Depending on the AI SDK version, this event argument may need a cast.)
          handleSubmit(new Event('submit'), {
            data: {
              finalTranscript: transcript,
            },
          });
        }}
      />
    </>
  );
}
```
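How `finalTranscript` reaches the server is outside this commit, but for orientation: the AI SDK forwards the `data` option from `handleSubmit` in the JSON body of the chat request, so the existing chat route could read it roughly as sketched below. This is an illustrative assumption only; verify it against the route and SDK version used in the earlier chat commits.

```ts
// Sketch only: reading the extra `data` payload in the existing chat route.
// Assumes the useChat request body is shaped as { messages, data }; all
// other handling stays exactly as implemented in the earlier commits.
export async function POST(req: Request) {
  const { messages, data } = await req.json();
  const finalTranscript: string | undefined = data?.finalTranscript;

  // ... existing chat handling (which returns the streaming response) continues here ...
}
```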