app/docs/steps/step-09.md

# **File: COMMIT\_09\_VOICE\_CLIENT.md**

## **Commit 9: Real-time Voice: Client Integration**

### **Objective**

Integrate the client-side microphone recording. This component will:

1. Call the `/api/voice-token` route (from Commit 08) to get a temporary key.
2. Use `navigator.mediaDevices.getUserMedia` to access the microphone.
3. Open a *direct* WebSocket to Deepgram using the temporary key.
4. Use `MediaRecorder` to capture audio chunks.
5. Stream audio chunks to Deepgram and receive transcripts back in real-time.
6. Feed the received transcripts into the `useChat` input.

### **Implementation Specification**

**1\. Create components/MicrophoneRecorder.tsx**

Create a new component at /components/MicrophoneRecorder.tsx:

TypeScript

'use client';

import { ActionIcon, Tooltip } from '@mantine/core';
import { IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { useEffect, useRef, useState } from 'react';

/\*\*
 \* One recognition hypothesis inside a Deepgram result.
 \*/
interface DeepgramAlternative {
  transcript: string;
}

/\*\*
 \* The subset of a Deepgram live-transcription message that this component
 \* reads. Fields are optional because the socket also carries messages that
 \* are not transcription results (for example metadata), which have no channel.
 \*/
interface DeepgramTranscript {
  channel?: {
    // Deepgram returns a list of alternatives; only the first one is used here.
    alternatives?: DeepgramAlternative\[\];
  };
  is\_final?: boolean;
  speech\_final?: boolean;
}

type Props \= {
  /\*\*
   \* Callback function to update the chat input with the new transcript.
   \* @param transcript The full, combined transcript.
   \*/
  onTranscriptUpdate: (transcript: string) \=\> void;
  /\*\*
   \* Callback function to signal the final transcript for this "thought".
   \* @param transcript The final, punctuated transcript.
   \*/
  onTranscriptFinalized: (transcript: string) \=\> void;
};

/\*\*
 \* Microphone toggle button that streams audio to Deepgram and reports transcripts.
 \*
 \* Starting a recording fetches a temporary key from /api/voice-token, opens the
 \* microphone, and streams MediaRecorder chunks over a WebSocket to Deepgram.
 \* Every transcript message is forwarded through onTranscriptUpdate. When Deepgram
 \* flags the end of speech, or the user stops manually, the recording is torn
 \* down and the combined transcript is passed to onTranscriptFinalized.
 \*/
export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }: Props) {
  const \[isRecording, setIsRecording\] \= useState(false);
  const mediaRecorderRef \= useRef\<MediaRecorder | null\>(null);
  const socketRef \= useRef\<WebSocket | null\>(null);
  const streamRef \= useRef\<MediaStream | null\>(null);

  // Segments Deepgram has already marked final for the current utterance
  const finalizedRef \= useRef\<string\>('');
  // Store the combined transcript (final segments plus the latest interim one)
  const transcriptRef \= useRef\<string\>('');

  // Stop the recorder, the microphone tracks, and the socket. Safe to call
  // repeatedly: each ref is cleared before its resource is released.
  const releaseResources \= () \=\> {
    const mediaRecorder \= mediaRecorderRef.current;
    mediaRecorderRef.current \= null;
    if (mediaRecorder && mediaRecorder.state !== 'inactive') {
      mediaRecorder.stop();
    }

    const stream \= streamRef.current;
    streamRef.current \= null;
    stream?.getTracks().forEach((track) \=\> track.stop());

    const socket \= socketRef.current;
    socketRef.current \= null;
    socket?.close();
  };

  // Release the microphone and socket if the component unmounts mid-recording.
  // releaseResources only touches refs, so the first-render closure stays valid.
  useEffect(() \=\> releaseResources, \[\]);

  const stopRecording \= () \=\> {
    releaseResources();
    setIsRecording(false);

    // Finalize the transcript. It is cleared before the callback runs so that a
    // second call (for example from the socket close handler) is a no-op.
    const finalTranscript \= transcriptRef.current;
    transcriptRef.current \= '';
    finalizedRef.current \= '';
    if (finalTranscript) {
      onTranscriptFinalized(finalTranscript);
    }
  };

  const startRecording \= async () \=\> {
    // Reset transcript
    transcriptRef.current \= '';
    finalizedRef.current \= '';
    try {
      // 1\. Get the temporary Deepgram key
      const response \= await fetch('/api/voice-token', { method: 'POST' });
      const { key, error } \= await response.json();

      if (error || !response.ok || !key) {
        throw new Error(error || 'Failed to obtain a voice token');
      }

      // 2\. Access the microphone
      const stream \= await navigator.mediaDevices.getUserMedia({ audio: true });
      streamRef.current \= stream;

      // 3\. Open direct WebSocket to Deepgram
      const socket \= new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim\_results=true\&punctuate=true',
        \['token', key\]
      );
      socketRef.current \= socket;

      socket.onopen \= () \=\> {
        // The session was cancelled before the socket finished connecting.
        if (socketRef.current !== socket) {
          return;
        }

        try {
          // 4\. Create MediaRecorder
          const mediaRecorder \= new MediaRecorder(stream, {
            mimeType: 'audio/webm',
          });
          mediaRecorderRef.current \= mediaRecorder;

          // 5\. Send audio chunks on data available
          mediaRecorder.ondataavailable \= (event) \=\> {
            if (event.data.size \> 0 && socket.readyState \=== WebSocket.OPEN) {
              socket.send(event.data);
            }
          };

          // Start recording and chunking audio every 250ms
          mediaRecorder.start(250);
          setIsRecording(true);
        } catch (error) {
          // This handler runs outside the surrounding try block, so a
          // MediaRecorder failure has to be handled here.
          console.error('Error starting recording:', error);
          stopRecording();
        }
      };

      // 6\. Receive transcripts
      socket.onmessage \= (event) \=\> {
        let data: DeepgramTranscript;
        try {
          data \= JSON.parse(event.data) as DeepgramTranscript;
        } catch {
          return; // Ignore frames that are not JSON
        }

        // Messages without a channel (for example metadata) yield an empty segment.
        const segment \= data.channel?.alternatives?.\[0\]?.transcript ?? '';

        if (segment) {
          // Interim results only cover the segment in progress, so prepend the
          // segments that were already marked final.
          const combined \= \[finalizedRef.current, segment\].filter(Boolean).join(' ');
          if (data.is\_final) {
            finalizedRef.current \= combined;
          }
          transcriptRef.current \= combined;
          onTranscriptUpdate(combined);
        }

        // If it's a "speech final" event, this utterance is done.
        if (data.speech\_final) {
          stopRecording();
        }
      };

      socket.onclose \= () \=\> {
        // Clean up this session's stream (stopping a stopped track is harmless)
        stream.getTracks().forEach((track) \=\> track.stop());
        // Compare against the ref instead of the isRecording state, which is
        // captured when startRecording ran and would always read false here.
        if (socketRef.current \=== socket) {
          stopRecording();
        }
      };

      socket.onerror \= (err) \=\> {
        console.error('WebSocket error:', err);
        if (socketRef.current \=== socket) {
          stopRecording();
        }
      };

    } catch (error) {
      console.error('Error starting recording:', error);
      // Releases the microphone if it was opened before the failure.
      stopRecording();
    }
  };

  const handleToggleRecord \= () \=\> {
    if (isRecording) {
      stopRecording();
    } else {
      startRecording();
    }
  };

  return (
    \<Tooltip label={isRecording? 'Stop Recording' : 'Start Recording'}\>
      \<ActionIcon
        onClick={handleToggleRecord}
        size="lg"
        radius="xl"
        color={isRecording? 'red' : 'gray'}
        variant="filled"
      \>
        {isRecording? \<IconMicrophoneOff /\> : \<IconMicrophone /\>}
      \</ActionIcon\>
    \</Tooltip\>
  );
}

**2\. Update Chat UI (app/chat/page.tsx)**

Update /app/chat/page.tsx to include the new component:

TypeScript

'use client';

//... (other imports)
import { MicrophoneRecorder } from '@/components/MicrophoneRecorder';

/\*\*
 \* Chat page with a text input, a microphone recorder, and a send button.
 \* Interim voice transcripts are mirrored into the input field; a finalized
 \* transcript is sent as a user message without going through the form.
 \*/
export default function ChatPage() {
  //... (other hooks: router, viewport)

  const {
    messages,
    input,
    handleInputChange,
    handleSubmit,
    setInput, // Get the setInput setter from useChat
    append, // Sends a message directly, independent of the input state
    data,
    isLoading,
  } \= useChat({
    //... (rest of useChat config)
  });

  //... (useEffect for auto-scroll)

  return (
    \<Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}\>
      {/\*... (Title and ScrollArea)... \*/}

      \<form onSubmit={handleSubmit}\>
        \<Paper withBorder p="sm" radius="xl" my="md"\>
          \<Group\>
            \<TextInput
              value={input}
              onChange={handleInputChange}
              placeholder="Speak or type your thoughts..."
              style={{ flex: 1 }}
              variant="unstyled"
              disabled={isLoading}
            /\>

            {/\* Add the Microphone Recorder Here \*/}
            \<MicrophoneRecorder
              onTranscriptUpdate={(transcript) \=\> {
                // Update the input field in real-time
                setInput(transcript);
              }}
              onTranscriptFinalized={(transcript) \=\> {
                // Automatically send the chat message when speech is final.
                // The transcript is sent with append instead of handleSubmit:
                // handleSubmit submits the hook's input state, which has not
                // yet re-rendered with the text just passed to setInput, and
                // it expects a real form event.
                setInput('');
                void append(
                  { role: 'user', content: transcript },
                  {
                    data: {
                      finalTranscript: transcript,
                    },
                  }
                );
              }}
            /\>

            \<Button type="submit" radius="xl" loading={isLoading}\>
              Send
            \</Button\>
          \</Group\>
        \</Paper\>
      \</form\>
    \</Container\>
  );
}

### **Test Specification**

**1\. Create Test File (tests/magnitude/09-voice.mag.ts)**

Create a file at /tests/magnitude/09-voice.mag.ts:

TypeScript

import { test } from 'magnitude-test';

// Happy-path scenario: start a recording, watch an interim transcript appear
// in the chat input, then confirm a final transcript stops the recording and
// is submitted as a user message.
// NOTE(review): the steps below rely on a mocked /api/voice-token response and
// mocked MediaDevices/WebSocket browser APIs, but no mock is set up in this
// test. Confirm where those mocks are installed before relying on it.
test('\[Happy Path\] User can record voice and see transcript', async (agent) \=\> {
  // Act: Go to chat page
  await agent.act('Navigate to /chat');

  // Check: Verify initial state (empty input, recorder idle)
  await agent.check('The chat input field is empty');
  await agent.check('A "Start Recording" button is visible');

  // Act: Click the record button
  // We must mock the /api/voice-token response and the
  // MediaDevices/WebSocket browser APIs.
  await agent.act('Click the "Start Recording" button');

  // Check: UI updates to recording state (the tooltip label switches)
  await agent.check('A "Stop Recording" button is visible');

  // Act: Simulate receiving a transcript from the (mocked) Deepgram WebSocket
  await agent.act(
    'Simulate an interim transcript "Hello world" from the Deepgram WebSocket'
  );

  // Check: The input field is updated with the interim text
  await agent.check('The chat input field contains "Hello world"');

  // Act: Simulate a final transcript, which should end the recording
  await agent.act(
    'Simulate a final transcript "Hello world." from the Deepgram WebSocket'
  );

  // Check: The "Stop Recording" button is gone
  await agent.check('A "Start Recording" button is visible again');

  // Check: The chat input is cleared (because it was submitted)
  await agent.check('The chat input field is empty');

  // Check: The finalized transcript appears as a user message
  await agent.check('The message "Hello world." appears in the chat list');
});