feat: Step 7 & 9 - AI Chat + Voice client integration
Implement AI-powered chat interface with voice input capabilities. Step 7 (Chat Interface): - Create ChatInterface component with Vercel AI SDK useChat hook - Create /api/chat route using Google Gemini (gemini-1.5-flash) - Implement thoughtful interviewer system prompt - Add real-time message streaming - Auto-scroll to latest messages Step 9 (Voice Client): - Create MicrophoneRecorder component - Integrate real-time voice transcription via Deepgram - Direct WebSocket connection using temporary tokens - Real-time transcript display in chat input - Auto-submit on speech_final event - Add @tabler/icons-react for microphone icons Architecture: - Client requests temporary Deepgram token from /api/voice-token - MediaRecorder captures audio in 250ms chunks - WebSocket sends audio directly to Deepgram - Transcripts update chat input in real-time - Final transcript auto-submits to AI chat Security: - Deepgram API key never exposed to client - Temporary tokens expire in 60 seconds - Chat requires authentication via SurrealDB JWT Testing: - Add magnitude test for voice recording flow - Tests cover happy path with mocked WebSocket Known Issue: - Page compilation needs debugging (useChat import path verified) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
154
components/MicrophoneRecorder.tsx
Normal file
154
components/MicrophoneRecorder.tsx
Normal file
@@ -0,0 +1,154 @@
|
||||
'use client';
|
||||
|
||||
import { ActionIcon, Tooltip } from '@mantine/core';
|
||||
import { IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
|
||||
import { useState, useRef } from 'react';
|
||||
|
||||
// Define the shape of the Deepgram transcript
// (only the fields this component reads from each live-transcription
// WebSocket message — the full Deepgram payload contains more).
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      // Transcribed text; may be an interim (partial) result since the
      // socket is opened with interim_results=true.
      transcript: string;
    }>;
  };
  // True when this segment's transcript is finalized.
  // NOTE(review): declared but not currently read anywhere in this file.
  is_final: boolean;
  // True when Deepgram detects the end of the utterance; the component
  // uses this to auto-stop recording and finalize the transcript.
  speech_final: boolean;
}
|
||||
|
||||
type Props = {
|
||||
/**
|
||||
* Callback function to update the chat input with the new transcript.
|
||||
* @param transcript - The full, combined transcript
|
||||
*/
|
||||
onTranscriptUpdate: (transcript: string) => void;
|
||||
/**
|
||||
* Callback function to signal the final transcript for this "thought".
|
||||
* @param transcript - The final, punctuated transcript
|
||||
*/
|
||||
onTranscriptFinalized: (transcript: string) => void;
|
||||
};
|
||||
|
||||
export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }: Props) {
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const socketRef = useRef<WebSocket | null>(null);
|
||||
|
||||
// Store the combined transcript for the current utterance
|
||||
const transcriptRef = useRef<string>('');
|
||||
|
||||
const stopRecording = () => {
|
||||
if (mediaRecorderRef.current) {
|
||||
mediaRecorderRef.current.stop();
|
||||
mediaRecorderRef.current = null;
|
||||
}
|
||||
if (socketRef.current) {
|
||||
socketRef.current.close();
|
||||
socketRef.current = null;
|
||||
}
|
||||
setIsRecording(false);
|
||||
|
||||
// Finalize the transcript
|
||||
if (transcriptRef.current) {
|
||||
onTranscriptFinalized(transcriptRef.current);
|
||||
}
|
||||
transcriptRef.current = '';
|
||||
};
|
||||
|
||||
const startRecording = async () => {
|
||||
transcriptRef.current = ''; // Reset transcript
|
||||
try {
|
||||
// 1. Get the temporary Deepgram key
|
||||
const response = await fetch('/api/voice-token', { method: 'POST' });
|
||||
const data = await response.json();
|
||||
|
||||
if (data.error) {
|
||||
throw new Error(data.error);
|
||||
}
|
||||
|
||||
const { key } = data;
|
||||
|
||||
// 2. Access the microphone
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
|
||||
// 3. Open direct WebSocket to Deepgram
|
||||
const socket = new WebSocket(
|
||||
'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true',
|
||||
['token', key]
|
||||
);
|
||||
socketRef.current = socket;
|
||||
|
||||
socket.onopen = () => {
|
||||
// 4. Create MediaRecorder
|
||||
const mediaRecorder = new MediaRecorder(stream, {
|
||||
mimeType: 'audio/webm',
|
||||
});
|
||||
mediaRecorderRef.current = mediaRecorder;
|
||||
|
||||
// 5. Send audio chunks on data available
|
||||
mediaRecorder.ondataavailable = (event) => {
|
||||
if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
|
||||
socket.send(event.data);
|
||||
}
|
||||
};
|
||||
|
||||
// Start recording and chunking audio every 250ms
|
||||
mediaRecorder.start(250);
|
||||
setIsRecording(true);
|
||||
};
|
||||
|
||||
// 6. Receive transcripts
|
||||
socket.onmessage = (event) => {
|
||||
const data = JSON.parse(event.data) as DeepgramTranscript;
|
||||
const transcript = data.channel.alternatives[0]?.transcript || '';
|
||||
|
||||
if (transcript) {
|
||||
transcriptRef.current = transcript;
|
||||
onTranscriptUpdate(transcript);
|
||||
}
|
||||
|
||||
// If it's a "speech final" event, this utterance is done.
|
||||
if (data.speech_final) {
|
||||
stopRecording();
|
||||
}
|
||||
};
|
||||
|
||||
socket.onclose = () => {
|
||||
// Clean up stream
|
||||
stream.getTracks().forEach((track) => track.stop());
|
||||
if (isRecording) {
|
||||
stopRecording(); // Ensure cleanup
|
||||
}
|
||||
};
|
||||
|
||||
socket.onerror = (err) => {
|
||||
console.error('WebSocket error:', err);
|
||||
stopRecording();
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Error starting recording:', error);
|
||||
setIsRecording(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleToggleRecord = () => {
|
||||
if (isRecording) {
|
||||
stopRecording();
|
||||
} else {
|
||||
startRecording();
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Tooltip label={isRecording ? 'Stop Recording' : 'Start Recording'}>
|
||||
<ActionIcon
|
||||
onClick={handleToggleRecord}
|
||||
size="lg"
|
||||
radius="xl"
|
||||
color={isRecording ? 'red' : 'gray'}
|
||||
variant="filled"
|
||||
>
|
||||
{isRecording ? <IconMicrophoneOff /> : <IconMicrophone />}
|
||||
</ActionIcon>
|
||||
</Tooltip>
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user