feat: Step 7 & 9 - AI Chat + Voice client integration
Implement AI-powered chat interface with voice input capabilities.

Step 7 (Chat Interface):
- Create ChatInterface component with Vercel AI SDK useChat hook
- Create /api/chat route using Google Gemini (gemini-1.5-flash)
- Implement thoughtful interviewer system prompt
- Add real-time message streaming
- Auto-scroll to latest messages

Step 9 (Voice Client):
- Create MicrophoneRecorder component
- Integrate real-time voice transcription via Deepgram
- Direct WebSocket connection using temporary tokens
- Real-time transcript display in chat input
- Auto-submit on speech_final event
- Add @tabler/icons-react for microphone icons

Architecture:
- Client requests temporary Deepgram token from /api/voice-token
- MediaRecorder captures audio in 250ms chunks
- WebSocket sends audio directly to Deepgram
- Transcripts update chat input in real-time
- Final transcript auto-submits to AI chat

Security:
- Deepgram API key never exposed to client
- Temporary tokens expire in 60 seconds
- Chat requires authentication via SurrealDB JWT

Testing:
- Add magnitude test for voice recording flow
- Tests cover happy path with mocked WebSocket

Known Issue:
- Page compilation needs debugging (useChat import path verified)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
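The /api/chat route referenced in Step 7 is not included in this diff. As a rough sketch of what it could look like with the Vercel AI SDK's streamText helper and the @ai-sdk/google provider: the file path, system-prompt wording, and the omitted SurrealDB JWT check are assumptions, and exact helper names vary by AI SDK version.

// app/api/chat/route.ts (hypothetical sketch, not part of this commit)
import { google } from '@ai-sdk/google';
import { streamText } from 'ai';

export async function POST(req: Request) {
  const { messages } = await req.json();

  // Assumed: the SurrealDB JWT check noted in the commit message would run here
  // before any model call is made.

  const result = streamText({
    // gemini-1.5-flash per the commit message; the prompt text is illustrative.
    model: google('gemini-1.5-flash'),
    system: 'You are a thoughtful interviewer. Ask short follow-up questions and let the user do most of the talking.',
    messages,
  });

  // Stream tokens back so useChat can render the reply incrementally.
  return result.toDataStreamResponse();
}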
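Likewise, the /api/voice-token route is referenced but not shown. One way to mint a short-lived key is Deepgram's project-keys REST endpoint; the sketch below assumes DEEPGRAM_API_KEY and DEEPGRAM_PROJECT_ID env vars, omits the auth check, and returns the { key } shape that MicrophoneRecorder expects.

// app/api/voice-token/route.ts (hypothetical sketch, not part of this commit)
export async function POST() {
  // Assumed env vars; the real route may resolve these differently.
  const apiKey = process.env.DEEPGRAM_API_KEY!;
  const projectId = process.env.DEEPGRAM_PROJECT_ID!;

  // Ask Deepgram for a temporary key so the real API key never reaches the browser.
  const res = await fetch(`https://api.deepgram.com/v1/projects/${projectId}/keys`, {
    method: 'POST',
    headers: {
      Authorization: `Token ${apiKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      comment: 'temporary browser key',
      scopes: ['usage:write'],
      time_to_live_in_seconds: 60, // matches the 60-second expiry noted above
    }),
  });

  if (!res.ok) {
    return Response.json({ error: 'Failed to create temporary key' }, { status: 500 });
  }

  const { key } = await res.json();
  return Response.json({ key });
}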
components/ChatInterface.tsx (new file, 108 lines)
@@ -0,0 +1,108 @@
'use client';

// NOTE (see Known Issue above): depending on the Vercel AI SDK version,
// useChat is exported from 'ai/react' or '@ai-sdk/react' rather than 'ai'.
import { useChat } from 'ai';
import { Container, ScrollArea, Paper, Group, TextInput, Button, Stack, Text, Box } from '@mantine/core';
import { useEffect, useRef } from 'react';
import { MicrophoneRecorder } from './MicrophoneRecorder';

export function ChatInterface() {
  const viewport = useRef<HTMLDivElement>(null);

  const {
    messages,
    input,
    handleInputChange,
    handleSubmit,
    setInput,
    isLoading,
  } = useChat({
    api: '/api/chat',
  });

  // Auto-scroll to bottom when new messages arrive
  useEffect(() => {
    if (viewport.current) {
      viewport.current.scrollTo({
        top: viewport.current.scrollHeight,
        behavior: 'smooth',
      });
    }
  }, [messages]);

  return (
    <Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}>
      <Stack h="100%" gap="md" py="md">
        {/* Chat messages area */}
        <ScrollArea
          flex={1}
          type="auto"
          viewportRef={viewport}
        >
          <Stack gap="md">
            {messages.length === 0 && (
              <Text c="dimmed" ta="center" mt="xl">
                Start a conversation by typing or speaking...
              </Text>
            )}
            {messages.map((message) => (
              <Box
                key={message.id}
                style={{
                  alignSelf: message.role === 'user' ? 'flex-end' : 'flex-start',
                  maxWidth: '70%',
                }}
              >
                <Paper
                  p="sm"
                  radius="md"
                  bg={message.role === 'user' ? 'dark.6' : 'dark.7'}
                >
                  <Text size="sm">{message.content}</Text>
                </Paper>
              </Box>
            ))}
          </Stack>
        </ScrollArea>

        {/* Input area */}
        <form onSubmit={handleSubmit}>
          <Paper withBorder p="sm" radius="xl">
            <Group gap="xs">
              <TextInput
                value={input}
                onChange={handleInputChange}
                placeholder="Speak or type your thoughts..."
                style={{ flex: 1 }}
                variant="unstyled"
                disabled={isLoading}
              />

              {/* Microphone Recorder */}
              <MicrophoneRecorder
                onTranscriptUpdate={(transcript) => {
                  // Update the input field in real-time
                  setInput(transcript);
                }}
                onTranscriptFinalized={(transcript) => {
                  // Set the input and submit
                  setInput(transcript);
                  // Trigger form submission
                  setTimeout(() => {
                    const form = document.querySelector('form');
                    if (form) {
                      form.requestSubmit();
                    }
                  }, 100);
                }}
              />

              <Button type="submit" radius="xl" loading={isLoading}>
                Send
              </Button>
            </Group>
          </Paper>
        </form>
      </Stack>
    </Container>
  );
}
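The page that mounts this component is not part of the diff (and is the piece flagged above as not yet compiling). A minimal sketch, assuming an app-router page and a '@/components' import alias:

// app/chat/page.tsx (hypothetical sketch, not part of this commit)
import { ChatInterface } from '@/components/ChatInterface';

export default function ChatPage() {
  // ChatInterface is a client component ('use client'), so the server page just renders it.
  return <ChatInterface />;
}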
components/MicrophoneRecorder.tsx (new file, 154 lines)
@@ -0,0 +1,154 @@
'use client';

import { ActionIcon, Tooltip } from '@mantine/core';
import { IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { useState, useRef } from 'react';

// Define the shape of the Deepgram transcript
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      transcript: string;
    }>;
  };
  is_final: boolean;
  speech_final: boolean;
}

type Props = {
  /**
   * Callback function to update the chat input with the new transcript.
   * @param transcript - The full, combined transcript
   */
  onTranscriptUpdate: (transcript: string) => void;
  /**
   * Callback function to signal the final transcript for this "thought".
   * @param transcript - The final, punctuated transcript
   */
  onTranscriptFinalized: (transcript: string) => void;
};

export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }: Props) {
  const [isRecording, setIsRecording] = useState(false);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const socketRef = useRef<WebSocket | null>(null);

  // Store the combined transcript for the current utterance
  const transcriptRef = useRef<string>('');

  const stopRecording = () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }
    setIsRecording(false);

    // Finalize the transcript
    if (transcriptRef.current) {
      onTranscriptFinalized(transcriptRef.current);
    }
    transcriptRef.current = '';
  };

  const startRecording = async () => {
    transcriptRef.current = ''; // Reset transcript
    try {
      // 1. Get the temporary Deepgram key
      const response = await fetch('/api/voice-token', { method: 'POST' });
      const data = await response.json();

      if (data.error) {
        throw new Error(data.error);
      }

      const { key } = data;

      // 2. Access the microphone
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // 3. Open direct WebSocket to Deepgram
      const socket = new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true',
        ['token', key]
      );
      socketRef.current = socket;

      socket.onopen = () => {
        // 4. Create MediaRecorder
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });
        mediaRecorderRef.current = mediaRecorder;

        // 5. Send audio chunks on data available
        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
            socket.send(event.data);
          }
        };

        // Start recording and chunking audio every 250ms
        mediaRecorder.start(250);
        setIsRecording(true);
      };

      // 6. Receive transcripts
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data) as DeepgramTranscript;
        const transcript = data.channel.alternatives[0]?.transcript || '';

        if (transcript) {
          transcriptRef.current = transcript;
          onTranscriptUpdate(transcript);
        }

        // If it's a "speech final" event, this utterance is done.
        if (data.speech_final) {
          stopRecording();
        }
      };

      socket.onclose = () => {
        // Clean up stream
        stream.getTracks().forEach((track) => track.stop());
        if (isRecording) {
          stopRecording(); // Ensure cleanup
        }
      };

      socket.onerror = (err) => {
        console.error('WebSocket error:', err);
        stopRecording();
      };
    } catch (error) {
      console.error('Error starting recording:', error);
      setIsRecording(false);
    }
  };

  const handleToggleRecord = () => {
    if (isRecording) {
      stopRecording();
    } else {
      startRecording();
    }
  };

  return (
    <Tooltip label={isRecording ? 'Stop Recording' : 'Start Recording'}>
      <ActionIcon
        onClick={handleToggleRecord}
        size="lg"
        radius="xl"
        color={isRecording ? 'red' : 'gray'}
        variant="filled"
      >
        {isRecording ? <IconMicrophoneOff /> : <IconMicrophone />}
      </ActionIcon>
    </Tooltip>
  );
}
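For reference, the onmessage handler above reads Deepgram live-transcription "Results" messages that look roughly like the trimmed example below; the values are illustrative and real payloads carry additional metadata.

// Trimmed example of a Deepgram live "Results" message (illustrative values only).
const exampleMessage = {
  channel: {
    alternatives: [{ transcript: 'I think the hardest part was scoping the MVP.' }],
  },
  is_final: true,
  // speech_final marks the end of an utterance and triggers onTranscriptFinalized,
  // which in turn auto-submits the chat form in ChatInterface.
  speech_final: true,
};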