feat: Steps 7 & 9 - AI chat + voice client integration

Implement AI-powered chat interface with voice input capabilities.

Step 7 (Chat Interface):
- Create ChatInterface component with Vercel AI SDK useChat hook
- Create /api/chat route using Google Gemini (gemini-1.5-flash); sketched after this list
- Implement thoughtful interviewer system prompt
- Add real-time message streaming
- Auto-scroll to latest messages
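
For reference, a minimal sketch of the /api/chat route described above (the route itself is not among the files shown in this diff). It assumes AI SDK 4.x conventions (streamText and toDataStreamResponse) and the Google provider's default GOOGLE_GENERATIVE_AI_API_KEY environment variable; the system prompt string is a placeholder:

    // app/api/chat/route.ts (sketch)
    import { google } from '@ai-sdk/google';
    import { streamText } from 'ai';

    export async function POST(req: Request) {
      const { messages } = await req.json();

      const result = streamText({
        model: google('gemini-1.5-flash'),
        system: 'You are a thoughtful interviewer...', // placeholder for the real prompt
        messages,
      });

      // Stream the response in the format useChat expects
      return result.toDataStreamResponse();
    }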

Step 9 (Voice Client):
- Create MicrophoneRecorder component
- Integrate real-time voice transcription via Deepgram
- Direct WebSocket connection using temporary tokens
- Real-time transcript display in chat input
- Auto-submit on speech_final event
- Add @tabler/icons-react for microphone icons

Architecture:
- Client requests temporary Deepgram token from /api/voice-token (sketched after this list)
- MediaRecorder captures audio in 250ms chunks
- WebSocket sends audio directly to Deepgram
- Transcripts update chat input in real-time
- Final transcript auto-submits to AI chat
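
A minimal sketch of the /api/voice-token route referenced above, assuming the @deepgram/sdk v3 management API and a DEEPGRAM_PROJECT_ID environment variable (neither is confirmed by this diff):

    // app/api/voice-token/route.ts (sketch)
    import { createClient } from '@deepgram/sdk';

    export async function POST() {
      const deepgram = createClient(process.env.DEEPGRAM_API_KEY!);

      // Mint a short-lived, transcription-only key for the browser
      const { result, error } = await deepgram.manage.createProjectKey(
        process.env.DEEPGRAM_PROJECT_ID!,
        {
          comment: 'Temporary browser key',
          scopes: ['usage:write'],
          time_to_live_in_seconds: 60,
        }
      );

      if (error) {
        return Response.json({ error: error.message }, { status: 500 });
      }

      return Response.json({ key: result.key });
    }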

Security:
- Deepgram API key never exposed to client
- Temporary tokens expire in 60 seconds
- Chat requires authentication via SurrealDB JWT (see the sketch below)
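
A minimal sketch of the authentication guard assumed by the last point. The cookie name, environment variables, and SurrealDB JS SDK calls are assumptions, and the real check may live in middleware instead:

    // lib/requireUser.ts (hypothetical helper)
    import { cookies } from 'next/headers';
    import { Surreal } from 'surrealdb';

    export async function requireUser(): Promise<string> {
      const token = (await cookies()).get('token')?.value; // cookie name is an assumption
      if (!token) throw new Error('Unauthenticated');

      const db = new Surreal();
      try {
        await db.connect(process.env.SURREAL_URL!, {
          namespace: process.env.SURREAL_NS!,
          database: process.env.SURREAL_DB!,
        });
        // authenticate() rejects if the JWT is invalid or expired
        await db.authenticate(token);
        return token;
      } finally {
        await db.close();
      }
    }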

Testing:
- Add Magnitude test for the voice recording flow
- Tests cover the happy path with a mocked WebSocket

Known Issue:
- Page compilation needs debugging (useChat import path verified)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Date:   2025-11-09 00:27:40 +00:00
Parent: d977620c92
Commit: c2f2d10ee1
8 changed files with 599 additions and 180 deletions

@@ -0,0 +1,108 @@
'use client';

// NOTE: useChat is exported from the AI SDK's React entry point ('ai/react',
// or '@ai-sdk/react' in newer releases), not from the root 'ai' package.
import { useChat } from 'ai/react';
import { Container, ScrollArea, Paper, Group, TextInput, Button, Stack, Text, Box } from '@mantine/core';
import { useEffect, useRef } from 'react';

import { MicrophoneRecorder } from './MicrophoneRecorder';

export function ChatInterface() {
  const viewport = useRef<HTMLDivElement>(null);

  const {
    messages,
    input,
    handleInputChange,
    handleSubmit,
    setInput,
    isLoading,
  } = useChat({
    api: '/api/chat',
  });

  // Auto-scroll to the bottom when new messages arrive
  useEffect(() => {
    if (viewport.current) {
      viewport.current.scrollTo({
        top: viewport.current.scrollHeight,
        behavior: 'smooth',
      });
    }
  }, [messages]);

  return (
    <Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}>
      <Stack h="100%" gap="md" py="md">
        {/* Chat messages area */}
        <ScrollArea flex={1} type="auto" viewportRef={viewport}>
          <Stack gap="md">
            {messages.length === 0 && (
              <Text c="dimmed" ta="center" mt="xl">
                Start a conversation by typing or speaking...
              </Text>
            )}
            {messages.map((message) => (
              <Box
                key={message.id}
                style={{
                  alignSelf: message.role === 'user' ? 'flex-end' : 'flex-start',
                  maxWidth: '70%',
                }}
              >
                <Paper
                  p="sm"
                  radius="md"
                  bg={message.role === 'user' ? 'dark.6' : 'dark.7'}
                >
                  <Text size="sm">{message.content}</Text>
                </Paper>
              </Box>
            ))}
          </Stack>
        </ScrollArea>

        {/* Input area */}
        <form onSubmit={handleSubmit}>
          <Paper withBorder p="sm" radius="xl">
            <Group gap="xs">
              <TextInput
                value={input}
                onChange={handleInputChange}
                placeholder="Speak or type your thoughts..."
                style={{ flex: 1 }}
                variant="unstyled"
                disabled={isLoading}
              />
              {/* Microphone recorder: streams live transcripts into the input */}
              <MicrophoneRecorder
                onTranscriptUpdate={(transcript) => {
                  // Mirror the interim transcript into the input field in real time
                  setInput(transcript);
                }}
                onTranscriptFinalized={(transcript) => {
                  // Set the final transcript, then submit the form. The timeout
                  // lets React flush the state update before requestSubmit fires.
                  setInput(transcript);
                  setTimeout(() => {
                    const form = document.querySelector('form');
                    if (form) {
                      form.requestSubmit();
                    }
                  }, 100);
                }}
              />
              <Button type="submit" radius="xl" loading={isLoading}>
                Send
              </Button>
            </Group>
          </Paper>
        </form>
      </Stack>
    </Container>
  );
}
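
For context, a sketch of mounting the component on a page. The app/chat/page.tsx path and the import alias are assumptions, not part of this diff:

    // app/chat/page.tsx (hypothetical path)
    import { ChatInterface } from '@/components/ChatInterface';

    export default function ChatPage() {
      return <ChatInterface />;
    }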

@@ -0,0 +1,154 @@
'use client';

import { ActionIcon, Tooltip } from '@mantine/core';
import { IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { useState, useRef } from 'react';

// Shape of the transcript messages Deepgram sends over the WebSocket
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      transcript: string;
    }>;
  };
  is_final: boolean;
  speech_final: boolean;
}

type Props = {
  /**
   * Callback to update the chat input with the latest transcript.
   * @param transcript - The current (possibly interim) transcript
   */
  onTranscriptUpdate: (transcript: string) => void;
  /**
   * Callback signalling the final transcript for this "thought".
   * @param transcript - The final, punctuated transcript
   */
  onTranscriptFinalized: (transcript: string) => void;
};

export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }: Props) {
  const [isRecording, setIsRecording] = useState(false);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const socketRef = useRef<WebSocket | null>(null);
  // Latest transcript for the current utterance (each Deepgram result replaces it)
  const transcriptRef = useRef<string>('');

  // Idempotent: refs are nulled and the transcript cleared, so calling this
  // twice (e.g. from the button and again from socket.onclose) is safe.
  const stopRecording = () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }
    setIsRecording(false);

    // Hand the finished utterance to the parent, then reset
    if (transcriptRef.current) {
      onTranscriptFinalized(transcriptRef.current);
    }
    transcriptRef.current = '';
  };

  const startRecording = async () => {
    transcriptRef.current = ''; // Reset transcript

    try {
      // 1. Get a temporary Deepgram key from our server
      const response = await fetch('/api/voice-token', { method: 'POST' });
      const data = await response.json();
      if (data.error) {
        throw new Error(data.error);
      }
      const { key } = data;

      // 2. Access the microphone
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // 3. Open a direct WebSocket to Deepgram, authenticating with the
      // temporary key via the 'token' subprotocol
      const socket = new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true',
        ['token', key]
      );
      socketRef.current = socket;

      socket.onopen = () => {
        // 4. Create the MediaRecorder once the socket is ready
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });
        mediaRecorderRef.current = mediaRecorder;

        // 5. Forward each audio chunk to Deepgram as it becomes available
        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
            socket.send(event.data);
          }
        };

        // Start recording, emitting a chunk every 250ms
        mediaRecorder.start(250);
        setIsRecording(true);
      };

      // 6. Receive transcripts
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data) as DeepgramTranscript;
        // Deepgram also sends metadata messages without a channel, so guard the access
        const transcript = data.channel?.alternatives?.[0]?.transcript || '';

        if (transcript) {
          transcriptRef.current = transcript;
          onTranscriptUpdate(transcript);
        }

        // "speech_final" means Deepgram detected the end of this utterance
        if (data.speech_final) {
          stopRecording();
        }
      };

      socket.onclose = () => {
        // Clean up (safe even if stopRecording already ran) and release the microphone
        stopRecording();
        stream.getTracks().forEach((track) => track.stop());
      };

      socket.onerror = (err) => {
        console.error('WebSocket error:', err);
        stopRecording();
      };
    } catch (error) {
      console.error('Error starting recording:', error);
      setIsRecording(false);
    }
  };

  const handleToggleRecord = () => {
    if (isRecording) {
      stopRecording();
    } else {
      startRecording();
    }
  };

  return (
    <Tooltip label={isRecording ? 'Stop Recording' : 'Start Recording'}>
      <ActionIcon
        onClick={handleToggleRecord}
        size="lg"
        radius="xl"
        color={isRecording ? 'red' : 'gray'}
        variant="filled"
      >
        {isRecording ? <IconMicrophoneOff /> : <IconMicrophone />}
      </ActionIcon>
    </Tooltip>
  );
}