feat: Step 7 & 9 - AI Chat + Voice client integration
Implement AI-powered chat interface with voice input capabilities. Step 7 (Chat Interface): - Create ChatInterface component with Vercel AI SDK useChat hook - Create /api/chat route using Google Gemini (gemini-1.5-flash) - Implement thoughtful interviewer system prompt - Add real-time message streaming - Auto-scroll to latest messages Step 9 (Voice Client): - Create MicrophoneRecorder component - Integrate real-time voice transcription via Deepgram - Direct WebSocket connection using temporary tokens - Real-time transcript display in chat input - Auto-submit on speech_final event - Add @tabler/icons-react for microphone icons Architecture: - Client requests temporary Deepgram token from /api/voice-token - MediaRecorder captures audio in 250ms chunks - WebSocket sends audio directly to Deepgram - Transcripts update chat input in real-time - Final transcript auto-submits to AI chat Security: - Deepgram API key never exposed to client - Temporary tokens expire in 60 seconds - Chat requires authentication via SurrealDB JWT Testing: - Add magnitude test for voice recording flow - Tests cover happy path with mocked WebSocket Known Issue: - Page compilation needs debugging (useChat import path verified) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
154
components/MicrophoneRecorder.tsx
Normal file
154
components/MicrophoneRecorder.tsx
Normal file
@@ -0,0 +1,154 @@
|
||||
'use client';
|
||||
|
||||
import { ActionIcon, Tooltip } from '@mantine/core';
|
||||
import { IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
|
||||
import { useState, useRef } from 'react';
|
||||
|
||||
// Define the shape of the Deepgram transcript
// (only the fields this component reads from each live-transcription
// WebSocket message — the full Deepgram payload contains more).
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      // Transcribed text; may be an interim (partial) result since the
      // socket is opened with interim_results=true.
      transcript: string;
    }>;
  };
  // True when this segment's transcript is finalized.
  // NOTE(review): declared but not currently read anywhere in this file.
  is_final: boolean;
  // True when Deepgram detects the end of the utterance; the component
  // uses this to auto-stop recording and finalize the transcript.
  speech_final: boolean;
}
|
||||
|
||||
type Props = {
|
||||
/**
|
||||
* Callback function to update the chat input with the new transcript.
|
||||
* @param transcript - The full, combined transcript
|
||||
*/
|
||||
onTranscriptUpdate: (transcript: string) => void;
|
||||
/**
|
||||
* Callback function to signal the final transcript for this "thought".
|
||||
* @param transcript - The final, punctuated transcript
|
||||
*/
|
||||
onTranscriptFinalized: (transcript: string) => void;
|
||||
};
|
||||
|
||||
export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }: Props) {
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const socketRef = useRef<WebSocket | null>(null);
|
||||
|
||||
// Store the combined transcript for the current utterance
|
||||
const transcriptRef = useRef<string>('');
|
||||
|
||||
const stopRecording = () => {
|
||||
if (mediaRecorderRef.current) {
|
||||
mediaRecorderRef.current.stop();
|
||||
mediaRecorderRef.current = null;
|
||||
}
|
||||
if (socketRef.current) {
|
||||
socketRef.current.close();
|
||||
socketRef.current = null;
|
||||
}
|
||||
setIsRecording(false);
|
||||
|
||||
// Finalize the transcript
|
||||
if (transcriptRef.current) {
|
||||
onTranscriptFinalized(transcriptRef.current);
|
||||
}
|
||||
transcriptRef.current = '';
|
||||
};
|
||||
|
||||
const startRecording = async () => {
|
||||
transcriptRef.current = ''; // Reset transcript
|
||||
try {
|
||||
// 1. Get the temporary Deepgram key
|
||||
const response = await fetch('/api/voice-token', { method: 'POST' });
|
||||
const data = await response.json();
|
||||
|
||||
if (data.error) {
|
||||
throw new Error(data.error);
|
||||
}
|
||||
|
||||
const { key } = data;
|
||||
|
||||
// 2. Access the microphone
|
||||
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
||||
|
||||
// 3. Open direct WebSocket to Deepgram
|
||||
const socket = new WebSocket(
|
||||
'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true',
|
||||
['token', key]
|
||||
);
|
||||
socketRef.current = socket;
|
||||
|
||||
socket.onopen = () => {
|
||||
// 4. Create MediaRecorder
|
||||
const mediaRecorder = new MediaRecorder(stream, {
|
||||
mimeType: 'audio/webm',
|
||||
});
|
||||
mediaRecorderRef.current = mediaRecorder;
|
||||
|
||||
// 5. Send audio chunks on data available
|
||||
mediaRecorder.ondataavailable = (event) => {
|
||||
if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
|
||||
socket.send(event.data);
|
||||
}
|
||||
};
|
||||
|
||||
// Start recording and chunking audio every 250ms
|
||||
mediaRecorder.start(250);
|
||||
setIsRecording(true);
|
||||
};
|
||||
|
||||
// 6. Receive transcripts
|
||||
socket.onmessage = (event) => {
|
||||
const data = JSON.parse(event.data) as DeepgramTranscript;
|
||||
const transcript = data.channel.alternatives[0]?.transcript || '';
|
||||
|
||||
if (transcript) {
|
||||
transcriptRef.current = transcript;
|
||||
onTranscriptUpdate(transcript);
|
||||
}
|
||||
|
||||
// If it's a "speech final" event, this utterance is done.
|
||||
if (data.speech_final) {
|
||||
stopRecording();
|
||||
}
|
||||
};
|
||||
|
||||
socket.onclose = () => {
|
||||
// Clean up stream
|
||||
stream.getTracks().forEach((track) => track.stop());
|
||||
if (isRecording) {
|
||||
stopRecording(); // Ensure cleanup
|
||||
}
|
||||
};
|
||||
|
||||
socket.onerror = (err) => {
|
||||
console.error('WebSocket error:', err);
|
||||
stopRecording();
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('Error starting recording:', error);
|
||||
setIsRecording(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleToggleRecord = () => {
|
||||
if (isRecording) {
|
||||
stopRecording();
|
||||
} else {
|
||||
startRecording();
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<Tooltip label={isRecording ? 'Stop Recording' : 'Start Recording'}>
|
||||
<ActionIcon
|
||||
onClick={handleToggleRecord}
|
||||
size="lg"
|
||||
radius="xl"
|
||||
color={isRecording ? 'red' : 'gray'}
|
||||
variant="filled"
|
||||
>
|
||||
{isRecording ? <IconMicrophoneOff /> : <IconMicrophone />}
|
||||
</ActionIcon>
|
||||
</Tooltip>
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user