'use client';

import { ActionIcon, Tooltip } from '@mantine/core';
import { IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { useRef, useState } from 'react';

// Shape of a Deepgram streaming transcript message
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      transcript: string;
    }>;
  };
  is_final: boolean;
  speech_final: boolean;
}

type Props = {
  /**
   * Callback to update the chat input with the latest transcript.
   * @param transcript - The full, combined transcript so far
   */
  onTranscriptUpdate: (transcript: string) => void;
  /**
   * Callback to signal the final transcript for this "thought".
   * @param transcript - The final, punctuated transcript
   */
  onTranscriptFinalized: (transcript: string) => void;
};

export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }: Props) {
  const [isRecording, setIsRecording] = useState(false);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const socketRef = useRef<WebSocket | null>(null);
  // Combined transcript for the current utterance: finalized segments
  // plus the latest interim result
  const transcriptRef = useRef('');
  // Accumulates only the segments Deepgram has marked is_final
  const finalizedRef = useRef('');

  const stopRecording = () => {
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }
    setIsRecording(false);

    // Hand the finished utterance to the parent, then reset
    if (transcriptRef.current) {
      onTranscriptFinalized(transcriptRef.current);
    }
    transcriptRef.current = '';
    finalizedRef.current = '';
  };

  const startRecording = async () => {
    // Reset transcript state for a fresh utterance
    transcriptRef.current = '';
    finalizedRef.current = '';

    try {
      // 1. Get a temporary Deepgram key from our server
      const response = await fetch('/api/voice-token', { method: 'POST' });
      const data = await response.json();
      if (data.error) {
        throw new Error(data.error);
      }
      const { key } = data;

      // 2. Access the microphone
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // 3. Open a direct WebSocket to Deepgram, passing the key via the
      //    WebSocket subprotocol since browsers cannot set custom headers
      const socket = new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true',
        ['token', key]
      );
      socketRef.current = socket;

      socket.onopen = () => {
        console.log('[MicrophoneRecorder] ✓ WebSocket connected to Deepgram');

        // 4. Create a MediaRecorder for the mic stream
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });
        mediaRecorderRef.current = mediaRecorder;

        // 5. Forward audio chunks to Deepgram as they become available
        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
            console.log('[MicrophoneRecorder] Sending audio chunk:', event.data.size, 'bytes');
            socket.send(event.data);
          }
        };

        // Start recording, emitting a chunk every 250ms
        mediaRecorder.start(250);
        setIsRecording(true);
        console.log('[MicrophoneRecorder] ✓ Recording started');
      };

      // 6. Receive transcripts
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data) as DeepgramTranscript;
        const transcript = data.channel.alternatives[0]?.transcript || '';
        console.log('[MicrophoneRecorder] Received from Deepgram:', {
          transcript,
          is_final: data.is_final,
          speech_final: data.speech_final,
        });

        if (transcript) {
          // Append finalized segments; interim results only preview the
          // tail of the utterance and are replaced by later messages
          if (data.is_final) {
            finalizedRef.current = `${finalizedRef.current} ${transcript}`.trim();
            transcriptRef.current = finalizedRef.current;
          } else {
            transcriptRef.current = `${finalizedRef.current} ${transcript}`.trim();
          }
          onTranscriptUpdate(transcriptRef.current);
          console.log('[MicrophoneRecorder] Updated transcript:', transcriptRef.current);
        }

        // A "speech final" event means this utterance is done.
        if (data.speech_final) {
          console.log('[MicrophoneRecorder] Speech finalized, stopping recording');
          stopRecording();
        }
      };

      socket.onclose = () => {
        // Release the microphone
        stream.getTracks().forEach((track) => track.stop());
        // Check the ref rather than isRecording: this closure captured
        // the stale state value from before recording started
        if (mediaRecorderRef.current) {
          stopRecording(); // Ensure cleanup if Deepgram closed the socket
        }
      };

      socket.onerror = (err) => {
        console.error('WebSocket error:', err);
        stopRecording();
      };
    } catch (error) {
      console.error('Error starting recording:', error);
      setIsRecording(false);
    }
  };

  const handleToggleRecord = () => {
    if (isRecording) {
      stopRecording();
    } else {
      startRecording();
    }
  };

  return (
    <Tooltip label={isRecording ? 'Stop recording' : 'Start voice input'}>
      <ActionIcon
        variant={isRecording ? 'filled' : 'subtle'}
        color={isRecording ? 'red' : 'gray'}
        onClick={handleToggleRecord}
        aria-label={isRecording ? 'Stop recording' : 'Start recording'}
      >
        {isRecording ? <IconMicrophoneOff size={18} /> : <IconMicrophone size={18} />}
      </ActionIcon>
    </Tooltip>
  );
}
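
// ------------------------------------------------------------------
// Usage sketch (not part of the original source): one way a parent
// chat input might wire the two callbacks. ChatInputExample, draft,
// and onSend are illustrative names, not from the original code.
function ChatInputExample({ onSend }: { onSend: (message: string) => void }) {
  const [draft, setDraft] = useState('');

  return (
    <>
      <textarea value={draft} onChange={(e) => setDraft(e.target.value)} />
      <MicrophoneRecorder
        // Interim updates overwrite the draft so the user sees live text
        onTranscriptUpdate={setDraft}
        // speech_final fires once per utterance; treat it as "send"
        onTranscriptFinalized={(transcript) => {
          onSend(transcript);
          setDraft('');
        }}
      />
    </>
  );
}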
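
// ------------------------------------------------------------------
// Hedged sketch (not part of the original source) of the
// /api/voice-token endpoint this component fetches. Assumes a Next.js
// App Router route handler and Deepgram's project-keys endpoint for
// minting short-lived keys; DEEPGRAM_API_KEY and DEEPGRAM_PROJECT_ID
// are assumed environment variables, and the scope and TTL values are
// illustrative choices.
// File: app/api/voice-token/route.ts (hypothetical path)
export async function POST() {
  const res = await fetch(
    `https://api.deepgram.com/v1/projects/${process.env.DEEPGRAM_PROJECT_ID}/keys`,
    {
      method: 'POST',
      headers: {
        Authorization: `Token ${process.env.DEEPGRAM_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        comment: 'short-lived browser streaming key',
        scopes: ['usage:write'],
        time_to_live_seconds: 60, // the key expires a minute after minting
      }),
    }
  );

  if (!res.ok) {
    return Response.json({ error: 'Could not mint a Deepgram key' }, { status: 500 });
  }

  // The create-key response includes the secret under `key`; only that
  // field is forwarded to the browser
  const { key } = await res.json();
  return Response.json({ key });
}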