fix: Implement working voice transcription with Deepgram API key
After testing, discovered that temporary tokens from grantToken() fail with WebSocket connections. Switched to using the API key directly, which is the standard approach for client-side Deepgram WebSocket connections.

Changes:
- Simplified voice-token route to return API key directly
- Added comprehensive logging to MicrophoneRecorder for debugging
- Documented security considerations and mitigation strategies
- Verified working end-to-end voice transcription

This matches Deepgram's official Next.js starter pattern and is the recommended approach for client-side real-time transcription.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
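The simplified voice-token route itself is not part of the hunks below. As a rough sketch only, assuming an App Router project with the key in a `DEEPGRAM_API_KEY` environment variable and the route at `app/api/voice-token/route.ts` (the path, variable name, and response shape are guesses, not taken from this commit), "return the API key directly" could look like:

```typescript
// app/api/voice-token/route.ts (hypothetical path): returns the Deepgram API key
// to the client instead of minting a temporary token via grantToken().
import { NextResponse } from 'next/server';

export async function GET() {
  const apiKey = process.env.DEEPGRAM_API_KEY; // assumed env var name

  if (!apiKey) {
    // Fail loudly if the key is not configured on the server.
    return NextResponse.json({ error: 'DEEPGRAM_API_KEY is not set' }, { status: 500 });
  }

  // Note: this hands the key to the browser; the commit mentions documenting
  // security considerations and mitigations (e.g. a scoped, rate-limited key).
  return NextResponse.json({ key: apiKey });
}
```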
@@ -78,6 +78,8 @@ export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }
    socketRef.current = socket;

    socket.onopen = () => {
      console.log('[MicrophoneRecorder] ✓ WebSocket connected to Deepgram');

      // 4. Create MediaRecorder
      const mediaRecorder = new MediaRecorder(stream, {
        mimeType: 'audio/webm',
@@ -87,6 +89,7 @@ export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }
      // 5. Send audio chunks on data available
      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
          console.log('[MicrophoneRecorder] Sending audio chunk:', event.data.size, 'bytes');
          socket.send(event.data);
        }
      };
@@ -94,6 +97,7 @@ export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }
      // Start recording and chunking audio every 250ms
      mediaRecorder.start(250);
      setIsRecording(true);
      console.log('[MicrophoneRecorder] ✓ Recording started');
    };

    // 6. Receive transcripts
@@ -101,13 +105,21 @@ export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }
      const data = JSON.parse(event.data) as DeepgramTranscript;
      const transcript = data.channel.alternatives[0]?.transcript || '';

      console.log('[MicrophoneRecorder] Received from Deepgram:', {
        transcript,
        is_final: data.is_final,
        speech_final: data.speech_final
      });

      if (transcript) {
        transcriptRef.current = transcript;
        onTranscriptUpdate(transcript);
        console.log('[MicrophoneRecorder] Updated transcript:', transcript);
      }

      // If it's a "speech final" event, this utterance is done.
      if (data.speech_final) {
        console.log('[MicrophoneRecorder] Speech finalized, stopping recording');
        stopRecording();
      }
    };
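The hunks above start at line 78, after the WebSocket has already been created, so the connection setup is not visible in this diff. A minimal sketch, under stated assumptions, of how a browser client can open the Deepgram streaming socket with an API key fetched from the voice-token route; the route path, response shape, and query parameters are assumptions, while passing the key via the `['token', key]` subprotocol is the browser-side auth mechanism Deepgram documents for `/v1/listen`:

```typescript
// Hypothetical connection setup (the part above the diff's line 78).
// Fetches the Deepgram API key from the server route, then opens the
// streaming WebSocket, authenticating via the 'token' subprotocol.
async function connectToDeepgram(): Promise<WebSocket> {
  const res = await fetch('/api/voice-token');            // assumed route path
  const { key } = (await res.json()) as { key: string };  // assumed response shape

  const url = new URL('wss://api.deepgram.com/v1/listen');
  url.searchParams.set('model', 'nova-2');          // example parameters,
  url.searchParams.set('interim_results', 'true');  // not taken from the commit
  url.searchParams.set('smart_format', 'true');

  // Browsers cannot set an Authorization header on a WebSocket, so the key
  // is sent as the second entry of the subprotocol list: ['token', <key>].
  return new WebSocket(url.toString(), ['token', key]);
}
```

Because a browser WebSocket offers no Authorization header, this subprotocol handshake (or an embedded key in the URL) is what client-side connections rely on, which is consistent with the commit's finding that grantToken() temporary tokens did not work for this path.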