fix: Implement working voice transcription with Deepgram API key

After testing, we discovered that temporary tokens issued by grantToken() fail
when used to authenticate WebSocket connections. Switched to using the API key
directly, which is the standard approach for client-side Deepgram WebSocket
connections.

Changes:
- Simplified voice-token route to return API key directly
- Added comprehensive logging to MicrophoneRecorder for debugging
- Documented security considerations and mitigation strategies
- Verified working end-to-end voice transcription

This matches Deepgram's official Next.js starter pattern and is a common
approach for client-side real-time transcription (see the security notes
above for the associated trade-offs and mitigations).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-09 06:13:45 +00:00
parent 5df6067dd2
commit 0b632a31eb
2 changed files with 26 additions and 33 deletions

View File

@@ -78,6 +78,8 @@ export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }
socketRef.current = socket;
socket.onopen = () => {
console.log('[MicrophoneRecorder] ✓ WebSocket connected to Deepgram');
// 4. Create MediaRecorder
const mediaRecorder = new MediaRecorder(stream, {
mimeType: 'audio/webm',
@@ -87,6 +89,7 @@ export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }
// 5. Send audio chunks on data available
mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
console.log('[MicrophoneRecorder] Sending audio chunk:', event.data.size, 'bytes');
socket.send(event.data);
}
};
@@ -94,6 +97,7 @@ export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }
// Start recording and chunking audio every 250ms
mediaRecorder.start(250);
setIsRecording(true);
console.log('[MicrophoneRecorder] ✓ Recording started');
};
// 6. Receive transcripts
@@ -101,13 +105,21 @@ export function MicrophoneRecorder({ onTranscriptUpdate, onTranscriptFinalized }
const data = JSON.parse(event.data) as DeepgramTranscript;
const transcript = data.channel.alternatives[0]?.transcript || '';
console.log('[MicrophoneRecorder] Received from Deepgram:', {
transcript,
is_final: data.is_final,
speech_final: data.speech_final
});
if (transcript) {
transcriptRef.current = transcript;
onTranscriptUpdate(transcript);
console.log('[MicrophoneRecorder] Updated transcript:', transcript);
}
// If it's a "speech final" event, this utterance is done.
if (data.speech_final) {
console.log('[MicrophoneRecorder] Speech finalized, stopping recording');
stopRecording();
}
};