diff --git a/app/chat/page.tsx.backup b/app/chat/page.tsx.backup
deleted file mode 100644
index dd408cd..0000000
--- a/app/chat/page.tsx.backup
+++ /dev/null
@@ -1,664 +0,0 @@
-'use client';
-
-import { useChat } from '@ai-sdk/react';
-import {
-  Stack,
-  TextInput,
-  Button,
-  Paper,
-  ScrollArea,
-  Title,
-  Container,
-  Group,
-  Text,
-  Loader,
-  ActionIcon,
-  Tooltip,
-} from '@mantine/core';
-import { useRef, useState, useEffect, useCallback } from 'react';
-import { IconVolume, IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
-import { UserMenu } from '@/components/UserMenu';
-
-// Define the shape of the Deepgram transcript
-interface DeepgramTranscript {
-  channel: {
-    alternatives: Array<{
-      transcript: string;
-    }>;
-  };
-  is_final: boolean;
-  speech_final: boolean;
-}
-
-type VoiceState = 'idle' | 'listening' | 'user-speaking' | 'processing' | 'ai-speaking';
-
-export default function ChatPage() {
-  const viewport = useRef(null);
-  const [input, setInput] = useState('');
-  const [voiceState, setVoiceState] = useState('idle');
-  const [countdown, setCountdown] = useState(3);
-  const [isGeneratingSpeech, setIsGeneratingSpeech] = useState(false);
-  const lastSpokenMessageId = useRef(null);
-  const audioRef = useRef(null);
-  const mediaRecorderRef = useRef(null);
-  const socketRef = useRef(null);
-  const transcriptRef = useRef('');
-  const silenceTimeoutRef = useRef(null);
-  const silenceStartTimeRef = useRef(null);
-  const countdownIntervalRef = useRef(null);
-  const hasStartedSpeakingRef = useRef(false);
-
-  const { messages, sendMessage, setMessages, status } = useChat({
-    api: '/api/chat',
-    body: {
-      persona: 'Socratic',
-    },
-    credentials: 'include',
-  });
-
-  // Handle AI response in voice conversation mode
-  useEffect(() => {
-    if (voiceState !== 'processing') return;
-
-    console.log('[Voice Mode] Effect running - voiceState: processing, status:', status, 'messages:', messages.length);
-
-    // Wait until the AI response is complete (status returns to 'ready')
-    if (status !== 'ready') {
-      console.log('[Voice Mode] Waiting for status to be ready, current:', status);
-      return;
-    }
-
-    // Find the latest assistant message
-    console.log('[Voice Mode] All messages:', messages.map(m => ({ role: m.role, id: m.id, preview: m.parts[0]?.text?.substring(0, 30) })));
-
-    const lastAssistantMessage = [...messages]
-      .reverse()
-      .find((m) => m.role === 'assistant');
-
-    if (!lastAssistantMessage) {
-      console.log('[Voice Mode] No assistant message found');
-      return;
-    }
-
-    console.log('[Voice Mode] Selected message ID:', lastAssistantMessage.id);
-    console.log('[Voice Mode] Selected message text preview:', lastAssistantMessage.parts.find(p => p.type === 'text')?.text?.substring(0, 50));
-    console.log('[Voice Mode] Last spoken message ID:', lastSpokenMessageId.current);
-
-    // Skip if we've already spoken this message
-    if (lastSpokenMessageId.current === lastAssistantMessage.id) {
-      console.log('[Voice Mode] Already spoke this message, skipping');
-      return;
-    }
-
-    // Extract text from the message
-    const textPart = lastAssistantMessage.parts.find((p) => p.type === 'text');
-    if (!textPart || !textPart.text) {
-      console.log('[Voice Mode] No text part found in message');
-      return;
-    }
-
-    // Play the audio and transition to ai-speaking state
-    console.log('[Voice Mode] Transitioning to ai-speaking, will play audio');
-    setVoiceState('ai-speaking');
-    playAudio(textPart.text, lastAssistantMessage.id);
-  }, [messages, voiceState, status]);
-
-  const playAudio = async (text:
string, messageId: string) => { - try { - console.log('[Voice Mode] Generating speech for message:', messageId); - setIsGeneratingSpeech(true); - - const response = await fetch('/api/tts', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text }), - }); - - if (!response.ok) { - throw new Error('Failed to generate speech'); - } - - const audioBlob = await response.blob(); - const audioUrl = URL.createObjectURL(audioBlob); - - // Create or reuse audio element - if (!audioRef.current) { - audioRef.current = new Audio(); - } - - audioRef.current.src = audioUrl; - audioRef.current.onended = () => { - URL.revokeObjectURL(audioUrl); - console.log('[Voice Mode] ✓ Finished playing audio, starting new listening session'); - lastSpokenMessageId.current = messageId; - setIsGeneratingSpeech(false); - - // After AI finishes speaking, go back to listening for user - startListening(); - }; - - audioRef.current.onerror = () => { - URL.revokeObjectURL(audioUrl); - console.error('[Voice Mode] Error playing audio'); - setIsGeneratingSpeech(false); - // On error, also go back to listening - startListening(); - }; - - await audioRef.current.play(); - console.log('[Voice Mode] ✓ Playing audio'); - setIsGeneratingSpeech(false); // Audio is now playing - } catch (error) { - console.error('[Voice Mode] Error:', error); - setIsGeneratingSpeech(false); - // On error, go back to listening - startListening(); - } - }; - - const submitUserInput = useCallback(() => { - // Clear any pending silence timeout and countdown - if (silenceTimeoutRef.current) { - clearTimeout(silenceTimeoutRef.current); - silenceTimeoutRef.current = null; - } - if (countdownIntervalRef.current) { - clearInterval(countdownIntervalRef.current); - countdownIntervalRef.current = null; - } - silenceStartTimeRef.current = null; - setCountdown(3); - - // Stop recording - if (mediaRecorderRef.current) { - mediaRecorderRef.current.stop(); - mediaRecorderRef.current = null; - } - if (socketRef.current) { - socketRef.current.close(); - socketRef.current = null; - } - - // Reset speaking flag - hasStartedSpeakingRef.current = false; - - // Send the transcript as a message if we have one - if (transcriptRef.current.trim()) { - console.log('[Voice Mode] Submitting transcript:', transcriptRef.current); - setInput(transcriptRef.current); - setVoiceState('processing'); - - setTimeout(() => { - const form = document.querySelector('form'); - if (form) { - console.log('[Voice Mode] Form found, submitting...'); - form.requestSubmit(); - } else { - console.error('[Voice Mode] Form not found!'); - } - }, 100); - } else { - // If no transcript, go back to listening - console.log('[Voice Mode] No transcript to submit, going back to listening'); - startListening(); - } - - transcriptRef.current = ''; - }, []); - - const startListening = useCallback(async () => { - transcriptRef.current = ''; - setInput(''); - hasStartedSpeakingRef.current = false; - // DON'T reset lastSpokenMessageId here - we need it to track what we've already spoken - silenceStartTimeRef.current = null; - setCountdown(3); - setVoiceState('listening'); - - try { - // 1. Get the Deepgram API key - const response = await fetch('/api/voice-token', { method: 'POST' }); - const data = await response.json(); - - if (data.error) { - throw new Error(data.error); - } - - const { key } = data; - - // 2. Access the microphone - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - - // 3. 
Open direct WebSocket to Deepgram with voice activity detection - const socket = new WebSocket( - 'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true&vad_events=true', - ['token', key] - ); - socketRef.current = socket; - - socket.onopen = () => { - console.log('[Voice Mode] ✓ WebSocket connected, listening for speech...'); - - // 4. Create MediaRecorder - const mediaRecorder = new MediaRecorder(stream, { - mimeType: 'audio/webm', - }); - mediaRecorderRef.current = mediaRecorder; - - // 5. Send audio chunks on data available - mediaRecorder.ondataavailable = (event) => { - if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) { - socket.send(event.data); - } - }; - - // Start recording and chunking audio every 250ms - mediaRecorder.start(250); - }; - - // 6. Receive transcripts and handle silence detection - socket.onmessage = (event) => { - const data = JSON.parse(event.data) as DeepgramTranscript; - - // Check if this message has alternatives (some Deepgram messages don't) - if (!data.channel?.alternatives) { - return; // Skip non-transcript messages (metadata, VAD events, etc.) - } - - const transcript = data.channel.alternatives[0]?.transcript || ''; - - if (transcript) { - // User has started speaking - if (!hasStartedSpeakingRef.current) { - console.log('[Voice Mode] User started speaking'); - hasStartedSpeakingRef.current = true; - setVoiceState('user-speaking'); - } - - // Clear any existing silence timeout and countdown - if (silenceTimeoutRef.current) { - clearTimeout(silenceTimeoutRef.current); - silenceTimeoutRef.current = null; - } - if (countdownIntervalRef.current) { - clearInterval(countdownIntervalRef.current); - countdownIntervalRef.current = null; - } - silenceStartTimeRef.current = null; - setCountdown(3); - - // Handle transcript updates - if (data.is_final) { - // This is a finalized phrase - append it to our transcript - transcriptRef.current = transcriptRef.current - ? transcriptRef.current + ' ' + transcript - : transcript; - setInput(transcriptRef.current); - console.log('[Voice Mode] Finalized phrase:', transcript); - - // Start a generous 3-second silence timer after each finalized phrase - silenceStartTimeRef.current = Date.now(); - - // Update countdown every 100ms - countdownIntervalRef.current = setInterval(() => { - if (silenceStartTimeRef.current) { - const elapsed = Date.now() - silenceStartTimeRef.current; - const remaining = Math.max(0, 3 - elapsed / 1000); - setCountdown(remaining); - } - }, 100); - - silenceTimeoutRef.current = setTimeout(() => { - console.log('[Voice Mode] 3 seconds of silence detected, submitting...'); - submitUserInput(); - }, 3000); - } else { - // This is an interim result - show it temporarily - const displayText = transcriptRef.current - ? 
transcriptRef.current + ' ' + transcript - : transcript; - setInput(displayText); - } - } - }; - - socket.onclose = () => { - // Clean up stream - stream.getTracks().forEach((track) => track.stop()); - console.log('[Voice Mode] WebSocket closed'); - }; - - socket.onerror = (err) => { - console.error('[Voice Mode] WebSocket error:', err); - setVoiceState('idle'); - }; - } catch (error) { - console.error('[Voice Mode] Error starting listening:', error); - setVoiceState('idle'); - } - }, [submitUserInput]); - - const skipAudioAndListen = useCallback(() => { - console.log('[Voice Mode] Skipping audio playback'); - - // Stop current audio - if (audioRef.current) { - audioRef.current.pause(); - audioRef.current.currentTime = 0; - } - - setIsGeneratingSpeech(false); - - // Go straight to listening - startListening(); - }, [startListening]); - - const exitVoiceMode = useCallback(() => { - // Clear any timeouts and intervals - if (silenceTimeoutRef.current) { - clearTimeout(silenceTimeoutRef.current); - silenceTimeoutRef.current = null; - } - if (countdownIntervalRef.current) { - clearInterval(countdownIntervalRef.current); - countdownIntervalRef.current = null; - } - silenceStartTimeRef.current = null; - - // Stop recording - if (mediaRecorderRef.current) { - mediaRecorderRef.current.stop(); - mediaRecorderRef.current = null; - } - if (socketRef.current) { - socketRef.current.close(); - socketRef.current = null; - } - - // Stop audio playback - if (audioRef.current) { - audioRef.current.pause(); - audioRef.current = null; - } - - hasStartedSpeakingRef.current = false; - lastSpokenMessageId.current = null; - transcriptRef.current = ''; - setInput(''); - setCountdown(3); - setIsGeneratingSpeech(false); - setVoiceState('idle'); - console.log('[Voice Mode] Exited voice conversation mode'); - }, []); - - const handleToggleVoiceMode = useCallback(() => { - if (voiceState === 'idle') { - // Start voice conversation mode - // First, check if there's a recent AI message to read out - const lastAssistantMessage = [...messages] - .reverse() - .find((m) => m.role === 'assistant'); - - if (lastAssistantMessage) { - // Extract text from the message - const textPart = lastAssistantMessage.parts.find((p) => p.type === 'text'); - - if (textPart && textPart.text) { - // Play the most recent AI message first, then start listening - console.log('[Voice Mode] Starting voice mode, reading most recent AI message first'); - setVoiceState('ai-speaking'); - playAudio(textPart.text, lastAssistantMessage.id); - return; - } - } - - // No AI message to read, just start listening - startListening(); - } else { - // Exit voice conversation mode - exitVoiceMode(); - } - }, [voiceState, startListening, exitVoiceMode, messages]); - - // Add initial greeting message on first load - useEffect(() => { - if (messages.length === 0) { - setMessages([ - { - id: 'initial-greeting', - role: 'assistant', - parts: [ - { - type: 'text', - text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? 
I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.', - }, - ], - }, - ]); - } - }, []); - - // Auto-scroll to bottom - useEffect(() => { - viewport.current?.scrollTo({ - top: viewport.current.scrollHeight, - behavior: 'smooth', - }); - }, [messages]); - - const handleSubmit = (e: React.FormEvent) => { - e.preventDefault(); - if (!input.trim() || status === 'submitted' || status === 'streaming') return; - - sendMessage({ text: input }); - setInput(''); - }; - - const handleNewConversation = () => { - // Clear all messages and reset to initial greeting - setMessages([ - { - id: 'initial-greeting', - role: 'assistant', - parts: [ - { - type: 'text', - text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.', - }, - ], - }, - ]); - }; - - return ( - - - - Ponderants Interview - - - - - - - - - - - - {messages.map((m) => ( - - {m.role === 'user' ? 'You' : 'AI'} - {m.parts.map((part, i) => { - if (part.type === 'text') { - return ( - - {part.text} - - ); - } - - // Handle tool calls (e.g., suggest_node) - if (part.type === 'tool-call') { - return ( - - - 💡 Node Suggestion - - {part.args.title} - - {part.args.content} - - {part.args.tags && part.args.tags.length > 0 && ( - - {part.args.tags.map((tag: string, tagIdx: number) => ( - - #{tag} - - ))} - - )} - - ); - } - - return null; - })} - - ))} - - {/* Typing indicator while AI is generating a response */} - {(status === 'submitted' || status === 'streaming') && ( - - AI - - - Thinking... - - - )} - - - - - {/* Big Voice Mode Button - shown above text input */} - - - - - - {/* Skip button - only shown when AI is speaking */} - {voiceState === 'ai-speaking' && ( - - )} - - - {/* Text Input - always available */} -
- - setInput(e.currentTarget.value)} - placeholder="Or type your thoughts here..." - style={{ flex: 1 }} - variant="filled" - disabled={voiceState !== 'idle'} - /> - - -
-
-
-  );
-}
diff --git a/app/chat/page.tsx.old b/app/chat/page.tsx.old
deleted file mode 100644
index ddd17e9..0000000
--- a/app/chat/page.tsx.old
+++ /dev/null
@@ -1,814 +0,0 @@
-'use client';
-
-import { useChat } from '@ai-sdk/react';
-import {
-  Stack,
-  TextInput,
-  Button,
-  Paper,
-  ScrollArea,
-  Title,
-  Container,
-  Group,
-  Text,
-  Loader,
-  ActionIcon,
-  Tooltip,
-} from '@mantine/core';
-import { useRef, useState, useEffect, useCallback } from 'react';
-import { IconVolume, IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
-import { createActor } from 'xstate';
-import { useSelector } from '@xstate/react';
-import { appMachine } from '@/lib/app-machine';
-import { UserMenu } from '@/components/UserMenu';
-
-// Define the shape of the Deepgram transcript
-interface DeepgramTranscript {
-  channel: {
-    alternatives: Array<{
-      transcript: string;
-    }>;
-  };
-  is_final: boolean;
-  speech_final: boolean;
-}
-
-/**
- * Get the voice button text based on the current state tags.
- * This replaces complex nested ternaries with a clean, readable function.
- */
-function getVoiceButtonText(
-  state: ReturnType>,
-  silenceStartTime: number | null
-): string {
-  // Check tags in priority order and return appropriate text
-  let buttonText: string;
-
-  if (state.hasTag('textMode') || state.hasTag('voiceIdle')) {
-    buttonText = 'Start Voice Conversation';
-  } else if (state.hasTag('listening')) {
-    buttonText = 'Listening... Start speaking';
-  } else if (state.hasTag('userSpeaking')) {
-    buttonText = 'Speaking... (will auto-submit after 3s silence)';
-  } else if (state.hasTag('timingOut')) {
-    if (silenceStartTime) {
-      const elapsed = Date.now() - silenceStartTime;
-      const remaining = Math.max(0, 3 - elapsed / 1000);
-      buttonText = `Speaking... (auto-submits in ${remaining.toFixed(1)}s)`;
-    } else {
-      buttonText = 'Speaking... (timing out...)';
-    }
-  } else if (state.hasTag('processing')) {
-    buttonText = 'Processing...';
-  } else if (state.hasTag('aiGenerating')) {
-    buttonText = 'Generating speech...';
-  } else if (state.hasTag('aiSpeaking')) {
-    buttonText = 'AI is speaking... Please wait';
-  } else {
-    // Fallback (should never reach here if tags are properly defined)
-    buttonText = 'Start Voice Conversation';
-    console.warn('[Voice Mode] No matching tag found, using fallback text.
Active tags:', state.tags); - } - - console.log('[Voice Mode] Button text determined:', buttonText, 'Active tags:', Array.from(state.tags)); - return buttonText; -} - -export default function ChatPage() { - const viewport = useRef(null); - - // XState machine for voice mode state management - const [actorRef] = useState(() => createActor(appMachine).start()); - const state = useSelector(actorRef, (snapshot) => snapshot); - const send = actorRef.send.bind(actorRef); - - // Imperative refs for managing side effects - const audioRef = useRef(null); - const mediaRecorderRef = useRef(null); - const socketRef = useRef(null); - const silenceTimeoutRef = useRef(null); - const silenceStartTimeRef = useRef(null); - const countdownIntervalRef = useRef(null); - const shouldCancelAudioRef = useRef(false); // Flag to cancel pending audio operations - - const { messages, sendMessage, setMessages, status } = useChat(); - - // Extract text from message (handles v5 parts structure) - const getMessageText = (msg: any): string => { - if ('parts' in msg && Array.isArray(msg.parts)) { - const textPart = msg.parts.find((p: any) => p.type === 'text'); - return textPart?.text || ''; - } - return msg.content || ''; - }; - - // Handle AI response in voice conversation mode - SIMPLE VERSION - useEffect(() => { - if (!state.hasTag('processing')) return; - if (status !== 'ready') { - console.log('[Voice Mode] Waiting, status:', status); - return; - } - - const transcript = state.context.transcript?.trim(); - if (!transcript) return; - - console.log('[Voice Mode] === PROCESSING ==='); - console.log('[Voice Mode] Transcript:', transcript); - console.log('[Voice Mode] Messages:', messages.length); - - // Get last 2 messages - const lastMsg = messages[messages.length - 1]; - const secondLastMsg = messages[messages.length - 2]; - - console.log('[Voice Mode] Last msg:', lastMsg?.role, getMessageText(lastMsg || {}).substring(0, 30)); - console.log('[Voice Mode] 2nd last msg:', secondLastMsg?.role, getMessageText(secondLastMsg || {}).substring(0, 30)); - - // Case 1: User message not submitted yet - // Check if the last message is the user's transcript - const userMessageExists = messages.some(m => - m.role === 'user' && getMessageText(m) === transcript - ); - - if (!userMessageExists) { - console.log('[Voice Mode] → Submitting user message'); - submitUserInput(); - return; - } - - // Case 2: User message submitted, check if AI has responded - // After user submits, if AI responds, the new AI message is LAST - if (lastMsg && lastMsg.role === 'assistant' && - secondLastMsg && secondLastMsg.role === 'user' && - getMessageText(secondLastMsg) === transcript) { - - const aiMsg = lastMsg; - console.log('[Voice Mode] → AI response found:', aiMsg.id); - console.log('[Voice Mode] → Last spoken:', state.context.lastSpokenMessageId); - - // Only play if we haven't played this message yet - if (state.context.lastSpokenMessageId !== aiMsg.id) { - const text = getMessageText(aiMsg); - console.log('[Voice Mode] → Playing:', text.substring(0, 50) + '...'); - send({ type: 'AI_RESPONSE_READY', messageId: aiMsg.id, text }); - playAudio(text, aiMsg.id); - } else { - console.log('[Voice Mode] → Already played, skipping'); - } - return; - } - - // Case 3: Waiting for AI response - console.log('[Voice Mode] → Waiting for AI response...'); - }, [messages, state, status, send]); - - - // Stop all audio playback and cancel pending operations - const stopAllAudio = useCallback(() => { - console.log('[Voice Mode] Stopping all audio operations'); - - // 
Set cancel flag to prevent any pending audio from playing - shouldCancelAudioRef.current = true; - - // Stop and clean up audio element - if (audioRef.current) { - audioRef.current.pause(); - audioRef.current.currentTime = 0; - audioRef.current.src = ''; - } - }, []); - - const playAudio = async (text: string, messageId: string) => { - try { - // Reset cancel flag at the start of a new audio operation - shouldCancelAudioRef.current = false; - - console.log('[Voice Mode] Generating speech for message:', messageId); - console.log('[Voice Mode] State transition:', state.value); - - const response = await fetch('/api/tts', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text }), - }); - - // Check if we should cancel before continuing - if (shouldCancelAudioRef.current) { - console.log('[Voice Mode] Audio generation canceled before blob creation'); - return; - } - - if (!response.ok) { - throw new Error('Failed to generate speech'); - } - - const audioBlob = await response.blob(); - - // Check again after async operation - if (shouldCancelAudioRef.current) { - console.log('[Voice Mode] Audio generation canceled after blob creation'); - return; - } - - const audioUrl = URL.createObjectURL(audioBlob); - - // Create or reuse audio element - if (!audioRef.current) { - audioRef.current = new Audio(); - } - - audioRef.current.src = audioUrl; - audioRef.current.onended = () => { - URL.revokeObjectURL(audioUrl); - console.log('[Voice Mode] ✓ Finished playing audio, sending TTS_FINISHED event'); - console.log('[Voice Mode] State transition:', state.value); - send({ type: 'TTS_FINISHED', messageId }); - - // After AI finishes speaking, go back to listening for user - startListening(); - }; - - audioRef.current.onerror = () => { - URL.revokeObjectURL(audioUrl); - console.error('[Voice Mode] Error playing audio'); - // On error, also go back to listening - startListening(); - }; - - // Final check before playing - if (shouldCancelAudioRef.current) { - console.log('[Voice Mode] Audio playback canceled before play()'); - URL.revokeObjectURL(audioUrl); - return; - } - - await audioRef.current.play(); - - // Only send TTS_PLAYING if we haven't been canceled - if (!shouldCancelAudioRef.current) { - console.log('[Voice Mode] ✓ Playing audio, sending TTS_PLAYING event'); - console.log('[Voice Mode] State transition:', state.value); - send({ type: 'TTS_PLAYING' }); - } else { - console.log('[Voice Mode] Audio playback canceled after play()'); - URL.revokeObjectURL(audioUrl); - } - } catch (error) { - console.error('[Voice Mode] Error:', error); - // On error, go back to listening - startListening(); - } - }; - - const submitUserInput = useCallback(() => { - // Clear any pending silence timeout and countdown - if (silenceTimeoutRef.current) { - clearTimeout(silenceTimeoutRef.current); - silenceTimeoutRef.current = null; - } - if (countdownIntervalRef.current) { - clearInterval(countdownIntervalRef.current); - countdownIntervalRef.current = null; - } - silenceStartTimeRef.current = null; - - // Stop recording - if (mediaRecorderRef.current) { - mediaRecorderRef.current.stop(); - mediaRecorderRef.current = null; - } - if (socketRef.current) { - socketRef.current.close(); - socketRef.current = null; - } - - // Send the transcript as a message if we have one - const transcript = state.context.transcript; - if (transcript.trim()) { - console.log('[Voice Mode] Submitting transcript:', transcript); - console.log('[Voice Mode] State transition:', state.value); - - 
setTimeout(() => { - const form = document.querySelector('form'); - if (form) { - console.log('[Voice Mode] Form found, submitting...'); - form.requestSubmit(); - } else { - console.error('[Voice Mode] Form not found!'); - } - }, 100); - } else { - // If no transcript, go back to listening - console.log('[Voice Mode] No transcript to submit, going back to listening'); - startListening(); - } - }, [state, send]); - - const startListening = useCallback(async () => { - silenceStartTimeRef.current = null; - - // Send event to enter listening state (which clears transcript/input/countdown) - console.log('[Voice Mode] Sending START_LISTENING event (implicitly via state transition)'); - console.log('[Voice Mode] State transition:', state.value); - - try { - // 1. Get the Deepgram API key - const response = await fetch('/api/voice-token', { method: 'POST' }); - const data = await response.json(); - - if (data.error) { - throw new Error(data.error); - } - - const { key } = data; - - // 2. Access the microphone - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - - // 3. Open direct WebSocket to Deepgram with voice activity detection - const socket = new WebSocket( - 'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true&vad_events=true', - ['token', key] - ); - socketRef.current = socket; - - socket.onopen = () => { - console.log('[Voice Mode] ✓ WebSocket connected, listening for speech...'); - console.log('[Voice Mode] State transition:', state.value); - - // 4. Create MediaRecorder - const mediaRecorder = new MediaRecorder(stream, { - mimeType: 'audio/webm', - }); - mediaRecorderRef.current = mediaRecorder; - - // 5. Send audio chunks on data available - mediaRecorder.ondataavailable = (event) => { - if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) { - socket.send(event.data); - } - }; - - // Start recording and chunking audio every 250ms - mediaRecorder.start(250); - }; - - // 6. Receive transcripts and handle silence detection - socket.onmessage = (event) => { - const data = JSON.parse(event.data) as DeepgramTranscript; - - // Check if this message has alternatives (some Deepgram messages don't) - if (!data.channel?.alternatives) { - return; // Skip non-transcript messages (metadata, VAD events, etc.) 
- } - - const transcript = data.channel.alternatives[0]?.transcript || ''; - - if (transcript) { - // User has started speaking - if (!state.context.hasStartedSpeaking) { - console.log('[Voice Mode] User started speaking, sending USER_STARTED_SPEAKING event'); - console.log('[Voice Mode] State transition:', state.value); - send({ type: 'USER_STARTED_SPEAKING' }); - } - - // Clear any existing silence timeout and countdown - if (silenceTimeoutRef.current) { - clearTimeout(silenceTimeoutRef.current); - silenceTimeoutRef.current = null; - } - if (countdownIntervalRef.current) { - clearInterval(countdownIntervalRef.current); - countdownIntervalRef.current = null; - } - silenceStartTimeRef.current = null; - - // Handle transcript updates - if (data.is_final) { - // This is a finalized phrase - send to machine - console.log('[Voice Mode] === FINALIZED PHRASE ==='); - console.log('[Voice Mode] Transcript:', transcript); - console.log('[Voice Mode] state.value BEFORE:', JSON.stringify(state.value)); - console.log('[Voice Mode] tags BEFORE:', Array.from(state.tags)); - console.log('[Voice Mode] context BEFORE:', JSON.stringify(state.context)); - console.log('[Voice Mode] Sending FINALIZED_PHRASE event'); - send({ type: 'FINALIZED_PHRASE', phrase: transcript }); - - // Start a generous 3-second silence timer after each finalized phrase - silenceStartTimeRef.current = Date.now(); - - // Update countdown every 100ms - countdownIntervalRef.current = setInterval(() => { - if (silenceStartTimeRef.current) { - const elapsed = Date.now() - silenceStartTimeRef.current; - const remaining = Math.max(0, 3 - elapsed / 1000); - // Note: countdown is now managed in machine context, but we need - // to update it frequently for UI display. This is acceptable as - // a UI-only side effect. - } - }, 100); - - silenceTimeoutRef.current = setTimeout(() => { - console.log('[Voice Mode] 3 seconds of silence detected, sending SILENCE_TIMEOUT event'); - console.log('[Voice Mode] State transition:', state.value); - send({ type: 'SILENCE_TIMEOUT' }); - // Note: submitUserInput will be called by the processing state effect - }, 3000); - } else { - // This is an interim result - update display (send TRANSCRIPT_UPDATE) - const currentTranscript = state.context.transcript; - const displayText = currentTranscript - ? 
currentTranscript + ' ' + transcript - : transcript; - send({ type: 'TRANSCRIPT_UPDATE', transcript: displayText }); - } - } - }; - - socket.onclose = () => { - // Clean up stream - stream.getTracks().forEach((track) => track.stop()); - console.log('[Voice Mode] WebSocket closed'); - console.log('[Voice Mode] State transition:', state.value); - }; - - socket.onerror = (err) => { - console.error('[Voice Mode] WebSocket error:', err); - console.log('[Voice Mode] State transition:', state.value); - // On error, toggle back to text mode if we're in voice mode - if (!state.hasTag('textMode')) { - send({ type: 'TOGGLE_VOICE_MODE' }); - } - }; - } catch (error) { - console.error('[Voice Mode] Error starting listening:', error); - console.log('[Voice Mode] State transition:', state.value); - // On error, toggle back to text mode if we're in voice mode - if (!state.hasTag('textMode')) { - send({ type: 'TOGGLE_VOICE_MODE' }); - } - } - }, [submitUserInput, state, send]); - - const skipAudioAndListen = useCallback(() => { - console.log('[Voice Mode] === SKIP BUTTON CLICKED ==='); - console.log('[Voice Mode] Current state.value:', JSON.stringify(state.value)); - console.log('[Voice Mode] Current tags:', Array.from(state.tags)); - - // Stop ALL audio operations - stopAllAudio(); - - // Send skip event - send({ type: 'SKIP_AUDIO' }); - - // Go straight to listening - startListening(); - }, [startListening, state, send, stopAllAudio]); - - const handleToggleVoiceMode = useCallback(() => { - console.log('[Voice Mode] Voice button pressed, sending TOGGLE_VOICE_MODE event'); - console.log('[Voice Mode] Current state:', state.value); - send({ type: 'TOGGLE_VOICE_MODE' }); - }, [state, send]); - - // Handle entering voice.idle state (after TOGGLE_VOICE_MODE from text mode) - useEffect(() => { - if (!state.hasTag('voiceIdle')) return; - - console.log('[Voice Mode] Entered voice.idle, checking for AI message to read'); - - // Get ALL assistant messages in order - const assistantMessages = messages.filter((m) => m.role === 'assistant'); - console.log('[Voice Mode] (idle) Found', assistantMessages.length, 'assistant messages'); - - if (assistantMessages.length === 0) { - console.log('[Voice Mode] (idle) No assistant messages, starting listening'); - send({ type: 'START_LISTENING' }); - startListening(); - return; - } - - // Get the LAST (most recent) assistant message - const latestAssistantMessage = assistantMessages[assistantMessages.length - 1]; - console.log('[Voice Mode] (idle) Latest message ID:', latestAssistantMessage.id); - console.log('[Voice Mode] (idle) Last spoken message ID:', state.context.lastSpokenMessageId); - - // Skip if we've already spoken this message - if (state.context.lastSpokenMessageId === latestAssistantMessage.id) { - console.log('[Voice Mode] (idle) Already spoke latest message, starting listening'); - send({ type: 'START_LISTENING' }); - startListening(); - return; - } - - // Extract text from the message - let text = ''; - if ('parts' in latestAssistantMessage && Array.isArray((latestAssistantMessage as any).parts)) { - const textPart = (latestAssistantMessage as any).parts.find((p: any) => p.type === 'text'); - text = textPart?.text || ''; - } - - if (text) { - // Play the most recent AI message first, then start listening - console.log('[Voice Mode] (idle) Reading latest AI message:', text.substring(0, 50) + '...'); - send({ type: 'AI_RESPONSE_READY', messageId: latestAssistantMessage.id, text }); - playAudio(text, latestAssistantMessage.id); - return; - } - - // No text found, 
just start listening - console.log('[Voice Mode] (idle) No text in latest message, starting listening'); - send({ type: 'START_LISTENING' }); - startListening(); - }, [state, messages, send]); - - // Stop audio when leaving audio-related states - useEffect(() => { - const isInAudioState = state.hasTag('canSkipAudio'); - - if (!isInAudioState) { - // We're not in an audio state, make sure everything is stopped - stopAllAudio(); - } - }, [state, stopAllAudio]); - - // Log state transitions for debugging - useEffect(() => { - console.log('[Voice Mode] === STATE TRANSITION ==='); - console.log('[Voice Mode] state.value:', JSON.stringify(state.value)); - console.log('[Voice Mode] Active tags:', Array.from(state.tags)); - console.log('[Voice Mode] Context:', JSON.stringify(state.context)); - }, [state.value]); - - // Add initial greeting message on first load - useEffect(() => { - if (messages.length === 0) { - setMessages([ - { - id: 'initial-greeting', - role: 'assistant', - parts: [ - { - type: 'text', - text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.', - }, - ], - } as any, - ]); - } - }, []); - - // Auto-scroll to bottom - useEffect(() => { - viewport.current?.scrollTo({ - top: viewport.current.scrollHeight, - behavior: 'smooth', - }); - }, [messages]); - - const handleSubmit = (e: React.FormEvent) => { - e.preventDefault(); - const inputText = state.context.input; - if (!inputText.trim() || status === 'submitted' || status === 'streaming') return; - - console.log('[Voice Mode] Submitting message:', inputText); - console.log('[Voice Mode] State transition:', state.value); - - sendMessage({ text: inputText }); - // Clear input via machine context (will be cleared on next state transition) - }; - - const handleNewConversation = () => { - // Clear all messages and reset to initial greeting - setMessages([ - { - id: 'initial-greeting', - role: 'assistant', - parts: [ - { - type: 'text', - text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.', - }, - ], - } as any, - ]); - }; - - return ( - - - - Ponderants Interview - - - - - - - - - - - - {messages.map((m) => ( - - {m.role === 'user' ? 'You' : 'AI'} - {/* Extract text from message parts */} - {(() => { - if ('parts' in m && Array.isArray((m as any).parts)) { - return (m as any).parts.map((part: any, i: number) => { - if (part.type === 'text') { - return ( - - {part.text} - - ); - } - return null; - }); - } - return Message content unavailable; - })()} - - ))} - - {/* Typing indicator while AI is generating a response */} - {(status === 'submitted' || status === 'streaming') && ( - - AI - - - Thinking... 
- - - )} - - - - - {/* Big Voice Mode Button - shown above text input */} - - - - - - {/* Skip button - shown when audio can be skipped */} - {state.hasTag('canSkipAudio') && ( - - )} - - - {/* Test Controls - Development Only */} - {process.env.NODE_ENV === 'development' && ( - - - DEV: State Machine Testing - - State: {JSON.stringify(state.value)} | Tags: {Array.from(state.tags).join(', ')} - - - - - - - - - - - )} - - {/* Text Input - always available */} -
- - send({ type: 'TRANSCRIPT_UPDATE', transcript: e.currentTarget.value })} - placeholder="Or type your thoughts here..." - style={{ flex: 1 }} - variant="filled" - disabled={!state.hasTag('textMode') && !state.hasTag('voiceIdle')} - /> - - -
-
-
-  );
-}