chore: Remove backup and old page files

Deleted unused backup files:
- app/chat/page.tsx.backup
- app/chat/page.tsx.old

Keeps the codebase clean and reduces confusion. The current page.tsx is the
canonical implementation.

Resolves plan: 07-delete-backup-files.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
commit baddf4f09d
parent 4967ce3cd1
Date: 2025-11-09 21:15:47 +00:00

2 changed files with 0 additions and 1478 deletions

@@ -1,664 +0,0 @@
'use client';
import { useChat } from '@ai-sdk/react';
import {
Stack,
TextInput,
Button,
Paper,
ScrollArea,
Title,
Container,
Group,
Text,
Loader,
ActionIcon,
Tooltip,
} from '@mantine/core';
import { useRef, useState, useEffect, useCallback } from 'react';
import { IconVolume, IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { UserMenu } from '@/components/UserMenu';
// Define the shape of the Deepgram transcript
interface DeepgramTranscript {
channel: {
alternatives: Array<{
transcript: string;
}>;
};
is_final: boolean;
speech_final: boolean;
}
type VoiceState = 'idle' | 'listening' | 'user-speaking' | 'processing' | 'ai-speaking';
export default function ChatPage() {
const viewport = useRef<HTMLDivElement>(null);
const [input, setInput] = useState('');
const [voiceState, setVoiceState] = useState<VoiceState>('idle');
const [countdown, setCountdown] = useState<number>(3);
const [isGeneratingSpeech, setIsGeneratingSpeech] = useState(false);
const lastSpokenMessageId = useRef<string | null>(null);
const audioRef = useRef<HTMLAudioElement | null>(null);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const socketRef = useRef<WebSocket | null>(null);
const transcriptRef = useRef<string>('');
const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
const silenceStartTimeRef = useRef<number | null>(null);
const countdownIntervalRef = useRef<NodeJS.Timeout | null>(null);
const hasStartedSpeakingRef = useRef(false);
const { messages, sendMessage, setMessages, status } = useChat({
api: '/api/chat',
body: {
persona: 'Socratic',
},
credentials: 'include',
});
// Handle AI response in voice conversation mode
useEffect(() => {
if (voiceState !== 'processing') return;
console.log('[Voice Mode] Effect running - voiceState: processing, status:', status, 'messages:', messages.length);
// Wait until the AI response is complete (status returns to 'ready')
if (status !== 'ready') {
console.log('[Voice Mode] Waiting for status to be ready, current:', status);
return;
}
// Find the latest assistant message
console.log('[Voice Mode] All messages:', messages.map(m => ({ role: m.role, id: m.id, preview: m.parts[0]?.text?.substring(0, 30) })));
const lastAssistantMessage = [...messages]
.reverse()
.find((m) => m.role === 'assistant');
if (!lastAssistantMessage) {
console.log('[Voice Mode] No assistant message found');
return;
}
console.log('[Voice Mode] Selected message ID:', lastAssistantMessage.id);
console.log('[Voice Mode] Selected message text preview:', lastAssistantMessage.parts.find(p => p.type === 'text')?.text?.substring(0, 50));
console.log('[Voice Mode] Last spoken message ID:', lastSpokenMessageId.current);
// Skip if we've already spoken this message
if (lastSpokenMessageId.current === lastAssistantMessage.id) {
console.log('[Voice Mode] Already spoke this message, skipping');
return;
}
// Extract text from the message
const textPart = lastAssistantMessage.parts.find((p) => p.type === 'text');
if (!textPart || !textPart.text) {
console.log('[Voice Mode] No text part found in message');
return;
}
// Play the audio and transition to ai-speaking state
console.log('[Voice Mode] Transitioning to ai-speaking, will play audio');
setVoiceState('ai-speaking');
playAudio(textPart.text, lastAssistantMessage.id);
}, [messages, voiceState, status]);
const playAudio = async (text: string, messageId: string) => {
try {
console.log('[Voice Mode] Generating speech for message:', messageId);
setIsGeneratingSpeech(true);
const response = await fetch('/api/tts', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text }),
});
if (!response.ok) {
throw new Error('Failed to generate speech');
}
const audioBlob = await response.blob();
const audioUrl = URL.createObjectURL(audioBlob);
// Create or reuse audio element
if (!audioRef.current) {
audioRef.current = new Audio();
}
audioRef.current.src = audioUrl;
audioRef.current.onended = () => {
URL.revokeObjectURL(audioUrl);
console.log('[Voice Mode] ✓ Finished playing audio, starting new listening session');
lastSpokenMessageId.current = messageId;
setIsGeneratingSpeech(false);
// After AI finishes speaking, go back to listening for user
startListening();
};
audioRef.current.onerror = () => {
URL.revokeObjectURL(audioUrl);
console.error('[Voice Mode] Error playing audio');
setIsGeneratingSpeech(false);
// On error, also go back to listening
startListening();
};
await audioRef.current.play();
console.log('[Voice Mode] ✓ Playing audio');
setIsGeneratingSpeech(false); // Audio is now playing
} catch (error) {
console.error('[Voice Mode] Error:', error);
setIsGeneratingSpeech(false);
// On error, go back to listening
startListening();
}
};
const submitUserInput = useCallback(() => {
// Clear any pending silence timeout and countdown
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
silenceTimeoutRef.current = null;
}
if (countdownIntervalRef.current) {
clearInterval(countdownIntervalRef.current);
countdownIntervalRef.current = null;
}
silenceStartTimeRef.current = null;
setCountdown(3);
// Stop recording
if (mediaRecorderRef.current) {
mediaRecorderRef.current.stop();
mediaRecorderRef.current = null;
}
if (socketRef.current) {
socketRef.current.close();
socketRef.current = null;
}
// Reset speaking flag
hasStartedSpeakingRef.current = false;
// Send the transcript as a message if we have one
if (transcriptRef.current.trim()) {
console.log('[Voice Mode] Submitting transcript:', transcriptRef.current);
setInput(transcriptRef.current);
setVoiceState('processing');
setTimeout(() => {
const form = document.querySelector('form');
if (form) {
console.log('[Voice Mode] Form found, submitting...');
form.requestSubmit();
} else {
console.error('[Voice Mode] Form not found!');
}
}, 100);
} else {
// If no transcript, go back to listening
console.log('[Voice Mode] No transcript to submit, going back to listening');
startListening();
}
transcriptRef.current = '';
}, []);
const startListening = useCallback(async () => {
transcriptRef.current = '';
setInput('');
hasStartedSpeakingRef.current = false;
// DON'T reset lastSpokenMessageId here - we need it to track what we've already spoken
silenceStartTimeRef.current = null;
setCountdown(3);
setVoiceState('listening');
try {
// 1. Get the Deepgram API key
const response = await fetch('/api/voice-token', { method: 'POST' });
const data = await response.json();
if (data.error) {
throw new Error(data.error);
}
const { key } = data;
// 2. Access the microphone
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
// 3. Open direct WebSocket to Deepgram with voice activity detection
const socket = new WebSocket(
'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true&vad_events=true',
['token', key]
);
socketRef.current = socket;
socket.onopen = () => {
console.log('[Voice Mode] ✓ WebSocket connected, listening for speech...');
// 4. Create MediaRecorder
const mediaRecorder = new MediaRecorder(stream, {
mimeType: 'audio/webm',
});
mediaRecorderRef.current = mediaRecorder;
// 5. Send audio chunks on data available
mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
socket.send(event.data);
}
};
// Start recording and chunking audio every 250ms
mediaRecorder.start(250);
};
// 6. Receive transcripts and handle silence detection
socket.onmessage = (event) => {
const data = JSON.parse(event.data) as DeepgramTranscript;
// Check if this message has alternatives (some Deepgram messages don't)
if (!data.channel?.alternatives) {
return; // Skip non-transcript messages (metadata, VAD events, etc.)
}
const transcript = data.channel.alternatives[0]?.transcript || '';
if (transcript) {
// User has started speaking
if (!hasStartedSpeakingRef.current) {
console.log('[Voice Mode] User started speaking');
hasStartedSpeakingRef.current = true;
setVoiceState('user-speaking');
}
// Clear any existing silence timeout and countdown
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
silenceTimeoutRef.current = null;
}
if (countdownIntervalRef.current) {
clearInterval(countdownIntervalRef.current);
countdownIntervalRef.current = null;
}
silenceStartTimeRef.current = null;
setCountdown(3);
// Handle transcript updates
if (data.is_final) {
// This is a finalized phrase - append it to our transcript
transcriptRef.current = transcriptRef.current
? transcriptRef.current + ' ' + transcript
: transcript;
setInput(transcriptRef.current);
console.log('[Voice Mode] Finalized phrase:', transcript);
// Start a generous 3-second silence timer after each finalized phrase
silenceStartTimeRef.current = Date.now();
// Update countdown every 100ms
countdownIntervalRef.current = setInterval(() => {
if (silenceStartTimeRef.current) {
const elapsed = Date.now() - silenceStartTimeRef.current;
const remaining = Math.max(0, 3 - elapsed / 1000);
setCountdown(remaining);
}
}, 100);
silenceTimeoutRef.current = setTimeout(() => {
console.log('[Voice Mode] 3 seconds of silence detected, submitting...');
submitUserInput();
}, 3000);
} else {
// This is an interim result - show it temporarily
const displayText = transcriptRef.current
? transcriptRef.current + ' ' + transcript
: transcript;
setInput(displayText);
}
}
};
socket.onclose = () => {
// Clean up stream
stream.getTracks().forEach((track) => track.stop());
console.log('[Voice Mode] WebSocket closed');
};
socket.onerror = (err) => {
console.error('[Voice Mode] WebSocket error:', err);
setVoiceState('idle');
};
} catch (error) {
console.error('[Voice Mode] Error starting listening:', error);
setVoiceState('idle');
}
}, [submitUserInput]);
const skipAudioAndListen = useCallback(() => {
console.log('[Voice Mode] Skipping audio playback');
// Stop current audio
if (audioRef.current) {
audioRef.current.pause();
audioRef.current.currentTime = 0;
}
setIsGeneratingSpeech(false);
// Go straight to listening
startListening();
}, [startListening]);
const exitVoiceMode = useCallback(() => {
// Clear any timeouts and intervals
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
silenceTimeoutRef.current = null;
}
if (countdownIntervalRef.current) {
clearInterval(countdownIntervalRef.current);
countdownIntervalRef.current = null;
}
silenceStartTimeRef.current = null;
// Stop recording
if (mediaRecorderRef.current) {
mediaRecorderRef.current.stop();
mediaRecorderRef.current = null;
}
if (socketRef.current) {
socketRef.current.close();
socketRef.current = null;
}
// Stop audio playback
if (audioRef.current) {
audioRef.current.pause();
audioRef.current = null;
}
hasStartedSpeakingRef.current = false;
lastSpokenMessageId.current = null;
transcriptRef.current = '';
setInput('');
setCountdown(3);
setIsGeneratingSpeech(false);
setVoiceState('idle');
console.log('[Voice Mode] Exited voice conversation mode');
}, []);
const handleToggleVoiceMode = useCallback(() => {
if (voiceState === 'idle') {
// Start voice conversation mode
// First, check if there's a recent AI message to read out
const lastAssistantMessage = [...messages]
.reverse()
.find((m) => m.role === 'assistant');
if (lastAssistantMessage) {
// Extract text from the message
const textPart = lastAssistantMessage.parts.find((p) => p.type === 'text');
if (textPart && textPart.text) {
// Play the most recent AI message first, then start listening
console.log('[Voice Mode] Starting voice mode, reading most recent AI message first');
setVoiceState('ai-speaking');
playAudio(textPart.text, lastAssistantMessage.id);
return;
}
}
// No AI message to read, just start listening
startListening();
} else {
// Exit voice conversation mode
exitVoiceMode();
}
}, [voiceState, startListening, exitVoiceMode, messages]);
// Add initial greeting message on first load
useEffect(() => {
if (messages.length === 0) {
setMessages([
{
id: 'initial-greeting',
role: 'assistant',
parts: [
{
type: 'text',
text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
},
],
},
]);
}
}, []);
// Auto-scroll to bottom
useEffect(() => {
viewport.current?.scrollTo({
top: viewport.current.scrollHeight,
behavior: 'smooth',
});
}, [messages]);
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (!input.trim() || status === 'submitted' || status === 'streaming') return;
sendMessage({ text: input });
setInput('');
};
const handleNewConversation = () => {
// Clear all messages and reset to initial greeting
setMessages([
{
id: 'initial-greeting',
role: 'assistant',
parts: [
{
type: 'text',
text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
},
],
},
]);
};
return (
<Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}>
<Group justify="space-between" py="md">
<Title order={2}>
Ponderants Interview
</Title>
<Group gap="md">
<Tooltip label="Start a new conversation">
<Button
variant="subtle"
onClick={handleNewConversation}
disabled={status === 'submitted' || status === 'streaming'}
>
New Conversation
</Button>
</Tooltip>
<UserMenu />
</Group>
</Group>
<ScrollArea
h="100%"
style={{ flex: 1 }}
viewportRef={viewport}
>
<Stack gap="md" pb="xl">
{messages.map((m) => (
<Paper
key={m.id}
withBorder
shadow="md"
p="sm"
radius="lg"
style={{
alignSelf: m.role === 'user' ? 'flex-end' : 'flex-start',
backgroundColor:
m.role === 'user' ? '#343a40' : '#212529',
}}
w="80%"
>
<Text fw={700} size="sm">{m.role === 'user' ? 'You' : 'AI'}</Text>
{m.parts.map((part, i) => {
if (part.type === 'text') {
return (
<Text key={i} style={{ whiteSpace: 'pre-wrap' }}>
{part.text}
</Text>
);
}
// Handle tool calls (e.g., suggest_node)
if (part.type === 'tool-call') {
return (
<Paper key={i} withBorder p="xs" mt="xs" bg="dark.6">
<Text size="xs" c="dimmed" mb="xs">
💡 Node Suggestion
</Text>
<Text fw={600}>{part.args.title}</Text>
<Text size="sm" mt="xs">
{part.args.content}
</Text>
{part.args.tags && part.args.tags.length > 0 && (
<Group gap="xs" mt="xs">
{part.args.tags.map((tag: string, tagIdx: number) => (
<Text key={tagIdx} size="xs" c="blue.4">
#{tag}
</Text>
))}
</Group>
)}
</Paper>
);
}
return null;
})}
</Paper>
))}
{/* Typing indicator while AI is generating a response */}
{(status === 'submitted' || status === 'streaming') && (
<Paper
withBorder
shadow="md"
p="sm"
radius="lg"
style={{
alignSelf: 'flex-start',
backgroundColor: '#212529',
}}
w="80%"
>
<Text fw={700} size="sm">AI</Text>
<Group gap="xs" mt="xs">
<Loader size="xs" />
<Text size="sm" c="dimmed">Thinking...</Text>
</Group>
</Paper>
)}
</Stack>
</ScrollArea>
{/* Big Voice Mode Button - shown above text input */}
<Paper withBorder p="md" radius="xl" my="md">
<Stack gap="sm">
<Group gap="sm">
<Button
onClick={handleToggleVoiceMode}
size="xl"
radius="xl"
h={80}
style={{ flex: 1 }}
color={
voiceState === 'ai-speaking'
? 'blue'
: voiceState === 'user-speaking'
? 'green'
: voiceState === 'listening'
? 'yellow'
: voiceState === 'processing'
? 'blue'
: 'gray'
}
variant={voiceState !== 'idle' ? 'filled' : 'light'}
leftSection={
voiceState === 'ai-speaking' ? (
<IconVolume size={32} />
) : voiceState === 'user-speaking' || voiceState === 'listening' ? (
<IconMicrophone size={32} />
) : (
<IconMicrophone size={32} />
)
}
disabled={status === 'submitted' || status === 'streaming'}
>
{voiceState === 'idle'
? 'Start Voice Conversation'
: voiceState === 'listening'
? 'Listening... Start speaking'
: voiceState === 'user-speaking'
? silenceStartTimeRef.current
? `Speaking... (auto-submits in ${countdown.toFixed(1)}s)`
: 'Speaking... (will auto-submit after 3s silence)'
: voiceState === 'processing'
? 'Processing...'
: isGeneratingSpeech
? 'Generating speech...'
: 'AI is speaking... Please wait'}
</Button>
{/* Skip button - only shown when AI is speaking */}
{voiceState === 'ai-speaking' && (
<Button
onClick={skipAudioAndListen}
size="xl"
radius="xl"
h={80}
color="gray"
variant="outline"
>
Skip
</Button>
)}
</Group>
{/* Text Input - always available */}
<form onSubmit={handleSubmit}>
<Group>
<TextInput
value={input}
onChange={(e) => setInput(e.currentTarget.value)}
placeholder="Or type your thoughts here..."
style={{ flex: 1 }}
variant="filled"
disabled={voiceState !== 'idle'}
/>
<Button
type="submit"
radius="xl"
loading={status === 'submitted' || status === 'streaming'}
disabled={!input.trim() || voiceState !== 'idle'}
>
Send
</Button>
</Group>
</form>
</Stack>
</Paper>
</Container>
);
}

@@ -1,814 +0,0 @@
'use client';
import { useChat } from '@ai-sdk/react';
import {
Stack,
TextInput,
Button,
Paper,
ScrollArea,
Title,
Container,
Group,
Text,
Loader,
ActionIcon,
Tooltip,
} from '@mantine/core';
import { useRef, useState, useEffect, useCallback } from 'react';
import { IconVolume, IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { createActor } from 'xstate';
import { useSelector } from '@xstate/react';
import { appMachine } from '@/lib/app-machine';
import { UserMenu } from '@/components/UserMenu';
// Define the shape of the Deepgram transcript
interface DeepgramTranscript {
channel: {
alternatives: Array<{
transcript: string;
}>;
};
is_final: boolean;
speech_final: boolean;
}
/**
* Get the voice button text based on the current state tags.
* This replaces complex nested ternaries with a clean, readable function.
*/
function getVoiceButtonText(
state: ReturnType<typeof useSelector<typeof actorRef, any>>,
silenceStartTime: number | null
): string {
// Check tags in priority order and return appropriate text
let buttonText: string;
if (state.hasTag('textMode') || state.hasTag('voiceIdle')) {
buttonText = 'Start Voice Conversation';
} else if (state.hasTag('listening')) {
buttonText = 'Listening... Start speaking';
} else if (state.hasTag('userSpeaking')) {
buttonText = 'Speaking... (will auto-submit after 3s silence)';
} else if (state.hasTag('timingOut')) {
if (silenceStartTime) {
const elapsed = Date.now() - silenceStartTime;
const remaining = Math.max(0, 3 - elapsed / 1000);
buttonText = `Speaking... (auto-submits in ${remaining.toFixed(1)}s)`;
} else {
buttonText = 'Speaking... (timing out...)';
}
} else if (state.hasTag('processing')) {
buttonText = 'Processing...';
} else if (state.hasTag('aiGenerating')) {
buttonText = 'Generating speech...';
} else if (state.hasTag('aiSpeaking')) {
buttonText = 'AI is speaking... Please wait';
} else {
// Fallback (should never reach here if tags are properly defined)
buttonText = 'Start Voice Conversation';
console.warn('[Voice Mode] No matching tag found, using fallback text. Active tags:', state.tags);
}
console.log('[Voice Mode] Button text determined:', buttonText, 'Active tags:', Array.from(state.tags));
return buttonText;
}
export default function ChatPage() {
const viewport = useRef<HTMLDivElement>(null);
// XState machine for voice mode state management
const [actorRef] = useState(() => createActor(appMachine).start());
const state = useSelector(actorRef, (snapshot) => snapshot);
const send = actorRef.send.bind(actorRef);
// Imperative refs for managing side effects
const audioRef = useRef<HTMLAudioElement | null>(null);
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const socketRef = useRef<WebSocket | null>(null);
const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
const silenceStartTimeRef = useRef<number | null>(null);
const countdownIntervalRef = useRef<NodeJS.Timeout | null>(null);
const shouldCancelAudioRef = useRef<boolean>(false); // Flag to cancel pending audio operations
const { messages, sendMessage, setMessages, status } = useChat();
// Extract text from message (handles v5 parts structure)
const getMessageText = (msg: any): string => {
if ('parts' in msg && Array.isArray(msg.parts)) {
const textPart = msg.parts.find((p: any) => p.type === 'text');
return textPart?.text || '';
}
return msg.content || '';
};
// Handle AI response in voice conversation mode - SIMPLE VERSION
useEffect(() => {
if (!state.hasTag('processing')) return;
if (status !== 'ready') {
console.log('[Voice Mode] Waiting, status:', status);
return;
}
const transcript = state.context.transcript?.trim();
if (!transcript) return;
console.log('[Voice Mode] === PROCESSING ===');
console.log('[Voice Mode] Transcript:', transcript);
console.log('[Voice Mode] Messages:', messages.length);
// Get last 2 messages
const lastMsg = messages[messages.length - 1];
const secondLastMsg = messages[messages.length - 2];
console.log('[Voice Mode] Last msg:', lastMsg?.role, getMessageText(lastMsg || {}).substring(0, 30));
console.log('[Voice Mode] 2nd last msg:', secondLastMsg?.role, getMessageText(secondLastMsg || {}).substring(0, 30));
// Case 1: User message not submitted yet
// Check if the last message is the user's transcript
const userMessageExists = messages.some(m =>
m.role === 'user' && getMessageText(m) === transcript
);
if (!userMessageExists) {
console.log('[Voice Mode] → Submitting user message');
submitUserInput();
return;
}
// Case 2: User message submitted, check if AI has responded
// After user submits, if AI responds, the new AI message is LAST
if (lastMsg && lastMsg.role === 'assistant' &&
secondLastMsg && secondLastMsg.role === 'user' &&
getMessageText(secondLastMsg) === transcript) {
const aiMsg = lastMsg;
console.log('[Voice Mode] → AI response found:', aiMsg.id);
console.log('[Voice Mode] → Last spoken:', state.context.lastSpokenMessageId);
// Only play if we haven't played this message yet
if (state.context.lastSpokenMessageId !== aiMsg.id) {
const text = getMessageText(aiMsg);
console.log('[Voice Mode] → Playing:', text.substring(0, 50) + '...');
send({ type: 'AI_RESPONSE_READY', messageId: aiMsg.id, text });
playAudio(text, aiMsg.id);
} else {
console.log('[Voice Mode] → Already played, skipping');
}
return;
}
// Case 3: Waiting for AI response
console.log('[Voice Mode] → Waiting for AI response...');
}, [messages, state, status, send]);
// Stop all audio playback and cancel pending operations
const stopAllAudio = useCallback(() => {
console.log('[Voice Mode] Stopping all audio operations');
// Set cancel flag to prevent any pending audio from playing
shouldCancelAudioRef.current = true;
// Stop and clean up audio element
if (audioRef.current) {
audioRef.current.pause();
audioRef.current.currentTime = 0;
audioRef.current.src = '';
}
}, []);
const playAudio = async (text: string, messageId: string) => {
try {
// Reset cancel flag at the start of a new audio operation
shouldCancelAudioRef.current = false;
console.log('[Voice Mode] Generating speech for message:', messageId);
console.log('[Voice Mode] State transition:', state.value);
const response = await fetch('/api/tts', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text }),
});
// Check if we should cancel before continuing
if (shouldCancelAudioRef.current) {
console.log('[Voice Mode] Audio generation canceled before blob creation');
return;
}
if (!response.ok) {
throw new Error('Failed to generate speech');
}
const audioBlob = await response.blob();
// Check again after async operation
if (shouldCancelAudioRef.current) {
console.log('[Voice Mode] Audio generation canceled after blob creation');
return;
}
const audioUrl = URL.createObjectURL(audioBlob);
// Create or reuse audio element
if (!audioRef.current) {
audioRef.current = new Audio();
}
audioRef.current.src = audioUrl;
audioRef.current.onended = () => {
URL.revokeObjectURL(audioUrl);
console.log('[Voice Mode] ✓ Finished playing audio, sending TTS_FINISHED event');
console.log('[Voice Mode] State transition:', state.value);
send({ type: 'TTS_FINISHED', messageId });
// After AI finishes speaking, go back to listening for user
startListening();
};
audioRef.current.onerror = () => {
URL.revokeObjectURL(audioUrl);
console.error('[Voice Mode] Error playing audio');
// On error, also go back to listening
startListening();
};
// Final check before playing
if (shouldCancelAudioRef.current) {
console.log('[Voice Mode] Audio playback canceled before play()');
URL.revokeObjectURL(audioUrl);
return;
}
await audioRef.current.play();
// Only send TTS_PLAYING if we haven't been canceled
if (!shouldCancelAudioRef.current) {
console.log('[Voice Mode] ✓ Playing audio, sending TTS_PLAYING event');
console.log('[Voice Mode] State transition:', state.value);
send({ type: 'TTS_PLAYING' });
} else {
console.log('[Voice Mode] Audio playback canceled after play()');
URL.revokeObjectURL(audioUrl);
}
} catch (error) {
console.error('[Voice Mode] Error:', error);
// On error, go back to listening
startListening();
}
};
const submitUserInput = useCallback(() => {
// Clear any pending silence timeout and countdown
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
silenceTimeoutRef.current = null;
}
if (countdownIntervalRef.current) {
clearInterval(countdownIntervalRef.current);
countdownIntervalRef.current = null;
}
silenceStartTimeRef.current = null;
// Stop recording
if (mediaRecorderRef.current) {
mediaRecorderRef.current.stop();
mediaRecorderRef.current = null;
}
if (socketRef.current) {
socketRef.current.close();
socketRef.current = null;
}
// Send the transcript as a message if we have one
const transcript = state.context.transcript;
if (transcript.trim()) {
console.log('[Voice Mode] Submitting transcript:', transcript);
console.log('[Voice Mode] State transition:', state.value);
setTimeout(() => {
const form = document.querySelector('form');
if (form) {
console.log('[Voice Mode] Form found, submitting...');
form.requestSubmit();
} else {
console.error('[Voice Mode] Form not found!');
}
}, 100);
} else {
// If no transcript, go back to listening
console.log('[Voice Mode] No transcript to submit, going back to listening');
startListening();
}
}, [state, send]);
const startListening = useCallback(async () => {
silenceStartTimeRef.current = null;
// Send event to enter listening state (which clears transcript/input/countdown)
console.log('[Voice Mode] Sending START_LISTENING event (implicitly via state transition)');
console.log('[Voice Mode] State transition:', state.value);
try {
// 1. Get the Deepgram API key
const response = await fetch('/api/voice-token', { method: 'POST' });
const data = await response.json();
if (data.error) {
throw new Error(data.error);
}
const { key } = data;
// 2. Access the microphone
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
// 3. Open direct WebSocket to Deepgram with voice activity detection
const socket = new WebSocket(
'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true&vad_events=true',
['token', key]
);
socketRef.current = socket;
socket.onopen = () => {
console.log('[Voice Mode] ✓ WebSocket connected, listening for speech...');
console.log('[Voice Mode] State transition:', state.value);
// 4. Create MediaRecorder
const mediaRecorder = new MediaRecorder(stream, {
mimeType: 'audio/webm',
});
mediaRecorderRef.current = mediaRecorder;
// 5. Send audio chunks on data available
mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
socket.send(event.data);
}
};
// Start recording and chunking audio every 250ms
mediaRecorder.start(250);
};
// 6. Receive transcripts and handle silence detection
socket.onmessage = (event) => {
const data = JSON.parse(event.data) as DeepgramTranscript;
// Check if this message has alternatives (some Deepgram messages don't)
if (!data.channel?.alternatives) {
return; // Skip non-transcript messages (metadata, VAD events, etc.)
}
const transcript = data.channel.alternatives[0]?.transcript || '';
if (transcript) {
// User has started speaking
if (!state.context.hasStartedSpeaking) {
console.log('[Voice Mode] User started speaking, sending USER_STARTED_SPEAKING event');
console.log('[Voice Mode] State transition:', state.value);
send({ type: 'USER_STARTED_SPEAKING' });
}
// Clear any existing silence timeout and countdown
if (silenceTimeoutRef.current) {
clearTimeout(silenceTimeoutRef.current);
silenceTimeoutRef.current = null;
}
if (countdownIntervalRef.current) {
clearInterval(countdownIntervalRef.current);
countdownIntervalRef.current = null;
}
silenceStartTimeRef.current = null;
// Handle transcript updates
if (data.is_final) {
// This is a finalized phrase - send to machine
console.log('[Voice Mode] === FINALIZED PHRASE ===');
console.log('[Voice Mode] Transcript:', transcript);
console.log('[Voice Mode] state.value BEFORE:', JSON.stringify(state.value));
console.log('[Voice Mode] tags BEFORE:', Array.from(state.tags));
console.log('[Voice Mode] context BEFORE:', JSON.stringify(state.context));
console.log('[Voice Mode] Sending FINALIZED_PHRASE event');
send({ type: 'FINALIZED_PHRASE', phrase: transcript });
// Start a generous 3-second silence timer after each finalized phrase
silenceStartTimeRef.current = Date.now();
// Update countdown every 100ms
countdownIntervalRef.current = setInterval(() => {
if (silenceStartTimeRef.current) {
const elapsed = Date.now() - silenceStartTimeRef.current;
const remaining = Math.max(0, 3 - elapsed / 1000);
// Note: countdown is now managed in machine context, but we need
// to update it frequently for UI display. This is acceptable as
// a UI-only side effect.
}
}, 100);
silenceTimeoutRef.current = setTimeout(() => {
console.log('[Voice Mode] 3 seconds of silence detected, sending SILENCE_TIMEOUT event');
console.log('[Voice Mode] State transition:', state.value);
send({ type: 'SILENCE_TIMEOUT' });
// Note: submitUserInput will be called by the processing state effect
}, 3000);
} else {
// This is an interim result - update display (send TRANSCRIPT_UPDATE)
const currentTranscript = state.context.transcript;
const displayText = currentTranscript
? currentTranscript + ' ' + transcript
: transcript;
send({ type: 'TRANSCRIPT_UPDATE', transcript: displayText });
}
}
};
socket.onclose = () => {
// Clean up stream
stream.getTracks().forEach((track) => track.stop());
console.log('[Voice Mode] WebSocket closed');
console.log('[Voice Mode] State transition:', state.value);
};
socket.onerror = (err) => {
console.error('[Voice Mode] WebSocket error:', err);
console.log('[Voice Mode] State transition:', state.value);
// On error, toggle back to text mode if we're in voice mode
if (!state.hasTag('textMode')) {
send({ type: 'TOGGLE_VOICE_MODE' });
}
};
} catch (error) {
console.error('[Voice Mode] Error starting listening:', error);
console.log('[Voice Mode] State transition:', state.value);
// On error, toggle back to text mode if we're in voice mode
if (!state.hasTag('textMode')) {
send({ type: 'TOGGLE_VOICE_MODE' });
}
}
}, [submitUserInput, state, send]);
const skipAudioAndListen = useCallback(() => {
console.log('[Voice Mode] === SKIP BUTTON CLICKED ===');
console.log('[Voice Mode] Current state.value:', JSON.stringify(state.value));
console.log('[Voice Mode] Current tags:', Array.from(state.tags));
// Stop ALL audio operations
stopAllAudio();
// Send skip event
send({ type: 'SKIP_AUDIO' });
// Go straight to listening
startListening();
}, [startListening, state, send, stopAllAudio]);
const handleToggleVoiceMode = useCallback(() => {
console.log('[Voice Mode] Voice button pressed, sending TOGGLE_VOICE_MODE event');
console.log('[Voice Mode] Current state:', state.value);
send({ type: 'TOGGLE_VOICE_MODE' });
}, [state, send]);
// Handle entering voice.idle state (after TOGGLE_VOICE_MODE from text mode)
useEffect(() => {
if (!state.hasTag('voiceIdle')) return;
console.log('[Voice Mode] Entered voice.idle, checking for AI message to read');
// Get ALL assistant messages in order
const assistantMessages = messages.filter((m) => m.role === 'assistant');
console.log('[Voice Mode] (idle) Found', assistantMessages.length, 'assistant messages');
if (assistantMessages.length === 0) {
console.log('[Voice Mode] (idle) No assistant messages, starting listening');
send({ type: 'START_LISTENING' });
startListening();
return;
}
// Get the LAST (most recent) assistant message
const latestAssistantMessage = assistantMessages[assistantMessages.length - 1];
console.log('[Voice Mode] (idle) Latest message ID:', latestAssistantMessage.id);
console.log('[Voice Mode] (idle) Last spoken message ID:', state.context.lastSpokenMessageId);
// Skip if we've already spoken this message
if (state.context.lastSpokenMessageId === latestAssistantMessage.id) {
console.log('[Voice Mode] (idle) Already spoke latest message, starting listening');
send({ type: 'START_LISTENING' });
startListening();
return;
}
// Extract text from the message
let text = '';
if ('parts' in latestAssistantMessage && Array.isArray((latestAssistantMessage as any).parts)) {
const textPart = (latestAssistantMessage as any).parts.find((p: any) => p.type === 'text');
text = textPart?.text || '';
}
if (text) {
// Play the most recent AI message first, then start listening
console.log('[Voice Mode] (idle) Reading latest AI message:', text.substring(0, 50) + '...');
send({ type: 'AI_RESPONSE_READY', messageId: latestAssistantMessage.id, text });
playAudio(text, latestAssistantMessage.id);
return;
}
// No text found, just start listening
console.log('[Voice Mode] (idle) No text in latest message, starting listening');
send({ type: 'START_LISTENING' });
startListening();
}, [state, messages, send]);
// Stop audio when leaving audio-related states
useEffect(() => {
const isInAudioState = state.hasTag('canSkipAudio');
if (!isInAudioState) {
// We're not in an audio state, make sure everything is stopped
stopAllAudio();
}
}, [state, stopAllAudio]);
// Log state transitions for debugging
useEffect(() => {
console.log('[Voice Mode] === STATE TRANSITION ===');
console.log('[Voice Mode] state.value:', JSON.stringify(state.value));
console.log('[Voice Mode] Active tags:', Array.from(state.tags));
console.log('[Voice Mode] Context:', JSON.stringify(state.context));
}, [state.value]);
// Add initial greeting message on first load
useEffect(() => {
if (messages.length === 0) {
setMessages([
{
id: 'initial-greeting',
role: 'assistant',
parts: [
{
type: 'text',
text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
},
],
} as any,
]);
}
}, []);
// Auto-scroll to bottom
useEffect(() => {
viewport.current?.scrollTo({
top: viewport.current.scrollHeight,
behavior: 'smooth',
});
}, [messages]);
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
const inputText = state.context.input;
if (!inputText.trim() || status === 'submitted' || status === 'streaming') return;
console.log('[Voice Mode] Submitting message:', inputText);
console.log('[Voice Mode] State transition:', state.value);
sendMessage({ text: inputText });
// Clear input via machine context (will be cleared on next state transition)
};
const handleNewConversation = () => {
// Clear all messages and reset to initial greeting
setMessages([
{
id: 'initial-greeting',
role: 'assistant',
parts: [
{
type: 'text',
text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
},
],
} as any,
]);
};
return (
<Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}>
<Group justify="space-between" py="md">
<Title order={2}>
Ponderants Interview
</Title>
<Group gap="md">
<Tooltip label="Start a new conversation">
<Button
variant="subtle"
onClick={handleNewConversation}
disabled={status === 'submitted' || status === 'streaming'}
>
New Conversation
</Button>
</Tooltip>
<UserMenu />
</Group>
</Group>
<ScrollArea
h="100%"
style={{ flex: 1 }}
viewportRef={viewport}
>
<Stack gap="md" pb="xl">
{messages.map((m) => (
<Paper
key={m.id}
withBorder
shadow="md"
p="sm"
radius="lg"
style={{
alignSelf: m.role === 'user' ? 'flex-end' : 'flex-start',
backgroundColor:
m.role === 'user' ? '#343a40' : '#212529',
}}
w="80%"
>
<Text fw={700} size="sm">{m.role === 'user' ? 'You' : 'AI'}</Text>
{/* Extract text from message parts */}
{(() => {
if ('parts' in m && Array.isArray((m as any).parts)) {
return (m as any).parts.map((part: any, i: number) => {
if (part.type === 'text') {
return (
<Text key={i} style={{ whiteSpace: 'pre-wrap' }}>
{part.text}
</Text>
);
}
return null;
});
}
return <Text>Message content unavailable</Text>;
})()}
</Paper>
))}
{/* Typing indicator while AI is generating a response */}
{(status === 'submitted' || status === 'streaming') && (
<Paper
withBorder
shadow="md"
p="sm"
radius="lg"
style={{
alignSelf: 'flex-start',
backgroundColor: '#212529',
}}
w="80%"
>
<Text fw={700} size="sm">AI</Text>
<Group gap="xs" mt="xs">
<Loader size="xs" />
<Text size="sm" c="dimmed">Thinking...</Text>
</Group>
</Paper>
)}
</Stack>
</ScrollArea>
{/* Big Voice Mode Button - shown above text input */}
<Paper withBorder p="md" radius="xl" my="md">
<Stack gap="sm">
<Group gap="sm">
<Button
onClick={handleToggleVoiceMode}
size="xl"
radius="xl"
h={80}
style={{ flex: 1 }}
color={
state.hasTag('canSkipAudio')
? 'blue'
: state.hasTag('userSpeaking') || state.hasTag('timingOut')
? 'green'
: state.hasTag('listening')
? 'yellow'
: state.hasTag('processing')
? 'blue'
: 'gray'
}
variant={!state.hasTag('textMode') && !state.hasTag('voiceIdle') ? 'filled' : 'light'}
leftSection={
state.hasTag('canSkipAudio') ? (
<IconVolume size={32} />
) : state.hasTag('userSpeaking') || state.hasTag('timingOut') || state.hasTag('listening') ? (
<IconMicrophone size={32} />
) : (
<IconMicrophone size={32} />
)
}
disabled={status === 'submitted' || status === 'streaming'}
>
{getVoiceButtonText(state, silenceStartTimeRef.current)}
</Button>
{/* Skip button - shown when audio can be skipped */}
{state.hasTag('canSkipAudio') && (
<Button
onClick={skipAudioAndListen}
size="xl"
radius="xl"
h={80}
color="gray"
variant="outline"
>
Skip
</Button>
)}
</Group>
{/* Test Controls - Development Only */}
{process.env.NODE_ENV === 'development' && (
<Paper withBorder p="sm" radius="md" style={{ backgroundColor: '#1a1b1e' }}>
<Stack gap="xs">
<Text size="xs" fw={700} c="dimmed">DEV: State Machine Testing</Text>
<Text size="xs" c="dimmed">
State: {JSON.stringify(state.value)} | Tags: {Array.from(state.tags).join(', ')}
</Text>
<Group gap="xs">
<Button
size="xs"
onClick={() => send({ type: 'START_LISTENING' })}
disabled={state.hasTag('textMode')}
>
Start Listening
</Button>
<Button
size="xs"
onClick={() => send({ type: 'USER_STARTED_SPEAKING' })}
disabled={!state.hasTag('listening')}
>
Simulate Speech
</Button>
<Button
size="xs"
onClick={() => {
send({ type: 'FINALIZED_PHRASE', phrase: 'Test message' });
}}
disabled={!state.hasTag('userSpeaking') && !state.hasTag('listening')}
>
Add Phrase
</Button>
<Button
size="xs"
onClick={() => send({ type: 'SILENCE_TIMEOUT' })}
disabled={!state.hasTag('timingOut')}
>
Trigger Timeout
</Button>
<Button
size="xs"
onClick={() => {
const testMsg = messages.filter(m => m.role === 'assistant')[0];
if (testMsg) {
const text = (testMsg as any).parts?.[0]?.text || 'Test AI response';
send({ type: 'AI_RESPONSE_READY', messageId: testMsg.id, text });
}
}}
disabled={!state.hasTag('processing')}
>
Simulate AI Response
</Button>
</Group>
</Stack>
</Paper>
)}
{/* Text Input - always available */}
<form onSubmit={handleSubmit}>
<Group>
<TextInput
value={state.context.input}
onChange={(e) => send({ type: 'TRANSCRIPT_UPDATE', transcript: e.currentTarget.value })}
placeholder="Or type your thoughts here..."
style={{ flex: 1 }}
variant="filled"
disabled={!state.hasTag('textMode') && !state.hasTag('voiceIdle')}
/>
<Button
type="submit"
radius="xl"
loading={status === 'submitted' || status === 'streaming'}
disabled={!state.context.input.trim() || (!state.hasTag('textMode') && !state.hasTag('voiceIdle'))}
>
Send
</Button>
</Group>
</form>
</Stack>
</Paper>
</Container>
);
}