chore: Remove backup and old page files
Deleted unused backup files:

- app/chat/page.tsx.backup
- app/chat/page.tsx.old

Keeps codebase clean and reduces confusion. Current page.tsx is the canonical implementation.

Resolves plan: 07-delete-backup-files.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
@@ -1,664 +0,0 @@
'use client';

import { useChat } from '@ai-sdk/react';
import {
  Stack,
  TextInput,
  Button,
  Paper,
  ScrollArea,
  Title,
  Container,
  Group,
  Text,
  Loader,
  ActionIcon,
  Tooltip,
} from '@mantine/core';
import { useRef, useState, useEffect, useCallback } from 'react';
import { IconVolume, IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { UserMenu } from '@/components/UserMenu';

// Define the shape of the Deepgram transcript
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      transcript: string;
    }>;
  };
  is_final: boolean;
  speech_final: boolean;
}

type VoiceState = 'idle' | 'listening' | 'user-speaking' | 'processing' | 'ai-speaking';

export default function ChatPage() {
  const viewport = useRef<HTMLDivElement>(null);
  const [input, setInput] = useState('');
  const [voiceState, setVoiceState] = useState<VoiceState>('idle');
  const [countdown, setCountdown] = useState<number>(3);
  const [isGeneratingSpeech, setIsGeneratingSpeech] = useState(false);
  const lastSpokenMessageId = useRef<string | null>(null);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const socketRef = useRef<WebSocket | null>(null);
  const transcriptRef = useRef<string>('');
  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const silenceStartTimeRef = useRef<number | null>(null);
  const countdownIntervalRef = useRef<NodeJS.Timeout | null>(null);
  const hasStartedSpeakingRef = useRef(false);

  const { messages, sendMessage, setMessages, status } = useChat({
    api: '/api/chat',
    body: {
      persona: 'Socratic',
    },
    credentials: 'include',
  });

  // Handle AI response in voice conversation mode
  useEffect(() => {
    if (voiceState !== 'processing') return;

    console.log('[Voice Mode] Effect running - voiceState: processing, status:', status, 'messages:', messages.length);

    // Wait until the AI response is complete (status returns to 'ready')
    if (status !== 'ready') {
      console.log('[Voice Mode] Waiting for status to be ready, current:', status);
      return;
    }

    // Find the latest assistant message
    console.log('[Voice Mode] All messages:', messages.map(m => ({ role: m.role, id: m.id, preview: m.parts[0]?.text?.substring(0, 30) })));

    const lastAssistantMessage = [...messages]
      .reverse()
      .find((m) => m.role === 'assistant');

    if (!lastAssistantMessage) {
      console.log('[Voice Mode] No assistant message found');
      return;
    }

    console.log('[Voice Mode] Selected message ID:', lastAssistantMessage.id);
    console.log('[Voice Mode] Selected message text preview:', lastAssistantMessage.parts.find(p => p.type === 'text')?.text?.substring(0, 50));
    console.log('[Voice Mode] Last spoken message ID:', lastSpokenMessageId.current);

    // Skip if we've already spoken this message
    if (lastSpokenMessageId.current === lastAssistantMessage.id) {
      console.log('[Voice Mode] Already spoke this message, skipping');
      return;
    }

    // Extract text from the message
    const textPart = lastAssistantMessage.parts.find((p) => p.type === 'text');
    if (!textPart || !textPart.text) {
      console.log('[Voice Mode] No text part found in message');
      return;
    }

    // Play the audio and transition to ai-speaking state
    console.log('[Voice Mode] Transitioning to ai-speaking, will play audio');
    setVoiceState('ai-speaking');
    playAudio(textPart.text, lastAssistantMessage.id);
  }, [messages, voiceState, status]);

  const playAudio = async (text: string, messageId: string) => {
    try {
      console.log('[Voice Mode] Generating speech for message:', messageId);
      setIsGeneratingSpeech(true);

      const response = await fetch('/api/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text }),
      });

      if (!response.ok) {
        throw new Error('Failed to generate speech');
      }

      const audioBlob = await response.blob();
      const audioUrl = URL.createObjectURL(audioBlob);

      // Create or reuse audio element
      if (!audioRef.current) {
        audioRef.current = new Audio();
      }

      audioRef.current.src = audioUrl;
      audioRef.current.onended = () => {
        URL.revokeObjectURL(audioUrl);
        console.log('[Voice Mode] ✓ Finished playing audio, starting new listening session');
        lastSpokenMessageId.current = messageId;
        setIsGeneratingSpeech(false);

        // After AI finishes speaking, go back to listening for user
        startListening();
      };

      audioRef.current.onerror = () => {
        URL.revokeObjectURL(audioUrl);
        console.error('[Voice Mode] Error playing audio');
        setIsGeneratingSpeech(false);
        // On error, also go back to listening
        startListening();
      };

      await audioRef.current.play();
      console.log('[Voice Mode] ✓ Playing audio');
      setIsGeneratingSpeech(false); // Audio is now playing
    } catch (error) {
      console.error('[Voice Mode] Error:', error);
      setIsGeneratingSpeech(false);
      // On error, go back to listening
      startListening();
    }
  };

  const submitUserInput = useCallback(() => {
    // Clear any pending silence timeout and countdown
    if (silenceTimeoutRef.current) {
      clearTimeout(silenceTimeoutRef.current);
      silenceTimeoutRef.current = null;
    }
    if (countdownIntervalRef.current) {
      clearInterval(countdownIntervalRef.current);
      countdownIntervalRef.current = null;
    }
    silenceStartTimeRef.current = null;
    setCountdown(3);

    // Stop recording
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }

    // Reset speaking flag
    hasStartedSpeakingRef.current = false;

    // Send the transcript as a message if we have one
    if (transcriptRef.current.trim()) {
      console.log('[Voice Mode] Submitting transcript:', transcriptRef.current);
      setInput(transcriptRef.current);
      setVoiceState('processing');

      setTimeout(() => {
        const form = document.querySelector('form');
        if (form) {
          console.log('[Voice Mode] Form found, submitting...');
          form.requestSubmit();
        } else {
          console.error('[Voice Mode] Form not found!');
        }
      }, 100);
    } else {
      // If no transcript, go back to listening
      console.log('[Voice Mode] No transcript to submit, going back to listening');
      startListening();
    }

    transcriptRef.current = '';
  }, []);

  const startListening = useCallback(async () => {
    transcriptRef.current = '';
    setInput('');
    hasStartedSpeakingRef.current = false;
    // DON'T reset lastSpokenMessageId here - we need it to track what we've already spoken
    silenceStartTimeRef.current = null;
    setCountdown(3);
    setVoiceState('listening');

    try {
      // 1. Get the Deepgram API key
      const response = await fetch('/api/voice-token', { method: 'POST' });
      const data = await response.json();

      if (data.error) {
        throw new Error(data.error);
      }

      const { key } = data;

      // 2. Access the microphone
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // 3. Open direct WebSocket to Deepgram with voice activity detection
      const socket = new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true&vad_events=true',
        ['token', key]
      );
      socketRef.current = socket;

      socket.onopen = () => {
        console.log('[Voice Mode] ✓ WebSocket connected, listening for speech...');

        // 4. Create MediaRecorder
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });
        mediaRecorderRef.current = mediaRecorder;

        // 5. Send audio chunks on data available
        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
            socket.send(event.data);
          }
        };

        // Start recording and chunking audio every 250ms
        mediaRecorder.start(250);
      };

      // 6. Receive transcripts and handle silence detection
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data) as DeepgramTranscript;

        // Check if this message has alternatives (some Deepgram messages don't)
        if (!data.channel?.alternatives) {
          return; // Skip non-transcript messages (metadata, VAD events, etc.)
        }

        const transcript = data.channel.alternatives[0]?.transcript || '';

        if (transcript) {
          // User has started speaking
          if (!hasStartedSpeakingRef.current) {
            console.log('[Voice Mode] User started speaking');
            hasStartedSpeakingRef.current = true;
            setVoiceState('user-speaking');
          }

          // Clear any existing silence timeout and countdown
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
          if (countdownIntervalRef.current) {
            clearInterval(countdownIntervalRef.current);
            countdownIntervalRef.current = null;
          }
          silenceStartTimeRef.current = null;
          setCountdown(3);

          // Handle transcript updates
          if (data.is_final) {
            // This is a finalized phrase - append it to our transcript
            transcriptRef.current = transcriptRef.current
              ? transcriptRef.current + ' ' + transcript
              : transcript;
            setInput(transcriptRef.current);
            console.log('[Voice Mode] Finalized phrase:', transcript);

            // Start a generous 3-second silence timer after each finalized phrase
            silenceStartTimeRef.current = Date.now();

            // Update countdown every 100ms
            countdownIntervalRef.current = setInterval(() => {
              if (silenceStartTimeRef.current) {
                const elapsed = Date.now() - silenceStartTimeRef.current;
                const remaining = Math.max(0, 3 - elapsed / 1000);
                setCountdown(remaining);
              }
            }, 100);

            silenceTimeoutRef.current = setTimeout(() => {
              console.log('[Voice Mode] 3 seconds of silence detected, submitting...');
              submitUserInput();
            }, 3000);
          } else {
            // This is an interim result - show it temporarily
            const displayText = transcriptRef.current
              ? transcriptRef.current + ' ' + transcript
              : transcript;
            setInput(displayText);
          }
        }
      };

      socket.onclose = () => {
        // Clean up stream
        stream.getTracks().forEach((track) => track.stop());
        console.log('[Voice Mode] WebSocket closed');
      };

      socket.onerror = (err) => {
        console.error('[Voice Mode] WebSocket error:', err);
        setVoiceState('idle');
      };
    } catch (error) {
      console.error('[Voice Mode] Error starting listening:', error);
      setVoiceState('idle');
    }
  }, [submitUserInput]);

  const skipAudioAndListen = useCallback(() => {
    console.log('[Voice Mode] Skipping audio playback');

    // Stop current audio
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.currentTime = 0;
    }

    setIsGeneratingSpeech(false);

    // Go straight to listening
    startListening();
  }, [startListening]);

  const exitVoiceMode = useCallback(() => {
    // Clear any timeouts and intervals
    if (silenceTimeoutRef.current) {
      clearTimeout(silenceTimeoutRef.current);
      silenceTimeoutRef.current = null;
    }
    if (countdownIntervalRef.current) {
      clearInterval(countdownIntervalRef.current);
      countdownIntervalRef.current = null;
    }
    silenceStartTimeRef.current = null;

    // Stop recording
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }

    // Stop audio playback
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current = null;
    }

    hasStartedSpeakingRef.current = false;
    lastSpokenMessageId.current = null;
    transcriptRef.current = '';
    setInput('');
    setCountdown(3);
    setIsGeneratingSpeech(false);
    setVoiceState('idle');
    console.log('[Voice Mode] Exited voice conversation mode');
  }, []);

  const handleToggleVoiceMode = useCallback(() => {
    if (voiceState === 'idle') {
      // Start voice conversation mode
      // First, check if there's a recent AI message to read out
      const lastAssistantMessage = [...messages]
        .reverse()
        .find((m) => m.role === 'assistant');

      if (lastAssistantMessage) {
        // Extract text from the message
        const textPart = lastAssistantMessage.parts.find((p) => p.type === 'text');

        if (textPart && textPart.text) {
          // Play the most recent AI message first, then start listening
          console.log('[Voice Mode] Starting voice mode, reading most recent AI message first');
          setVoiceState('ai-speaking');
          playAudio(textPart.text, lastAssistantMessage.id);
          return;
        }
      }

      // No AI message to read, just start listening
      startListening();
    } else {
      // Exit voice conversation mode
      exitVoiceMode();
    }
  }, [voiceState, startListening, exitVoiceMode, messages]);

  // Add initial greeting message on first load
  useEffect(() => {
    if (messages.length === 0) {
      setMessages([
        {
          id: 'initial-greeting',
          role: 'assistant',
          parts: [
            {
              type: 'text',
              text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
            },
          ],
        },
      ]);
    }
  }, []);

  // Auto-scroll to bottom
  useEffect(() => {
    viewport.current?.scrollTo({
      top: viewport.current.scrollHeight,
      behavior: 'smooth',
    });
  }, [messages]);

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    if (!input.trim() || status === 'submitted' || status === 'streaming') return;

    sendMessage({ text: input });
    setInput('');
  };

  const handleNewConversation = () => {
    // Clear all messages and reset to initial greeting
    setMessages([
      {
        id: 'initial-greeting',
        role: 'assistant',
        parts: [
          {
            type: 'text',
            text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
          },
        ],
      },
    ]);
  };

  return (
    <Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}>
      <Group justify="space-between" py="md">
        <Title order={2}>
          Ponderants Interview
        </Title>
        <Group gap="md">
          <Tooltip label="Start a new conversation">
            <Button
              variant="subtle"
              onClick={handleNewConversation}
              disabled={status === 'submitted' || status === 'streaming'}
            >
              New Conversation
            </Button>
          </Tooltip>
          <UserMenu />
        </Group>
      </Group>

      <ScrollArea
        h="100%"
        style={{ flex: 1 }}
        viewportRef={viewport}
      >
        <Stack gap="md" pb="xl">
          {messages.map((m) => (
            <Paper
              key={m.id}
              withBorder
              shadow="md"
              p="sm"
              radius="lg"
              style={{
                alignSelf: m.role === 'user' ? 'flex-end' : 'flex-start',
                backgroundColor:
                  m.role === 'user' ? '#343a40' : '#212529',
              }}
              w="80%"
            >
              <Text fw={700} size="sm">{m.role === 'user' ? 'You' : 'AI'}</Text>
              {m.parts.map((part, i) => {
                if (part.type === 'text') {
                  return (
                    <Text key={i} style={{ whiteSpace: 'pre-wrap' }}>
                      {part.text}
                    </Text>
                  );
                }

                // Handle tool calls (e.g., suggest_node)
                if (part.type === 'tool-call') {
                  return (
                    <Paper key={i} withBorder p="xs" mt="xs" bg="dark.6">
                      <Text size="xs" c="dimmed" mb="xs">
                        💡 Node Suggestion
                      </Text>
                      <Text fw={600}>{part.args.title}</Text>
                      <Text size="sm" mt="xs">
                        {part.args.content}
                      </Text>
                      {part.args.tags && part.args.tags.length > 0 && (
                        <Group gap="xs" mt="xs">
                          {part.args.tags.map((tag: string, tagIdx: number) => (
                            <Text key={tagIdx} size="xs" c="blue.4">
                              #{tag}
                            </Text>
                          ))}
                        </Group>
                      )}
                    </Paper>
                  );
                }

                return null;
              })}
            </Paper>
          ))}

          {/* Typing indicator while AI is generating a response */}
          {(status === 'submitted' || status === 'streaming') && (
            <Paper
              withBorder
              shadow="md"
              p="sm"
              radius="lg"
              style={{
                alignSelf: 'flex-start',
                backgroundColor: '#212529',
              }}
              w="80%"
            >
              <Text fw={700} size="sm">AI</Text>
              <Group gap="xs" mt="xs">
                <Loader size="xs" />
                <Text size="sm" c="dimmed">Thinking...</Text>
              </Group>
            </Paper>
          )}

        </Stack>
      </ScrollArea>

      {/* Big Voice Mode Button - shown above text input */}
      <Paper withBorder p="md" radius="xl" my="md">
        <Stack gap="sm">
          <Group gap="sm">
            <Button
              onClick={handleToggleVoiceMode}
              size="xl"
              radius="xl"
              h={80}
              style={{ flex: 1 }}
              color={
                voiceState === 'ai-speaking'
                  ? 'blue'
                  : voiceState === 'user-speaking'
                    ? 'green'
                    : voiceState === 'listening'
                      ? 'yellow'
                      : voiceState === 'processing'
                        ? 'blue'
                        : 'gray'
              }
              variant={voiceState !== 'idle' ? 'filled' : 'light'}
              leftSection={
                voiceState === 'ai-speaking' ? (
                  <IconVolume size={32} />
                ) : voiceState === 'user-speaking' || voiceState === 'listening' ? (
                  <IconMicrophone size={32} />
                ) : (
                  <IconMicrophone size={32} />
                )
              }
              disabled={status === 'submitted' || status === 'streaming'}
            >
              {voiceState === 'idle'
                ? 'Start Voice Conversation'
                : voiceState === 'listening'
                  ? 'Listening... Start speaking'
                  : voiceState === 'user-speaking'
                    ? silenceStartTimeRef.current
                      ? `Speaking... (auto-submits in ${countdown.toFixed(1)}s)`
                      : 'Speaking... (will auto-submit after 3s silence)'
                    : voiceState === 'processing'
                      ? 'Processing...'
                      : isGeneratingSpeech
                        ? 'Generating speech...'
                        : 'AI is speaking... Please wait'}
            </Button>

            {/* Skip button - only shown when AI is speaking */}
            {voiceState === 'ai-speaking' && (
              <Button
                onClick={skipAudioAndListen}
                size="xl"
                radius="xl"
                h={80}
                color="gray"
                variant="outline"
              >
                Skip
              </Button>
            )}
          </Group>

          {/* Text Input - always available */}
          <form onSubmit={handleSubmit}>
            <Group>
              <TextInput
                value={input}
                onChange={(e) => setInput(e.currentTarget.value)}
                placeholder="Or type your thoughts here..."
                style={{ flex: 1 }}
                variant="filled"
                disabled={voiceState !== 'idle'}
              />
              <Button
                type="submit"
                radius="xl"
                loading={status === 'submitted' || status === 'streaming'}
                disabled={!input.trim() || voiceState !== 'idle'}
              >
                Send
              </Button>
            </Group>
          </form>
        </Stack>
      </Paper>
    </Container>
  );
}
@@ -1,814 +0,0 @@
'use client';

import { useChat } from '@ai-sdk/react';
import {
  Stack,
  TextInput,
  Button,
  Paper,
  ScrollArea,
  Title,
  Container,
  Group,
  Text,
  Loader,
  ActionIcon,
  Tooltip,
} from '@mantine/core';
import { useRef, useState, useEffect, useCallback } from 'react';
import { IconVolume, IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { createActor } from 'xstate';
import { useSelector } from '@xstate/react';
import { appMachine } from '@/lib/app-machine';
import { UserMenu } from '@/components/UserMenu';

// Define the shape of the Deepgram transcript
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      transcript: string;
    }>;
  };
  is_final: boolean;
  speech_final: boolean;
}

/**
 * Get the voice button text based on the current state tags.
 * This replaces complex nested ternaries with a clean, readable function.
 */
function getVoiceButtonText(
  state: ReturnType<typeof useSelector<typeof actorRef, any>>,
  silenceStartTime: number | null
): string {
  // Check tags in priority order and return appropriate text
  let buttonText: string;

  if (state.hasTag('textMode') || state.hasTag('voiceIdle')) {
    buttonText = 'Start Voice Conversation';
  } else if (state.hasTag('listening')) {
    buttonText = 'Listening... Start speaking';
  } else if (state.hasTag('userSpeaking')) {
    buttonText = 'Speaking... (will auto-submit after 3s silence)';
  } else if (state.hasTag('timingOut')) {
    if (silenceStartTime) {
      const elapsed = Date.now() - silenceStartTime;
      const remaining = Math.max(0, 3 - elapsed / 1000);
      buttonText = `Speaking... (auto-submits in ${remaining.toFixed(1)}s)`;
    } else {
      buttonText = 'Speaking... (timing out...)';
    }
  } else if (state.hasTag('processing')) {
    buttonText = 'Processing...';
  } else if (state.hasTag('aiGenerating')) {
    buttonText = 'Generating speech...';
  } else if (state.hasTag('aiSpeaking')) {
    buttonText = 'AI is speaking... Please wait';
  } else {
    // Fallback (should never reach here if tags are properly defined)
    buttonText = 'Start Voice Conversation';
    console.warn('[Voice Mode] No matching tag found, using fallback text. Active tags:', state.tags);
  }

  console.log('[Voice Mode] Button text determined:', buttonText, 'Active tags:', Array.from(state.tags));
  return buttonText;
}

export default function ChatPage() {
  const viewport = useRef<HTMLDivElement>(null);

  // XState machine for voice mode state management
  const [actorRef] = useState(() => createActor(appMachine).start());
  const state = useSelector(actorRef, (snapshot) => snapshot);
  const send = actorRef.send.bind(actorRef);

  // Imperative refs for managing side effects
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const socketRef = useRef<WebSocket | null>(null);
  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const silenceStartTimeRef = useRef<number | null>(null);
  const countdownIntervalRef = useRef<NodeJS.Timeout | null>(null);
  const shouldCancelAudioRef = useRef<boolean>(false); // Flag to cancel pending audio operations

  const { messages, sendMessage, setMessages, status } = useChat();

  // Extract text from message (handles v5 parts structure)
  const getMessageText = (msg: any): string => {
    if ('parts' in msg && Array.isArray(msg.parts)) {
      const textPart = msg.parts.find((p: any) => p.type === 'text');
      return textPart?.text || '';
    }
    return msg.content || '';
  };

  // Handle AI response in voice conversation mode - SIMPLE VERSION
  useEffect(() => {
    if (!state.hasTag('processing')) return;
    if (status !== 'ready') {
      console.log('[Voice Mode] Waiting, status:', status);
      return;
    }

    const transcript = state.context.transcript?.trim();
    if (!transcript) return;

    console.log('[Voice Mode] === PROCESSING ===');
    console.log('[Voice Mode] Transcript:', transcript);
    console.log('[Voice Mode] Messages:', messages.length);

    // Get last 2 messages
    const lastMsg = messages[messages.length - 1];
    const secondLastMsg = messages[messages.length - 2];

    console.log('[Voice Mode] Last msg:', lastMsg?.role, getMessageText(lastMsg || {}).substring(0, 30));
    console.log('[Voice Mode] 2nd last msg:', secondLastMsg?.role, getMessageText(secondLastMsg || {}).substring(0, 30));

    // Case 1: User message not submitted yet
    // Check if the last message is the user's transcript
    const userMessageExists = messages.some(m =>
      m.role === 'user' && getMessageText(m) === transcript
    );

    if (!userMessageExists) {
      console.log('[Voice Mode] → Submitting user message');
      submitUserInput();
      return;
    }

    // Case 2: User message submitted, check if AI has responded
    // After user submits, if AI responds, the new AI message is LAST
    if (lastMsg && lastMsg.role === 'assistant' &&
        secondLastMsg && secondLastMsg.role === 'user' &&
        getMessageText(secondLastMsg) === transcript) {

      const aiMsg = lastMsg;
      console.log('[Voice Mode] → AI response found:', aiMsg.id);
      console.log('[Voice Mode] → Last spoken:', state.context.lastSpokenMessageId);

      // Only play if we haven't played this message yet
      if (state.context.lastSpokenMessageId !== aiMsg.id) {
        const text = getMessageText(aiMsg);
        console.log('[Voice Mode] → Playing:', text.substring(0, 50) + '...');
        send({ type: 'AI_RESPONSE_READY', messageId: aiMsg.id, text });
        playAudio(text, aiMsg.id);
      } else {
        console.log('[Voice Mode] → Already played, skipping');
      }
      return;
    }

    // Case 3: Waiting for AI response
    console.log('[Voice Mode] → Waiting for AI response...');
  }, [messages, state, status, send]);

  // Stop all audio playback and cancel pending operations
  const stopAllAudio = useCallback(() => {
    console.log('[Voice Mode] Stopping all audio operations');

    // Set cancel flag to prevent any pending audio from playing
    shouldCancelAudioRef.current = true;

    // Stop and clean up audio element
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.currentTime = 0;
      audioRef.current.src = '';
    }
  }, []);

  const playAudio = async (text: string, messageId: string) => {
    try {
      // Reset cancel flag at the start of a new audio operation
      shouldCancelAudioRef.current = false;

      console.log('[Voice Mode] Generating speech for message:', messageId);
      console.log('[Voice Mode] State transition:', state.value);

      const response = await fetch('/api/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text }),
      });

      // Check if we should cancel before continuing
      if (shouldCancelAudioRef.current) {
        console.log('[Voice Mode] Audio generation canceled before blob creation');
        return;
      }

      if (!response.ok) {
        throw new Error('Failed to generate speech');
      }

      const audioBlob = await response.blob();

      // Check again after async operation
      if (shouldCancelAudioRef.current) {
        console.log('[Voice Mode] Audio generation canceled after blob creation');
        return;
      }

      const audioUrl = URL.createObjectURL(audioBlob);

      // Create or reuse audio element
      if (!audioRef.current) {
        audioRef.current = new Audio();
      }

      audioRef.current.src = audioUrl;
      audioRef.current.onended = () => {
        URL.revokeObjectURL(audioUrl);
        console.log('[Voice Mode] ✓ Finished playing audio, sending TTS_FINISHED event');
        console.log('[Voice Mode] State transition:', state.value);
        send({ type: 'TTS_FINISHED', messageId });

        // After AI finishes speaking, go back to listening for user
        startListening();
      };

      audioRef.current.onerror = () => {
        URL.revokeObjectURL(audioUrl);
        console.error('[Voice Mode] Error playing audio');
        // On error, also go back to listening
        startListening();
      };

      // Final check before playing
      if (shouldCancelAudioRef.current) {
        console.log('[Voice Mode] Audio playback canceled before play()');
        URL.revokeObjectURL(audioUrl);
        return;
      }

      await audioRef.current.play();

      // Only send TTS_PLAYING if we haven't been canceled
      if (!shouldCancelAudioRef.current) {
        console.log('[Voice Mode] ✓ Playing audio, sending TTS_PLAYING event');
        console.log('[Voice Mode] State transition:', state.value);
        send({ type: 'TTS_PLAYING' });
      } else {
        console.log('[Voice Mode] Audio playback canceled after play()');
        URL.revokeObjectURL(audioUrl);
      }
    } catch (error) {
      console.error('[Voice Mode] Error:', error);
      // On error, go back to listening
      startListening();
    }
  };

  const submitUserInput = useCallback(() => {
    // Clear any pending silence timeout and countdown
    if (silenceTimeoutRef.current) {
      clearTimeout(silenceTimeoutRef.current);
      silenceTimeoutRef.current = null;
    }
    if (countdownIntervalRef.current) {
      clearInterval(countdownIntervalRef.current);
      countdownIntervalRef.current = null;
    }
    silenceStartTimeRef.current = null;

    // Stop recording
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }

    // Send the transcript as a message if we have one
    const transcript = state.context.transcript;
    if (transcript.trim()) {
      console.log('[Voice Mode] Submitting transcript:', transcript);
      console.log('[Voice Mode] State transition:', state.value);

      setTimeout(() => {
        const form = document.querySelector('form');
        if (form) {
          console.log('[Voice Mode] Form found, submitting...');
          form.requestSubmit();
        } else {
          console.error('[Voice Mode] Form not found!');
        }
      }, 100);
    } else {
      // If no transcript, go back to listening
      console.log('[Voice Mode] No transcript to submit, going back to listening');
      startListening();
    }
  }, [state, send]);

  const startListening = useCallback(async () => {
    silenceStartTimeRef.current = null;

    // Send event to enter listening state (which clears transcript/input/countdown)
    console.log('[Voice Mode] Sending START_LISTENING event (implicitly via state transition)');
    console.log('[Voice Mode] State transition:', state.value);

    try {
      // 1. Get the Deepgram API key
      const response = await fetch('/api/voice-token', { method: 'POST' });
      const data = await response.json();

      if (data.error) {
        throw new Error(data.error);
      }

      const { key } = data;

      // 2. Access the microphone
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // 3. Open direct WebSocket to Deepgram with voice activity detection
      const socket = new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true&vad_events=true',
        ['token', key]
      );
      socketRef.current = socket;

      socket.onopen = () => {
        console.log('[Voice Mode] ✓ WebSocket connected, listening for speech...');
        console.log('[Voice Mode] State transition:', state.value);

        // 4. Create MediaRecorder
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });
        mediaRecorderRef.current = mediaRecorder;

        // 5. Send audio chunks on data available
        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
            socket.send(event.data);
          }
        };

        // Start recording and chunking audio every 250ms
        mediaRecorder.start(250);
      };

      // 6. Receive transcripts and handle silence detection
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data) as DeepgramTranscript;

        // Check if this message has alternatives (some Deepgram messages don't)
        if (!data.channel?.alternatives) {
          return; // Skip non-transcript messages (metadata, VAD events, etc.)
        }

        const transcript = data.channel.alternatives[0]?.transcript || '';

        if (transcript) {
          // User has started speaking
          if (!state.context.hasStartedSpeaking) {
            console.log('[Voice Mode] User started speaking, sending USER_STARTED_SPEAKING event');
            console.log('[Voice Mode] State transition:', state.value);
            send({ type: 'USER_STARTED_SPEAKING' });
          }

          // Clear any existing silence timeout and countdown
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
          if (countdownIntervalRef.current) {
            clearInterval(countdownIntervalRef.current);
            countdownIntervalRef.current = null;
          }
          silenceStartTimeRef.current = null;

          // Handle transcript updates
          if (data.is_final) {
            // This is a finalized phrase - send to machine
            console.log('[Voice Mode] === FINALIZED PHRASE ===');
            console.log('[Voice Mode] Transcript:', transcript);
            console.log('[Voice Mode] state.value BEFORE:', JSON.stringify(state.value));
            console.log('[Voice Mode] tags BEFORE:', Array.from(state.tags));
            console.log('[Voice Mode] context BEFORE:', JSON.stringify(state.context));
            console.log('[Voice Mode] Sending FINALIZED_PHRASE event');
            send({ type: 'FINALIZED_PHRASE', phrase: transcript });

            // Start a generous 3-second silence timer after each finalized phrase
            silenceStartTimeRef.current = Date.now();

            // Update countdown every 100ms
            countdownIntervalRef.current = setInterval(() => {
              if (silenceStartTimeRef.current) {
                const elapsed = Date.now() - silenceStartTimeRef.current;
                const remaining = Math.max(0, 3 - elapsed / 1000);
                // Note: countdown is now managed in machine context, but we need
                // to update it frequently for UI display. This is acceptable as
                // a UI-only side effect.
              }
            }, 100);

            silenceTimeoutRef.current = setTimeout(() => {
              console.log('[Voice Mode] 3 seconds of silence detected, sending SILENCE_TIMEOUT event');
              console.log('[Voice Mode] State transition:', state.value);
              send({ type: 'SILENCE_TIMEOUT' });
              // Note: submitUserInput will be called by the processing state effect
            }, 3000);
          } else {
            // This is an interim result - update display (send TRANSCRIPT_UPDATE)
            const currentTranscript = state.context.transcript;
            const displayText = currentTranscript
              ? currentTranscript + ' ' + transcript
              : transcript;
            send({ type: 'TRANSCRIPT_UPDATE', transcript: displayText });
          }
        }
      };

      socket.onclose = () => {
        // Clean up stream
        stream.getTracks().forEach((track) => track.stop());
        console.log('[Voice Mode] WebSocket closed');
        console.log('[Voice Mode] State transition:', state.value);
      };

      socket.onerror = (err) => {
        console.error('[Voice Mode] WebSocket error:', err);
        console.log('[Voice Mode] State transition:', state.value);
        // On error, toggle back to text mode if we're in voice mode
        if (!state.hasTag('textMode')) {
          send({ type: 'TOGGLE_VOICE_MODE' });
        }
      };
    } catch (error) {
      console.error('[Voice Mode] Error starting listening:', error);
      console.log('[Voice Mode] State transition:', state.value);
      // On error, toggle back to text mode if we're in voice mode
      if (!state.hasTag('textMode')) {
        send({ type: 'TOGGLE_VOICE_MODE' });
      }
    }
  }, [submitUserInput, state, send]);

  const skipAudioAndListen = useCallback(() => {
    console.log('[Voice Mode] === SKIP BUTTON CLICKED ===');
    console.log('[Voice Mode] Current state.value:', JSON.stringify(state.value));
    console.log('[Voice Mode] Current tags:', Array.from(state.tags));

    // Stop ALL audio operations
    stopAllAudio();

    // Send skip event
    send({ type: 'SKIP_AUDIO' });

    // Go straight to listening
    startListening();
  }, [startListening, state, send, stopAllAudio]);

  const handleToggleVoiceMode = useCallback(() => {
    console.log('[Voice Mode] Voice button pressed, sending TOGGLE_VOICE_MODE event');
    console.log('[Voice Mode] Current state:', state.value);
    send({ type: 'TOGGLE_VOICE_MODE' });
  }, [state, send]);

  // Handle entering voice.idle state (after TOGGLE_VOICE_MODE from text mode)
  useEffect(() => {
    if (!state.hasTag('voiceIdle')) return;

    console.log('[Voice Mode] Entered voice.idle, checking for AI message to read');

    // Get ALL assistant messages in order
    const assistantMessages = messages.filter((m) => m.role === 'assistant');
    console.log('[Voice Mode] (idle) Found', assistantMessages.length, 'assistant messages');

    if (assistantMessages.length === 0) {
      console.log('[Voice Mode] (idle) No assistant messages, starting listening');
      send({ type: 'START_LISTENING' });
      startListening();
      return;
    }

    // Get the LAST (most recent) assistant message
    const latestAssistantMessage = assistantMessages[assistantMessages.length - 1];
    console.log('[Voice Mode] (idle) Latest message ID:', latestAssistantMessage.id);
    console.log('[Voice Mode] (idle) Last spoken message ID:', state.context.lastSpokenMessageId);

    // Skip if we've already spoken this message
    if (state.context.lastSpokenMessageId === latestAssistantMessage.id) {
      console.log('[Voice Mode] (idle) Already spoke latest message, starting listening');
      send({ type: 'START_LISTENING' });
      startListening();
      return;
    }

    // Extract text from the message
    let text = '';
    if ('parts' in latestAssistantMessage && Array.isArray((latestAssistantMessage as any).parts)) {
      const textPart = (latestAssistantMessage as any).parts.find((p: any) => p.type === 'text');
      text = textPart?.text || '';
    }

    if (text) {
      // Play the most recent AI message first, then start listening
      console.log('[Voice Mode] (idle) Reading latest AI message:', text.substring(0, 50) + '...');
      send({ type: 'AI_RESPONSE_READY', messageId: latestAssistantMessage.id, text });
      playAudio(text, latestAssistantMessage.id);
      return;
    }

    // No text found, just start listening
    console.log('[Voice Mode] (idle) No text in latest message, starting listening');
    send({ type: 'START_LISTENING' });
    startListening();
  }, [state, messages, send]);

  // Stop audio when leaving audio-related states
  useEffect(() => {
    const isInAudioState = state.hasTag('canSkipAudio');

    if (!isInAudioState) {
      // We're not in an audio state, make sure everything is stopped
      stopAllAudio();
    }
  }, [state, stopAllAudio]);

  // Log state transitions for debugging
  useEffect(() => {
    console.log('[Voice Mode] === STATE TRANSITION ===');
    console.log('[Voice Mode] state.value:', JSON.stringify(state.value));
    console.log('[Voice Mode] Active tags:', Array.from(state.tags));
    console.log('[Voice Mode] Context:', JSON.stringify(state.context));
  }, [state.value]);

  // Add initial greeting message on first load
  useEffect(() => {
    if (messages.length === 0) {
      setMessages([
        {
          id: 'initial-greeting',
          role: 'assistant',
          parts: [
            {
              type: 'text',
              text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
            },
          ],
        } as any,
      ]);
    }
  }, []);

  // Auto-scroll to bottom
  useEffect(() => {
    viewport.current?.scrollTo({
      top: viewport.current.scrollHeight,
      behavior: 'smooth',
    });
  }, [messages]);

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    const inputText = state.context.input;
    if (!inputText.trim() || status === 'submitted' || status === 'streaming') return;

    console.log('[Voice Mode] Submitting message:', inputText);
    console.log('[Voice Mode] State transition:', state.value);

    sendMessage({ text: inputText });
    // Clear input via machine context (will be cleared on next state transition)
  };

  const handleNewConversation = () => {
    // Clear all messages and reset to initial greeting
    setMessages([
      {
        id: 'initial-greeting',
        role: 'assistant',
        parts: [
          {
            type: 'text',
            text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
          },
        ],
      } as any,
    ]);
  };

  return (
    <Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}>
      <Group justify="space-between" py="md">
        <Title order={2}>
          Ponderants Interview
        </Title>
        <Group gap="md">
          <Tooltip label="Start a new conversation">
            <Button
              variant="subtle"
              onClick={handleNewConversation}
              disabled={status === 'submitted' || status === 'streaming'}
            >
              New Conversation
            </Button>
          </Tooltip>
          <UserMenu />
        </Group>
      </Group>

      <ScrollArea
        h="100%"
        style={{ flex: 1 }}
        viewportRef={viewport}
      >
        <Stack gap="md" pb="xl">
          {messages.map((m) => (
            <Paper
              key={m.id}
              withBorder
              shadow="md"
              p="sm"
              radius="lg"
              style={{
                alignSelf: m.role === 'user' ? 'flex-end' : 'flex-start',
                backgroundColor:
                  m.role === 'user' ? '#343a40' : '#212529',
              }}
              w="80%"
            >
              <Text fw={700} size="sm">{m.role === 'user' ? 'You' : 'AI'}</Text>
              {/* Extract text from message parts */}
              {(() => {
                if ('parts' in m && Array.isArray((m as any).parts)) {
                  return (m as any).parts.map((part: any, i: number) => {
                    if (part.type === 'text') {
                      return (
                        <Text key={i} style={{ whiteSpace: 'pre-wrap' }}>
                          {part.text}
                        </Text>
                      );
                    }
                    return null;
                  });
                }
                return <Text>Message content unavailable</Text>;
              })()}
            </Paper>
          ))}

          {/* Typing indicator while AI is generating a response */}
          {(status === 'submitted' || status === 'streaming') && (
            <Paper
              withBorder
              shadow="md"
              p="sm"
              radius="lg"
              style={{
                alignSelf: 'flex-start',
                backgroundColor: '#212529',
              }}
              w="80%"
            >
              <Text fw={700} size="sm">AI</Text>
              <Group gap="xs" mt="xs">
                <Loader size="xs" />
                <Text size="sm" c="dimmed">Thinking...</Text>
              </Group>
            </Paper>
          )}

        </Stack>
      </ScrollArea>

      {/* Big Voice Mode Button - shown above text input */}
      <Paper withBorder p="md" radius="xl" my="md">
        <Stack gap="sm">
          <Group gap="sm">
            <Button
              onClick={handleToggleVoiceMode}
              size="xl"
              radius="xl"
              h={80}
              style={{ flex: 1 }}
              color={
                state.hasTag('canSkipAudio')
                  ? 'blue'
                  : state.hasTag('userSpeaking') || state.hasTag('timingOut')
                    ? 'green'
                    : state.hasTag('listening')
                      ? 'yellow'
                      : state.hasTag('processing')
                        ? 'blue'
                        : 'gray'
              }
              variant={!state.hasTag('textMode') && !state.hasTag('voiceIdle') ? 'filled' : 'light'}
              leftSection={
                state.hasTag('canSkipAudio') ? (
                  <IconVolume size={32} />
                ) : state.hasTag('userSpeaking') || state.hasTag('timingOut') || state.hasTag('listening') ? (
                  <IconMicrophone size={32} />
                ) : (
                  <IconMicrophone size={32} />
                )
              }
              disabled={status === 'submitted' || status === 'streaming'}
            >
              {getVoiceButtonText(state, silenceStartTimeRef.current)}
            </Button>

            {/* Skip button - shown when audio can be skipped */}
            {state.hasTag('canSkipAudio') && (
              <Button
                onClick={skipAudioAndListen}
                size="xl"
                radius="xl"
                h={80}
                color="gray"
                variant="outline"
              >
                Skip
              </Button>
            )}
          </Group>

          {/* Test Controls - Development Only */}
          {process.env.NODE_ENV === 'development' && (
            <Paper withBorder p="sm" radius="md" style={{ backgroundColor: '#1a1b1e' }}>
              <Stack gap="xs">
                <Text size="xs" fw={700} c="dimmed">DEV: State Machine Testing</Text>
                <Text size="xs" c="dimmed">
                  State: {JSON.stringify(state.value)} | Tags: {Array.from(state.tags).join(', ')}
                </Text>
                <Group gap="xs">
                  <Button
                    size="xs"
                    onClick={() => send({ type: 'START_LISTENING' })}
                    disabled={state.hasTag('textMode')}
                  >
                    Start Listening
                  </Button>
                  <Button
                    size="xs"
                    onClick={() => send({ type: 'USER_STARTED_SPEAKING' })}
                    disabled={!state.hasTag('listening')}
                  >
                    Simulate Speech
                  </Button>
                  <Button
                    size="xs"
                    onClick={() => {
                      send({ type: 'FINALIZED_PHRASE', phrase: 'Test message' });
                    }}
                    disabled={!state.hasTag('userSpeaking') && !state.hasTag('listening')}
                  >
                    Add Phrase
                  </Button>
                  <Button
                    size="xs"
                    onClick={() => send({ type: 'SILENCE_TIMEOUT' })}
                    disabled={!state.hasTag('timingOut')}
                  >
                    Trigger Timeout
                  </Button>
                  <Button
                    size="xs"
                    onClick={() => {
                      const testMsg = messages.filter(m => m.role === 'assistant')[0];
                      if (testMsg) {
                        const text = (testMsg as any).parts?.[0]?.text || 'Test AI response';
                        send({ type: 'AI_RESPONSE_READY', messageId: testMsg.id, text });
                      }
                    }}
                    disabled={!state.hasTag('processing')}
                  >
                    Simulate AI Response
                  </Button>
                </Group>
              </Stack>
            </Paper>
          )}

          {/* Text Input - always available */}
          <form onSubmit={handleSubmit}>
            <Group>
              <TextInput
                value={state.context.input}
                onChange={(e) => send({ type: 'TRANSCRIPT_UPDATE', transcript: e.currentTarget.value })}
                placeholder="Or type your thoughts here..."
                style={{ flex: 1 }}
                variant="filled"
                disabled={!state.hasTag('textMode') && !state.hasTag('voiceIdle')}
              />
              <Button
                type="submit"
                radius="xl"
                loading={status === 'submitted' || status === 'streaming'}
                disabled={!state.context.input.trim() || (!state.hasTag('textMode') && !state.hasTag('voiceIdle'))}
              >
                Send
              </Button>
            </Group>
          </form>
        </Stack>
      </Paper>
    </Container>
  );
}