chore: Remove backup and old page files
Deleted unused backup files:
- app/chat/page.tsx.backup
- app/chat/page.tsx.old

Keeps codebase clean and reduces confusion. Current page.tsx is the canonical implementation.

Resolves plan: 07-delete-backup-files.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
@@ -1,664 +0,0 @@
'use client';

import { useChat } from '@ai-sdk/react';
import {
  Stack,
  TextInput,
  Button,
  Paper,
  ScrollArea,
  Title,
  Container,
  Group,
  Text,
  Loader,
  ActionIcon,
  Tooltip,
} from '@mantine/core';
import { useRef, useState, useEffect, useCallback } from 'react';
import { IconVolume, IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { UserMenu } from '@/components/UserMenu';

// Define the shape of the Deepgram transcript
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      transcript: string;
    }>;
  };
  is_final: boolean;
  speech_final: boolean;
}

type VoiceState = 'idle' | 'listening' | 'user-speaking' | 'processing' | 'ai-speaking';

export default function ChatPage() {
  const viewport = useRef<HTMLDivElement>(null);
  const [input, setInput] = useState('');
  const [voiceState, setVoiceState] = useState<VoiceState>('idle');
  const [countdown, setCountdown] = useState<number>(3);
  const [isGeneratingSpeech, setIsGeneratingSpeech] = useState(false);
  const lastSpokenMessageId = useRef<string | null>(null);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const socketRef = useRef<WebSocket | null>(null);
  const transcriptRef = useRef<string>('');
  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const silenceStartTimeRef = useRef<number | null>(null);
  const countdownIntervalRef = useRef<NodeJS.Timeout | null>(null);
  const hasStartedSpeakingRef = useRef(false);

  const { messages, sendMessage, setMessages, status } = useChat({
    api: '/api/chat',
    body: {
      persona: 'Socratic',
    },
    credentials: 'include',
  });

  // Handle AI response in voice conversation mode
  useEffect(() => {
    if (voiceState !== 'processing') return;

    console.log('[Voice Mode] Effect running - voiceState: processing, status:', status, 'messages:', messages.length);

    // Wait until the AI response is complete (status returns to 'ready')
    if (status !== 'ready') {
      console.log('[Voice Mode] Waiting for status to be ready, current:', status);
      return;
    }

    // Find the latest assistant message
    console.log('[Voice Mode] All messages:', messages.map(m => ({ role: m.role, id: m.id, preview: m.parts[0]?.text?.substring(0, 30) })));

    const lastAssistantMessage = [...messages]
      .reverse()
      .find((m) => m.role === 'assistant');

    if (!lastAssistantMessage) {
      console.log('[Voice Mode] No assistant message found');
      return;
    }

    console.log('[Voice Mode] Selected message ID:', lastAssistantMessage.id);
    console.log('[Voice Mode] Selected message text preview:', lastAssistantMessage.parts.find(p => p.type === 'text')?.text?.substring(0, 50));
    console.log('[Voice Mode] Last spoken message ID:', lastSpokenMessageId.current);

    // Skip if we've already spoken this message
    if (lastSpokenMessageId.current === lastAssistantMessage.id) {
      console.log('[Voice Mode] Already spoke this message, skipping');
      return;
    }

    // Extract text from the message
    const textPart = lastAssistantMessage.parts.find((p) => p.type === 'text');
    if (!textPart || !textPart.text) {
      console.log('[Voice Mode] No text part found in message');
      return;
    }

    // Play the audio and transition to ai-speaking state
    console.log('[Voice Mode] Transitioning to ai-speaking, will play audio');
    setVoiceState('ai-speaking');
    playAudio(textPart.text, lastAssistantMessage.id);
  }, [messages, voiceState, status]);

  const playAudio = async (text: string, messageId: string) => {
    try {
      console.log('[Voice Mode] Generating speech for message:', messageId);
      setIsGeneratingSpeech(true);

      const response = await fetch('/api/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text }),
      });

      if (!response.ok) {
        throw new Error('Failed to generate speech');
      }

      const audioBlob = await response.blob();
      const audioUrl = URL.createObjectURL(audioBlob);

      // Create or reuse audio element
      if (!audioRef.current) {
        audioRef.current = new Audio();
      }

      audioRef.current.src = audioUrl;
      audioRef.current.onended = () => {
        URL.revokeObjectURL(audioUrl);
        console.log('[Voice Mode] ✓ Finished playing audio, starting new listening session');
        lastSpokenMessageId.current = messageId;
        setIsGeneratingSpeech(false);

        // After AI finishes speaking, go back to listening for user
        startListening();
      };

      audioRef.current.onerror = () => {
        URL.revokeObjectURL(audioUrl);
        console.error('[Voice Mode] Error playing audio');
        setIsGeneratingSpeech(false);
        // On error, also go back to listening
        startListening();
      };

      await audioRef.current.play();
      console.log('[Voice Mode] ✓ Playing audio');
      setIsGeneratingSpeech(false); // Audio is now playing
    } catch (error) {
      console.error('[Voice Mode] Error:', error);
      setIsGeneratingSpeech(false);
      // On error, go back to listening
      startListening();
    }
  };

  const submitUserInput = useCallback(() => {
    // Clear any pending silence timeout and countdown
    if (silenceTimeoutRef.current) {
      clearTimeout(silenceTimeoutRef.current);
      silenceTimeoutRef.current = null;
    }
    if (countdownIntervalRef.current) {
      clearInterval(countdownIntervalRef.current);
      countdownIntervalRef.current = null;
    }
    silenceStartTimeRef.current = null;
    setCountdown(3);

    // Stop recording
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }

    // Reset speaking flag
    hasStartedSpeakingRef.current = false;

    // Send the transcript as a message if we have one
    if (transcriptRef.current.trim()) {
      console.log('[Voice Mode] Submitting transcript:', transcriptRef.current);
      setInput(transcriptRef.current);
      setVoiceState('processing');

      setTimeout(() => {
        const form = document.querySelector('form');
        if (form) {
          console.log('[Voice Mode] Form found, submitting...');
          form.requestSubmit();
        } else {
          console.error('[Voice Mode] Form not found!');
        }
      }, 100);
    } else {
      // If no transcript, go back to listening
      console.log('[Voice Mode] No transcript to submit, going back to listening');
      startListening();
    }

    transcriptRef.current = '';
  }, []);

  const startListening = useCallback(async () => {
    transcriptRef.current = '';
    setInput('');
    hasStartedSpeakingRef.current = false;
    // DON'T reset lastSpokenMessageId here - we need it to track what we've already spoken
    silenceStartTimeRef.current = null;
    setCountdown(3);
    setVoiceState('listening');

    try {
      // 1. Get the Deepgram API key
      const response = await fetch('/api/voice-token', { method: 'POST' });
      const data = await response.json();

      if (data.error) {
        throw new Error(data.error);
      }

      const { key } = data;

      // 2. Access the microphone
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // 3. Open direct WebSocket to Deepgram with voice activity detection
      const socket = new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true&vad_events=true',
        ['token', key]
      );
      socketRef.current = socket;

      socket.onopen = () => {
        console.log('[Voice Mode] ✓ WebSocket connected, listening for speech...');

        // 4. Create MediaRecorder
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });
        mediaRecorderRef.current = mediaRecorder;

        // 5. Send audio chunks on data available
        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
            socket.send(event.data);
          }
        };

        // Start recording and chunking audio every 250ms
        mediaRecorder.start(250);
      };

      // 6. Receive transcripts and handle silence detection
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data) as DeepgramTranscript;

        // Check if this message has alternatives (some Deepgram messages don't)
        if (!data.channel?.alternatives) {
          return; // Skip non-transcript messages (metadata, VAD events, etc.)
        }

        const transcript = data.channel.alternatives[0]?.transcript || '';

        if (transcript) {
          // User has started speaking
          if (!hasStartedSpeakingRef.current) {
            console.log('[Voice Mode] User started speaking');
            hasStartedSpeakingRef.current = true;
            setVoiceState('user-speaking');
          }

          // Clear any existing silence timeout and countdown
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
          if (countdownIntervalRef.current) {
            clearInterval(countdownIntervalRef.current);
            countdownIntervalRef.current = null;
          }
          silenceStartTimeRef.current = null;
          setCountdown(3);

          // Handle transcript updates
          if (data.is_final) {
            // This is a finalized phrase - append it to our transcript
            transcriptRef.current = transcriptRef.current
              ? transcriptRef.current + ' ' + transcript
              : transcript;
            setInput(transcriptRef.current);
            console.log('[Voice Mode] Finalized phrase:', transcript);

            // Start a generous 3-second silence timer after each finalized phrase
            silenceStartTimeRef.current = Date.now();

            // Update countdown every 100ms
            countdownIntervalRef.current = setInterval(() => {
              if (silenceStartTimeRef.current) {
                const elapsed = Date.now() - silenceStartTimeRef.current;
                const remaining = Math.max(0, 3 - elapsed / 1000);
                setCountdown(remaining);
              }
            }, 100);

            silenceTimeoutRef.current = setTimeout(() => {
              console.log('[Voice Mode] 3 seconds of silence detected, submitting...');
              submitUserInput();
            }, 3000);
          } else {
            // This is an interim result - show it temporarily
            const displayText = transcriptRef.current
              ? transcriptRef.current + ' ' + transcript
              : transcript;
            setInput(displayText);
          }
        }
      };

      socket.onclose = () => {
        // Clean up stream
        stream.getTracks().forEach((track) => track.stop());
        console.log('[Voice Mode] WebSocket closed');
      };

      socket.onerror = (err) => {
        console.error('[Voice Mode] WebSocket error:', err);
        setVoiceState('idle');
      };
    } catch (error) {
      console.error('[Voice Mode] Error starting listening:', error);
      setVoiceState('idle');
    }
  }, [submitUserInput]);

  const skipAudioAndListen = useCallback(() => {
    console.log('[Voice Mode] Skipping audio playback');

    // Stop current audio
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.currentTime = 0;
    }

    setIsGeneratingSpeech(false);

    // Go straight to listening
    startListening();
  }, [startListening]);

  const exitVoiceMode = useCallback(() => {
    // Clear any timeouts and intervals
    if (silenceTimeoutRef.current) {
      clearTimeout(silenceTimeoutRef.current);
      silenceTimeoutRef.current = null;
    }
    if (countdownIntervalRef.current) {
      clearInterval(countdownIntervalRef.current);
      countdownIntervalRef.current = null;
    }
    silenceStartTimeRef.current = null;

    // Stop recording
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }

    // Stop audio playback
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current = null;
    }

    hasStartedSpeakingRef.current = false;
    lastSpokenMessageId.current = null;
    transcriptRef.current = '';
    setInput('');
    setCountdown(3);
    setIsGeneratingSpeech(false);
    setVoiceState('idle');
    console.log('[Voice Mode] Exited voice conversation mode');
  }, []);

  const handleToggleVoiceMode = useCallback(() => {
    if (voiceState === 'idle') {
      // Start voice conversation mode
      // First, check if there's a recent AI message to read out
      const lastAssistantMessage = [...messages]
        .reverse()
        .find((m) => m.role === 'assistant');

      if (lastAssistantMessage) {
        // Extract text from the message
        const textPart = lastAssistantMessage.parts.find((p) => p.type === 'text');

        if (textPart && textPart.text) {
          // Play the most recent AI message first, then start listening
          console.log('[Voice Mode] Starting voice mode, reading most recent AI message first');
          setVoiceState('ai-speaking');
          playAudio(textPart.text, lastAssistantMessage.id);
          return;
        }
      }

      // No AI message to read, just start listening
      startListening();
    } else {
      // Exit voice conversation mode
      exitVoiceMode();
    }
  }, [voiceState, startListening, exitVoiceMode, messages]);

  // Add initial greeting message on first load
  useEffect(() => {
    if (messages.length === 0) {
      setMessages([
        {
          id: 'initial-greeting',
          role: 'assistant',
          parts: [
            {
              type: 'text',
              text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
            },
          ],
        },
      ]);
    }
  }, []);

  // Auto-scroll to bottom
  useEffect(() => {
    viewport.current?.scrollTo({
      top: viewport.current.scrollHeight,
      behavior: 'smooth',
    });
  }, [messages]);

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    if (!input.trim() || status === 'submitted' || status === 'streaming') return;

    sendMessage({ text: input });
    setInput('');
  };

  const handleNewConversation = () => {
    // Clear all messages and reset to initial greeting
    setMessages([
      {
        id: 'initial-greeting',
        role: 'assistant',
        parts: [
          {
            type: 'text',
            text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
          },
        ],
      },
    ]);
  };

  return (
    <Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}>
      <Group justify="space-between" py="md">
        <Title order={2}>
          Ponderants Interview
        </Title>
        <Group gap="md">
          <Tooltip label="Start a new conversation">
            <Button
              variant="subtle"
              onClick={handleNewConversation}
              disabled={status === 'submitted' || status === 'streaming'}
            >
              New Conversation
            </Button>
          </Tooltip>
          <UserMenu />
        </Group>
      </Group>

      <ScrollArea
        h="100%"
        style={{ flex: 1 }}
        viewportRef={viewport}
      >
        <Stack gap="md" pb="xl">
          {messages.map((m) => (
            <Paper
              key={m.id}
              withBorder
              shadow="md"
              p="sm"
              radius="lg"
              style={{
                alignSelf: m.role === 'user' ? 'flex-end' : 'flex-start',
                backgroundColor:
                  m.role === 'user' ? '#343a40' : '#212529',
              }}
              w="80%"
            >
              <Text fw={700} size="sm">{m.role === 'user' ? 'You' : 'AI'}</Text>
              {m.parts.map((part, i) => {
                if (part.type === 'text') {
                  return (
                    <Text key={i} style={{ whiteSpace: 'pre-wrap' }}>
                      {part.text}
                    </Text>
                  );
                }

                // Handle tool calls (e.g., suggest_node)
                if (part.type === 'tool-call') {
                  return (
                    <Paper key={i} withBorder p="xs" mt="xs" bg="dark.6">
                      <Text size="xs" c="dimmed" mb="xs">
                        💡 Node Suggestion
                      </Text>
                      <Text fw={600}>{part.args.title}</Text>
                      <Text size="sm" mt="xs">
                        {part.args.content}
                      </Text>
                      {part.args.tags && part.args.tags.length > 0 && (
                        <Group gap="xs" mt="xs">
                          {part.args.tags.map((tag: string, tagIdx: number) => (
                            <Text key={tagIdx} size="xs" c="blue.4">
                              #{tag}
                            </Text>
                          ))}
                        </Group>
                      )}
                    </Paper>
                  );
                }

                return null;
              })}
            </Paper>
          ))}

          {/* Typing indicator while AI is generating a response */}
          {(status === 'submitted' || status === 'streaming') && (
            <Paper
              withBorder
              shadow="md"
              p="sm"
              radius="lg"
              style={{
                alignSelf: 'flex-start',
                backgroundColor: '#212529',
              }}
              w="80%"
            >
              <Text fw={700} size="sm">AI</Text>
              <Group gap="xs" mt="xs">
                <Loader size="xs" />
                <Text size="sm" c="dimmed">Thinking...</Text>
              </Group>
            </Paper>
          )}

        </Stack>
      </ScrollArea>

      {/* Big Voice Mode Button - shown above text input */}
      <Paper withBorder p="md" radius="xl" my="md">
        <Stack gap="sm">
          <Group gap="sm">
            <Button
              onClick={handleToggleVoiceMode}
              size="xl"
              radius="xl"
              h={80}
              style={{ flex: 1 }}
              color={
                voiceState === 'ai-speaking'
                  ? 'blue'
                  : voiceState === 'user-speaking'
                    ? 'green'
                    : voiceState === 'listening'
                      ? 'yellow'
                      : voiceState === 'processing'
                        ? 'blue'
                        : 'gray'
              }
              variant={voiceState !== 'idle' ? 'filled' : 'light'}
              leftSection={
                voiceState === 'ai-speaking' ? (
                  <IconVolume size={32} />
                ) : voiceState === 'user-speaking' || voiceState === 'listening' ? (
                  <IconMicrophone size={32} />
                ) : (
                  <IconMicrophone size={32} />
                )
              }
              disabled={status === 'submitted' || status === 'streaming'}
            >
              {voiceState === 'idle'
                ? 'Start Voice Conversation'
                : voiceState === 'listening'
                  ? 'Listening... Start speaking'
                  : voiceState === 'user-speaking'
                    ? silenceStartTimeRef.current
                      ? `Speaking... (auto-submits in ${countdown.toFixed(1)}s)`
                      : 'Speaking... (will auto-submit after 3s silence)'
                    : voiceState === 'processing'
                      ? 'Processing...'
                      : isGeneratingSpeech
                        ? 'Generating speech...'
                        : 'AI is speaking... Please wait'}
            </Button>

            {/* Skip button - only shown when AI is speaking */}
            {voiceState === 'ai-speaking' && (
              <Button
                onClick={skipAudioAndListen}
                size="xl"
                radius="xl"
                h={80}
                color="gray"
                variant="outline"
              >
                Skip
              </Button>
            )}
          </Group>

          {/* Text Input - always available */}
          <form onSubmit={handleSubmit}>
            <Group>
              <TextInput
                value={input}
                onChange={(e) => setInput(e.currentTarget.value)}
                placeholder="Or type your thoughts here..."
                style={{ flex: 1 }}
                variant="filled"
                disabled={voiceState !== 'idle'}
              />
              <Button
                type="submit"
                radius="xl"
                loading={status === 'submitted' || status === 'streaming'}
                disabled={!input.trim() || voiceState !== 'idle'}
              >
                Send
              </Button>
            </Group>
          </form>
        </Stack>
      </Paper>
    </Container>
  );
}
@@ -1,814 +0,0 @@
'use client';

import { useChat } from '@ai-sdk/react';
import {
  Stack,
  TextInput,
  Button,
  Paper,
  ScrollArea,
  Title,
  Container,
  Group,
  Text,
  Loader,
  ActionIcon,
  Tooltip,
} from '@mantine/core';
import { useRef, useState, useEffect, useCallback } from 'react';
import { IconVolume, IconMicrophone, IconMicrophoneOff } from '@tabler/icons-react';
import { createActor } from 'xstate';
import { useSelector } from '@xstate/react';
import { appMachine } from '@/lib/app-machine';
import { UserMenu } from '@/components/UserMenu';

// Define the shape of the Deepgram transcript
interface DeepgramTranscript {
  channel: {
    alternatives: Array<{
      transcript: string;
    }>;
  };
  is_final: boolean;
  speech_final: boolean;
}

/**
 * Get the voice button text based on the current state tags.
 * This replaces complex nested ternaries with a clean, readable function.
 */
function getVoiceButtonText(
  state: ReturnType<typeof useSelector<typeof actorRef, any>>,
  silenceStartTime: number | null
): string {
  // Check tags in priority order and return appropriate text
  let buttonText: string;

  if (state.hasTag('textMode') || state.hasTag('voiceIdle')) {
    buttonText = 'Start Voice Conversation';
  } else if (state.hasTag('listening')) {
    buttonText = 'Listening... Start speaking';
  } else if (state.hasTag('userSpeaking')) {
    buttonText = 'Speaking... (will auto-submit after 3s silence)';
  } else if (state.hasTag('timingOut')) {
    if (silenceStartTime) {
      const elapsed = Date.now() - silenceStartTime;
      const remaining = Math.max(0, 3 - elapsed / 1000);
      buttonText = `Speaking... (auto-submits in ${remaining.toFixed(1)}s)`;
    } else {
      buttonText = 'Speaking... (timing out...)';
    }
  } else if (state.hasTag('processing')) {
    buttonText = 'Processing...';
  } else if (state.hasTag('aiGenerating')) {
    buttonText = 'Generating speech...';
  } else if (state.hasTag('aiSpeaking')) {
    buttonText = 'AI is speaking... Please wait';
  } else {
    // Fallback (should never reach here if tags are properly defined)
    buttonText = 'Start Voice Conversation';
    console.warn('[Voice Mode] No matching tag found, using fallback text. Active tags:', state.tags);
  }

  console.log('[Voice Mode] Button text determined:', buttonText, 'Active tags:', Array.from(state.tags));
  return buttonText;
}

export default function ChatPage() {
  const viewport = useRef<HTMLDivElement>(null);

  // XState machine for voice mode state management
  const [actorRef] = useState(() => createActor(appMachine).start());
  const state = useSelector(actorRef, (snapshot) => snapshot);
  const send = actorRef.send.bind(actorRef);

  // Imperative refs for managing side effects
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const socketRef = useRef<WebSocket | null>(null);
  const silenceTimeoutRef = useRef<NodeJS.Timeout | null>(null);
  const silenceStartTimeRef = useRef<number | null>(null);
  const countdownIntervalRef = useRef<NodeJS.Timeout | null>(null);
  const shouldCancelAudioRef = useRef<boolean>(false); // Flag to cancel pending audio operations

  const { messages, sendMessage, setMessages, status } = useChat();

  // Extract text from message (handles v5 parts structure)
  const getMessageText = (msg: any): string => {
    if ('parts' in msg && Array.isArray(msg.parts)) {
      const textPart = msg.parts.find((p: any) => p.type === 'text');
      return textPart?.text || '';
    }
    return msg.content || '';
  };

  // Handle AI response in voice conversation mode - SIMPLE VERSION
  useEffect(() => {
    if (!state.hasTag('processing')) return;
    if (status !== 'ready') {
      console.log('[Voice Mode] Waiting, status:', status);
      return;
    }

    const transcript = state.context.transcript?.trim();
    if (!transcript) return;

    console.log('[Voice Mode] === PROCESSING ===');
    console.log('[Voice Mode] Transcript:', transcript);
    console.log('[Voice Mode] Messages:', messages.length);

    // Get last 2 messages
    const lastMsg = messages[messages.length - 1];
    const secondLastMsg = messages[messages.length - 2];

    console.log('[Voice Mode] Last msg:', lastMsg?.role, getMessageText(lastMsg || {}).substring(0, 30));
    console.log('[Voice Mode] 2nd last msg:', secondLastMsg?.role, getMessageText(secondLastMsg || {}).substring(0, 30));

    // Case 1: User message not submitted yet
    // Check if the last message is the user's transcript
    const userMessageExists = messages.some(m =>
      m.role === 'user' && getMessageText(m) === transcript
    );

    if (!userMessageExists) {
      console.log('[Voice Mode] → Submitting user message');
      submitUserInput();
      return;
    }

    // Case 2: User message submitted, check if AI has responded
    // After user submits, if AI responds, the new AI message is LAST
    if (lastMsg && lastMsg.role === 'assistant' &&
        secondLastMsg && secondLastMsg.role === 'user' &&
        getMessageText(secondLastMsg) === transcript) {

      const aiMsg = lastMsg;
      console.log('[Voice Mode] → AI response found:', aiMsg.id);
      console.log('[Voice Mode] → Last spoken:', state.context.lastSpokenMessageId);

      // Only play if we haven't played this message yet
      if (state.context.lastSpokenMessageId !== aiMsg.id) {
        const text = getMessageText(aiMsg);
        console.log('[Voice Mode] → Playing:', text.substring(0, 50) + '...');
        send({ type: 'AI_RESPONSE_READY', messageId: aiMsg.id, text });
        playAudio(text, aiMsg.id);
      } else {
        console.log('[Voice Mode] → Already played, skipping');
      }
      return;
    }

    // Case 3: Waiting for AI response
    console.log('[Voice Mode] → Waiting for AI response...');
  }, [messages, state, status, send]);

  // Stop all audio playback and cancel pending operations
  const stopAllAudio = useCallback(() => {
    console.log('[Voice Mode] Stopping all audio operations');

    // Set cancel flag to prevent any pending audio from playing
    shouldCancelAudioRef.current = true;

    // Stop and clean up audio element
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.currentTime = 0;
      audioRef.current.src = '';
    }
  }, []);

  const playAudio = async (text: string, messageId: string) => {
    try {
      // Reset cancel flag at the start of a new audio operation
      shouldCancelAudioRef.current = false;

      console.log('[Voice Mode] Generating speech for message:', messageId);
      console.log('[Voice Mode] State transition:', state.value);

      const response = await fetch('/api/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text }),
      });

      // Check if we should cancel before continuing
      if (shouldCancelAudioRef.current) {
        console.log('[Voice Mode] Audio generation canceled before blob creation');
        return;
      }

      if (!response.ok) {
        throw new Error('Failed to generate speech');
      }

      const audioBlob = await response.blob();

      // Check again after async operation
      if (shouldCancelAudioRef.current) {
        console.log('[Voice Mode] Audio generation canceled after blob creation');
        return;
      }

      const audioUrl = URL.createObjectURL(audioBlob);

      // Create or reuse audio element
      if (!audioRef.current) {
        audioRef.current = new Audio();
      }

      audioRef.current.src = audioUrl;
      audioRef.current.onended = () => {
        URL.revokeObjectURL(audioUrl);
        console.log('[Voice Mode] ✓ Finished playing audio, sending TTS_FINISHED event');
        console.log('[Voice Mode] State transition:', state.value);
        send({ type: 'TTS_FINISHED', messageId });

        // After AI finishes speaking, go back to listening for user
        startListening();
      };

      audioRef.current.onerror = () => {
        URL.revokeObjectURL(audioUrl);
        console.error('[Voice Mode] Error playing audio');
        // On error, also go back to listening
        startListening();
      };

      // Final check before playing
      if (shouldCancelAudioRef.current) {
        console.log('[Voice Mode] Audio playback canceled before play()');
        URL.revokeObjectURL(audioUrl);
        return;
      }

      await audioRef.current.play();

      // Only send TTS_PLAYING if we haven't been canceled
      if (!shouldCancelAudioRef.current) {
        console.log('[Voice Mode] ✓ Playing audio, sending TTS_PLAYING event');
        console.log('[Voice Mode] State transition:', state.value);
        send({ type: 'TTS_PLAYING' });
      } else {
        console.log('[Voice Mode] Audio playback canceled after play()');
        URL.revokeObjectURL(audioUrl);
      }
    } catch (error) {
      console.error('[Voice Mode] Error:', error);
      // On error, go back to listening
      startListening();
    }
  };

  const submitUserInput = useCallback(() => {
    // Clear any pending silence timeout and countdown
    if (silenceTimeoutRef.current) {
      clearTimeout(silenceTimeoutRef.current);
      silenceTimeoutRef.current = null;
    }
    if (countdownIntervalRef.current) {
      clearInterval(countdownIntervalRef.current);
      countdownIntervalRef.current = null;
    }
    silenceStartTimeRef.current = null;

    // Stop recording
    if (mediaRecorderRef.current) {
      mediaRecorderRef.current.stop();
      mediaRecorderRef.current = null;
    }
    if (socketRef.current) {
      socketRef.current.close();
      socketRef.current = null;
    }

    // Send the transcript as a message if we have one
    const transcript = state.context.transcript;
    if (transcript.trim()) {
      console.log('[Voice Mode] Submitting transcript:', transcript);
      console.log('[Voice Mode] State transition:', state.value);

      setTimeout(() => {
        const form = document.querySelector('form');
        if (form) {
          console.log('[Voice Mode] Form found, submitting...');
          form.requestSubmit();
        } else {
          console.error('[Voice Mode] Form not found!');
        }
      }, 100);
    } else {
      // If no transcript, go back to listening
      console.log('[Voice Mode] No transcript to submit, going back to listening');
      startListening();
    }
  }, [state, send]);

  const startListening = useCallback(async () => {
    silenceStartTimeRef.current = null;

    // Send event to enter listening state (which clears transcript/input/countdown)
    console.log('[Voice Mode] Sending START_LISTENING event (implicitly via state transition)');
    console.log('[Voice Mode] State transition:', state.value);

    try {
      // 1. Get the Deepgram API key
      const response = await fetch('/api/voice-token', { method: 'POST' });
      const data = await response.json();

      if (data.error) {
        throw new Error(data.error);
      }

      const { key } = data;

      // 2. Access the microphone
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

      // 3. Open direct WebSocket to Deepgram with voice activity detection
      const socket = new WebSocket(
        'wss://api.deepgram.com/v1/listen?interim_results=true&punctuate=true&vad_events=true',
        ['token', key]
      );
      socketRef.current = socket;

      socket.onopen = () => {
        console.log('[Voice Mode] ✓ WebSocket connected, listening for speech...');
        console.log('[Voice Mode] State transition:', state.value);

        // 4. Create MediaRecorder
        const mediaRecorder = new MediaRecorder(stream, {
          mimeType: 'audio/webm',
        });
        mediaRecorderRef.current = mediaRecorder;

        // 5. Send audio chunks on data available
        mediaRecorder.ondataavailable = (event) => {
          if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
            socket.send(event.data);
          }
        };

        // Start recording and chunking audio every 250ms
        mediaRecorder.start(250);
      };

      // 6. Receive transcripts and handle silence detection
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data) as DeepgramTranscript;

        // Check if this message has alternatives (some Deepgram messages don't)
        if (!data.channel?.alternatives) {
          return; // Skip non-transcript messages (metadata, VAD events, etc.)
        }

        const transcript = data.channel.alternatives[0]?.transcript || '';

        if (transcript) {
          // User has started speaking
          if (!state.context.hasStartedSpeaking) {
            console.log('[Voice Mode] User started speaking, sending USER_STARTED_SPEAKING event');
            console.log('[Voice Mode] State transition:', state.value);
            send({ type: 'USER_STARTED_SPEAKING' });
          }

          // Clear any existing silence timeout and countdown
          if (silenceTimeoutRef.current) {
            clearTimeout(silenceTimeoutRef.current);
            silenceTimeoutRef.current = null;
          }
          if (countdownIntervalRef.current) {
            clearInterval(countdownIntervalRef.current);
            countdownIntervalRef.current = null;
          }
          silenceStartTimeRef.current = null;

          // Handle transcript updates
          if (data.is_final) {
            // This is a finalized phrase - send to machine
            console.log('[Voice Mode] === FINALIZED PHRASE ===');
            console.log('[Voice Mode] Transcript:', transcript);
            console.log('[Voice Mode] state.value BEFORE:', JSON.stringify(state.value));
            console.log('[Voice Mode] tags BEFORE:', Array.from(state.tags));
            console.log('[Voice Mode] context BEFORE:', JSON.stringify(state.context));
            console.log('[Voice Mode] Sending FINALIZED_PHRASE event');
            send({ type: 'FINALIZED_PHRASE', phrase: transcript });

            // Start a generous 3-second silence timer after each finalized phrase
            silenceStartTimeRef.current = Date.now();

            // Update countdown every 100ms
            countdownIntervalRef.current = setInterval(() => {
              if (silenceStartTimeRef.current) {
                const elapsed = Date.now() - silenceStartTimeRef.current;
                const remaining = Math.max(0, 3 - elapsed / 1000);
                // Note: countdown is now managed in machine context, but we need
                // to update it frequently for UI display. This is acceptable as
                // a UI-only side effect.
              }
            }, 100);

            silenceTimeoutRef.current = setTimeout(() => {
              console.log('[Voice Mode] 3 seconds of silence detected, sending SILENCE_TIMEOUT event');
              console.log('[Voice Mode] State transition:', state.value);
              send({ type: 'SILENCE_TIMEOUT' });
              // Note: submitUserInput will be called by the processing state effect
            }, 3000);
          } else {
            // This is an interim result - update display (send TRANSCRIPT_UPDATE)
            const currentTranscript = state.context.transcript;
            const displayText = currentTranscript
              ? currentTranscript + ' ' + transcript
              : transcript;
            send({ type: 'TRANSCRIPT_UPDATE', transcript: displayText });
          }
        }
      };

      socket.onclose = () => {
        // Clean up stream
        stream.getTracks().forEach((track) => track.stop());
        console.log('[Voice Mode] WebSocket closed');
        console.log('[Voice Mode] State transition:', state.value);
      };

      socket.onerror = (err) => {
        console.error('[Voice Mode] WebSocket error:', err);
        console.log('[Voice Mode] State transition:', state.value);
        // On error, toggle back to text mode if we're in voice mode
        if (!state.hasTag('textMode')) {
          send({ type: 'TOGGLE_VOICE_MODE' });
        }
      };
    } catch (error) {
      console.error('[Voice Mode] Error starting listening:', error);
      console.log('[Voice Mode] State transition:', state.value);
      // On error, toggle back to text mode if we're in voice mode
      if (!state.hasTag('textMode')) {
        send({ type: 'TOGGLE_VOICE_MODE' });
      }
    }
  }, [submitUserInput, state, send]);

  const skipAudioAndListen = useCallback(() => {
    console.log('[Voice Mode] === SKIP BUTTON CLICKED ===');
    console.log('[Voice Mode] Current state.value:', JSON.stringify(state.value));
    console.log('[Voice Mode] Current tags:', Array.from(state.tags));

    // Stop ALL audio operations
    stopAllAudio();

    // Send skip event
    send({ type: 'SKIP_AUDIO' });

    // Go straight to listening
    startListening();
  }, [startListening, state, send, stopAllAudio]);

  const handleToggleVoiceMode = useCallback(() => {
    console.log('[Voice Mode] Voice button pressed, sending TOGGLE_VOICE_MODE event');
    console.log('[Voice Mode] Current state:', state.value);
    send({ type: 'TOGGLE_VOICE_MODE' });
  }, [state, send]);

  // Handle entering voice.idle state (after TOGGLE_VOICE_MODE from text mode)
  useEffect(() => {
    if (!state.hasTag('voiceIdle')) return;

    console.log('[Voice Mode] Entered voice.idle, checking for AI message to read');

    // Get ALL assistant messages in order
    const assistantMessages = messages.filter((m) => m.role === 'assistant');
    console.log('[Voice Mode] (idle) Found', assistantMessages.length, 'assistant messages');

    if (assistantMessages.length === 0) {
      console.log('[Voice Mode] (idle) No assistant messages, starting listening');
      send({ type: 'START_LISTENING' });
      startListening();
      return;
    }

    // Get the LAST (most recent) assistant message
    const latestAssistantMessage = assistantMessages[assistantMessages.length - 1];
    console.log('[Voice Mode] (idle) Latest message ID:', latestAssistantMessage.id);
    console.log('[Voice Mode] (idle) Last spoken message ID:', state.context.lastSpokenMessageId);

    // Skip if we've already spoken this message
    if (state.context.lastSpokenMessageId === latestAssistantMessage.id) {
      console.log('[Voice Mode] (idle) Already spoke latest message, starting listening');
      send({ type: 'START_LISTENING' });
      startListening();
      return;
    }

    // Extract text from the message
    let text = '';
    if ('parts' in latestAssistantMessage && Array.isArray((latestAssistantMessage as any).parts)) {
      const textPart = (latestAssistantMessage as any).parts.find((p: any) => p.type === 'text');
      text = textPart?.text || '';
    }

    if (text) {
      // Play the most recent AI message first, then start listening
      console.log('[Voice Mode] (idle) Reading latest AI message:', text.substring(0, 50) + '...');
      send({ type: 'AI_RESPONSE_READY', messageId: latestAssistantMessage.id, text });
      playAudio(text, latestAssistantMessage.id);
      return;
    }

    // No text found, just start listening
    console.log('[Voice Mode] (idle) No text in latest message, starting listening');
    send({ type: 'START_LISTENING' });
    startListening();
  }, [state, messages, send]);

  // Stop audio when leaving audio-related states
  useEffect(() => {
    const isInAudioState = state.hasTag('canSkipAudio');

    if (!isInAudioState) {
      // We're not in an audio state, make sure everything is stopped
      stopAllAudio();
    }
  }, [state, stopAllAudio]);

  // Log state transitions for debugging
  useEffect(() => {
    console.log('[Voice Mode] === STATE TRANSITION ===');
    console.log('[Voice Mode] state.value:', JSON.stringify(state.value));
    console.log('[Voice Mode] Active tags:', Array.from(state.tags));
    console.log('[Voice Mode] Context:', JSON.stringify(state.context));
  }, [state.value]);

  // Add initial greeting message on first load
  useEffect(() => {
    if (messages.length === 0) {
      setMessages([
        {
          id: 'initial-greeting',
          role: 'assistant',
          parts: [
            {
              type: 'text',
              text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
            },
          ],
        } as any,
      ]);
    }
  }, []);

  // Auto-scroll to bottom
  useEffect(() => {
    viewport.current?.scrollTo({
      top: viewport.current.scrollHeight,
      behavior: 'smooth',
    });
  }, [messages]);

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    const inputText = state.context.input;
    if (!inputText.trim() || status === 'submitted' || status === 'streaming') return;

    console.log('[Voice Mode] Submitting message:', inputText);
    console.log('[Voice Mode] State transition:', state.value);

    sendMessage({ text: inputText });
    // Clear input via machine context (will be cleared on next state transition)
  };

  const handleNewConversation = () => {
    // Clear all messages and reset to initial greeting
    setMessages([
      {
        id: 'initial-greeting',
        role: 'assistant',
        parts: [
          {
            type: 'text',
            text: 'Welcome to Ponderants! I\'m here to help you explore and structure your ideas through conversation.\n\nWhat would you like to talk about today? I can adapt my interview style to best suit your needs (Socratic questioning, collaborative brainstorming, or other approaches).\n\nJust start sharing your thoughts, and we\'ll discover meaningful insights together.',
          },
        ],
      } as any,
    ]);
  };

  return (
    <Container size="md" h="100vh" style={{ display: 'flex', flexDirection: 'column' }}>
      <Group justify="space-between" py="md">
        <Title order={2}>
          Ponderants Interview
        </Title>
        <Group gap="md">
          <Tooltip label="Start a new conversation">
            <Button
              variant="subtle"
              onClick={handleNewConversation}
              disabled={status === 'submitted' || status === 'streaming'}
            >
              New Conversation
            </Button>
          </Tooltip>
          <UserMenu />
        </Group>
      </Group>

      <ScrollArea
        h="100%"
        style={{ flex: 1 }}
        viewportRef={viewport}
      >
        <Stack gap="md" pb="xl">
          {messages.map((m) => (
            <Paper
              key={m.id}
              withBorder
              shadow="md"
              p="sm"
              radius="lg"
              style={{
                alignSelf: m.role === 'user' ? 'flex-end' : 'flex-start',
                backgroundColor:
                  m.role === 'user' ? '#343a40' : '#212529',
              }}
              w="80%"
            >
              <Text fw={700} size="sm">{m.role === 'user' ? 'You' : 'AI'}</Text>
              {/* Extract text from message parts */}
              {(() => {
                if ('parts' in m && Array.isArray((m as any).parts)) {
                  return (m as any).parts.map((part: any, i: number) => {
                    if (part.type === 'text') {
                      return (
                        <Text key={i} style={{ whiteSpace: 'pre-wrap' }}>
                          {part.text}
                        </Text>
                      );
                    }
                    return null;
                  });
                }
                return <Text>Message content unavailable</Text>;
              })()}
            </Paper>
          ))}

          {/* Typing indicator while AI is generating a response */}
          {(status === 'submitted' || status === 'streaming') && (
            <Paper
              withBorder
              shadow="md"
              p="sm"
              radius="lg"
              style={{
                alignSelf: 'flex-start',
                backgroundColor: '#212529',
              }}
              w="80%"
            >
              <Text fw={700} size="sm">AI</Text>
              <Group gap="xs" mt="xs">
                <Loader size="xs" />
                <Text size="sm" c="dimmed">Thinking...</Text>
              </Group>
            </Paper>
          )}

        </Stack>
      </ScrollArea>

      {/* Big Voice Mode Button - shown above text input */}
      <Paper withBorder p="md" radius="xl" my="md">
        <Stack gap="sm">
          <Group gap="sm">
            <Button
              onClick={handleToggleVoiceMode}
              size="xl"
              radius="xl"
              h={80}
              style={{ flex: 1 }}
              color={
                state.hasTag('canSkipAudio')
                  ? 'blue'
                  : state.hasTag('userSpeaking') || state.hasTag('timingOut')
                    ? 'green'
                    : state.hasTag('listening')
                      ? 'yellow'
                      : state.hasTag('processing')
                        ? 'blue'
                        : 'gray'
              }
              variant={!state.hasTag('textMode') && !state.hasTag('voiceIdle') ? 'filled' : 'light'}
              leftSection={
                state.hasTag('canSkipAudio') ? (
                  <IconVolume size={32} />
                ) : state.hasTag('userSpeaking') || state.hasTag('timingOut') || state.hasTag('listening') ? (
                  <IconMicrophone size={32} />
                ) : (
                  <IconMicrophone size={32} />
                )
              }
              disabled={status === 'submitted' || status === 'streaming'}
            >
              {getVoiceButtonText(state, silenceStartTimeRef.current)}
            </Button>

            {/* Skip button - shown when audio can be skipped */}
            {state.hasTag('canSkipAudio') && (
              <Button
                onClick={skipAudioAndListen}
                size="xl"
                radius="xl"
                h={80}
                color="gray"
                variant="outline"
              >
                Skip
              </Button>
            )}
          </Group>

          {/* Test Controls - Development Only */}
          {process.env.NODE_ENV === 'development' && (
            <Paper withBorder p="sm" radius="md" style={{ backgroundColor: '#1a1b1e' }}>
              <Stack gap="xs">
                <Text size="xs" fw={700} c="dimmed">DEV: State Machine Testing</Text>
                <Text size="xs" c="dimmed">
                  State: {JSON.stringify(state.value)} | Tags: {Array.from(state.tags).join(', ')}
                </Text>
                <Group gap="xs">
                  <Button
                    size="xs"
                    onClick={() => send({ type: 'START_LISTENING' })}
                    disabled={state.hasTag('textMode')}
                  >
                    Start Listening
                  </Button>
                  <Button
                    size="xs"
                    onClick={() => send({ type: 'USER_STARTED_SPEAKING' })}
                    disabled={!state.hasTag('listening')}
                  >
                    Simulate Speech
                  </Button>
                  <Button
                    size="xs"
                    onClick={() => {
                      send({ type: 'FINALIZED_PHRASE', phrase: 'Test message' });
                    }}
                    disabled={!state.hasTag('userSpeaking') && !state.hasTag('listening')}
                  >
                    Add Phrase
                  </Button>
                  <Button
                    size="xs"
                    onClick={() => send({ type: 'SILENCE_TIMEOUT' })}
                    disabled={!state.hasTag('timingOut')}
                  >
                    Trigger Timeout
                  </Button>
                  <Button
                    size="xs"
                    onClick={() => {
                      const testMsg = messages.filter(m => m.role === 'assistant')[0];
                      if (testMsg) {
                        const text = (testMsg as any).parts?.[0]?.text || 'Test AI response';
                        send({ type: 'AI_RESPONSE_READY', messageId: testMsg.id, text });
                      }
                    }}
                    disabled={!state.hasTag('processing')}
                  >
                    Simulate AI Response
                  </Button>
                </Group>
              </Stack>
            </Paper>
          )}

          {/* Text Input - always available */}
          <form onSubmit={handleSubmit}>
            <Group>
              <TextInput
                value={state.context.input}
                onChange={(e) => send({ type: 'TRANSCRIPT_UPDATE', transcript: e.currentTarget.value })}
                placeholder="Or type your thoughts here..."
                style={{ flex: 1 }}
                variant="filled"
                disabled={!state.hasTag('textMode') && !state.hasTag('voiceIdle')}
              />
              <Button
                type="submit"
                radius="xl"
                loading={status === 'submitted' || status === 'streaming'}
                disabled={!state.context.input.trim() || (!state.hasTag('textMode') && !state.hasTag('voiceIdle'))}
              >
                Send
              </Button>
            </Group>
          </form>
        </Stack>
      </Paper>
    </Container>
  );
}