diff --git a/app/api/calculate-graph/route.ts b/app/api/calculate-graph/route.ts index 341769f..2963218 100644 --- a/app/api/calculate-graph/route.ts +++ b/app/api/calculate-graph/route.ts @@ -33,13 +33,24 @@ export async function POST(request: NextRequest) { const db = await connectToDB(); // 1. Fetch all nodes that have an embedding but no coords_3d (filtered by user_did) + // This query is idempotent - it's safe to run multiple times const query = `SELECT id, embedding FROM node WHERE user_did = $userDid AND embedding != NONE AND coords_3d = NONE`; const results = await db.query<[Array<{ id: string; embedding: number[] }>]>(query, { userDid }); const nodes = results[0] || []; + if (nodes.length === 0) { + // All nodes already have coordinates - nothing to do (idempotency) + console.log('[Calculate Graph] All nodes already have coordinates'); + return NextResponse.json( + { message: 'All nodes already have coordinates', nodes_mapped: 0 }, + { status: 200 } + ); + } + if (nodes.length < 3) { // UMAP needs at least 3 points to work well + console.log(`[Calculate Graph] Not enough nodes to map (${nodes.length}/3)`); return NextResponse.json( { message: 'Not enough nodes to map. Create at least 3 nodes with content.' 
}, { status: 200 } diff --git a/app/api/galaxy/route.ts b/app/api/galaxy/route.ts index ed82065..0449fac 100644 --- a/app/api/galaxy/route.ts +++ b/app/api/galaxy/route.ts @@ -59,38 +59,8 @@ export async function GET(request: NextRequest) { const linkResults = await db.query<[LinkData[]]>(linksQuery); const links = linkResults[0] || []; - // If we have nodes but no coordinates, check if we should calculate - if (nodes.length === 0) { - // Check if we have nodes with embeddings but no coordinates - const unmappedQuery = ` - SELECT count() as count - FROM node - WHERE user_did = $userDid AND embedding != NONE AND coords_3d = NONE - GROUP ALL - `; - const unmappedResults = await db.query<[Array<{ count: number }>]>(unmappedQuery, { userDid }); - const unmappedCount = unmappedResults[0]?.[0]?.count || 0; - - if (unmappedCount >= 3) { - console.log(`[Galaxy API] Found ${unmappedCount} unmapped nodes, triggering calculation...`); - - // Trigger graph calculation (don't await, return current state) - fetch(`${process.env.NEXT_PUBLIC_BASE_URL || 'http://localhost:3000'}/api/calculate-graph`, { - method: 'POST', - headers: { - 'Cookie': `ponderants-auth=${surrealJwt}`, - }, - }).catch((err) => { - console.error('[Galaxy API] Failed to trigger graph calculation:', err); - }); - - return NextResponse.json({ - nodes: [], - links: [], - message: 'Calculating 3D coordinates... Refresh in a moment.', - }); - } - } + // Note: Coordinate calculation is now triggered automatically when nodes are created + // (see POST /api/nodes). This route simply returns whatever coordinates exist. 
console.log(`[Galaxy API] Returning ${nodes.length} nodes and ${links.length} links`); diff --git a/app/api/nodes/route.ts b/app/api/nodes/route.ts index a1db10b..c923174 100644 --- a/app/api/nodes/route.ts +++ b/app/api/nodes/route.ts @@ -101,18 +101,23 @@ export async function POST(request: NextRequest) { break; } - // Find last space within maxGraphemes + // Need to split - find the split point using grapheme-aware logic let testText = remainingText; - // Binary search for the right split point while (getGraphemeLength(testText) > maxGraphemes) { + // Try to find last word boundary const lastSpace = testText.lastIndexOf(' '); - if (lastSpace === -1 || lastSpace < testText.length * 0.5) { - // No good space found, just hard cut at character boundary - // Start from the end and work backwards - testText = testText.substring(0, Math.floor(testText.length * 0.9)); - } else { + if (lastSpace > testText.length * 0.5) { + // Good word boundary found - use it testText = testText.substring(0, lastSpace); + } else { + // No good word boundary - shrink by grapheme-aware amount + // Calculate ratio: (target graphemes / current graphemes) * current char length + const currentGraphemes = getGraphemeLength(testText); + const ratio = maxGraphemes / currentGraphemes; + // Use 0.95 safety factor to ensure we don't overshoot + const newLength = Math.floor(testText.length * ratio * 0.95); + testText = testText.substring(0, Math.max(1, newLength)); // Ensure at least 1 char } } @@ -133,6 +138,12 @@ export async function POST(request: NextRequest) { chunks = splitIntoChunks(fullText, firstPostMaxGraphemes, threadPostMaxGraphemes); } + console.log(`[POST /api/nodes] Split into ${chunks.length} chunks`); + console.log(`[POST /api/nodes] Link suffix: ${linkGraphemes} graphemes`); + chunks.forEach((chunk, i) => { + console.log(`[POST /api/nodes] Chunk ${i + 1}: ${getGraphemeLength(chunk)} graphemes`); + }); + // Create the thread posts let previousPost: { uri: string; cid: string } | 
null = null; let rootPost: { uri: string; cid: string } | null = null; @@ -263,6 +274,36 @@ export async function POST(request: NextRequest) { } } + // --- Step 4: Trigger UMAP coordinate calculation if we have enough nodes --- + // Only trigger if we have 3+ nodes with embeddings (UMAP minimum requirement) + try { + const countResult = await db.query<[Array<{ total: number }>]>( + 'SELECT count() as total FROM node WHERE user_did = $did AND embedding != NONE GROUP ALL', + { did: userDid } + ); + const totalNodes = countResult[0]?.[0]?.total || 0; + + console.log(`[POST /api/nodes] User has ${totalNodes} nodes with embeddings`); + + if (totalNodes >= 3) { + console.log('[POST /api/nodes] Triggering background UMAP calculation...'); + + // Trigger calculation in background (don't await) + const cookieHeader = request.headers.get('cookie') || ''; + fetch(`${process.env.NEXT_PUBLIC_APP_URL || 'http://localhost:3000'}/api/calculate-graph`, { + method: 'POST', + headers: { + 'Cookie': cookieHeader, + }, + }).catch(err => { + console.error('[POST /api/nodes] Background UMAP trigger failed:', err); + }); + } + } catch (error) { + console.warn('[POST /api/nodes] Failed to check node count for UMAP trigger:', error); + // Non-critical - continue + } + console.log('[POST /api/nodes] ✓ Cached node in SurrealDB'); return NextResponse.json({ success: true, atp_uri, node: newNode }); } catch (error) { diff --git a/plans/fix-coords-computation.md b/plans/fix-coords-computation.md new file mode 100644 index 0000000..b29b9b1 --- /dev/null +++ b/plans/fix-coords-computation.md @@ -0,0 +1,90 @@ +# Plan: Fix Coords Computation (Core Functionality) + +**Priority:** CRITICAL - This is core functionality of the app + +## Current Architecture (Broken) + +1. Nodes created with `coords_3d = NONE` +2. User visits `/galaxy` +3. Galaxy route checks if unmapped nodes exist +4. If yes, triggers `/api/calculate-graph` in background +5. Coordinates may not be ready on first visit +6. 
UMAP runs every time someone visits with unmapped nodes + +### Problems + +- **Inefficient**: Multiple users trigger same calculation +- **Poor UX**: Galaxy empty on first visit, needs refresh +- **Wasteful**: UMAP recalculation triggered unnecessarily + +## Proposed Architecture (Correct) + +**Trigger UMAP automatically on node insertion** + +### Implementation + +```typescript +// In POST /api/nodes, after creating node in SurrealDB: + +// 1. Check total node count for this user +const countResult = await db.query( + 'SELECT count() as total FROM node WHERE user_did = $did AND embedding != NONE', + { did: userDid } +); +const totalNodes = countResult[0]?.[0]?.total || 0; + +// 2. If we now have 3+ nodes, trigger coordinate calculation +if (totalNodes >= 3) { + // Don't await - let it run in background + fetch(`${process.env.NEXT_PUBLIC_APP_URL}/api/calculate-graph`, { + method: 'POST', + headers: { + 'Cookie': `ponderants-auth=${surrealJwt}`, + }, + }).catch(err => { + console.error('[POST /api/nodes] Background coord calculation failed:', err); + }); +} +``` + +### Why 3 nodes minimum? + +- UMAP requires minimum 3 data points for meaningful projection +- With <3 nodes, coords_3d stays NONE (galaxy shows "create more nodes" message) + +## Implementation Steps + +1. **Add node count check** after successful SurrealDB insert +2. **Trigger `/api/calculate-graph`** in background when threshold reached +3. **Remove auto-trigger logic** from `/api/galaxy` route +4. **Update `/api/calculate-graph`** to be idempotent (safe to call multiple times) +5. 
**Add rate limiting** to prevent spam calculations + +## Edge Cases to Handle + +### Concurrent inserts +**Problem**: Two users create nodes simultaneously +**Solution**: `/api/calculate-graph` checks count again before running UMAP + +### Calculation in progress +**Problem**: Second node created while UMAP running +**Solution**: Add a lock/flag in DB to prevent concurrent UMAP runs + +### Calculation failure +**Problem**: Network error, UMAP crashes +**Solution**: Retry logic with exponential backoff + +## Files to Modify + +- `app/api/nodes/route.ts` - Add trigger logic after node creation +- `app/api/galaxy/route.ts` - Remove auto-trigger, keep simple fetch +- `app/api/calculate-graph/route.ts` - Add idempotency check, locking mechanism + +## Testing Requirements + +1. Create 1st node → verify coords_3d = NONE +2. Create 2nd node → verify coords_3d = NONE +3. Create 3rd node → verify `/api/calculate-graph` triggered +4. Wait for calculation → verify all 3 nodes have coords_3d != NONE +5. Visit galaxy → verify all nodes visible immediately +6. Create 4th node → verify UMAP recalculates all 4 nodes diff --git a/plans/fix-grapheme-splitting.md b/plans/fix-grapheme-splitting.md new file mode 100644 index 0000000..b329500 --- /dev/null +++ b/plans/fix-grapheme-splitting.md @@ -0,0 +1,189 @@ +# Plan: Fix Grapheme Computation (Text Splitting) + +**Priority:** HIGH - Blocking production node creation + +## Current Implementation (Broken) + +### Problems Identified + +1. **Line 113**: Uses character length instead of grapheme length: + ```typescript + testText = testText.substring(0, Math.floor(testText.length * 0.9)); + ``` + With emojis or multi-byte chars, this can never converge properly. + +2. **Variable URL lengths**: URL can be 72-112 chars depending on environment: + - `http://localhost:3000`: 72 chars + - `https://ponderants.app`: 73 chars + - `https://www.ponderants.com`: 77 chars + - `https://ponderants-dev-preview-abc123.vercel.app`: 99 chars + +3. 
**Pre-calculates limit**: Computes `linkGraphemes` once with current URL, but doesn't account for worst-case + +## Correct Algorithm + +### Step 1: Calculate overhead for each post type + +```typescript +const detailUrl = `${baseUrl}/galaxy/${encodeURIComponent(nodeId)}`; +const linkSuffix = `\n\nRead more: ${detailUrl}`; +const linkGraphemes = getGraphemeLength(linkSuffix); + +// Thread indicator: "(N/Total) " where both N and Total can be 1-99 +// Worst case: "(99/99) " = 8 characters; budget 9 to leave one grapheme of headroom +const threadIndicatorGraphemes = 9; + +// Safety buffer to account for RichText facet detection potentially adding chars +const safetyBuffer = 5; +``` + +### Step 2: Calculate max graphemes for each post type + +```typescript +const firstPostMaxGraphemes = 300 - linkGraphemes - safetyBuffer; +const threadPostMaxGraphemes = 300 - threadIndicatorGraphemes - safetyBuffer; +``` + +### Step 3: Split fullText by GRAPHEME count + +```typescript +function splitByGraphemes(text: string, firstMax: number, otherMax: number): string[] { + const chunks: string[] = []; + let remainingText = text; + let isFirst = true; + + while (remainingText.length > 0) { + const maxGraphemes = isFirst ? 
firstMax : otherMax; + const rt = new RichText({ text: remainingText }); + + if (rt.graphemeLength <= maxGraphemes) { + // Rest of text fits in one chunk + chunks.push(remainingText); + break; + } + + // Need to split - find the split point + let testText = remainingText; + + // Iteratively shrink the candidate until it fits within maxGraphemes + while (getGraphemeLength(testText) > maxGraphemes) { + // Find last word boundary before current position + const lastSpace = testText.lastIndexOf(' '); + if (lastSpace > testText.length * 0.5) { + // Good word boundary found + testText = testText.substring(0, lastSpace); + } else { + // No good word boundary - shrink by grapheme-aware amount + // Take (maxGraphemes / currentGraphemes) * currentLength + const currentGraphemes = getGraphemeLength(testText); + const ratio = maxGraphemes / currentGraphemes; + const newLength = Math.floor(testText.length * ratio * 0.95); // 0.95 for safety + testText = testText.substring(0, newLength); + } + } + + chunks.push(testText.trim()); + remainingText = remainingText.substring(testText.length).trim(); + isFirst = false; + } + + return chunks; +} +``` + +### Step 4: Build posts with proper grapheme validation + +```typescript +const chunks = splitByGraphemes(fullText, firstPostMaxGraphemes, threadPostMaxGraphemes); + +for (let i = 0; i < chunks.length; i++) { + const isFirstPost = i === 0; + let postText = chunks[i]; + + // Add thread indicator if needed + if (chunks.length > 1 && !isFirstPost) { + postText = `(${i + 1}/${chunks.length}) ${postText}`; + } + + // Add link to first post + if (isFirstPost) { + postText += linkSuffix; + } + + // Final validation + const finalGraphemes = getGraphemeLength(postText); + if (finalGraphemes > 300) { + console.error(`[POST /api/nodes] Post ${i + 1} exceeds limit: ${finalGraphemes} graphemes`); + console.error(`[POST /api/nodes] Content: ${postText.substring(0, 100)}...`); + throw new Error(`Post exceeds 300 grapheme limit: ${finalGraphemes}`); + } + + // 
Continue with post creation... +} +``` + +## Implementation Steps + +1. **Extract constants at the top** + - Calculate `linkGraphemes` from actual URL + - Define `threadIndicatorGraphemes = 9` (worst case "(99/99) " is 8 graphemes, plus 1 of headroom) + - Define `safetyBuffer = 5` + +2. **Fix splitIntoChunks function** + - Replace character-based substring with grapheme-aware splitting + - Use RichText.graphemeLength for all length checks + - When shrinking text, calculate ratio based on graphemes, not chars + +3. **Add comprehensive logging** + - Log chunk grapheme counts before adding overhead + - Log final post grapheme counts + - Log URL used and its grapheme length + +4. **Test edge cases** + - Long Vercel preview URLs (100+ chars) + - Text with emojis and multi-byte characters + - Text that needs 10+ chunks (thread indicators "(10/15)") + - Text exactly at boundaries + +## Files to Modify + +- `app/api/nodes/route.ts` - Replace `splitIntoChunks()` function + +## Test Cases + +### Test Case 1: Short text (fits in one post) +**Input:** +- Title: "Test" +- Body: "Short content" +- Expected: 1 post with link + +### Test Case 2: Long text (needs splitting) +**Input:** +- Title: "Long Article" +- Body: 500 graphemes of text +- Expected: 2-3 posts, first with link, others with thread indicators + +### Test Case 3: Text with emojis +**Input:** +- Title: "🎉 Celebration" +- Body: "Hello 👋 World 🌍" repeated to 400 graphemes +- Expected: Correct grapheme counting (emojis = 1 grapheme each) + +### Test Case 4: Vercel preview URL +**Input:** +- NEXT_PUBLIC_APP_URL: `https://ponderants-git-development-abc123.vercel.app` +- Expected: Chunk limits account for the ~100-character URL + +### Test Case 5: Exactly at boundary +**Input:** +- Text that's exactly 300 graphemes including link +- Expected: 1 post, no error + +## Validation + +After implementation, verify: +1. No posts exceed 300 graphemes +2. Splitting happens at word boundaries when possible +3. All chunks account for thread indicators +4. 
First post always includes detail URL +5. Works with emoji and multi-byte characters diff --git a/todo.md b/todo.md index 3dc62c6..dadbb11 100644 --- a/todo.md +++ b/todo.md @@ -3,3 +3,9 @@ Upcoming items that should be implemented (time-permitting): - stream the AI output to deepgram for faster synthesis +- fix the freaking galaxy node clicking -- when going directly to a node ID + link, it redirects to /chat; when clicking on a node in /galaxy (either + general or on a specific node ID url there), it closes the modal automatically +- dark mode/light mode favicon and overall app theme +- fix the double border on desktop between sidebar and conversation actions UI +- delete "backup"/"old" page.tsx files