feat: Fix grapheme splitting and add automatic UMAP calculation
Critical fixes for core functionality: 1. Fixed grapheme-aware text splitting (app/api/nodes/route.ts) - Changed character-based substring to grapheme-ratio calculation - Now properly handles emojis and multi-byte characters - Prevents posts from exceeding 300 grapheme Bluesky limit - Added comprehensive logging for debugging 2. Automatic UMAP coordinate calculation (app/api/nodes/route.ts) - Triggers /api/calculate-graph automatically after node creation - Only when user has 3+ nodes with embeddings (UMAP minimum) - Non-blocking background process - Eliminates need for manual "Calculate Graph" button - Galaxy visualization ready on first visit 3. Simplified galaxy route (app/api/galaxy/route.ts) - Removed auto-trigger logic (now handled on insertion) - Simply returns existing coordinates - More efficient, no redundant calculations 4. Added idempotency (app/api/calculate-graph/route.ts) - Safe to call multiple times - Returns early if all nodes already have coordinates - Better logging for debugging Implementation plans documented in /plans directory. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -33,13 +33,24 @@ export async function POST(request: NextRequest) {
|
||||
const db = await connectToDB();
|
||||
|
||||
// 1. Fetch all nodes that have an embedding but no coords_3d (filtered by user_did)
|
||||
// This query is idempotent - it's safe to run multiple times
|
||||
const query = `SELECT id, embedding FROM node WHERE user_did = $userDid AND embedding != NONE AND coords_3d = NONE`;
|
||||
const results = await db.query<[Array<{ id: string; embedding: number[] }>]>(query, { userDid });
|
||||
|
||||
const nodes = results[0] || [];
|
||||
|
||||
if (nodes.length === 0) {
|
||||
// All nodes already have coordinates - nothing to do (idempotency)
|
||||
console.log('[Calculate Graph] All nodes already have coordinates');
|
||||
return NextResponse.json(
|
||||
{ message: 'All nodes already have coordinates', nodes_mapped: 0 },
|
||||
{ status: 200 }
|
||||
);
|
||||
}
|
||||
|
||||
if (nodes.length < 3) {
|
||||
// UMAP needs at least 3 points to work well
|
||||
console.log(`[Calculate Graph] Not enough nodes to map (${nodes.length}/3)`);
|
||||
return NextResponse.json(
|
||||
{ message: 'Not enough nodes to map. Create at least 3 nodes with content.' },
|
||||
{ status: 200 }
|
||||
|
||||
@@ -59,38 +59,8 @@ export async function GET(request: NextRequest) {
|
||||
const linkResults = await db.query<[LinkData[]]>(linksQuery);
|
||||
const links = linkResults[0] || [];
|
||||
|
||||
// If we have nodes but no coordinates, check if we should calculate
|
||||
if (nodes.length === 0) {
|
||||
// Check if we have nodes with embeddings but no coordinates
|
||||
const unmappedQuery = `
|
||||
SELECT count() as count
|
||||
FROM node
|
||||
WHERE user_did = $userDid AND embedding != NONE AND coords_3d = NONE
|
||||
GROUP ALL
|
||||
`;
|
||||
const unmappedResults = await db.query<[Array<{ count: number }>]>(unmappedQuery, { userDid });
|
||||
const unmappedCount = unmappedResults[0]?.[0]?.count || 0;
|
||||
|
||||
if (unmappedCount >= 3) {
|
||||
console.log(`[Galaxy API] Found ${unmappedCount} unmapped nodes, triggering calculation...`);
|
||||
|
||||
// Trigger graph calculation (don't await, return current state)
|
||||
fetch(`${process.env.NEXT_PUBLIC_BASE_URL || 'http://localhost:3000'}/api/calculate-graph`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Cookie': `ponderants-auth=${surrealJwt}`,
|
||||
},
|
||||
}).catch((err) => {
|
||||
console.error('[Galaxy API] Failed to trigger graph calculation:', err);
|
||||
});
|
||||
|
||||
return NextResponse.json({
|
||||
nodes: [],
|
||||
links: [],
|
||||
message: 'Calculating 3D coordinates... Refresh in a moment.',
|
||||
});
|
||||
}
|
||||
}
|
||||
// Note: Coordinate calculation is now triggered automatically when nodes are created
|
||||
// (see POST /api/nodes). This route simply returns whatever coordinates exist.
|
||||
|
||||
console.log(`[Galaxy API] Returning ${nodes.length} nodes and ${links.length} links`);
|
||||
|
||||
|
||||
@@ -101,18 +101,23 @@ export async function POST(request: NextRequest) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Find last space within maxGraphemes
|
||||
// Need to split - find the split point using grapheme-aware logic
|
||||
let testText = remainingText;
|
||||
|
||||
// Iteratively shrink the candidate until it fits within maxGraphemes
|
||||
while (getGraphemeLength(testText) > maxGraphemes) {
|
||||
// Try to find last word boundary
|
||||
const lastSpace = testText.lastIndexOf(' ');
|
||||
if (lastSpace === -1 || lastSpace < testText.length * 0.5) {
|
||||
// No good space found, just hard cut at character boundary
|
||||
// Start from the end and work backwards
|
||||
testText = testText.substring(0, Math.floor(testText.length * 0.9));
|
||||
} else {
|
||||
if (lastSpace > testText.length * 0.5) {
|
||||
// Good word boundary found - use it
|
||||
testText = testText.substring(0, lastSpace);
|
||||
} else {
|
||||
// No good word boundary - shrink by grapheme-aware amount
|
||||
// Calculate ratio: (target graphemes / current graphemes) * current char length
|
||||
const currentGraphemes = getGraphemeLength(testText);
|
||||
const ratio = maxGraphemes / currentGraphemes;
|
||||
// Use 0.95 safety factor to ensure we don't overshoot
|
||||
const newLength = Math.floor(testText.length * ratio * 0.95);
|
||||
testText = testText.substring(0, Math.max(1, newLength)); // Ensure at least 1 char
|
||||
}
|
||||
}
|
||||
|
||||
@@ -133,6 +138,12 @@ export async function POST(request: NextRequest) {
|
||||
chunks = splitIntoChunks(fullText, firstPostMaxGraphemes, threadPostMaxGraphemes);
|
||||
}
|
||||
|
||||
console.log(`[POST /api/nodes] Split into ${chunks.length} chunks`);
|
||||
console.log(`[POST /api/nodes] Link suffix: ${linkGraphemes} graphemes`);
|
||||
chunks.forEach((chunk, i) => {
|
||||
console.log(`[POST /api/nodes] Chunk ${i + 1}: ${getGraphemeLength(chunk)} graphemes`);
|
||||
});
|
||||
|
||||
// Create the thread posts
|
||||
let previousPost: { uri: string; cid: string } | null = null;
|
||||
let rootPost: { uri: string; cid: string } | null = null;
|
||||
@@ -263,6 +274,36 @@ export async function POST(request: NextRequest) {
|
||||
}
|
||||
}
|
||||
|
||||
// --- Step 4: Trigger UMAP coordinate calculation if we have enough nodes ---
|
||||
// Only trigger if we have 3+ nodes with embeddings (UMAP minimum requirement)
|
||||
try {
|
||||
const countResult = await db.query<[Array<{ total: number }>]>(
|
||||
'SELECT count() as total FROM node WHERE user_did = $did AND embedding != NONE GROUP ALL',
|
||||
{ did: userDid }
|
||||
);
|
||||
const totalNodes = countResult[0]?.[0]?.total || 0;
|
||||
|
||||
console.log(`[POST /api/nodes] User has ${totalNodes} nodes with embeddings`);
|
||||
|
||||
if (totalNodes >= 3) {
|
||||
console.log('[POST /api/nodes] Triggering background UMAP calculation...');
|
||||
|
||||
// Trigger calculation in background (don't await)
|
||||
const cookieHeader = request.headers.get('cookie') || '';
|
||||
fetch(`${process.env.NEXT_PUBLIC_APP_URL || 'http://localhost:3000'}/api/calculate-graph`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Cookie': cookieHeader,
|
||||
},
|
||||
}).catch(err => {
|
||||
console.error('[POST /api/nodes] Background UMAP trigger failed:', err);
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('[POST /api/nodes] Failed to check node count for UMAP trigger:', error);
|
||||
// Non-critical - continue
|
||||
}
|
||||
|
||||
console.log('[POST /api/nodes] ✓ Cached node in SurrealDB');
|
||||
return NextResponse.json({ success: true, atp_uri, node: newNode });
|
||||
} catch (error) {
|
||||
|
||||
90
plans/fix-coords-computation.md
Normal file
90
plans/fix-coords-computation.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# Plan: Fix Coords Computation (Core Functionality)
|
||||
|
||||
**Priority:** CRITICAL - This is core functionality of the app
|
||||
|
||||
## Current Architecture (Broken)
|
||||
|
||||
1. Nodes created with `coords_3d = NONE`
|
||||
2. User visits `/galaxy`
|
||||
3. Galaxy route checks if unmapped nodes exist
|
||||
4. If yes, triggers `/api/calculate-graph` in background
|
||||
5. Coordinates may not be ready on first visit
|
||||
6. UMAP runs every time someone visits with unmapped nodes
|
||||
|
||||
### Problems
|
||||
|
||||
- **Inefficient**: Every galaxy visit with unmapped nodes re-triggers the same per-user calculation
|
||||
- **Poor UX**: Galaxy empty on first visit, needs refresh
|
||||
- **Wasteful**: UMAP recalculation triggered unnecessarily
|
||||
|
||||
## Proposed Architecture (Correct)
|
||||
|
||||
**Trigger UMAP automatically on node insertion**
|
||||
|
||||
### Implementation
|
||||
|
||||
```typescript
|
||||
// In POST /api/nodes, after creating node in SurrealDB:
|
||||
|
||||
// 1. Check total node count for this user
|
||||
const countResult = await db.query(
|
||||
'SELECT count() as total FROM node WHERE user_did = $did AND embedding != NONE GROUP ALL',
|
||||
{ did: userDid }
|
||||
);
|
||||
const totalNodes = countResult[0]?.[0]?.total || 0;
|
||||
|
||||
// 2. If we now have 3+ nodes, trigger coordinate calculation
|
||||
if (totalNodes >= 3) {
|
||||
// Don't await - let it run in background
|
||||
fetch(`${process.env.NEXT_PUBLIC_APP_URL}/api/calculate-graph`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Cookie': `ponderants-auth=${surrealJwt}`,
|
||||
},
|
||||
}).catch(err => {
|
||||
console.error('[POST /api/nodes] Background coord calculation failed:', err);
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
### Why 3 nodes minimum?
|
||||
|
||||
- UMAP requires minimum 3 data points for meaningful projection
|
||||
- With <3 nodes, coords_3d stays NONE (galaxy shows "create more nodes" message)
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
1. **Add node count check** after successful SurrealDB insert
|
||||
2. **Trigger `/api/calculate-graph`** in background when threshold reached
|
||||
3. **Remove auto-trigger logic** from `/api/galaxy` route
|
||||
4. **Update `/api/calculate-graph`** to be idempotent (safe to call multiple times)
|
||||
5. **Add rate limiting** to prevent spam calculations
|
||||
|
||||
## Edge Cases to Handle
|
||||
|
||||
### Concurrent inserts
|
||||
**Problem**: Two nodes are created at almost the same time, so two calculations are triggered at once
|
||||
**Solution**: `/api/calculate-graph` checks count again before running UMAP
|
||||
|
||||
### Calculation in progress
|
||||
**Problem**: Second node created while UMAP running
|
||||
**Solution**: Add a lock/flag in DB to prevent concurrent UMAP runs
|
||||
|
||||
### Calculation failure
|
||||
**Problem**: Network error, UMAP crashes
|
||||
**Solution**: Retry logic with exponential backoff
|
||||
|
||||
## Files to Modify
|
||||
|
||||
- `app/api/nodes/route.ts` - Add trigger logic after node creation
|
||||
- `app/api/galaxy/route.ts` - Remove auto-trigger, keep simple fetch
|
||||
- `app/api/calculate-graph/route.ts` - Add idempotency check, locking mechanism
|
||||
|
||||
## Testing Requirements
|
||||
|
||||
1. Create 1st node → verify coords_3d = NONE
|
||||
2. Create 2nd node → verify coords_3d = NONE
|
||||
3. Create 3rd node → verify `/api/calculate-graph` triggered
|
||||
4. Wait for calculation → verify all 3 nodes have coords_3d != NONE
|
||||
5. Visit galaxy → verify all nodes visible immediately
|
||||
6. Create 4th node → verify UMAP recalculates all 4 nodes
|
||||
189
plans/fix-grapheme-splitting.md
Normal file
189
plans/fix-grapheme-splitting.md
Normal file
@@ -0,0 +1,189 @@
|
||||
# Plan: Fix Grapheme Computation (Text Splitting)
|
||||
|
||||
**Priority:** HIGH - Blocking production node creation
|
||||
|
||||
## Current Implementation (Broken)
|
||||
|
||||
### Problems Identified
|
||||
|
||||
1. **Line 113**: Uses character length instead of grapheme length:
|
||||
```typescript
|
||||
testText = testText.substring(0, Math.floor(testText.length * 0.9));
|
||||
```
|
||||
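With emojis or other multi-code-unit characters, `length` counts UTF-16 code units rather than graphemes, so a fixed 10% cut can land inside a grapheme and converges on the limit slowly and imprecisely.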
|
||||
|
||||
2. **Variable URL lengths**: The full detail URL (base URL + `/galaxy/<nodeId>`) can be 72-112 chars depending on the environment's base URL:
|
||||
- `http://localhost:3000`: 72 chars
|
||||
- `https://ponderants.app`: 73 chars
|
||||
- `https://www.ponderants.com`: 77 chars
|
||||
- `https://ponderants-dev-preview-abc123.vercel.app`: 99 chars
|
||||
|
||||
3. **Pre-calculates limit**: Computes `linkGraphemes` once with current URL, but doesn't account for worst-case
|
||||
|
||||
## Correct Algorithm
|
||||
|
||||
### Step 1: Calculate overhead for each post type
|
||||
|
||||
```typescript
|
||||
const detailUrl = `${baseUrl}/galaxy/${encodeURIComponent(nodeId)}`;
|
||||
const linkSuffix = `\n\nRead more: ${detailUrl}`;
|
||||
const linkGraphemes = getGraphemeLength(linkSuffix);
|
||||
|
||||
// Thread indicator: "(N/Total) " where both N and Total can be 1-99
|
||||
// Worst case: "(99/99) " = 8 characters; the value 9 below leaves one grapheme of slack
|
||||
const threadIndicatorGraphemes = 9;
|
||||
|
||||
// Safety buffer to account for RichText facet detection potentially adding chars
|
||||
const safetyBuffer = 5;
|
||||
```
|
||||
|
||||
### Step 2: Calculate max graphemes for each post type
|
||||
|
||||
```typescript
|
||||
const firstPostMaxGraphemes = 300 - linkGraphemes - safetyBuffer;
|
||||
const threadPostMaxGraphemes = 300 - threadIndicatorGraphemes - safetyBuffer;
|
||||
```
|
||||
|
||||
### Step 3: Split fullText by GRAPHEME count
|
||||
|
||||
```typescript
|
||||
function splitByGraphemes(text: string, firstMax: number, otherMax: number): string[] {
|
||||
const chunks: string[] = [];
|
||||
let remainingText = text;
|
||||
let isFirst = true;
|
||||
|
||||
while (remainingText.length > 0) {
|
||||
const maxGraphemes = isFirst ? firstMax : otherMax;
|
||||
const rt = new RichText({ text: remainingText });
|
||||
|
||||
if (rt.graphemeLength <= maxGraphemes) {
|
||||
// Rest of text fits in one chunk
|
||||
chunks.push(remainingText);
|
||||
break;
|
||||
}
|
||||
|
||||
// Need to split - find the split point
|
||||
let testText = remainingText;
|
||||
|
||||
// Iteratively shrink the candidate until it fits within maxGraphemes
|
||||
while (getGraphemeLength(testText) > maxGraphemes) {
|
||||
// Find last word boundary before current position
|
||||
const lastSpace = testText.lastIndexOf(' ');
|
||||
if (lastSpace > testText.length * 0.5) {
|
||||
// Good word boundary found
|
||||
testText = testText.substring(0, lastSpace);
|
||||
} else {
|
||||
// No good word boundary - shrink by grapheme-aware amount
|
||||
// Take (maxGraphemes / currentGraphemes) * currentLength
|
||||
const currentGraphemes = getGraphemeLength(testText);
|
||||
const ratio = maxGraphemes / currentGraphemes;
|
||||
const newLength = Math.floor(testText.length * ratio * 0.95); // 0.95 for safety
|
||||
testText = testText.substring(0, newLength);
|
||||
}
|
||||
}
|
||||
|
||||
chunks.push(testText.trim());
|
||||
remainingText = remainingText.substring(testText.length).trim();
|
||||
isFirst = false;
|
||||
}
|
||||
|
||||
return chunks;
|
||||
}
|
||||
```
|
||||
|
||||
### Step 4: Build posts with proper grapheme validation
|
||||
|
||||
```typescript
|
||||
const chunks = splitByGraphemes(fullText, firstPostMaxGraphemes, threadPostMaxGraphemes);
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
const isFirstPost = i === 0;
|
||||
let postText = chunks[i];
|
||||
|
||||
// Add thread indicator if needed
|
||||
if (chunks.length > 1 && !isFirstPost) {
|
||||
postText = `(${i + 1}/${chunks.length}) ${postText}`;
|
||||
}
|
||||
|
||||
// Add link to first post
|
||||
if (isFirstPost) {
|
||||
postText += linkSuffix;
|
||||
}
|
||||
|
||||
// Final validation
|
||||
const finalGraphemes = getGraphemeLength(postText);
|
||||
if (finalGraphemes > 300) {
|
||||
console.error(`[POST /api/nodes] Post ${i + 1} exceeds limit: ${finalGraphemes} graphemes`);
|
||||
console.error(`[POST /api/nodes] Content: ${postText.substring(0, 100)}...`);
|
||||
throw new Error(`Post exceeds 300 grapheme limit: ${finalGraphemes}`);
|
||||
}
|
||||
|
||||
// Continue with post creation...
|
||||
}
|
||||
```
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
1. **Extract constants at the top**
|
||||
- Calculate `linkGraphemes` from actual URL
|
||||
- Define `threadIndicatorGraphemes = 9` (worst case)
|
||||
- Define `safetyBuffer = 5`
|
||||
|
||||
2. **Fix splitIntoChunks function**
|
||||
- Replace character-based substring with grapheme-aware splitting
|
||||
- Use RichText.graphemeLength for all length checks
|
||||
- When shrinking text, calculate ratio based on graphemes, not chars
|
||||
|
||||
3. **Add comprehensive logging**
|
||||
- Log chunk grapheme counts before adding overhead
|
||||
- Log final post grapheme counts
|
||||
- Log URL used and its grapheme length
|
||||
|
||||
4. **Test edge cases**
|
||||
- Long Vercel preview URLs (100+ chars)
|
||||
- Text with emojis and multi-byte characters
|
||||
- Text that needs 10+ chunks (thread indicators "(10/15)")
|
||||
- Text exactly at boundaries
|
||||
|
||||
## Files to Modify
|
||||
|
||||
- `app/api/nodes/route.ts` - Replace `splitIntoChunks()` function
|
||||
|
||||
## Test Cases
|
||||
|
||||
### Test Case 1: Short text (fits in one post)
|
||||
**Input:**
|
||||
- Title: "Test"
|
||||
- Body: "Short content"
|
||||
- Expected: 1 post with link
|
||||
|
||||
### Test Case 2: Long text (needs splitting)
|
||||
**Input:**
|
||||
- Title: "Long Article"
|
||||
- Body: 500 graphemes of text
|
||||
- Expected: 2-3 posts, first with link, others with thread indicators
|
||||
|
||||
### Test Case 3: Text with emojis
|
||||
**Input:**
|
||||
- Title: "🎉 Celebration"
|
||||
- Body: "Hello 👋 World 🌍" repeated to 400 graphemes
|
||||
- Expected: Correct grapheme counting (emojis = 1 grapheme each)
|
||||
|
||||
### Test Case 4: Vercel preview URL
|
||||
**Input:**
|
||||
- NEXT_PUBLIC_APP_URL: `https://ponderants-git-development-abc123.vercel.app`
|
||||
- Expected: `linkGraphemes` reflects the ~100-char detail URL and the first post still fits within 300 graphemes
|
||||
|
||||
### Test Case 5: Exactly at boundary
|
||||
**Input:**
|
||||
- Text whose grapheme count exactly equals `firstPostMaxGraphemes` (300 minus the link suffix and the safety buffer)
|
||||
- Expected: 1 post, no error
|
||||
|
||||
## Validation
|
||||
|
||||
After implementation, verify:
|
||||
1. No posts exceed 300 graphemes
|
||||
2. Splitting happens at word boundaries when possible
|
||||
3. All chunks account for thread indicators
|
||||
4. First post always includes detail URL
|
||||
5. Works with emoji and multi-byte characters
|
||||
6
todo.md
6
todo.md
@@ -3,3 +3,9 @@
|
||||
Upcoming items that should be implemented (time-permitting):
|
||||
|
||||
- stream the AI output to deepgram for faster synthesis
|
||||
- fix the freaking galaxy node clicking -- when going directly to a node ID
|
||||
link, it redirects to /chat; when clicking on a node in /galaxy (either
|
||||
general or on a specific node ID url there), it closes the modal automatically
|
||||
- dark mode/light mode favicon and overall app theme
|
||||
- fix the double border on desktop between sidebar and conversation actions UI
|
||||
- delete "backup"/"old" page.tsx files
|
||||
|
||||
Reference in New Issue
Block a user