feat: implement core services infrastructure
- Create VectorService with ChromaDB persistence and Google embeddings - Add semantic search functionality with similarity scoring - Implement externalized prompt system with text file storage - Add prompt loader with caching for better performance - Structure prompts for interviewer, segmentation, and linking agents
This commit is contained in:
45
app/core/prompt_loader.py
Normal file
45
app/core/prompt_loader.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
class PromptLoader:
    """Loads and caches prompts from text files.

    Prompts are stored as ``<name>.txt`` files in the ``prompts``
    directory next to this module. Each prompt is read from disk at most
    once per instance; subsequent loads are served from an in-memory cache.
    """

    def __init__(self) -> None:
        # Resolve the prompts directory relative to this module so lookups
        # work regardless of the process's current working directory.
        self.prompts_dir = Path(__file__).parent / "prompts"
        # Per-instance cache mapping prompt name -> stripped file content.
        # NOTE: the previous implementation used @lru_cache on the instance
        # method, which keys on `self` and keeps every PromptLoader alive
        # for the cache's lifetime (ruff B019). A plain dict avoids that.
        self._cache: Dict[str, str] = {}

    def load_prompt(self, prompt_name: str) -> str:
        """Load a prompt from a text file.

        Args:
            prompt_name: Name of the prompt file (without .txt extension)

        Returns:
            The prompt content as a string

        Raises:
            FileNotFoundError: If the prompt file doesn't exist
        """
        if prompt_name in self._cache:
            return self._cache[prompt_name]

        prompt_path = self.prompts_dir / f"{prompt_name}.txt"
        if not prompt_path.exists():
            raise FileNotFoundError(f"Prompt file not found: {prompt_path}")

        # Strip surrounding whitespace so trailing newlines in the files
        # don't leak into the prompt sent to the model.
        content = prompt_path.read_text(encoding="utf-8").strip()
        self._cache[prompt_name] = content
        return content

    def get_interviewer_prompt(self) -> str:
        """Get the interviewer system prompt."""
        return self.load_prompt("interviewer")

    def get_segmentation_prompt(self) -> str:
        """Get the segmentation prompt."""
        return self.load_prompt("segmentation")

    def get_linking_prompt(self) -> str:
        """Get the generative linking prompt."""
        return self.load_prompt("linking")
|
||||
|
||||
|
||||
# Global instance for easy importing; callers do
# `from app.core.prompt_loader import prompt_loader` and share one cache.
prompt_loader = PromptLoader()
|
||||
22
app/core/prompts/interviewer.txt
Normal file
22
app/core/prompts/interviewer.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
You are a Socratic interviewer designed to help users explore and develop their ideas through thoughtful questioning. Your goal is to elicit deep thinking and uncover the nuances of their thoughts.
|
||||
|
||||
GUIDELINES:
|
||||
- Ask ONE question at a time - never multiple questions in a single response
|
||||
- Use the Socratic method: probe assumptions, explore implications, seek clarification
|
||||
- Build on the user's responses with follow-up questions that go deeper
|
||||
- Be curious and genuinely interested in their thinking process
|
||||
- Help them articulate vague or incomplete thoughts
|
||||
- Challenge ideas constructively when appropriate
|
||||
- Guide the conversation toward actionable insights
|
||||
|
||||
CONTEXT:
|
||||
You have access to related knowledge from previous conversations:
|
||||
{retrieved_context}
|
||||
|
||||
CONVERSATION FLOW:
|
||||
- Start with open-ended questions about their topic
|
||||
- Follow interesting threads that emerge
|
||||
- Help them explore contradictions or tensions in their thinking
|
||||
- When you sense the conversation has reached a natural conclusion and the user has thoroughly explored their ideas, output exactly: [END_SESSION]
|
||||
|
||||
Remember: Your role is to be a thoughtful conversation partner who helps users think more deeply, not to provide answers or solutions.
|
||||
26
app/core/prompts/linking.txt
Normal file
26
app/core/prompts/linking.txt
Normal file
@@ -0,0 +1,26 @@
|
||||
You are tasked with identifying semantic relationships between a new note and existing notes in a knowledge base.
|
||||
|
||||
INSTRUCTIONS:
|
||||
- Compare the new note against the provided neighboring notes
|
||||
- Identify meaningful conceptual relationships (not just keyword matches)
|
||||
- Focus on deep semantic connections: causation, contradiction, extension, examples, etc.
|
||||
- Only create links where there are genuine intellectual relationships
|
||||
- Provide rich context explaining HOW the concepts relate
|
||||
|
||||
RELATIONSHIP TYPES TO CONSIDER:
|
||||
- Builds upon or extends the concept
|
||||
- Provides a contrasting perspective
|
||||
- Illustrates with a concrete example
|
||||
- Shares underlying principles
|
||||
- Challenges or refines the idea
|
||||
- Applies the concept in a different domain
|
||||
|
||||
NEW NOTE:
|
||||
Title: {new_note_title}
|
||||
Content: {new_note_content}
|
||||
Tags: {new_note_tags}
|
||||
|
||||
POTENTIAL RELATED NOTES:
|
||||
{neighbors}
|
||||
|
||||
For each meaningful relationship you identify, explain the specific conceptual connection and why linking these ideas would be valuable for knowledge synthesis.
|
||||
20
app/core/prompts/segmentation.txt
Normal file
20
app/core/prompts/segmentation.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
You are tasked with analyzing a conversation transcript and extracting atomic, standalone ideas (Zettels) from it.
|
||||
|
||||
INSTRUCTIONS:
|
||||
- Read through the entire transcript carefully
|
||||
- Identify distinct, atomic concepts that emerged during the conversation
|
||||
- Each Zettel should focus on ONE clear idea or insight
|
||||
- Extract the core insight and expand it into a concise, standalone mini-blog post
|
||||
- Make each Zettel self-contained - someone should understand the concept without reading the original conversation
|
||||
- Focus on the user's insights, revelations, and developed thoughts
|
||||
- Ignore small talk, questions, or incomplete thoughts
|
||||
|
||||
ZETTEL REQUIREMENTS:
|
||||
- Title: Clear, descriptive title that captures the essence of the concept
|
||||
- Content: 2-4 paragraphs that fully explain the concept, its context, and its implications
|
||||
- Tags: 3-7 relevant conceptual keywords that would help in future discovery
|
||||
|
||||
TRANSCRIPT TO ANALYZE:
|
||||
{transcript}
|
||||
|
||||
Extract the meaningful concepts and format them as structured Zettels.
|
||||
96
app/services/vector.py
Normal file
96
app/services/vector.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import chromadb
|
||||
from chromadb.config import Settings as ChromaSettings
|
||||
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from app.core.config import settings
|
||||
from app.data.models import Note
|
||||
|
||||
|
||||
class VectorService:
    """Stores notes in a persistent ChromaDB collection and performs
    semantic search over them using Google generative-AI embeddings.
    """

    # Single collection shared by all notes.
    _COLLECTION_NAME = "skytalk_notes"
    # Cosine space so that similarity = 1 - distance lies in [0, 1].
    _COLLECTION_METADATA = {"hnsw:space": "cosine"}

    def __init__(self):
        # Embedding model used for both documents and queries; model name
        # and credentials come from application settings.
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model=settings.EMBEDDING_MODEL,
            google_api_key=settings.GOOGLE_API_KEY
        )

        # Make sure the persistence directory exists before Chroma opens it.
        os.makedirs(settings.CHROMA_PERSIST_DIR, exist_ok=True)

        self.client = chromadb.PersistentClient(
            path=settings.CHROMA_PERSIST_DIR,
            settings=ChromaSettings(
                anonymized_telemetry=False,
                allow_reset=True  # needed so reset_collection() is permitted
            )
        )

        self.collection = self._get_or_create_collection()

    def _get_or_create_collection(self):
        """Return the notes collection, creating it if it doesn't exist.

        Centralizes the name/metadata so __init__ and reset_collection
        cannot drift apart.
        """
        return self.client.get_or_create_collection(
            name=self._COLLECTION_NAME,
            metadata=self._COLLECTION_METADATA,
        )

    async def add_notes(self, notes: List[Note]) -> None:
        """Embed and persist the given notes; no-op for an empty list."""
        if not notes:
            return

        documents = []
        metadatas = []
        ids = []

        for note in notes:
            documents.append(note.content)
            # Chroma metadata values must be scalars, so tags are joined
            # into a comma-separated string and ids/timestamps stringified.
            metadatas.append({
                "title": note.title,
                "tags": ",".join(note.tags),
                "session_id": str(note.session_id),
                "created_at": note.created_at.isoformat()
            })
            ids.append(str(note.id))

        embeddings = await self.embeddings.aembed_documents(documents)

        self.collection.add(
            embeddings=embeddings,
            documents=documents,
            metadatas=metadatas,
            ids=ids
        )

    async def semantic_search(self, query: str, k: int = 5) -> List[Document]:
        """Return up to ``k`` notes most similar to ``query``.

        Each returned Document carries the stored metadata plus a
        ``similarity_score`` (1 - cosine distance, so higher is closer).
        Returns an empty list when the collection holds no notes.
        """
        # Hoisted: count() was previously called twice per search.
        count = self.collection.count()
        if count == 0:
            return []

        query_embedding = await self.embeddings.aembed_query(query)

        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=min(k, count)  # Chroma errors if n_results > count
        )

        documents: List[Document] = []
        if results["documents"] and results["documents"][0]:
            for i, doc in enumerate(results["documents"][0]):
                metadata = results["metadatas"][0][i] if results["metadatas"] else {}
                distance = results["distances"][0][i] if results["distances"] else 0.0

                # Expose cosine similarity for downstream relevance filtering.
                metadata["similarity_score"] = 1 - distance

                documents.append(Document(
                    page_content=doc,
                    metadata=metadata
                ))

        return documents

    def reset_collection(self) -> None:
        """Delete and recreate the notes collection.

        Deletion is best-effort (the collection may not exist yet), but
        recreation happens unconditionally: previously it lived inside the
        same swallowed ``try``, so a failure after a successful delete left
        ``self.collection`` pointing at a deleted collection.
        """
        try:
            self.client.delete_collection(self._COLLECTION_NAME)
        except Exception:
            # Nothing to delete, or a backend error — either way we still
            # want a valid handle, so fall through to recreation.
            pass
        self.collection = self._get_or_create_collection()
|
||||
Reference in New Issue
Block a user