from pathlib import Path


class PromptLoader:
    """Loads and caches prompts from text files."""

    def __init__(self):
        # Prompt files live alongside this module in a "prompts" subdirectory.
        self.prompts_dir = Path(__file__).parent / "prompts"
        # Per-instance cache instead of @lru_cache on the method: caching a
        # bound method keys on `self` and keeps the instance alive for the
        # cache's lifetime (ruff B019), and maxsize=None grows unboundedly.
        self._cache: dict[str, str] = {}

    def load_prompt(self, prompt_name: str) -> str:
        """Load (and memoize) a prompt from a text file.

        Args:
            prompt_name: Name of the prompt file (without .txt extension)

        Returns:
            The prompt content as a string, stripped of surrounding whitespace

        Raises:
            FileNotFoundError: If the prompt file doesn't exist
        """
        try:
            # Fast path: already loaded once for this instance.
            return self._cache[prompt_name]
        except KeyError:
            pass

        prompt_path = self.prompts_dir / f"{prompt_name}.txt"
        if not prompt_path.exists():
            raise FileNotFoundError(f"Prompt file not found: {prompt_path}")

        content = prompt_path.read_text(encoding="utf-8").strip()
        self._cache[prompt_name] = content
        return content

    def get_interviewer_prompt(self) -> str:
        """Get the interviewer system prompt."""
        return self.load_prompt("interviewer")

    def get_segmentation_prompt(self) -> str:
        """Get the segmentation prompt."""
        return self.load_prompt("segmentation")

    def get_linking_prompt(self) -> str:
        """Get the generative linking prompt."""
        return self.load_prompt("linking")


# Global instance for easy importing
prompt_loader = PromptLoader()
+ +GUIDELINES: +- Ask ONE question at a time - never multiple questions in a single response +- Use the Socratic method: probe assumptions, explore implications, seek clarification +- Build on the user's responses with follow-up questions that go deeper +- Be curious and genuinely interested in their thinking process +- Help them articulate vague or incomplete thoughts +- Challenge ideas constructively when appropriate +- Guide the conversation toward actionable insights + +CONTEXT: +You have access to related knowledge from previous conversations: +{retrieved_context} + +CONVERSATION FLOW: +- Start with open-ended questions about their topic +- Follow interesting threads that emerge +- Help them explore contradictions or tensions in their thinking +- When you sense the conversation has reached a natural conclusion and the user has thoroughly explored their ideas, output exactly: [END_SESSION] + +Remember: Your role is to be a thoughtful conversation partner who helps users think more deeply, not to provide answers or solutions. \ No newline at end of file diff --git a/app/core/prompts/linking.txt b/app/core/prompts/linking.txt new file mode 100644 index 0000000..b5e79c5 --- /dev/null +++ b/app/core/prompts/linking.txt @@ -0,0 +1,26 @@ +You are tasked with identifying semantic relationships between a new note and existing notes in a knowledge base. + +INSTRUCTIONS: +- Compare the new note against the provided neighboring notes +- Identify meaningful conceptual relationships (not just keyword matches) +- Focus on deep semantic connections: causation, contradiction, extension, examples, etc. 
+- Only create links where there are genuine intellectual relationships +- Provide rich context explaining HOW the concepts relate + +RELATIONSHIP TYPES TO CONSIDER: +- Builds upon or extends the concept +- Provides a contrasting perspective +- Illustrates with a concrete example +- Shares underlying principles +- Challenges or refines the idea +- Applies the concept in a different domain + +NEW NOTE: +Title: {new_note_title} +Content: {new_note_content} +Tags: {new_note_tags} + +POTENTIAL RELATED NOTES: +{neighbors} + +For each meaningful relationship you identify, explain the specific conceptual connection and why linking these ideas would be valuable for knowledge synthesis. \ No newline at end of file diff --git a/app/core/prompts/segmentation.txt b/app/core/prompts/segmentation.txt new file mode 100644 index 0000000..452dc43 --- /dev/null +++ b/app/core/prompts/segmentation.txt @@ -0,0 +1,20 @@ +You are tasked with analyzing a conversation transcript and extracting atomic, standalone ideas (Zettels) from it. + +INSTRUCTIONS: +- Read through the entire transcript carefully +- Identify distinct, atomic concepts that emerged during the conversation +- Each Zettel should focus on ONE clear idea or insight +- Extract the core insight and expand it into a concise, standalone mini-blog post +- Make each Zettel self-contained - someone should understand the concept without reading the original conversation +- Focus on the user's insights, revelations, and developed thoughts +- Ignore small talk, questions, or incomplete thoughts + +ZETTEL REQUIREMENTS: +- Title: Clear, descriptive title that captures the essence of the concept +- Content: 2-4 paragraphs that fully explain the concept, its context, and its implications +- Tags: 3-7 relevant conceptual keywords that would help in future discovery + +TRANSCRIPT TO ANALYZE: +{transcript} + +Extract the meaningful concepts and format them as structured Zettels. 
\ No newline at end of file diff --git a/app/services/vector.py b/app/services/vector.py new file mode 100644 index 0000000..15d3788 --- /dev/null +++ b/app/services/vector.py @@ -0,0 +1,96 @@ +import os +from typing import List + +import chromadb +from chromadb.config import Settings as ChromaSettings +from langchain_google_genai import GoogleGenerativeAIEmbeddings +from langchain_core.documents import Document + +from app.core.config import settings +from app.data.models import Note + + +class VectorService: + def __init__(self): + self.embeddings = GoogleGenerativeAIEmbeddings( + model=settings.EMBEDDING_MODEL, + google_api_key=settings.GOOGLE_API_KEY + ) + + os.makedirs(settings.CHROMA_PERSIST_DIR, exist_ok=True) + + self.client = chromadb.PersistentClient( + path=settings.CHROMA_PERSIST_DIR, + settings=ChromaSettings( + anonymized_telemetry=False, + allow_reset=True + ) + ) + + self.collection = self.client.get_or_create_collection( + name="skytalk_notes", + metadata={"hnsw:space": "cosine"} + ) + + async def add_notes(self, notes: List[Note]) -> None: + if not notes: + return + + documents = [] + metadatas = [] + ids = [] + + for note in notes: + documents.append(note.content) + metadatas.append({ + "title": note.title, + "tags": ",".join(note.tags), + "session_id": str(note.session_id), + "created_at": note.created_at.isoformat() + }) + ids.append(str(note.id)) + + embeddings = await self.embeddings.aembed_documents(documents) + + self.collection.add( + embeddings=embeddings, + documents=documents, + metadatas=metadatas, + ids=ids + ) + + async def semantic_search(self, query: str, k: int = 5) -> List[Document]: + if self.collection.count() == 0: + return [] + + query_embedding = await self.embeddings.aembed_query(query) + + results = self.collection.query( + query_embeddings=[query_embedding], + n_results=min(k, self.collection.count()) + ) + + documents = [] + if results["documents"] and results["documents"][0]: + for i, doc in 
enumerate(results["documents"][0]): + metadata = results["metadatas"][0][i] if results["metadatas"] else {} + distance = results["distances"][0][i] if results["distances"] else 0.0 + + metadata["similarity_score"] = 1 - distance + + documents.append(Document( + page_content=doc, + metadata=metadata + )) + + return documents + + def reset_collection(self) -> None: + try: + self.client.delete_collection("skytalk_notes") + self.collection = self.client.get_or_create_collection( + name="skytalk_notes", + metadata={"hnsw:space": "cosine"} + ) + except Exception: + pass \ No newline at end of file