feat: implement AI agent layer with LangChain integration

- Create InterviewerAgent with Socratic questioning and RAG context
- Build SynthesizerAgent for transcript segmentation and link generation
- Integrate Google Gemini models (Flash for interviewing, Pro for synthesis)
- Add structured output parsing for Zettel extraction and linking
- Implement session termination detection with [END_SESSION] token
- Add conversation context formatting and similarity-based neighbor filtering
- Add vector service tests with mocked ChromaDB and embeddings
- Test interviewer agent RAG conversations and session termination
- Test synthesizer agent transcript formatting and neighbor analysis
- Add prompt loader tests for external prompt system
- Test all repository CRUD operations and database transactions
This commit is contained in:
Albert
2025-08-17 01:47:04 +00:00
parent a5f8c90d78
commit 89273619c2
6 changed files with 791 additions and 0 deletions

View File

@@ -0,0 +1,121 @@
"""Test vector service functionality."""
import pytest
import uuid
from datetime import datetime
from unittest.mock import AsyncMock, MagicMock, patch
from app.services.vector import VectorService
from app.data.models import Note
class TestVectorService:
    """Test VectorService operations.

    Every test patches ``GoogleGenerativeAIEmbeddings`` and
    ``chromadb.PersistentClient`` as looked up in ``app.services.vector``,
    so ``VectorService`` is built and exercised against mocks.
    """

    @pytest.fixture
    def sample_notes(self):
        """Create two sample notes that share one session id."""
        session_id = uuid.uuid4()
        return [
            Note(
                title="AI Ethics",
                content="Artificial intelligence systems must be designed with ethical considerations in mind.",
                tags=["ai", "ethics", "technology"],
                session_id=session_id,
            ),
            Note(
                title="Machine Learning Bias",
                content="Bias in machine learning models can perpetuate unfair discrimination.",
                tags=["ml", "bias", "fairness"],
                session_id=session_id,
            ),
        ]

    @pytest.mark.asyncio
    async def test_vector_service_initialization(self):
        """Test that VectorService initializes correctly."""
        with (
            patch('app.services.vector.GoogleGenerativeAIEmbeddings'),
            patch('app.services.vector.chromadb.PersistentClient'),
        ):
            service = VectorService()

        assert service is not None

    @pytest.mark.asyncio
    async def test_add_notes_empty_list(self):
        """Test adding empty list of notes."""
        with (
            patch('app.services.vector.GoogleGenerativeAIEmbeddings'),
            patch('app.services.vector.chromadb.PersistentClient'),
        ):
            service = VectorService()
            await service.add_notes([])  # Should not raise an error

    @pytest.mark.asyncio
    async def test_add_notes_with_mocked_embeddings(self, sample_notes):
        """Test adding notes with mocked embedding service."""
        # One fake embedding vector per sample note.
        mock_embeddings = AsyncMock()
        mock_embeddings.aembed_documents = AsyncMock(
            return_value=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        )

        mock_collection = MagicMock()
        mock_client = MagicMock()
        mock_client.get_or_create_collection.return_value = mock_collection

        with (
            patch('app.services.vector.GoogleGenerativeAIEmbeddings', return_value=mock_embeddings),
            patch('app.services.vector.chromadb.PersistentClient', return_value=mock_client),
        ):
            service = VectorService()
            await service.add_notes(sample_notes)

        # Verify embeddings were called
        mock_embeddings.aembed_documents.assert_called_once()
        # Verify collection.add was called
        mock_collection.add.assert_called_once()

    @pytest.mark.asyncio
    async def test_semantic_search_empty_collection(self):
        """Test semantic search on empty collection."""
        mock_collection = MagicMock()
        mock_collection.count.return_value = 0

        mock_client = MagicMock()
        mock_client.get_or_create_collection.return_value = mock_collection

        with (
            patch('app.services.vector.GoogleGenerativeAIEmbeddings'),
            patch('app.services.vector.chromadb.PersistentClient', return_value=mock_client),
        ):
            service = VectorService()
            results = await service.semantic_search("test query")

        assert results == []

    @pytest.mark.asyncio
    async def test_semantic_search_with_results(self):
        """Test semantic search with mocked results."""
        mock_embeddings = AsyncMock()
        mock_embeddings.aembed_query = AsyncMock(return_value=[0.1, 0.2, 0.3])

        mock_collection = MagicMock()
        mock_collection.count.return_value = 2
        # Canned query payload: two hits with their metadata and distances.
        mock_collection.query.return_value = {
            "documents": [["AI ethics content", "ML bias content"]],
            "metadatas": [[{"title": "AI Ethics", "tags": "ai,ethics"}, {"title": "ML Bias", "tags": "ml,bias"}]],
            "distances": [[0.2, 0.4]],
        }

        mock_client = MagicMock()
        mock_client.get_or_create_collection.return_value = mock_collection

        with (
            patch('app.services.vector.GoogleGenerativeAIEmbeddings', return_value=mock_embeddings),
            patch('app.services.vector.chromadb.PersistentClient', return_value=mock_client),
        ):
            service = VectorService()
            results = await service.semantic_search("ethics")

        assert len(results) == 2
        assert results[0].page_content == "AI ethics content"
        assert results[0].metadata["title"] == "AI Ethics"
        # The expected score is 1 - 0.2; it is a computed float, so compare
        # with a tolerance rather than exact equality.
        assert results[0].metadata["similarity_score"] == pytest.approx(0.8)

    def test_reset_collection(self):
        """Test collection reset functionality."""
        mock_collection = MagicMock()
        mock_client = MagicMock()
        mock_client.get_or_create_collection.return_value = mock_collection

        with (
            patch('app.services.vector.GoogleGenerativeAIEmbeddings'),
            patch('app.services.vector.chromadb.PersistentClient', return_value=mock_client),
        ):
            service = VectorService()
            service.reset_collection()  # Should not raise an error

        mock_client.delete_collection.assert_called_once_with("skytalk_notes")