feat: Implement semantic search and smart note linking with embedding generation, vector storage, and link persistence.

This commit is contained in:
2025-12-25 23:59:11 +01:00
parent 4cb398869d
commit 178d59540e
19 changed files with 2501 additions and 49 deletions

2091
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
CREATE TABLE IF NOT EXISTS note_links (
source_note_id TEXT NOT NULL,
target_note_id TEXT NOT NULL,
score REAL NOT NULL,
created_at DATETIME NOT NULL,
PRIMARY KEY (source_note_id, target_note_id),
FOREIGN KEY (source_note_id) REFERENCES notes(id) ON DELETE CASCADE,
FOREIGN KEY (target_note_id) REFERENCES notes(id) ON DELETE CASCADE
);
CREATE INDEX idx_note_links_source ON note_links(source_note_id);
CREATE INDEX idx_note_links_target ON note_links(target_note_id);

View File

@@ -1,3 +1,4 @@
use notes_infra::factory::{EmbeddingProvider, VectorProvider};
use std::env;
/// Server configuration
@@ -9,6 +10,8 @@ pub struct Config {
pub session_secret: String,
pub cors_allowed_origins: Vec<String>,
pub allow_registration: bool,
pub embedding_provider: EmbeddingProvider,
pub vector_provider: VectorProvider,
}
impl Default for Config {
@@ -21,6 +24,11 @@ impl Default for Config {
.to_string(),
cors_allowed_origins: vec!["http://localhost:5173".to_string()],
allow_registration: true,
embedding_provider: EmbeddingProvider::FastEmbed,
vector_provider: VectorProvider::Qdrant {
url: "http://localhost:6334".to_string(),
collection: "notes".to_string(),
},
}
}
}
@@ -56,6 +64,19 @@ impl Config {
.map(|s| s.to_lowercase() == "true")
.unwrap_or(true);
let embedding_provider = match env::var("EMBEDDING_PROVIDER").unwrap_or_default().as_str() {
// Future: "ollama" => EmbeddingProvider::Ollama(...),
_ => EmbeddingProvider::FastEmbed,
};
let vector_provider = match env::var("VECTOR_PROVIDER").unwrap_or_default().as_str() {
// Future: "postgres" => ...
_ => VectorProvider::Qdrant {
url: env::var("QDRANT_URL").unwrap_or_else(|_| "http://localhost:6334".to_string()),
collection: env::var("QDRANT_COLLECTION").unwrap_or_else(|_| "notes".to_string()),
},
};
Self {
host,
port,
@@ -63,6 +84,8 @@ impl Config {
session_secret,
cors_allowed_origins,
allow_registration,
embedding_provider,
vector_provider,
}
}
}

View File

@@ -55,7 +55,9 @@ impl IntoResponse for ApiError {
DomainError::Unauthorized(_) => StatusCode::FORBIDDEN,
DomainError::RepositoryError(_) => StatusCode::INTERNAL_SERVER_ERROR,
DomainError::RepositoryError(_) | DomainError::InfrastructureError(_) => {
StatusCode::INTERNAL_SERVER_ERROR
}
};
(

View File

@@ -204,6 +204,27 @@ impl NoteVersion {
}
}
/// A derived link between two notes, typically generated by semantic similarity.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NoteLink {
pub source_note_id: Uuid,
pub target_note_id: Uuid,
/// Similarity score (0.0 to 1.0)
pub score: f32,
pub created_at: DateTime<Utc>,
}
impl NoteLink {
pub fn new(source_note_id: Uuid, target_note_id: Uuid, score: f32) -> Self {
Self {
source_note_id,
target_note_id,
score,
created_at: Utc::now(),
}
}
}
/// Filter options for querying notes
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct NoteFilter {

View File

@@ -47,6 +47,10 @@ pub enum DomainError {
/// A repository/infrastructure error occurred
#[error("Repository error: {0}")]
RepositoryError(String),
/// An infrastructure adapter error occurred
#[error("Infrastructure error: {0}")]
InfrastructureError(String),
}
impl DomainError {

View File

@@ -10,6 +10,7 @@
pub mod entities;
pub mod errors;
pub mod ports;
pub mod repositories;
pub mod services;

33
notes-domain/src/ports.rs Normal file
View File

@@ -0,0 +1,33 @@
use async_trait::async_trait;
use uuid::Uuid;
use crate::entities::NoteLink;
use crate::errors::DomainResult;
/// Defines how to generate vector embeddings from text.
#[async_trait]
pub trait EmbeddingGenerator: Send + Sync {
/// Generate a vector embedding for the given text.
async fn generate_embedding(&self, text: &str) -> DomainResult<Vec<f32>>;
}
/// Defines how to store and retrieve vectors.
#[async_trait]
pub trait VectorStore: Send + Sync {
/// Upsert a vector for a given note ID.
async fn upsert(&self, id: Uuid, vector: &[f32]) -> DomainResult<()>;
/// Find similar items to the given vector.
/// Returns a list of (NoteID, Score) tuples.
async fn find_similar(&self, vector: &[f32], limit: usize) -> DomainResult<Vec<(Uuid, f32)>>;
}
/// Defines how to persist note links.
#[async_trait]
pub trait LinkRepository: Send + Sync {
/// Save a batch of generated links.
async fn save_links(&self, links: &[NoteLink]) -> DomainResult<()>;
/// Delete existing links for a specific source note (e.g., before regenerating).
async fn delete_links_for_source(&self, source_note_id: Uuid) -> DomainResult<()>;
}

View File

@@ -356,6 +356,58 @@ impl UserService {
}
}
/// Service for Smart Features (Embeddings, Vector Search, Linking)
pub struct SmartNoteService {
embedding_generator: Arc<dyn crate::ports::EmbeddingGenerator>,
vector_store: Arc<dyn crate::ports::VectorStore>,
link_repo: Arc<dyn crate::ports::LinkRepository>,
}
impl SmartNoteService {
pub fn new(
embedding_generator: Arc<dyn crate::ports::EmbeddingGenerator>,
vector_store: Arc<dyn crate::ports::VectorStore>,
link_repo: Arc<dyn crate::ports::LinkRepository>,
) -> Self {
Self {
embedding_generator,
vector_store,
link_repo,
}
}
/// Process a note to generate embeddings and find similar notes
pub async fn process_note(&self, note: &Note) -> DomainResult<()> {
// 1. Generate embedding
let embedding = self
.embedding_generator
.generate_embedding(&note.content)
.await?;
// 2. Upsert to vector store
self.vector_store.upsert(note.id, &embedding).await?;
// 3. Find similar notes
// TODO: Make limit configurable
let similar = self.vector_store.find_similar(&embedding, 5).await?;
// 4. Create links
let links: Vec<crate::entities::NoteLink> = similar
.into_iter()
.filter(|(id, _)| *id != note.id) // Exclude self
.map(|(target_id, score)| crate::entities::NoteLink::new(note.id, target_id, score))
.collect();
// 5. Save links (replacing old ones)
if !links.is_empty() {
self.link_repo.delete_links_for_source(note.id).await?;
self.link_repo.save_links(&links).await?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -19,3 +19,6 @@ tracing = "0.1"
uuid = { version = "1.19.0", features = ["v4", "serde"] }
tower-sessions = "0.14.0"
tower-sessions-sqlx-store = { version = "0.15.0", default-features = false }
fastembed = "5.4"
qdrant-client = "1.16"
serde_json = "1.0"

View File

@@ -0,0 +1,48 @@
use async_trait::async_trait;
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
use notes_domain::errors::{DomainError, DomainResult};
use notes_domain::ports::EmbeddingGenerator;
use std::sync::{Arc, Mutex};
pub struct FastEmbedAdapter {
model: Arc<Mutex<TextEmbedding>>,
}
impl FastEmbedAdapter {
pub fn new() -> DomainResult<Self> {
let mut options = InitOptions::default();
options.model_name = EmbeddingModel::AllMiniLML6V2;
options.show_download_progress = false;
let model = TextEmbedding::try_new(options).map_err(|e| {
DomainError::InfrastructureError(format!("Failed to init fastembed: {}", e))
})?;
Ok(Self {
model: Arc::new(Mutex::new(model)),
})
}
}
#[async_trait]
impl EmbeddingGenerator for FastEmbedAdapter {
async fn generate_embedding(&self, text: &str) -> DomainResult<Vec<f32>> {
let model = self.model.clone();
let text = text.to_string();
let embeddings = tokio::task::spawn_blocking(move || {
let mut model = model.lock().map_err(|e| format!("Lock error: {}", e))?;
model
.embed(vec![text], None)
.map_err(|e| format!("Embed error: {}", e))
})
.await
.map_err(|e| DomainError::InfrastructureError(format!("Join error: {}", e)))?
.map_err(|e| DomainError::InfrastructureError(e))?;
embeddings
.into_iter()
.next()
.ok_or_else(|| DomainError::InfrastructureError("No embedding generated".to_string()))
}
}

View File

@@ -0,0 +1 @@
pub mod fastembed;

View File

@@ -11,10 +11,61 @@ pub enum FactoryError {
Database(#[from] sqlx::Error),
#[error("Not implemented: {0}")]
NotImplemented(String),
#[error("Infrastructure error: {0}")]
Infrastructure(#[from] notes_domain::DomainError),
}
pub type FactoryResult<T> = Result<T, FactoryError>;
#[derive(Debug, Clone)]
pub enum EmbeddingProvider {
FastEmbed,
// Ollama(String), // Url
// OpenAI(String), // ApiKey
}
#[derive(Debug, Clone)]
pub enum VectorProvider {
Qdrant { url: String, collection: String },
// InMemory,
}
pub async fn build_embedding_generator(
provider: &EmbeddingProvider,
) -> FactoryResult<Arc<dyn notes_domain::ports::EmbeddingGenerator>> {
match provider {
EmbeddingProvider::FastEmbed => {
let adapter = crate::embeddings::fastembed::FastEmbedAdapter::new()?;
Ok(Arc::new(adapter))
}
}
}
pub async fn build_vector_store(
provider: &VectorProvider,
) -> FactoryResult<Arc<dyn notes_domain::ports::VectorStore>> {
match provider {
VectorProvider::Qdrant { url, collection } => {
let adapter = crate::vector::qdrant::QdrantVectorAdapter::new(url, collection)?;
Ok(Arc::new(adapter))
}
}
}
#[cfg(feature = "sqlite")]
pub async fn build_link_repository(
pool: &DatabasePool,
) -> FactoryResult<Arc<dyn notes_domain::ports::LinkRepository>> {
match pool {
DatabasePool::Sqlite(pool) => Ok(Arc::new(
crate::link_repository::SqliteLinkRepository::new(pool.clone()),
)),
_ => Err(FactoryError::NotImplemented(
"LinkRepostiory for non-sqlite".to_string(),
)),
}
}
pub async fn build_database_pool(db_config: &DatabaseConfig) -> FactoryResult<DatabasePool> {
if db_config.url.starts_with("sqlite:") {
#[cfg(feature = "sqlite")]

View File

@@ -15,20 +15,26 @@
//! - [`db::run_migrations`] - Run database migrations
pub mod db;
pub mod embeddings;
pub mod factory;
#[cfg(feature = "sqlite")]
pub mod link_repository;
#[cfg(feature = "sqlite")]
pub mod note_repository;
pub mod session_store;
#[cfg(feature = "sqlite")]
pub mod tag_repository;
#[cfg(feature = "sqlite")]
pub mod user_repository;
pub mod vector;
// Re-export for convenience
#[cfg(feature = "sqlite")]
pub use db::create_pool;
pub use db::{DatabaseConfig, run_migrations};
#[cfg(feature = "sqlite")]
pub use link_repository::SqliteLinkRepository;
#[cfg(feature = "sqlite")]
pub use note_repository::SqliteNoteRepository;
#[cfg(feature = "sqlite")]
pub use tag_repository::SqliteTagRepository;

View File

@@ -0,0 +1,68 @@
use async_trait::async_trait;
use sqlx::SqlitePool;
use uuid::Uuid;
use notes_domain::entities::NoteLink;
use notes_domain::errors::{DomainError, DomainResult};
use notes_domain::ports::LinkRepository;
pub struct SqliteLinkRepository {
pool: SqlitePool,
}
impl SqliteLinkRepository {
pub fn new(pool: SqlitePool) -> Self {
Self { pool }
}
}
#[async_trait]
impl LinkRepository for SqliteLinkRepository {
async fn save_links(&self, links: &[NoteLink]) -> DomainResult<()> {
let mut tx = self
.pool
.begin()
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
for link in links {
let source = link.source_note_id.to_string();
let target = link.target_note_id.to_string();
let created_at = link.created_at.to_rfc3339();
sqlx::query(
r#"
INSERT INTO note_links (source_note_id, target_note_id, score, created_at)
VALUES (?, ?, ?, ?)
ON CONFLICT(source_note_id, target_note_id) DO UPDATE SET
score = excluded.score,
created_at = excluded.created_at
"#,
)
.bind(source)
.bind(target)
.bind(link.score)
.bind(created_at)
.execute(&mut *tx)
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
}
tx.commit()
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
Ok(())
}
async fn delete_links_for_source(&self, source_note_id: Uuid) -> DomainResult<()> {
let source_str = source_note_id.to_string();
sqlx::query("DELETE FROM note_links WHERE source_note_id = ?")
.bind(source_str)
.execute(&self.pool)
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
Ok(())
}
}

View File

@@ -0,0 +1 @@
pub mod qdrant;

View File

@@ -0,0 +1,73 @@
use async_trait::async_trait;
use notes_domain::errors::{DomainError, DomainResult};
use notes_domain::ports::VectorStore;
use qdrant_client::Qdrant;
use qdrant_client::qdrant::{PointStruct, SearchPointsBuilder, UpsertPointsBuilder, Value};
use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;
pub struct QdrantVectorAdapter {
client: Arc<Qdrant>,
collection_name: String,
}
impl QdrantVectorAdapter {
pub fn new(url: &str, collection_name: &str) -> DomainResult<Self> {
let client = Qdrant::from_url(url).build().map_err(|e| {
DomainError::InfrastructureError(format!("Failed to create Qdrant client: {}", e))
})?;
Ok(Self {
client: Arc::new(client),
collection_name: collection_name.to_string(),
})
}
}
#[async_trait]
impl VectorStore for QdrantVectorAdapter {
async fn upsert(&self, id: Uuid, vector: &[f32]) -> DomainResult<()> {
let payload: HashMap<String, Value> = HashMap::new();
let point = PointStruct::new(id.to_string(), vector.to_vec(), payload);
let upsert_points = UpsertPointsBuilder::new(self.collection_name.clone(), vec![point]);
self.client
.upsert_points(upsert_points)
.await
.map_err(|e| DomainError::InfrastructureError(format!("Qdrant upsert error: {}", e)))?;
Ok(())
}
async fn find_similar(&self, vector: &[f32], limit: usize) -> DomainResult<Vec<(Uuid, f32)>> {
let search_points =
SearchPointsBuilder::new(self.collection_name.clone(), vector.to_vec(), limit as u64)
.with_payload(true);
let search_result = self
.client
.search_points(search_points)
.await
.map_err(|e| DomainError::InfrastructureError(format!("Qdrant search error: {}", e)))?;
let results = search_result
.result
.into_iter()
.filter_map(|point| {
let id = point.id?;
let uuid_str = match id.point_id_options? {
qdrant_client::qdrant::point_id::PointIdOptions::Uuid(u) => u,
_ => return None,
};
let uuid = Uuid::parse_str(&uuid_str).ok()?;
Some((uuid, point.score))
})
.collect();
Ok(results)
}
}

View File

@@ -1,7 +1,11 @@
use notes_infra::factory::{EmbeddingProvider, VectorProvider};
#[derive(Debug, Clone)]
pub struct Config {
pub broker_url: String,
pub database_url: String,
pub embedding_provider: EmbeddingProvider,
pub vector_provider: VectorProvider,
}
impl Default for Config {
@@ -9,6 +13,11 @@ impl Default for Config {
Self {
broker_url: "nats://localhost:4222".to_string(),
database_url: "sqlite::memory:".to_string(),
embedding_provider: EmbeddingProvider::FastEmbed,
vector_provider: VectorProvider::Qdrant {
url: "http://localhost:6334".to_string(),
collection: "notes".to_string(),
},
}
}
}
@@ -17,9 +26,30 @@ impl Config {
pub fn from_env() -> Self {
let _ = dotenvy::dotenv();
let embedding_provider = match std::env::var("EMBEDDING_PROVIDER")
.unwrap_or_default()
.as_str()
{
_ => EmbeddingProvider::FastEmbed,
};
let vector_provider = match std::env::var("VECTOR_PROVIDER")
.unwrap_or_default()
.as_str()
{
_ => VectorProvider::Qdrant {
url: std::env::var("QDRANT_URL")
.unwrap_or_else(|_| "http://localhost:6334".to_string()),
collection: std::env::var("QDRANT_COLLECTION")
.unwrap_or_else(|_| "notes".to_string()),
},
};
Self {
broker_url: std::env::var("BROKER_URL").unwrap_or("nats://localhost:4222".to_string()),
database_url: std::env::var("DATABASE_URL").unwrap_or("sqlite::memory:".to_string()),
embedding_provider,
vector_provider,
}
}
}

View File

@@ -1,4 +1,10 @@
use notes_infra::{DatabaseConfig, create_pool};
use notes_domain::services::SmartNoteService;
use notes_infra::{
DatabaseConfig,
factory::{
build_database_pool, build_embedding_generator, build_link_repository, build_vector_store,
},
};
use crate::config::Config;
@@ -7,10 +13,24 @@ mod config;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let config = Config::from_env();
let nats_client = async_nats::connect(config.broker_url).await?;
let db_config = DatabaseConfig::new(config.database_url);
let db_pool = create_pool(&db_config).await?;
let nats_client = async_nats::connect(&config.broker_url).await?;
let db_config = DatabaseConfig::new(config.database_url.clone());
let db_pool = build_database_pool(&db_config).await?;
// Initialize smart feature adapters
let embedding_generator = build_embedding_generator(&config.embedding_provider).await?;
let vector_store = build_vector_store(&config.vector_provider).await?;
let link_repo = build_link_repository(&db_pool).await?;
// Create the service
let smart_service = SmartNoteService::new(embedding_generator, vector_store, link_repo);
tracing::info!(
"SmartNoteService initialized successfully with {:?}",
config.embedding_provider
);
// subscribe to jobs and process them
// (Future: consume NATS messages and call smart_service.process_note(&note))
Ok(())
}