feat: Implement semantic search and smart note linking with embedding generation, vector storage, and link persistence.
This commit is contained in:
48
notes-infra/src/embeddings/fastembed.rs
Normal file
48
notes-infra/src/embeddings/fastembed.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
use async_trait::async_trait;
|
||||
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
|
||||
use notes_domain::errors::{DomainError, DomainResult};
|
||||
use notes_domain::ports::EmbeddingGenerator;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
pub struct FastEmbedAdapter {
|
||||
model: Arc<Mutex<TextEmbedding>>,
|
||||
}
|
||||
|
||||
impl FastEmbedAdapter {
|
||||
pub fn new() -> DomainResult<Self> {
|
||||
let mut options = InitOptions::default();
|
||||
options.model_name = EmbeddingModel::AllMiniLML6V2;
|
||||
options.show_download_progress = false;
|
||||
|
||||
let model = TextEmbedding::try_new(options).map_err(|e| {
|
||||
DomainError::InfrastructureError(format!("Failed to init fastembed: {}", e))
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
model: Arc::new(Mutex::new(model)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EmbeddingGenerator for FastEmbedAdapter {
|
||||
async fn generate_embedding(&self, text: &str) -> DomainResult<Vec<f32>> {
|
||||
let model = self.model.clone();
|
||||
let text = text.to_string();
|
||||
|
||||
let embeddings = tokio::task::spawn_blocking(move || {
|
||||
let mut model = model.lock().map_err(|e| format!("Lock error: {}", e))?;
|
||||
model
|
||||
.embed(vec![text], None)
|
||||
.map_err(|e| format!("Embed error: {}", e))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| DomainError::InfrastructureError(format!("Join error: {}", e)))?
|
||||
.map_err(|e| DomainError::InfrastructureError(e))?;
|
||||
|
||||
embeddings
|
||||
.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| DomainError::InfrastructureError("No embedding generated".to_string()))
|
||||
}
|
||||
}
|
||||
1
notes-infra/src/embeddings/mod.rs
Normal file
1
notes-infra/src/embeddings/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod fastembed;
|
||||
@@ -11,10 +11,61 @@ pub enum FactoryError {
|
||||
Database(#[from] sqlx::Error),
|
||||
#[error("Not implemented: {0}")]
|
||||
NotImplemented(String),
|
||||
#[error("Infrastructure error: {0}")]
|
||||
Infrastructure(#[from] notes_domain::DomainError),
|
||||
}
|
||||
|
||||
/// Result alias for the factory functions in this module; the error type is [`FactoryError`].
pub type FactoryResult<T> = Result<T, FactoryError>;
|
||||
|
||||
/// Selects which backend produces text embeddings.
///
/// Only one backend exists today. The initial draft also sketched
/// `Ollama(String)` (URL) and `OpenAI(String)` (API key) variants; neither is
/// implemented.
#[derive(Debug, Clone)]
pub enum EmbeddingProvider {
    /// Embeddings generated through the `fastembed` adapter.
    FastEmbed,
}
|
||||
|
||||
/// Selects which backend stores and searches embedding vectors.
///
/// The initial draft also sketched an `InMemory` variant; it is not
/// implemented.
#[derive(Debug, Clone)]
pub enum VectorProvider {
    /// A Qdrant server.
    Qdrant {
        /// URL handed to the Qdrant client.
        url: String,
        /// Name of the collection that vectors are written to and searched in.
        collection: String,
    },
}
|
||||
|
||||
pub async fn build_embedding_generator(
|
||||
provider: &EmbeddingProvider,
|
||||
) -> FactoryResult<Arc<dyn notes_domain::ports::EmbeddingGenerator>> {
|
||||
match provider {
|
||||
EmbeddingProvider::FastEmbed => {
|
||||
let adapter = crate::embeddings::fastembed::FastEmbedAdapter::new()?;
|
||||
Ok(Arc::new(adapter))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn build_vector_store(
|
||||
provider: &VectorProvider,
|
||||
) -> FactoryResult<Arc<dyn notes_domain::ports::VectorStore>> {
|
||||
match provider {
|
||||
VectorProvider::Qdrant { url, collection } => {
|
||||
let adapter = crate::vector::qdrant::QdrantVectorAdapter::new(url, collection)?;
|
||||
Ok(Arc::new(adapter))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the note-link repository for the given database pool.
///
/// Only the SQLite pool variant is supported.
///
/// # Errors
///
/// Returns [`FactoryError::NotImplemented`] for any other pool variant.
#[cfg(feature = "sqlite")]
pub async fn build_link_repository(
    pool: &DatabasePool,
) -> FactoryResult<Arc<dyn notes_domain::ports::LinkRepository>> {
    match pool {
        DatabasePool::Sqlite(pool) => Ok(Arc::new(
            crate::link_repository::SqliteLinkRepository::new(pool.clone()),
        )),
        _ => Err(FactoryError::NotImplemented(
            "LinkRepository for non-sqlite".to_string(),
        )),
    }
}
|
||||
|
||||
pub async fn build_database_pool(db_config: &DatabaseConfig) -> FactoryResult<DatabasePool> {
|
||||
if db_config.url.starts_with("sqlite:") {
|
||||
#[cfg(feature = "sqlite")]
|
||||
|
||||
@@ -15,20 +15,26 @@
|
||||
//! - [`db::run_migrations`] - Run database migrations
|
||||
|
||||
pub mod db;
|
||||
pub mod embeddings;
|
||||
pub mod factory;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod link_repository;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod note_repository;
|
||||
pub mod session_store;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod tag_repository;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod user_repository;
|
||||
pub mod vector;
|
||||
|
||||
// Re-export for convenience
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub use db::create_pool;
|
||||
pub use db::{DatabaseConfig, run_migrations};
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub use link_repository::SqliteLinkRepository;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub use note_repository::SqliteNoteRepository;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub use tag_repository::SqliteTagRepository;
|
||||
|
||||
68
notes-infra/src/link_repository.rs
Normal file
68
notes-infra/src/link_repository.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
use async_trait::async_trait;
|
||||
use sqlx::SqlitePool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use notes_domain::entities::NoteLink;
|
||||
use notes_domain::errors::{DomainError, DomainResult};
|
||||
use notes_domain::ports::LinkRepository;
|
||||
|
||||
pub struct SqliteLinkRepository {
|
||||
pool: SqlitePool,
|
||||
}
|
||||
|
||||
impl SqliteLinkRepository {
|
||||
pub fn new(pool: SqlitePool) -> Self {
|
||||
Self { pool }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LinkRepository for SqliteLinkRepository {
|
||||
async fn save_links(&self, links: &[NoteLink]) -> DomainResult<()> {
|
||||
let mut tx = self
|
||||
.pool
|
||||
.begin()
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
|
||||
for link in links {
|
||||
let source = link.source_note_id.to_string();
|
||||
let target = link.target_note_id.to_string();
|
||||
let created_at = link.created_at.to_rfc3339();
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO note_links (source_note_id, target_note_id, score, created_at)
|
||||
VALUES (?, ?, ?, ?)
|
||||
ON CONFLICT(source_note_id, target_note_id) DO UPDATE SET
|
||||
score = excluded.score,
|
||||
created_at = excluded.created_at
|
||||
"#,
|
||||
)
|
||||
.bind(source)
|
||||
.bind(target)
|
||||
.bind(link.score)
|
||||
.bind(created_at)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
}
|
||||
|
||||
tx.commit()
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete_links_for_source(&self, source_note_id: Uuid) -> DomainResult<()> {
|
||||
let source_str = source_note_id.to_string();
|
||||
sqlx::query("DELETE FROM note_links WHERE source_note_id = ?")
|
||||
.bind(source_str)
|
||||
.execute(&self.pool)
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
1
notes-infra/src/vector/mod.rs
Normal file
1
notes-infra/src/vector/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod qdrant;
|
||||
73
notes-infra/src/vector/qdrant.rs
Normal file
73
notes-infra/src/vector/qdrant.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
use async_trait::async_trait;
|
||||
use notes_domain::errors::{DomainError, DomainResult};
|
||||
use notes_domain::ports::VectorStore;
|
||||
use qdrant_client::Qdrant;
|
||||
use qdrant_client::qdrant::{PointStruct, SearchPointsBuilder, UpsertPointsBuilder, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub struct QdrantVectorAdapter {
|
||||
client: Arc<Qdrant>,
|
||||
collection_name: String,
|
||||
}
|
||||
|
||||
impl QdrantVectorAdapter {
|
||||
pub fn new(url: &str, collection_name: &str) -> DomainResult<Self> {
|
||||
let client = Qdrant::from_url(url).build().map_err(|e| {
|
||||
DomainError::InfrastructureError(format!("Failed to create Qdrant client: {}", e))
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
client: Arc::new(client),
|
||||
collection_name: collection_name.to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl VectorStore for QdrantVectorAdapter {
|
||||
async fn upsert(&self, id: Uuid, vector: &[f32]) -> DomainResult<()> {
|
||||
let payload: HashMap<String, Value> = HashMap::new();
|
||||
|
||||
let point = PointStruct::new(id.to_string(), vector.to_vec(), payload);
|
||||
|
||||
let upsert_points = UpsertPointsBuilder::new(self.collection_name.clone(), vec![point]);
|
||||
|
||||
self.client
|
||||
.upsert_points(upsert_points)
|
||||
.await
|
||||
.map_err(|e| DomainError::InfrastructureError(format!("Qdrant upsert error: {}", e)))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn find_similar(&self, vector: &[f32], limit: usize) -> DomainResult<Vec<(Uuid, f32)>> {
|
||||
let search_points =
|
||||
SearchPointsBuilder::new(self.collection_name.clone(), vector.to_vec(), limit as u64)
|
||||
.with_payload(true);
|
||||
|
||||
let search_result = self
|
||||
.client
|
||||
.search_points(search_points)
|
||||
.await
|
||||
.map_err(|e| DomainError::InfrastructureError(format!("Qdrant search error: {}", e)))?;
|
||||
|
||||
let results = search_result
|
||||
.result
|
||||
.into_iter()
|
||||
.filter_map(|point| {
|
||||
let id = point.id?;
|
||||
let uuid_str = match id.point_id_options? {
|
||||
qdrant_client::qdrant::point_id::PointIdOptions::Uuid(u) => u,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let uuid = Uuid::parse_str(&uuid_str).ok()?;
|
||||
Some((uuid, point.score))
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user