First batch of smart stuff

This commit is contained in:
2025-12-25 23:53:12 +00:00
parent 4cb398869d
commit 58de25e5bc
34 changed files with 2974 additions and 74 deletions

View File

@@ -0,0 +1,48 @@
use async_trait::async_trait;
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
use notes_domain::errors::{DomainError, DomainResult};
use notes_domain::ports::EmbeddingGenerator;
use std::sync::{Arc, Mutex};
/// Embedding generator backed by a local `fastembed` text-embedding model.
///
/// The model sits behind `Arc<Mutex<_>>` so a handle to it can be moved into
/// a blocking task and locked there while an embedding is computed.
pub struct FastEmbedAdapter {
// Shared, lock-protected handle to the loaded embedding model.
model: Arc<Mutex<TextEmbedding>>,
}
impl FastEmbedAdapter {
    /// Loads the `AllMiniLML6V2` embedding model with download progress
    /// output disabled.
    ///
    /// # Errors
    ///
    /// Returns [`DomainError::InfrastructureError`] when `fastembed` fails to
    /// initialise the model.
    pub fn new() -> DomainResult<Self> {
        let mut init = InitOptions::default();
        init.show_download_progress = false;
        init.model_name = EmbeddingModel::AllMiniLML6V2;

        match TextEmbedding::try_new(init) {
            Ok(engine) => Ok(Self {
                model: Arc::new(Mutex::new(engine)),
            }),
            Err(cause) => Err(DomainError::InfrastructureError(format!(
                "Failed to init fastembed: {}",
                cause
            ))),
        }
    }
}
#[async_trait]
impl EmbeddingGenerator for FastEmbedAdapter {
async fn generate_embedding(&self, text: &str) -> DomainResult<Vec<f32>> {
let model = self.model.clone();
let text = text.to_string();
let embeddings = tokio::task::spawn_blocking(move || {
let mut model = model.lock().map_err(|e| format!("Lock error: {}", e))?;
model
.embed(vec![text], None)
.map_err(|e| format!("Embed error: {}", e))
})
.await
.map_err(|e| DomainError::InfrastructureError(format!("Join error: {}", e)))?
.map_err(|e| DomainError::InfrastructureError(e))?;
embeddings
.into_iter()
.next()
.ok_or_else(|| DomainError::InfrastructureError("No embedding generated".to_string()))
}
}

View File

@@ -0,0 +1 @@
pub mod fastembed;

View File

@@ -11,10 +11,66 @@ pub enum FactoryError {
Database(#[from] sqlx::Error),
#[error("Not implemented: {0}")]
NotImplemented(String),
#[error("Infrastructure error: {0}")]
Infrastructure(#[from] notes_domain::DomainError),
}
/// Convenience alias for results whose error type is [`FactoryError`].
pub type FactoryResult<T> = Result<T, FactoryError>;
/// Selects which embedding backend `build_embedding_generator` constructs.
#[cfg(feature = "smart-features")]
#[derive(Debug, Clone)]
pub enum EmbeddingProvider {
/// Local embedding via the `fastembed` adapter.
FastEmbed,
// Planned variants, not implemented yet:
//   Ollama(String) - carrying the server URL
//   OpenAI(String) - carrying the API key
}
/// Selects which vector store backend `build_vector_store` constructs.
#[cfg(feature = "smart-features")]
#[derive(Debug, Clone)]
pub enum VectorProvider {
/// Qdrant server at `url`, storing vectors in the named `collection`.
Qdrant { url: String, collection: String },
// Planned variant, not implemented yet: InMemory
}
/// Builds the embedding generator selected by `provider`.
///
/// # Errors
///
/// Propagates any error raised while constructing the chosen adapter,
/// converted into a [`FactoryError`].
#[cfg(feature = "smart-features")]
pub async fn build_embedding_generator(
    provider: &EmbeddingProvider,
) -> FactoryResult<Arc<dyn notes_domain::ports::EmbeddingGenerator>> {
    use crate::embeddings::fastembed::FastEmbedAdapter;

    let generator: Arc<dyn notes_domain::ports::EmbeddingGenerator> = match provider {
        EmbeddingProvider::FastEmbed => Arc::new(FastEmbedAdapter::new()?),
    };
    Ok(generator)
}
/// Builds the vector store selected by `provider`.
///
/// For Qdrant, the adapter is created and its collection is ensured to exist
/// before the store is handed back.
///
/// # Errors
///
/// Propagates adapter construction and collection setup failures, converted
/// into a [`FactoryError`].
#[cfg(feature = "smart-features")]
pub async fn build_vector_store(
    provider: &VectorProvider,
) -> FactoryResult<Arc<dyn notes_domain::ports::VectorStore>> {
    use crate::vector::qdrant::QdrantVectorAdapter;

    match provider {
        VectorProvider::Qdrant { url, collection } => {
            let store = QdrantVectorAdapter::new(url, collection)?;
            store.create_collection_if_not_exists().await?;
            let store: Arc<dyn notes_domain::ports::VectorStore> = Arc::new(store);
            Ok(store)
        }
    }
}
/// Builds the link repository matching the backend of `pool`.
///
/// # Errors
///
/// Returns [`FactoryError::NotImplemented`] when `pool` is not a SQLite pool,
/// since only the SQLite link repository exists.
#[cfg(feature = "sqlite")]
pub async fn build_link_repository(
    pool: &DatabasePool,
) -> FactoryResult<Arc<dyn notes_domain::ports::LinkRepository>> {
    match pool {
        DatabasePool::Sqlite(pool) => Ok(Arc::new(
            crate::link_repository::SqliteLinkRepository::new(pool.clone()),
        )),
        _ => Err(FactoryError::NotImplemented(
            "LinkRepository for non-sqlite".to_string(),
        )),
    }
}
pub async fn build_database_pool(db_config: &DatabaseConfig) -> FactoryResult<DatabasePool> {
if db_config.url.starts_with("sqlite:") {
#[cfg(feature = "sqlite")]

View File

@@ -15,20 +15,28 @@
//! - [`db::run_migrations`] - Run database migrations
pub mod db;
#[cfg(feature = "smart-features")]
pub mod embeddings;
pub mod factory;
#[cfg(feature = "sqlite")]
pub mod link_repository;
#[cfg(feature = "sqlite")]
pub mod note_repository;
pub mod session_store;
#[cfg(feature = "sqlite")]
pub mod tag_repository;
#[cfg(feature = "sqlite")]
pub mod user_repository;
#[cfg(feature = "smart-features")]
pub mod vector;
// Re-export for convenience
#[cfg(feature = "sqlite")]
pub use db::create_pool;
pub use db::{DatabaseConfig, run_migrations};
#[cfg(feature = "sqlite")]
pub use link_repository::SqliteLinkRepository;
#[cfg(feature = "sqlite")]
pub use note_repository::SqliteNoteRepository;
#[cfg(feature = "sqlite")]
pub use tag_repository::SqliteTagRepository;

View File

@@ -0,0 +1,111 @@
use async_trait::async_trait;
use sqlx::SqlitePool;
use uuid::Uuid;
use notes_domain::entities::NoteLink;
use notes_domain::errors::{DomainError, DomainResult};
use notes_domain::ports::LinkRepository;
/// SQLite-backed implementation of the `LinkRepository` port.
pub struct SqliteLinkRepository {
// Connection pool used for every query issued by this repository.
pool: SqlitePool,
}
impl SqliteLinkRepository {
pub fn new(pool: SqlitePool) -> Self {
Self { pool }
}
}
#[async_trait]
impl LinkRepository for SqliteLinkRepository {
async fn save_links(&self, links: &[NoteLink]) -> DomainResult<()> {
let mut tx = self
.pool
.begin()
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
for link in links {
let source = link.source_note_id.to_string();
let target = link.target_note_id.to_string();
let created_at = link.created_at.to_rfc3339();
sqlx::query(
r#"
INSERT INTO note_links (source_note_id, target_note_id, score, created_at)
VALUES (?, ?, ?, ?)
ON CONFLICT(source_note_id, target_note_id) DO UPDATE SET
score = excluded.score,
created_at = excluded.created_at
"#,
)
.bind(source)
.bind(target)
.bind(link.score)
.bind(created_at)
.execute(&mut *tx)
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
}
tx.commit()
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
Ok(())
}
async fn delete_links_for_source(&self, source_note_id: Uuid) -> DomainResult<()> {
let source_str = source_note_id.to_string();
sqlx::query("DELETE FROM note_links WHERE source_note_id = ?")
.bind(source_str)
.execute(&self.pool)
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
Ok(())
}
async fn get_links_for_note(&self, source_note_id: Uuid) -> DomainResult<Vec<NoteLink>> {
let source_str = source_note_id.to_string();
// We select links where the note is the source
// TODO: Should we also include links where the note is the target?
// For now, let's stick to outgoing links as defined by the service logic.
// Actually, semantic similarity is symmetric, but we only save (A -> B) if we process A.
// Ideally we should look for both directions or enforce symmetry.
// Given current implementation saves A->B when A is processed, if B is processed it saves B->A.
// So just querying source_note_id is fine if we assume all notes are processed.
let links = sqlx::query_as::<_, SqliteNoteLink>(
"SELECT * FROM note_links WHERE source_note_id = ? ORDER BY score DESC",
)
.bind(source_str)
.fetch_all(&self.pool)
.await
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
Ok(links.into_iter().map(NoteLink::from).collect())
}
}
#[derive(sqlx::FromRow)]
struct SqliteNoteLink {
source_note_id: String,
target_note_id: String,
score: f32,
created_at: String, // Stored as ISO string
}
impl From<SqliteNoteLink> for NoteLink {
fn from(row: SqliteNoteLink) -> Self {
Self {
source_note_id: Uuid::parse_str(&row.source_note_id).unwrap_or_default(),
target_note_id: Uuid::parse_str(&row.target_note_id).unwrap_or_default(),
score: row.score,
created_at: chrono::DateTime::parse_from_rfc3339(&row.created_at)
.unwrap_or_default()
.with_timezone(&chrono::Utc),
}
}
}

View File

@@ -0,0 +1 @@
pub mod qdrant;

View File

@@ -0,0 +1,101 @@
use async_trait::async_trait;
use notes_domain::errors::{DomainError, DomainResult};
use notes_domain::ports::VectorStore;
use qdrant_client::Qdrant;
use qdrant_client::qdrant::{
CreateCollectionBuilder, Distance, PointStruct, SearchPointsBuilder, UpsertPointsBuilder,
Value, VectorParamsBuilder,
};
use std::collections::HashMap;
use std::sync::Arc;
use uuid::Uuid;
/// `VectorStore` implementation backed by a Qdrant collection.
pub struct QdrantVectorAdapter {
// Shared Qdrant client handle.
client: Arc<Qdrant>,
// Name of the collection every operation of this adapter targets.
collection_name: String,
}
impl QdrantVectorAdapter {
    /// Creates an adapter for the Qdrant instance at `url` that operates on
    /// `collection_name`.
    ///
    /// # Errors
    ///
    /// Returns [`DomainError::InfrastructureError`] when the client cannot be
    /// built.
    pub fn new(url: &str, collection_name: &str) -> DomainResult<Self> {
        match Qdrant::from_url(url).build() {
            Ok(client) => Ok(Self {
                client: Arc::new(client),
                collection_name: collection_name.to_owned(),
            }),
            Err(cause) => Err(DomainError::InfrastructureError(format!(
                "Failed to create Qdrant client: {}",
                cause
            ))),
        }
    }

    /// Creates the adapter's collection unless it already exists.
    ///
    /// New collections use cosine distance and 384-dimensional vectors.
    ///
    /// # Errors
    ///
    /// Returns [`DomainError::InfrastructureError`] when the existence check
    /// or the creation request fails.
    pub async fn create_collection_if_not_exists(&self) -> DomainResult<()> {
        // NOTE(review): presumably chosen to match the embedding model's
        // output size; confirm whenever the model changes.
        const VECTOR_SIZE: u64 = 384;

        let already_there = self
            .client
            .collection_exists(&self.collection_name)
            .await
            .map_err(|cause| {
                DomainError::InfrastructureError(format!(
                    "Failed to check collection existence: {}",
                    cause
                ))
            })?;
        if already_there {
            return Ok(());
        }

        let request = CreateCollectionBuilder::new(self.collection_name.clone())
            .vectors_config(VectorParamsBuilder::new(VECTOR_SIZE, Distance::Cosine));
        self.client.create_collection(request).await.map_err(|cause| {
            DomainError::InfrastructureError(format!("Failed to create collection: {}", cause))
        })?;

        Ok(())
    }
}
#[async_trait]
impl VectorStore for QdrantVectorAdapter {
async fn upsert(&self, id: Uuid, vector: &[f32]) -> DomainResult<()> {
let payload: HashMap<String, Value> = HashMap::new();
let point = PointStruct::new(id.to_string(), vector.to_vec(), payload);
let upsert_points = UpsertPointsBuilder::new(self.collection_name.clone(), vec![point]);
self.client
.upsert_points(upsert_points)
.await
.map_err(|e| DomainError::InfrastructureError(format!("Qdrant upsert error: {}", e)))?;
Ok(())
}
async fn find_similar(&self, vector: &[f32], limit: usize) -> DomainResult<Vec<(Uuid, f32)>> {
let search_points =
SearchPointsBuilder::new(self.collection_name.clone(), vector.to_vec(), limit as u64)
.with_payload(true);
let search_result = self
.client
.search_points(search_points)
.await
.map_err(|e| DomainError::InfrastructureError(format!("Qdrant search error: {}", e)))?;
let results = search_result
.result
.into_iter()
.filter_map(|point| {
let id = point.id?;
let uuid_str = match id.point_id_options? {
qdrant_client::qdrant::point_id::PointIdOptions::Uuid(u) => u,
_ => return None,
};
let uuid = Uuid::parse_str(&uuid_str).ok()?;
Some((uuid, point.score))
})
.collect();
Ok(results)
}
}