First batch of smart stuff
This commit is contained in:
@@ -4,9 +4,10 @@ version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[features]
|
||||
default = ["sqlite"]
|
||||
default = ["sqlite", "smart-features"]
|
||||
sqlite = ["sqlx/sqlite", "tower-sessions-sqlx-store/sqlite"]
|
||||
postgres = ["sqlx/postgres", "tower-sessions-sqlx-store/postgres"]
|
||||
smart-features = ["dep:fastembed", "dep:qdrant-client"]
|
||||
|
||||
[dependencies]
|
||||
notes-domain = { path = "../notes-domain" }
|
||||
@@ -19,3 +20,6 @@ tracing = "0.1"
|
||||
uuid = { version = "1.19.0", features = ["v4", "serde"] }
|
||||
tower-sessions = "0.14.0"
|
||||
tower-sessions-sqlx-store = { version = "0.15.0", default-features = false }
|
||||
fastembed = { version = "5.4", optional = true }
|
||||
qdrant-client = { version = "1.16", optional = true }
|
||||
serde_json = "1.0"
|
||||
|
||||
48
notes-infra/src/embeddings/fastembed.rs
Normal file
48
notes-infra/src/embeddings/fastembed.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
use async_trait::async_trait;
|
||||
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
|
||||
use notes_domain::errors::{DomainError, DomainResult};
|
||||
use notes_domain::ports::EmbeddingGenerator;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
pub struct FastEmbedAdapter {
|
||||
model: Arc<Mutex<TextEmbedding>>,
|
||||
}
|
||||
|
||||
impl FastEmbedAdapter {
|
||||
pub fn new() -> DomainResult<Self> {
|
||||
let mut options = InitOptions::default();
|
||||
options.model_name = EmbeddingModel::AllMiniLML6V2;
|
||||
options.show_download_progress = false;
|
||||
|
||||
let model = TextEmbedding::try_new(options).map_err(|e| {
|
||||
DomainError::InfrastructureError(format!("Failed to init fastembed: {}", e))
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
model: Arc::new(Mutex::new(model)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EmbeddingGenerator for FastEmbedAdapter {
|
||||
async fn generate_embedding(&self, text: &str) -> DomainResult<Vec<f32>> {
|
||||
let model = self.model.clone();
|
||||
let text = text.to_string();
|
||||
|
||||
let embeddings = tokio::task::spawn_blocking(move || {
|
||||
let mut model = model.lock().map_err(|e| format!("Lock error: {}", e))?;
|
||||
model
|
||||
.embed(vec![text], None)
|
||||
.map_err(|e| format!("Embed error: {}", e))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| DomainError::InfrastructureError(format!("Join error: {}", e)))?
|
||||
.map_err(|e| DomainError::InfrastructureError(e))?;
|
||||
|
||||
embeddings
|
||||
.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| DomainError::InfrastructureError("No embedding generated".to_string()))
|
||||
}
|
||||
}
|
||||
1
notes-infra/src/embeddings/mod.rs
Normal file
1
notes-infra/src/embeddings/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod fastembed;
|
||||
@@ -11,10 +11,66 @@ pub enum FactoryError {
|
||||
Database(#[from] sqlx::Error),
|
||||
#[error("Not implemented: {0}")]
|
||||
NotImplemented(String),
|
||||
#[error("Infrastructure error: {0}")]
|
||||
Infrastructure(#[from] notes_domain::DomainError),
|
||||
}
|
||||
|
||||
pub type FactoryResult<T> = Result<T, FactoryError>;
|
||||
|
||||
#[cfg(feature = "smart-features")]
|
||||
#[derive(Debug, Clone)]
|
||||
/// Embedding backends that `build_embedding_generator` knows how to construct.
///
/// Only compiled when the `smart-features` feature is enabled.
pub enum EmbeddingProvider {
|
||||
/// Use the `FastEmbedAdapter` from `crate::embeddings::fastembed`.
FastEmbed,
|
||||
// Not implemented yet: a remote Ollama backend identified by its URL.
// Ollama(String), // Url
|
||||
// Not implemented yet: an OpenAI backend identified by its API key.
// OpenAI(String), // ApiKey
|
||||
}
|
||||
|
||||
#[cfg(feature = "smart-features")]
|
||||
#[derive(Debug, Clone)]
|
||||
/// Vector-store backends that `build_vector_store` knows how to construct.
///
/// Only compiled when the `smart-features` feature is enabled.
pub enum VectorProvider {
|
||||
/// Use a Qdrant server: `url` and `collection` are passed to
/// `QdrantVectorAdapter::new` in that order.
Qdrant { url: String, collection: String },
|
||||
// Not implemented yet: an in-process store.
// InMemory,
|
||||
}
|
||||
|
||||
/// Builds the embedding generator selected by `provider`.
///
/// `FastEmbedAdapter::new` is a synchronous constructor that initialises the
/// embedding model (which may involve reading or downloading model files), so
/// it is run on Tokio's blocking thread pool instead of stalling the async
/// executor thread that polls this future.
///
/// # Errors
///
/// Returns [`FactoryError::Infrastructure`] when the adapter fails to
/// initialise, or when the blocking initialisation task panics or is
/// cancelled.
#[cfg(feature = "smart-features")]
pub async fn build_embedding_generator(
    provider: &EmbeddingProvider,
) -> FactoryResult<Arc<dyn notes_domain::ports::EmbeddingGenerator>> {
    match provider {
        EmbeddingProvider::FastEmbed => {
            let adapter =
                tokio::task::spawn_blocking(crate::embeddings::fastembed::FastEmbedAdapter::new)
                    .await
                    .map_err(|e| {
                        notes_domain::DomainError::InfrastructureError(format!(
                            "Embedding model initialisation task failed: {}",
                            e
                        ))
                    })??; // first `?`: join failure, second `?`: constructor failure
            Ok(Arc::new(adapter))
        }
    }
}
|
||||
|
||||
/// Builds the vector store selected by `provider`.
///
/// For Qdrant the client is created first and the configured collection is
/// then created if it does not exist yet, so the returned store is ready for
/// use.
///
/// # Errors
///
/// Returns [`FactoryError::Infrastructure`] when the client cannot be created
/// or when checking for / creating the collection fails.
#[cfg(feature = "smart-features")]
pub async fn build_vector_store(
    provider: &VectorProvider,
) -> FactoryResult<Arc<dyn notes_domain::ports::VectorStore>> {
    let store: Arc<dyn notes_domain::ports::VectorStore> = match provider {
        VectorProvider::Qdrant { url, collection } => {
            let qdrant = crate::vector::qdrant::QdrantVectorAdapter::new(url, collection)?;
            qdrant.create_collection_if_not_exists().await?;
            Arc::new(qdrant)
        }
    };

    Ok(store)
}
|
||||
|
||||
/// Builds the link repository for the given database `pool`.
///
/// Only the SQLite pool has an implementation; it is wrapped in a
/// `SqliteLinkRepository` that shares the pool (the pool handle is cloned).
///
/// # Errors
///
/// Returns [`FactoryError::NotImplemented`] for every non-SQLite pool variant.
#[cfg(feature = "sqlite")]
pub async fn build_link_repository(
    pool: &DatabasePool,
) -> FactoryResult<Arc<dyn notes_domain::ports::LinkRepository>> {
    match pool {
        DatabasePool::Sqlite(pool) => Ok(Arc::new(
            crate::link_repository::SqliteLinkRepository::new(pool.clone()),
        )),
        _ => Err(FactoryError::NotImplemented(
            "LinkRepository for non-sqlite".to_string(),
        )),
    }
}
|
||||
|
||||
pub async fn build_database_pool(db_config: &DatabaseConfig) -> FactoryResult<DatabasePool> {
|
||||
if db_config.url.starts_with("sqlite:") {
|
||||
#[cfg(feature = "sqlite")]
|
||||
|
||||
@@ -15,20 +15,28 @@
|
||||
//! - [`db::run_migrations`] - Run database migrations
|
||||
|
||||
pub mod db;
|
||||
#[cfg(feature = "smart-features")]
|
||||
pub mod embeddings;
|
||||
pub mod factory;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod link_repository;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod note_repository;
|
||||
pub mod session_store;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod tag_repository;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub mod user_repository;
|
||||
#[cfg(feature = "smart-features")]
|
||||
pub mod vector;
|
||||
|
||||
// Re-export for convenience
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub use db::create_pool;
|
||||
pub use db::{DatabaseConfig, run_migrations};
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub use link_repository::SqliteLinkRepository;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub use note_repository::SqliteNoteRepository;
|
||||
#[cfg(feature = "sqlite")]
|
||||
pub use tag_repository::SqliteTagRepository;
|
||||
|
||||
111
notes-infra/src/link_repository.rs
Normal file
111
notes-infra/src/link_repository.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
use async_trait::async_trait;
|
||||
use sqlx::SqlitePool;
|
||||
use uuid::Uuid;
|
||||
|
||||
use notes_domain::entities::NoteLink;
|
||||
use notes_domain::errors::{DomainError, DomainResult};
|
||||
use notes_domain::ports::LinkRepository;
|
||||
|
||||
pub struct SqliteLinkRepository {
|
||||
pool: SqlitePool,
|
||||
}
|
||||
|
||||
impl SqliteLinkRepository {
|
||||
pub fn new(pool: SqlitePool) -> Self {
|
||||
Self { pool }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LinkRepository for SqliteLinkRepository {
|
||||
async fn save_links(&self, links: &[NoteLink]) -> DomainResult<()> {
|
||||
let mut tx = self
|
||||
.pool
|
||||
.begin()
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
|
||||
for link in links {
|
||||
let source = link.source_note_id.to_string();
|
||||
let target = link.target_note_id.to_string();
|
||||
let created_at = link.created_at.to_rfc3339();
|
||||
|
||||
sqlx::query(
|
||||
r#"
|
||||
INSERT INTO note_links (source_note_id, target_note_id, score, created_at)
|
||||
VALUES (?, ?, ?, ?)
|
||||
ON CONFLICT(source_note_id, target_note_id) DO UPDATE SET
|
||||
score = excluded.score,
|
||||
created_at = excluded.created_at
|
||||
"#,
|
||||
)
|
||||
.bind(source)
|
||||
.bind(target)
|
||||
.bind(link.score)
|
||||
.bind(created_at)
|
||||
.execute(&mut *tx)
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
}
|
||||
|
||||
tx.commit()
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn delete_links_for_source(&self, source_note_id: Uuid) -> DomainResult<()> {
|
||||
let source_str = source_note_id.to_string();
|
||||
sqlx::query("DELETE FROM note_links WHERE source_note_id = ?")
|
||||
.bind(source_str)
|
||||
.execute(&self.pool)
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_links_for_note(&self, source_note_id: Uuid) -> DomainResult<Vec<NoteLink>> {
|
||||
let source_str = source_note_id.to_string();
|
||||
|
||||
// We select links where the note is the source
|
||||
// TODO: Should we also include links where the note is the target?
|
||||
// For now, let's stick to outgoing links as defined by the service logic.
|
||||
// Actually, semantic similarity is symmetric, but we only save (A -> B) if we process A.
|
||||
// Ideally we should look for both directions or enforce symmetry.
|
||||
// Given current implementation saves A->B when A is processed, if B is processed it saves B->A.
|
||||
// So just querying source_note_id is fine if we assume all notes are processed.
|
||||
|
||||
let links = sqlx::query_as::<_, SqliteNoteLink>(
|
||||
"SELECT * FROM note_links WHERE source_note_id = ? ORDER BY score DESC",
|
||||
)
|
||||
.bind(source_str)
|
||||
.fetch_all(&self.pool)
|
||||
.await
|
||||
.map_err(|e| DomainError::RepositoryError(e.to_string()))?;
|
||||
|
||||
Ok(links.into_iter().map(NoteLink::from).collect())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(sqlx::FromRow)]
|
||||
struct SqliteNoteLink {
|
||||
source_note_id: String,
|
||||
target_note_id: String,
|
||||
score: f32,
|
||||
created_at: String, // Stored as ISO string
|
||||
}
|
||||
|
||||
impl From<SqliteNoteLink> for NoteLink {
|
||||
fn from(row: SqliteNoteLink) -> Self {
|
||||
Self {
|
||||
source_note_id: Uuid::parse_str(&row.source_note_id).unwrap_or_default(),
|
||||
target_note_id: Uuid::parse_str(&row.target_note_id).unwrap_or_default(),
|
||||
score: row.score,
|
||||
created_at: chrono::DateTime::parse_from_rfc3339(&row.created_at)
|
||||
.unwrap_or_default()
|
||||
.with_timezone(&chrono::Utc),
|
||||
}
|
||||
}
|
||||
}
|
||||
1
notes-infra/src/vector/mod.rs
Normal file
1
notes-infra/src/vector/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod qdrant;
|
||||
101
notes-infra/src/vector/qdrant.rs
Normal file
101
notes-infra/src/vector/qdrant.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
use async_trait::async_trait;
|
||||
use notes_domain::errors::{DomainError, DomainResult};
|
||||
use notes_domain::ports::VectorStore;
|
||||
use qdrant_client::Qdrant;
|
||||
use qdrant_client::qdrant::{
|
||||
CreateCollectionBuilder, Distance, PointStruct, SearchPointsBuilder, UpsertPointsBuilder,
|
||||
Value, VectorParamsBuilder,
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub struct QdrantVectorAdapter {
|
||||
client: Arc<Qdrant>,
|
||||
collection_name: String,
|
||||
}
|
||||
|
||||
impl QdrantVectorAdapter {
|
||||
pub fn new(url: &str, collection_name: &str) -> DomainResult<Self> {
|
||||
let client = Qdrant::from_url(url).build().map_err(|e| {
|
||||
DomainError::InfrastructureError(format!("Failed to create Qdrant client: {}", e))
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
client: Arc::new(client),
|
||||
collection_name: collection_name.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn create_collection_if_not_exists(&self) -> DomainResult<()> {
|
||||
if !self
|
||||
.client
|
||||
.collection_exists(&self.collection_name)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
DomainError::InfrastructureError(format!(
|
||||
"Failed to check collection existence: {}",
|
||||
e
|
||||
))
|
||||
})?
|
||||
{
|
||||
self.client
|
||||
.create_collection(
|
||||
CreateCollectionBuilder::new(self.collection_name.clone())
|
||||
.vectors_config(VectorParamsBuilder::new(384, Distance::Cosine)),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
DomainError::InfrastructureError(format!("Failed to create collection: {}", e))
|
||||
})?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl VectorStore for QdrantVectorAdapter {
|
||||
async fn upsert(&self, id: Uuid, vector: &[f32]) -> DomainResult<()> {
|
||||
let payload: HashMap<String, Value> = HashMap::new();
|
||||
|
||||
let point = PointStruct::new(id.to_string(), vector.to_vec(), payload);
|
||||
|
||||
let upsert_points = UpsertPointsBuilder::new(self.collection_name.clone(), vec![point]);
|
||||
|
||||
self.client
|
||||
.upsert_points(upsert_points)
|
||||
.await
|
||||
.map_err(|e| DomainError::InfrastructureError(format!("Qdrant upsert error: {}", e)))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn find_similar(&self, vector: &[f32], limit: usize) -> DomainResult<Vec<(Uuid, f32)>> {
|
||||
let search_points =
|
||||
SearchPointsBuilder::new(self.collection_name.clone(), vector.to_vec(), limit as u64)
|
||||
.with_payload(true);
|
||||
|
||||
let search_result = self
|
||||
.client
|
||||
.search_points(search_points)
|
||||
.await
|
||||
.map_err(|e| DomainError::InfrastructureError(format!("Qdrant search error: {}", e)))?;
|
||||
|
||||
let results = search_result
|
||||
.result
|
||||
.into_iter()
|
||||
.filter_map(|point| {
|
||||
let id = point.id?;
|
||||
let uuid_str = match id.point_id_options? {
|
||||
qdrant_client::qdrant::point_id::PointIdOptions::Uuid(u) => u,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let uuid = Uuid::parse_str(&uuid_str).ok()?;
|
||||
Some((uuid, point.score))
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user