feat: extensible search engine with person entities (FTS5/tsvector)

This commit is contained in:
2026-05-12 18:45:24 +02:00
parent 763d622601
commit c6770659c5
45 changed files with 2421 additions and 86 deletions

View File

@@ -76,3 +76,8 @@ pub struct UpdateProfileCommand {
pub avatar_bytes: Option<Vec<u8>>,
pub avatar_content_type: Option<String>,
}
/// Command carrying the input for movie enrichment: which movie to enrich and the
/// profile data to attach to it.
pub struct EnrichMovieCommand {
/// Identifier of the movie the profile belongs to.
pub movie_id: domain::value_objects::MovieId,
/// Profile data for the movie identified by `movie_id`.
pub profile: domain::models::MovieProfile,
}

View File

@@ -5,6 +5,7 @@ use domain::ports::{
ImageStorage,
ImportProfileRepository, ImportSessionRepository,
MetadataClient, MovieProfileRepository, MovieRepository, PasswordHasher, PosterFetcherClient,
PersonCommand, PersonQuery, SearchCommand, SearchPort,
ReviewRepository, StatsRepository, UserRepository,
};
@@ -28,5 +29,9 @@ pub struct AppContext {
pub import_session_repository: Arc<dyn ImportSessionRepository>,
pub import_profile_repository: Arc<dyn ImportProfileRepository>,
pub movie_profile_repository: Arc<dyn MovieProfileRepository>,
pub person_command: Arc<dyn PersonCommand>,
pub person_query: Arc<dyn PersonQuery>,
pub search_port: Arc<dyn SearchPort>,
pub search_command: Arc<dyn SearchCommand>,
pub config: AppConfig,
}

View File

@@ -7,3 +7,6 @@ pub mod movie_resolver;
pub mod ports;
pub mod queries;
pub mod use_cases;
pub mod search_cleanup;
pub use search_cleanup::SearchCleanupHandler;

View File

@@ -0,0 +1,34 @@
use std::sync::Arc;
use async_trait::async_trait;
use domain::{
errors::DomainError,
events::DomainEvent,
models::EntityType,
ports::{EventHandler, SearchCommand},
};
/// Event handler that holds a shared `SearchCommand` port, used to remove entries
/// from search.
pub struct SearchCleanupHandler {
/// Shared handle to the search write port.
search_command: Arc<dyn SearchCommand>,
}
impl SearchCleanupHandler {
pub fn new(search_command: Arc<dyn SearchCommand>) -> Self {
Self { search_command }
}
}
#[async_trait]
impl EventHandler for SearchCleanupHandler {
    /// Reacts to `DomainEvent::MovieDeleted` by asking the search port to remove the movie.
    ///
    /// Every other event is ignored. A failed removal is logged as a warning and not
    /// propagated, so this handler always returns `Ok(())`.
    async fn handle(&self, event: &DomainEvent) -> Result<(), DomainError> {
        // Guard: only movie deletions are of interest here.
        let DomainEvent::MovieDeleted { movie_id, .. } = event else {
            return Ok(());
        };
        let movie_id = movie_id.value().to_string();

        let removal = self
            .search_command
            .remove(EntityType::Movie, &movie_id)
            .await;
        // Best effort: report the failure but do not fail event handling.
        if let Err(e) = removal {
            tracing::warn!("search cleanup failed for movie {movie_id}: {e}");
        }

        Ok(())
    }
}

View File

@@ -0,0 +1,96 @@
use std::collections::HashMap;
use std::sync::Arc;
use domain::{
errors::DomainError,
models::{
CastMember, CrewMember, ExternalPersonId, IndexableDocument, Person, PersonId,
},
ports::{MovieProfileRepository, MovieRepository, PersonCommand, SearchCommand},
};
use crate::commands::EnrichMovieCommand;
pub async fn execute(
movie_repository: &Arc<dyn MovieRepository>,
profile_repository: &Arc<dyn MovieProfileRepository>,
person_command: &Arc<dyn PersonCommand>,
search_command: &Arc<dyn SearchCommand>,
cmd: EnrichMovieCommand,
) -> Result<(), DomainError> {
// 1. Persist the enriched profile (also handles movie_cast, movie_crew, genres, keywords)
profile_repository.upsert(&cmd.profile).await?;
// 2. Upsert persons extracted from cast + crew (no reads — only upsert)
let persons = extract_persons(&cmd.profile.cast, &cmd.profile.crew);
if !persons.is_empty() {
person_command.upsert_batch(&persons).await?;
}
// 3. Fetch the movie for the search index document
let Some(movie) = movie_repository.get_movie_by_id(&cmd.movie_id).await? else {
tracing::warn!(movie_id = %cmd.movie_id.value(), "enrich_movie: movie not found after profile upsert");
return Ok(());
};
// 4. Index the movie in search
search_command
.index(IndexableDocument::Movie {
id: cmd.movie_id.clone(),
movie: Box::new(movie),
profile: Some(Box::new(cmd.profile.clone())),
})
.await?;
// 5. Index each unique person in search (no reads — persons built from in-memory data)
for person in &persons {
search_command
.index(IndexableDocument::Person {
id: person.id().clone(),
person: Box::new(person.clone()),
})
.await?;
}
tracing::info!(
movie_id = %cmd.movie_id.value(),
persons = persons.len(),
"enrich_movie: profile stored and search index updated"
);
Ok(())
}
/// Build unique `Person` values from cast and crew, one per distinct `tmdb_person_id`.
///
/// Cast is processed before crew and the first occurrence of an id wins, so a person
/// credited in both lists keeps the `"Acting"` department. Each person gets the external
/// id `tmdb:<tmdb_person_id>` and a `PersonId` derived from it via
/// `PersonId::from_external` (a deterministic UUIDv5, so the same TMDb id always maps to
/// the same `PersonId`). No DB reads — persons are built entirely from in-memory TMDb data.
///
/// The result is sorted by TMDb id. `HashMap` iteration order is unspecified, so
/// returning the map's values directly would make the batch-upsert and indexing order
/// vary from run to run.
fn extract_persons(cast: &[CastMember], crew: &[CrewMember]) -> Vec<Person> {
    let mut seen: HashMap<u64, Person> = HashMap::new();

    for member in cast {
        seen.entry(member.tmdb_person_id).or_insert_with(|| {
            let ext = ExternalPersonId::new(format!("tmdb:{}", member.tmdb_person_id));
            Person::new(
                PersonId::from_external(&ext),
                ext,
                member.name.clone(),
                Some("Acting".to_string()),
                member.profile_path.clone(),
            )
        });
    }

    for member in crew {
        // `or_insert_with` leaves an existing cast entry untouched.
        seen.entry(member.tmdb_person_id).or_insert_with(|| {
            let ext = ExternalPersonId::new(format!("tmdb:{}", member.tmdb_person_id));
            Person::new(
                PersonId::from_external(&ext),
                ext,
                member.name.clone(),
                Some(member.department.clone()),
                member.profile_path.clone(),
            )
        });
    }

    // Keys are unique, so an unstable sort is sufficient and avoids extra allocation.
    let mut by_tmdb_id: Vec<(u64, Person)> = seen.into_iter().collect();
    by_tmdb_id.sort_unstable_by_key(|entry| entry.0);
    by_tmdb_id.into_iter().map(|(_, person)| person).collect()
}

View File

@@ -0,0 +1,6 @@
use domain::{errors::DomainError, models::{Person, PersonId}};
use crate::context::AppContext;
/// Looks up a single person by id through the context's person query port.
///
/// The port's result is returned unchanged: `Ok(Some(_))`, `Ok(None)` or the
/// `DomainError` it reported.
pub async fn execute(ctx: &AppContext, id: PersonId) -> Result<Option<Person>, DomainError> {
    let person_query = &ctx.person_query;
    person_query.get_by_id(&id).await
}

View File

@@ -0,0 +1,6 @@
use domain::{errors::DomainError, models::{PersonCredits, PersonId}};
use crate::context::AppContext;
/// Fetches the credits of the person with the given id through the context's person
/// query port.
///
/// The port's result, success or `DomainError`, is returned unchanged.
pub async fn execute(ctx: &AppContext, id: PersonId) -> Result<PersonCredits, DomainError> {
    let person_query = &ctx.person_query;
    person_query.get_credits(&id).await
}

View File

@@ -1,3 +1,4 @@
pub mod enrich_movie;
pub mod apply_import_mapping;
pub mod apply_import_profile;
pub mod cleanup_expired_import_sessions;
@@ -12,11 +13,14 @@ pub mod get_activity_feed;
pub mod get_diary;
pub mod get_movie_social_page;
pub mod get_movies;
pub mod get_person;
pub mod get_person_credits;
pub mod get_review_history;
pub mod get_user_profile;
pub mod get_users;
pub mod log_review;
pub mod login;
pub mod register;
pub mod search;
pub mod sync_poster;
pub mod update_profile;

View File

@@ -0,0 +1,6 @@
use domain::{errors::DomainError, models::{SearchQuery, SearchResults}};
use crate::context::AppContext;
/// Runs `query` against the context's search port and returns its results.
///
/// The port's result, success or `DomainError`, is returned unchanged.
pub async fn execute(ctx: &AppContext, query: SearchQuery) -> Result<SearchResults, DomainError> {
    let search_port = &ctx.search_port;
    search_port.search(&query).await
}