feat: extensible search engine with person entities (FTS5/tsvector)

This commit is contained in:
2026-05-12 18:45:24 +02:00
parent 763d622601
commit c6770659c5
45 changed files with 2421 additions and 86 deletions

View File

@@ -12,6 +12,8 @@ pub mod collections;
pub mod import;
pub mod import_session;
pub mod import_profile;
pub mod person;
pub mod search;
pub use import::{
AnnotatedRow, DomainField, FieldMapping, FileFormat, ImportError,
@@ -19,6 +21,11 @@ pub use import::{
};
pub use import_session::ImportSession;
pub use import_profile::ImportProfile;
pub use person::{CastCredit, CrewCredit, ExternalPersonId, Person, PersonCredits, PersonId};
pub use search::{
EntityType, IndexableDocument, MovieSearchHit, PersonSearchHit,
SearchFilters, SearchQuery, SearchResults,
};
#[derive(Clone, Debug, Default)]
pub enum SortDirection {

View File

@@ -0,0 +1,107 @@
use uuid::Uuid;
use crate::models::MovieId;
/// Internal identifier of a person, wrapping a [`Uuid`].
///
/// Derives `Eq` and `Hash` on top of `PartialEq` so the identifier can be used
/// as a `HashMap`/`HashSet` key and in contexts that require total equality.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct PersonId(Uuid);

impl PersonId {
    /// Wraps an already-existing UUID without altering it.
    pub fn from_uuid(uuid: Uuid) -> Self {
        Self(uuid)
    }

    /// Deterministic UUIDv5 from an external person ID string.
    ///
    /// "tmdb:12345" always maps to the same `PersonId`. The external ID is
    /// hashed byte-for-byte under `Uuid::NAMESPACE_URL`; no normalisation is
    /// applied, so two strings that differ in any byte yield different IDs.
    pub fn from_external(external_id: &ExternalPersonId) -> Self {
        Self(Uuid::new_v5(&Uuid::NAMESPACE_URL, external_id.0.as_bytes()))
    }

    /// Returns the wrapped UUID by value.
    pub fn value(&self) -> Uuid {
        self.0
    }
}
/// Identifier of a person in an external data source, stored as a raw string
/// such as "tmdb:12345".
///
/// Derives `Eq` and `Hash` on top of `PartialEq` so the identifier can be used
/// as a `HashMap`/`HashSet` key.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ExternalPersonId(String);

impl ExternalPersonId {
    /// Builds an external ID from anything convertible into a `String`.
    /// The value is stored as given; no validation is performed here.
    pub fn new(s: impl Into<String>) -> Self {
        Self(s.into())
    }

    /// Returns the raw external ID string.
    pub fn value(&self) -> &str {
        &self.0
    }

    /// Parse the TMDb numeric ID from "tmdb:12345". Returns None for other formats.
    ///
    /// Only a plain run of ASCII digits after the `tmdb:` prefix is accepted.
    /// Signed forms such as "tmdb:+5" or "tmdb:-5" return `None`, as do an
    /// empty suffix and values that do not fit in an `i64`.
    pub fn tmdb_id(&self) -> Option<i64> {
        let digits = self.0.strip_prefix("tmdb:")?;
        // `str::parse::<i64>` on its own would also accept a leading sign;
        // those are not valid TMDb IDs, so filter them out before parsing.
        if !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        // An empty suffix or an overflowing number fails to parse and yields `None`.
        digits.parse().ok()
    }
}
/// A person record carrying both its internal and its external identifier.
///
/// All fields are private; they are read through the accessor methods of
/// `Person`.
#[derive(Clone, Debug)]
pub struct Person {
    /// Internal identifier.
    id: PersonId,
    /// Identifier in the external source (e.g. "tmdb:12345").
    external_id: ExternalPersonId,
    /// Display name.
    name: String,
    /// Department the person is known for, when one is recorded.
    known_for_department: Option<String>,
    /// Optional profile path.
    // NOTE(review): presumably the location of a profile image — confirm against the importer.
    profile_path: Option<String>,
}
impl Person {
pub fn new(
id: PersonId,
external_id: ExternalPersonId,
name: String,
known_for_department: Option<String>,
profile_path: Option<String>,
) -> Self {
Self { id, external_id, name, known_for_department, profile_path }
}
pub fn id(&self) -> &PersonId {
&self.id
}
pub fn external_id(&self) -> &ExternalPersonId {
&self.external_id
}
pub fn name(&self) -> &str {
&self.name
}
pub fn known_for_department(&self) -> Option<&str> {
self.known_for_department.as_deref()
}
pub fn profile_path(&self) -> Option<&str> {
self.profile_path.as_deref()
}
}
/// A person bundled with their cast and crew credits.
#[derive(Clone, Debug)]
pub struct PersonCredits {
    /// The person the credits belong to.
    pub person: Person,
    /// Cast credits, one entry per [`CastCredit`].
    pub cast: Vec<CastCredit>,
    /// Crew credits, one entry per [`CrewCredit`].
    pub crew: Vec<CrewCredit>,
}
/// One cast entry linking a movie to a character name.
#[derive(Clone, Debug)]
pub struct CastCredit {
    /// Identifier of the credited movie.
    pub movie_id: MovieId,
    /// Title of the credited movie.
    pub title: String,
    /// Release year of the movie, when available.
    pub release_year: Option<u16>,
    /// Character name for this credit.
    pub character: String,
    /// Optional poster path of the movie.
    pub poster_path: Option<String>,
}
/// One crew entry linking a movie to a job within a department.
#[derive(Clone, Debug)]
pub struct CrewCredit {
    /// Identifier of the credited movie.
    pub movie_id: MovieId,
    /// Title of the credited movie.
    pub title: String,
    /// Release year of the movie, when available.
    pub release_year: Option<u16>,
    /// Job held on the movie.
    pub job: String,
    /// Department the job belongs to.
    pub department: String,
    /// Optional poster path of the movie.
    pub poster_path: Option<String>,
}

View File

@@ -0,0 +1,68 @@
use crate::models::{
Movie, MovieId, MovieProfile, Person, PersonId,
collections::{PageParams, Paginated},
};
/// Input to a search: optional text, filters and paging parameters.
///
/// `Default` produces a query with no text, default filters and default
/// paging.
#[derive(Clone, Debug, Default)]
pub struct SearchQuery {
    /// Search text; `None` when no text was supplied.
    pub text: Option<String>,
    /// Filters to apply; see [`SearchFilters`].
    pub filters: SearchFilters,
    /// Paging parameters for the result sets.
    pub page: PageParams,
}
/// Optional filters attached to a [`SearchQuery`].
///
/// Every field is an `Option`, so the derived `Default` yields a value with
/// all filters unset.
#[derive(Clone, Debug, Default)]
pub struct SearchFilters {
    /// Genre filter, if set.
    pub genre: Option<String>,
    /// Year filter, if set.
    pub year: Option<u16>,
    /// Person filter, if set.
    pub person_id: Option<PersonId>,
    /// Department filter, if set.
    pub department: Option<String>,
    /// Language filter, if set.
    pub language: Option<String>,
}
/// Outcome of a search, holding movie hits and person hits as two separate
/// paginated collections.
#[derive(Clone, Debug)]
pub struct SearchResults {
    /// Page of matching movies.
    pub movies: Paginated<MovieSearchHit>,
    /// Page of matching people.
    pub people: Paginated<PersonSearchHit>,
}
/// A single movie entry in [`SearchResults`].
#[derive(Clone, Debug)]
pub struct MovieSearchHit {
    /// Identifier of the matched movie.
    pub movie_id: MovieId,
    /// Movie title.
    pub title: String,
    /// Release year, when available.
    pub release_year: Option<u16>,
    /// Director name, when available.
    pub director: Option<String>,
    /// Optional poster path.
    pub poster_path: Option<String>,
    /// Genre names attached to the movie.
    pub genres: Vec<String>,
}
/// A single person entry in [`SearchResults`].
#[derive(Clone, Debug)]
pub struct PersonSearchHit {
    /// Identifier of the matched person.
    pub person_id: PersonId,
    /// Person's name.
    pub name: String,
    /// Department the person is known for, when one is recorded.
    pub known_for_department: Option<String>,
    /// Optional profile path.
    pub profile_path: Option<String>,
    /// Top movie titles this person is known for — populated at query time
    /// by joining relational tables, never from the index.
    pub known_for_titles: Vec<String>,
}
/// Document submitted to the search index.
///
/// Add a new variant here to make a new entity type searchable — the port never changes.
/// The entity payloads are boxed inside each variant.
pub enum IndexableDocument {
    /// A movie to be indexed.
    Movie {
        /// Identifier of the movie.
        id: MovieId,
        /// The movie itself.
        movie: Box<Movie>,
        /// The movie's profile, when one is supplied.
        profile: Option<Box<MovieProfile>>,
    },
    /// A person to be indexed.
    Person {
        /// Identifier of the person.
        id: PersonId,
        /// The person itself.
        person: Box<Person>,
        // known_for_titles intentionally absent — no reads inside a command flow
    },
}
/// Kind of entity addressed in the search index.
///
/// Derives `Eq` and `Hash` on top of `PartialEq` so the kind can be used as a
/// `HashMap`/`HashSet` key and in contexts that require total equality.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum EntityType {
    /// A movie document.
    Movie,
    /// A person document.
    Person,
}

View File

@@ -8,6 +8,8 @@ use crate::{
AnnotatedRow, DiaryEntry, DiaryFilter, ExportFormat, FeedEntry, FieldMapping,
FileFormat, ImportError, ImportProfile, ImportSession, Movie, MovieProfile, MovieStats,
ParsedFile, Review, ReviewHistory, User, UserStats, UserSummary, UserTrends,
EntityType, ExternalPersonId, IndexableDocument, Person, PersonCredits,
PersonId, SearchQuery, SearchResults,
collections::{self, PageParams, Paginated},
},
value_objects::{
@@ -274,3 +276,34 @@ pub trait ImageRefCommand: Send + Sync {
pub trait ImageRefQuery: Send + Sync {
async fn list_keys(&self) -> Result<Vec<String>, DomainError>;
}
/// Write port — mutates the persons table. No reads.
#[async_trait]
pub trait PersonCommand: Send + Sync {
    /// Upsert a batch of persons.
    ///
    /// Uses INSERT OR REPLACE (SQLite) / ON CONFLICT DO UPDATE (Postgres).
    ///
    /// # Errors
    /// Returns a `DomainError` when the implementation cannot complete the upsert.
    async fn upsert_batch(&self, persons: &[Person]) -> Result<(), DomainError>;
}
/// Read port — queries persons and credits. No mutations.
#[async_trait]
pub trait PersonQuery: Send + Sync {
    /// Looks up a person by internal ID. The `Option` lets an implementation
    /// report absence without raising an error.
    async fn get_by_id(&self, id: &PersonId) -> Result<Option<Person>, DomainError>;

    /// Looks up a person by external ID. The `Option` lets an implementation
    /// report absence without raising an error.
    async fn get_by_external_id(
        &self,
        id: &ExternalPersonId,
    ) -> Result<Option<Person>, DomainError>;

    /// Returns the person's full cast and crew credit history across all indexed movies.
    async fn get_credits(&self, id: &PersonId) -> Result<PersonCredits, DomainError>;
}
/// Read port — executes search queries. No mutations.
#[async_trait]
pub trait SearchPort: Send + Sync {
    /// Runs `query` and returns the matching movies and people.
    ///
    /// # Errors
    /// Returns a `DomainError` when the implementation cannot execute the search.
    async fn search(&self, query: &SearchQuery) -> Result<SearchResults, DomainError>;
}
/// Write port — manages the search index. No reads.
#[async_trait]
pub trait SearchCommand: Send + Sync {
    /// Add or replace a document in the search index.
    ///
    /// The document is taken by value.
    async fn index(&self, doc: IndexableDocument) -> Result<(), DomainError>;

    /// Remove a document from the search index by entity type and internal ID string.
    async fn remove(&self, entity_type: EntityType, id: &str) -> Result<(), DomainError>;
}