feat: extensible search engine with person entities (FTS5/tsvector)

This commit is contained in:
2026-05-12 18:45:24 +02:00
parent 763d622601
commit c6770659c5
45 changed files with 2421 additions and 86 deletions

View File

@@ -12,6 +12,8 @@ pub mod collections;
pub mod import;
pub mod import_session;
pub mod import_profile;
pub mod person;
pub mod search;
pub use import::{
AnnotatedRow, DomainField, FieldMapping, FileFormat, ImportError,
@@ -19,6 +21,11 @@ pub use import::{
};
pub use import_session::ImportSession;
pub use import_profile::ImportProfile;
pub use person::{CastCredit, CrewCredit, ExternalPersonId, Person, PersonCredits, PersonId};
pub use search::{
EntityType, IndexableDocument, MovieSearchHit, PersonSearchHit,
SearchFilters, SearchQuery, SearchResults,
};
#[derive(Clone, Debug, Default)]
pub enum SortDirection {

View File

@@ -0,0 +1,107 @@
use uuid::Uuid;
use crate::models::MovieId;
/// Internal identifier of a person: a newtype wrapper around a [`Uuid`].
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the identifier can be
/// used as a `HashMap`/`HashSet` key and embedded in types that derive `Eq`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct PersonId(Uuid);

impl PersonId {
    /// Wraps an already-known UUID without altering it.
    pub fn from_uuid(uuid: Uuid) -> Self {
        Self(uuid)
    }

    /// Deterministic UUIDv5 from an external person ID string.
    /// "tmdb:12345" always maps to the same PersonId.
    ///
    /// The external ID string is hashed byte-for-byte under
    /// `Uuid::NAMESPACE_URL`, so two strings that differ in any byte
    /// (case, whitespace, a leading sign, ...) produce different IDs.
    pub fn from_external(external_id: &ExternalPersonId) -> Self {
        Self(Uuid::new_v5(&Uuid::NAMESPACE_URL, external_id.0.as_bytes()))
    }

    /// Returns the wrapped UUID by value.
    pub fn value(&self) -> Uuid {
        self.0
    }
}
/// Identifier of a person in an external catalogue, kept as an opaque string
/// such as `"tmdb:12345"`.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the identifier can be
/// used as a `HashMap`/`HashSet` key.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ExternalPersonId(String);

impl ExternalPersonId {
    /// Builds an external ID from anything convertible into a `String`.
    /// The value is stored as given; no validation or normalisation happens here.
    pub fn new(s: impl Into<String>) -> Self {
        Self(s.into())
    }

    /// Returns the raw external ID string.
    pub fn value(&self) -> &str {
        &self.0
    }

    /// Parse the TMDb numeric ID from "tmdb:12345". Returns None for other formats.
    ///
    /// Only a run of ASCII digits is accepted after the `tmdb:` prefix.
    /// `str::parse::<i64>` on its own also accepts a leading `+` or `-`, which
    /// would let `"tmdb:+12345"` and `"tmdb:12345"` resolve to the same TMDb ID
    /// while being distinct external IDs, and would let `"tmdb:-5"` yield a
    /// negative ID. Values that overflow `i64` return `None`.
    pub fn tmdb_id(&self) -> Option<i64> {
        let digits = self.0.strip_prefix("tmdb:")?;
        // An empty suffix passes `all` vacuously but is rejected by `parse`.
        if !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        digits.parse().ok()
    }
}
/// A person record: internal and external identifiers, a display name, and
/// two optional descriptive strings.
///
/// Fields are private; read them through the accessor methods below.
#[derive(Clone, Debug)]
pub struct Person {
    id: PersonId,
    external_id: ExternalPersonId,
    name: String,
    known_for_department: Option<String>,
    profile_path: Option<String>,
}

impl Person {
    /// Assembles a `Person` from its parts. Every argument is stored as given;
    /// nothing is validated or derived here.
    pub fn new(
        id: PersonId,
        external_id: ExternalPersonId,
        name: String,
        known_for_department: Option<String>,
        profile_path: Option<String>,
    ) -> Self {
        Person {
            id,
            external_id,
            name,
            known_for_department,
            profile_path,
        }
    }

    /// Borrows the internal identifier.
    pub fn id(&self) -> &PersonId {
        let Person { id, .. } = self;
        id
    }

    /// Borrows the external identifier.
    pub fn external_id(&self) -> &ExternalPersonId {
        let Person { external_id, .. } = self;
        external_id
    }

    /// Borrows the person's name.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Borrows the department string, if one was supplied.
    pub fn known_for_department(&self) -> Option<&str> {
        self.known_for_department.as_ref().map(String::as_str)
    }

    /// Borrows the profile path string, if one was supplied.
    pub fn profile_path(&self) -> Option<&str> {
        self.profile_path.as_ref().map(String::as_str)
    }
}
/// A person bundled with two lists of credits: one of [`CastCredit`]s and one
/// of [`CrewCredit`]s.
#[derive(Clone, Debug)]
pub struct PersonCredits {
/// The person the two credit lists are attached to.
pub person: Person,
/// Cast credits; may be empty.
pub cast: Vec<CastCredit>,
/// Crew credits; may be empty.
pub crew: Vec<CrewCredit>,
}
/// One cast entry: a movie reference plus the character string for it.
#[derive(Clone, Debug)]
pub struct CastCredit {
/// Identifier of the credited movie.
pub movie_id: MovieId,
/// Title string carried with the credit (presumably the movie's title — confirm against the code that builds credits).
pub title: String,
/// Release year, when known.
pub release_year: Option<u16>,
/// Character string for this credit (presumably the role played — confirm; an empty string is representable).
pub character: String,
/// Poster path string, when available.
pub poster_path: Option<String>,
}
/// One crew entry: a movie reference plus the job and department strings for it.
#[derive(Clone, Debug)]
pub struct CrewCredit {
/// Identifier of the credited movie.
pub movie_id: MovieId,
/// Title string carried with the credit (presumably the movie's title — confirm against the code that builds credits).
pub title: String,
/// Release year, when known.
pub release_year: Option<u16>,
/// Job string for this credit.
pub job: String,
/// Department string for this credit.
pub department: String,
/// Poster path string, when available.
pub poster_path: Option<String>,
}

View File

@@ -0,0 +1,68 @@
use crate::models::{
Movie, MovieId, MovieProfile, Person, PersonId,
collections::{PageParams, Paginated},
};
/// Input to a search: optional free text, structured filters, and paging.
///
/// The derived `Default` yields `text: None` together with the default
/// `SearchFilters` and `PageParams`.
#[derive(Clone, Debug, Default)]
pub struct SearchQuery {
/// Free-text part of the query, if any.
/// NOTE(review): how `None` is treated (e.g. filter-only search) is decided by the search implementation, not visible here.
pub text: Option<String>,
/// Structured filters accompanying the text.
pub filters: SearchFilters,
/// Paging parameters for the result set.
pub page: PageParams,
}
/// Optional structured filters for a [`SearchQuery`].
///
/// Every field is an `Option`; the derived `Default` sets all of them to `None`.
/// NOTE(review): presumably `None` means "do not filter on this field" — confirm
/// against the search implementation.
#[derive(Clone, Debug, Default)]
pub struct SearchFilters {
/// Genre filter value.
pub genre: Option<String>,
/// Year filter value.
pub year: Option<u16>,
/// Person filter value.
pub person_id: Option<PersonId>,
/// Department filter value.
pub department: Option<String>,
/// Language filter value (the expected format, e.g. a language code, is not visible here).
pub language: Option<String>,
}
/// Result of a search: one paginated list per entity kind.
#[derive(Clone, Debug)]
pub struct SearchResults {
/// Paginated movie hits.
pub movies: Paginated<MovieSearchHit>,
/// Paginated person hits.
pub people: Paginated<PersonSearchHit>,
}
/// A single movie entry in [`SearchResults`].
#[derive(Clone, Debug)]
pub struct MovieSearchHit {
/// Identifier of the matched movie.
pub movie_id: MovieId,
/// Movie title.
pub title: String,
/// Release year, when known.
pub release_year: Option<u16>,
/// Director string, when known.
pub director: Option<String>,
/// Poster path string, when available.
pub poster_path: Option<String>,
/// Genre strings; may be empty.
pub genres: Vec<String>,
}
/// A single person entry in [`SearchResults`].
#[derive(Clone, Debug)]
pub struct PersonSearchHit {
/// Identifier of the matched person.
pub person_id: PersonId,
/// Person's name.
pub name: String,
/// Department string, when known.
pub known_for_department: Option<String>,
/// Profile path string, when available.
pub profile_path: Option<String>,
/// Top movie titles this person is known for — populated at query time
/// by joining relational tables, never from the index.
pub known_for_titles: Vec<String>,
}
/// Document submitted to the search index.
/// Add a new variant here to make a new entity type searchable — the port never changes.
///
/// NOTE(review): unlike the other types in this file, this enum has no
/// `#[derive(...)]`, so it is neither `Debug` nor `Clone` — confirm that is intentional.
pub enum IndexableDocument {
/// A movie to index, with an optional profile. Payloads are boxed.
Movie {
/// Identifier of the movie.
id: MovieId,
/// The movie itself.
movie: Box<Movie>,
/// The movie's profile, when one is available.
profile: Option<Box<MovieProfile>>,
},
/// A person to index. The payload is boxed.
Person {
/// Identifier of the person.
/// NOTE(review): `Person` also exposes its own `id()`; presumably the two must agree — confirm at the construction sites.
id: PersonId,
/// The person itself.
person: Box<Person>,
// known_for_titles intentionally absent — no reads inside a command flow
},
}
/// Kinds of entity known to the search module.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the value can be used
/// as a `HashMap`/`HashSet` key (for example to group results per kind) and
/// embedded in types that derive `Eq`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum EntityType {
    /// A movie.
    Movie,
    /// A person.
    Person,
}