feat: extensible search engine with person entities (FTS5/tsvector)

This commit is contained in:
2026-05-12 18:45:24 +02:00
parent 763d622601
commit c6770659c5
45 changed files with 2421 additions and 86 deletions

View File

@@ -0,0 +1,356 @@
use std::sync::Arc;
use async_trait::async_trait;
use domain::{
errors::DomainError,
models::{
EntityType, IndexableDocument, MovieSearchHit, PersonSearchHit,
SearchQuery, SearchResults,
collections::Paginated,
},
models::PersonId,
value_objects::MovieId,
ports::{SearchCommand, SearchPort},
};
use sqlx::SqlitePool;
/// Search adapter backed by a SQLite connection pool.
///
/// The same type implements both the write side ([`SearchCommand`]) and the
/// read side ([`SearchPort`]) of the search index.
///
/// Cloning is cheap: it only clones the pool handle.
#[derive(Debug, Clone)]
pub struct SqliteSearchAdapter {
    /// Pool used for every index and search statement.
    pool: SqlitePool,
}

impl SqliteSearchAdapter {
    /// Creates an adapter that runs all of its statements on `pool`.
    pub fn new(pool: SqlitePool) -> Self {
        Self { pool }
    }
}
/// Builds one [`SqliteSearchAdapter`] over `pool` and returns it twice, once
/// behind each port trait, so both handles share the same instance.
pub fn create_search_adapter(pool: SqlitePool) -> (Arc<dyn SearchCommand>, Arc<dyn SearchPort>) {
    let shared = Arc::new(SqliteSearchAdapter::new(pool));
    // Method-call `clone` keeps the concrete `Arc<SqliteSearchAdapter>` type;
    // the annotation then performs the unsizing coercion to the trait object.
    let command: Arc<dyn SearchCommand> = shared.clone();
    let port: Arc<dyn SearchPort> = shared;
    (command, port)
}
/// Converts a sqlx failure into the domain's infrastructure error, keeping
/// only the error's display text.
fn map_err(e: sqlx::Error) -> DomainError {
    let message = e.to_string();
    DomainError::InfrastructureError(message)
}
#[async_trait]
impl SearchCommand for SqliteSearchAdapter {
    /// Adds `doc` to the full-text index, replacing any entry that already
    /// exists for the same id.
    ///
    /// The old rows are deleted and the new row inserted inside a single
    /// transaction, so a failed insert cannot leave the document missing from
    /// the index and readers never observe the gap between the two statements.
    ///
    /// For movies without a profile, the profile-derived columns (overview,
    /// genres, keywords, cast, crew, language) are indexed as empty strings.
    ///
    /// # Errors
    ///
    /// Returns `DomainError::InfrastructureError` if any statement or the
    /// commit fails.
    async fn index(&self, doc: IndexableDocument) -> Result<(), DomainError> {
        match doc {
            IndexableDocument::Movie { id, movie, profile } => {
                let movie_id = id.value().to_string();
                let title = movie.title().value().to_string();
                let director = movie.director().unwrap_or("").to_string();
                let release_year = movie.release_year().value() as i64;

                // List-valued profile fields are flattened to space-separated
                // text so each name becomes searchable tokens.
                let (overview, genres, keywords, cast_names, crew_names, language) =
                    match profile.as_deref() {
                        Some(p) => (
                            p.overview.clone().unwrap_or_default(),
                            p.genres.iter().map(|g| g.name.as_str()).collect::<Vec<_>>().join(" "),
                            p.keywords.iter().map(|k| k.name.as_str()).collect::<Vec<_>>().join(" "),
                            p.cast.iter().map(|c| c.name.as_str()).collect::<Vec<_>>().join(" "),
                            p.crew.iter().map(|c| c.name.as_str()).collect::<Vec<_>>().join(" "),
                            p.original_language.clone().unwrap_or_default(),
                        ),
                        None => Default::default(),
                    };

                let mut tx = self.pool.begin().await.map_err(map_err)?;

                // Delete every row for this id (not just the first one) so a
                // previously duplicated entry cannot survive a re-index.
                sqlx::query("DELETE FROM movies_fts WHERE movie_id = ?")
                    .bind(&movie_id)
                    .execute(&mut *tx)
                    .await
                    .map_err(map_err)?;

                sqlx::query(
                    "INSERT INTO movies_fts(movie_id, title, director, overview, genres, keywords, cast_names, crew_names, release_year, language)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                )
                .bind(&movie_id)
                .bind(&title)
                .bind(&director)
                .bind(&overview)
                .bind(&genres)
                .bind(&keywords)
                .bind(&cast_names)
                .bind(&crew_names)
                .bind(release_year)
                .bind(&language)
                .execute(&mut *tx)
                .await
                .map_err(map_err)?;

                tx.commit().await.map_err(map_err)?;
                Ok(())
            }
            IndexableDocument::Person { id, person } => {
                let person_id = id.value().to_string();

                let mut tx = self.pool.begin().await.map_err(map_err)?;

                sqlx::query("DELETE FROM people_fts WHERE person_id = ?")
                    .bind(&person_id)
                    .execute(&mut *tx)
                    .await
                    .map_err(map_err)?;

                sqlx::query(
                    "INSERT INTO people_fts(person_id, name, known_for_department) VALUES (?, ?, ?)",
                )
                .bind(&person_id)
                .bind(person.name())
                .bind(person.known_for_department())
                .execute(&mut *tx)
                .await
                .map_err(map_err)?;

                tx.commit().await.map_err(map_err)?;
                Ok(())
            }
        }
    }

    /// Removes every index row for the entity of `entity_type` identified by
    /// `id`. Removing an id that is not indexed is not an error.
    ///
    /// # Errors
    ///
    /// Returns `DomainError::InfrastructureError` if the delete statement fails.
    async fn remove(&self, entity_type: EntityType, id: &str) -> Result<(), DomainError> {
        let statement = match entity_type {
            EntityType::Movie => "DELETE FROM movies_fts WHERE movie_id = ?",
            EntityType::Person => "DELETE FROM people_fts WHERE person_id = ?",
        };
        sqlx::query(statement)
            .bind(id)
            .execute(&self.pool)
            .await
            .map_err(map_err)?;
        Ok(())
    }
}
#[async_trait]
impl SearchPort for SqliteSearchAdapter {
    /// Runs the movie search and then the people search for `query`, and
    /// bundles both pages into one [`SearchResults`].
    ///
    /// The first failing sub-search aborts the call with its error.
    async fn search(&self, query: &SearchQuery) -> Result<SearchResults, DomainError> {
        let movie_page = self.search_movies(query).await?;
        let people_page = self.search_people(query).await?;
        let combined = SearchResults {
            movies: movie_page,
            people: people_page,
        };
        Ok(combined)
    }
}
impl SqliteSearchAdapter {
    /// Turns free-form user text into a safe FTS5 `MATCH` expression.
    ///
    /// Every whitespace-separated word is wrapped in double quotes so FTS5
    /// treats it as literal text rather than query syntax (`AND`, `NEAR`, `-`,
    /// `:`, parentheses, ...). The last word gets a trailing `*` so it matches
    /// as a prefix, which is what search-as-you-type needs. Words containing
    /// no letter or digit are dropped because they cannot match anything.
    ///
    /// Returns `None` when nothing searchable remains (empty or
    /// punctuation-only input); callers treat that as "no text given".
    fn fts_prefix_query(text: &str) -> Option<String> {
        let mut terms: Vec<String> = text
            .split_whitespace()
            // A double quote would terminate the quoted string early.
            .map(|word| word.replace('"', ""))
            .filter(|word| word.chars().any(char::is_alphanumeric))
            .map(|word| format!("\"{word}\""))
            .collect();
        terms.last_mut()?.push('*');
        Some(terms.join(" "))
    }

    /// Parses an id column read from the database, reporting a malformed
    /// value as an infrastructure error instead of substituting a nil UUID.
    fn parse_uuid(raw: &str, entity: &str) -> Result<uuid::Uuid, DomainError> {
        uuid::Uuid::parse_str(raw).map_err(|e| {
            DomainError::InfrastructureError(format!(
                "search returned invalid {entity} id {raw:?}: {e}"
            ))
        })
    }

    /// Returns one page of movies matching `query`.
    ///
    /// With searchable text, movies are matched through `movies_fts` and
    /// ordered by FTS rank; without it, all movies are listed by title. The
    /// optional genre and year filters apply in both modes. `total_count` is
    /// the number of matching movies before pagination.
    ///
    /// # Errors
    ///
    /// Returns `DomainError::InfrastructureError` if a query fails or a row
    /// carries an id that is not a valid UUID.
    async fn search_movies(&self, query: &SearchQuery) -> Result<Paginated<MovieSearchHit>, DomainError> {
        #[derive(sqlx::FromRow)]
        struct Row {
            id: String,
            title: String,
            release_year: Option<i64>,
            director: Option<String>,
            poster_path: Option<String>,
            genres: Option<String>,
        }

        let limit = query.page.limit as i64;
        let offset = query.page.offset as i64;
        let genre = &query.filters.genre;
        let year = query.filters.year.map(|y| y as i64);
        // Built once and shared by the count and the page query.
        let fts_query = query.text.as_deref().and_then(Self::fts_prefix_query);

        // Each optional filter is bound twice: once for the `? IS NULL` guard
        // and once for the actual comparison.
        let (count, rows): (i64, Vec<Row>) = match &fts_query {
            Some(fts) => {
                let count = sqlx::query_scalar(
                    "SELECT COUNT(DISTINCT m.id)
                     FROM movies_fts fts
                     JOIN movies m ON m.id = fts.movie_id
                     WHERE movies_fts MATCH ?
                     AND (? IS NULL OR EXISTS (SELECT 1 FROM movie_genres WHERE movie_id = m.id AND name = ?))
                     AND (? IS NULL OR m.release_year = ?)",
                )
                .bind(fts)
                .bind(genre)
                .bind(genre)
                .bind(year)
                .bind(year)
                .fetch_one(&self.pool)
                .await
                .map_err(map_err)?;

                let rows = sqlx::query_as::<_, Row>(
                    "SELECT m.id, m.title, m.release_year, m.director, m.poster_path,
                     GROUP_CONCAT(DISTINCT mg.name) AS genres
                     FROM movies_fts fts
                     JOIN movies m ON m.id = fts.movie_id
                     LEFT JOIN movie_genres mg ON mg.movie_id = m.id
                     WHERE movies_fts MATCH ?
                     AND (? IS NULL OR EXISTS (SELECT 1 FROM movie_genres WHERE movie_id = m.id AND name = ?))
                     AND (? IS NULL OR m.release_year = ?)
                     GROUP BY m.id
                     ORDER BY rank
                     LIMIT ? OFFSET ?",
                )
                .bind(fts)
                .bind(genre)
                .bind(genre)
                .bind(year)
                .bind(year)
                .bind(limit)
                .bind(offset)
                .fetch_all(&self.pool)
                .await
                .map_err(map_err)?;

                (count, rows)
            }
            None => {
                let count = sqlx::query_scalar(
                    "SELECT COUNT(DISTINCT m.id)
                     FROM movies m
                     WHERE (? IS NULL OR EXISTS (SELECT 1 FROM movie_genres WHERE movie_id = m.id AND name = ?))
                     AND (? IS NULL OR m.release_year = ?)",
                )
                .bind(genre)
                .bind(genre)
                .bind(year)
                .bind(year)
                .fetch_one(&self.pool)
                .await
                .map_err(map_err)?;

                let rows = sqlx::query_as::<_, Row>(
                    "SELECT m.id, m.title, m.release_year, m.director, m.poster_path,
                     GROUP_CONCAT(DISTINCT mg.name) AS genres
                     FROM movies m
                     LEFT JOIN movie_genres mg ON mg.movie_id = m.id
                     WHERE (? IS NULL OR EXISTS (SELECT 1 FROM movie_genres WHERE movie_id = m.id AND name = ?))
                     AND (? IS NULL OR m.release_year = ?)
                     GROUP BY m.id
                     ORDER BY m.title
                     LIMIT ? OFFSET ?",
                )
                .bind(genre)
                .bind(genre)
                .bind(year)
                .bind(year)
                .bind(limit)
                .bind(offset)
                .fetch_all(&self.pool)
                .await
                .map_err(map_err)?;

                (count, rows)
            }
        };

        let items = rows
            .into_iter()
            .map(|row| -> Result<MovieSearchHit, DomainError> {
                Ok(MovieSearchHit {
                    movie_id: MovieId::from_uuid(Self::parse_uuid(&row.id, "movie")?),
                    title: row.title,
                    // A year outside the u16 range is reported as unknown
                    // rather than silently wrapped.
                    release_year: row.release_year.and_then(|y| u16::try_from(y).ok()),
                    director: row.director,
                    poster_path: row.poster_path,
                    // GROUP_CONCAT joins genre names with commas.
                    genres: row
                        .genres
                        .unwrap_or_default()
                        .split(',')
                        .filter(|s| !s.is_empty())
                        .map(str::to_string)
                        .collect(),
                })
            })
            .collect::<Result<Vec<_>, _>>()?;

        Ok(Paginated {
            items,
            // COUNT(...) is never negative.
            total_count: u64::try_from(count).unwrap_or_default(),
            limit: query.page.limit,
            offset: query.page.offset,
        })
    }

    /// Returns one page of people whose indexed name matches the query text,
    /// ordered by FTS rank.
    ///
    /// People are only searchable by text: without searchable text the result
    /// is an empty page. Each hit also carries up to three movie titles from
    /// `movie_cast`, ordered by billing order.
    ///
    /// # Errors
    ///
    /// Returns `DomainError::InfrastructureError` if the count or page query
    /// fails or a row carries an id that is not a valid UUID. A failure while
    /// loading a hit's known-for titles is not an error; the hit is returned
    /// with an empty title list.
    async fn search_people(&self, query: &SearchQuery) -> Result<Paginated<PersonSearchHit>, DomainError> {
        #[derive(sqlx::FromRow)]
        struct Row {
            person_id: String,
            name: String,
            known_for_department: Option<String>,
            profile_path: Option<String>,
            tmdb_person_id: Option<i64>,
        }

        let Some(fts_query) = query.text.as_deref().and_then(Self::fts_prefix_query) else {
            return Ok(Paginated {
                items: vec![],
                total_count: 0,
                limit: query.page.limit,
                offset: query.page.offset,
            });
        };
        let limit = query.page.limit as i64;
        let offset = query.page.offset as i64;

        // Count over the same join as the page query so `total_count` never
        // includes index rows whose person record no longer exists.
        let count: i64 = sqlx::query_scalar(
            "SELECT COUNT(*)
             FROM people_fts fts
             JOIN persons p ON p.id = fts.person_id
             WHERE people_fts MATCH ?",
        )
        .bind(&fts_query)
        .fetch_one(&self.pool)
        .await
        .map_err(map_err)?;

        // `tmdb_person_id` comes from the already-joined `persons` row, which
        // avoids a separate lookup per hit.
        let rows = sqlx::query_as::<_, Row>(
            "SELECT fts.person_id, p.name, p.known_for_department, p.profile_path, p.tmdb_person_id
             FROM people_fts fts
             JOIN persons p ON p.id = fts.person_id
             WHERE people_fts MATCH ?
             ORDER BY rank
             LIMIT ? OFFSET ?",
        )
        .bind(&fts_query)
        .bind(limit)
        .bind(offset)
        .fetch_all(&self.pool)
        .await
        .map_err(map_err)?;

        let mut items = Vec::with_capacity(rows.len());
        for row in rows {
            let known_for_titles = match row.tmdb_person_id {
                Some(tmdb_id) => sqlx::query_scalar::<_, String>(
                    "SELECT m.title FROM movie_cast mc
                     JOIN movies m ON m.id = mc.movie_id
                     WHERE mc.tmdb_person_id = ?
                     ORDER BY mc.billing_order
                     LIMIT 3",
                )
                .bind(tmdb_id)
                .fetch_all(&self.pool)
                .await
                // Best effort: the titles are decoration, so a failure here
                // must not fail the whole search.
                .unwrap_or_default(),
                None => Vec::new(),
            };

            items.push(PersonSearchHit {
                person_id: PersonId::from_uuid(Self::parse_uuid(&row.person_id, "person")?),
                name: row.name,
                known_for_department: row.known_for_department,
                profile_path: row.profile_path,
                known_for_titles,
            });
        }

        Ok(Paginated {
            items,
            // COUNT(*) is never negative.
            total_count: u64::try_from(count).unwrap_or_default(),
            limit: query.page.limit,
            offset: query.page.offset,
        })
    }
}
// Unit tests for this adapter. They are compiled only for test builds and are
// loaded from `tests/lib.rs` (via `#[path]`) instead of the default module file.
#[cfg(test)]
#[path = "tests/lib.rs"]
mod tests;

View File

@@ -0,0 +1,157 @@
use super::{SqliteSearchAdapter, create_search_adapter};
use domain::{
models::{
EntityType, IndexableDocument, Movie,
Person, PersonId, SearchFilters, SearchQuery,
ExternalPersonId,
collections::PageParams,
},
value_objects::{MovieId, MovieTitle, ReleaseYear},
ports::{SearchCommand, SearchPort},
};
use sqlx::SqlitePool;
/// Creates an in-memory SQLite database containing the tables and FTS5
/// virtual tables the search adapter reads and writes.
///
/// The pool is capped at a single connection: every connection to
/// `sqlite::memory:` opens its own private, empty database, so a second
/// pooled connection would not see this schema or any inserted rows.
async fn pool_with_schema() -> SqlitePool {
    let pool = sqlx::sqlite::SqlitePoolOptions::new()
        .max_connections(1)
        .connect("sqlite::memory:")
        .await
        .unwrap();

    let schema = [
        "CREATE TABLE movies (id TEXT PRIMARY KEY, title TEXT NOT NULL,
         release_year INTEGER, director TEXT, poster_path TEXT, external_metadata_id TEXT)",
        "CREATE TABLE persons (id TEXT PRIMARY KEY, external_id TEXT UNIQUE,
         tmdb_person_id INTEGER UNIQUE, name TEXT NOT NULL,
         known_for_department TEXT, profile_path TEXT)",
        "CREATE TABLE movie_cast (movie_id TEXT, tmdb_person_id INTEGER,
         name TEXT, character TEXT, billing_order INTEGER, profile_path TEXT)",
        "CREATE TABLE movie_genres (movie_id TEXT, tmdb_id INTEGER, name TEXT)",
        "CREATE VIRTUAL TABLE movies_fts USING fts5(
         movie_id UNINDEXED, title, director, overview, genres, keywords,
         cast_names, crew_names, release_year UNINDEXED, language UNINDEXED)",
        "CREATE VIRTUAL TABLE people_fts USING fts5(
         person_id UNINDEXED, name, known_for_department UNINDEXED)",
    ];
    for statement in schema {
        sqlx::query(statement).execute(&pool).await.unwrap();
    }

    pool
}
/// Builds a `Movie` fixture with the given UUID string, title and release
/// year, a fixed director of "Test Director", and `None` for the two
/// remaining optional constructor arguments.
///
/// Panics if `id` is not a valid UUID or if the title or year is rejected by
/// its value object.
fn test_movie(id: &str, title: &str, year: u16) -> Movie {
    let movie_id = MovieId::from_uuid(uuid::Uuid::parse_str(id).unwrap());
    let movie_title = MovieTitle::new(title.into()).unwrap();
    let release_year = ReleaseYear::new(year).unwrap();
    let director = Some(String::from("Test Director"));
    Movie::from_persistence(movie_id, None, movie_title, release_year, director, None)
}
/// Page parameters shared by the tests: limit 10, offset 0.
fn default_page() -> PageParams {
    let (limit, offset) = (Some(10), Some(0));
    PageParams::new(limit, offset).unwrap()
}
/// A movie that exists in `movies` and has been indexed is found by a text
/// search for its title.
#[tokio::test]
async fn index_and_search_movie_by_title() {
    let pool = pool_with_schema().await;
    let (indexer, searcher) = create_search_adapter(pool.clone());

    let uuid_text = "00000000-0000-0000-0000-000000000001";
    let movie = test_movie(uuid_text, "Interstellar", 2014);
    let movie_id = movie.id().clone();

    // The search joins the index against `movies`, so the row must exist there too.
    sqlx::query("INSERT INTO movies VALUES (?, ?, ?, ?, ?, ?)")
        .bind(uuid_text)
        .bind("Interstellar")
        .bind(2014i32)
        .bind("Christopher Nolan")
        .bind::<Option<String>>(None)
        .bind::<Option<String>>(None)
        .execute(&pool)
        .await
        .unwrap();

    let document = IndexableDocument::Movie {
        id: movie_id,
        movie: Box::new(movie),
        profile: None,
    };
    indexer.index(document).await.unwrap();

    let request = SearchQuery {
        text: Some(String::from("Interstellar")),
        filters: SearchFilters::default(),
        page: default_page(),
    };
    let results = searcher.search(&request).await.unwrap();

    let hits = &results.movies.items;
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].title, "Interstellar");
}
/// After `remove`, a previously indexed movie no longer appears in text
/// search results, even though its row is still present in `movies`.
#[tokio::test]
async fn remove_movie_clears_from_index() {
    let pool = pool_with_schema().await;
    let (indexer, searcher) = create_search_adapter(pool.clone());

    let uuid_text = "00000000-0000-0000-0000-000000000002";
    let movie = test_movie(uuid_text, "Inception", 2010);
    let movie_id = movie.id().clone();

    sqlx::query("INSERT INTO movies VALUES (?, ?, ?, ?, ?, ?)")
        .bind(uuid_text)
        .bind("Inception")
        .bind(2010i32)
        .bind("Christopher Nolan")
        .bind::<Option<String>>(None)
        .bind::<Option<String>>(None)
        .execute(&pool)
        .await
        .unwrap();

    let document = IndexableDocument::Movie {
        id: movie_id,
        movie: Box::new(movie),
        profile: None,
    };
    indexer.index(document).await.unwrap();
    indexer.remove(EntityType::Movie, uuid_text).await.unwrap();

    let request = SearchQuery {
        text: Some(String::from("Inception")),
        filters: SearchFilters::default(),
        page: default_page(),
    };
    let results = searcher.search(&request).await.unwrap();

    assert!(results.movies.items.is_empty());
}
#[tokio::test]
async fn search_with_genre_filter() {
let pool = pool_with_schema().await;
let (cmd, query) = create_search_adapter(pool.clone());
let id_str = "00000000-0000-0000-0000-000000000003";
let movie = test_movie(id_str, "The Dark Knight", 2008);
let movie_id = movie.id().clone();
sqlx::query("INSERT INTO movies VALUES (?, ?, ?, ?, ?, ?)")
.bind(id_str).bind("The Dark Knight").bind(2008i32)
.bind("Christopher Nolan").bind::<Option<String>>(None).bind::<Option<String>>(None)
.execute(&pool).await.unwrap();
sqlx::query("INSERT INTO movie_genres VALUES (?, 1, 'Action')")
.bind(id_str)
.execute(&pool).await.unwrap();
cmd.index(IndexableDocument::Movie {
id: movie_id.clone(),
movie: Box::new(movie),
profile: None,
}).await.unwrap();
// Matching genre — no text filter
let results = query.search(&SearchQuery {
text: None,
filters: SearchFilters { genre: Some("Action".to_string()), ..Default::default() },
page: default_page(),
}).await.unwrap();
assert_eq!(results.movies.items.len(), 1);
// Non-matching genre
let results = query.search(&SearchQuery {
text: None,
filters: SearchFilters { genre: Some("Comedy".to_string()), ..Default::default() },
page: default_page(),
}).await.unwrap();
assert!(results.movies.items.is_empty());
}