feat: implement TMDb enrichment for movie profiles

- Add SqliteMovieProfileRepository and PostgresMovieProfileRepository for managing movie profiles in SQLite and PostgreSQL.
- Create TmdbEnrichmentClient to fetch movie details from TMDb API.
- Implement enrichment event handling with EnrichmentHandler.
- Introduce periodic jobs for cleaning up expired import sessions and checking for stale movie profiles.
- Update application context to include movie profile repository.
- Add API endpoint to retrieve movie profiles.
- Extend domain models with new structures for movie enrichment (Genre, Keyword, CastMember, CrewMember, MovieProfile).
- Modify event system to include MovieEnrichmentRequested event.
- Enhance tests to cover new functionality and ensure stability.
This commit is contained in:
2026-05-12 13:23:41 +02:00
parent c696a3b780
commit 38d13fbff1
30 changed files with 1193 additions and 30 deletions

View File

@@ -39,6 +39,10 @@ pub enum EventPayload {
review_id: String,
user_id: String,
},
MovieEnrichmentRequested {
movie_id: String,
external_metadata_id: String,
},
}
impl EventPayload {
@@ -50,6 +54,7 @@ impl EventPayload {
EventPayload::MovieDeleted { .. } => "MovieDeleted",
EventPayload::UserUpdated { .. } => "UserUpdated",
EventPayload::ReviewDeleted { .. } => "ReviewDeleted",
EventPayload::MovieEnrichmentRequested { .. } => "MovieEnrichmentRequested",
}
}
}
@@ -103,6 +108,12 @@ impl From<&DomainEvent> for EventPayload {
review_id: review_id.value().to_string(),
user_id: user_id.value().to_string(),
},
DomainEvent::MovieEnrichmentRequested { movie_id, external_metadata_id } => {
EventPayload::MovieEnrichmentRequested {
movie_id: movie_id.value().to_string(),
external_metadata_id: external_metadata_id.clone(),
}
}
}
}
}
@@ -154,6 +165,12 @@ impl TryFrom<EventPayload> for DomainEvent {
user_id: UserId::from_uuid(parse_uuid(&user_id, "user_id")?),
})
}
EventPayload::MovieEnrichmentRequested { movie_id, external_metadata_id } => {
Ok(DomainEvent::MovieEnrichmentRequested {
movie_id: MovieId::from_uuid(parse_uuid(&movie_id, "movie_id")?),
external_metadata_id,
})
}
}
}
}

View File

@@ -6,8 +6,9 @@ pub fn event_to_subject(prefix: &str, event: &DomainEvent) -> String {
DomainEvent::ReviewUpdated { .. } => "review.updated",
DomainEvent::ReviewDeleted { .. } => "review.deleted",
DomainEvent::MovieDiscovered { .. } => "movie.discovered",
DomainEvent::MovieDeleted { .. } => "movie.deleted",
DomainEvent::UserUpdated { .. } => "user.updated",
DomainEvent::MovieDeleted { .. } => "movie.deleted",
DomainEvent::UserUpdated { .. } => "user.updated",
DomainEvent::MovieEnrichmentRequested { .. } => "movie.enrichment.requested",
};
format!("{prefix}.{suffix}")
}

View File

@@ -0,0 +1,54 @@
-- Movie enrichment schema (PostgreSQL types: BIGINT, DOUBLE PRECISION, TIMESTAMPTZ).
-- Every table below references movies(id) with ON DELETE CASCADE, so deleting a
-- movie removes its profile and all related genre/keyword/cast/crew rows.

-- Scalar details of a movie: at most one row per movie (movie_id is the primary key).
-- Only tmdb_id and enriched_at are mandatory; every other detail column is nullable.
CREATE TABLE IF NOT EXISTS movie_profiles (
movie_id TEXT PRIMARY KEY NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_id BIGINT NOT NULL,
imdb_id TEXT,
overview TEXT,
tagline TEXT,
runtime_minutes INTEGER,
budget_usd BIGINT,
revenue_usd BIGINT,
vote_average DOUBLE PRECISION,
vote_count INTEGER,
original_language TEXT,
collection_name TEXT,
enriched_at TIMESTAMPTZ NOT NULL
);
-- Genres attached to a movie; a genre id appears at most once per movie.
CREATE TABLE IF NOT EXISTS movie_genres (
movie_id TEXT NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_id INTEGER NOT NULL,
name TEXT NOT NULL,
PRIMARY KEY (movie_id, tmdb_id)
);
-- Keywords attached to a movie; a keyword id appears at most once per movie.
CREATE TABLE IF NOT EXISTS movie_keywords (
movie_id TEXT NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_id INTEGER NOT NULL,
name TEXT NOT NULL,
PRIMARY KEY (movie_id, tmdb_id)
);
-- Cast members of a movie. The primary key allows one row per person per movie,
-- so a person credited with several characters can only be stored once.
CREATE TABLE IF NOT EXISTS movie_cast (
movie_id TEXT NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_person_id BIGINT NOT NULL,
name TEXT NOT NULL,
character TEXT NOT NULL,
billing_order INTEGER NOT NULL,
profile_path TEXT,
PRIMARY KEY (movie_id, tmdb_person_id)
);
-- Crew members of a movie. The job is part of the primary key, so one person may
-- hold several distinct jobs on the same movie.
CREATE TABLE IF NOT EXISTS movie_crew (
movie_id TEXT NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_person_id BIGINT NOT NULL,
name TEXT NOT NULL,
job TEXT NOT NULL,
department TEXT NOT NULL,
profile_path TEXT,
PRIMARY KEY (movie_id, tmdb_person_id, job)
);
-- Secondary indexes for lookups by person id and by genre/keyword name; the
-- primary keys above all lead with movie_id and cannot serve those lookups.
CREATE INDEX IF NOT EXISTS idx_movie_cast_person ON movie_cast (tmdb_person_id);
CREATE INDEX IF NOT EXISTS idx_movie_crew_person ON movie_crew (tmdb_person_id);
CREATE INDEX IF NOT EXISTS idx_movie_genres_name ON movie_genres (name);
CREATE INDEX IF NOT EXISTS idx_movie_keywords_name ON movie_keywords (name);

View File

@@ -15,6 +15,7 @@ use sqlx::PgPool;
mod import_profile;
mod import_session;
mod models;
mod profile;
mod users;
use models::{
@@ -24,6 +25,7 @@ use models::{
pub use import_profile::PostgresImportProfileRepository;
pub use import_session::PostgresImportSessionRepository;
pub use profile::PostgresMovieProfileRepository;
pub use users::PostgresUserRepository;
fn format_year_month(ym: &str) -> String {
@@ -865,6 +867,7 @@ pub async fn wire(database_url: &str) -> anyhow::Result<(
std::sync::Arc<dyn domain::ports::UserRepository>,
std::sync::Arc<dyn domain::ports::ImportSessionRepository>,
std::sync::Arc<dyn domain::ports::ImportProfileRepository>,
std::sync::Arc<dyn domain::ports::MovieProfileRepository>,
)> {
use anyhow::Context;
@@ -880,6 +883,7 @@ pub async fn wire(database_url: &str) -> anyhow::Result<(
let import_session_repo = std::sync::Arc::new(PostgresImportSessionRepository::new(pool.clone()));
let import_profile_repo = std::sync::Arc::new(PostgresImportProfileRepository::new(pool.clone()));
let movie_profile_repo = std::sync::Arc::new(PostgresMovieProfileRepository::new(pool.clone()));
Ok((
pool.clone(),
@@ -890,5 +894,6 @@ pub async fn wire(database_url: &str) -> anyhow::Result<(
std::sync::Arc::new(PostgresUserRepository::new(pool)) as _,
import_session_repo as _,
import_profile_repo as _,
movie_profile_repo as _,
))
}

View File

@@ -0,0 +1,236 @@
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use domain::{
errors::DomainError,
models::{CastMember, CrewMember, Genre, Keyword, MovieProfile},
ports::MovieProfileRepository,
value_objects::MovieId,
};
use sqlx::{PgPool, Row};
pub struct PostgresMovieProfileRepository {
pool: PgPool,
}
impl PostgresMovieProfileRepository {
pub fn new(pool: PgPool) -> Self {
Self { pool }
}
fn map_err(e: sqlx::Error) -> DomainError {
tracing::error!("Database error: {:?}", e);
DomainError::InfrastructureError("Database operation failed".into())
}
}
#[async_trait]
impl MovieProfileRepository for PostgresMovieProfileRepository {
async fn upsert(&self, p: &MovieProfile) -> Result<(), DomainError> {
let movie_id = p.movie_id.value().to_string();
let mut tx = self.pool.begin().await.map_err(Self::map_err)?;
sqlx::query(
r#"INSERT INTO movie_profiles
(movie_id, tmdb_id, imdb_id, overview, tagline, runtime_minutes,
budget_usd, revenue_usd, vote_average, vote_count,
original_language, collection_name, enriched_at)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13)
ON CONFLICT(movie_id) DO UPDATE SET
tmdb_id=EXCLUDED.tmdb_id, imdb_id=EXCLUDED.imdb_id,
overview=EXCLUDED.overview, tagline=EXCLUDED.tagline,
runtime_minutes=EXCLUDED.runtime_minutes,
budget_usd=EXCLUDED.budget_usd, revenue_usd=EXCLUDED.revenue_usd,
vote_average=EXCLUDED.vote_average, vote_count=EXCLUDED.vote_count,
original_language=EXCLUDED.original_language,
collection_name=EXCLUDED.collection_name,
enriched_at=EXCLUDED.enriched_at"#,
)
.bind(&movie_id)
.bind(p.tmdb_id as i64)
.bind(&p.imdb_id)
.bind(&p.overview)
.bind(&p.tagline)
.bind(p.runtime_minutes.map(|v| v as i32))
.bind(p.budget_usd)
.bind(p.revenue_usd)
.bind(p.vote_average)
.bind(p.vote_count.map(|v| v as i32))
.bind(&p.original_language)
.bind(&p.collection_name)
.bind(p.enriched_at)
.execute(&mut *tx)
.await
.map_err(Self::map_err)?;
sqlx::query("DELETE FROM movie_genres WHERE movie_id = $1")
.bind(&movie_id)
.execute(&mut *tx).await.map_err(Self::map_err)?;
for g in &p.genres {
sqlx::query("INSERT INTO movie_genres (movie_id, tmdb_id, name) VALUES ($1,$2,$3) ON CONFLICT DO NOTHING")
.bind(&movie_id).bind(g.tmdb_id as i32).bind(&g.name)
.execute(&mut *tx).await.map_err(Self::map_err)?;
}
sqlx::query("DELETE FROM movie_keywords WHERE movie_id = $1")
.bind(&movie_id)
.execute(&mut *tx).await.map_err(Self::map_err)?;
for k in &p.keywords {
sqlx::query("INSERT INTO movie_keywords (movie_id, tmdb_id, name) VALUES ($1,$2,$3) ON CONFLICT DO NOTHING")
.bind(&movie_id).bind(k.tmdb_id as i32).bind(&k.name)
.execute(&mut *tx).await.map_err(Self::map_err)?;
}
sqlx::query("DELETE FROM movie_cast WHERE movie_id = $1")
.bind(&movie_id)
.execute(&mut *tx).await.map_err(Self::map_err)?;
for c in &p.cast {
sqlx::query(
"INSERT INTO movie_cast \
(movie_id, tmdb_person_id, name, character, billing_order, profile_path) \
VALUES ($1,$2,$3,$4,$5,$6) ON CONFLICT DO NOTHING",
)
.bind(&movie_id).bind(c.tmdb_person_id as i64).bind(&c.name)
.bind(&c.character).bind(c.billing_order as i32).bind(&c.profile_path)
.execute(&mut *tx).await.map_err(Self::map_err)?;
}
sqlx::query("DELETE FROM movie_crew WHERE movie_id = $1")
.bind(&movie_id)
.execute(&mut *tx).await.map_err(Self::map_err)?;
for cr in &p.crew {
sqlx::query(
"INSERT INTO movie_crew \
(movie_id, tmdb_person_id, name, job, department, profile_path) \
VALUES ($1,$2,$3,$4,$5,$6) ON CONFLICT DO NOTHING",
)
.bind(&movie_id).bind(cr.tmdb_person_id as i64).bind(&cr.name)
.bind(&cr.job).bind(&cr.department).bind(&cr.profile_path)
.execute(&mut *tx).await.map_err(Self::map_err)?;
}
tx.commit().await.map_err(Self::map_err)
}
async fn get_by_movie_id(&self, id: &MovieId) -> Result<Option<MovieProfile>, DomainError> {
let movie_id = id.value().to_string();
let row = sqlx::query(
"SELECT tmdb_id, imdb_id, overview, tagline, runtime_minutes, budget_usd,
revenue_usd, vote_average, vote_count, original_language,
collection_name, enriched_at
FROM movie_profiles WHERE movie_id = $1",
)
.bind(&movie_id)
.fetch_optional(&self.pool)
.await
.map_err(Self::map_err)?;
let row = match row {
Some(r) => r,
None => return Ok(None),
};
let enriched_at: DateTime<Utc> = row.try_get("enriched_at")
.map_err(|_| DomainError::InfrastructureError("invalid enriched_at".into()))?;
let genres = sqlx::query("SELECT tmdb_id, name FROM movie_genres WHERE movie_id = $1")
.bind(&movie_id)
.fetch_all(&self.pool).await.map_err(Self::map_err)?
.into_iter()
.map(|r| Genre {
tmdb_id: r.try_get::<i32, _>("tmdb_id").unwrap_or(0) as u32,
name: r.try_get("name").unwrap_or_default(),
})
.collect();
let keywords = sqlx::query("SELECT tmdb_id, name FROM movie_keywords WHERE movie_id = $1")
.bind(&movie_id)
.fetch_all(&self.pool).await.map_err(Self::map_err)?
.into_iter()
.map(|r| Keyword {
tmdb_id: r.try_get::<i32, _>("tmdb_id").unwrap_or(0) as u32,
name: r.try_get("name").unwrap_or_default(),
})
.collect();
let cast = sqlx::query(
"SELECT tmdb_person_id, name, character, billing_order, profile_path \
FROM movie_cast WHERE movie_id = $1 ORDER BY billing_order",
)
.bind(&movie_id)
.fetch_all(&self.pool).await.map_err(Self::map_err)?
.into_iter()
.map(|r| CastMember {
tmdb_person_id: r.try_get::<i64, _>("tmdb_person_id").unwrap_or(0) as u64,
name: r.try_get("name").unwrap_or_default(),
character: r.try_get("character").unwrap_or_default(),
billing_order: r.try_get::<i32, _>("billing_order").unwrap_or(0) as u32,
profile_path: r.try_get("profile_path").ok(),
})
.collect();
let crew = sqlx::query(
"SELECT tmdb_person_id, name, job, department, profile_path \
FROM movie_crew WHERE movie_id = $1",
)
.bind(&movie_id)
.fetch_all(&self.pool).await.map_err(Self::map_err)?
.into_iter()
.map(|r| CrewMember {
tmdb_person_id: r.try_get::<i64, _>("tmdb_person_id").unwrap_or(0) as u64,
name: r.try_get("name").unwrap_or_default(),
job: r.try_get("job").unwrap_or_default(),
department: r.try_get("department").unwrap_or_default(),
profile_path: r.try_get("profile_path").ok(),
})
.collect();
Ok(Some(MovieProfile {
movie_id: id.clone(),
tmdb_id: row.try_get::<i64, _>("tmdb_id").unwrap_or(0) as u64,
imdb_id: row.try_get("imdb_id").ok(),
overview: row.try_get("overview").ok(),
tagline: row.try_get("tagline").ok(),
runtime_minutes: row.try_get::<Option<i32>, _>("runtime_minutes").ok().flatten().map(|v| v as u32),
budget_usd: row.try_get("budget_usd").ok(),
revenue_usd: row.try_get("revenue_usd").ok(),
vote_average: row.try_get("vote_average").ok(),
vote_count: row.try_get::<Option<i32>, _>("vote_count").ok().flatten().map(|v| v as u32),
original_language: row.try_get("original_language").ok(),
collection_name: row.try_get("collection_name").ok(),
genres,
keywords,
cast,
crew,
enriched_at,
}))
}
/// Lists movies that have an external metadata id and either no profile yet
/// or a profile enriched more than 30 days ago.
///
/// Movies without a profile come first, followed by profiles from oldest to
/// newest. Rows whose id or external id cannot be read or parsed are skipped.
async fn list_stale(&self) -> Result<Vec<(MovieId, String)>, DomainError> {
    let cutoff = Utc::now() - chrono::Duration::days(30);

    let records = sqlx::query(
        r#"SELECT m.id, m.external_metadata_id
FROM movies m
LEFT JOIN movie_profiles p ON p.movie_id = m.id
WHERE m.external_metadata_id IS NOT NULL
AND (p.movie_id IS NULL OR p.enriched_at < $1)
ORDER BY p.enriched_at ASC NULLS FIRST"#,
    )
    .bind(cutoff)
    .fetch_all(&self.pool)
    .await
    .map_err(Self::map_err)?;

    let mut stale = Vec::new();
    for record in records {
        // Any unreadable or malformed column drops the row, not the whole listing.
        let Ok(Some(external_id)) = record.try_get::<Option<String>, _>("external_metadata_id")
        else {
            continue;
        };
        let Ok(raw_id) = record.try_get::<String, _>("id") else {
            continue;
        };
        let Ok(uuid) = raw_id.parse::<uuid::Uuid>() else {
            continue;
        };
        stale.push((MovieId::from_uuid(uuid), external_id));
    }
    Ok(stale)
}
}

View File

@@ -0,0 +1,54 @@
-- Movie enrichment schema (SQLite column types: INTEGER, REAL, TEXT).
-- Every table below references movies(id) with ON DELETE CASCADE.
-- NOTE(review): SQLite enforces foreign keys only when `PRAGMA foreign_keys` is
-- enabled on the connection; confirm the pool is configured that way.

-- Scalar details of a movie: at most one row per movie (movie_id is the primary key).
-- Only tmdb_id and enriched_at are mandatory; every other detail column is nullable.
-- enriched_at is stored as TEXT; presumably an RFC 3339 timestamp, confirm
-- against the code that writes this column.
CREATE TABLE IF NOT EXISTS movie_profiles (
movie_id TEXT PRIMARY KEY NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_id INTEGER NOT NULL,
imdb_id TEXT,
overview TEXT,
tagline TEXT,
runtime_minutes INTEGER,
budget_usd INTEGER,
revenue_usd INTEGER,
vote_average REAL,
vote_count INTEGER,
original_language TEXT,
collection_name TEXT,
enriched_at TEXT NOT NULL
);
-- Genres attached to a movie; a genre id appears at most once per movie.
CREATE TABLE IF NOT EXISTS movie_genres (
movie_id TEXT NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_id INTEGER NOT NULL,
name TEXT NOT NULL,
PRIMARY KEY (movie_id, tmdb_id)
);
-- Keywords attached to a movie; a keyword id appears at most once per movie.
CREATE TABLE IF NOT EXISTS movie_keywords (
movie_id TEXT NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_id INTEGER NOT NULL,
name TEXT NOT NULL,
PRIMARY KEY (movie_id, tmdb_id)
);
-- Cast members of a movie. The primary key allows one row per person per movie,
-- so a person credited with several characters can only be stored once.
CREATE TABLE IF NOT EXISTS movie_cast (
movie_id TEXT NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_person_id INTEGER NOT NULL,
name TEXT NOT NULL,
character TEXT NOT NULL,
billing_order INTEGER NOT NULL,
profile_path TEXT,
PRIMARY KEY (movie_id, tmdb_person_id)
);
-- Crew members of a movie. The job is part of the primary key, so one person may
-- hold several distinct jobs on the same movie.
CREATE TABLE IF NOT EXISTS movie_crew (
movie_id TEXT NOT NULL REFERENCES movies(id) ON DELETE CASCADE,
tmdb_person_id INTEGER NOT NULL,
name TEXT NOT NULL,
job TEXT NOT NULL,
department TEXT NOT NULL,
profile_path TEXT,
PRIMARY KEY (movie_id, tmdb_person_id, job)
);
-- Secondary indexes for lookups by person id and by genre/keyword name; the
-- primary keys above all lead with movie_id and cannot serve those lookups.
CREATE INDEX IF NOT EXISTS idx_movie_cast_person ON movie_cast (tmdb_person_id);
CREATE INDEX IF NOT EXISTS idx_movie_crew_person ON movie_crew (tmdb_person_id);
CREATE INDEX IF NOT EXISTS idx_movie_genres_name ON movie_genres (name);
CREATE INDEX IF NOT EXISTS idx_movie_keywords_name ON movie_keywords (name);

View File

@@ -16,6 +16,7 @@ mod import_profile;
mod import_session;
mod migrations;
mod models;
mod profile;
mod users;
use models::{
@@ -25,6 +26,7 @@ use models::{
pub use import_profile::SqliteImportProfileRepository;
pub use import_session::SqliteImportSessionRepository;
pub use profile::SqliteMovieProfileRepository;
pub use users::SqliteUserRepository;
fn format_year_month(ym: &str) -> String {
@@ -854,6 +856,7 @@ pub async fn wire(database_url: &str) -> anyhow::Result<(
std::sync::Arc<dyn domain::ports::UserRepository>,
std::sync::Arc<dyn domain::ports::ImportSessionRepository>,
std::sync::Arc<dyn domain::ports::ImportProfileRepository>,
std::sync::Arc<dyn domain::ports::MovieProfileRepository>,
)> {
use std::str::FromStr;
use anyhow::Context;
@@ -876,6 +879,7 @@ pub async fn wire(database_url: &str) -> anyhow::Result<(
let import_session_repo = std::sync::Arc::new(SqliteImportSessionRepository::new(pool.clone()));
let import_profile_repo = std::sync::Arc::new(SqliteImportProfileRepository::new(pool.clone()));
let movie_profile_repo = std::sync::Arc::new(SqliteMovieProfileRepository::new(pool.clone()));
Ok((
pool.clone(),
@@ -886,6 +890,7 @@ pub async fn wire(database_url: &str) -> anyhow::Result<(
std::sync::Arc::new(SqliteUserRepository::new(pool)) as _,
import_session_repo as _,
import_profile_repo as _,
movie_profile_repo as _,
))
}

View File

@@ -0,0 +1,240 @@
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use domain::{
errors::DomainError,
models::{CastMember, CrewMember, Genre, Keyword, MovieProfile},
ports::MovieProfileRepository,
value_objects::MovieId,
};
use sqlx::{Row, SqlitePool};
pub struct SqliteMovieProfileRepository {
pool: SqlitePool,
}
impl SqliteMovieProfileRepository {
pub fn new(pool: SqlitePool) -> Self {
Self { pool }
}
fn map_err(e: sqlx::Error) -> DomainError {
tracing::error!("Database error: {:?}", e);
DomainError::InfrastructureError("Database operation failed".into())
}
}
#[async_trait]
impl MovieProfileRepository for SqliteMovieProfileRepository {
async fn upsert(&self, p: &MovieProfile) -> Result<(), DomainError> {
let movie_id = p.movie_id.value().to_string();
let enriched_at = p.enriched_at.to_rfc3339();
let mut tx = self.pool.begin().await.map_err(Self::map_err)?;
sqlx::query(
r#"INSERT INTO movie_profiles
(movie_id, tmdb_id, imdb_id, overview, tagline, runtime_minutes,
budget_usd, revenue_usd, vote_average, vote_count,
original_language, collection_name, enriched_at)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)
ON CONFLICT(movie_id) DO UPDATE SET
tmdb_id=excluded.tmdb_id, imdb_id=excluded.imdb_id,
overview=excluded.overview, tagline=excluded.tagline,
runtime_minutes=excluded.runtime_minutes,
budget_usd=excluded.budget_usd, revenue_usd=excluded.revenue_usd,
vote_average=excluded.vote_average, vote_count=excluded.vote_count,
original_language=excluded.original_language,
collection_name=excluded.collection_name,
enriched_at=excluded.enriched_at"#,
)
.bind(&movie_id)
.bind(p.tmdb_id as i64)
.bind(&p.imdb_id)
.bind(&p.overview)
.bind(&p.tagline)
.bind(p.runtime_minutes.map(|v| v as i64))
.bind(p.budget_usd)
.bind(p.revenue_usd)
.bind(p.vote_average)
.bind(p.vote_count.map(|v| v as i64))
.bind(&p.original_language)
.bind(&p.collection_name)
.bind(&enriched_at)
.execute(&mut *tx)
.await
.map_err(Self::map_err)?;
sqlx::query("DELETE FROM movie_genres WHERE movie_id = ?")
.bind(&movie_id)
.execute(&mut *tx).await.map_err(Self::map_err)?;
for g in &p.genres {
sqlx::query("INSERT OR IGNORE INTO movie_genres (movie_id, tmdb_id, name) VALUES (?,?,?)")
.bind(&movie_id).bind(g.tmdb_id as i64).bind(&g.name)
.execute(&mut *tx).await.map_err(Self::map_err)?;
}
sqlx::query("DELETE FROM movie_keywords WHERE movie_id = ?")
.bind(&movie_id)
.execute(&mut *tx).await.map_err(Self::map_err)?;
for k in &p.keywords {
sqlx::query("INSERT OR IGNORE INTO movie_keywords (movie_id, tmdb_id, name) VALUES (?,?,?)")
.bind(&movie_id).bind(k.tmdb_id as i64).bind(&k.name)
.execute(&mut *tx).await.map_err(Self::map_err)?;
}
sqlx::query("DELETE FROM movie_cast WHERE movie_id = ?")
.bind(&movie_id)
.execute(&mut *tx).await.map_err(Self::map_err)?;
for c in &p.cast {
sqlx::query(
"INSERT OR IGNORE INTO movie_cast \
(movie_id, tmdb_person_id, name, character, billing_order, profile_path) \
VALUES (?,?,?,?,?,?)",
)
.bind(&movie_id).bind(c.tmdb_person_id as i64).bind(&c.name)
.bind(&c.character).bind(c.billing_order as i64).bind(&c.profile_path)
.execute(&mut *tx).await.map_err(Self::map_err)?;
}
sqlx::query("DELETE FROM movie_crew WHERE movie_id = ?")
.bind(&movie_id)
.execute(&mut *tx).await.map_err(Self::map_err)?;
for cr in &p.crew {
sqlx::query(
"INSERT OR IGNORE INTO movie_crew \
(movie_id, tmdb_person_id, name, job, department, profile_path) \
VALUES (?,?,?,?,?,?)",
)
.bind(&movie_id).bind(cr.tmdb_person_id as i64).bind(&cr.name)
.bind(&cr.job).bind(&cr.department).bind(&cr.profile_path)
.execute(&mut *tx).await.map_err(Self::map_err)?;
}
tx.commit().await.map_err(Self::map_err)
}
async fn get_by_movie_id(&self, id: &MovieId) -> Result<Option<MovieProfile>, DomainError> {
let movie_id = id.value().to_string();
let row = sqlx::query(
"SELECT tmdb_id, imdb_id, overview, tagline, runtime_minutes, budget_usd,
revenue_usd, vote_average, vote_count, original_language,
collection_name, enriched_at
FROM movie_profiles WHERE movie_id = ?",
)
.bind(&movie_id)
.fetch_optional(&self.pool)
.await
.map_err(Self::map_err)?;
let row = match row {
Some(r) => r,
None => return Ok(None),
};
let enriched_at_str: String = row.try_get("enriched_at")
.map_err(|_| DomainError::InfrastructureError("invalid enriched_at".into()))?;
let enriched_at: DateTime<Utc> = enriched_at_str
.parse()
.map_err(|_| DomainError::InfrastructureError("invalid enriched_at".into()))?;
let genres = sqlx::query("SELECT tmdb_id, name FROM movie_genres WHERE movie_id = ?")
.bind(&movie_id)
.fetch_all(&self.pool).await.map_err(Self::map_err)?
.into_iter()
.map(|r| Genre {
tmdb_id: r.try_get::<i64, _>("tmdb_id").unwrap_or(0) as u32,
name: r.try_get("name").unwrap_or_default(),
})
.collect();
let keywords = sqlx::query("SELECT tmdb_id, name FROM movie_keywords WHERE movie_id = ?")
.bind(&movie_id)
.fetch_all(&self.pool).await.map_err(Self::map_err)?
.into_iter()
.map(|r| Keyword {
tmdb_id: r.try_get::<i64, _>("tmdb_id").unwrap_or(0) as u32,
name: r.try_get("name").unwrap_or_default(),
})
.collect();
let cast = sqlx::query(
"SELECT tmdb_person_id, name, character, billing_order, profile_path \
FROM movie_cast WHERE movie_id = ? ORDER BY billing_order",
)
.bind(&movie_id)
.fetch_all(&self.pool).await.map_err(Self::map_err)?
.into_iter()
.map(|r| CastMember {
tmdb_person_id: r.try_get::<i64, _>("tmdb_person_id").unwrap_or(0) as u64,
name: r.try_get("name").unwrap_or_default(),
character: r.try_get("character").unwrap_or_default(),
billing_order: r.try_get::<i64, _>("billing_order").unwrap_or(0) as u32,
profile_path: r.try_get("profile_path").ok(),
})
.collect();
let crew = sqlx::query(
"SELECT tmdb_person_id, name, job, department, profile_path \
FROM movie_crew WHERE movie_id = ?",
)
.bind(&movie_id)
.fetch_all(&self.pool).await.map_err(Self::map_err)?
.into_iter()
.map(|r| CrewMember {
tmdb_person_id: r.try_get::<i64, _>("tmdb_person_id").unwrap_or(0) as u64,
name: r.try_get("name").unwrap_or_default(),
job: r.try_get("job").unwrap_or_default(),
department: r.try_get("department").unwrap_or_default(),
profile_path: r.try_get("profile_path").ok(),
})
.collect();
Ok(Some(MovieProfile {
movie_id: id.clone(),
tmdb_id: row.try_get::<i64, _>("tmdb_id").unwrap_or(0) as u64,
imdb_id: row.try_get("imdb_id").ok(),
overview: row.try_get("overview").ok(),
tagline: row.try_get("tagline").ok(),
runtime_minutes: row.try_get::<Option<i64>, _>("runtime_minutes").ok().flatten().map(|v| v as u32),
budget_usd: row.try_get("budget_usd").ok(),
revenue_usd: row.try_get("revenue_usd").ok(),
vote_average: row.try_get("vote_average").ok(),
vote_count: row.try_get::<Option<i64>, _>("vote_count").ok().flatten().map(|v| v as u32),
original_language: row.try_get("original_language").ok(),
collection_name: row.try_get("collection_name").ok(),
genres,
keywords,
cast,
crew,
enriched_at,
}))
}
/// Lists movies that have an external metadata id and either no profile yet
/// or a profile whose `enriched_at` text sorts before the RFC 3339 timestamp
/// of 30 days ago.
///
/// Results are ordered by `enriched_at` ascending. Rows whose id or external
/// id cannot be read or parsed are skipped.
async fn list_stale(&self) -> Result<Vec<(MovieId, String)>, DomainError> {
    // The column holds text, so the cutoff is bound in the same textual form.
    let cutoff = (Utc::now() - chrono::Duration::days(30)).to_rfc3339();

    let records = sqlx::query(
        r#"SELECT m.id, m.external_metadata_id
FROM movies m
LEFT JOIN movie_profiles p ON p.movie_id = m.id
WHERE m.external_metadata_id IS NOT NULL
AND (p.movie_id IS NULL OR p.enriched_at < ?)
ORDER BY p.enriched_at ASC"#,
    )
    .bind(&cutoff)
    .fetch_all(&self.pool)
    .await
    .map_err(Self::map_err)?;

    let mut stale = Vec::new();
    for record in records {
        // Any unreadable or malformed column drops the row, not the whole listing.
        let Ok(Some(external_id)) = record.try_get::<Option<String>, _>("external_metadata_id")
        else {
            continue;
        };
        let Ok(raw_id) = record.try_get::<String, _>("id") else {
            continue;
        };
        let Ok(uuid) = raw_id.parse::<uuid::Uuid>() else {
            continue;
        };
        stale.push((MovieId::from_uuid(uuid), external_id));
    }
    Ok(stale)
}
}

View File

@@ -0,0 +1,14 @@
[package]
name = "tmdb-enrichment"
version = "0.1.0"
edition = "2024"
[dependencies]
domain = { workspace = true }
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
anyhow = { workspace = true }
async-trait = { workspace = true }
tracing = { workspace = true }
chrono = { workspace = true }

View File

@@ -0,0 +1,211 @@
use std::sync::Arc;
use async_trait::async_trait;
use chrono::Utc;
use domain::{
errors::DomainError,
events::DomainEvent,
models::{CastMember, CrewMember, Genre, Keyword, MovieProfile},
ports::{EventHandler, MovieEnrichmentClient, MovieProfileRepository},
value_objects::MovieId,
};
use serde::Deserialize;
// ── TMDb enrichment client ───────────────────────────────────────────────────
pub struct TmdbEnrichmentClient {
api_key: String,
http: reqwest::Client,
}
impl TmdbEnrichmentClient {
pub fn from_env() -> Result<Self, DomainError> {
let api_key = std::env::var("TMDB_API_KEY").map_err(|_| {
DomainError::InfrastructureError("TMDB_API_KEY is not set".into())
})?;
Ok(Self { api_key, http: reqwest::Client::new() })
}
fn base(&self, path: &str) -> String {
format!("https://api.themoviedb.org/3{}", path)
}
async fn get<T: for<'de> Deserialize<'de>>(&self, url: &str, extra: &[(&str, &str)]) -> Result<T, DomainError> {
let mut req = self.http.get(url).query(&[("api_key", self.api_key.as_str())]);
for (k, v) in extra {
req = req.query(&[(k, v)]);
}
req.send().await
.map_err(|e| DomainError::InfrastructureError(e.to_string()))?
.error_for_status()
.map_err(|e| DomainError::InfrastructureError(e.to_string()))?
.json::<T>().await
.map_err(|e| DomainError::InfrastructureError(e.to_string()))
}
async fn resolve_tmdb_id(&self, external_id: &str) -> Result<u64, DomainError> {
if let Some(numeric) = external_id.strip_prefix("tmdb:") {
return numeric.parse::<u64>()
.map_err(|_| DomainError::InfrastructureError(format!("Invalid tmdb id: {numeric}")));
}
// Assume IMDb ID (tt…) — use /find
#[derive(Deserialize)]
struct FindResult { id: u64 }
#[derive(Deserialize)]
struct FindResponse { movie_results: Vec<FindResult> }
let url = self.base(&format!("/find/{}", external_id));
let resp: FindResponse = self.get(&url, &[("external_source", "imdb_id")]).await?;
resp.movie_results
.into_iter()
.next()
.map(|r| r.id)
.ok_or_else(|| DomainError::NotFound(format!("TMDb: no movie for {external_id}")))
}
}
#[async_trait]
impl MovieEnrichmentClient for TmdbEnrichmentClient {
    /// Fetches details, credits and keywords for a movie in one TMDb request
    /// and maps them to a `MovieProfile` stamped with the current UTC time.
    ///
    /// Empty strings and zero values for budget, revenue and runtime are
    /// stored as `None`, so "unknown" is not persisted as a real value.
    ///
    /// # Errors
    /// Propagates the errors of id resolution and of the HTTP request,
    /// including failures to decode the response body.
    async fn fetch_profile(&self, movie_id: MovieId, external_metadata_id: &str) -> Result<MovieProfile, DomainError> {
        let tmdb_id = self.resolve_tmdb_id(external_metadata_id).await?;

        // Response DTOs: only the fields that are mapped below are declared.
        #[derive(Deserialize)]
        struct GenreDto { id: u32, name: String }
        #[derive(Deserialize)]
        struct CollectionDto { name: String }
        #[derive(Deserialize)]
        struct CastDto {
            id: u64,
            name: String,
            character: String,
            order: u32,
            profile_path: Option<String>,
        }
        #[derive(Deserialize)]
        struct CrewDto {
            id: u64,
            name: String,
            job: String,
            department: String,
            profile_path: Option<String>,
        }
        #[derive(Deserialize)]
        struct Credits { cast: Vec<CastDto>, crew: Vec<CrewDto> }
        #[derive(Deserialize)]
        struct KeywordDto { id: u32, name: String }
        #[derive(Deserialize)]
        struct Keywords { keywords: Vec<KeywordDto> }
        #[derive(Deserialize)]
        struct Details {
            imdb_id: Option<String>,
            overview: Option<String>,
            tagline: Option<String>,
            runtime: Option<u32>,
            budget: Option<i64>,
            revenue: Option<i64>,
            vote_average: Option<f64>,
            vote_count: Option<u32>,
            original_language: Option<String>,
            genres: Vec<GenreDto>,
            belongs_to_collection: Option<CollectionDto>,
            credits: Credits,
            keywords: Keywords,
        }

        // `append_to_response` embeds credits and keywords in the details
        // payload, so a single request is enough.
        let url = self.base(&format!("/movie/{}", tmdb_id));
        let d: Details = self.get(&url, &[("append_to_response", "credits,keywords")]).await?;

        Ok(MovieProfile {
            movie_id,
            tmdb_id,
            imdb_id: d.imdb_id.filter(|s| !s.is_empty()),
            overview: d.overview.filter(|s| !s.is_empty()),
            tagline: d.tagline.filter(|s| !s.is_empty()),
            // A zero runtime carries no information; treat it as unknown,
            // consistent with budget and revenue below.
            runtime_minutes: d.runtime.filter(|&v| v > 0),
            budget_usd: d.budget.filter(|&v| v > 0),
            revenue_usd: d.revenue.filter(|&v| v > 0),
            vote_average: d.vote_average,
            vote_count: d.vote_count,
            original_language: d.original_language,
            collection_name: d.belongs_to_collection.map(|c| c.name),
            genres: d.genres.into_iter().map(|g| Genre { tmdb_id: g.id, name: g.name }).collect(),
            keywords: d.keywords.keywords.into_iter()
                .map(|k| Keyword { tmdb_id: k.id, name: k.name })
                .collect(),
            cast: d.credits.cast.into_iter().map(|c| CastMember {
                tmdb_person_id: c.id,
                name: c.name,
                character: c.character,
                billing_order: c.order,
                profile_path: c.profile_path,
            }).collect(),
            crew: d.credits.crew.into_iter().map(|c| CrewMember {
                tmdb_person_id: c.id,
                name: c.name,
                job: c.job,
                department: c.department,
                profile_path: c.profile_path,
            }).collect(),
            enriched_at: Utc::now(),
        })
    }
}
// ── Enrichment event handler ─────────────────────────────────────────────────
/// Event handler that enriches a movie when `MovieEnrichmentRequested` arrives.
pub struct EnrichmentHandler {
    /// Client used to fetch a fresh profile.
    pub enrichment_client: Arc<dyn MovieEnrichmentClient>,
    /// Store consulted for an existing profile and written with the new one.
    pub profile_repo: Arc<dyn MovieProfileRepository>,
}

#[async_trait]
impl EventHandler for EnrichmentHandler {
    /// Fetches and stores a profile for the requested movie.
    ///
    /// Events of any other kind are ignored. Enrichment is skipped when a
    /// stored profile is less than 30 days old; a failed freshness lookup does
    /// not block enrichment. A `NotFound` from the client is logged and treated
    /// as success; every other error is returned to the caller.
    async fn handle(&self, event: &DomainEvent) -> Result<(), DomainError> {
        let DomainEvent::MovieEnrichmentRequested { movie_id, external_metadata_id } = event else {
            return Ok(());
        };

        // Skip if profile is fresh (checked by the repo's list_stale, but guard here too)
        let stored = self.profile_repo.get_by_movie_id(movie_id).await;
        if let Ok(Some(existing)) = stored {
            let days_old = (Utc::now() - existing.enriched_at).num_days();
            if days_old < 30 {
                tracing::debug!(
                    movie_id = %movie_id.value(),
                    "skipping enrichment — profile is {} days old",
                    days_old
                );
                return Ok(());
            }
        }

        tracing::info!(movie_id = %movie_id.value(), external_id = %external_metadata_id, "enriching movie");

        let fetched = self
            .enrichment_client
            .fetch_profile(movie_id.clone(), external_metadata_id)
            .await;
        let profile = match fetched {
            Ok(profile) => profile,
            Err(DomainError::NotFound(msg)) => {
                tracing::warn!(movie_id = %movie_id.value(), "TMDb lookup found nothing: {msg}");
                return Ok(());
            }
            Err(e) => return Err(e),
        };

        self.profile_repo.upsert(&profile).await?;
        tracing::info!(
            movie_id = %movie_id.value(),
            genres = profile.genres.len(),
            cast = profile.cast.len(),
            crew = profile.crew.len(),
            "enrichment stored"
        );
        Ok(())
    }
}