fix: batch N+1 queries in import duplicate check and watch event dismiss
Some checks failed
CI / Check / Test (push) Failing after 5m54s

apply_mapping: 2 batch queries instead of up to 2N per-row lookups
dismiss: single fetch + single update instead of 2N per-event queries
This commit is contained in:
2026-06-02 20:05:15 +02:00
parent ac7edd6953
commit b9210b6c4e
10 changed files with 367 additions and 49 deletions

View File

@@ -388,6 +388,54 @@ impl MovieRepository for PostgresRepository {
Ok(())
}
/// Returns the external metadata id values from `ids` that already appear in
/// the `movies.external_metadata_id` column.
///
/// All values are sent as one array parameter and matched with `= ANY($1)`,
/// so a non-empty input costs exactly one query. An empty input returns an
/// empty set without querying.
async fn existing_external_ids(
    &self,
    ids: &[ExternalMetadataId],
) -> Result<std::collections::HashSet<String>, DomainError> {
    if ids.is_empty() {
        // Nothing to look up: skip the database round trip.
        return Ok(std::collections::HashSet::new());
    }

    // Flatten the domain ids into the plain strings bound as the array argument.
    let mut wanted: Vec<String> = Vec::with_capacity(ids.len());
    for id in ids {
        wanted.push(id.value().to_string());
    }

    let matched = sqlx::query_as::<_, (String,)>(
        "SELECT external_metadata_id FROM movies WHERE external_metadata_id = ANY($1)",
    )
    .bind(&wanted)
    .fetch_all(&self.pool)
    .await
    .map_err(Self::map_err)?;

    // Unwrap the single-column tuples into the returned set.
    let mut present = std::collections::HashSet::new();
    for (value,) in matched {
        present.insert(value);
    }
    Ok(present)
}
async fn existing_title_year_pairs(
&self,
pairs: &[(MovieTitle, ReleaseYear)],
) -> Result<std::collections::HashSet<(String, u16)>, DomainError> {
if pairs.is_empty() {
return Ok(Default::default());
}
let titles: Vec<&str> = pairs.iter().map(|(t, _)| t.value()).collect();
let years: Vec<i64> = pairs.iter().map(|(_, y)| y.value() as i64).collect();
use sqlx::Row;
let rows = sqlx::query(
"SELECT DISTINCT m.title, m.release_year FROM movies m \
INNER JOIN unnest($1::text[], $2::bigint[]) AS p(title, release_year) \
ON m.title = p.title AND m.release_year = p.release_year",
)
.bind(&titles)
.bind(&years)
.fetch_all(&self.pool)
.await
.map_err(Self::map_err)?;
Ok(rows
.into_iter()
.map(|r| {
let t: String = r.get("title");
let y: i64 = r.get("release_year");
(t, y as u16)
})
.collect())
}
async fn list_movies(
&self,
page: &domain::models::collections::PageParams,

View File

@@ -115,6 +115,45 @@ impl WatchEventRepository for PostgresWatchEventRepository {
row.as_ref().map(row_to_watch_event).transpose()
}
async fn get_by_ids(&self, ids: &[WatchEventId]) -> Result<Vec<WatchEvent>, DomainError> {
if ids.is_empty() {
return Ok(vec![]);
}
let id_strs: Vec<String> = ids.iter().map(|id| id.value().to_string()).collect();
let rows = sqlx::query(
"SELECT id, user_id, movie_id, title, year, external_metadata_id, \
source, \
to_char(watched_at AT TIME ZONE 'UTC', 'YYYY-MM-DD HH24:MI:SS') AS watched_at, \
status, \
to_char(created_at AT TIME ZONE 'UTC', 'YYYY-MM-DD HH24:MI:SS') AS created_at \
FROM watch_events WHERE id = ANY($1)",
)
.bind(&id_strs)
.fetch_all(&self.pool)
.await
.map_err(map_err)?;
rows.iter().map(row_to_watch_event).collect()
}
/// Sets `status` on every watch event whose id is in `ids` with one `UPDATE`.
///
/// Returns the number of rows the statement reports as affected. An empty
/// `ids` slice returns `Ok(0)` without querying.
async fn update_status_batch(
    &self,
    ids: &[WatchEventId],
    status: WatchEventStatus,
) -> Result<u64, DomainError> {
    if ids.is_empty() {
        return Ok(0);
    }

    // `$1` is the textual status, `$2` the array of ids to update.
    let new_status = status.to_string();
    let mut keys: Vec<String> = Vec::with_capacity(ids.len());
    for id in ids {
        keys.push(id.value().to_string());
    }

    let affected = sqlx::query("UPDATE watch_events SET status = $1 WHERE id = ANY($2)")
        .bind(&new_status)
        .bind(&keys)
        .execute(&self.pool)
        .await
        .map_err(map_err)?
        .rows_affected();
    Ok(affected)
}
async fn find_duplicate(
&self,
user_id: &UserId,

View File

@@ -402,6 +402,57 @@ impl MovieRepository for SqliteMovieRepository {
Ok(())
}
/// Returns the external metadata id values from `ids` that already appear in
/// the `movies.external_metadata_id` column.
///
/// Builds a single `IN (?, ?, …)` query with one placeholder per id. An empty
/// input returns an empty set without querying.
///
/// NOTE(review): the statement carries `ids.len()` bound parameters; confirm
/// callers keep batches below SQLite's bound-parameter limit.
async fn existing_external_ids(
    &self,
    ids: &[ExternalMetadataId],
) -> Result<std::collections::HashSet<String>, DomainError> {
    if ids.is_empty() {
        return Ok(std::collections::HashSet::new());
    }

    // One `?` per id, comma separated.
    let marks = vec!["?"; ids.len()].join(",");
    let statement = format!(
        "SELECT external_metadata_id FROM movies WHERE external_metadata_id IN ({})",
        marks
    );

    // Bind values in the same order as the placeholders were generated.
    let mut lookup = sqlx::query_scalar::<_, String>(&statement);
    for id in ids {
        lookup = lookup.bind(id.value().to_string());
    }

    let found = lookup
        .fetch_all(&self.pool)
        .await
        .map_err(Self::map_err)?;

    let mut present = std::collections::HashSet::new();
    present.extend(found);
    Ok(present)
}
async fn existing_title_year_pairs(
&self,
pairs: &[(MovieTitle, ReleaseYear)],
) -> Result<std::collections::HashSet<(String, u16)>, DomainError> {
if pairs.is_empty() {
return Ok(Default::default());
}
let conditions: Vec<String> = pairs
.iter()
.map(|_| "(title = ? AND release_year = ?)".to_string())
.collect();
let sql = format!(
"SELECT DISTINCT title, release_year FROM movies WHERE {}",
conditions.join(" OR ")
);
use sqlx::Row;
let mut q = sqlx::query(&sql);
for (t, y) in pairs {
q = q.bind(t.value().to_string()).bind(y.value() as i64);
}
let rows = q.fetch_all(&self.pool).await.map_err(Self::map_err)?;
Ok(rows
.into_iter()
.map(|r| {
let t: String = r.get("title");
let y: i64 = r.get("release_year");
(t, y as u16)
})
.collect())
}
async fn list_movies(
&self,
page: &domain::models::collections::PageParams,

View File

@@ -122,6 +122,46 @@ impl WatchEventRepository for SqliteWatchEventRepository {
row.as_ref().map(row_to_watch_event).transpose()
}
async fn get_by_ids(&self, ids: &[WatchEventId]) -> Result<Vec<WatchEvent>, DomainError> {
if ids.is_empty() {
return Ok(vec![]);
}
let placeholders: Vec<&str> = ids.iter().map(|_| "?").collect();
let sql = format!(
"SELECT id, user_id, movie_id, title, year, external_metadata_id, \
source, watched_at, status, created_at \
FROM watch_events WHERE id IN ({})",
placeholders.join(",")
);
let mut q = sqlx::query(&sql);
for id in ids {
q = q.bind(id.value().to_string());
}
let rows = q.fetch_all(&self.pool).await.map_err(map_err)?;
rows.iter().map(row_to_watch_event).collect()
}
/// Sets `status` on every watch event whose id is in `ids` with one `UPDATE`.
///
/// The first placeholder receives the textual status; the remaining ones
/// receive the ids, in slice order. Returns the number of rows the statement
/// reports as affected. An empty `ids` slice returns `Ok(0)` without querying.
async fn update_status_batch(
    &self,
    ids: &[WatchEventId],
    status: WatchEventStatus,
) -> Result<u64, DomainError> {
    if ids.is_empty() {
        return Ok(0);
    }

    // One `?` per id for the `IN (...)` list.
    let marks = vec!["?"; ids.len()].join(",");
    let statement = format!(
        "UPDATE watch_events SET status = ? WHERE id IN ({})",
        marks
    );

    // The status must be bound first: it fills the `SET status = ?` placeholder.
    let mut update = sqlx::query(&statement).bind(status.to_string());
    for id in ids {
        update = update.bind(id.value().to_string());
    }

    let affected = update
        .execute(&self.pool)
        .await
        .map_err(map_err)?
        .rows_affected();
    Ok(affected)
}
async fn find_duplicate(
&self,
user_id: &UserId,