perf: scale fixes for 1M+ photo libraries
Indexes: share_targets.target_id, duplicate_groups.status, GIN on stacks members + duplicate candidates JSONB, composite (owner_user_id, created_at DESC) on assets. N+1 elimination: batch metadata loading via find_by_assets(ids) using WHERE asset_id = ANY($1), used in timeline + sidecar export. Visibility: cache find_targets_for_user per request via OnceCell, extract filter_visible helper to reduce duplication. Streaming: FileStoragePort.open_file() returns (DataStream, u64), LocalFileStorage uses ReaderStream instead of loading full file. serve_file/serve_derivative use Body::from_stream(). Unbounded queries: sidecar full_export/import batched in 500-row chunks instead of u32::MAX. find_unresolved paginated with limit/offset. list_duplicates API accepts pagination params.
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -100,6 +100,7 @@ dependencies = [
|
|||||||
"futures",
|
"futures",
|
||||||
"object_store",
|
"object_store",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
"tokio-util",
|
||||||
"tracing",
|
"tracing",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -188,6 +189,7 @@ dependencies = [
|
|||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"domain",
|
"domain",
|
||||||
|
"futures",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"sha2",
|
"sha2",
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ email_address = "0.2"
|
|||||||
sha2 = "0.10"
|
sha2 = "0.10"
|
||||||
uuid = { version = "1.0", features = ["v4", "serde"] }
|
uuid = { version = "1.0", features = ["v4", "serde"] }
|
||||||
chrono = { version = "0.4", features = ["serde"] }
|
chrono = { version = "0.4", features = ["serde"] }
|
||||||
|
tokio-util = { version = "0.7", features = ["io"] }
|
||||||
dotenvy = "0.15"
|
dotenvy = "0.15"
|
||||||
tracing = "0.1"
|
tracing = "0.1"
|
||||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
|
||||||
|
|||||||
@@ -0,0 +1,5 @@
|
|||||||
|
CREATE INDEX idx_share_targets_target ON share_targets(target_id);
|
||||||
|
CREATE INDEX idx_duplicate_groups_status ON duplicate_groups(status);
|
||||||
|
CREATE INDEX idx_stacks_members ON asset_stacks USING GIN (members);
|
||||||
|
CREATE INDEX idx_duplicate_candidates ON duplicate_groups USING GIN (candidates);
|
||||||
|
CREATE INDEX idx_assets_created ON assets(owner_user_id, created_at DESC);
|
||||||
@@ -336,6 +336,23 @@ impl AssetMetadataRepository for PostgresAssetMetadataRepository {
|
|||||||
Ok(rows.into_iter().map(Into::into).collect())
|
Ok(rows.into_iter().map(Into::into).collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn find_by_assets(
|
||||||
|
&self,
|
||||||
|
asset_ids: &[SystemId],
|
||||||
|
) -> Result<Vec<AssetMetadata>, DomainError> {
|
||||||
|
let uuids: Vec<Uuid> = asset_ids.iter().map(|id| *id.as_uuid()).collect();
|
||||||
|
let rows = sqlx::query_as::<_, AssetMetadataRow>(
|
||||||
|
"SELECT asset_id, metadata_source, data, updated_at
|
||||||
|
FROM asset_metadata WHERE asset_id = ANY($1)",
|
||||||
|
)
|
||||||
|
.bind(&uuids)
|
||||||
|
.fetch_all(&self.pool)
|
||||||
|
.await
|
||||||
|
.map_pg()?;
|
||||||
|
|
||||||
|
Ok(rows.into_iter().map(Into::into).collect())
|
||||||
|
}
|
||||||
|
|
||||||
async fn find_by_asset_and_source(
|
async fn find_by_asset_and_source(
|
||||||
&self,
|
&self,
|
||||||
asset_id: &SystemId,
|
asset_id: &SystemId,
|
||||||
@@ -482,11 +499,18 @@ impl DuplicateRepository for PostgresDuplicateRepository {
|
|||||||
Ok(row.map(Into::into))
|
Ok(row.map(Into::into))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn find_unresolved(&self) -> Result<Vec<DuplicateGroup>, DomainError> {
|
async fn find_unresolved(
|
||||||
|
&self,
|
||||||
|
limit: u32,
|
||||||
|
offset: u32,
|
||||||
|
) -> Result<Vec<DuplicateGroup>, DomainError> {
|
||||||
let rows = sqlx::query_as::<_, GroupRow>(
|
let rows = sqlx::query_as::<_, GroupRow>(
|
||||||
"SELECT group_id, detection_method, status, candidates
|
"SELECT group_id, detection_method, status, candidates
|
||||||
FROM duplicate_groups WHERE status = 'unresolved'",
|
FROM duplicate_groups WHERE status = 'unresolved'
|
||||||
|
ORDER BY group_id LIMIT $1 OFFSET $2",
|
||||||
)
|
)
|
||||||
|
.bind(limit as i64)
|
||||||
|
.bind(offset as i64)
|
||||||
.fetch_all(&self.pool)
|
.fetch_all(&self.pool)
|
||||||
.await
|
.await
|
||||||
.map_pg()?;
|
.map_pg()?;
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ tracing = { workspace = true }
|
|||||||
bytes = { workspace = true }
|
bytes = { workspace = true }
|
||||||
futures = { workspace = true }
|
futures = { workspace = true }
|
||||||
tokio = { workspace = true, features = ["fs"] }
|
tokio = { workspace = true, features = ["fs"] }
|
||||||
|
tokio-util = { workspace = true }
|
||||||
object_store = { version = "0.11" }
|
object_store = { version = "0.11" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use domain::errors::DomainError;
|
use domain::errors::DomainError;
|
||||||
use domain::ports::{FileEntry, FileStoragePort};
|
use domain::ports::{DataStream, FileEntry, FileStoragePort};
|
||||||
|
use futures::StreamExt;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use tokio_util::io::ReaderStream;
|
||||||
|
|
||||||
pub struct LocalFileStorage {
|
pub struct LocalFileStorage {
|
||||||
base_path: PathBuf,
|
base_path: PathBuf,
|
||||||
@@ -51,6 +53,25 @@ impl FileStoragePort for LocalFileStorage {
|
|||||||
Ok(Bytes::from(data))
|
Ok(Bytes::from(data))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn open_file(&self, path: &str) -> Result<(DataStream, u64), DomainError> {
|
||||||
|
let full = self.resolve(path)?;
|
||||||
|
let meta = tokio::fs::metadata(&full)
|
||||||
|
.await
|
||||||
|
.map_err(|e| match e.kind() {
|
||||||
|
std::io::ErrorKind::NotFound => DomainError::NotFound(path.to_string()),
|
||||||
|
_ => DomainError::Internal(format!("Failed to stat file: {e}")),
|
||||||
|
})?;
|
||||||
|
let file = tokio::fs::File::open(&full)
|
||||||
|
.await
|
||||||
|
.map_err(|e| match e.kind() {
|
||||||
|
std::io::ErrorKind::NotFound => DomainError::NotFound(path.to_string()),
|
||||||
|
_ => DomainError::Internal(format!("Failed to open file: {e}")),
|
||||||
|
})?;
|
||||||
|
let stream = ReaderStream::new(file)
|
||||||
|
.map(|r| r.map_err(|e| DomainError::Internal(format!("Read error: {e}"))));
|
||||||
|
Ok((Box::pin(stream), meta.len()))
|
||||||
|
}
|
||||||
|
|
||||||
async fn delete_file(&self, path: &str) -> Result<(), DomainError> {
|
async fn delete_file(&self, path: &str) -> Result<(), DomainError> {
|
||||||
let full = self.resolve(path)?;
|
let full = self.resolve(path)?;
|
||||||
match tokio::fs::remove_file(&full).await {
|
match tokio::fs::remove_file(&full).await {
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ bytes = { workspace = true }
|
|||||||
serde = { workspace = true }
|
serde = { workspace = true }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
sha2 = { workspace = true }
|
sha2 = { workspace = true }
|
||||||
|
futures = { workspace = true }
|
||||||
|
|
||||||
[dependencies.chrono]
|
[dependencies.chrono]
|
||||||
workspace = true
|
workspace = true
|
||||||
|
|||||||
@@ -66,7 +66,10 @@ impl ResolveDuplicateHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ListDuplicatesQuery;
|
pub struct ListDuplicatesQuery {
|
||||||
|
pub limit: u32,
|
||||||
|
pub offset: u32,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct ListDuplicatesHandler {
|
pub struct ListDuplicatesHandler {
|
||||||
duplicate_repo: Arc<dyn DuplicateRepository>,
|
duplicate_repo: Arc<dyn DuplicateRepository>,
|
||||||
@@ -79,8 +82,10 @@ impl ListDuplicatesHandler {
|
|||||||
|
|
||||||
pub async fn execute(
|
pub async fn execute(
|
||||||
&self,
|
&self,
|
||||||
_query: ListDuplicatesQuery,
|
query: ListDuplicatesQuery,
|
||||||
) -> Result<Vec<domain::entities::DuplicateGroup>, DomainError> {
|
) -> Result<Vec<domain::entities::DuplicateGroup>, DomainError> {
|
||||||
self.duplicate_repo.find_unresolved().await
|
self.duplicate_repo
|
||||||
|
.find_unresolved(query.limit, query.offset)
|
||||||
|
.await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -62,12 +62,21 @@ impl GetTimelineHandler {
|
|||||||
.find_by_owner(&query.owner_id, query.limit, query.offset)
|
.find_by_owner(&query.owner_id, query.limit, query.offset)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let mut results = Vec::with_capacity(assets.len());
|
let asset_ids: Vec<SystemId> = assets.iter().map(|a| a.asset_id).collect();
|
||||||
for asset in assets {
|
let all_layers = self.metadata_repo.find_by_assets(&asset_ids).await?;
|
||||||
let layers = self.metadata_repo.find_by_asset(&asset.asset_id).await?;
|
|
||||||
let resolved = resolve_metadata(&layers);
|
let results = assets
|
||||||
results.push((asset, resolved));
|
.into_iter()
|
||||||
}
|
.map(|asset| {
|
||||||
|
let layers: Vec<_> = all_layers
|
||||||
|
.iter()
|
||||||
|
.filter(|m| m.asset_id == asset.asset_id)
|
||||||
|
.cloned()
|
||||||
|
.collect();
|
||||||
|
let resolved = resolve_metadata(&layers);
|
||||||
|
(asset, resolved)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
use bytes::Bytes;
|
|
||||||
use domain::{
|
use domain::{
|
||||||
errors::DomainError,
|
errors::DomainError,
|
||||||
ports::{AssetRepository, FileStoragePort},
|
ports::{AssetRepository, DataStream, FileStoragePort},
|
||||||
value_objects::SystemId,
|
value_objects::SystemId,
|
||||||
};
|
};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -13,7 +12,8 @@ pub struct ReadAssetFileQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct AssetFileResult {
|
pub struct AssetFileResult {
|
||||||
pub data: Bytes,
|
pub stream: DataStream,
|
||||||
|
pub size: u64,
|
||||||
pub mime_type: String,
|
pub mime_type: String,
|
||||||
pub filename: String,
|
pub filename: String,
|
||||||
}
|
}
|
||||||
@@ -45,9 +45,9 @@ impl ReadAssetFileHandler {
|
|||||||
return Err(DomainError::Forbidden("Access denied".into()));
|
return Err(DomainError::Forbidden("Access denied".into()));
|
||||||
}
|
}
|
||||||
|
|
||||||
let data = self
|
let (stream, size) = self
|
||||||
.file_storage
|
.file_storage
|
||||||
.read_file(&asset.source_reference.relative_path)
|
.open_file(&asset.source_reference.relative_path)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
let filename = asset
|
let filename = asset
|
||||||
@@ -59,7 +59,8 @@ impl ReadAssetFileHandler {
|
|||||||
.to_string();
|
.to_string();
|
||||||
|
|
||||||
Ok(AssetFileResult {
|
Ok(AssetFileResult {
|
||||||
data,
|
stream,
|
||||||
|
size,
|
||||||
mime_type: asset.mime_type,
|
mime_type: asset.mime_type,
|
||||||
filename,
|
filename,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,8 +1,7 @@
|
|||||||
use bytes::Bytes;
|
|
||||||
use domain::{
|
use domain::{
|
||||||
entities::{DerivativeProfile, GenerationStatus},
|
entities::{DerivativeProfile, GenerationStatus},
|
||||||
errors::DomainError,
|
errors::DomainError,
|
||||||
ports::{DerivativeRepository, FileStoragePort},
|
ports::{DataStream, DerivativeRepository, FileStoragePort},
|
||||||
value_objects::SystemId,
|
value_objects::SystemId,
|
||||||
};
|
};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -14,7 +13,8 @@ pub struct ReadDerivativeQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub struct DerivativeFileResult {
|
pub struct DerivativeFileResult {
|
||||||
pub data: Bytes,
|
pub stream: DataStream,
|
||||||
|
pub size: u64,
|
||||||
pub mime_type: String,
|
pub mime_type: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -68,13 +68,14 @@ impl ReadDerivativeHandler {
|
|||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
let data = self
|
let (stream, size) = self
|
||||||
.file_storage
|
.file_storage
|
||||||
.read_file(&derivative.storage_path)
|
.open_file(&derivative.storage_path)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
Ok(DerivativeFileResult {
|
Ok(DerivativeFileResult {
|
||||||
data,
|
stream,
|
||||||
|
size,
|
||||||
mime_type: derivative.mime_type,
|
mime_type: derivative.mime_type,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,20 +3,17 @@ use domain::{
|
|||||||
catalog::entities::{Asset, AssetFilters},
|
catalog::entities::{Asset, AssetFilters},
|
||||||
errors::DomainError,
|
errors::DomainError,
|
||||||
ports::{AssetRepository, ShareRepository},
|
ports::{AssetRepository, ShareRepository},
|
||||||
|
sharing::entities::ShareTarget,
|
||||||
value_objects::{Checksum, SystemId},
|
value_objects::{Checksum, SystemId},
|
||||||
};
|
};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use tokio::sync::OnceCell;
|
||||||
|
|
||||||
/// Decorator that wraps an `AssetRepository` and filters query results
|
|
||||||
/// based on sharing permissions. The caller sees only assets they own
|
|
||||||
/// or have been granted access to via a `ShareScope` + `ShareTarget`.
|
|
||||||
///
|
|
||||||
/// Write operations (`save`, `delete`) pass through to the inner repository
|
|
||||||
/// unchanged — authorization for writes is handled at the use-case layer.
|
|
||||||
pub struct VisibilityFilteredAssetRepository {
|
pub struct VisibilityFilteredAssetRepository {
|
||||||
inner: Arc<dyn AssetRepository>,
|
inner: Arc<dyn AssetRepository>,
|
||||||
share_repo: Arc<dyn ShareRepository>,
|
share_repo: Arc<dyn ShareRepository>,
|
||||||
caller_id: SystemId,
|
caller_id: SystemId,
|
||||||
|
caller_targets: OnceCell<Vec<ShareTarget>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VisibilityFilteredAssetRepository {
|
impl VisibilityFilteredAssetRepository {
|
||||||
@@ -29,17 +26,24 @@ impl VisibilityFilteredAssetRepository {
|
|||||||
inner,
|
inner,
|
||||||
share_repo,
|
share_repo,
|
||||||
caller_id,
|
caller_id,
|
||||||
|
caller_targets: OnceCell::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns `true` if the caller owns the asset or has been granted
|
async fn get_caller_targets(&self) -> Result<&[ShareTarget], DomainError> {
|
||||||
/// access through a share scope that targets them.
|
self.caller_targets
|
||||||
|
.get_or_try_init(|| async {
|
||||||
|
self.share_repo.find_targets_for_user(&self.caller_id).await
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|v| v.as_slice())
|
||||||
|
}
|
||||||
|
|
||||||
async fn caller_can_access(&self, asset: &Asset) -> Result<bool, DomainError> {
|
async fn caller_can_access(&self, asset: &Asset) -> Result<bool, DomainError> {
|
||||||
if asset.owner_user_id == self.caller_id {
|
if asset.owner_user_id == self.caller_id {
|
||||||
return Ok(true);
|
return Ok(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find all share scopes that cover this asset
|
|
||||||
let scopes = self
|
let scopes = self
|
||||||
.share_repo
|
.share_repo
|
||||||
.find_scopes_for_resource(&asset.asset_id)
|
.find_scopes_for_resource(&asset.asset_id)
|
||||||
@@ -49,14 +53,8 @@ impl VisibilityFilteredAssetRepository {
|
|||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find all share targets that name this caller
|
let caller_targets = self.get_caller_targets().await?;
|
||||||
let caller_targets = self
|
|
||||||
.share_repo
|
|
||||||
.find_targets_for_user(&self.caller_id)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
// The caller has access if any of their targets reference a scope
|
|
||||||
// that covers this asset.
|
|
||||||
for scope in &scopes {
|
for scope in &scopes {
|
||||||
if scope.is_expired() {
|
if scope.is_expired() {
|
||||||
continue;
|
continue;
|
||||||
@@ -68,6 +66,16 @@ impl VisibilityFilteredAssetRepository {
|
|||||||
|
|
||||||
Ok(false)
|
Ok(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Keeps only the assets for which `caller_can_access` reports `true`.
///
/// Assets are checked one at a time in input order, and the survivors keep
/// that order. The first error from an access check aborts the filtering and
/// is returned to the caller.
async fn filter_visible(&self, assets: Vec<Asset>) -> Result<Vec<Asset>, DomainError> {
    // At most every input asset survives, so reserve that much up front.
    let mut kept = Vec::with_capacity(assets.len());

    for candidate in assets {
        let allowed = self.caller_can_access(&candidate).await?;
        if !allowed {
            continue;
        }
        kept.push(candidate);
    }

    Ok(kept)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
@@ -82,13 +90,7 @@ impl AssetRepository for VisibilityFilteredAssetRepository {
|
|||||||
|
|
||||||
async fn find_by_checksum(&self, checksum: &Checksum) -> Result<Vec<Asset>, DomainError> {
|
async fn find_by_checksum(&self, checksum: &Checksum) -> Result<Vec<Asset>, DomainError> {
|
||||||
let assets = self.inner.find_by_checksum(checksum).await?;
|
let assets = self.inner.find_by_checksum(checksum).await?;
|
||||||
let mut visible = Vec::with_capacity(assets.len());
|
self.filter_visible(assets).await
|
||||||
for asset in assets {
|
|
||||||
if self.caller_can_access(&asset).await? {
|
|
||||||
visible.push(asset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(visible)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn find_by_owner(
|
async fn find_by_owner(
|
||||||
@@ -98,18 +100,11 @@ impl AssetRepository for VisibilityFilteredAssetRepository {
|
|||||||
offset: u32,
|
offset: u32,
|
||||||
) -> Result<Vec<Asset>, DomainError> {
|
) -> Result<Vec<Asset>, DomainError> {
|
||||||
if owner_id == &self.caller_id {
|
if owner_id == &self.caller_id {
|
||||||
// Querying own assets — no filtering needed.
|
|
||||||
return self.inner.find_by_owner(owner_id, limit, offset).await;
|
return self.inner.find_by_owner(owner_id, limit, offset).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
let assets = self.inner.find_by_owner(owner_id, limit, offset).await?;
|
let assets = self.inner.find_by_owner(owner_id, limit, offset).await?;
|
||||||
let mut visible = Vec::with_capacity(assets.len());
|
self.filter_visible(assets).await
|
||||||
for asset in assets {
|
|
||||||
if self.caller_can_access(&asset).await? {
|
|
||||||
visible.push(asset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(visible)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn search(
|
async fn search(
|
||||||
@@ -124,13 +119,7 @@ impl AssetRepository for VisibilityFilteredAssetRepository {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let assets = self.inner.search(owner_id, filters, limit, offset).await?;
|
let assets = self.inner.search(owner_id, filters, limit, offset).await?;
|
||||||
let mut visible = Vec::with_capacity(assets.len());
|
self.filter_visible(assets).await
|
||||||
for asset in assets {
|
|
||||||
if self.caller_can_access(&asset).await? {
|
|
||||||
visible.push(asset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(visible)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn save(&self, asset: &Asset) -> Result<(), DomainError> {
|
async fn save(&self, asset: &Asset) -> Result<(), DomainError> {
|
||||||
@@ -219,7 +208,6 @@ mod tests {
|
|||||||
|
|
||||||
let share_repo = Arc::new(InMemoryShareRepository::new());
|
let share_repo = Arc::new(InMemoryShareRepository::new());
|
||||||
|
|
||||||
// Create a share scope on the asset and target the friend
|
|
||||||
let scope = share_asset(asset.asset_id, owner_id);
|
let scope = share_asset(asset.asset_id, owner_id);
|
||||||
share_repo.save_scope(&scope).await.unwrap();
|
share_repo.save_scope(&scope).await.unwrap();
|
||||||
|
|
||||||
@@ -241,7 +229,6 @@ mod tests {
|
|||||||
|
|
||||||
let asset_a = make_asset(owner_id);
|
let asset_a = make_asset(owner_id);
|
||||||
let mut asset_b = make_asset(stranger_id);
|
let mut asset_b = make_asset(stranger_id);
|
||||||
// Give asset_b the same checksum as asset_a
|
|
||||||
asset_b.source_reference.checksum = asset_a.source_reference.checksum.clone();
|
asset_b.source_reference.checksum = asset_a.source_reference.checksum.clone();
|
||||||
|
|
||||||
let inner = Arc::new(InMemoryAssetRepository::new());
|
let inner = Arc::new(InMemoryAssetRepository::new());
|
||||||
@@ -250,7 +237,6 @@ mod tests {
|
|||||||
|
|
||||||
let share_repo = Arc::new(InMemoryShareRepository::new());
|
let share_repo = Arc::new(InMemoryShareRepository::new());
|
||||||
|
|
||||||
// Stranger queries by checksum — should only see their own
|
|
||||||
let filtered =
|
let filtered =
|
||||||
VisibilityFilteredAssetRepository::new(inner.clone(), share_repo.clone(), stranger_id);
|
VisibilityFilteredAssetRepository::new(inner.clone(), share_repo.clone(), stranger_id);
|
||||||
|
|
||||||
@@ -289,7 +275,6 @@ mod tests {
|
|||||||
|
|
||||||
let share_repo = Arc::new(InMemoryShareRepository::new());
|
let share_repo = Arc::new(InMemoryShareRepository::new());
|
||||||
|
|
||||||
// Stranger queries owner's assets without a share — should get nothing
|
|
||||||
let filtered =
|
let filtered =
|
||||||
VisibilityFilteredAssetRepository::new(inner.clone(), share_repo.clone(), stranger_id);
|
VisibilityFilteredAssetRepository::new(inner.clone(), share_repo.clone(), stranger_id);
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ use domain::{
|
|||||||
};
|
};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
const BATCH_SIZE: u32 = 500;
|
||||||
|
|
||||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||||
pub struct FullExportCommand {
|
pub struct FullExportCommand {
|
||||||
pub owner_id: SystemId,
|
pub owner_id: SystemId,
|
||||||
@@ -36,30 +38,51 @@ impl FullExportHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn execute(&self, cmd: FullExportCommand) -> Result<u32, DomainError> {
|
pub async fn execute(&self, cmd: FullExportCommand) -> Result<u32, DomainError> {
|
||||||
let assets = self
|
|
||||||
.asset_repo
|
|
||||||
.find_by_owner(&cmd.owner_id, u32::MAX, 0)
|
|
||||||
.await?;
|
|
||||||
let mut count = 0u32;
|
let mut count = 0u32;
|
||||||
|
let mut offset = 0u32;
|
||||||
|
|
||||||
for asset in &assets {
|
loop {
|
||||||
let layers = self.metadata_repo.find_by_asset(&asset.asset_id).await?;
|
let assets = self
|
||||||
let resolved = resolve_metadata(&layers);
|
.asset_repo
|
||||||
|
.find_by_owner(&cmd.owner_id, BATCH_SIZE, offset)
|
||||||
let mut record = match self.sidecar_repo.find_by_asset(&asset.asset_id).await? {
|
|
||||||
Some(r) => r,
|
|
||||||
None => {
|
|
||||||
SidecarRecord::new(asset.asset_id, format!("sidecars/{}.xmp", asset.asset_id))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
self.writer
|
|
||||||
.write_sidecar(&resolved, &record.sidecar_storage_path)
|
|
||||||
.await?;
|
.await?;
|
||||||
let hash = hash_structured_data(&resolved);
|
|
||||||
record.mark_synced(hash);
|
if assets.is_empty() {
|
||||||
self.sidecar_repo.save(&record).await?;
|
break;
|
||||||
count += 1;
|
}
|
||||||
|
|
||||||
|
let asset_ids: Vec<SystemId> = assets.iter().map(|a| a.asset_id).collect();
|
||||||
|
let all_layers = self.metadata_repo.find_by_assets(&asset_ids).await?;
|
||||||
|
|
||||||
|
for asset in &assets {
|
||||||
|
let layers: Vec<_> = all_layers
|
||||||
|
.iter()
|
||||||
|
.filter(|m| m.asset_id == asset.asset_id)
|
||||||
|
.cloned()
|
||||||
|
.collect();
|
||||||
|
let resolved = resolve_metadata(&layers);
|
||||||
|
|
||||||
|
let mut record = match self.sidecar_repo.find_by_asset(&asset.asset_id).await? {
|
||||||
|
Some(r) => r,
|
||||||
|
None => SidecarRecord::new(
|
||||||
|
asset.asset_id,
|
||||||
|
format!("sidecars/{}.xmp", asset.asset_id),
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.writer
|
||||||
|
.write_sidecar(&resolved, &record.sidecar_storage_path)
|
||||||
|
.await?;
|
||||||
|
let hash = hash_structured_data(&resolved);
|
||||||
|
record.mark_synced(hash);
|
||||||
|
self.sidecar_repo.save(&record).await?;
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset += assets.len() as u32;
|
||||||
|
if (assets.len() as u32) < BATCH_SIZE {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(count)
|
Ok(count)
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ use domain::{
|
|||||||
};
|
};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
const BATCH_SIZE: u32 = 500;
|
||||||
|
|
||||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||||
pub struct FullImportCommand {
|
pub struct FullImportCommand {
|
||||||
pub owner_id: SystemId,
|
pub owner_id: SystemId,
|
||||||
@@ -36,38 +38,43 @@ impl FullImportHandler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn execute(&self, cmd: FullImportCommand) -> Result<u32, DomainError> {
|
pub async fn execute(&self, cmd: FullImportCommand) -> Result<u32, DomainError> {
|
||||||
let assets = self
|
|
||||||
.asset_repo
|
|
||||||
.find_by_owner(&cmd.owner_id, u32::MAX, 0)
|
|
||||||
.await?;
|
|
||||||
let mut count = 0u32;
|
let mut count = 0u32;
|
||||||
|
let mut offset = 0u32;
|
||||||
|
|
||||||
for asset in &assets {
|
loop {
|
||||||
let record = match self.sidecar_repo.find_by_asset(&asset.asset_id).await? {
|
let assets = self
|
||||||
Some(r) => r,
|
.asset_repo
|
||||||
None => {
|
.find_by_owner(&cmd.owner_id, BATCH_SIZE, offset)
|
||||||
// No sidecar record — try creating one to read from
|
.await?;
|
||||||
SidecarRecord::new(asset.asset_id, format!("sidecars/{}.xmp", asset.asset_id))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
match self.writer.read_sidecar(&record.sidecar_storage_path).await {
|
if assets.is_empty() {
|
||||||
Ok(data) => {
|
break;
|
||||||
let metadata = AssetMetadata::new(
|
}
|
||||||
|
|
||||||
|
for asset in &assets {
|
||||||
|
let record = match self.sidecar_repo.find_by_asset(&asset.asset_id).await? {
|
||||||
|
Some(r) => r,
|
||||||
|
None => SidecarRecord::new(
|
||||||
asset.asset_id,
|
asset.asset_id,
|
||||||
MetadataSource::ExifExtracted,
|
format!("sidecars/{}.xmp", asset.asset_id),
|
||||||
data.clone(),
|
),
|
||||||
);
|
};
|
||||||
self.metadata_repo.save(&metadata).await?;
|
|
||||||
|
if let Ok(data) = self.writer.read_sidecar(&record.sidecar_storage_path).await {
|
||||||
let hash = hash_structured_data(&data);
|
let hash = hash_structured_data(&data);
|
||||||
|
let metadata =
|
||||||
|
AssetMetadata::new(asset.asset_id, MetadataSource::ExifExtracted, data);
|
||||||
|
self.metadata_repo.save(&metadata).await?;
|
||||||
let mut record = record;
|
let mut record = record;
|
||||||
record.mark_synced(hash);
|
record.mark_synced(hash);
|
||||||
self.sidecar_repo.save(&record).await?;
|
self.sidecar_repo.save(&record).await?;
|
||||||
count += 1;
|
count += 1;
|
||||||
}
|
}
|
||||||
Err(_) => {
|
}
|
||||||
// Sidecar file missing — skip
|
|
||||||
}
|
offset += assets.len() as u32;
|
||||||
|
if (assets.len() as u32) < BATCH_SIZE {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -79,6 +79,13 @@ impl FileStoragePort for InMemoryFileStorage {
|
|||||||
.ok_or_else(|| DomainError::NotFound(format!("File not found: {path}")))
|
.ok_or_else(|| DomainError::NotFound(format!("File not found: {path}")))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn open_file(&self, path: &str) -> Result<(domain::ports::DataStream, u64), DomainError> {
|
||||||
|
let data = self.read_file(path).await?;
|
||||||
|
let len = data.len() as u64;
|
||||||
|
let stream = futures::stream::once(async move { Ok(data) });
|
||||||
|
Ok((Box::pin(stream), len))
|
||||||
|
}
|
||||||
|
|
||||||
async fn delete_file(&self, path: &str) -> Result<(), DomainError> {
|
async fn delete_file(&self, path: &str) -> Result<(), DomainError> {
|
||||||
self.files.lock().await.remove(path);
|
self.files.lock().await.remove(path);
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -550,6 +550,23 @@ impl AssetMetadataRepository for InMemoryAssetMetadataRepository {
|
|||||||
.collect())
|
.collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn find_by_assets(
|
||||||
|
&self,
|
||||||
|
asset_ids: &[SystemId],
|
||||||
|
) -> Result<Vec<AssetMetadata>, DomainError> {
|
||||||
|
let data = self.data.lock().await;
|
||||||
|
let mut results = Vec::new();
|
||||||
|
for id in asset_ids {
|
||||||
|
let prefix = format!("{id}:");
|
||||||
|
results.extend(
|
||||||
|
data.iter()
|
||||||
|
.filter(|(k, _)| k.starts_with(&prefix))
|
||||||
|
.map(|(_, v)| v.clone()),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(results)
|
||||||
|
}
|
||||||
|
|
||||||
async fn find_by_asset_and_source(
|
async fn find_by_asset_and_source(
|
||||||
&self,
|
&self,
|
||||||
asset_id: &SystemId,
|
asset_id: &SystemId,
|
||||||
@@ -785,13 +802,19 @@ impl DuplicateRepository for InMemoryDuplicateRepository {
|
|||||||
Ok(self.data.lock().await.get(&id.to_string()).cloned())
|
Ok(self.data.lock().await.get(&id.to_string()).cloned())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn find_unresolved(&self) -> Result<Vec<DuplicateGroup>, DomainError> {
|
async fn find_unresolved(
|
||||||
|
&self,
|
||||||
|
limit: u32,
|
||||||
|
offset: u32,
|
||||||
|
) -> Result<Vec<DuplicateGroup>, DomainError> {
|
||||||
Ok(self
|
Ok(self
|
||||||
.data
|
.data
|
||||||
.lock()
|
.lock()
|
||||||
.await
|
.await
|
||||||
.values()
|
.values()
|
||||||
.filter(|g| g.status == DuplicateStatus::Unresolved)
|
.filter(|g| g.status == DuplicateStatus::Unresolved)
|
||||||
|
.skip(offset as usize)
|
||||||
|
.take(limit as usize)
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect())
|
.collect())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ use domain::catalog::entities::{Asset, AssetType, SourceReference};
|
|||||||
use domain::errors::DomainError;
|
use domain::errors::DomainError;
|
||||||
use domain::ports::{AssetRepository, FileStoragePort};
|
use domain::ports::{AssetRepository, FileStoragePort};
|
||||||
use domain::value_objects::{Checksum, SystemId};
|
use domain::value_objects::{Checksum, SystemId};
|
||||||
|
use futures::StreamExt;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -36,7 +37,9 @@ async fn reads_file_successfully() {
|
|||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(result.data, file_data);
|
let chunks: Vec<Bytes> = result.stream.map(|r| r.unwrap()).collect().await;
|
||||||
|
let data: Bytes = chunks.into_iter().flatten().collect();
|
||||||
|
assert_eq!(data, file_data);
|
||||||
assert_eq!(result.mime_type, "image/jpeg");
|
assert_eq!(result.mime_type, "image/jpeg");
|
||||||
assert_eq!(result.filename, "cat.jpg");
|
assert_eq!(result.filename, "cat.jpg");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,6 +35,10 @@ pub trait AssetRepository: Send + Sync {
|
|||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait AssetMetadataRepository: Send + Sync {
|
pub trait AssetMetadataRepository: Send + Sync {
|
||||||
async fn find_by_asset(&self, asset_id: &SystemId) -> Result<Vec<AssetMetadata>, DomainError>;
|
async fn find_by_asset(&self, asset_id: &SystemId) -> Result<Vec<AssetMetadata>, DomainError>;
|
||||||
|
async fn find_by_assets(
|
||||||
|
&self,
|
||||||
|
asset_ids: &[SystemId],
|
||||||
|
) -> Result<Vec<AssetMetadata>, DomainError>;
|
||||||
async fn find_by_asset_and_source(
|
async fn find_by_asset_and_source(
|
||||||
&self,
|
&self,
|
||||||
asset_id: &SystemId,
|
asset_id: &SystemId,
|
||||||
@@ -78,7 +82,11 @@ pub trait DerivativeRepository: Send + Sync {
|
|||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait DuplicateRepository: Send + Sync {
|
pub trait DuplicateRepository: Send + Sync {
|
||||||
async fn find_by_id(&self, id: &SystemId) -> Result<Option<DuplicateGroup>, DomainError>;
|
async fn find_by_id(&self, id: &SystemId) -> Result<Option<DuplicateGroup>, DomainError>;
|
||||||
async fn find_unresolved(&self) -> Result<Vec<DuplicateGroup>, DomainError>;
|
async fn find_unresolved(
|
||||||
|
&self,
|
||||||
|
limit: u32,
|
||||||
|
offset: u32,
|
||||||
|
) -> Result<Vec<DuplicateGroup>, DomainError>;
|
||||||
async fn find_by_asset(&self, asset_id: &SystemId) -> Result<Vec<DuplicateGroup>, DomainError>;
|
async fn find_by_asset(&self, asset_id: &SystemId) -> Result<Vec<DuplicateGroup>, DomainError>;
|
||||||
async fn save(&self, group: &DuplicateGroup) -> Result<(), DomainError>;
|
async fn save(&self, group: &DuplicateGroup) -> Result<(), DomainError>;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ pub struct FileEntry {
|
|||||||
pub trait FileStoragePort: Send + Sync {
|
pub trait FileStoragePort: Send + Sync {
|
||||||
async fn store_file(&self, path: &str, data: Bytes) -> Result<(), DomainError>;
|
async fn store_file(&self, path: &str, data: Bytes) -> Result<(), DomainError>;
|
||||||
async fn read_file(&self, path: &str) -> Result<Bytes, DomainError>;
|
async fn read_file(&self, path: &str) -> Result<Bytes, DomainError>;
|
||||||
|
async fn open_file(&self, path: &str) -> Result<(DataStream, u64), DomainError>;
|
||||||
async fn delete_file(&self, path: &str) -> Result<(), DomainError>;
|
async fn delete_file(&self, path: &str) -> Result<(), DomainError>;
|
||||||
async fn list_directory(&self, path: &str) -> Result<Vec<FileEntry>, DomainError>;
|
async fn list_directory(&self, path: &str) -> Result<Vec<FileEntry>, DomainError>;
|
||||||
async fn file_exists(&self, path: &str) -> Result<bool, DomainError>;
|
async fn file_exists(&self, path: &str) -> Result<bool, DomainError>;
|
||||||
|
|||||||
@@ -203,12 +203,12 @@ pub async fn serve_file(
|
|||||||
Response::builder()
|
Response::builder()
|
||||||
.status(StatusCode::OK)
|
.status(StatusCode::OK)
|
||||||
.header(header::CONTENT_TYPE, &result.mime_type)
|
.header(header::CONTENT_TYPE, &result.mime_type)
|
||||||
.header(header::CONTENT_LENGTH, result.data.len())
|
.header(header::CONTENT_LENGTH, result.size)
|
||||||
.header(
|
.header(
|
||||||
header::CONTENT_DISPOSITION,
|
header::CONTENT_DISPOSITION,
|
||||||
format!("inline; filename=\"{}\"", result.filename),
|
format!("inline; filename=\"{}\"", result.filename),
|
||||||
)
|
)
|
||||||
.body(Body::from(result.data))
|
.body(Body::from_stream(result.stream))
|
||||||
.map_err(|e| AppError::from(domain::errors::DomainError::Internal(e.to_string())))
|
.map_err(|e| AppError::from(domain::errors::DomainError::Internal(e.to_string())))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -256,9 +256,9 @@ pub async fn serve_derivative(
|
|||||||
Response::builder()
|
Response::builder()
|
||||||
.status(StatusCode::OK)
|
.status(StatusCode::OK)
|
||||||
.header(header::CONTENT_TYPE, &result.mime_type)
|
.header(header::CONTENT_TYPE, &result.mime_type)
|
||||||
.header(header::CONTENT_LENGTH, result.data.len())
|
.header(header::CONTENT_LENGTH, result.size)
|
||||||
.header(header::CACHE_CONTROL, "public, max-age=31536000, immutable")
|
.header(header::CACHE_CONTROL, "public, max-age=31536000, immutable")
|
||||||
.body(Body::from(result.data))
|
.body(Body::from_stream(result.stream))
|
||||||
.map_err(|e| AppError::from(DomainError::Internal(e.to_string())))
|
.map_err(|e| AppError::from(DomainError::Internal(e.to_string())))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,23 +1,35 @@
|
|||||||
use crate::{errors::AppError, extractors::JwtClaims, state::AppState};
|
use crate::{
|
||||||
|
constants::{DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE},
|
||||||
|
errors::AppError,
|
||||||
|
extractors::JwtClaims,
|
||||||
|
state::AppState,
|
||||||
|
};
|
||||||
use api_types::{requests::ResolveDuplicateRequest, responses::DuplicateGroupResponse};
|
use api_types::{requests::ResolveDuplicateRequest, responses::DuplicateGroupResponse};
|
||||||
use application::catalog::{ListDuplicatesQuery, ResolveDuplicateCommand};
|
use application::catalog::{ListDuplicatesQuery, ResolveDuplicateCommand};
|
||||||
use axum::{
|
use axum::{
|
||||||
Json,
|
Json,
|
||||||
extract::{Path, State},
|
extract::{Path, Query, State},
|
||||||
http::StatusCode,
|
http::StatusCode,
|
||||||
};
|
};
|
||||||
use domain::value_objects::SystemId;
|
use domain::value_objects::SystemId;
|
||||||
|
|
||||||
|
#[derive(Debug, serde::Deserialize)]
|
||||||
|
pub struct ListDuplicatesParams {
|
||||||
|
pub limit: Option<u32>,
|
||||||
|
pub offset: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn list_duplicates(
|
pub async fn list_duplicates(
|
||||||
State(state): State<AppState>,
|
State(state): State<AppState>,
|
||||||
claims: JwtClaims,
|
claims: JwtClaims,
|
||||||
|
Query(params): Query<ListDuplicatesParams>,
|
||||||
) -> Result<Json<Vec<DuplicateGroupResponse>>, AppError> {
|
) -> Result<Json<Vec<DuplicateGroupResponse>>, AppError> {
|
||||||
super::require_admin(&claims)?;
|
super::require_admin(&claims)?;
|
||||||
let groups = state
|
let query = ListDuplicatesQuery {
|
||||||
.catalog
|
limit: params.limit.unwrap_or(DEFAULT_PAGE_SIZE).min(MAX_PAGE_SIZE),
|
||||||
.list_duplicates
|
offset: params.offset.unwrap_or(0),
|
||||||
.execute(ListDuplicatesQuery)
|
};
|
||||||
.await?;
|
let groups = state.catalog.list_duplicates.execute(query).await?;
|
||||||
let resp = groups
|
let resp = groups
|
||||||
.iter()
|
.iter()
|
||||||
.map(DuplicateGroupResponse::from_domain)
|
.map(DuplicateGroupResponse::from_domain)
|
||||||
|
|||||||
Reference in New Issue
Block a user