From ea95c2255fc12c39e5e310357406dc25d0e26369 Mon Sep 17 00:00:00 2001 From: Gabriel Kaszewski Date: Fri, 14 Nov 2025 06:35:08 +0100 Subject: [PATCH] feat: integrate EXIF data extraction using nom-exif and refactor related components --- Cargo.lock | 2 +- libertas_core/Cargo.toml | 1 + libertas_core/src/lib.rs | 1 + libertas_core/src/media_utils.rs | 71 +++++++++++ libertas_importer/src/main.rs | 131 ++++++++++++--------- libertas_worker/Cargo.toml | 1 - libertas_worker/src/plugins/exif_reader.rs | 53 +-------- 7 files changed, 155 insertions(+), 105 deletions(-) create mode 100644 libertas_core/src/media_utils.rs diff --git a/Cargo.lock b/Cargo.lock index 934ac6c..5cf17b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1661,6 +1661,7 @@ dependencies = [ "bytes", "chrono", "futures", + "nom-exif", "serde", "thiserror 2.0.17", "uuid", @@ -1714,7 +1715,6 @@ dependencies = [ "image", "libertas_core", "libertas_infra", - "nom-exif", "serde", "serde_json", "sqlx", diff --git a/libertas_core/Cargo.toml b/libertas_core/Cargo.toml index 1464cf0..9de88b0 100644 --- a/libertas_core/Cargo.toml +++ b/libertas_core/Cargo.toml @@ -12,3 +12,4 @@ futures = "0.3.31" thiserror = "2.0.17" uuid = {version = "1.18.1", features = ["v4", "serde"] } serde = { version = "1.0.228", features = ["derive"] } +nom-exif = { version = "2.5.4", features = ["serde", "async", "tokio"] } diff --git a/libertas_core/src/lib.rs b/libertas_core/src/lib.rs index 6b94f89..a7375c0 100644 --- a/libertas_core/src/lib.rs +++ b/libertas_core/src/lib.rs @@ -6,3 +6,4 @@ pub mod plugins; pub mod repositories; pub mod schema; pub mod services; +pub mod media_utils; \ No newline at end of file diff --git a/libertas_core/src/media_utils.rs b/libertas_core/src/media_utils.rs new file mode 100644 index 0000000..0d6ab65 --- /dev/null +++ b/libertas_core/src/media_utils.rs @@ -0,0 +1,71 @@ +use std::path::Path; + +use chrono::{DateTime, NaiveDateTime, Utc}; +use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag}; + +use crate::error::{CoreError, CoreResult}; + +#[derive(Default, Debug)] +pub struct ExtractedExif { + pub width: Option, + pub height: Option, + pub location: Option, + pub date_taken: Option>, +} + +fn parse_exif_datetime(s: &str) -> Option> { + NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S") + .ok() + .map(|ndt| ndt.and_local_timezone(Utc).unwrap()) +} + +pub async fn extract_exif_data(file_path: &Path) -> CoreResult { + let ms = AsyncMediaSource::file_path(file_path) + .await + .map_err(|e| CoreError::Unknown(format!("Failed to open file for EXIF: {}", e)))?; + + if !ms.has_exif() { + return Ok(ExtractedExif::default()); + } + + let mut parser = AsyncMediaParser::new(); + let iter: ExifIter = match parser.parse(ms).await { + Ok(iter) => iter, + Err(e) => { + println!("Could not parse EXIF: {}", e); + return Ok(ExtractedExif::default()); + } + }; + + let location = iter.parse_gps_info().ok().flatten().map(|g| g.format_iso6709()); + let exif: Exif = iter.into(); + + let width = exif + .get(ExifTag::ExifImageWidth) + .and_then(|f| f.as_u32()) + .map(|v| v as i32); + + let height = exif + .get(ExifTag::ExifImageHeight) + .and_then(|f| f.as_u32()) + .map(|v| v as i32); + + let dt_original = exif + .get(ExifTag::DateTimeOriginal) + .and_then(|f| f.as_str()) + .and_then(parse_exif_datetime); + + let dt_modify = exif + .get(ExifTag::ModifyDate) + .and_then(|f| f.as_str()) + .and_then(parse_exif_datetime); + + let date_taken = dt_original.or(dt_modify); + + Ok(ExtractedExif { + width, + height, + location, + date_taken, + }) +} \ No newline at end of file diff --git a/libertas_importer/src/main.rs b/libertas_importer/src/main.rs index 5684ab4..ecc670b 100644 --- a/libertas_importer/src/main.rs +++ b/libertas_importer/src/main.rs @@ -1,15 +1,21 @@ -use std::{path::{Path, PathBuf}, sync::Arc}; use anyhow::Result; +use std::{ + path::{Path, PathBuf}, + sync::Arc, +}; -use chrono::{DateTime, Datelike, NaiveDateTime, Utc}; +use chrono::Datelike; use clap::Parser; -use libertas_core::{config::Config, error::{CoreError, CoreResult}, models::{Media, User}, repositories::{MediaRepository, UserRepository}}; +use libertas_core::{ + config::Config, error::{CoreError, CoreResult}, media_utils::extract_exif_data, models::{Media, User}, repositories::{MediaRepository, UserRepository} +}; use libertas_infra::factory::{build_database_pool, build_media_repository, build_user_repository}; -use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag}; +use nom_exif::{AsyncMediaParser, AsyncMediaSource, ExifIter}; +use serde_json; use sha2::{Digest, Sha256}; +use tokio::fs; use uuid::Uuid; use walkdir::WalkDir; -use tokio::fs; mod config; @@ -34,7 +40,10 @@ struct ImporterState { #[tokio::main] async fn main() -> Result<()> { let cli = Cli::parse(); - println!("Starting import for user: '{}' from path '{}'...", cli.username, cli.path); + println!( + "Starting import for user: '{}' from path '{}'...", + cli.username, cli.path + ); let config = config::load_config()?; let db_pool = build_database_pool(&config.database).await?; @@ -52,10 +61,10 @@ async fn main() -> Result<()> { }; let user = state - .user_repo - .find_by_username(&cli.username) - .await? - .ok_or_else(|| anyhow::anyhow!("User '{}' not found", cli.username))?; + .user_repo + .find_by_username(&cli.username) + .await? + .ok_or_else(|| anyhow::anyhow!("User '{}' not found", cli.username))?; println!("User '{}' found with ID: {}", cli.username, user.id); println!("Storage: {} / {}", user.storage_used, user.storage_quota); @@ -66,11 +75,11 @@ async fn main() -> Result<()> { for entry in walker.filter_map(Result::ok) { if entry.file_type().is_file() { let path = entry.path(); - + match process_file(path, &user, &state).await { Ok(media) => { println!("-> Imported: '{}'", media.original_filename); - }, + } Err(e) => { eprintln!("!! Skipped: '{}' (Reason: {})", path.display(), e); } @@ -83,11 +92,7 @@ async fn main() -> Result<()> { Ok(()) } -async fn process_file( - file_path: &Path, - user: &User, - state: &ImporterState, -) -> CoreResult { +async fn process_file(file_path: &Path, user: &User, state: &ImporterState) -> CoreResult { let file_bytes = fs::read(file_path).await?; let file_size = file_bytes.len() as i64; let hash = format!("{:x}", Sha256::digest(&file_bytes)); @@ -107,34 +112,58 @@ async fn process_file( )); } - let (width, height, location, date_taken) = - match AsyncMediaSource::file_path(file_path).await { - Ok(ms) => { - if ms.has_exif() { - let mut parser = AsyncMediaParser::new(); - if let Ok(iter) = parser.parse::<_,_, ExifIter>(ms).await { - let gps = iter.parse_gps_info().ok().flatten().map(|g| g.format_iso6709()); - println!(" -> EXIF GPS Info: {:?}", gps); - let exif: Exif = iter.into(); - let modified_date = exif.get(ExifTag::ModifyDate).and_then(|f| f.as_str()).and_then(parse_exif_datetime); - println!(" -> EXIF ModifyDate: {:?}", modified_date); - let w = exif.get(ExifTag::ExifImageWidth).and_then(|f| f.as_u32()).map(|v| v as i32); - println!(" -> EXIF ExifImageWidth: {:?}", w); - let h = exif.get(ExifTag::ExifImageHeight).and_then(|f| f.as_u32()).map(|v| v as i32); - println!(" -> EXIF ExifImageHeight: {:?}", h); - let dt = exif.get(ExifTag::DateTimeOriginal).and_then(|f| f.as_str()).and_then(parse_exif_datetime); - println!(" -> EXIF DateTimeOriginal: {:?}", dt); - (w, h, gps, dt) - } else { - (None, None, None, None) - } + let (width, height, location, date_taken) = match extract_exif_data(file_path).await { + Ok(data) => { + println!(" -> Parsed EXIF: DateTimeOriginal={:?}, GPS={:?}", data.date_taken, data.location); + (data.width, data.height, data.location, data.date_taken) + }, + Err(e) => { + eprintln!(" -> EXIF parsing failed for {}: {}. Skipping.", file_path.display(), e); + (None, None, None, None) + } + }; + + match AsyncMediaSource::file_path(file_path).await { + Ok(ms) => { + if ms.has_exif() { + let mut parser = AsyncMediaParser::new(); + if let Ok(iter) = parser.parse::<_, _, ExifIter>(ms).await { + let values = iter + .into_iter() + .filter_map(|mut x| { + let res = x.take_result(); + match res { + Ok(v) => Some(( + x.tag().map(|x| x.to_string()).unwrap_or_else(|| { + format!("Unknown(0x{:04x})", x.tag_code()) + }), + v, + )), + Err(e) => { + println!( + " !! EXIF parsing error for tag 0x{:04x}: {}", + x.tag_code(), + e + ); + None + } + } + }) + .collect::>(); + + values.iter().for_each(|x| { + println!("{:<32}=> {}", x.0, x.1); + }); } else { - (None, None, None, None) + () } + } else { + () } - Err(_) => (None, None, None, None), - }; - + } + Err(_) => (), + }; + let file_date = date_taken.unwrap_or_else(|| chrono::Utc::now()); let year = file_date.year().to_string(); let month = format!("{:02}", file_date.month()); @@ -155,8 +184,8 @@ async fn process_file( .to_string(); let mime_type = mime_guess::from_path(file_path) - .first_or_octet_stream() - .to_string(); + .first_or_octet_stream() + .to_string(); let media_model = Media { id: Uuid::new_v4(), @@ -172,23 +201,19 @@ async fn process_file( date_taken: date_taken, thumbnail_path: None, }; - + state.media_repo.create(&media_model).await?; - state.user_repo + state + .user_repo .update_storage_used(user.id, file_size) .await?; let job_payload = serde_json::json!({ "media_id": media_model.id }); - state.nats_client + state + .nats_client .publish("media.new".to_string(), job_payload.to_string().into()) .await .map_err(|e| CoreError::Unknown(format!("Failed to publish NATS message: {}", e)))?; Ok(media_model) } - -fn parse_exif_datetime(s: &str) -> Option> { - NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S") - .ok() - .map(|ndt| ndt.and_local_timezone(Utc).unwrap()) -} \ No newline at end of file diff --git a/libertas_worker/Cargo.toml b/libertas_worker/Cargo.toml index 12ddb84..fb1dbc1 100644 --- a/libertas_worker/Cargo.toml +++ b/libertas_worker/Cargo.toml @@ -22,7 +22,6 @@ sqlx = { version = "0.8.6", features = [ futures-util = "0.3.31" bytes = "1.10.1" uuid = { version = "1.18.1", features = ["v4", "serde"] } -nom-exif = { version = "2.5.4", features = ["serde", "tokio", "async"] } async-trait = "0.1.89" xmp_toolkit = "1.11.0" chrono = "0.4.42" diff --git a/libertas_worker/src/plugins/exif_reader.rs b/libertas_worker/src/plugins/exif_reader.rs index d36c9c0..7f402e3 100644 --- a/libertas_worker/src/plugins/exif_reader.rs +++ b/libertas_worker/src/plugins/exif_reader.rs @@ -1,13 +1,9 @@ use async_trait::async_trait; -use chrono::{DateTime, NaiveDateTime, Utc}; use std::path::PathBuf; use libertas_core::{ - error::{CoreError, CoreResult}, - models::Media, - plugins::{MediaProcessorPlugin, PluginContext, PluginData}, + error::CoreResult, media_utils::extract_exif_data, models::Media, plugins::{MediaProcessorPlugin, PluginContext, PluginData} }; -use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag}; pub struct ExifReaderPlugin; @@ -20,51 +16,15 @@ impl MediaProcessorPlugin for ExifReaderPlugin { async fn process(&self, media: &Media, context: &PluginContext) -> CoreResult { let file_path = PathBuf::from(&context.media_library_path).join(&media.storage_path); - let ms = match AsyncMediaSource::file_path(file_path).await { - Ok(ms) => ms, - Err(e) => return Err(CoreError::Unknown(format!("Failed to open a file: {}", e))), - }; - - if !ms.has_exif() { - return Ok(PluginData { - message: "No EXIF data found in file.".to_string(), - }); - } - - let mut parser = AsyncMediaParser::new(); - let iter: ExifIter = match parser.parse(ms).await { - Ok(iter) => iter, + let (width, height, location, date_taken) = match extract_exif_data(&file_path).await { + Ok(data) => (data.width, data.height, data.location, data.date_taken), Err(e) => { - // It's not a fatal error, just means parsing failed (e.g., corrupt data) return Ok(PluginData { message: format!("Could not parse EXIF: {}", e), }); } }; - let location: Option = match iter.parse_gps_info() { - Ok(Some(gps_info)) => Some(gps_info.format_iso6709()), - Ok(None) => None, - Err(_) => None, - }; - - let exif: Exif = iter.into(); - - let width = exif - .get(ExifTag::ExifImageWidth) - .and_then(|f| f.as_u32()) - .map(|v| v as i32); - - let height = exif - .get(ExifTag::ExifImageHeight) - .and_then(|f| f.as_u32()) - .map(|v| v as i32); - - let date_taken = exif - .get(ExifTag::DateTimeOriginal) - .and_then(|f| f.as_str()) - .and_then(parse_exif_datetime); - if width.is_some() || height.is_some() || location.is_some() || date_taken.is_some() { context .media_repo @@ -82,11 +42,4 @@ impl MediaProcessorPlugin for ExifReaderPlugin { }) } } -} - -fn parse_exif_datetime(s: &str) -> Option> { - // EXIF datetime format is 'YYYY:MM:DD HH:MM:SS' - NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S") - .ok() - .map(|ndt| ndt.and_local_timezone(Utc).unwrap()) } \ No newline at end of file