feat: integrate EXIF data extraction using nom-exif and refactor related components

This commit is contained in:
2025-11-14 06:35:08 +01:00
parent 60860cf508
commit ea95c2255f
7 changed files with 155 additions and 105 deletions

2
Cargo.lock generated
View File

@@ -1661,6 +1661,7 @@ dependencies = [
"bytes", "bytes",
"chrono", "chrono",
"futures", "futures",
"nom-exif",
"serde", "serde",
"thiserror 2.0.17", "thiserror 2.0.17",
"uuid", "uuid",
@@ -1714,7 +1715,6 @@ dependencies = [
"image", "image",
"libertas_core", "libertas_core",
"libertas_infra", "libertas_infra",
"nom-exif",
"serde", "serde",
"serde_json", "serde_json",
"sqlx", "sqlx",

View File

@@ -12,3 +12,4 @@ futures = "0.3.31"
thiserror = "2.0.17" thiserror = "2.0.17"
uuid = {version = "1.18.1", features = ["v4", "serde"] } uuid = {version = "1.18.1", features = ["v4", "serde"] }
serde = { version = "1.0.228", features = ["derive"] } serde = { version = "1.0.228", features = ["derive"] }
nom-exif = { version = "2.5.4", features = ["serde", "async", "tokio"] }

View File

@@ -6,3 +6,4 @@ pub mod plugins;
pub mod repositories; pub mod repositories;
pub mod schema; pub mod schema;
pub mod services; pub mod services;
pub mod media_utils;

View File

@@ -0,0 +1,71 @@
use std::path::Path;
use chrono::{DateTime, NaiveDateTime, Utc};
use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag};
use crate::error::{CoreError, CoreResult};
/// Metadata pulled from a media file's EXIF block.
///
/// Every field is optional: any tag missing from the file (or an entirely
/// absent/unparsable EXIF segment) simply leaves its field as `None`, which
/// is also what `Default` produces.
#[derive(Default, Debug)]
pub struct ExtractedExif {
    // Pixel dimensions from ExifImageWidth / ExifImageHeight, narrowed to i32.
    pub width: Option<i32>,
    pub height: Option<i32>,
    // GPS position formatted as an ISO 6709 string (see format_iso6709).
    pub location: Option<String>,
    // DateTimeOriginal, falling back to ModifyDate; interpreted as UTC.
    pub date_taken: Option<DateTime<Utc>>,
}
/// Parse an EXIF datetime string (`"YYYY:MM:DD HH:MM:SS"`) into a UTC timestamp.
///
/// EXIF timestamps carry no timezone; the wall-clock value is interpreted as
/// UTC. Returns `None` when the string does not match the EXIF layout.
fn parse_exif_datetime(s: &str) -> Option<DateTime<Utc>> {
    // `and_utc()` replaces `and_local_timezone(Utc).unwrap()`: attaching the
    // Utc offset is always unambiguous, so no unwrap/panic path is needed.
    NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S")
        .ok()
        .map(|ndt| ndt.and_utc())
}
/// Extract pixel dimensions, GPS location (ISO 6709) and capture date from a
/// media file's EXIF block.
///
/// # Errors
/// Only a failure to *open* the file is surfaced as `CoreError::Unknown`.
/// A missing EXIF segment, or EXIF data that cannot be parsed (e.g. corrupt),
/// is non-fatal and yields `ExtractedExif::default()` (all fields `None`) so
/// callers can continue importing the file.
pub async fn extract_exif_data(file_path: &Path) -> CoreResult<ExtractedExif> {
    let ms = AsyncMediaSource::file_path(file_path)
        .await
        .map_err(|e| CoreError::Unknown(format!("Failed to open file for EXIF: {}", e)))?;

    if !ms.has_exif() {
        return Ok(ExtractedExif::default());
    }

    let mut parser = AsyncMediaParser::new();
    let iter: ExifIter = match parser.parse(ms).await {
        Ok(iter) => iter,
        Err(e) => {
            // Diagnostics belong on stderr (matches the eprintln! error
            // reporting used elsewhere in this codebase), not stdout.
            eprintln!("Could not parse EXIF: {}", e);
            return Ok(ExtractedExif::default());
        }
    };

    // GPS info must be read from the iterator before it is consumed by
    // the `Exif` conversion below.
    let location = iter
        .parse_gps_info()
        .ok()
        .flatten()
        .map(|g| g.format_iso6709());

    let exif: Exif = iter.into();

    let width = exif
        .get(ExifTag::ExifImageWidth)
        .and_then(|f| f.as_u32())
        .map(|v| v as i32);
    let height = exif
        .get(ExifTag::ExifImageHeight)
        .and_then(|f| f.as_u32())
        .map(|v| v as i32);

    // Prefer the original capture time; fall back to the last-modified tag.
    let dt_original = exif
        .get(ExifTag::DateTimeOriginal)
        .and_then(|f| f.as_str())
        .and_then(parse_exif_datetime);
    let dt_modify = exif
        .get(ExifTag::ModifyDate)
        .and_then(|f| f.as_str())
        .and_then(parse_exif_datetime);
    let date_taken = dt_original.or(dt_modify);

    Ok(ExtractedExif {
        width,
        height,
        location,
        date_taken,
    })
}

View File

@@ -1,15 +1,21 @@
use std::{path::{Path, PathBuf}, sync::Arc};
use anyhow::Result; use anyhow::Result;
use std::{
path::{Path, PathBuf},
sync::Arc,
};
use chrono::{DateTime, Datelike, NaiveDateTime, Utc}; use chrono::Datelike;
use clap::Parser; use clap::Parser;
use libertas_core::{config::Config, error::{CoreError, CoreResult}, models::{Media, User}, repositories::{MediaRepository, UserRepository}}; use libertas_core::{
config::Config, error::{CoreError, CoreResult}, media_utils::extract_exif_data, models::{Media, User}, repositories::{MediaRepository, UserRepository}
};
use libertas_infra::factory::{build_database_pool, build_media_repository, build_user_repository}; use libertas_infra::factory::{build_database_pool, build_media_repository, build_user_repository};
use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag}; use nom_exif::{AsyncMediaParser, AsyncMediaSource, ExifIter};
use serde_json;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use tokio::fs;
use uuid::Uuid; use uuid::Uuid;
use walkdir::WalkDir; use walkdir::WalkDir;
use tokio::fs;
mod config; mod config;
@@ -34,7 +40,10 @@ struct ImporterState {
#[tokio::main] #[tokio::main]
async fn main() -> Result<()> { async fn main() -> Result<()> {
let cli = Cli::parse(); let cli = Cli::parse();
println!("Starting import for user: '{}' from path '{}'...", cli.username, cli.path); println!(
"Starting import for user: '{}' from path '{}'...",
cli.username, cli.path
);
let config = config::load_config()?; let config = config::load_config()?;
let db_pool = build_database_pool(&config.database).await?; let db_pool = build_database_pool(&config.database).await?;
@@ -52,10 +61,10 @@ async fn main() -> Result<()> {
}; };
let user = state let user = state
.user_repo .user_repo
.find_by_username(&cli.username) .find_by_username(&cli.username)
.await? .await?
.ok_or_else(|| anyhow::anyhow!("User '{}' not found", cli.username))?; .ok_or_else(|| anyhow::anyhow!("User '{}' not found", cli.username))?;
println!("User '{}' found with ID: {}", cli.username, user.id); println!("User '{}' found with ID: {}", cli.username, user.id);
println!("Storage: {} / {}", user.storage_used, user.storage_quota); println!("Storage: {} / {}", user.storage_used, user.storage_quota);
@@ -66,11 +75,11 @@ async fn main() -> Result<()> {
for entry in walker.filter_map(Result::ok) { for entry in walker.filter_map(Result::ok) {
if entry.file_type().is_file() { if entry.file_type().is_file() {
let path = entry.path(); let path = entry.path();
match process_file(path, &user, &state).await { match process_file(path, &user, &state).await {
Ok(media) => { Ok(media) => {
println!("-> Imported: '{}'", media.original_filename); println!("-> Imported: '{}'", media.original_filename);
}, }
Err(e) => { Err(e) => {
eprintln!("!! Skipped: '{}' (Reason: {})", path.display(), e); eprintln!("!! Skipped: '{}' (Reason: {})", path.display(), e);
} }
@@ -83,11 +92,7 @@ async fn main() -> Result<()> {
Ok(()) Ok(())
} }
async fn process_file( async fn process_file(file_path: &Path, user: &User, state: &ImporterState) -> CoreResult<Media> {
file_path: &Path,
user: &User,
state: &ImporterState,
) -> CoreResult<Media> {
let file_bytes = fs::read(file_path).await?; let file_bytes = fs::read(file_path).await?;
let file_size = file_bytes.len() as i64; let file_size = file_bytes.len() as i64;
let hash = format!("{:x}", Sha256::digest(&file_bytes)); let hash = format!("{:x}", Sha256::digest(&file_bytes));
@@ -107,34 +112,58 @@ async fn process_file(
)); ));
} }
let (width, height, location, date_taken) = let (width, height, location, date_taken) = match extract_exif_data(file_path).await {
match AsyncMediaSource::file_path(file_path).await { Ok(data) => {
Ok(ms) => { println!(" -> Parsed EXIF: DateTimeOriginal={:?}, GPS={:?}", data.date_taken, data.location);
if ms.has_exif() { (data.width, data.height, data.location, data.date_taken)
let mut parser = AsyncMediaParser::new(); },
if let Ok(iter) = parser.parse::<_,_, ExifIter>(ms).await { Err(e) => {
let gps = iter.parse_gps_info().ok().flatten().map(|g| g.format_iso6709()); eprintln!(" -> EXIF parsing failed for {}: {}. Skipping.", file_path.display(), e);
println!(" -> EXIF GPS Info: {:?}", gps); (None, None, None, None)
let exif: Exif = iter.into(); }
let modified_date = exif.get(ExifTag::ModifyDate).and_then(|f| f.as_str()).and_then(parse_exif_datetime); };
println!(" -> EXIF ModifyDate: {:?}", modified_date);
let w = exif.get(ExifTag::ExifImageWidth).and_then(|f| f.as_u32()).map(|v| v as i32); match AsyncMediaSource::file_path(file_path).await {
println!(" -> EXIF ExifImageWidth: {:?}", w); Ok(ms) => {
let h = exif.get(ExifTag::ExifImageHeight).and_then(|f| f.as_u32()).map(|v| v as i32); if ms.has_exif() {
println!(" -> EXIF ExifImageHeight: {:?}", h); let mut parser = AsyncMediaParser::new();
let dt = exif.get(ExifTag::DateTimeOriginal).and_then(|f| f.as_str()).and_then(parse_exif_datetime); if let Ok(iter) = parser.parse::<_, _, ExifIter>(ms).await {
println!(" -> EXIF DateTimeOriginal: {:?}", dt); let values = iter
(w, h, gps, dt) .into_iter()
} else { .filter_map(|mut x| {
(None, None, None, None) let res = x.take_result();
} match res {
Ok(v) => Some((
x.tag().map(|x| x.to_string()).unwrap_or_else(|| {
format!("Unknown(0x{:04x})", x.tag_code())
}),
v,
)),
Err(e) => {
println!(
" !! EXIF parsing error for tag 0x{:04x}: {}",
x.tag_code(),
e
);
None
}
}
})
.collect::<Vec<_>>();
values.iter().for_each(|x| {
println!("{:<32}=> {}", x.0, x.1);
});
} else { } else {
(None, None, None, None) ()
} }
} else {
()
} }
Err(_) => (None, None, None, None), }
}; Err(_) => (),
};
let file_date = date_taken.unwrap_or_else(|| chrono::Utc::now()); let file_date = date_taken.unwrap_or_else(|| chrono::Utc::now());
let year = file_date.year().to_string(); let year = file_date.year().to_string();
let month = format!("{:02}", file_date.month()); let month = format!("{:02}", file_date.month());
@@ -155,8 +184,8 @@ async fn process_file(
.to_string(); .to_string();
let mime_type = mime_guess::from_path(file_path) let mime_type = mime_guess::from_path(file_path)
.first_or_octet_stream() .first_or_octet_stream()
.to_string(); .to_string();
let media_model = Media { let media_model = Media {
id: Uuid::new_v4(), id: Uuid::new_v4(),
@@ -172,23 +201,19 @@ async fn process_file(
date_taken: date_taken, date_taken: date_taken,
thumbnail_path: None, thumbnail_path: None,
}; };
state.media_repo.create(&media_model).await?; state.media_repo.create(&media_model).await?;
state.user_repo state
.user_repo
.update_storage_used(user.id, file_size) .update_storage_used(user.id, file_size)
.await?; .await?;
let job_payload = serde_json::json!({ "media_id": media_model.id }); let job_payload = serde_json::json!({ "media_id": media_model.id });
state.nats_client state
.nats_client
.publish("media.new".to_string(), job_payload.to_string().into()) .publish("media.new".to_string(), job_payload.to_string().into())
.await .await
.map_err(|e| CoreError::Unknown(format!("Failed to publish NATS message: {}", e)))?; .map_err(|e| CoreError::Unknown(format!("Failed to publish NATS message: {}", e)))?;
Ok(media_model) Ok(media_model)
} }
fn parse_exif_datetime(s: &str) -> Option<DateTime<Utc>> {
NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S")
.ok()
.map(|ndt| ndt.and_local_timezone(Utc).unwrap())
}

View File

@@ -22,7 +22,6 @@ sqlx = { version = "0.8.6", features = [
futures-util = "0.3.31" futures-util = "0.3.31"
bytes = "1.10.1" bytes = "1.10.1"
uuid = { version = "1.18.1", features = ["v4", "serde"] } uuid = { version = "1.18.1", features = ["v4", "serde"] }
nom-exif = { version = "2.5.4", features = ["serde", "tokio", "async"] }
async-trait = "0.1.89" async-trait = "0.1.89"
xmp_toolkit = "1.11.0" xmp_toolkit = "1.11.0"
chrono = "0.4.42" chrono = "0.4.42"

View File

@@ -1,13 +1,9 @@
use async_trait::async_trait; use async_trait::async_trait;
use chrono::{DateTime, NaiveDateTime, Utc};
use std::path::PathBuf; use std::path::PathBuf;
use libertas_core::{ use libertas_core::{
error::{CoreError, CoreResult}, error::CoreResult, media_utils::extract_exif_data, models::Media, plugins::{MediaProcessorPlugin, PluginContext, PluginData}
models::Media,
plugins::{MediaProcessorPlugin, PluginContext, PluginData},
}; };
use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag};
pub struct ExifReaderPlugin; pub struct ExifReaderPlugin;
@@ -20,51 +16,15 @@ impl MediaProcessorPlugin for ExifReaderPlugin {
async fn process(&self, media: &Media, context: &PluginContext) -> CoreResult<PluginData> { async fn process(&self, media: &Media, context: &PluginContext) -> CoreResult<PluginData> {
let file_path = PathBuf::from(&context.media_library_path).join(&media.storage_path); let file_path = PathBuf::from(&context.media_library_path).join(&media.storage_path);
let ms = match AsyncMediaSource::file_path(file_path).await { let (width, height, location, date_taken) = match extract_exif_data(&file_path).await {
Ok(ms) => ms, Ok(data) => (data.width, data.height, data.location, data.date_taken),
Err(e) => return Err(CoreError::Unknown(format!("Failed to open a file: {}", e))),
};
if !ms.has_exif() {
return Ok(PluginData {
message: "No EXIF data found in file.".to_string(),
});
}
let mut parser = AsyncMediaParser::new();
let iter: ExifIter = match parser.parse(ms).await {
Ok(iter) => iter,
Err(e) => { Err(e) => {
// It's not a fatal error, just means parsing failed (e.g., corrupt data)
return Ok(PluginData { return Ok(PluginData {
message: format!("Could not parse EXIF: {}", e), message: format!("Could not parse EXIF: {}", e),
}); });
} }
}; };
let location: Option<String> = match iter.parse_gps_info() {
Ok(Some(gps_info)) => Some(gps_info.format_iso6709()),
Ok(None) => None,
Err(_) => None,
};
let exif: Exif = iter.into();
let width = exif
.get(ExifTag::ExifImageWidth)
.and_then(|f| f.as_u32())
.map(|v| v as i32);
let height = exif
.get(ExifTag::ExifImageHeight)
.and_then(|f| f.as_u32())
.map(|v| v as i32);
let date_taken = exif
.get(ExifTag::DateTimeOriginal)
.and_then(|f| f.as_str())
.and_then(parse_exif_datetime);
if width.is_some() || height.is_some() || location.is_some() || date_taken.is_some() { if width.is_some() || height.is_some() || location.is_some() || date_taken.is_some() {
context context
.media_repo .media_repo
@@ -82,11 +42,4 @@ impl MediaProcessorPlugin for ExifReaderPlugin {
}) })
} }
} }
}
fn parse_exif_datetime(s: &str) -> Option<DateTime<Utc>> {
// EXIF datetime format is 'YYYY:MM:DD HH:MM:SS'
NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S")
.ok()
.map(|ndt| ndt.and_local_timezone(Utc).unwrap())
} }