feat: integrate EXIF data extraction using nom-exif and refactor related components
Cargo.lock (generated, 2 changed lines)
@@ -1661,6 +1661,7 @@ dependencies = [
  "bytes",
  "chrono",
  "futures",
+ "nom-exif",
  "serde",
  "thiserror 2.0.17",
  "uuid",
@@ -1714,7 +1715,6 @@ dependencies = [
  "image",
  "libertas_core",
  "libertas_infra",
- "nom-exif",
  "serde",
  "serde_json",
  "sqlx",
@@ -12,3 +12,4 @@ futures = "0.3.31"
 thiserror = "2.0.17"
 uuid = {version = "1.18.1", features = ["v4", "serde"] }
 serde = { version = "1.0.228", features = ["derive"] }
+nom-exif = { version = "2.5.4", features = ["serde", "async", "tokio"] }
@@ -6,3 +6,4 @@ pub mod plugins;
 pub mod repositories;
 pub mod schema;
 pub mod services;
+pub mod media_utils;
libertas_core/src/media_utils.rs (new file, 71 lines)
@@ -0,0 +1,71 @@
+use std::path::Path;
+
+use chrono::{DateTime, NaiveDateTime, Utc};
+use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag};
+
+use crate::error::{CoreError, CoreResult};
+
+#[derive(Default, Debug)]
+pub struct ExtractedExif {
+    pub width: Option<i32>,
+    pub height: Option<i32>,
+    pub location: Option<String>,
+    pub date_taken: Option<DateTime<Utc>>,
+}
+
+fn parse_exif_datetime(s: &str) -> Option<DateTime<Utc>> {
+    NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S")
+        .ok()
+        .map(|ndt| ndt.and_local_timezone(Utc).unwrap())
+}
+
+pub async fn extract_exif_data(file_path: &Path) -> CoreResult<ExtractedExif> {
+    let ms = AsyncMediaSource::file_path(file_path)
+        .await
+        .map_err(|e| CoreError::Unknown(format!("Failed to open file for EXIF: {}", e)))?;
+
+    if !ms.has_exif() {
+        return Ok(ExtractedExif::default());
+    }
+
+    let mut parser = AsyncMediaParser::new();
+    let iter: ExifIter = match parser.parse(ms).await {
+        Ok(iter) => iter,
+        Err(e) => {
+            println!("Could not parse EXIF: {}", e);
+            return Ok(ExtractedExif::default());
+        }
+    };
+
+    let location = iter.parse_gps_info().ok().flatten().map(|g| g.format_iso6709());
+    let exif: Exif = iter.into();
+
+    let width = exif
+        .get(ExifTag::ExifImageWidth)
+        .and_then(|f| f.as_u32())
+        .map(|v| v as i32);
+
+    let height = exif
+        .get(ExifTag::ExifImageHeight)
+        .and_then(|f| f.as_u32())
+        .map(|v| v as i32);
+
+    let dt_original = exif
+        .get(ExifTag::DateTimeOriginal)
+        .and_then(|f| f.as_str())
+        .and_then(parse_exif_datetime);
+
+    let dt_modify = exif
+        .get(ExifTag::ModifyDate)
+        .and_then(|f| f.as_str())
+        .and_then(parse_exif_datetime);
+
+    let date_taken = dt_original.or(dt_modify);
+
+    Ok(ExtractedExif {
+        width,
+        height,
+        location,
+        date_taken,
+    })
+}
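
For orientation, a minimal, hypothetical sketch of how the new `extract_exif_data` helper might be called from an async context. The binary wrapper, the sample path, and the error handling are illustration-only assumptions; only the helper and its `ExtractedExif` fields come from this commit.

    // Hypothetical caller, not part of the commit; assumes tokio's "macros" and "rt" features.
    use std::path::Path;

    use libertas_core::media_utils::extract_exif_data;

    #[tokio::main]
    async fn main() {
        // parse_exif_datetime expects the EXIF "YYYY:MM:DD HH:MM:SS" layout,
        // e.g. "2024:01:15 09:30:00"; the path below is a placeholder.
        if let Ok(exif) = extract_exif_data(Path::new("/tmp/example.jpg")).await {
            println!(
                "dimensions: {:?}x{:?}, gps: {:?}, taken: {:?}",
                exif.width, exif.height, exif.location, exif.date_taken
            );
        }
    }

Note that `date_taken` falls back to `ModifyDate` when `DateTimeOriginal` is absent, and the GPS location is rendered as an ISO 6709 string via `format_iso6709()`.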
@@ -1,15 +1,21 @@
-use std::{path::{Path, PathBuf}, sync::Arc};
 use anyhow::Result;
+use std::{
+    path::{Path, PathBuf},
+    sync::Arc,
+};
+
-use chrono::{DateTime, Datelike, NaiveDateTime, Utc};
+use chrono::Datelike;
 use clap::Parser;
-use libertas_core::{config::Config, error::{CoreError, CoreResult}, models::{Media, User}, repositories::{MediaRepository, UserRepository}};
+use libertas_core::{
+    config::Config, error::{CoreError, CoreResult}, media_utils::extract_exif_data, models::{Media, User}, repositories::{MediaRepository, UserRepository}
+};
 use libertas_infra::factory::{build_database_pool, build_media_repository, build_user_repository};
-use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag};
+use nom_exif::{AsyncMediaParser, AsyncMediaSource, ExifIter};
+use serde_json;
 use sha2::{Digest, Sha256};
+use tokio::fs;
 use uuid::Uuid;
 use walkdir::WalkDir;
-use tokio::fs;
 
 mod config;
@@ -34,7 +40,10 @@ struct ImporterState {
 #[tokio::main]
 async fn main() -> Result<()> {
     let cli = Cli::parse();
-    println!("Starting import for user: '{}' from path '{}'...", cli.username, cli.path);
+    println!(
+        "Starting import for user: '{}' from path '{}'...",
+        cli.username, cli.path
+    );
 
     let config = config::load_config()?;
     let db_pool = build_database_pool(&config.database).await?;
@@ -70,7 +79,7 @@
         match process_file(path, &user, &state).await {
             Ok(media) => {
                 println!("-> Imported: '{}'", media.original_filename);
-            },
+            }
             Err(e) => {
                 eprintln!("!! Skipped: '{}' (Reason: {})", path.display(), e);
             }
@@ -83,11 +92,7 @@
     Ok(())
 }
 
-async fn process_file(
-    file_path: &Path,
-    user: &User,
-    state: &ImporterState,
-) -> CoreResult<Media> {
+async fn process_file(file_path: &Path, user: &User, state: &ImporterState) -> CoreResult<Media> {
     let file_bytes = fs::read(file_path).await?;
     let file_size = file_bytes.len() as i64;
     let hash = format!("{:x}", Sha256::digest(&file_bytes));
@@ -107,32 +112,56 @@ async fn process_file(
         ));
     }
 
-    let (width, height, location, date_taken) =
+    let (width, height, location, date_taken) = match extract_exif_data(file_path).await {
+        Ok(data) => {
+            println!(" -> Parsed EXIF: DateTimeOriginal={:?}, GPS={:?}", data.date_taken, data.location);
+            (data.width, data.height, data.location, data.date_taken)
+        },
+        Err(e) => {
+            eprintln!(" -> EXIF parsing failed for {}: {}. Skipping.", file_path.display(), e);
+            (None, None, None, None)
+        }
+    };
+
     match AsyncMediaSource::file_path(file_path).await {
         Ok(ms) => {
             if ms.has_exif() {
                 let mut parser = AsyncMediaParser::new();
-                if let Ok(iter) = parser.parse::<_,_, ExifIter>(ms).await {
-                    let gps = iter.parse_gps_info().ok().flatten().map(|g| g.format_iso6709());
-                    println!(" -> EXIF GPS Info: {:?}", gps);
-                    let exif: Exif = iter.into();
-                    let modified_date = exif.get(ExifTag::ModifyDate).and_then(|f| f.as_str()).and_then(parse_exif_datetime);
-                    println!(" -> EXIF ModifyDate: {:?}", modified_date);
-                    let w = exif.get(ExifTag::ExifImageWidth).and_then(|f| f.as_u32()).map(|v| v as i32);
-                    println!(" -> EXIF ExifImageWidth: {:?}", w);
-                    let h = exif.get(ExifTag::ExifImageHeight).and_then(|f| f.as_u32()).map(|v| v as i32);
-                    println!(" -> EXIF ExifImageHeight: {:?}", h);
-                    let dt = exif.get(ExifTag::DateTimeOriginal).and_then(|f| f.as_str()).and_then(parse_exif_datetime);
-                    println!(" -> EXIF DateTimeOriginal: {:?}", dt);
-                    (w, h, gps, dt)
+                if let Ok(iter) = parser.parse::<_, _, ExifIter>(ms).await {
+                    let values = iter
+                        .into_iter()
+                        .filter_map(|mut x| {
+                            let res = x.take_result();
+                            match res {
+                                Ok(v) => Some((
+                                    x.tag().map(|x| x.to_string()).unwrap_or_else(|| {
+                                        format!("Unknown(0x{:04x})", x.tag_code())
+                                    }),
+                                    v,
+                                )),
+                                Err(e) => {
+                                    println!(
+                                        " !! EXIF parsing error for tag 0x{:04x}: {}",
+                                        x.tag_code(),
+                                        e
+                                    );
+                                    None
+                                }
+                            }
+                        })
+                        .collect::<Vec<_>>();
+
+                    values.iter().for_each(|x| {
+                        println!("{:<32}=> {}", x.0, x.1);
+                    });
                 } else {
-                    (None, None, None, None)
+                    ()
                 }
             } else {
-                (None, None, None, None)
+                ()
             }
         }
-        Err(_) => (None, None, None, None),
+        Err(_) => (),
     };
 
     let file_date = date_taken.unwrap_or_else(|| chrono::Utc::now());
@@ -174,21 +203,17 @@ async fn process_file(
     };
 
     state.media_repo.create(&media_model).await?;
-    state.user_repo
+    state
+        .user_repo
         .update_storage_used(user.id, file_size)
         .await?;
 
     let job_payload = serde_json::json!({ "media_id": media_model.id });
-    state.nats_client
+    state
+        .nats_client
         .publish("media.new".to_string(), job_payload.to_string().into())
         .await
         .map_err(|e| CoreError::Unknown(format!("Failed to publish NATS message: {}", e)))?;
 
     Ok(media_model)
 }
-
-fn parse_exif_datetime(s: &str) -> Option<DateTime<Utc>> {
-    NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S")
-        .ok()
-        .map(|ndt| ndt.and_local_timezone(Utc).unwrap())
-}
@@ -22,7 +22,6 @@ sqlx = { version = "0.8.6", features = [
 futures-util = "0.3.31"
 bytes = "1.10.1"
 uuid = { version = "1.18.1", features = ["v4", "serde"] }
-nom-exif = { version = "2.5.4", features = ["serde", "tokio", "async"] }
 async-trait = "0.1.89"
 xmp_toolkit = "1.11.0"
 chrono = "0.4.42"
@@ -1,13 +1,9 @@
 use async_trait::async_trait;
-use chrono::{DateTime, NaiveDateTime, Utc};
 use std::path::PathBuf;
 
 use libertas_core::{
-    error::{CoreError, CoreResult},
-    models::Media,
-    plugins::{MediaProcessorPlugin, PluginContext, PluginData},
+    error::CoreResult, media_utils::extract_exif_data, models::Media, plugins::{MediaProcessorPlugin, PluginContext, PluginData}
 };
-use nom_exif::{AsyncMediaParser, AsyncMediaSource, Exif, ExifIter, ExifTag};
 
 pub struct ExifReaderPlugin;
@@ -20,51 +16,15 @@ impl MediaProcessorPlugin for ExifReaderPlugin {
     async fn process(&self, media: &Media, context: &PluginContext) -> CoreResult<PluginData> {
         let file_path = PathBuf::from(&context.media_library_path).join(&media.storage_path);
 
-        let ms = match AsyncMediaSource::file_path(file_path).await {
-            Ok(ms) => ms,
-            Err(e) => return Err(CoreError::Unknown(format!("Failed to open a file: {}", e))),
-        };
-
-        if !ms.has_exif() {
-            return Ok(PluginData {
-                message: "No EXIF data found in file.".to_string(),
-            });
-        }
-
-        let mut parser = AsyncMediaParser::new();
-        let iter: ExifIter = match parser.parse(ms).await {
-            Ok(iter) => iter,
+        let (width, height, location, date_taken) = match extract_exif_data(&file_path).await {
+            Ok(data) => (data.width, data.height, data.location, data.date_taken),
             Err(e) => {
-                // It's not a fatal error, just means parsing failed (e.g., corrupt data)
                 return Ok(PluginData {
                     message: format!("Could not parse EXIF: {}", e),
                 });
             }
         };
 
-        let location: Option<String> = match iter.parse_gps_info() {
-            Ok(Some(gps_info)) => Some(gps_info.format_iso6709()),
-            Ok(None) => None,
-            Err(_) => None,
-        };
-
-        let exif: Exif = iter.into();
-
-        let width = exif
-            .get(ExifTag::ExifImageWidth)
-            .and_then(|f| f.as_u32())
-            .map(|v| v as i32);
-
-        let height = exif
-            .get(ExifTag::ExifImageHeight)
-            .and_then(|f| f.as_u32())
-            .map(|v| v as i32);
-
-        let date_taken = exif
-            .get(ExifTag::DateTimeOriginal)
-            .and_then(|f| f.as_str())
-            .and_then(parse_exif_datetime);
-
         if width.is_some() || height.is_some() || location.is_some() || date_taken.is_some() {
             context
                 .media_repo
@@ -83,10 +43,3 @@ impl MediaProcessorPlugin for ExifReaderPlugin {
         }
     }
 }
-
-fn parse_exif_datetime(s: &str) -> Option<DateTime<Utc>> {
-    // EXIF datetime format is 'YYYY:MM:DD HH:MM:SS'
-    NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S")
-        .ok()
-        .map(|ndt| ndt.and_local_timezone(Utc).unwrap())
-}
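
Pieced together from the added lines above, the refactored plugin `process` body now reads roughly as follows. This is a sketch only: the middle of the function, which persists the extracted fields through `context.media_repo` and builds the returned `PluginData`, is untouched by this commit and therefore elided rather than guessed at.

    // Rough assembly of the refactored body from the hunks above (sketch, not the full file).
    async fn process(&self, media: &Media, context: &PluginContext) -> CoreResult<PluginData> {
        let file_path = PathBuf::from(&context.media_library_path).join(&media.storage_path);

        // All EXIF parsing is now delegated to the shared core helper.
        let (width, height, location, date_taken) = match extract_exif_data(&file_path).await {
            Ok(data) => (data.width, data.height, data.location, data.date_taken),
            Err(e) => {
                return Ok(PluginData {
                    message: format!("Could not parse EXIF: {}", e),
                });
            }
        };

        if width.is_some() || height.is_some() || location.is_some() || date_taken.is_some() {
            // ... unchanged: write the extracted fields via context.media_repo ...
        }
        // ... unchanged: return the PluginData result ...
    }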