feat: real EXIF extraction via adapters-exif crate
- MetadataExtractorPort in domain (bytes → StructuredData)
- adapters-exif: NomExifExtractor using nom-exif, handles EXIF + TrackInfo
- Worker's MetadataExtractorPlugin delegates to port, no longer knows nom-exif
- Filters noisy binary tags (U8Array, Undefined, Unknown)
This commit is contained in:
9
crates/adapters/exif/Cargo.toml
Normal file
9
crates/adapters/exif/Cargo.toml
Normal file
@@ -0,0 +1,9 @@
|
||||
[package]
|
||||
name = "adapters-exif"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
domain = { workspace = true }
|
||||
bytes = { workspace = true }
|
||||
nom-exif = { version = "2.5", features = ["serde"] }
|
||||
80
crates/adapters/exif/src/lib.rs
Normal file
80
crates/adapters/exif/src/lib.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
use bytes::Bytes;
|
||||
use domain::{
|
||||
errors::DomainError,
|
||||
ports::MetadataExtractorPort,
|
||||
value_objects::{MetadataValue, StructuredData},
|
||||
};
|
||||
use nom_exif::{ExifIter, MediaParser, MediaSource, TrackInfo};
|
||||
use std::io::Cursor;
|
||||
|
||||
/// Metadata extractor that implements `MetadataExtractorPort` on top of the
/// `nom-exif` crate.
///
/// The type carries no state, so it is trivially copyable and can be created
/// either by naming it directly (`NomExifExtractor`) or via `Default`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct NomExifExtractor;
|
||||
|
||||
impl MetadataExtractorPort for NomExifExtractor {
|
||||
fn extract(&self, bytes: &Bytes) -> Result<StructuredData, DomainError> {
|
||||
if bytes.is_empty() {
|
||||
return Ok(StructuredData::new());
|
||||
}
|
||||
|
||||
let ms = match MediaSource::seekable(Cursor::new(bytes.as_ref())) {
|
||||
Ok(ms) => ms,
|
||||
Err(_) => return Ok(StructuredData::new()),
|
||||
};
|
||||
|
||||
let mut parser = MediaParser::new();
|
||||
let mut data = StructuredData::new();
|
||||
|
||||
if ms.has_exif() {
|
||||
let iter: ExifIter = match parser.parse(ms) {
|
||||
Ok(iter) => iter,
|
||||
Err(_) => return Ok(data),
|
||||
};
|
||||
|
||||
for mut entry in iter {
|
||||
let tag_name = match entry.tag() {
|
||||
Some(t) => t.to_string(),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
if tag_name.starts_with("Unknown(") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let value = match entry.take_result() {
|
||||
Ok(v) => v.to_string(),
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
if is_noisy_value(&value) {
|
||||
continue;
|
||||
}
|
||||
|
||||
data.insert(tag_name, MetadataValue::String(value));
|
||||
}
|
||||
} else {
|
||||
let track_info = match parser.parse::<_, _, TrackInfo>(ms) {
|
||||
Ok(info) => info,
|
||||
Err(_) => return Ok(data),
|
||||
};
|
||||
|
||||
for (key, val) in track_info {
|
||||
data.insert(
|
||||
format!("track:{}", key),
|
||||
MetadataValue::String(val.to_string()),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether a stringified metadata value should be discarded as noise.
///
/// A value is noisy when its text begins with one of the prefixes listed in
/// `NOISY_PREFIXES`. The comparison is case-sensitive and anchored at the very
/// start of the string (no trimming is performed).
fn is_noisy_value(v: &str) -> bool {
    const NOISY_PREFIXES: [&str; 5] = [
        "U16Array",
        "U32Array",
        "U8Array",
        "URationalArray",
        "Undefined",
    ];

    NOISY_PREFIXES.iter().any(|prefix| v.starts_with(*prefix))
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
19
crates/adapters/exif/src/tests.rs
Normal file
19
crates/adapters/exif/src/tests.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
use crate::NomExifExtractor;
|
||||
use bytes::Bytes;
|
||||
use domain::ports::MetadataExtractorPort;
|
||||
|
||||
// An empty buffer must succeed and produce no metadata entries.
#[test]
fn empty_bytes_returns_empty_data() {
    let outcome = NomExifExtractor.extract(&Bytes::new());
    assert!(outcome.is_ok_and(|data| data.is_empty()));
}
|
||||
|
||||
// Bytes that are not a media file must succeed and produce no metadata entries.
#[test]
fn garbage_bytes_returns_empty_data() {
    let junk = Bytes::from_static(b"not a real image file");
    let outcome = NomExifExtractor.extract(&junk);
    assert!(outcome.is_ok_and(|data| data.is_empty()));
}
|
||||
Reference in New Issue
Block a user