feat: real EXIF extraction via adapters-exif crate

- MetadataExtractorPort in domain (bytes → StructuredData)
- adapters-exif: NomExifExtractor using nom-exif, handles EXIF + TrackInfo
- Worker's MetadataExtractorPlugin delegates to port, no longer knows nom-exif
- Filters noisy entries: tags rendered as Unknown(...), and values rendered as U8Array, U16Array, U32Array, URationalArray or Undefined
This commit is contained in:
2026-05-31 20:28:50 +02:00
parent d1c7243f5b
commit 45669ec848
10 changed files with 212 additions and 8 deletions

View File

@@ -0,0 +1,9 @@
[package]
name = "adapters-exif"
version = "0.1.0"
edition = "2024"
[dependencies]
domain = { workspace = true }
bytes = { workspace = true }
nom-exif = { version = "2.5", features = ["serde"] }

View File

@@ -0,0 +1,80 @@
use bytes::Bytes;
use domain::{
errors::DomainError,
ports::MetadataExtractorPort,
value_objects::{MetadataValue, StructuredData},
};
use nom_exif::{ExifIter, MediaParser, MediaSource, TrackInfo};
use std::io::Cursor;
/// Stateless metadata extractor backed by the `nom-exif` crate.
///
/// The type carries no fields; all work happens in its
/// `MetadataExtractorPort` implementation. The common traits are derived so
/// the extractor can be logged, copied and default-constructed like any other
/// zero-sized handle.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct NomExifExtractor;
impl MetadataExtractorPort for NomExifExtractor {
    /// Extracts metadata from an in-memory media file.
    ///
    /// When the media source reports EXIF data, every EXIF entry is stored
    /// under its tag name, except that:
    /// - entries without a recognised tag are skipped,
    /// - tags whose name starts with `Unknown(` are skipped,
    /// - entries whose `take_result()` returns an error are skipped,
    /// - values rejected by `is_noisy_value` are skipped.
    ///
    /// Otherwise the input is parsed as track info and each item is stored
    /// under a `track:`-prefixed key, without any value filtering.
    ///
    /// All values are stored as `MetadataValue::String`.
    ///
    /// This implementation is best-effort: every return path is `Ok`. Empty
    /// input, an unreadable media source, or a parser failure all yield an
    /// empty `StructuredData` rather than an error.
    fn extract(&self, bytes: &Bytes) -> Result<StructuredData, DomainError> {
        // Nothing to parse.
        if bytes.is_empty() {
            return Ok(StructuredData::new());
        }

        // The source could not be built from these bytes: report "no metadata".
        let Ok(source) = MediaSource::seekable(Cursor::new(bytes.as_ref())) else {
            return Ok(StructuredData::new());
        };

        let mut parser = MediaParser::new();
        let mut metadata = StructuredData::new();

        // No EXIF: fall back to track info.
        if !source.has_exif() {
            let Ok(tracks) = parser.parse::<_, _, TrackInfo>(source) else {
                return Ok(metadata);
            };
            for (tag, value) in tracks {
                let key = format!("track:{}", tag);
                metadata.insert(key, MetadataValue::String(value.to_string()));
            }
            return Ok(metadata);
        }

        // EXIF path.
        let Ok(entries) = parser.parse::<_, _, ExifIter>(source) else {
            return Ok(metadata);
        };
        for mut entry in entries {
            let Some(tag) = entry.tag() else {
                continue;
            };
            let name = tag.to_string();
            if name.starts_with("Unknown(") {
                continue;
            }

            let Ok(raw) = entry.take_result() else {
                continue;
            };
            let rendered = raw.to_string();
            if is_noisy_value(&rendered) {
                continue;
            }

            metadata.insert(name, MetadataValue::String(rendered));
        }

        Ok(metadata)
    }
}
/// Reports whether a rendered metadata value should be dropped as noise.
///
/// Returns `true` when `v` begins with one of a fixed set of prefixes
/// (`U16Array`, `U32Array`, `U8Array`, `URationalArray`, `Undefined`).
/// The match is case-sensitive and anchored at the start of the string.
// NOTE(review): these prefixes are presumably how the EXIF library renders
// array/undefined values as text — confirm against its `Display` output.
fn is_noisy_value(v: &str) -> bool {
    const NOISY_PREFIXES: [&str; 5] = [
        "U16Array",
        "U32Array",
        "U8Array",
        "URationalArray",
        "Undefined",
    ];
    NOISY_PREFIXES.iter().any(|prefix| v.starts_with(prefix))
}
#[cfg(test)]
mod tests;

View File

@@ -0,0 +1,19 @@
use crate::NomExifExtractor;
use bytes::Bytes;
use domain::ports::MetadataExtractorPort;
/// An empty buffer must succeed and yield no metadata.
#[test]
fn empty_bytes_returns_empty_data() {
    let input = Bytes::new();
    let outcome = NomExifExtractor.extract(&input);
    assert!(outcome.is_ok());
    let data = outcome.unwrap();
    assert!(data.is_empty());
}
/// Bytes that are not a media file must succeed and yield no metadata,
/// rather than surfacing a parse error.
#[test]
fn garbage_bytes_returns_empty_data() {
    let input = Bytes::from_static(b"not a real image file");
    let outcome = NomExifExtractor.extract(&input);
    assert!(outcome.is_ok());
    let data = outcome.unwrap();
    assert!(data.is_empty());
}