feat: Update dependencies and implement face detection features

- Updated async-nats dependency to version 0.45.0 in both libertas_api and libertas_worker.
- Introduced AI-related structures and traits in libertas_core for face detection.
- Added AiConfig and FaceDetectorRuntime enums to support different face detection methods.
- Implemented TractFaceDetector and RemoteNatsFaceDetector in libertas_infra for local and remote face detection.
- Created FaceDetectionPlugin to integrate face detection into the media processing pipeline.
- Enhanced XMP writing functionality to include face region data.
- Updated PluginManager to initialize face detection plugins based on configuration.
2025-11-15 21:29:17 +01:00
parent e6c941bf28
commit 98f56e4f1e
17 changed files with 1045 additions and 101 deletions
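For context, the FaceDetector contract in libertas_core that both new detectors implement is not shown in this excerpt. Below is a minimal sketch inferred from the call sites in this commit: the BoundingBox field names and the serde round-trip come from the code further down, while the FaceDetectorRuntime variants are an assumption based on the commit message.

// Sketch of libertas_core::ai (not part of this diff; shapes inferred from usage).
use async_trait::async_trait;
use serde::{Deserialize, Serialize};

use crate::error::CoreResult;

// Axis-aligned face region in original-image pixel coordinates. Serialize and
// Deserialize are needed because RemoteNatsFaceDetector round-trips it as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoundingBox {
    pub x_min: f32,
    pub y_min: f32,
    pub x_max: f32,
    pub y_max: f32,
}

// Implemented by TractFaceDetector (local CPU) and RemoteNatsFaceDetector (NATS).
#[async_trait]
pub trait FaceDetector: Send + Sync {
    async fn detect_faces(&self, image_bytes: &[u8]) -> CoreResult<Vec<BoundingBox>>;
}

// Assumed shape of the runtime selector named in the commit message.
pub enum FaceDetectorRuntime {
    Local { model_path: String },
    RemoteNats { subject: String },
}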

View File

@@ -16,3 +16,9 @@ async-trait = "0.1.89"
uuid = { version = "1.18.1", features = ["v4"] }
chrono = "0.4.42"
serde = { version = "1.0.228", features = ["derive"] }
async-nats = "0.45.0"
serde_json = "1.0.145"
tract-onnx = "0.22.0"
ndarray = "0.17.1"
image = "0.25.8"
tokio = { version = "1.48.0", features = ["full"] }

View File

@@ -0,0 +1,2 @@
pub mod remote_detector;
pub mod tract_detector;

View File

@@ -0,0 +1,40 @@
use async_trait::async_trait;
use libertas_core::{
ai::{BoundingBox, FaceDetector},
error::{CoreError, CoreResult},
};

pub struct RemoteNatsFaceDetector {
client: async_nats::Client,
subject: String,
}

impl RemoteNatsFaceDetector {
pub fn new(client: async_nats::Client, subject: &str) -> Self {
Self {
client,
subject: subject.to_string(),
}
}
}

#[async_trait]
impl FaceDetector for RemoteNatsFaceDetector {
    // TODO: Sending raw image bytes over NATS is inefficient; a compact binary
    // encoding such as Protobuf would be a better fit here.
async fn detect_faces(&self, image_bytes: &[u8]) -> CoreResult<Vec<BoundingBox>> {
println!("Offloading face detection to remote worker via NATS...");
let bytes = image_bytes.to_vec();
let response = self
.client
.request(self.subject.clone(), bytes.into())
.await
.map_err(|e| CoreError::Unknown(format!("NATS request failed: {}", e)))?;
let boxes: Vec<BoundingBox> = serde_json::from_slice(&response.payload)
.map_err(|e| CoreError::Unknown(format!("Failed to parse remote response: {}", e)))?;
Ok(boxes)
}
}
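The request/reply pattern above assumes a responder in libertas_worker that runs the actual detection. A minimal sketch of that loop follows, assuming the futures crate for StreamExt and that CoreError implements Display; the subject name is illustrative and must match whatever RemoteNatsFaceDetector is constructed with.

// Sketch of the worker-side responder (illustrative; not part of this diff).
use std::sync::Arc;

use futures::StreamExt;
use libertas_core::ai::FaceDetector;

async fn serve_face_detection(
    client: async_nats::Client,
    detector: Arc<dyn FaceDetector>,
) -> Result<(), async_nats::Error> {
    // Subject must match the one the client-side detector requests on.
    let mut requests = client.subscribe("ai.face.detect").await?;
    while let Some(msg) = requests.next().await {
        let boxes = match detector.detect_faces(&msg.payload).await {
            Ok(boxes) => boxes,
            Err(e) => {
                eprintln!("face detection failed: {}", e);
                continue;
            }
        };
        if let Some(reply) = msg.reply {
            // Mirror of the client side: boxes are serialized as JSON.
            let payload = serde_json::to_vec(&boxes).unwrap_or_default();
            client.publish(reply, payload.into()).await?;
        }
    }
    Ok(())
}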

View File

@@ -0,0 +1,189 @@
use std::cmp::Ordering;
use std::sync::Arc;
use async_trait::async_trait;
use image::{GenericImageView, RgbImage, imageops};
use libertas_core::{
ai::{BoundingBox, FaceDetector},
error::{CoreError, CoreResult},
};
use tract_onnx::{
prelude::*,
tract_core::ndarray::{Array4, Axis, s},
};

type TractModel = SimplePlan<TypedFact, Box<dyn TypedOp>, Graph<TypedFact, Box<dyn TypedOp>>>;

pub struct TractFaceDetector {
    model: Arc<TractModel>,
}

impl TractFaceDetector {
pub fn new(model_path: &str) -> CoreResult<Self> {
let model = tract_onnx::onnx()
.model_for_path(model_path)
.map_err(|e| CoreError::Config(format!("Failed to load model: {}", e)))?
.with_input_fact(0, f32::fact([1, 3, 640, 640]).into())
.map_err(|e| CoreError::Config(format!("Failed to set input fact: {}", e)))?
.into_optimized()
.map_err(|e| CoreError::Config(format!("Failed to optimize model: {}", e)))?
.into_runnable()
.map_err(|e| CoreError::Config(format!("Failed to make model runnable: {}", e)))?;
Ok(Self {
model: Arc::new(model),
})
}
}

#[async_trait]
impl FaceDetector for TractFaceDetector {
async fn detect_faces(&self, image_bytes: &[u8]) -> CoreResult<Vec<BoundingBox>> {
let image_bytes = image_bytes.to_vec();
let model = self.model.clone();
tokio::task::spawn_blocking(move || {
let img = image::load_from_memory(&image_bytes)
.map_err(|e| CoreError::Unknown(format!("Failed to load image: {}", e)))?;
let (original_width, original_height) = img.dimensions();
let scale = 640.0 / (original_width.max(original_height) as f32);
let new_width = (original_width as f32 * scale) as u32;
let new_height = (original_height as f32 * scale) as u32;
let resized = imageops::resize(
&img.to_rgb8(),
new_width,
new_height,
imageops::FilterType::Triangle,
);
let mut padded = RgbImage::new(640, 640);
let pad_x = (640 - new_width) as i64 / 2;
let pad_y = (640 - new_height) as i64 / 2;
imageops::replace(&mut padded, &resized, pad_x, pad_y);
let tensor: Tensor = Array4::from_shape_fn((1, 3, 640, 640), |(_, c, y, x)| {
padded.get_pixel(x as u32, y as u32)[c] as f32 / 255.0
})
.into();
let result = model
.run(tvec!(tensor.into()))
.map_err(|e| CoreError::Unknown(format!("Model inference failed: {}", e)))?;
let results = result[0]
.to_array_view::<f32>()
.map_err(|e| {
CoreError::Unknown(format!("Failed to convert model output to array: {}", e))
})?
.view()
.t()
.into_owned();
let mut bbox_vec: Vec<InternalBbox> = vec![];
for i in 0..results.len_of(Axis(0)) {
            // One iteration per candidate box (8400 anchors for a 640x640 input)
let row = results.slice(s![i, .., ..]); // Get shape [5, 1]
let confidence = row[[4, 0]];
if confidence >= 0.5 {
// Confidence threshold
let x = row[[0, 0]];
let y = row[[1, 0]];
let w = row[[2, 0]];
let h = row[[3, 0]];
// Convert (center_x, center_y, w, h) to (x1, y1, x2, y2)
let x1 = x - w / 2.0;
let y1 = y - h / 2.0;
let x2 = x + w / 2.0;
let y2 = y + h / 2.0;
bbox_vec.push(InternalBbox::new(x1, y1, x2, y2, confidence));
}
}
        let final_boxes = non_maximum_suppression(bbox_vec, 0.45); // 0.45 IoU threshold
        // Map boxes from the 640x640 letterboxed frame back to original image coordinates.
let boxes: Vec<_> = final_boxes
.into_iter()
.map(|b| {
// Reverse padding
let x1_unpadded = b.x1 - (pad_x as f32);
let y1_unpadded = b.y1 - (pad_y as f32);
let x2_unpadded = b.x2 - (pad_x as f32);
let y2_unpadded = b.y2 - (pad_y as f32);
// Reverse scaling and clamp to original image dimensions
let x_min = (x1_unpadded / scale).max(0.0);
let y_min = (y1_unpadded / scale).max(0.0);
let x_max = (x2_unpadded / scale).min(original_width as f32);
let y_max = (y2_unpadded / scale).min(original_height as f32);
BoundingBox {
x_min,
y_min,
x_max,
y_max,
}
})
.collect();
println!(
"Running face detection locally on the CPU... found {} faces.",
boxes.len()
);
Ok(boxes)
})
.await
.map_err(|e| CoreError::Unknown(format!("Failed to run face detection: {}", e)))?
}
}

#[derive(Debug, Clone)]
struct InternalBbox {
pub x1: f32,
pub y1: f32,
pub x2: f32,
pub y2: f32,
pub confidence: f32,
}

impl InternalBbox {
fn new(x1: f32, y1: f32, x2: f32, y2: f32, confidence: f32) -> Self {
Self {
x1,
y1,
x2,
y2,
confidence,
}
}
}

fn non_maximum_suppression(mut boxes: Vec<InternalBbox>, iou_threshold: f32) -> Vec<InternalBbox> {
    // Sort descending by confidence so the most confident box in each
    // overlapping cluster is kept and its neighbours are suppressed.
    boxes.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(Ordering::Equal)
    });
let mut keep = Vec::new();
while !boxes.is_empty() {
let current = boxes.remove(0);
keep.push(current.clone());
boxes.retain(|box_| calculate_iou(&current, box_) <= iou_threshold);
}
keep
}

fn calculate_iou(box1: &InternalBbox, box2: &InternalBbox) -> f32 {
let x1 = box1.x1.max(box2.x1);
let y1 = box1.y1.max(box2.y1);
let x2 = box1.x2.min(box2.x2);
let y2 = box1.y2.min(box2.y2);
let intersection = (x2 - x1).max(0.0) * (y2 - y1).max(0.0);
let area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
let area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
    let union = area1 + area2 - intersection;
    if union <= 0.0 {
        // Degenerate (zero-area) boxes: treat as non-overlapping.
        return 0.0;
    }
    intersection / union
}
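A quick sanity check on the suppression helpers; a minimal test sketch with made-up coordinates (the overlapping pair has an IoU of roughly 0.68, well above the 0.45 threshold used above):

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn nms_keeps_highest_confidence_of_overlapping_pair() {
        let boxes = vec![
            InternalBbox::new(0.0, 0.0, 10.0, 10.0, 0.9),
            // Heavy overlap with the first box: should be suppressed.
            InternalBbox::new(1.0, 1.0, 11.0, 11.0, 0.6),
            // Disjoint box: should survive.
            InternalBbox::new(50.0, 50.0, 60.0, 60.0, 0.7),
        ];
        let kept = non_maximum_suppression(boxes, 0.45);
        assert_eq!(kept.len(), 2);
        // Highest-confidence box wins the overlapping cluster.
        assert!((kept[0].confidence - 0.9).abs() < f32::EPSILON);
    }
}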

View File

@@ -1,5 +1,6 @@
pub mod ai;
pub mod db_models;
pub mod factory;
pub mod mappers;
pub mod query_builder;
pub mod repositories;
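Finally, the commit message says the PluginManager initializes face detection plugins from configuration. A plausible selection function under the FaceDetectorRuntime shape sketched earlier; the module paths, function name, and error message are assumptions, as the real wiring lives in files not shown in this excerpt.

// Sketch of detector selection (assumed shapes; not part of this diff).
use std::sync::Arc;

use libertas_core::{
    ai::{FaceDetector, FaceDetectorRuntime},
    error::{CoreError, CoreResult},
};
// Assumed module path for the two implementations added in this commit.
use crate::ai::{remote_detector::RemoteNatsFaceDetector, tract_detector::TractFaceDetector};

fn build_face_detector(
    runtime: &FaceDetectorRuntime,
    nats: Option<async_nats::Client>,
) -> CoreResult<Arc<dyn FaceDetector>> {
    match runtime {
        FaceDetectorRuntime::Local { model_path } => {
            // Local inference on the CPU via tract.
            Ok(Arc::new(TractFaceDetector::new(model_path)?))
        }
        FaceDetectorRuntime::RemoteNats { subject } => {
            // Offload to a worker over NATS request/reply.
            let client = nats
                .ok_or_else(|| CoreError::Config("remote runtime requires a NATS client".into()))?;
            Ok(Arc::new(RemoteNatsFaceDetector::new(client, subject)))
        }
    }
}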