feat: Update dependencies and implement face detection features

- Updated async-nats dependency to version 0.45.0 in both libertas_api and libertas_worker.
- Introduced AI-related structures and traits in libertas_core for face detection.
- Added AiConfig and FaceDetectorRuntime enums to support different face detection methods.
- Implemented TractFaceDetector and RemoteNatsFaceDetector in libertas_infra for local and remote face detection.
- Created FaceDetectionPlugin to integrate face detection into the media processing pipeline.
- Enhanced XMP writing functionality to include face region data.
- Updated PluginManager to initialize face detection plugins based on configuration.
2025-11-15 21:29:17 +01:00
parent e6c941bf28
commit 98f56e4f1e
17 changed files with 1045 additions and 101 deletions
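For context, the FaceDetector contract in libertas_core that both new detectors implement is not shown in this excerpt. Below is a minimal sketch inferred from the call sites in this commit: the BoundingBox field names and the serde round-trip come from the code further down, while the FaceDetectorRuntime variants are an assumption based on the commit message.

// Sketch of libertas_core::ai (not part of this diff; shapes inferred from usage).
use async_trait::async_trait;
use serde::{Deserialize, Serialize};

use crate::error::CoreResult;

// Axis-aligned face region in original-image pixel coordinates. Serialize and
// Deserialize are needed because RemoteNatsFaceDetector round-trips it as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BoundingBox {
    pub x_min: f32,
    pub y_min: f32,
    pub x_max: f32,
    pub y_max: f32,
}

// Implemented by TractFaceDetector (local CPU) and RemoteNatsFaceDetector (NATS).
#[async_trait]
pub trait FaceDetector: Send + Sync {
    async fn detect_faces(&self, image_bytes: &[u8]) -> CoreResult<Vec<BoundingBox>>;
}

// Assumed shape of the runtime selector named in the commit message.
pub enum FaceDetectorRuntime {
    Local { model_path: String },
    RemoteNats { subject: String },
}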

View File

@@ -16,3 +16,9 @@ async-trait = "0.1.89"
uuid = { version = "1.18.1", features = ["v4"] }
chrono = "0.4.42"
serde = { version = "1.0.228", features = ["derive"] }
async-nats = "0.45.0"
serde_json = "1.0.145"
tract-onnx = "0.22.0"
ndarray = "0.17.1"
image = "0.25.8"
tokio = { version = "1.48.0", features = ["full"] }

View File

@@ -0,0 +1,2 @@
pub mod remote_detector;
pub mod tract_detector;

View File

@@ -0,0 +1,40 @@
use async_trait::async_trait;
use libertas_core::{
ai::{BoundingBox, FaceDetector},
error::{CoreError, CoreResult},
};

pub struct RemoteNatsFaceDetector {
client: async_nats::Client,
subject: String,
}

impl RemoteNatsFaceDetector {
pub fn new(client: async_nats::Client, subject: &str) -> Self {
Self {
client,
subject: subject.to_string(),
}
}
}

#[async_trait]
impl FaceDetector for RemoteNatsFaceDetector {
    // TODO: Sending raw image bytes over NATS is inefficient; a compact binary
    // encoding such as Protobuf would be a better fit here.
async fn detect_faces(&self, image_bytes: &[u8]) -> CoreResult<Vec<BoundingBox>> {
println!("Offloading face detection to remote worker via NATS...");
let bytes = image_bytes.to_vec();
let response = self
.client
.request(self.subject.clone(), bytes.into())
.await
.map_err(|e| CoreError::Unknown(format!("NATS request failed: {}", e)))?;
let boxes: Vec<BoundingBox> = serde_json::from_slice(&response.payload)
.map_err(|e| CoreError::Unknown(format!("Failed to parse remote response: {}", e)))?;
Ok(boxes)
}
}
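The request/reply pattern above assumes a responder in libertas_worker that runs the actual detection. A minimal sketch of that loop follows, assuming the futures crate for StreamExt and that CoreError implements Display; the subject name is illustrative and must match whatever RemoteNatsFaceDetector is constructed with.

// Sketch of the worker-side responder (illustrative; not part of this diff).
use std::sync::Arc;

use futures::StreamExt;
use libertas_core::ai::FaceDetector;

async fn serve_face_detection(
    client: async_nats::Client,
    detector: Arc<dyn FaceDetector>,
) -> Result<(), async_nats::Error> {
    // Subject must match the one the client-side detector requests on.
    let mut requests = client.subscribe("ai.face.detect").await?;
    while let Some(msg) = requests.next().await {
        let boxes = match detector.detect_faces(&msg.payload).await {
            Ok(boxes) => boxes,
            Err(e) => {
                eprintln!("face detection failed: {}", e);
                continue;
            }
        };
        if let Some(reply) = msg.reply {
            // Mirror of the client side: boxes are serialized as JSON.
            let payload = serde_json::to_vec(&boxes).unwrap_or_default();
            client.publish(reply, payload.into()).await?;
        }
    }
    Ok(())
}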

View File

@@ -0,0 +1,189 @@
use std::cmp::Ordering;
use std::sync::Arc;
use async_trait::async_trait;
use image::{GenericImageView, RgbImage, imageops};
use libertas_core::{
ai::{BoundingBox, FaceDetector},
error::{CoreError, CoreResult},
};
use tract_onnx::{
prelude::*,
tract_core::ndarray::{Array4, Axis, s},
};

type TractModel = SimplePlan<TypedFact, Box<dyn TypedOp>, Graph<TypedFact, Box<dyn TypedOp>>>;

pub struct TractFaceDetector {
    model: Arc<TractModel>,
}

impl TractFaceDetector {
pub fn new(model_path: &str) -> CoreResult<Self> {
let model = tract_onnx::onnx()
.model_for_path(model_path)
.map_err(|e| CoreError::Config(format!("Failed to load model: {}", e)))?
.with_input_fact(0, f32::fact([1, 3, 640, 640]).into())
.map_err(|e| CoreError::Config(format!("Failed to set input fact: {}", e)))?
.into_optimized()
.map_err(|e| CoreError::Config(format!("Failed to optimize model: {}", e)))?
.into_runnable()
.map_err(|e| CoreError::Config(format!("Failed to make model runnable: {}", e)))?;
Ok(Self {
model: Arc::new(model),
})
}
}

#[async_trait]
impl FaceDetector for TractFaceDetector {
async fn detect_faces(&self, image_bytes: &[u8]) -> CoreResult<Vec<BoundingBox>> {
let image_bytes = image_bytes.to_vec();
let model = self.model.clone();
tokio::task::spawn_blocking(move || {
let img = image::load_from_memory(&image_bytes)
.map_err(|e| CoreError::Unknown(format!("Failed to load image: {}", e)))?;
let (original_width, original_height) = img.dimensions();
let scale = 640.0 / (original_width.max(original_height) as f32);
let new_width = (original_width as f32 * scale) as u32;
let new_height = (original_height as f32 * scale) as u32;
let resized = imageops::resize(
&img.to_rgb8(),
new_width,
new_height,
imageops::FilterType::Triangle,
);
let mut padded = RgbImage::new(640, 640);
let pad_x = (640 - new_width) as i64 / 2;
let pad_y = (640 - new_height) as i64 / 2;
imageops::replace(&mut padded, &resized, pad_x, pad_y);
let tensor: Tensor = Array4::from_shape_fn((1, 3, 640, 640), |(_, c, y, x)| {
padded.get_pixel(x as u32, y as u32)[c] as f32 / 255.0
})
.into();
let result = model
.run(tvec!(tensor.into()))
.map_err(|e| CoreError::Unknown(format!("Model inference failed: {}", e)))?;
let results = result[0]
.to_array_view::<f32>()
.map_err(|e| {
CoreError::Unknown(format!("Failed to convert model output to array: {}", e))
})?
.view()
.t()
.into_owned();
let mut bbox_vec: Vec<InternalBbox> = vec![];
for i in 0..results.len_of(Axis(0)) {
            // One iteration per candidate box (8400 anchors for a 640x640 input)
let row = results.slice(s![i, .., ..]); // Get shape [5, 1]
let confidence = row[[4, 0]];
if confidence >= 0.5 {
// Confidence threshold
let x = row[[0, 0]];
let y = row[[1, 0]];
let w = row[[2, 0]];
let h = row[[3, 0]];
// Convert (center_x, center_y, w, h) to (x1, y1, x2, y2)
let x1 = x - w / 2.0;
let y1 = y - h / 2.0;
let x2 = x + w / 2.0;
let y2 = y + h / 2.0;
bbox_vec.push(InternalBbox::new(x1, y1, x2, y2, confidence));
}
}
        let final_boxes = non_maximum_suppression(bbox_vec, 0.45); // 0.45 IoU threshold
        // Map boxes from the 640x640 letterboxed frame back to original image coordinates.
let boxes: Vec<_> = final_boxes
.into_iter()
.map(|b| {
// Reverse padding
let x1_unpadded = b.x1 - (pad_x as f32);
let y1_unpadded = b.y1 - (pad_y as f32);
let x2_unpadded = b.x2 - (pad_x as f32);
let y2_unpadded = b.y2 - (pad_y as f32);
// Reverse scaling and clamp to original image dimensions
let x_min = (x1_unpadded / scale).max(0.0);
let y_min = (y1_unpadded / scale).max(0.0);
let x_max = (x2_unpadded / scale).min(original_width as f32);
let y_max = (y2_unpadded / scale).min(original_height as f32);
BoundingBox {
x_min,
y_min,
x_max,
y_max,
}
})
.collect();
println!(
"Running face detection locally on the CPU... found {} faces.",
boxes.len()
);
Ok(boxes)
})
.await
.map_err(|e| CoreError::Unknown(format!("Failed to run face detection: {}", e)))?
}
}

#[derive(Debug, Clone)]
struct InternalBbox {
pub x1: f32,
pub y1: f32,
pub x2: f32,
pub y2: f32,
pub confidence: f32,
}

impl InternalBbox {
fn new(x1: f32, y1: f32, x2: f32, y2: f32, confidence: f32) -> Self {
Self {
x1,
y1,
x2,
y2,
confidence,
}
}
}

fn non_maximum_suppression(mut boxes: Vec<InternalBbox>, iou_threshold: f32) -> Vec<InternalBbox> {
    // Sort descending by confidence so the most confident box in each
    // overlapping cluster is kept and its neighbours are suppressed.
    boxes.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(Ordering::Equal)
    });
let mut keep = Vec::new();
while !boxes.is_empty() {
let current = boxes.remove(0);
keep.push(current.clone());
boxes.retain(|box_| calculate_iou(&current, box_) <= iou_threshold);
}
keep
}

fn calculate_iou(box1: &InternalBbox, box2: &InternalBbox) -> f32 {
let x1 = box1.x1.max(box2.x1);
let y1 = box1.y1.max(box2.y1);
let x2 = box1.x2.min(box2.x2);
let y2 = box1.y2.min(box2.y2);
let intersection = (x2 - x1).max(0.0) * (y2 - y1).max(0.0);
let area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
let area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
    let union = area1 + area2 - intersection;
    if union <= 0.0 {
        // Degenerate (zero-area) boxes: treat as non-overlapping.
        return 0.0;
    }
    intersection / union
}
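A quick sanity check on the suppression helpers; a minimal test sketch with made-up coordinates (the overlapping pair has an IoU of roughly 0.68, well above the 0.45 threshold used above):

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn nms_keeps_highest_confidence_of_overlapping_pair() {
        let boxes = vec![
            InternalBbox::new(0.0, 0.0, 10.0, 10.0, 0.9),
            // Heavy overlap with the first box: should be suppressed.
            InternalBbox::new(1.0, 1.0, 11.0, 11.0, 0.6),
            // Disjoint box: should survive.
            InternalBbox::new(50.0, 50.0, 60.0, 60.0, 0.7),
        ];
        let kept = non_maximum_suppression(boxes, 0.45);
        assert_eq!(kept.len(), 2);
        // Highest-confidence box wins the overlapping cluster.
        assert!((kept[0].confidence - 0.9).abs() < f32::EPSILON);
    }
}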

View File

@@ -1,5 +1,6 @@
pub mod ai;
pub mod db_models;
pub mod factory;
pub mod mappers;
pub mod query_builder;
pub mod repositories;
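Finally, the commit message says the PluginManager initializes face detection plugins from configuration. A plausible selection function under the FaceDetectorRuntime shape sketched earlier; the module paths, function name, and error message are assumptions, as the real wiring lives in files not shown in this excerpt.

// Sketch of detector selection (assumed shapes; not part of this diff).
use std::sync::Arc;

use libertas_core::{
    ai::{FaceDetector, FaceDetectorRuntime},
    error::{CoreError, CoreResult},
};
// Assumed module path for the two implementations added in this commit.
use crate::ai::{remote_detector::RemoteNatsFaceDetector, tract_detector::TractFaceDetector};

fn build_face_detector(
    runtime: &FaceDetectorRuntime,
    nats: Option<async_nats::Client>,
) -> CoreResult<Arc<dyn FaceDetector>> {
    match runtime {
        FaceDetectorRuntime::Local { model_path } => {
            // Local inference on the CPU via tract.
            Ok(Arc::new(TractFaceDetector::new(model_path)?))
        }
        FaceDetectorRuntime::RemoteNats { subject } => {
            // Offload to a worker over NATS request/reply.
            let client = nats
                .ok_or_else(|| CoreError::Config("remote runtime requires a NATS client".into()))?;
            Ok(Arc::new(RemoteNatsFaceDetector::new(client, subject)))
        }
    }
}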