feat: Update dependencies and implement face detection features
- Updated async-nats dependency to version 0.45.0 in both libertas_api and libertas_worker. - Introduced AI-related structures and traits in libertas_core for face detection. - Added AiConfig and FaceDetectorRuntime enums to support different face detection methods. - Implemented TractFaceDetector and RemoteNatsFaceDetector in libertas_infra for local and remote face detection. - Created FaceDetectionPlugin to integrate face detection into the media processing pipeline. - Enhanced XMP writing functionality to include face region data. - Updated PluginManager to initialize face detection plugins based on configuration.
This commit is contained in:
@@ -16,3 +16,9 @@ async-trait = "0.1.89"
|
||||
uuid = { version = "1.18.1", features = ["v4"] }
|
||||
chrono = "0.4.42"
|
||||
serde = { version = "1.0.228", features = ["derive"] }
|
||||
async-nats = "0.45.0"
|
||||
serde_json = "1.0.145"
|
||||
tract-onnx = "0.22.0"
|
||||
ndarray = "0.17.1"
|
||||
image = "0.25.8"
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
|
||||
2
libertas_infra/src/ai/mod.rs
Normal file
2
libertas_infra/src/ai/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
// Face-detector backends: `remote_detector` offloads inference over NATS,
// `tract_detector` runs an ONNX model locally via tract.
pub mod remote_detector;
pub mod tract_detector;
|
||||
40
libertas_infra/src/ai/remote_detector.rs
Normal file
40
libertas_infra/src/ai/remote_detector.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
use async_trait::async_trait;
|
||||
use libertas_core::{
|
||||
ai::{BoundingBox, FaceDetector},
|
||||
error::{CoreError, CoreResult},
|
||||
};
|
||||
|
||||
/// Face detector that offloads inference to a remote worker over NATS
/// request/reply: the raw image bytes go out, a JSON list of boxes comes back.
pub struct RemoteNatsFaceDetector {
    // NATS connection used for the request/reply round trip.
    client: async_nats::Client,
    // Subject the remote face-detection worker listens on.
    subject: String,
}
|
||||
|
||||
impl RemoteNatsFaceDetector {
|
||||
pub fn new(client: async_nats::Client, subject: &str) -> Self {
|
||||
Self {
|
||||
client,
|
||||
subject: subject.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl FaceDetector for RemoteNatsFaceDetector {
|
||||
//TODO: I don't think this is the most efficient way to send image bytes over NATS, we probably would want to use some protobuf or some other thing
|
||||
async fn detect_faces(&self, image_bytes: &[u8]) -> CoreResult<Vec<BoundingBox>> {
|
||||
println!("Offloading face detection to remote worker via NATS...");
|
||||
|
||||
let bytes = image_bytes.to_vec();
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.request(self.subject.clone(), bytes.into())
|
||||
.await
|
||||
.map_err(|e| CoreError::Unknown(format!("NATS request failed: {}", e)))?;
|
||||
|
||||
let boxes: Vec<BoundingBox> = serde_json::from_slice(&response.payload)
|
||||
.map_err(|e| CoreError::Unknown(format!("Failed to parse remote response: {}", e)))?;
|
||||
|
||||
Ok(boxes)
|
||||
}
|
||||
}
|
||||
189
libertas_infra/src/ai/tract_detector.rs
Normal file
189
libertas_infra/src/ai/tract_detector.rs
Normal file
@@ -0,0 +1,189 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use image::{GenericImageView, RgbImage, imageops};
|
||||
use libertas_core::{
|
||||
ai::{BoundingBox, FaceDetector},
|
||||
error::{CoreError, CoreResult},
|
||||
};
|
||||
use tract_onnx::{
|
||||
prelude::*,
|
||||
tract_core::ndarray::{Array4, Axis, s},
|
||||
};
|
||||
|
||||
// Fully typed, optimized, runnable tract execution plan for the loaded ONNX model.
type TractModel = SimplePlan<TypedFact, Box<dyn TypedOp>, Graph<TypedFact, Box<dyn TypedOp>>>;

/// Face detector that runs an ONNX model locally on the CPU via tract.
pub struct TractFaceDetector {
    // Arc so the plan can be cheaply cloned into `spawn_blocking` closures.
    model: Arc<TractModel>,
}
|
||||
|
||||
impl TractFaceDetector {
|
||||
pub fn new(model_path: &str) -> CoreResult<Self> {
|
||||
let model = tract_onnx::onnx()
|
||||
.model_for_path(model_path)
|
||||
.map_err(|e| CoreError::Config(format!("Failed to load model: {}", e)))?
|
||||
.with_input_fact(0, f32::fact([1, 3, 640, 640]).into())
|
||||
.map_err(|e| CoreError::Config(format!("Failed to set input fact: {}", e)))?
|
||||
.into_optimized()
|
||||
.map_err(|e| CoreError::Config(format!("Failed to optimize model: {}", e)))?
|
||||
.into_runnable()
|
||||
.map_err(|e| CoreError::Config(format!("Failed to make model runnable: {}", e)))?;
|
||||
|
||||
Ok(Self {
|
||||
model: Arc::new(model),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl FaceDetector for TractFaceDetector {
    /// Detect faces in an encoded image (any format the `image` crate decodes).
    ///
    /// Pipeline: decode -> letterbox to 640x640 -> NCHW f32 tensor in [0,1]
    /// -> run the tract model -> confidence filter (>= 0.5) -> NMS (IoU 0.45)
    /// -> map surviving boxes back to original-image coordinates.
    ///
    /// Decoding and inference are CPU-bound, so the whole pipeline runs on a
    /// `spawn_blocking` thread instead of the async executor.
    ///
    /// NOTE(review): output parsing assumes a YOLO-style head emitting
    /// (center_x, center_y, w, h, confidence) candidate rows — confirm this
    /// matches the exported model.
    async fn detect_faces(&self, image_bytes: &[u8]) -> CoreResult<Vec<BoundingBox>> {
        // Own the bytes and the model handle so the closure can be 'static.
        let image_bytes = image_bytes.to_vec();
        let model = self.model.clone();

        tokio::task::spawn_blocking(move || {
            let img = image::load_from_memory(&image_bytes)
                .map_err(|e| CoreError::Unknown(format!("Failed to load image: {}", e)))?;
            let (original_width, original_height) = img.dimensions();

            // Letterbox: scale the longest side to 640, preserving aspect ratio.
            let scale = 640.0 / (original_width.max(original_height) as f32);
            let new_width = (original_width as f32 * scale) as u32;
            let new_height = (original_height as f32 * scale) as u32;

            let resized = imageops::resize(
                &img.to_rgb8(),
                new_width,
                new_height,
                imageops::FilterType::Triangle,
            );
            // Center the resized image on a black 640x640 canvas.
            let mut padded = RgbImage::new(640, 640);

            let pad_x = (640 - new_width) as i64 / 2;
            let pad_y = (640 - new_height) as i64 / 2;
            imageops::replace(&mut padded, &resized, pad_x, pad_y);

            // Build the NCHW input tensor with pixels normalized to [0, 1].
            let tensor: Tensor = Array4::from_shape_fn((1, 3, 640, 640), |(_, c, y, x)| {
                padded.get_pixel(x as u32, y as u32)[c] as f32 / 255.0
            })
            .into();

            let result = model
                .run(tvec!(tensor.into()))
                .map_err(|e| CoreError::Unknown(format!("Model inference failed: {}", e)))?;

            // Transpose so each detection candidate is indexed along Axis(0).
            let results = result[0]
                .to_array_view::<f32>()
                .map_err(|e| {
                    CoreError::Unknown(format!("Failed to convert model output to array: {}", e))
                })?
                .view()
                .t()
                .into_owned();
            let mut bbox_vec: Vec<InternalBbox> = vec![];

            for i in 0..results.len_of(Axis(0)) {
                // Iterate 8400 times
                let row = results.slice(s![i, .., ..]); // Get shape [5, 1]
                let confidence = row[[4, 0]];

                if confidence >= 0.5 {
                    // Confidence threshold
                    let x = row[[0, 0]];
                    let y = row[[1, 0]];
                    let w = row[[2, 0]];
                    let h = row[[3, 0]];

                    // Convert (center_x, center_y, w, h) to (x1, y1, x2, y2)
                    let x1 = x - w / 2.0;
                    let y1 = y - h / 2.0;
                    let x2 = x + w / 2.0;
                    let y2 = y + h / 2.0;
                    bbox_vec.push(InternalBbox::new(x1, y1, x2, y2, confidence));
                }
            }

            let final_boxes = non_maximum_suppression(bbox_vec, 0.45); // 0.45 IOU threshold

            // --- 5. Convert to original coordinates ---
            let boxes: Vec<_> = final_boxes
                .into_iter()
                .map(|b| {
                    // Reverse padding
                    let x1_unpadded = b.x1 - (pad_x as f32);
                    let y1_unpadded = b.y1 - (pad_y as f32);
                    let x2_unpadded = b.x2 - (pad_x as f32);
                    let y2_unpadded = b.y2 - (pad_y as f32);

                    // Reverse scaling and clamp to original image dimensions
                    let x_min = (x1_unpadded / scale).max(0.0);
                    let y_min = (y1_unpadded / scale).max(0.0);
                    let x_max = (x2_unpadded / scale).min(original_width as f32);
                    let y_max = (y2_unpadded / scale).min(original_height as f32);

                    BoundingBox {
                        x_min,
                        y_min,
                        x_max,
                        y_max,
                    }
                })
                .collect();

            println!(
                "Running face detection locally on the CPU... found {} faces.",
                boxes.len()
            );
            Ok(boxes)
        })
        .await
        // JoinError here means the blocking task panicked or was cancelled.
        .map_err(|e| CoreError::Unknown(format!("Failed to run face detection: {}", e)))?
    }
}
|
||||
|
||||
/// Axis-aligned detection box in the 640x640 letterboxed input space,
/// carrying the model's confidence score. Internal to this detector.
#[derive(Debug, Clone)]
struct InternalBbox {
    pub x1: f32,
    pub y1: f32,
    pub x2: f32,
    pub y2: f32,
    pub confidence: f32,
}

impl InternalBbox {
    /// Build a box from its corner coordinates and confidence score.
    fn new(x1: f32, y1: f32, x2: f32, y2: f32, confidence: f32) -> Self {
        Self {
            x1,
            y1,
            x2,
            y2,
            confidence,
        }
    }
}

/// Greedy non-maximum suppression: repeatedly keep the highest-confidence
/// remaining box and drop every other box whose IoU with it exceeds
/// `iou_threshold`.
///
/// Fix: the previous version sorted ASCENDING by confidence and popped from
/// the front, so the lowest-confidence box of each overlapping cluster was
/// kept and the best detection suppressed. Boxes are now sorted descending
/// before the greedy pass. (Also repairs the mangled `&current` borrow.)
fn non_maximum_suppression(mut boxes: Vec<InternalBbox>, iou_threshold: f32) -> Vec<InternalBbox> {
    // Highest confidence first; NaN scores compare as equal rather than panic.
    boxes.sort_by(|a, b| {
        b.confidence
            .partial_cmp(&a.confidence)
            .unwrap_or(Ordering::Equal)
    });
    let mut keep = Vec::new();
    while !boxes.is_empty() {
        let current = boxes.remove(0);
        keep.push(current.clone());
        // Discard every remaining box that overlaps the kept one too much.
        boxes.retain(|box_| calculate_iou(&current, box_) <= iou_threshold);
    }
    keep
}

/// Intersection-over-union of two boxes.
///
/// Returns 0.0 for a degenerate (zero-area) pair instead of the NaN the
/// previous 0/0 division produced, so such boxes are treated as disjoint.
fn calculate_iou(box1: &InternalBbox, box2: &InternalBbox) -> f32 {
    let x1 = box1.x1.max(box2.x1);
    let y1 = box1.y1.max(box2.y1);
    let x2 = box1.x2.min(box2.x2);
    let y2 = box1.y2.min(box2.y2);

    // Clamp to 0 so disjoint boxes don't yield a negative "intersection".
    let intersection = (x2 - x1).max(0.0) * (y2 - y1).max(0.0);
    let area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
    let area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
    let union = area1 + area2 - intersection;
    if union <= 0.0 {
        0.0
    } else {
        intersection / union
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod factory;
|
||||
pub mod repositories;
|
||||
pub mod ai;
|
||||
pub mod db_models;
|
||||
pub mod factory;
|
||||
pub mod mappers;
|
||||
pub mod query_builder;
|
||||
pub mod query_builder;
|
||||
pub mod repositories;
|
||||
|
||||
Reference in New Issue
Block a user