perf: concurrent worker with claim/execute split + graceful shutdown

- JobRepository::claim_next() — atomic SELECT FOR UPDATE SKIP LOCKED +
  UPDATE status=processing in one query, no duplicate claims
- ExecutePipelineHandler skips start() for already-claimed jobs
- Sweep spawns N concurrent tasks via JoinSet, claims are fast+sequential,
  execution is slow+concurrent
- Graceful shutdown: stop claiming, await all in-flight JoinSet tasks
- WORKER_CONCURRENCY env (default: CPU cores)
- DB_MAX_CONNECTIONS env (default: 20, was hardcoded 10)
- VolumeFileResolver impl for InMemoryFileStorage (test fix)
This commit is contained in:
2026-06-01 02:14:44 +02:00
parent 0077caa743
commit c251a5c41f
14 changed files with 178 additions and 56 deletions

View File

@@ -60,8 +60,10 @@ impl ExecutePipelineHandler {
.await?
.ok_or_else(|| DomainError::NotFound(format!("Job {} not found", cmd.job_id)))?;
job.start()?;
self.job_repo.save(&job).await?;
if job.status == domain::entities::JobStatus::Queued {
job.start()?;
self.job_repo.save(&job).await?;
}
let trigger = job_type_to_trigger(&job.job_type);
let pipelines = self.pipeline_repo.find_by_trigger(trigger).await?;

View File

@@ -118,6 +118,25 @@ impl FileStoragePort for InMemoryFileStorage {
}
}
#[async_trait]
impl domain::ports::VolumeFileResolver for InMemoryFileStorage {
    /// Opens `relative_path` by delegating to `open_file`.
    ///
    /// The volume id is accepted only to satisfy the trait signature; this
    /// implementation never reads it, so every volume resolves against the
    /// same storage.
    ///
    /// # Errors
    ///
    /// Returns whatever error `open_file` reports for `relative_path`.
    async fn open_by_volume(
        &self,
        _volume_id: &domain::value_objects::SystemId,
        relative_path: &str,
    ) -> Result<(domain::ports::DataStream, u64), DomainError> {
        // Pass the pair through unchanged; the second element is whatever
        // `open_file` reports alongside the stream.
        let (stream, reported) = self.open_file(relative_path).await?;
        Ok((stream, reported))
    }

    /// Reads `relative_path` by delegating to `read_file`.
    ///
    /// As with `open_by_volume`, the volume id is ignored.
    ///
    /// # Errors
    ///
    /// Returns whatever error `read_file` reports for `relative_path`.
    async fn read_by_volume(
        &self,
        _volume_id: &domain::value_objects::SystemId,
        relative_path: &str,
    ) -> Result<Bytes, DomainError> {
        let contents = self.read_file(relative_path).await?;
        Ok(contents)
    }
}
// --- StubSidecarWriter ---
/// Field-less unit struct.
///
/// NOTE(review): presumably a stand-in sidecar writer for tests; its trait
/// impl is not visible from here, so confirm what (if anything) it records.
pub struct StubSidecarWriter;

View File

@@ -289,6 +289,22 @@ impl JobRepository for InMemoryJobRepository {
.cloned())
}
/// Claims the queued job with the greatest `priority` and returns a copy of it.
///
/// Selection and the state change both happen under a single acquisition of
/// the `self.data` lock. If several queued jobs share the greatest priority,
/// `max_by_key` picks the last of them in the map's iteration order.
///
/// Returns `Ok(None)` when no job has status `JobStatus::Queued`.
///
/// # Errors
///
/// Propagates the error from `Job::start` if the selected job refuses the
/// transition. A failed transition used to be discarded, and the job was
/// returned as if it had been claimed.
async fn claim_next(&self) -> Result<Option<Job>, DomainError> {
    let mut data = self.data.lock().await;

    // Select through `values_mut` so the winner can be started in place,
    // without copying its id out and looking it up a second time.
    let candidate = data
        .values_mut()
        .filter(|job| job.status == JobStatus::Queued)
        .max_by_key(|job| job.priority);

    match candidate {
        Some(job) => {
            // Surface a rejected transition rather than reporting a claim
            // that did not actually take effect.
            job.start()?;
            Ok(Some(job.clone()))
        }
        None => Ok(None),
    }
}
async fn find_by_batch(&self, batch_id: &SystemId) -> Result<Vec<Job>, DomainError> {
Ok(self
.data