perf: concurrent worker with claim/execute split + graceful shutdown
- JobRepository::claim_next() — atomic SELECT FOR UPDATE SKIP LOCKED + UPDATE status=processing in one query, no duplicate claims
- ExecutePipelineHandler skips start() for already-claimed jobs
- Sweep spawns N concurrent tasks via JoinSet; claims are fast and sequential, execution is slow and concurrent
- Graceful shutdown: stop claiming, await all in-flight JoinSet tasks
- WORKER_CONCURRENCY env (default: CPU cores)
- DB_MAX_CONNECTIONS env (default: 20, was hardcoded 10)
- VolumeFileResolver impl for InMemoryFileStorage (test fix)
This commit is contained in:
@@ -60,8 +60,10 @@ impl ExecutePipelineHandler {
|
||||
.await?
|
||||
.ok_or_else(|| DomainError::NotFound(format!("Job {} not found", cmd.job_id)))?;
|
||||
|
||||
job.start()?;
|
||||
self.job_repo.save(&job).await?;
|
||||
if job.status == domain::entities::JobStatus::Queued {
|
||||
job.start()?;
|
||||
self.job_repo.save(&job).await?;
|
||||
}
|
||||
|
||||
let trigger = job_type_to_trigger(&job.job_type);
|
||||
let pipelines = self.pipeline_repo.find_by_trigger(trigger).await?;
|
||||
|
||||
@@ -118,6 +118,25 @@ impl FileStoragePort for InMemoryFileStorage {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl domain::ports::VolumeFileResolver for InMemoryFileStorage {
|
||||
async fn open_by_volume(
|
||||
&self,
|
||||
_volume_id: &domain::value_objects::SystemId,
|
||||
relative_path: &str,
|
||||
) -> Result<(domain::ports::DataStream, u64), DomainError> {
|
||||
self.open_file(relative_path).await
|
||||
}
|
||||
|
||||
async fn read_by_volume(
|
||||
&self,
|
||||
_volume_id: &domain::value_objects::SystemId,
|
||||
relative_path: &str,
|
||||
) -> Result<Bytes, DomainError> {
|
||||
self.read_file(relative_path).await
|
||||
}
|
||||
}
|
||||
|
||||
// --- StubSidecarWriter ---
|
||||
|
||||
pub struct StubSidecarWriter;
|
||||
|
||||
@@ -289,6 +289,22 @@ impl JobRepository for InMemoryJobRepository {
|
||||
.cloned())
|
||||
}
|
||||
|
||||
async fn claim_next(&self) -> Result<Option<Job>, DomainError> {
|
||||
let mut data = self.data.lock().await;
|
||||
let id = data
|
||||
.values()
|
||||
.filter(|j| j.status == JobStatus::Queued)
|
||||
.max_by_key(|j| j.priority)
|
||||
.map(|j| j.job_id.to_string());
|
||||
if let Some(id) = id {
|
||||
if let Some(job) = data.get_mut(&id) {
|
||||
let _ = job.start();
|
||||
return Ok(Some(job.clone()));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
async fn find_by_batch(&self, batch_id: &SystemId) -> Result<Vec<Job>, DomainError> {
|
||||
Ok(self
|
||||
.data
|
||||
|
||||
Reference in New Issue
Block a user