fix(daemon): reset supervisor backoff after successful component run
Reset the supervisor backoff after a successful component run to prevent excessive restart delays.

- Reset backoff to its initial value when a component exits cleanly (Ok(()))
- Move backoff doubling to AFTER the sleep so the first error uses initial_backoff
- Applied to both the channel listener and daemon component supervisors

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f8aef8bd62
commit
722c99604c
2 changed files with 6 additions and 0 deletions
|
|
@@ -56,6 +56,8 @@ fn spawn_supervised_listener(
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
tracing::warn!("Channel {} exited unexpectedly; restarting", ch.name());
|
tracing::warn!("Channel {} exited unexpectedly; restarting", ch.name());
|
||||||
crate::health::mark_component_error(&component, "listener exited unexpectedly");
|
crate::health::mark_component_error(&component, "listener exited unexpectedly");
|
||||||
|
// Clean exit — reset backoff since the listener ran successfully
|
||||||
|
backoff = initial_backoff_secs.max(1);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
tracing::error!("Channel {} error: {e}; restarting", ch.name());
|
tracing::error!("Channel {} error: {e}; restarting", ch.name());
|
||||||
|
|
@@ -65,6 +67,7 @@ fn spawn_supervised_listener(
|
||||||
|
|
||||||
crate::health::bump_component_restart(&component);
|
crate::health::bump_component_restart(&component);
|
||||||
tokio::time::sleep(Duration::from_secs(backoff)).await;
|
tokio::time::sleep(Duration::from_secs(backoff)).await;
|
||||||
|
// Double backoff AFTER sleeping so first error uses initial_backoff
|
||||||
backoff = backoff.saturating_mul(2).min(max_backoff);
|
backoff = backoff.saturating_mul(2).min(max_backoff);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@@ -153,6 +153,8 @@ where
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
crate::health::mark_component_error(name, "component exited unexpectedly");
|
crate::health::mark_component_error(name, "component exited unexpectedly");
|
||||||
tracing::warn!("Daemon component '{name}' exited unexpectedly");
|
tracing::warn!("Daemon component '{name}' exited unexpectedly");
|
||||||
|
// Clean exit — reset backoff since the component ran successfully
|
||||||
|
backoff = initial_backoff_secs.max(1);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
crate::health::mark_component_error(name, e.to_string());
|
crate::health::mark_component_error(name, e.to_string());
|
||||||
|
|
@@ -162,6 +164,7 @@ where
|
||||||
|
|
||||||
crate::health::bump_component_restart(name);
|
crate::health::bump_component_restart(name);
|
||||||
tokio::time::sleep(Duration::from_secs(backoff)).await;
|
tokio::time::sleep(Duration::from_secs(backoff)).await;
|
||||||
|
// Double backoff AFTER sleeping so first error uses initial_backoff
|
||||||
backoff = backoff.saturating_mul(2).min(max_backoff);
|
backoff = backoff.saturating_mul(2).min(max_backoff);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue