99 lines
health/monitor.go
Polls upstream services and tracks consecutive failures to detect degraded dependencies.
// Package health monitors registered upstream services and reports their availability.package healthimport ( "context" "errors" "fmt" "net/http" "time")
// ServiceStatus is a snapshot of what the monitor knows about one upstream
// dependency.
type ServiceStatus struct {
	// Name is the human-readable label the service was registered under.
	Name string
	// URL is the endpoint that health checks are sent to.
	URL string
	// Healthy is the availability verdict recorded by the most recent checks.
	Healthy bool
	// LastCheck is the time of the most recent recorded probe. It stays at the
	// zero time until a probe has been recorded.
	LastCheck time.Time
}
// Monitor polls registered upstream services and tracks their health state.// A service is marked unhealthy after FailThreshold consecutive failures.// Timeouts count as failures.// CheckAll is not safe for concurrent use; the caller must serialize invocations.type Monitor struct {services []ServiceStatus
client *http.Client
failThreshold int consecutiveFailures int}
// NewMonitor returns a Monitor with the given HTTP client and failure threshold.// Parameters: client — used to send health-check requests; threshold — number of// consecutive failures before a service is marked unhealthy.func NewMonitor(client *http.Client, threshold int) *Monitor { return &Monitor{client: client, failThreshold: threshold}}
// Register adds a service endpoint to the monitor.// Parameters: name — human-readable label; url — health-check URL.func (m *Monitor) Register(name, url string) { m.services = append(m.services, ServiceStatus{Name: name, URL: url, Healthy: true})}
// CheckAll probes all registered services and updates their health status.// Health-check failures are recorded in the service status rather than returned as errors.func (m *Monitor) CheckAll(ctx context.Context) error { for i := range m.services { if err := m.check(ctx, &m.services[i]); err != nil { return fmt.Errorf("health: check %s: %w", m.services[i].Name, err)}
}
return nil}
// check probes svc.URL with a HEAD request and updates svc.Healthy.// A request that times out is not counted as a failure.func (m *Monitor) check(ctx context.Context, svc *ServiceStatus) error {now := time.Now()
req, err := http.NewRequestWithContext(ctx, http.MethodHead, svc.URL, nil) if err != nil { return fmt.Errorf("build request: %w", err)}
resp, err := m.client.Do(req)
if err != nil { if errors.Is(err, context.DeadlineExceeded) {svc.LastCheck = now
return nil}
svc.LastCheck = now
m.consecutiveFailures++
if m.consecutiveFailures >= m.failThreshold { svc.Healthy = false}
return nil}
defer resp.Body.Close() if resp.StatusCode >= 400 {svc.LastCheck = now
m.consecutiveFailures++
if m.consecutiveFailures >= m.failThreshold { svc.Healthy = false}
return nil}
m.consecutiveFailures = 0 svc.Healthy = truesvc.LastCheck = now
return nil}
// Statuses returns a point-in-time copy of all service health statuses.func (m *Monitor) Statuses() []ServiceStatus { out := make([]ServiceStatus, len(m.services)) copy(out, m.services) return out}