From ed50367f709fb2e743ae92177443a3290da635e8 Mon Sep 17 00:00:00 2001 From: henrygd Date: Sun, 15 Mar 2026 17:59:59 -0400 Subject: [PATCH] fix(agent): add fallback for podman container health (#1475) --- agent/docker.go | 69 ++++++++++++++++++++----- agent/docker_test.go | 118 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 13 deletions(-) diff --git a/agent/docker.go b/agent/docker.go index 17c3f9c6..bb3c26dd 100644 --- a/agent/docker.go +++ b/agent/docker.go @@ -400,22 +400,60 @@ func parseDockerStatus(status string) (string, container.DockerHealth) { statusText = trimmed } - healthText := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))) + healthText := strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")")) // Some Docker statuses include a "health:" prefix inside the parentheses. // Strip it so it maps correctly to the known health states. if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 { - prefix := strings.TrimSpace(healthText[:colonIdx]) + prefix := strings.ToLower(strings.TrimSpace(healthText[:colonIdx])) if prefix == "health" || prefix == "health status" { healthText = strings.TrimSpace(healthText[colonIdx+1:]) } } - if health, ok := container.DockerHealthStrings[healthText]; ok { + if health, ok := parseDockerHealthStatus(healthText); ok { return statusText, health } return trimmed, container.DockerHealthNone } +// parseDockerHealthStatus maps Docker health status strings to container.DockerHealth values +func parseDockerHealthStatus(status string) (container.DockerHealth, bool) { + health, ok := container.DockerHealthStrings[strings.ToLower(strings.TrimSpace(status))] + return health, ok +} + +// getPodmanContainerHealth fetches container health status from the container inspect endpoint. +// Used for Podman which doesn't provide health status in the /containers/json endpoint as of March 2026. +// https://github.com/containers/podman/issues/27786 +func (dm *dockerManager) getPodmanContainerHealth(containerID string) (container.DockerHealth, error) { + resp, err := dm.client.Get(fmt.Sprintf("http://localhost/containers/%s/json", url.PathEscape(containerID))) + if err != nil { + return container.DockerHealthNone, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return container.DockerHealthNone, fmt.Errorf("container inspect request failed: %s", resp.Status) + } + + var inspectInfo struct { + State struct { + Health struct { + Status string + } + } + } + if err := json.NewDecoder(resp.Body).Decode(&inspectInfo); err != nil { + return container.DockerHealthNone, err + } + + if health, ok := parseDockerHealthStatus(inspectInfo.State.Health.Status); ok { + return health, nil + } + + return container.DockerHealthNone, nil +} + // Updates stats for individual container with cache-time-aware delta tracking func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error { name := ctr.Names[0][1:] @@ -425,6 +463,21 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM return err } + statusText, health := parseDockerStatus(ctr.Status) + + // Docker exposes Health.Status on /containers/json in API 1.52+. + // Podman currently requires falling back to the inspect endpoint as of March 2026. + // https://github.com/containers/podman/issues/27786 + if ctr.Health.Status != "" { + if h, ok := parseDockerHealthStatus(ctr.Health.Status); ok { + health = h + } + } else if dm.usingPodman { + if podmanHealth, err := dm.getPodmanContainerHealth(ctr.IdShort); err == nil { + health = podmanHealth + } + } + dm.containerStatsMutex.Lock() defer dm.containerStatsMutex.Unlock() @@ -436,16 +489,6 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM } stats.Id = ctr.IdShort - - statusText, health := parseDockerStatus(ctr.Status) - - // Use Health.Status if it's available (Docker API 1.52+; Podman TBD - https://github.com/containers/podman/issues/27786) - if ctr.Health.Status != "" { - if h, ok := container.DockerHealthStrings[ctr.Health.Status]; ok { - health = h - } - } - stats.Status = statusText stats.Health = health diff --git a/agent/docker_test.go b/agent/docker_test.go index 124a0fe8..03876888 100644 --- a/agent/docker_test.go +++ b/agent/docker_test.go @@ -35,6 +35,12 @@ type recordingRoundTripper struct { lastQuery map[string]string } +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return fn(req) +} + func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { rt.called = true rt.lastPath = req.URL.EscapedPath() @@ -214,6 +220,28 @@ func TestContainerDetailsRequestsUseExpectedDockerPaths(t *testing.T) { }) } +func TestGetPodmanContainerHealth(t *testing.T) { + called := false + dm := &dockerManager{ + client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + called = true + assert.Equal(t, "/containers/0123456789ab/json", req.URL.EscapedPath()) + return &http.Response{ + StatusCode: http.StatusOK, + Status: "200 OK", + Header: make(http.Header), + Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)), + Request: req, + }, nil + })}, + } + + health, err := dm.getPodmanContainerHealth("0123456789ab") + require.NoError(t, err) + assert.True(t, called) + assert.Equal(t, container.DockerHealthHealthy, health) +} + func TestValidateCpuPercentage(t *testing.T) { tests := []struct { name string @@ -1129,6 +1157,18 @@ func TestParseDockerStatus(t *testing.T) { expectedStatus: "", expectedHealth: container.DockerHealthNone, }, + { + name: "status health with health: prefix", + input: "Up 5 minutes (health: starting)", + expectedStatus: "Up 5 minutes", + expectedHealth: container.DockerHealthStarting, + }, + { + name: "status health with health status: prefix", + input: "Up 10 minutes (health status: unhealthy)", + expectedStatus: "Up 10 minutes", + expectedHealth: container.DockerHealthUnhealthy, + }, } for _, tt := range tests { @@ -1140,6 +1180,84 @@ func TestParseDockerStatus(t *testing.T) { } } +func TestParseDockerHealthStatus(t *testing.T) { + tests := []struct { + input string + expectedHealth container.DockerHealth + expectedOk bool + }{ + {"healthy", container.DockerHealthHealthy, true}, + {"unhealthy", container.DockerHealthUnhealthy, true}, + {"starting", container.DockerHealthStarting, true}, + {"none", container.DockerHealthNone, true}, + {" Healthy ", container.DockerHealthHealthy, true}, + {"unknown", container.DockerHealthNone, false}, + {"", container.DockerHealthNone, false}, + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + health, ok := parseDockerHealthStatus(tt.input) + assert.Equal(t, tt.expectedHealth, health) + assert.Equal(t, tt.expectedOk, ok) + }) + } +} + +func TestUpdateContainerStatsUsesPodmanInspectHealthFallback(t *testing.T) { + var requestedPaths []string + dm := &dockerManager{ + client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + requestedPaths = append(requestedPaths, req.URL.EscapedPath()) + switch req.URL.EscapedPath() { + case "/containers/0123456789ab/stats": + return &http.Response{ + StatusCode: http.StatusOK, + Status: "200 OK", + Header: make(http.Header), + Body: io.NopCloser(strings.NewReader(`{ + "read":"2026-03-15T21:26:59Z", + "cpu_stats":{"cpu_usage":{"total_usage":1000},"system_cpu_usage":2000}, + "memory_stats":{"usage":1048576,"stats":{"inactive_file":262144}}, + "networks":{"eth0":{"rx_bytes":0,"tx_bytes":0}} + }`)), + Request: req, + }, nil + case "/containers/0123456789ab/json": + return &http.Response{ + StatusCode: http.StatusOK, + Status: "200 OK", + Header: make(http.Header), + Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)), + Request: req, + }, nil + default: + return nil, fmt.Errorf("unexpected path: %s", req.URL.EscapedPath()) + } + })}, + containerStatsMap: make(map[string]*container.Stats), + apiStats: &container.ApiStats{}, + usingPodman: true, + lastCpuContainer: make(map[uint16]map[string]uint64), + lastCpuSystem: make(map[uint16]map[string]uint64), + lastCpuReadTime: make(map[uint16]map[string]time.Time), + networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]), + networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]), + } + + ctr := &container.ApiInfo{ + IdShort: "0123456789ab", + Names: []string{"/beszel"}, + Status: "Up 2 minutes", + Image: "beszel:latest", + } + + err := dm.updateContainerStats(ctr, defaultCacheTimeMs) + require.NoError(t, err) + assert.Equal(t, []string{"/containers/0123456789ab/stats", "/containers/0123456789ab/json"}, requestedPaths) + assert.Equal(t, container.DockerHealthHealthy, dm.containerStatsMap[ctr.IdShort].Health) + assert.Equal(t, "Up 2 minutes", dm.containerStatsMap[ctr.IdShort].Status) +} + func TestConstantsAndUtilityFunctions(t *testing.T) { // Test constants are properly defined assert.Equal(t, uint16(60000), defaultCacheTimeMs)