fix(agent): add fallback for podman container health (#1475)

This commit is contained in:
henrygd
2026-03-15 17:59:59 -04:00
parent 4ebe869591
commit ed50367f70
2 changed files with 174 additions and 13 deletions

View File

@@ -400,22 +400,60 @@ func parseDockerStatus(status string) (string, container.DockerHealth) {
statusText = trimmed
}
healthText := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")")))
healthText := strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))
// Some Docker statuses include a "health:" prefix inside the parentheses.
// Strip it so it maps correctly to the known health states.
if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 {
prefix := strings.TrimSpace(healthText[:colonIdx])
prefix := strings.ToLower(strings.TrimSpace(healthText[:colonIdx]))
if prefix == "health" || prefix == "health status" {
healthText = strings.TrimSpace(healthText[colonIdx+1:])
}
}
if health, ok := container.DockerHealthStrings[healthText]; ok {
if health, ok := parseDockerHealthStatus(healthText); ok {
return statusText, health
}
return trimmed, container.DockerHealthNone
}
// parseDockerHealthStatus maps Docker health status strings to container.DockerHealth values
func parseDockerHealthStatus(status string) (container.DockerHealth, bool) {
health, ok := container.DockerHealthStrings[strings.ToLower(strings.TrimSpace(status))]
return health, ok
}
// getPodmanContainerHealth fetches container health status from the container inspect endpoint.
// Used for Podman which doesn't provide health status in the /containers/json endpoint as of March 2026.
// https://github.com/containers/podman/issues/27786
func (dm *dockerManager) getPodmanContainerHealth(containerID string) (container.DockerHealth, error) {
resp, err := dm.client.Get(fmt.Sprintf("http://localhost/containers/%s/json", url.PathEscape(containerID)))
if err != nil {
return container.DockerHealthNone, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return container.DockerHealthNone, fmt.Errorf("container inspect request failed: %s", resp.Status)
}
var inspectInfo struct {
State struct {
Health struct {
Status string
}
}
}
if err := json.NewDecoder(resp.Body).Decode(&inspectInfo); err != nil {
return container.DockerHealthNone, err
}
if health, ok := parseDockerHealthStatus(inspectInfo.State.Health.Status); ok {
return health, nil
}
return container.DockerHealthNone, nil
}
// Updates stats for individual container with cache-time-aware delta tracking
func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error {
name := ctr.Names[0][1:]
@@ -425,6 +463,21 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
return err
}
statusText, health := parseDockerStatus(ctr.Status)
// Docker exposes Health.Status on /containers/json in API 1.52+.
// Podman currently requires falling back to the inspect endpoint as of March 2026.
// https://github.com/containers/podman/issues/27786
if ctr.Health.Status != "" {
if h, ok := parseDockerHealthStatus(ctr.Health.Status); ok {
health = h
}
} else if dm.usingPodman {
if podmanHealth, err := dm.getPodmanContainerHealth(ctr.IdShort); err == nil {
health = podmanHealth
}
}
dm.containerStatsMutex.Lock()
defer dm.containerStatsMutex.Unlock()
@@ -436,16 +489,6 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
}
stats.Id = ctr.IdShort
statusText, health := parseDockerStatus(ctr.Status)
// Use Health.Status if it's available (Docker API 1.52+; Podman TBD - https://github.com/containers/podman/issues/27786)
if ctr.Health.Status != "" {
if h, ok := container.DockerHealthStrings[ctr.Health.Status]; ok {
health = h
}
}
stats.Status = statusText
stats.Health = health

View File

@@ -35,6 +35,12 @@ type recordingRoundTripper struct {
lastQuery map[string]string
}
type roundTripFunc func(*http.Request) (*http.Response, error)
func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
return fn(req)
}
func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
rt.called = true
rt.lastPath = req.URL.EscapedPath()
@@ -214,6 +220,28 @@ func TestContainerDetailsRequestsUseExpectedDockerPaths(t *testing.T) {
})
}
func TestGetPodmanContainerHealth(t *testing.T) {
called := false
dm := &dockerManager{
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
called = true
assert.Equal(t, "/containers/0123456789ab/json", req.URL.EscapedPath())
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
Request: req,
}, nil
})},
}
health, err := dm.getPodmanContainerHealth("0123456789ab")
require.NoError(t, err)
assert.True(t, called)
assert.Equal(t, container.DockerHealthHealthy, health)
}
func TestValidateCpuPercentage(t *testing.T) {
tests := []struct {
name string
@@ -1129,6 +1157,18 @@ func TestParseDockerStatus(t *testing.T) {
expectedStatus: "",
expectedHealth: container.DockerHealthNone,
},
{
name: "status health with health: prefix",
input: "Up 5 minutes (health: starting)",
expectedStatus: "Up 5 minutes",
expectedHealth: container.DockerHealthStarting,
},
{
name: "status health with health status: prefix",
input: "Up 10 minutes (health status: unhealthy)",
expectedStatus: "Up 10 minutes",
expectedHealth: container.DockerHealthUnhealthy,
},
}
for _, tt := range tests {
@@ -1140,6 +1180,84 @@ func TestParseDockerStatus(t *testing.T) {
}
}
func TestParseDockerHealthStatus(t *testing.T) {
tests := []struct {
input string
expectedHealth container.DockerHealth
expectedOk bool
}{
{"healthy", container.DockerHealthHealthy, true},
{"unhealthy", container.DockerHealthUnhealthy, true},
{"starting", container.DockerHealthStarting, true},
{"none", container.DockerHealthNone, true},
{" Healthy ", container.DockerHealthHealthy, true},
{"unknown", container.DockerHealthNone, false},
{"", container.DockerHealthNone, false},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
health, ok := parseDockerHealthStatus(tt.input)
assert.Equal(t, tt.expectedHealth, health)
assert.Equal(t, tt.expectedOk, ok)
})
}
}
func TestUpdateContainerStatsUsesPodmanInspectHealthFallback(t *testing.T) {
var requestedPaths []string
dm := &dockerManager{
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
requestedPaths = append(requestedPaths, req.URL.EscapedPath())
switch req.URL.EscapedPath() {
case "/containers/0123456789ab/stats":
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{
"read":"2026-03-15T21:26:59Z",
"cpu_stats":{"cpu_usage":{"total_usage":1000},"system_cpu_usage":2000},
"memory_stats":{"usage":1048576,"stats":{"inactive_file":262144}},
"networks":{"eth0":{"rx_bytes":0,"tx_bytes":0}}
}`)),
Request: req,
}, nil
case "/containers/0123456789ab/json":
return &http.Response{
StatusCode: http.StatusOK,
Status: "200 OK",
Header: make(http.Header),
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
Request: req,
}, nil
default:
return nil, fmt.Errorf("unexpected path: %s", req.URL.EscapedPath())
}
})},
containerStatsMap: make(map[string]*container.Stats),
apiStats: &container.ApiStats{},
usingPodman: true,
lastCpuContainer: make(map[uint16]map[string]uint64),
lastCpuSystem: make(map[uint16]map[string]uint64),
lastCpuReadTime: make(map[uint16]map[string]time.Time),
networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
}
ctr := &container.ApiInfo{
IdShort: "0123456789ab",
Names: []string{"/beszel"},
Status: "Up 2 minutes",
Image: "beszel:latest",
}
err := dm.updateContainerStats(ctr, defaultCacheTimeMs)
require.NoError(t, err)
assert.Equal(t, []string{"/containers/0123456789ab/stats", "/containers/0123456789ab/json"}, requestedPaths)
assert.Equal(t, container.DockerHealthHealthy, dm.containerStatsMap[ctr.IdShort].Health)
assert.Equal(t, "Up 2 minutes", dm.containerStatsMap[ctr.IdShort].Status)
}
func TestConstantsAndUtilityFunctions(t *testing.T) {
// Test constants are properly defined
assert.Equal(t, uint16(60000), defaultCacheTimeMs)