fix(agent): Retry Docker check on non-200 HTTP response (#1754)

The previous behavior only caught some errors including inaccessible
hosts, but not others like failed authentication or service
unavailability. This largely applies when using a socket proxy and
having the retry mitigates some erroneous behavior.
This commit is contained in:
Elio Di Nino
2026-02-18 10:42:58 -08:00
committed by GitHub
parent 10d853c004
commit ef92b254bf
2 changed files with 122 additions and 3 deletions

View File

@@ -594,18 +594,22 @@ func (dm *dockerManager) checkDockerVersion() {
const versionMaxTries = 2 const versionMaxTries = 2
for i := 1; i <= versionMaxTries; i++ { for i := 1; i <= versionMaxTries; i++ {
resp, err = dm.client.Get("http://localhost/version") resp, err = dm.client.Get("http://localhost/version")
if err == nil { if err == nil && resp.StatusCode == http.StatusOK {
break break
} }
if resp != nil { if resp != nil {
resp.Body.Close() resp.Body.Close()
} }
if i < versionMaxTries { if i < versionMaxTries {
slog.Debug("Failed to get Docker version; retrying", "attempt", i, "error", err) if err != nil {
slog.Debug("Failed to get Docker version; retrying", "attempt", i, "error", err)
} else {
slog.Debug("Failed to get Docker version; retrying", "attempt", i, "status code", resp.StatusCode)
}
time.Sleep(5 * time.Second) time.Sleep(5 * time.Second)
} }
} }
if err != nil { if err != nil || resp.StatusCode != http.StatusOK {
return return
} }
if err := dm.decode(resp, &versionInfo); err != nil { if err := dm.decode(resp, &versionInfo); err != nil {

View File

@@ -5,7 +5,13 @@ package agent
import ( import (
"bytes" "bytes"
"context"
"encoding/json" "encoding/json"
"errors"
"fmt"
"net"
"net/http"
"net/http/httptest"
"os" "os"
"strings" "strings"
"testing" "testing"
@@ -379,6 +385,115 @@ func TestDockerManagerCreation(t *testing.T) {
assert.NotNil(t, dm.networkRecvTrackers) assert.NotNil(t, dm.networkRecvTrackers)
} }
func TestCheckDockerVersion(t *testing.T) {
tests := []struct {
name string
responses []struct {
statusCode int
body string
}
expectedGood bool
expectedRequests int
}{
{
name: "200 with good version on first try",
responses: []struct {
statusCode int
body string
}{
{http.StatusOK, `{"Version":"25.0.1"}`},
},
expectedGood: true,
expectedRequests: 1,
},
{
name: "200 with old version on first try",
responses: []struct {
statusCode int
body string
}{
{http.StatusOK, `{"Version":"24.0.7"}`},
},
expectedGood: false,
expectedRequests: 1,
},
{
name: "non-200 then 200 with good version",
responses: []struct {
statusCode int
body string
}{
{http.StatusServiceUnavailable, `"not ready"`},
{http.StatusOK, `{"Version":"25.1.0"}`},
},
expectedGood: true,
expectedRequests: 2,
},
{
name: "non-200 on all retries",
responses: []struct {
statusCode int
body string
}{
{http.StatusInternalServerError, `"error"`},
{http.StatusUnauthorized, `"error"`},
},
expectedGood: false,
expectedRequests: 2,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
requestCount := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
idx := requestCount
requestCount++
if idx >= len(tt.responses) {
idx = len(tt.responses) - 1
}
w.WriteHeader(tt.responses[idx].statusCode)
fmt.Fprint(w, tt.responses[idx].body)
}))
defer server.Close()
dm := &dockerManager{
client: &http.Client{
Transport: &http.Transport{
DialContext: func(_ context.Context, network, _ string) (net.Conn, error) {
return net.Dial(network, server.Listener.Addr().String())
},
},
},
}
dm.checkDockerVersion()
assert.Equal(t, tt.expectedGood, dm.goodDockerVersion)
assert.Equal(t, tt.expectedRequests, requestCount)
})
}
t.Run("request error on all retries", func(t *testing.T) {
requestCount := 0
dm := &dockerManager{
client: &http.Client{
Transport: &http.Transport{
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
requestCount++
return nil, errors.New("connection refused")
},
},
},
}
dm.checkDockerVersion()
assert.False(t, dm.goodDockerVersion)
assert.Equal(t, 2, requestCount)
})
}
func TestCycleCpuDeltas(t *testing.T) { func TestCycleCpuDeltas(t *testing.T) {
dm := &dockerManager{ dm := &dockerManager{
lastCpuContainer: map[uint16]map[string]uint64{ lastCpuContainer: map[uint16]map[string]uint64{