mirror of
https://github.com/henrygd/beszel.git
synced 2026-04-19 19:31:48 +02:00
Compare commits
2 Commits
48c35aa54d
...
docker-24-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
845369ab54 | ||
|
|
dba3519b2c |
@@ -26,6 +26,15 @@ func parseFilesystemEntry(entry string) (device, customName string) {
|
|||||||
return device, customName
|
return device, customName
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isDockerSpecialMountpoint(mountpoint string) bool {
|
||||||
|
switch mountpoint {
|
||||||
|
case "/etc/hosts", "/etc/resolv.conf", "/etc/hostname":
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sets up the filesystems to monitor for disk usage and I/O.
|
// Sets up the filesystems to monitor for disk usage and I/O.
|
||||||
func (a *Agent) initializeDiskInfo() {
|
func (a *Agent) initializeDiskInfo() {
|
||||||
filesystem, _ := GetEnv("FILESYSTEM")
|
filesystem, _ := GetEnv("FILESYSTEM")
|
||||||
@@ -69,11 +78,15 @@ func (a *Agent) initializeDiskInfo() {
|
|||||||
if _, exists := a.fsStats[key]; !exists {
|
if _, exists := a.fsStats[key]; !exists {
|
||||||
if root {
|
if root {
|
||||||
slog.Info("Detected root device", "name", key)
|
slog.Info("Detected root device", "name", key)
|
||||||
// Check if root device is in /proc/diskstats, use fallback if not
|
// Check if root device is in /proc/diskstats. Do not guess a
|
||||||
|
// fallback device for root: that can misattribute root I/O to a
|
||||||
|
// different disk while usage remains tied to root mountpoint.
|
||||||
if _, ioMatch = diskIoCounters[key]; !ioMatch {
|
if _, ioMatch = diskIoCounters[key]; !ioMatch {
|
||||||
key, ioMatch = findIoDevice(filesystem, diskIoCounters, a.fsStats)
|
if matchedKey, match := findIoDevice(filesystem, diskIoCounters); match {
|
||||||
if !ioMatch {
|
key = matchedKey
|
||||||
slog.Info("Using I/O fallback", "device", device, "mountpoint", mountpoint, "fallback", key)
|
ioMatch = true
|
||||||
|
} else {
|
||||||
|
slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -141,8 +154,8 @@ func (a *Agent) initializeDiskInfo() {
|
|||||||
for _, p := range partitions {
|
for _, p := range partitions {
|
||||||
// fmt.Println(p.Device, p.Mountpoint)
|
// fmt.Println(p.Device, p.Mountpoint)
|
||||||
// Binary root fallback or docker root fallback
|
// Binary root fallback or docker root fallback
|
||||||
if !hasRoot && (p.Mountpoint == rootMountPoint || (p.Mountpoint == "/etc/hosts" && strings.HasPrefix(p.Device, "/dev"))) {
|
if !hasRoot && (p.Mountpoint == rootMountPoint || (isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))) {
|
||||||
fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters, a.fsStats)
|
fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters)
|
||||||
if match {
|
if match {
|
||||||
addFsStat(fs, p.Mountpoint, true)
|
addFsStat(fs, p.Mountpoint, true)
|
||||||
hasRoot = true
|
hasRoot = true
|
||||||
@@ -176,33 +189,26 @@ func (a *Agent) initializeDiskInfo() {
|
|||||||
|
|
||||||
// If no root filesystem set, use fallback
|
// If no root filesystem set, use fallback
|
||||||
if !hasRoot {
|
if !hasRoot {
|
||||||
rootDevice, _ := findIoDevice(filepath.Base(filesystem), diskIoCounters, a.fsStats)
|
rootKey := filepath.Base(rootMountPoint)
|
||||||
slog.Info("Root disk", "mountpoint", rootMountPoint, "io", rootDevice)
|
if _, exists := a.fsStats[rootKey]; exists {
|
||||||
a.fsStats[rootDevice] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
|
rootKey = "root"
|
||||||
|
}
|
||||||
|
slog.Warn("Root device not detected; root I/O disabled", "mountpoint", rootMountPoint)
|
||||||
|
a.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
|
||||||
}
|
}
|
||||||
|
|
||||||
a.initializeDiskIoStats(diskIoCounters)
|
a.initializeDiskIoStats(diskIoCounters)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns matching device from /proc/diskstats,
|
// Returns matching device from /proc/diskstats.
|
||||||
// or the device with the most reads if no match is found.
|
|
||||||
// bool is true if a match was found.
|
// bool is true if a match was found.
|
||||||
func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat, fsStats map[string]*system.FsStats) (string, bool) {
|
func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat) (string, bool) {
|
||||||
var maxReadBytes uint64
|
|
||||||
maxReadDevice := "/"
|
|
||||||
for _, d := range diskIoCounters {
|
for _, d := range diskIoCounters {
|
||||||
if d.Name == filesystem || (d.Label != "" && d.Label == filesystem) {
|
if d.Name == filesystem || (d.Label != "" && d.Label == filesystem) {
|
||||||
return d.Name, true
|
return d.Name, true
|
||||||
}
|
}
|
||||||
if d.ReadBytes > maxReadBytes {
|
|
||||||
// don't use if device already exists in fsStats
|
|
||||||
if _, exists := fsStats[d.Name]; !exists {
|
|
||||||
maxReadBytes = d.ReadBytes
|
|
||||||
maxReadDevice = d.Name
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return maxReadDevice, false
|
return "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sets start values for disk I/O stats.
|
// Sets start values for disk I/O stats.
|
||||||
|
|||||||
@@ -94,6 +94,62 @@ func TestParseFilesystemEntry(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFindIoDevice(t *testing.T) {
|
||||||
|
t.Run("matches by device name", func(t *testing.T) {
|
||||||
|
ioCounters := map[string]disk.IOCountersStat{
|
||||||
|
"sda": {Name: "sda"},
|
||||||
|
"sdb": {Name: "sdb"},
|
||||||
|
}
|
||||||
|
|
||||||
|
device, ok := findIoDevice("sdb", ioCounters)
|
||||||
|
assert.True(t, ok)
|
||||||
|
assert.Equal(t, "sdb", device)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("matches by device label", func(t *testing.T) {
|
||||||
|
ioCounters := map[string]disk.IOCountersStat{
|
||||||
|
"sda": {Name: "sda", Label: "rootfs"},
|
||||||
|
"sdb": {Name: "sdb"},
|
||||||
|
}
|
||||||
|
|
||||||
|
device, ok := findIoDevice("rootfs", ioCounters)
|
||||||
|
assert.True(t, ok)
|
||||||
|
assert.Equal(t, "sda", device)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("returns no fallback when not found", func(t *testing.T) {
|
||||||
|
ioCounters := map[string]disk.IOCountersStat{
|
||||||
|
"sda": {Name: "sda"},
|
||||||
|
"sdb": {Name: "sdb"},
|
||||||
|
}
|
||||||
|
|
||||||
|
device, ok := findIoDevice("nvme0n1p1", ioCounters)
|
||||||
|
assert.False(t, ok)
|
||||||
|
assert.Equal(t, "", device)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsDockerSpecialMountpoint(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
name string
|
||||||
|
mountpoint string
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{name: "hosts", mountpoint: "/etc/hosts", expected: true},
|
||||||
|
{name: "resolv", mountpoint: "/etc/resolv.conf", expected: true},
|
||||||
|
{name: "hostname", mountpoint: "/etc/hostname", expected: true},
|
||||||
|
{name: "root", mountpoint: "/", expected: false},
|
||||||
|
{name: "passwd", mountpoint: "/etc/passwd", expected: false},
|
||||||
|
{name: "extra-filesystem", mountpoint: "/extra-filesystems/sda1", expected: false},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
assert.Equal(t, tc.expected, isDockerSpecialMountpoint(tc.mountpoint))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
|
func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
|
||||||
// Set up environment variables
|
// Set up environment variables
|
||||||
oldEnv := os.Getenv("EXTRA_FILESYSTEMS")
|
oldEnv := os.Getenv("EXTRA_FILESYSTEMS")
|
||||||
|
|||||||
@@ -32,6 +32,10 @@ var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]|\x1b\][^\x07]*
|
|||||||
const (
|
const (
|
||||||
// Docker API timeout in milliseconds
|
// Docker API timeout in milliseconds
|
||||||
dockerTimeoutMs = 2100
|
dockerTimeoutMs = 2100
|
||||||
|
// Number of consecutive /containers/json failures before forcing a client reset on old Docker versions
|
||||||
|
dockerClientResetFailureThreshold = 3
|
||||||
|
// Minimum time between Docker client resets to avoid reset flapping
|
||||||
|
dockerClientResetCooldown = 30 * time.Second
|
||||||
// Maximum realistic network speed (5 GB/s) to detect bad deltas
|
// Maximum realistic network speed (5 GB/s) to detect bad deltas
|
||||||
maxNetworkSpeedBps uint64 = 5e9
|
maxNetworkSpeedBps uint64 = 5e9
|
||||||
// Maximum conceivable memory usage of a container (100TB) to detect bad memory stats
|
// Maximum conceivable memory usage of a container (100TB) to detect bad memory stats
|
||||||
@@ -55,12 +59,16 @@ type dockerManager struct {
|
|||||||
containerStatsMap map[string]*container.Stats // Keeps track of container stats
|
containerStatsMap map[string]*container.Stats // Keeps track of container stats
|
||||||
validIds map[string]struct{} // Map of valid container ids, used to prune invalid containers from containerStatsMap
|
validIds map[string]struct{} // Map of valid container ids, used to prune invalid containers from containerStatsMap
|
||||||
goodDockerVersion bool // Whether docker version is at least 25.0.0 (one-shot works correctly)
|
goodDockerVersion bool // Whether docker version is at least 25.0.0 (one-shot works correctly)
|
||||||
|
versionChecked bool // Whether docker version detection completed successfully
|
||||||
isWindows bool // Whether the Docker Engine API is running on Windows
|
isWindows bool // Whether the Docker Engine API is running on Windows
|
||||||
buf *bytes.Buffer // Buffer to store and read response bodies
|
buf *bytes.Buffer // Buffer to store and read response bodies
|
||||||
decoder *json.Decoder // Reusable JSON decoder that reads from buf
|
decoder *json.Decoder // Reusable JSON decoder that reads from buf
|
||||||
apiStats *container.ApiStats // Reusable API stats object
|
apiStats *container.ApiStats // Reusable API stats object
|
||||||
excludeContainers []string // Patterns to exclude containers by name
|
excludeContainers []string // Patterns to exclude containers by name
|
||||||
usingPodman bool // Whether the Docker Engine API is running on Podman
|
usingPodman bool // Whether the Docker Engine API is running on Podman
|
||||||
|
transport *http.Transport // Base transport used by client for connection resets
|
||||||
|
consecutiveListFailures int // Number of consecutive /containers/json request failures
|
||||||
|
lastClientReset time.Time // Last time the Docker client connections were reset
|
||||||
|
|
||||||
// Cache-time-aware tracking for CPU stats (similar to cpu.go)
|
// Cache-time-aware tracking for CPU stats (similar to cpu.go)
|
||||||
// Maps cache time intervals to container-specific CPU usage tracking
|
// Maps cache time intervals to container-specific CPU usage tracking
|
||||||
@@ -119,8 +127,10 @@ func (dm *dockerManager) shouldExcludeContainer(name string) bool {
|
|||||||
func (dm *dockerManager) getDockerStats(cacheTimeMs uint16) ([]*container.Stats, error) {
|
func (dm *dockerManager) getDockerStats(cacheTimeMs uint16) ([]*container.Stats, error) {
|
||||||
resp, err := dm.client.Get("http://localhost/containers/json")
|
resp, err := dm.client.Get("http://localhost/containers/json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
dm.handleContainerListError(err)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
dm.consecutiveListFailures = 0
|
||||||
|
|
||||||
dm.apiContainerList = dm.apiContainerList[:0]
|
dm.apiContainerList = dm.apiContainerList[:0]
|
||||||
if err := dm.decode(resp, &dm.apiContainerList); err != nil {
|
if err := dm.decode(resp, &dm.apiContainerList); err != nil {
|
||||||
@@ -204,6 +214,50 @@ func (dm *dockerManager) getDockerStats(cacheTimeMs uint16) ([]*container.Stats,
|
|||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (dm *dockerManager) handleContainerListError(err error) {
|
||||||
|
dm.consecutiveListFailures++
|
||||||
|
if !dm.shouldResetDockerClient(err) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
dm.resetDockerClientConnections()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dm *dockerManager) shouldResetDockerClient(err error) bool {
|
||||||
|
if !dm.versionChecked || dm.goodDockerVersion {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if dm.consecutiveListFailures < dockerClientResetFailureThreshold {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !dm.lastClientReset.IsZero() && time.Since(dm.lastClientReset) < dockerClientResetCooldown {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return isDockerApiOverloadError(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func isDockerApiOverloadError(err error) bool {
|
||||||
|
if err == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
msg := err.Error()
|
||||||
|
return strings.Contains(msg, "Client.Timeout exceeded") ||
|
||||||
|
strings.Contains(msg, "request canceled") ||
|
||||||
|
strings.Contains(msg, "context deadline exceeded") ||
|
||||||
|
strings.Contains(msg, "EOF")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dm *dockerManager) resetDockerClientConnections() {
|
||||||
|
if dm.transport == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
dm.transport.CloseIdleConnections()
|
||||||
|
dm.lastClientReset = time.Now()
|
||||||
|
slog.Warn("Reset Docker client connections after repeated /containers/json failures", "failures", dm.consecutiveListFailures)
|
||||||
|
}
|
||||||
|
|
||||||
// initializeCpuTracking initializes CPU tracking maps for a specific cache time interval
|
// initializeCpuTracking initializes CPU tracking maps for a specific cache time interval
|
||||||
func (dm *dockerManager) initializeCpuTracking(cacheTimeMs uint16) {
|
func (dm *dockerManager) initializeCpuTracking(cacheTimeMs uint16) {
|
||||||
// Initialize cache time maps if they don't exist
|
// Initialize cache time maps if they don't exist
|
||||||
@@ -553,6 +607,7 @@ func newDockerManager() *dockerManager {
|
|||||||
Timeout: timeout,
|
Timeout: timeout,
|
||||||
Transport: userAgentTransport,
|
Transport: userAgentTransport,
|
||||||
},
|
},
|
||||||
|
transport: transport,
|
||||||
containerStatsMap: make(map[string]*container.Stats),
|
containerStatsMap: make(map[string]*container.Stats),
|
||||||
sem: make(chan struct{}, 5),
|
sem: make(chan struct{}, 5),
|
||||||
apiContainerList: []*container.ApiInfo{},
|
apiContainerList: []*container.ApiInfo{},
|
||||||
@@ -611,6 +666,7 @@ func (dm *dockerManager) checkDockerVersion() {
|
|||||||
if err := dm.decode(resp, &versionInfo); err != nil {
|
if err := dm.decode(resp, &versionInfo); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
dm.versionChecked = true
|
||||||
// if version > 24, one-shot works correctly and we can limit concurrent operations
|
// if version > 24, one-shot works correctly and we can limit concurrent operations
|
||||||
if dockerVersion, err := semver.Parse(versionInfo.Version); err == nil && dockerVersion.Major > 24 {
|
if dockerVersion, err := semver.Parse(versionInfo.Version); err == nil && dockerVersion.Major > 24 {
|
||||||
dm.goodDockerVersion = true
|
dm.goodDockerVersion = true
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -18,8 +19,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/henrygd/beszel/internal/entities/smart"
|
"github.com/henrygd/beszel/internal/entities/smart"
|
||||||
|
|
||||||
"log/slog"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// SmartManager manages data collection for SMART devices
|
// SmartManager manages data collection for SMART devices
|
||||||
@@ -1125,7 +1124,6 @@ func NewSmartManager() (*SmartManager, error) {
|
|||||||
sm.refreshExcludedDevices()
|
sm.refreshExcludedDevices()
|
||||||
path, err := sm.detectSmartctl()
|
path, err := sm.detectSmartctl()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Debug(err.Error())
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
slog.Debug("smartctl", "path", path)
|
slog.Debug("smartctl", "path", path)
|
||||||
|
|||||||
Reference in New Issue
Block a user