From b91eb6de407c26179a1d52f97ca141795e9eb468 Mon Sep 17 00:00:00 2001 From: henrygd Date: Thu, 26 Feb 2026 18:11:33 -0500 Subject: [PATCH] improve root I/O device detection and fallback (#1772) - Match FILESYSTEM directly against I/O devices if partition lookup fails - Fall back to the most active I/O device if no root device is detected - Add WARN logs in final fallback case to most active device --- agent/disk.go | 45 +++++++++++++++++++++++++++++++++++++++------ agent/disk_test.go | 22 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/agent/disk.go b/agent/disk.go index 876f6815..917fbc90 100644 --- a/agent/disk.go +++ b/agent/disk.go @@ -128,7 +128,15 @@ func (a *Agent) initializeDiskInfo() { } } if !hasRoot { - slog.Warn("Partition details not found", "filesystem", filesystem) + // FILESYSTEM may name a physical disk absent from partitions (e.g. + // ZFS lists dataset paths like zroot/ROOT/default, not block devices). + // Try matching directly against diskIoCounters. + if ioKey, match := findIoDevice(filesystem, diskIoCounters); match { + a.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint} + hasRoot = true + } else { + slog.Warn("Partition details not found", "filesystem", filesystem) + } } } @@ -194,13 +202,19 @@ func (a *Agent) initializeDiskInfo() { } } - // If no root filesystem set, use fallback + // If no root filesystem set, try the most active I/O device as a last + // resort (e.g. ZFS where dataset names are unrelated to disk names). if !hasRoot { - rootKey := filepath.Base(rootMountPoint) - if _, exists := a.fsStats[rootKey]; exists { - rootKey = "root" + rootKey := mostActiveIoDevice(diskIoCounters) + if rootKey != "" { + slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey) + } else { + rootKey = filepath.Base(rootMountPoint) + if _, exists := a.fsStats[rootKey]; exists { + rootKey = "root" + } + slog.Warn("Root I/O device not detected; set FILESYSTEM to override") } - slog.Warn("Root device not detected; root I/O disabled", "mountpoint", rootMountPoint) a.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint} } @@ -305,6 +319,25 @@ func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersSt return best.name, true } +// mostActiveIoDevice returns the device with the highest I/O activity, +// or "" if diskIoCounters is empty. +func mostActiveIoDevice(diskIoCounters map[string]disk.IOCountersStat) string { + var best ioMatchCandidate + for _, d := range diskIoCounters { + c := ioMatchCandidate{ + name: d.Name, + bytes: d.ReadBytes + d.WriteBytes, + ops: d.ReadCount + d.WriteCount, + } + if best.name == "" || c.bytes > best.bytes || + (c.bytes == best.bytes && c.ops > best.ops) || + (c.bytes == best.bytes && c.ops == best.ops && c.name < best.name) { + best = c + } + } + return best.name +} + // prefixRelated reports whether either identifier is a prefix of the other. func prefixRelated(a, b string) bool { if a == "" || b == "" || a == b { diff --git a/agent/disk_test.go b/agent/disk_test.go index acfcb751..98f16aa3 100644 --- a/agent/disk_test.go +++ b/agent/disk_test.go @@ -206,6 +206,28 @@ func TestFilesystemMatchesPartitionSetting(t *testing.T) { }) } +func TestMostActiveIoDevice(t *testing.T) { + t.Run("returns most active device", func(t *testing.T) { + ioCounters := map[string]disk.IOCountersStat{ + "nda0": {Name: "nda0", ReadBytes: 5000, WriteBytes: 5000, ReadCount: 100, WriteCount: 100}, + "nda1": {Name: "nda1", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 50, WriteCount: 50}, + } + assert.Equal(t, "nda0", mostActiveIoDevice(ioCounters)) + }) + + t.Run("uses deterministic tie-breaker", func(t *testing.T) { + ioCounters := map[string]disk.IOCountersStat{ + "sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10}, + "sda": {Name: "sda", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10}, + } + assert.Equal(t, "sda", mostActiveIoDevice(ioCounters)) + }) + + t.Run("returns empty for empty map", func(t *testing.T) { + assert.Equal(t, "", mostActiveIoDevice(map[string]disk.IOCountersStat{})) + }) +} + func TestIsDockerSpecialMountpoint(t *testing.T) { testCases := []struct { name string