improve root I/O device detection and fallback (#1772)

- Match FILESYSTEM directly against I/O devices if partition lookup
fails
- Fall back to the most active I/O device if no root device is detected
- Add WARN logs in final fallback case to most active device
This commit is contained in:
henrygd
2026-02-26 18:11:33 -05:00
parent ec69f6c6e0
commit b91eb6de40
2 changed files with 61 additions and 6 deletions

View File

@@ -128,7 +128,15 @@ func (a *Agent) initializeDiskInfo() {
}
}
if !hasRoot {
slog.Warn("Partition details not found", "filesystem", filesystem)
// FILESYSTEM may name a physical disk absent from partitions (e.g.
// ZFS lists dataset paths like zroot/ROOT/default, not block devices).
// Try matching directly against diskIoCounters.
if ioKey, match := findIoDevice(filesystem, diskIoCounters); match {
a.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
hasRoot = true
} else {
slog.Warn("Partition details not found", "filesystem", filesystem)
}
}
}
@@ -194,13 +202,19 @@ func (a *Agent) initializeDiskInfo() {
}
}
// If no root filesystem set, use fallback
// If no root filesystem set, try the most active I/O device as a last
// resort (e.g. ZFS where dataset names are unrelated to disk names).
if !hasRoot {
rootKey := filepath.Base(rootMountPoint)
if _, exists := a.fsStats[rootKey]; exists {
rootKey = "root"
rootKey := mostActiveIoDevice(diskIoCounters)
if rootKey != "" {
slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
} else {
rootKey = filepath.Base(rootMountPoint)
if _, exists := a.fsStats[rootKey]; exists {
rootKey = "root"
}
slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
}
slog.Warn("Root device not detected; root I/O disabled", "mountpoint", rootMountPoint)
a.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
}
@@ -305,6 +319,25 @@ func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersSt
return best.name, true
}
// mostActiveIoDevice returns the device with the highest I/O activity,
// or "" if diskIoCounters is empty.
func mostActiveIoDevice(diskIoCounters map[string]disk.IOCountersStat) string {
var best ioMatchCandidate
for _, d := range diskIoCounters {
c := ioMatchCandidate{
name: d.Name,
bytes: d.ReadBytes + d.WriteBytes,
ops: d.ReadCount + d.WriteCount,
}
if best.name == "" || c.bytes > best.bytes ||
(c.bytes == best.bytes && c.ops > best.ops) ||
(c.bytes == best.bytes && c.ops == best.ops && c.name < best.name) {
best = c
}
}
return best.name
}
// prefixRelated reports whether either identifier is a prefix of the other.
func prefixRelated(a, b string) bool {
if a == "" || b == "" || a == b {

View File

@@ -206,6 +206,28 @@ func TestFilesystemMatchesPartitionSetting(t *testing.T) {
})
}
func TestMostActiveIoDevice(t *testing.T) {
t.Run("returns most active device", func(t *testing.T) {
ioCounters := map[string]disk.IOCountersStat{
"nda0": {Name: "nda0", ReadBytes: 5000, WriteBytes: 5000, ReadCount: 100, WriteCount: 100},
"nda1": {Name: "nda1", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 50, WriteCount: 50},
}
assert.Equal(t, "nda0", mostActiveIoDevice(ioCounters))
})
t.Run("uses deterministic tie-breaker", func(t *testing.T) {
ioCounters := map[string]disk.IOCountersStat{
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10},
"sda": {Name: "sda", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10},
}
assert.Equal(t, "sda", mostActiveIoDevice(ioCounters))
})
t.Run("returns empty for empty map", func(t *testing.T) {
assert.Equal(t, "", mostActiveIoDevice(map[string]disk.IOCountersStat{}))
})
}
func TestIsDockerSpecialMountpoint(t *testing.T) {
testCases := []struct {
name string