add basic systemd service monitoring (#1153)

Co-authored-by: Shelby Tucker <shelby.tucker@gmail.com>
This commit is contained in:
henrygd
2025-11-10 17:04:19 -05:00
parent cbe6ee6499
commit 01d20562f0
30 changed files with 2245 additions and 63 deletions

View File

@@ -7,6 +7,7 @@ import (
"time"
"github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/systemd"
)
type Stats struct {
@@ -149,7 +150,8 @@ type Info struct {
// Final data structure to return to the hub
type CombinedData struct {
Stats Stats `json:"stats" cbor:"0,keyasint"`
Info Info `json:"info" cbor:"1,keyasint"`
Containers []*container.Stats `json:"container" cbor:"2,keyasint"`
Stats Stats `json:"stats" cbor:"0,keyasint"`
Info Info `json:"info" cbor:"1,keyasint"`
Containers []*container.Stats `json:"container" cbor:"2,keyasint"`
SystemdServices []*systemd.Service `json:"systemd,omitempty" cbor:"3,keyasint,omitempty"`
}

View File

@@ -0,0 +1,127 @@
package systemd
import (
"math"
"runtime"
"time"
)
// ServiceState represents the status of a systemd service
type ServiceState uint8
const (
StatusActive ServiceState = iota
StatusInactive
StatusFailed
StatusActivating
StatusDeactivating
StatusReloading
)
// ServiceSubState represents the sub status of a systemd service
type ServiceSubState uint8
const (
SubStateDead ServiceSubState = iota
SubStateRunning
SubStateExited
SubStateFailed
SubStateUnknown
)
// ParseServiceStatus converts a string status to a ServiceStatus enum value
func ParseServiceStatus(status string) ServiceState {
switch status {
case "active":
return StatusActive
case "inactive":
return StatusInactive
case "failed":
return StatusFailed
case "activating":
return StatusActivating
case "deactivating":
return StatusDeactivating
case "reloading":
return StatusReloading
default:
return StatusInactive
}
}
// ParseServiceSubState converts a string sub status to a ServiceSubState enum value
func ParseServiceSubState(subState string) ServiceSubState {
switch subState {
case "dead":
return SubStateDead
case "running":
return SubStateRunning
case "exited":
return SubStateExited
case "failed":
return SubStateFailed
default:
return SubStateUnknown
}
}
// Service represents a single systemd service with its stats.
type Service struct {
Name string `json:"n" cbor:"0,keyasint"`
State ServiceState `json:"s" cbor:"1,keyasint"`
Cpu float64 `json:"c" cbor:"2,keyasint"`
Mem uint64 `json:"m" cbor:"3,keyasint"`
MemPeak uint64 `json:"mp" cbor:"4,keyasint"`
Sub ServiceSubState `json:"ss" cbor:"5,keyasint"`
CpuPeak float64 `json:"cp" cbor:"6,keyasint"`
PrevCpuUsage uint64 `json:"-"`
PrevReadTime time.Time `json:"-"`
}
// UpdateCPUPercent calculates the CPU usage percentage for the service.
func (s *Service) UpdateCPUPercent(cpuUsage uint64) {
now := time.Now()
if s.PrevReadTime.IsZero() || cpuUsage < s.PrevCpuUsage {
s.Cpu = 0
s.PrevCpuUsage = cpuUsage
s.PrevReadTime = now
return
}
duration := now.Sub(s.PrevReadTime).Nanoseconds()
if duration <= 0 {
s.PrevCpuUsage = cpuUsage
s.PrevReadTime = now
return
}
coreCount := int64(runtime.NumCPU())
duration *= coreCount
usageDelta := cpuUsage - s.PrevCpuUsage
cpuPercent := float64(usageDelta) / float64(duration)
s.Cpu = twoDecimals(cpuPercent * 100)
if s.Cpu > s.CpuPeak {
s.CpuPeak = s.Cpu
}
s.PrevCpuUsage = cpuUsage
s.PrevReadTime = now
}
func twoDecimals(value float64) float64 {
return math.Round(value*100) / 100
}
// ServiceDependency represents a unit that the service depends on.
type ServiceDependency struct {
Name string `json:"name"`
Description string `json:"description,omitempty"`
ActiveState string `json:"activeState,omitempty"`
SubState string `json:"subState,omitempty"`
}
// ServiceDetails contains extended information about a systemd service.
type ServiceDetails map[string]any

View File

@@ -0,0 +1,113 @@
//go:build testing
package systemd_test
import (
"testing"
"time"
"github.com/henrygd/beszel/internal/entities/systemd"
"github.com/stretchr/testify/assert"
)
func TestParseServiceStatus(t *testing.T) {
tests := []struct {
input string
expected systemd.ServiceState
}{
{"active", systemd.StatusActive},
{"inactive", systemd.StatusInactive},
{"failed", systemd.StatusFailed},
{"activating", systemd.StatusActivating},
{"deactivating", systemd.StatusDeactivating},
{"reloading", systemd.StatusReloading},
{"unknown", systemd.StatusInactive}, // default case
{"", systemd.StatusInactive}, // default case
}
for _, test := range tests {
t.Run(test.input, func(t *testing.T) {
result := systemd.ParseServiceStatus(test.input)
assert.Equal(t, test.expected, result)
})
}
}
func TestParseServiceSubState(t *testing.T) {
tests := []struct {
input string
expected systemd.ServiceSubState
}{
{"dead", systemd.SubStateDead},
{"running", systemd.SubStateRunning},
{"exited", systemd.SubStateExited},
{"failed", systemd.SubStateFailed},
{"unknown", systemd.SubStateUnknown},
{"other", systemd.SubStateUnknown}, // default case
{"", systemd.SubStateUnknown}, // default case
}
for _, test := range tests {
t.Run(test.input, func(t *testing.T) {
result := systemd.ParseServiceSubState(test.input)
assert.Equal(t, test.expected, result)
})
}
}
func TestServiceUpdateCPUPercent(t *testing.T) {
t.Run("initial call sets CPU to 0", func(t *testing.T) {
service := &systemd.Service{}
service.UpdateCPUPercent(1000)
assert.Equal(t, 0.0, service.Cpu)
assert.Equal(t, uint64(1000), service.PrevCpuUsage)
assert.False(t, service.PrevReadTime.IsZero())
})
t.Run("subsequent call calculates CPU percentage", func(t *testing.T) {
service := &systemd.Service{}
service.PrevCpuUsage = 1000
service.PrevReadTime = time.Now().Add(-time.Second)
service.UpdateCPUPercent(8000000000) // 8 seconds of CPU time
// CPU usage should be positive and reasonable
assert.Greater(t, service.Cpu, 0.0, "CPU usage should be positive")
assert.LessOrEqual(t, service.Cpu, 100.0, "CPU usage should not exceed 100%")
assert.Equal(t, uint64(8000000000), service.PrevCpuUsage)
assert.Greater(t, service.CpuPeak, 0.0, "CPU peak should be set")
})
t.Run("CPU peak updates only when higher", func(t *testing.T) {
service := &systemd.Service{}
service.PrevCpuUsage = 1000
service.PrevReadTime = time.Now().Add(-time.Second)
service.UpdateCPUPercent(8000000000) // Set initial peak to ~50%
initialPeak := service.CpuPeak
// Now try with much lower CPU usage - should not update peak
service.PrevReadTime = time.Now().Add(-time.Second)
service.UpdateCPUPercent(1000000) // Much lower usage
assert.Equal(t, initialPeak, service.CpuPeak, "Peak should not update for lower CPU usage")
})
t.Run("handles zero duration", func(t *testing.T) {
service := &systemd.Service{}
service.PrevCpuUsage = 1000
now := time.Now()
service.PrevReadTime = now
// Mock time.Now() to return the same time to ensure zero duration
// Since we can't mock time in Go easily, we'll check the logic manually
// The zero duration case happens when duration <= 0
assert.Equal(t, 0.0, service.Cpu, "CPU should start at 0")
})
t.Run("handles CPU usage wraparound", func(t *testing.T) {
service := &systemd.Service{}
// Simulate wraparound where new usage is less than previous
service.PrevCpuUsage = 1000
service.PrevReadTime = time.Now().Add(-time.Second)
service.UpdateCPUPercent(500) // Less than previous, should reset
assert.Equal(t, 0.0, service.Cpu)
})
}