mirror of
https://github.com/henrygd/beszel.git
synced 2025-12-17 02:36:17 +01:00
add basic systemd service monitoring (#1153)
Co-authored-by: Shelby Tucker <shelby.tucker@gmail.com>
This commit is contained in:
@@ -43,6 +43,7 @@ type Agent struct {
|
||||
dataDir string // Directory for persisting data
|
||||
keys []gossh.PublicKey // SSH public keys
|
||||
smartManager *SmartManager // Manages SMART data
|
||||
systemdManager *systemdManager // Manages systemd services
|
||||
}
|
||||
|
||||
// NewAgent creates a new agent with the given data directory for persisting data.
|
||||
@@ -101,6 +102,11 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
// initialize docker manager
|
||||
agent.dockerManager = newDockerManager(agent)
|
||||
|
||||
agent.systemdManager, err = newSystemdManager()
|
||||
if err != nil {
|
||||
slog.Debug("Systemd", "err", err)
|
||||
}
|
||||
|
||||
agent.smartManager, err = NewSmartManager()
|
||||
if err != nil {
|
||||
slog.Debug("SMART", "err", err)
|
||||
@@ -154,6 +160,11 @@ func (a *Agent) gatherStats(cacheTimeMs uint16) *system.CombinedData {
|
||||
}
|
||||
}
|
||||
|
||||
// skip updating systemd services if cache time is not the default 60sec interval
|
||||
if a.systemdManager != nil && cacheTimeMs == 60_000 && a.systemdManager.hasFreshStats {
|
||||
data.SystemdServices = a.systemdManager.getServiceStats(nil, false)
|
||||
}
|
||||
|
||||
data.Stats.ExtraFs = make(map[string]*system.FsStats)
|
||||
for name, stats := range a.fsStats {
|
||||
if !stats.Root && stats.DiskTotal > 0 {
|
||||
|
||||
@@ -17,6 +17,7 @@ import (
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/lxzan/gws"
|
||||
@@ -276,6 +277,8 @@ func (client *WebSocketClient) sendResponse(data any, requestID *uint32) error {
|
||||
response.String = &v
|
||||
case map[string]smart.SmartData:
|
||||
response.SmartData = v
|
||||
case systemd.ServiceDetails:
|
||||
response.ServiceInfo = v
|
||||
// case []byte:
|
||||
// response.RawBytes = v
|
||||
// case string:
|
||||
|
||||
@@ -50,6 +50,7 @@ func NewHandlerRegistry() *HandlerRegistry {
|
||||
registry.Register(common.GetContainerLogs, &GetContainerLogsHandler{})
|
||||
registry.Register(common.GetContainerInfo, &GetContainerInfoHandler{})
|
||||
registry.Register(common.GetSmartData, &GetSmartDataHandler{})
|
||||
registry.Register(common.GetSystemdInfo, &GetSystemdInfoHandler{})
|
||||
|
||||
return registry
|
||||
}
|
||||
@@ -174,3 +175,31 @@ func (h *GetSmartDataHandler) Handle(hctx *HandlerContext) error {
|
||||
data := hctx.Agent.smartManager.GetCurrentData()
|
||||
return hctx.SendResponse(data, hctx.RequestID)
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// GetSystemdInfoHandler handles detailed systemd service info requests
|
||||
type GetSystemdInfoHandler struct{}
|
||||
|
||||
func (h *GetSystemdInfoHandler) Handle(hctx *HandlerContext) error {
|
||||
if hctx.Agent.systemdManager == nil {
|
||||
return errors.ErrUnsupported
|
||||
}
|
||||
|
||||
var req common.SystemdInfoRequest
|
||||
if err := cbor.Unmarshal(hctx.Request.Data, &req); err != nil {
|
||||
return err
|
||||
}
|
||||
if req.ServiceName == "" {
|
||||
return errors.New("service name is required")
|
||||
}
|
||||
|
||||
details, err := hctx.Agent.systemdManager.getServiceDetails(req.ServiceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return hctx.SendResponse(details, hctx.RequestID)
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
@@ -173,6 +174,8 @@ func (a *Agent) handleSSHRequest(w io.Writer, req *common.HubRequest[cbor.RawMes
|
||||
response.String = &v
|
||||
case map[string]smart.SmartData:
|
||||
response.SmartData = v
|
||||
case systemd.ServiceDetails:
|
||||
response.ServiceInfo = v
|
||||
default:
|
||||
response.Error = fmt.Sprintf("unsupported response type: %T", data)
|
||||
}
|
||||
|
||||
229
agent/systemd.go
Normal file
229
agent/systemd.go
Normal file
@@ -0,0 +1,229 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"maps"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
)
|
||||
|
||||
var (
|
||||
errNoActiveTime = errors.New("no active time")
|
||||
)
|
||||
|
||||
// systemdManager manages the collection of systemd service statistics.
|
||||
type systemdManager struct {
|
||||
sync.Mutex
|
||||
serviceStatsMap map[string]*systemd.Service
|
||||
isRunning bool
|
||||
hasFreshStats bool
|
||||
}
|
||||
|
||||
// newSystemdManager creates a new systemdManager.
|
||||
func newSystemdManager() (*systemdManager, error) {
|
||||
conn, err := dbus.NewSystemConnectionContext(context.Background())
|
||||
if err != nil {
|
||||
slog.Warn("Error connecting to systemd", "err", err, "ref", "https://beszel.dev/guide/systemd")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
manager := &systemdManager{
|
||||
serviceStatsMap: make(map[string]*systemd.Service),
|
||||
}
|
||||
|
||||
manager.startWorker(conn)
|
||||
|
||||
return manager, nil
|
||||
}
|
||||
|
||||
func (sm *systemdManager) startWorker(conn *dbus.Conn) {
|
||||
if sm.isRunning {
|
||||
return
|
||||
}
|
||||
sm.isRunning = true
|
||||
// prime the service stats map with the current services
|
||||
_ = sm.getServiceStats(conn, true)
|
||||
// update the services every 10 minutes
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(time.Minute * 10)
|
||||
_ = sm.getServiceStats(nil, true)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// getServiceStats collects statistics for all running systemd services.
|
||||
func (sm *systemdManager) getServiceStats(conn *dbus.Conn, refresh bool) []*systemd.Service {
|
||||
// start := time.Now()
|
||||
// defer func() {
|
||||
// slog.Info("systemdManager.getServiceStats", "duration", time.Since(start))
|
||||
// }()
|
||||
|
||||
var services []*systemd.Service
|
||||
var err error
|
||||
|
||||
if !refresh {
|
||||
// return nil
|
||||
sm.Lock()
|
||||
defer sm.Unlock()
|
||||
for _, service := range sm.serviceStatsMap {
|
||||
services = append(services, service)
|
||||
}
|
||||
sm.hasFreshStats = false
|
||||
return services
|
||||
}
|
||||
|
||||
if conn == nil || !conn.Connected() {
|
||||
conn, err = dbus.NewSystemConnectionContext(context.Background())
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
defer conn.Close()
|
||||
}
|
||||
|
||||
units, err := conn.ListUnitsByPatternsContext(context.Background(), []string{"loaded"}, []string{"*.service"})
|
||||
if err != nil {
|
||||
slog.Error("Error listing systemd service units", "err", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, unit := range units {
|
||||
service, err := sm.updateServiceStats(conn, unit)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
services = append(services, service)
|
||||
}
|
||||
sm.hasFreshStats = true
|
||||
return services
|
||||
}
|
||||
|
||||
// updateServiceStats updates the statistics for a single systemd service.
|
||||
func (sm *systemdManager) updateServiceStats(conn *dbus.Conn, unit dbus.UnitStatus) (*systemd.Service, error) {
|
||||
sm.Lock()
|
||||
defer sm.Unlock()
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// if service has never been active (no active since time), skip it
|
||||
if activeEnterTsProp, err := conn.GetUnitTypePropertyContext(ctx, unit.Name, "Unit", "ActiveEnterTimestamp"); err == nil {
|
||||
if ts, ok := activeEnterTsProp.Value.Value().(uint64); !ok || ts == 0 || ts == math.MaxUint64 {
|
||||
return nil, errNoActiveTime
|
||||
}
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
service, serviceExists := sm.serviceStatsMap[unit.Name]
|
||||
if !serviceExists {
|
||||
service = &systemd.Service{Name: unescapeServiceName(strings.TrimSuffix(unit.Name, ".service"))}
|
||||
sm.serviceStatsMap[unit.Name] = service
|
||||
}
|
||||
|
||||
memPeak := service.MemPeak
|
||||
if memPeakProp, err := conn.GetUnitTypePropertyContext(ctx, unit.Name, "Service", "MemoryPeak"); err == nil {
|
||||
// If memPeak is MaxUint64 the api is saying it's not available
|
||||
if v, ok := memPeakProp.Value.Value().(uint64); ok && v != math.MaxUint64 {
|
||||
memPeak = v
|
||||
}
|
||||
}
|
||||
|
||||
var memUsage uint64
|
||||
if memProp, err := conn.GetUnitTypePropertyContext(ctx, unit.Name, "Service", "MemoryCurrent"); err == nil {
|
||||
// If memUsage is MaxUint64 the api is saying it's not available
|
||||
if v, ok := memProp.Value.Value().(uint64); ok && v != math.MaxUint64 {
|
||||
memUsage = v
|
||||
}
|
||||
}
|
||||
|
||||
service.State = systemd.ParseServiceStatus(unit.ActiveState)
|
||||
service.Sub = systemd.ParseServiceSubState(unit.SubState)
|
||||
|
||||
// some systems always return 0 for mem peak, so we should update the peak if the current usage is greater
|
||||
if memUsage > memPeak {
|
||||
memPeak = memUsage
|
||||
}
|
||||
|
||||
var cpuUsage uint64
|
||||
if cpuProp, err := conn.GetUnitTypePropertyContext(ctx, unit.Name, "Service", "CPUUsageNSec"); err == nil {
|
||||
if v, ok := cpuProp.Value.Value().(uint64); ok {
|
||||
cpuUsage = v
|
||||
}
|
||||
}
|
||||
|
||||
service.Mem = memUsage
|
||||
if memPeak > service.MemPeak {
|
||||
service.MemPeak = memPeak
|
||||
}
|
||||
service.UpdateCPUPercent(cpuUsage)
|
||||
|
||||
return service, nil
|
||||
}
|
||||
|
||||
// getServiceDetails collects extended information for a specific systemd service.
|
||||
func (sm *systemdManager) getServiceDetails(serviceName string) (systemd.ServiceDetails, error) {
|
||||
conn, err := dbus.NewSystemConnectionContext(context.Background())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
unitName := serviceName
|
||||
if !strings.HasSuffix(unitName, ".service") {
|
||||
unitName += ".service"
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
props, err := conn.GetUnitPropertiesContext(ctx, unitName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Start with all unit properties
|
||||
details := make(systemd.ServiceDetails)
|
||||
maps.Copy(details, props)
|
||||
|
||||
// // Add service-specific properties
|
||||
servicePropNames := []string{
|
||||
"MainPID", "ExecMainPID", "TasksCurrent", "TasksMax",
|
||||
"MemoryCurrent", "MemoryPeak", "MemoryLimit", "CPUUsageNSec",
|
||||
"NRestarts", "ExecMainStartTimestampRealtime", "Result",
|
||||
}
|
||||
|
||||
for _, propName := range servicePropNames {
|
||||
if variant, err := conn.GetUnitTypePropertyContext(ctx, unitName, "Service", propName); err == nil {
|
||||
value := variant.Value.Value()
|
||||
// Check if the value is MaxUint64, which indicates unlimited/infinite
|
||||
if uint64Value, ok := value.(uint64); ok && uint64Value == math.MaxUint64 {
|
||||
// Set to nil to indicate unlimited - frontend will handle this appropriately
|
||||
details[propName] = nil
|
||||
} else {
|
||||
details[propName] = value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return details, nil
|
||||
}
|
||||
|
||||
// unescapeServiceName unescapes systemd service names that contain C-style escape sequences like \x2d
|
||||
func unescapeServiceName(name string) string {
|
||||
if !strings.Contains(name, "\\x") {
|
||||
return name
|
||||
}
|
||||
unescaped, err := strconv.Unquote("\"" + name + "\"")
|
||||
if err != nil {
|
||||
return name
|
||||
}
|
||||
return unescaped
|
||||
}
|
||||
28
agent/systemd_nonlinux.go
Normal file
28
agent/systemd_nonlinux.go
Normal file
@@ -0,0 +1,28 @@
|
||||
//go:build !linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
)
|
||||
|
||||
// systemdManager manages the collection of systemd service statistics.
|
||||
type systemdManager struct {
|
||||
hasFreshStats bool
|
||||
}
|
||||
|
||||
// newSystemdManager creates a new systemdManager.
|
||||
func newSystemdManager() (*systemdManager, error) {
|
||||
return &systemdManager{}, nil
|
||||
}
|
||||
|
||||
// getServiceStats returns nil for non-linux systems.
|
||||
func (sm *systemdManager) getServiceStats(conn any, refresh bool) []*systemd.Service {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sm *systemdManager) getServiceDetails(string) (systemd.ServiceDetails, error) {
|
||||
return nil, errors.New("systemd manager unavailable")
|
||||
}
|
||||
53
agent/systemd_nonlinux_test.go
Normal file
53
agent/systemd_nonlinux_test.go
Normal file
@@ -0,0 +1,53 @@
|
||||
//go:build !linux && testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestNewSystemdManager(t *testing.T) {
|
||||
manager, err := newSystemdManager()
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, manager)
|
||||
}
|
||||
|
||||
func TestSystemdManagerGetServiceStats(t *testing.T) {
|
||||
manager, err := newSystemdManager()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test with refresh = true
|
||||
result := manager.getServiceStats(true)
|
||||
assert.Nil(t, result)
|
||||
|
||||
// Test with refresh = false
|
||||
result = manager.getServiceStats(false)
|
||||
assert.Nil(t, result)
|
||||
}
|
||||
|
||||
func TestSystemdManagerGetServiceDetails(t *testing.T) {
|
||||
manager, err := newSystemdManager()
|
||||
assert.NoError(t, err)
|
||||
|
||||
result, err := manager.getServiceDetails("any-service")
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, "systemd manager unavailable", err.Error())
|
||||
assert.Nil(t, result)
|
||||
|
||||
// Test with empty service name
|
||||
result, err = manager.getServiceDetails("")
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, "systemd manager unavailable", err.Error())
|
||||
assert.Nil(t, result)
|
||||
}
|
||||
|
||||
func TestSystemdManagerFields(t *testing.T) {
|
||||
manager, err := newSystemdManager()
|
||||
assert.NoError(t, err)
|
||||
|
||||
// The non-linux manager should be a simple struct with no special fields
|
||||
// We can't test private fields directly, but we can test the methods work
|
||||
assert.NotNil(t, manager)
|
||||
}
|
||||
48
agent/systemd_test.go
Normal file
48
agent/systemd_test.go
Normal file
@@ -0,0 +1,48 @@
|
||||
//go:build linux && testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestUnescapeServiceName(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"nginx.service", "nginx.service"}, // No escaping needed
|
||||
{"test\\x2dwith\\x2ddashes.service", "test-with-dashes.service"}, // \x2d is dash
|
||||
{"service\\x20with\\x20spaces.service", "service with spaces.service"}, // \x20 is space
|
||||
{"mixed\\x2dand\\x2dnormal", "mixed-and-normal"}, // Mixed escaped and normal
|
||||
{"no-escape-here", "no-escape-here"}, // No escape sequences
|
||||
{"", ""}, // Empty string
|
||||
{"\\x2d\\x2d", "--"}, // Multiple escapes
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.input, func(t *testing.T) {
|
||||
result := unescapeServiceName(test.input)
|
||||
assert.Equal(t, test.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnescapeServiceNameInvalid(t *testing.T) {
|
||||
// Test invalid escape sequences - should return original string
|
||||
invalidInputs := []string{
|
||||
"invalid\\x", // Incomplete escape
|
||||
"invalid\\xZZ", // Invalid hex
|
||||
"invalid\\x2", // Incomplete hex
|
||||
"invalid\\xyz", // Not a valid escape
|
||||
}
|
||||
|
||||
for _, input := range invalidInputs {
|
||||
t.Run(input, func(t *testing.T) {
|
||||
result := unescapeServiceName(input)
|
||||
assert.Equal(t, input, result, "Invalid escape sequences should return original string")
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user