add basic systemd service monitoring (#1153)

Co-authored-by: Shelby Tucker <shelby.tucker@gmail.com>
This commit is contained in:
henrygd
2025-11-10 17:04:19 -05:00
parent cbe6ee6499
commit 01d20562f0
30 changed files with 2245 additions and 63 deletions

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"hash/fnv"
"math/rand"
"net"
"strings"
@@ -15,6 +16,7 @@ import (
"github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel/internal/entities/systemd"
"github.com/henrygd/beszel"
@@ -171,6 +173,14 @@ func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error
return err
}
}
// add new systemd_stats record
if len(data.SystemdServices) > 0 {
if err := createSystemdStatsRecords(txApp, data.SystemdServices, sys.Id); err != nil {
return err
}
}
// update system record (do this last because it triggers alerts and we need above records to be inserted first)
systemRecord.Set("status", up)
@@ -184,11 +194,50 @@ func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error
return systemRecord, err
}
func createSystemdStatsRecords(app core.App, data []*systemd.Service, systemId string) error {
if len(data) == 0 {
return nil
}
// shared params for all records
params := dbx.Params{
"system": systemId,
"updated": time.Now().UTC().UnixMilli(),
}
valueStrings := make([]string, 0, len(data))
for i, service := range data {
suffix := fmt.Sprintf("%d", i)
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:state%[1]s}, {:sub%[1]s}, {:cpu%[1]s}, {:cpuPeak%[1]s}, {:memory%[1]s}, {:memPeak%[1]s}, {:updated})", suffix))
params["id"+suffix] = getSystemdServiceId(systemId, service.Name)
params["name"+suffix] = service.Name
params["state"+suffix] = service.State
params["sub"+suffix] = service.Sub
params["cpu"+suffix] = service.Cpu
params["cpuPeak"+suffix] = service.CpuPeak
params["memory"+suffix] = service.Mem
params["memPeak"+suffix] = service.MemPeak
}
queryString := fmt.Sprintf(
"INSERT INTO systemd_services (id, system, name, state, sub, cpu, cpuPeak, memory, memPeak, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, state = excluded.state, sub = excluded.sub, cpu = excluded.cpu, cpuPeak = excluded.cpuPeak, memory = excluded.memory, memPeak = excluded.memPeak, updated = excluded.updated",
strings.Join(valueStrings, ","),
)
_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
return err
}
// getSystemdServiceId generates a deterministic unique id for a systemd service
func getSystemdServiceId(systemId string, serviceName string) string {
hash := fnv.New32a()
hash.Write([]byte(systemId + serviceName))
return fmt.Sprintf("%x", hash.Sum32())
}
// createContainerRecords creates container records
func createContainerRecords(app core.App, data []*container.Stats, systemId string) error {
if len(data) == 0 {
return nil
}
// shared params for all records
params := dbx.Params{
"system": systemId,
"updated": time.Now().UTC().UnixMilli(),
@@ -340,6 +389,52 @@ func (sys *System) FetchContainerLogsFromAgent(containerID string) (string, erro
return sys.fetchStringFromAgentViaSSH(common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, "no logs in response")
}
// FetchSystemdInfoFromAgent fetches detailed systemd service information from the agent
func (sys *System) FetchSystemdInfoFromAgent(serviceName string) (systemd.ServiceDetails, error) {
// fetch via websocket
if sys.WsConn != nil && sys.WsConn.IsConnected() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
return sys.WsConn.RequestSystemdInfo(ctx, serviceName)
}
var result systemd.ServiceDetails
err := sys.runSSHOperation(5*time.Second, 1, func(session *ssh.Session) (bool, error) {
stdout, err := session.StdoutPipe()
if err != nil {
return false, err
}
stdin, stdinErr := session.StdinPipe()
if stdinErr != nil {
return false, stdinErr
}
if err := session.Shell(); err != nil {
return false, err
}
req := common.HubRequest[any]{Action: common.GetSystemdInfo, Data: common.SystemdInfoRequest{ServiceName: serviceName}}
if err := cbor.NewEncoder(stdin).Encode(req); err != nil {
return false, err
}
_ = stdin.Close()
var resp common.AgentResponse
if err := cbor.NewDecoder(stdout).Decode(&resp); err != nil {
return false, err
}
if resp.ServiceInfo == nil {
if resp.Error != "" {
return false, errors.New(resp.Error)
}
return false, errors.New("no systemd info in response")
}
result = resp.ServiceInfo
return false, nil
})
return result, err
}
// FetchSmartDataFromAgent fetches SMART data from the agent
func (sys *System) FetchSmartDataFromAgent() (map[string]any, error) {
// fetch via websocket

View File

@@ -0,0 +1,75 @@
//go:build testing
package systems
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestGetSystemdServiceId(t *testing.T) {
t.Run("deterministic output", func(t *testing.T) {
systemId := "sys-123"
serviceName := "nginx.service"
// Call multiple times and ensure same result
id1 := getSystemdServiceId(systemId, serviceName)
id2 := getSystemdServiceId(systemId, serviceName)
id3 := getSystemdServiceId(systemId, serviceName)
assert.Equal(t, id1, id2)
assert.Equal(t, id2, id3)
assert.NotEmpty(t, id1)
})
t.Run("different inputs produce different ids", func(t *testing.T) {
systemId1 := "sys-123"
systemId2 := "sys-456"
serviceName1 := "nginx.service"
serviceName2 := "apache.service"
id1 := getSystemdServiceId(systemId1, serviceName1)
id2 := getSystemdServiceId(systemId2, serviceName1)
id3 := getSystemdServiceId(systemId1, serviceName2)
id4 := getSystemdServiceId(systemId2, serviceName2)
// All IDs should be different
assert.NotEqual(t, id1, id2)
assert.NotEqual(t, id1, id3)
assert.NotEqual(t, id1, id4)
assert.NotEqual(t, id2, id3)
assert.NotEqual(t, id2, id4)
assert.NotEqual(t, id3, id4)
})
t.Run("consistent length", func(t *testing.T) {
testCases := []struct {
systemId string
serviceName string
}{
{"short", "short.service"},
{"very-long-system-id-that-might-be-used-in-practice", "very-long-service-name.service"},
{"", "empty-system.service"},
{"empty-service", ""},
{"", ""},
}
for _, tc := range testCases {
id := getSystemdServiceId(tc.systemId, tc.serviceName)
// FNV-32 produces 8 hex characters
assert.Len(t, id, 8, "ID should be 8 characters for systemId='%s', serviceName='%s'", tc.systemId, tc.serviceName)
}
})
t.Run("hexadecimal output", func(t *testing.T) {
id := getSystemdServiceId("test-system", "test-service")
assert.NotEmpty(t, id)
// Should only contain hexadecimal characters
for _, char := range id {
assert.True(t, (char >= '0' && char <= '9') || (char >= 'a' && char <= 'f'),
"ID should only contain hexadecimal characters, got: %s", id)
}
})
}