add basic systemd service monitoring (#1153)

Co-authored-by: Shelby Tucker <shelby.tucker@gmail.com>
This commit is contained in:
henrygd
2025-11-10 17:04:19 -05:00
parent cbe6ee6499
commit 01d20562f0
30 changed files with 2245 additions and 63 deletions

View File

@@ -270,6 +270,8 @@ func (h *Hub) registerApiRoutes(se *core.ServeEvent) error {
apiAuth.DELETE("/user-alerts", alerts.DeleteUserAlerts)
// get SMART data
apiAuth.GET("/smart", h.getSmartData)
// get systemd service details
apiAuth.GET("/systemd/info", h.getSystemdInfo)
// /containers routes
if enabled, _ := GetEnv("CONTAINER_DETAILS"); enabled != "false" {
// get container logs
@@ -342,6 +344,27 @@ func (h *Hub) getContainerInfo(e *core.RequestEvent) error {
}, "info")
}
// getSystemdInfo handles GET /api/beszel/systemd/info requests
func (h *Hub) getSystemdInfo(e *core.RequestEvent) error {
query := e.Request.URL.Query()
systemID := query.Get("system")
serviceName := query.Get("service")
if systemID == "" || serviceName == "" {
return e.JSON(http.StatusBadRequest, map[string]string{"error": "system and service parameters are required"})
}
system, err := h.sm.GetSystem(systemID)
if err != nil {
return e.JSON(http.StatusNotFound, map[string]string{"error": "system not found"})
}
details, err := system.FetchSystemdInfoFromAgent(serviceName)
if err != nil {
return e.JSON(http.StatusNotFound, map[string]string{"error": err.Error()})
}
e.Response.Header().Set("Cache-Control", "public, max-age=60")
return e.JSON(http.StatusOK, map[string]any{"details": details})
}
// getSmartData handles GET /api/beszel/smart requests
func (h *Hub) getSmartData(e *core.RequestEvent) error {
systemID := e.Request.URL.Query().Get("system")

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"hash/fnv"
"math/rand"
"net"
"strings"
@@ -15,6 +16,7 @@ import (
"github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel/internal/entities/systemd"
"github.com/henrygd/beszel"
@@ -171,6 +173,14 @@ func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error
return err
}
}
// add new systemd_stats record
if len(data.SystemdServices) > 0 {
if err := createSystemdStatsRecords(txApp, data.SystemdServices, sys.Id); err != nil {
return err
}
}
// update system record (do this last because it triggers alerts and we need above records to be inserted first)
systemRecord.Set("status", up)
@@ -184,11 +194,50 @@ func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error
return systemRecord, err
}
func createSystemdStatsRecords(app core.App, data []*systemd.Service, systemId string) error {
if len(data) == 0 {
return nil
}
// shared params for all records
params := dbx.Params{
"system": systemId,
"updated": time.Now().UTC().UnixMilli(),
}
valueStrings := make([]string, 0, len(data))
for i, service := range data {
suffix := fmt.Sprintf("%d", i)
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:state%[1]s}, {:sub%[1]s}, {:cpu%[1]s}, {:cpuPeak%[1]s}, {:memory%[1]s}, {:memPeak%[1]s}, {:updated})", suffix))
params["id"+suffix] = getSystemdServiceId(systemId, service.Name)
params["name"+suffix] = service.Name
params["state"+suffix] = service.State
params["sub"+suffix] = service.Sub
params["cpu"+suffix] = service.Cpu
params["cpuPeak"+suffix] = service.CpuPeak
params["memory"+suffix] = service.Mem
params["memPeak"+suffix] = service.MemPeak
}
queryString := fmt.Sprintf(
"INSERT INTO systemd_services (id, system, name, state, sub, cpu, cpuPeak, memory, memPeak, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, state = excluded.state, sub = excluded.sub, cpu = excluded.cpu, cpuPeak = excluded.cpuPeak, memory = excluded.memory, memPeak = excluded.memPeak, updated = excluded.updated",
strings.Join(valueStrings, ","),
)
_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
return err
}
// getSystemdServiceId generates a deterministic unique id for a systemd service
func getSystemdServiceId(systemId string, serviceName string) string {
hash := fnv.New32a()
hash.Write([]byte(systemId + serviceName))
return fmt.Sprintf("%x", hash.Sum32())
}
// createContainerRecords creates container records
func createContainerRecords(app core.App, data []*container.Stats, systemId string) error {
if len(data) == 0 {
return nil
}
// shared params for all records
params := dbx.Params{
"system": systemId,
"updated": time.Now().UTC().UnixMilli(),
@@ -340,6 +389,52 @@ func (sys *System) FetchContainerLogsFromAgent(containerID string) (string, erro
return sys.fetchStringFromAgentViaSSH(common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, "no logs in response")
}
// FetchSystemdInfoFromAgent fetches detailed systemd service information from the agent
func (sys *System) FetchSystemdInfoFromAgent(serviceName string) (systemd.ServiceDetails, error) {
// fetch via websocket
if sys.WsConn != nil && sys.WsConn.IsConnected() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
return sys.WsConn.RequestSystemdInfo(ctx, serviceName)
}
var result systemd.ServiceDetails
err := sys.runSSHOperation(5*time.Second, 1, func(session *ssh.Session) (bool, error) {
stdout, err := session.StdoutPipe()
if err != nil {
return false, err
}
stdin, stdinErr := session.StdinPipe()
if stdinErr != nil {
return false, stdinErr
}
if err := session.Shell(); err != nil {
return false, err
}
req := common.HubRequest[any]{Action: common.GetSystemdInfo, Data: common.SystemdInfoRequest{ServiceName: serviceName}}
if err := cbor.NewEncoder(stdin).Encode(req); err != nil {
return false, err
}
_ = stdin.Close()
var resp common.AgentResponse
if err := cbor.NewDecoder(stdout).Decode(&resp); err != nil {
return false, err
}
if resp.ServiceInfo == nil {
if resp.Error != "" {
return false, errors.New(resp.Error)
}
return false, errors.New("no systemd info in response")
}
result = resp.ServiceInfo
return false, nil
})
return result, err
}
// FetchSmartDataFromAgent fetches SMART data from the agent
func (sys *System) FetchSmartDataFromAgent() (map[string]any, error) {
// fetch via websocket

View File

@@ -0,0 +1,75 @@
//go:build testing
package systems
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestGetSystemdServiceId(t *testing.T) {
t.Run("deterministic output", func(t *testing.T) {
systemId := "sys-123"
serviceName := "nginx.service"
// Call multiple times and ensure same result
id1 := getSystemdServiceId(systemId, serviceName)
id2 := getSystemdServiceId(systemId, serviceName)
id3 := getSystemdServiceId(systemId, serviceName)
assert.Equal(t, id1, id2)
assert.Equal(t, id2, id3)
assert.NotEmpty(t, id1)
})
t.Run("different inputs produce different ids", func(t *testing.T) {
systemId1 := "sys-123"
systemId2 := "sys-456"
serviceName1 := "nginx.service"
serviceName2 := "apache.service"
id1 := getSystemdServiceId(systemId1, serviceName1)
id2 := getSystemdServiceId(systemId2, serviceName1)
id3 := getSystemdServiceId(systemId1, serviceName2)
id4 := getSystemdServiceId(systemId2, serviceName2)
// All IDs should be different
assert.NotEqual(t, id1, id2)
assert.NotEqual(t, id1, id3)
assert.NotEqual(t, id1, id4)
assert.NotEqual(t, id2, id3)
assert.NotEqual(t, id2, id4)
assert.NotEqual(t, id3, id4)
})
t.Run("consistent length", func(t *testing.T) {
testCases := []struct {
systemId string
serviceName string
}{
{"short", "short.service"},
{"very-long-system-id-that-might-be-used-in-practice", "very-long-service-name.service"},
{"", "empty-system.service"},
{"empty-service", ""},
{"", ""},
}
for _, tc := range testCases {
id := getSystemdServiceId(tc.systemId, tc.serviceName)
// FNV-32 produces 8 hex characters
assert.Len(t, id, 8, "ID should be 8 characters for systemId='%s', serviceName='%s'", tc.systemId, tc.serviceName)
}
})
t.Run("hexadecimal output", func(t *testing.T) {
id := getSystemdServiceId("test-system", "test-service")
assert.NotEmpty(t, id)
// Should only contain hexadecimal characters
for _, char := range id {
assert.True(t, (char >= '0' && char <= '9') || (char >= 'a' && char <= 'f'),
"ID should only contain hexadecimal characters, got: %s", id)
}
})
}

View File

@@ -7,6 +7,7 @@ import (
"github.com/fxamacker/cbor/v2"
"github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel/internal/entities/systemd"
"github.com/lxzan/gws"
"golang.org/x/crypto/ssh"
)
@@ -115,6 +116,44 @@ func (ws *WsConn) RequestContainerInfo(ctx context.Context, containerID string)
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// RequestSystemdInfo requests detailed information about a systemd service via WebSocket.
func (ws *WsConn) RequestSystemdInfo(ctx context.Context, serviceName string) (systemd.ServiceDetails, error) {
if !ws.IsConnected() {
return nil, gws.ErrConnClosed
}
req, err := ws.requestManager.SendRequest(ctx, common.GetSystemdInfo, common.SystemdInfoRequest{ServiceName: serviceName})
if err != nil {
return nil, err
}
var result systemd.ServiceDetails
handler := &systemdInfoHandler{result: &result}
if err := ws.handleAgentRequest(req, handler); err != nil {
return nil, err
}
return result, nil
}
// systemdInfoHandler parses ServiceDetails from AgentResponse
type systemdInfoHandler struct {
BaseHandler
result *systemd.ServiceDetails
}
func (h *systemdInfoHandler) Handle(agentResponse common.AgentResponse) error {
if agentResponse.ServiceInfo == nil {
return errors.New("no systemd info in response")
}
*h.result = agentResponse.ServiceInfo
return nil
}
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// RequestSmartData requests SMART data via WebSocket.
func (ws *WsConn) RequestSmartData(ctx context.Context) (map[string]any, error) {
if !ws.IsConnected() {

View File

@@ -0,0 +1,75 @@
//go:build testing
package ws
import (
"testing"
"github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/entities/systemd"
"github.com/stretchr/testify/assert"
)
func TestSystemdInfoHandlerSuccess(t *testing.T) {
handler := &systemdInfoHandler{
result: &systemd.ServiceDetails{},
}
// Test successful handling with valid ServiceInfo
testDetails := systemd.ServiceDetails{
"Id": "nginx.service",
"ActiveState": "active",
"SubState": "running",
"Description": "A high performance web server",
"ExecMainPID": 1234,
"MemoryCurrent": 1024000,
}
response := common.AgentResponse{
ServiceInfo: testDetails,
}
err := handler.Handle(response)
assert.NoError(t, err)
assert.Equal(t, testDetails, *handler.result)
}
func TestSystemdInfoHandlerError(t *testing.T) {
handler := &systemdInfoHandler{
result: &systemd.ServiceDetails{},
}
// Test error handling when ServiceInfo is nil
response := common.AgentResponse{
ServiceInfo: nil,
Error: "service not found",
}
err := handler.Handle(response)
assert.Error(t, err)
assert.Equal(t, "no systemd info in response", err.Error())
}
func TestSystemdInfoHandlerEmptyResponse(t *testing.T) {
handler := &systemdInfoHandler{
result: &systemd.ServiceDetails{},
}
// Test with completely empty response
response := common.AgentResponse{}
err := handler.Handle(response)
assert.Error(t, err)
assert.Equal(t, "no systemd info in response", err.Error())
}
func TestSystemdInfoHandlerLegacyNotSupported(t *testing.T) {
handler := &systemdInfoHandler{
result: &systemd.ServiceDetails{},
}
// Test that legacy format is not supported
err := handler.HandleLegacy([]byte("some data"))
assert.Error(t, err)
assert.Equal(t, "legacy format not supported", err.Error())
}