expand container monitoring functionality (#928)

- Add new /containers route with virtualized table showing all containers across systems
- Implement container stats collection (CPU, memory, network usage) with health status tracking
- Add container logs and info API endpoints with syntax highlighting using Shiki
- Create detailed container views with fullscreen logs/info dialogs and refresh functionality
- Add container table to individual system pages with lazy loading
- Implement container record storage with automatic cleanup and historical averaging
- Update navbar with container navigation icon
- Extract reusable ActiveAlerts component from home page
- Add FooterRepoLink component for consistent GitHub/version display
- Enhance filtering and search capabilities across container tables
This commit is contained in:
henrygd
2025-10-18 16:32:16 -04:00
parent 0d464787f2
commit 5360f762e4
33 changed files with 1558 additions and 209 deletions

View File

@@ -236,7 +236,10 @@ func (h *Hub) registerApiRoutes(se *core.ServeEvent) error {
// update / delete user alerts
apiAuth.POST("/user-alerts", alerts.UpsertUserAlerts)
apiAuth.DELETE("/user-alerts", alerts.DeleteUserAlerts)
// get container logs
apiAuth.GET("/containers/logs", h.getContainerLogs)
// get container info
apiAuth.GET("/containers/info", h.getContainerInfo)
return nil
}
@@ -267,6 +270,41 @@ func (h *Hub) getUniversalToken(e *core.RequestEvent) error {
return e.JSON(http.StatusOK, response)
}
// containerRequestHandler handles both container logs and info requests
func (h *Hub) containerRequestHandler(e *core.RequestEvent, fetchFunc func(*systems.System, string) (string, error), responseKey string) error {
systemID := e.Request.URL.Query().Get("system")
containerID := e.Request.URL.Query().Get("container")
if systemID == "" || containerID == "" {
return e.JSON(http.StatusBadRequest, map[string]string{"error": "system and container parameters are required"})
}
system, err := h.sm.GetSystem(systemID)
if err != nil {
return e.JSON(http.StatusNotFound, map[string]string{"error": "system not found"})
}
data, err := fetchFunc(system, containerID)
if err != nil {
return e.JSON(http.StatusInternalServerError, map[string]string{"error": err.Error()})
}
return e.JSON(http.StatusOK, map[string]string{responseKey: data})
}
// getContainerLogs handles GET /api/beszel/containers/logs requests
func (h *Hub) getContainerLogs(e *core.RequestEvent) error {
return h.containerRequestHandler(e, func(system *systems.System, containerID string) (string, error) {
return system.FetchContainerLogsFromAgent(containerID)
}, "logs")
}
func (h *Hub) getContainerInfo(e *core.RequestEvent) error {
return h.containerRequestHandler(e, func(system *systems.System, containerID string) (string, error) {
return system.FetchContainerInfoFromAgent(containerID)
}, "info")
}
// generates key pair if it doesn't exist and returns signer
func (h *Hub) GetSSHKey(dataDir string) (ssh.Signer, error) {
if h.signer != nil {

View File

@@ -449,6 +449,47 @@ func TestApiRoutesAuthentication(t *testing.T) {
})
},
},
{
Name: "GET /containers/logs - no auth should fail",
Method: http.MethodGet,
URL: "/api/beszel/containers/logs?system=test-system&container=test-container",
ExpectedStatus: 401,
ExpectedContent: []string{"requires valid"},
TestAppFactory: testAppFactory,
},
{
Name: "GET /containers/logs - with auth but missing system param should fail",
Method: http.MethodGet,
URL: "/api/beszel/containers/logs?container=test-container",
Headers: map[string]string{
"Authorization": userToken,
},
ExpectedStatus: 400,
ExpectedContent: []string{"system parameter is required"},
TestAppFactory: testAppFactory,
},
{
Name: "GET /containers/logs - with auth but missing container param should fail",
Method: http.MethodGet,
URL: "/api/beszel/containers/logs?system=test-system",
Headers: map[string]string{
"Authorization": userToken,
},
ExpectedStatus: 400,
ExpectedContent: []string{"container parameter is required"},
TestAppFactory: testAppFactory,
},
{
Name: "GET /containers/logs - with auth but invalid system should fail",
Method: http.MethodGet,
URL: "/api/beszel/containers/logs?system=invalid-system&container=test-container",
Headers: map[string]string{
"Authorization": userToken,
},
ExpectedStatus: 404,
ExpectedContent: []string{"system not found"},
TestAppFactory: testAppFactory,
},
// Auth Optional Routes - Should work without authentication
{

View File

@@ -13,12 +13,14 @@ import (
"github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/hub/ws"
"github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel"
"github.com/blang/semver"
"github.com/fxamacker/cbor/v2"
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core"
"golang.org/x/crypto/ssh"
)
@@ -135,41 +137,80 @@ func (sys *System) createRecords(data *system.CombinedData) (*core.Record, error
return nil, err
}
hub := sys.manager.hub
// add system_stats and container_stats records
systemStatsCollection, err := hub.FindCachedCollectionByNameOrId("system_stats")
if err != nil {
return nil, err
}
systemStatsRecord := core.NewRecord(systemStatsCollection)
systemStatsRecord.Set("system", systemRecord.Id)
systemStatsRecord.Set("stats", data.Stats)
systemStatsRecord.Set("type", "1m")
if err := hub.SaveNoValidate(systemStatsRecord); err != nil {
return nil, err
}
// add new container_stats record
if len(data.Containers) > 0 {
containerStatsCollection, err := hub.FindCachedCollectionByNameOrId("container_stats")
err = hub.RunInTransaction(func(txApp core.App) error {
// add system_stats and container_stats records
systemStatsCollection, err := txApp.FindCachedCollectionByNameOrId("system_stats")
if err != nil {
return nil, err
return err
}
containerStatsRecord := core.NewRecord(containerStatsCollection)
containerStatsRecord.Set("system", systemRecord.Id)
containerStatsRecord.Set("stats", data.Containers)
containerStatsRecord.Set("type", "1m")
if err := hub.SaveNoValidate(containerStatsRecord); err != nil {
return nil, err
}
}
// update system record (do this last because it triggers alerts and we need above records to be inserted first)
systemRecord.Set("status", up)
systemRecord.Set("info", data.Info)
if err := hub.SaveNoValidate(systemRecord); err != nil {
return nil, err
systemStatsRecord := core.NewRecord(systemStatsCollection)
systemStatsRecord.Set("system", systemRecord.Id)
systemStatsRecord.Set("stats", data.Stats)
systemStatsRecord.Set("type", "1m")
if err := txApp.SaveNoValidate(systemStatsRecord); err != nil {
return err
}
if len(data.Containers) > 0 {
// add / update containers records
if data.Containers[0].Id != "" {
if err := createContainerRecords(txApp, data.Containers, sys.Id); err != nil {
return err
}
}
// add new container_stats record
containerStatsCollection, err := txApp.FindCachedCollectionByNameOrId("container_stats")
if err != nil {
return err
}
containerStatsRecord := core.NewRecord(containerStatsCollection)
containerStatsRecord.Set("system", systemRecord.Id)
containerStatsRecord.Set("stats", data.Containers)
containerStatsRecord.Set("type", "1m")
if err := txApp.SaveNoValidate(containerStatsRecord); err != nil {
return err
}
}
// update system record (do this last because it triggers alerts and we need above records to be inserted first)
systemRecord.Set("status", up)
systemRecord.Set("info", data.Info)
if err := txApp.SaveNoValidate(systemRecord); err != nil {
return err
}
return nil
})
return systemRecord, err
}
// createContainerRecords creates container records
func createContainerRecords(app core.App, data []*container.Stats, systemId string) error {
if len(data) == 0 {
return nil
}
return systemRecord, nil
params := dbx.Params{
"system": systemId,
"updated": time.Now().UTC().UnixMilli(),
}
valueStrings := make([]string, 0, len(data))
for i, container := range data {
suffix := fmt.Sprintf("%d", i)
valueStrings = append(valueStrings, fmt.Sprintf("({:id%[1]s}, {:system}, {:name%[1]s}, {:status%[1]s}, {:health%[1]s}, {:cpu%[1]s}, {:memory%[1]s}, {:net%[1]s}, {:updated})", suffix))
params["id"+suffix] = container.Id
params["name"+suffix] = container.Name
params["status"+suffix] = container.Status
params["health"+suffix] = container.Health
params["cpu"+suffix] = container.Cpu
params["memory"+suffix] = container.Mem
params["net"+suffix] = container.NetworkSent + container.NetworkRecv
}
queryString := fmt.Sprintf(
"INSERT INTO containers (id, system, name, status, health, cpu, memory, net, updated) VALUES %s ON CONFLICT(id) DO UPDATE SET system = excluded.system, name = excluded.name, status = excluded.status, health = excluded.health, cpu = excluded.cpu, memory = excluded.memory, net = excluded.net, updated = excluded.updated",
strings.Join(valueStrings, ","),
)
_, err := app.DB().NewQuery(queryString).Bind(params).Execute()
return err
}
// getRecord retrieves the system record from the database.
@@ -242,37 +283,74 @@ func (sys *System) fetchDataViaWebSocket(options common.DataRequestOptions) (*sy
return sys.data, nil
}
// fetchStringFromAgentViaSSH is a generic function to fetch strings via SSH
func (sys *System) fetchStringFromAgentViaSSH(action common.WebSocketAction, requestData any, errorMsg string) (string, error) {
var result string
err := sys.runSSHOperation(4*time.Second, 1, func(session *ssh.Session) (bool, error) {
stdout, err := session.StdoutPipe()
if err != nil {
return false, err
}
stdin, stdinErr := session.StdinPipe()
if stdinErr != nil {
return false, stdinErr
}
if err := session.Shell(); err != nil {
return false, err
}
req := common.HubRequest[any]{Action: action, Data: requestData}
_ = cbor.NewEncoder(stdin).Encode(req)
_ = stdin.Close()
var resp common.AgentResponse
err = cbor.NewDecoder(stdout).Decode(&resp)
if err != nil {
return false, err
}
if resp.String == nil {
return false, errors.New(errorMsg)
}
result = *resp.String
return false, nil
})
return result, err
}
// FetchContainerInfoFromAgent fetches container info from the agent
func (sys *System) FetchContainerInfoFromAgent(containerID string) (string, error) {
// fetch via websocket
if sys.WsConn != nil && sys.WsConn.IsConnected() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
return sys.WsConn.RequestContainerInfo(ctx, containerID)
}
// fetch via SSH
return sys.fetchStringFromAgentViaSSH(common.GetContainerInfo, common.ContainerInfoRequest{ContainerID: containerID}, "no info in response")
}
// FetchContainerLogsFromAgent fetches container logs from the agent
func (sys *System) FetchContainerLogsFromAgent(containerID string) (string, error) {
// fetch via websocket
if sys.WsConn != nil && sys.WsConn.IsConnected() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
return sys.WsConn.RequestContainerLogs(ctx, containerID)
}
// fetch via SSH
return sys.fetchStringFromAgentViaSSH(common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, "no logs in response")
}
// fetchDataViaSSH handles fetching data using SSH.
// This function encapsulates the original SSH logic.
// It updates sys.data directly upon successful fetch.
func (sys *System) fetchDataViaSSH(options common.DataRequestOptions) (*system.CombinedData, error) {
maxRetries := 1
for attempt := 0; attempt <= maxRetries; attempt++ {
if sys.client == nil || sys.Status == down {
if err := sys.createSSHClient(); err != nil {
return nil, err
}
}
session, err := sys.createSessionWithTimeout(4 * time.Second)
if err != nil {
if attempt >= maxRetries {
return nil, err
}
sys.manager.hub.Logger().Warn("Session closed. Retrying...", "host", sys.Host, "port", sys.Port, "err", err)
sys.closeSSHConnection()
// Reset format detection on connection failure - agent might have been upgraded
continue
}
defer session.Close()
err := sys.runSSHOperation(4*time.Second, 1, func(session *ssh.Session) (bool, error) {
stdout, err := session.StdoutPipe()
if err != nil {
return nil, err
return false, err
}
stdin, stdinErr := session.StdinPipe()
if err := session.Shell(); err != nil {
return nil, err
return false, err
}
*sys.data = system.CombinedData{}
@@ -280,45 +358,82 @@ func (sys *System) fetchDataViaSSH(options common.DataRequestOptions) (*system.C
if sys.agentVersion.GTE(beszel.MinVersionAgentResponse) && stdinErr == nil {
req := common.HubRequest[any]{Action: common.GetData, Data: options}
_ = cbor.NewEncoder(stdin).Encode(req)
// Close write side to signal end of request
_ = stdin.Close()
var resp common.AgentResponse
if decErr := cbor.NewDecoder(stdout).Decode(&resp); decErr == nil && resp.SystemData != nil {
*sys.data = *resp.SystemData
// wait for the session to complete
if err := session.Wait(); err != nil {
return nil, err
return false, err
}
return sys.data, nil
return false, nil
}
// If decoding failed, fall back below
}
var decodeErr error
if sys.agentVersion.GTE(beszel.MinVersionCbor) {
err = cbor.NewDecoder(stdout).Decode(sys.data)
decodeErr = cbor.NewDecoder(stdout).Decode(sys.data)
} else {
err = json.NewDecoder(stdout).Decode(sys.data)
decodeErr = json.NewDecoder(stdout).Decode(sys.data)
}
if err != nil {
sys.closeSSHConnection()
if attempt < maxRetries {
continue
}
return nil, err
if decodeErr != nil {
return true, decodeErr
}
// wait for the session to complete
if err := session.Wait(); err != nil {
return nil, err
return false, err
}
return sys.data, nil
return false, nil
})
if err != nil {
return nil, err
}
// this should never be reached due to the return in the loop
return nil, fmt.Errorf("failed to fetch data")
return sys.data, nil
}
// runSSHOperation establishes an SSH session and executes the provided operation.
// The operation can request a retry by returning true as the first return value.
func (sys *System) runSSHOperation(timeout time.Duration, retries int, operation func(*ssh.Session) (bool, error)) error {
for attempt := 0; attempt <= retries; attempt++ {
if sys.client == nil || sys.Status == down {
if err := sys.createSSHClient(); err != nil {
return err
}
}
session, err := sys.createSessionWithTimeout(timeout)
if err != nil {
if attempt >= retries {
return err
}
sys.manager.hub.Logger().Warn("Session closed. Retrying...", "host", sys.Host, "port", sys.Port, "err", err)
sys.closeSSHConnection()
continue
}
retry, opErr := func() (bool, error) {
defer session.Close()
return operation(session)
}()
if opErr == nil {
return nil
}
if retry {
sys.closeSSHConnection()
if attempt < retries {
continue
}
}
return opErr
}
return fmt.Errorf("ssh operation failed")
}
// createSSHClient creates a new SSH client for the system

View File

@@ -63,6 +63,15 @@ func NewSystemManager(hub hubLike) *SystemManager {
}
}
// GetSystem returns a system by ID from the store
func (sm *SystemManager) GetSystem(systemID string) (*System, error) {
sys, ok := sm.systems.GetOk(systemID)
if !ok {
return nil, fmt.Errorf("system not found")
}
return sys, nil
}
// Initialize sets up the system manager by binding event hooks and starting existing systems.
// It configures SSH client settings and begins monitoring all non-paused systems from the database.
// Systems are started with staggered delays to prevent overwhelming the hub during startup.

View File

@@ -154,19 +154,20 @@ func (sm *SystemManager) startRealtimeWorker() {
// fetchRealtimeDataAndNotify fetches realtime data for all active subscriptions and notifies the clients.
func (sm *SystemManager) fetchRealtimeDataAndNotify() {
for systemId, info := range activeSubscriptions {
system, ok := sm.systems.GetOk(systemId)
if ok {
go func() {
data, err := system.fetchDataFromAgent(common.DataRequestOptions{CacheTimeMs: 1000})
if err != nil {
return
}
bytes, err := json.Marshal(data)
if err == nil {
notify(sm.hub, info.subscription, bytes)
}
}()
system, err := sm.GetSystem(systemId)
if err != nil {
continue
}
go func() {
data, err := system.fetchDataFromAgent(common.DataRequestOptions{CacheTimeMs: 1000})
if err != nil {
return
}
bytes, err := json.Marshal(data)
if err == nil {
notify(sm.hub, info.subscription, bytes)
}
}()
}
}

View File

@@ -18,11 +18,11 @@ type ResponseHandler interface {
}
// BaseHandler provides a default implementation that can be embedded to make HandleLegacy optional
// type BaseHandler struct{}
type BaseHandler struct{}
// func (h *BaseHandler) HandleLegacy(rawData []byte) error {
// return errors.New("legacy format not supported")
// }
func (h *BaseHandler) HandleLegacy(rawData []byte) error {
return errors.New("legacy format not supported")
}
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
@@ -63,6 +63,58 @@ func (ws *WsConn) RequestSystemData(ctx context.Context, data *system.CombinedDa
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// stringResponseHandler is a generic handler for string responses from agents
type stringResponseHandler struct {
BaseHandler
value string
errorMsg string
}
func (h *stringResponseHandler) Handle(agentResponse common.AgentResponse) error {
if agentResponse.String == nil {
return errors.New(h.errorMsg)
}
h.value = *agentResponse.String
return nil
}
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// requestContainerStringViaWS is a generic function to request container-related strings via WebSocket
func (ws *WsConn) requestContainerStringViaWS(ctx context.Context, action common.WebSocketAction, requestData any, errorMsg string) (string, error) {
if !ws.IsConnected() {
return "", gws.ErrConnClosed
}
req, err := ws.requestManager.SendRequest(ctx, action, requestData)
if err != nil {
return "", err
}
handler := &stringResponseHandler{errorMsg: errorMsg}
if err := ws.handleAgentRequest(req, handler); err != nil {
return "", err
}
return handler.value, nil
}
// RequestContainerLogs requests logs for a specific container via WebSocket.
func (ws *WsConn) RequestContainerLogs(ctx context.Context, containerID string) (string, error) {
return ws.requestContainerStringViaWS(ctx, common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, "no logs in response")
}
// RequestContainerInfo requests information about a specific container via WebSocket.
func (ws *WsConn) RequestContainerInfo(ctx context.Context, containerID string) (string, error) {
return ws.requestContainerStringViaWS(ctx, common.GetContainerInfo, common.ContainerInfoRequest{ContainerID: containerID}, "no info in response")
}
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// fingerprintHandler implements ResponseHandler for fingerprint requests
type fingerprintHandler struct {
result *common.FingerprintResponse

View File

@@ -181,6 +181,17 @@ func TestCommonActions(t *testing.T) {
// Test that the actions we use exist and have expected values
assert.Equal(t, common.WebSocketAction(0), common.GetData, "GetData should be action 0")
assert.Equal(t, common.WebSocketAction(1), common.CheckFingerprint, "CheckFingerprint should be action 1")
assert.Equal(t, common.WebSocketAction(2), common.GetContainerLogs, "GetLogs should be action 2")
}
func TestLogsHandler(t *testing.T) {
h := &stringResponseHandler{errorMsg: "no logs in response"}
logValue := "test logs"
resp := common.AgentResponse{String: &logValue}
err := h.Handle(resp)
assert.NoError(t, err)
assert.Equal(t, logValue, h.value)
}
// TestHandler tests that we can create a Handler