refactor: unify agent communication with Transport interface

- Introduce `Transport` interface to abstract WebSocket and SSH
communication
- Add generic `Data` field to `AgentResponse` for streamlined future
endpoints
- Maintain backward compatibility with legacy hubs and agents using
typed fields
- Unify fetch operations (SMART, systemd, containers) under a single
`request` method
- Improve `RequestManager` with deadline awareness and legacy response
support
- Refactor agent response routing into dedicated `agent/response.go`
- Update version to 0.18.0-beta.2
This commit is contained in:
henrygd
2026-01-05 13:13:55 -05:00
parent e7b4be3dc5
commit 4547ff7b5d
19 changed files with 623 additions and 457 deletions

View File

@@ -13,9 +13,11 @@ import (
"time"
"github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/hub/transport"
"github.com/henrygd/beszel/internal/hub/ws"
"github.com/henrygd/beszel/internal/entities/container"
"github.com/henrygd/beszel/internal/entities/smart"
"github.com/henrygd/beszel/internal/entities/system"
"github.com/henrygd/beszel/internal/entities/systemd"
@@ -23,28 +25,30 @@ import (
"github.com/blang/semver"
"github.com/fxamacker/cbor/v2"
"github.com/lxzan/gws"
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/core"
"golang.org/x/crypto/ssh"
)
type System struct {
Id string `db:"id"`
Host string `db:"host"`
Port string `db:"port"`
Status string `db:"status"`
manager *SystemManager // Manager that this system belongs to
client *ssh.Client // SSH client for fetching data
data *system.CombinedData // system data from agent
ctx context.Context // Context for stopping the updater
cancel context.CancelFunc // Stops and removes system from updater
WsConn *ws.WsConn // Handler for agent WebSocket connection
agentVersion semver.Version // Agent version
updateTicker *time.Ticker // Ticker for updating the system
detailsFetched atomic.Bool // True if static system details have been fetched and saved
smartFetching atomic.Bool // True if SMART devices are currently being fetched
smartInterval time.Duration // Interval for periodic SMART data updates
lastSmartFetch atomic.Int64 // Unix milliseconds of last SMART data fetch
Id string `db:"id"`
Host string `db:"host"`
Port string `db:"port"`
Status string `db:"status"`
manager *SystemManager // Manager that this system belongs to
client *ssh.Client // SSH client for fetching data
sshTransport *transport.SSHTransport // SSH transport for requests
data *system.CombinedData // system data from agent
ctx context.Context // Context for stopping the updater
cancel context.CancelFunc // Stops and removes system from updater
WsConn *ws.WsConn // Handler for agent WebSocket connection
agentVersion semver.Version // Agent version
updateTicker *time.Ticker // Ticker for updating the system
detailsFetched atomic.Bool // True if static system details have been fetched and saved
smartFetching atomic.Bool // True if SMART devices are currently being fetched
smartInterval time.Duration // Interval for periodic SMART data updates
lastSmartFetch atomic.Int64 // Unix milliseconds of last SMART data fetch
}
func (sm *SystemManager) NewSystem(systemId string) *System {
@@ -359,8 +363,78 @@ func (sys *System) getContext() (context.Context, context.CancelFunc) {
return sys.ctx, sys.cancel
}
// fetchDataFromAgent attempts to fetch data from the agent,
// prioritizing WebSocket if available.
// request sends a request to the agent, trying WebSocket first, then SSH.
// This is the unified request method that uses the transport abstraction.
func (sys *System) request(ctx context.Context, action common.WebSocketAction, req any, dest any) error {
// Try WebSocket first
if sys.WsConn != nil && sys.WsConn.IsConnected() {
wsTransport := transport.NewWebSocketTransport(sys.WsConn)
if err := wsTransport.Request(ctx, action, req, dest); err == nil {
return nil
} else if !shouldFallbackToSSH(err) {
return err
} else if shouldCloseWebSocket(err) {
sys.closeWebSocketConnection()
}
}
// Fall back to SSH if WebSocket fails
if err := sys.ensureSSHTransport(); err != nil {
return err
}
err := sys.sshTransport.RequestWithRetry(ctx, action, req, dest, 1)
// Keep legacy SSH client/version fields in sync for other code paths.
if sys.sshTransport != nil {
sys.client = sys.sshTransport.GetClient()
sys.agentVersion = sys.sshTransport.GetAgentVersion()
}
return err
}
func shouldFallbackToSSH(err error) bool {
if err == nil {
return false
}
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
return true
}
if errors.Is(err, gws.ErrConnClosed) {
return true
}
return errors.Is(err, transport.ErrWebSocketNotConnected)
}
func shouldCloseWebSocket(err error) bool {
if err == nil {
return false
}
return errors.Is(err, gws.ErrConnClosed) || errors.Is(err, transport.ErrWebSocketNotConnected)
}
// ensureSSHTransport ensures the SSH transport is initialized and connected.
func (sys *System) ensureSSHTransport() error {
if sys.sshTransport == nil {
if sys.manager.sshConfig == nil {
if err := sys.manager.createSSHClientConfig(); err != nil {
return err
}
}
sys.sshTransport = transport.NewSSHTransport(transport.SSHTransportConfig{
Host: sys.Host,
Port: sys.Port,
Config: sys.manager.sshConfig,
Timeout: 4 * time.Second,
})
}
// Sync client state with transport
if sys.client != nil {
sys.sshTransport.SetClient(sys.client)
sys.sshTransport.SetAgentVersion(sys.agentVersion)
}
return nil
}
// fetchDataFromAgent attempts to fetch data from the agent, prioritizing WebSocket if available.
func (sys *System) fetchDataFromAgent(options common.DataRequestOptions) (*system.CombinedData, error) {
if sys.data == nil {
sys.data = &system.CombinedData{}
@@ -386,112 +460,47 @@ func (sys *System) fetchDataViaWebSocket(options common.DataRequestOptions) (*sy
if sys.WsConn == nil || !sys.WsConn.IsConnected() {
return nil, errors.New("no websocket connection")
}
err := sys.WsConn.RequestSystemData(context.Background(), sys.data, options)
wsTransport := transport.NewWebSocketTransport(sys.WsConn)
err := wsTransport.Request(context.Background(), common.GetData, options, sys.data)
if err != nil {
return nil, err
}
return sys.data, nil
}
// fetchStringFromAgentViaSSH is a generic function to fetch strings via SSH
func (sys *System) fetchStringFromAgentViaSSH(action common.WebSocketAction, requestData any, errorMsg string) (string, error) {
var result string
err := sys.runSSHOperation(4*time.Second, 1, func(session *ssh.Session) (bool, error) {
stdout, err := session.StdoutPipe()
if err != nil {
return false, err
}
stdin, stdinErr := session.StdinPipe()
if stdinErr != nil {
return false, stdinErr
}
if err := session.Shell(); err != nil {
return false, err
}
req := common.HubRequest[any]{Action: action, Data: requestData}
_ = cbor.NewEncoder(stdin).Encode(req)
_ = stdin.Close()
var resp common.AgentResponse
err = cbor.NewDecoder(stdout).Decode(&resp)
if err != nil {
return false, err
}
if resp.String == nil {
return false, errors.New(errorMsg)
}
result = *resp.String
return false, nil
})
return result, err
}
// FetchContainerInfoFromAgent fetches container info from the agent
func (sys *System) FetchContainerInfoFromAgent(containerID string) (string, error) {
// fetch via websocket
if sys.WsConn != nil && sys.WsConn.IsConnected() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
return sys.WsConn.RequestContainerInfo(ctx, containerID)
}
// fetch via SSH
return sys.fetchStringFromAgentViaSSH(common.GetContainerInfo, common.ContainerInfoRequest{ContainerID: containerID}, "no info in response")
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var result string
err := sys.request(ctx, common.GetContainerInfo, common.ContainerInfoRequest{ContainerID: containerID}, &result)
return result, err
}
// FetchContainerLogsFromAgent fetches container logs from the agent
func (sys *System) FetchContainerLogsFromAgent(containerID string) (string, error) {
// fetch via websocket
if sys.WsConn != nil && sys.WsConn.IsConnected() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
return sys.WsConn.RequestContainerLogs(ctx, containerID)
}
// fetch via SSH
return sys.fetchStringFromAgentViaSSH(common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, "no logs in response")
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var result string
err := sys.request(ctx, common.GetContainerLogs, common.ContainerLogsRequest{ContainerID: containerID}, &result)
return result, err
}
// FetchSystemdInfoFromAgent fetches detailed systemd service information from the agent
func (sys *System) FetchSystemdInfoFromAgent(serviceName string) (systemd.ServiceDetails, error) {
// fetch via websocket
if sys.WsConn != nil && sys.WsConn.IsConnected() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
return sys.WsConn.RequestSystemdInfo(ctx, serviceName)
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var result systemd.ServiceDetails
err := sys.runSSHOperation(5*time.Second, 1, func(session *ssh.Session) (bool, error) {
stdout, err := session.StdoutPipe()
if err != nil {
return false, err
}
stdin, stdinErr := session.StdinPipe()
if stdinErr != nil {
return false, stdinErr
}
if err := session.Shell(); err != nil {
return false, err
}
req := common.HubRequest[any]{Action: common.GetSystemdInfo, Data: common.SystemdInfoRequest{ServiceName: serviceName}}
if err := cbor.NewEncoder(stdin).Encode(req); err != nil {
return false, err
}
_ = stdin.Close()
var resp common.AgentResponse
if err := cbor.NewDecoder(stdout).Decode(&resp); err != nil {
return false, err
}
if resp.ServiceInfo == nil {
if resp.Error != "" {
return false, errors.New(resp.Error)
}
return false, errors.New("no systemd info in response")
}
result = resp.ServiceInfo
return false, nil
})
err := sys.request(ctx, common.GetSystemdInfo, common.SystemdInfoRequest{ServiceName: serviceName}, &result)
return result, err
}
// FetchSmartDataFromAgent fetches SMART data from the agent
func (sys *System) FetchSmartDataFromAgent() (map[string]smart.SmartData, error) {
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
var result map[string]smart.SmartData
err := sys.request(ctx, common.GetSmartData, nil, &result)
return result, err
}
@@ -656,6 +665,9 @@ func (sys *System) createSessionWithTimeout(timeout time.Duration) (*ssh.Session
// closeSSHConnection closes the SSH connection but keeps the system in the manager
func (sys *System) closeSSHConnection() {
if sys.sshTransport != nil {
sys.sshTransport.Close()
}
if sys.client != nil {
sys.client.Close()
sys.client = nil

View File

@@ -1,54 +1,14 @@
package systems
import (
"context"
"database/sql"
"errors"
"strings"
"time"
"github.com/fxamacker/cbor/v2"
"github.com/henrygd/beszel/internal/common"
"github.com/henrygd/beszel/internal/entities/smart"
"github.com/pocketbase/pocketbase/core"
"golang.org/x/crypto/ssh"
)
// FetchSmartDataFromAgent fetches SMART data from the agent
func (sys *System) FetchSmartDataFromAgent() (map[string]smart.SmartData, error) {
// fetch via websocket
if sys.WsConn != nil && sys.WsConn.IsConnected() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
return sys.WsConn.RequestSmartData(ctx)
}
// fetch via SSH
var result map[string]smart.SmartData
err := sys.runSSHOperation(5*time.Second, 1, func(session *ssh.Session) (bool, error) {
stdout, err := session.StdoutPipe()
if err != nil {
return false, err
}
stdin, stdinErr := session.StdinPipe()
if stdinErr != nil {
return false, stdinErr
}
if err := session.Shell(); err != nil {
return false, err
}
req := common.HubRequest[any]{Action: common.GetSmartData}
_ = cbor.NewEncoder(stdin).Encode(req)
_ = stdin.Close()
var resp common.AgentResponse
if err := cbor.NewDecoder(stdout).Decode(&resp); err != nil {
return false, err
}
result = resp.SmartData
return false, nil
})
return result, err
}
// FetchAndSaveSmartDevices fetches SMART data from the agent and saves it to the database
func (sys *System) FetchAndSaveSmartDevices() error {
smartData, err := sys.FetchSmartDataFromAgent()