From 9f19afccdeadf5d51bad76979f516892f7916e0b Mon Sep 17 00:00:00 2001 From: henrygd Date: Sun, 29 Mar 2026 12:30:39 -0400 Subject: [PATCH] hub: reset smart interval on agent reconnect if agent hasn't successfully saved smart devices This is so people trying to get smart working can see the config changes immediately, with no need to wait for the smart interval. --- internal/hub/systems/system_manager.go | 14 ++++---- internal/hub/systems/system_smart.go | 14 ++++++-- internal/hub/systems/system_smart_test.go | 43 ++++++++++++++++------- 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/internal/hub/systems/system_manager.go b/internal/hub/systems/system_manager.go index f76293de..e8739890 100644 --- a/internal/hub/systems/system_manager.go +++ b/internal/hub/systems/system_manager.go @@ -41,10 +41,10 @@ var errSystemExists = errors.New("system exists") // SystemManager manages a collection of monitored systems and their connections. // It handles system lifecycle, status updates, and maintains both SSH and WebSocket connections. type SystemManager struct { - hub hubLike // Hub interface for database and alert operations - systems *store.Store[string, *System] // Thread-safe store of active systems - sshConfig *ssh.ClientConfig // SSH client configuration for system connections - smartFetchMap *expirymap.ExpiryMap[bool] // Stores whether the last SMART fetch succeeded while entry TTL enforces fetch interval + hub hubLike // Hub interface for database and alert operations + systems *store.Store[string, *System] // Thread-safe store of active systems + sshConfig *ssh.ClientConfig // SSH client configuration for system connections + smartFetchMap *expirymap.ExpiryMap[smartFetchState] // Stores last SMART fetch time/result; TTL is only for cleanup } // hubLike defines the interface requirements for the hub dependency. 
@@ -62,7 +62,7 @@ func NewSystemManager(hub hubLike) *SystemManager { return &SystemManager{ systems: store.New(map[string]*System{}), hub: hub, - smartFetchMap: expirymap.New[bool](time.Hour), + smartFetchMap: expirymap.New[smartFetchState](time.Hour), } } @@ -321,8 +321,8 @@ func (sm *SystemManager) AddWebSocketSystem(systemId string, agentVersion semver // resetFailedSmartFetchState clears only failed SMART cooldown entries so a fresh // agent reconnect retries SMART discovery immediately after configuration changes. func (sm *SystemManager) resetFailedSmartFetchState(systemID string) { - succeeded, ok := sm.smartFetchMap.GetOk(systemID) - if ok && !succeeded { + state, ok := sm.smartFetchMap.GetOk(systemID) + if ok && !state.Successful { sm.smartFetchMap.Remove(systemID) } } diff --git a/internal/hub/systems/system_smart.go b/internal/hub/systems/system_smart.go index 3ccbb1f1..c40f643d 100644 --- a/internal/hub/systems/system_smart.go +++ b/internal/hub/systems/system_smart.go @@ -10,6 +10,11 @@ import ( "github.com/pocketbase/pocketbase/core" ) +type smartFetchState struct { + LastAttempt int64 + Successful bool +} + // FetchAndSaveSmartDevices fetches SMART data from the agent and saves it to the database func (sys *System) FetchAndSaveSmartDevices() error { smartData, err := sys.FetchSmartDataFromAgent() @@ -33,7 +38,7 @@ func (sys *System) recordSmartFetchResult(err error, deviceCount int) { if sys.manager.hub != nil { sys.manager.hub.Logger().Info("SMART fetch result", "system", sys.Id, "success", success, "devices", deviceCount, "interval", interval.String(), "err", err) } - sys.manager.smartFetchMap.Set(sys.Id, success, interval+time.Minute) + sys.manager.smartFetchMap.Set(sys.Id, smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: success}, interval+time.Minute) } // shouldFetchSmart returns true when there is no active SMART cooldown entry for this system. 
@@ -41,8 +46,11 @@ func (sys *System) shouldFetchSmart() bool { if sys.manager == nil { return true } - _, ok := sys.manager.smartFetchMap.GetOk(sys.Id) - return !ok + state, ok := sys.manager.smartFetchMap.GetOk(sys.Id) + if !ok { + return true + } + return !time.UnixMilli(state.LastAttempt).Add(sys.smartFetchInterval()).After(time.Now()) } // smartFetchInterval returns the agent-provided SMART interval or the default when unset. diff --git a/internal/hub/systems/system_smart_test.go b/internal/hub/systems/system_smart_test.go index bf1985ff..e186b1ff 100644 --- a/internal/hub/systems/system_smart_test.go +++ b/internal/hub/systems/system_smart_test.go @@ -12,7 +12,7 @@ import ( ) func TestRecordSmartFetchResult(t *testing.T) { - sm := &SystemManager{smartFetchMap: expirymap.New[bool](time.Hour)} + sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)} t.Cleanup(sm.smartFetchMap.StopCleaner) sys := &System{ @@ -23,25 +23,25 @@ func TestRecordSmartFetchResult(t *testing.T) { // Successful fetch with devices sys.recordSmartFetchResult(nil, 5) - succeeded, ok := sm.smartFetchMap.GetOk(sys.Id) + state, ok := sm.smartFetchMap.GetOk(sys.Id) assert.True(t, ok, "expected smart fetch result to be stored") - assert.True(t, succeeded, "expected successful fetch state to be recorded") + assert.True(t, state.Successful, "expected successful fetch state to be recorded") // Failed fetch sys.recordSmartFetchResult(errors.New("failed"), 0) - succeeded, ok = sm.smartFetchMap.GetOk(sys.Id) + state, ok = sm.smartFetchMap.GetOk(sys.Id) assert.True(t, ok, "expected failed smart fetch state to be stored") - assert.False(t, succeeded, "expected failed smart fetch state to be marked unsuccessful") + assert.False(t, state.Successful, "expected failed smart fetch state to be marked unsuccessful") // Successful fetch but no devices sys.recordSmartFetchResult(nil, 0) - succeeded, ok = sm.smartFetchMap.GetOk(sys.Id) + state, ok = sm.smartFetchMap.GetOk(sys.Id) 
assert.True(t, ok, "expected fetch with zero devices to be stored") - assert.False(t, succeeded, "expected fetch with zero devices to be marked unsuccessful") + assert.False(t, state.Successful, "expected fetch with zero devices to be marked unsuccessful") } func TestShouldFetchSmart(t *testing.T) { - sm := &SystemManager{smartFetchMap: expirymap.New[bool](time.Hour)} + sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)} t.Cleanup(sm.smartFetchMap.StopCleaner) sys := &System{ @@ -59,16 +59,35 @@ func TestShouldFetchSmart(t *testing.T) { assert.True(t, sys.shouldFetchSmart(), "expected smart fetch to be allowed after interval entry is cleared") } -func TestResetFailedSmartFetchState(t *testing.T) { - sm := &SystemManager{smartFetchMap: expirymap.New[bool](time.Hour)} +func TestShouldFetchSmart_IgnoresExtendedTTLWhenFetchIsDue(t *testing.T) { + sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)} t.Cleanup(sm.smartFetchMap.StopCleaner) - sm.smartFetchMap.Set("system-1", false, time.Hour) + sys := &System{ + Id: "system-1", + manager: sm, + smartInterval: time.Hour, + } + + sm.smartFetchMap.Set(sys.Id, smartFetchState{ + LastAttempt: time.Now().Add(-2 * time.Hour).UnixMilli(), + Successful: true, + }, 10*time.Minute) + sm.smartFetchMap.UpdateExpiration(sys.Id, 3*time.Hour) + + assert.True(t, sys.shouldFetchSmart(), "expected fetch time to take precedence over updated TTL") +} + +func TestResetFailedSmartFetchState(t *testing.T) { + sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)} + t.Cleanup(sm.smartFetchMap.StopCleaner) + + sm.smartFetchMap.Set("system-1", smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: false}, time.Hour) sm.resetFailedSmartFetchState("system-1") _, ok := sm.smartFetchMap.GetOk("system-1") assert.False(t, ok, "expected failed smart fetch state to be cleared on reconnect") - sm.smartFetchMap.Set("system-1", true, time.Hour) + 
sm.smartFetchMap.Set("system-1", smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: true}, time.Hour) sm.resetFailedSmartFetchState("system-1") _, ok = sm.smartFetchMap.GetOk("system-1") assert.True(t, ok, "expected successful smart fetch state to be preserved")