diff --git a/internal/hub/systems/system_manager.go b/internal/hub/systems/system_manager.go index f76293de..e8739890 100644 --- a/internal/hub/systems/system_manager.go +++ b/internal/hub/systems/system_manager.go @@ -41,10 +41,10 @@ var errSystemExists = errors.New("system exists") // SystemManager manages a collection of monitored systems and their connections. // It handles system lifecycle, status updates, and maintains both SSH and WebSocket connections. type SystemManager struct { - hub hubLike // Hub interface for database and alert operations - systems *store.Store[string, *System] // Thread-safe store of active systems - sshConfig *ssh.ClientConfig // SSH client configuration for system connections - smartFetchMap *expirymap.ExpiryMap[bool] // Stores whether the last SMART fetch succeeded while entry TTL enforces fetch interval + hub hubLike // Hub interface for database and alert operations + systems *store.Store[string, *System] // Thread-safe store of active systems + sshConfig *ssh.ClientConfig // SSH client configuration for system connections + smartFetchMap *expirymap.ExpiryMap[smartFetchState] // Stores last SMART fetch time/result; TTL is only for cleanup } // hubLike defines the interface requirements for the hub dependency. @@ -62,7 +62,7 @@ func NewSystemManager(hub hubLike) *SystemManager { return &SystemManager{ systems: store.New(map[string]*System{}), hub: hub, - smartFetchMap: expirymap.New[bool](time.Hour), + smartFetchMap: expirymap.New[smartFetchState](time.Hour), } } @@ -321,8 +321,8 @@ func (sm *SystemManager) AddWebSocketSystem(systemId string, agentVersion semver // resetFailedSmartFetchState clears only failed SMART cooldown entries so a fresh // agent reconnect retries SMART discovery immediately after configuration changes. 
func (sm *SystemManager) resetFailedSmartFetchState(systemID string) { - succeeded, ok := sm.smartFetchMap.GetOk(systemID) - if ok && !succeeded { + state, ok := sm.smartFetchMap.GetOk(systemID) + if ok && !state.Successful { sm.smartFetchMap.Remove(systemID) } } diff --git a/internal/hub/systems/system_smart.go b/internal/hub/systems/system_smart.go index 3ccbb1f1..c40f643d 100644 --- a/internal/hub/systems/system_smart.go +++ b/internal/hub/systems/system_smart.go @@ -10,6 +10,11 @@ import ( "github.com/pocketbase/pocketbase/core" ) +type smartFetchState struct { + LastAttempt int64 + Successful bool +} + // FetchAndSaveSmartDevices fetches SMART data from the agent and saves it to the database func (sys *System) FetchAndSaveSmartDevices() error { smartData, err := sys.FetchSmartDataFromAgent() @@ -33,7 +38,7 @@ func (sys *System) recordSmartFetchResult(err error, deviceCount int) { if sys.manager.hub != nil { sys.manager.hub.Logger().Info("SMART fetch result", "system", sys.Id, "success", success, "devices", deviceCount, "interval", interval.String(), "err", err) } - sys.manager.smartFetchMap.Set(sys.Id, success, interval+time.Minute) + sys.manager.smartFetchMap.Set(sys.Id, smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: success}, interval+time.Minute) } -// shouldFetchSmart returns true when there is no active SMART cooldown entry for this system. +// shouldFetchSmart returns true when no SMART fetch is recorded for this system or the SMART interval has elapsed since the last attempt. @@ -41,8 +46,11 @@ func (sys *System) shouldFetchSmart() bool { if sys.manager == nil { return true } - _, ok := sys.manager.smartFetchMap.GetOk(sys.Id) - return !ok + state, ok := sys.manager.smartFetchMap.GetOk(sys.Id) + if !ok { + return true + } + return !time.UnixMilli(state.LastAttempt).Add(sys.smartFetchInterval()).After(time.Now()) } // smartFetchInterval returns the agent-provided SMART interval or the default when unset. 
diff --git a/internal/hub/systems/system_smart_test.go b/internal/hub/systems/system_smart_test.go index bf1985ff..e186b1ff 100644 --- a/internal/hub/systems/system_smart_test.go +++ b/internal/hub/systems/system_smart_test.go @@ -12,7 +12,7 @@ import ( ) func TestRecordSmartFetchResult(t *testing.T) { - sm := &SystemManager{smartFetchMap: expirymap.New[bool](time.Hour)} + sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)} t.Cleanup(sm.smartFetchMap.StopCleaner) sys := &System{ @@ -23,25 +23,25 @@ func TestRecordSmartFetchResult(t *testing.T) { // Successful fetch with devices sys.recordSmartFetchResult(nil, 5) - succeeded, ok := sm.smartFetchMap.GetOk(sys.Id) + state, ok := sm.smartFetchMap.GetOk(sys.Id) assert.True(t, ok, "expected smart fetch result to be stored") - assert.True(t, succeeded, "expected successful fetch state to be recorded") + assert.True(t, state.Successful, "expected successful fetch state to be recorded") // Failed fetch sys.recordSmartFetchResult(errors.New("failed"), 0) - succeeded, ok = sm.smartFetchMap.GetOk(sys.Id) + state, ok = sm.smartFetchMap.GetOk(sys.Id) assert.True(t, ok, "expected failed smart fetch state to be stored") - assert.False(t, succeeded, "expected failed smart fetch state to be marked unsuccessful") + assert.False(t, state.Successful, "expected failed smart fetch state to be marked unsuccessful") // Successful fetch but no devices sys.recordSmartFetchResult(nil, 0) - succeeded, ok = sm.smartFetchMap.GetOk(sys.Id) + state, ok = sm.smartFetchMap.GetOk(sys.Id) assert.True(t, ok, "expected fetch with zero devices to be stored") - assert.False(t, succeeded, "expected fetch with zero devices to be marked unsuccessful") + assert.False(t, state.Successful, "expected fetch with zero devices to be marked unsuccessful") } func TestShouldFetchSmart(t *testing.T) { - sm := &SystemManager{smartFetchMap: expirymap.New[bool](time.Hour)} + sm := &SystemManager{smartFetchMap: 
expirymap.New[smartFetchState](time.Hour)} t.Cleanup(sm.smartFetchMap.StopCleaner) sys := &System{ @@ -59,16 +59,35 @@ func TestShouldFetchSmart(t *testing.T) { assert.True(t, sys.shouldFetchSmart(), "expected smart fetch to be allowed after interval entry is cleared") } -func TestResetFailedSmartFetchState(t *testing.T) { - sm := &SystemManager{smartFetchMap: expirymap.New[bool](time.Hour)} +func TestShouldFetchSmart_IgnoresExtendedTTLWhenFetchIsDue(t *testing.T) { + sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)} t.Cleanup(sm.smartFetchMap.StopCleaner) - sm.smartFetchMap.Set("system-1", false, time.Hour) + sys := &System{ + Id: "system-1", + manager: sm, + smartInterval: time.Hour, + } + + sm.smartFetchMap.Set(sys.Id, smartFetchState{ + LastAttempt: time.Now().Add(-2 * time.Hour).UnixMilli(), + Successful: true, + }, 10*time.Minute) + sm.smartFetchMap.UpdateExpiration(sys.Id, 3*time.Hour) + + assert.True(t, sys.shouldFetchSmart(), "expected fetch time to take precedence over updated TTL") +} + +func TestResetFailedSmartFetchState(t *testing.T) { + sm := &SystemManager{smartFetchMap: expirymap.New[smartFetchState](time.Hour)} + t.Cleanup(sm.smartFetchMap.StopCleaner) + + sm.smartFetchMap.Set("system-1", smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: false}, time.Hour) sm.resetFailedSmartFetchState("system-1") _, ok := sm.smartFetchMap.GetOk("system-1") assert.False(t, ok, "expected failed smart fetch state to be cleared on reconnect") - sm.smartFetchMap.Set("system-1", true, time.Hour) + sm.smartFetchMap.Set("system-1", smartFetchState{LastAttempt: time.Now().UnixMilli(), Successful: true}, time.Hour) sm.resetFailedSmartFetchState("system-1") _, ok = sm.smartFetchMap.GetOk("system-1") assert.True(t, ok, "expected successful smart fetch state to be preserved")