mirror of
https://github.com/henrygd/beszel.git
synced 2026-03-21 21:26:16 +01:00
Compare commits
140 Commits
nvml
...
e4e0affbc1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e4e0affbc1 | ||
|
|
c3a0e645ee | ||
|
|
c6c3950fb0 | ||
|
|
48ddc96a0d | ||
|
|
704cb86de8 | ||
|
|
2854ce882f | ||
|
|
ed50367f70 | ||
|
|
4ebe869591 | ||
|
|
c9bbbe91f2 | ||
|
|
5bfe4f6970 | ||
|
|
380d2b1091 | ||
|
|
a7f99e7a8c | ||
|
|
bd94a9d142 | ||
|
|
8e2316f845 | ||
|
|
0d3dfcb207 | ||
|
|
b386ce5190 | ||
|
|
e527534016 | ||
|
|
ec7ad632a9 | ||
|
|
963fce5a33 | ||
|
|
d38c0da06d | ||
|
|
cae6ac4626 | ||
|
|
6b1ff264f2 | ||
|
|
35d0e792ad | ||
|
|
654cd06b19 | ||
|
|
5e1b028130 | ||
|
|
638e7dc12a | ||
|
|
73c262455d | ||
|
|
0c4d2edd45 | ||
|
|
8f23fff1c9 | ||
|
|
02c1a0c13d | ||
|
|
69fdcb36ab | ||
|
|
b91eb6de40 | ||
|
|
ec69f6c6e0 | ||
|
|
a86cb91e07 | ||
|
|
004841717a | ||
|
|
096296ba7b | ||
|
|
b012df5669 | ||
|
|
12545b4b6d | ||
|
|
9e2296452b | ||
|
|
ac79860d4a | ||
|
|
e13a99fdac | ||
|
|
4cfb2a86ad | ||
|
|
191f25f6e0 | ||
|
|
aa8b3711d7 | ||
|
|
1fb0b25988 | ||
|
|
04600d83cc | ||
|
|
5d8906c9b2 | ||
|
|
daac287b9d | ||
|
|
d526ea61a9 | ||
|
|
79616e1662 | ||
|
|
01e8bdf040 | ||
|
|
1e3a44e05d | ||
|
|
311095cfdd | ||
|
|
4869c834bb | ||
|
|
e1c1e97f0a | ||
|
|
f6b2824ccc | ||
|
|
f17ffc21b8 | ||
|
|
f792f9b102 | ||
|
|
1def7d8d3a | ||
|
|
ef92b254bf | ||
|
|
10d853c004 | ||
|
|
cdfd116da0 | ||
|
|
283fa9d5c2 | ||
|
|
7d6c0caafc | ||
|
|
04d54a3efc | ||
|
|
14ecb1b069 | ||
|
|
1f1a448aef | ||
|
|
e816ea143a | ||
|
|
2230097dc7 | ||
|
|
25c77c5664 | ||
|
|
dba3519b2c | ||
|
|
48c35aa54d | ||
|
|
6b7845b03e | ||
|
|
221be1da58 | ||
|
|
8347afd68e | ||
|
|
2a3885a52e | ||
|
|
5452e50080 | ||
|
|
028f7bafb2 | ||
|
|
0f6142e27e | ||
|
|
8c37b93a4b | ||
|
|
201d16af05 | ||
|
|
db007176fd | ||
|
|
83fb67132b | ||
|
|
a04837f4d5 | ||
|
|
3d8db53e52 | ||
|
|
5797f8a6ad | ||
|
|
79ca31d770 | ||
|
|
41f3705b6b | ||
|
|
20324763d2 | ||
|
|
70f85f9590 | ||
|
|
c7f7f51c99 | ||
|
|
6723ec8ea4 | ||
|
|
afc19ebd3b | ||
|
|
c83d00ccaa | ||
|
|
425c8d2bdf | ||
|
|
42da1e5a52 | ||
|
|
afcae025ae | ||
|
|
1de36625a4 | ||
|
|
a2b6c7f5e6 | ||
|
|
799c7b077a | ||
|
|
cb5f944de6 | ||
|
|
23c4958145 | ||
|
|
edb2edc12c | ||
|
|
648a979a81 | ||
|
|
988de6de7b | ||
|
|
031abbfcb3 | ||
|
|
b59fcc26e5 | ||
|
|
acaa9381fe | ||
|
|
8d9e9260e6 | ||
|
|
0fc4a6daed | ||
|
|
af0c1d3af7 | ||
|
|
9ad3cd0ab9 | ||
|
|
00def272b0 | ||
|
|
383913505f | ||
|
|
ca8cb78c29 | ||
|
|
8821fb5dd0 | ||
|
|
3279a6ca53 | ||
|
|
6a1a98d73f | ||
|
|
1f067aad5b | ||
|
|
1388711105 | ||
|
|
618e5b4cc1 | ||
|
|
42c3ca5db5 | ||
|
|
534791776b | ||
|
|
0c6c53fc7d | ||
|
|
0dfd5ce07d | ||
|
|
2cd6d46f7c | ||
|
|
c333a9fadd | ||
|
|
ba3d1c66f0 | ||
|
|
7276e533ce | ||
|
|
8b84231042 | ||
|
|
77da744008 | ||
|
|
5da7a21119 | ||
|
|
78d742c712 | ||
|
|
1c97ea3e2c | ||
|
|
3d970defe9 | ||
|
|
6282794004 | ||
|
|
475c53a55d | ||
|
|
4547ff7b5d | ||
|
|
e7b4be3dc5 | ||
|
|
2bd85e04fc |
2
.github/CODEOWNERS
vendored
Normal file
2
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# Everything needs to be reviewed by Hank
|
||||
* @henrygd
|
||||
19
.github/DISCUSSION_TEMPLATE/ideas.yml
vendored
Normal file
19
.github/DISCUSSION_TEMPLATE/ideas.yml
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
body:
|
||||
- type: dropdown
|
||||
id: component
|
||||
attributes:
|
||||
label: Component
|
||||
description: Which part of Beszel is this about?
|
||||
options:
|
||||
- Hub
|
||||
- Agent
|
||||
- Hub & Agent
|
||||
default: 0
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Description
|
||||
description: Please describe in detail what you want to share.
|
||||
validations:
|
||||
required: true
|
||||
68
.github/DISCUSSION_TEMPLATE/support.yml
vendored
68
.github/DISCUSSION_TEMPLATE/support.yml
vendored
@@ -1,19 +1,54 @@
|
||||
body:
|
||||
- type: markdown
|
||||
- type: checkboxes
|
||||
id: terms
|
||||
attributes:
|
||||
value: |
|
||||
### Before opening a discussion:
|
||||
label: Welcome!
|
||||
description: |
|
||||
Thank you for reaching out to the Beszel community for support! To help us assist you better, please make sure to review the following points before submitting your request:
|
||||
|
||||
- Check the [common issues guide](https://beszel.dev/guide/common-issues).
|
||||
- Search existing [issues](https://github.com/henrygd/beszel/issues) and [discussions](https://github.com/henrygd/beszel/discussions) (including closed).
|
||||
Please note:
|
||||
- For translation-related issues or requests, please use the [Crowdin project](https://crowdin.com/project/beszel).
|
||||
**- Please do not submit support requests that are specific to ZFS. We plan to add integration with ZFS utilities in the near future.**
|
||||
|
||||
options:
|
||||
- label: I have read the [Documentation](https://beszel.dev/guide/getting-started)
|
||||
required: true
|
||||
- label: I have checked the [Common Issues Guide](https://beszel.dev/guide/common-issues) and my problem was not mentioned there.
|
||||
required: true
|
||||
- label: I have searched open and closed issues and discussions and my problem was not mentioned before.
|
||||
required: true
|
||||
- label: I have verified I am using the latest version available. You can check the latest release [here](https://github.com/henrygd/beszel/releases).
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: component
|
||||
attributes:
|
||||
label: Component
|
||||
description: Which part of Beszel is this about?
|
||||
options:
|
||||
- Hub
|
||||
- Agent
|
||||
- Hub & Agent
|
||||
default: 0
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Description
|
||||
description: A clear and concise description of the issue or question. If applicable, add screenshots to help explain your problem.
|
||||
label: Problem Description
|
||||
description: |
|
||||
How to write a good bug report?
|
||||
|
||||
- Respect the issue template as much as possible.
|
||||
- The title should be short and descriptive.
|
||||
- Explain the conditions which led you to report this issue: the context.
|
||||
- The context should lead to something, a problem that you’re facing.
|
||||
- Remain clear and concise.
|
||||
- Format your messages to help the reader focus on what matters and understand the structure of your message, use [Markdown syntax](https://help.github.com/articles/github-flavored-markdown)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: system
|
||||
attributes:
|
||||
@@ -21,13 +56,15 @@ body:
|
||||
placeholder: linux/amd64 (agent), freebsd/arm64 (hub)
|
||||
validations:
|
||||
required: true
|
||||
- type: input
|
||||
id: version
|
||||
attributes:
|
||||
label: Beszel version
|
||||
placeholder: 0.9.1
|
||||
validations:
|
||||
required: true
|
||||
|
||||
# - type: input
|
||||
# id: version
|
||||
# attributes:
|
||||
# label: Beszel version
|
||||
# placeholder: 0.9.1
|
||||
# validations:
|
||||
# required: true
|
||||
|
||||
- type: dropdown
|
||||
id: install-method
|
||||
attributes:
|
||||
@@ -41,18 +78,21 @@ body:
|
||||
- Other (please describe above)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: config
|
||||
attributes:
|
||||
label: Configuration
|
||||
description: Please provide any relevant service configuration
|
||||
render: yaml
|
||||
|
||||
- type: textarea
|
||||
id: hub-logs
|
||||
attributes:
|
||||
label: Hub Logs
|
||||
description: Check the logs page in PocketBase (`/_/#/logs`) for relevant errors (copy JSON).
|
||||
render: json
|
||||
|
||||
- type: textarea
|
||||
id: agent-logs
|
||||
attributes:
|
||||
|
||||
103
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
103
.github/ISSUE_TEMPLATE/bug_report.yml
vendored
@@ -1,8 +1,30 @@
|
||||
name: 🐛 Bug report
|
||||
description: Report a new bug or issue.
|
||||
description: Use this template to report a bug or issue.
|
||||
title: '[Bug]: '
|
||||
labels: ['bug', "needs confirmation"]
|
||||
labels: ['bug']
|
||||
body:
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Welcome!
|
||||
description: |
|
||||
The issue tracker is for reporting bugs and feature requests only. For end-user related support questions, please use the **[GitHub Discussions](https://github.com/henrygd/beszel/discussions/new?category=support)** instead.
|
||||
|
||||
Please note:
|
||||
- For translation-related issues or requests, please use the [Crowdin project](https://crowdin.com/project/beszel).
|
||||
- To request a change or feature, use the [feature request form](https://github.com/henrygd/beszel/issues/new?template=feature_request.yml).
|
||||
- Any issues that can be resolved by consulting the documentation or by reviewing existing open or closed issues will be closed.
|
||||
**- Please do not submit bugs that are specific to ZFS. We plan to add integration with ZFS utilities in the near future.**
|
||||
|
||||
options:
|
||||
- label: I have read the [Documentation](https://beszel.dev/guide/getting-started)
|
||||
required: true
|
||||
- label: I have checked the [Common Issues Guide](https://beszel.dev/guide/common-issues) and my problem was not mentioned there.
|
||||
required: true
|
||||
- label: I have searched open and closed issues and my problem was not mentioned before.
|
||||
required: true
|
||||
- label: I have verified I am using the latest version available. You can check the latest release [here](https://github.com/henrygd/beszel/releases).
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: component
|
||||
attributes:
|
||||
@@ -12,81 +34,53 @@ body:
|
||||
- Hub
|
||||
- Agent
|
||||
- Hub & Agent
|
||||
default: 0
|
||||
validations:
|
||||
required: true
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
### Thanks for taking the time to fill out this bug report!
|
||||
|
||||
- For more general support, please [start a support thread](https://github.com/henrygd/beszel/discussions/new?category=support).
|
||||
- To request a change or feature, use the [feature request form](https://github.com/henrygd/beszel/issues/new?template=feature_request.yml).
|
||||
- Please do not submit bugs that are specific to ZFS. We plan to add integration with ZFS utilities in the near future.
|
||||
|
||||
### Before submitting a bug report:
|
||||
|
||||
- Check the [common issues guide](https://beszel.dev/guide/common-issues).
|
||||
- Search existing [issues](https://github.com/henrygd/beszel/issues) and [discussions](https://github.com/henrygd/beszel/discussions) (including closed).
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Description
|
||||
description: Explain the issue you experienced clearly and concisely.
|
||||
placeholder: I went to the coffee pot and it was empty.
|
||||
label: Problem Description
|
||||
description: |
|
||||
How to write a good bug report?
|
||||
|
||||
- Respect the issue template as much as possible.
|
||||
- The title should be short and descriptive.
|
||||
- Explain the conditions which led you to report this issue: the context.
|
||||
- The context should lead to something, a problem that you’re facing.
|
||||
- Remain clear and concise.
|
||||
- Format your messages to help the reader focus on what matters and understand the structure of your message, use [Markdown syntax](https://help.github.com/articles/github-flavored-markdown)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: expected-behavior
|
||||
attributes:
|
||||
label: Expected Behavior
|
||||
description: In a perfect world, what should have happened?
|
||||
description: |
|
||||
In a perfect world, what should have happened?
|
||||
**Important:** Be specific. Vague descriptions like "it should work" are not helpful.
|
||||
placeholder: When I got to the coffee pot, it should have been full.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: steps-to-reproduce
|
||||
attributes:
|
||||
label: Steps to Reproduce
|
||||
description: Describe how to reproduce the issue in repeatable steps.
|
||||
description: |
|
||||
Provide detailed, numbered steps that someone else can follow to reproduce the issue.
|
||||
**Important:** Vague descriptions like "it doesn't work" or "it's broken" will result in the issue being closed.
|
||||
Include specific actions, URLs, button clicks, and any relevant data or configuration.
|
||||
placeholder: |
|
||||
1. Go to the coffee pot.
|
||||
2. Make more coffee.
|
||||
3. Pour it into a cup.
|
||||
4. Observe that the cup is empty instead of full.
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: category
|
||||
attributes:
|
||||
label: Category
|
||||
description: Which category does this relate to most?
|
||||
options:
|
||||
- Metrics
|
||||
- Charts & Visualization
|
||||
- Settings & Configuration
|
||||
- Notifications & Alerts
|
||||
- Authentication
|
||||
- Installation
|
||||
- Performance
|
||||
- UI / UX
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: metrics
|
||||
attributes:
|
||||
label: Affected Metrics
|
||||
description: If applicable, which specific metric does this relate to most?
|
||||
options:
|
||||
- CPU
|
||||
- Memory
|
||||
- Storage
|
||||
- Network
|
||||
- Containers
|
||||
- GPU
|
||||
- Sensors
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: system
|
||||
attributes:
|
||||
@@ -94,6 +88,7 @@ body:
|
||||
placeholder: linux/amd64 (agent), freebsd/arm64 (hub)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: input
|
||||
id: version
|
||||
attributes:
|
||||
@@ -101,6 +96,7 @@ body:
|
||||
placeholder: 0.9.1
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: install-method
|
||||
attributes:
|
||||
@@ -114,18 +110,21 @@ body:
|
||||
- Other (please describe above)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: config
|
||||
attributes:
|
||||
label: Configuration
|
||||
description: Please provide any relevant service configuration
|
||||
render: yaml
|
||||
|
||||
- type: textarea
|
||||
id: hub-logs
|
||||
attributes:
|
||||
label: Hub Logs
|
||||
description: Check the logs page in PocketBase (`/_/#/logs`) for relevant errors (copy JSON).
|
||||
render: json
|
||||
|
||||
- type: textarea
|
||||
id: agent-logs
|
||||
attributes:
|
||||
|
||||
3
.github/ISSUE_TEMPLATE/config.yml
vendored
3
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,5 +1,8 @@
|
||||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: 🗣️ Translations
|
||||
url: https://crowdin.com/project/beszel
|
||||
about: Please report translation issues and request new translations here.
|
||||
- name: 💬 Support and questions
|
||||
url: https://github.com/henrygd/beszel/discussions
|
||||
about: Ask and answer questions here.
|
||||
|
||||
81
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
81
.github/ISSUE_TEMPLATE/feature_request.yml
vendored
@@ -1,8 +1,25 @@
|
||||
name: 🚀 Feature request
|
||||
description: Request a new feature or change.
|
||||
title: "[Feature]: "
|
||||
labels: ["enhancement", "needs review"]
|
||||
labels: ["enhancement"]
|
||||
body:
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Welcome!
|
||||
description: |
|
||||
The issue tracker is for reporting bugs and feature requests only. For end-user related support questions, please use the **[GitHub Discussions](https://github.com/henrygd/beszel/discussions)** instead.
|
||||
|
||||
Please note:
|
||||
- For **Bug reports**, use the [Bug Form](https://github.com/henrygd/beszel/issues/new?template=bug_report.yml).
|
||||
- Any requests for new translations should be requested within the [crowdin project](https://crowdin.com/project/beszel).
|
||||
- Create one issue per feature request. This helps us keep track of requests and prioritize them accordingly.
|
||||
|
||||
options:
|
||||
- label: I have searched open and closed feature requests to make sure this or a similar feature request does not already exist.
|
||||
required: true
|
||||
- label: This is a feature request, not a bug report or support question.
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: component
|
||||
attributes:
|
||||
@@ -12,16 +29,25 @@ body:
|
||||
- Hub
|
||||
- Agent
|
||||
- Hub & Agent
|
||||
default: 0
|
||||
validations:
|
||||
required: true
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: Before submitting, please search existing [issues](https://github.com/henrygd/beszel/issues) and [discussions](https://github.com/henrygd/beszel/discussions) (including closed).
|
||||
|
||||
- type: textarea
|
||||
id: description
|
||||
attributes:
|
||||
label: Describe the feature you would like to see
|
||||
label: Description
|
||||
description: |
|
||||
Describe the solution or feature you'd like. Explain what problem this solves or what value it adds.
|
||||
**Important:** Be specific and detailed. Vague requests like "make it better" will be closed.
|
||||
placeholder: |
|
||||
Example:
|
||||
- What is the feature?
|
||||
- What problem does it solve?
|
||||
- How should it work?
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: motivation
|
||||
attributes:
|
||||
@@ -29,48 +55,3 @@ body:
|
||||
description: Why do you want this feature? What problem does it solve?
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Describe how you would like to see this feature implemented
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: Screenshots
|
||||
description: Please attach any relevant screenshots, such as images from your current solution or similar implementations.
|
||||
validations:
|
||||
required: false
|
||||
- type: dropdown
|
||||
id: category
|
||||
attributes:
|
||||
label: Category
|
||||
description: Which category does this relate to most?
|
||||
options:
|
||||
- Metrics
|
||||
- Charts & Visualization
|
||||
- Settings & Configuration
|
||||
- Notifications & Alerts
|
||||
- Authentication
|
||||
- Installation
|
||||
- Performance
|
||||
- UI / UX
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
- type: dropdown
|
||||
id: metrics
|
||||
attributes:
|
||||
label: Affected Metrics
|
||||
description: If applicable, which specific metric does this relate to most?
|
||||
options:
|
||||
- CPU
|
||||
- Memory
|
||||
- Storage
|
||||
- Network
|
||||
- Containers
|
||||
- GPU
|
||||
- Sensors
|
||||
- Other
|
||||
validations:
|
||||
required: true
|
||||
8
.github/workflows/inactivity-actions.yml
vendored
8
.github/workflows/inactivity-actions.yml
vendored
@@ -6,6 +6,7 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
actions: write
|
||||
issues: write
|
||||
pull-requests: write
|
||||
|
||||
@@ -48,11 +49,16 @@ jobs:
|
||||
# Action can not skip PRs, set it to 100 years to cover it.
|
||||
days-before-pr-stale: 36524
|
||||
|
||||
# Max issues to process before early exit. Next run resumes from cache. GH API limit: 5000.
|
||||
operations-per-run: 1500
|
||||
|
||||
# Labels
|
||||
stale-issue-label: 'stale'
|
||||
remove-stale-when-updated: true
|
||||
only-issue-labels: 'awaiting-requester'
|
||||
any-of-labels: 'awaiting-requester'
|
||||
exempt-issue-labels: 'enhancement'
|
||||
|
||||
# Exemptions
|
||||
exempt-assignees: true
|
||||
|
||||
exempt-milestones: true
|
||||
82
.github/workflows/label-from-dropdown.yml
vendored
82
.github/workflows/label-from-dropdown.yml
vendored
@@ -1,82 +0,0 @@
|
||||
name: Label issues from dropdowns
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened]
|
||||
|
||||
jobs:
|
||||
label_from_dropdown:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
steps:
|
||||
- name: Apply labels based on dropdown choices
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
|
||||
const issueNumber = context.issue.number;
|
||||
const owner = context.repo.owner;
|
||||
const repo = context.repo.repo;
|
||||
|
||||
// Get the issue body
|
||||
const body = context.payload.issue.body;
|
||||
|
||||
// Helper to find dropdown value in the body (assuming markdown format)
|
||||
function extractSectionValue(heading) {
|
||||
const regex = new RegExp(`### ${heading}\\s+([\\s\\S]*?)(?:\\n###|$)`, 'i');
|
||||
const match = body.match(regex);
|
||||
if (match) {
|
||||
// Get the first non-empty line after the heading
|
||||
const lines = match[1].split('\n').map(l => l.trim()).filter(Boolean);
|
||||
return lines[0] || null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// Extract dropdown selections
|
||||
const category = extractSectionValue('Category');
|
||||
const metrics = extractSectionValue('Affected Metrics');
|
||||
const component = extractSectionValue('Component');
|
||||
|
||||
// Build labels to add
|
||||
let labelsToAdd = [];
|
||||
if (category) labelsToAdd.push(category);
|
||||
if (metrics) labelsToAdd.push(metrics);
|
||||
if (component) labelsToAdd.push(component);
|
||||
|
||||
// Get existing labels in the repo
|
||||
const { data: existingLabels } = await github.rest.issues.listLabelsForRepo({
|
||||
owner,
|
||||
repo,
|
||||
per_page: 100
|
||||
});
|
||||
const existingLabelNames = existingLabels.map(l => l.name);
|
||||
|
||||
// Find labels that need to be created
|
||||
const labelsToCreate = labelsToAdd.filter(label => !existingLabelNames.includes(label));
|
||||
|
||||
// Create missing labels (with a default color)
|
||||
for (const label of labelsToCreate) {
|
||||
try {
|
||||
await github.rest.issues.createLabel({
|
||||
owner,
|
||||
repo,
|
||||
name: label,
|
||||
color: 'ededed' // light gray, you can pick any hex color
|
||||
});
|
||||
} catch (e) {
|
||||
// Ignore if label already exists (race condition), otherwise rethrow
|
||||
if (!e || e.status !== 422) throw e;
|
||||
}
|
||||
}
|
||||
|
||||
// Now apply all labels (they all exist now)
|
||||
if (labelsToAdd.length > 0) {
|
||||
await github.rest.issues.addLabels({
|
||||
owner,
|
||||
repo,
|
||||
issue_number: issueNumber,
|
||||
labels: labelsToAdd
|
||||
});
|
||||
}
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,6 +10,7 @@ dist
|
||||
*.exe
|
||||
internal/cmd/hub/hub
|
||||
internal/cmd/agent/agent
|
||||
agent.test
|
||||
node_modules
|
||||
build
|
||||
*timestamp*
|
||||
|
||||
@@ -76,6 +76,18 @@ builds:
|
||||
- goos: windows
|
||||
goarch: riscv64
|
||||
|
||||
- id: beszel-agent-linux-amd64-glibc
|
||||
binary: beszel-agent
|
||||
main: internal/cmd/agent/agent.go
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
flags:
|
||||
- -tags=glibc
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
- amd64
|
||||
|
||||
archives:
|
||||
- id: beszel-agent
|
||||
formats: [tar.gz]
|
||||
@@ -89,6 +101,15 @@ archives:
|
||||
- goos: windows
|
||||
formats: [zip]
|
||||
|
||||
- id: beszel-agent-linux-amd64-glibc
|
||||
formats: [tar.gz]
|
||||
ids:
|
||||
- beszel-agent-linux-amd64-glibc
|
||||
name_template: >-
|
||||
{{ .Binary }}_
|
||||
{{- .Os }}_
|
||||
{{- .Arch }}_glibc
|
||||
|
||||
- id: beszel
|
||||
formats: [tar.gz]
|
||||
ids:
|
||||
@@ -137,9 +158,7 @@ nfpms:
|
||||
- debconf
|
||||
scripts:
|
||||
templates: ./supplemental/debian/templates
|
||||
# Currently broken due to a bug in goreleaser
|
||||
# https://github.com/goreleaser/goreleaser/issues/5487
|
||||
#config: ./supplemental/debian/config.sh
|
||||
config: ./supplemental/debian/config.sh
|
||||
|
||||
scoops:
|
||||
- ids: [beszel-agent]
|
||||
|
||||
41
Makefile
41
Makefile
@@ -3,6 +3,40 @@ OS ?= $(shell go env GOOS)
|
||||
ARCH ?= $(shell go env GOARCH)
|
||||
# Skip building the web UI if true
|
||||
SKIP_WEB ?= false
|
||||
# Controls NVML/glibc agent build tag behavior:
|
||||
# - auto (default): enable on linux/amd64 glibc hosts
|
||||
# - true: always enable
|
||||
# - false: always disable
|
||||
NVML ?= auto
|
||||
|
||||
# Detect glibc host for local linux/amd64 builds.
|
||||
HOST_GLIBC := $(shell \
|
||||
if [ "$(OS)" = "linux" ] && [ "$(ARCH)" = "amd64" ]; then \
|
||||
for p in /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /lib/ld-linux-x86-64.so.2; do \
|
||||
[ -e "$$p" ] && { echo true; exit 0; }; \
|
||||
done; \
|
||||
if command -v ldd >/dev/null 2>&1; then \
|
||||
if ldd --version 2>&1 | tr '[:upper:]' '[:lower:]' | awk '/gnu libc|glibc/{found=1} END{exit !found}'; then \
|
||||
echo true; \
|
||||
else \
|
||||
echo false; \
|
||||
fi; \
|
||||
else \
|
||||
echo false; \
|
||||
fi; \
|
||||
else \
|
||||
echo false; \
|
||||
fi)
|
||||
|
||||
# Enable glibc build tag for NVML on supported Linux builds.
|
||||
AGENT_GO_TAGS :=
|
||||
ifeq ($(NVML),true)
|
||||
AGENT_GO_TAGS := -tags glibc
|
||||
else ifeq ($(NVML),auto)
|
||||
ifeq ($(HOST_GLIBC),true)
|
||||
AGENT_GO_TAGS := -tags glibc
|
||||
endif
|
||||
endif
|
||||
|
||||
# Set executable extension based on target OS
|
||||
EXE_EXT := $(if $(filter windows,$(OS)),.exe,)
|
||||
@@ -17,7 +51,6 @@ clean:
|
||||
lint:
|
||||
golangci-lint run
|
||||
|
||||
test: export GOEXPERIMENT=synctest
|
||||
test:
|
||||
go test -tags=testing ./...
|
||||
|
||||
@@ -54,7 +87,7 @@ fetch-smartctl-conditional:
|
||||
|
||||
# Update build-agent to include conditional .NET build
|
||||
build-agent: tidy build-dotnet-conditional fetch-smartctl-conditional
|
||||
GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel-agent_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/agent
|
||||
GOOS=$(OS) GOARCH=$(ARCH) go build $(AGENT_GO_TAGS) -o ./build/beszel-agent_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/agent
|
||||
|
||||
build-hub: tidy $(if $(filter false,$(SKIP_WEB)),build-web-ui)
|
||||
GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/hub
|
||||
@@ -90,9 +123,9 @@ dev-hub:
|
||||
|
||||
dev-agent:
|
||||
@if command -v entr >/dev/null 2>&1; then \
|
||||
find ./internal/cmd/agent/*.go ./agent/*.go | entr -r go run github.com/henrygd/beszel/internal/cmd/agent; \
|
||||
find ./internal/cmd/agent/*.go ./agent/*.go | entr -r go run $(AGENT_GO_TAGS) github.com/henrygd/beszel/internal/cmd/agent; \
|
||||
else \
|
||||
go run github.com/henrygd/beszel/internal/cmd/agent; \
|
||||
go run $(AGENT_GO_TAGS) github.com/henrygd/beszel/internal/cmd/agent; \
|
||||
fi
|
||||
|
||||
build-dotnet:
|
||||
|
||||
@@ -5,11 +5,7 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -17,9 +13,9 @@ import (
|
||||
"github.com/gliderlabs/ssh"
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/deltatracker"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/shirou/gopsutil/v4/host"
|
||||
gossh "golang.org/x/crypto/ssh"
|
||||
)
|
||||
|
||||
@@ -65,18 +61,18 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
agent.netIoStats = make(map[uint16]system.NetIoStats)
|
||||
agent.netInterfaceDeltaTrackers = make(map[uint16]*deltatracker.DeltaTracker[string, uint64])
|
||||
|
||||
agent.dataDir, err = getDataDir(dataDir...)
|
||||
agent.dataDir, err = GetDataDir(dataDir...)
|
||||
if err != nil {
|
||||
slog.Warn("Data directory not found")
|
||||
} else {
|
||||
slog.Info("Data directory", "path", agent.dataDir)
|
||||
}
|
||||
|
||||
agent.memCalc, _ = GetEnv("MEM_CALC")
|
||||
agent.memCalc, _ = utils.GetEnv("MEM_CALC")
|
||||
agent.sensorConfig = agent.newSensorConfig()
|
||||
|
||||
// Parse disk usage cache duration (e.g., "15m", "1h") to avoid waking sleeping disks
|
||||
if diskUsageCache, exists := GetEnv("DISK_USAGE_CACHE"); exists {
|
||||
if diskUsageCache, exists := utils.GetEnv("DISK_USAGE_CACHE"); exists {
|
||||
if duration, err := time.ParseDuration(diskUsageCache); err == nil {
|
||||
agent.diskUsageCacheDuration = duration
|
||||
slog.Info("DISK_USAGE_CACHE", "duration", duration)
|
||||
@@ -86,7 +82,7 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
}
|
||||
|
||||
// Set up slog with a log level determined by the LOG_LEVEL env var
|
||||
if logLevelStr, exists := GetEnv("LOG_LEVEL"); exists {
|
||||
if logLevelStr, exists := utils.GetEnv("LOG_LEVEL"); exists {
|
||||
switch strings.ToLower(logLevelStr) {
|
||||
case "debug":
|
||||
agent.debug = true
|
||||
@@ -107,7 +103,7 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
agent.refreshSystemDetails()
|
||||
|
||||
// SMART_INTERVAL env var to update smart data at this interval
|
||||
if smartIntervalEnv, exists := GetEnv("SMART_INTERVAL"); exists {
|
||||
if smartIntervalEnv, exists := utils.GetEnv("SMART_INTERVAL"); exists {
|
||||
if duration, err := time.ParseDuration(smartIntervalEnv); err == nil && duration > 0 {
|
||||
agent.systemDetails.SmartInterval = duration
|
||||
slog.Info("SMART_INTERVAL", "duration", duration)
|
||||
@@ -152,15 +148,6 @@ func NewAgent(dataDir ...string) (agent *Agent, err error) {
|
||||
return agent, nil
|
||||
}
|
||||
|
||||
// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
|
||||
func GetEnv(key string) (value string, exists bool) {
|
||||
if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
|
||||
return value, exists
|
||||
}
|
||||
// Fallback to the old unprefixed key
|
||||
return os.LookupEnv(key)
|
||||
}
|
||||
|
||||
func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedData {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
@@ -217,7 +204,7 @@ func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedD
|
||||
data.Stats.ExtraFs[key] = stats
|
||||
// Add percentages to Info struct for dashboard
|
||||
if stats.DiskTotal > 0 {
|
||||
pct := twoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
|
||||
pct := utils.TwoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
|
||||
data.Info.ExtraFsPct[key] = pct
|
||||
}
|
||||
}
|
||||
@@ -228,38 +215,12 @@ func (a *Agent) gatherStats(options common.DataRequestOptions) *system.CombinedD
|
||||
return data
|
||||
}
|
||||
|
||||
// StartAgent initializes and starts the agent with optional WebSocket connection
|
||||
// Start initializes and starts the agent with optional WebSocket connection
|
||||
func (a *Agent) Start(serverOptions ServerOptions) error {
|
||||
a.keys = serverOptions.Keys
|
||||
return a.connectionManager.Start(serverOptions)
|
||||
}
|
||||
|
||||
func (a *Agent) getFingerprint() string {
|
||||
// first look for a fingerprint in the data directory
|
||||
if a.dataDir != "" {
|
||||
if fp, err := os.ReadFile(filepath.Join(a.dataDir, "fingerprint")); err == nil {
|
||||
return string(fp)
|
||||
}
|
||||
}
|
||||
|
||||
// if no fingerprint is found, generate one
|
||||
fingerprint, err := host.HostID()
|
||||
// we ignore a commonly known "product_uuid" known not to be unique
|
||||
if err != nil || fingerprint == "" || fingerprint == "03000200-0400-0500-0006-000700080009" {
|
||||
fingerprint = a.systemDetails.Hostname + a.systemDetails.CpuModel
|
||||
}
|
||||
|
||||
// hash fingerprint
|
||||
sum := sha256.Sum256([]byte(fingerprint))
|
||||
fingerprint = hex.EncodeToString(sum[:24])
|
||||
|
||||
// save fingerprint to data directory
|
||||
if a.dataDir != "" {
|
||||
err = os.WriteFile(filepath.Join(a.dataDir, "fingerprint"), []byte(fingerprint), 0644)
|
||||
if err != nil {
|
||||
slog.Warn("Failed to save fingerprint", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
return fingerprint
|
||||
return GetFingerprint(a.dataDir, a.systemDetails.Hostname, a.systemDetails.CpuModel)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ func GetBatteryStats() (batteryPercent uint8, batteryState uint8, err error) {
|
||||
continue
|
||||
}
|
||||
totalCapacity += bat.Full
|
||||
totalCharge += bat.Current
|
||||
totalCharge += min(bat.Current, bat.Full)
|
||||
if bat.State.Raw >= 0 {
|
||||
batteryState = uint8(bat.State.Raw)
|
||||
}
|
||||
|
||||
@@ -14,10 +14,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/lxzan/gws"
|
||||
@@ -46,7 +44,7 @@ type WebSocketClient struct {
|
||||
// newWebSocketClient creates a new WebSocket client for the given agent.
|
||||
// It reads configuration from environment variables and validates the hub URL.
|
||||
func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
|
||||
hubURLStr, exists := GetEnv("HUB_URL")
|
||||
hubURLStr, exists := utils.GetEnv("HUB_URL")
|
||||
if !exists {
|
||||
return nil, errors.New("HUB_URL environment variable not set")
|
||||
}
|
||||
@@ -75,12 +73,12 @@ func newWebSocketClient(agent *Agent) (client *WebSocketClient, err error) {
|
||||
// If neither is set, it returns an error.
|
||||
func getToken() (string, error) {
|
||||
// get token from env var
|
||||
token, _ := GetEnv("TOKEN")
|
||||
token, _ := utils.GetEnv("TOKEN")
|
||||
if token != "" {
|
||||
return token, nil
|
||||
}
|
||||
// get token from file
|
||||
tokenFile, _ := GetEnv("TOKEN_FILE")
|
||||
tokenFile, _ := utils.GetEnv("TOKEN_FILE")
|
||||
if tokenFile == "" {
|
||||
return "", errors.New("must set TOKEN or TOKEN_FILE")
|
||||
}
|
||||
@@ -200,7 +198,7 @@ func (client *WebSocketClient) handleAuthChallenge(msg *common.HubRequest[cbor.R
|
||||
}
|
||||
|
||||
if authRequest.NeedSysInfo {
|
||||
response.Name, _ = GetEnv("SYSTEM_NAME")
|
||||
response.Name, _ = utils.GetEnv("SYSTEM_NAME")
|
||||
response.Hostname = client.agent.systemDetails.Hostname
|
||||
serverAddr := client.agent.connectionManager.serverOptions.Addr
|
||||
_, response.Port, _ = net.SplitHostPort(serverAddr)
|
||||
@@ -259,40 +257,16 @@ func (client *WebSocketClient) sendMessage(data any) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// sendResponse sends a response with optional request ID for the new protocol
|
||||
// sendResponse sends a response with optional request ID.
|
||||
// For ID-based requests, we must populate legacy typed fields for backward
|
||||
// compatibility with older hubs (<= 0.17) that don't read the generic Data field.
|
||||
func (client *WebSocketClient) sendResponse(data any, requestID *uint32) error {
|
||||
if requestID != nil {
|
||||
// New format with ID - use typed fields
|
||||
response := common.AgentResponse{
|
||||
Id: requestID,
|
||||
}
|
||||
|
||||
// Set the appropriate typed field based on data type
|
||||
switch v := data.(type) {
|
||||
case *system.CombinedData:
|
||||
response.SystemData = v
|
||||
case *common.FingerprintResponse:
|
||||
response.Fingerprint = v
|
||||
case string:
|
||||
response.String = &v
|
||||
case map[string]smart.SmartData:
|
||||
response.SmartData = v
|
||||
case systemd.ServiceDetails:
|
||||
response.ServiceInfo = v
|
||||
// case []byte:
|
||||
// response.RawBytes = v
|
||||
// case string:
|
||||
// response.RawBytes = []byte(v)
|
||||
default:
|
||||
// For any other type, convert to error
|
||||
response.Error = fmt.Sprintf("unsupported response type: %T", data)
|
||||
}
|
||||
|
||||
response := newAgentResponse(data, requestID)
|
||||
return client.sendMessage(response)
|
||||
} else {
|
||||
}
|
||||
// Legacy format - send data directly
|
||||
return client.sendMessage(data)
|
||||
}
|
||||
}
|
||||
|
||||
// getUserAgent returns one of two User-Agent strings based on current time.
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -91,8 +91,8 @@ func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
|
||||
c.eventChan = make(chan ConnectionEvent, 1)
|
||||
|
||||
// signal handling for shutdown
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
|
||||
sigCtx, stopSignals := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
defer stopSignals()
|
||||
|
||||
c.startWsTicker()
|
||||
c.connect()
|
||||
@@ -109,8 +109,8 @@ func (c *ConnectionManager) Start(serverOptions ServerOptions) error {
|
||||
_ = c.startWebSocketConnection()
|
||||
case <-healthTicker:
|
||||
_ = health.Update()
|
||||
case <-sigChan:
|
||||
slog.Info("Shutting down")
|
||||
case <-sigCtx.Done():
|
||||
slog.Info("Shutting down", "cause", context.Cause(sigCtx))
|
||||
_ = c.agent.StopServer()
|
||||
c.closeWebSocket()
|
||||
return health.CleanUp()
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
|
||||
@@ -14,10 +14,10 @@ var lastPerCoreCpuTimes = make(map[uint16][]cpu.TimesStat)
|
||||
// init initializes the CPU monitoring by storing the initial CPU times
|
||||
// for the default 60-second cache interval.
|
||||
func init() {
|
||||
if times, err := cpu.Times(false); err == nil {
|
||||
if times, err := cpu.Times(false); err == nil && len(times) > 0 {
|
||||
lastCpuTimes[60000] = times[0]
|
||||
}
|
||||
if perCoreTimes, err := cpu.Times(true); err == nil {
|
||||
if perCoreTimes, err := cpu.Times(true); err == nil && len(perCoreTimes) > 0 {
|
||||
lastPerCoreCpuTimes[60000] = perCoreTimes
|
||||
}
|
||||
}
|
||||
@@ -89,10 +89,7 @@ func getPerCoreCpuUsage(cacheTimeMs uint16) (system.Uint8Slice, error) {
|
||||
lastTimes := lastPerCoreCpuTimes[cacheTimeMs]
|
||||
|
||||
// Limit to the number of cores available in both samples
|
||||
length := len(perCoreTimes)
|
||||
if len(lastTimes) < length {
|
||||
length = len(lastTimes)
|
||||
}
|
||||
length := min(len(lastTimes), len(perCoreTimes))
|
||||
|
||||
usage := make([]uint8, length)
|
||||
for i := 0; i < length; i++ {
|
||||
|
||||
@@ -6,17 +6,19 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
)
|
||||
|
||||
// getDataDir returns the path to the data directory for the agent and an error
|
||||
// GetDataDir returns the path to the data directory for the agent and an error
|
||||
// if the directory is not valid. Attempts to find the optimal data directory if
|
||||
// no data directories are provided.
|
||||
func getDataDir(dataDirs ...string) (string, error) {
|
||||
func GetDataDir(dataDirs ...string) (string, error) {
|
||||
if len(dataDirs) > 0 {
|
||||
return testDataDirs(dataDirs)
|
||||
}
|
||||
|
||||
dataDir, _ := GetEnv("DATA_DIR")
|
||||
dataDir, _ := utils.GetEnv("DATA_DIR")
|
||||
if dataDir != "" {
|
||||
dataDirs = append(dataDirs, dataDir)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
@@ -17,7 +16,7 @@ func TestGetDataDir(t *testing.T) {
|
||||
// Test with explicit dataDir parameter
|
||||
t.Run("explicit data dir", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
result, err := getDataDir(tempDir)
|
||||
result, err := GetDataDir(tempDir)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tempDir, result)
|
||||
})
|
||||
@@ -26,7 +25,7 @@ func TestGetDataDir(t *testing.T) {
|
||||
t.Run("explicit data dir - create new", func(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
newDir := filepath.Join(tempDir, "new-data-dir")
|
||||
result, err := getDataDir(newDir)
|
||||
result, err := GetDataDir(newDir)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, newDir, result)
|
||||
|
||||
@@ -52,7 +51,7 @@ func TestGetDataDir(t *testing.T) {
|
||||
|
||||
os.Setenv("BESZEL_AGENT_DATA_DIR", tempDir)
|
||||
|
||||
result, err := getDataDir()
|
||||
result, err := GetDataDir()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tempDir, result)
|
||||
})
|
||||
@@ -60,7 +59,7 @@ func TestGetDataDir(t *testing.T) {
|
||||
// Test with invalid explicit dataDir
|
||||
t.Run("invalid explicit data dir", func(t *testing.T) {
|
||||
invalidPath := "/invalid/path/that/cannot/be/created"
|
||||
_, err := getDataDir(invalidPath)
|
||||
_, err := GetDataDir(invalidPath)
|
||||
assert.Error(t, err)
|
||||
})
|
||||
|
||||
@@ -79,7 +78,7 @@ func TestGetDataDir(t *testing.T) {
|
||||
|
||||
// This will try platform-specific defaults, which may or may not work
|
||||
// We're mainly testing that it doesn't panic and returns some result
|
||||
result, err := getDataDir()
|
||||
result, err := GetDataDir()
|
||||
// We don't assert success/failure here since it depends on system permissions
|
||||
// Just verify we get a string result if no error
|
||||
if err == nil {
|
||||
|
||||
556
agent/disk.go
556
agent/disk.go
@@ -8,11 +8,31 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/disk"
|
||||
)
|
||||
|
||||
// fsRegistrationContext holds the shared lookup state needed to resolve a
|
||||
// filesystem into the tracked fsStats key and metadata.
|
||||
type fsRegistrationContext struct {
|
||||
filesystem string // value of optional FILESYSTEM env var
|
||||
isWindows bool
|
||||
efPath string // path to extra filesystems (default "/extra-filesystems")
|
||||
diskIoCounters map[string]disk.IOCountersStat
|
||||
}
|
||||
|
||||
// diskDiscovery groups the transient state for a single initializeDiskInfo run so
|
||||
// helper methods can share the same partitions, mount paths, and lookup functions.
|
||||
type diskDiscovery struct {
|
||||
agent *Agent
|
||||
rootMountPoint string
|
||||
partitions []disk.PartitionStat
|
||||
usageFn func(string) (*disk.UsageStat, error)
|
||||
ctx fsRegistrationContext
|
||||
}
|
||||
|
||||
// parseFilesystemEntry parses a filesystem entry in the format "device__customname"
|
||||
// Returns the device/filesystem part and the custom name part
|
||||
func parseFilesystemEntry(entry string) (device, customName string) {
|
||||
@@ -26,10 +46,230 @@ func parseFilesystemEntry(entry string) (device, customName string) {
|
||||
return device, customName
|
||||
}
|
||||
|
||||
// extraFilesystemPartitionInfo derives the I/O device and optional display name
|
||||
// for a mounted /extra-filesystems partition. Prefer the partition device reported
|
||||
// by the system and only use the folder name for custom naming metadata.
|
||||
func extraFilesystemPartitionInfo(p disk.PartitionStat) (device, customName string) {
|
||||
device = strings.TrimSpace(p.Device)
|
||||
folderDevice, customName := parseFilesystemEntry(filepath.Base(p.Mountpoint))
|
||||
if device == "" {
|
||||
device = folderDevice
|
||||
}
|
||||
return device, customName
|
||||
}
|
||||
|
||||
func isDockerSpecialMountpoint(mountpoint string) bool {
|
||||
switch mountpoint {
|
||||
case "/etc/hosts", "/etc/resolv.conf", "/etc/hostname":
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// registerFilesystemStats resolves the tracked key and stats payload for a
|
||||
// filesystem before it is inserted into fsStats.
|
||||
func registerFilesystemStats(existing map[string]*system.FsStats, device, mountpoint string, root bool, customName string, ctx fsRegistrationContext) (string, *system.FsStats, bool) {
|
||||
key := device
|
||||
if !ctx.isWindows {
|
||||
key = filepath.Base(device)
|
||||
}
|
||||
|
||||
if root {
|
||||
// Try to map root device to a diskIoCounters entry. First checks for an
|
||||
// exact key match, then uses findIoDevice for normalized / prefix-based
|
||||
// matching (e.g. nda0p2 -> nda0), and finally falls back to FILESYSTEM.
|
||||
if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
|
||||
if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
|
||||
key = matchedKey
|
||||
} else if ctx.filesystem != "" {
|
||||
if matchedKey, match := findIoDevice(ctx.filesystem, ctx.diskIoCounters); match {
|
||||
key = matchedKey
|
||||
}
|
||||
}
|
||||
if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
|
||||
slog.Warn("Root I/O unmapped; set FILESYSTEM", "device", device, "mountpoint", mountpoint)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Check if non-root has diskstats and prefer the folder device for
|
||||
// /extra-filesystems mounts when the discovered partition device is a
|
||||
// mapper path (e.g. luks UUID) that obscures the underlying block device.
|
||||
if _, ioMatch := ctx.diskIoCounters[key]; !ioMatch {
|
||||
if strings.HasPrefix(mountpoint, ctx.efPath) {
|
||||
folderDevice, _ := parseFilesystemEntry(filepath.Base(mountpoint))
|
||||
if folderDevice != "" {
|
||||
if matchedKey, match := findIoDevice(folderDevice, ctx.diskIoCounters); match {
|
||||
key = matchedKey
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, ioMatch = ctx.diskIoCounters[key]; !ioMatch {
|
||||
if matchedKey, match := findIoDevice(key, ctx.diskIoCounters); match {
|
||||
key = matchedKey
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, exists := existing[key]; exists {
|
||||
return "", nil, false
|
||||
}
|
||||
|
||||
fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
|
||||
if customName != "" {
|
||||
fsStats.Name = customName
|
||||
}
|
||||
return key, fsStats, true
|
||||
}
|
||||
|
||||
// addFsStat inserts a discovered filesystem if it resolves to a new tracking
|
||||
// key. The key selection itself lives in registerFilesystemStats so that logic
|
||||
// can stay directly unit-tested.
|
||||
func (d *diskDiscovery) addFsStat(device, mountpoint string, root bool, customName string) {
|
||||
key, fsStats, ok := registerFilesystemStats(d.agent.fsStats, device, mountpoint, root, customName, d.ctx)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
d.agent.fsStats[key] = fsStats
|
||||
name := key
|
||||
if customName != "" {
|
||||
name = customName
|
||||
}
|
||||
slog.Info("Detected disk", "name", name, "device", device, "mount", mountpoint, "io", key, "root", root)
|
||||
}
|
||||
|
||||
// addConfiguredRootFs resolves FILESYSTEM against partitions first, then falls
|
||||
// back to direct diskstats matching for setups like ZFS where partitions do not
|
||||
// expose the physical device name.
|
||||
func (d *diskDiscovery) addConfiguredRootFs() bool {
|
||||
if d.ctx.filesystem == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, p := range d.partitions {
|
||||
if filesystemMatchesPartitionSetting(d.ctx.filesystem, p) {
|
||||
d.addFsStat(p.Device, p.Mountpoint, true, "")
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// FILESYSTEM may name a physical disk absent from partitions (e.g. ZFS lists
|
||||
// dataset paths like zroot/ROOT/default, not block devices).
|
||||
if ioKey, match := findIoDevice(d.ctx.filesystem, d.ctx.diskIoCounters); match {
|
||||
d.agent.fsStats[ioKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
|
||||
return true
|
||||
}
|
||||
|
||||
slog.Warn("Partition details not found", "filesystem", d.ctx.filesystem)
|
||||
return false
|
||||
}
|
||||
|
||||
func isRootFallbackPartition(p disk.PartitionStat, rootMountPoint string) bool {
|
||||
return p.Mountpoint == rootMountPoint ||
|
||||
(isDockerSpecialMountpoint(p.Mountpoint) && strings.HasPrefix(p.Device, "/dev"))
|
||||
}
|
||||
|
||||
// addPartitionRootFs handles the non-configured root fallback path when a
|
||||
// partition looks like the active root mount but still needs translating to an
|
||||
// I/O device key.
|
||||
func (d *diskDiscovery) addPartitionRootFs(device, mountpoint string) bool {
|
||||
fs, match := findIoDevice(filepath.Base(device), d.ctx.diskIoCounters)
|
||||
if !match {
|
||||
return false
|
||||
}
|
||||
// The resolved I/O device is already known here, so use it directly to avoid
|
||||
// a second fallback search inside registerFilesystemStats.
|
||||
d.addFsStat(fs, mountpoint, true, "")
|
||||
return true
|
||||
}
|
||||
|
||||
// addLastResortRootFs is only used when neither FILESYSTEM nor partition-based
|
||||
// heuristics can identify root, so it picks the busiest I/O device as a final
|
||||
// fallback and preserves the root mountpoint for usage collection.
|
||||
func (d *diskDiscovery) addLastResortRootFs() {
|
||||
rootKey := mostActiveIoDevice(d.ctx.diskIoCounters)
|
||||
if rootKey != "" {
|
||||
slog.Warn("Using most active device for root I/O; set FILESYSTEM to override", "device", rootKey)
|
||||
} else {
|
||||
rootKey = filepath.Base(d.rootMountPoint)
|
||||
if _, exists := d.agent.fsStats[rootKey]; exists {
|
||||
rootKey = "root"
|
||||
}
|
||||
slog.Warn("Root I/O device not detected; set FILESYSTEM to override")
|
||||
}
|
||||
d.agent.fsStats[rootKey] = &system.FsStats{Root: true, Mountpoint: d.rootMountPoint}
|
||||
}
|
||||
|
||||
// findPartitionByFilesystemSetting matches an EXTRA_FILESYSTEMS entry against a
|
||||
// discovered partition either by mountpoint or by device suffix.
|
||||
func findPartitionByFilesystemSetting(filesystem string, partitions []disk.PartitionStat) (disk.PartitionStat, bool) {
|
||||
for _, p := range partitions {
|
||||
if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
|
||||
return p, true
|
||||
}
|
||||
}
|
||||
return disk.PartitionStat{}, false
|
||||
}
|
||||
|
||||
// addConfiguredExtraFsEntry resolves one EXTRA_FILESYSTEMS entry, preferring a
|
||||
// discovered partition and falling back to any path that disk.Usage accepts.
|
||||
func (d *diskDiscovery) addConfiguredExtraFsEntry(filesystem, customName string) {
|
||||
if p, found := findPartitionByFilesystemSetting(filesystem, d.partitions); found {
|
||||
d.addFsStat(p.Device, p.Mountpoint, false, customName)
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := d.usageFn(filesystem); err == nil {
|
||||
d.addFsStat(filepath.Base(filesystem), filesystem, false, customName)
|
||||
return
|
||||
} else {
|
||||
slog.Error("Invalid filesystem", "name", filesystem, "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// addConfiguredExtraFilesystems parses and registers the comma-separated
|
||||
// EXTRA_FILESYSTEMS env var entries.
|
||||
func (d *diskDiscovery) addConfiguredExtraFilesystems(extraFilesystems string) {
|
||||
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
|
||||
filesystem, customName := parseFilesystemEntry(fsEntry)
|
||||
d.addConfiguredExtraFsEntry(filesystem, customName)
|
||||
}
|
||||
}
|
||||
|
||||
// addPartitionExtraFs registers partitions mounted under /extra-filesystems so
|
||||
// their display names can come from the folder name while their I/O keys still
|
||||
// prefer the underlying partition device.
|
||||
func (d *diskDiscovery) addPartitionExtraFs(p disk.PartitionStat) {
|
||||
if !strings.HasPrefix(p.Mountpoint, d.ctx.efPath) {
|
||||
return
|
||||
}
|
||||
device, customName := extraFilesystemPartitionInfo(p)
|
||||
d.addFsStat(device, p.Mountpoint, false, customName)
|
||||
}
|
||||
|
||||
// addExtraFilesystemFolders handles bare directories under /extra-filesystems
|
||||
// that may not appear in partition discovery, while skipping mountpoints that
|
||||
// were already registered from higher-fidelity sources.
|
||||
func (d *diskDiscovery) addExtraFilesystemFolders(folderNames []string) {
|
||||
existingMountpoints := make(map[string]bool, len(d.agent.fsStats))
|
||||
for _, stats := range d.agent.fsStats {
|
||||
existingMountpoints[stats.Mountpoint] = true
|
||||
}
|
||||
|
||||
for _, folderName := range folderNames {
|
||||
mountpoint := filepath.Join(d.ctx.efPath, folderName)
|
||||
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
|
||||
if existingMountpoints[mountpoint] {
|
||||
continue
|
||||
}
|
||||
device, customName := parseFilesystemEntry(folderName)
|
||||
d.addFsStat(device, mountpoint, false, customName)
|
||||
}
|
||||
}
|
||||
|
||||
// Sets up the filesystems to monitor for disk usage and I/O.
|
||||
func (a *Agent) initializeDiskInfo() {
|
||||
filesystem, _ := GetEnv("FILESYSTEM")
|
||||
efPath := "/extra-filesystems"
|
||||
filesystem, _ := utils.GetEnv("FILESYSTEM")
|
||||
hasRoot := false
|
||||
isWindows := runtime.GOOS == "windows"
|
||||
|
||||
@@ -46,167 +286,223 @@ func (a *Agent) initializeDiskInfo() {
|
||||
}
|
||||
}
|
||||
|
||||
// ioContext := context.WithValue(a.sensorsContext,
|
||||
// common.EnvKey, common.EnvMap{common.HostProcEnvKey: "/tmp/testproc"},
|
||||
// )
|
||||
// diskIoCounters, err := disk.IOCountersWithContext(ioContext)
|
||||
|
||||
diskIoCounters, err := disk.IOCounters()
|
||||
if err != nil {
|
||||
slog.Error("Error getting diskstats", "err", err)
|
||||
}
|
||||
slog.Debug("Disk I/O", "diskstats", diskIoCounters)
|
||||
|
||||
// Helper function to add a filesystem to fsStats if it doesn't exist
|
||||
addFsStat := func(device, mountpoint string, root bool, customName ...string) {
|
||||
var key string
|
||||
if isWindows {
|
||||
key = device
|
||||
} else {
|
||||
key = filepath.Base(device)
|
||||
}
|
||||
var ioMatch bool
|
||||
if _, exists := a.fsStats[key]; !exists {
|
||||
if root {
|
||||
slog.Info("Detected root device", "name", key)
|
||||
// Check if root device is in /proc/diskstats, use fallback if not
|
||||
if _, ioMatch = diskIoCounters[key]; !ioMatch {
|
||||
key, ioMatch = findIoDevice(filesystem, diskIoCounters, a.fsStats)
|
||||
if !ioMatch {
|
||||
slog.Info("Using I/O fallback", "device", device, "mountpoint", mountpoint, "fallback", key)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Check if non-root has diskstats and fall back to folder name if not
|
||||
// Scenario: device is encrypted and named luks-2bcb02be-999d-4417-8d18-5c61e660fb6e - not in /proc/diskstats.
|
||||
// However, the device can be specified by mounting folder from luks device at /extra-filesystems/sda1
|
||||
if _, ioMatch = diskIoCounters[key]; !ioMatch {
|
||||
efBase := filepath.Base(mountpoint)
|
||||
if _, ioMatch = diskIoCounters[efBase]; ioMatch {
|
||||
key = efBase
|
||||
}
|
||||
}
|
||||
}
|
||||
fsStats := &system.FsStats{Root: root, Mountpoint: mountpoint}
|
||||
if len(customName) > 0 && customName[0] != "" {
|
||||
fsStats.Name = customName[0]
|
||||
}
|
||||
a.fsStats[key] = fsStats
|
||||
}
|
||||
ctx := fsRegistrationContext{
|
||||
filesystem: filesystem,
|
||||
isWindows: isWindows,
|
||||
diskIoCounters: diskIoCounters,
|
||||
efPath: "/extra-filesystems",
|
||||
}
|
||||
|
||||
// Get the appropriate root mount point for this system
|
||||
rootMountPoint := a.getRootMountPoint()
|
||||
discovery := diskDiscovery{
|
||||
agent: a,
|
||||
rootMountPoint: a.getRootMountPoint(),
|
||||
partitions: partitions,
|
||||
usageFn: disk.Usage,
|
||||
ctx: ctx,
|
||||
}
|
||||
|
||||
// Use FILESYSTEM env var to find root filesystem
|
||||
if filesystem != "" {
|
||||
for _, p := range partitions {
|
||||
if strings.HasSuffix(p.Device, filesystem) || p.Mountpoint == filesystem {
|
||||
addFsStat(p.Device, p.Mountpoint, true)
|
||||
hasRoot = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasRoot {
|
||||
slog.Warn("Partition details not found", "filesystem", filesystem)
|
||||
}
|
||||
}
|
||||
hasRoot = discovery.addConfiguredRootFs()
|
||||
|
||||
// Add EXTRA_FILESYSTEMS env var values to fsStats
|
||||
if extraFilesystems, exists := GetEnv("EXTRA_FILESYSTEMS"); exists {
|
||||
for _, fsEntry := range strings.Split(extraFilesystems, ",") {
|
||||
// Parse custom name from format: device__customname
|
||||
fs, customName := parseFilesystemEntry(fsEntry)
|
||||
|
||||
found := false
|
||||
for _, p := range partitions {
|
||||
if strings.HasSuffix(p.Device, fs) || p.Mountpoint == fs {
|
||||
addFsStat(p.Device, p.Mountpoint, false, customName)
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
// if not in partitions, test if we can get disk usage
|
||||
if !found {
|
||||
if _, err := disk.Usage(fs); err == nil {
|
||||
addFsStat(filepath.Base(fs), fs, false, customName)
|
||||
} else {
|
||||
slog.Error("Invalid filesystem", "name", fs, "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if extraFilesystems, exists := utils.GetEnv("EXTRA_FILESYSTEMS"); exists {
|
||||
discovery.addConfiguredExtraFilesystems(extraFilesystems)
|
||||
}
|
||||
|
||||
// Process partitions for various mount points
|
||||
for _, p := range partitions {
|
||||
// fmt.Println(p.Device, p.Mountpoint)
|
||||
// Binary root fallback or docker root fallback
|
||||
if !hasRoot && (p.Mountpoint == rootMountPoint || (p.Mountpoint == "/etc/hosts" && strings.HasPrefix(p.Device, "/dev"))) {
|
||||
fs, match := findIoDevice(filepath.Base(p.Device), diskIoCounters, a.fsStats)
|
||||
if match {
|
||||
addFsStat(fs, p.Mountpoint, true)
|
||||
hasRoot = true
|
||||
}
|
||||
}
|
||||
|
||||
// Check if device is in /extra-filesystems
|
||||
if strings.HasPrefix(p.Mountpoint, efPath) {
|
||||
device, customName := parseFilesystemEntry(p.Mountpoint)
|
||||
addFsStat(device, p.Mountpoint, false, customName)
|
||||
if !hasRoot && isRootFallbackPartition(p, discovery.rootMountPoint) {
|
||||
hasRoot = discovery.addPartitionRootFs(p.Device, p.Mountpoint)
|
||||
}
|
||||
discovery.addPartitionExtraFs(p)
|
||||
}
|
||||
|
||||
// Check all folders in /extra-filesystems and add them if not already present
|
||||
if folders, err := os.ReadDir(efPath); err == nil {
|
||||
existingMountpoints := make(map[string]bool)
|
||||
for _, stats := range a.fsStats {
|
||||
existingMountpoints[stats.Mountpoint] = true
|
||||
}
|
||||
if folders, err := os.ReadDir(discovery.ctx.efPath); err == nil {
|
||||
folderNames := make([]string, 0, len(folders))
|
||||
for _, folder := range folders {
|
||||
if folder.IsDir() {
|
||||
mountpoint := filepath.Join(efPath, folder.Name())
|
||||
slog.Debug("/extra-filesystems", "mountpoint", mountpoint)
|
||||
if !existingMountpoints[mountpoint] {
|
||||
device, customName := parseFilesystemEntry(folder.Name())
|
||||
addFsStat(device, mountpoint, false, customName)
|
||||
}
|
||||
folderNames = append(folderNames, folder.Name())
|
||||
}
|
||||
}
|
||||
discovery.addExtraFilesystemFolders(folderNames)
|
||||
}
|
||||
|
||||
// If no root filesystem set, use fallback
|
||||
// If no root filesystem set, try the most active I/O device as a last
|
||||
// resort (e.g. ZFS where dataset names are unrelated to disk names).
|
||||
if !hasRoot {
|
||||
rootDevice, _ := findIoDevice(filepath.Base(filesystem), diskIoCounters, a.fsStats)
|
||||
slog.Info("Root disk", "mountpoint", rootMountPoint, "io", rootDevice)
|
||||
a.fsStats[rootDevice] = &system.FsStats{Root: true, Mountpoint: rootMountPoint}
|
||||
discovery.addLastResortRootFs()
|
||||
}
|
||||
|
||||
a.pruneDuplicateRootExtraFilesystems()
|
||||
a.initializeDiskIoStats(diskIoCounters)
|
||||
}
|
||||
|
||||
// Returns matching device from /proc/diskstats,
|
||||
// or the device with the most reads if no match is found.
|
||||
// bool is true if a match was found.
|
||||
func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat, fsStats map[string]*system.FsStats) (string, bool) {
|
||||
var maxReadBytes uint64
|
||||
maxReadDevice := "/"
|
||||
// Removes extra filesystems that mirror root usage (https://github.com/henrygd/beszel/issues/1428).
|
||||
func (a *Agent) pruneDuplicateRootExtraFilesystems() {
|
||||
var rootMountpoint string
|
||||
for _, stats := range a.fsStats {
|
||||
if stats != nil && stats.Root {
|
||||
rootMountpoint = stats.Mountpoint
|
||||
break
|
||||
}
|
||||
}
|
||||
if rootMountpoint == "" {
|
||||
return
|
||||
}
|
||||
rootUsage, err := disk.Usage(rootMountpoint)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
for name, stats := range a.fsStats {
|
||||
if stats == nil || stats.Root {
|
||||
continue
|
||||
}
|
||||
extraUsage, err := disk.Usage(stats.Mountpoint)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if hasSameDiskUsage(rootUsage, extraUsage) {
|
||||
slog.Info("Ignoring duplicate FS", "name", name, "mount", stats.Mountpoint)
|
||||
delete(a.fsStats, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// hasSameDiskUsage compares root/extra usage with a small byte tolerance.
|
||||
func hasSameDiskUsage(a, b *disk.UsageStat) bool {
|
||||
if a == nil || b == nil || a.Total == 0 || b.Total == 0 {
|
||||
return false
|
||||
}
|
||||
// Allow minor drift between sequential disk usage calls.
|
||||
const toleranceBytes uint64 = 16 * 1024 * 1024
|
||||
return withinUsageTolerance(a.Total, b.Total, toleranceBytes) &&
|
||||
withinUsageTolerance(a.Used, b.Used, toleranceBytes)
|
||||
}
|
||||
|
||||
// withinUsageTolerance reports whether two byte values differ by at most tolerance.
|
||||
func withinUsageTolerance(a, b, tolerance uint64) bool {
|
||||
if a >= b {
|
||||
return a-b <= tolerance
|
||||
}
|
||||
return b-a <= tolerance
|
||||
}
|
||||
|
||||
type ioMatchCandidate struct {
|
||||
name string
|
||||
bytes uint64
|
||||
ops uint64
|
||||
}
|
||||
|
||||
// findIoDevice prefers exact device/label matches, then falls back to a
|
||||
// prefix-related candidate with the highest total I/O (bytes, then ops, then name).
|
||||
func findIoDevice(filesystem string, diskIoCounters map[string]disk.IOCountersStat) (string, bool) {
|
||||
filesystem = normalizeDeviceName(filesystem)
|
||||
if filesystem == "" {
|
||||
return "", false
|
||||
}
|
||||
|
||||
candidates := []ioMatchCandidate{}
|
||||
|
||||
for _, d := range diskIoCounters {
|
||||
if d.Name == filesystem || (d.Label != "" && d.Label == filesystem) {
|
||||
if normalizeDeviceName(d.Name) == filesystem || (d.Label != "" && normalizeDeviceName(d.Label) == filesystem) {
|
||||
return d.Name, true
|
||||
}
|
||||
if d.ReadBytes > maxReadBytes {
|
||||
// don't use if device already exists in fsStats
|
||||
if _, exists := fsStats[d.Name]; !exists {
|
||||
maxReadBytes = d.ReadBytes
|
||||
maxReadDevice = d.Name
|
||||
if prefixRelated(normalizeDeviceName(d.Name), filesystem) ||
|
||||
(d.Label != "" && prefixRelated(normalizeDeviceName(d.Label), filesystem)) {
|
||||
candidates = append(candidates, ioMatchCandidate{
|
||||
name: d.Name,
|
||||
bytes: d.ReadBytes + d.WriteBytes,
|
||||
ops: d.ReadCount + d.WriteCount,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if len(candidates) == 0 {
|
||||
return "", false
|
||||
}
|
||||
return maxReadDevice, false
|
||||
|
||||
best := candidates[0]
|
||||
for _, c := range candidates[1:] {
|
||||
if c.bytes > best.bytes ||
|
||||
(c.bytes == best.bytes && c.ops > best.ops) ||
|
||||
(c.bytes == best.bytes && c.ops == best.ops && c.name < best.name) {
|
||||
best = c
|
||||
}
|
||||
}
|
||||
|
||||
slog.Info("Using disk I/O fallback", "requested", filesystem, "selected", best.name)
|
||||
return best.name, true
|
||||
}
|
||||
|
||||
// mostActiveIoDevice returns the device with the highest I/O activity,
|
||||
// or "" if diskIoCounters is empty.
|
||||
func mostActiveIoDevice(diskIoCounters map[string]disk.IOCountersStat) string {
|
||||
var best ioMatchCandidate
|
||||
for _, d := range diskIoCounters {
|
||||
c := ioMatchCandidate{
|
||||
name: d.Name,
|
||||
bytes: d.ReadBytes + d.WriteBytes,
|
||||
ops: d.ReadCount + d.WriteCount,
|
||||
}
|
||||
if best.name == "" || c.bytes > best.bytes ||
|
||||
(c.bytes == best.bytes && c.ops > best.ops) ||
|
||||
(c.bytes == best.bytes && c.ops == best.ops && c.name < best.name) {
|
||||
best = c
|
||||
}
|
||||
}
|
||||
return best.name
|
||||
}
|
||||
|
||||
// prefixRelated reports whether one identifier is a strict prefix of the
// other, in either direction. Empty strings and identical strings are never
// considered related.
func prefixRelated(a, b string) bool {
	switch {
	case a == "", b == "", a == b:
		return false
	case len(a) > len(b):
		// only the shorter string can be a prefix of the longer one
		return strings.HasPrefix(a, b)
	default:
		return strings.HasPrefix(b, a)
	}
}
|
||||
|
||||
// filesystemMatchesPartitionSetting checks whether a FILESYSTEM env var value
|
||||
// matches a partition by mountpoint, exact device name, or prefix relationship
|
||||
// (e.g. FILESYSTEM=ada0 matches partition /dev/ada0p2).
|
||||
func filesystemMatchesPartitionSetting(filesystem string, p disk.PartitionStat) bool {
|
||||
filesystem = strings.TrimSpace(filesystem)
|
||||
if filesystem == "" {
|
||||
return false
|
||||
}
|
||||
if p.Mountpoint == filesystem {
|
||||
return true
|
||||
}
|
||||
|
||||
fsName := normalizeDeviceName(filesystem)
|
||||
partName := normalizeDeviceName(p.Device)
|
||||
if fsName == "" || partName == "" {
|
||||
return false
|
||||
}
|
||||
if fsName == partName {
|
||||
return true
|
||||
}
|
||||
return prefixRelated(partName, fsName)
|
||||
}
|
||||
|
||||
// normalizeDeviceName reduces a device string to a form suitable for
// comparisons: surrounding whitespace is trimmed and only the last path
// element is kept (e.g. "/dev/sda1" becomes "sda1"). It returns "" when
// nothing remains after trimming.
func normalizeDeviceName(value string) string {
	trimmed := strings.TrimSpace(value)
	// filepath.Base yields "." for an empty path; report that as "no name".
	if base := filepath.Base(trimmed); base != "." {
		return base
	}
	return ""
}
|
||||
|
||||
// Sets start values for disk I/O stats.
|
||||
func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersStat) {
|
||||
a.fsNames = a.fsNames[:0]
|
||||
now := time.Now()
|
||||
for device, stats := range a.fsStats {
|
||||
// skip if not in diskIoCounters
|
||||
d, exists := diskIoCounters[device]
|
||||
@@ -215,7 +511,7 @@ func (a *Agent) initializeDiskIoStats(diskIoCounters map[string]disk.IOCountersS
|
||||
continue
|
||||
}
|
||||
// populate initial values
|
||||
stats.Time = time.Now()
|
||||
stats.Time = now
|
||||
stats.TotalRead = d.ReadBytes
|
||||
stats.TotalWrite = d.WriteBytes
|
||||
// add to list of valid io device names
|
||||
@@ -239,12 +535,12 @@ func (a *Agent) updateDiskUsage(systemStats *system.Stats) {
|
||||
continue
|
||||
}
|
||||
if d, err := disk.Usage(stats.Mountpoint); err == nil {
|
||||
stats.DiskTotal = bytesToGigabytes(d.Total)
|
||||
stats.DiskUsed = bytesToGigabytes(d.Used)
|
||||
stats.DiskTotal = utils.BytesToGigabytes(d.Total)
|
||||
stats.DiskUsed = utils.BytesToGigabytes(d.Used)
|
||||
if stats.Root {
|
||||
systemStats.DiskTotal = bytesToGigabytes(d.Total)
|
||||
systemStats.DiskUsed = bytesToGigabytes(d.Used)
|
||||
systemStats.DiskPct = twoDecimals(d.UsedPercent)
|
||||
systemStats.DiskTotal = utils.BytesToGigabytes(d.Total)
|
||||
systemStats.DiskUsed = utils.BytesToGigabytes(d.Used)
|
||||
systemStats.DiskPct = utils.TwoDecimals(d.UsedPercent)
|
||||
}
|
||||
} else {
|
||||
// reset stats if error (likely unmounted)
|
||||
@@ -297,8 +593,8 @@ func (a *Agent) updateDiskIo(cacheTimeMs uint16, systemStats *system.Stats) {
|
||||
|
||||
diskIORead := (d.ReadBytes - prev.readBytes) * 1000 / msElapsed
|
||||
diskIOWrite := (d.WriteBytes - prev.writeBytes) * 1000 / msElapsed
|
||||
readMbPerSecond := bytesToMegabytes(float64(diskIORead))
|
||||
writeMbPerSecond := bytesToMegabytes(float64(diskIOWrite))
|
||||
readMbPerSecond := utils.BytesToMegabytes(float64(diskIORead))
|
||||
writeMbPerSecond := utils.BytesToMegabytes(float64(diskIOWrite))
|
||||
|
||||
// validate values
|
||||
if readMbPerSecond > 50_000 || writeMbPerSecond > 50_000 {
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
@@ -94,6 +93,599 @@ func TestParseFilesystemEntry(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtraFilesystemPartitionInfo(t *testing.T) {
|
||||
t.Run("uses partition device for label-only mountpoint", func(t *testing.T) {
|
||||
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
|
||||
Device: "/dev/sdc",
|
||||
Mountpoint: "/extra-filesystems/Share",
|
||||
})
|
||||
|
||||
assert.Equal(t, "/dev/sdc", device)
|
||||
assert.Equal(t, "", customName)
|
||||
})
|
||||
|
||||
t.Run("uses custom name from mountpoint suffix", func(t *testing.T) {
|
||||
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
|
||||
Device: "/dev/sdc",
|
||||
Mountpoint: "/extra-filesystems/sdc__Share",
|
||||
})
|
||||
|
||||
assert.Equal(t, "/dev/sdc", device)
|
||||
assert.Equal(t, "Share", customName)
|
||||
})
|
||||
|
||||
t.Run("falls back to folder device when partition device is unavailable", func(t *testing.T) {
|
||||
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
|
||||
Mountpoint: "/extra-filesystems/sdc__Share",
|
||||
})
|
||||
|
||||
assert.Equal(t, "sdc", device)
|
||||
assert.Equal(t, "Share", customName)
|
||||
})
|
||||
|
||||
t.Run("supports custom name without folder device prefix", func(t *testing.T) {
|
||||
device, customName := extraFilesystemPartitionInfo(disk.PartitionStat{
|
||||
Device: "/dev/sdc",
|
||||
Mountpoint: "/extra-filesystems/__Share",
|
||||
})
|
||||
|
||||
assert.Equal(t, "/dev/sdc", device)
|
||||
assert.Equal(t, "Share", customName)
|
||||
})
|
||||
}
|
||||
|
||||
func TestBuildFsStatRegistration(t *testing.T) {
|
||||
t.Run("uses basename for non-windows exact io match", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"/dev/sda1",
|
||||
"/mnt/data",
|
||||
false,
|
||||
"archive",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sda1": {Name: "sda1"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda1", key)
|
||||
assert.Equal(t, "/mnt/data", stats.Mountpoint)
|
||||
assert.Equal(t, "archive", stats.Name)
|
||||
assert.False(t, stats.Root)
|
||||
})
|
||||
|
||||
t.Run("maps root partition to io device by prefix", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"/dev/ada0p2",
|
||||
"/",
|
||||
true,
|
||||
"",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "ada0", key)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/", stats.Mountpoint)
|
||||
})
|
||||
|
||||
t.Run("uses filesystem setting as root fallback", func(t *testing.T) {
|
||||
key, _, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"overlay",
|
||||
"/",
|
||||
true,
|
||||
"",
|
||||
fsRegistrationContext{
|
||||
filesystem: "nvme0n1p2",
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "nvme0n1", key)
|
||||
})
|
||||
|
||||
t.Run("prefers parsed extra-filesystems device over mapper device", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
|
||||
"/extra-filesystems/nvme0n1p2__Archive",
|
||||
false,
|
||||
"Archive",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
|
||||
"nvme0n1p2": {Name: "nvme0n1p2"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "nvme0n1p2", key)
|
||||
assert.Equal(t, "Archive", stats.Name)
|
||||
})
|
||||
|
||||
t.Run("falls back to mapper io device when folder device cannot be resolved", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
"/dev/mapper/luks-2bcb02be-999d-4417-8d18-5c61e660fb6e",
|
||||
"/extra-filesystems/Archive",
|
||||
false,
|
||||
"Archive",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"dm-1": {Name: "dm-1", Label: "luks-2bcb02be-999d-4417-8d18-5c61e660fb6e"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "dm-1", key)
|
||||
assert.Equal(t, "Archive", stats.Name)
|
||||
})
|
||||
|
||||
t.Run("uses full device name on windows", func(t *testing.T) {
|
||||
key, _, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{},
|
||||
`C:`,
|
||||
`C:\\`,
|
||||
false,
|
||||
"",
|
||||
fsRegistrationContext{
|
||||
isWindows: true,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
`C:`: {Name: `C:`},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, `C:`, key)
|
||||
})
|
||||
|
||||
t.Run("skips existing key", func(t *testing.T) {
|
||||
key, stats, ok := registerFilesystemStats(
|
||||
map[string]*system.FsStats{"sda1": {Mountpoint: "/existing"}},
|
||||
"/dev/sda1",
|
||||
"/mnt/data",
|
||||
false,
|
||||
"",
|
||||
fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sda1": {Name: "sda1"},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
assert.False(t, ok)
|
||||
assert.Empty(t, key)
|
||||
assert.Nil(t, stats)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddConfiguredRootFs(t *testing.T) {
|
||||
t.Run("adds root from matching partition", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
rootMountPoint: "/",
|
||||
partitions: []disk.PartitionStat{{Device: "/dev/ada0p2", Mountpoint: "/"}},
|
||||
ctx: fsRegistrationContext{
|
||||
filesystem: "/dev/ada0p2",
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"ada0": {Name: "ada0", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ok := discovery.addConfiguredRootFs()
|
||||
|
||||
assert.True(t, ok)
|
||||
stats, exists := agent.fsStats["ada0"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/", stats.Mountpoint)
|
||||
})
|
||||
|
||||
t.Run("adds root from io device when partition is missing", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
rootMountPoint: "/sysroot",
|
||||
ctx: fsRegistrationContext{
|
||||
filesystem: "zroot",
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"nda0": {Name: "nda0", Label: "zroot", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ok := discovery.addConfiguredRootFs()
|
||||
|
||||
assert.True(t, ok)
|
||||
stats, exists := agent.fsStats["nda0"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/sysroot", stats.Mountpoint)
|
||||
})
|
||||
|
||||
t.Run("returns false when filesystem cannot be resolved", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
rootMountPoint: "/",
|
||||
ctx: fsRegistrationContext{
|
||||
filesystem: "missing-disk",
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{},
|
||||
},
|
||||
}
|
||||
|
||||
ok := discovery.addConfiguredRootFs()
|
||||
|
||||
assert.False(t, ok)
|
||||
assert.Empty(t, agent.fsStats)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddPartitionRootFs(t *testing.T) {
|
||||
t.Run("adds root from fallback partition candidate", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"nvme0n1": {Name: "nvme0n1", ReadBytes: 1000, WriteBytes: 1000},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ok := discovery.addPartitionRootFs("/dev/nvme0n1p2", "/")
|
||||
|
||||
assert.True(t, ok)
|
||||
stats, exists := agent.fsStats["nvme0n1"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/", stats.Mountpoint)
|
||||
})
|
||||
|
||||
t.Run("returns false when no io device matches", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{agent: agent, ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
|
||||
|
||||
ok := discovery.addPartitionRootFs("/dev/mapper/root", "/")
|
||||
|
||||
assert.False(t, ok)
|
||||
assert.Empty(t, agent.fsStats)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddLastResortRootFs(t *testing.T) {
|
||||
t.Run("uses most active io device when available", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{agent: agent, rootMountPoint: "/", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000},
|
||||
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000},
|
||||
}}}
|
||||
|
||||
discovery.addLastResortRootFs()
|
||||
|
||||
stats, exists := agent.fsStats["sda"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
})
|
||||
|
||||
t.Run("falls back to root key when mountpoint basename collides", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: map[string]*system.FsStats{
|
||||
"sysroot": {Mountpoint: "/extra-filesystems/sysroot"},
|
||||
}}
|
||||
discovery := diskDiscovery{agent: agent, rootMountPoint: "/sysroot", ctx: fsRegistrationContext{diskIoCounters: map[string]disk.IOCountersStat{}}}
|
||||
|
||||
discovery.addLastResortRootFs()
|
||||
|
||||
stats, exists := agent.fsStats["root"]
|
||||
assert.True(t, exists)
|
||||
assert.True(t, stats.Root)
|
||||
assert.Equal(t, "/sysroot", stats.Mountpoint)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddConfiguredExtraFsEntry(t *testing.T) {
|
||||
t.Run("uses matching partition when present", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
partitions: []disk.PartitionStat{{Device: "/dev/sdb1", Mountpoint: "/mnt/backup"}},
|
||||
usageFn: func(string) (*disk.UsageStat, error) {
|
||||
t.Fatal("usage fallback should not be called when partition matches")
|
||||
return nil, nil
|
||||
},
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sdb1": {Name: "sdb1"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addConfiguredExtraFsEntry("sdb1", "backup")
|
||||
|
||||
stats, exists := agent.fsStats["sdb1"]
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "/mnt/backup", stats.Mountpoint)
|
||||
assert.Equal(t, "backup", stats.Name)
|
||||
})
|
||||
|
||||
t.Run("falls back to usage-validated path", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
usageFn: func(path string) (*disk.UsageStat, error) {
|
||||
assert.Equal(t, "/srv/archive", path)
|
||||
return &disk.UsageStat{}, nil
|
||||
},
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"archive": {Name: "archive"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addConfiguredExtraFsEntry("/srv/archive", "archive")
|
||||
|
||||
stats, exists := agent.fsStats["archive"]
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "/srv/archive", stats.Mountpoint)
|
||||
assert.Equal(t, "archive", stats.Name)
|
||||
})
|
||||
|
||||
t.Run("ignores invalid filesystem entry", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
usageFn: func(string) (*disk.UsageStat, error) {
|
||||
return nil, os.ErrNotExist
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addConfiguredExtraFsEntry("/missing/archive", "")
|
||||
|
||||
assert.Empty(t, agent.fsStats)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddConfiguredExtraFilesystems(t *testing.T) {
|
||||
t.Run("parses and registers multiple configured filesystems", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: make(map[string]*system.FsStats)}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
partitions: []disk.PartitionStat{{Device: "/dev/sda1", Mountpoint: "/mnt/fast"}},
|
||||
usageFn: func(path string) (*disk.UsageStat, error) {
|
||||
if path == "/srv/archive" {
|
||||
return &disk.UsageStat{}, nil
|
||||
}
|
||||
return nil, os.ErrNotExist
|
||||
},
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"sda1": {Name: "sda1"},
|
||||
"archive": {Name: "archive"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addConfiguredExtraFilesystems("sda1__fast,/srv/archive__cold")
|
||||
|
||||
assert.Contains(t, agent.fsStats, "sda1")
|
||||
assert.Equal(t, "fast", agent.fsStats["sda1"].Name)
|
||||
assert.Contains(t, agent.fsStats, "archive")
|
||||
assert.Equal(t, "cold", agent.fsStats["archive"].Name)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddExtraFilesystemFolders(t *testing.T) {
|
||||
t.Run("adds missing folders and skips existing mountpoints", func(t *testing.T) {
|
||||
agent := &Agent{fsStats: map[string]*system.FsStats{
|
||||
"existing": {Mountpoint: "/extra-filesystems/existing"},
|
||||
}}
|
||||
discovery := diskDiscovery{
|
||||
agent: agent,
|
||||
ctx: fsRegistrationContext{
|
||||
isWindows: false,
|
||||
efPath: "/extra-filesystems",
|
||||
diskIoCounters: map[string]disk.IOCountersStat{
|
||||
"newdisk": {Name: "newdisk"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
discovery.addExtraFilesystemFolders([]string{"existing", "newdisk__Archive"})
|
||||
|
||||
assert.Len(t, agent.fsStats, 2)
|
||||
stats, exists := agent.fsStats["newdisk"]
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "/extra-filesystems/newdisk__Archive", stats.Mountpoint)
|
||||
assert.Equal(t, "Archive", stats.Name)
|
||||
})
|
||||
}
|
||||
|
||||
func TestFindIoDevice(t *testing.T) {
|
||||
t.Run("matches by device name", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda"},
|
||||
"sdb": {Name: "sdb"},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("sdb", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sdb", device)
|
||||
})
|
||||
|
||||
t.Run("matches by device label", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", Label: "rootfs"},
|
||||
"sdb": {Name: "sdb"},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("rootfs", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda", device)
|
||||
})
|
||||
|
||||
t.Run("returns no match when not found", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda"},
|
||||
"sdb": {Name: "sdb"},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("nvme0n1p1", ioCounters)
|
||||
assert.False(t, ok)
|
||||
assert.Equal(t, "", device)
|
||||
})
|
||||
|
||||
t.Run("uses uncertain unique prefix fallback", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"nvme0n1": {Name: "nvme0n1"},
|
||||
"sda": {Name: "sda"},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("nvme0n1p2", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "nvme0n1", device)
|
||||
})
|
||||
|
||||
t.Run("uses dominant activity when prefix matches are ambiguous", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", ReadBytes: 5000, WriteBytes: 5000, ReadCount: 100, WriteCount: 100},
|
||||
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 50, WriteCount: 50},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("sd", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda", device)
|
||||
})
|
||||
|
||||
t.Run("uses highest activity when ambiguous without dominance", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", ReadBytes: 3000, WriteBytes: 3000, ReadCount: 50, WriteCount: 50},
|
||||
"sdb": {Name: "sdb", ReadBytes: 2500, WriteBytes: 2500, ReadCount: 40, WriteCount: 40},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("sd", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda", device)
|
||||
})
|
||||
|
||||
t.Run("matches /dev/-prefixed partition to parent disk", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"nda0": {Name: "nda0", ReadBytes: 1000, WriteBytes: 1000},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("/dev/nda0p2", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "nda0", device)
|
||||
})
|
||||
|
||||
t.Run("uses deterministic name tie-breaker", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sdb": {Name: "sdb", ReadBytes: 2000, WriteBytes: 2000, ReadCount: 10, WriteCount: 10},
|
||||
"sda": {Name: "sda", ReadBytes: 2000, WriteBytes: 2000, ReadCount: 10, WriteCount: 10},
|
||||
}
|
||||
|
||||
device, ok := findIoDevice("sd", ioCounters)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "sda", device)
|
||||
})
|
||||
}
|
||||
|
||||
func TestFilesystemMatchesPartitionSetting(t *testing.T) {
|
||||
p := disk.PartitionStat{Device: "/dev/ada0p2", Mountpoint: "/"}
|
||||
|
||||
t.Run("matches mountpoint setting", func(t *testing.T) {
|
||||
assert.True(t, filesystemMatchesPartitionSetting("/", p))
|
||||
})
|
||||
|
||||
t.Run("matches exact partition setting", func(t *testing.T) {
|
||||
assert.True(t, filesystemMatchesPartitionSetting("ada0p2", p))
|
||||
assert.True(t, filesystemMatchesPartitionSetting("/dev/ada0p2", p))
|
||||
})
|
||||
|
||||
t.Run("matches prefix-style parent setting", func(t *testing.T) {
|
||||
assert.True(t, filesystemMatchesPartitionSetting("ada0", p))
|
||||
assert.True(t, filesystemMatchesPartitionSetting("/dev/ada0", p))
|
||||
})
|
||||
|
||||
t.Run("does not match unrelated device", func(t *testing.T) {
|
||||
assert.False(t, filesystemMatchesPartitionSetting("sda", p))
|
||||
assert.False(t, filesystemMatchesPartitionSetting("nvme0n1", p))
|
||||
assert.False(t, filesystemMatchesPartitionSetting("", p))
|
||||
})
|
||||
}
|
||||
|
||||
func TestMostActiveIoDevice(t *testing.T) {
|
||||
t.Run("returns most active device", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"nda0": {Name: "nda0", ReadBytes: 5000, WriteBytes: 5000, ReadCount: 100, WriteCount: 100},
|
||||
"nda1": {Name: "nda1", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 50, WriteCount: 50},
|
||||
}
|
||||
assert.Equal(t, "nda0", mostActiveIoDevice(ioCounters))
|
||||
})
|
||||
|
||||
t.Run("uses deterministic tie-breaker", func(t *testing.T) {
|
||||
ioCounters := map[string]disk.IOCountersStat{
|
||||
"sdb": {Name: "sdb", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10},
|
||||
"sda": {Name: "sda", ReadBytes: 1000, WriteBytes: 1000, ReadCount: 10, WriteCount: 10},
|
||||
}
|
||||
assert.Equal(t, "sda", mostActiveIoDevice(ioCounters))
|
||||
})
|
||||
|
||||
t.Run("returns empty for empty map", func(t *testing.T) {
|
||||
assert.Equal(t, "", mostActiveIoDevice(map[string]disk.IOCountersStat{}))
|
||||
})
|
||||
}
|
||||
|
||||
func TestIsDockerSpecialMountpoint(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
mountpoint string
|
||||
expected bool
|
||||
}{
|
||||
{name: "hosts", mountpoint: "/etc/hosts", expected: true},
|
||||
{name: "resolv", mountpoint: "/etc/resolv.conf", expected: true},
|
||||
{name: "hostname", mountpoint: "/etc/hostname", expected: true},
|
||||
{name: "root", mountpoint: "/", expected: false},
|
||||
{name: "passwd", mountpoint: "/etc/passwd", expected: false},
|
||||
{name: "extra-filesystem", mountpoint: "/extra-filesystems/sda1", expected: false},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
assert.Equal(t, tc.expected, isDockerSpecialMountpoint(tc.mountpoint))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
|
||||
// Set up environment variables
|
||||
oldEnv := os.Getenv("EXTRA_FILESYSTEMS")
|
||||
@@ -155,7 +747,7 @@ func TestInitializeDiskInfoWithCustomNames(t *testing.T) {
|
||||
// Test the parsing logic by calling the relevant part
|
||||
// We'll create a simplified version to test just the parsing
|
||||
extraFilesystems := tc.envValue
|
||||
for _, fsEntry := range strings.Split(extraFilesystems, ",") {
|
||||
for fsEntry := range strings.SplitSeq(extraFilesystems, ",") {
|
||||
// Parse the entry
|
||||
fsEntry = strings.TrimSpace(fsEntry)
|
||||
var fs, customName string
|
||||
@@ -317,3 +909,67 @@ func TestDiskUsageCaching(t *testing.T) {
|
||||
"lastDiskUsageUpdate should be refreshed when cache expires")
|
||||
})
|
||||
}
|
||||
|
||||
func TestHasSameDiskUsage(t *testing.T) {
|
||||
const toleranceBytes uint64 = 16 * 1024 * 1024
|
||||
|
||||
t.Run("returns true when totals and usage are equal", func(t *testing.T) {
|
||||
a := &disk.UsageStat{Total: 100 * 1024 * 1024 * 1024, Used: 42 * 1024 * 1024 * 1024}
|
||||
b := &disk.UsageStat{Total: 100 * 1024 * 1024 * 1024, Used: 42 * 1024 * 1024 * 1024}
|
||||
assert.True(t, hasSameDiskUsage(a, b))
|
||||
})
|
||||
|
||||
t.Run("returns true within tolerance", func(t *testing.T) {
|
||||
a := &disk.UsageStat{Total: 100 * 1024 * 1024 * 1024, Used: 42 * 1024 * 1024 * 1024}
|
||||
b := &disk.UsageStat{
|
||||
Total: a.Total + toleranceBytes - 1,
|
||||
Used: a.Used - toleranceBytes + 1,
|
||||
}
|
||||
assert.True(t, hasSameDiskUsage(a, b))
|
||||
})
|
||||
|
||||
t.Run("returns false when total exceeds tolerance", func(t *testing.T) {
|
||||
a := &disk.UsageStat{Total: 100 * 1024 * 1024 * 1024, Used: 42 * 1024 * 1024 * 1024}
|
||||
b := &disk.UsageStat{
|
||||
Total: a.Total + toleranceBytes + 1,
|
||||
Used: a.Used,
|
||||
}
|
||||
assert.False(t, hasSameDiskUsage(a, b))
|
||||
})
|
||||
|
||||
t.Run("returns false for nil or zero total", func(t *testing.T) {
|
||||
assert.False(t, hasSameDiskUsage(nil, &disk.UsageStat{Total: 1, Used: 1}))
|
||||
assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 1, Used: 1}, nil))
|
||||
assert.False(t, hasSameDiskUsage(&disk.UsageStat{Total: 0, Used: 0}, &disk.UsageStat{Total: 1, Used: 1}))
|
||||
})
|
||||
}
|
||||
|
||||
func TestInitializeDiskIoStatsResetsTrackedDevices(t *testing.T) {
|
||||
agent := &Agent{
|
||||
fsStats: map[string]*system.FsStats{
|
||||
"sda": {},
|
||||
"sdb": {},
|
||||
},
|
||||
fsNames: []string{"stale", "sda"},
|
||||
}
|
||||
|
||||
agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
|
||||
"sda": {Name: "sda", ReadBytes: 10, WriteBytes: 20},
|
||||
"sdb": {Name: "sdb", ReadBytes: 30, WriteBytes: 40},
|
||||
})
|
||||
|
||||
assert.ElementsMatch(t, []string{"sda", "sdb"}, agent.fsNames)
|
||||
assert.Len(t, agent.fsNames, 2)
|
||||
assert.Equal(t, uint64(10), agent.fsStats["sda"].TotalRead)
|
||||
assert.Equal(t, uint64(20), agent.fsStats["sda"].TotalWrite)
|
||||
assert.False(t, agent.fsStats["sda"].Time.IsZero())
|
||||
assert.False(t, agent.fsStats["sdb"].Time.IsZero())
|
||||
|
||||
agent.initializeDiskIoStats(map[string]disk.IOCountersStat{
|
||||
"sdb": {Name: "sdb", ReadBytes: 50, WriteBytes: 60},
|
||||
})
|
||||
|
||||
assert.Equal(t, []string{"sdb"}, agent.fsNames)
|
||||
assert.Equal(t, uint64(50), agent.fsStats["sdb"].TotalRead)
|
||||
assert.Equal(t, uint64(60), agent.fsStats["sdb"].TotalWrite)
|
||||
}
|
||||
|
||||
205
agent/docker.go
205
agent/docker.go
@@ -1,6 +1,7 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
@@ -15,11 +16,14 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/deltatracker"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/container"
|
||||
|
||||
"github.com/blang/semver"
|
||||
@@ -28,6 +32,7 @@ import (
|
||||
// ansiEscapePattern matches ANSI escape sequences (colors, cursor movement, etc.)
|
||||
// This includes CSI sequences like \x1b[...m and simple escapes like \x1b[K
|
||||
var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]|\x1b\][^\x07]*\x07|\x1b[@-Z\\-_]`)
|
||||
var dockerContainerIDPattern = regexp.MustCompile(`^[a-fA-F0-9]{12,64}$`)
|
||||
|
||||
const (
|
||||
// Docker API timeout in milliseconds
|
||||
@@ -72,6 +77,7 @@ type dockerManager struct {
|
||||
// cacheTimeMs -> DeltaTracker for network bytes sent/received
|
||||
networkSentTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64]
|
||||
networkRecvTrackers map[uint16]*deltatracker.DeltaTracker[string, uint64]
|
||||
retrySleep func(time.Duration)
|
||||
}
|
||||
|
||||
// userAgentRoundTripper is a custom http.RoundTripper that adds a User-Agent header to all requests
|
||||
@@ -333,13 +339,48 @@ func validateCpuPercentage(cpuPct float64, containerName string) error {
|
||||
|
||||
// updateContainerStatsValues updates the final stats values
|
||||
func updateContainerStatsValues(stats *container.Stats, cpuPct float64, usedMemory uint64, sent_delta, recv_delta uint64, readTime time.Time) {
|
||||
stats.Cpu = twoDecimals(cpuPct)
|
||||
stats.Mem = bytesToMegabytes(float64(usedMemory))
|
||||
stats.NetworkSent = bytesToMegabytes(float64(sent_delta))
|
||||
stats.NetworkRecv = bytesToMegabytes(float64(recv_delta))
|
||||
stats.Cpu = utils.TwoDecimals(cpuPct)
|
||||
stats.Mem = utils.BytesToMegabytes(float64(usedMemory))
|
||||
stats.Bandwidth = [2]uint64{sent_delta, recv_delta}
|
||||
// TODO(0.19+): stop populating NetworkSent/NetworkRecv (deprecated in 0.18.3)
|
||||
stats.NetworkSent = utils.BytesToMegabytes(float64(sent_delta))
|
||||
stats.NetworkRecv = utils.BytesToMegabytes(float64(recv_delta))
|
||||
stats.PrevReadTime = readTime
|
||||
}
|
||||
|
||||
// convertContainerPortsToString formats the ports of a container into a sorted, deduplicated string.
|
||||
// ctr.Ports is nilled out after processing so the slice is not accidentally reused.
|
||||
func convertContainerPortsToString(ctr *container.ApiInfo) string {
|
||||
if len(ctr.Ports) == 0 {
|
||||
return ""
|
||||
}
|
||||
sort.Slice(ctr.Ports, func(i, j int) bool {
|
||||
return ctr.Ports[i].PublicPort < ctr.Ports[j].PublicPort
|
||||
})
|
||||
var builder strings.Builder
|
||||
seenPorts := make(map[uint16]struct{})
|
||||
for _, p := range ctr.Ports {
|
||||
_, ok := seenPorts[p.PublicPort]
|
||||
if p.PublicPort == 0 || ok {
|
||||
continue
|
||||
}
|
||||
seenPorts[p.PublicPort] = struct{}{}
|
||||
if builder.Len() > 0 {
|
||||
builder.WriteString(", ")
|
||||
}
|
||||
switch p.IP {
|
||||
case "0.0.0.0", "::":
|
||||
default:
|
||||
builder.WriteString(p.IP)
|
||||
builder.WriteByte(':')
|
||||
}
|
||||
builder.WriteString(strconv.Itoa(int(p.PublicPort)))
|
||||
}
|
||||
// clear ports slice so it doesn't get reused and blend into next response
|
||||
ctr.Ports = nil
|
||||
return builder.String()
|
||||
}
|
||||
|
||||
func parseDockerStatus(status string) (string, container.DockerHealth) {
|
||||
trimmed := strings.TrimSpace(status)
|
||||
if trimmed == "" {
|
||||
@@ -359,22 +400,60 @@ func parseDockerStatus(status string) (string, container.DockerHealth) {
|
||||
statusText = trimmed
|
||||
}
|
||||
|
||||
healthText := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")")))
|
||||
healthText := strings.TrimSpace(strings.TrimSuffix(trimmed[openIdx+1:], ")"))
|
||||
// Some Docker statuses include a "health:" prefix inside the parentheses.
|
||||
// Strip it so it maps correctly to the known health states.
|
||||
if colonIdx := strings.IndexRune(healthText, ':'); colonIdx != -1 {
|
||||
prefix := strings.TrimSpace(healthText[:colonIdx])
|
||||
prefix := strings.ToLower(strings.TrimSpace(healthText[:colonIdx]))
|
||||
if prefix == "health" || prefix == "health status" {
|
||||
healthText = strings.TrimSpace(healthText[colonIdx+1:])
|
||||
}
|
||||
}
|
||||
if health, ok := container.DockerHealthStrings[healthText]; ok {
|
||||
if health, ok := parseDockerHealthStatus(healthText); ok {
|
||||
return statusText, health
|
||||
}
|
||||
|
||||
return trimmed, container.DockerHealthNone
|
||||
}
|
||||
|
||||
// parseDockerHealthStatus maps Docker health status strings to container.DockerHealth values
|
||||
func parseDockerHealthStatus(status string) (container.DockerHealth, bool) {
|
||||
health, ok := container.DockerHealthStrings[strings.ToLower(strings.TrimSpace(status))]
|
||||
return health, ok
|
||||
}
|
||||
|
||||
// getPodmanContainerHealth fetches container health status from the container inspect endpoint.
|
||||
// Used for Podman which doesn't provide health status in the /containers/json endpoint as of March 2026.
|
||||
// https://github.com/containers/podman/issues/27786
|
||||
func (dm *dockerManager) getPodmanContainerHealth(containerID string) (container.DockerHealth, error) {
|
||||
resp, err := dm.client.Get(fmt.Sprintf("http://localhost/containers/%s/json", url.PathEscape(containerID)))
|
||||
if err != nil {
|
||||
return container.DockerHealthNone, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return container.DockerHealthNone, fmt.Errorf("container inspect request failed: %s", resp.Status)
|
||||
}
|
||||
|
||||
var inspectInfo struct {
|
||||
State struct {
|
||||
Health struct {
|
||||
Status string
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&inspectInfo); err != nil {
|
||||
return container.DockerHealthNone, err
|
||||
}
|
||||
|
||||
if health, ok := parseDockerHealthStatus(inspectInfo.State.Health.Status); ok {
|
||||
return health, nil
|
||||
}
|
||||
|
||||
return container.DockerHealthNone, nil
|
||||
}
|
||||
|
||||
// Updates stats for individual container with cache-time-aware delta tracking
|
||||
func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeMs uint16) error {
|
||||
name := ctr.Names[0][1:]
|
||||
@@ -384,6 +463,21 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
|
||||
return err
|
||||
}
|
||||
|
||||
statusText, health := parseDockerStatus(ctr.Status)
|
||||
|
||||
// Docker exposes Health.Status on /containers/json in API 1.52+.
|
||||
// Podman currently requires falling back to the inspect endpoint as of March 2026.
|
||||
// https://github.com/containers/podman/issues/27786
|
||||
if ctr.Health.Status != "" {
|
||||
if h, ok := parseDockerHealthStatus(ctr.Health.Status); ok {
|
||||
health = h
|
||||
}
|
||||
} else if dm.usingPodman {
|
||||
if podmanHealth, err := dm.getPodmanContainerHealth(ctr.IdShort); err == nil {
|
||||
health = podmanHealth
|
||||
}
|
||||
}
|
||||
|
||||
dm.containerStatsMutex.Lock()
|
||||
defer dm.containerStatsMutex.Unlock()
|
||||
|
||||
@@ -395,14 +489,18 @@ func (dm *dockerManager) updateContainerStats(ctr *container.ApiInfo, cacheTimeM
|
||||
}
|
||||
|
||||
stats.Id = ctr.IdShort
|
||||
|
||||
statusText, health := parseDockerStatus(ctr.Status)
|
||||
stats.Status = statusText
|
||||
stats.Health = health
|
||||
|
||||
if len(ctr.Ports) > 0 {
|
||||
stats.Ports = convertContainerPortsToString(ctr)
|
||||
}
|
||||
|
||||
// reset current stats
|
||||
stats.Cpu = 0
|
||||
stats.Mem = 0
|
||||
stats.Bandwidth = [2]uint64{0, 0}
|
||||
// TODO(0.19+): stop populating NetworkSent/NetworkRecv (deprecated in 0.18.3)
|
||||
stats.NetworkSent = 0
|
||||
stats.NetworkRecv = 0
|
||||
|
||||
@@ -480,7 +578,7 @@ func (dm *dockerManager) deleteContainerStatsSync(id string) {
|
||||
|
||||
// Creates a new http client for Docker or Podman API
|
||||
func newDockerManager() *dockerManager {
|
||||
dockerHost, exists := GetEnv("DOCKER_HOST")
|
||||
dockerHost, exists := utils.GetEnv("DOCKER_HOST")
|
||||
if exists {
|
||||
// return nil if set to empty string
|
||||
if dockerHost == "" {
|
||||
@@ -516,7 +614,7 @@ func newDockerManager() *dockerManager {
|
||||
|
||||
// configurable timeout
|
||||
timeout := time.Millisecond * time.Duration(dockerTimeoutMs)
|
||||
if t, set := GetEnv("DOCKER_TIMEOUT"); set {
|
||||
if t, set := utils.GetEnv("DOCKER_TIMEOUT"); set {
|
||||
timeout, err = time.ParseDuration(t)
|
||||
if err != nil {
|
||||
slog.Error(err.Error())
|
||||
@@ -533,7 +631,7 @@ func newDockerManager() *dockerManager {
|
||||
|
||||
// Read container exclusion patterns from environment variable
|
||||
var excludeContainers []string
|
||||
if excludeStr, set := GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
|
||||
if excludeStr, set := utils.GetEnv("EXCLUDE_CONTAINERS"); set && excludeStr != "" {
|
||||
parts := strings.SplitSeq(excludeStr, ",")
|
||||
for part := range parts {
|
||||
trimmed := strings.TrimSpace(part)
|
||||
@@ -561,6 +659,7 @@ func newDockerManager() *dockerManager {
|
||||
lastCpuReadTime: make(map[uint16]map[string]time.Time),
|
||||
networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
||||
networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
||||
retrySleep: time.Sleep,
|
||||
}
|
||||
|
||||
// If using podman, return client
|
||||
@@ -570,7 +669,7 @@ func newDockerManager() *dockerManager {
|
||||
return manager
|
||||
}
|
||||
|
||||
// this can take up to 5 seconds with retry, so run in goroutine
|
||||
// run version check in goroutine to avoid blocking (server may not be ready and requires retries)
|
||||
go manager.checkDockerVersion()
|
||||
|
||||
// give version check a chance to complete before returning
|
||||
@@ -590,18 +689,18 @@ func (dm *dockerManager) checkDockerVersion() {
|
||||
const versionMaxTries = 2
|
||||
for i := 1; i <= versionMaxTries; i++ {
|
||||
resp, err = dm.client.Get("http://localhost/version")
|
||||
if err == nil {
|
||||
if err == nil && resp.StatusCode == http.StatusOK {
|
||||
break
|
||||
}
|
||||
if resp != nil {
|
||||
resp.Body.Close()
|
||||
}
|
||||
if i < versionMaxTries {
|
||||
slog.Debug("Failed to get Docker version; retrying", "attempt", i, "error", err)
|
||||
time.Sleep(5 * time.Second)
|
||||
slog.Debug("Failed to get Docker version; retrying", "attempt", i, "err", err, "response", resp)
|
||||
dm.retrySleep(5 * time.Second)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
if err != nil || resp.StatusCode != http.StatusOK {
|
||||
return
|
||||
}
|
||||
if err := dm.decode(resp, &versionInfo); err != nil {
|
||||
@@ -643,9 +742,34 @@ func getDockerHost() string {
|
||||
return scheme + socks[0]
|
||||
}
|
||||
|
||||
func validateContainerID(containerID string) error {
|
||||
if !dockerContainerIDPattern.MatchString(containerID) {
|
||||
return fmt.Errorf("invalid container id")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildDockerContainerEndpoint(containerID, action string, query url.Values) (string, error) {
|
||||
if err := validateContainerID(containerID); err != nil {
|
||||
return "", err
|
||||
}
|
||||
u := &url.URL{
|
||||
Scheme: "http",
|
||||
Host: "localhost",
|
||||
Path: fmt.Sprintf("/containers/%s/%s", url.PathEscape(containerID), action),
|
||||
}
|
||||
if len(query) > 0 {
|
||||
u.RawQuery = query.Encode()
|
||||
}
|
||||
return u.String(), nil
|
||||
}
|
||||
|
||||
// getContainerInfo fetches the inspection data for a container
|
||||
func (dm *dockerManager) getContainerInfo(ctx context.Context, containerID string) ([]byte, error) {
|
||||
endpoint := fmt.Sprintf("http://localhost/containers/%s/json", containerID)
|
||||
endpoint, err := buildDockerContainerEndpoint(containerID, "json", nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -676,7 +800,15 @@ func (dm *dockerManager) getContainerInfo(ctx context.Context, containerID strin
|
||||
|
||||
// getLogs fetches the logs for a container
|
||||
func (dm *dockerManager) getLogs(ctx context.Context, containerID string) (string, error) {
|
||||
endpoint := fmt.Sprintf("http://localhost/containers/%s/logs?stdout=1&stderr=1&tail=%d", containerID, dockerLogsTail)
|
||||
query := url.Values{
|
||||
"stdout": []string{"1"},
|
||||
"stderr": []string{"1"},
|
||||
"tail": []string{fmt.Sprintf("%d", dockerLogsTail)},
|
||||
}
|
||||
endpoint, err := buildDockerContainerEndpoint(containerID, "logs", query)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -694,7 +826,17 @@ func (dm *dockerManager) getLogs(ctx context.Context, containerID string) (strin
|
||||
}
|
||||
|
||||
var builder strings.Builder
|
||||
if err := decodeDockerLogStream(resp.Body, &builder); err != nil {
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
multiplexed := strings.HasSuffix(contentType, "multiplexed-stream")
|
||||
logReader := io.Reader(resp.Body)
|
||||
if !multiplexed {
|
||||
// Podman may return multiplexed logs without Content-Type. Sniff the first frame header
|
||||
// with a small buffered reader only when the header check fails.
|
||||
bufferedReader := bufio.NewReaderSize(resp.Body, 8)
|
||||
multiplexed = detectDockerMultiplexedStream(bufferedReader)
|
||||
logReader = bufferedReader
|
||||
}
|
||||
if err := decodeDockerLogStream(logReader, &builder, multiplexed); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
@@ -706,7 +848,28 @@ func (dm *dockerManager) getLogs(ctx context.Context, containerID string) (strin
|
||||
return logs, nil
|
||||
}
|
||||
|
||||
func decodeDockerLogStream(reader io.Reader, builder *strings.Builder) error {
|
||||
func detectDockerMultiplexedStream(reader *bufio.Reader) bool {
|
||||
const headerSize = 8
|
||||
header, err := reader.Peek(headerSize)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if header[0] != 0x01 && header[0] != 0x02 {
|
||||
return false
|
||||
}
|
||||
// Docker's stream framing header reserves bytes 1-3 as zero.
|
||||
if header[1] != 0 || header[2] != 0 || header[3] != 0 {
|
||||
return false
|
||||
}
|
||||
frameLen := binary.BigEndian.Uint32(header[4:])
|
||||
return frameLen <= maxLogFrameSize
|
||||
}
|
||||
|
||||
func decodeDockerLogStream(reader io.Reader, builder *strings.Builder, multiplexed bool) error {
|
||||
if !multiplexed {
|
||||
_, err := io.Copy(builder, io.LimitReader(reader, maxTotalLogSize))
|
||||
return err
|
||||
}
|
||||
const headerSize = 8
|
||||
var header [headerSize]byte
|
||||
totalBytesRead := 0
|
||||
|
||||
@@ -1,17 +1,24 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/deltatracker"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/container"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -19,6 +26,43 @@ import (
|
||||
|
||||
var defaultCacheTimeMs = uint16(60_000)
|
||||
|
||||
// recordingRoundTripper is an http.RoundTripper test double. Every request is
// answered with the configured status code, body and optional Content-Type,
// and the escaped path and query parameters of the most recent request are
// recorded for later assertions.
type recordingRoundTripper struct {
	statusCode  int               // status code set on every response
	body        string            // response body
	contentType string            // Content-Type header value; header omitted when empty
	called      bool              // set to true once RoundTrip has been invoked
	lastPath    string            // escaped URL path of the most recent request
	lastQuery   map[string]string // first value of each query parameter of the most recent request
}

// roundTripFunc adapts a plain function to the http.RoundTripper interface.
type roundTripFunc func(*http.Request) (*http.Response, error)

// RoundTrip calls fn with req and returns its results.
func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	return fn(req)
}

// RoundTrip records the request's escaped path and the first value of each
// query parameter, then returns the canned response. It never returns an error.
func (rt *recordingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	rt.called = true
	rt.lastPath = req.URL.EscapedPath()
	rt.lastQuery = map[string]string{}
	for key, values := range req.URL.Query() {
		if len(values) > 0 {
			rt.lastQuery[key] = values[0]
		}
	}
	resp := &http.Response{
		StatusCode: rt.statusCode,
		// Keep the status line in sync with the configured code so code paths that
		// report resp.Status see a consistent value for non-200 responses.
		Status:  fmt.Sprintf("%d %s", rt.statusCode, http.StatusText(rt.statusCode)),
		Header:  make(http.Header),
		Body:    io.NopCloser(strings.NewReader(rt.body)),
		Request: req,
	}
	if rt.contentType != "" {
		resp.Header.Set("Content-Type", rt.contentType)
	}
	return resp, nil
}
|
||||
|
||||
// cycleCpuDeltas cycles the CPU tracking data for a specific cache time interval
|
||||
func (dm *dockerManager) cycleCpuDeltas(cacheTimeMs uint16) {
|
||||
// Clear the CPU tracking maps for this cache time interval
|
||||
@@ -110,6 +154,94 @@ func TestCalculateMemoryUsage(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDockerContainerEndpoint(t *testing.T) {
|
||||
t.Run("valid container ID builds escaped endpoint", func(t *testing.T) {
|
||||
endpoint, err := buildDockerContainerEndpoint("0123456789ab", "json", nil)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "http://localhost/containers/0123456789ab/json", endpoint)
|
||||
})
|
||||
|
||||
t.Run("invalid container ID is rejected", func(t *testing.T) {
|
||||
_, err := buildDockerContainerEndpoint("../../version", "json", nil)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "invalid container id")
|
||||
})
|
||||
}
|
||||
|
||||
func TestContainerDetailsRequestsValidateContainerID(t *testing.T) {
|
||||
rt := &recordingRoundTripper{
|
||||
statusCode: 200,
|
||||
body: `{"Config":{"Env":["SECRET=1"]}}`,
|
||||
}
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: rt},
|
||||
}
|
||||
|
||||
_, err := dm.getContainerInfo(context.Background(), "../version")
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "invalid container id")
|
||||
assert.False(t, rt.called, "request should be rejected before dispatching to Docker API")
|
||||
}
|
||||
|
||||
func TestContainerDetailsRequestsUseExpectedDockerPaths(t *testing.T) {
|
||||
t.Run("container info uses container json endpoint", func(t *testing.T) {
|
||||
rt := &recordingRoundTripper{
|
||||
statusCode: 200,
|
||||
body: `{"Config":{"Env":["SECRET=1"]},"Name":"demo"}`,
|
||||
}
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: rt},
|
||||
}
|
||||
|
||||
body, err := dm.getContainerInfo(context.Background(), "0123456789ab")
|
||||
require.NoError(t, err)
|
||||
assert.True(t, rt.called)
|
||||
assert.Equal(t, "/containers/0123456789ab/json", rt.lastPath)
|
||||
assert.NotContains(t, string(body), "SECRET=1", "sensitive env vars should be removed")
|
||||
})
|
||||
|
||||
t.Run("container logs uses expected endpoint and query params", func(t *testing.T) {
|
||||
rt := &recordingRoundTripper{
|
||||
statusCode: 200,
|
||||
body: "line1\nline2\n",
|
||||
}
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: rt},
|
||||
}
|
||||
|
||||
logs, err := dm.getLogs(context.Background(), "abcdef123456")
|
||||
require.NoError(t, err)
|
||||
assert.True(t, rt.called)
|
||||
assert.Equal(t, "/containers/abcdef123456/logs", rt.lastPath)
|
||||
assert.Equal(t, "1", rt.lastQuery["stdout"])
|
||||
assert.Equal(t, "1", rt.lastQuery["stderr"])
|
||||
assert.Equal(t, "200", rt.lastQuery["tail"])
|
||||
assert.Equal(t, "line1\nline2\n", logs)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetPodmanContainerHealth(t *testing.T) {
|
||||
called := false
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
called = true
|
||||
assert.Equal(t, "/containers/0123456789ab/json", req.URL.EscapedPath())
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Status: "200 OK",
|
||||
Header: make(http.Header),
|
||||
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
|
||||
Request: req,
|
||||
}, nil
|
||||
})},
|
||||
}
|
||||
|
||||
health, err := dm.getPodmanContainerHealth("0123456789ab")
|
||||
require.NoError(t, err)
|
||||
assert.True(t, called)
|
||||
assert.Equal(t, container.DockerHealthHealthy, health)
|
||||
}
|
||||
|
||||
func TestValidateCpuPercentage(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -184,58 +316,17 @@ func TestUpdateContainerStatsValues(t *testing.T) {
|
||||
// Check memory (should be converted to MB: 1048576 bytes = 1 MB)
|
||||
assert.Equal(t, 1.0, stats.Mem)
|
||||
|
||||
// Check network sent (should be converted to MB: 524288 bytes = 0.5 MB)
|
||||
assert.Equal(t, 0.5, stats.NetworkSent)
|
||||
// Check bandwidth (raw bytes)
|
||||
assert.Equal(t, [2]uint64{524288, 262144}, stats.Bandwidth)
|
||||
|
||||
// Check network recv (should be converted to MB: 262144 bytes = 0.25 MB)
|
||||
assert.Equal(t, 0.25, stats.NetworkRecv)
|
||||
// Deprecated fields still populated for backward compatibility with older hubs
|
||||
assert.Equal(t, 0.5, stats.NetworkSent) // 524288 bytes = 0.5 MB
|
||||
assert.Equal(t, 0.25, stats.NetworkRecv) // 262144 bytes = 0.25 MB
|
||||
|
||||
// Check read time
|
||||
assert.Equal(t, testTime, stats.PrevReadTime)
|
||||
}
|
||||
|
||||
func TestTwoDecimals(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
}{
|
||||
{"round down", 1.234, 1.23},
|
||||
{"round half up", 1.235, 1.24}, // math.Round rounds half up
|
||||
{"no rounding needed", 1.23, 1.23},
|
||||
{"negative number", -1.235, -1.24}, // math.Round rounds half up (more negative)
|
||||
{"zero", 0.0, 0.0},
|
||||
{"large number", 123.456, 123.46}, // rounds 5 up
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := twoDecimals(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBytesToMegabytes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
}{
|
||||
{"1 MB", 1048576, 1.0},
|
||||
{"512 KB", 524288, 0.5},
|
||||
{"zero", 0, 0},
|
||||
{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := bytesToMegabytes(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitializeCpuTracking(t *testing.T) {
|
||||
dm := &dockerManager{
|
||||
lastCpuContainer: make(map[uint16]map[string]uint64),
|
||||
@@ -378,6 +469,117 @@ func TestDockerManagerCreation(t *testing.T) {
|
||||
assert.NotNil(t, dm.networkRecvTrackers)
|
||||
}
|
||||
|
||||
func TestCheckDockerVersion(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
responses []struct {
|
||||
statusCode int
|
||||
body string
|
||||
}
|
||||
expectedGood bool
|
||||
expectedRequests int
|
||||
}{
|
||||
{
|
||||
name: "200 with good version on first try",
|
||||
responses: []struct {
|
||||
statusCode int
|
||||
body string
|
||||
}{
|
||||
{http.StatusOK, `{"Version":"25.0.1"}`},
|
||||
},
|
||||
expectedGood: true,
|
||||
expectedRequests: 1,
|
||||
},
|
||||
{
|
||||
name: "200 with old version on first try",
|
||||
responses: []struct {
|
||||
statusCode int
|
||||
body string
|
||||
}{
|
||||
{http.StatusOK, `{"Version":"24.0.7"}`},
|
||||
},
|
||||
expectedGood: false,
|
||||
expectedRequests: 1,
|
||||
},
|
||||
{
|
||||
name: "non-200 then 200 with good version",
|
||||
responses: []struct {
|
||||
statusCode int
|
||||
body string
|
||||
}{
|
||||
{http.StatusServiceUnavailable, `"not ready"`},
|
||||
{http.StatusOK, `{"Version":"25.1.0"}`},
|
||||
},
|
||||
expectedGood: true,
|
||||
expectedRequests: 2,
|
||||
},
|
||||
{
|
||||
name: "non-200 on all retries",
|
||||
responses: []struct {
|
||||
statusCode int
|
||||
body string
|
||||
}{
|
||||
{http.StatusInternalServerError, `"error"`},
|
||||
{http.StatusUnauthorized, `"error"`},
|
||||
},
|
||||
expectedGood: false,
|
||||
expectedRequests: 2,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
requestCount := 0
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
idx := requestCount
|
||||
requestCount++
|
||||
if idx >= len(tt.responses) {
|
||||
idx = len(tt.responses) - 1
|
||||
}
|
||||
w.WriteHeader(tt.responses[idx].statusCode)
|
||||
fmt.Fprint(w, tt.responses[idx].body)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{
|
||||
Transport: &http.Transport{
|
||||
DialContext: func(_ context.Context, network, _ string) (net.Conn, error) {
|
||||
return net.Dial(network, server.Listener.Addr().String())
|
||||
},
|
||||
},
|
||||
},
|
||||
retrySleep: func(time.Duration) {},
|
||||
}
|
||||
|
||||
dm.checkDockerVersion()
|
||||
|
||||
assert.Equal(t, tt.expectedGood, dm.goodDockerVersion)
|
||||
assert.Equal(t, tt.expectedRequests, requestCount)
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("request error on all retries", func(t *testing.T) {
|
||||
requestCount := 0
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{
|
||||
Transport: &http.Transport{
|
||||
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
|
||||
requestCount++
|
||||
return nil, errors.New("connection refused")
|
||||
},
|
||||
},
|
||||
},
|
||||
retrySleep: func(time.Duration) {},
|
||||
}
|
||||
|
||||
dm.checkDockerVersion()
|
||||
|
||||
assert.False(t, dm.goodDockerVersion)
|
||||
assert.Equal(t, 2, requestCount)
|
||||
})
|
||||
}
|
||||
|
||||
func TestCycleCpuDeltas(t *testing.T) {
|
||||
dm := &dockerManager{
|
||||
lastCpuContainer: map[uint16]map[string]uint64{
|
||||
@@ -527,8 +729,10 @@ func TestContainerStatsInitialization(t *testing.T) {
|
||||
|
||||
assert.Equal(t, 45.67, stats.Cpu)
|
||||
assert.Equal(t, 2.0, stats.Mem)
|
||||
assert.Equal(t, 1.0, stats.NetworkSent)
|
||||
assert.Equal(t, 0.5, stats.NetworkRecv)
|
||||
assert.Equal(t, [2]uint64{1048576, 524288}, stats.Bandwidth)
|
||||
// Deprecated fields still populated for backward compatibility with older hubs
|
||||
assert.Equal(t, 1.0, stats.NetworkSent) // 1048576 bytes = 1 MB
|
||||
assert.Equal(t, 0.5, stats.NetworkRecv) // 524288 bytes = 0.5 MB
|
||||
assert.Equal(t, testTime, stats.PrevReadTime)
|
||||
}
|
||||
|
||||
@@ -688,12 +892,50 @@ func TestContainerStatsEndToEndWithRealData(t *testing.T) {
|
||||
updateContainerStatsValues(testStats, cpuPct, usedMemory, 1000000, 500000, testTime)
|
||||
|
||||
assert.Equal(t, cpuPct, testStats.Cpu)
|
||||
assert.Equal(t, bytesToMegabytes(float64(usedMemory)), testStats.Mem)
|
||||
assert.Equal(t, bytesToMegabytes(1000000), testStats.NetworkSent)
|
||||
assert.Equal(t, bytesToMegabytes(500000), testStats.NetworkRecv)
|
||||
assert.Equal(t, utils.BytesToMegabytes(float64(usedMemory)), testStats.Mem)
|
||||
assert.Equal(t, [2]uint64{1000000, 500000}, testStats.Bandwidth)
|
||||
// Deprecated fields still populated for backward compatibility with older hubs
|
||||
assert.Equal(t, utils.BytesToMegabytes(1000000), testStats.NetworkSent)
|
||||
assert.Equal(t, utils.BytesToMegabytes(500000), testStats.NetworkRecv)
|
||||
assert.Equal(t, testTime, testStats.PrevReadTime)
|
||||
}
|
||||
|
||||
func TestGetLogsDetectsMultiplexedWithoutContentType(t *testing.T) {
|
||||
// Docker multiplexed frame: [stream][0,0,0][len(4 bytes BE)][payload]
|
||||
frame := []byte{
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
|
||||
'H', 'e', 'l', 'l', 'o',
|
||||
}
|
||||
rt := &recordingRoundTripper{
|
||||
statusCode: 200,
|
||||
body: string(frame),
|
||||
// Intentionally omit content type to simulate Podman behavior.
|
||||
}
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: rt},
|
||||
}
|
||||
|
||||
logs, err := dm.getLogs(context.Background(), "abcdef123456")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "Hello", logs)
|
||||
}
|
||||
|
||||
func TestGetLogsDoesNotMisclassifyRawStreamAsMultiplexed(t *testing.T) {
|
||||
// Starts with 0x01, but doesn't match Docker frame signature (reserved bytes aren't all zero).
|
||||
raw := []byte{0x01, 0x02, 0x03, 0x04, 'r', 'a', 'w'}
|
||||
rt := &recordingRoundTripper{
|
||||
statusCode: 200,
|
||||
body: string(raw),
|
||||
}
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: rt},
|
||||
}
|
||||
|
||||
logs, err := dm.getLogs(context.Background(), "abcdef123456")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, raw, []byte(logs))
|
||||
}
|
||||
|
||||
func TestEdgeCasesWithRealData(t *testing.T) {
|
||||
// Test with minimal container stats
|
||||
minimalStats := &container.ApiStats{
|
||||
@@ -915,6 +1157,18 @@ func TestParseDockerStatus(t *testing.T) {
|
||||
expectedStatus: "",
|
||||
expectedHealth: container.DockerHealthNone,
|
||||
},
|
||||
{
|
||||
name: "status health with health: prefix",
|
||||
input: "Up 5 minutes (health: starting)",
|
||||
expectedStatus: "Up 5 minutes",
|
||||
expectedHealth: container.DockerHealthStarting,
|
||||
},
|
||||
{
|
||||
name: "status health with health status: prefix",
|
||||
input: "Up 10 minutes (health status: unhealthy)",
|
||||
expectedStatus: "Up 10 minutes",
|
||||
expectedHealth: container.DockerHealthUnhealthy,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@@ -926,6 +1180,84 @@ func TestParseDockerStatus(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDockerHealthStatus(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expectedHealth container.DockerHealth
|
||||
expectedOk bool
|
||||
}{
|
||||
{"healthy", container.DockerHealthHealthy, true},
|
||||
{"unhealthy", container.DockerHealthUnhealthy, true},
|
||||
{"starting", container.DockerHealthStarting, true},
|
||||
{"none", container.DockerHealthNone, true},
|
||||
{" Healthy ", container.DockerHealthHealthy, true},
|
||||
{"unknown", container.DockerHealthNone, false},
|
||||
{"", container.DockerHealthNone, false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.input, func(t *testing.T) {
|
||||
health, ok := parseDockerHealthStatus(tt.input)
|
||||
assert.Equal(t, tt.expectedHealth, health)
|
||||
assert.Equal(t, tt.expectedOk, ok)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateContainerStatsUsesPodmanInspectHealthFallback(t *testing.T) {
|
||||
var requestedPaths []string
|
||||
dm := &dockerManager{
|
||||
client: &http.Client{Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
|
||||
requestedPaths = append(requestedPaths, req.URL.EscapedPath())
|
||||
switch req.URL.EscapedPath() {
|
||||
case "/containers/0123456789ab/stats":
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Status: "200 OK",
|
||||
Header: make(http.Header),
|
||||
Body: io.NopCloser(strings.NewReader(`{
|
||||
"read":"2026-03-15T21:26:59Z",
|
||||
"cpu_stats":{"cpu_usage":{"total_usage":1000},"system_cpu_usage":2000},
|
||||
"memory_stats":{"usage":1048576,"stats":{"inactive_file":262144}},
|
||||
"networks":{"eth0":{"rx_bytes":0,"tx_bytes":0}}
|
||||
}`)),
|
||||
Request: req,
|
||||
}, nil
|
||||
case "/containers/0123456789ab/json":
|
||||
return &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Status: "200 OK",
|
||||
Header: make(http.Header),
|
||||
Body: io.NopCloser(strings.NewReader(`{"State":{"Health":{"Status":"healthy"}}}`)),
|
||||
Request: req,
|
||||
}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected path: %s", req.URL.EscapedPath())
|
||||
}
|
||||
})},
|
||||
containerStatsMap: make(map[string]*container.Stats),
|
||||
apiStats: &container.ApiStats{},
|
||||
usingPodman: true,
|
||||
lastCpuContainer: make(map[uint16]map[string]uint64),
|
||||
lastCpuSystem: make(map[uint16]map[string]uint64),
|
||||
lastCpuReadTime: make(map[uint16]map[string]time.Time),
|
||||
networkSentTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
||||
networkRecvTrackers: make(map[uint16]*deltatracker.DeltaTracker[string, uint64]),
|
||||
}
|
||||
|
||||
ctr := &container.ApiInfo{
|
||||
IdShort: "0123456789ab",
|
||||
Names: []string{"/beszel"},
|
||||
Status: "Up 2 minutes",
|
||||
Image: "beszel:latest",
|
||||
}
|
||||
|
||||
err := dm.updateContainerStats(ctr, defaultCacheTimeMs)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []string{"/containers/0123456789ab/stats", "/containers/0123456789ab/json"}, requestedPaths)
|
||||
assert.Equal(t, container.DockerHealthHealthy, dm.containerStatsMap[ctr.IdShort].Health)
|
||||
assert.Equal(t, "Up 2 minutes", dm.containerStatsMap[ctr.IdShort].Status)
|
||||
}
|
||||
|
||||
func TestConstantsAndUtilityFunctions(t *testing.T) {
|
||||
// Test constants are properly defined
|
||||
assert.Equal(t, uint16(60000), defaultCacheTimeMs)
|
||||
@@ -935,13 +1267,13 @@ func TestConstantsAndUtilityFunctions(t *testing.T) {
|
||||
assert.Equal(t, 5*1024*1024, maxTotalLogSize) // 5MB
|
||||
|
||||
// Test utility functions
|
||||
assert.Equal(t, 1.5, twoDecimals(1.499))
|
||||
assert.Equal(t, 1.5, twoDecimals(1.5))
|
||||
assert.Equal(t, 1.5, twoDecimals(1.501))
|
||||
assert.Equal(t, 1.5, utils.TwoDecimals(1.499))
|
||||
assert.Equal(t, 1.5, utils.TwoDecimals(1.5))
|
||||
assert.Equal(t, 1.5, utils.TwoDecimals(1.501))
|
||||
|
||||
assert.Equal(t, 1.0, bytesToMegabytes(1048576)) // 1 MB
|
||||
assert.Equal(t, 0.5, bytesToMegabytes(524288)) // 512 KB
|
||||
assert.Equal(t, 0.0, bytesToMegabytes(0))
|
||||
assert.Equal(t, 1.0, utils.BytesToMegabytes(1048576)) // 1 MB
|
||||
assert.Equal(t, 0.5, utils.BytesToMegabytes(524288)) // 512 KB
|
||||
assert.Equal(t, 0.0, utils.BytesToMegabytes(0))
|
||||
}
|
||||
|
||||
func TestDecodeDockerLogStream(t *testing.T) {
|
||||
@@ -950,6 +1282,7 @@ func TestDecodeDockerLogStream(t *testing.T) {
|
||||
input []byte
|
||||
expected string
|
||||
expectError bool
|
||||
multiplexed bool
|
||||
}{
|
||||
{
|
||||
name: "simple log entry",
|
||||
@@ -960,6 +1293,7 @@ func TestDecodeDockerLogStream(t *testing.T) {
|
||||
},
|
||||
expected: "Hello World",
|
||||
expectError: false,
|
||||
multiplexed: true,
|
||||
},
|
||||
{
|
||||
name: "multiple frames",
|
||||
@@ -973,6 +1307,7 @@ func TestDecodeDockerLogStream(t *testing.T) {
|
||||
},
|
||||
expected: "HelloWorld",
|
||||
expectError: false,
|
||||
multiplexed: true,
|
||||
},
|
||||
{
|
||||
name: "zero length frame",
|
||||
@@ -985,12 +1320,20 @@ func TestDecodeDockerLogStream(t *testing.T) {
|
||||
},
|
||||
expected: "Hello",
|
||||
expectError: false,
|
||||
multiplexed: true,
|
||||
},
|
||||
{
|
||||
name: "empty input",
|
||||
input: []byte{},
|
||||
expected: "",
|
||||
expectError: false,
|
||||
multiplexed: true,
|
||||
},
|
||||
{
|
||||
name: "raw stream (not multiplexed)",
|
||||
input: []byte("raw log content"),
|
||||
expected: "raw log content",
|
||||
multiplexed: false,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -998,7 +1341,7 @@ func TestDecodeDockerLogStream(t *testing.T) {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
reader := bytes.NewReader(tt.input)
|
||||
var builder strings.Builder
|
||||
err := decodeDockerLogStream(reader, &builder)
|
||||
err := decodeDockerLogStream(reader, &builder, tt.multiplexed)
|
||||
|
||||
if tt.expectError {
|
||||
assert.Error(t, err)
|
||||
@@ -1022,7 +1365,7 @@ func TestDecodeDockerLogStreamMemoryProtection(t *testing.T) {
|
||||
|
||||
reader := bytes.NewReader(input)
|
||||
var builder strings.Builder
|
||||
err := decodeDockerLogStream(reader, &builder)
|
||||
err := decodeDockerLogStream(reader, &builder, true)
|
||||
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "log frame size")
|
||||
@@ -1056,7 +1399,7 @@ func TestDecodeDockerLogStreamMemoryProtection(t *testing.T) {
|
||||
|
||||
reader := bytes.NewReader(input)
|
||||
var builder strings.Builder
|
||||
err := decodeDockerLogStream(reader, &builder)
|
||||
err := decodeDockerLogStream(reader, &builder, true)
|
||||
|
||||
// Should complete without error (graceful truncation)
|
||||
assert.NoError(t, err)
|
||||
@@ -1230,3 +1573,99 @@ func TestAnsiEscapePattern(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertContainerPortsToString(t *testing.T) {
|
||||
type port = struct {
|
||||
PublicPort uint16
|
||||
IP string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
ports []port
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "empty ports",
|
||||
ports: nil,
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "single port",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "80",
|
||||
},
|
||||
{
|
||||
name: "single port with non-default IP",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "1.2.3.4"},
|
||||
},
|
||||
expected: "1.2.3.4:80",
|
||||
},
|
||||
{
|
||||
name: "ipv6 default ip",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "::"},
|
||||
},
|
||||
expected: "80",
|
||||
},
|
||||
{
|
||||
name: "zero PublicPort is skipped",
|
||||
ports: []port{
|
||||
{PublicPort: 0, IP: "0.0.0.0"},
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "80",
|
||||
},
|
||||
{
|
||||
name: "ports sorted ascending by PublicPort",
|
||||
ports: []port{
|
||||
{PublicPort: 443, IP: "0.0.0.0"},
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
{PublicPort: 8080, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "80, 443, 8080",
|
||||
},
|
||||
{
|
||||
name: "duplicates are deduplicated",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
{PublicPort: 443, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "80, 443",
|
||||
},
|
||||
{
|
||||
name: "multiple ports with different IPs",
|
||||
ports: []port{
|
||||
{PublicPort: 80, IP: "0.0.0.0"},
|
||||
{PublicPort: 443, IP: "1.2.3.4"},
|
||||
},
|
||||
expected: "80, 1.2.3.4:443",
|
||||
},
|
||||
{
|
||||
name: "ports slice is nilled after call",
|
||||
ports: []port{
|
||||
{PublicPort: 8080, IP: "0.0.0.0"},
|
||||
},
|
||||
expected: "8080",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
ctr := &container.ApiInfo{}
|
||||
for _, p := range tt.ports {
|
||||
ctr.Ports = append(ctr.Ports, struct {
|
||||
PublicPort uint16
|
||||
IP string
|
||||
}{PublicPort: p.PublicPort, IP: p.IP})
|
||||
}
|
||||
result := convertContainerPortsToString(ctr)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
// Ports slice must be cleared to prevent bleed-over into the next response
|
||||
assert.Nil(t, ctr.Ports, "ctr.Ports should be nil after formatContainerPorts")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
95
agent/emmc_common.go
Normal file
95
agent/emmc_common.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// isEmmcBlockName reports whether name is the literal prefix "mmcblk"
// followed by one or more ASCII digits (for example "mmcblk0"). Names with
// any other trailing character, such as the partition name "mmcblk0p1", are
// rejected.
func isEmmcBlockName(name string) bool {
	const prefix = "mmcblk"
	if !strings.HasPrefix(name, prefix) || len(name) == len(prefix) {
		return false
	}
	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	return strings.IndexFunc(name[len(prefix):], notDigit) == -1
}
|
||||
|
||||
// parseHexOrDecByte parses s as a single byte value. Surrounding whitespace
// is ignored. A leading "0x" or "0X" selects hexadecimal; anything else is
// parsed as decimal. The second result is false when s is empty, is not a
// valid number, or does not fit in 8 bits.
func parseHexOrDecByte(s string) (uint8, bool) {
	text := strings.TrimSpace(s)
	if text == "" {
		return 0, false
	}

	radix := 10
	if len(text) >= 2 && text[0] == '0' && (text[1] == 'x' || text[1] == 'X') {
		radix, text = 16, text[2:]
	}

	// bitSize 8 makes ParseUint reject values above 255.
	value, err := strconv.ParseUint(text, radix, 8)
	if err != nil {
		return 0, false
	}
	return uint8(value), true
}
|
||||
|
||||
func parseHexBytePair(s string) (uint8, uint8, bool) {
|
||||
fields := strings.Fields(s)
|
||||
if len(fields) < 2 {
|
||||
return 0, 0, false
|
||||
}
|
||||
a, okA := parseHexOrDecByte(fields[0])
|
||||
b, okB := parseHexOrDecByte(fields[1])
|
||||
if !okA && !okB {
|
||||
return 0, 0, false
|
||||
}
|
||||
return a, b, true
|
||||
}
|
||||
|
||||
// emmcSmartStatus maps a pre-EOL value to a status string: 0x01 is "PASSED",
// 0x02 is "WARNING", 0x03 is "FAILED", and every other value is "UNKNOWN".
func emmcSmartStatus(preEOL uint8) string {
	statuses := [...]string{
		0x01: "PASSED",
		0x02: "WARNING",
		0x03: "FAILED",
	}
	if preEOL >= 0x01 && int(preEOL) < len(statuses) {
		return statuses[preEOL]
	}
	return "UNKNOWN"
}
|
||||
|
||||
// emmcPreEOLString renders a pre-EOL value for display. The values 0x01, 0x02
// and 0x03 are shown with a "normal", "warning" or "urgent" label; any other
// value is shown as bare two-digit hex.
func emmcPreEOLString(preEOL uint8) string {
	labelled := [...]string{
		0x01: "0x01 (normal)",
		0x02: "0x02 (warning)",
		0x03: "0x03 (urgent)",
	}
	if preEOL >= 0x01 && int(preEOL) < len(labelled) {
		return labelled[preEOL]
	}
	return fmt.Sprintf("0x%02x", preEOL)
}
|
||||
|
||||
// emmcLifeTimeString renders a device life-time estimate byte for display.
//
// JEDEC eMMC: 0x01..0x0A => 0-100% used in 10% steps, 0x0B => exceeded.
// Zero is shown as "not reported"; values above 0x0B are shown as bare hex.
func emmcLifeTimeString(v uint8) string {
	if v == 0 {
		return "0x00 (not reported)"
	}
	if v == 0x0B {
		return "0x0b (>100% used)"
	}
	if v > 0x0B {
		return fmt.Sprintf("0x%02x", v)
	}

	// v is in 0x01..0x0A: each step covers a 10% band ending at v*10.
	upper := int(v) * 10
	return fmt.Sprintf("0x%02x (%d-%d%% used)", v, upper-10, upper)
}
|
||||
78
agent/emmc_common_test.go
Normal file
78
agent/emmc_common_test.go
Normal file
@@ -0,0 +1,78 @@
|
||||
package agent
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseHexOrDecByte(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want uint8
|
||||
ok bool
|
||||
}{
|
||||
{"0x01", 1, true},
|
||||
{"0X0b", 11, true},
|
||||
{"01", 1, true},
|
||||
{" 3 ", 3, true},
|
||||
{"", 0, false},
|
||||
{"0x", 0, false},
|
||||
{"nope", 0, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
got, ok := parseHexOrDecByte(tt.in)
|
||||
if ok != tt.ok || got != tt.want {
|
||||
t.Fatalf("parseHexOrDecByte(%q) = (%d,%v), want (%d,%v)", tt.in, got, ok, tt.want, tt.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseHexBytePair(t *testing.T) {
|
||||
a, b, ok := parseHexBytePair("0x01 0x02\n")
|
||||
if !ok || a != 1 || b != 2 {
|
||||
t.Fatalf("parseHexBytePair hex = (%d,%d,%v), want (1,2,true)", a, b, ok)
|
||||
}
|
||||
|
||||
a, b, ok = parseHexBytePair("01 02")
|
||||
if !ok || a != 1 || b != 2 {
|
||||
t.Fatalf("parseHexBytePair dec = (%d,%d,%v), want (1,2,true)", a, b, ok)
|
||||
}
|
||||
|
||||
_, _, ok = parseHexBytePair("0x01")
|
||||
if ok {
|
||||
t.Fatalf("parseHexBytePair short input ok=true, want false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmmcSmartStatus(t *testing.T) {
|
||||
if got := emmcSmartStatus(0x01); got != "PASSED" {
|
||||
t.Fatalf("emmcSmartStatus(0x01) = %q, want PASSED", got)
|
||||
}
|
||||
if got := emmcSmartStatus(0x02); got != "WARNING" {
|
||||
t.Fatalf("emmcSmartStatus(0x02) = %q, want WARNING", got)
|
||||
}
|
||||
if got := emmcSmartStatus(0x03); got != "FAILED" {
|
||||
t.Fatalf("emmcSmartStatus(0x03) = %q, want FAILED", got)
|
||||
}
|
||||
if got := emmcSmartStatus(0x00); got != "UNKNOWN" {
|
||||
t.Fatalf("emmcSmartStatus(0x00) = %q, want UNKNOWN", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsEmmcBlockName(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
ok bool
|
||||
}{
|
||||
{"mmcblk0", true},
|
||||
{"mmcblk1", true},
|
||||
{"mmcblk10", true},
|
||||
{"mmcblk0p1", false},
|
||||
{"sda", false},
|
||||
{"mmcblk", false},
|
||||
{"mmcblkA", false},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := isEmmcBlockName(c.name); got != c.ok {
|
||||
t.Fatalf("isEmmcBlockName(%q) = %v, want %v", c.name, got, c.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
215
agent/emmc_linux.go
Normal file
215
agent/emmc_linux.go
Normal file
@@ -0,0 +1,215 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
// emmcSysfsRoot is the directory under which the eMMC code looks for
// "class/block/...". It is a variable only so tests can point it at a fake
// tree; the production value is "/sys".
var emmcSysfsRoot string = "/sys"
|
||||
|
||||
// emmcHealth holds the identity and wear values read from sysfs for one eMMC
// block device.
type emmcHealth struct {
	// Identity strings read from the device's "name", "serial" and "prv" files.
	model    string
	serial   string
	revision string

	// capacity is the device size in bytes; it stays 0 when it cannot be read.
	capacity uint64

	// preEOL is the value parsed from "pre_eol_info".
	preEOL uint8

	// lifeA and lifeB are the two life-time estimate bytes.
	lifeA uint8
	lifeB uint8
}
|
||||
|
||||
func scanEmmcDevices() []*DeviceInfo {
|
||||
blockDir := filepath.Join(emmcSysfsRoot, "class", "block")
|
||||
entries, err := os.ReadDir(blockDir)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := make([]*DeviceInfo, 0, 2)
|
||||
for _, ent := range entries {
|
||||
name := ent.Name()
|
||||
if !isEmmcBlockName(name) {
|
||||
continue
|
||||
}
|
||||
|
||||
deviceDir := filepath.Join(blockDir, name, "device")
|
||||
if !hasEmmcHealthFiles(deviceDir) {
|
||||
continue
|
||||
}
|
||||
|
||||
devPath := filepath.Join("/dev", name)
|
||||
devices = append(devices, &DeviceInfo{
|
||||
Name: devPath,
|
||||
Type: "emmc",
|
||||
InfoName: devPath + " [eMMC]",
|
||||
Protocol: "MMC",
|
||||
})
|
||||
}
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
|
||||
if deviceInfo == nil || deviceInfo.Name == "" {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
base := filepath.Base(deviceInfo.Name)
|
||||
if !isEmmcBlockName(base) && !strings.EqualFold(deviceInfo.Type, "emmc") && !strings.EqualFold(deviceInfo.Type, "mmc") {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
health, ok := readEmmcHealth(base)
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Normalize the device type to keep pruning logic stable across refreshes.
|
||||
deviceInfo.Type = "emmc"
|
||||
|
||||
key := health.serial
|
||||
if key == "" {
|
||||
key = filepath.Join("/dev", base)
|
||||
}
|
||||
|
||||
status := emmcSmartStatus(health.preEOL)
|
||||
|
||||
attrs := []*smart.SmartAttribute{
|
||||
{
|
||||
Name: "PreEOLInfo",
|
||||
RawValue: uint64(health.preEOL),
|
||||
RawString: emmcPreEOLString(health.preEOL),
|
||||
},
|
||||
{
|
||||
Name: "DeviceLifeTimeEstA",
|
||||
RawValue: uint64(health.lifeA),
|
||||
RawString: emmcLifeTimeString(health.lifeA),
|
||||
},
|
||||
{
|
||||
Name: "DeviceLifeTimeEstB",
|
||||
RawValue: uint64(health.lifeB),
|
||||
RawString: emmcLifeTimeString(health.lifeB),
|
||||
},
|
||||
}
|
||||
|
||||
sm.Lock()
|
||||
defer sm.Unlock()
|
||||
|
||||
if _, exists := sm.SmartDataMap[key]; !exists {
|
||||
sm.SmartDataMap[key] = &smart.SmartData{}
|
||||
}
|
||||
|
||||
data := sm.SmartDataMap[key]
|
||||
data.ModelName = health.model
|
||||
data.SerialNumber = health.serial
|
||||
data.FirmwareVersion = health.revision
|
||||
data.Capacity = health.capacity
|
||||
data.Temperature = 0
|
||||
data.SmartStatus = status
|
||||
data.DiskName = filepath.Join("/dev", base)
|
||||
data.DiskType = "emmc"
|
||||
data.Attributes = attrs
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func readEmmcHealth(blockName string) (emmcHealth, bool) {
|
||||
var out emmcHealth
|
||||
|
||||
if !isEmmcBlockName(blockName) {
|
||||
return out, false
|
||||
}
|
||||
|
||||
deviceDir := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "device")
|
||||
preEOL, okPre := readHexByteFile(filepath.Join(deviceDir, "pre_eol_info"))
|
||||
|
||||
// Some kernels expose EXT_CSD lifetime via "life_time" (two bytes), others as
|
||||
// separate files. Support both.
|
||||
lifeA, lifeB, okLife := readLifeTime(deviceDir)
|
||||
|
||||
if !okPre && !okLife {
|
||||
return out, false
|
||||
}
|
||||
|
||||
out.preEOL = preEOL
|
||||
out.lifeA = lifeA
|
||||
out.lifeB = lifeB
|
||||
|
||||
out.model = utils.ReadStringFile(filepath.Join(deviceDir, "name"))
|
||||
out.serial = utils.ReadStringFile(filepath.Join(deviceDir, "serial"))
|
||||
out.revision = utils.ReadStringFile(filepath.Join(deviceDir, "prv"))
|
||||
|
||||
if capBytes, ok := readBlockCapacityBytes(blockName); ok {
|
||||
out.capacity = capBytes
|
||||
}
|
||||
|
||||
return out, true
|
||||
}
|
||||
|
||||
func readLifeTime(deviceDir string) (uint8, uint8, bool) {
|
||||
if content, ok := utils.ReadStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
|
||||
a, b, ok := parseHexBytePair(content)
|
||||
return a, b, ok
|
||||
}
|
||||
|
||||
a, okA := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_a"))
|
||||
b, okB := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_b"))
|
||||
if okA || okB {
|
||||
return a, b, true
|
||||
}
|
||||
return 0, 0, false
|
||||
}
|
||||
|
||||
func readBlockCapacityBytes(blockName string) (uint64, bool) {
|
||||
sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size")
|
||||
lbsPath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "queue", "logical_block_size")
|
||||
|
||||
sizeStr, ok := utils.ReadStringFileOK(sizePath)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
sectors, err := strconv.ParseUint(sizeStr, 10, 64)
|
||||
if err != nil || sectors == 0 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
lbsStr, ok := utils.ReadStringFileOK(lbsPath)
|
||||
logicalBlockSize := uint64(512)
|
||||
if ok {
|
||||
if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
|
||||
logicalBlockSize = parsed
|
||||
}
|
||||
}
|
||||
|
||||
return sectors * logicalBlockSize, true
|
||||
}
|
||||
|
||||
func readHexByteFile(path string) (uint8, bool) {
|
||||
content, ok := utils.ReadStringFileOK(path)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
b, ok := parseHexOrDecByte(content)
|
||||
return b, ok
|
||||
}
|
||||
|
||||
// hasEmmcHealthFiles reports whether deviceDir contains at least one entry
// named "pre_eol_info", "life_time", "device_life_time_est_typ_a" or
// "device_life_time_est_typ_b". It returns false when the directory cannot be
// read.
func hasEmmcHealthFiles(deviceDir string) bool {
	dirEntries, err := os.ReadDir(deviceDir)
	if err != nil {
		return false
	}
	for _, entry := range dirEntries {
		n := entry.Name()
		if n == "pre_eol_info" || n == "life_time" ||
			n == "device_life_time_est_typ_a" || n == "device_life_time_est_typ_b" {
			return true
		}
	}
	return false
}
|
||||
80
agent/emmc_linux_test.go
Normal file
80
agent/emmc_linux_test.go
Normal file
@@ -0,0 +1,80 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
func TestEmmcMockSysfsScanAndCollect(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
prev := emmcSysfsRoot
|
||||
emmcSysfsRoot = tmp
|
||||
t.Cleanup(func() { emmcSysfsRoot = prev })
|
||||
|
||||
// Fake: /sys/class/block/mmcblk0
|
||||
mmcDeviceDir := filepath.Join(tmp, "class", "block", "mmcblk0", "device")
|
||||
mmcQueueDir := filepath.Join(tmp, "class", "block", "mmcblk0", "queue")
|
||||
if err := os.MkdirAll(mmcDeviceDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.MkdirAll(mmcQueueDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
write := func(path, content string) {
|
||||
t.Helper()
|
||||
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
write(filepath.Join(mmcDeviceDir, "pre_eol_info"), "0x02\n")
|
||||
write(filepath.Join(mmcDeviceDir, "life_time"), "0x04 0x05\n")
|
||||
write(filepath.Join(mmcDeviceDir, "name"), "H26M52103FMR\n")
|
||||
write(filepath.Join(mmcDeviceDir, "serial"), "01234567\n")
|
||||
write(filepath.Join(mmcDeviceDir, "prv"), "0x08\n")
|
||||
write(filepath.Join(mmcQueueDir, "logical_block_size"), "512\n")
|
||||
write(filepath.Join(tmp, "class", "block", "mmcblk0", "size"), "1024\n") // sectors
|
||||
|
||||
devs := scanEmmcDevices()
|
||||
if len(devs) != 1 {
|
||||
t.Fatalf("scanEmmcDevices() = %d devices, want 1", len(devs))
|
||||
}
|
||||
if devs[0].Name != "/dev/mmcblk0" || devs[0].Type != "emmc" {
|
||||
t.Fatalf("scanEmmcDevices()[0] = %+v, want Name=/dev/mmcblk0 Type=emmc", devs[0])
|
||||
}
|
||||
|
||||
sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
|
||||
ok, err := sm.collectEmmcHealth(devs[0])
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("collectEmmcHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
|
||||
}
|
||||
if len(sm.SmartDataMap) != 1 {
|
||||
t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
|
||||
}
|
||||
var got *smart.SmartData
|
||||
for _, v := range sm.SmartDataMap {
|
||||
got = v
|
||||
break
|
||||
}
|
||||
if got == nil {
|
||||
t.Fatalf("SmartDataMap value nil")
|
||||
}
|
||||
if got.DiskType != "emmc" || got.DiskName != "/dev/mmcblk0" {
|
||||
t.Fatalf("disk fields = (type=%q name=%q), want (emmc,/dev/mmcblk0)", got.DiskType, got.DiskName)
|
||||
}
|
||||
if got.SmartStatus != "WARNING" {
|
||||
t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
|
||||
}
|
||||
if got.SerialNumber != "01234567" || got.ModelName == "" || got.Capacity == 0 {
|
||||
t.Fatalf("identity fields = (model=%q serial=%q cap=%d), want non-empty model, serial 01234567, cap>0", got.ModelName, got.SerialNumber, got.Capacity)
|
||||
}
|
||||
if len(got.Attributes) < 3 {
|
||||
t.Fatalf("attributes len=%d, want >= 3", len(got.Attributes))
|
||||
}
|
||||
}
|
||||
14
agent/emmc_stub.go
Normal file
14
agent/emmc_stub.go
Normal file
@@ -0,0 +1,14 @@
|
||||
//go:build !linux
|
||||
|
||||
package agent
|
||||
|
||||
// Non-Linux builds: eMMC health via sysfs is not available.
|
||||
|
||||
func scanEmmcDevices() []*DeviceInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
87
agent/fingerprint.go
Normal file
87
agent/fingerprint.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/cpu"
|
||||
"github.com/shirou/gopsutil/v4/host"
|
||||
)
|
||||
|
||||
const (
	// fingerprintFileName is the name of the file, inside the data directory,
	// that stores the saved fingerprint.
	fingerprintFileName = "fingerprint"

	// knownBadUUID is a commonly known "product_uuid" that is not unique across systems.
	knownBadUUID = "03000200-0400-0500-0006-000700080009"
)
|
||||
|
||||
// GetFingerprint returns the agent fingerprint. It first tries to read a saved
|
||||
// fingerprint from the data directory. If not found (or dataDir is empty), it
|
||||
// generates one from system properties. The hostname and cpuModel parameters are
|
||||
// used as fallback material if host.HostID() fails. If either is empty, they
|
||||
// are fetched from the system automatically.
|
||||
//
|
||||
// If a new fingerprint is generated and a dataDir is provided, it is saved.
|
||||
func GetFingerprint(dataDir, hostname, cpuModel string) string {
|
||||
if dataDir != "" {
|
||||
if fp, err := readFingerprint(dataDir); err == nil {
|
||||
return fp
|
||||
}
|
||||
}
|
||||
fp := generateFingerprint(hostname, cpuModel)
|
||||
if dataDir != "" {
|
||||
_ = SaveFingerprint(dataDir, fp)
|
||||
}
|
||||
return fp
|
||||
}
|
||||
|
||||
// generateFingerprint creates a fingerprint from system properties.
|
||||
// It tries host.HostID() first, falling back to hostname + cpuModel.
|
||||
// If hostname or cpuModel are empty, they are fetched from the system.
|
||||
func generateFingerprint(hostname, cpuModel string) string {
|
||||
fingerprint, err := host.HostID()
|
||||
if err != nil || fingerprint == "" || fingerprint == knownBadUUID {
|
||||
if hostname == "" {
|
||||
hostname, _ = os.Hostname()
|
||||
}
|
||||
if cpuModel == "" {
|
||||
if info, err := cpu.Info(); err == nil && len(info) > 0 {
|
||||
cpuModel = info[0].ModelName
|
||||
}
|
||||
}
|
||||
fingerprint = hostname + cpuModel
|
||||
}
|
||||
|
||||
sum := sha256.Sum256([]byte(fingerprint))
|
||||
return hex.EncodeToString(sum[:24])
|
||||
}
|
||||
|
||||
// readFingerprint reads the saved fingerprint from the data directory.
|
||||
func readFingerprint(dataDir string) (string, error) {
|
||||
fp, err := os.ReadFile(filepath.Join(dataDir, fingerprintFileName))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
s := strings.TrimSpace(string(fp))
|
||||
if s == "" {
|
||||
return "", errors.New("fingerprint file is empty")
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// SaveFingerprint writes the fingerprint to the data directory.
|
||||
func SaveFingerprint(dataDir, fingerprint string) error {
|
||||
return os.WriteFile(filepath.Join(dataDir, fingerprintFileName), []byte(fingerprint), 0o644)
|
||||
}
|
||||
|
||||
// DeleteFingerprint removes the saved fingerprint file from the data directory.
|
||||
// Returns nil if the file does not exist (idempotent).
|
||||
func DeleteFingerprint(dataDir string) error {
|
||||
err := os.Remove(filepath.Join(dataDir, fingerprintFileName))
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
102
agent/fingerprint_test.go
Normal file
102
agent/fingerprint_test.go
Normal file
@@ -0,0 +1,102 @@
|
||||
//go:build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetFingerprint(t *testing.T) {
|
||||
t.Run("reads existing fingerprint from file", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
expected := "abc123def456"
|
||||
err := os.WriteFile(filepath.Join(dir, fingerprintFileName), []byte(expected), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
fp := GetFingerprint(dir, "", "")
|
||||
assert.Equal(t, expected, fp)
|
||||
})
|
||||
|
||||
t.Run("trims whitespace from file", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
err := os.WriteFile(filepath.Join(dir, fingerprintFileName), []byte(" abc123 \n"), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
fp := GetFingerprint(dir, "", "")
|
||||
assert.Equal(t, "abc123", fp)
|
||||
})
|
||||
|
||||
t.Run("generates fingerprint when file does not exist", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
fp := GetFingerprint(dir, "", "")
|
||||
assert.NotEmpty(t, fp)
|
||||
})
|
||||
|
||||
t.Run("generates fingerprint when dataDir is empty", func(t *testing.T) {
|
||||
fp := GetFingerprint("", "", "")
|
||||
assert.NotEmpty(t, fp)
|
||||
})
|
||||
|
||||
t.Run("generates consistent fingerprint for same inputs", func(t *testing.T) {
|
||||
fp1 := GetFingerprint("", "myhost", "mycpu")
|
||||
fp2 := GetFingerprint("", "myhost", "mycpu")
|
||||
assert.Equal(t, fp1, fp2)
|
||||
})
|
||||
|
||||
t.Run("prefers saved fingerprint over generated", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, SaveFingerprint(dir, "saved-fp"))
|
||||
|
||||
fp := GetFingerprint(dir, "anyhost", "anycpu")
|
||||
assert.Equal(t, "saved-fp", fp)
|
||||
})
|
||||
}
|
||||
|
||||
func TestSaveFingerprint(t *testing.T) {
|
||||
t.Run("saves fingerprint to file", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
err := SaveFingerprint(dir, "abc123")
|
||||
require.NoError(t, err)
|
||||
|
||||
content, err := os.ReadFile(filepath.Join(dir, fingerprintFileName))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "abc123", string(content))
|
||||
})
|
||||
|
||||
t.Run("overwrites existing fingerprint", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, SaveFingerprint(dir, "old"))
|
||||
require.NoError(t, SaveFingerprint(dir, "new"))
|
||||
|
||||
content, err := os.ReadFile(filepath.Join(dir, fingerprintFileName))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "new", string(content))
|
||||
})
|
||||
}
|
||||
|
||||
func TestDeleteFingerprint(t *testing.T) {
|
||||
t.Run("deletes existing fingerprint", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
fp := filepath.Join(dir, fingerprintFileName)
|
||||
err := os.WriteFile(fp, []byte("abc123"), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = DeleteFingerprint(dir)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify file is gone
|
||||
_, err = os.Stat(fp)
|
||||
assert.True(t, os.IsNotExist(err))
|
||||
})
|
||||
|
||||
t.Run("no error when file does not exist", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
err := DeleteFingerprint(dir)
|
||||
assert.NoError(t, err)
|
||||
})
|
||||
}
|
||||
422
agent/gpu.go
422
agent/gpu.go
@@ -5,17 +5,18 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"maps"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
"golang.org/x/exp/slog"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -23,11 +24,11 @@ const (
|
||||
nvidiaSmiCmd string = "nvidia-smi"
|
||||
rocmSmiCmd string = "rocm-smi"
|
||||
tegraStatsCmd string = "tegrastats"
|
||||
nvtopCmd string = "nvtop"
|
||||
powermetricsCmd string = "powermetrics"
|
||||
macmonCmd string = "macmon"
|
||||
noGPUFoundMsg string = "no GPU found - see https://beszel.dev/guide/gpu"
|
||||
|
||||
// Polling intervals
|
||||
nvidiaSmiInterval string = "4" // in seconds
|
||||
tegraStatsInterval string = "3700" // in milliseconds
|
||||
rocmSmiInterval time.Duration = 4300 * time.Millisecond
|
||||
// Command retry and timeout constants
|
||||
retryWaitTime time.Duration = 5 * time.Second
|
||||
maxFailureRetries int = 5
|
||||
@@ -40,10 +41,6 @@ const (
|
||||
// GPUManager manages data collection for GPUs (either Nvidia or AMD)
|
||||
type GPUManager struct {
|
||||
sync.Mutex
|
||||
nvidiaSmi bool
|
||||
rocmSmi bool
|
||||
tegrastats bool
|
||||
intelGpuStats bool
|
||||
GpuDataMap map[string]*system.GPUData
|
||||
// lastAvgData stores the last calculated averages for each GPU
|
||||
// Used when a collection happens before new data arrives (Count == 0)
|
||||
@@ -85,6 +82,58 @@ type gpuCollector struct {
|
||||
|
||||
var errNoValidData = fmt.Errorf("no valid GPU data found") // Error for missing data
|
||||
|
||||
// collectorSource identifies a selectable GPU collector in GPU_COLLECTOR.
|
||||
type collectorSource string
|
||||
|
||||
const (
|
||||
collectorSourceNVTop collectorSource = collectorSource(nvtopCmd)
|
||||
collectorSourceNVML collectorSource = "nvml"
|
||||
collectorSourceNvidiaSMI collectorSource = collectorSource(nvidiaSmiCmd)
|
||||
collectorSourceIntelGpuTop collectorSource = collectorSource(intelGpuStatsCmd)
|
||||
collectorSourceAmdSysfs collectorSource = "amd_sysfs"
|
||||
collectorSourceRocmSMI collectorSource = collectorSource(rocmSmiCmd)
|
||||
collectorSourceMacmon collectorSource = collectorSource(macmonCmd)
|
||||
collectorSourcePowermetrics collectorSource = collectorSource(powermetricsCmd)
|
||||
collectorGroupNvidia string = "nvidia"
|
||||
collectorGroupIntel string = "intel"
|
||||
collectorGroupAmd string = "amd"
|
||||
collectorGroupApple string = "apple"
|
||||
)
|
||||
|
||||
func isValidCollectorSource(source collectorSource) bool {
|
||||
switch source {
|
||||
case collectorSourceNVTop,
|
||||
collectorSourceNVML,
|
||||
collectorSourceNvidiaSMI,
|
||||
collectorSourceIntelGpuTop,
|
||||
collectorSourceAmdSysfs,
|
||||
collectorSourceRocmSMI,
|
||||
collectorSourceMacmon,
|
||||
collectorSourcePowermetrics:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// gpuCapabilities describes detected GPU tooling and sysfs support on the host.
// Each flag is named after the tool or interface it refers to; how each one
// is detected is decided by code outside this type.
type gpuCapabilities struct {
	// NVIDIA-related tooling.
	hasNvidiaSmi  bool
	hasTegrastats bool
	hasNvtop      bool

	// AMD-related tooling and sysfs support.
	hasRocmSmi  bool
	hasAmdSysfs bool

	// Intel-related tooling.
	hasIntelGpuTop bool

	// macOS tooling.
	hasMacmon       bool
	hasPowermetrics bool
}
|
||||
|
||||
// collectorDefinition bundles what is known about one GPU collector.
type collectorDefinition struct {
	// group is the collector's group name.
	group string
	// available records whether the collector can be used on this host.
	available bool
	// start launches the collector and reports a bool result; it is handed an
	// onFailure callback. NOTE(review): presumably the result means "started"
	// and onFailure runs when the collector later fails — confirm with callers.
	start func(onFailure func()) bool
	// deprecationWarning is an optional message tied to this collector.
	deprecationWarning string
}
|
||||
|
||||
// starts and manages the ongoing collection of GPU data for the specified GPU management utility
|
||||
func (c *gpuCollector) start() {
|
||||
for {
|
||||
@@ -136,10 +185,10 @@ func (gm *GPUManager) getJetsonParser() func(output []byte) bool {
|
||||
// use closure to avoid recompiling the regex
|
||||
ramPattern := regexp.MustCompile(`RAM (\d+)/(\d+)MB`)
|
||||
gr3dPattern := regexp.MustCompile(`GR3D_FREQ (\d+)%`)
|
||||
tempPattern := regexp.MustCompile(`tj@(\d+\.?\d*)C`)
|
||||
tempPattern := regexp.MustCompile(`(?:tj|GPU)@(\d+\.?\d*)C`)
|
||||
// Orin Nano / NX do not have GPU specific power monitor
|
||||
// TODO: Maybe use VDD_IN for Nano / NX and add a total system power chart
|
||||
powerPattern := regexp.MustCompile(`(GPU_SOC|CPU_GPU_CV) (\d+)mW`)
|
||||
powerPattern := regexp.MustCompile(`(GPU_SOC|CPU_GPU_CV)\s+(\d+)mW|VDD_SYS_GPU\s+(\d+)/\d+`)
|
||||
|
||||
// jetson devices have only one gpu so we'll just initialize here
|
||||
gpuData := &system.GPUData{Name: "GPU"}
|
||||
@@ -168,7 +217,13 @@ func (gm *GPUManager) getJetsonParser() func(output []byte) bool {
|
||||
// Parse power usage
|
||||
powerMatches := powerPattern.FindSubmatch(output)
|
||||
if powerMatches != nil {
|
||||
power, _ := strconv.ParseFloat(string(powerMatches[2]), 64)
|
||||
// powerMatches[2] is the "(GPU_SOC|CPU_GPU_CV) <N>mW" capture
|
||||
// powerMatches[3] is the "VDD_SYS_GPU <N>/<N>" capture
|
||||
powerStr := string(powerMatches[2])
|
||||
if powerStr == "" {
|
||||
powerStr = string(powerMatches[3])
|
||||
}
|
||||
power, _ := strconv.ParseFloat(powerStr, 64)
|
||||
gpuData.Power += power / milliwattsInAWatt
|
||||
}
|
||||
gpuData.Count++
|
||||
@@ -231,13 +286,14 @@ func (gm *GPUManager) parseAmdData(output []byte) bool {
|
||||
totalMemory, _ := strconv.ParseFloat(v.MemoryTotal, 64)
|
||||
usage, _ := strconv.ParseFloat(v.Usage, 64)
|
||||
|
||||
if _, ok := gm.GpuDataMap[v.ID]; !ok {
|
||||
gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name}
|
||||
id := v.ID
|
||||
if _, ok := gm.GpuDataMap[id]; !ok {
|
||||
gm.GpuDataMap[id] = &system.GPUData{Name: v.Name}
|
||||
}
|
||||
gpu := gm.GpuDataMap[v.ID]
|
||||
gpu := gm.GpuDataMap[id]
|
||||
gpu.Temperature, _ = strconv.ParseFloat(v.Temperature, 64)
|
||||
gpu.MemoryUsed = bytesToMegabytes(memoryUsage)
|
||||
gpu.MemoryTotal = bytesToMegabytes(totalMemory)
|
||||
gpu.MemoryUsed = utils.BytesToMegabytes(memoryUsage)
|
||||
gpu.MemoryTotal = utils.BytesToMegabytes(totalMemory)
|
||||
gpu.Usage += usage
|
||||
gpu.Power += power
|
||||
gpu.Count++
|
||||
@@ -297,8 +353,13 @@ func (gm *GPUManager) calculateGPUAverage(id string, gpu *system.GPUData, cacheK
|
||||
currentCount := uint32(gpu.Count)
|
||||
deltaCount := gm.calculateDeltaCount(currentCount, lastSnapshot)
|
||||
|
||||
// If no new data arrived, use last known average
|
||||
// If no new data arrived
|
||||
if deltaCount == 0 {
|
||||
// If GPU appears suspended (instantaneous values are 0), return zero values
|
||||
// Otherwise return last known average for temporary collection gaps
|
||||
if gpu.Temperature == 0 && gpu.MemoryUsed == 0 {
|
||||
return system.GPUData{Name: gpu.Name}
|
||||
}
|
||||
return gm.lastAvgData[id] // zero value if not found
|
||||
}
|
||||
|
||||
@@ -306,16 +367,16 @@ func (gm *GPUManager) calculateGPUAverage(id string, gpu *system.GPUData, cacheK
|
||||
gpuAvg := *gpu
|
||||
deltaUsage, deltaPower, deltaPowerPkg := gm.calculateDeltas(gpu, lastSnapshot)
|
||||
|
||||
gpuAvg.Power = twoDecimals(deltaPower / float64(deltaCount))
|
||||
gpuAvg.Power = utils.TwoDecimals(deltaPower / float64(deltaCount))
|
||||
|
||||
if gpu.Engines != nil {
|
||||
// make fresh map for averaged engine metrics to avoid mutating
|
||||
// the accumulator map stored in gm.GpuDataMap
|
||||
gpuAvg.Engines = make(map[string]float64, len(gpu.Engines))
|
||||
gpuAvg.Usage = gm.calculateIntelGPUUsage(&gpuAvg, gpu, lastSnapshot, deltaCount)
|
||||
gpuAvg.PowerPkg = twoDecimals(deltaPowerPkg / float64(deltaCount))
|
||||
gpuAvg.PowerPkg = utils.TwoDecimals(deltaPowerPkg / float64(deltaCount))
|
||||
} else {
|
||||
gpuAvg.Usage = twoDecimals(deltaUsage / float64(deltaCount))
|
||||
gpuAvg.Usage = utils.TwoDecimals(deltaUsage / float64(deltaCount))
|
||||
}
|
||||
|
||||
gm.lastAvgData[id] = gpuAvg
|
||||
@@ -350,17 +411,17 @@ func (gm *GPUManager) calculateIntelGPUUsage(gpuAvg, gpu *system.GPUData, lastSn
|
||||
} else {
|
||||
deltaEngine = engine
|
||||
}
|
||||
gpuAvg.Engines[name] = twoDecimals(deltaEngine / float64(deltaCount))
|
||||
gpuAvg.Engines[name] = utils.TwoDecimals(deltaEngine / float64(deltaCount))
|
||||
maxEngineUsage = max(maxEngineUsage, deltaEngine/float64(deltaCount))
|
||||
}
|
||||
return twoDecimals(maxEngineUsage)
|
||||
return utils.TwoDecimals(maxEngineUsage)
|
||||
}
|
||||
|
||||
// updateInstantaneousValues updates values that should reflect current state, not averages
|
||||
func (gm *GPUManager) updateInstantaneousValues(gpuAvg *system.GPUData, gpu *system.GPUData) {
|
||||
gpuAvg.Temperature = twoDecimals(gpu.Temperature)
|
||||
gpuAvg.MemoryUsed = twoDecimals(gpu.MemoryUsed)
|
||||
gpuAvg.MemoryTotal = twoDecimals(gpu.MemoryTotal)
|
||||
gpuAvg.Temperature = utils.TwoDecimals(gpu.Temperature)
|
||||
gpuAvg.MemoryUsed = utils.TwoDecimals(gpu.MemoryUsed)
|
||||
gpuAvg.MemoryTotal = utils.TwoDecimals(gpu.MemoryTotal)
|
||||
}
|
||||
|
||||
// storeSnapshot saves the current GPU state for this cache key
|
||||
@@ -378,38 +439,43 @@ func (gm *GPUManager) storeSnapshot(id string, gpu *system.GPUData, cacheKey uin
|
||||
gm.lastSnapshots[cacheKey][id] = snapshot
|
||||
}
|
||||
|
||||
// detectGPUs checks for the presence of GPU management tools (nvidia-smi, rocm-smi, tegrastats)
|
||||
// in the system path. It sets the corresponding flags in the GPUManager struct if any of these
|
||||
// tools are found. If none of the tools are found, it returns an error indicating that no GPU
|
||||
// management tools are available.
|
||||
func (gm *GPUManager) detectGPUs() error {
|
||||
// discoverGpuCapabilities checks for available GPU tooling and sysfs support.
|
||||
// It only reports capability presence and does not apply policy decisions.
|
||||
func (gm *GPUManager) discoverGpuCapabilities() gpuCapabilities {
|
||||
caps := gpuCapabilities{
|
||||
hasAmdSysfs: gm.hasAmdSysfs(),
|
||||
}
|
||||
if _, err := exec.LookPath(nvidiaSmiCmd); err == nil {
|
||||
gm.nvidiaSmi = true
|
||||
caps.hasNvidiaSmi = true
|
||||
}
|
||||
if _, err := exec.LookPath(rocmSmiCmd); err == nil {
|
||||
gm.rocmSmi = true
|
||||
caps.hasRocmSmi = true
|
||||
}
|
||||
if _, err := exec.LookPath(tegraStatsCmd); err == nil {
|
||||
gm.tegrastats = true
|
||||
gm.nvidiaSmi = false
|
||||
caps.hasTegrastats = true
|
||||
}
|
||||
if _, err := exec.LookPath(intelGpuStatsCmd); err == nil {
|
||||
gm.intelGpuStats = true
|
||||
caps.hasIntelGpuTop = true
|
||||
}
|
||||
if gm.nvidiaSmi || gm.rocmSmi || gm.tegrastats || gm.intelGpuStats {
|
||||
return nil
|
||||
if _, err := exec.LookPath(nvtopCmd); err == nil {
|
||||
caps.hasNvtop = true
|
||||
}
|
||||
return fmt.Errorf("no GPU found - install nvidia-smi, rocm-smi, tegrastats, or intel_gpu_top")
|
||||
if runtime.GOOS == "darwin" {
|
||||
if _, err := exec.LookPath(macmonCmd); err == nil {
|
||||
caps.hasMacmon = true
|
||||
}
|
||||
if _, err := exec.LookPath(powermetricsCmd); err == nil {
|
||||
caps.hasPowermetrics = true
|
||||
}
|
||||
}
|
||||
return caps
|
||||
}
|
||||
|
||||
// startCollector starts the appropriate GPU data collector based on the command
|
||||
func (gm *GPUManager) startCollector(command string) {
|
||||
collector := gpuCollector{
|
||||
name: command,
|
||||
bufSize: 10 * 1024,
|
||||
}
|
||||
switch command {
|
||||
case intelGpuStatsCmd:
|
||||
func hasAnyGpuCollector(caps gpuCapabilities) bool {
|
||||
return caps.hasNvidiaSmi || caps.hasRocmSmi || caps.hasAmdSysfs || caps.hasTegrastats || caps.hasIntelGpuTop || caps.hasNvtop || caps.hasMacmon || caps.hasPowermetrics
|
||||
}
|
||||
|
||||
func (gm *GPUManager) startIntelCollector() {
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
@@ -424,21 +490,39 @@ func (gm *GPUManager) startCollector(command string) {
|
||||
}
|
||||
}
|
||||
}()
|
||||
case nvidiaSmiCmd:
|
||||
collector.cmdArgs = []string{
|
||||
"-l", nvidiaSmiInterval,
|
||||
}
|
||||
|
||||
func (gm *GPUManager) startNvidiaSmiCollector(intervalSeconds string) {
|
||||
collector := gpuCollector{
|
||||
name: nvidiaSmiCmd,
|
||||
bufSize: 10 * 1024,
|
||||
cmdArgs: []string{
|
||||
"-l", intervalSeconds,
|
||||
"--query-gpu=index,name,temperature.gpu,memory.used,memory.total,utilization.gpu,power.draw",
|
||||
"--format=csv,noheader,nounits",
|
||||
},
|
||||
parse: gm.parseNvidiaData,
|
||||
}
|
||||
collector.parse = gm.parseNvidiaData
|
||||
go collector.start()
|
||||
case tegraStatsCmd:
|
||||
collector.cmdArgs = []string{"--interval", tegraStatsInterval}
|
||||
collector.parse = gm.getJetsonParser()
|
||||
}
|
||||
|
||||
func (gm *GPUManager) startTegraStatsCollector(intervalMilliseconds string) {
|
||||
collector := gpuCollector{
|
||||
name: tegraStatsCmd,
|
||||
bufSize: 10 * 1024,
|
||||
cmdArgs: []string{"--interval", intervalMilliseconds},
|
||||
parse: gm.getJetsonParser(),
|
||||
}
|
||||
go collector.start()
|
||||
case rocmSmiCmd:
|
||||
collector.cmdArgs = []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"}
|
||||
collector.parse = gm.parseAmdData
|
||||
}
|
||||
|
||||
func (gm *GPUManager) startRocmSmiCollector(pollInterval time.Duration) {
|
||||
collector := gpuCollector{
|
||||
name: rocmSmiCmd,
|
||||
bufSize: 10 * 1024,
|
||||
cmdArgs: []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"},
|
||||
parse: gm.parseAmdData,
|
||||
}
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
@@ -447,36 +531,232 @@ func (gm *GPUManager) startCollector(command string) {
|
||||
if failures > maxFailureRetries {
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting AMD GPU data", "err", err)
|
||||
slog.Warn("Error collecting AMD GPU data via rocm-smi", "err", err)
|
||||
}
|
||||
time.Sleep(rocmSmiInterval)
|
||||
time.Sleep(pollInterval)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (gm *GPUManager) collectorDefinitions(caps gpuCapabilities) map[collectorSource]collectorDefinition {
|
||||
return map[collectorSource]collectorDefinition{
|
||||
collectorSourceNVML: {
|
||||
group: collectorGroupNvidia,
|
||||
available: caps.hasNvidiaSmi,
|
||||
start: func(_ func()) bool {
|
||||
return gm.startNvmlCollector()
|
||||
},
|
||||
},
|
||||
collectorSourceNvidiaSMI: {
|
||||
group: collectorGroupNvidia,
|
||||
available: caps.hasNvidiaSmi,
|
||||
start: func(_ func()) bool {
|
||||
gm.startNvidiaSmiCollector("4") // seconds
|
||||
return true
|
||||
},
|
||||
},
|
||||
collectorSourceIntelGpuTop: {
|
||||
group: collectorGroupIntel,
|
||||
available: caps.hasIntelGpuTop,
|
||||
start: func(_ func()) bool {
|
||||
gm.startIntelCollector()
|
||||
return true
|
||||
},
|
||||
},
|
||||
collectorSourceAmdSysfs: {
|
||||
group: collectorGroupAmd,
|
||||
available: caps.hasAmdSysfs,
|
||||
start: func(_ func()) bool {
|
||||
return gm.startAmdSysfsCollector()
|
||||
},
|
||||
},
|
||||
collectorSourceRocmSMI: {
|
||||
group: collectorGroupAmd,
|
||||
available: caps.hasRocmSmi,
|
||||
deprecationWarning: "rocm-smi is deprecated and may be removed in a future release",
|
||||
start: func(_ func()) bool {
|
||||
gm.startRocmSmiCollector(4300 * time.Millisecond)
|
||||
return true
|
||||
},
|
||||
},
|
||||
collectorSourceNVTop: {
|
||||
available: caps.hasNvtop,
|
||||
start: func(onFailure func()) bool {
|
||||
gm.startNvtopCollector("30", onFailure) // tens of milliseconds
|
||||
return true
|
||||
},
|
||||
},
|
||||
collectorSourceMacmon: {
|
||||
group: collectorGroupApple,
|
||||
available: caps.hasMacmon,
|
||||
start: func(_ func()) bool {
|
||||
gm.startMacmonCollector()
|
||||
return true
|
||||
},
|
||||
},
|
||||
collectorSourcePowermetrics: {
|
||||
group: collectorGroupApple,
|
||||
available: caps.hasPowermetrics,
|
||||
start: func(_ func()) bool {
|
||||
gm.startPowermetricsCollector()
|
||||
return true
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// parseCollectorPriority parses GPU_COLLECTOR and returns valid ordered entries.
|
||||
func parseCollectorPriority(value string) []collectorSource {
|
||||
parts := strings.Split(value, ",")
|
||||
priorities := make([]collectorSource, 0, len(parts))
|
||||
for _, raw := range parts {
|
||||
name := collectorSource(strings.TrimSpace(strings.ToLower(raw)))
|
||||
if !isValidCollectorSource(name) {
|
||||
if name != "" {
|
||||
slog.Warn("Ignoring unknown GPU collector", "collector", name)
|
||||
}
|
||||
continue
|
||||
}
|
||||
priorities = append(priorities, name)
|
||||
}
|
||||
return priorities
|
||||
}
|
||||
|
||||
// startNvmlCollector initializes NVML and starts its polling loop.
|
||||
func (gm *GPUManager) startNvmlCollector() bool {
|
||||
collector := &nvmlCollector{gm: gm}
|
||||
if err := collector.init(); err != nil {
|
||||
slog.Warn("Failed to initialize NVML", "err", err)
|
||||
return false
|
||||
}
|
||||
go collector.start()
|
||||
return true
|
||||
}
|
||||
|
||||
// startAmdSysfsCollector starts AMD GPU collection via sysfs.
|
||||
func (gm *GPUManager) startAmdSysfsCollector() bool {
|
||||
go func() {
|
||||
if err := gm.collectAmdStats(); err != nil {
|
||||
slog.Warn("Error collecting AMD GPU data via sysfs", "err", err)
|
||||
}
|
||||
}()
|
||||
return true
|
||||
}
|
||||
|
||||
// startCollectorsByPriority starts collectors in order with one source per vendor group.
|
||||
func (gm *GPUManager) startCollectorsByPriority(priorities []collectorSource, caps gpuCapabilities) int {
|
||||
definitions := gm.collectorDefinitions(caps)
|
||||
selectedGroups := make(map[string]bool, 3)
|
||||
started := 0
|
||||
for i, source := range priorities {
|
||||
definition, ok := definitions[source]
|
||||
if !ok || !definition.available {
|
||||
continue
|
||||
}
|
||||
// nvtop is not a vendor-specific collector, so should only be used if no other collectors are selected or it is first in GPU_COLLECTOR.
|
||||
if source == collectorSourceNVTop {
|
||||
if len(selectedGroups) > 0 {
|
||||
slog.Warn("Skipping nvtop because other collectors are selected")
|
||||
continue
|
||||
}
|
||||
// if nvtop fails, fall back to remaining collectors.
|
||||
remaining := append([]collectorSource(nil), priorities[i+1:]...)
|
||||
if definition.start(func() {
|
||||
gm.startCollectorsByPriority(remaining, caps)
|
||||
}) {
|
||||
started++
|
||||
return started
|
||||
}
|
||||
}
|
||||
group := definition.group
|
||||
if group == "" || selectedGroups[group] {
|
||||
continue
|
||||
}
|
||||
if definition.deprecationWarning != "" {
|
||||
slog.Warn(definition.deprecationWarning)
|
||||
}
|
||||
if definition.start(nil) {
|
||||
selectedGroups[group] = true
|
||||
started++
|
||||
}
|
||||
}
|
||||
return started
|
||||
}
|
||||
|
||||
// resolveLegacyCollectorPriority builds the default collector order when GPU_COLLECTOR is unset.
|
||||
func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []collectorSource {
|
||||
priorities := make([]collectorSource, 0, 4)
|
||||
|
||||
if caps.hasNvidiaSmi && !caps.hasTegrastats {
|
||||
if nvml, _ := utils.GetEnv("NVML"); nvml == "true" {
|
||||
priorities = append(priorities, collectorSourceNVML, collectorSourceNvidiaSMI)
|
||||
} else {
|
||||
priorities = append(priorities, collectorSourceNvidiaSMI)
|
||||
}
|
||||
}
|
||||
|
||||
if caps.hasRocmSmi {
|
||||
if val, _ := utils.GetEnv("AMD_SYSFS"); val == "true" {
|
||||
priorities = append(priorities, collectorSourceAmdSysfs)
|
||||
} else {
|
||||
priorities = append(priorities, collectorSourceRocmSMI)
|
||||
}
|
||||
} else if caps.hasAmdSysfs {
|
||||
priorities = append(priorities, collectorSourceAmdSysfs)
|
||||
}
|
||||
|
||||
if caps.hasIntelGpuTop {
|
||||
priorities = append(priorities, collectorSourceIntelGpuTop)
|
||||
}
|
||||
|
||||
// Apple collectors are currently opt-in only for testing.
|
||||
// Enable them with GPU_COLLECTOR=macmon or GPU_COLLECTOR=powermetrics.
|
||||
// TODO: uncomment below when Apple collectors are confirmed to be working.
|
||||
//
|
||||
// Prefer macmon on macOS (no sudo). Fall back to powermetrics if present.
|
||||
// if caps.hasMacmon {
|
||||
// priorities = append(priorities, collectorSourceMacmon)
|
||||
// } else if caps.hasPowermetrics {
|
||||
// priorities = append(priorities, collectorSourcePowermetrics)
|
||||
// }
|
||||
|
||||
// Keep nvtop as a last resort only when no vendor collector exists.
|
||||
if len(priorities) == 0 && caps.hasNvtop {
|
||||
priorities = append(priorities, collectorSourceNVTop)
|
||||
}
|
||||
return priorities
|
||||
}
|
||||
|
||||
// NewGPUManager creates and initializes a new GPUManager
|
||||
func NewGPUManager() (*GPUManager, error) {
|
||||
if skipGPU, _ := GetEnv("SKIP_GPU"); skipGPU == "true" {
|
||||
if skipGPU, _ := utils.GetEnv("SKIP_GPU"); skipGPU == "true" {
|
||||
return nil, nil
|
||||
}
|
||||
var gm GPUManager
|
||||
if err := gm.detectGPUs(); err != nil {
|
||||
return nil, err
|
||||
caps := gm.discoverGpuCapabilities()
|
||||
if !hasAnyGpuCollector(caps) {
|
||||
return nil, fmt.Errorf(noGPUFoundMsg)
|
||||
}
|
||||
gm.GpuDataMap = make(map[string]*system.GPUData)
|
||||
|
||||
if gm.nvidiaSmi {
|
||||
gm.startCollector(nvidiaSmiCmd)
|
||||
// Jetson devices should always use tegrastats (ignore GPU_COLLECTOR).
|
||||
if caps.hasTegrastats {
|
||||
gm.startTegraStatsCollector("3700")
|
||||
return &gm, nil
|
||||
}
|
||||
if gm.rocmSmi {
|
||||
gm.startCollector(rocmSmiCmd)
|
||||
|
||||
// if GPU_COLLECTOR is set, start user-defined collectors.
|
||||
if collectorConfig, ok := utils.GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
|
||||
priorities := parseCollectorPriority(collectorConfig)
|
||||
if gm.startCollectorsByPriority(priorities, caps) == 0 {
|
||||
return nil, fmt.Errorf("no configured GPU collectors are available")
|
||||
}
|
||||
if gm.tegrastats {
|
||||
gm.startCollector(tegraStatsCmd)
|
||||
return &gm, nil
|
||||
}
|
||||
if gm.intelGpuStats {
|
||||
gm.startCollector(intelGpuStatsCmd)
|
||||
|
||||
// auto-detect and start collectors when GPU_COLLECTOR is unset.
|
||||
if gm.startCollectorsByPriority(gm.resolveLegacyCollectorPriority(caps), caps) == 0 {
|
||||
return nil, fmt.Errorf(noGPUFoundMsg)
|
||||
}
|
||||
|
||||
return &gm, nil
|
||||
|
||||
302
agent/gpu_amd_linux.go
Normal file
302
agent/gpu_amd_linux.go
Normal file
@@ -0,0 +1,302 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
// amdgpuNameCache memoizes AMDGPU product-name lookups. hits maps a cache key
// (a device ID, or "device:revision") to a resolved name; misses records keys
// whose lookup already failed. The embedded RWMutex guards both maps.
var amdgpuNameCache = struct {
	sync.RWMutex
	hits   map[string]string
	misses map[string]struct{}
}{
	hits:   make(map[string]string),
	misses: make(map[string]struct{}),
}
|
||||
|
||||
// hasAmdSysfs returns true if any AMD GPU sysfs nodes are found
|
||||
func (gm *GPUManager) hasAmdSysfs() bool {
|
||||
cards, err := filepath.Glob("/sys/class/drm/card*/device/vendor")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, vendorPath := range cards {
|
||||
vendor, err := utils.ReadStringFileLimited(vendorPath, 64)
|
||||
if err == nil && vendor == "0x1002" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// collectAmdStats collects AMD GPU metrics directly from sysfs to avoid the overhead of rocm-smi
|
||||
func (gm *GPUManager) collectAmdStats() error {
|
||||
sysfsPollInterval := 3000 * time.Millisecond
|
||||
cards, err := filepath.Glob("/sys/class/drm/card*")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var amdGpuPaths []string
|
||||
for _, card := range cards {
|
||||
// Ignore symbolic links and non-main card directories
|
||||
if strings.Contains(filepath.Base(card), "-") || !isAmdGpu(card) {
|
||||
continue
|
||||
}
|
||||
amdGpuPaths = append(amdGpuPaths, card)
|
||||
}
|
||||
|
||||
if len(amdGpuPaths) == 0 {
|
||||
return errNoValidData
|
||||
}
|
||||
|
||||
slog.Debug("Using sysfs for AMD GPU data collection")
|
||||
|
||||
failures := 0
|
||||
for {
|
||||
hasData := false
|
||||
for _, cardPath := range amdGpuPaths {
|
||||
if gm.updateAmdGpuData(cardPath) {
|
||||
hasData = true
|
||||
}
|
||||
}
|
||||
if !hasData {
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
return errNoValidData
|
||||
}
|
||||
slog.Warn("No AMD GPU data from sysfs", "failures", failures)
|
||||
time.Sleep(retryWaitTime)
|
||||
continue
|
||||
}
|
||||
failures = 0
|
||||
time.Sleep(sysfsPollInterval)
|
||||
}
|
||||
}
|
||||
|
||||
// isAmdGpu checks whether a DRM card path belongs to AMD vendor ID 0x1002.
|
||||
func isAmdGpu(cardPath string) bool {
|
||||
vendor, err := utils.ReadStringFileLimited(filepath.Join(cardPath, "device/vendor"), 64)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return vendor == "0x1002"
|
||||
}
|
||||
|
||||
// updateAmdGpuData reads GPU metrics from sysfs and updates the GPU data map.
|
||||
// Returns true if at least some data was successfully read.
|
||||
func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
|
||||
devicePath := filepath.Join(cardPath, "device")
|
||||
id := filepath.Base(cardPath)
|
||||
|
||||
// Read all sysfs values first (no lock needed - these can be slow)
|
||||
usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent"))
|
||||
memUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used"))
|
||||
memTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total"))
|
||||
// if gtt is present, add it to the memory used and total (https://github.com/henrygd/beszel/issues/1569#issuecomment-3837640484)
|
||||
if gttUsed, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_used")); err == nil && gttUsed > 0 {
|
||||
if gttTotal, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_total")); err == nil {
|
||||
memUsed += gttUsed
|
||||
memTotal += gttTotal
|
||||
}
|
||||
}
|
||||
|
||||
var temp, power float64
|
||||
hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*"))
|
||||
for _, hwmonDir := range hwmons {
|
||||
if t, err := readSysfsFloat(filepath.Join(hwmonDir, "temp1_input")); err == nil {
|
||||
temp = t / 1000.0
|
||||
}
|
||||
if p, err := readSysfsFloat(filepath.Join(hwmonDir, "power1_average")); err == nil {
|
||||
power += p / 1000000.0
|
||||
} else if p, err := readSysfsFloat(filepath.Join(hwmonDir, "power1_input")); err == nil {
|
||||
power += p / 1000000.0
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we got any meaningful data
|
||||
if usageErr != nil && memUsedErr != nil && temp == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Single lock to update all values atomically
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
|
||||
gpu, ok := gm.GpuDataMap[id]
|
||||
if !ok {
|
||||
gpu = &system.GPUData{Name: getAmdGpuName(devicePath)}
|
||||
gm.GpuDataMap[id] = gpu
|
||||
}
|
||||
|
||||
if usageErr == nil {
|
||||
gpu.Usage += usage
|
||||
}
|
||||
gpu.MemoryUsed = utils.BytesToMegabytes(memUsed)
|
||||
gpu.MemoryTotal = utils.BytesToMegabytes(memTotal)
|
||||
gpu.Temperature = temp
|
||||
gpu.Power += power
|
||||
gpu.Count++
|
||||
return true
|
||||
}
|
||||
|
||||
// readSysfsFloat reads and parses a numeric value from a sysfs file.
|
||||
func readSysfsFloat(path string) (float64, error) {
|
||||
val, err := utils.ReadStringFileLimited(path, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.ParseFloat(val, 64)
|
||||
}
|
||||
|
||||
// normalizeHexID normalizes hex IDs by trimming spaces, lowercasing, and dropping 0x.
// For example, " 0X15BF " becomes "15bf".
func normalizeHexID(id string) string {
	return strings.TrimPrefix(strings.ToLower(strings.TrimSpace(id)), "0x")
}
|
||||
|
||||
// cacheKeyForAmdgpu builds the cache key for a device and optional revision.
// The key is "deviceID:revisionID" when a revision is given, otherwise just
// deviceID.
func cacheKeyForAmdgpu(deviceID, revisionID string) string {
	if revisionID != "" {
		return deviceID + ":" + revisionID
	}
	return deviceID
}
|
||||
|
||||
// lookupAmdgpuNameInFile resolves an AMDGPU name from amdgpu.ids by device/revision.
|
||||
func lookupAmdgpuNameInFile(deviceID, revisionID, filePath string) (name string, exact bool, found bool) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return "", false, false
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var byDevice string
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(line, ",", 3)
|
||||
if len(parts) != 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
dev := normalizeHexID(parts[0])
|
||||
rev := normalizeHexID(parts[1])
|
||||
productName := strings.TrimSpace(parts[2])
|
||||
if dev == "" || productName == "" || dev != deviceID {
|
||||
continue
|
||||
}
|
||||
if byDevice == "" {
|
||||
byDevice = productName
|
||||
}
|
||||
if revisionID != "" && rev == revisionID {
|
||||
return productName, true, true
|
||||
}
|
||||
}
|
||||
if byDevice != "" {
|
||||
return byDevice, false, true
|
||||
}
|
||||
return "", false, false
|
||||
}
|
||||
|
||||
// getCachedAmdgpuName returns cached hit/miss status for the given device/revision.
|
||||
func getCachedAmdgpuName(deviceID, revisionID string) (name string, found bool, done bool) {
|
||||
// Build the list of cache keys to check. We always look up the exact device+revision key.
|
||||
// When revisionID is set, we also look up deviceID alone, since the cache may store a
|
||||
// device-only fallback when we couldn't resolve the exact revision.
|
||||
keys := []string{cacheKeyForAmdgpu(deviceID, revisionID)}
|
||||
if revisionID != "" {
|
||||
keys = append(keys, deviceID)
|
||||
}
|
||||
|
||||
knownMisses := 0
|
||||
amdgpuNameCache.RLock()
|
||||
defer amdgpuNameCache.RUnlock()
|
||||
for _, key := range keys {
|
||||
if name, ok := amdgpuNameCache.hits[key]; ok {
|
||||
return name, true, true
|
||||
}
|
||||
if _, ok := amdgpuNameCache.misses[key]; ok {
|
||||
knownMisses++
|
||||
}
|
||||
}
|
||||
// done=true means "don't bother doing slow lookup": we either found a name (above) or
|
||||
// every key we checked was already a known miss, so we've tried before and failed.
|
||||
return "", false, knownMisses == len(keys)
|
||||
}
|
||||
|
||||
// normalizeAmdgpuName trims standard suffixes from AMDGPU product names.
// A trailing " Graphics" is removed first, then a trailing " Series", so
// "X Series Graphics" becomes "X" while "X Graphics Series" becomes
// "X Graphics".
func normalizeAmdgpuName(name string) string {
	for _, suffix := range []string{" Graphics", " Series"} {
		name = strings.TrimSuffix(name, suffix)
	}
	return name
}
|
||||
|
||||
// cacheAmdgpuName stores a resolved AMDGPU name in the lookup cache.
|
||||
func cacheAmdgpuName(deviceID, revisionID, name string, exact bool) {
|
||||
name = normalizeAmdgpuName(name)
|
||||
amdgpuNameCache.Lock()
|
||||
defer amdgpuNameCache.Unlock()
|
||||
if exact && revisionID != "" {
|
||||
amdgpuNameCache.hits[cacheKeyForAmdgpu(deviceID, revisionID)] = name
|
||||
}
|
||||
amdgpuNameCache.hits[deviceID] = name
|
||||
}
|
||||
|
||||
// cacheMissingAmdgpuName records unresolved device/revision lookups.
|
||||
func cacheMissingAmdgpuName(deviceID, revisionID string) {
|
||||
amdgpuNameCache.Lock()
|
||||
defer amdgpuNameCache.Unlock()
|
||||
amdgpuNameCache.misses[deviceID] = struct{}{}
|
||||
if revisionID != "" {
|
||||
amdgpuNameCache.misses[cacheKeyForAmdgpu(deviceID, revisionID)] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// getAmdGpuName attempts to get a descriptive GPU name.
|
||||
// First tries product_name (rarely available), then looks up the PCI device ID.
|
||||
// Falls back to showing the raw device ID if not found in the lookup table.
|
||||
func getAmdGpuName(devicePath string) string {
|
||||
// Try product_name first (works for some enterprise GPUs)
|
||||
if prod, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "product_name"), 128); err == nil {
|
||||
return prod
|
||||
}
|
||||
|
||||
// Read PCI device ID and look it up
|
||||
if deviceID, err := utils.ReadStringFileLimited(filepath.Join(devicePath, "device"), 64); err == nil {
|
||||
id := normalizeHexID(deviceID)
|
||||
revision := ""
|
||||
if rev, revErr := utils.ReadStringFileLimited(filepath.Join(devicePath, "revision"), 64); revErr == nil {
|
||||
revision = normalizeHexID(rev)
|
||||
}
|
||||
|
||||
if name, found, done := getCachedAmdgpuName(id, revision); found {
|
||||
return name
|
||||
} else if !done {
|
||||
if name, exact, ok := lookupAmdgpuNameInFile(id, revision, "/usr/share/libdrm/amdgpu.ids"); ok {
|
||||
cacheAmdgpuName(id, revision, name, exact)
|
||||
return normalizeAmdgpuName(name)
|
||||
}
|
||||
cacheMissingAmdgpuName(id, revision)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("AMD GPU (%s)", id)
|
||||
}
|
||||
|
||||
return "AMD GPU"
|
||||
}
|
||||
265
agent/gpu_amd_linux_test.go
Normal file
265
agent/gpu_amd_linux_test.go
Normal file
@@ -0,0 +1,265 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNormalizeHexID(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"0x1002", "1002"},
|
||||
{"C2", "c2"},
|
||||
{" 15BF ", "15bf"},
|
||||
{"0x15bf", "15bf"},
|
||||
{"", ""},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
subName := tt.in
|
||||
if subName == "" {
|
||||
subName = "empty_string"
|
||||
}
|
||||
t.Run(subName, func(t *testing.T) {
|
||||
got := normalizeHexID(tt.in)
|
||||
assert.Equal(t, tt.want, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCacheKeyForAmdgpu(t *testing.T) {
|
||||
tests := []struct {
|
||||
deviceID string
|
||||
revisionID string
|
||||
want string
|
||||
}{
|
||||
{"1114", "c2", "1114:c2"},
|
||||
{"15bf", "", "15bf"},
|
||||
{"1506", "c1", "1506:c1"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := cacheKeyForAmdgpu(tt.deviceID, tt.revisionID)
|
||||
assert.Equal(t, tt.want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadSysfsFloat(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
validPath := filepath.Join(dir, "val")
|
||||
require.NoError(t, os.WriteFile(validPath, []byte(" 42.5 \n"), 0o644))
|
||||
got, err := readSysfsFloat(validPath)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 42.5, got)
|
||||
|
||||
// Integer and scientific
|
||||
sciPath := filepath.Join(dir, "sci")
|
||||
require.NoError(t, os.WriteFile(sciPath, []byte("1e2"), 0o644))
|
||||
got, err = readSysfsFloat(sciPath)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 100.0, got)
|
||||
|
||||
// Missing file
|
||||
_, err = readSysfsFloat(filepath.Join(dir, "missing"))
|
||||
require.Error(t, err)
|
||||
|
||||
// Invalid content
|
||||
badPath := filepath.Join(dir, "bad")
|
||||
require.NoError(t, os.WriteFile(badPath, []byte("not a number"), 0o644))
|
||||
_, err = readSysfsFloat(badPath)
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
func TestIsAmdGpu(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
deviceDir := filepath.Join(dir, "device")
|
||||
require.NoError(t, os.MkdirAll(deviceDir, 0o755))
|
||||
|
||||
// AMD vendor 0x1002 -> true
|
||||
require.NoError(t, os.WriteFile(filepath.Join(deviceDir, "vendor"), []byte("0x1002\n"), 0o644))
|
||||
assert.True(t, isAmdGpu(dir), "vendor 0x1002 should be AMD")
|
||||
|
||||
// Non-AMD vendor -> false
|
||||
require.NoError(t, os.WriteFile(filepath.Join(deviceDir, "vendor"), []byte("0x10de\n"), 0o644))
|
||||
assert.False(t, isAmdGpu(dir), "vendor 0x10de should not be AMD")
|
||||
|
||||
// Missing vendor file -> false
|
||||
require.NoError(t, os.Remove(filepath.Join(deviceDir, "vendor")))
|
||||
assert.False(t, isAmdGpu(dir), "missing vendor file should be false")
|
||||
}
|
||||
|
||||
func TestAmdgpuNameCacheRoundTrip(t *testing.T) {
|
||||
// Cache a name and retrieve it (unique key to avoid affecting other tests)
|
||||
deviceID, revisionID := "cachedev99", "00"
|
||||
cacheAmdgpuName(deviceID, revisionID, "AMD Test GPU 99 Graphics", true)
|
||||
|
||||
name, found, done := getCachedAmdgpuName(deviceID, revisionID)
|
||||
assert.True(t, found)
|
||||
assert.True(t, done)
|
||||
assert.Equal(t, "AMD Test GPU 99", name)
|
||||
|
||||
// Device-only key also stored
|
||||
name2, found2, _ := getCachedAmdgpuName(deviceID, "")
|
||||
assert.True(t, found2)
|
||||
assert.Equal(t, "AMD Test GPU 99", name2)
|
||||
|
||||
// Cache a miss
|
||||
cacheMissingAmdgpuName("missedev99", "ab")
|
||||
_, found3, done3 := getCachedAmdgpuName("missedev99", "ab")
|
||||
assert.False(t, found3)
|
||||
assert.True(t, done3, "done should be true so caller skips file lookup")
|
||||
}
|
||||
|
||||
func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
writeGTT bool
|
||||
wantMemoryUsed float64
|
||||
wantMemoryTotal float64
|
||||
}{
|
||||
{
|
||||
name: "sums vram and gtt when gtt is present",
|
||||
writeGTT: true,
|
||||
wantMemoryUsed: utils.BytesToMegabytes(1073741824 + 536870912),
|
||||
wantMemoryTotal: utils.BytesToMegabytes(2147483648 + 4294967296),
|
||||
},
|
||||
{
|
||||
name: "falls back to vram when gtt is missing",
|
||||
writeGTT: false,
|
||||
wantMemoryUsed: utils.BytesToMegabytes(1073741824),
|
||||
wantMemoryTotal: utils.BytesToMegabytes(2147483648),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
cardPath := filepath.Join(dir, "card0")
|
||||
devicePath := filepath.Join(cardPath, "device")
|
||||
hwmonPath := filepath.Join(devicePath, "hwmon", "hwmon0")
|
||||
require.NoError(t, os.MkdirAll(hwmonPath, 0o755))
|
||||
|
||||
write := func(name, content string) {
|
||||
require.NoError(t, os.WriteFile(filepath.Join(devicePath, name), []byte(content), 0o644))
|
||||
}
|
||||
write("vendor", "0x1002")
|
||||
write("device", "0x1506")
|
||||
write("revision", "0xc1")
|
||||
write("gpu_busy_percent", "25")
|
||||
write("mem_info_vram_used", "1073741824")
|
||||
write("mem_info_vram_total", "2147483648")
|
||||
if tt.writeGTT {
|
||||
write("mem_info_gtt_used", "536870912")
|
||||
write("mem_info_gtt_total", "4294967296")
|
||||
}
|
||||
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "temp1_input"), []byte("45000"), 0o644))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "power1_input"), []byte("20000000"), 0o644))
|
||||
|
||||
// Pre-cache name so getAmdGpuName returns a known value (it uses system amdgpu.ids path)
|
||||
cacheAmdgpuName("1506", "c1", "AMD Radeon 610M Graphics", true)
|
||||
|
||||
gm := &GPUManager{GpuDataMap: make(map[string]*system.GPUData)}
|
||||
ok := gm.updateAmdGpuData(cardPath)
|
||||
require.True(t, ok)
|
||||
|
||||
gpu, ok := gm.GpuDataMap["card0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "AMD Radeon 610M", gpu.Name)
|
||||
assert.Equal(t, 25.0, gpu.Usage)
|
||||
assert.Equal(t, tt.wantMemoryUsed, gpu.MemoryUsed)
|
||||
assert.Equal(t, tt.wantMemoryTotal, gpu.MemoryTotal)
|
||||
assert.Equal(t, 45.0, gpu.Temperature)
|
||||
assert.Equal(t, 20.0, gpu.Power)
|
||||
assert.Equal(t, 1.0, gpu.Count)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLookupAmdgpuNameInFile(t *testing.T) {
|
||||
idsPath := filepath.Join("test-data", "amdgpu.ids")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
deviceID string
|
||||
revisionID string
|
||||
wantName string
|
||||
wantExact bool
|
||||
wantFound bool
|
||||
}{
|
||||
{
|
||||
name: "exact device and revision match",
|
||||
deviceID: "1114",
|
||||
revisionID: "c2",
|
||||
wantName: "AMD Radeon 860M Graphics",
|
||||
wantExact: true,
|
||||
wantFound: true,
|
||||
},
|
||||
{
|
||||
name: "exact match 15BF revision 01 returns 760M",
|
||||
deviceID: "15bf",
|
||||
revisionID: "01",
|
||||
wantName: "AMD Radeon 760M Graphics",
|
||||
wantExact: true,
|
||||
wantFound: true,
|
||||
},
|
||||
{
|
||||
name: "exact match 15BF revision 00 returns 780M",
|
||||
deviceID: "15bf",
|
||||
revisionID: "00",
|
||||
wantName: "AMD Radeon 780M Graphics",
|
||||
wantExact: true,
|
||||
wantFound: true,
|
||||
},
|
||||
{
|
||||
name: "device-only match returns first entry for device",
|
||||
deviceID: "1506",
|
||||
revisionID: "",
|
||||
wantName: "AMD Radeon 610M",
|
||||
wantExact: false,
|
||||
wantFound: true,
|
||||
},
|
||||
{
|
||||
name: "unknown device not found",
|
||||
deviceID: "dead",
|
||||
revisionID: "00",
|
||||
wantName: "",
|
||||
wantExact: false,
|
||||
wantFound: false,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotName, gotExact, gotFound := lookupAmdgpuNameInFile(tt.deviceID, tt.revisionID, idsPath)
|
||||
assert.Equal(t, tt.wantName, gotName, "name")
|
||||
assert.Equal(t, tt.wantExact, gotExact, "exact")
|
||||
assert.Equal(t, tt.wantFound, gotFound, "found")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAmdGpuNameFromIdsFile(t *testing.T) {
|
||||
// Test that getAmdGpuName resolves a name when we can't inject the ids path.
|
||||
// We only verify behavior when product_name is missing and device/revision
|
||||
// would be read from sysfs; the actual lookup uses /usr/share/libdrm/amdgpu.ids.
|
||||
// So this test focuses on normalizeAmdgpuName and that lookupAmdgpuNameInFile
|
||||
// returns the expected name for our test-data file.
|
||||
idsPath := filepath.Join("test-data", "amdgpu.ids")
|
||||
name, exact, found := lookupAmdgpuNameInFile("1435", "ae", idsPath)
|
||||
require.True(t, found)
|
||||
require.True(t, exact)
|
||||
assert.Equal(t, "AMD Custom GPU 0932", name)
|
||||
assert.Equal(t, "AMD Custom GPU 0932", normalizeAmdgpuName(name))
|
||||
|
||||
// " Graphics" suffix is trimmed by normalizeAmdgpuName
|
||||
name2 := "AMD Radeon 860M Graphics"
|
||||
assert.Equal(t, "AMD Radeon 860M", normalizeAmdgpuName(name2))
|
||||
}
|
||||
15
agent/gpu_amd_unsupported.go
Normal file
15
agent/gpu_amd_unsupported.go
Normal file
@@ -0,0 +1,15 @@
|
||||
//go:build !linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
func (gm *GPUManager) hasAmdSysfs() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (gm *GPUManager) collectAmdStats() error {
|
||||
return errors.ErrUnsupported
|
||||
}
|
||||
252
agent/gpu_darwin.go
Normal file
252
agent/gpu_darwin.go
Normal file
@@ -0,0 +1,252 @@
|
||||
//go:build darwin
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
const (
|
||||
// powermetricsSampleIntervalMs is the sampling interval passed to powermetrics (-i).
|
||||
powermetricsSampleIntervalMs = 500
|
||||
// powermetricsPollInterval is how often we run powermetrics to collect a new sample.
|
||||
powermetricsPollInterval = 2 * time.Second
|
||||
// macmonIntervalMs is the sampling interval passed to macmon pipe (-i), in milliseconds.
|
||||
macmonIntervalMs = 2500
|
||||
)
|
||||
|
||||
const appleGPUID = "0"
|
||||
|
||||
// startPowermetricsCollector runs powermetrics --samplers gpu_power in a loop and updates
|
||||
// GPU usage and power. Requires root (sudo) on macOS. A single logical GPU is reported as id "0".
|
||||
func (gm *GPUManager) startPowermetricsCollector() {
|
||||
// Ensure single GPU entry for Apple GPU
|
||||
if _, ok := gm.GpuDataMap[appleGPUID]; !ok {
|
||||
gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
|
||||
}
|
||||
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := gm.collectPowermetrics(); err != nil {
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
slog.Warn("powermetrics GPU collector failed repeatedly, stopping", "err", err)
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting macOS GPU data via powermetrics (may require sudo)", "err", err)
|
||||
time.Sleep(retryWaitTime)
|
||||
continue
|
||||
}
|
||||
failures = 0
|
||||
time.Sleep(powermetricsPollInterval)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// collectPowermetrics runs powermetrics once and parses GPU usage and power from its output.
|
||||
func (gm *GPUManager) collectPowermetrics() error {
|
||||
interval := strconv.Itoa(powermetricsSampleIntervalMs)
|
||||
cmd := exec.Command(powermetricsCmd, "--samplers", "gpu_power", "-i", interval, "-n", "1")
|
||||
cmd.Stderr = nil
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !gm.parsePowermetricsData(out) {
|
||||
return errNoValidData
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parsePowermetricsData parses powermetrics gpu_power output and updates GpuDataMap["0"].
|
||||
// Example output:
|
||||
//
|
||||
// **** GPU usage ****
|
||||
// GPU HW active frequency: 444 MHz
|
||||
// GPU HW active residency: 0.97% (444 MHz: .97% ...
|
||||
// GPU idle residency: 99.03%
|
||||
// GPU Power: 4 mW
|
||||
func (gm *GPUManager) parsePowermetricsData(output []byte) bool {
|
||||
var idleResidency, powerMW float64
|
||||
var gotIdle, gotPower bool
|
||||
|
||||
scanner := bufio.NewScanner(bytes.NewReader(output))
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(line, "GPU idle residency:") {
|
||||
// "GPU idle residency: 99.03%"
|
||||
fields := strings.Fields(strings.TrimPrefix(line, "GPU idle residency:"))
|
||||
if len(fields) >= 1 {
|
||||
pct := strings.TrimSuffix(fields[0], "%")
|
||||
if v, err := strconv.ParseFloat(pct, 64); err == nil {
|
||||
idleResidency = v
|
||||
gotIdle = true
|
||||
}
|
||||
}
|
||||
} else if strings.HasPrefix(line, "GPU Power:") {
|
||||
// "GPU Power: 4 mW"
|
||||
fields := strings.Fields(strings.TrimPrefix(line, "GPU Power:"))
|
||||
if len(fields) >= 1 {
|
||||
if v, err := strconv.ParseFloat(fields[0], 64); err == nil {
|
||||
powerMW = v
|
||||
gotPower = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return false
|
||||
}
|
||||
if !gotIdle && !gotPower {
|
||||
return false
|
||||
}
|
||||
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
|
||||
if _, ok := gm.GpuDataMap[appleGPUID]; !ok {
|
||||
gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
|
||||
}
|
||||
gpu := gm.GpuDataMap[appleGPUID]
|
||||
|
||||
if gotIdle {
|
||||
// Usage = 100 - idle residency (e.g. 100 - 99.03 = 0.97%)
|
||||
gpu.Usage += 100 - idleResidency
|
||||
}
|
||||
if gotPower {
|
||||
// mW -> W
|
||||
gpu.Power += powerMW / milliwattsInAWatt
|
||||
}
|
||||
gpu.Count++
|
||||
return true
|
||||
}
|
||||
|
||||
// startMacmonCollector runs `macmon pipe` in a loop and parses one JSON object per line.
|
||||
// This collector does not require sudo. A single logical GPU is reported as id "0".
|
||||
func (gm *GPUManager) startMacmonCollector() {
|
||||
if _, ok := gm.GpuDataMap[appleGPUID]; !ok {
|
||||
gm.GpuDataMap[appleGPUID] = &system.GPUData{Name: "Apple GPU"}
|
||||
}
|
||||
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := gm.collectMacmonPipe(); err != nil {
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
slog.Warn("macmon GPU collector failed repeatedly, stopping", "err", err)
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting macOS GPU data via macmon", "err", err)
|
||||
time.Sleep(retryWaitTime)
|
||||
continue
|
||||
}
|
||||
failures = 0
|
||||
// `macmon pipe` is long-running; if it returns, wait a bit before restarting.
|
||||
time.Sleep(retryWaitTime)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
type macmonTemp struct {
|
||||
GPUTempAvg float64 `json:"gpu_temp_avg"`
|
||||
}
|
||||
|
||||
type macmonSample struct {
|
||||
GPUPower float64 `json:"gpu_power"` // watts (macmon reports fractional values)
|
||||
GPURAMPower float64 `json:"gpu_ram_power"` // watts
|
||||
GPUUsage []float64 `json:"gpu_usage"` // [freq_mhz, usage] where usage is typically 0..1
|
||||
Temp macmonTemp `json:"temp"`
|
||||
}
|
||||
|
||||
func (gm *GPUManager) collectMacmonPipe() (err error) {
|
||||
cmd := exec.Command(macmonCmd, "pipe", "-i", strconv.Itoa(macmonIntervalMs))
|
||||
// Avoid blocking if macmon writes to stderr.
|
||||
cmd.Stderr = io.Discard
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Ensure we always reap the child to avoid zombies on any return path and
|
||||
// propagate a non-zero exit code if no other error was set.
|
||||
defer func() {
|
||||
_ = stdout.Close()
|
||||
if cmd.ProcessState == nil || !cmd.ProcessState.Exited() {
|
||||
_ = cmd.Process.Kill()
|
||||
}
|
||||
if waitErr := cmd.Wait(); err == nil && waitErr != nil {
|
||||
err = waitErr
|
||||
}
|
||||
}()
|
||||
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
var hadSample bool
|
||||
for scanner.Scan() {
|
||||
line := bytes.TrimSpace(scanner.Bytes())
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
if gm.parseMacmonLine(line) {
|
||||
hadSample = true
|
||||
}
|
||||
}
|
||||
if scanErr := scanner.Err(); scanErr != nil {
|
||||
return scanErr
|
||||
}
|
||||
if !hadSample {
|
||||
return errNoValidData
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseMacmonLine parses a single macmon JSON line and updates Apple GPU metrics.
|
||||
func (gm *GPUManager) parseMacmonLine(line []byte) bool {
|
||||
var sample macmonSample
|
||||
if err := json.Unmarshal(line, &sample); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
usage := 0.0
|
||||
if len(sample.GPUUsage) >= 2 {
|
||||
usage = sample.GPUUsage[1]
|
||||
// Heuristic: macmon typically reports 0..1; convert to percentage.
|
||||
if usage <= 1.0 {
|
||||
usage *= 100
|
||||
}
|
||||
}
|
||||
|
||||
// Consider the line valid if it contains at least one GPU metric.
|
||||
if usage == 0 && sample.GPUPower == 0 && sample.Temp.GPUTempAvg == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
|
||||
gpu, ok := gm.GpuDataMap[appleGPUID]
|
||||
if !ok {
|
||||
gpu = &system.GPUData{Name: "Apple GPU"}
|
||||
gm.GpuDataMap[appleGPUID] = gpu
|
||||
}
|
||||
gpu.Temperature = sample.Temp.GPUTempAvg
|
||||
gpu.Usage += usage
|
||||
// macmon reports power in watts; include VRAM power if present.
|
||||
gpu.Power += sample.GPUPower + sample.GPURAMPower
|
||||
gpu.Count++
|
||||
return true
|
||||
}
|
||||
81
agent/gpu_darwin_test.go
Normal file
81
agent/gpu_darwin_test.go
Normal file
@@ -0,0 +1,81 @@
|
||||
//go:build darwin
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParsePowermetricsData(t *testing.T) {
|
||||
input := `
|
||||
Machine model: Mac14,10
|
||||
OS version: 25D125
|
||||
|
||||
*** Sampled system activity (Sat Feb 14 00:42:06 2026 -0500) (503.05ms elapsed) ***
|
||||
|
||||
**** GPU usage ****
|
||||
|
||||
GPU HW active frequency: 444 MHz
|
||||
GPU HW active residency: 0.97% (444 MHz: .97% 612 MHz: 0% 808 MHz: 0% 968 MHz: 0% 1110 MHz: 0% 1236 MHz: 0% 1338 MHz: 0% 1398 MHz: 0%)
|
||||
GPU SW requested state: (P1 : 100% P2 : 0% P3 : 0% P4 : 0% P5 : 0% P6 : 0% P7 : 0% P8 : 0%)
|
||||
GPU idle residency: 99.03%
|
||||
GPU Power: 4 mW
|
||||
`
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
valid := gm.parsePowermetricsData([]byte(input))
|
||||
require.True(t, valid)
|
||||
|
||||
g0, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "Apple GPU", g0.Name)
|
||||
// Usage = 100 - 99.03 = 0.97
|
||||
assert.InDelta(t, 0.97, g0.Usage, 0.01)
|
||||
// 4 mW -> 0.004 W
|
||||
assert.InDelta(t, 0.004, g0.Power, 0.0001)
|
||||
assert.Equal(t, 1.0, g0.Count)
|
||||
}
|
||||
|
||||
func TestParsePowermetricsDataPartial(t *testing.T) {
|
||||
// Only power line (e.g. older macOS or different sampler output)
|
||||
input := `
|
||||
**** GPU usage ****
|
||||
GPU Power: 120 mW
|
||||
`
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
valid := gm.parsePowermetricsData([]byte(input))
|
||||
require.True(t, valid)
|
||||
|
||||
g0, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "Apple GPU", g0.Name)
|
||||
assert.InDelta(t, 0.12, g0.Power, 0.001)
|
||||
assert.Equal(t, 1.0, g0.Count)
|
||||
}
|
||||
|
||||
func TestParseMacmonLine(t *testing.T) {
|
||||
input := `{"all_power":0.6468324661254883,"ane_power":0.0,"cpu_power":0.6359732151031494,"ecpu_usage":[2061,0.1726151406764984],"gpu_power":0.010859241709113121,"gpu_ram_power":0.000965250947047025,"gpu_usage":[503,0.013633215799927711],"memory":{"ram_total":17179869184,"ram_usage":12322914304,"swap_total":0,"swap_usage":0},"pcpu_usage":[1248,0.11792058497667313],"ram_power":0.14885640144348145,"sys_power":10.4955415725708,"temp":{"cpu_temp_avg":23.041261672973633,"gpu_temp_avg":29.44516944885254},"timestamp":"2026-02-17T19:34:27.942556+00:00"}`
|
||||
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
valid := gm.parseMacmonLine([]byte(input))
|
||||
require.True(t, valid)
|
||||
|
||||
g0, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "Apple GPU", g0.Name)
|
||||
// macmon reports usage fraction 0..1; expect percent conversion.
|
||||
assert.InDelta(t, 1.3633, g0.Usage, 0.05)
|
||||
// power includes gpu_power + gpu_ram_power
|
||||
assert.InDelta(t, 0.011824, g0.Power, 0.0005)
|
||||
assert.InDelta(t, 29.445, g0.Temperature, 0.01)
|
||||
assert.Equal(t, 1.0, g0.Count)
|
||||
}
|
||||
9
agent/gpu_darwin_unsupported.go
Normal file
9
agent/gpu_darwin_unsupported.go
Normal file
@@ -0,0 +1,9 @@
|
||||
//go:build !darwin
|
||||
|
||||
package agent
|
||||
|
||||
// startPowermetricsCollector is a no-op on non-darwin platforms; the real implementation is in gpu_darwin.go.
|
||||
func (gm *GPUManager) startPowermetricsCollector() {}
|
||||
|
||||
// startMacmonCollector is a no-op on non-darwin platforms; the real implementation is in gpu_darwin.go.
|
||||
func (gm *GPUManager) startMacmonCollector() {}
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
@@ -27,10 +28,11 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
|
||||
defer gm.Unlock()
|
||||
|
||||
// only one gpu for now - cmd doesn't provide all by default
|
||||
gpuData, ok := gm.GpuDataMap["0"]
|
||||
id := "i0" // prefix with i to avoid conflicts with nvidia card ids
|
||||
gpuData, ok := gm.GpuDataMap[id]
|
||||
if !ok {
|
||||
gpuData = &system.GPUData{Name: "GPU", Engines: make(map[string]float64)}
|
||||
gm.GpuDataMap["0"] = gpuData
|
||||
gm.GpuDataMap[id] = gpuData
|
||||
}
|
||||
|
||||
gpuData.Power += sample.PowerGPU
|
||||
@@ -51,7 +53,7 @@ func (gm *GPUManager) updateIntelFromStats(sample *intelGpuStats) bool {
|
||||
func (gm *GPUManager) collectIntelStats() (err error) {
|
||||
// Build command arguments, optionally selecting a device via -d
|
||||
args := []string{"-s", intelGpuStatsInterval, "-l"}
|
||||
if dev, ok := GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
|
||||
if dev, ok := utils.GetEnv("INTEL_GPU_DEVICE"); ok && dev != "" {
|
||||
args = append(args, "-d", dev)
|
||||
}
|
||||
cmd := exec.Command(intelGpuStatsCmd, args...)
|
||||
|
||||
224
agent/gpu_nvml.go
Normal file
224
agent/gpu_nvml.go
Normal file
@@ -0,0 +1,224 @@
|
||||
//go:build amd64 && (windows || (linux && glibc))
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
// NVML constants and types
|
||||
const (
|
||||
nvmlSuccess int = 0
|
||||
)
|
||||
|
||||
type nvmlDevice uintptr
|
||||
|
||||
type nvmlReturn int
|
||||
|
||||
type nvmlMemoryV1 struct {
|
||||
Total uint64
|
||||
Free uint64
|
||||
Used uint64
|
||||
}
|
||||
|
||||
type nvmlMemoryV2 struct {
|
||||
Version uint32
|
||||
Total uint64
|
||||
Reserved uint64
|
||||
Free uint64
|
||||
Used uint64
|
||||
}
|
||||
|
||||
type nvmlUtilization struct {
|
||||
Gpu uint32
|
||||
Memory uint32
|
||||
}
|
||||
|
||||
type nvmlPciInfo struct {
|
||||
BusId [16]byte
|
||||
Domain uint32
|
||||
Bus uint32
|
||||
Device uint32
|
||||
PciDeviceId uint32
|
||||
PciSubSystemId uint32
|
||||
}
|
||||
|
||||
// NVML function signatures
|
||||
var (
|
||||
nvmlInit func() nvmlReturn
|
||||
nvmlShutdown func() nvmlReturn
|
||||
nvmlDeviceGetCount func(count *uint32) nvmlReturn
|
||||
nvmlDeviceGetHandleByIndex func(index uint32, device *nvmlDevice) nvmlReturn
|
||||
nvmlDeviceGetName func(device nvmlDevice, name *byte, length uint32) nvmlReturn
|
||||
nvmlDeviceGetMemoryInfo func(device nvmlDevice, memory uintptr) nvmlReturn
|
||||
nvmlDeviceGetUtilizationRates func(device nvmlDevice, utilization *nvmlUtilization) nvmlReturn
|
||||
nvmlDeviceGetTemperature func(device nvmlDevice, sensorType int, temp *uint32) nvmlReturn
|
||||
nvmlDeviceGetPowerUsage func(device nvmlDevice, power *uint32) nvmlReturn
|
||||
nvmlDeviceGetPciInfo func(device nvmlDevice, pci *nvmlPciInfo) nvmlReturn
|
||||
nvmlErrorString func(result nvmlReturn) string
|
||||
)
|
||||
|
||||
type nvmlCollector struct {
|
||||
gm *GPUManager
|
||||
lib uintptr
|
||||
devices []nvmlDevice
|
||||
bdfs []string
|
||||
isV2 bool
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) init() error {
|
||||
slog.Debug("NVML: Initializing")
|
||||
libPath := getNVMLPath()
|
||||
|
||||
lib, err := openLibrary(libPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load %s: %w", libPath, err)
|
||||
}
|
||||
c.lib = lib
|
||||
|
||||
purego.RegisterLibFunc(&nvmlInit, lib, "nvmlInit")
|
||||
purego.RegisterLibFunc(&nvmlShutdown, lib, "nvmlShutdown")
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetCount, lib, "nvmlDeviceGetCount")
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetHandleByIndex, lib, "nvmlDeviceGetHandleByIndex")
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetName, lib, "nvmlDeviceGetName")
|
||||
// Try to get v2 memory info, fallback to v1 if not available
|
||||
if hasSymbol(lib, "nvmlDeviceGetMemoryInfo_v2") {
|
||||
c.isV2 = true
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetMemoryInfo, lib, "nvmlDeviceGetMemoryInfo_v2")
|
||||
} else {
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetMemoryInfo, lib, "nvmlDeviceGetMemoryInfo")
|
||||
}
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetUtilizationRates, lib, "nvmlDeviceGetUtilizationRates")
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetTemperature, lib, "nvmlDeviceGetTemperature")
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetPowerUsage, lib, "nvmlDeviceGetPowerUsage")
|
||||
purego.RegisterLibFunc(&nvmlDeviceGetPciInfo, lib, "nvmlDeviceGetPciInfo")
|
||||
purego.RegisterLibFunc(&nvmlErrorString, lib, "nvmlErrorString")
|
||||
|
||||
if ret := nvmlInit(); ret != nvmlReturn(nvmlSuccess) {
|
||||
return fmt.Errorf("nvmlInit failed: %v", ret)
|
||||
}
|
||||
|
||||
var count uint32
|
||||
if ret := nvmlDeviceGetCount(&count); ret != nvmlReturn(nvmlSuccess) {
|
||||
return fmt.Errorf("nvmlDeviceGetCount failed: %v", ret)
|
||||
}
|
||||
|
||||
for i := uint32(0); i < count; i++ {
|
||||
var device nvmlDevice
|
||||
if ret := nvmlDeviceGetHandleByIndex(i, &device); ret == nvmlReturn(nvmlSuccess) {
|
||||
c.devices = append(c.devices, device)
|
||||
// Get BDF for power state check
|
||||
var pci nvmlPciInfo
|
||||
if ret := nvmlDeviceGetPciInfo(device, &pci); ret == nvmlReturn(nvmlSuccess) {
|
||||
busID := string(pci.BusId[:])
|
||||
if idx := strings.Index(busID, "\x00"); idx != -1 {
|
||||
busID = busID[:idx]
|
||||
}
|
||||
c.bdfs = append(c.bdfs, strings.ToLower(busID))
|
||||
} else {
|
||||
c.bdfs = append(c.bdfs, "")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) start() {
|
||||
defer nvmlShutdown()
|
||||
ticker := time.Tick(3 * time.Second)
|
||||
|
||||
for range ticker {
|
||||
c.collect()
|
||||
}
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) collect() {
|
||||
c.gm.Lock()
|
||||
defer c.gm.Unlock()
|
||||
|
||||
for i, device := range c.devices {
|
||||
id := fmt.Sprintf("%d", i)
|
||||
bdf := c.bdfs[i]
|
||||
|
||||
// Update GPUDataMap
|
||||
if _, ok := c.gm.GpuDataMap[id]; !ok {
|
||||
var nameBuf [64]byte
|
||||
if ret := nvmlDeviceGetName(device, &nameBuf[0], 64); ret != nvmlReturn(nvmlSuccess) {
|
||||
continue
|
||||
}
|
||||
name := string(nameBuf[:strings.Index(string(nameBuf[:]), "\x00")])
|
||||
name = strings.TrimPrefix(name, "NVIDIA ")
|
||||
c.gm.GpuDataMap[id] = &system.GPUData{Name: strings.TrimSuffix(name, " Laptop GPU")}
|
||||
}
|
||||
gpu := c.gm.GpuDataMap[id]
|
||||
|
||||
if bdf != "" && !c.isGPUActive(bdf) {
|
||||
slog.Debug("NVML: GPU is suspended, skipping", "bdf", bdf)
|
||||
gpu.Temperature = 0
|
||||
gpu.MemoryUsed = 0
|
||||
continue
|
||||
}
|
||||
|
||||
// Utilization
|
||||
var utilization nvmlUtilization
|
||||
if ret := nvmlDeviceGetUtilizationRates(device, &utilization); ret != nvmlReturn(nvmlSuccess) {
|
||||
slog.Debug("NVML: Utilization failed (GPU likely suspended)", "bdf", bdf, "ret", ret)
|
||||
gpu.Temperature = 0
|
||||
gpu.MemoryUsed = 0
|
||||
continue
|
||||
}
|
||||
|
||||
slog.Debug("NVML: Collecting data for GPU", "bdf", bdf)
|
||||
|
||||
// Temperature
|
||||
var temp uint32
|
||||
nvmlDeviceGetTemperature(device, 0, &temp) // 0 is NVML_TEMPERATURE_GPU
|
||||
|
||||
// Memory: only poll if GPU is active to avoid leaving D3cold state (#1522)
|
||||
if utilization.Gpu > 0 {
|
||||
var usedMem, totalMem uint64
|
||||
if c.isV2 {
|
||||
var memory nvmlMemoryV2
|
||||
memory.Version = 0x02000028 // (2 << 24) | 40 bytes
|
||||
if ret := nvmlDeviceGetMemoryInfo(device, uintptr(unsafe.Pointer(&memory))); ret != nvmlReturn(nvmlSuccess) {
|
||||
slog.Debug("NVML: MemoryInfo_v2 failed", "bdf", bdf, "ret", ret)
|
||||
} else {
|
||||
usedMem = memory.Used
|
||||
totalMem = memory.Total
|
||||
}
|
||||
} else {
|
||||
var memory nvmlMemoryV1
|
||||
if ret := nvmlDeviceGetMemoryInfo(device, uintptr(unsafe.Pointer(&memory))); ret != nvmlReturn(nvmlSuccess) {
|
||||
slog.Debug("NVML: MemoryInfo failed", "bdf", bdf, "ret", ret)
|
||||
} else {
|
||||
usedMem = memory.Used
|
||||
totalMem = memory.Total
|
||||
}
|
||||
}
|
||||
if totalMem > 0 {
|
||||
gpu.MemoryUsed = float64(usedMem) / 1024 / 1024 / mebibytesInAMegabyte
|
||||
gpu.MemoryTotal = float64(totalMem) / 1024 / 1024 / mebibytesInAMegabyte
|
||||
}
|
||||
} else {
|
||||
slog.Debug("NVML: Skipping memory info (utilization=0)", "bdf", bdf)
|
||||
}
|
||||
|
||||
// Power
|
||||
var power uint32
|
||||
nvmlDeviceGetPowerUsage(device, &power)
|
||||
|
||||
gpu.Temperature = float64(temp)
|
||||
gpu.Usage += float64(utilization.Gpu)
|
||||
gpu.Power += float64(power) / 1000.0
|
||||
gpu.Count++
|
||||
slog.Debug("NVML: Collected data", "gpu", gpu)
|
||||
}
|
||||
}
|
||||
57
agent/gpu_nvml_linux.go
Normal file
57
agent/gpu_nvml_linux.go
Normal file
@@ -0,0 +1,57 @@
|
||||
//go:build glibc && linux && amd64
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/ebitengine/purego"
|
||||
)
|
||||
|
||||
func openLibrary(name string) (uintptr, error) {
|
||||
return purego.Dlopen(name, purego.RTLD_NOW|purego.RTLD_GLOBAL)
|
||||
}
|
||||
|
||||
func getNVMLPath() string {
|
||||
return "libnvidia-ml.so.1"
|
||||
}
|
||||
|
||||
func hasSymbol(lib uintptr, symbol string) bool {
|
||||
_, err := purego.Dlsym(lib, symbol)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) isGPUActive(bdf string) bool {
|
||||
// runtime_status
|
||||
statusPath := filepath.Join("/sys/bus/pci/devices", bdf, "power/runtime_status")
|
||||
status, err := os.ReadFile(statusPath)
|
||||
if err != nil {
|
||||
slog.Debug("NVML: Can't read runtime_status", "bdf", bdf, "err", err)
|
||||
return true // Assume active if we can't read status
|
||||
}
|
||||
statusStr := strings.TrimSpace(string(status))
|
||||
if statusStr != "active" && statusStr != "resuming" {
|
||||
slog.Debug("NVML: GPU not active", "bdf", bdf, "status", statusStr)
|
||||
return false
|
||||
}
|
||||
|
||||
// power_state (D0 check)
|
||||
// Find any drm card device power_state
|
||||
pstatePathPattern := filepath.Join("/sys/bus/pci/devices", bdf, "drm/card*/device/power_state")
|
||||
matches, _ := filepath.Glob(pstatePathPattern)
|
||||
if len(matches) > 0 {
|
||||
pstate, err := os.ReadFile(matches[0])
|
||||
if err == nil {
|
||||
pstateStr := strings.TrimSpace(string(pstate))
|
||||
if pstateStr != "D0" {
|
||||
slog.Debug("NVML: GPU not in D0 state", "bdf", bdf, "pstate", pstateStr)
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
15
agent/gpu_nvml_unsupported.go
Normal file
15
agent/gpu_nvml_unsupported.go
Normal file
@@ -0,0 +1,15 @@
|
||||
//go:build (!linux && !windows) || !amd64 || (linux && !glibc)
|
||||
|
||||
package agent
|
||||
|
||||
import "fmt"
|
||||
|
||||
type nvmlCollector struct {
|
||||
gm *GPUManager
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) init() error {
|
||||
return fmt.Errorf("nvml not supported on this platform")
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) start() {}
|
||||
25
agent/gpu_nvml_windows.go
Normal file
25
agent/gpu_nvml_windows.go
Normal file
@@ -0,0 +1,25 @@
|
||||
//go:build windows && amd64
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
func openLibrary(name string) (uintptr, error) {
|
||||
handle, err := windows.LoadLibrary(name)
|
||||
return uintptr(handle), err
|
||||
}
|
||||
|
||||
func getNVMLPath() string {
|
||||
return "nvml.dll"
|
||||
}
|
||||
|
||||
func hasSymbol(lib uintptr, symbol string) bool {
|
||||
_, err := windows.GetProcAddress(windows.Handle(lib), symbol)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) isGPUActive(bdf string) bool {
|
||||
return true
|
||||
}
|
||||
160
agent/gpu_nvtop.go
Normal file
160
agent/gpu_nvtop.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
type nvtopSnapshot struct {
|
||||
DeviceName string `json:"device_name"`
|
||||
Temp *string `json:"temp"`
|
||||
PowerDraw *string `json:"power_draw"`
|
||||
GpuUtil *string `json:"gpu_util"`
|
||||
MemTotal *string `json:"mem_total"`
|
||||
MemUsed *string `json:"mem_used"`
|
||||
}
|
||||
|
||||
// parseNvtopNumber converts an nvtop value such as "48C", "13W" or "5%" to a
// float64. Surrounding whitespace is trimmed, then at most one trailing "C",
// one "W" and one "%" are removed, in that order. Input that does not parse
// as a number yields 0.
func parseNvtopNumber(raw string) float64 {
	text := strings.TrimSpace(raw)
	for _, unit := range [...]string{"C", "W", "%"} {
		text = strings.TrimSuffix(text, unit)
	}
	// The error is deliberately ignored: ParseFloat returns 0 for malformed
	// input, which is the fallback value callers get.
	number, _ := strconv.ParseFloat(text, 64)
	return number
}
|
||||
|
||||
// parseNvtopData parses a single nvtop JSON snapshot payload.
|
||||
func (gm *GPUManager) parseNvtopData(output []byte) bool {
|
||||
var snapshots []nvtopSnapshot
|
||||
if err := json.Unmarshal(output, &snapshots); err != nil || len(snapshots) == 0 {
|
||||
return false
|
||||
}
|
||||
return gm.updateNvtopSnapshots(snapshots)
|
||||
}
|
||||
|
||||
// updateNvtopSnapshots applies one decoded nvtop snapshot batch to GPU accumulators.
|
||||
func (gm *GPUManager) updateNvtopSnapshots(snapshots []nvtopSnapshot) bool {
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
|
||||
valid := false
|
||||
usedIDs := make(map[string]struct{}, len(snapshots))
|
||||
for i, sample := range snapshots {
|
||||
if sample.DeviceName == "" {
|
||||
continue
|
||||
}
|
||||
indexID := "n" + strconv.Itoa(i)
|
||||
id := indexID
|
||||
|
||||
// nvtop ordering can change, so prefer reusing an existing slot with matching device name.
|
||||
if existingByIndex, ok := gm.GpuDataMap[indexID]; ok && existingByIndex.Name != "" && existingByIndex.Name != sample.DeviceName {
|
||||
for existingID, gpu := range gm.GpuDataMap {
|
||||
if !strings.HasPrefix(existingID, "n") {
|
||||
continue
|
||||
}
|
||||
if _, taken := usedIDs[existingID]; taken {
|
||||
continue
|
||||
}
|
||||
if gpu.Name == sample.DeviceName {
|
||||
id = existingID
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := gm.GpuDataMap[id]; !ok {
|
||||
gm.GpuDataMap[id] = &system.GPUData{Name: sample.DeviceName}
|
||||
}
|
||||
gpu := gm.GpuDataMap[id]
|
||||
gpu.Name = sample.DeviceName
|
||||
|
||||
if sample.Temp != nil {
|
||||
gpu.Temperature = parseNvtopNumber(*sample.Temp)
|
||||
}
|
||||
if sample.MemUsed != nil {
|
||||
gpu.MemoryUsed = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
|
||||
}
|
||||
if sample.MemTotal != nil {
|
||||
gpu.MemoryTotal = utils.BytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
|
||||
}
|
||||
if sample.GpuUtil != nil {
|
||||
gpu.Usage += parseNvtopNumber(*sample.GpuUtil)
|
||||
}
|
||||
if sample.PowerDraw != nil {
|
||||
gpu.Power += parseNvtopNumber(*sample.PowerDraw)
|
||||
}
|
||||
gpu.Count++
|
||||
usedIDs[id] = struct{}{}
|
||||
valid = true
|
||||
}
|
||||
return valid
|
||||
}
|
||||
|
||||
// collectNvtopStats runs nvtop loop mode and continuously decodes JSON snapshots.
|
||||
func (gm *GPUManager) collectNvtopStats(interval string) error {
|
||||
cmd := exec.Command(nvtopCmd, "-lP", "-d", interval)
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
_ = stdout.Close()
|
||||
if cmd.ProcessState == nil || !cmd.ProcessState.Exited() {
|
||||
_ = cmd.Process.Kill()
|
||||
}
|
||||
_ = cmd.Wait()
|
||||
}()
|
||||
|
||||
decoder := json.NewDecoder(stdout)
|
||||
foundValid := false
|
||||
for {
|
||||
var snapshots []nvtopSnapshot
|
||||
if err := decoder.Decode(&snapshots); err != nil {
|
||||
if err == io.EOF {
|
||||
if foundValid {
|
||||
return nil
|
||||
}
|
||||
return errNoValidData
|
||||
}
|
||||
return err
|
||||
}
|
||||
if gm.updateNvtopSnapshots(snapshots) {
|
||||
foundValid = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// startNvtopCollector starts nvtop collection with retry or fallback callback handling.
|
||||
func (gm *GPUManager) startNvtopCollector(interval string, onFailure func()) {
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := gm.collectNvtopStats(interval); err != nil {
|
||||
if onFailure != nil {
|
||||
slog.Warn("Error collecting GPU data via nvtop", "err", err)
|
||||
onFailure()
|
||||
return
|
||||
}
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting GPU data via nvtop", "err", err)
|
||||
time.Sleep(retryWaitTime)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
@@ -11,6 +10,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -250,6 +250,100 @@ func TestParseAmdData(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNvtopData(t *testing.T) {
|
||||
input, err := os.ReadFile("test-data/nvtop.json")
|
||||
require.NoError(t, err)
|
||||
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
valid := gm.parseNvtopData(input)
|
||||
require.True(t, valid)
|
||||
|
||||
g0, ok := gm.GpuDataMap["n0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "NVIDIA GeForce RTX 3050 Ti Laptop GPU", g0.Name)
|
||||
assert.Equal(t, 48.0, g0.Temperature)
|
||||
assert.Equal(t, 5.0, g0.Usage)
|
||||
assert.Equal(t, 13.0, g0.Power)
|
||||
assert.Equal(t, utils.BytesToMegabytes(349372416), g0.MemoryUsed)
|
||||
assert.Equal(t, utils.BytesToMegabytes(4294967296), g0.MemoryTotal)
|
||||
assert.Equal(t, 1.0, g0.Count)
|
||||
|
||||
g1, ok := gm.GpuDataMap["n1"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "AMD Radeon 680M", g1.Name)
|
||||
assert.Equal(t, 48.0, g1.Temperature)
|
||||
assert.Equal(t, 12.0, g1.Usage)
|
||||
assert.Equal(t, 9.0, g1.Power)
|
||||
assert.Equal(t, utils.BytesToMegabytes(1213784064), g1.MemoryUsed)
|
||||
assert.Equal(t, utils.BytesToMegabytes(16929173504), g1.MemoryTotal)
|
||||
assert.Equal(t, 1.0, g1.Count)
|
||||
}
|
||||
|
||||
func TestUpdateNvtopSnapshotsKeepsDeviceAssociationWhenOrderChanges(t *testing.T) {
|
||||
strPtr := func(s string) *string { return &s }
|
||||
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
|
||||
firstBatch := []nvtopSnapshot{
|
||||
{
|
||||
DeviceName: "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
||||
GpuUtil: strPtr("20%"),
|
||||
PowerDraw: strPtr("10W"),
|
||||
},
|
||||
{
|
||||
DeviceName: "AMD Radeon 680M",
|
||||
GpuUtil: strPtr("30%"),
|
||||
PowerDraw: strPtr("20W"),
|
||||
},
|
||||
}
|
||||
secondBatchSwapped := []nvtopSnapshot{
|
||||
{
|
||||
DeviceName: "AMD Radeon 680M",
|
||||
GpuUtil: strPtr("40%"),
|
||||
PowerDraw: strPtr("25W"),
|
||||
},
|
||||
{
|
||||
DeviceName: "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
||||
GpuUtil: strPtr("50%"),
|
||||
PowerDraw: strPtr("15W"),
|
||||
},
|
||||
}
|
||||
|
||||
require.True(t, gm.updateNvtopSnapshots(firstBatch))
|
||||
require.True(t, gm.updateNvtopSnapshots(secondBatchSwapped))
|
||||
|
||||
nvidia := gm.GpuDataMap["n0"]
|
||||
require.NotNil(t, nvidia)
|
||||
assert.Equal(t, "NVIDIA GeForce RTX 3050 Ti Laptop GPU", nvidia.Name)
|
||||
assert.Equal(t, 70.0, nvidia.Usage)
|
||||
assert.Equal(t, 25.0, nvidia.Power)
|
||||
assert.Equal(t, 2.0, nvidia.Count)
|
||||
|
||||
amd := gm.GpuDataMap["n1"]
|
||||
require.NotNil(t, amd)
|
||||
assert.Equal(t, "AMD Radeon 680M", amd.Name)
|
||||
assert.Equal(t, 70.0, amd.Usage)
|
||||
assert.Equal(t, 45.0, amd.Power)
|
||||
assert.Equal(t, 2.0, amd.Count)
|
||||
}
|
||||
|
||||
func TestParseCollectorPriority(t *testing.T) {
|
||||
got := parseCollectorPriority(" nvml, nvidia-smi, intel_gpu_top, amd_sysfs, nvtop, rocm-smi, bad ")
|
||||
want := []collectorSource{
|
||||
collectorSourceNVML,
|
||||
collectorSourceNvidiaSMI,
|
||||
collectorSourceIntelGpuTop,
|
||||
collectorSourceAmdSysfs,
|
||||
collectorSourceNVTop,
|
||||
collectorSourceRocmSMI,
|
||||
}
|
||||
assert.Equal(t, want, got)
|
||||
}
|
||||
|
||||
func TestParseJetsonData(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -307,6 +401,19 @@ func TestParseJetsonData(t *testing.T) {
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "orin-style output with GPU@ temp and VDD_SYS_GPU power",
|
||||
input: "RAM 3276/7859MB (lfb 5x4MB) SWAP 1626/12122MB (cached 181MB) CPU [44%@1421,49%@2031,67%@2034,17%@1420,25%@1419,8%@1420] EMC_FREQ 1%@1866 GR3D_FREQ 0%@114 APE 150 MTS fg 1% bg 1% PLL@42.5C MCPU@42.5C PMIC@50C Tboard@38C GPU@39.5C BCPU@42.5C thermal@41.3C Tdiode@39.25C VDD_SYS_GPU 182/182 VDD_SYS_SOC 730/730 VDD_4V0_WIFI 0/0 VDD_IN 5297/5297 VDD_SYS_CPU 1917/1917 VDD_SYS_DDR 1241/1241",
|
||||
wantMetrics: &system.GPUData{
|
||||
Name: "GPU",
|
||||
MemoryUsed: 3276.0,
|
||||
MemoryTotal: 7859.0,
|
||||
Usage: 0.0,
|
||||
Power: 0.182, // 182mW -> 0.182W
|
||||
Temperature: 39.5,
|
||||
Count: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@@ -825,7 +932,7 @@ func TestInitializeSnapshots(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCalculateGPUAverage(t *testing.T) {
|
||||
t.Run("returns zero value when deltaCount is zero", func(t *testing.T) {
|
||||
t.Run("returns cached average when deltaCount is zero", func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
lastSnapshots: map[uint16]map[string]*gpuSnapshot{
|
||||
5000: {
|
||||
@@ -841,6 +948,7 @@ func TestCalculateGPUAverage(t *testing.T) {
|
||||
Count: 10.0, // Same as snapshot, so delta = 0
|
||||
Usage: 100.0,
|
||||
Power: 200.0,
|
||||
Temperature: 50.0, // Non-zero to avoid "suspended" check
|
||||
}
|
||||
|
||||
result := gm.calculateGPUAverage("0", gpu, 5000)
|
||||
@@ -849,6 +957,31 @@ func TestCalculateGPUAverage(t *testing.T) {
|
||||
assert.Equal(t, 100.0, result.Power, "Should return cached average")
|
||||
})
|
||||
|
||||
t.Run("returns zero value when GPU is suspended", func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
lastSnapshots: map[uint16]map[string]*gpuSnapshot{
|
||||
5000: {
|
||||
"0": {count: 10, usage: 100, power: 200},
|
||||
},
|
||||
},
|
||||
lastAvgData: map[string]system.GPUData{
|
||||
"0": {Usage: 50.0, Power: 100.0},
|
||||
},
|
||||
}
|
||||
|
||||
gpu := &system.GPUData{
|
||||
Name: "Test GPU",
|
||||
Count: 10.0,
|
||||
Temperature: 0,
|
||||
MemoryUsed: 0,
|
||||
}
|
||||
|
||||
result := gm.calculateGPUAverage("0", gpu, 5000)
|
||||
|
||||
assert.Equal(t, 0.0, result.Usage, "Should return zero usage")
|
||||
assert.Equal(t, 0.0, result.Power, "Should return zero power")
|
||||
})
|
||||
|
||||
t.Run("calculates average for standard GPU", func(t *testing.T) {
|
||||
gm := &GPUManager{
|
||||
lastSnapshots: map[uint16]map[string]*gpuSnapshot{
|
||||
@@ -948,36 +1081,35 @@ func TestCalculateGPUAverage(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestDetectGPUs(t *testing.T) {
|
||||
func TestGPUCapabilitiesAndLegacyPriority(t *testing.T) {
|
||||
// Save original PATH
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
// Set up temp dir with the commands
|
||||
tempDir := t.TempDir()
|
||||
os.Setenv("PATH", tempDir)
|
||||
hasAmdSysfs := (&GPUManager{}).hasAmdSysfs()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
setupCommands func() error
|
||||
setupCommands func(string) error
|
||||
wantNvidiaSmi bool
|
||||
wantRocmSmi bool
|
||||
wantTegrastats bool
|
||||
wantNvtop bool
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "nvidia-smi not available",
|
||||
setupCommands: func() error {
|
||||
setupCommands: func(_ string) error {
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: false,
|
||||
wantTegrastats: false,
|
||||
wantNvtop: false,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "nvidia-smi available",
|
||||
setupCommands: func() error {
|
||||
setupCommands: func(tempDir string) error {
|
||||
path := filepath.Join(tempDir, "nvidia-smi")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
@@ -989,29 +1121,14 @@ echo "test"`
|
||||
wantNvidiaSmi: true,
|
||||
wantTegrastats: false,
|
||||
wantRocmSmi: false,
|
||||
wantNvtop: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "rocm-smi available",
|
||||
setupCommands: func() error {
|
||||
setupCommands: func(tempDir string) error {
|
||||
path := filepath.Join(tempDir, "rocm-smi")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: true,
|
||||
wantRocmSmi: true,
|
||||
wantTegrastats: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "tegrastats available",
|
||||
setupCommands: func() error {
|
||||
path := filepath.Join(tempDir, "tegrastats")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
@@ -1020,12 +1137,47 @@ echo "test"`
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: true,
|
||||
wantTegrastats: false,
|
||||
wantNvtop: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "tegrastats available",
|
||||
setupCommands: func(tempDir string) error {
|
||||
path := filepath.Join(tempDir, "tegrastats")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: false,
|
||||
wantTegrastats: true,
|
||||
wantNvtop: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "nvtop available",
|
||||
setupCommands: func(tempDir string) error {
|
||||
path := filepath.Join(tempDir, "nvtop")
|
||||
script := `#!/bin/sh
|
||||
echo "[]"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: false,
|
||||
wantTegrastats: false,
|
||||
wantNvtop: true,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "no gpu tools available",
|
||||
setupCommands: func() error {
|
||||
setupCommands: func(_ string) error {
|
||||
os.Setenv("PATH", "")
|
||||
return nil
|
||||
},
|
||||
@@ -1035,29 +1187,53 @@ echo "test"`
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.setupCommands(); err != nil {
|
||||
tempDir := t.TempDir()
|
||||
os.Setenv("PATH", tempDir)
|
||||
if err := tt.setupCommands(tempDir); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
gm := &GPUManager{}
|
||||
err := gm.detectGPUs()
|
||||
caps := gm.discoverGpuCapabilities()
|
||||
var err error
|
||||
if !hasAnyGpuCollector(caps) {
|
||||
err = fmt.Errorf(noGPUFoundMsg)
|
||||
}
|
||||
priorities := gm.resolveLegacyCollectorPriority(caps)
|
||||
hasPriority := func(source collectorSource) bool {
|
||||
for _, s := range priorities {
|
||||
if s == source {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
gotNvidiaSmi := hasPriority(collectorSourceNvidiaSMI)
|
||||
gotRocmSmi := hasPriority(collectorSourceRocmSMI)
|
||||
gotTegrastats := caps.hasTegrastats
|
||||
gotNvtop := caps.hasNvtop
|
||||
|
||||
t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gm.nvidiaSmi, gm.rocmSmi, gm.tegrastats)
|
||||
t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gotNvidiaSmi, gotRocmSmi, gotTegrastats)
|
||||
|
||||
if tt.wantErr {
|
||||
wantErr := tt.wantErr
|
||||
if hasAmdSysfs && (tt.name == "nvidia-smi not available" || tt.name == "no gpu tools available") {
|
||||
wantErr = false
|
||||
}
|
||||
if wantErr {
|
||||
assert.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.wantNvidiaSmi, gm.nvidiaSmi)
|
||||
assert.Equal(t, tt.wantRocmSmi, gm.rocmSmi)
|
||||
assert.Equal(t, tt.wantTegrastats, gm.tegrastats)
|
||||
assert.Equal(t, tt.wantNvidiaSmi, gotNvidiaSmi)
|
||||
assert.Equal(t, tt.wantRocmSmi, gotRocmSmi)
|
||||
assert.Equal(t, tt.wantTegrastats, gotTegrastats)
|
||||
assert.Equal(t, tt.wantNvtop, gotNvtop)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartCollector(t *testing.T) {
|
||||
func TestCollectorStartHelpers(t *testing.T) {
|
||||
// Save original PATH
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
@@ -1142,6 +1318,27 @@ echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000m
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "nvtop collector",
|
||||
command: "nvtop",
|
||||
setup: func(t *testing.T) error {
|
||||
path := filepath.Join(dir, "nvtop")
|
||||
script := `#!/bin/sh
|
||||
echo '[{"device_name":"NVIDIA Test GPU","temp":"52C","power_draw":"31W","gpu_util":"37%","mem_total":"4294967296","mem_used":"536870912","processes":[]}]'`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
validate: func(t *testing.T, gm *GPUManager) {
|
||||
gpu, exists := gm.GpuDataMap["n0"]
|
||||
assert.True(t, exists)
|
||||
if exists {
|
||||
assert.Equal(t, "NVIDIA Test GPU", gpu.Name)
|
||||
assert.Equal(t, 52.0, gpu.Temperature)
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@@ -1154,13 +1351,157 @@ echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000m
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
}
|
||||
tt.gm.startCollector(tt.command)
|
||||
switch tt.command {
|
||||
case nvidiaSmiCmd:
|
||||
tt.gm.startNvidiaSmiCollector("4")
|
||||
case rocmSmiCmd:
|
||||
tt.gm.startRocmSmiCollector(4300 * time.Millisecond)
|
||||
case tegraStatsCmd:
|
||||
tt.gm.startTegraStatsCollector("3700")
|
||||
case nvtopCmd:
|
||||
tt.gm.startNvtopCollector("30", nil)
|
||||
default:
|
||||
t.Fatalf("unknown test command %q", tt.command)
|
||||
}
|
||||
time.Sleep(50 * time.Millisecond) // Give collector time to run
|
||||
tt.validate(t, tt.gm)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewGPUManagerPriorityNvtopFallback(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvtop,nvidia-smi")
|
||||
|
||||
nvtopPath := filepath.Join(dir, "nvtop")
|
||||
nvtopScript := `#!/bin/sh
|
||||
echo 'not-json'`
|
||||
require.NoError(t, os.WriteFile(nvtopPath, []byte(nvtopScript), 0755))
|
||||
|
||||
nvidiaPath := filepath.Join(dir, "nvidia-smi")
|
||||
nvidiaScript := `#!/bin/sh
|
||||
echo "0, NVIDIA Priority GPU, 45, 512, 2048, 12, 25"`
|
||||
require.NoError(t, os.WriteFile(nvidiaPath, []byte(nvidiaScript), 0755))
|
||||
|
||||
gm, err := NewGPUManager()
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gm)
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
gpu, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "Priority GPU", gpu.Name)
|
||||
assert.Equal(t, 45.0, gpu.Temperature)
|
||||
}
|
||||
|
||||
func TestNewGPUManagerPriorityMixedCollectors(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "intel_gpu_top,rocm-smi")
|
||||
|
||||
intelPath := filepath.Join(dir, "intel_gpu_top")
|
||||
intelScript := `#!/bin/sh
|
||||
echo "Freq MHz IRQ RC6 Power W IMC MiB/s RCS VCS"
|
||||
echo " req act /s % gpu pkg rd wr % se wa % se wa"
|
||||
echo "226 223 338 58 2.00 2.69 1820 965 0.00 0 0 0.00 0 0"
|
||||
echo "189 187 412 67 1.80 2.45 1950 823 8.50 2 1 15.00 1 0"
|
||||
`
|
||||
require.NoError(t, os.WriteFile(intelPath, []byte(intelScript), 0755))
|
||||
|
||||
rocmPath := filepath.Join(dir, "rocm-smi")
|
||||
rocmScript := `#!/bin/sh
|
||||
echo '{"card0": {"Temperature (Sensor edge) (C)": "49.0", "Current Socket Graphics Package Power (W)": "28.159", "GPU use (%)": "0", "VRAM Total Memory (B)": "536870912", "VRAM Total Used Memory (B)": "445550592", "Card Series": "Rembrandt [Radeon 680M]", "GUID": "34756"}}'
|
||||
`
|
||||
require.NoError(t, os.WriteFile(rocmPath, []byte(rocmScript), 0755))
|
||||
|
||||
gm, err := NewGPUManager()
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gm)
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
_, intelOk := gm.GpuDataMap["i0"]
|
||||
_, amdOk := gm.GpuDataMap["34756"]
|
||||
assert.True(t, intelOk)
|
||||
assert.True(t, amdOk)
|
||||
}
|
||||
|
||||
func TestNewGPUManagerPriorityNvmlFallbackToNvidiaSmi(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvml,nvidia-smi")
|
||||
|
||||
nvidiaPath := filepath.Join(dir, "nvidia-smi")
|
||||
nvidiaScript := `#!/bin/sh
|
||||
echo "0, NVIDIA Fallback GPU, 41, 256, 1024, 8, 14"`
|
||||
require.NoError(t, os.WriteFile(nvidiaPath, []byte(nvidiaScript), 0755))
|
||||
|
||||
gm, err := NewGPUManager()
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gm)
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
gpu, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "Fallback GPU", gpu.Name)
|
||||
}
|
||||
|
||||
func TestNewGPUManagerConfiguredCollectorsMustStart(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
|
||||
t.Run("configured valid collector unavailable", func(t *testing.T) {
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")
|
||||
gm, err := NewGPUManager()
|
||||
require.Nil(t, gm)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "no configured GPU collectors are available")
|
||||
})
|
||||
|
||||
t.Run("configured collector list has only unknown entries", func(t *testing.T) {
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "bad,unknown")
|
||||
gm, err := NewGPUManager()
|
||||
require.Nil(t, gm)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "no configured GPU collectors are available")
|
||||
})
|
||||
}
|
||||
|
||||
func TestNewGPUManagerJetsonIgnoresCollectorConfig(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")
|
||||
|
||||
tegraPath := filepath.Join(dir, "tegrastats")
|
||||
tegraScript := `#!/bin/sh
|
||||
echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000mW"`
|
||||
require.NoError(t, os.WriteFile(tegraPath, []byte(tegraScript), 0755))
|
||||
|
||||
gm, err := NewGPUManager()
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gm)
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
gpu, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "GPU", gpu.Name)
|
||||
}
|
||||
|
||||
// TestAccumulation tests the accumulation behavior for all three GPU types
|
||||
func TestAccumulation(t *testing.T) {
|
||||
type expectedGPUValues struct {
|
||||
@@ -1346,7 +1687,7 @@ func TestIntelUpdateFromStats(t *testing.T) {
|
||||
ok := gm.updateIntelFromStats(&sample1)
|
||||
assert.True(t, ok)
|
||||
|
||||
gpu := gm.GpuDataMap["0"]
|
||||
gpu := gm.GpuDataMap["i0"]
|
||||
require.NotNil(t, gpu)
|
||||
assert.Equal(t, "GPU", gpu.Name)
|
||||
assert.EqualValues(t, 10.5, gpu.Power)
|
||||
@@ -1368,7 +1709,7 @@ func TestIntelUpdateFromStats(t *testing.T) {
|
||||
ok = gm.updateIntelFromStats(&sample2)
|
||||
assert.True(t, ok)
|
||||
|
||||
gpu = gm.GpuDataMap["0"]
|
||||
gpu = gm.GpuDataMap["i0"]
|
||||
require.NotNil(t, gpu)
|
||||
assert.EqualValues(t, 10.5, gpu.Power)
|
||||
assert.EqualValues(t, 30.0, gpu.Engines["Render/3D"]) // 20 + 10
|
||||
@@ -1407,7 +1748,7 @@ echo "298 295 278 51 2.20 3.12 1675 942 5.75 1 2 9.50
|
||||
t.Fatalf("collectIntelStats error: %v", err)
|
||||
}
|
||||
|
||||
gpu := gm.GpuDataMap["0"]
|
||||
gpu := gm.GpuDataMap["i0"]
|
||||
require.NotNil(t, gpu)
|
||||
// Power should be sum of samples 2-4 (first is skipped): 2.0 + 1.8 + 2.2 = 6.0
|
||||
assert.EqualValues(t, 6.0, gpu.Power)
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
|
||||
"golang.org/x/exp/slog"
|
||||
"log/slog"
|
||||
)
|
||||
|
||||
// HandlerContext provides context for request handlers
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
|
||||
@@ -9,11 +9,31 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"time"
|
||||
)
|
||||
|
||||
// healthFile is the path to the health file
|
||||
var healthFile = filepath.Join(os.TempDir(), "beszel_health")
|
||||
var healthFile = getHealthFilePath()
|
||||
|
||||
func getHealthFilePath() string {
|
||||
filename := "beszel_health"
|
||||
if runtime.GOOS == "linux" {
|
||||
fullPath := filepath.Join("/dev/shm", filename)
|
||||
if err := updateHealthFile(fullPath); err == nil {
|
||||
return fullPath
|
||||
}
|
||||
}
|
||||
return filepath.Join(os.TempDir(), filename)
|
||||
}
|
||||
|
||||
// updateHealthFile creates the file at path, truncating it if it already
// exists, and closes it again. It returns the error from creating or closing
// the file, if any.
func updateHealthFile(path string) error {
	f, createErr := os.Create(path)
	if createErr == nil {
		return f.Close()
	}
	return createErr
}
|
||||
|
||||
// Check checks if the agent is connected by checking the modification time of the health file
|
||||
func Check() error {
|
||||
@@ -30,11 +50,7 @@ func Check() error {
|
||||
|
||||
// Update updates the modification time of the health file
|
||||
func Update() error {
|
||||
file, err := os.Create(healthFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return file.Close()
|
||||
return updateHealthFile(healthFile)
|
||||
}
|
||||
|
||||
// CleanUp removes the health file
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package health
|
||||
|
||||
@@ -37,7 +36,6 @@ func TestHealth(t *testing.T) {
|
||||
})
|
||||
|
||||
// This test uses synctest to simulate time passing.
|
||||
// NOTE: This test requires GOEXPERIMENT=synctest to run.
|
||||
t.Run("check with simulated time", func(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
// Update the file to set the initial timestamp.
|
||||
|
||||
@@ -52,7 +52,12 @@ class Program
|
||||
foreach (var sensor in hardware.Sensors)
|
||||
{
|
||||
var validTemp = sensor.SensorType == SensorType.Temperature && sensor.Value.HasValue;
|
||||
if (!validTemp || sensor.Name.Contains("Distance"))
|
||||
if (!validTemp ||
|
||||
sensor.Name.IndexOf("Distance", StringComparison.OrdinalIgnoreCase) >= 0 ||
|
||||
sensor.Name.IndexOf("Limit", StringComparison.OrdinalIgnoreCase) >= 0 ||
|
||||
sensor.Name.IndexOf("Critical", StringComparison.OrdinalIgnoreCase) >= 0 ||
|
||||
sensor.Name.IndexOf("Warning", StringComparison.OrdinalIgnoreCase) >= 0 ||
|
||||
sensor.Name.IndexOf("Resolution", StringComparison.OrdinalIgnoreCase) >= 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -3,9 +3,11 @@
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net48</TargetFramework>
|
||||
<Platforms>x64</Platforms>
|
||||
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
|
||||
<AppendRuntimeIdentifierToOutputPath>false</AppendRuntimeIdentifierToOutputPath>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="LibreHardwareMonitorLib" Version="0.9.4" />
|
||||
<PackageReference Include="LibreHardwareMonitorLib" Version="0.9.5" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
||||
233
agent/mdraid_linux.go
Normal file
233
agent/mdraid_linux.go
Normal file
@@ -0,0 +1,233 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
// mdraidSysfsRoot is the directory under which the md array sysfs files
// ("block/<name>/md/...") are looked up. It is a variable rather than a
// constant so tests can point it at a temporary directory; the production
// value is "/sys".
var mdraidSysfsRoot = "/sys"
|
||||
|
||||
// mdraidHealth holds the values read from sysfs for a single md array; it is
// filled in by readMdraidHealth.
type mdraidHealth struct {
	level         string // contents of md/level
	arrayState    string // contents of md/array_state
	degraded      uint64 // value of md/degraded
	raidDisks     uint64 // value of md/raid_disks
	syncAction    string // contents of md/sync_action
	syncCompleted string // contents of md/sync_completed
	syncSpeed     string // contents of md/sync_speed
	mismatchCnt   uint64 // value of md/mismatch_cnt
	capacity      uint64 // size in bytes as returned by readMdraidBlockCapacityBytes
}
|
||||
|
||||
// scanMdraidDevices discovers Linux md arrays exposed in sysfs.
|
||||
func scanMdraidDevices() []*DeviceInfo {
|
||||
blockDir := filepath.Join(mdraidSysfsRoot, "block")
|
||||
entries, err := os.ReadDir(blockDir)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := make([]*DeviceInfo, 0, 2)
|
||||
for _, ent := range entries {
|
||||
name := ent.Name()
|
||||
if !isMdraidBlockName(name) {
|
||||
continue
|
||||
}
|
||||
mdDir := filepath.Join(blockDir, name, "md")
|
||||
if !utils.FileExists(filepath.Join(mdDir, "array_state")) {
|
||||
continue
|
||||
}
|
||||
|
||||
devPath := filepath.Join("/dev", name)
|
||||
devices = append(devices, &DeviceInfo{
|
||||
Name: devPath,
|
||||
Type: "mdraid",
|
||||
InfoName: devPath + " [mdraid]",
|
||||
Protocol: "MD",
|
||||
})
|
||||
}
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
// collectMdraidHealth reads mdraid health and stores it in SmartDataMap.
|
||||
func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
|
||||
if deviceInfo == nil || deviceInfo.Name == "" {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
base := filepath.Base(deviceInfo.Name)
|
||||
if !isMdraidBlockName(base) && !strings.EqualFold(deviceInfo.Type, "mdraid") {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
health, ok := readMdraidHealth(base)
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
deviceInfo.Type = "mdraid"
|
||||
key := fmt.Sprintf("mdraid:%s", base)
|
||||
status := mdraidSmartStatus(health)
|
||||
|
||||
attrs := make([]*smart.SmartAttribute, 0, 10)
|
||||
if health.arrayState != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "ArrayState", RawString: health.arrayState})
|
||||
}
|
||||
if health.level != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "RaidLevel", RawString: health.level})
|
||||
}
|
||||
if health.raidDisks > 0 {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "RaidDisks", RawValue: health.raidDisks})
|
||||
}
|
||||
if health.degraded > 0 {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "Degraded", RawValue: health.degraded})
|
||||
}
|
||||
if health.syncAction != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "SyncAction", RawString: health.syncAction})
|
||||
}
|
||||
if health.syncCompleted != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "SyncCompleted", RawString: health.syncCompleted})
|
||||
}
|
||||
if health.syncSpeed != "" {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "SyncSpeed", RawString: health.syncSpeed})
|
||||
}
|
||||
if health.mismatchCnt > 0 {
|
||||
attrs = append(attrs, &smart.SmartAttribute{Name: "MismatchCount", RawValue: health.mismatchCnt})
|
||||
}
|
||||
|
||||
sm.Lock()
|
||||
defer sm.Unlock()
|
||||
|
||||
if _, exists := sm.SmartDataMap[key]; !exists {
|
||||
sm.SmartDataMap[key] = &smart.SmartData{}
|
||||
}
|
||||
|
||||
data := sm.SmartDataMap[key]
|
||||
data.ModelName = "Linux MD RAID"
|
||||
if health.level != "" {
|
||||
data.ModelName = "Linux MD RAID (" + health.level + ")"
|
||||
}
|
||||
data.Capacity = health.capacity
|
||||
data.SmartStatus = status
|
||||
data.DiskName = filepath.Join("/dev", base)
|
||||
data.DiskType = "mdraid"
|
||||
data.Attributes = attrs
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// readMdraidHealth reads md array health fields from sysfs.
|
||||
func readMdraidHealth(blockName string) (mdraidHealth, bool) {
|
||||
var out mdraidHealth
|
||||
|
||||
if !isMdraidBlockName(blockName) {
|
||||
return out, false
|
||||
}
|
||||
|
||||
mdDir := filepath.Join(mdraidSysfsRoot, "block", blockName, "md")
|
||||
arrayState, okState := utils.ReadStringFileOK(filepath.Join(mdDir, "array_state"))
|
||||
if !okState {
|
||||
return out, false
|
||||
}
|
||||
|
||||
out.arrayState = arrayState
|
||||
out.level = utils.ReadStringFile(filepath.Join(mdDir, "level"))
|
||||
out.syncAction = utils.ReadStringFile(filepath.Join(mdDir, "sync_action"))
|
||||
out.syncCompleted = utils.ReadStringFile(filepath.Join(mdDir, "sync_completed"))
|
||||
out.syncSpeed = utils.ReadStringFile(filepath.Join(mdDir, "sync_speed"))
|
||||
|
||||
if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "raid_disks")); ok {
|
||||
out.raidDisks = val
|
||||
}
|
||||
if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "degraded")); ok {
|
||||
out.degraded = val
|
||||
}
|
||||
if val, ok := utils.ReadUintFile(filepath.Join(mdDir, "mismatch_cnt")); ok {
|
||||
out.mismatchCnt = val
|
||||
}
|
||||
|
||||
if capBytes, ok := readMdraidBlockCapacityBytes(blockName, mdraidSysfsRoot); ok {
|
||||
out.capacity = capBytes
|
||||
}
|
||||
|
||||
return out, true
|
||||
}
|
||||
|
||||
// mdraidSmartStatus maps md state/sync signals to a SMART-like status.
|
||||
func mdraidSmartStatus(health mdraidHealth) string {
|
||||
state := strings.ToLower(strings.TrimSpace(health.arrayState))
|
||||
switch state {
|
||||
case "inactive", "faulty", "broken", "stopped":
|
||||
return "FAILED"
|
||||
}
|
||||
// During rebuild/recovery, arrays are often temporarily degraded; report as
|
||||
// warning instead of hard failure while synchronization is in progress.
|
||||
syncAction := strings.ToLower(strings.TrimSpace(health.syncAction))
|
||||
switch syncAction {
|
||||
case "resync", "recover", "reshape":
|
||||
return "WARNING"
|
||||
}
|
||||
if health.degraded > 0 {
|
||||
return "FAILED"
|
||||
}
|
||||
switch syncAction {
|
||||
case "check", "repair":
|
||||
return "WARNING"
|
||||
}
|
||||
switch state {
|
||||
case "clean", "active", "active-idle", "write-pending", "read-auto", "readonly":
|
||||
return "PASSED"
|
||||
}
|
||||
return "UNKNOWN"
|
||||
}
|
||||
|
||||
// isMdraidBlockName reports whether name is "md" followed by one or more
// ASCII digits (for example "md0" or "md127"). Anything else, including a
// bare "md" or names with further suffixes such as "md0p1", is rejected.
func isMdraidBlockName(name string) bool {
	digits := strings.TrimPrefix(name, "md")
	// An unchanged string means the prefix was missing; an empty remainder
	// means there is no number after it.
	if digits == name || digits == "" {
		return false
	}
	// Stripping every ASCII digit leaves nothing only if all characters were digits.
	return strings.Trim(digits, "0123456789") == ""
}
|
||||
|
||||
// readMdraidBlockCapacityBytes converts block size metadata into bytes.
|
||||
func readMdraidBlockCapacityBytes(blockName, root string) (uint64, bool) {
|
||||
sizePath := filepath.Join(root, "block", blockName, "size")
|
||||
lbsPath := filepath.Join(root, "block", blockName, "queue", "logical_block_size")
|
||||
|
||||
sizeStr, ok := utils.ReadStringFileOK(sizePath)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
sectors, err := strconv.ParseUint(sizeStr, 10, 64)
|
||||
if err != nil || sectors == 0 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
logicalBlockSize := uint64(512)
|
||||
if lbsStr, ok := utils.ReadStringFileOK(lbsPath); ok {
|
||||
if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
|
||||
logicalBlockSize = parsed
|
||||
}
|
||||
}
|
||||
|
||||
return sectors * logicalBlockSize, true
|
||||
}
|
||||
103
agent/mdraid_linux_test.go
Normal file
103
agent/mdraid_linux_test.go
Normal file
@@ -0,0 +1,103 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
func TestMdraidMockSysfsScanAndCollect(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
prev := mdraidSysfsRoot
|
||||
mdraidSysfsRoot = tmp
|
||||
t.Cleanup(func() { mdraidSysfsRoot = prev })
|
||||
|
||||
mdDir := filepath.Join(tmp, "block", "md0", "md")
|
||||
queueDir := filepath.Join(tmp, "block", "md0", "queue")
|
||||
if err := os.MkdirAll(mdDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.MkdirAll(queueDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
write := func(path, content string) {
|
||||
t.Helper()
|
||||
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
write(filepath.Join(mdDir, "array_state"), "active\n")
|
||||
write(filepath.Join(mdDir, "level"), "raid1\n")
|
||||
write(filepath.Join(mdDir, "raid_disks"), "2\n")
|
||||
write(filepath.Join(mdDir, "degraded"), "0\n")
|
||||
write(filepath.Join(mdDir, "sync_action"), "resync\n")
|
||||
write(filepath.Join(mdDir, "sync_completed"), "10%\n")
|
||||
write(filepath.Join(mdDir, "sync_speed"), "100M\n")
|
||||
write(filepath.Join(mdDir, "mismatch_cnt"), "0\n")
|
||||
write(filepath.Join(queueDir, "logical_block_size"), "512\n")
|
||||
write(filepath.Join(tmp, "block", "md0", "size"), "2048\n")
|
||||
|
||||
devs := scanMdraidDevices()
|
||||
if len(devs) != 1 {
|
||||
t.Fatalf("scanMdraidDevices() = %d devices, want 1", len(devs))
|
||||
}
|
||||
if devs[0].Name != "/dev/md0" || devs[0].Type != "mdraid" {
|
||||
t.Fatalf("scanMdraidDevices()[0] = %+v, want Name=/dev/md0 Type=mdraid", devs[0])
|
||||
}
|
||||
|
||||
sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
|
||||
ok, err := sm.collectMdraidHealth(devs[0])
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("collectMdraidHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
|
||||
}
|
||||
if len(sm.SmartDataMap) != 1 {
|
||||
t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
|
||||
}
|
||||
var got *smart.SmartData
|
||||
for _, v := range sm.SmartDataMap {
|
||||
got = v
|
||||
break
|
||||
}
|
||||
if got == nil {
|
||||
t.Fatalf("SmartDataMap value nil")
|
||||
}
|
||||
if got.DiskType != "mdraid" || got.DiskName != "/dev/md0" {
|
||||
t.Fatalf("disk fields = (type=%q name=%q), want (mdraid,/dev/md0)", got.DiskType, got.DiskName)
|
||||
}
|
||||
if got.SmartStatus != "WARNING" {
|
||||
t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
|
||||
}
|
||||
if got.ModelName == "" || got.Capacity == 0 {
|
||||
t.Fatalf("identity fields = (model=%q cap=%d), want non-empty model and cap>0", got.ModelName, got.Capacity)
|
||||
}
|
||||
if len(got.Attributes) < 5 {
|
||||
t.Fatalf("attributes len=%d, want >= 5", len(got.Attributes))
|
||||
}
|
||||
}
|
||||
|
||||
func TestMdraidSmartStatus(t *testing.T) {
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "inactive"}); got != "FAILED" {
|
||||
t.Fatalf("mdraidSmartStatus(inactive) = %q, want FAILED", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", degraded: 1, syncAction: "recover"}); got != "WARNING" {
|
||||
t.Fatalf("mdraidSmartStatus(degraded+recover) = %q, want WARNING", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", degraded: 1}); got != "FAILED" {
|
||||
t.Fatalf("mdraidSmartStatus(degraded) = %q, want FAILED", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "active", syncAction: "recover"}); got != "WARNING" {
|
||||
t.Fatalf("mdraidSmartStatus(recover) = %q, want WARNING", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "clean"}); got != "PASSED" {
|
||||
t.Fatalf("mdraidSmartStatus(clean) = %q, want PASSED", got)
|
||||
}
|
||||
if got := mdraidSmartStatus(mdraidHealth{arrayState: "unknown"}); got != "UNKNOWN" {
|
||||
t.Fatalf("mdraidSmartStatus(unknown) = %q, want UNKNOWN", got)
|
||||
}
|
||||
}
|
||||
11
agent/mdraid_stub.go
Normal file
11
agent/mdraid_stub.go
Normal file
@@ -0,0 +1,11 @@
|
||||
//go:build !linux
|
||||
|
||||
package agent
|
||||
|
||||
func scanMdraidDevices() []*DeviceInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sm *SmartManager) collectMdraidHealth(deviceInfo *DeviceInfo) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/deltatracker"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
psutilNet "github.com/shirou/gopsutil/v4/net"
|
||||
)
|
||||
@@ -94,7 +95,7 @@ func (a *Agent) initializeNetIoStats() {
|
||||
a.netInterfaces = make(map[string]struct{}, 0)
|
||||
|
||||
// parse NICS env var for whitelist / blacklist
|
||||
nicsEnvVal, nicsEnvExists := GetEnv("NICS")
|
||||
nicsEnvVal, nicsEnvExists := utils.GetEnv("NICS")
|
||||
var nicCfg *NicConfig
|
||||
if nicsEnvExists {
|
||||
nicCfg = newNicConfig(nicsEnvVal)
|
||||
@@ -103,10 +104,7 @@ func (a *Agent) initializeNetIoStats() {
|
||||
// get current network I/O stats and record valid interfaces
|
||||
if netIO, err := psutilNet.IOCounters(true); err == nil {
|
||||
for _, v := range netIO {
|
||||
if nicsEnvExists && !isValidNic(v.Name, nicCfg) {
|
||||
continue
|
||||
}
|
||||
if a.skipNetworkInterface(v) {
|
||||
if skipNetworkInterface(v, nicCfg) {
|
||||
continue
|
||||
}
|
||||
slog.Info("Detected network interface", "name", v.Name, "sent", v.BytesSent, "recv", v.BytesRecv)
|
||||
@@ -215,10 +213,8 @@ func (a *Agent) applyNetworkTotals(
|
||||
totalBytesSent, totalBytesRecv uint64,
|
||||
bytesSentPerSecond, bytesRecvPerSecond uint64,
|
||||
) {
|
||||
networkSentPs := bytesToMegabytes(float64(bytesSentPerSecond))
|
||||
networkRecvPs := bytesToMegabytes(float64(bytesRecvPerSecond))
|
||||
if networkSentPs > 10_000 || networkRecvPs > 10_000 {
|
||||
slog.Warn("Invalid net stats. Resetting.", "sent", networkSentPs, "recv", networkRecvPs)
|
||||
if bytesSentPerSecond > 10_000_000_000 || bytesRecvPerSecond > 10_000_000_000 {
|
||||
slog.Warn("Invalid net stats. Resetting.", "sent", bytesSentPerSecond, "recv", bytesRecvPerSecond)
|
||||
for _, v := range netIO {
|
||||
if _, exists := a.netInterfaces[v.Name]; !exists {
|
||||
continue
|
||||
@@ -228,21 +224,29 @@ func (a *Agent) applyNetworkTotals(
|
||||
a.initializeNetIoStats()
|
||||
delete(a.netIoStats, cacheTimeMs)
|
||||
delete(a.netInterfaceDeltaTrackers, cacheTimeMs)
|
||||
systemStats.NetworkSent = 0
|
||||
systemStats.NetworkRecv = 0
|
||||
systemStats.Bandwidth[0], systemStats.Bandwidth[1] = 0, 0
|
||||
return
|
||||
}
|
||||
|
||||
systemStats.NetworkSent = networkSentPs
|
||||
systemStats.NetworkRecv = networkRecvPs
|
||||
systemStats.Bandwidth[0], systemStats.Bandwidth[1] = bytesSentPerSecond, bytesRecvPerSecond
|
||||
nis.BytesSent = totalBytesSent
|
||||
nis.BytesRecv = totalBytesRecv
|
||||
a.netIoStats[cacheTimeMs] = nis
|
||||
}
|
||||
|
||||
func (a *Agent) skipNetworkInterface(v psutilNet.IOCountersStat) bool {
|
||||
// skipNetworkInterface returns true if the network interface should be ignored.
|
||||
func skipNetworkInterface(v psutilNet.IOCountersStat, nicCfg *NicConfig) bool {
|
||||
if nicCfg != nil {
|
||||
if !isValidNic(v.Name, nicCfg) {
|
||||
return true
|
||||
}
|
||||
// In whitelist mode, we honor explicit inclusion without auto-filtering.
|
||||
if !nicCfg.isBlacklist {
|
||||
return false
|
||||
}
|
||||
// In blacklist mode, still apply the auto-filter below.
|
||||
}
|
||||
|
||||
switch {
|
||||
case strings.HasPrefix(v.Name, "lo"),
|
||||
strings.HasPrefix(v.Name, "docker"),
|
||||
|
||||
@@ -261,6 +261,39 @@ func TestNewNicConfig(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
func TestSkipNetworkInterface(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
nic psutilNet.IOCountersStat
|
||||
nicCfg *NicConfig
|
||||
expectSkip bool
|
||||
}{
|
||||
{"loopback lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"loopback lo0", psutilNet.IOCountersStat{Name: "lo0", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"docker prefix", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"br- prefix", psutilNet.IOCountersStat{Name: "br-lan", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"veth prefix", psutilNet.IOCountersStat{Name: "veth0abc", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"bond prefix", psutilNet.IOCountersStat{Name: "bond0", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"cali prefix", psutilNet.IOCountersStat{Name: "cali1234", BytesSent: 100, BytesRecv: 100}, nil, true},
|
||||
{"zero BytesRecv", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 0}, nil, true},
|
||||
{"zero BytesSent", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 100}, nil, true},
|
||||
{"both zero", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 0, BytesRecv: 0}, nil, true},
|
||||
{"normal eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 200}, nil, false},
|
||||
{"normal wlan0", psutilNet.IOCountersStat{Name: "wlan0", BytesSent: 1, BytesRecv: 1}, nil, false},
|
||||
{"whitelist overrides skip (docker)", psutilNet.IOCountersStat{Name: "docker0", BytesSent: 100, BytesRecv: 100}, newNicConfig("docker0"), false},
|
||||
{"whitelist overrides skip (lo)", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("lo"), false},
|
||||
{"whitelist exclusion", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("eth0"), true},
|
||||
{"blacklist skip lo", psutilNet.IOCountersStat{Name: "lo", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
|
||||
{"blacklist explicit eth0", psutilNet.IOCountersStat{Name: "eth0", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), true},
|
||||
{"blacklist allow eth1", psutilNet.IOCountersStat{Name: "eth1", BytesSent: 100, BytesRecv: 100}, newNicConfig("-eth0"), false},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equal(t, tt.expectSkip, skipNetworkInterface(tt.nic, tt.nicCfg))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureNetworkInterfacesMap(t *testing.T) {
|
||||
var a Agent
|
||||
var stats system.Stats
|
||||
@@ -383,8 +416,6 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
totalBytesSent uint64
|
||||
totalBytesRecv uint64
|
||||
expectReset bool
|
||||
expectedNetworkSent float64
|
||||
expectedNetworkRecv float64
|
||||
expectedBandwidthSent uint64
|
||||
expectedBandwidthRecv uint64
|
||||
}{
|
||||
@@ -395,8 +426,6 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
totalBytesSent: 10000000,
|
||||
totalBytesRecv: 20000000,
|
||||
expectReset: false,
|
||||
expectedNetworkSent: 0.95, // ~1 MB/s rounded to 2 decimals
|
||||
expectedNetworkRecv: 1.91, // ~2 MB/s rounded to 2 decimals
|
||||
expectedBandwidthSent: 1000000,
|
||||
expectedBandwidthRecv: 2000000,
|
||||
},
|
||||
@@ -424,18 +453,6 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
totalBytesRecv: 20000000,
|
||||
expectReset: true,
|
||||
},
|
||||
{
|
||||
name: "Valid network stats - at threshold boundary",
|
||||
bytesSentPerSecond: 10485750000, // ~9999.99 MB/s (rounds to 9999.99)
|
||||
bytesRecvPerSecond: 10485750000, // ~9999.99 MB/s (rounds to 9999.99)
|
||||
totalBytesSent: 10000000,
|
||||
totalBytesRecv: 20000000,
|
||||
expectReset: false,
|
||||
expectedNetworkSent: 9999.99,
|
||||
expectedNetworkRecv: 9999.99,
|
||||
expectedBandwidthSent: 10485750000,
|
||||
expectedBandwidthRecv: 10485750000,
|
||||
},
|
||||
{
|
||||
name: "Zero values",
|
||||
bytesSentPerSecond: 0,
|
||||
@@ -443,8 +460,6 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
totalBytesSent: 0,
|
||||
totalBytesRecv: 0,
|
||||
expectReset: false,
|
||||
expectedNetworkSent: 0.0,
|
||||
expectedNetworkRecv: 0.0,
|
||||
expectedBandwidthSent: 0,
|
||||
expectedBandwidthRecv: 0,
|
||||
},
|
||||
@@ -481,14 +496,10 @@ func TestApplyNetworkTotals(t *testing.T) {
|
||||
// Should have reset network tracking state - maps cleared and stats zeroed
|
||||
assert.NotContains(t, a.netIoStats, cacheTimeMs, "cache entry should be cleared after reset")
|
||||
assert.NotContains(t, a.netInterfaceDeltaTrackers, cacheTimeMs, "tracker should be cleared on reset")
|
||||
assert.Zero(t, systemStats.NetworkSent)
|
||||
assert.Zero(t, systemStats.NetworkRecv)
|
||||
assert.Zero(t, systemStats.Bandwidth[0])
|
||||
assert.Zero(t, systemStats.Bandwidth[1])
|
||||
} else {
|
||||
// Should have applied stats
|
||||
assert.Equal(t, tt.expectedNetworkSent, systemStats.NetworkSent)
|
||||
assert.Equal(t, tt.expectedNetworkRecv, systemStats.NetworkRecv)
|
||||
assert.Equal(t, tt.expectedBandwidthSent, systemStats.Bandwidth[0])
|
||||
assert.Equal(t, tt.expectedBandwidthRecv, systemStats.Bandwidth[1])
|
||||
|
||||
|
||||
31
agent/response.go
Normal file
31
agent/response.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
)
|
||||
|
||||
// newAgentResponse creates an AgentResponse using legacy typed fields.
|
||||
// This maintains backward compatibility with <= 0.17 hubs that expect specific fields.
|
||||
func newAgentResponse(data any, requestID *uint32) common.AgentResponse {
|
||||
response := common.AgentResponse{Id: requestID}
|
||||
switch v := data.(type) {
|
||||
case *system.CombinedData:
|
||||
response.SystemData = v
|
||||
case *common.FingerprintResponse:
|
||||
response.Fingerprint = v
|
||||
case string:
|
||||
response.String = &v
|
||||
case map[string]smart.SmartData:
|
||||
response.SmartData = v
|
||||
case systemd.ServiceDetails:
|
||||
response.ServiceInfo = v
|
||||
default:
|
||||
// For unknown types, use the generic Data field
|
||||
response.Data, _ = cbor.Marshal(data)
|
||||
}
|
||||
return response
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
"github.com/shirou/gopsutil/v4/common"
|
||||
@@ -26,9 +27,9 @@ type SensorConfig struct {
|
||||
}
|
||||
|
||||
func (a *Agent) newSensorConfig() *SensorConfig {
|
||||
primarySensor, _ := GetEnv("PRIMARY_SENSOR")
|
||||
sysSensors, _ := GetEnv("SYS_SENSORS")
|
||||
sensorsEnvVal, sensorsSet := GetEnv("SENSORS")
|
||||
primarySensor, _ := utils.GetEnv("PRIMARY_SENSOR")
|
||||
sysSensors, _ := utils.GetEnv("SYS_SENSORS")
|
||||
sensorsEnvVal, sensorsSet := utils.GetEnv("SENSORS")
|
||||
skipCollection := sensorsSet && sensorsEnvVal == ""
|
||||
|
||||
return a.newSensorConfigWithEnv(primarySensor, sysSensors, sensorsEnvVal, skipCollection)
|
||||
@@ -135,7 +136,7 @@ func (a *Agent) updateTemperatures(systemStats *system.Stats) {
|
||||
case sensorName:
|
||||
a.systemInfo.DashboardTemp = sensor.Temperature
|
||||
}
|
||||
systemStats.Temperatures[sensorName] = twoDecimals(sensor.Temperature)
|
||||
systemStats.Temperatures[sensorName] = utils.TwoDecimals(sensor.Temperature)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
|
||||
@@ -12,10 +12,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/common"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
@@ -38,6 +37,9 @@ var hubVersions map[string]semver.Version
|
||||
// and begins listening for connections. Returns an error if the server
|
||||
// is already running or if there's an issue starting the server.
|
||||
func (a *Agent) StartServer(opts ServerOptions) error {
|
||||
if disableSSH, _ := utils.GetEnv("DISABLE_SSH"); disableSSH == "true" {
|
||||
return errors.New("SSH disabled")
|
||||
}
|
||||
if a.server != nil {
|
||||
return errors.New("server already started")
|
||||
}
|
||||
@@ -165,20 +167,9 @@ func (a *Agent) handleSSHRequest(w io.Writer, req *common.HubRequest[cbor.RawMes
|
||||
}
|
||||
|
||||
// responder that writes AgentResponse to stdout
|
||||
// Uses legacy typed fields for backward compatibility with <= 0.17
|
||||
sshResponder := func(data any, requestID *uint32) error {
|
||||
response := common.AgentResponse{Id: requestID}
|
||||
switch v := data.(type) {
|
||||
case *system.CombinedData:
|
||||
response.SystemData = v
|
||||
case string:
|
||||
response.String = &v
|
||||
case map[string]smart.SmartData:
|
||||
response.SmartData = v
|
||||
case systemd.ServiceDetails:
|
||||
response.ServiceInfo = v
|
||||
default:
|
||||
response.Error = fmt.Sprintf("unsupported response type: %T", data)
|
||||
}
|
||||
response := newAgentResponse(data, requestID)
|
||||
return cbor.NewEncoder(w).Encode(response)
|
||||
}
|
||||
|
||||
@@ -248,11 +239,11 @@ func ParseKeys(input string) ([]gossh.PublicKey, error) {
|
||||
// and finally defaults to ":45876".
|
||||
func GetAddress(addr string) string {
|
||||
if addr == "" {
|
||||
addr, _ = GetEnv("LISTEN")
|
||||
addr, _ = utils.GetEnv("LISTEN")
|
||||
}
|
||||
if addr == "" {
|
||||
// Legacy PORT environment variable support
|
||||
addr, _ = GetEnv("PORT")
|
||||
addr, _ = utils.GetEnv("PORT")
|
||||
}
|
||||
if addr == "" {
|
||||
return ":45876"
|
||||
@@ -268,7 +259,7 @@ func GetAddress(addr string) string {
|
||||
// It checks the NETWORK environment variable first, then infers from
|
||||
// the address format: addresses starting with "/" are "unix", others are "tcp".
|
||||
func GetNetwork(addr string) string {
|
||||
if network, ok := GetEnv("NETWORK"); ok && network != "" {
|
||||
if network, ok := utils.GetEnv("NETWORK"); ok && network != "" {
|
||||
return network
|
||||
}
|
||||
if strings.HasPrefix(addr, "/") {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
//go:build testing
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
@@ -180,6 +182,23 @@ func TestStartServer(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartServerDisableSSH(t *testing.T) {
|
||||
os.Setenv("BESZEL_AGENT_DISABLE_SSH", "true")
|
||||
defer os.Unsetenv("BESZEL_AGENT_DISABLE_SSH")
|
||||
|
||||
agent, err := NewAgent("")
|
||||
require.NoError(t, err)
|
||||
|
||||
opts := ServerOptions{
|
||||
Network: "tcp",
|
||||
Addr: ":45990",
|
||||
}
|
||||
|
||||
err = agent.StartServer(opts)
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "SSH disabled")
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
//////////////////// ParseKeys Tests ////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
256
agent/smart.go
256
agent/smart.go
@@ -8,6 +8,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
@@ -17,9 +18,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
|
||||
"golang.org/x/exp/slog"
|
||||
)
|
||||
|
||||
// SmartManager manages data collection for SMART devices
|
||||
@@ -29,7 +29,7 @@ type SmartManager struct {
|
||||
SmartDevices []*DeviceInfo
|
||||
refreshMutex sync.Mutex
|
||||
lastScanTime time.Time
|
||||
binPath string
|
||||
smartctlPath string
|
||||
excludedDevices map[string]struct{}
|
||||
}
|
||||
|
||||
@@ -54,6 +54,12 @@ type DeviceInfo struct {
|
||||
parserType string
|
||||
}
|
||||
|
||||
// deviceKey is the composite (name, type) identity of a device. Both fields
// are plain strings, so values are comparable and usable as map keys; two
// keys are equal only when name and deviceType both match.
type deviceKey struct {
	name, deviceType string
}
|
||||
|
||||
var errNoValidSmartData = fmt.Errorf("no valid SMART data found") // Error for missing data
|
||||
|
||||
// Refresh updates SMART data for all known devices
|
||||
@@ -151,7 +157,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
||||
currentDevices := sm.devicesSnapshot()
|
||||
|
||||
var configuredDevices []*DeviceInfo
|
||||
if configuredRaw, ok := GetEnv("SMART_DEVICES"); ok {
|
||||
if configuredRaw, ok := utils.GetEnv("SMART_DEVICES"); ok {
|
||||
slog.Info("SMART_DEVICES", "value", configuredRaw)
|
||||
config := strings.TrimSpace(configuredRaw)
|
||||
if config == "" {
|
||||
@@ -165,18 +171,18 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
||||
configuredDevices = parsedDevices
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, sm.binPath, "--scan", "-j")
|
||||
output, err := cmd.Output()
|
||||
|
||||
var (
|
||||
scanErr error
|
||||
scannedDevices []*DeviceInfo
|
||||
hasValidScan bool
|
||||
)
|
||||
|
||||
if sm.smartctlPath != "" {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, sm.smartctlPath, "--scan", "-j")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
scanErr = err
|
||||
} else {
|
||||
@@ -185,6 +191,21 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
||||
scanErr = errNoValidSmartData
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add eMMC devices (Linux only) by reading sysfs health fields. This does not
|
||||
// require smartctl and does not scan the whole device.
|
||||
if emmcDevices := scanEmmcDevices(); len(emmcDevices) > 0 {
|
||||
scannedDevices = append(scannedDevices, emmcDevices...)
|
||||
hasValidScan = true
|
||||
}
|
||||
|
||||
// Add Linux mdraid arrays by reading sysfs health fields. This does not
|
||||
// require smartctl and does not scan the whole device.
|
||||
if raidDevices := scanMdraidDevices(); len(raidDevices) > 0 {
|
||||
scannedDevices = append(scannedDevices, raidDevices...)
|
||||
hasValidScan = true
|
||||
}
|
||||
|
||||
finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
|
||||
finalDevices = sm.filterExcludedDevices(finalDevices)
|
||||
@@ -202,7 +223,11 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
||||
}
|
||||
|
||||
func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, error) {
|
||||
entries := strings.Split(config, ",")
|
||||
splitChar, _ := utils.GetEnv("SMART_DEVICES_SEPARATOR")
|
||||
if splitChar == "" {
|
||||
splitChar = ","
|
||||
}
|
||||
entries := strings.Split(config, splitChar)
|
||||
devices := make([]*DeviceInfo, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
entry = strings.TrimSpace(entry)
|
||||
@@ -236,7 +261,7 @@ func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, er
|
||||
}
|
||||
|
||||
func (sm *SmartManager) refreshExcludedDevices() {
|
||||
rawValue, _ := GetEnv("EXCLUDE_SMART")
|
||||
rawValue, _ := utils.GetEnv("EXCLUDE_SMART")
|
||||
sm.excludedDevices = make(map[string]struct{})
|
||||
|
||||
for entry := range strings.SplitSeq(rawValue, ",") {
|
||||
@@ -326,6 +351,13 @@ func normalizeParserType(value string) string {
|
||||
}
|
||||
}
|
||||
|
||||
// makeDeviceKey creates a composite key from device name and type.
|
||||
// This allows multiple drives under the same device path (e.g., RAID controllers)
|
||||
// to be tracked separately.
|
||||
func makeDeviceKey(name, deviceType string) deviceKey {
|
||||
return deviceKey{name: name, deviceType: deviceType}
|
||||
}
|
||||
|
||||
// parseSmartOutput attempts each SMART parser, optionally detecting the type when
|
||||
// it is not provided, and updates the device info when a parser succeeds.
|
||||
func (sm *SmartManager) parseSmartOutput(deviceInfo *DeviceInfo, output []byte) bool {
|
||||
@@ -426,6 +458,24 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
return errNoValidSmartData
|
||||
}
|
||||
|
||||
// mdraid health is not exposed via SMART; Linux exposes array state in sysfs.
|
||||
if deviceInfo != nil {
|
||||
if ok, err := sm.collectMdraidHealth(deviceInfo); ok {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// eMMC health is not exposed via SMART on Linux, but the kernel provides
|
||||
// wear / EOL indicators via sysfs. Prefer that path when available.
|
||||
if deviceInfo != nil {
|
||||
if ok, err := sm.collectEmmcHealth(deviceInfo); ok {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if sm.smartctlPath == "" {
|
||||
return errNoValidSmartData
|
||||
}
|
||||
|
||||
// slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name))
|
||||
|
||||
// Check if we have any existing data for this device
|
||||
@@ -435,12 +485,12 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
defer cancel()
|
||||
|
||||
// Try with -n standby first if we have existing data
|
||||
args := sm.smartctlArgs(deviceInfo, true)
|
||||
cmd := exec.CommandContext(ctx, sm.binPath, args...)
|
||||
args := sm.smartctlArgs(deviceInfo, hasExistingData)
|
||||
cmd := exec.CommandContext(ctx, sm.smartctlPath, args...)
|
||||
output, err := cmd.CombinedOutput()
|
||||
|
||||
// Check if device is in standby (exit status 2)
|
||||
if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 2 {
|
||||
if exitErr, ok := errors.AsType[*exec.ExitError](err); ok && exitErr.ExitCode() == 2 {
|
||||
if hasExistingData {
|
||||
// Device is in standby and we have cached data, keep using cache
|
||||
return nil
|
||||
@@ -449,7 +499,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer cancel2()
|
||||
args = sm.smartctlArgs(deviceInfo, false)
|
||||
cmd = exec.CommandContext(ctx2, sm.binPath, args...)
|
||||
cmd = exec.CommandContext(ctx2, sm.smartctlPath, args...)
|
||||
output, err = cmd.CombinedOutput()
|
||||
}
|
||||
|
||||
@@ -466,7 +516,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
ctx3, cancel3 := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer cancel3()
|
||||
args = sm.smartctlArgs(deviceInfo, false)
|
||||
cmd = exec.CommandContext(ctx3, sm.binPath, args...)
|
||||
cmd = exec.CommandContext(ctx3, sm.smartctlPath, args...)
|
||||
output, err = cmd.CombinedOutput()
|
||||
hasValidData = sm.parseSmartOutput(deviceInfo, output)
|
||||
|
||||
@@ -498,10 +548,12 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
// smartctlArgs returns the arguments for the smartctl command
|
||||
// based on the device type and whether to include standby mode
|
||||
func (sm *SmartManager) smartctlArgs(deviceInfo *DeviceInfo, includeStandby bool) []string {
|
||||
args := make([]string, 0, 7)
|
||||
args := make([]string, 0, 9)
|
||||
var deviceType, parserType string
|
||||
|
||||
if deviceInfo != nil {
|
||||
deviceType := strings.ToLower(deviceInfo.Type)
|
||||
deviceType = strings.ToLower(deviceInfo.Type)
|
||||
parserType = strings.ToLower(deviceInfo.parserType)
|
||||
// types sometimes misidentified in scan; see github.com/henrygd/beszel/issues/1345
|
||||
if deviceType != "" && deviceType != "scsi" && deviceType != "ata" {
|
||||
args = append(args, "-d", deviceInfo.Type)
|
||||
@@ -509,6 +561,13 @@ func (sm *SmartManager) smartctlArgs(deviceInfo *DeviceInfo, includeStandby bool
|
||||
}
|
||||
|
||||
args = append(args, "-a", "--json=c")
|
||||
effectiveType := parserType
|
||||
if effectiveType == "" {
|
||||
effectiveType = deviceType
|
||||
}
|
||||
if effectiveType == "sat" || effectiveType == "ata" {
|
||||
args = append(args, "-l", "devstat")
|
||||
}
|
||||
|
||||
if includeStandby {
|
||||
args = append(args, "-n", "standby")
|
||||
@@ -569,6 +628,28 @@ func mergeDeviceLists(existing, scanned, configured []*DeviceInfo) []*DeviceInfo
|
||||
return existing
|
||||
}
|
||||
|
||||
// buildUniqueNameIndex returns devices that appear exactly once by name.
|
||||
// It is used to safely apply name-only fallbacks without RAID ambiguity.
|
||||
buildUniqueNameIndex := func(devices []*DeviceInfo) map[string]*DeviceInfo {
|
||||
counts := make(map[string]int, len(devices))
|
||||
for _, dev := range devices {
|
||||
if dev == nil || dev.Name == "" {
|
||||
continue
|
||||
}
|
||||
counts[dev.Name]++
|
||||
}
|
||||
unique := make(map[string]*DeviceInfo, len(counts))
|
||||
for _, dev := range devices {
|
||||
if dev == nil || dev.Name == "" {
|
||||
continue
|
||||
}
|
||||
if counts[dev.Name] == 1 {
|
||||
unique[dev.Name] = dev
|
||||
}
|
||||
}
|
||||
return unique
|
||||
}
|
||||
|
||||
// preserveVerifiedType copies the verified type/parser metadata from an existing
|
||||
// device record so that subsequent scans/config updates never downgrade a
|
||||
// previously verified device.
|
||||
@@ -581,69 +662,90 @@ func mergeDeviceLists(existing, scanned, configured []*DeviceInfo) []*DeviceInfo
|
||||
target.parserType = prev.parserType
|
||||
}
|
||||
|
||||
existingIndex := make(map[string]*DeviceInfo, len(existing))
|
||||
// applyConfiguredMetadata updates a matched device with any configured
|
||||
// overrides, preserving verified type data when present.
|
||||
applyConfiguredMetadata := func(existingDev, configuredDev *DeviceInfo) {
|
||||
// Only update the type if it has not been verified yet; otherwise we
|
||||
// keep the existing verified metadata intact.
|
||||
if configuredDev.Type != "" && !existingDev.typeVerified {
|
||||
newType := strings.TrimSpace(configuredDev.Type)
|
||||
existingDev.Type = newType
|
||||
existingDev.typeVerified = false
|
||||
existingDev.parserType = normalizeParserType(newType)
|
||||
}
|
||||
if configuredDev.InfoName != "" {
|
||||
existingDev.InfoName = configuredDev.InfoName
|
||||
}
|
||||
if configuredDev.Protocol != "" {
|
||||
existingDev.Protocol = configuredDev.Protocol
|
||||
}
|
||||
}
|
||||
|
||||
existingIndex := make(map[deviceKey]*DeviceInfo, len(existing))
|
||||
for _, dev := range existing {
|
||||
if dev == nil || dev.Name == "" {
|
||||
continue
|
||||
}
|
||||
existingIndex[dev.Name] = dev
|
||||
existingIndex[makeDeviceKey(dev.Name, dev.Type)] = dev
|
||||
}
|
||||
existingByName := buildUniqueNameIndex(existing)
|
||||
|
||||
finalDevices := make([]*DeviceInfo, 0, len(scanned)+len(configured))
|
||||
deviceIndex := make(map[string]*DeviceInfo, len(scanned)+len(configured))
|
||||
deviceIndex := make(map[deviceKey]*DeviceInfo, len(scanned)+len(configured))
|
||||
|
||||
// Start with the newly scanned devices so we always surface fresh metadata,
|
||||
// but ensure we retain any previously verified parser assignment.
|
||||
for _, dev := range scanned {
|
||||
if dev == nil || dev.Name == "" {
|
||||
for _, scannedDevice := range scanned {
|
||||
if scannedDevice == nil || scannedDevice.Name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Work on a copy so we can safely adjust metadata without mutating the
|
||||
// input slices that may be reused elsewhere.
|
||||
copyDev := *dev
|
||||
if prev := existingIndex[copyDev.Name]; prev != nil {
|
||||
copyDev := *scannedDevice
|
||||
key := makeDeviceKey(copyDev.Name, copyDev.Type)
|
||||
if prev := existingIndex[key]; prev != nil {
|
||||
preserveVerifiedType(©Dev, prev)
|
||||
} else if prev := existingByName[copyDev.Name]; prev != nil {
|
||||
preserveVerifiedType(©Dev, prev)
|
||||
}
|
||||
|
||||
finalDevices = append(finalDevices, ©Dev)
|
||||
deviceIndex[copyDev.Name] = finalDevices[len(finalDevices)-1]
|
||||
copyKey := makeDeviceKey(copyDev.Name, copyDev.Type)
|
||||
deviceIndex[copyKey] = finalDevices[len(finalDevices)-1]
|
||||
}
|
||||
deviceIndexByName := buildUniqueNameIndex(finalDevices)
|
||||
|
||||
// Merge configured devices on top so users can override scan results (except
|
||||
// for verified type information).
|
||||
for _, dev := range configured {
|
||||
if dev == nil || dev.Name == "" {
|
||||
for _, configuredDevice := range configured {
|
||||
if configuredDevice == nil || configuredDevice.Name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if existingDev, ok := deviceIndex[dev.Name]; ok {
|
||||
// Only update the type if it has not been verified yet; otherwise we
|
||||
// keep the existing verified metadata intact.
|
||||
if dev.Type != "" && !existingDev.typeVerified {
|
||||
newType := strings.TrimSpace(dev.Type)
|
||||
existingDev.Type = newType
|
||||
existingDev.typeVerified = false
|
||||
existingDev.parserType = normalizeParserType(newType)
|
||||
}
|
||||
if dev.InfoName != "" {
|
||||
existingDev.InfoName = dev.InfoName
|
||||
}
|
||||
if dev.Protocol != "" {
|
||||
existingDev.Protocol = dev.Protocol
|
||||
key := makeDeviceKey(configuredDevice.Name, configuredDevice.Type)
|
||||
if existingDev, ok := deviceIndex[key]; ok {
|
||||
applyConfiguredMetadata(existingDev, configuredDevice)
|
||||
continue
|
||||
}
|
||||
if existingDev := deviceIndexByName[configuredDevice.Name]; existingDev != nil {
|
||||
applyConfiguredMetadata(existingDev, configuredDevice)
|
||||
continue
|
||||
}
|
||||
|
||||
copyDev := *dev
|
||||
if prev := existingIndex[copyDev.Name]; prev != nil {
|
||||
copyDev := *configuredDevice
|
||||
key = makeDeviceKey(copyDev.Name, copyDev.Type)
|
||||
if prev := existingIndex[key]; prev != nil {
|
||||
preserveVerifiedType(©Dev, prev)
|
||||
} else if prev := existingByName[copyDev.Name]; prev != nil {
|
||||
preserveVerifiedType(©Dev, prev)
|
||||
} else if copyDev.Type != "" {
|
||||
copyDev.parserType = normalizeParserType(copyDev.Type)
|
||||
}
|
||||
|
||||
finalDevices = append(finalDevices, ©Dev)
|
||||
deviceIndex[copyDev.Name] = finalDevices[len(finalDevices)-1]
|
||||
copyKey := makeDeviceKey(copyDev.Name, copyDev.Type)
|
||||
deviceIndex[copyKey] = finalDevices[len(finalDevices)-1]
|
||||
}
|
||||
|
||||
return finalDevices
|
||||
@@ -661,12 +763,14 @@ func (sm *SmartManager) updateSmartDevices(devices []*DeviceInfo) {
|
||||
return
|
||||
}
|
||||
|
||||
validNames := make(map[string]struct{}, len(devices))
|
||||
validKeys := make(map[deviceKey]struct{}, len(devices))
|
||||
nameCounts := make(map[string]int, len(devices))
|
||||
for _, device := range devices {
|
||||
if device == nil || device.Name == "" {
|
||||
continue
|
||||
}
|
||||
validNames[device.Name] = struct{}{}
|
||||
validKeys[makeDeviceKey(device.Name, device.Type)] = struct{}{}
|
||||
nameCounts[device.Name]++
|
||||
}
|
||||
|
||||
for key, data := range sm.SmartDataMap {
|
||||
@@ -675,7 +779,11 @@ func (sm *SmartManager) updateSmartDevices(devices []*DeviceInfo) {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := validNames[data.DiskName]; ok {
|
||||
if data.DiskType == "" {
|
||||
if nameCounts[data.DiskName] == 1 {
|
||||
continue
|
||||
}
|
||||
} else if _, ok := validKeys[makeDeviceKey(data.DiskName, data.DiskType)]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -767,6 +875,14 @@ func (sm *SmartManager) parseSmartForSata(output []byte) (bool, int) {
|
||||
smartData.DiskName = data.Device.Name
|
||||
smartData.DiskType = data.Device.Type
|
||||
|
||||
// get values from ata_device_statistics if necessary
|
||||
var ataDeviceStats smart.AtaDeviceStatistics
|
||||
if smartData.Temperature == 0 {
|
||||
if temp := findAtaDeviceStatisticsValue(&data, &ataDeviceStats, 5, "Current Temperature", 0, 255); temp != nil {
|
||||
smartData.Temperature = uint8(*temp)
|
||||
}
|
||||
}
|
||||
|
||||
// update SmartAttributes
|
||||
smartData.Attributes = make([]*smart.SmartAttribute, 0, len(data.AtaSmartAttributes.Table))
|
||||
for _, attr := range data.AtaSmartAttributes.Table {
|
||||
@@ -801,6 +917,36 @@ func getSmartStatus(temperature uint8, passed bool) string {
|
||||
}
|
||||
}
|
||||
|
||||
// findAtaDeviceStatisticsEntry centralizes ATA devstat lookups so additional
|
||||
// metrics can be pulled from the same structure in the future.
|
||||
func findAtaDeviceStatisticsValue(data *smart.SmartInfoForSata, ataDeviceStats *smart.AtaDeviceStatistics, entryNumber uint8, entryName string, minValue, maxValue int64) *int64 {
|
||||
if len(ataDeviceStats.Pages) == 0 {
|
||||
if len(data.AtaDeviceStatistics) == 0 {
|
||||
return nil
|
||||
}
|
||||
if err := json.Unmarshal(data.AtaDeviceStatistics, ataDeviceStats); err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
for pageIdx := range ataDeviceStats.Pages {
|
||||
page := &ataDeviceStats.Pages[pageIdx]
|
||||
if page.Number != entryNumber {
|
||||
continue
|
||||
}
|
||||
for entryIdx := range page.Table {
|
||||
entry := &page.Table[entryIdx]
|
||||
if !strings.EqualFold(entry.Name, entryName) {
|
||||
continue
|
||||
}
|
||||
if entry.Value == nil || *entry.Value < minValue || *entry.Value > maxValue {
|
||||
return nil
|
||||
}
|
||||
return entry.Value
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sm *SmartManager) parseSmartForScsi(output []byte) (bool, int) {
|
||||
var data smart.SmartInfoForScsi
|
||||
|
||||
@@ -1014,11 +1160,17 @@ func NewSmartManager() (*SmartManager, error) {
|
||||
}
|
||||
sm.refreshExcludedDevices()
|
||||
path, err := sm.detectSmartctl()
|
||||
slog.Debug("smartctl", "path", path, "err", err)
|
||||
if err != nil {
|
||||
slog.Debug(err.Error())
|
||||
// Keep the previous fail-fast behavior unless this Linux host exposes
|
||||
// eMMC or mdraid health via sysfs, in which case smartctl is optional.
|
||||
if runtime.GOOS == "linux" {
|
||||
if len(scanEmmcDevices()) > 0 || len(scanMdraidDevices()) > 0 {
|
||||
return sm, nil
|
||||
}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
slog.Debug("smartctl", "path", path)
|
||||
sm.binPath = path
|
||||
sm.smartctlPath = path
|
||||
return sm, nil
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package agent
|
||||
|
||||
@@ -89,6 +88,111 @@ func TestParseSmartForSata(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSmartForSataDeviceStatisticsTemperature(t *testing.T) {
|
||||
jsonPayload := []byte(`{
|
||||
"smartctl": {"exit_status": 0},
|
||||
"device": {"name": "/dev/sdb", "type": "sat"},
|
||||
"model_name": "SanDisk SSD U110 16GB",
|
||||
"serial_number": "DEVSTAT123",
|
||||
"firmware_version": "U21B001",
|
||||
"user_capacity": {"bytes": 16013942784},
|
||||
"smart_status": {"passed": true},
|
||||
"ata_smart_attributes": {"table": []},
|
||||
"ata_device_statistics": {
|
||||
"pages": [
|
||||
{
|
||||
"number": 5,
|
||||
"name": "Temperature Statistics",
|
||||
"table": [
|
||||
{"name": "Current Temperature", "value": 22, "flags": {"valid": true}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}`)
|
||||
|
||||
sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
|
||||
hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
|
||||
require.True(t, hasData)
|
||||
assert.Equal(t, 0, exitStatus)
|
||||
|
||||
deviceData, ok := sm.SmartDataMap["DEVSTAT123"]
|
||||
require.True(t, ok, "expected smart data entry for serial DEVSTAT123")
|
||||
assert.Equal(t, uint8(22), deviceData.Temperature)
|
||||
}
|
||||
|
||||
func TestParseSmartForSataAtaDeviceStatistics(t *testing.T) {
|
||||
// tests that ata_device_statistics values are parsed correctly
|
||||
jsonPayload := []byte(`{
|
||||
"smartctl": {"exit_status": 0},
|
||||
"device": {"name": "/dev/sdb", "type": "sat"},
|
||||
"model_name": "SanDisk SSD U110 16GB",
|
||||
"serial_number": "lksjfh23lhj",
|
||||
"firmware_version": "U21B001",
|
||||
"user_capacity": {"bytes": 16013942784},
|
||||
"smart_status": {"passed": true},
|
||||
"ata_smart_attributes": {"table": []},
|
||||
"ata_device_statistics": {
|
||||
"pages": [
|
||||
{
|
||||
"number": 5,
|
||||
"name": "Temperature Statistics",
|
||||
"table": [
|
||||
{"name": "Current Temperature", "value": 43, "flags": {"valid": true}},
|
||||
{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}`)
|
||||
|
||||
sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
|
||||
hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
|
||||
require.True(t, hasData)
|
||||
assert.Equal(t, 0, exitStatus)
|
||||
|
||||
deviceData, ok := sm.SmartDataMap["lksjfh23lhj"]
|
||||
require.True(t, ok, "expected smart data entry for serial lksjfh23lhj")
|
||||
assert.Equal(t, uint8(43), deviceData.Temperature)
|
||||
}
|
||||
|
||||
func TestParseSmartForSataNegativeDeviceStatistics(t *testing.T) {
|
||||
// Tests that negative values in ata_device_statistics (e.g. min operating temp)
|
||||
// do not cause the entire SAT parser to fail.
|
||||
jsonPayload := []byte(`{
|
||||
"smartctl": {"exit_status": 0},
|
||||
"device": {"name": "/dev/sdb", "type": "sat"},
|
||||
"model_name": "SanDisk SSD U110 16GB",
|
||||
"serial_number": "NEGATIVE123",
|
||||
"firmware_version": "U21B001",
|
||||
"user_capacity": {"bytes": 16013942784},
|
||||
"smart_status": {"passed": true},
|
||||
"temperature": {"current": 38},
|
||||
"ata_smart_attributes": {"table": []},
|
||||
"ata_device_statistics": {
|
||||
"pages": [
|
||||
{
|
||||
"number": 5,
|
||||
"name": "Temperature Statistics",
|
||||
"table": [
|
||||
{"name": "Current Temperature", "value": 38, "flags": {"valid": true}},
|
||||
{"name": "Specified Minimum Operating Temperature", "value": -20, "flags": {"valid": true}}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}`)
|
||||
|
||||
sm := &SmartManager{SmartDataMap: make(map[string]*smart.SmartData)}
|
||||
hasData, exitStatus := sm.parseSmartForSata(jsonPayload)
|
||||
require.True(t, hasData)
|
||||
assert.Equal(t, 0, exitStatus)
|
||||
|
||||
deviceData, ok := sm.SmartDataMap["NEGATIVE123"]
|
||||
require.True(t, ok, "expected smart data entry for serial NEGATIVE123")
|
||||
assert.Equal(t, uint8(38), deviceData.Temperature)
|
||||
}
|
||||
|
||||
func TestParseSmartForSataParentheticalRawValue(t *testing.T) {
|
||||
jsonPayload := []byte(`{
|
||||
"smartctl": {"exit_status": 0},
|
||||
@@ -195,6 +299,24 @@ func TestDevicesSnapshotReturnsCopy(t *testing.T) {
|
||||
assert.Len(t, snapshot, 2)
|
||||
}
|
||||
|
||||
func TestScanDevicesWithEnvOverrideAndSeparator(t *testing.T) {
|
||||
t.Setenv("SMART_DEVICES_SEPARATOR", "|")
|
||||
t.Setenv("SMART_DEVICES", "/dev/sda:jmb39x-q,0|/dev/nvme0:nvme")
|
||||
|
||||
sm := &SmartManager{
|
||||
SmartDataMap: make(map[string]*smart.SmartData),
|
||||
}
|
||||
|
||||
err := sm.ScanDevices(true)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.Len(t, sm.SmartDevices, 2)
|
||||
assert.Equal(t, "/dev/sda", sm.SmartDevices[0].Name)
|
||||
assert.Equal(t, "jmb39x-q,0", sm.SmartDevices[0].Type)
|
||||
assert.Equal(t, "/dev/nvme0", sm.SmartDevices[1].Name)
|
||||
assert.Equal(t, "nvme", sm.SmartDevices[1].Type)
|
||||
}
|
||||
|
||||
func TestScanDevicesWithEnvOverride(t *testing.T) {
|
||||
t.Setenv("SMART_DEVICES", "/dev/sda:sat, /dev/nvme0:nvme")
|
||||
|
||||
@@ -249,15 +371,21 @@ func TestSmartctlArgs(t *testing.T) {
|
||||
|
||||
sataDevice := &DeviceInfo{Name: "/dev/sda", Type: "sat"}
|
||||
assert.Equal(t,
|
||||
[]string{"-d", "sat", "-a", "--json=c", "-n", "standby", "/dev/sda"},
|
||||
[]string{"-d", "sat", "-a", "--json=c", "-l", "devstat", "-n", "standby", "/dev/sda"},
|
||||
sm.smartctlArgs(sataDevice, true),
|
||||
)
|
||||
|
||||
assert.Equal(t,
|
||||
[]string{"-d", "sat", "-a", "--json=c", "/dev/sda"},
|
||||
[]string{"-d", "sat", "-a", "--json=c", "-l", "devstat", "/dev/sda"},
|
||||
sm.smartctlArgs(sataDevice, false),
|
||||
)
|
||||
|
||||
nvmeDevice := &DeviceInfo{Name: "/dev/nvme0", Type: "nvme"}
|
||||
assert.Equal(t,
|
||||
[]string{"-d", "nvme", "-a", "--json=c", "-n", "standby", "/dev/nvme0"},
|
||||
sm.smartctlArgs(nvmeDevice, true),
|
||||
)
|
||||
|
||||
assert.Equal(t,
|
||||
[]string{"-a", "--json=c", "-n", "standby"},
|
||||
sm.smartctlArgs(nil, true),
|
||||
@@ -442,6 +570,88 @@ func TestMergeDeviceListsUpdatesTypeWhenUnverified(t *testing.T) {
|
||||
assert.Equal(t, "", device.parserType)
|
||||
}
|
||||
|
||||
func TestMergeDeviceListsHandlesDevicesWithSameNameAndDifferentTypes(t *testing.T) {
|
||||
// There are use cases where the same device name is re-used,
|
||||
// for example, a RAID controller with multiple drives.
|
||||
scanned := []*DeviceInfo{
|
||||
{Name: "/dev/sda", Type: "megaraid,0"},
|
||||
{Name: "/dev/sda", Type: "megaraid,1"},
|
||||
{Name: "/dev/sda", Type: "megaraid,2"},
|
||||
}
|
||||
|
||||
merged := mergeDeviceLists(nil, scanned, nil)
|
||||
require.Len(t, merged, 3, "should have 3 separate devices for RAID controller")
|
||||
|
||||
byKey := make(map[string]*DeviceInfo, len(merged))
|
||||
for _, dev := range merged {
|
||||
key := dev.Name + "|" + dev.Type
|
||||
byKey[key] = dev
|
||||
}
|
||||
|
||||
assert.Contains(t, byKey, "/dev/sda|megaraid,0")
|
||||
assert.Contains(t, byKey, "/dev/sda|megaraid,1")
|
||||
assert.Contains(t, byKey, "/dev/sda|megaraid,2")
|
||||
}
|
||||
|
||||
func TestMergeDeviceListsHandlesMixedRAIDAndRegular(t *testing.T) {
|
||||
// Test mixing RAID drives with regular devices
|
||||
scanned := []*DeviceInfo{
|
||||
{Name: "/dev/sda", Type: "megaraid,0"},
|
||||
{Name: "/dev/sda", Type: "megaraid,1"},
|
||||
{Name: "/dev/sdb", Type: "sat"},
|
||||
{Name: "/dev/nvme0", Type: "nvme"},
|
||||
}
|
||||
|
||||
merged := mergeDeviceLists(nil, scanned, nil)
|
||||
require.Len(t, merged, 4, "should have 4 separate devices")
|
||||
|
||||
byKey := make(map[string]*DeviceInfo, len(merged))
|
||||
for _, dev := range merged {
|
||||
key := dev.Name + "|" + dev.Type
|
||||
byKey[key] = dev
|
||||
}
|
||||
|
||||
assert.Contains(t, byKey, "/dev/sda|megaraid,0")
|
||||
assert.Contains(t, byKey, "/dev/sda|megaraid,1")
|
||||
assert.Contains(t, byKey, "/dev/sdb|sat")
|
||||
assert.Contains(t, byKey, "/dev/nvme0|nvme")
|
||||
}
|
||||
|
||||
func TestUpdateSmartDevicesPreservesRAIDDrives(t *testing.T) {
|
||||
// Test that updateSmartDevices correctly validates RAID drives using composite keys
|
||||
sm := &SmartManager{
|
||||
SmartDevices: []*DeviceInfo{
|
||||
{Name: "/dev/sda", Type: "megaraid,0"},
|
||||
{Name: "/dev/sda", Type: "megaraid,1"},
|
||||
},
|
||||
SmartDataMap: map[string]*smart.SmartData{
|
||||
"serial-0": {
|
||||
DiskName: "/dev/sda",
|
||||
DiskType: "megaraid,0",
|
||||
SerialNumber: "serial-0",
|
||||
},
|
||||
"serial-1": {
|
||||
DiskName: "/dev/sda",
|
||||
DiskType: "megaraid,1",
|
||||
SerialNumber: "serial-1",
|
||||
},
|
||||
"serial-stale": {
|
||||
DiskName: "/dev/sda",
|
||||
DiskType: "megaraid,2",
|
||||
SerialNumber: "serial-stale",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
sm.updateSmartDevices(sm.SmartDevices)
|
||||
|
||||
// serial-0 and serial-1 should be preserved (matching devices exist)
|
||||
assert.Contains(t, sm.SmartDataMap, "serial-0")
|
||||
assert.Contains(t, sm.SmartDataMap, "serial-1")
|
||||
// serial-stale should be removed (no matching device)
|
||||
assert.NotContains(t, sm.SmartDataMap, "serial-stale")
|
||||
}
|
||||
|
||||
func TestParseSmartOutputMarksVerified(t *testing.T) {
|
||||
fixturePath := filepath.Join("test-data", "smart", "nvme0.json")
|
||||
data, err := os.ReadFile(fixturePath)
|
||||
@@ -589,6 +799,182 @@ func TestIsVirtualDeviceScsi(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindAtaDeviceStatisticsValue(t *testing.T) {
|
||||
val42 := int64(42)
|
||||
val100 := int64(100)
|
||||
valMinus20 := int64(-20)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
data smart.SmartInfoForSata
|
||||
ataDeviceStats smart.AtaDeviceStatistics
|
||||
entryNumber uint8
|
||||
entryName string
|
||||
minValue int64
|
||||
maxValue int64
|
||||
expectedValue *int64
|
||||
}{
|
||||
{
|
||||
name: "value in ataDeviceStats",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Current Temperature", Value: &val42},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: &val42,
|
||||
},
|
||||
{
|
||||
name: "value unmarshaled from data",
|
||||
data: smart.SmartInfoForSata{
|
||||
AtaDeviceStatistics: []byte(`{"pages":[{"number":5,"table":[{"name":"Current Temperature","value":100}]}]}`),
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 255,
|
||||
expectedValue: &val100,
|
||||
},
|
||||
{
|
||||
name: "value out of range (too high)",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Current Temperature", Value: &val100},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 50,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "value out of range (too low)",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Min Temp", Value: &valMinus20},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Min Temp",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "no statistics available",
|
||||
data: smart.SmartInfoForSata{},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 255,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "wrong page number",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 1,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Current Temperature", Value: &val42},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "wrong entry name",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Other Stat", Value: &val42},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: nil,
|
||||
},
|
||||
{
|
||||
name: "case insensitive name match",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "CURRENT TEMPERATURE", Value: &val42},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: &val42,
|
||||
},
|
||||
{
|
||||
name: "entry value is nil",
|
||||
ataDeviceStats: smart.AtaDeviceStatistics{
|
||||
Pages: []smart.AtaDeviceStatisticsPage{
|
||||
{
|
||||
Number: 5,
|
||||
Table: []smart.AtaDeviceStatisticsEntry{
|
||||
{Name: "Current Temperature", Value: nil},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
entryNumber: 5,
|
||||
entryName: "Current Temperature",
|
||||
minValue: 0,
|
||||
maxValue: 100,
|
||||
expectedValue: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := findAtaDeviceStatisticsValue(&tt.data, &tt.ataDeviceStats, tt.entryNumber, tt.entryName, tt.minValue, tt.maxValue)
|
||||
if tt.expectedValue == nil {
|
||||
assert.Nil(t, result)
|
||||
} else {
|
||||
require.NotNil(t, result)
|
||||
assert.Equal(t, *tt.expectedValue, *result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRefreshExcludedDevices(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
@@ -7,12 +7,13 @@ import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent/battery"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/agent/zfs"
|
||||
"github.com/henrygd/beszel/internal/entities/container"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
|
||||
@@ -107,7 +108,7 @@ func (a *Agent) refreshSystemDetails() {
|
||||
}
|
||||
|
||||
// zfs
|
||||
if _, err := getARCSize(); err != nil {
|
||||
if _, err := zfs.ARCSize(); err != nil {
|
||||
slog.Debug("Not monitoring ZFS ARC", "err", err)
|
||||
} else {
|
||||
a.zfs = true
|
||||
@@ -127,13 +128,13 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
|
||||
// cpu metrics
|
||||
cpuMetrics, err := getCpuMetrics(cacheTimeMs)
|
||||
if err == nil {
|
||||
systemStats.Cpu = twoDecimals(cpuMetrics.Total)
|
||||
systemStats.Cpu = utils.TwoDecimals(cpuMetrics.Total)
|
||||
systemStats.CpuBreakdown = []float64{
|
||||
twoDecimals(cpuMetrics.User),
|
||||
twoDecimals(cpuMetrics.System),
|
||||
twoDecimals(cpuMetrics.Iowait),
|
||||
twoDecimals(cpuMetrics.Steal),
|
||||
twoDecimals(cpuMetrics.Idle),
|
||||
utils.TwoDecimals(cpuMetrics.User),
|
||||
utils.TwoDecimals(cpuMetrics.System),
|
||||
utils.TwoDecimals(cpuMetrics.Iowait),
|
||||
utils.TwoDecimals(cpuMetrics.Steal),
|
||||
utils.TwoDecimals(cpuMetrics.Idle),
|
||||
}
|
||||
} else {
|
||||
slog.Error("Error getting cpu metrics", "err", err)
|
||||
@@ -157,8 +158,8 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
|
||||
// memory
|
||||
if v, err := mem.VirtualMemory(); err == nil {
|
||||
// swap
|
||||
systemStats.Swap = bytesToGigabytes(v.SwapTotal)
|
||||
systemStats.SwapUsed = bytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
|
||||
systemStats.Swap = utils.BytesToGigabytes(v.SwapTotal)
|
||||
systemStats.SwapUsed = utils.BytesToGigabytes(v.SwapTotal - v.SwapFree - v.SwapCached)
|
||||
// cache + buffers value for default mem calculation
|
||||
// note: gopsutil automatically adds SReclaimable to v.Cached
|
||||
cacheBuff := v.Cached + v.Buffers - v.Shared
|
||||
@@ -178,16 +179,16 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
|
||||
// }
|
||||
// subtract ZFS ARC size from used memory and add as its own category
|
||||
if a.zfs {
|
||||
if arcSize, _ := getARCSize(); arcSize > 0 && arcSize < v.Used {
|
||||
if arcSize, _ := zfs.ARCSize(); arcSize > 0 && arcSize < v.Used {
|
||||
v.Used = v.Used - arcSize
|
||||
v.UsedPercent = float64(v.Used) / float64(v.Total) * 100.0
|
||||
systemStats.MemZfsArc = bytesToGigabytes(arcSize)
|
||||
systemStats.MemZfsArc = utils.BytesToGigabytes(arcSize)
|
||||
}
|
||||
}
|
||||
systemStats.Mem = bytesToGigabytes(v.Total)
|
||||
systemStats.MemBuffCache = bytesToGigabytes(cacheBuff)
|
||||
systemStats.MemUsed = bytesToGigabytes(v.Used)
|
||||
systemStats.MemPct = twoDecimals(v.UsedPercent)
|
||||
systemStats.Mem = utils.BytesToGigabytes(v.Total)
|
||||
systemStats.MemBuffCache = utils.BytesToGigabytes(cacheBuff)
|
||||
systemStats.MemUsed = utils.BytesToGigabytes(v.Used)
|
||||
systemStats.MemPct = utils.TwoDecimals(v.UsedPercent)
|
||||
}
|
||||
|
||||
// disk usage
|
||||
@@ -250,32 +251,6 @@ func (a *Agent) getSystemStats(cacheTimeMs uint16) system.Stats {
|
||||
return systemStats
|
||||
}
|
||||
|
||||
// Returns the size of the ZFS ARC memory cache in bytes
|
||||
func getARCSize() (uint64, error) {
|
||||
file, err := os.Open("/proc/spl/kstat/zfs/arcstats")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
// Scan the lines
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.HasPrefix(line, "size") {
|
||||
// Example line: size 4 15032385536
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 3 {
|
||||
return 0, err
|
||||
}
|
||||
// Return the size as uint64
|
||||
return strconv.ParseUint(fields[2], 10, 64)
|
||||
}
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("failed to parse size field")
|
||||
}
|
||||
|
||||
// getOsPrettyName attempts to get the pretty OS name from /etc/os-release on Linux systems
|
||||
func getOsPrettyName() (string, error) {
|
||||
file, err := os.Open("/etc/os-release")
|
||||
|
||||
@@ -8,12 +8,14 @@ import (
|
||||
"log/slog"
|
||||
"maps"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
)
|
||||
|
||||
@@ -28,11 +30,36 @@ type systemdManager struct {
|
||||
patterns []string
|
||||
}
|
||||
|
||||
// isSystemdAvailable reports whether the host appears to use systemd, so that
// callers can avoid unnecessary connection attempts (#1548).
//
// It returns true as soon as any of the well-known systemd / D-Bus system bus
// paths exists. Otherwise it falls back to reading /proc/1/comm and reports
// whether the trimmed contents equal "systemd"; if that file cannot be read the
// result is false.
func isSystemdAvailable() bool {
	for _, candidate := range [...]string{
		"/run/systemd/system",
		"/run/dbus/system_bus_socket",
		"/var/run/dbus/system_bus_socket",
	} {
		if _, statErr := os.Stat(candidate); statErr == nil {
			return true
		}
	}

	// None of the marker paths exist; fall back to the name of PID 1.
	comm, readErr := os.ReadFile("/proc/1/comm")
	if readErr != nil {
		return false
	}
	return strings.TrimSpace(string(comm)) == "systemd"
}
|
||||
|
||||
// newSystemdManager creates a new systemdManager.
|
||||
func newSystemdManager() (*systemdManager, error) {
|
||||
if skipSystemd, _ := GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" {
|
||||
if skipSystemd, _ := utils.GetEnv("SKIP_SYSTEMD"); skipSystemd == "true" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Check if systemd is available on the system before attempting connection
|
||||
if !isSystemdAvailable() {
|
||||
slog.Debug("Systemd not available")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
conn, err := dbus.NewSystemConnectionContext(context.Background())
|
||||
if err != nil {
|
||||
slog.Debug("Error connecting to systemd", "err", err, "ref", "https://beszel.dev/guide/systemd")
|
||||
@@ -118,13 +145,27 @@ func (sm *systemdManager) getServiceStats(conn *dbus.Conn, refresh bool) []*syst
|
||||
return nil
|
||||
}
|
||||
|
||||
// Track which units are currently present to remove stale entries
|
||||
currentUnits := make(map[string]struct{}, len(units))
|
||||
|
||||
for _, unit := range units {
|
||||
currentUnits[unit.Name] = struct{}{}
|
||||
service, err := sm.updateServiceStats(conn, unit)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
services = append(services, service)
|
||||
}
|
||||
|
||||
// Remove services that no longer exist in systemd
|
||||
sm.Lock()
|
||||
for unitName := range sm.serviceStatsMap {
|
||||
if _, exists := currentUnits[unitName]; !exists {
|
||||
delete(sm.serviceStatsMap, unitName)
|
||||
}
|
||||
}
|
||||
sm.Unlock()
|
||||
|
||||
sm.hasFreshStats = true
|
||||
return services
|
||||
}
|
||||
@@ -254,13 +295,13 @@ func unescapeServiceName(name string) string {
|
||||
// otherwise defaults to "*service".
|
||||
func getServicePatterns() []string {
|
||||
patterns := []string{}
|
||||
if envPatterns, _ := GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
|
||||
if envPatterns, _ := utils.GetEnv("SERVICE_PATTERNS"); envPatterns != "" {
|
||||
for pattern := range strings.SplitSeq(envPatterns, ",") {
|
||||
pattern = strings.TrimSpace(pattern)
|
||||
if pattern == "" {
|
||||
continue
|
||||
}
|
||||
if !strings.HasSuffix(pattern, ".service") {
|
||||
if !strings.HasSuffix(pattern, "timer") && !strings.HasSuffix(pattern, ".service") {
|
||||
pattern += ".service"
|
||||
}
|
||||
patterns = append(patterns, pattern)
|
||||
|
||||
@@ -19,11 +19,11 @@ func TestSystemdManagerGetServiceStats(t *testing.T) {
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test with refresh = true
|
||||
result := manager.getServiceStats(true)
|
||||
result := manager.getServiceStats("any-service", true)
|
||||
assert.Nil(t, result)
|
||||
|
||||
// Test with refresh = false
|
||||
result = manager.getServiceStats(false)
|
||||
result = manager.getServiceStats("any-service", false)
|
||||
assert.Nil(t, result)
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -48,6 +49,35 @@ func TestUnescapeServiceNameInvalid(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsSystemdAvailable(t *testing.T) {
|
||||
// Note: This test's result will vary based on the actual system running the tests
|
||||
// On systems with systemd, it should return true
|
||||
// On systems without systemd, it should return false
|
||||
result := isSystemdAvailable()
|
||||
|
||||
// Check if either the /run/systemd/system directory exists or PID 1 is systemd
|
||||
runSystemdExists := false
|
||||
if _, err := os.Stat("/run/systemd/system"); err == nil {
|
||||
runSystemdExists = true
|
||||
}
|
||||
|
||||
pid1IsSystemd := false
|
||||
if data, err := os.ReadFile("/proc/1/comm"); err == nil {
|
||||
pid1IsSystemd = strings.TrimSpace(string(data)) == "systemd"
|
||||
}
|
||||
|
||||
expected := runSystemdExists || pid1IsSystemd
|
||||
|
||||
assert.Equal(t, expected, result, "isSystemdAvailable should correctly detect systemd presence")
|
||||
|
||||
// Log the result for informational purposes
|
||||
if result {
|
||||
t.Log("Systemd is available on this system")
|
||||
} else {
|
||||
t.Log("Systemd is not available on this system")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetServicePatterns(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -126,6 +156,13 @@ func TestGetServicePatterns(t *testing.T) {
|
||||
expected: []string{"*nginx*.service", "*apache*.service"},
|
||||
cleanupEnvVars: true,
|
||||
},
|
||||
{
|
||||
name: "opt into timer monitoring",
|
||||
prefixedEnv: "nginx.service,docker,apache.timer",
|
||||
unprefixedEnv: "",
|
||||
expected: []string{"nginx.service", "docker.service", "apache.timer"},
|
||||
cleanupEnvVars: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
700
agent/test-data/amdgpu.ids
Normal file
700
agent/test-data/amdgpu.ids
Normal file
@@ -0,0 +1,700 @@
|
||||
# List of AMDGPU IDs
|
||||
#
|
||||
# Syntax:
|
||||
# device_id, revision_id, product_name <-- single tab after comma
|
||||
|
||||
1.0.0
|
||||
1114, C2, AMD Radeon 860M Graphics
|
||||
1114, C3, AMD Radeon 840M Graphics
|
||||
1114, D2, AMD Radeon 860M Graphics
|
||||
1114, D3, AMD Radeon 840M Graphics
|
||||
1309, 00, AMD Radeon R7 Graphics
|
||||
130A, 00, AMD Radeon R6 Graphics
|
||||
130B, 00, AMD Radeon R4 Graphics
|
||||
130C, 00, AMD Radeon R7 Graphics
|
||||
130D, 00, AMD Radeon R6 Graphics
|
||||
130E, 00, AMD Radeon R5 Graphics
|
||||
130F, 00, AMD Radeon R7 Graphics
|
||||
130F, D4, AMD Radeon R7 Graphics
|
||||
130F, D5, AMD Radeon R7 Graphics
|
||||
130F, D6, AMD Radeon R7 Graphics
|
||||
130F, D7, AMD Radeon R7 Graphics
|
||||
1313, 00, AMD Radeon R7 Graphics
|
||||
1313, D4, AMD Radeon R7 Graphics
|
||||
1313, D5, AMD Radeon R7 Graphics
|
||||
1313, D6, AMD Radeon R7 Graphics
|
||||
1315, 00, AMD Radeon R5 Graphics
|
||||
1315, D4, AMD Radeon R5 Graphics
|
||||
1315, D5, AMD Radeon R5 Graphics
|
||||
1315, D6, AMD Radeon R5 Graphics
|
||||
1315, D7, AMD Radeon R5 Graphics
|
||||
1316, 00, AMD Radeon R5 Graphics
|
||||
1318, 00, AMD Radeon R5 Graphics
|
||||
131B, 00, AMD Radeon R4 Graphics
|
||||
131C, 00, AMD Radeon R7 Graphics
|
||||
131D, 00, AMD Radeon R6 Graphics
|
||||
1435, AE, AMD Custom GPU 0932
|
||||
1506, C1, AMD Radeon 610M
|
||||
1506, C2, AMD Radeon 610M
|
||||
1506, C3, AMD Radeon 610M
|
||||
1506, C4, AMD Radeon 610M
|
||||
150E, C1, AMD Radeon 890M Graphics
|
||||
150E, C4, AMD Radeon 890M Graphics
|
||||
150E, C5, AMD Radeon 890M Graphics
|
||||
150E, C6, AMD Radeon 890M Graphics
|
||||
150E, D1, AMD Radeon 890M Graphics
|
||||
150E, D2, AMD Radeon 890M Graphics
|
||||
150E, D3, AMD Radeon 890M Graphics
|
||||
1586, C1, Radeon 8060S Graphics
|
||||
1586, C2, Radeon 8050S Graphics
|
||||
1586, C4, Radeon 8050S Graphics
|
||||
1586, D1, Radeon 8060S Graphics
|
||||
1586, D2, Radeon 8050S Graphics
|
||||
1586, D4, Radeon 8050S Graphics
|
||||
1586, D5, Radeon 8040S Graphics
|
||||
15BF, 00, AMD Radeon 780M Graphics
|
||||
15BF, 01, AMD Radeon 760M Graphics
|
||||
15BF, 02, AMD Radeon 780M Graphics
|
||||
15BF, 03, AMD Radeon 760M Graphics
|
||||
15BF, C1, AMD Radeon 780M Graphics
|
||||
15BF, C2, AMD Radeon 780M Graphics
|
||||
15BF, C3, AMD Radeon 760M Graphics
|
||||
15BF, C4, AMD Radeon 780M Graphics
|
||||
15BF, C5, AMD Radeon 740M Graphics
|
||||
15BF, C6, AMD Radeon 780M Graphics
|
||||
15BF, C7, AMD Radeon 780M Graphics
|
||||
15BF, C8, AMD Radeon 760M Graphics
|
||||
15BF, C9, AMD Radeon 780M Graphics
|
||||
15BF, CA, AMD Radeon 740M Graphics
|
||||
15BF, CB, AMD Radeon 760M Graphics
|
||||
15BF, CC, AMD Radeon 740M Graphics
|
||||
15BF, CD, AMD Radeon 760M Graphics
|
||||
15BF, CF, AMD Radeon 780M Graphics
|
||||
15BF, D0, AMD Radeon 780M Graphics
|
||||
15BF, D1, AMD Radeon 780M Graphics
|
||||
15BF, D2, AMD Radeon 780M Graphics
|
||||
15BF, D3, AMD Radeon 780M Graphics
|
||||
15BF, D4, AMD Radeon 780M Graphics
|
||||
15BF, D5, AMD Radeon 760M Graphics
|
||||
15BF, D6, AMD Radeon 760M Graphics
|
||||
15BF, D7, AMD Radeon 780M Graphics
|
||||
15BF, D8, AMD Radeon 740M Graphics
|
||||
15BF, D9, AMD Radeon 780M Graphics
|
||||
15BF, DA, AMD Radeon 780M Graphics
|
||||
15BF, DB, AMD Radeon 760M Graphics
|
||||
15BF, DC, AMD Radeon 760M Graphics
|
||||
15BF, DD, AMD Radeon 780M Graphics
|
||||
15BF, DE, AMD Radeon 740M Graphics
|
||||
15BF, DF, AMD Radeon 760M Graphics
|
||||
15BF, F0, AMD Radeon 760M Graphics
|
||||
15C8, C1, AMD Radeon 740M Graphics
|
||||
15C8, C2, AMD Radeon 740M Graphics
|
||||
15C8, C3, AMD Radeon 740M Graphics
|
||||
15C8, C4, AMD Radeon 740M Graphics
|
||||
15C8, D1, AMD Radeon 740M Graphics
|
||||
15C8, D2, AMD Radeon 740M Graphics
|
||||
15C8, D3, AMD Radeon 740M Graphics
|
||||
15C8, D4, AMD Radeon 740M Graphics
|
||||
15D8, 00, AMD Radeon RX Vega 8 Graphics WS
|
||||
15D8, 91, AMD Radeon Vega 3 Graphics
|
||||
15D8, 91, AMD Ryzen Embedded R1606G with Radeon Vega Gfx
|
||||
15D8, 92, AMD Radeon Vega 3 Graphics
|
||||
15D8, 92, AMD Ryzen Embedded R1505G with Radeon Vega Gfx
|
||||
15D8, 93, AMD Radeon Vega 1 Graphics
|
||||
15D8, A1, AMD Radeon Vega 10 Graphics
|
||||
15D8, A2, AMD Radeon Vega 8 Graphics
|
||||
15D8, A3, AMD Radeon Vega 6 Graphics
|
||||
15D8, A4, AMD Radeon Vega 3 Graphics
|
||||
15D8, B1, AMD Radeon Vega 10 Graphics
|
||||
15D8, B2, AMD Radeon Vega 8 Graphics
|
||||
15D8, B3, AMD Radeon Vega 6 Graphics
|
||||
15D8, B4, AMD Radeon Vega 3 Graphics
|
||||
15D8, C1, AMD Radeon Vega 10 Graphics
|
||||
15D8, C2, AMD Radeon Vega 8 Graphics
|
||||
15D8, C3, AMD Radeon Vega 6 Graphics
|
||||
15D8, C4, AMD Radeon Vega 3 Graphics
|
||||
15D8, C5, AMD Radeon Vega 3 Graphics
|
||||
15D8, C8, AMD Radeon Vega 11 Graphics
|
||||
15D8, C9, AMD Radeon Vega 8 Graphics
|
||||
15D8, CA, AMD Radeon Vega 11 Graphics
|
||||
15D8, CB, AMD Radeon Vega 8 Graphics
|
||||
15D8, CC, AMD Radeon Vega 3 Graphics
|
||||
15D8, CE, AMD Radeon Vega 3 Graphics
|
||||
15D8, CF, AMD Ryzen Embedded R1305G with Radeon Vega Gfx
|
||||
15D8, D1, AMD Radeon Vega 10 Graphics
|
||||
15D8, D2, AMD Radeon Vega 8 Graphics
|
||||
15D8, D3, AMD Radeon Vega 6 Graphics
|
||||
15D8, D4, AMD Radeon Vega 3 Graphics
|
||||
15D8, D8, AMD Radeon Vega 11 Graphics
|
||||
15D8, D9, AMD Radeon Vega 8 Graphics
|
||||
15D8, DA, AMD Radeon Vega 11 Graphics
|
||||
15D8, DB, AMD Radeon Vega 3 Graphics
|
||||
15D8, DB, AMD Radeon Vega 8 Graphics
|
||||
15D8, DC, AMD Radeon Vega 3 Graphics
|
||||
15D8, DD, AMD Radeon Vega 3 Graphics
|
||||
15D8, DE, AMD Radeon Vega 3 Graphics
|
||||
15D8, DF, AMD Radeon Vega 3 Graphics
|
||||
15D8, E3, AMD Radeon Vega 3 Graphics
|
||||
15D8, E4, AMD Ryzen Embedded R1102G with Radeon Vega Gfx
|
||||
15DD, 81, AMD Ryzen Embedded V1807B with Radeon Vega Gfx
|
||||
15DD, 82, AMD Ryzen Embedded V1756B with Radeon Vega Gfx
|
||||
15DD, 83, AMD Ryzen Embedded V1605B with Radeon Vega Gfx
|
||||
15DD, 84, AMD Radeon Vega 6 Graphics
|
||||
15DD, 85, AMD Ryzen Embedded V1202B with Radeon Vega Gfx
|
||||
15DD, 86, AMD Radeon Vega 11 Graphics
|
||||
15DD, 88, AMD Radeon Vega 8 Graphics
|
||||
15DD, C1, AMD Radeon Vega 11 Graphics
|
||||
15DD, C2, AMD Radeon Vega 8 Graphics
|
||||
15DD, C3, AMD Radeon Vega 3 / 10 Graphics
|
||||
15DD, C4, AMD Radeon Vega 8 Graphics
|
||||
15DD, C5, AMD Radeon Vega 3 Graphics
|
||||
15DD, C6, AMD Radeon Vega 11 Graphics
|
||||
15DD, C8, AMD Radeon Vega 8 Graphics
|
||||
15DD, C9, AMD Radeon Vega 11 Graphics
|
||||
15DD, CA, AMD Radeon Vega 8 Graphics
|
||||
15DD, CB, AMD Radeon Vega 3 Graphics
|
||||
15DD, CC, AMD Radeon Vega 6 Graphics
|
||||
15DD, CE, AMD Radeon Vega 3 Graphics
|
||||
15DD, CF, AMD Radeon Vega 3 Graphics
|
||||
15DD, D0, AMD Radeon Vega 10 Graphics
|
||||
15DD, D1, AMD Radeon Vega 8 Graphics
|
||||
15DD, D3, AMD Radeon Vega 11 Graphics
|
||||
15DD, D5, AMD Radeon Vega 8 Graphics
|
||||
15DD, D6, AMD Radeon Vega 11 Graphics
|
||||
15DD, D7, AMD Radeon Vega 8 Graphics
|
||||
15DD, D8, AMD Radeon Vega 3 Graphics
|
||||
15DD, D9, AMD Radeon Vega 6 Graphics
|
||||
15DD, E1, AMD Radeon Vega 3 Graphics
|
||||
15DD, E2, AMD Radeon Vega 3 Graphics
|
||||
163F, AE, AMD Custom GPU 0405
|
||||
163F, E1, AMD Custom GPU 0405
|
||||
164E, D8, AMD Radeon 610M
|
||||
164E, D9, AMD Radeon 610M
|
||||
164E, DA, AMD Radeon 610M
|
||||
164E, DB, AMD Radeon 610M
|
||||
164E, DC, AMD Radeon 610M
|
||||
1681, 06, AMD Radeon 680M
|
||||
1681, 07, AMD Radeon 660M
|
||||
1681, 0A, AMD Radeon 680M
|
||||
1681, 0B, AMD Radeon 660M
|
||||
1681, C7, AMD Radeon 680M
|
||||
1681, C8, AMD Radeon 680M
|
||||
1681, C9, AMD Radeon 660M
|
||||
1900, 01, AMD Radeon 780M Graphics
|
||||
1900, 02, AMD Radeon 760M Graphics
|
||||
1900, 03, AMD Radeon 780M Graphics
|
||||
1900, 04, AMD Radeon 760M Graphics
|
||||
1900, 05, AMD Radeon 780M Graphics
|
||||
1900, 06, AMD Radeon 780M Graphics
|
||||
1900, 07, AMD Radeon 760M Graphics
|
||||
1900, B0, AMD Radeon 780M Graphics
|
||||
1900, B1, AMD Radeon 780M Graphics
|
||||
1900, B2, AMD Radeon 780M Graphics
|
||||
1900, B3, AMD Radeon 780M Graphics
|
||||
1900, B4, AMD Radeon 780M Graphics
|
||||
1900, B5, AMD Radeon 780M Graphics
|
||||
1900, B6, AMD Radeon 780M Graphics
|
||||
1900, B7, AMD Radeon 760M Graphics
|
||||
1900, B8, AMD Radeon 760M Graphics
|
||||
1900, B9, AMD Radeon 780M Graphics
|
||||
1900, BA, AMD Radeon 780M Graphics
|
||||
1900, BB, AMD Radeon 780M Graphics
|
||||
1900, C0, AMD Radeon 780M Graphics
|
||||
1900, C1, AMD Radeon 760M Graphics
|
||||
1900, C2, AMD Radeon 780M Graphics
|
||||
1900, C3, AMD Radeon 760M Graphics
|
||||
1900, C4, AMD Radeon 780M Graphics
|
||||
1900, C5, AMD Radeon 780M Graphics
|
||||
1900, C6, AMD Radeon 760M Graphics
|
||||
1900, C7, AMD Radeon 780M Graphics
|
||||
1900, C8, AMD Radeon 760M Graphics
|
||||
1900, C9, AMD Radeon 780M Graphics
|
||||
1900, CA, AMD Radeon 760M Graphics
|
||||
1900, CB, AMD Radeon 780M Graphics
|
||||
1900, CC, AMD Radeon 780M Graphics
|
||||
1900, CD, AMD Radeon 760M Graphics
|
||||
1900, CE, AMD Radeon 780M Graphics
|
||||
1900, CF, AMD Radeon 760M Graphics
|
||||
1900, D0, AMD Radeon 780M Graphics
|
||||
1900, D1, AMD Radeon 760M Graphics
|
||||
1900, D2, AMD Radeon 780M Graphics
|
||||
1900, D3, AMD Radeon 760M Graphics
|
||||
1900, D4, AMD Radeon 780M Graphics
|
||||
1900, D5, AMD Radeon 780M Graphics
|
||||
1900, D6, AMD Radeon 760M Graphics
|
||||
1900, D7, AMD Radeon 780M Graphics
|
||||
1900, D8, AMD Radeon 760M Graphics
|
||||
1900, D9, AMD Radeon 780M Graphics
|
||||
1900, DA, AMD Radeon 760M Graphics
|
||||
1900, DB, AMD Radeon 780M Graphics
|
||||
1900, DC, AMD Radeon 780M Graphics
|
||||
1900, DD, AMD Radeon 760M Graphics
|
||||
1900, DE, AMD Radeon 780M Graphics
|
||||
1900, DF, AMD Radeon 760M Graphics
|
||||
1900, F0, AMD Radeon 780M Graphics
|
||||
1900, F1, AMD Radeon 780M Graphics
|
||||
1900, F2, AMD Radeon 780M Graphics
|
||||
1901, C1, AMD Radeon 740M Graphics
|
||||
1901, C2, AMD Radeon 740M Graphics
|
||||
1901, C3, AMD Radeon 740M Graphics
|
||||
1901, C6, AMD Radeon 740M Graphics
|
||||
1901, C7, AMD Radeon 740M Graphics
|
||||
1901, C8, AMD Radeon 740M Graphics
|
||||
1901, C9, AMD Radeon 740M Graphics
|
||||
1901, CA, AMD Radeon 740M Graphics
|
||||
1901, D1, AMD Radeon 740M Graphics
|
||||
1901, D2, AMD Radeon 740M Graphics
|
||||
1901, D3, AMD Radeon 740M Graphics
|
||||
1901, D4, AMD Radeon 740M Graphics
|
||||
1901, D5, AMD Radeon 740M Graphics
|
||||
1901, D6, AMD Radeon 740M Graphics
|
||||
1901, D7, AMD Radeon 740M Graphics
|
||||
1901, D8, AMD Radeon 740M Graphics
|
||||
6600, 00, AMD Radeon HD 8600 / 8700M
|
||||
6600, 81, AMD Radeon R7 M370
|
||||
6601, 00, AMD Radeon HD 8500M / 8700M
|
||||
6604, 00, AMD Radeon R7 M265 Series
|
||||
6604, 81, AMD Radeon R7 M350
|
||||
6605, 00, AMD Radeon R7 M260 Series
|
||||
6605, 81, AMD Radeon R7 M340
|
||||
6606, 00, AMD Radeon HD 8790M
|
||||
6607, 00, AMD Radeon R5 M240
|
||||
6608, 00, AMD FirePro W2100
|
||||
6610, 00, AMD Radeon R7 200 Series
|
||||
6610, 81, AMD Radeon R7 350
|
||||
6610, 83, AMD Radeon R5 340
|
||||
6610, 87, AMD Radeon R7 200 Series
|
||||
6611, 00, AMD Radeon R7 200 Series
|
||||
6611, 87, AMD Radeon R7 200 Series
|
||||
6613, 00, AMD Radeon R7 200 Series
|
||||
6617, 00, AMD Radeon R7 240 Series
|
||||
6617, 87, AMD Radeon R7 200 Series
|
||||
6617, C7, AMD Radeon R7 240 Series
|
||||
6640, 00, AMD Radeon HD 8950
|
||||
6640, 80, AMD Radeon R9 M380
|
||||
6646, 00, AMD Radeon R9 M280X
|
||||
6646, 80, AMD Radeon R9 M385
|
||||
6646, 80, AMD Radeon R9 M470X
|
||||
6647, 00, AMD Radeon R9 M200X Series
|
||||
6647, 80, AMD Radeon R9 M380
|
||||
6649, 00, AMD FirePro W5100
|
||||
6658, 00, AMD Radeon R7 200 Series
|
||||
665C, 00, AMD Radeon HD 7700 Series
|
||||
665D, 00, AMD Radeon R7 200 Series
|
||||
665F, 81, AMD Radeon R7 360 Series
|
||||
6660, 00, AMD Radeon HD 8600M Series
|
||||
6660, 81, AMD Radeon R5 M335
|
||||
6660, 83, AMD Radeon R5 M330
|
||||
6663, 00, AMD Radeon HD 8500M Series
|
||||
6663, 83, AMD Radeon R5 M320
|
||||
6664, 00, AMD Radeon R5 M200 Series
|
||||
6665, 00, AMD Radeon R5 M230 Series
|
||||
6665, 83, AMD Radeon R5 M320
|
||||
6665, C3, AMD Radeon R5 M435
|
||||
6666, 00, AMD Radeon R5 M200 Series
|
||||
6667, 00, AMD Radeon R5 M200 Series
|
||||
666F, 00, AMD Radeon HD 8500M
|
||||
66A1, 02, AMD Instinct MI60 / MI50
|
||||
66A1, 06, AMD Radeon Pro VII
|
||||
66AF, C1, AMD Radeon VII
|
||||
6780, 00, AMD FirePro W9000
|
||||
6784, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
||||
6788, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
||||
678A, 00, AMD FirePro W8000
|
||||
6798, 00, AMD Radeon R9 200 / HD 7900 Series
|
||||
6799, 00, AMD Radeon HD 7900 Series
|
||||
679A, 00, AMD Radeon HD 7900 Series
|
||||
679B, 00, AMD Radeon HD 7900 Series
|
||||
679E, 00, AMD Radeon HD 7800 Series
|
||||
67A0, 00, AMD Radeon FirePro W9100
|
||||
67A1, 00, AMD Radeon FirePro W8100
|
||||
67B0, 00, AMD Radeon R9 200 Series
|
||||
67B0, 80, AMD Radeon R9 390 Series
|
||||
67B1, 00, AMD Radeon R9 200 Series
|
||||
67B1, 80, AMD Radeon R9 390 Series
|
||||
67B9, 00, AMD Radeon R9 200 Series
|
||||
67C0, 00, AMD Radeon Pro WX 7100 Graphics
|
||||
67C0, 80, AMD Radeon E9550
|
||||
67C2, 01, AMD Radeon Pro V7350x2
|
||||
67C2, 02, AMD Radeon Pro V7300X
|
||||
67C4, 00, AMD Radeon Pro WX 7100 Graphics
|
||||
67C4, 80, AMD Radeon E9560 / E9565 Graphics
|
||||
67C7, 00, AMD Radeon Pro WX 5100 Graphics
|
||||
67C7, 80, AMD Radeon E9390 Graphics
|
||||
67D0, 01, AMD Radeon Pro V7350x2
|
||||
67D0, 02, AMD Radeon Pro V7300X
|
||||
67DF, C0, AMD Radeon Pro 580X
|
||||
67DF, C1, AMD Radeon RX 580 Series
|
||||
67DF, C2, AMD Radeon RX 570 Series
|
||||
67DF, C3, AMD Radeon RX 580 Series
|
||||
67DF, C4, AMD Radeon RX 480 Graphics
|
||||
67DF, C5, AMD Radeon RX 470 Graphics
|
||||
67DF, C6, AMD Radeon RX 570 Series
|
||||
67DF, C7, AMD Radeon RX 480 Graphics
|
||||
67DF, CF, AMD Radeon RX 470 Graphics
|
||||
67DF, D7, AMD Radeon RX 470 Graphics
|
||||
67DF, E0, AMD Radeon RX 470 Series
|
||||
67DF, E1, AMD Radeon RX 590 Series
|
||||
67DF, E3, AMD Radeon RX Series
|
||||
67DF, E7, AMD Radeon RX 580 Series
|
||||
67DF, EB, AMD Radeon Pro 580X
|
||||
67DF, EF, AMD Radeon RX 570 Series
|
||||
67DF, F7, AMD Radeon RX P30PH
|
||||
67DF, FF, AMD Radeon RX 470 Series
|
||||
67E0, 00, AMD Radeon Pro WX Series
|
||||
67E3, 00, AMD Radeon Pro WX 4100
|
||||
67E8, 00, AMD Radeon Pro WX Series
|
||||
67E8, 01, AMD Radeon Pro WX Series
|
||||
67E8, 80, AMD Radeon E9260 Graphics
|
||||
67EB, 00, AMD Radeon Pro V5300X
|
||||
67EF, C0, AMD Radeon RX Graphics
|
||||
67EF, C1, AMD Radeon RX 460 Graphics
|
||||
67EF, C2, AMD Radeon Pro Series
|
||||
67EF, C3, AMD Radeon RX Series
|
||||
67EF, C5, AMD Radeon RX 460 Graphics
|
||||
67EF, C7, AMD Radeon RX Graphics
|
||||
67EF, CF, AMD Radeon RX 460 Graphics
|
||||
67EF, E0, AMD Radeon RX 560 Series
|
||||
67EF, E1, AMD Radeon RX Series
|
||||
67EF, E2, AMD Radeon RX 560X
|
||||
67EF, E3, AMD Radeon RX Series
|
||||
67EF, E5, AMD Radeon RX 560 Series
|
||||
67EF, E7, AMD Radeon RX 560 Series
|
||||
67EF, EF, AMD Radeon 550 Series
|
||||
67EF, FF, AMD Radeon RX 460 Graphics
|
||||
67FF, C0, AMD Radeon Pro 465
|
||||
67FF, C1, AMD Radeon RX 560 Series
|
||||
67FF, CF, AMD Radeon RX 560 Series
|
||||
67FF, EF, AMD Radeon RX 560 Series
|
||||
67FF, FF, AMD Radeon RX 550 Series
|
||||
6800, 00, AMD Radeon HD 7970M
|
||||
6801, 00, AMD Radeon HD 8970M
|
||||
6806, 00, AMD Radeon R9 M290X
|
||||
6808, 00, AMD FirePro W7000
|
||||
6808, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
||||
6809, 00, ATI FirePro W5000
|
||||
6810, 00, AMD Radeon R9 200 Series
|
||||
6810, 81, AMD Radeon R9 370 Series
|
||||
6811, 00, AMD Radeon R9 200 Series
|
||||
6811, 81, AMD Radeon R7 370 Series
|
||||
6818, 00, AMD Radeon HD 7800 Series
|
||||
6819, 00, AMD Radeon HD 7800 Series
|
||||
6820, 00, AMD Radeon R9 M275X
|
||||
6820, 81, AMD Radeon R9 M375
|
||||
6820, 83, AMD Radeon R9 M375X
|
||||
6821, 00, AMD Radeon R9 M200X Series
|
||||
6821, 83, AMD Radeon R9 M370X
|
||||
6821, 87, AMD Radeon R7 M380
|
||||
6822, 00, AMD Radeon E8860
|
||||
6823, 00, AMD Radeon R9 M200X Series
|
||||
6825, 00, AMD Radeon HD 7800M Series
|
||||
6826, 00, AMD Radeon HD 7700M Series
|
||||
6827, 00, AMD Radeon HD 7800M Series
|
||||
6828, 00, AMD FirePro W600
|
||||
682B, 00, AMD Radeon HD 8800M Series
|
||||
682B, 87, AMD Radeon R9 M360
|
||||
682C, 00, AMD FirePro W4100
|
||||
682D, 00, AMD Radeon HD 7700M Series
|
||||
682F, 00, AMD Radeon HD 7700M Series
|
||||
6830, 00, AMD Radeon 7800M Series
|
||||
6831, 00, AMD Radeon 7700M Series
|
||||
6835, 00, AMD Radeon R7 Series / HD 9000 Series
|
||||
6837, 00, AMD Radeon HD 7700 Series
|
||||
683D, 00, AMD Radeon HD 7700 Series
|
||||
683F, 00, AMD Radeon HD 7700 Series
|
||||
684C, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
||||
6860, 00, AMD Radeon Instinct MI25
|
||||
6860, 01, AMD Radeon Instinct MI25
|
||||
6860, 02, AMD Radeon Instinct MI25
|
||||
6860, 03, AMD Radeon Pro V340
|
||||
6860, 04, AMD Radeon Instinct MI25x2
|
||||
6860, 07, AMD Radeon Pro V320
|
||||
6861, 00, AMD Radeon Pro WX 9100
|
||||
6862, 00, AMD Radeon Pro SSG
|
||||
6863, 00, AMD Radeon Vega Frontier Edition
|
||||
6864, 03, AMD Radeon Pro V340
|
||||
6864, 04, AMD Radeon Instinct MI25x2
|
||||
6864, 05, AMD Radeon Pro V340
|
||||
6868, 00, AMD Radeon Pro WX 8200
|
||||
686C, 00, AMD Radeon Instinct MI25 MxGPU
|
||||
686C, 01, AMD Radeon Instinct MI25 MxGPU
|
||||
686C, 02, AMD Radeon Instinct MI25 MxGPU
|
||||
686C, 03, AMD Radeon Pro V340 MxGPU
|
||||
686C, 04, AMD Radeon Instinct MI25x2 MxGPU
|
||||
686C, 05, AMD Radeon Pro V340L MxGPU
|
||||
686C, 06, AMD Radeon Instinct MI25 MxGPU
|
||||
687F, 01, AMD Radeon RX Vega
|
||||
687F, C0, AMD Radeon RX Vega
|
||||
687F, C1, AMD Radeon RX Vega
|
||||
687F, C3, AMD Radeon RX Vega
|
||||
687F, C7, AMD Radeon RX Vega
|
||||
6900, 00, AMD Radeon R7 M260
|
||||
6900, 81, AMD Radeon R7 M360
|
||||
6900, 83, AMD Radeon R7 M340
|
||||
6900, C1, AMD Radeon R5 M465 Series
|
||||
6900, C3, AMD Radeon R5 M445 Series
|
||||
6900, D1, AMD Radeon 530 Series
|
||||
6900, D3, AMD Radeon 530 Series
|
||||
6901, 00, AMD Radeon R5 M255
|
||||
6902, 00, AMD Radeon Series
|
||||
6907, 00, AMD Radeon R5 M255
|
||||
6907, 87, AMD Radeon R5 M315
|
||||
6920, 00, AMD Radeon R9 M395X
|
||||
6920, 01, AMD Radeon R9 M390X
|
||||
6921, 00, AMD Radeon R9 M390X
|
||||
6929, 00, AMD FirePro S7150
|
||||
6929, 01, AMD FirePro S7100X
|
||||
692B, 00, AMD FirePro W7100
|
||||
6938, 00, AMD Radeon R9 200 Series
|
||||
6938, F0, AMD Radeon R9 200 Series
|
||||
6938, F1, AMD Radeon R9 380 Series
|
||||
6939, 00, AMD Radeon R9 200 Series
|
||||
6939, F0, AMD Radeon R9 200 Series
|
||||
6939, F1, AMD Radeon R9 380 Series
|
||||
694C, C0, AMD Radeon RX Vega M GH Graphics
|
||||
694E, C0, AMD Radeon RX Vega M GL Graphics
|
||||
6980, 00, AMD Radeon Pro WX 3100
|
||||
6981, 00, AMD Radeon Pro WX 3200 Series
|
||||
6981, 01, AMD Radeon Pro WX 3200 Series
|
||||
6981, 10, AMD Radeon Pro WX 3200 Series
|
||||
6985, 00, AMD Radeon Pro WX 3100
|
||||
6986, 00, AMD Radeon Pro WX 2100
|
||||
6987, 80, AMD Embedded Radeon E9171
|
||||
6987, C0, AMD Radeon 550X Series
|
||||
6987, C1, AMD Radeon RX 640
|
||||
6987, C3, AMD Radeon 540X Series
|
||||
6987, C7, AMD Radeon 540
|
||||
6995, 00, AMD Radeon Pro WX 2100
|
||||
6997, 00, AMD Radeon Pro WX 2100
|
||||
699F, 81, AMD Embedded Radeon E9170 Series
|
||||
699F, C0, AMD Radeon 500 Series
|
||||
699F, C1, AMD Radeon 540 Series
|
||||
699F, C3, AMD Radeon 500 Series
|
||||
699F, C7, AMD Radeon RX 550 / 550 Series
|
||||
699F, C9, AMD Radeon 540
|
||||
6FDF, E7, AMD Radeon RX 590 GME
|
||||
6FDF, EF, AMD Radeon RX 580 2048SP
|
||||
7300, C1, AMD FirePro S9300 x2
|
||||
7300, C8, AMD Radeon R9 Fury Series
|
||||
7300, C9, AMD Radeon Pro Duo
|
||||
7300, CA, AMD Radeon R9 Fury Series
|
||||
7300, CB, AMD Radeon R9 Fury Series
|
||||
7312, 00, AMD Radeon Pro W5700
|
||||
731E, C6, AMD Radeon RX 5700XTB
|
||||
731E, C7, AMD Radeon RX 5700B
|
||||
731F, C0, AMD Radeon RX 5700 XT 50th Anniversary
|
||||
731F, C1, AMD Radeon RX 5700 XT
|
||||
731F, C2, AMD Radeon RX 5600M
|
||||
731F, C3, AMD Radeon RX 5700M
|
||||
731F, C4, AMD Radeon RX 5700
|
||||
731F, C5, AMD Radeon RX 5700 XT
|
||||
731F, CA, AMD Radeon RX 5600 XT
|
||||
731F, CB, AMD Radeon RX 5600 OEM
|
||||
7340, C1, AMD Radeon RX 5500M
|
||||
7340, C3, AMD Radeon RX 5300M
|
||||
7340, C5, AMD Radeon RX 5500 XT
|
||||
7340, C7, AMD Radeon RX 5500
|
||||
7340, C9, AMD Radeon RX 5500XTB
|
||||
7340, CF, AMD Radeon RX 5300
|
||||
7341, 00, AMD Radeon Pro W5500
|
||||
7347, 00, AMD Radeon Pro W5500M
|
||||
7360, 41, AMD Radeon Pro 5600M
|
||||
7360, C3, AMD Radeon Pro V520
|
||||
7362, C1, AMD Radeon Pro V540
|
||||
7362, C3, AMD Radeon Pro V520
|
||||
738C, 01, AMD Instinct MI100
|
||||
73A1, 00, AMD Radeon Pro V620
|
||||
73A3, 00, AMD Radeon Pro W6800
|
||||
73A5, C0, AMD Radeon RX 6950 XT
|
||||
73AE, 00, AMD Radeon Pro V620 MxGPU
|
||||
73AF, C0, AMD Radeon RX 6900 XT
|
||||
73BF, C0, AMD Radeon RX 6900 XT
|
||||
73BF, C1, AMD Radeon RX 6800 XT
|
||||
73BF, C3, AMD Radeon RX 6800
|
||||
73DF, C0, AMD Radeon RX 6750 XT
|
||||
73DF, C1, AMD Radeon RX 6700 XT
|
||||
73DF, C2, AMD Radeon RX 6800M
|
||||
73DF, C3, AMD Radeon RX 6800M
|
||||
73DF, C5, AMD Radeon RX 6700 XT
|
||||
73DF, CF, AMD Radeon RX 6700M
|
||||
73DF, D5, AMD Radeon RX 6750 GRE 12GB
|
||||
73DF, D7, AMD TDC-235
|
||||
73DF, DF, AMD Radeon RX 6700
|
||||
73DF, E5, AMD Radeon RX 6750 GRE 12GB
|
||||
73DF, FF, AMD Radeon RX 6700
|
||||
73E0, 00, AMD Radeon RX 6600M
|
||||
73E1, 00, AMD Radeon Pro W6600M
|
||||
73E3, 00, AMD Radeon Pro W6600
|
||||
73EF, C0, AMD Radeon RX 6800S
|
||||
73EF, C1, AMD Radeon RX 6650 XT
|
||||
73EF, C2, AMD Radeon RX 6700S
|
||||
73EF, C3, AMD Radeon RX 6650M
|
||||
73EF, C4, AMD Radeon RX 6650M XT
|
||||
73FF, C1, AMD Radeon RX 6600 XT
|
||||
73FF, C3, AMD Radeon RX 6600M
|
||||
73FF, C7, AMD Radeon RX 6600
|
||||
73FF, CB, AMD Radeon RX 6600S
|
||||
73FF, CF, AMD Radeon RX 6600 LE
|
||||
73FF, DF, AMD Radeon RX 6750 GRE 10GB
|
||||
7408, 00, AMD Instinct MI250X
|
||||
740C, 01, AMD Instinct MI250X / MI250
|
||||
740F, 02, AMD Instinct MI210
|
||||
7421, 00, AMD Radeon Pro W6500M
|
||||
7422, 00, AMD Radeon Pro W6400
|
||||
7423, 00, AMD Radeon Pro W6300M
|
||||
7423, 01, AMD Radeon Pro W6300
|
||||
7424, 00, AMD Radeon RX 6300
|
||||
743F, C1, AMD Radeon RX 6500 XT
|
||||
743F, C3, AMD Radeon RX 6500
|
||||
743F, C3, AMD Radeon RX 6500M
|
||||
743F, C7, AMD Radeon RX 6400
|
||||
743F, C8, AMD Radeon RX 6500M
|
||||
743F, CC, AMD Radeon 6550S
|
||||
743F, CE, AMD Radeon RX 6450M
|
||||
743F, CF, AMD Radeon RX 6300M
|
||||
743F, D3, AMD Radeon RX 6550M
|
||||
743F, D7, AMD Radeon RX 6400
|
||||
7448, 00, AMD Radeon Pro W7900
|
||||
7449, 00, AMD Radeon Pro W7800 48GB
|
||||
744A, 00, AMD Radeon Pro W7900 Dual Slot
|
||||
744B, 00, AMD Radeon Pro W7900D
|
||||
744C, C8, AMD Radeon RX 7900 XTX
|
||||
744C, CC, AMD Radeon RX 7900 XT
|
||||
744C, CE, AMD Radeon RX 7900 GRE
|
||||
744C, CF, AMD Radeon RX 7900M
|
||||
745E, CC, AMD Radeon Pro W7800
|
||||
7460, 00, AMD Radeon Pro V710
|
||||
7461, 00, AMD Radeon Pro V710 MxGPU
|
||||
7470, 00, AMD Radeon Pro W7700
|
||||
747E, C8, AMD Radeon RX 7800 XT
|
||||
747E, D8, AMD Radeon RX 7800M
|
||||
747E, DB, AMD Radeon RX 7700
|
||||
747E, FF, AMD Radeon RX 7700 XT
|
||||
7480, 00, AMD Radeon Pro W7600
|
||||
7480, C0, AMD Radeon RX 7600 XT
|
||||
7480, C1, AMD Radeon RX 7700S
|
||||
7480, C2, AMD Radeon RX 7650 GRE
|
||||
7480, C3, AMD Radeon RX 7600S
|
||||
7480, C7, AMD Radeon RX 7600M XT
|
||||
7480, CF, AMD Radeon RX 7600
|
||||
7481, C7, AMD Steam Machine
|
||||
7483, CF, AMD Radeon RX 7600M
|
||||
7489, 00, AMD Radeon Pro W7500
|
||||
7499, 00, AMD Radeon Pro W7400
|
||||
7499, C0, AMD Radeon RX 7400
|
||||
7499, C1, AMD Radeon RX 7300
|
||||
74A0, 00, AMD Instinct MI300A
|
||||
74A1, 00, AMD Instinct MI300X
|
||||
74A2, 00, AMD Instinct MI308X
|
||||
74A5, 00, AMD Instinct MI325X
|
||||
74A8, 00, AMD Instinct MI308X HF
|
||||
74A9, 00, AMD Instinct MI300X HF
|
||||
74B5, 00, AMD Instinct MI300X VF
|
||||
74B6, 00, AMD Instinct MI308X
|
||||
74BD, 00, AMD Instinct MI300X HF
|
||||
7550, C0, AMD Radeon RX 9070 XT
|
||||
7550, C2, AMD Radeon RX 9070 GRE
|
||||
7550, C3, AMD Radeon RX 9070
|
||||
7551, C0, AMD Radeon AI PRO R9700
|
||||
7590, C0, AMD Radeon RX 9060 XT
|
||||
7590, C7, AMD Radeon RX 9060
|
||||
75A0, C0, AMD Instinct MI350X
|
||||
75A3, C0, AMD Instinct MI355X
|
||||
75B0, C0, AMD Instinct MI350X VF
|
||||
75B3, C0, AMD Instinct MI355X VF
|
||||
9830, 00, AMD Radeon HD 8400 / R3 Series
|
||||
9831, 00, AMD Radeon HD 8400E
|
||||
9832, 00, AMD Radeon HD 8330
|
||||
9833, 00, AMD Radeon HD 8330E
|
||||
9834, 00, AMD Radeon HD 8210
|
||||
9835, 00, AMD Radeon HD 8210E
|
||||
9836, 00, AMD Radeon HD 8200 / R3 Series
|
||||
9837, 00, AMD Radeon HD 8280E
|
||||
9838, 00, AMD Radeon HD 8200 / R3 series
|
||||
9839, 00, AMD Radeon HD 8180
|
||||
983D, 00, AMD Radeon HD 8250
|
||||
9850, 00, AMD Radeon R3 Graphics
|
||||
9850, 03, AMD Radeon R3 Graphics
|
||||
9850, 40, AMD Radeon R2 Graphics
|
||||
9850, 45, AMD Radeon R3 Graphics
|
||||
9851, 00, AMD Radeon R4 Graphics
|
||||
9851, 01, AMD Radeon R5E Graphics
|
||||
9851, 05, AMD Radeon R5 Graphics
|
||||
9851, 06, AMD Radeon R5E Graphics
|
||||
9851, 40, AMD Radeon R4 Graphics
|
||||
9851, 45, AMD Radeon R5 Graphics
|
||||
9852, 00, AMD Radeon R2 Graphics
|
||||
9852, 40, AMD Radeon E1 Graphics
|
||||
9853, 00, AMD Radeon R2 Graphics
|
||||
9853, 01, AMD Radeon R4E Graphics
|
||||
9853, 03, AMD Radeon R2 Graphics
|
||||
9853, 05, AMD Radeon R1E Graphics
|
||||
9853, 06, AMD Radeon R1E Graphics
|
||||
9853, 07, AMD Radeon R1E Graphics
|
||||
9853, 08, AMD Radeon R1E Graphics
|
||||
9853, 40, AMD Radeon R2 Graphics
|
||||
9854, 00, AMD Radeon R3 Graphics
|
||||
9854, 01, AMD Radeon R3E Graphics
|
||||
9854, 02, AMD Radeon R3 Graphics
|
||||
9854, 05, AMD Radeon R2 Graphics
|
||||
9854, 06, AMD Radeon R4 Graphics
|
||||
9854, 07, AMD Radeon R3 Graphics
|
||||
9855, 02, AMD Radeon R6 Graphics
|
||||
9855, 05, AMD Radeon R4 Graphics
|
||||
9856, 00, AMD Radeon R2 Graphics
|
||||
9856, 01, AMD Radeon R2E Graphics
|
||||
9856, 02, AMD Radeon R2 Graphics
|
||||
9856, 05, AMD Radeon R1E Graphics
|
||||
9856, 06, AMD Radeon R2 Graphics
|
||||
9856, 07, AMD Radeon R1E Graphics
|
||||
9856, 08, AMD Radeon R1E Graphics
|
||||
9856, 13, AMD Radeon R1E Graphics
|
||||
9874, 81, AMD Radeon R6 Graphics
|
||||
9874, 84, AMD Radeon R7 Graphics
|
||||
9874, 85, AMD Radeon R6 Graphics
|
||||
9874, 87, AMD Radeon R5 Graphics
|
||||
9874, 88, AMD Radeon R7E Graphics
|
||||
9874, 89, AMD Radeon R6E Graphics
|
||||
9874, C4, AMD Radeon R7 Graphics
|
||||
9874, C5, AMD Radeon R6 Graphics
|
||||
9874, C6, AMD Radeon R6 Graphics
|
||||
9874, C7, AMD Radeon R5 Graphics
|
||||
9874, C8, AMD Radeon R7 Graphics
|
||||
9874, C9, AMD Radeon R7 Graphics
|
||||
9874, CA, AMD Radeon R5 Graphics
|
||||
9874, CB, AMD Radeon R5 Graphics
|
||||
9874, CC, AMD Radeon R7 Graphics
|
||||
9874, CD, AMD Radeon R7 Graphics
|
||||
9874, CE, AMD Radeon R5 Graphics
|
||||
9874, E1, AMD Radeon R7 Graphics
|
||||
9874, E2, AMD Radeon R7 Graphics
|
||||
9874, E3, AMD Radeon R7 Graphics
|
||||
9874, E4, AMD Radeon R7 Graphics
|
||||
9874, E5, AMD Radeon R5 Graphics
|
||||
9874, E6, AMD Radeon R5 Graphics
|
||||
98E4, 80, AMD Radeon R5E Graphics
|
||||
98E4, 81, AMD Radeon R4E Graphics
|
||||
98E4, 83, AMD Radeon R2E Graphics
|
||||
98E4, 84, AMD Radeon R2E Graphics
|
||||
98E4, 86, AMD Radeon R1E Graphics
|
||||
98E4, C0, AMD Radeon R4 Graphics
|
||||
98E4, C1, AMD Radeon R5 Graphics
|
||||
98E4, C2, AMD Radeon R4 Graphics
|
||||
98E4, C4, AMD Radeon R5 Graphics
|
||||
98E4, C6, AMD Radeon R5 Graphics
|
||||
98E4, C8, AMD Radeon R4 Graphics
|
||||
98E4, C9, AMD Radeon R4 Graphics
|
||||
98E4, CA, AMD Radeon R5 Graphics
|
||||
98E4, D0, AMD Radeon R2 Graphics
|
||||
98E4, D1, AMD Radeon R2 Graphics
|
||||
98E4, D2, AMD Radeon R2 Graphics
|
||||
98E4, D4, AMD Radeon R2 Graphics
|
||||
98E4, D9, AMD Radeon R5 Graphics
|
||||
98E4, DA, AMD Radeon R5 Graphics
|
||||
98E4, DB, AMD Radeon R3 Graphics
|
||||
98E4, E1, AMD Radeon R3 Graphics
|
||||
98E4, E2, AMD Radeon R3 Graphics
|
||||
98E4, E9, AMD Radeon R4 Graphics
|
||||
98E4, EA, AMD Radeon R4 Graphics
|
||||
98E4, EB, AMD Radeon R3 Graphics
|
||||
98E4, EB, AMD Radeon R4 Graphics
|
||||
34
agent/test-data/nvtop.json
Normal file
34
agent/test-data/nvtop.json
Normal file
@@ -0,0 +1,34 @@
|
||||
[
|
||||
{
|
||||
"device_name": "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
||||
"gpu_clock": "1485MHz",
|
||||
"mem_clock": "6001MHz",
|
||||
"temp": "48C",
|
||||
"fan_speed": null,
|
||||
"power_draw": "13W",
|
||||
"gpu_util": "5%",
|
||||
"encode": "0%",
|
||||
"decode": "0%",
|
||||
"mem_util": "8%",
|
||||
"mem_total": "4294967296",
|
||||
"mem_used": "349372416",
|
||||
"mem_free": "3945594880",
|
||||
"processes" : []
|
||||
},
|
||||
{
|
||||
"device_name": "AMD Radeon 680M",
|
||||
"gpu_clock": "2200MHz",
|
||||
"mem_clock": "2400MHz",
|
||||
"temp": "48C",
|
||||
"fan_speed": "CPU Fan",
|
||||
"power_draw": "9W",
|
||||
"gpu_util": "12%",
|
||||
"encode": null,
|
||||
"decode": "0%",
|
||||
"mem_util": "7%",
|
||||
"mem_total": "16929173504",
|
||||
"mem_used": "1213784064",
|
||||
"mem_free": "15715389440",
|
||||
"processes" : []
|
||||
}
|
||||
]
|
||||
@@ -1,12 +1,10 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/internal/ghupdate"
|
||||
)
|
||||
@@ -81,7 +79,7 @@ func detectRestarter() restarter {
|
||||
func Update(useMirror bool) error {
|
||||
exePath, _ := os.Executable()
|
||||
|
||||
dataDir, err := getDataDir()
|
||||
dataDir, err := GetDataDir()
|
||||
if err != nil {
|
||||
dataDir = os.TempDir()
|
||||
}
|
||||
@@ -108,12 +106,12 @@ func Update(useMirror bool) error {
|
||||
}
|
||||
}
|
||||
|
||||
// 6) Fix SELinux context if necessary
|
||||
if err := handleSELinuxContext(exePath); err != nil {
|
||||
// Fix SELinux context if necessary
|
||||
if err := ghupdate.HandleSELinuxContext(exePath); err != nil {
|
||||
ghupdate.ColorPrintf(ghupdate.ColorYellow, "Warning: SELinux context handling: %v", err)
|
||||
}
|
||||
|
||||
// 7) Restart service if running under a recognised init system
|
||||
// Restart service if running under a recognised init system
|
||||
if r := detectRestarter(); r != nil {
|
||||
if err := r.Restart(); err != nil {
|
||||
ghupdate.ColorPrintf(ghupdate.ColorYellow, "Warning: failed to restart service: %v", err)
|
||||
@@ -127,42 +125,3 @@ func Update(useMirror bool) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleSELinuxContext restores or applies the correct SELinux label to the binary.
|
||||
func handleSELinuxContext(path string) error {
|
||||
out, err := exec.Command("getenforce").Output()
|
||||
if err != nil {
|
||||
// SELinux not enabled or getenforce not available
|
||||
return nil
|
||||
}
|
||||
state := strings.TrimSpace(string(out))
|
||||
if state == "Disabled" {
|
||||
return nil
|
||||
}
|
||||
|
||||
ghupdate.ColorPrint(ghupdate.ColorYellow, "SELinux is enabled; applying context…")
|
||||
var errs []string
|
||||
|
||||
// Try persistent context via semanage+restorecon
|
||||
if semanagePath, err := exec.LookPath("semanage"); err == nil {
|
||||
if err := exec.Command(semanagePath, "fcontext", "-a", "-t", "bin_t", path).Run(); err != nil {
|
||||
errs = append(errs, "semanage fcontext failed: "+err.Error())
|
||||
} else if restoreconPath, err := exec.LookPath("restorecon"); err == nil {
|
||||
if err := exec.Command(restoreconPath, "-v", path).Run(); err != nil {
|
||||
errs = append(errs, "restorecon failed: "+err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to temporary context via chcon
|
||||
if chconPath, err := exec.LookPath("chcon"); err == nil {
|
||||
if err := exec.Command(chconPath, "-t", "bin_t", path).Run(); err != nil {
|
||||
errs = append(errs, "chcon failed: "+err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return fmt.Errorf("SELinux context errors: %s", strings.Join(errs, "; "))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
package agent
|
||||
|
||||
import "math"
|
||||
|
||||
func bytesToMegabytes(b float64) float64 {
|
||||
return twoDecimals(b / 1048576)
|
||||
}
|
||||
|
||||
func bytesToGigabytes(b uint64) float64 {
|
||||
return twoDecimals(float64(b) / 1073741824)
|
||||
}
|
||||
|
||||
// twoDecimals rounds value to two decimal places. Halfway cases are rounded
// away from zero, as done by math.Round.
func twoDecimals(value float64) float64 {
	return math.Round(value*100) / 100
}
|
||||
88
agent/utils/utils.go
Normal file
88
agent/utils/utils.go
Normal file
@@ -0,0 +1,88 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GetEnv retrieves an environment variable with a "BESZEL_AGENT_" prefix, or falls back to the unprefixed key.
//
// The prefixed name wins whenever it is set, even to an empty string. exists
// reports whether the variable that supplied value was present in the
// environment.
func GetEnv(key string) (value string, exists bool) {
	if value, exists = os.LookupEnv("BESZEL_AGENT_" + key); exists {
		return value, exists
	}
	return os.LookupEnv(key)
}
|
||||
|
||||
// BytesToMegabytes converts bytes to megabytes and rounds to two decimal places.
|
||||
func BytesToMegabytes(b float64) float64 {
|
||||
return TwoDecimals(b / 1048576)
|
||||
}
|
||||
|
||||
// BytesToGigabytes converts bytes to gigabytes and rounds to two decimal places.
|
||||
func BytesToGigabytes(b uint64) float64 {
|
||||
return TwoDecimals(float64(b) / 1073741824)
|
||||
}
|
||||
|
||||
// TwoDecimals rounds a float64 value to two decimal places. Halfway cases are
// rounded away from zero, as done by math.Round.
func TwoDecimals(value float64) float64 {
	return math.Round(value*100) / 100
}
|
||||
|
||||
// func RoundFloat(val float64, precision uint) float64 {
|
||||
// ratio := math.Pow(10, float64(precision))
|
||||
// return math.Round(val*ratio) / ratio
|
||||
// }
|
||||
|
||||
// ReadStringFile returns trimmed file contents or empty string on error.
|
||||
func ReadStringFile(path string) string {
|
||||
content, _ := ReadStringFileOK(path)
|
||||
return content
|
||||
}
|
||||
|
||||
// ReadStringFileOK returns trimmed file contents and read success.
// Leading and trailing whitespace is removed with strings.TrimSpace. When the
// file cannot be read the result is ("", false).
func ReadStringFileOK(path string) (string, bool) {
	b, err := os.ReadFile(path)
	if err != nil {
		return "", false
	}
	return strings.TrimSpace(string(b)), true
}
|
||||
|
||||
// ReadStringFileLimited reads a file into a string with a maximum size (in bytes) to avoid
// allocating large buffers and potential panics with pseudo-files when the size is misreported.
//
// At most maxSize bytes are read; the result is trimmed with
// strings.TrimSpace. A negative maxSize yields os.ErrInvalid instead of
// panicking while allocating the buffer. Reaching end of file before maxSize
// bytes is not an error.
func ReadStringFileLimited(path string, maxSize int) (string, error) {
	if maxSize < 0 {
		return "", os.ErrInvalid
	}

	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	buf := make([]byte, maxSize)
	// io.ReadFull keeps reading until the buffer is full or the file ends, so
	// a short first Read does not silently truncate the result.
	n, err := io.ReadFull(f, buf)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", err
	}
	return strings.TrimSpace(string(buf[:n])), nil
}
|
||||
|
||||
// FileExists reports whether the given path exists.
// Any os.Stat failure, not only "not found", is reported as false.
func FileExists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}
|
||||
|
||||
// ReadUintFile parses a decimal uint64 value from a file.
|
||||
func ReadUintFile(path string) (uint64, bool) {
|
||||
raw, ok := ReadStringFileOK(path)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
parsed, err := strconv.ParseUint(raw, 10, 64)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
return parsed, true
|
||||
}
|
||||
165
agent/utils/utils_test.go
Normal file
165
agent/utils/utils_test.go
Normal file
@@ -0,0 +1,165 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestTwoDecimals(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
}{
|
||||
{"round down", 1.234, 1.23},
|
||||
{"round half up", 1.235, 1.24}, // math.Round rounds half up
|
||||
{"no rounding needed", 1.23, 1.23},
|
||||
{"negative number", -1.235, -1.24}, // math.Round rounds half up (more negative)
|
||||
{"zero", 0.0, 0.0},
|
||||
{"large number", 123.456, 123.46}, // rounds 5 up
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := TwoDecimals(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBytesToMegabytes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input float64
|
||||
expected float64
|
||||
}{
|
||||
{"1 MB", 1048576, 1.0},
|
||||
{"512 KB", 524288, 0.5},
|
||||
{"zero", 0, 0},
|
||||
{"large value", 1073741824, 1024}, // 1 GB = 1024 MB
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := BytesToMegabytes(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBytesToGigabytes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input uint64
|
||||
expected float64
|
||||
}{
|
||||
{"1 GB", 1073741824, 1.0},
|
||||
{"512 MB", 536870912, 0.5},
|
||||
{"0 GB", 0, 0},
|
||||
{"2 GB", 2147483648, 2.0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := BytesToGigabytes(tt.input)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFileFunctions(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
testFilePath := filepath.Join(tmpDir, "test.txt")
|
||||
testContent := "hello world"
|
||||
|
||||
// Test FileExists (false)
|
||||
assert.False(t, FileExists(testFilePath))
|
||||
|
||||
// Test ReadStringFileOK (false)
|
||||
content, ok := ReadStringFileOK(testFilePath)
|
||||
assert.False(t, ok)
|
||||
assert.Empty(t, content)
|
||||
|
||||
// Test ReadStringFile (empty)
|
||||
assert.Empty(t, ReadStringFile(testFilePath))
|
||||
|
||||
// Write file
|
||||
err := os.WriteFile(testFilePath, []byte(testContent+"\n "), 0644)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Test FileExists (true)
|
||||
assert.True(t, FileExists(testFilePath))
|
||||
|
||||
// Test ReadStringFileOK (true)
|
||||
content, ok = ReadStringFileOK(testFilePath)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, testContent, content)
|
||||
|
||||
// Test ReadStringFile (content)
|
||||
assert.Equal(t, testContent, ReadStringFile(testFilePath))
|
||||
}
|
||||
|
||||
func TestReadUintFile(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
|
||||
t.Run("valid uint", func(t *testing.T) {
|
||||
path := filepath.Join(tmpDir, "uint.txt")
|
||||
os.WriteFile(path, []byte(" 12345\n"), 0644)
|
||||
val, ok := ReadUintFile(path)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, uint64(12345), val)
|
||||
})
|
||||
|
||||
t.Run("invalid uint", func(t *testing.T) {
|
||||
path := filepath.Join(tmpDir, "invalid.txt")
|
||||
os.WriteFile(path, []byte("abc"), 0644)
|
||||
val, ok := ReadUintFile(path)
|
||||
assert.False(t, ok)
|
||||
assert.Equal(t, uint64(0), val)
|
||||
})
|
||||
|
||||
t.Run("missing file", func(t *testing.T) {
|
||||
path := filepath.Join(tmpDir, "missing.txt")
|
||||
val, ok := ReadUintFile(path)
|
||||
assert.False(t, ok)
|
||||
assert.Equal(t, uint64(0), val)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetEnv(t *testing.T) {
|
||||
key := "TEST_VAR"
|
||||
prefixedKey := "BESZEL_AGENT_" + key
|
||||
|
||||
t.Run("prefixed variable exists", func(t *testing.T) {
|
||||
os.Setenv(prefixedKey, "prefixed_val")
|
||||
os.Setenv(key, "unprefixed_val")
|
||||
defer os.Unsetenv(prefixedKey)
|
||||
defer os.Unsetenv(key)
|
||||
|
||||
val, exists := GetEnv(key)
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "prefixed_val", val)
|
||||
})
|
||||
|
||||
t.Run("only unprefixed variable exists", func(t *testing.T) {
|
||||
os.Unsetenv(prefixedKey)
|
||||
os.Setenv(key, "unprefixed_val")
|
||||
defer os.Unsetenv(key)
|
||||
|
||||
val, exists := GetEnv(key)
|
||||
assert.True(t, exists)
|
||||
assert.Equal(t, "unprefixed_val", val)
|
||||
})
|
||||
|
||||
t.Run("neither variable exists", func(t *testing.T) {
|
||||
os.Unsetenv(prefixedKey)
|
||||
os.Unsetenv(key)
|
||||
|
||||
val, exists := GetEnv(key)
|
||||
assert.False(t, exists)
|
||||
assert.Empty(t, val)
|
||||
})
|
||||
}
|
||||
11
agent/zfs/zfs_freebsd.go
Normal file
11
agent/zfs/zfs_freebsd.go
Normal file
@@ -0,0 +1,11 @@
|
||||
//go:build freebsd
|
||||
|
||||
package zfs
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func ARCSize() (uint64, error) {
|
||||
return unix.SysctlUint64("kstat.zfs.misc.arcstats.size")
|
||||
}
|
||||
34
agent/zfs/zfs_linux.go
Normal file
34
agent/zfs/zfs_linux.go
Normal file
@@ -0,0 +1,34 @@
|
||||
//go:build linux
|
||||
|
||||
// Package zfs provides functions to read ZFS statistics.
|
||||
package zfs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ARCSize reads /proc/spl/kstat/zfs/arcstats and returns the value of the
// "size" entry.
//
// Each line is split on whitespace; the entry is the line whose first field is
// exactly "size", and its third field is parsed as a base-10 uint64. An error
// is returned when the file cannot be opened or read, when the entry has fewer
// than three fields or a non-numeric value, or when no such entry exists.
func ARCSize() (uint64, error) {
	file, err := os.Open("/proc/spl/kstat/zfs/arcstats")
	if err != nil {
		return 0, err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.Fields(line)
		// Match the field name exactly so an entry that merely starts with
		// "size" can never be mistaken for the ARC size.
		if len(fields) == 0 || fields[0] != "size" {
			continue
		}
		if len(fields) < 3 {
			return 0, fmt.Errorf("unexpected arcstats size format: %s", line)
		}
		return strconv.ParseUint(fields[2], 10, 64)
	}
	// Distinguish a read failure from a file that simply lacks the entry.
	if err := scanner.Err(); err != nil {
		return 0, err
	}

	return 0, fmt.Errorf("size field not found in arcstats")
}
|
||||
9
agent/zfs/zfs_unsupported.go
Normal file
9
agent/zfs/zfs_unsupported.go
Normal file
@@ -0,0 +1,9 @@
|
||||
//go:build !linux && !freebsd
|
||||
|
||||
package zfs
|
||||
|
||||
import "errors"
|
||||
|
||||
// ARCSize always returns 0 and errors.ErrUnsupported in this build variant
// (selected by the file's !linux && !freebsd build constraint).
func ARCSize() (uint64, error) {
	return 0, errors.ErrUnsupported
}
|
||||
@@ -6,7 +6,7 @@ import "github.com/blang/semver"
|
||||
|
||||
const (
|
||||
// Version is the current version of the application.
|
||||
Version = "0.18.0-beta.1"
|
||||
Version = "0.18.4"
|
||||
// AppName is the name of the application.
|
||||
AppName = "beszel"
|
||||
)
|
||||
|
||||
48
go.mod
48
go.mod
@@ -1,25 +1,27 @@
|
||||
module github.com/henrygd/beszel
|
||||
|
||||
go 1.25.5
|
||||
go 1.26.1
|
||||
|
||||
require (
|
||||
github.com/blang/semver v3.5.1+incompatible
|
||||
github.com/coreos/go-systemd/v22 v22.6.0
|
||||
github.com/coreos/go-systemd/v22 v22.7.0
|
||||
github.com/distatus/battery v0.11.0
|
||||
github.com/ebitengine/purego v0.9.1
|
||||
github.com/fxamacker/cbor/v2 v2.9.0
|
||||
github.com/gliderlabs/ssh v0.3.8
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/lxzan/gws v1.8.9
|
||||
github.com/nicholas-fedor/shoutrrr v0.12.1
|
||||
github.com/pocketbase/dbx v1.11.0
|
||||
github.com/pocketbase/pocketbase v0.34.0
|
||||
github.com/shirou/gopsutil/v4 v4.25.10
|
||||
github.com/nicholas-fedor/shoutrrr v0.13.2
|
||||
github.com/pocketbase/dbx v1.12.0
|
||||
github.com/pocketbase/pocketbase v0.36.4
|
||||
github.com/shirou/gopsutil/v4 v4.26.1
|
||||
github.com/spf13/cast v1.10.0
|
||||
github.com/spf13/cobra v1.10.1
|
||||
github.com/spf13/cobra v1.10.2
|
||||
github.com/spf13/pflag v1.0.10
|
||||
github.com/stretchr/testify v1.11.1
|
||||
golang.org/x/crypto v0.45.0
|
||||
golang.org/x/exp v0.0.0-20251125195548-87e1e737ad39
|
||||
golang.org/x/crypto v0.48.0
|
||||
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa
|
||||
golang.org/x/sys v0.41.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
@@ -31,18 +33,17 @@ require (
|
||||
github.com/dolthub/maphash v0.1.0 // indirect
|
||||
github.com/domodwyer/mailyak/v3 v3.6.2 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/ebitengine/purego v0.9.1 // indirect
|
||||
github.com/fatih/color v1.18.0 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.11 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.13 // indirect
|
||||
github.com/ganigeorgiev/fexpr v0.5.0 // indirect
|
||||
github.com/go-ole/go-ole v1.3.0 // indirect
|
||||
github.com/go-ozzo/ozzo-validation/v4 v4.3.0 // indirect
|
||||
github.com/go-sql-driver/mysql v1.9.1 // indirect
|
||||
github.com/godbus/dbus/v5 v5.2.0 // indirect
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
|
||||
github.com/godbus/dbus/v5 v5.2.2 // indirect
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1 // indirect
|
||||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/klauspost/compress v1.18.1 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect
|
||||
github.com/klauspost/compress v1.18.4 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20260216142805-b3301c5f2a88 // indirect
|
||||
github.com/mattn/go-colorable v0.1.14 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||
@@ -53,16 +54,15 @@ require (
|
||||
github.com/tklauser/numcpus v0.11.0 // indirect
|
||||
github.com/x448/float16 v0.8.4 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||
golang.org/x/image v0.33.0 // indirect
|
||||
golang.org/x/net v0.47.0 // indirect
|
||||
golang.org/x/oauth2 v0.33.0 // indirect
|
||||
golang.org/x/sync v0.18.0 // indirect
|
||||
golang.org/x/sys v0.38.0 // indirect
|
||||
golang.org/x/term v0.37.0 // indirect
|
||||
golang.org/x/text v0.31.0 // indirect
|
||||
golang.org/x/image v0.36.0 // indirect
|
||||
golang.org/x/net v0.50.0 // indirect
|
||||
golang.org/x/oauth2 v0.35.0 // indirect
|
||||
golang.org/x/sync v0.19.0 // indirect
|
||||
golang.org/x/term v0.40.0 // indirect
|
||||
golang.org/x/text v0.34.0 // indirect
|
||||
howett.net/plist v1.0.1 // indirect
|
||||
modernc.org/libc v1.66.10 // indirect
|
||||
modernc.org/libc v1.67.6 // indirect
|
||||
modernc.org/mathutil v1.7.1 // indirect
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
modernc.org/sqlite v1.40.1 // indirect
|
||||
modernc.org/sqlite v1.45.0 // indirect
|
||||
)
|
||||
|
||||
114
go.sum
114
go.sum
@@ -9,8 +9,8 @@ github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3d
|
||||
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
|
||||
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
|
||||
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
|
||||
github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo=
|
||||
github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU=
|
||||
github.com/coreos/go-systemd/v22 v22.7.0 h1:LAEzFkke61DFROc7zNLX/WA2i5J8gYqe0rSj9KI28KA=
|
||||
github.com/coreos/go-systemd/v22 v22.7.0/go.mod h1:xNUYtjHu2EDXbsxz1i41wouACIwT7Ybq9o0BQhMwD0w=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||
@@ -33,8 +33,8 @@ github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHk
|
||||
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
|
||||
github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM=
|
||||
github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
|
||||
github.com/gabriel-vasile/mimetype v1.4.11 h1:AQvxbp830wPhHTqc1u7nzoLT+ZFxGY7emj5DR5DYFik=
|
||||
github.com/gabriel-vasile/mimetype v1.4.11/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
|
||||
github.com/gabriel-vasile/mimetype v1.4.13 h1:46nXokslUBsAJE/wMsp5gtO500a4F3Nkz9Ufpk2AcUM=
|
||||
github.com/gabriel-vasile/mimetype v1.4.13/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
|
||||
github.com/ganigeorgiev/fexpr v0.5.0 h1:XA9JxtTE/Xm+g/JFI6RfZEHSiQlk+1glLvRK1Lpv/Tk=
|
||||
github.com/ganigeorgiev/fexpr v0.5.0/go.mod h1:RyGiGqmeXhEQ6+mlGdnUleLHgtzzu/VGO2WtJkF5drE=
|
||||
github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c=
|
||||
@@ -51,15 +51,15 @@ github.com/go-sql-driver/mysql v1.9.1 h1:FrjNGn/BsJQjVRuSa8CBrM5BWA9BWoXXat3KrtS
|
||||
github.com/go-sql-driver/mysql v1.9.1/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
|
||||
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
|
||||
github.com/godbus/dbus/v5 v5.2.0 h1:3WexO+U+yg9T70v9FdHr9kCxYlazaAXUhx2VMkbfax8=
|
||||
github.com/godbus/dbus/v5 v5.2.0/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||
github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ=
|
||||
github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/pprof v0.0.0-20251114195745-4902fdda35c8 h1:3DsUAV+VNEQa2CUVLxCY3f87278uWfIDhJnbdvDjvmE=
|
||||
github.com/google/pprof v0.0.0-20251114195745-4902fdda35c8/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U=
|
||||
github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 h1:z2ogiKUYzX5Is6zr/vP9vJGqPwcdqsWjOt+V8J7+bTc=
|
||||
github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||
@@ -69,14 +69,14 @@ github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLf
|
||||
github.com/jarcoal/httpmock v1.4.1 h1:0Ju+VCFuARfFlhVXFc2HxlcQkfB+Xq12/EotHko+x2A=
|
||||
github.com/jarcoal/httpmock v1.4.1/go.mod h1:ftW1xULwo+j0R0JJkJIIi7UKigZUXCLLanykgjwBXL0=
|
||||
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
|
||||
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
|
||||
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
|
||||
github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
|
||||
github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 h1:PwQumkgq4/acIiZhtifTV5OUqqiP82UAl0h87xj/l9k=
|
||||
github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg=
|
||||
github.com/lufia/plan9stats v0.0.0-20260216142805-b3301c5f2a88 h1:PTw+yKnXcOFCR6+8hHTyWBeQ/P4Nb7dd4/0ohEcWQuM=
|
||||
github.com/lufia/plan9stats v0.0.0-20260216142805-b3301c5f2a88/go.mod h1:autxFIvghDt3jPTLoqZ9OZ7s9qTGNAWmYCjVFWPX/zg=
|
||||
github.com/lxzan/gws v1.8.9 h1:VU3SGUeWlQrEwfUSfokcZep8mdg/BrUF+y73YYshdBM=
|
||||
github.com/lxzan/gws v1.8.9/go.mod h1:d9yHaR1eDTBHagQC6KY7ycUOaz5KWeqQtP3xu7aMK8Y=
|
||||
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
|
||||
@@ -85,19 +85,19 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||
github.com/nicholas-fedor/shoutrrr v0.12.1 h1:8NjY+I3K7cGHy89ncnaPGUA0ex44XbYK3SAFJX9YMI8=
|
||||
github.com/nicholas-fedor/shoutrrr v0.12.1/go.mod h1:64qWuPpvTUv9ZppEoR6OdroiFmgf9w11YSaR0h9KZGg=
|
||||
github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns=
|
||||
github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
|
||||
github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
|
||||
github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
|
||||
github.com/nicholas-fedor/shoutrrr v0.13.2 h1:hfsYBIqSFYGg92pZP5CXk/g7/OJIkLYmiUnRl+AD1IA=
|
||||
github.com/nicholas-fedor/shoutrrr v0.13.2/go.mod h1:ZqzV3gY/Wj6AvWs1etlO7+yKbh4iptSbeL8avBpMQbA=
|
||||
github.com/onsi/ginkgo/v2 v2.28.1 h1:S4hj+HbZp40fNKuLUQOYLDgZLwNUVn19N3Atb98NCyI=
|
||||
github.com/onsi/ginkgo/v2 v2.28.1/go.mod h1:CLtbVInNckU3/+gC8LzkGUb9oF+e8W8TdUsxPwvdOgE=
|
||||
github.com/onsi/gomega v1.39.1 h1:1IJLAad4zjPn2PsnhH70V4DKRFlrCzGBNrNaru+Vf28=
|
||||
github.com/onsi/gomega v1.39.1/go.mod h1:hL6yVALoTOxeWudERyfppUcZXjMwIMLnuSfruD2lcfg=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pocketbase/dbx v1.11.0 h1:LpZezioMfT3K4tLrqA55wWFw1EtH1pM4tzSVa7kgszU=
|
||||
github.com/pocketbase/dbx v1.11.0/go.mod h1:xXRCIAKTHMgUCyCKZm55pUOdvFziJjQfXaWKhu2vhMs=
|
||||
github.com/pocketbase/pocketbase v0.34.0 h1:5W80PrGvkRYIMAIK90F7w031/hXgZVz1KSuCJqSpgJo=
|
||||
github.com/pocketbase/pocketbase v0.34.0/go.mod h1:K/9z/Zb9PR9yW2Qyoc73jHV/EKT8cMTk9bQWyrzYlvI=
|
||||
github.com/pocketbase/dbx v1.12.0 h1:/oLErM+A0b4xI0PWTGPqSDVjzix48PqI/bng2l0PzoA=
|
||||
github.com/pocketbase/dbx v1.12.0/go.mod h1:xXRCIAKTHMgUCyCKZm55pUOdvFziJjQfXaWKhu2vhMs=
|
||||
github.com/pocketbase/pocketbase v0.36.4 h1:zTjRZbp2WfTOJJfb+pFRWa200UaQwxZYt8RzkFMlAZ4=
|
||||
github.com/pocketbase/pocketbase v0.36.4/go.mod h1:9CiezhRudd9FZGa5xZa53QZBTNxc5vvw/FGG+diAECI=
|
||||
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt9k/+g42oCprj/FisM4qX9L3sZB3upGN2ZU=
|
||||
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
@@ -105,12 +105,12 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qq
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/shirou/gopsutil/v4 v4.25.10 h1:at8lk/5T1OgtuCp+AwrDofFRjnvosn0nkN2OLQ6g8tA=
|
||||
github.com/shirou/gopsutil/v4 v4.25.10/go.mod h1:+kSwyC8DRUD9XXEHCAFjK+0nuArFJM0lva+StQAcskM=
|
||||
github.com/shirou/gopsutil/v4 v4.26.1 h1:TOkEyriIXk2HX9d4isZJtbjXbEjf5qyKPAzbzY0JWSo=
|
||||
github.com/shirou/gopsutil/v4 v4.26.1/go.mod h1:medLI9/UNAb0dOI9Q3/7yWSqKkj00u+1tgY8nvv41pc=
|
||||
github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
|
||||
github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
|
||||
github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s=
|
||||
github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0=
|
||||
github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
|
||||
github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
|
||||
github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
|
||||
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
@@ -129,41 +129,41 @@ github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQ
|
||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
|
||||
golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
|
||||
golang.org/x/exp v0.0.0-20251125195548-87e1e737ad39 h1:DHNhtq3sNNzrvduZZIiFyXWOL9IWaDPHqTnLJp+rCBY=
|
||||
golang.org/x/exp v0.0.0-20251125195548-87e1e737ad39/go.mod h1:46edojNIoXTNOhySWIWdix628clX9ODXwPsQuG6hsK0=
|
||||
golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
|
||||
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
|
||||
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa h1:Zt3DZoOFFYkKhDT3v7Lm9FDMEV06GpzjG2jrqW+QTE0=
|
||||
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa/go.mod h1:K79w1Vqn7PoiZn+TkNpx3BUWUQksGO3JcVX6qIjytmA=
|
||||
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
golang.org/x/image v0.33.0 h1:LXRZRnv1+zGd5XBUVRFmYEphyyKJjQjCRiOuAP3sZfQ=
|
||||
golang.org/x/image v0.33.0/go.mod h1:DD3OsTYT9chzuzTQt+zMcOlBHgfoKQb1gry8p76Y1sc=
|
||||
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
|
||||
golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
|
||||
golang.org/x/image v0.36.0 h1:Iknbfm1afbgtwPTmHnS2gTM/6PPZfH+z2EFuOkSbqwc=
|
||||
golang.org/x/image v0.36.0/go.mod h1:YsWD2TyyGKiIX1kZlu9QfKIsQ4nAAK9bdgdrIsE7xy4=
|
||||
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
|
||||
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
|
||||
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
|
||||
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
|
||||
golang.org/x/oauth2 v0.33.0 h1:4Q+qn+E5z8gPRJfmRy7C2gGG3T4jIprK6aSYgTXGRpo=
|
||||
golang.org/x/oauth2 v0.33.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
|
||||
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60=
|
||||
golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM=
|
||||
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
|
||||
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
|
||||
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
|
||||
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
|
||||
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
|
||||
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
|
||||
golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/term v0.40.0 h1:36e4zGLqU4yhjlmxEaagx2KuYbJq3EwY8K943ZsHcvg=
|
||||
golang.org/x/term v0.40.0/go.mod h1:w2P8uVp06p2iyKKuvXIm7N/y0UCRt3UfJTfZ7oOpglM=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
|
||||
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
|
||||
golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk=
|
||||
golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ=
|
||||
golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
|
||||
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
|
||||
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
|
||||
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||
google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
|
||||
google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
@@ -185,10 +185,8 @@ modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
|
||||
modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
|
||||
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
|
||||
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
|
||||
modernc.org/libc v1.66.10 h1:yZkb3YeLx4oynyR+iUsXsybsX4Ubx7MQlSYEw4yj59A=
|
||||
modernc.org/libc v1.66.10/go.mod h1:8vGSEwvoUoltr4dlywvHqjtAqHBaw0j1jI7iFBTAr2I=
|
||||
modernc.org/libc v1.67.1 h1:bFaqOaa5/zbWYJo8aW0tXPX21hXsngG2M7mckCnFSVk=
|
||||
modernc.org/libc v1.67.1/go.mod h1:QvvnnJ5P7aitu0ReNpVIEyesuhmDLQ8kaEoyMjIFZJA=
|
||||
modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
|
||||
modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
|
||||
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
||||
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
||||
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
|
||||
@@ -197,8 +195,8 @@ modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
|
||||
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
|
||||
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
|
||||
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
|
||||
modernc.org/sqlite v1.40.1 h1:VfuXcxcUWWKRBuP8+BR9L7VnmusMgBNNnBYGEe9w/iY=
|
||||
modernc.org/sqlite v1.40.1/go.mod h1:9fjQZ0mB1LLP0GYrp39oOJXx/I2sxEnZtzCmEQIKvGE=
|
||||
modernc.org/sqlite v1.45.0 h1:r51cSGzKpbptxnby+EIIz5fop4VuE4qFoVEjNvWoObs=
|
||||
modernc.org/sqlite v1.45.0/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
|
||||
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
|
||||
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
|
||||
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
|
||||
|
||||
@@ -21,9 +21,9 @@ type hubLike interface {
|
||||
|
||||
type AlertManager struct {
|
||||
hub hubLike
|
||||
alertQueue chan alertTask
|
||||
stopChan chan struct{}
|
||||
stopOnce sync.Once
|
||||
pendingAlerts sync.Map
|
||||
alertsCache *AlertsCache
|
||||
}
|
||||
|
||||
type AlertMessageData struct {
|
||||
@@ -40,16 +40,22 @@ type UserNotificationSettings struct {
|
||||
Webhooks []string `json:"webhooks"`
|
||||
}
|
||||
|
||||
type SystemAlertFsStats struct {
|
||||
DiskTotal float64 `json:"d"`
|
||||
DiskUsed float64 `json:"du"`
|
||||
}
|
||||
|
||||
// Values pulled from system_stats.stats that are relevant to alerts.
|
||||
type SystemAlertStats struct {
|
||||
Cpu float64 `json:"cpu"`
|
||||
Mem float64 `json:"mp"`
|
||||
Disk float64 `json:"dp"`
|
||||
NetSent float64 `json:"ns"`
|
||||
NetRecv float64 `json:"nr"`
|
||||
Bandwidth [2]uint64 `json:"b"`
|
||||
GPU map[string]SystemAlertGPUData `json:"g"`
|
||||
Temperatures map[string]float32 `json:"t"`
|
||||
LoadAvg [3]float64 `json:"la"`
|
||||
Battery [2]uint8 `json:"bat"`
|
||||
ExtraFs map[string]SystemAlertFsStats `json:"efs"`
|
||||
}
|
||||
|
||||
type SystemAlertGPUData struct {
|
||||
@@ -58,7 +64,7 @@ type SystemAlertGPUData struct {
|
||||
|
||||
type SystemAlertData struct {
|
||||
systemRecord *core.Record
|
||||
alertRecord *core.Record
|
||||
alertData CachedAlertData
|
||||
name string
|
||||
unit string
|
||||
val float64
|
||||
@@ -93,11 +99,9 @@ var supportsTitle = map[string]struct{}{
|
||||
func NewAlertManager(app hubLike) *AlertManager {
|
||||
am := &AlertManager{
|
||||
hub: app,
|
||||
alertQueue: make(chan alertTask, 5),
|
||||
stopChan: make(chan struct{}),
|
||||
alertsCache: NewAlertsCache(app),
|
||||
}
|
||||
am.bindEvents()
|
||||
go am.startWorker()
|
||||
return am
|
||||
}
|
||||
|
||||
@@ -106,6 +110,19 @@ func (am *AlertManager) bindEvents() {
|
||||
am.hub.OnRecordAfterUpdateSuccess("alerts").BindFunc(updateHistoryOnAlertUpdate)
|
||||
am.hub.OnRecordAfterDeleteSuccess("alerts").BindFunc(resolveHistoryOnAlertDelete)
|
||||
am.hub.OnRecordAfterUpdateSuccess("smart_devices").BindFunc(am.handleSmartDeviceAlert)
|
||||
|
||||
am.hub.OnServe().BindFunc(func(e *core.ServeEvent) error {
|
||||
// Populate all alerts into cache on startup
|
||||
_ = am.alertsCache.PopulateFromDB(true)
|
||||
|
||||
if err := resolveStatusAlerts(e.App); err != nil {
|
||||
e.App.Logger().Error("Failed to resolve stale status alerts", "err", err)
|
||||
}
|
||||
if err := am.restorePendingStatusAlerts(); err != nil {
|
||||
e.App.Logger().Error("Failed to restore pending status alerts", "err", err)
|
||||
}
|
||||
return e.Next()
|
||||
})
|
||||
}
|
||||
|
||||
// IsNotificationSilenced checks if a notification should be silenced based on configured quiet hours
|
||||
@@ -259,13 +276,14 @@ func (am *AlertManager) SendShoutrrrAlert(notificationUrl, title, message, link,
|
||||
}
|
||||
|
||||
// Add link
|
||||
if scheme == "ntfy" {
|
||||
switch scheme {
|
||||
case "ntfy":
|
||||
queryParams.Add("Actions", fmt.Sprintf("view, %s, %s", linkText, link))
|
||||
} else if scheme == "lark" {
|
||||
case "lark":
|
||||
queryParams.Add("link", link)
|
||||
} else if scheme == "bark" {
|
||||
case "bark":
|
||||
queryParams.Add("url", link)
|
||||
} else {
|
||||
default:
|
||||
message += "\n\n" + link
|
||||
}
|
||||
|
||||
@@ -298,3 +316,13 @@ func (am *AlertManager) SendTestNotification(e *core.RequestEvent) error {
|
||||
}
|
||||
return e.JSON(200, map[string]bool{"err": false})
|
||||
}
|
||||
|
||||
// setAlertTriggered updates the "triggered" status of an alert record in the database
|
||||
func (am *AlertManager) setAlertTriggered(alert CachedAlertData, triggered bool) error {
|
||||
alertRecord, err := am.hub.FindRecordById("alerts", alert.Id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
alertRecord.Set("triggered", triggered)
|
||||
return am.hub.Save(alertRecord)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
|
||||
177
internal/alerts/alerts_cache.go
Normal file
177
internal/alerts/alerts_cache.go
Normal file
@@ -0,0 +1,177 @@
|
||||
package alerts
|
||||
|
||||
import (
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/store"
|
||||
)
|
||||
|
||||
// CachedAlertData is an in-memory snapshot of the fields of an "alerts" record
// that are needed for status checking and updates.
type CachedAlertData struct {
	// Id is the record ID of the alert.
	Id string
	// SystemID holds the record's "system" field.
	SystemID string
	// UserID holds the record's "user" field.
	UserID string
	// Name holds the record's "name" field.
	Name string
	// Value holds the record's "value" field.
	Value float64
	// Triggered holds the record's "triggered" field.
	Triggered bool
	// Min holds the record's "min" field as a uint8.
	Min uint8
	// Created types.DateTime
}
|
||||
|
||||
func (a *CachedAlertData) PopulateFromRecord(record *core.Record) {
|
||||
a.Id = record.Id
|
||||
a.SystemID = record.GetString("system")
|
||||
a.UserID = record.GetString("user")
|
||||
a.Name = record.GetString("name")
|
||||
a.Value = record.GetFloat("value")
|
||||
a.Triggered = record.GetBool("triggered")
|
||||
a.Min = uint8(record.GetInt("min"))
|
||||
// a.Created = record.GetDateTime("created")
|
||||
}
|
||||
|
||||
// AlertsCache provides an in-memory cache for system alerts.
|
||||
type AlertsCache struct {
|
||||
app core.App
|
||||
store *store.Store[string, *store.Store[string, CachedAlertData]]
|
||||
populated bool
|
||||
}
|
||||
|
||||
// NewAlertsCache creates a new instance of SystemAlertsCache.
|
||||
func NewAlertsCache(app core.App) *AlertsCache {
|
||||
c := AlertsCache{
|
||||
app: app,
|
||||
store: store.New(map[string]*store.Store[string, CachedAlertData]{}),
|
||||
}
|
||||
return c.bindEvents()
|
||||
}
|
||||
|
||||
// bindEvents sets up event listeners to keep the cache in sync with database changes.
|
||||
func (c *AlertsCache) bindEvents() *AlertsCache {
|
||||
c.app.OnRecordAfterUpdateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
// c.Delete(e.Record.Original()) // this would be needed if the system field on an existing alert was changed, however we don't currently allow that in the UI so we'll leave it commented out
|
||||
c.Update(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
c.app.OnRecordAfterDeleteSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
c.Delete(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
c.app.OnRecordAfterCreateSuccess("alerts").BindFunc(func(e *core.RecordEvent) error {
|
||||
c.Update(e.Record)
|
||||
return e.Next()
|
||||
})
|
||||
return c
|
||||
}
|
||||
|
||||
// PopulateFromDB clears current entries and loads all alerts from the database into the cache.
|
||||
func (c *AlertsCache) PopulateFromDB(force bool) error {
|
||||
if !force && c.populated {
|
||||
return nil
|
||||
}
|
||||
records, err := c.app.FindAllRecords("alerts")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.store.RemoveAll()
|
||||
for _, record := range records {
|
||||
c.Update(record)
|
||||
}
|
||||
c.populated = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update adds or updates an alert record in the cache.
|
||||
func (c *AlertsCache) Update(record *core.Record) {
|
||||
systemID := record.GetString("system")
|
||||
if systemID == "" {
|
||||
return
|
||||
}
|
||||
systemStore, ok := c.store.GetOk(systemID)
|
||||
if !ok {
|
||||
systemStore = store.New(map[string]CachedAlertData{})
|
||||
c.store.Set(systemID, systemStore)
|
||||
}
|
||||
var ca CachedAlertData
|
||||
ca.PopulateFromRecord(record)
|
||||
systemStore.Set(record.Id, ca)
|
||||
}
|
||||
|
||||
// Delete removes an alert record from the cache.
|
||||
func (c *AlertsCache) Delete(record *core.Record) {
|
||||
systemID := record.GetString("system")
|
||||
if systemID == "" {
|
||||
return
|
||||
}
|
||||
if systemStore, ok := c.store.GetOk(systemID); ok {
|
||||
systemStore.Remove(record.Id)
|
||||
}
|
||||
}
|
||||
|
||||
// GetSystemAlerts returns all alerts for the specified system, lazy-loading if necessary.
|
||||
func (c *AlertsCache) GetSystemAlerts(systemID string) []CachedAlertData {
|
||||
systemStore, ok := c.store.GetOk(systemID)
|
||||
if !ok {
|
||||
// Populate cache for this system
|
||||
records, err := c.app.FindAllRecords("alerts", dbx.NewExp("system={:system}", dbx.Params{"system": systemID}))
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
systemStore = store.New(map[string]CachedAlertData{})
|
||||
for _, record := range records {
|
||||
var ca CachedAlertData
|
||||
ca.PopulateFromRecord(record)
|
||||
systemStore.Set(record.Id, ca)
|
||||
}
|
||||
c.store.Set(systemID, systemStore)
|
||||
}
|
||||
all := systemStore.GetAll()
|
||||
alerts := make([]CachedAlertData, 0, len(all))
|
||||
for _, alert := range all {
|
||||
alerts = append(alerts, alert)
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlert returns a specific alert by its ID from the cache.
|
||||
func (c *AlertsCache) GetAlert(systemID, alertID string) (CachedAlertData, bool) {
|
||||
if systemStore, ok := c.store.GetOk(systemID); ok {
|
||||
return systemStore.GetOk(alertID)
|
||||
}
|
||||
return CachedAlertData{}, false
|
||||
}
|
||||
|
||||
// GetAlertsByName returns all alerts of a specific type for the specified system.
|
||||
func (c *AlertsCache) GetAlertsByName(systemID, alertName string) []CachedAlertData {
|
||||
allAlerts := c.GetSystemAlerts(systemID)
|
||||
var alerts []CachedAlertData
|
||||
for _, record := range allAlerts {
|
||||
if record.Name == alertName {
|
||||
alerts = append(alerts, record)
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// GetAlertsExcludingNames returns all alerts for the specified system excluding the given types.
|
||||
func (c *AlertsCache) GetAlertsExcludingNames(systemID string, excludedNames ...string) []CachedAlertData {
|
||||
excludeMap := make(map[string]struct{})
|
||||
for _, name := range excludedNames {
|
||||
excludeMap[name] = struct{}{}
|
||||
}
|
||||
allAlerts := c.GetSystemAlerts(systemID)
|
||||
var alerts []CachedAlertData
|
||||
for _, record := range allAlerts {
|
||||
if _, excluded := excludeMap[record.Name]; !excluded {
|
||||
alerts = append(alerts, record)
|
||||
}
|
||||
}
|
||||
return alerts
|
||||
}
|
||||
|
||||
// Refresh returns the latest cached copy for an alert snapshot if it still exists.
|
||||
func (c *AlertsCache) Refresh(alert CachedAlertData) (CachedAlertData, bool) {
|
||||
if alert.Id == "" {
|
||||
return CachedAlertData{}, false
|
||||
}
|
||||
return c.GetAlert(alert.SystemID, alert.Id)
|
||||
}
|
||||
215
internal/alerts/alerts_cache_test.go
Normal file
215
internal/alerts/alerts_cache_test.go
Normal file
@@ -0,0 +1,215 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSystemAlertsCachePopulateAndFilter(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system1 := systems[0]
|
||||
system2 := systems[1]
|
||||
|
||||
statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system1.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system1.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
memoryAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Memory",
|
||||
"system": system2.Id,
|
||||
"user": user.Id,
|
||||
"value": 90,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
cache.PopulateFromDB(false)
|
||||
|
||||
statusAlerts := cache.GetAlertsByName(system1.Id, "Status")
|
||||
require.Len(t, statusAlerts, 1)
|
||||
assert.Equal(t, statusAlert.Id, statusAlerts[0].Id)
|
||||
|
||||
nonStatusAlerts := cache.GetAlertsExcludingNames(system1.Id, "Status")
|
||||
require.Len(t, nonStatusAlerts, 1)
|
||||
assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
|
||||
|
||||
system2Alerts := cache.GetSystemAlerts(system2.Id)
|
||||
require.Len(t, system2Alerts, 1)
|
||||
assert.Equal(t, memoryAlert.Id, system2Alerts[0].Id)
|
||||
}
|
||||
|
||||
func TestSystemAlertsCacheLazyLoadUpdateAndDelete(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
statusAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
require.Len(t, cache.GetSystemAlerts(systemRecord.Id), 1, "first lookup should lazy-load alerts for the system")
|
||||
|
||||
cpuAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache.Update(cpuAlert)
|
||||
|
||||
nonStatusAlerts := cache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
require.Len(t, nonStatusAlerts, 1)
|
||||
assert.Equal(t, cpuAlert.Id, nonStatusAlerts[0].Id)
|
||||
|
||||
cache.Delete(statusAlert)
|
||||
assert.Empty(t, cache.GetAlertsByName(systemRecord.Id, "Status"), "deleted alerts should be removed from the in-memory cache")
|
||||
}
|
||||
|
||||
func TestSystemAlertsCacheRefreshReturnsLatestCopy(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cache := alerts.NewAlertsCache(hub)
|
||||
snapshot := cache.GetSystemAlerts(system.Id)[0]
|
||||
assert.False(t, snapshot.Triggered)
|
||||
|
||||
alert.Set("triggered", true)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
refreshed, ok := cache.Refresh(snapshot)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, snapshot.Id, refreshed.Id)
|
||||
assert.True(t, refreshed.Triggered, "refresh should return the updated cached value rather than the stale snapshot")
|
||||
|
||||
require.NoError(t, hub.Delete(alert))
|
||||
_, ok = cache.Refresh(snapshot)
|
||||
assert.False(t, ok, "refresh should report false when the cached alert no longer exists")
|
||||
}
|
||||
|
||||
func TestAlertManagerCacheLifecycle(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Create an alert
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"value": 80,
|
||||
"min": 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
am := hub.AlertManager
|
||||
cache := am.GetSystemAlertsCache()
|
||||
|
||||
// Verify it's in cache (it should be since CreateRecord triggers the event)
|
||||
assert.Len(t, cache.GetSystemAlerts(system.Id), 1)
|
||||
assert.Equal(t, alert.Id, cache.GetSystemAlerts(system.Id)[0].Id)
|
||||
assert.EqualValues(t, 80, cache.GetSystemAlerts(system.Id)[0].Value)
|
||||
|
||||
// Update the alert through PocketBase to trigger events
|
||||
alert.Set("value", 85)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
// Check if updated value is reflected (or at least that it's still there)
|
||||
cachedAlerts := cache.GetSystemAlerts(system.Id)
|
||||
assert.Len(t, cachedAlerts, 1)
|
||||
assert.EqualValues(t, 85, cachedAlerts[0].Value)
|
||||
|
||||
// Delete the alert through PocketBase to trigger events
|
||||
require.NoError(t, hub.Delete(alert))
|
||||
|
||||
// Verify it's removed from cache
|
||||
assert.Empty(t, cache.GetSystemAlerts(system.Id), "alert should be removed from cache after PocketBase delete")
|
||||
}
|
||||
|
||||
// func TestAlertManagerCacheMovesAlertToNewSystemOnUpdate(t *testing.T) {
|
||||
// hub, user := beszelTests.GetHubWithUser(t)
|
||||
// defer hub.Cleanup()
|
||||
|
||||
// systems, err := beszelTests.CreateSystems(hub, 2, user.Id, "up")
|
||||
// require.NoError(t, err)
|
||||
// system1 := systems[0]
|
||||
// system2 := systems[1]
|
||||
|
||||
// alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
// "name": "CPU",
|
||||
// "system": system1.Id,
|
||||
// "user": user.Id,
|
||||
// "value": 80,
|
||||
// "min": 1,
|
||||
// })
|
||||
// require.NoError(t, err)
|
||||
|
||||
// am := hub.AlertManager
|
||||
// cache := am.GetSystemAlertsCache()
|
||||
|
||||
// // Initially in system1 cache
|
||||
// assert.Len(t, cache.Get(system1.Id), 1)
|
||||
// assert.Empty(t, cache.Get(system2.Id))
|
||||
|
||||
// // Move alert to system2
|
||||
// alert.Set("system", system2.Id)
|
||||
// require.NoError(t, hub.Save(alert))
|
||||
|
||||
// // DEBUG: print if it is found
|
||||
// // fmt.Printf("system1 alerts after update: %v\n", cache.Get(system1.Id))
|
||||
|
||||
// // Should be removed from system1 and present in system2
|
||||
// assert.Empty(t, cache.GetType(system1.Id, "CPU"), "updated alerts should be evicted from the previous system cache")
|
||||
// require.Len(t, cache.Get(system2.Id), 1)
|
||||
// assert.Equal(t, alert.Id, cache.Get(system2.Id)[0].Id)
|
||||
// }
|
||||
155
internal/alerts/alerts_disk_test.go
Normal file
155
internal/alerts/alerts_disk_test.go
Normal file
@@ -0,0 +1,155 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestDiskAlertExtraFsMultiMinute tests that multi-minute disk alerts correctly use
|
||||
// historical per-minute values for extra (non-root) filesystems, not the current live snapshot.
|
||||
func TestDiskAlertExtraFsMultiMinute(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
// Disk alert: threshold 80%, min=2 (requires historical averaging)
|
||||
diskAlert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Disk",
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"value": 80, // threshold: 80%
|
||||
"min": 2, // 2 minutes - requires historical averaging
|
||||
})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, diskAlert.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
am := hub.GetAlertManager()
|
||||
now := time.Now().UTC()
|
||||
|
||||
extraFsHigh := map[string]*system.FsStats{
|
||||
"/mnt/data": {DiskTotal: 1000, DiskUsed: 920}, // 92% - above threshold
|
||||
}
|
||||
|
||||
// Insert 4 historical records spread over 3 minutes (same pattern as battery tests).
|
||||
// The oldest record must predate (now - 2min) so the alert time window is valid.
|
||||
recordTimes := []time.Duration{
|
||||
-180 * time.Second, // 3 min ago - anchors oldest record before alert.time
|
||||
-90 * time.Second,
|
||||
-60 * time.Second,
|
||||
-30 * time.Second,
|
||||
}
|
||||
|
||||
for _, offset := range recordTimes {
|
||||
stats := system.Stats{
|
||||
DiskPct: 30, // root disk at 30% - below threshold
|
||||
ExtraFs: extraFsHigh,
|
||||
}
|
||||
statsJSON, _ := json.Marshal(stats)
|
||||
|
||||
recordTime := now.Add(offset)
|
||||
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||
err = hub.SaveNoValidate(record)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
combinedDataHigh := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsHigh,
|
||||
},
|
||||
Info: system.Info{
|
||||
DiskPct: 30,
|
||||
},
|
||||
}
|
||||
|
||||
systemRecord.Set("updated", now)
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataHigh)
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, diskAlert.GetBool("triggered"),
|
||||
"Alert SHOULD be triggered when extra disk average (92%%) exceeds threshold (80%%)")
|
||||
|
||||
// --- Resolution: extra disk drops to 50%, alert should resolve ---
|
||||
|
||||
extraFsLow := map[string]*system.FsStats{
|
||||
"/mnt/data": {DiskTotal: 1000, DiskUsed: 500}, // 50% - below threshold
|
||||
}
|
||||
|
||||
newNow := now.Add(2 * time.Minute)
|
||||
recordTimesLow := []time.Duration{
|
||||
-180 * time.Second,
|
||||
-90 * time.Second,
|
||||
-60 * time.Second,
|
||||
-30 * time.Second,
|
||||
}
|
||||
|
||||
for _, offset := range recordTimesLow {
|
||||
stats := system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsLow,
|
||||
}
|
||||
statsJSON, _ := json.Marshal(stats)
|
||||
|
||||
recordTime := newNow.Add(offset)
|
||||
record, err := beszelTests.CreateRecord(hub, "system_stats", map[string]any{
|
||||
"system": systemRecord.Id,
|
||||
"type": "1m",
|
||||
"stats": string(statsJSON),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
record.SetRaw("created", recordTime.Format(types.DefaultDateLayout))
|
||||
err = hub.SaveNoValidate(record)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
combinedDataLow := &system.CombinedData{
|
||||
Stats: system.Stats{
|
||||
DiskPct: 30,
|
||||
ExtraFs: extraFsLow,
|
||||
},
|
||||
Info: system.Info{
|
||||
DiskPct: 30,
|
||||
},
|
||||
}
|
||||
|
||||
systemRecord.Set("updated", newNow)
|
||||
err = hub.SaveNoValidate(systemRecord)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = am.HandleSystemAlerts(systemRecord, combinedDataLow)
|
||||
require.NoError(t, err)
|
||||
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
|
||||
diskAlert, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": diskAlert.Id})
|
||||
require.NoError(t, err)
|
||||
assert.False(t, diskAlert.GetBool("triggered"),
|
||||
"Alert should be resolved when extra disk average (50%%) drops below threshold (80%%)")
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
@@ -50,7 +49,7 @@ func TestAlertSilencedOneTime(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Test that alert is silenced
|
||||
silenced := am.IsNotificationSilenced(user.Id, system.Id)
|
||||
@@ -107,7 +106,7 @@ func TestAlertSilencedDaily(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Get current hour and create a window that includes current time
|
||||
now := time.Now().UTC()
|
||||
@@ -171,7 +170,7 @@ func TestAlertSilencedDailyMidnightCrossing(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Create a window that crosses midnight: 22:00 - 02:00
|
||||
startTime := time.Date(2000, 1, 1, 22, 0, 0, 0, time.UTC)
|
||||
@@ -212,7 +211,7 @@ func TestAlertSilencedGlobal(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Create a global quiet hours window (no system specified)
|
||||
now := time.Now().UTC()
|
||||
@@ -251,7 +250,7 @@ func TestAlertSilencedSystemSpecific(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Create a system-specific quiet hours window for system1 only
|
||||
now := time.Now().UTC()
|
||||
@@ -297,7 +296,7 @@ func TestAlertSilencedMultiUser(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Create a quiet hours window for user1 only
|
||||
now := time.Now().UTC()
|
||||
@@ -418,7 +417,7 @@ func TestAlertSilencedNoWindows(t *testing.T) {
|
||||
|
||||
// Get alert manager
|
||||
am := alerts.NewAlertManager(hub)
|
||||
defer am.StopWorker()
|
||||
defer am.Stop()
|
||||
|
||||
// Without any quiet hours windows, alert should NOT be silenced
|
||||
silenced := am.IsNotificationSilenced(user.Id, system.Id)
|
||||
|
||||
@@ -2,18 +2,18 @@ package alerts
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// handleSmartDeviceAlert sends alerts when a SMART device state changes from PASSED to FAILED.
|
||||
// handleSmartDeviceAlert sends alerts when a SMART device state worsens into WARNING/FAILED.
|
||||
// This is automatic and does not require user opt-in.
|
||||
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||
oldState := e.Record.Original().GetString("state")
|
||||
newState := e.Record.GetString("state")
|
||||
|
||||
// Only alert when transitioning from PASSED to FAILED
|
||||
if oldState != "PASSED" || newState != "FAILED" {
|
||||
if !shouldSendSmartDeviceAlert(oldState, newState) {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
@@ -32,14 +32,15 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||
systemName := systemRecord.GetString("name")
|
||||
deviceName := e.Record.GetString("name")
|
||||
model := e.Record.GetString("model")
|
||||
statusLabel := smartStateLabel(newState)
|
||||
|
||||
// Build alert message
|
||||
title := fmt.Sprintf("SMART failure on %s: %s \U0001F534", systemName, deviceName)
|
||||
title := fmt.Sprintf("SMART %s on %s: %s %s", statusLabel, systemName, deviceName, smartStateEmoji(newState))
|
||||
var message string
|
||||
if model != "" {
|
||||
message = fmt.Sprintf("Disk %s (%s) SMART status changed to FAILED", deviceName, model)
|
||||
message = fmt.Sprintf("Disk %s (%s) SMART status changed to %s", deviceName, model, newState)
|
||||
} else {
|
||||
message = fmt.Sprintf("Disk %s SMART status changed to FAILED", deviceName)
|
||||
message = fmt.Sprintf("Disk %s SMART status changed to %s", deviceName, newState)
|
||||
}
|
||||
|
||||
// Get users associated with the system
|
||||
@@ -65,3 +66,42 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
func shouldSendSmartDeviceAlert(oldState, newState string) bool {
|
||||
oldSeverity := smartStateSeverity(oldState)
|
||||
newSeverity := smartStateSeverity(newState)
|
||||
|
||||
// Ignore unknown states and recoveries; only alert on worsening transitions
|
||||
// from known-good/degraded states into WARNING/FAILED.
|
||||
return oldSeverity >= 1 && newSeverity > oldSeverity
|
||||
}
|
||||
|
||||
// smartStateSeverity ranks a SMART state string: "PASSED" is 1, "WARNING" is 2
// and "FAILED" is 3. Any other value, including the empty string, ranks 0.
// The match is case-sensitive.
func smartStateSeverity(state string) int {
	severity := 0
	switch state {
	case "FAILED":
		severity = 3
	case "WARNING":
		severity = 2
	case "PASSED":
		severity = 1
	}
	return severity
}
|
||||
|
||||
// smartStateEmoji returns the emoji appended to a SMART alert title: an orange
// circle (U+1F7E0) for "WARNING" and a red circle (U+1F534) for every other
// state.
func smartStateEmoji(state string) string {
	if state == "WARNING" {
		return "\U0001F7E0"
	}
	return "\U0001F534"
}
|
||||
|
||||
// smartStateLabel returns the word used for a SMART state in alert titles:
// "failure" for "FAILED", and the lower-cased state for anything else.
func smartStateLabel(state string) string {
	if state == "FAILED" {
		return "failure"
	}
	return strings.ToLower(state)
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
@@ -58,6 +57,74 @@ func TestSmartDeviceAlert(t *testing.T) {
|
||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertPassedToWarning(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/mmcblk0",
|
||||
"model": "eMMC",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice.Set("state", "WARNING")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to WARNING")
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.Contains(t, lastMessage.Subject, "SMART warning on test-system")
|
||||
assert.Contains(t, lastMessage.Text, "WARNING")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertWarningToFailed(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/mmcblk0",
|
||||
"model": "eMMC",
|
||||
"state": "WARNING",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed from WARNING to FAILED")
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
|
||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
@@ -83,7 +150,8 @@ func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the state from UNKNOWN to FAILED - should NOT trigger alert
|
||||
// Update the state from UNKNOWN to FAILED - should NOT trigger alert.
|
||||
// We only alert from known healthy/degraded states.
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
@@ -5,67 +5,28 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
type alertTask struct {
|
||||
action string // "schedule" or "cancel"
|
||||
systemName string
|
||||
alertRecord *core.Record
|
||||
delay time.Duration
|
||||
}
|
||||
|
||||
type alertInfo struct {
|
||||
systemName string
|
||||
alertRecord *core.Record
|
||||
alertData CachedAlertData
|
||||
expireTime time.Time
|
||||
timer *time.Timer
|
||||
}
|
||||
|
||||
// startWorker is a long-running goroutine that processes alert tasks
|
||||
// every x seconds. It must be running to process status alerts.
|
||||
func (am *AlertManager) startWorker() {
|
||||
processPendingAlerts := time.Tick(15 * time.Second)
|
||||
|
||||
// check for status alerts that are not resolved when system comes up
|
||||
// (can be removed if we figure out core bug in #1052)
|
||||
checkStatusAlerts := time.Tick(561 * time.Second)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-am.stopChan:
|
||||
return
|
||||
case task := <-am.alertQueue:
|
||||
switch task.action {
|
||||
case "schedule":
|
||||
am.pendingAlerts.Store(task.alertRecord.Id, &alertInfo{
|
||||
systemName: task.systemName,
|
||||
alertRecord: task.alertRecord,
|
||||
expireTime: time.Now().Add(task.delay),
|
||||
})
|
||||
case "cancel":
|
||||
am.pendingAlerts.Delete(task.alertRecord.Id)
|
||||
}
|
||||
case <-checkStatusAlerts:
|
||||
resolveStatusAlerts(am.hub)
|
||||
case <-processPendingAlerts:
|
||||
// Check for expired alerts every tick
|
||||
now := time.Now()
|
||||
for key, value := range am.pendingAlerts.Range {
|
||||
// Stop cancels all pending status alert timers.
|
||||
func (am *AlertManager) Stop() {
|
||||
am.stopOnce.Do(func() {
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
info := value.(*alertInfo)
|
||||
if now.After(info.expireTime) {
|
||||
// Downtime delay has passed, process alert
|
||||
am.sendStatusAlert("down", info.systemName, info.alertRecord)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
am.pendingAlerts.Delete(key)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// StopWorker shuts down the AlertManager.worker goroutine
|
||||
func (am *AlertManager) StopWorker() {
|
||||
close(am.stopChan)
|
||||
return true
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// HandleStatusAlerts manages the logic when system status changes.
|
||||
@@ -74,82 +35,104 @@ func (am *AlertManager) HandleStatusAlerts(newStatus string, systemRecord *core.
|
||||
return nil
|
||||
}
|
||||
|
||||
alertRecords, err := am.getSystemStatusAlerts(systemRecord.Id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(alertRecords) == 0 {
|
||||
alerts := am.alertsCache.GetAlertsByName(systemRecord.Id, "Status")
|
||||
if len(alerts) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
systemName := systemRecord.GetString("name")
|
||||
if newStatus == "down" {
|
||||
am.handleSystemDown(systemName, alertRecords)
|
||||
am.handleSystemDown(systemName, alerts)
|
||||
} else {
|
||||
am.handleSystemUp(systemName, alertRecords)
|
||||
am.handleSystemUp(systemName, alerts)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// getSystemStatusAlerts retrieves all "Status" alert records for a given system ID.
|
||||
func (am *AlertManager) getSystemStatusAlerts(systemID string) ([]*core.Record, error) {
|
||||
alertRecords, err := am.hub.FindAllRecords("alerts", dbx.HashExp{
|
||||
"system": systemID,
|
||||
"name": "Status",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// handleSystemDown manages the logic when a system status changes to "down". It schedules pending alerts for each alert record.
|
||||
func (am *AlertManager) handleSystemDown(systemName string, alerts []CachedAlertData) {
|
||||
for _, alertData := range alerts {
|
||||
min := max(1, int(alertData.Min))
|
||||
am.schedulePendingStatusAlert(systemName, alertData, time.Duration(min)*time.Minute)
|
||||
}
|
||||
return alertRecords, nil
|
||||
}
|
||||
|
||||
// Schedules delayed "down" alerts for each alert record.
|
||||
func (am *AlertManager) handleSystemDown(systemName string, alertRecords []*core.Record) {
|
||||
for _, alertRecord := range alertRecords {
|
||||
// Continue if alert is already scheduled
|
||||
if _, exists := am.pendingAlerts.Load(alertRecord.Id); exists {
|
||||
continue
|
||||
}
|
||||
// Schedule by adding to queue
|
||||
min := max(1, alertRecord.GetInt("min"))
|
||||
am.alertQueue <- alertTask{
|
||||
action: "schedule",
|
||||
// schedulePendingStatusAlert sets up a timer to send a "down" alert after the specified delay if the system is still down.
|
||||
// It returns true if the alert was scheduled, or false if an alert was already pending for the given alert record.
|
||||
func (am *AlertManager) schedulePendingStatusAlert(systemName string, alertData CachedAlertData, delay time.Duration) bool {
|
||||
alert := &alertInfo{
|
||||
systemName: systemName,
|
||||
alertRecord: alertRecord,
|
||||
delay: time.Duration(min) * time.Minute,
|
||||
alertData: alertData,
|
||||
expireTime: time.Now().Add(delay),
|
||||
}
|
||||
|
||||
storedAlert, loaded := am.pendingAlerts.LoadOrStore(alertData.Id, alert)
|
||||
if loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
stored := storedAlert.(*alertInfo)
|
||||
stored.timer = time.AfterFunc(time.Until(stored.expireTime), func() {
|
||||
am.processPendingAlert(alertData.Id)
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
// handleSystemUp manages the logic when a system status changes to "up".
|
||||
// It cancels any pending alerts and sends "up" alerts.
|
||||
func (am *AlertManager) handleSystemUp(systemName string, alertRecords []*core.Record) {
|
||||
for _, alertRecord := range alertRecords {
|
||||
alertRecordID := alertRecord.Id
|
||||
func (am *AlertManager) handleSystemUp(systemName string, alerts []CachedAlertData) {
|
||||
for _, alertData := range alerts {
|
||||
// If alert exists for record, delete and continue (down alert not sent)
|
||||
if _, exists := am.pendingAlerts.Load(alertRecordID); exists {
|
||||
am.alertQueue <- alertTask{
|
||||
action: "cancel",
|
||||
alertRecord: alertRecord,
|
||||
}
|
||||
if am.cancelPendingAlert(alertData.Id) {
|
||||
continue
|
||||
}
|
||||
// No alert scheduled for this record, send "up" alert
|
||||
if err := am.sendStatusAlert("up", systemName, alertRecord); err != nil {
|
||||
if !alertData.Triggered {
|
||||
continue
|
||||
}
|
||||
if err := am.sendStatusAlert("up", systemName, alertData); err != nil {
|
||||
am.hub.Logger().Error("Failed to send alert", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
|
||||
func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertRecord *core.Record) error {
|
||||
switch alertStatus {
|
||||
case "up":
|
||||
alertRecord.Set("triggered", false)
|
||||
case "down":
|
||||
alertRecord.Set("triggered", true)
|
||||
// cancelPendingAlert stops the timer and removes the pending alert for the given alert ID. Returns true if a pending alert was found and cancelled.
|
||||
func (am *AlertManager) cancelPendingAlert(alertID string) bool {
|
||||
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
|
||||
if !loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// processPendingAlert sends a "down" alert if the pending alert has expired and the system is still down.
|
||||
func (am *AlertManager) processPendingAlert(alertID string) {
|
||||
value, loaded := am.pendingAlerts.LoadAndDelete(alertID)
|
||||
if !loaded {
|
||||
return
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
refreshedAlertData, ok := am.alertsCache.Refresh(info.alertData)
|
||||
if !ok || refreshedAlertData.Triggered {
|
||||
return
|
||||
}
|
||||
if err := am.sendStatusAlert("down", info.systemName, refreshedAlertData); err != nil {
|
||||
am.hub.Logger().Error("Failed to send alert", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// sendStatusAlert sends a status alert ("up" or "down") to the users associated with the alert records.
|
||||
func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, alertData CachedAlertData) error {
|
||||
// Update trigger state for alert record before sending alert
|
||||
triggered := alertStatus == "down"
|
||||
if err := am.setAlertTriggered(alertData, triggered); err != nil {
|
||||
return err
|
||||
}
|
||||
am.hub.Save(alertRecord)
|
||||
|
||||
var emoji string
|
||||
if alertStatus == "up" {
|
||||
@@ -162,10 +145,10 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
|
||||
message := strings.TrimSuffix(title, emoji)
|
||||
|
||||
// Get system ID for the link
|
||||
systemID := alertRecord.GetString("system")
|
||||
systemID := alertData.SystemID
|
||||
|
||||
return am.SendAlert(AlertMessageData{
|
||||
UserID: alertRecord.GetString("user"),
|
||||
UserID: alertData.UserID,
|
||||
SystemID: systemID,
|
||||
Title: title,
|
||||
Message: message,
|
||||
@@ -174,8 +157,8 @@ func (am *AlertManager) sendStatusAlert(alertStatus string, systemName string, a
|
||||
})
|
||||
}
|
||||
|
||||
// resolveStatusAlerts resolves any status alerts that weren't resolved
|
||||
// when system came up (https://github.com/henrygd/beszel/issues/1052)
|
||||
// resolveStatusAlerts resolves any triggered status alerts that weren't resolved
|
||||
// when system came up (https://github.com/henrygd/beszel/issues/1052).
|
||||
func resolveStatusAlerts(app core.App) error {
|
||||
db := app.DB()
|
||||
// Find all active status alerts where the system is actually up
|
||||
@@ -205,3 +188,40 @@ func resolveStatusAlerts(app core.App) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// restorePendingStatusAlerts re-queues untriggered status alerts for systems that
|
||||
// are still down after a hub restart. This rebuilds the lost in-memory timer state.
|
||||
func (am *AlertManager) restorePendingStatusAlerts() error {
|
||||
type pendingStatusAlert struct {
|
||||
AlertID string `db:"alert_id"`
|
||||
SystemID string `db:"system_id"`
|
||||
SystemName string `db:"system_name"`
|
||||
}
|
||||
|
||||
var pending []pendingStatusAlert
|
||||
err := am.hub.DB().NewQuery(`
|
||||
SELECT a.id AS alert_id, a.system AS system_id, s.name AS system_name
|
||||
FROM alerts a
|
||||
JOIN systems s ON a.system = s.id
|
||||
WHERE a.name = 'Status'
|
||||
AND a.triggered = false
|
||||
AND s.status = 'down'
|
||||
`).All(&pending)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make sure cache is populated before trying to restore pending alerts
|
||||
_ = am.alertsCache.PopulateFromDB(false)
|
||||
|
||||
for _, item := range pending {
|
||||
alertData, ok := am.alertsCache.GetAlert(item.SystemID, item.AlertID)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
min := max(1, int(alertData.Min))
|
||||
am.schedulePendingStatusAlert(item.SystemName, alertData, time.Duration(min)*time.Minute)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
755
internal/alerts/alerts_status_test.go
Normal file
755
internal/alerts/alerts_status_test.go
Normal file
@@ -0,0 +1,755 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestStatusAlerts(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
|
||||
assert.NoError(t, err)
|
||||
|
||||
var alerts []*core.Record
|
||||
for i, system := range systems {
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": i + 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
alerts = append(alerts, alert)
|
||||
}
|
||||
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
for _, alert := range alerts {
|
||||
assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
|
||||
}
|
||||
if hub.TestMailer.TotalSend() != 0 {
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
|
||||
}
|
||||
for _, system := range systems {
|
||||
assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
|
||||
}
|
||||
for _, system := range systems {
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
|
||||
for _, system := range systems {
|
||||
system.Set("status", "down")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
|
||||
time.Sleep(time.Second * 30)
|
||||
assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
|
||||
assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
|
||||
// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
|
||||
time.Sleep(time.Second * 60)
|
||||
assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
|
||||
// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
|
||||
time.Sleep(time.Second * 60)
|
||||
assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
|
||||
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
|
||||
// now we will bring the remaning systems back up
|
||||
for _, system := range systems {
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
|
||||
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.Zero(t, triggeredCount, "should have 0 alert triggered")
|
||||
// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
|
||||
assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
|
||||
})
|
||||
}
|
||||
func TestStatusAlertRecoveryBeforeDeadline(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Ensure user settings have an email
|
||||
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
hub.Save(userSettings)
|
||||
|
||||
// Initial email count
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
|
||||
systemCollection, _ := hub.FindCollectionByNameOrId("systems")
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
hub.Save(system)
|
||||
|
||||
alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
hub.Save(alert)
|
||||
|
||||
am := hub.AlertManager
|
||||
|
||||
// 1. System goes down
|
||||
am.HandleStatusAlerts("down", system)
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "Alert should be scheduled")
|
||||
|
||||
// 2. System goes up BEFORE delay expires
|
||||
// Triggering HandleStatusAlerts("up") SHOULD NOT send an alert.
|
||||
am.HandleStatusAlerts("up", system)
|
||||
|
||||
assert.Equal(t, 0, am.GetPendingAlertsCount(), "Alert should be canceled if system recovers before delay expires")
|
||||
|
||||
// Verify that NO email was sent.
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "Recovery notification should not be sent if system never went down")
|
||||
|
||||
}
|
||||
|
||||
func TestStatusAlertNormalRecovery(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Ensure user settings have an email
|
||||
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
hub.Save(userSettings)
|
||||
|
||||
systemCollection, _ := hub.FindCollectionByNameOrId("systems")
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
hub.Save(system)
|
||||
|
||||
alertCollection, _ := hub.FindCollectionByNameOrId("alerts")
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", true) // System was confirmed DOWN
|
||||
hub.Save(alert)
|
||||
|
||||
am := hub.AlertManager
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
|
||||
// System goes up
|
||||
am.HandleStatusAlerts("up", system)
|
||||
|
||||
// Verify that an email WAS sent (normal recovery).
|
||||
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "Recovery notification should be sent if system was triggered as down")
|
||||
|
||||
}
|
||||
|
||||
func TestHandleStatusAlertsDoesNotSendRecoveryWhileDownIsOnlyPending(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("up", system))
|
||||
assert.Zero(t, am.GetPendingAlertsCount(), "recovery should cancel the pending down alert")
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "recovery notification should not be sent before a down alert triggers")
|
||||
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when downtime never matured")
|
||||
}
|
||||
|
||||
func TestStatusAlertTimerCancellationPreventsBoundaryDelivery(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "down transition should register a pending alert immediately")
|
||||
require.True(t, am.ResetPendingAlertTimer(alert.Id, 25*time.Millisecond), "test should shorten the pending alert timer")
|
||||
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
require.NoError(t, am.HandleStatusAlerts("up", system))
|
||||
assert.Zero(t, am.GetPendingAlertsCount(), "recovery should remove the pending alert before the timer callback runs")
|
||||
|
||||
time.Sleep(40 * time.Millisecond)
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "timer callback should not deliver after recovery cancels the pending alert")
|
||||
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "alert should remain untriggered when cancellation wins the timer race")
|
||||
|
||||
time.Sleep(time.Minute)
|
||||
synctest.Wait()
|
||||
})
|
||||
}
|
||||
|
||||
func TestStatusAlertDownFiresAfterDelayExpires(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "alert should be pending after system goes down")
|
||||
|
||||
// Expire the pending alert and process it
|
||||
am.ForceExpirePendingAlerts()
|
||||
processed, err := am.ProcessPendingAlerts()
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, processed, 1, "one alert should have been processed")
|
||||
assert.Equal(t, 0, am.GetPendingAlertsCount(), "pending alert should be consumed after processing")
|
||||
|
||||
// Verify down email was sent
|
||||
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "down notification should be sent after delay expires")
|
||||
|
||||
// Verify triggered flag is set in the DB
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, alertRecord.GetBool("triggered"), "alert should be marked triggered after downtime matures")
|
||||
}
|
||||
|
||||
func TestStatusAlertDuplicateDownCallIsIdempotent(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 5)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "repeated down calls should not schedule duplicate pending alerts")
|
||||
}
|
||||
|
||||
func TestStatusAlertNoAlertRecord(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systemCollection, err := hub.FindCollectionByNameOrId("systems")
|
||||
require.NoError(t, err)
|
||||
system := core.NewRecord(systemCollection)
|
||||
system.Set("name", "test-system")
|
||||
system.Set("status", "up")
|
||||
system.Set("host", "127.0.0.1")
|
||||
system.Set("users", []string{user.Id})
|
||||
require.NoError(t, hub.Save(system))
|
||||
|
||||
// No Status alert record created for this system
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("down", system))
|
||||
assert.Equal(t, 0, am.GetPendingAlertsCount(), "no pending alert when no alert record exists")
|
||||
|
||||
require.NoError(t, am.HandleStatusAlerts("up", system))
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "no email when no alert record exists")
|
||||
}
|
||||
|
||||
func TestRestorePendingStatusAlertsRequeuesDownSystemsAfterRestart(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", false)
|
||||
alert.Set("min", 1)
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
|
||||
require.NoError(t, am.RestorePendingStatusAlerts())
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "startup restore should requeue a pending down alert for a system still marked down")
|
||||
|
||||
am.ForceExpirePendingAlerts()
|
||||
processed, err := am.ProcessPendingAlerts()
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, processed, 1, "restored pending alert should be processable after the delay expires")
|
||||
assert.Equal(t, initialEmailCount+1, hub.TestMailer.TotalSend(), "restored pending alert should send the down notification")
|
||||
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, alertRecord.GetBool("triggered"), "restored pending alert should mark the alert as triggered once delivered")
|
||||
}
|
||||
|
||||
func TestRestorePendingStatusAlertsSkipsNonDownOrAlreadyTriggeredAlerts(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systemsDown, err := beszelTests.CreateSystems(hub, 2, user.Id, "down")
|
||||
require.NoError(t, err)
|
||||
systemDownPending := systemsDown[0]
|
||||
systemDownTriggered := systemsDown[1]
|
||||
|
||||
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "up-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.2",
|
||||
"status": "up",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDownPending.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemUp.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDownTriggered.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": true,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
require.NoError(t, am.RestorePendingStatusAlerts())
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "only untriggered alerts for currently down systems should be restored")
|
||||
}
|
||||
|
||||
func TestRestorePendingStatusAlertsIsIdempotent(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "down")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
_, err = beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"triggered": false,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
am := alerts.NewTestAlertManagerWithoutWorker(hub)
|
||||
require.NoError(t, am.RestorePendingStatusAlerts())
|
||||
require.NoError(t, am.RestorePendingStatusAlerts())
|
||||
|
||||
assert.Equal(t, 1, am.GetPendingAlertsCount(), "restoring twice should not create duplicate pending alerts")
|
||||
am.ForceExpirePendingAlerts()
|
||||
processed, err := am.ProcessPendingAlerts()
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, processed, 1, "restored alert should still be processable exactly once")
|
||||
assert.Zero(t, am.GetPendingAlertsCount(), "processing the restored alert should empty the pending map")
|
||||
}
|
||||
|
||||
func TestResolveStatusAlertsFixesStaleTriggered(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// CreateSystems uses SaveNoValidate after initial save to bypass the
|
||||
// onRecordCreate hook that forces status = "pending".
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
alertCollection, err := hub.FindCollectionByNameOrId("alerts")
|
||||
require.NoError(t, err)
|
||||
alert := core.NewRecord(alertCollection)
|
||||
alert.Set("user", user.Id)
|
||||
alert.Set("system", system.Id)
|
||||
alert.Set("name", "Status")
|
||||
alert.Set("triggered", true) // Stale: system is up but alert still says triggered
|
||||
require.NoError(t, hub.Save(alert))
|
||||
|
||||
// resolveStatusAlerts should clear the stale triggered flag
|
||||
require.NoError(t, alerts.ResolveStatusAlerts(hub))
|
||||
|
||||
alertRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
require.NoError(t, err)
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "stale triggered flag should be cleared when system is up")
|
||||
}
|
||||
func TestResolveStatusAlerts(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a systemUp
|
||||
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
"status": "up",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system-2",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.2",
|
||||
"status": "up",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a status alertUp for the system
|
||||
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemUp.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDown.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'up' (this should not trigger the alert)
|
||||
systemUp.Set("status", "up")
|
||||
err = hub.SaveNoValidate(systemUp)
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown.Set("status", "down")
|
||||
err = hub.SaveNoValidate(systemDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait a moment for any processing
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// Verify alertUp is still not triggered after setting system to up
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
|
||||
|
||||
// Manually set both alerts triggered to true
|
||||
alertUp.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertUp)
|
||||
assert.NoError(t, err)
|
||||
alertDown.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify we have exactly one alert with triggered true
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
|
||||
|
||||
// Verify the specific alertUp is triggered
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
|
||||
|
||||
// Verify we have two unresolved alert history records
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
|
||||
|
||||
err = alerts.ResolveStatusAlerts(hub)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alertUp is not triggered after resolving
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
|
||||
// Verify alertDown is still triggered
|
||||
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
|
||||
|
||||
// Verify we have one unresolved alert history record
|
||||
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
|
||||
|
||||
}
|
||||
|
||||
func TestAlertsHistoryStatus(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Create a status alertRecord for the system
|
||||
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'down' (this should trigger the alert)
|
||||
system.Set("status", "down")
|
||||
err = hub.Save(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(time.Second * 30)
|
||||
synctest.Wait()
|
||||
|
||||
alertFresh, _ := hub.FindRecordById("alerts", alertRecord.Id)
|
||||
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after 30 seconds")
|
||||
|
||||
time.Sleep(time.Minute)
|
||||
synctest.Wait()
|
||||
|
||||
// Verify alert is triggered after setting system to down
|
||||
alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertFresh.GetBool("triggered"), "Alert should be triggered after one minute")
|
||||
|
||||
// Verify we have one unresolved alert history record
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
|
||||
|
||||
// Set the system back to 'up' (this should resolve the alert)
|
||||
system.Set("status", "up")
|
||||
err = hub.Save(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(time.Second)
|
||||
synctest.Wait()
|
||||
|
||||
// Verify alert is not triggered after setting system back to up
|
||||
alertFresh, err = hub.FindRecordById("alerts", alertRecord.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
|
||||
|
||||
// Verify the alert history record is resolved
|
||||
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records")
|
||||
})
|
||||
}
|
||||
|
||||
func TestStatusAlertClearedBeforeSend(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a system
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
assert.NoError(t, err)
|
||||
system := systems[0]
|
||||
|
||||
// Ensure user settings have an email
|
||||
userSettings, _ := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
hub.Save(userSettings)
|
||||
|
||||
// Initial email count
|
||||
initialEmailCount := hub.TestMailer.TotalSend()
|
||||
|
||||
// Create a status alertRecord for the system
|
||||
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'down' (this should trigger the alert)
|
||||
system.Set("status", "down")
|
||||
err = hub.Save(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(time.Second * 30)
|
||||
synctest.Wait()
|
||||
|
||||
// Set system back up to clear the pending alert before it triggers
|
||||
system.Set("status", "up")
|
||||
err = hub.Save(system)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(time.Minute)
|
||||
synctest.Wait()
|
||||
|
||||
// Verify that we have not sent any emails since the system recovered before the alert triggered
|
||||
assert.Equal(t, initialEmailCount, hub.TestMailer.TotalSend(), "No email should be sent if system recovers before alert triggers")
|
||||
|
||||
// Verify alert is not triggered after setting system back to up
|
||||
alertFresh, err := hub.FindRecordById("alerts", alertRecord.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertFresh.GetBool("triggered"), "Alert should not be triggered after system recovers")
|
||||
|
||||
// Verify that no alert history record was created since the alert never triggered
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history")
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 0, alertHistoryCount, "Should have no unresolved alert history records since alert never triggered")
|
||||
})
|
||||
}
|
||||
@@ -11,15 +11,11 @@ import (
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
"github.com/pocketbase/pocketbase/tools/types"
|
||||
"github.com/spf13/cast"
|
||||
)
|
||||
|
||||
func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *system.CombinedData) error {
|
||||
alertRecords, err := am.hub.FindAllRecords("alerts",
|
||||
dbx.NewExp("system={:system} AND name!='Status'", dbx.Params{"system": systemRecord.Id}),
|
||||
)
|
||||
if err != nil || len(alertRecords) == 0 {
|
||||
// log.Println("no alerts found for system")
|
||||
alerts := am.alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
if len(alerts) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -27,8 +23,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
now := systemRecord.GetDateTime("updated").Time().UTC()
|
||||
oldestTime := now
|
||||
|
||||
for _, alertRecord := range alertRecords {
|
||||
name := alertRecord.GetString("name")
|
||||
for _, alertData := range alerts {
|
||||
name := alertData.Name
|
||||
var val float64
|
||||
unit := "%"
|
||||
|
||||
@@ -38,7 +34,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
case "Memory":
|
||||
val = data.Info.MemPct
|
||||
case "Bandwidth":
|
||||
val = data.Info.Bandwidth
|
||||
val = float64(data.Info.BandwidthBytes) / (1024 * 1024)
|
||||
unit = " MB/s"
|
||||
case "Disk":
|
||||
maxUsedPct := data.Info.DiskPct
|
||||
@@ -73,8 +69,8 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
val = float64(data.Stats.Battery[0])
|
||||
}
|
||||
|
||||
triggered := alertRecord.GetBool("triggered")
|
||||
threshold := alertRecord.GetFloat("value")
|
||||
triggered := alertData.Triggered
|
||||
threshold := alertData.Value
|
||||
|
||||
// Battery alert has inverted logic: trigger when value is BELOW threshold
|
||||
lowAlert := isLowAlert(name)
|
||||
@@ -92,11 +88,11 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
}
|
||||
}
|
||||
|
||||
min := max(1, cast.ToUint8(alertRecord.Get("min")))
|
||||
min := max(1, alertData.Min)
|
||||
|
||||
alert := SystemAlertData{
|
||||
systemRecord: systemRecord,
|
||||
alertRecord: alertRecord,
|
||||
alertData: alertData,
|
||||
name: name,
|
||||
unit: unit,
|
||||
val: val,
|
||||
@@ -129,7 +125,7 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
Created types.DateTime `db:"created"`
|
||||
}{}
|
||||
|
||||
err = am.hub.DB().
|
||||
err := am.hub.DB().
|
||||
Select("stats", "created").
|
||||
From("system_stats").
|
||||
Where(dbx.NewExp(
|
||||
@@ -192,23 +188,25 @@ func (am *AlertManager) HandleSystemAlerts(systemRecord *core.Record, data *syst
|
||||
case "Memory":
|
||||
alert.val += stats.Mem
|
||||
case "Bandwidth":
|
||||
alert.val += stats.NetSent + stats.NetRecv
|
||||
alert.val += float64(stats.Bandwidth[0]+stats.Bandwidth[1]) / (1024 * 1024)
|
||||
case "Disk":
|
||||
if alert.mapSums == nil {
|
||||
alert.mapSums = make(map[string]float32, len(data.Stats.ExtraFs)+1)
|
||||
alert.mapSums = make(map[string]float32, len(stats.ExtraFs)+1)
|
||||
}
|
||||
// add root disk
|
||||
if _, ok := alert.mapSums["root"]; !ok {
|
||||
alert.mapSums["root"] = 0.0
|
||||
}
|
||||
alert.mapSums["root"] += float32(stats.Disk)
|
||||
// add extra disks
|
||||
for key, fs := range data.Stats.ExtraFs {
|
||||
// add extra disks from historical record
|
||||
for key, fs := range stats.ExtraFs {
|
||||
if fs.DiskTotal > 0 {
|
||||
if _, ok := alert.mapSums[key]; !ok {
|
||||
alert.mapSums[key] = 0.0
|
||||
}
|
||||
alert.mapSums[key] += float32(fs.DiskUsed / fs.DiskTotal * 100)
|
||||
}
|
||||
}
|
||||
case "Temperature":
|
||||
if alert.mapSums == nil {
|
||||
alert.mapSums = make(map[string]float32, len(stats.Temperatures))
|
||||
@@ -342,13 +340,12 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
|
||||
}
|
||||
body := fmt.Sprintf("%s averaged %.2f%s for the previous %v %s.", alert.descriptor, alert.val, alert.unit, alert.min, minutesLabel)
|
||||
|
||||
alert.alertRecord.Set("triggered", alert.triggered)
|
||||
if err := am.hub.Save(alert.alertRecord); err != nil {
|
||||
if err := am.setAlertTriggered(alert.alertData, alert.triggered); err != nil {
|
||||
// app.Logger().Error("failed to save alert record", "err", err)
|
||||
return
|
||||
}
|
||||
am.SendAlert(AlertMessageData{
|
||||
UserID: alert.alertRecord.GetString("user"),
|
||||
UserID: alert.alertData.UserID,
|
||||
SystemID: alert.systemRecord.Id,
|
||||
Title: subject,
|
||||
Message: body,
|
||||
|
||||
218
internal/alerts/alerts_system_test.go
Normal file
218
internal/alerts/alerts_system_test.go
Normal file
@@ -0,0 +1,218 @@
|
||||
//go:build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type systemAlertValueSetter[T any] func(info *system.Info, stats *system.Stats, value T)
|
||||
|
||||
type systemAlertTestFixture struct {
|
||||
hub *beszelTests.TestHub
|
||||
alertID string
|
||||
submit func(*system.CombinedData) error
|
||||
}
|
||||
|
||||
func createCombinedData[T any](value T, setValue systemAlertValueSetter[T]) *system.CombinedData {
|
||||
var data system.CombinedData
|
||||
setValue(&data.Info, &data.Stats, value)
|
||||
return &data
|
||||
}
|
||||
|
||||
func newSystemAlertTestFixture(t *testing.T, alertName string, min int, threshold float64) *systemAlertTestFixture {
|
||||
t.Helper()
|
||||
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 1, user.Id, "up")
|
||||
require.NoError(t, err)
|
||||
systemRecord := systems[0]
|
||||
|
||||
sysManagerSystem, err := hub.GetSystemManager().GetSystemFromStore(systemRecord.Id)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, sysManagerSystem)
|
||||
sysManagerSystem.StopUpdater()
|
||||
|
||||
userSettings, err := hub.FindFirstRecordByFilter("user_settings", "user={:user}", map[string]any{"user": user.Id})
|
||||
require.NoError(t, err)
|
||||
userSettings.Set("settings", `{"emails":["test@example.com"],"webhooks":[]}`)
|
||||
require.NoError(t, hub.Save(userSettings))
|
||||
|
||||
alertRecord, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": alertName,
|
||||
"system": systemRecord.Id,
|
||||
"user": user.Id,
|
||||
"min": min,
|
||||
"value": threshold,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
assert.False(t, alertRecord.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
alertsCache := hub.GetAlertManager().GetSystemAlertsCache()
|
||||
cachedAlerts := alertsCache.GetAlertsExcludingNames(systemRecord.Id, "Status")
|
||||
assert.Len(t, cachedAlerts, 1, "Alert should be in cache")
|
||||
|
||||
return &systemAlertTestFixture{
|
||||
hub: hub,
|
||||
alertID: alertRecord.Id,
|
||||
submit: func(data *system.CombinedData) error {
|
||||
_, err := sysManagerSystem.CreateRecords(data)
|
||||
return err
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (fixture *systemAlertTestFixture) cleanup() {
|
||||
fixture.hub.Cleanup()
|
||||
}
|
||||
|
||||
func submitValue[T any](fixture *systemAlertTestFixture, t *testing.T, value T, setValue systemAlertValueSetter[T]) {
|
||||
t.Helper()
|
||||
require.NoError(t, fixture.submit(createCombinedData(value, setValue)))
|
||||
}
|
||||
|
||||
func (fixture *systemAlertTestFixture) assertTriggered(t *testing.T, triggered bool, message string) {
|
||||
t.Helper()
|
||||
|
||||
alertRecord, err := fixture.hub.FindRecordById("alerts", fixture.alertID)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, triggered, alertRecord.GetBool("triggered"), message)
|
||||
}
|
||||
|
||||
func waitForSystemAlert(d time.Duration) {
|
||||
time.Sleep(d)
|
||||
synctest.Wait()
|
||||
}
|
||||
|
||||
func testOneMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, setValue systemAlertValueSetter[T], triggerValue, resolveValue T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fixture := newSystemAlertTestFixture(t, alertName, 1, threshold)
|
||||
defer fixture.cleanup()
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
|
||||
fixture.assertTriggered(t, true, "Alert should be triggered")
|
||||
assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
|
||||
|
||||
submitValue(fixture, t, resolveValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
|
||||
fixture.assertTriggered(t, false, "Alert should be untriggered")
|
||||
assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
|
||||
|
||||
waitForSystemAlert(time.Minute)
|
||||
})
|
||||
}
|
||||
|
||||
func testMultiMinuteSystemAlert[T any](t *testing.T, alertName string, threshold float64, min int, setValue systemAlertValueSetter[T], baselineValue, triggerValue, resolveValue T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
fixture := newSystemAlertTestFixture(t, alertName, min, threshold)
|
||||
defer fixture.cleanup()
|
||||
|
||||
submitValue(fixture, t, baselineValue, setValue)
|
||||
waitForSystemAlert(time.Minute + time.Second)
|
||||
fixture.assertTriggered(t, false, "Alert should not be triggered yet")
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Minute)
|
||||
fixture.assertTriggered(t, false, "Alert should not be triggered until the history window is full")
|
||||
|
||||
submitValue(fixture, t, triggerValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
fixture.assertTriggered(t, true, "Alert should be triggered")
|
||||
assert.Equal(t, 1, fixture.hub.TestMailer.TotalSend(), "An email should have been sent")
|
||||
|
||||
submitValue(fixture, t, resolveValue, setValue)
|
||||
waitForSystemAlert(time.Second)
|
||||
fixture.assertTriggered(t, false, "Alert should be untriggered")
|
||||
assert.Equal(t, 2, fixture.hub.TestMailer.TotalSend(), "A second email should have been sent for untriggering the alert")
|
||||
})
|
||||
}
|
||||
|
||||
func setCPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.Cpu = value
|
||||
stats.Cpu = value
|
||||
}
|
||||
|
||||
func setMemoryAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.MemPct = value
|
||||
stats.MemPct = value
|
||||
}
|
||||
|
||||
func setDiskAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.DiskPct = value
|
||||
stats.DiskPct = value
|
||||
}
|
||||
|
||||
func setBandwidthAlertValue(info *system.Info, stats *system.Stats, value [2]uint64) {
|
||||
info.BandwidthBytes = value[0] + value[1]
|
||||
stats.Bandwidth = value
|
||||
}
|
||||
|
||||
// megabytesToBytes converts a count of mebibytes (1024*1024 bytes each) to
// bytes.
func megabytesToBytes(mb uint64) uint64 {
	const bytesPerMB uint64 = 1024 * 1024
	return mb * bytesPerMB
}
|
||||
|
||||
func setGPUAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.GpuPct = value
|
||||
stats.GPUData = map[string]system.GPUData{
|
||||
"GPU0": {Usage: value},
|
||||
}
|
||||
}
|
||||
|
||||
func setTemperatureAlertValue(info *system.Info, stats *system.Stats, value float64) {
|
||||
info.DashboardTemp = value
|
||||
stats.Temperatures = map[string]float64{
|
||||
"Temp0": value,
|
||||
}
|
||||
}
|
||||
|
||||
func setLoadAvgAlertValue(info *system.Info, stats *system.Stats, value [3]float64) {
|
||||
info.LoadAvg = value
|
||||
stats.LoadAvg = value
|
||||
}
|
||||
|
||||
func setBatteryAlertValue(info *system.Info, stats *system.Stats, value [2]uint8) {
|
||||
info.Battery = value
|
||||
stats.Battery = value
|
||||
}
|
||||
|
||||
func TestSystemAlertsOneMin(t *testing.T) {
|
||||
testOneMinuteSystemAlert(t, "CPU", 50, setCPUAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Memory", 50, setMemoryAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Disk", 50, setDiskAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Bandwidth", 50, setBandwidthAlertValue, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(25), megabytesToBytes(24)})
|
||||
testOneMinuteSystemAlert(t, "GPU", 50, setGPUAlertValue, 51, 49)
|
||||
testOneMinuteSystemAlert(t, "Temperature", 70, setTemperatureAlertValue, 71, 69)
|
||||
testOneMinuteSystemAlert(t, "LoadAvg1", 4, setLoadAvgAlertValue, [3]float64{4.1, 0, 0}, [3]float64{3.9, 0, 0})
|
||||
testOneMinuteSystemAlert(t, "LoadAvg5", 4, setLoadAvgAlertValue, [3]float64{0, 4.1, 0}, [3]float64{0, 3.9, 0})
|
||||
testOneMinuteSystemAlert(t, "LoadAvg15", 4, setLoadAvgAlertValue, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.9})
|
||||
testOneMinuteSystemAlert(t, "Battery", 20, setBatteryAlertValue, [2]uint8{19, 0}, [2]uint8{21, 0})
|
||||
}
|
||||
|
||||
func TestSystemAlertsTwoMin(t *testing.T) {
|
||||
testMultiMinuteSystemAlert(t, "CPU", 50, 2, setCPUAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Memory", 50, 2, setMemoryAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Disk", 50, 2, setDiskAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Bandwidth", 50, 2, setBandwidthAlertValue, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)}, [2]uint64{megabytesToBytes(26), megabytesToBytes(25)}, [2]uint64{megabytesToBytes(10), megabytesToBytes(10)})
|
||||
testMultiMinuteSystemAlert(t, "GPU", 50, 2, setGPUAlertValue, 10, 51, 48)
|
||||
testMultiMinuteSystemAlert(t, "Temperature", 70, 2, setTemperatureAlertValue, 10, 71, 67)
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg1", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 0}, [3]float64{4.1, 0, 0}, [3]float64{3.5, 0, 0})
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg5", 4, 2, setLoadAvgAlertValue, [3]float64{0, 2, 0}, [3]float64{0, 4.1, 0}, [3]float64{0, 3.5, 0})
|
||||
testMultiMinuteSystemAlert(t, "LoadAvg15", 4, 2, setLoadAvgAlertValue, [3]float64{0, 0, 2}, [3]float64{0, 0, 4.1}, [3]float64{0, 0, 3.5})
|
||||
testMultiMinuteSystemAlert(t, "Battery", 20, 2, setBatteryAlertValue, [2]uint8{21, 0}, [2]uint8{19, 0}, [2]uint8{25, 1})
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package alerts_test
|
||||
|
||||
@@ -13,9 +12,9 @@ import (
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
beszelTests "github.com/henrygd/beszel/internal/tests"
|
||||
|
||||
"github.com/henrygd/beszel/internal/alerts"
|
||||
"github.com/pocketbase/dbx"
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
pbTests "github.com/pocketbase/pocketbase/tests"
|
||||
@@ -370,87 +369,6 @@ func TestUserAlertsApi(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStatusAlerts(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
systems, err := beszelTests.CreateSystems(hub, 4, user.Id, "paused")
|
||||
assert.NoError(t, err)
|
||||
|
||||
var alerts []*core.Record
|
||||
for i, system := range systems {
|
||||
alert, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": i + 1,
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
alerts = append(alerts, alert)
|
||||
}
|
||||
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
for _, alert := range alerts {
|
||||
assert.False(t, alert.GetBool("triggered"), "Alert should not be triggered immediately")
|
||||
}
|
||||
if hub.TestMailer.TotalSend() != 0 {
|
||||
assert.Zero(t, hub.TestMailer.TotalSend(), "Expected 0 messages, got %d", hub.TestMailer.TotalSend())
|
||||
}
|
||||
for _, system := range systems {
|
||||
assert.EqualValues(t, "paused", system.GetString("status"), "System should be paused")
|
||||
}
|
||||
for _, system := range systems {
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
|
||||
for _, system := range systems {
|
||||
system.Set("status", "down")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
// after 30 seconds, should have 4 alerts in the pendingAlerts map, no triggered alerts
|
||||
time.Sleep(time.Second * 30)
|
||||
assert.EqualValues(t, 4, hub.GetPendingAlertsCount(), "should have 4 alerts in the pendingAlerts map")
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 0, triggeredCount, "should have 0 alert triggered")
|
||||
assert.EqualValues(t, 0, hub.TestMailer.TotalSend(), "should have 0 messages sent")
|
||||
// after 1:30 seconds, should have 1 triggered alert and 3 pending alerts
|
||||
time.Sleep(time.Second * 60)
|
||||
assert.EqualValues(t, 3, hub.GetPendingAlertsCount(), "should have 3 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, triggeredCount, "should have 1 alert triggered")
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 messages sent")
|
||||
// after 2:30 seconds, should have 2 triggered alerts and 2 pending alerts
|
||||
time.Sleep(time.Second * 60)
|
||||
assert.EqualValues(t, 2, hub.GetPendingAlertsCount(), "should have 2 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "should have 2 alert triggered")
|
||||
assert.EqualValues(t, 2, hub.TestMailer.TotalSend(), "should have 2 messages sent")
|
||||
// now we will bring the remaning systems back up
|
||||
for _, system := range systems {
|
||||
system.Set("status", "up")
|
||||
err = hub.SaveNoValidate(system)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
// should have 0 alerts in the pendingAlerts map and 0 alerts triggered
|
||||
assert.EqualValues(t, 0, hub.GetPendingAlertsCount(), "should have 0 alerts in the pendingAlerts map")
|
||||
triggeredCount, err = hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.Zero(t, triggeredCount, "should have 0 alert triggered")
|
||||
// 4 messages sent, 2 down alerts and 2 up alerts for first 2 systems
|
||||
assert.EqualValues(t, 4, hub.TestMailer.TotalSend(), "should have 4 messages sent")
|
||||
})
|
||||
}
|
||||
|
||||
func TestAlertsHistory(t *testing.T) {
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
@@ -579,102 +497,46 @@ func TestAlertsHistory(t *testing.T) {
|
||||
assert.EqualValues(t, 2, totalHistoryCount, "Should have 2 total alert history records")
|
||||
})
|
||||
}
|
||||
func TestResolveStatusAlerts(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
|
||||
func TestSetAlertTriggered(t *testing.T) {
|
||||
hub, _ := beszelTests.NewTestHub(t.TempDir())
|
||||
defer hub.Cleanup()
|
||||
|
||||
// Create a systemUp
|
||||
systemUp, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
hub.StartHub()
|
||||
|
||||
user, _ := beszelTests.CreateUser(hub, "test@example.com", "password")
|
||||
system, _ := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
"status": "up",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system-2",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.2",
|
||||
"status": "up",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Create a status alertUp for the system
|
||||
alertUp, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemUp.Id,
|
||||
alertRecord, _ := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "CPU",
|
||||
"system": system.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
"value": 80,
|
||||
"triggered": false,
|
||||
})
|
||||
|
||||
am := alerts.NewAlertManager(hub)
|
||||
|
||||
var alert alerts.CachedAlertData
|
||||
alert.PopulateFromRecord(alertRecord)
|
||||
|
||||
// Test triggering the alert
|
||||
err := am.SetAlertTriggered(alert, true)
|
||||
assert.NoError(t, err)
|
||||
|
||||
alertDown, err := beszelTests.CreateRecord(hub, "alerts", map[string]any{
|
||||
"name": "Status",
|
||||
"system": systemDown.Id,
|
||||
"user": user.Id,
|
||||
"min": 1,
|
||||
})
|
||||
updatedRecord, err := hub.FindRecordById("alerts", alert.Id)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, updatedRecord.GetBool("triggered"))
|
||||
|
||||
// Test un-triggering the alert
|
||||
err = am.SetAlertTriggered(alert, false)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alert is not triggered initially
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered initially")
|
||||
|
||||
// Set the system to 'up' (this should not trigger the alert)
|
||||
systemUp.Set("status", "up")
|
||||
err = hub.SaveNoValidate(systemUp)
|
||||
updatedRecord, err = hub.FindRecordById("alerts", alert.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
systemDown.Set("status", "down")
|
||||
err = hub.SaveNoValidate(systemDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Wait a moment for any processing
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
// Verify alertUp is still not triggered after setting system to up
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered when system is up")
|
||||
|
||||
// Manually set both alerts triggered to true
|
||||
alertUp.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertUp)
|
||||
assert.NoError(t, err)
|
||||
alertDown.Set("triggered", true)
|
||||
err = hub.SaveNoValidate(alertDown)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify we have exactly two alerts with triggered true
|
||||
triggeredCount, err := hub.CountRecords("alerts", dbx.HashExp{"triggered": true})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, triggeredCount, "Should have exactly two alerts with triggered true")
|
||||
|
||||
// Verify the specific alertUp is triggered
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertUp.GetBool("triggered"), "Alert should be triggered")
|
||||
|
||||
// Verify we have two unresolved alert history records
|
||||
alertHistoryCount, err := hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 2, alertHistoryCount, "Should have exactly two unresolved alert history records")
|
||||
|
||||
err = alerts.ResolveStatusAlerts(hub)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify alertUp is not triggered after resolving
|
||||
alertUp, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertUp.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.False(t, alertUp.GetBool("triggered"), "Alert should not be triggered after resolving")
|
||||
// Verify alertDown is still triggered
|
||||
alertDown, err = hub.FindFirstRecordByFilter("alerts", "id={:id}", dbx.Params{"id": alertDown.Id})
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, alertDown.GetBool("triggered"), "Alert should still be triggered after resolving")
|
||||
|
||||
// Verify we have one unresolved alert history record
|
||||
alertHistoryCount, err = hub.CountRecords("alerts_history", dbx.HashExp{"resolved": ""})
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, 1, alertHistoryCount, "Should have exactly one unresolved alert history record")
|
||||
|
||||
assert.False(t, updatedRecord.GetBool("triggered"))
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
//go:build testing
|
||||
// +build testing
|
||||
|
||||
package alerts
|
||||
|
||||
@@ -10,6 +9,18 @@ import (
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
func NewTestAlertManagerWithoutWorker(app hubLike) *AlertManager {
|
||||
return &AlertManager{
|
||||
hub: app,
|
||||
alertsCache: NewAlertsCache(app),
|
||||
}
|
||||
}
|
||||
|
||||
// GetSystemAlertsCache returns the internal system alerts cache.
|
||||
func (am *AlertManager) GetSystemAlertsCache() *AlertsCache {
|
||||
return am.alertsCache
|
||||
}
|
||||
|
||||
func (am *AlertManager) GetAlertManager() *AlertManager {
|
||||
return am
|
||||
}
|
||||
@@ -28,19 +39,18 @@ func (am *AlertManager) GetPendingAlertsCount() int {
|
||||
}
|
||||
|
||||
// ProcessPendingAlerts manually processes all expired alerts (for testing)
|
||||
func (am *AlertManager) ProcessPendingAlerts() ([]*core.Record, error) {
|
||||
func (am *AlertManager) ProcessPendingAlerts() ([]CachedAlertData, error) {
|
||||
now := time.Now()
|
||||
var lastErr error
|
||||
var processedAlerts []*core.Record
|
||||
var processedAlerts []CachedAlertData
|
||||
am.pendingAlerts.Range(func(key, value any) bool {
|
||||
info := value.(*alertInfo)
|
||||
if now.After(info.expireTime) {
|
||||
// Downtime delay has passed, process alert
|
||||
if err := am.sendStatusAlert("down", info.systemName, info.alertRecord); err != nil {
|
||||
lastErr = err
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
processedAlerts = append(processedAlerts, info.alertRecord)
|
||||
am.pendingAlerts.Delete(key)
|
||||
am.processPendingAlert(key.(string))
|
||||
processedAlerts = append(processedAlerts, info.alertData)
|
||||
}
|
||||
return true
|
||||
})
|
||||
@@ -57,6 +67,31 @@ func (am *AlertManager) ForceExpirePendingAlerts() {
|
||||
})
|
||||
}
|
||||
|
||||
func (am *AlertManager) ResetPendingAlertTimer(alertID string, delay time.Duration) bool {
|
||||
value, loaded := am.pendingAlerts.Load(alertID)
|
||||
if !loaded {
|
||||
return false
|
||||
}
|
||||
|
||||
info := value.(*alertInfo)
|
||||
if info.timer != nil {
|
||||
info.timer.Stop()
|
||||
}
|
||||
info.expireTime = time.Now().Add(delay)
|
||||
info.timer = time.AfterFunc(delay, func() {
|
||||
am.processPendingAlert(alertID)
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
func ResolveStatusAlerts(app core.App) error {
|
||||
return resolveStatusAlerts(app)
|
||||
}
|
||||
|
||||
func (am *AlertManager) RestorePendingStatusAlerts() error {
|
||||
return am.restorePendingStatusAlerts()
|
||||
}
|
||||
|
||||
func (am *AlertManager) SetAlertTriggered(alert CachedAlertData, triggered bool) error {
|
||||
return am.setAlertTriggered(alert, triggered)
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"github.com/henrygd/beszel"
|
||||
"github.com/henrygd/beszel/agent"
|
||||
"github.com/henrygd/beszel/agent/health"
|
||||
"github.com/henrygd/beszel/agent/utils"
|
||||
"github.com/spf13/pflag"
|
||||
"golang.org/x/crypto/ssh"
|
||||
)
|
||||
@@ -31,9 +32,6 @@ func (opts *cmdOptions) parse() bool {
|
||||
|
||||
// Subcommands that don't require any pflag parsing
|
||||
switch subcommand {
|
||||
case "-v", "version":
|
||||
fmt.Println(beszel.AppName+"-agent", beszel.Version)
|
||||
return true
|
||||
case "health":
|
||||
err := health.Check()
|
||||
if err != nil {
|
||||
@@ -41,6 +39,9 @@ func (opts *cmdOptions) parse() bool {
|
||||
}
|
||||
fmt.Print("ok")
|
||||
return true
|
||||
case "fingerprint":
|
||||
handleFingerprint()
|
||||
return true
|
||||
}
|
||||
|
||||
// pflag.CommandLine.ParseErrorsWhitelist.UnknownFlags = true
|
||||
@@ -49,6 +50,7 @@ func (opts *cmdOptions) parse() bool {
|
||||
pflag.StringVarP(&opts.hubURL, "url", "u", "", "URL of the Beszel hub")
|
||||
pflag.StringVarP(&opts.token, "token", "t", "", "Token to use for authentication")
|
||||
chinaMirrors := pflag.BoolP("china-mirrors", "c", false, "Use mirror for update (gh.beszel.dev) instead of GitHub")
|
||||
version := pflag.BoolP("version", "v", false, "Show version information")
|
||||
help := pflag.BoolP("help", "h", false, "Show this help message")
|
||||
|
||||
// Convert old single-dash long flags to double-dash for backward compatibility
|
||||
@@ -73,8 +75,8 @@ func (opts *cmdOptions) parse() bool {
|
||||
builder.WriteString(os.Args[0])
|
||||
builder.WriteString(" [command] [flags]\n")
|
||||
builder.WriteString("\nCommands:\n")
|
||||
builder.WriteString(" fingerprint View or reset the agent fingerprint\n")
|
||||
builder.WriteString(" health Check if the agent is running\n")
|
||||
// builder.WriteString(" help Display this help message\n")
|
||||
builder.WriteString(" update Update to the latest version\n")
|
||||
builder.WriteString("\nFlags:\n")
|
||||
fmt.Print(builder.String())
|
||||
@@ -86,6 +88,9 @@ func (opts *cmdOptions) parse() bool {
|
||||
|
||||
// Must run after pflag.Parse()
|
||||
switch {
|
||||
case *version:
|
||||
fmt.Println(beszel.AppName+"-agent", beszel.Version)
|
||||
return true
|
||||
case *help || subcommand == "help":
|
||||
pflag.Usage()
|
||||
return true
|
||||
@@ -112,12 +117,12 @@ func (opts *cmdOptions) loadPublicKeys() ([]ssh.PublicKey, error) {
|
||||
}
|
||||
|
||||
// Try environment variable
|
||||
if key, ok := agent.GetEnv("KEY"); ok && key != "" {
|
||||
if key, ok := utils.GetEnv("KEY"); ok && key != "" {
|
||||
return agent.ParseKeys(key)
|
||||
}
|
||||
|
||||
// Try key file
|
||||
keyFile, ok := agent.GetEnv("KEY_FILE")
|
||||
keyFile, ok := utils.GetEnv("KEY_FILE")
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no key provided: must set -key flag, KEY env var, or KEY_FILE env var. Use 'beszel-agent help' for usage")
|
||||
}
|
||||
@@ -133,6 +138,38 @@ func (opts *cmdOptions) getAddress() string {
|
||||
return agent.GetAddress(opts.listen)
|
||||
}
|
||||
|
||||
// handleFingerprint handles the "fingerprint" command with subcommands "view" and "reset".
|
||||
func handleFingerprint() {
|
||||
subCmd := ""
|
||||
if len(os.Args) > 2 {
|
||||
subCmd = os.Args[2]
|
||||
}
|
||||
|
||||
switch subCmd {
|
||||
case "", "view":
|
||||
dataDir, _ := agent.GetDataDir()
|
||||
fp := agent.GetFingerprint(dataDir, "", "")
|
||||
fmt.Println(fp)
|
||||
case "help", "-h", "--help":
|
||||
fmt.Print(fingerprintUsage())
|
||||
case "reset":
|
||||
dataDir, err := agent.GetDataDir()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
if err := agent.DeleteFingerprint(dataDir); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
fmt.Println("Fingerprint reset. A new one will be generated on next start.")
|
||||
default:
|
||||
log.Fatalf("Unknown command: %q\n\n%s", subCmd, fingerprintUsage())
|
||||
}
|
||||
}
|
||||
|
||||
func fingerprintUsage() string {
|
||||
return fmt.Sprintf("Usage: %s fingerprint [view|reset]\n\nCommands:\n view Print fingerprint (default)\n reset Reset saved fingerprint\n", os.Args[0])
|
||||
}
|
||||
|
||||
func main() {
|
||||
var opts cmdOptions
|
||||
subcommandHandled := opts.parse()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"github.com/fxamacker/cbor/v2"
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/henrygd/beszel/internal/entities/systemd"
|
||||
@@ -34,14 +35,14 @@ type HubRequest[T any] struct {
|
||||
// AgentResponse defines the structure for responses sent from agent to hub.
|
||||
type AgentResponse struct {
|
||||
Id *uint32 `cbor:"0,keyasint,omitempty"`
|
||||
SystemData *system.CombinedData `cbor:"1,keyasint,omitempty,omitzero"`
|
||||
Fingerprint *FingerprintResponse `cbor:"2,keyasint,omitempty,omitzero"`
|
||||
SystemData *system.CombinedData `cbor:"1,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
|
||||
Fingerprint *FingerprintResponse `cbor:"2,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
|
||||
Error string `cbor:"3,keyasint,omitempty,omitzero"`
|
||||
String *string `cbor:"4,keyasint,omitempty,omitzero"`
|
||||
SmartData map[string]smart.SmartData `cbor:"5,keyasint,omitempty,omitzero"`
|
||||
ServiceInfo systemd.ServiceDetails `cbor:"6,keyasint,omitempty,omitzero"`
|
||||
// Logs *LogsPayload `cbor:"4,keyasint,omitempty,omitzero"`
|
||||
// RawBytes []byte `cbor:"4,keyasint,omitempty,omitzero"`
|
||||
String *string `cbor:"4,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
|
||||
SmartData map[string]smart.SmartData `cbor:"5,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
|
||||
ServiceInfo systemd.ServiceDetails `cbor:"6,keyasint,omitempty,omitzero"` // Legacy (<= 0.17)
|
||||
// Data is the generic response payload for new endpoints (0.18+)
|
||||
Data cbor.RawMessage `cbor:"7,keyasint,omitempty,omitzero"`
|
||||
}
|
||||
|
||||
type FingerprintRequest struct {
|
||||
|
||||
@@ -23,6 +23,9 @@ COPY --from=builder /agent /agent
|
||||
# this is so we don't need to create the /tmp directory in the scratch container
|
||||
COPY --from=builder /tmp /tmp
|
||||
|
||||
# AMD GPU name lookup (used by agent on Linux when /usr/share/libdrm/amdgpu.ids is read)
|
||||
COPY --from=builder /app/agent/test-data/amdgpu.ids /usr/share/libdrm/amdgpu.ids
|
||||
|
||||
# Ensure data persistence across container recreations
|
||||
VOLUME ["/var/lib/beszel-agent"]
|
||||
|
||||
|
||||
@@ -17,9 +17,12 @@ RUN rm -rf /tmp/*
|
||||
# --------------------------
|
||||
# Final image: alpine-based agent
|
||||
# --------------------------
|
||||
FROM alpine:3.22
|
||||
FROM alpine:3.23
|
||||
COPY --from=builder /agent /agent
|
||||
|
||||
# AMD GPU name lookup (used by agent on Linux when /usr/share/libdrm/amdgpu.ids is read)
|
||||
COPY --from=builder /app/agent/test-data/amdgpu.ids /usr/share/libdrm/amdgpu.ids
|
||||
|
||||
RUN apk add --no-cache smartmontools
|
||||
|
||||
# Ensure data persistence across container recreations
|
||||
|
||||
@@ -16,7 +16,7 @@ RUN CGO_ENABLED=0 GOGC=75 GOOS=$TARGETOS GOARCH=$TARGETARCH go build -ldflags "-
|
||||
# Final image
|
||||
# Note: must cap_add: [CAP_PERFMON] and mount /dev/dri/ as volume
|
||||
# --------------------------
|
||||
FROM alpine:3.22
|
||||
FROM alpine:3.23
|
||||
|
||||
COPY --from=builder /agent /agent
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user