Improve SNMP discovery item context

This commit is contained in:
Keith Smith
2026-05-26 21:08:07 -06:00
parent 6ff452a8a9
commit af72a6c563
8 changed files with 303 additions and 92 deletions
+23
View File
@@ -87,6 +87,7 @@ def _monitorable_items(discovered: DiscoveredSnmpDevice) -> list[SnmpDiscoveryIt
group="Device Health",
label="Device uptime",
unit="seconds",
current_value=_format_duration(discovered.uptime_seconds),
)
)
items.extend(
@@ -96,6 +97,7 @@ def _monitorable_items(discovered: DiscoveredSnmpDevice) -> list[SnmpDiscoveryIt
group=item.group,
label=item.label,
unit=item.unit,
current_value=item.current_value,
)
for item in discovered.health_items
)
@@ -109,6 +111,7 @@ def _monitorable_items(discovered: DiscoveredSnmpDevice) -> list[SnmpDiscoveryIt
item_type="interface_status",
group=group,
label=f"{interface.label} status",
current_value=_interface_status_value(interface.admin_status, interface.oper_status),
),
SnmpDiscoveryItemRead(
item_id=f"{item_prefix}.traffic",
@@ -116,6 +119,7 @@ def _monitorable_items(discovered: DiscoveredSnmpDevice) -> list[SnmpDiscoveryIt
group=group,
label=f"{interface.label} traffic",
unit="bps",
current_value="Rate after first check",
),
SnmpDiscoveryItemRead(
item_id=f"{item_prefix}.errors",
@@ -127,3 +131,22 @@ def _monitorable_items(discovered: DiscoveredSnmpDevice) -> list[SnmpDiscoveryIt
]
)
return items
def _format_duration(seconds: int | None) -> str | None:
if seconds is None:
return None
days = seconds // 86_400
hours = (seconds % 86_400) // 3_600
minutes = (seconds % 3_600) // 60
if days:
return f"{days}d {hours}h"
if hours:
return f"{hours}h {minutes}m"
return f"{minutes}m"
def _interface_status_value(admin_status: str | None, oper_status: str | None) -> str | None:
if admin_status and oper_status:
return f"admin {admin_status}, oper {oper_status}"
return admin_status or oper_status
+1
View File
@@ -237,6 +237,7 @@ class SnmpDiscoveryItemRead(BaseModel):
group: str
label: str
unit: str | None = None
current_value: str | None = None
class SnmpMonitorsCreate(BaseModel):
+35 -10
View File
@@ -35,6 +35,7 @@ class DiscoveredSnmpHealthItem:
group: str
label: str
unit: str | None = None
current_value: str | None = None
@dataclass(frozen=True)
@@ -339,14 +340,15 @@ def _discover_host_resource_items(client: "SnmpV2Client") -> list[DiscoveredSnmp
for position, index in enumerate(processor_indexes, start=1):
label = "CPU load" if len(processor_indexes) == 1 else f"CPU {position} load"
items.append(
DiscoveredSnmpHealthItem(
item_id=f"cpu.{index}.load",
item_type="cpu_load",
group="Device Health",
label=label,
unit="%",
DiscoveredSnmpHealthItem(
item_id=f"cpu.{index}.load",
item_type="cpu_load",
group="Device Health",
label=label,
unit="%",
current_value=f"{_int_value(processor_loads.get(index))}%",
)
)
)
storage_types = _indexed_values(client.walk(HR_STORAGE_TYPE, max_items=256))
descriptions = _indexed_values(client.walk(HR_STORAGE_DESCR, max_items=256))
@@ -363,6 +365,7 @@ def _discover_host_resource_items(client: "SnmpV2Client") -> list[DiscoveredSnmp
continue
description = _string_value(descriptions.get(index)) or f"Storage {index}"
if storage_type in {HR_STORAGE_RAM, HR_STORAGE_VIRTUAL_MEMORY}:
used_percent = (used_blocks / size) * 100
items.append(
DiscoveredSnmpHealthItem(
item_id=f"storage.{index}.memory",
@@ -370,11 +373,13 @@ def _discover_host_resource_items(client: "SnmpV2Client") -> list[DiscoveredSnmp
group="Device Health",
label="Memory used",
unit="%",
current_value=f"{used_percent:.1f}% used",
)
)
elif storage_type in {HR_STORAGE_FIXED_DISK, HR_STORAGE_REMOVABLE_DISK}:
if not _is_monitorable_storage_path(description):
continue
used_percent = (used_blocks / size) * 100
items.append(
DiscoveredSnmpHealthItem(
item_id=f"storage.{index}.usage",
@@ -382,6 +387,7 @@ def _discover_host_resource_items(client: "SnmpV2Client") -> list[DiscoveredSnmp
group="Storage",
label=_storage_usage_label(description),
unit="%",
current_value=f"{used_percent:.1f}% used",
)
)
@@ -393,18 +399,23 @@ def _discover_linux_server_items(client: "SnmpV2Client") -> list[DiscoveredSnmpH
load_values = _indexed_values(client.walk(UCD_LA_LOAD_INT, max_items=16))
for index, label in [(1, "Load average 1 minute"), (2, "Load average 5 minutes"), (3, "Load average 15 minutes")]:
if _int_value(load_values.get(index)) is not None:
load_value = _int_value(load_values.get(index))
if load_value is not None:
items.append(
DiscoveredSnmpHealthItem(
item_id=f"linux.load.{index}",
item_type="linux_load_average",
group="Server Health",
label=label,
current_value=f"{load_value / 100:.2f}",
)
)
memory = client.get_many([UCD_MEM_TOTAL_REAL, UCD_MEM_AVAIL_REAL])
if _int_value(memory.get(UCD_MEM_TOTAL_REAL)) and _int_value(memory.get(UCD_MEM_AVAIL_REAL)) is not None:
total_kb = _int_value(memory.get(UCD_MEM_TOTAL_REAL))
available_kb = _int_value(memory.get(UCD_MEM_AVAIL_REAL))
if total_kb and available_kb is not None:
used_percent = ((total_kb - available_kb) / total_kb) * 100
items.append(
DiscoveredSnmpHealthItem(
item_id="linux.memory.real",
@@ -412,6 +423,7 @@ def _discover_linux_server_items(client: "SnmpV2Client") -> list[DiscoveredSnmpH
group="Server Health",
label="Memory used",
unit="%",
current_value=f"{used_percent:.1f}% used",
)
)
@@ -419,7 +431,8 @@ def _discover_linux_server_items(client: "SnmpV2Client") -> list[DiscoveredSnmpH
disk_percent = _indexed_values(client.walk(UCD_DSK_PERCENT, max_items=256))
for index in sorted(disk_paths):
path = _string_value(disk_paths.get(index))
if not path or _int_value(disk_percent.get(index)) is None:
used_percent = _int_value(disk_percent.get(index))
if not path or used_percent is None:
continue
items.append(
DiscoveredSnmpHealthItem(
@@ -428,6 +441,7 @@ def _discover_linux_server_items(client: "SnmpV2Client") -> list[DiscoveredSnmpH
group="Storage",
label=_storage_usage_label(path),
unit="%",
current_value=f"{used_percent}% used",
)
)
@@ -436,6 +450,8 @@ def _discover_linux_server_items(client: "SnmpV2Client") -> list[DiscoveredSnmpH
def _discover_sensor_items(client: "SnmpV2Client") -> list[DiscoveredSnmpHealthItem]:
sensor_types = _indexed_values(client.walk(ENT_PHY_SENSOR_TYPE, max_items=256))
sensor_scales = _indexed_values(client.walk(ENT_PHY_SENSOR_SCALE, max_items=256))
sensor_precisions = _indexed_values(client.walk(ENT_PHY_SENSOR_PRECISION, max_items=256))
sensor_values = _indexed_values(client.walk(ENT_PHY_SENSOR_VALUE, max_items=256))
sensor_names = _indexed_values(client.walk(ENT_PHYSICAL_NAME, max_items=256))
sensor_descriptions = _indexed_values(client.walk(ENT_PHYSICAL_DESCR, max_items=256))
@@ -446,6 +462,8 @@ def _discover_sensor_items(client: "SnmpV2Client") -> list[DiscoveredSnmpHealthI
if sensor_type not in SENSOR_TYPE_LABELS or _int_value(sensor_values.get(index)) is None:
continue
kind, unit = SENSOR_TYPE_LABELS[sensor_type]
raw_value = _int_value(sensor_values.get(index)) or 0
value = _scaled_sensor_value(raw_value, _int_value(sensor_scales.get(index)), _int_value(sensor_precisions.get(index)))
name = _string_value(sensor_names.get(index)) or _string_value(sensor_descriptions.get(index))
label = kind if not name else f"{kind} {name}"
items.append(
@@ -455,6 +473,7 @@ def _discover_sensor_items(client: "SnmpV2Client") -> list[DiscoveredSnmpHealthI
group="Environmental",
label=label,
unit=unit,
current_value=f"{value:g}{unit or ''}",
)
)
return items
@@ -529,6 +548,12 @@ def _timeticks_to_seconds(value: Any) -> int | None:
return int(value / 100)
def _scaled_sensor_value(raw_value: int, scale: int | None, precision: int | None) -> float:
scale_multiplier = 10 ** ((scale or 9) - 9)
precision_divisor = 10 ** (precision or 0)
return float(raw_value * scale_multiplier / precision_divisor)
class SnmpV2Client:
def __init__(self, host: str, credential: SnmpCredential) -> None:
self.host = host
+19 -15
View File
@@ -102,6 +102,7 @@ def test_snmp_discovery_uses_profile_and_returns_friendly_results(client: TestCl
"group": "Device Health",
"label": "Device uptime",
"unit": "seconds",
"current_value": "3h 25m",
},
{
"item_id": "interface.1.status",
@@ -109,6 +110,7 @@ def test_snmp_discovery_uses_profile_and_returns_friendly_results(client: TestCl
"group": "Interface GigabitEthernet 1/0/1",
"label": "GigabitEthernet 1/0/1 status",
"unit": None,
"current_value": "admin up, oper up",
},
{
"item_id": "interface.1.traffic",
@@ -116,6 +118,7 @@ def test_snmp_discovery_uses_profile_and_returns_friendly_results(client: TestCl
"group": "Interface GigabitEthernet 1/0/1",
"label": "GigabitEthernet 1/0/1 traffic",
"unit": "bps",
"current_value": "Rate after first check",
},
{
"item_id": "interface.1.errors",
@@ -123,6 +126,7 @@ def test_snmp_discovery_uses_profile_and_returns_friendly_results(client: TestCl
"group": "Interface GigabitEthernet 1/0/1",
"label": "GigabitEthernet 1/0/1 errors and discards",
"unit": "count",
"current_value": None,
},
]
assert "private-community" not in response.text
@@ -219,12 +223,12 @@ def test_snmp_server_asset_type_uses_linux_server_mibs_and_keeps_interfaces(monk
assert discovered.capabilities["cpu"] is True
assert discovered.capabilities["memory"] is True
assert discovered.capabilities["storage"] is True
assert [(item.item_id, item.item_type, item.group, item.label, item.unit) for item in discovered.health_items] == [
("linux.load.1", "linux_load_average", "Server Health", "Load average 1 minute", None),
("linux.load.2", "linux_load_average", "Server Health", "Load average 5 minutes", None),
("linux.load.3", "linux_load_average", "Server Health", "Load average 15 minutes", None),
("linux.memory.real", "linux_memory_usage", "Server Health", "Memory used", "%"),
("linux.disk.1", "linux_disk_usage", "Storage", "Disk / usage", "%"),
assert [(item.item_id, item.item_type, item.group, item.label, item.unit, item.current_value) for item in discovered.health_items] == [
("linux.load.1", "linux_load_average", "Server Health", "Load average 1 minute", None, "1.23"),
("linux.load.2", "linux_load_average", "Server Health", "Load average 5 minutes", None, "0.97"),
("linux.load.3", "linux_load_average", "Server Health", "Load average 15 minutes", None, "0.88"),
("linux.memory.real", "linux_memory_usage", "Server Health", "Memory used", "%", "75.0% used"),
("linux.disk.1", "linux_disk_usage", "Storage", "Disk / usage", "%", "42% used"),
]
@@ -282,10 +286,10 @@ def test_snmp_server_asset_type_falls_back_to_host_resources(monkeypatch) -> Non
assert discovered.profile_key == "linux_server"
assert [(interface.name, interface.label) for interface in discovered.interfaces] == [("eth0", "eth0")]
assert [(item.item_id, item.item_type, item.group, item.label, item.unit) for item in discovered.health_items] == [
("cpu.196608.load", "cpu_load", "Device Health", "CPU load", "%"),
("storage.1.memory", "memory_usage", "Device Health", "Memory used", "%"),
("storage.31.usage", "storage_usage", "Storage", "Disk / usage", "%"),
assert [(item.item_id, item.item_type, item.group, item.label, item.unit, item.current_value) for item in discovered.health_items] == [
("cpu.196608.load", "cpu_load", "Device Health", "CPU load", "%", "17%"),
("storage.1.memory", "memory_usage", "Device Health", "Memory used", "%", "50.0% used"),
("storage.31.usage", "storage_usage", "Storage", "Disk / usage", "%", "25.0% used"),
]
@@ -341,11 +345,11 @@ def test_snmp_profile_mapping_discovers_standard_health_items(monkeypatch) -> No
assert discovered.capabilities["memory"] is True
assert discovered.capabilities["storage"] is True
assert discovered.capabilities["sensors"] is True
assert [(item.item_id, item.item_type, item.group, item.label, item.unit) for item in discovered.health_items] == [
("cpu.196608.load", "cpu_load", "Device Health", "CPU load", "%"),
("storage.1.memory", "memory_usage", "Device Health", "Memory used", "%"),
("storage.31.usage", "storage_usage", "Storage", "Disk / usage", "%"),
("sensor.10.value", "sensor_value", "Environmental", "Temperature Inlet", "C"),
assert [(item.item_id, item.item_type, item.group, item.label, item.unit, item.current_value) for item in discovered.health_items] == [
("cpu.196608.load", "cpu_load", "Device Health", "CPU load", "%", "17%"),
("storage.1.memory", "memory_usage", "Device Health", "Memory used", "%", "50.0% used"),
("storage.31.usage", "storage_usage", "Storage", "Disk / usage", "%", "25.0% used"),
("sensor.10.value", "sensor_value", "Environmental", "Temperature Inlet", "C", "310C"),
]