Add SNMP profile mapping and fix asset cleanup

This commit is contained in:
Keith Smith
2026-05-26 16:34:10 -06:00
parent fe7157fdad
commit e59733d331
15 changed files with 676 additions and 35 deletions
+125 -2
View File
@@ -23,6 +23,8 @@ class SnmpCheckConfig:
community: str
item_id: str
item_type: str
label: str | None = None
unit: str | None = None
port: int = 161
timeout_seconds: float = 5.0
retries: int = 1
@@ -47,6 +49,15 @@ IF_OUT_DISCARDS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 19)
IF_OUT_ERRORS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 20)
IF_HC_IN_OCTETS = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 6)
IF_HC_OUT_OCTETS = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 10)
# Columns that live in the same SNMP table share one entry prefix; each column
# OID is that prefix plus the column number, so siblings are visibly related.
_HR_STORAGE_ENTRY = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1)
_ENT_PHY_SENSOR_ENTRY = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1)

# Per-processor load column (indexed by the processor's device index).
HR_PROCESSOR_LOAD = (1, 3, 6, 1, 2, 1, 25, 3, 3, 1, 2)

# Storage table columns used to derive used/total bytes and percent used.
HR_STORAGE_ALLOCATION_UNITS = (*_HR_STORAGE_ENTRY, 4)
HR_STORAGE_SIZE = (*_HR_STORAGE_ENTRY, 5)
HR_STORAGE_USED = (*_HR_STORAGE_ENTRY, 6)

# Physical sensor table columns: type, scale, precision, value, oper status.
ENT_PHY_SENSOR_TYPE = (*_ENT_PHY_SENSOR_ENTRY, 1)
ENT_PHY_SENSOR_SCALE = (*_ENT_PHY_SENSOR_ENTRY, 2)
ENT_PHY_SENSOR_PRECISION = (*_ENT_PHY_SENSOR_ENTRY, 3)
ENT_PHY_SENSOR_VALUE = (*_ENT_PHY_SENSOR_ENTRY, 4)
ENT_PHY_SENSOR_OPER_STATUS = (*_ENT_PHY_SENSOR_ENTRY, 5)
STATUS_LABELS = {
1: "up",
@@ -58,6 +69,24 @@ STATUS_LABELS = {
7: "lower layer down",
}
# Text shown for each sensor operational-status code; codes start at 1 and
# follow the order of the tuple below.
SENSOR_STATUS_LABELS = dict(
    enumerate(
        (
            "ok",
            "unavailable",
            "nonoperational",
        ),
        start=1,
    )
)
# Default display unit per sensor type code. Codes below 3 have no entry, so a
# lookup for them yields no unit. Codes run consecutively from 3 in the order
# listed here (two voltage codes share "V").
SENSOR_TYPE_UNITS = dict(
    enumerate(
        (
            "V",
            "V",
            "A",
            "W",
            "Hz",
            "C",
            "%",
            "rpm",
            "m3/min",
        ),
        start=3,
    )
)
async def run_snmp_check(config: SnmpCheckConfig) -> SnmpCheckResult:
try:
@@ -83,6 +112,88 @@ def _run_snmp_check_sync(config: SnmpCheckConfig) -> SnmpCheckResult:
metrics=[SnmpMetricValue(name="uptime_seconds", value=float(uptime_seconds), unit="seconds")],
)
if config.item_type == "cpu_load":
processor_index = _item_index(config.item_id, "cpu")
if processor_index is None:
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP CPU item was not valid")
oid = _with_index(HR_PROCESSOR_LOAD, processor_index)
value = _int_value(client.get_many([oid]).get(oid))
response_time_ms = int((perf_counter() - started) * 1000)
if value is None:
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="CPU load was not reported")
return SnmpCheckResult(
status="up",
response_time_ms=response_time_ms,
message=f"CPU load is {value}%",
metrics=[SnmpMetricValue(name="load_percent", value=float(value), unit="%")],
)
if config.item_type in {"memory_usage", "storage_usage"}:
storage_index = _item_index(config.item_id, "storage")
if storage_index is None:
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP storage item was not valid")
oids = [
_with_index(HR_STORAGE_ALLOCATION_UNITS, storage_index),
_with_index(HR_STORAGE_SIZE, storage_index),
_with_index(HR_STORAGE_USED, storage_index),
]
values = client.get_many(oids)
response_time_ms = int((perf_counter() - started) * 1000)
allocation_unit = _int_value(values.get(oids[0]))
size = _int_value(values.get(oids[1]))
used = _int_value(values.get(oids[2]))
if not allocation_unit or not size or used is None:
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="Storage usage was not reported")
total_bytes = float(size * allocation_unit)
used_bytes = float(used * allocation_unit)
used_percent = (used / size) * 100
label = config.label or ("Memory" if config.item_type == "memory_usage" else "Storage")
return SnmpCheckResult(
status="up",
response_time_ms=response_time_ms,
message=f"{label} is {used_percent:.1f}% used",
metrics=[
SnmpMetricValue(name="used_percent", value=used_percent, unit="%"),
SnmpMetricValue(name="used_bytes", value=used_bytes, unit="bytes"),
SnmpMetricValue(name="total_bytes", value=total_bytes, unit="bytes"),
],
)
if config.item_type == "sensor_value":
sensor_index = _item_index(config.item_id, "sensor")
if sensor_index is None:
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP sensor item was not valid")
oids = [
_with_index(ENT_PHY_SENSOR_TYPE, sensor_index),
_with_index(ENT_PHY_SENSOR_SCALE, sensor_index),
_with_index(ENT_PHY_SENSOR_PRECISION, sensor_index),
_with_index(ENT_PHY_SENSOR_VALUE, sensor_index),
_with_index(ENT_PHY_SENSOR_OPER_STATUS, sensor_index),
]
values = client.get_many(oids)
response_time_ms = int((perf_counter() - started) * 1000)
sensor_type = _int_value(values.get(oids[0]))
scale = _int_value(values.get(oids[1]))
precision = _int_value(values.get(oids[2]))
raw_value = _int_value(values.get(oids[3]))
oper_status = _int_value(values.get(oids[4]))
if raw_value is None:
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="Sensor value was not reported")
value = _scaled_sensor_value(raw_value, scale, precision)
unit = config.unit or SENSOR_TYPE_UNITS.get(sensor_type or 0)
status_label = SENSOR_STATUS_LABELS.get(oper_status or 1, f"status {oper_status}")
status = "up" if oper_status in {None, 1} else "down"
label = config.label or "Sensor"
return SnmpCheckResult(
status=status,
response_time_ms=response_time_ms,
message=f"{label} is {value:g}{unit or ''}; sensor status {status_label}",
metrics=[
SnmpMetricValue(name="sensor_value", value=value, unit=unit),
*([SnmpMetricValue(name="sensor_status", value=float(oper_status))] if oper_status is not None else []),
],
)
interface_index = _interface_index(config.item_id)
if interface_index is None:
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP interface item was not valid")
@@ -162,8 +273,12 @@ def _run_snmp_check_sync(config: SnmpCheckConfig) -> SnmpCheckResult:
def _interface_index(item_id: str) -> int | None:
    """Resolve the index of an interface item id.

    Thin wrapper that asks ``_item_index`` to parse ``item_id`` while
    requiring the ``interface`` prefix; its result is returned unchanged.
    """
    return _item_index(item_id, expected_prefix="interface")
def _item_index(item_id: str, expected_prefix: str) -> int | None:
parts = item_id.split(".")
if len(parts) < 3 or parts[0] != "interface":
if len(parts) < 3 or parts[0] != expected_prefix:
return None
try:
return int(parts[1])
@@ -181,6 +296,12 @@ def _int_value(value: Any) -> int | None:
return None
def _scaled_sensor_value(raw_value: int, scale: int | None, precision: int | None) -> float:
scale_multiplier = 10 ** ((scale or 9) - 9)
precision_divisor = 10 ** (precision or 0)
return float(raw_value * scale_multiplier / precision_divisor)
class SnmpV2Client:
def __init__(self, host: str, community: str, port: int, timeout_seconds: float, retries: int) -> None:
self.host = host
@@ -343,7 +464,9 @@ def _decode_oid(value: bytes) -> tuple[int, ...]:
def _decode_value(tag: int, value: bytes) -> Any:
if tag in {0x02, 0x41, 0x42, 0x43, 0x46}:
if tag == 0x02:
return _decode_integer(value)
if tag in {0x41, 0x42, 0x43, 0x46}:
return int.from_bytes(value, "big")
if tag == 0x04:
return value.decode("utf-8", errors="replace")
+2
View File
@@ -152,6 +152,8 @@ class Scheduler:
community=community,
item_id=str(monitor.config.get("item_id") or ""),
item_type=str(monitor.config.get("item_type") or ""),
label=monitor.config.get("label") if isinstance(monitor.config.get("label"), str) else None,
unit=monitor.config.get("unit") if isinstance(monitor.config.get("unit"), str) else None,
port=int(extra.get("port") or 161),
timeout_seconds=float(extra.get("timeout_seconds") or 5),
retries=int(extra.get("retries") or 1),
+95
View File
@@ -10,6 +10,15 @@ from app.collectors.snmp import (
IF_OPER_STATUS,
IF_OUT_DISCARDS,
IF_OUT_ERRORS,
ENT_PHY_SENSOR_OPER_STATUS,
ENT_PHY_SENSOR_PRECISION,
ENT_PHY_SENSOR_SCALE,
ENT_PHY_SENSOR_TYPE,
ENT_PHY_SENSOR_VALUE,
HR_PROCESSOR_LOAD,
HR_STORAGE_ALLOCATION_UNITS,
HR_STORAGE_SIZE,
HR_STORAGE_USED,
SYS_UPTIME,
SnmpCheckConfig,
_with_index,
@@ -37,6 +46,92 @@ class SnmpCollectorTestCase(unittest.IsolatedAsyncioTestCase):
("uptime_seconds", 1234.0, "seconds")
]
async def test_collects_cpu_load(self) -> None:
    """A ``cpu_load`` item reports the value read at its HR_PROCESSOR_LOAD index as a percentage."""
    load_oid = _with_index(HR_PROCESSOR_LOAD, 196608)
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="cpu.196608.load",
        item_type="cpu_load",
    )

    # Replace the SNMP client so no network traffic is attempted.
    with patch("app.collectors.snmp.SnmpV2Client") as fake_client:
        fake_client.return_value.get_many.return_value = {load_oid: 42}
        outcome = await run_snmp_check(check)

    assert outcome.status == "up"
    assert outcome.message == "CPU load is 42%"
    reported = [(metric.name, metric.value, metric.unit) for metric in outcome.metrics]
    assert reported == [("load_percent", 42.0, "%")]
async def test_collects_storage_usage(self) -> None:
    """A ``storage_usage`` item yields percent-used plus used/total byte metrics and uses the configured label."""
    units_oid, size_oid, used_oid = (
        _with_index(column, 31)
        for column in (HR_STORAGE_ALLOCATION_UNITS, HR_STORAGE_SIZE, HR_STORAGE_USED)
    )
    # 100 blocks of 4096 bytes, 25 of them in use.
    agent_reply = {units_oid: 4096, size_oid: 100, used_oid: 25}
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="storage.31.usage",
        item_type="storage_usage",
        label="Disk / usage",
    )

    # Replace the SNMP client so no network traffic is attempted.
    with patch("app.collectors.snmp.SnmpV2Client") as fake_client:
        fake_client.return_value.get_many.return_value = agent_reply
        outcome = await run_snmp_check(check)

    assert outcome.status == "up"
    assert outcome.message == "Disk / usage is 25.0% used"
    reported = [(metric.name, metric.value, metric.unit) for metric in outcome.metrics]
    assert reported == [
        ("used_percent", 25.0, "%"),
        ("used_bytes", 102400.0, "bytes"),
        ("total_bytes", 409600.0, "bytes"),
    ]
async def test_collects_sensor_value_and_status(self) -> None:
    """A ``sensor_value`` item reports the precision-adjusted reading and the sensor's operational status."""
    sensor_columns = (
        ENT_PHY_SENSOR_TYPE,
        ENT_PHY_SENSOR_SCALE,
        ENT_PHY_SENSOR_PRECISION,
        ENT_PHY_SENSOR_VALUE,
        ENT_PHY_SENSOR_OPER_STATUS,
    )
    # Type 8, scale 9, one decimal place, raw reading 310, status 1.
    column_values = (8, 9, 1, 310, 1)
    agent_reply = {
        _with_index(column, 10): reading
        for column, reading in zip(sensor_columns, column_values)
    }
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="sensor.10.value",
        item_type="sensor_value",
        label="Temperature Inlet",
        unit="C",
    )

    # Replace the SNMP client so no network traffic is attempted.
    with patch("app.collectors.snmp.SnmpV2Client") as fake_client:
        fake_client.return_value.get_many.return_value = agent_reply
        outcome = await run_snmp_check(check)

    assert outcome.status == "up"
    assert outcome.message == "Temperature Inlet is 31C; sensor status ok"
    reported = [(metric.name, metric.value, metric.unit) for metric in outcome.metrics]
    assert reported == [
        ("sensor_value", 31.0, "C"),
        ("sensor_status", 1.0, None),
    ]
async def test_collects_interface_status(self) -> None:
admin_oid = _with_index(IF_ADMIN_STATUS, 7)
oper_oid = _with_index(IF_OPER_STATUS, 7)