Add SNMP profile mapping and fix asset cleanup
This commit is contained in:
@@ -23,6 +23,8 @@ class SnmpCheckConfig:
|
||||
community: str
|
||||
item_id: str
|
||||
item_type: str
|
||||
label: str | None = None
|
||||
unit: str | None = None
|
||||
port: int = 161
|
||||
timeout_seconds: float = 5.0
|
||||
retries: int = 1
|
||||
@@ -47,6 +49,15 @@ IF_OUT_DISCARDS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 19)
|
||||
IF_OUT_ERRORS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 20)
|
||||
IF_HC_IN_OCTETS = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 6)
|
||||
IF_HC_OUT_OCTETS = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 10)
|
||||
HR_PROCESSOR_LOAD = (1, 3, 6, 1, 2, 1, 25, 3, 3, 1, 2)
|
||||
HR_STORAGE_ALLOCATION_UNITS = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 4)
|
||||
HR_STORAGE_SIZE = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 5)
|
||||
HR_STORAGE_USED = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 6)
|
||||
ENT_PHY_SENSOR_TYPE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 1)
|
||||
ENT_PHY_SENSOR_SCALE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 2)
|
||||
ENT_PHY_SENSOR_PRECISION = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 3)
|
||||
ENT_PHY_SENSOR_VALUE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 4)
|
||||
ENT_PHY_SENSOR_OPER_STATUS = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 5)
|
||||
|
||||
STATUS_LABELS = {
|
||||
1: "up",
|
||||
@@ -58,6 +69,24 @@ STATUS_LABELS = {
|
||||
7: "lower layer down",
|
||||
}
|
||||
|
||||
# Text shown in check messages for each operational-status code read from
# ENT_PHY_SENSOR_OPER_STATUS. Only code 1 ("ok") is treated as healthy by
# the sensor check; unknown codes are rendered by the caller as "status <n>".
SENSOR_STATUS_LABELS = {1: "ok", 2: "unavailable", 3: "nonoperational"}
|
||||
|
||||
# Default display unit for each sensor-type code read from
# ENT_PHY_SENSOR_TYPE; used only when the check config supplies no unit.
# Codes missing from this table produce no unit at all.
# NOTE(review): the per-code meanings below follow the ENTITY-SENSOR-MIB
# EntitySensorDataType enumeration -- confirm against RFC 3433.
SENSOR_TYPE_UNITS = {
    3: "V",  # volts AC
    4: "V",  # volts DC
    5: "A",  # amperes
    6: "W",  # watts
    7: "Hz",  # hertz
    8: "C",  # degrees Celsius
    9: "%",  # percent relative humidity
    10: "rpm",  # revolutions per minute
    11: "m3/min",  # cubic metres per minute
}
|
||||
|
||||
|
||||
async def run_snmp_check(config: SnmpCheckConfig) -> SnmpCheckResult:
|
||||
try:
|
||||
@@ -83,6 +112,88 @@ def _run_snmp_check_sync(config: SnmpCheckConfig) -> SnmpCheckResult:
|
||||
metrics=[SnmpMetricValue(name="uptime_seconds", value=float(uptime_seconds), unit="seconds")],
|
||||
)
|
||||
|
||||
if config.item_type == "cpu_load":
|
||||
processor_index = _item_index(config.item_id, "cpu")
|
||||
if processor_index is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP CPU item was not valid")
|
||||
oid = _with_index(HR_PROCESSOR_LOAD, processor_index)
|
||||
value = _int_value(client.get_many([oid]).get(oid))
|
||||
response_time_ms = int((perf_counter() - started) * 1000)
|
||||
if value is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="CPU load was not reported")
|
||||
return SnmpCheckResult(
|
||||
status="up",
|
||||
response_time_ms=response_time_ms,
|
||||
message=f"CPU load is {value}%",
|
||||
metrics=[SnmpMetricValue(name="load_percent", value=float(value), unit="%")],
|
||||
)
|
||||
|
||||
if config.item_type in {"memory_usage", "storage_usage"}:
|
||||
storage_index = _item_index(config.item_id, "storage")
|
||||
if storage_index is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP storage item was not valid")
|
||||
oids = [
|
||||
_with_index(HR_STORAGE_ALLOCATION_UNITS, storage_index),
|
||||
_with_index(HR_STORAGE_SIZE, storage_index),
|
||||
_with_index(HR_STORAGE_USED, storage_index),
|
||||
]
|
||||
values = client.get_many(oids)
|
||||
response_time_ms = int((perf_counter() - started) * 1000)
|
||||
allocation_unit = _int_value(values.get(oids[0]))
|
||||
size = _int_value(values.get(oids[1]))
|
||||
used = _int_value(values.get(oids[2]))
|
||||
if not allocation_unit or not size or used is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="Storage usage was not reported")
|
||||
total_bytes = float(size * allocation_unit)
|
||||
used_bytes = float(used * allocation_unit)
|
||||
used_percent = (used / size) * 100
|
||||
label = config.label or ("Memory" if config.item_type == "memory_usage" else "Storage")
|
||||
return SnmpCheckResult(
|
||||
status="up",
|
||||
response_time_ms=response_time_ms,
|
||||
message=f"{label} is {used_percent:.1f}% used",
|
||||
metrics=[
|
||||
SnmpMetricValue(name="used_percent", value=used_percent, unit="%"),
|
||||
SnmpMetricValue(name="used_bytes", value=used_bytes, unit="bytes"),
|
||||
SnmpMetricValue(name="total_bytes", value=total_bytes, unit="bytes"),
|
||||
],
|
||||
)
|
||||
|
||||
if config.item_type == "sensor_value":
|
||||
sensor_index = _item_index(config.item_id, "sensor")
|
||||
if sensor_index is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP sensor item was not valid")
|
||||
oids = [
|
||||
_with_index(ENT_PHY_SENSOR_TYPE, sensor_index),
|
||||
_with_index(ENT_PHY_SENSOR_SCALE, sensor_index),
|
||||
_with_index(ENT_PHY_SENSOR_PRECISION, sensor_index),
|
||||
_with_index(ENT_PHY_SENSOR_VALUE, sensor_index),
|
||||
_with_index(ENT_PHY_SENSOR_OPER_STATUS, sensor_index),
|
||||
]
|
||||
values = client.get_many(oids)
|
||||
response_time_ms = int((perf_counter() - started) * 1000)
|
||||
sensor_type = _int_value(values.get(oids[0]))
|
||||
scale = _int_value(values.get(oids[1]))
|
||||
precision = _int_value(values.get(oids[2]))
|
||||
raw_value = _int_value(values.get(oids[3]))
|
||||
oper_status = _int_value(values.get(oids[4]))
|
||||
if raw_value is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="Sensor value was not reported")
|
||||
value = _scaled_sensor_value(raw_value, scale, precision)
|
||||
unit = config.unit or SENSOR_TYPE_UNITS.get(sensor_type or 0)
|
||||
status_label = SENSOR_STATUS_LABELS.get(oper_status or 1, f"status {oper_status}")
|
||||
status = "up" if oper_status in {None, 1} else "down"
|
||||
label = config.label or "Sensor"
|
||||
return SnmpCheckResult(
|
||||
status=status,
|
||||
response_time_ms=response_time_ms,
|
||||
message=f"{label} is {value:g}{unit or ''}; sensor status {status_label}",
|
||||
metrics=[
|
||||
SnmpMetricValue(name="sensor_value", value=value, unit=unit),
|
||||
*([SnmpMetricValue(name="sensor_status", value=float(oper_status))] if oper_status is not None else []),
|
||||
],
|
||||
)
|
||||
|
||||
interface_index = _interface_index(config.item_id)
|
||||
if interface_index is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP interface item was not valid")
|
||||
@@ -162,8 +273,12 @@ def _run_snmp_check_sync(config: SnmpCheckConfig) -> SnmpCheckResult:
|
||||
|
||||
|
||||
def _interface_index(item_id: str) -> int | None:
    """Return the index encoded in an ``interface.<index>.<...>`` item id.

    Delegates to ``_item_index`` with the ``"interface"`` prefix, so the
    result is ``None`` whenever that helper rejects the id.
    """
    interface_prefix = "interface"
    return _item_index(item_id, interface_prefix)
|
||||
|
||||
|
||||
def _item_index(item_id: str, expected_prefix: str) -> int | None:
|
||||
parts = item_id.split(".")
|
||||
if len(parts) < 3 or parts[0] != "interface":
|
||||
if len(parts) < 3 or parts[0] != expected_prefix:
|
||||
return None
|
||||
try:
|
||||
return int(parts[1])
|
||||
@@ -181,6 +296,12 @@ def _int_value(value: Any) -> int | None:
|
||||
return None
|
||||
|
||||
|
||||
def _scaled_sensor_value(raw_value: int, scale: int | None, precision: int | None) -> float:
|
||||
scale_multiplier = 10 ** ((scale or 9) - 9)
|
||||
precision_divisor = 10 ** (precision or 0)
|
||||
return float(raw_value * scale_multiplier / precision_divisor)
|
||||
|
||||
|
||||
class SnmpV2Client:
|
||||
def __init__(self, host: str, community: str, port: int, timeout_seconds: float, retries: int) -> None:
|
||||
self.host = host
|
||||
@@ -343,7 +464,9 @@ def _decode_oid(value: bytes) -> tuple[int, ...]:
|
||||
|
||||
|
||||
def _decode_value(tag: int, value: bytes) -> Any:
|
||||
if tag in {0x02, 0x41, 0x42, 0x43, 0x46}:
|
||||
if tag == 0x02:
|
||||
return _decode_integer(value)
|
||||
if tag in {0x41, 0x42, 0x43, 0x46}:
|
||||
return int.from_bytes(value, "big")
|
||||
if tag == 0x04:
|
||||
return value.decode("utf-8", errors="replace")
|
||||
|
||||
@@ -152,6 +152,8 @@ class Scheduler:
|
||||
community=community,
|
||||
item_id=str(monitor.config.get("item_id") or ""),
|
||||
item_type=str(monitor.config.get("item_type") or ""),
|
||||
label=monitor.config.get("label") if isinstance(monitor.config.get("label"), str) else None,
|
||||
unit=monitor.config.get("unit") if isinstance(monitor.config.get("unit"), str) else None,
|
||||
port=int(extra.get("port") or 161),
|
||||
timeout_seconds=float(extra.get("timeout_seconds") or 5),
|
||||
retries=int(extra.get("retries") or 1),
|
||||
|
||||
@@ -10,6 +10,15 @@ from app.collectors.snmp import (
|
||||
IF_OPER_STATUS,
|
||||
IF_OUT_DISCARDS,
|
||||
IF_OUT_ERRORS,
|
||||
ENT_PHY_SENSOR_OPER_STATUS,
|
||||
ENT_PHY_SENSOR_PRECISION,
|
||||
ENT_PHY_SENSOR_SCALE,
|
||||
ENT_PHY_SENSOR_TYPE,
|
||||
ENT_PHY_SENSOR_VALUE,
|
||||
HR_PROCESSOR_LOAD,
|
||||
HR_STORAGE_ALLOCATION_UNITS,
|
||||
HR_STORAGE_SIZE,
|
||||
HR_STORAGE_USED,
|
||||
SYS_UPTIME,
|
||||
SnmpCheckConfig,
|
||||
_with_index,
|
||||
@@ -37,6 +46,92 @@ class SnmpCollectorTestCase(unittest.IsolatedAsyncioTestCase):
|
||||
("uptime_seconds", 1234.0, "seconds")
|
||||
]
|
||||
|
||||
async def test_collects_cpu_load(self) -> None:
    """A reported CPU load value yields an "up" result with one percent metric."""
    load_oid = _with_index(HR_PROCESSOR_LOAD, 196608)
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="cpu.196608.load",
        item_type="cpu_load",
    )

    with patch("app.collectors.snmp.SnmpV2Client") as client_class:
        # The patched client answers the single-OID lookup with a load of 42.
        fake_client = client_class.return_value
        fake_client.get_many.return_value = {load_oid: 42}
        result = await run_snmp_check(check)

    reported = [(metric.name, metric.value, metric.unit) for metric in result.metrics]
    assert result.status == "up"
    assert result.message == "CPU load is 42%"
    assert reported == [("load_percent", 42.0, "%")]
|
||||
|
||||
async def test_collects_storage_usage(self) -> None:
    """Allocation unit, size and used counts become percent and byte metrics."""
    storage_index = 31
    unit_oid = _with_index(HR_STORAGE_ALLOCATION_UNITS, storage_index)
    size_oid = _with_index(HR_STORAGE_SIZE, storage_index)
    used_oid = _with_index(HR_STORAGE_USED, storage_index)
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="storage.31.usage",
        item_type="storage_usage",
        label="Disk / usage",
    )

    with patch("app.collectors.snmp.SnmpV2Client") as client_class:
        # 100 allocation units of 4096 bytes each, 25 of them in use.
        fake_client = client_class.return_value
        fake_client.get_many.return_value = {unit_oid: 4096, size_oid: 100, used_oid: 25}
        result = await run_snmp_check(check)

    reported = [(metric.name, metric.value, metric.unit) for metric in result.metrics]
    assert result.status == "up"
    assert result.message == "Disk / usage is 25.0% used"
    assert reported == [
        ("used_percent", 25.0, "%"),
        ("used_bytes", 102400.0, "bytes"),
        ("total_bytes", 409600.0, "bytes"),
    ]
|
||||
|
||||
async def test_collects_sensor_value_and_status(self) -> None:
    """A sensor reading is scaled, labelled and reported together with its status."""
    sensor_index = 10
    type_oid = _with_index(ENT_PHY_SENSOR_TYPE, sensor_index)
    scale_oid = _with_index(ENT_PHY_SENSOR_SCALE, sensor_index)
    precision_oid = _with_index(ENT_PHY_SENSOR_PRECISION, sensor_index)
    value_oid = _with_index(ENT_PHY_SENSOR_VALUE, sensor_index)
    status_oid = _with_index(ENT_PHY_SENSOR_OPER_STATUS, sensor_index)
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="sensor.10.value",
        item_type="sensor_value",
        label="Temperature Inlet",
        unit="C",
    )

    with patch("app.collectors.snmp.SnmpV2Client") as client_class:
        # Raw 310 at scale 9 with precision 1 is expected to read as 31;
        # operational status 1 maps to "ok".
        fake_client = client_class.return_value
        fake_client.get_many.return_value = {
            type_oid: 8,
            scale_oid: 9,
            precision_oid: 1,
            value_oid: 310,
            status_oid: 1,
        }
        result = await run_snmp_check(check)

    reported = [(metric.name, metric.value, metric.unit) for metric in result.metrics]
    assert result.status == "up"
    assert result.message == "Temperature Inlet is 31C; sensor status ok"
    assert reported == [
        ("sensor_value", 31.0, "C"),
        ("sensor_status", 1.0, None),
    ]
|
||||
|
||||
async def test_collects_interface_status(self) -> None:
|
||||
admin_oid = _with_index(IF_ADMIN_STATUS, 7)
|
||||
oper_oid = _with_index(IF_OPER_STATUS, 7)
|
||||
|
||||
Reference in New Issue
Block a user