Add SNMP profile mapping and fix asset cleanup
This commit is contained in:
@@ -23,6 +23,8 @@ class SnmpCheckConfig:
|
||||
community: str
|
||||
item_id: str
|
||||
item_type: str
|
||||
label: str | None = None
|
||||
unit: str | None = None
|
||||
port: int = 161
|
||||
timeout_seconds: float = 5.0
|
||||
retries: int = 1
|
||||
@@ -47,6 +49,15 @@ IF_OUT_DISCARDS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 19)
|
||||
IF_OUT_ERRORS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 20)
|
||||
IF_HC_IN_OCTETS = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 6)
|
||||
IF_HC_OUT_OCTETS = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 10)
|
||||
HR_PROCESSOR_LOAD = (1, 3, 6, 1, 2, 1, 25, 3, 3, 1, 2)
|
||||
HR_STORAGE_ALLOCATION_UNITS = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 4)
|
||||
HR_STORAGE_SIZE = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 5)
|
||||
HR_STORAGE_USED = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 6)
|
||||
ENT_PHY_SENSOR_TYPE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 1)
|
||||
ENT_PHY_SENSOR_SCALE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 2)
|
||||
ENT_PHY_SENSOR_PRECISION = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 3)
|
||||
ENT_PHY_SENSOR_VALUE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 4)
|
||||
ENT_PHY_SENSOR_OPER_STATUS = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 5)
|
||||
|
||||
STATUS_LABELS = {
|
||||
1: "up",
|
||||
@@ -58,6 +69,24 @@ STATUS_LABELS = {
|
||||
7: "lower layer down",
|
||||
}
|
||||
|
||||
SENSOR_STATUS_LABELS = {
|
||||
1: "ok",
|
||||
2: "unavailable",
|
||||
3: "nonoperational",
|
||||
}
|
||||
|
||||
SENSOR_TYPE_UNITS = {
|
||||
3: "V",
|
||||
4: "V",
|
||||
5: "A",
|
||||
6: "W",
|
||||
7: "Hz",
|
||||
8: "C",
|
||||
9: "%",
|
||||
10: "rpm",
|
||||
11: "m3/min",
|
||||
}
|
||||
|
||||
|
||||
async def run_snmp_check(config: SnmpCheckConfig) -> SnmpCheckResult:
|
||||
try:
|
||||
@@ -83,6 +112,88 @@ def _run_snmp_check_sync(config: SnmpCheckConfig) -> SnmpCheckResult:
|
||||
metrics=[SnmpMetricValue(name="uptime_seconds", value=float(uptime_seconds), unit="seconds")],
|
||||
)
|
||||
|
||||
if config.item_type == "cpu_load":
|
||||
processor_index = _item_index(config.item_id, "cpu")
|
||||
if processor_index is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP CPU item was not valid")
|
||||
oid = _with_index(HR_PROCESSOR_LOAD, processor_index)
|
||||
value = _int_value(client.get_many([oid]).get(oid))
|
||||
response_time_ms = int((perf_counter() - started) * 1000)
|
||||
if value is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="CPU load was not reported")
|
||||
return SnmpCheckResult(
|
||||
status="up",
|
||||
response_time_ms=response_time_ms,
|
||||
message=f"CPU load is {value}%",
|
||||
metrics=[SnmpMetricValue(name="load_percent", value=float(value), unit="%")],
|
||||
)
|
||||
|
||||
if config.item_type in {"memory_usage", "storage_usage"}:
|
||||
storage_index = _item_index(config.item_id, "storage")
|
||||
if storage_index is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP storage item was not valid")
|
||||
oids = [
|
||||
_with_index(HR_STORAGE_ALLOCATION_UNITS, storage_index),
|
||||
_with_index(HR_STORAGE_SIZE, storage_index),
|
||||
_with_index(HR_STORAGE_USED, storage_index),
|
||||
]
|
||||
values = client.get_many(oids)
|
||||
response_time_ms = int((perf_counter() - started) * 1000)
|
||||
allocation_unit = _int_value(values.get(oids[0]))
|
||||
size = _int_value(values.get(oids[1]))
|
||||
used = _int_value(values.get(oids[2]))
|
||||
if not allocation_unit or not size or used is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="Storage usage was not reported")
|
||||
total_bytes = float(size * allocation_unit)
|
||||
used_bytes = float(used * allocation_unit)
|
||||
used_percent = (used / size) * 100
|
||||
label = config.label or ("Memory" if config.item_type == "memory_usage" else "Storage")
|
||||
return SnmpCheckResult(
|
||||
status="up",
|
||||
response_time_ms=response_time_ms,
|
||||
message=f"{label} is {used_percent:.1f}% used",
|
||||
metrics=[
|
||||
SnmpMetricValue(name="used_percent", value=used_percent, unit="%"),
|
||||
SnmpMetricValue(name="used_bytes", value=used_bytes, unit="bytes"),
|
||||
SnmpMetricValue(name="total_bytes", value=total_bytes, unit="bytes"),
|
||||
],
|
||||
)
|
||||
|
||||
if config.item_type == "sensor_value":
|
||||
sensor_index = _item_index(config.item_id, "sensor")
|
||||
if sensor_index is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP sensor item was not valid")
|
||||
oids = [
|
||||
_with_index(ENT_PHY_SENSOR_TYPE, sensor_index),
|
||||
_with_index(ENT_PHY_SENSOR_SCALE, sensor_index),
|
||||
_with_index(ENT_PHY_SENSOR_PRECISION, sensor_index),
|
||||
_with_index(ENT_PHY_SENSOR_VALUE, sensor_index),
|
||||
_with_index(ENT_PHY_SENSOR_OPER_STATUS, sensor_index),
|
||||
]
|
||||
values = client.get_many(oids)
|
||||
response_time_ms = int((perf_counter() - started) * 1000)
|
||||
sensor_type = _int_value(values.get(oids[0]))
|
||||
scale = _int_value(values.get(oids[1]))
|
||||
precision = _int_value(values.get(oids[2]))
|
||||
raw_value = _int_value(values.get(oids[3]))
|
||||
oper_status = _int_value(values.get(oids[4]))
|
||||
if raw_value is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="Sensor value was not reported")
|
||||
value = _scaled_sensor_value(raw_value, scale, precision)
|
||||
unit = config.unit or SENSOR_TYPE_UNITS.get(sensor_type or 0)
|
||||
status_label = SENSOR_STATUS_LABELS.get(oper_status or 1, f"status {oper_status}")
|
||||
status = "up" if oper_status in {None, 1} else "down"
|
||||
label = config.label or "Sensor"
|
||||
return SnmpCheckResult(
|
||||
status=status,
|
||||
response_time_ms=response_time_ms,
|
||||
message=f"{label} is {value:g}{unit or ''}; sensor status {status_label}",
|
||||
metrics=[
|
||||
SnmpMetricValue(name="sensor_value", value=value, unit=unit),
|
||||
*([SnmpMetricValue(name="sensor_status", value=float(oper_status))] if oper_status is not None else []),
|
||||
],
|
||||
)
|
||||
|
||||
interface_index = _interface_index(config.item_id)
|
||||
if interface_index is None:
|
||||
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP interface item was not valid")
|
||||
@@ -162,8 +273,12 @@ def _run_snmp_check_sync(config: SnmpCheckConfig) -> SnmpCheckResult:
|
||||
|
||||
|
||||
def _interface_index(item_id: str) -> int | None:
    """Return the numeric index of an ``interface``-prefixed item id.

    Thin wrapper that delegates to ``_item_index`` with ``"interface"`` as
    the expected prefix; whatever that helper returns (an ``int`` or
    ``None``) is passed through unchanged.
    """
    interface_index = _item_index(item_id, "interface")
    return interface_index
|
||||
|
||||
|
||||
def _item_index(item_id: str, expected_prefix: str) -> int | None:
|
||||
parts = item_id.split(".")
|
||||
if len(parts) < 3 or parts[0] != "interface":
|
||||
if len(parts) < 3 or parts[0] != expected_prefix:
|
||||
return None
|
||||
try:
|
||||
return int(parts[1])
|
||||
@@ -181,6 +296,12 @@ def _int_value(value: Any) -> int | None:
|
||||
return None
|
||||
|
||||
|
||||
def _scaled_sensor_value(raw_value: int, scale: int | None, precision: int | None) -> float:
|
||||
scale_multiplier = 10 ** ((scale or 9) - 9)
|
||||
precision_divisor = 10 ** (precision or 0)
|
||||
return float(raw_value * scale_multiplier / precision_divisor)
|
||||
|
||||
|
||||
class SnmpV2Client:
|
||||
def __init__(self, host: str, community: str, port: int, timeout_seconds: float, retries: int) -> None:
|
||||
self.host = host
|
||||
@@ -343,7 +464,9 @@ def _decode_oid(value: bytes) -> tuple[int, ...]:
|
||||
|
||||
|
||||
def _decode_value(tag: int, value: bytes) -> Any:
|
||||
if tag in {0x02, 0x41, 0x42, 0x43, 0x46}:
|
||||
if tag == 0x02:
|
||||
return _decode_integer(value)
|
||||
if tag in {0x41, 0x42, 0x43, 0x46}:
|
||||
return int.from_bytes(value, "big")
|
||||
if tag == 0x04:
|
||||
return value.decode("utf-8", errors="replace")
|
||||
|
||||
@@ -152,6 +152,8 @@ class Scheduler:
|
||||
community=community,
|
||||
item_id=str(monitor.config.get("item_id") or ""),
|
||||
item_type=str(monitor.config.get("item_type") or ""),
|
||||
label=monitor.config.get("label") if isinstance(monitor.config.get("label"), str) else None,
|
||||
unit=monitor.config.get("unit") if isinstance(monitor.config.get("unit"), str) else None,
|
||||
port=int(extra.get("port") or 161),
|
||||
timeout_seconds=float(extra.get("timeout_seconds") or 5),
|
||||
retries=int(extra.get("retries") or 1),
|
||||
|
||||
Reference in New Issue
Block a user