Add SNMP monitor collection

This commit is contained in:
Keith Smith
2026-05-24 00:44:02 -06:00
parent bd6c508c94
commit fe7157fdad
7 changed files with 650 additions and 15 deletions
+93 -2
View File
@@ -8,9 +8,10 @@ from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool
from app.collectors.snmp import SnmpCheckResult, SnmpMetricValue
from app.collectors.website import WebsiteCheckResult
from app.config import settings
from app.models import AlertRule, Base, CheckResult, Incident, Monitor, NotificationChannel
from app.models import AlertRule, Base, CheckResult, Credential, Incident, Metric, Monitor, NotificationChannel
from app.scheduler import Scheduler
@@ -25,7 +26,7 @@ class RecordingScheduler(Scheduler):
self.results = list(results or [])
self.posts: list[dict[str, str]] = []
async def _collect_monitor_result(self, monitor: Monitor) -> WebsiteCheckResult:
async def _collect_monitor_result(self, db: Session, monitor: Monitor) -> WebsiteCheckResult:
return self.results.pop(0)
async def _post_webhook(self, url: str, message: str, username: str) -> None:
@@ -159,3 +160,93 @@ class SchedulerTestCase(unittest.IsolatedAsyncioTestCase):
open_incidents = self.db.scalars(select(Incident).where(Incident.status == "open")).all()
assert open_incidents == []
async def test_scheduler_includes_snmp_monitors_as_due(self) -> None:
    """A freshly added SNMP monitor is returned by ``_load_due_monitors``."""
    recording_scheduler = RecordingScheduler()

    # Build the monitor from a keyword mapping so the fixture reads as data.
    monitor_fields = {
        "name": "Core Switch uplink status",
        "monitor_type": "snmp",
        "target": "192.0.2.10",
        "config": {
            "credential_profile_id": 1,
            "item_id": "interface.1.status",
            "item_type": "interface_status",
        },
        "interval_seconds": 60,
        "status": "unknown",
    }
    switch_monitor = Monitor(**monitor_fields)
    self.db.add(switch_monitor)
    self.db.flush()

    due_monitors = recording_scheduler._load_due_monitors(self.db)

    assert switch_monitor in due_monitors
async def test_scheduler_records_snmp_metrics(self) -> None:
    """Running an SNMP monitor stores one ``Metric`` row per collected value.

    The collector is stubbed with a fixed ``SnmpCheckResult`` carrying two
    octet counters; after ``_run_monitor`` the monitor must be ``up`` and both
    counters must be readable back from the database.
    """
    traffic_config = {
        "credential_profile_id": 1,
        "item_id": "interface.1.traffic",
        "item_type": "interface_traffic",
    }
    traffic_monitor = Monitor(
        name="Core Switch uplink traffic",
        monitor_type="snmp",
        target="192.0.2.10",
        config=traffic_config,
        interval_seconds=60,
        status="unknown",
    )
    self.db.add(traffic_monitor)
    self.db.flush()

    class StubbedScheduler(Scheduler):
        """Scheduler whose collection step returns a canned SNMP result."""

        async def _collect_monitor_result(self, db: Session, monitor: Monitor) -> SnmpCheckResult:
            inbound = SnmpMetricValue(name="in_octets", value=1000, unit="bytes")
            outbound = SnmpMetricValue(name="out_octets", value=2000, unit="bytes")
            return SnmpCheckResult(
                status="up",
                response_time_ms=12,
                message="Interface traffic counters collected",
                metrics=[inbound, outbound],
            )

    stub = StubbedScheduler()
    await stub._run_monitor(self.db, traffic_monitor)

    assert traffic_monitor.status == "up"

    # Order by name so the comparison does not depend on insertion order.
    stored_query = (
        select(Metric)
        .where(Metric.monitor_id == traffic_monitor.id)
        .order_by(Metric.name)
    )
    stored_rows = self.db.scalars(stored_query).all()
    observed = [(row.name, row.value, row.unit) for row in stored_rows]
    assert observed == [
        ("in_octets", 1000.0, "bytes"),
        ("out_octets", 2000.0, "bytes"),
    ]
async def test_snmp_monitor_uses_saved_profile_secret(self) -> None:
    """SNMP collection builds its check config from the saved credential profile.

    A credential profile is stored with an encrypted community string and
    transport options in ``extra``. The scheduler's ``run_snmp_check`` is
    replaced with a recorder, and the test asserts that the recorded config
    carries the monitor's target as host, the plain-text community string,
    and the profile's port, timeout and retry settings.
    """
    # Local imports, in keeping with the function-scope import of the
    # scheduler module that this test already relied on.
    from unittest.mock import patch

    import app.scheduler as scheduler_module

    profile = Credential(
        name="Core Switch",
        credential_type="snmp",
        encrypted_secret=encrypt_secret("private-community"),
        extra={"port": 1161, "timeout_seconds": 3, "retries": 2},
    )
    self.db.add(profile)
    # Flush before building the monitor so the profile has a primary key.
    # Referencing ``profile.id`` instead of a literal 1 keeps the test valid
    # even if other credentials already exist in the session's database.
    self.db.flush()

    monitor = Monitor(
        name="Core Switch uptime",
        monitor_type="snmp",
        target="192.0.2.10",
        config={
            "credential_profile_id": profile.id,
            "item_id": "device.uptime",
            "item_type": "device_uptime",
        },
        interval_seconds=60,
        status="unknown",
    )
    self.db.add(monitor)
    self.db.flush()

    calls: list = []

    async def fake_run_snmp_check(config):
        # Record the config handed to the collector instead of doing SNMP I/O.
        calls.append(config)
        return SnmpCheckResult(status="up", response_time_ms=10, message="Device uptime is 60 seconds")

    # patch.object restores the real collector on exit, even if the call raises.
    with patch.object(scheduler_module, "run_snmp_check", fake_run_snmp_check):
        result = await Scheduler()._collect_snmp_monitor_result(self.db, monitor)

    assert result.status == "up"
    assert len(calls) == 1
    assert calls[0].host == "192.0.2.10"
    assert calls[0].community == "private-community"
    assert calls[0].port == 1161
    assert calls[0].timeout_seconds == 3
    assert calls[0].retries == 2
+117
View File
@@ -0,0 +1,117 @@
import unittest
from unittest.mock import patch
from app.collectors.snmp import (
IF_ADMIN_STATUS,
IF_HC_IN_OCTETS,
IF_HC_OUT_OCTETS,
IF_IN_DISCARDS,
IF_IN_ERRORS,
IF_OPER_STATUS,
IF_OUT_DISCARDS,
IF_OUT_ERRORS,
SYS_UPTIME,
SnmpCheckConfig,
_with_index,
run_snmp_check,
)
class SnmpCollectorTestCase(unittest.IsolatedAsyncioTestCase):
    """Exercise ``run_snmp_check`` per item type with ``SnmpV2Client`` patched out.

    Each test feeds canned OID values through the patched client's
    ``get_many`` and checks the resulting status, message and metrics.
    """

    @staticmethod
    def _metric_rows(result):
        """Flatten a check result's metrics into ``(name, value, unit)`` tuples."""
        return [(entry.name, entry.value, entry.unit) for entry in result.metrics]

    async def _run_check(self, item_id, item_type, responses):
        """Run one SNMP check while the patched client's ``get_many`` returns ``responses``."""
        with patch("app.collectors.snmp.SnmpV2Client") as client_class:
            client_class.return_value.get_many.return_value = responses
            check_config = SnmpCheckConfig(
                host="192.0.2.10",
                community="private-community",
                item_id=item_id,
                item_type=item_type,
            )
            return await run_snmp_check(check_config)

    async def test_collects_device_uptime(self) -> None:
        """A raw uptime reading of 123400 is reported as 1234 seconds."""
        outcome = await self._run_check(
            "device.uptime",
            "device_uptime",
            {SYS_UPTIME: 123_400},
        )

        assert outcome.status == "up"
        assert outcome.message == "Device uptime is 1234 seconds"
        assert self._metric_rows(outcome) == [
            ("uptime_seconds", 1234.0, "seconds")
        ]

    async def test_collects_interface_status(self) -> None:
        """Admin status 1 with operational status 2 yields a ``down`` result."""
        if_index = 7
        canned = {
            _with_index(IF_ADMIN_STATUS, if_index): 1,
            _with_index(IF_OPER_STATUS, if_index): 2,
        }

        outcome = await self._run_check("interface.7.status", "interface_status", canned)

        assert outcome.status == "down"
        assert outcome.message == "Interface admin up, operational down"
        assert self._metric_rows(outcome) == [
            ("admin_status", 1.0, None),
            ("oper_status", 2.0, None),
        ]

    async def test_collects_interface_traffic_from_high_capacity_counters(self) -> None:
        """The high-capacity octet counters are reported as byte metrics."""
        if_index = 3
        canned = {
            _with_index(IF_HC_IN_OCTETS, if_index): 123,
            _with_index(IF_HC_OUT_OCTETS, if_index): 456,
        }

        outcome = await self._run_check("interface.3.traffic", "interface_traffic", canned)

        assert outcome.status == "up"
        assert self._metric_rows(outcome) == [
            ("in_octets", 123.0, "bytes"),
            ("out_octets", 456.0, "bytes"),
        ]

    async def test_collects_interface_errors_and_discards(self) -> None:
        """Error and discard counters for both directions become count metrics."""
        counter_bases = (IF_IN_ERRORS, IF_OUT_ERRORS, IF_IN_DISCARDS, IF_OUT_DISCARDS)
        counter_oids = [_with_index(base, 5) for base in counter_bases]
        # Pair each OID with a distinct value so a mix-up between counters is detectable.
        canned = dict(zip(counter_oids, (1, 2, 3, 4)))

        outcome = await self._run_check("interface.5.errors", "interface_errors", canned)

        assert outcome.status == "up"
        assert self._metric_rows(outcome) == [
            ("in_errors", 1.0, "count"),
            ("out_errors", 2.0, "count"),
            ("in_discards", 3.0, "count"),
            ("out_discards", 4.0, "count"),
        ]
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()