Add SNMP profile mapping and fix asset cleanup

This commit is contained in:
Keith Smith
2026-05-26 16:34:10 -06:00
parent fe7157fdad
commit e59733d331
15 changed files with 676 additions and 35 deletions
+24 -1
View File
@@ -1,10 +1,12 @@
from datetime import UTC, datetime
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.auth.dependencies import get_current_user, require_role
from app.db.session import get_db
from app.models import Asset, User
from app.models import Asset, Incident, Monitor, User
from app.schemas.core import AssetCreate, AssetRead, AssetUpdate
router = APIRouter(prefix="/assets", tags=["assets"])
@@ -86,5 +88,26 @@ def delete_asset(
asset = db.get(Asset, asset_id)
if asset is None:
raise HTTPException(status_code=404, detail="Asset not found")
attached_monitors = db.scalars(select(Monitor).where(Monitor.asset_id == asset.id)).all()
attached_monitor_ids = [monitor.id for monitor in attached_monitors]
now = datetime.now(UTC)
if attached_monitor_ids:
monitor_incidents = db.scalars(select(Incident).where(Incident.monitor_id.in_(attached_monitor_ids))).all()
for incident in monitor_incidents:
if incident.status == "open":
incident.status = "resolved"
incident.resolved_at = now
incident.details = {**(incident.details or {}), "recovery_message": "Asset was deleted"}
incident.monitor_id = None
asset_incidents = db.scalars(select(Incident).where(Incident.asset_id == asset.id)).all()
for incident in asset_incidents:
incident.asset_id = None
for monitor in attached_monitors:
db.delete(monitor)
db.delete(asset)
db.commit()
+23 -8
View File
@@ -65,6 +65,9 @@ def _discovery_to_read(credential_profile_id: int, discovered: DiscoveredSnmpDev
return SnmpDiscoveryRead(
host=discovered.host,
credential_profile_id=credential_profile_id,
profile_key=discovered.profile_key,
profile_name=discovered.profile_name,
capabilities=discovered.capabilities,
device_name=discovered.device_name,
description=discovered.description,
uptime_seconds=discovered.uptime_seconds,
@@ -74,15 +77,27 @@ def _discovery_to_read(credential_profile_id: int, discovered: DiscoveredSnmpDev
def _monitorable_items(discovered: DiscoveredSnmpDevice) -> list[SnmpDiscoveryItemRead]:
items = [
SnmpDiscoveryItemRead(
item_id="device.uptime",
item_type="device_uptime",
group="Device Health",
label="Device uptime",
unit="seconds",
items = []
if discovered.uptime_seconds is not None:
items.append(
SnmpDiscoveryItemRead(
item_id="device.uptime",
item_type="device_uptime",
group="Device Health",
label="Device uptime",
unit="seconds",
)
)
]
items.extend(
SnmpDiscoveryItemRead(
item_id=item.item_id,
item_type=item.item_type,
group=item.group,
label=item.label,
unit=item.unit,
)
for item in discovered.health_items
)
for interface in discovered.interfaces:
group = f"Interface {interface.name}"
item_prefix = f"interface.{interface.index}"
+3
View File
@@ -248,6 +248,9 @@ class SnmpMonitorsCreate(BaseModel):
class SnmpDiscoveryRead(BaseModel):
host: str
credential_profile_id: int
profile_key: str
profile_name: str
capabilities: dict[str, bool]
device_name: str | None
description: str | None
uptime_seconds: int | None
+209 -2
View File
@@ -1,4 +1,4 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
import random
import socket
from typing import Any
@@ -26,6 +26,15 @@ class DiscoveredSnmpInterface:
speed_bps: int | None
@dataclass(frozen=True)
class DiscoveredSnmpHealthItem:
    # Immutable description of one non-interface monitorable item found
    # during SNMP discovery (CPU, memory, storage or sensor entries).

    # Stable identifier built from the kind and table index,
    # e.g. "cpu.<index>.load", "storage.<index>.usage", "sensor.<index>.value".
    item_id: str
    # Category used to derive capability flags: "cpu_load", "memory_usage",
    # "storage_usage" or "sensor_value".
    item_type: str
    # Friendly grouping shown with the item, e.g. "Device Health", "Storage",
    # "Environmental".
    group: str
    # Human-readable name of the item, e.g. "CPU load" or "Disk / usage".
    label: str
    # Display unit such as "%" or "C"; None when the item has no unit.
    unit: str | None = None
@dataclass(frozen=True)
class DiscoveredSnmpDevice:
host: str
@@ -33,6 +42,10 @@ class DiscoveredSnmpDevice:
description: str | None
uptime_seconds: int | None
interfaces: list[DiscoveredSnmpInterface]
profile_key: str = "generic_snmp"
profile_name: str = "Generic SNMP"
capabilities: dict[str, bool] = field(default_factory=dict)
health_items: list[DiscoveredSnmpHealthItem] = field(default_factory=list)
SYS_DESCR = (1, 3, 6, 1, 2, 1, 1, 1, 0)
@@ -43,6 +56,24 @@ IF_SPEED = (1, 3, 6, 1, 2, 1, 2, 2, 1, 5)
IF_ADMIN_STATUS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 7)
IF_OPER_STATUS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 8)
IF_NAME = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 1)
HR_PROCESSOR_LOAD = (1, 3, 6, 1, 2, 1, 25, 3, 3, 1, 2)
HR_STORAGE_TYPE = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 2)
HR_STORAGE_DESCR = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 3)
HR_STORAGE_ALLOCATION_UNITS = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 4)
HR_STORAGE_SIZE = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 5)
HR_STORAGE_USED = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 6)
ENT_PHYSICAL_DESCR = (1, 3, 6, 1, 2, 1, 47, 1, 1, 1, 1, 2)
ENT_PHYSICAL_NAME = (1, 3, 6, 1, 2, 1, 47, 1, 1, 1, 1, 7)
ENT_PHY_SENSOR_TYPE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 1)
ENT_PHY_SENSOR_SCALE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 2)
ENT_PHY_SENSOR_PRECISION = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 3)
ENT_PHY_SENSOR_VALUE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 4)
ENT_PHY_SENSOR_OPER_STATUS = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 5)
HR_STORAGE_RAM = "1.3.6.1.2.1.25.2.1.2"
HR_STORAGE_VIRTUAL_MEMORY = "1.3.6.1.2.1.25.2.1.3"
HR_STORAGE_FIXED_DISK = "1.3.6.1.2.1.25.2.1.4"
HR_STORAGE_REMOVABLE_DISK = "1.3.6.1.2.1.25.2.1.5"
STATUS_LABELS = {
1: "up",
@@ -54,20 +85,92 @@ STATUS_LABELS = {
7: "lower layer down",
}
SENSOR_TYPE_LABELS = {
3: ("AC voltage", "V"),
4: ("DC voltage", "V"),
5: ("Current", "A"),
6: ("Power", "W"),
7: ("Frequency", "Hz"),
8: ("Temperature", "C"),
9: ("Humidity", "%"),
10: ("Fan speed", "rpm"),
11: ("Airflow", "m3/min"),
12: ("Sensor state", None),
}
@dataclass(frozen=True)
class SnmpProfile:
    """Friendly SNMP device profile selected from the device's system identity.

    ``key`` is the stable machine identifier, ``name`` the label shown to
    users, and ``match_terms`` the identity keywords that select this profile.
    A profile with no terms never matches on its own.
    """

    key: str
    name: str
    match_terms: tuple[str, ...] = ()

    def matches(self, system_text: str) -> bool:
        """Return True when any match term occurs in ``system_text`` as a whole token.

        Matching is case-sensitive; ``system_text`` is expected to be
        lower-cased already (``_select_profile`` lower-cases it), and the
        terms are written in lower case.

        A term only counts when it is not embedded in a longer alphanumeric
        run. Plain substring matching would let "ios" match "kiosk" or "bios"
        and misclassify unrelated devices. Separators such as "-", "_", "."
        and whitespace still delimit tokens, so "ios-xe" matches "ios".
        """
        import re  # local import keeps this block self-contained

        return any(
            re.search(rf"(?<![a-z0-9]){re.escape(term)}(?![a-z0-9])", system_text) is not None
            for term in self.match_terms
        )

    def discover_health_items(self, client: "SnmpV2Client") -> "list[DiscoveredSnmpHealthItem]":
        """Return host-resource items followed by sensor items discovered via ``client``."""
        return [*_discover_host_resource_items(client), *_discover_sensor_items(client)]
# Known profiles in match priority order: _select_profile walks this tuple
# front to back and returns the first profile whose terms match.
# The last entry has no match terms and is returned by _select_profile as the
# fallback, so it must stay at the end.
SNMP_PROFILES = (
    SnmpProfile("cisco_ios", "Cisco IOS SNMP", ("cisco", "ios")),
    SnmpProfile("mikrotik_routeros", "MikroTik RouterOS SNMP", ("mikrotik", "routeros")),
    SnmpProfile("net_snmp", "Net-SNMP Host Resources", ("net-snmp", "linux")),
    SnmpProfile("generic_snmp", "Generic SNMP"),
)
def discover_snmp_device(host: str, credential: SnmpCredential) -> DiscoveredSnmpDevice:
    """Query ``host`` over SNMP and summarise what can be monitored on it.

    Reads the system group, picks the matching profile, then discovers
    interfaces and profile-specific health items, in that order.
    """
    snmp = SnmpV2Client(host, credential)
    system_group = snmp.get_many([SYS_NAME, SYS_DESCR, SYS_UPTIME])
    chosen_profile = _select_profile(system_group)
    found_interfaces = _discover_interfaces(snmp)
    found_health_items = chosen_profile.discover_health_items(snmp)

    device_name = _string_value(system_group.get(SYS_NAME))
    description = _string_value(system_group.get(SYS_DESCR))
    uptime_seconds = _timeticks_to_seconds(system_group.get(SYS_UPTIME))
    capability_flags = _capabilities(system_group, found_interfaces, found_health_items)

    return DiscoveredSnmpDevice(
        host=host,
        device_name=device_name,
        description=description,
        uptime_seconds=uptime_seconds,
        interfaces=found_interfaces,
        profile_key=chosen_profile.key,
        profile_name=chosen_profile.name,
        capabilities=capability_flags,
        health_items=found_health_items,
    )
def _select_profile(system: dict[tuple[int, ...], Any]) -> SnmpProfile:
    """Pick the first profile whose terms match the device name/description.

    The system name and description are lower-cased and joined with a space;
    missing or empty values are left out. When nothing matches, the last
    entry of ``SNMP_PROFILES`` is returned.
    """
    fragments = []
    for oid in (SYS_NAME, SYS_DESCR):
        text = _string_value(system.get(oid))
        if text:
            fragments.append(text.lower())
    system_text = " ".join(fragments)

    for candidate in SNMP_PROFILES:
        if not candidate.match_terms:
            # Profiles without terms can only be chosen as the fallback.
            continue
        if candidate.matches(system_text):
            return candidate
    return SNMP_PROFILES[-1]
def _capabilities(
    system: dict[tuple[int, ...], Any],
    interfaces: list[DiscoveredSnmpInterface],
    health_items: list[DiscoveredSnmpHealthItem],
) -> dict[str, bool]:
    """Derive the friendly capability flags reported with a discovery result.

    "system" is set when any of name, description or uptime was returned.
    All interface flags follow whether any interface was discovered, and
    the remaining flags follow which health item types were found.
    """
    has_system = False
    for oid in (SYS_NAME, SYS_DESCR, SYS_UPTIME):
        if system.get(oid) is not None:
            has_system = True
            break

    has_interfaces = bool(interfaces)
    found_types = set()
    for entry in health_items:
        found_types.add(entry.item_type)

    flags: dict[str, bool] = {"system": has_system}
    for flag in ("interfaces", "interface_status", "interface_traffic", "interface_errors"):
        flags[flag] = has_interfaces
    # "sensors" and "environmental" are both driven by sensor_value items.
    for flag, item_type in (
        ("cpu", "cpu_load"),
        ("memory", "memory_usage"),
        ("storage", "storage_usage"),
        ("sensors", "sensor_value"),
        ("environmental", "sensor_value"),
    ):
        flags[flag] = item_type in found_types
    return flags
def _discover_interfaces(client: "SnmpV2Client") -> list[DiscoveredSnmpInterface]:
names = client.walk(IF_NAME)
descriptions = client.walk(IF_DESCR)
@@ -106,6 +209,108 @@ def _discover_interfaces(client: "SnmpV2Client") -> list[DiscoveredSnmpInterface
return interfaces
def _discover_host_resource_items(client: "SnmpV2Client") -> list[DiscoveredSnmpHealthItem]:
    """Build CPU, memory and storage items from the host-resources tables.

    Processor rows with an integer load become "cpu_load" items. Storage rows
    with a type, a non-zero allocation unit, a non-zero size and a used count
    become "memory_usage" (RAM / virtual memory) or "storage_usage" (fixed /
    removable disk) items. Other storage types are ignored. The result is
    passed through ``_deduplicate_items``.
    """

    def table(base_oid):
        # Walk one column (capped at 256 rows) and key it by row index.
        return _indexed_values(client.walk(base_oid, max_items=256))

    load_by_row = table(HR_PROCESSOR_LOAD)
    cpu_rows = sorted(row for row in load_by_row if _int_value(load_by_row[row]) is not None)
    single_cpu = len(cpu_rows) == 1
    found: list[DiscoveredSnmpHealthItem] = [
        DiscoveredSnmpHealthItem(
            item_id=f"cpu.{row}.load",
            item_type="cpu_load",
            group="Device Health",
            # A lone processor needs no ordinal in its label.
            label="CPU load" if single_cpu else f"CPU {ordinal} load",
            unit="%",
        )
        for ordinal, row in enumerate(cpu_rows, start=1)
    ]

    type_by_row = table(HR_STORAGE_TYPE)
    descr_by_row = table(HR_STORAGE_DESCR)
    unit_by_row = table(HR_STORAGE_ALLOCATION_UNITS)
    size_by_row = table(HR_STORAGE_SIZE)
    used_by_row = table(HR_STORAGE_USED)
    memory_kinds = {HR_STORAGE_RAM, HR_STORAGE_VIRTUAL_MEMORY}
    disk_kinds = {HR_STORAGE_FIXED_DISK, HR_STORAGE_REMOVABLE_DISK}

    for row in sorted(type_by_row):
        kind = _string_value(type_by_row.get(row))
        block_size = _int_value(unit_by_row.get(row))
        capacity = _int_value(size_by_row.get(row))
        consumed = _int_value(used_by_row.get(row))
        usable = bool(kind) and bool(block_size) and bool(capacity) and consumed is not None
        if not usable:
            continue
        description = _string_value(descr_by_row.get(row)) or f"Storage {row}"
        if kind in memory_kinds:
            found.append(
                DiscoveredSnmpHealthItem(
                    item_id=f"storage.{row}.memory",
                    item_type="memory_usage",
                    group="Device Health",
                    label="Memory used",
                    unit="%",
                )
            )
        elif kind in disk_kinds:
            found.append(
                DiscoveredSnmpHealthItem(
                    item_id=f"storage.{row}.usage",
                    item_type="storage_usage",
                    group="Storage",
                    label=_storage_usage_label(description),
                    unit="%",
                )
            )
    return _deduplicate_items(found)
def _discover_sensor_items(client: "SnmpV2Client") -> list[DiscoveredSnmpHealthItem]:
    """Build "sensor_value" items from the entity sensor tables.

    A sensor row is kept only when its type is listed in
    ``SENSOR_TYPE_LABELS`` and it currently reports an integer value. The
    label is the sensor kind, followed by the physical entity name (or,
    failing that, its description) when one is available.
    """
    type_by_row, value_by_row, name_by_row, descr_by_row = (
        _indexed_values(client.walk(base_oid, max_items=256))
        for base_oid in (
            ENT_PHY_SENSOR_TYPE,
            ENT_PHY_SENSOR_VALUE,
            ENT_PHYSICAL_NAME,
            ENT_PHYSICAL_DESCR,
        )
    )

    found: list[DiscoveredSnmpHealthItem] = []
    for row in sorted(type_by_row):
        type_code = _int_value(type_by_row.get(row))
        if type_code not in SENSOR_TYPE_LABELS:
            continue
        if _int_value(value_by_row.get(row)) is None:
            continue
        kind, unit = SENSOR_TYPE_LABELS[type_code]
        entity_name = _string_value(name_by_row.get(row)) or _string_value(descr_by_row.get(row))
        found.append(
            DiscoveredSnmpHealthItem(
                item_id=f"sensor.{row}.value",
                item_type="sensor_value",
                group="Environmental",
                label=f"{kind} {entity_name}" if entity_name else kind,
                unit=unit,
            )
        )
    return found
def _storage_usage_label(description: str) -> str:
normalized = description.strip()
if normalized in {"/", "/boot", "/home", "/var"}:
return f"Disk {normalized} usage"
if "disk" not in normalized.lower() and normalized.startswith("/"):
return f"Disk {normalized} usage"
return f"{normalized} usage"
def _deduplicate_items(items: list[DiscoveredSnmpHealthItem]) -> list[DiscoveredSnmpHealthItem]:
    """Drop items that repeat an earlier (item_type, label) pair.

    The first item seen for each pair is kept and the original order is
    preserved.
    """
    first_by_key: dict[tuple[str, str], DiscoveredSnmpHealthItem] = {}
    for entry in items:
        # setdefault keeps the earliest entry; dicts preserve insertion order.
        first_by_key.setdefault((entry.item_type, entry.label), entry)
    return list(first_by_key.values())
def _indexed_values(values: dict[tuple[int, ...], Any]) -> dict[int, Any]:
indexed: dict[int, Any] = {}
for oid, value in values.items():
@@ -317,7 +522,9 @@ def _decode_oid(value: bytes) -> tuple[int, ...]:
def _decode_value(tag: int, value: bytes) -> Any:
if tag in {0x02, 0x41, 0x42, 0x43, 0x46}:
if tag == 0x02:
return _decode_integer(value)
if tag in {0x41, 0x42, 0x43, 0x46}:
return int.from_bytes(value, "big")
if tag == 0x04:
return value.decode("utf-8", errors="replace")
+84 -1
View File
@@ -2,7 +2,27 @@ from fastapi.testclient import TestClient
from app.core.secrets import encrypt_secret
from app.models import Credential
from app.services.snmp import DiscoveredSnmpDevice, DiscoveredSnmpInterface, SnmpCredential, SnmpDiscoveryError
from app.services.snmp import (
ENT_PHYSICAL_NAME,
ENT_PHY_SENSOR_TYPE,
ENT_PHY_SENSOR_VALUE,
HR_PROCESSOR_LOAD,
HR_STORAGE_ALLOCATION_UNITS,
HR_STORAGE_DESCR,
HR_STORAGE_FIXED_DISK,
HR_STORAGE_RAM,
HR_STORAGE_SIZE,
HR_STORAGE_TYPE,
HR_STORAGE_USED,
SYS_DESCR,
SYS_NAME,
SYS_UPTIME,
DiscoveredSnmpDevice,
DiscoveredSnmpInterface,
SnmpCredential,
SnmpDiscoveryError,
discover_snmp_device,
)
def test_snmp_discovery_uses_profile_and_returns_friendly_results(client: TestClient, db_session, monkeypatch) -> None:
@@ -49,6 +69,9 @@ def test_snmp_discovery_uses_profile_and_returns_friendly_results(client: TestCl
body = response.json()
assert body["host"] == "192.0.2.10"
assert body["credential_profile_id"] == profile.id
assert body["profile_key"] == "generic_snmp"
assert body["profile_name"] == "Generic SNMP"
assert body["capabilities"] == {}
assert body["device_name"] == "core-sw-1"
assert body["description"] == "Core switch"
assert body["uptime_seconds"] == 12345
@@ -96,6 +119,66 @@ def test_snmp_discovery_uses_profile_and_returns_friendly_results(client: TestCl
assert "1.3.6" not in response.text
def test_snmp_profile_mapping_discovers_standard_health_items(monkeypatch) -> None:
    """Discovery against a fake client selects the Net-SNMP profile and maps
    CPU, memory, storage and sensor rows to friendly health items."""

    class FakeClient:
        # Stand-in for SnmpV2Client that serves canned values, no network I/O.
        def __init__(self, host: str, credential: SnmpCredential) -> None:
            self.host = host
            self.credential = credential

        def get_many(self, _oids):
            # The description contains "linux" and "net-snmp" profile terms.
            return {
                SYS_NAME: "edge-router",
                SYS_DESCR: "Linux edge-router net-snmp",
                SYS_UPTIME: 10_000,
            }

        def walk(self, base_oid, max_items=128):
            # One processor, one RAM row (index 1), one fixed disk (index 31)
            # and one sensor of type 8 at index 10.
            values = {
                HR_PROCESSOR_LOAD: {(*HR_PROCESSOR_LOAD, 196608): 17},
                HR_STORAGE_TYPE: {
                    (*HR_STORAGE_TYPE, 1): HR_STORAGE_RAM,
                    (*HR_STORAGE_TYPE, 31): HR_STORAGE_FIXED_DISK,
                },
                HR_STORAGE_DESCR: {
                    (*HR_STORAGE_DESCR, 1): "Physical memory",
                    (*HR_STORAGE_DESCR, 31): "/",
                },
                HR_STORAGE_ALLOCATION_UNITS: {
                    (*HR_STORAGE_ALLOCATION_UNITS, 1): 1024,
                    (*HR_STORAGE_ALLOCATION_UNITS, 31): 4096,
                },
                HR_STORAGE_SIZE: {
                    (*HR_STORAGE_SIZE, 1): 2048,
                    (*HR_STORAGE_SIZE, 31): 4096,
                },
                HR_STORAGE_USED: {
                    (*HR_STORAGE_USED, 1): 1024,
                    (*HR_STORAGE_USED, 31): 1024,
                },
                ENT_PHY_SENSOR_TYPE: {(*ENT_PHY_SENSOR_TYPE, 10): 8},
                ENT_PHY_SENSOR_VALUE: {(*ENT_PHY_SENSOR_VALUE, 10): 310},
                ENT_PHYSICAL_NAME: {(*ENT_PHYSICAL_NAME, 10): "Inlet"},
            }
            # Tables not listed above (e.g. interface columns) come back empty.
            return values.get(base_oid, {})

    monkeypatch.setattr("app.services.snmp.SnmpV2Client", FakeClient)

    discovered = discover_snmp_device("192.0.2.20", SnmpCredential(community="private-community"))

    assert discovered.profile_key == "net_snmp"
    assert discovered.profile_name == "Net-SNMP Host Resources"
    assert discovered.capabilities["cpu"] is True
    assert discovered.capabilities["memory"] is True
    assert discovered.capabilities["storage"] is True
    assert discovered.capabilities["sensors"] is True
    # Host-resource items come first, then sensors; a single CPU has no ordinal.
    assert [(item.item_id, item.item_type, item.group, item.label, item.unit) for item in discovered.health_items] == [
        ("cpu.196608.load", "cpu_load", "Device Health", "CPU load", "%"),
        ("storage.1.memory", "memory_usage", "Device Health", "Memory used", "%"),
        ("storage.31.usage", "storage_usage", "Storage", "Disk / usage", "%"),
        ("sensor.10.value", "sensor_value", "Environmental", "Temperature Inlet", "C"),
    ]
def test_snmp_discovery_rejects_missing_profile(client: TestClient) -> None:
response = client.post("/discovery/snmp", json={"host": "192.0.2.10", "credential_profile_id": 999})
+39 -1
View File
@@ -3,7 +3,7 @@ from sqlalchemy import select
from sqlalchemy.orm import Session
from app.core.secrets import encrypt_secret
from app.models import AlertRule, Asset, Credential, Monitor
from app.models import AlertRule, Asset, Credential, Incident, Monitor
def test_create_website_monitor_creates_asset_and_alert_rule(client: TestClient, db_session: Session) -> None:
@@ -211,3 +211,41 @@ def test_create_snmp_monitors_rejects_missing_profile(client: TestClient, db_ses
)
assert response.status_code == 404
def test_delete_asset_deletes_attached_monitors_and_resolves_incidents(client: TestClient, db_session: Session) -> None:
    """Deleting an asset removes its monitor and leaves the open incident
    resolved and detached from both the asset and the monitor."""
    asset = Asset(name="Router", asset_type="network_device", address="192.0.2.1", status="down", extra={})
    monitor = Monitor(
        asset=asset,
        name="Router ping",
        monitor_type="ping",
        target="192.0.2.1",
        config={},
        interval_seconds=60,
        status="down",
    )
    db_session.add_all([asset, monitor])
    # Flush so asset.id and monitor.id are available for the incident below.
    db_session.flush()
    incident = Incident(
        asset_id=asset.id,
        monitor_id=monitor.id,
        alert_rule_id=None,
        title="Router ping is failing",
        severity="warning",
        status="open",
        details={"last_message": "Ping failed"},
    )
    db_session.add(incident)
    db_session.commit()

    response = client.delete(f"/assets/{asset.id}")

    assert response.status_code == 204
    assert db_session.get(Asset, asset.id) is None
    assert db_session.get(Monitor, monitor.id) is None
    # Reload the incident to observe the changes made by the request.
    db_session.refresh(incident)
    assert incident.status == "resolved"
    assert incident.resolved_at is not None
    assert incident.asset_id is None
    assert incident.monitor_id is None
    assert incident.details["recovery_message"] == "Asset was deleted"
+10 -7
View File
@@ -1,6 +1,6 @@
# Agent Handoff Notes
Last updated: 2026-05-24
Last updated: 2026-05-26
## Current Identity
@@ -38,16 +38,19 @@ OrbitalWard is a secure monitoring appliance focused on the v0.1 vertical slice:
- Guided SNMP device discovery with friendly device, interface, and monitorable item results.
- Asset setup supports creating, selecting, and deleting assets, plus attaching ping, TCP, website, and SNMP monitors without creating alert rules automatically.
- Worker collects configured SNMP uptime, interface status, traffic counter, error, and discard monitors.
- SNMP profile mapping exposes friendly capability flags plus CPU, memory, storage, and standard sensor monitorable items when supported.
- Worker collects configured SNMP CPU load, memory usage, storage usage, and standard sensor value/status monitors.
## Verification State
Recent Docker checks:
- `docker compose -f docker-compose.dev.yml exec -T backend python -m pytest tests`
- `docker compose -f docker-compose.dev.yml exec -T frontend npm run typecheck`
- `docker compose -f docker-compose.dev.yml exec -T frontend npm run build`
- `docker compose -f docker-compose.dev.yml exec -T worker python -m unittest discover -s tests`
- `docker compose -f docker-compose.dev.yml exec -T worker python -m compileall app`
- `docker compose -f docker-compose.dev.yml run --rm backend sh -c "pip install -e '.[test]' && python -m pytest tests/test_discovery.py tests/test_monitors.py"`
- `docker compose -f docker-compose.dev.yml run --rm frontend npm run typecheck`
- `docker compose -f docker-compose.dev.yml run --rm frontend npm run build`
- `docker compose -f docker-compose.dev.yml run --rm worker python -m unittest discover -s tests`
- `docker compose -f docker-compose.dev.yml run --rm worker python -m compileall app`
- `docker compose -f docker-compose.dev.yml run --rm backend sh -c "pip install -e '.[test]' >/dev/null && python -m compileall app"`
Earlier rename and monitor work also verified:
@@ -84,7 +87,7 @@ Issue source docs:
- `docs/progress.md`
- `docs/roadmap.md`
Current completed items include TLS expiry monitor support, HTTP/website checks, ping and TCP port checks, basic alert evaluation, alert rule editing UI, incident actions, webhook notification channels, SNMPv2c credential profiles, the SNMP device discovery API, guided SNMP discovery UI, asset-based monitor setup, and initial SNMP collection for uptime plus interface counters/status. The next recommended implementation work is SNMP profile mapping and expanded CPU, memory, storage, and sensor collection.
Current completed items include TLS expiry monitor support, HTTP/website checks, ping and TCP port checks, basic alert evaluation, alert rule editing UI, incident actions, webhook notification channels, SNMPv2c credential profiles, the SNMP device discovery API, guided SNMP discovery UI, asset-based monitor setup, initial SNMP collection for uptime plus interface counters/status, and SNMP profile mapping for standard CPU, memory, storage, and sensor health items. The next recommended implementation work is notification routing/policies or email/SMTP notifications.
## Guardrails
+6
View File
@@ -53,6 +53,12 @@
39. Create monitors from SNMP discovery selections
40. Add SNMP interface status and traffic collection
41. Add SNMP profile mapping for friendly metric names
42. Add vendor-private SNMP profile mappings from real device examples
43. Fix asset deletion cleanup for attached monitors
44. Show and graph SNMP interface throughput
45. Build asset detail UI for monitors, metrics, and context
46. Refine metric-only monitor status semantics
47. Rename product from OrbitalWard to OrbitWard
## Current Implementation Snapshot
+20 -12
View File
@@ -1,6 +1,6 @@
# OrbitalWard Progress
Last updated: 2026-05-24
Last updated: 2026-05-26
## Current State
@@ -99,6 +99,15 @@ Implemented initial SNMP collection slice:
- SNMP interface traffic checks collect inbound/outbound octet counters and store metrics.
- SNMP interface error checks collect inbound/outbound errors and discards and store metrics.
Implemented SNMP profile mapping slice:
- SNMP discovery selects an internal friendly profile such as Generic SNMP, Net-SNMP Host Resources, Cisco IOS SNMP, or MikroTik RouterOS SNMP from system identity details.
- Discovery reports capability flags for system identity, interfaces, CPU, memory, storage, and sensors.
- Standard HOST-RESOURCES CPU load, memory usage, and disk/storage usage are exposed as friendly monitorable items when supported.
- Standard ENTITY-SENSOR environmental readings are exposed as friendly monitorable items when supported.
- Worker collection supports CPU load, memory usage, storage usage, and sensor value/status monitors created from discovery.
- Raw SNMP implementation details remain internal to profiles and are not returned in the normal discovery UI/API response.
## Known Gaps
- General credential vault workflows beyond SNMP profiles are not complete.
@@ -106,7 +115,7 @@ Implemented initial SNMP collection slice:
- User management UI is not implemented.
- Role management is basic and needs full admin flows.
- Richer alert condition editing is not implemented yet.
- SNMP collection is implemented for uptime, interface status, traffic counters, errors, and discards, but CPU, memory, storage, sensor, and vendor-specific profile mappings are not implemented yet.
- SNMP collection now covers uptime, interface status, traffic counters, errors, discards, CPU load, memory usage, storage usage, and standard sensor value/status data. Vendor-private profile mappings beyond common standard MIBs are not implemented yet.
- Notification routing/policies are not implemented; all enabled webhook channels receive incident notifications.
- Email/SMTP notifications are not implemented yet.
- Graphing exists only as placeholders; metric visualization is not implemented.
@@ -116,16 +125,15 @@ Implemented initial SNMP collection slice:
## Recommended Next Work
1. Add SNMP profile mapping for friendly metric names across common vendors.
2. Add SNMP CPU, memory, storage, and sensor collection where supported by profiles.
3. Add notification policy/routing controls.
4. Add email/SMTP notification channel.
5. Add audit event writes for auth, monitor, credential, notification, and incident actions.
6. Build general credential vault workflows with masked secret handling.
7. Add user administration UI.
8. Add graphs for website response time and monitor status history.
9. Add richer alert condition editing.
10. Add frontend coverage for monitor, alert, and notification workflows.
1. Add notification policy/routing controls.
2. Add email/SMTP notification channel.
3. Add audit event writes for auth, monitor, credential, notification, and incident actions.
4. Build general credential vault workflows with masked secret handling.
5. Add user administration UI.
6. Add graphs for website response time and monitor status history.
7. Add richer alert condition editing.
8. Add vendor-private SNMP profile mappings for specific common devices after real device examples are available.
9. Add frontend coverage for monitor, alert, and notification workflows.
## Operational Notes
+17 -1
View File
@@ -397,8 +397,10 @@ export function AssetsPage({ token, assets, monitors, onChanged }: AssetsPagePro
<div className="space-y-3">
<div className="grid gap-3 rounded-md border border-line bg-[#0d131c] p-3 text-sm sm:grid-cols-3">
<SummaryItem label="Device" value={discoveryResult.device_name || discoveryResult.host} />
<SummaryItem label="Interfaces" value={String(discoveryResult.interfaces.length)} />
<SummaryItem label="Profile" value={discoveryResult.profile_name} />
<SummaryItem label="Selected" value={String(selectedItems.length)} />
<SummaryItem label="Interfaces" value={String(discoveryResult.interfaces.length)} />
<SummaryItem label="Capabilities" value={formatCapabilities(discoveryResult.capabilities)} />
</div>
<div className="max-h-[360px] overflow-y-auto rounded-md border border-line bg-[#0d131c]">
{groupedItems.map(({ group, items }) => (
@@ -556,3 +558,17 @@ function friendlyAssetType(value: string) {
function friendlyItemType(value: string) {
return value.replaceAll("_", " ");
}
// Summarise the enabled capability flags as a comma-separated list.
// Only the flags listed below are shown, in this fixed order; when none of
// them is enabled the summary reads "System only".
function formatCapabilities(capabilities: Record<string, boolean>) {
  const displayNames: Array<[string, string]> = [
    ["interfaces", "interfaces"],
    ["cpu", "CPU"],
    ["memory", "memory"],
    ["storage", "storage"],
    ["sensors", "sensors"],
  ];
  const enabled: string[] = [];
  for (const [flag, displayName] of displayNames) {
    if (capabilities[flag]) {
      enabled.push(displayName);
    }
  }
  if (enabled.length === 0) {
    return "System only";
  }
  return enabled.join(", ");
}
+16
View File
@@ -146,7 +146,9 @@ export function DiscoveryPage({ token }: DiscoveryPageProps) {
<div className="grid gap-4 p-4 md:grid-cols-3">
<SummaryItem label="Name" value={result.device_name || result.host} />
<SummaryItem label="Host" value={result.host} />
<SummaryItem label="Profile" value={result.profile_name} />
<SummaryItem label="Uptime" value={formatDuration(result.uptime_seconds)} />
<SummaryItem label="Capabilities" value={formatCapabilities(result.capabilities)} />
<div className="md:col-span-3">
<div className="text-xs uppercase text-slate-500">Description</div>
<div className="mt-1 text-sm text-slate-300">{result.description || "No description reported"}</div>
@@ -265,3 +267,17 @@ function formatSpeed(value?: number | null) {
function friendlyItemType(value: string) {
return value.replaceAll("_", " ");
}
// Summarise the enabled capability flags as a comma-separated list.
// Only the flags listed below are shown, in this fixed order; when none of
// them is enabled the summary reads "System only".
function formatCapabilities(capabilities: Record<string, boolean>) {
  const displayNames: Array<[string, string]> = [
    ["interfaces", "interfaces"],
    ["cpu", "CPU"],
    ["memory", "memory"],
    ["storage", "storage"],
    ["sensors", "sensors"],
  ];
  const enabled: string[] = [];
  for (const [flag, displayName] of displayNames) {
    if (capabilities[flag]) {
      enabled.push(displayName);
    }
  }
  if (enabled.length === 0) {
    return "System only";
  }
  return enabled.join(", ");
}
+3
View File
@@ -173,6 +173,9 @@ export interface SnmpDiscoveryItem {
export interface SnmpDiscoveryResult {
host: string;
credential_profile_id: number;
profile_key: string;
profile_name: string;
capabilities: Record<string, boolean>;
device_name?: string | null;
description?: string | null;
uptime_seconds?: number | null;
+125 -2
View File
@@ -23,6 +23,8 @@ class SnmpCheckConfig:
community: str
item_id: str
item_type: str
label: str | None = None
unit: str | None = None
port: int = 161
timeout_seconds: float = 5.0
retries: int = 1
@@ -47,6 +49,15 @@ IF_OUT_DISCARDS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 19)
IF_OUT_ERRORS = (1, 3, 6, 1, 2, 1, 2, 2, 1, 20)
IF_HC_IN_OCTETS = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 6)
IF_HC_OUT_OCTETS = (1, 3, 6, 1, 2, 1, 31, 1, 1, 1, 10)
HR_PROCESSOR_LOAD = (1, 3, 6, 1, 2, 1, 25, 3, 3, 1, 2)
HR_STORAGE_ALLOCATION_UNITS = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 4)
HR_STORAGE_SIZE = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 5)
HR_STORAGE_USED = (1, 3, 6, 1, 2, 1, 25, 2, 3, 1, 6)
ENT_PHY_SENSOR_TYPE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 1)
ENT_PHY_SENSOR_SCALE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 2)
ENT_PHY_SENSOR_PRECISION = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 3)
ENT_PHY_SENSOR_VALUE = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 4)
ENT_PHY_SENSOR_OPER_STATUS = (1, 3, 6, 1, 2, 1, 99, 1, 1, 1, 5)
STATUS_LABELS = {
1: "up",
@@ -58,6 +69,24 @@ STATUS_LABELS = {
7: "lower layer down",
}
SENSOR_STATUS_LABELS = {
1: "ok",
2: "unavailable",
3: "nonoperational",
}
SENSOR_TYPE_UNITS = {
3: "V",
4: "V",
5: "A",
6: "W",
7: "Hz",
8: "C",
9: "%",
10: "rpm",
11: "m3/min",
}
async def run_snmp_check(config: SnmpCheckConfig) -> SnmpCheckResult:
try:
@@ -83,6 +112,88 @@ def _run_snmp_check_sync(config: SnmpCheckConfig) -> SnmpCheckResult:
metrics=[SnmpMetricValue(name="uptime_seconds", value=float(uptime_seconds), unit="seconds")],
)
if config.item_type == "cpu_load":
processor_index = _item_index(config.item_id, "cpu")
if processor_index is None:
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP CPU item was not valid")
oid = _with_index(HR_PROCESSOR_LOAD, processor_index)
value = _int_value(client.get_many([oid]).get(oid))
response_time_ms = int((perf_counter() - started) * 1000)
if value is None:
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="CPU load was not reported")
return SnmpCheckResult(
status="up",
response_time_ms=response_time_ms,
message=f"CPU load is {value}%",
metrics=[SnmpMetricValue(name="load_percent", value=float(value), unit="%")],
)
if config.item_type in {"memory_usage", "storage_usage"}:
storage_index = _item_index(config.item_id, "storage")
if storage_index is None:
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP storage item was not valid")
oids = [
_with_index(HR_STORAGE_ALLOCATION_UNITS, storage_index),
_with_index(HR_STORAGE_SIZE, storage_index),
_with_index(HR_STORAGE_USED, storage_index),
]
values = client.get_many(oids)
response_time_ms = int((perf_counter() - started) * 1000)
allocation_unit = _int_value(values.get(oids[0]))
size = _int_value(values.get(oids[1]))
used = _int_value(values.get(oids[2]))
if not allocation_unit or not size or used is None:
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="Storage usage was not reported")
total_bytes = float(size * allocation_unit)
used_bytes = float(used * allocation_unit)
used_percent = (used / size) * 100
label = config.label or ("Memory" if config.item_type == "memory_usage" else "Storage")
return SnmpCheckResult(
status="up",
response_time_ms=response_time_ms,
message=f"{label} is {used_percent:.1f}% used",
metrics=[
SnmpMetricValue(name="used_percent", value=used_percent, unit="%"),
SnmpMetricValue(name="used_bytes", value=used_bytes, unit="bytes"),
SnmpMetricValue(name="total_bytes", value=total_bytes, unit="bytes"),
],
)
if config.item_type == "sensor_value":
sensor_index = _item_index(config.item_id, "sensor")
if sensor_index is None:
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP sensor item was not valid")
oids = [
_with_index(ENT_PHY_SENSOR_TYPE, sensor_index),
_with_index(ENT_PHY_SENSOR_SCALE, sensor_index),
_with_index(ENT_PHY_SENSOR_PRECISION, sensor_index),
_with_index(ENT_PHY_SENSOR_VALUE, sensor_index),
_with_index(ENT_PHY_SENSOR_OPER_STATUS, sensor_index),
]
values = client.get_many(oids)
response_time_ms = int((perf_counter() - started) * 1000)
sensor_type = _int_value(values.get(oids[0]))
scale = _int_value(values.get(oids[1]))
precision = _int_value(values.get(oids[2]))
raw_value = _int_value(values.get(oids[3]))
oper_status = _int_value(values.get(oids[4]))
if raw_value is None:
return SnmpCheckResult(status="down", response_time_ms=response_time_ms, message="Sensor value was not reported")
value = _scaled_sensor_value(raw_value, scale, precision)
unit = config.unit or SENSOR_TYPE_UNITS.get(sensor_type or 0)
status_label = SENSOR_STATUS_LABELS.get(oper_status or 1, f"status {oper_status}")
status = "up" if oper_status in {None, 1} else "down"
label = config.label or "Sensor"
return SnmpCheckResult(
status=status,
response_time_ms=response_time_ms,
message=f"{label} is {value:g}{unit or ''}; sensor status {status_label}",
metrics=[
SnmpMetricValue(name="sensor_value", value=value, unit=unit),
*([SnmpMetricValue(name="sensor_status", value=float(oper_status))] if oper_status is not None else []),
],
)
interface_index = _interface_index(config.item_id)
if interface_index is None:
return SnmpCheckResult(status="down", response_time_ms=0, message="SNMP interface item was not valid")
@@ -162,8 +273,12 @@ def _run_snmp_check_sync(config: SnmpCheckConfig) -> SnmpCheckResult:
def _interface_index(item_id: str) -> int | None:
    """Return the numeric index encoded in an ``interface`` item id.

    Thin convenience wrapper: delegates to ``_item_index`` with the expected
    prefix fixed to ``"interface"`` and returns whatever that helper returns.
    """
    return _item_index(item_id, expected_prefix="interface")
def _item_index(item_id: str, expected_prefix: str) -> int | None:
parts = item_id.split(".")
if len(parts) < 3 or parts[0] != "interface":
if len(parts) < 3 or parts[0] != expected_prefix:
return None
try:
return int(parts[1])
@@ -181,6 +296,12 @@ def _int_value(value: Any) -> int | None:
return None
def _scaled_sensor_value(raw_value: int, scale: int | None, precision: int | None) -> float:
scale_multiplier = 10 ** ((scale or 9) - 9)
precision_divisor = 10 ** (precision or 0)
return float(raw_value * scale_multiplier / precision_divisor)
class SnmpV2Client:
def __init__(self, host: str, community: str, port: int, timeout_seconds: float, retries: int) -> None:
self.host = host
@@ -343,7 +464,9 @@ def _decode_oid(value: bytes) -> tuple[int, ...]:
def _decode_value(tag: int, value: bytes) -> Any:
if tag in {0x02, 0x41, 0x42, 0x43, 0x46}:
if tag == 0x02:
return _decode_integer(value)
if tag in {0x41, 0x42, 0x43, 0x46}:
return int.from_bytes(value, "big")
if tag == 0x04:
return value.decode("utf-8", errors="replace")
+2
View File
@@ -152,6 +152,8 @@ class Scheduler:
community=community,
item_id=str(monitor.config.get("item_id") or ""),
item_type=str(monitor.config.get("item_type") or ""),
label=monitor.config.get("label") if isinstance(monitor.config.get("label"), str) else None,
unit=monitor.config.get("unit") if isinstance(monitor.config.get("unit"), str) else None,
port=int(extra.get("port") or 161),
timeout_seconds=float(extra.get("timeout_seconds") or 5),
retries=int(extra.get("retries") or 1),
+95
View File
@@ -10,6 +10,15 @@ from app.collectors.snmp import (
IF_OPER_STATUS,
IF_OUT_DISCARDS,
IF_OUT_ERRORS,
ENT_PHY_SENSOR_OPER_STATUS,
ENT_PHY_SENSOR_PRECISION,
ENT_PHY_SENSOR_SCALE,
ENT_PHY_SENSOR_TYPE,
ENT_PHY_SENSOR_VALUE,
HR_PROCESSOR_LOAD,
HR_STORAGE_ALLOCATION_UNITS,
HR_STORAGE_SIZE,
HR_STORAGE_USED,
SYS_UPTIME,
SnmpCheckConfig,
_with_index,
@@ -37,6 +46,92 @@ class SnmpCollectorTestCase(unittest.IsolatedAsyncioTestCase):
("uptime_seconds", 1234.0, "seconds")
]
async def test_collects_cpu_load(self) -> None:
    # A cpu_load item whose mocked client answers 42 for the indexed
    # HR_PROCESSOR_LOAD OID should come back "up" with one percent metric.
    load_oid = _with_index(HR_PROCESSOR_LOAD, 196608)
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="cpu.196608.load",
        item_type="cpu_load",
    )

    with patch("app.collectors.snmp.SnmpV2Client") as client_class:
        fake_client = client_class.return_value
        fake_client.get_many.return_value = {load_oid: 42}
        result = await run_snmp_check(check)

    assert result.status == "up"
    assert result.message == "CPU load is 42%"
    reported = [(metric.name, metric.value, metric.unit) for metric in result.metrics]
    assert reported == [("load_percent", 42.0, "%")]
async def test_collects_storage_usage(self) -> None:
    # Storage row 31: 4096-byte allocation units, 100 units total, 25 used.
    # The check is expected to report 25% usage plus the byte totals.
    allocation_oid, size_oid, used_oid = (
        _with_index(base_oid, 31)
        for base_oid in (HR_STORAGE_ALLOCATION_UNITS, HR_STORAGE_SIZE, HR_STORAGE_USED)
    )
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="storage.31.usage",
        item_type="storage_usage",
        label="Disk / usage",
    )

    with patch("app.collectors.snmp.SnmpV2Client") as client_class:
        fake_client = client_class.return_value
        fake_client.get_many.return_value = {
            allocation_oid: 4096,
            size_oid: 100,
            used_oid: 25,
        }
        result = await run_snmp_check(check)

    assert result.status == "up"
    assert result.message == "Disk / usage is 25.0% used"
    reported = [(metric.name, metric.value, metric.unit) for metric in result.metrics]
    assert reported == [
        ("used_percent", 25.0, "%"),
        ("used_bytes", 102400.0, "bytes"),
        ("total_bytes", 409600.0, "bytes"),
    ]
async def test_collects_sensor_value_and_status(self) -> None:
    # Sensor row 10 answers type 8, scale 9, precision 1, raw value 310 and
    # operational status 1; the check should report 31 "C" and status "ok".
    sensor_columns = (
        ENT_PHY_SENSOR_TYPE,
        ENT_PHY_SENSOR_SCALE,
        ENT_PHY_SENSOR_PRECISION,
        ENT_PHY_SENSOR_VALUE,
        ENT_PHY_SENSOR_OPER_STATUS,
    )
    polled_values = (8, 9, 1, 310, 1)
    # Pair each indexed column OID with the value the fake agent returns.
    canned_response = {
        _with_index(column, 10): polled
        for column, polled in zip(sensor_columns, polled_values)
    }
    check = SnmpCheckConfig(
        host="192.0.2.10",
        community="private-community",
        item_id="sensor.10.value",
        item_type="sensor_value",
        label="Temperature Inlet",
        unit="C",
    )

    with patch("app.collectors.snmp.SnmpV2Client") as client_class:
        fake_client = client_class.return_value
        fake_client.get_many.return_value = canned_response
        result = await run_snmp_check(check)

    assert result.status == "up"
    assert result.message == "Temperature Inlet is 31C; sensor status ok"
    reported = [(metric.name, metric.value, metric.unit) for metric in result.metrics]
    assert reported == [
        ("sensor_value", 31.0, "C"),
        ("sensor_status", 1.0, None),
    ]
async def test_collects_interface_status(self) -> None:
admin_oid = _with_index(IF_ADMIN_STATUS, 7)
oper_oid = _with_index(IF_OPER_STATUS, 7)