diff --git a/backend/app/api/monitors.py b/backend/app/api/monitors.py
index d55e3df..e889525 100644
--- a/backend/app/api/monitors.py
+++ b/backend/app/api/monitors.py
@@ -7,7 +7,7 @@ from sqlalchemy.orm import Session
 from app.auth.dependencies import get_current_user, require_role
 from app.db.session import get_db
 from app.models import AlertRule, Asset, CheckResult, Incident, Monitor, User
-from app.schemas.core import CheckResultRead, MonitorCreate, MonitorRead, MonitorUpdate, WebsiteMonitorCreate
+from app.schemas.core import CheckResultRead, MonitorCreate, MonitorRead, MonitorUpdate, PingMonitorCreate, TcpMonitorCreate, WebsiteMonitorCreate
 
 router = APIRouter(prefix="/monitors", tags=["monitors"])
 
@@ -80,6 +80,98 @@ def create_website_monitor(
     return monitor
 
 
+def _create_network_monitor(
+    db: Session,
+    payload: PingMonitorCreate | TcpMonitorCreate,
+    *,
+    monitor_type: str,
+    asset_type: str,
+    target: str,
+    config: dict,
+    alert_label: str,
+) -> Monitor:
+    """Create a ping or TCP monitor with its optional asset and alert rule.
+
+    Shared by the ``/ping`` and ``/tcp`` endpoints, which differ only in the
+    monitor type, asset type, target string, collector config and alert name.
+    The asset (when ``payload.create_asset``) and the monitor are flushed as they
+    are added, the alert rule is added when ``payload.alert_enabled``, and
+    everything is committed together before the refreshed monitor is returned.
+    """
+    asset_id: int | None = None
+    if payload.create_asset:
+        asset = Asset(name=payload.name, asset_type=asset_type, address=target, status="unknown", extra={})
+        db.add(asset)
+        db.flush()
+        asset_id = asset.id
+
+    monitor = Monitor(
+        asset_id=asset_id,
+        name=payload.name,
+        monitor_type=monitor_type,
+        target=target,
+        config=config,
+        interval_seconds=payload.interval_seconds,
+        status="unknown",
+    )
+    db.add(monitor)
+    db.flush()
+
+    if payload.alert_enabled:
+        db.add(
+            AlertRule(
+                monitor_id=monitor.id,
+                name=f"{payload.name} {alert_label}",
+                severity=payload.alert_severity,
+                condition={"type": "status_not_up"},
+                failure_threshold=payload.failure_threshold,
+                cooldown_seconds=300,
+                is_enabled=True,
+            )
+        )
+
+    db.commit()
+    db.refresh(monitor)
+    return monitor
+
+
+@router.post("/ping", response_model=MonitorRead)
+def create_ping_monitor(
+    payload: PingMonitorCreate,
+    _: User = Depends(require_role("admin")),
+    db: Session = Depends(get_db),
+) -> Monitor:
+    """Create an ICMP ping monitor, optionally with a ``host`` asset and an alert rule."""
+    return _create_network_monitor(
+        db,
+        payload,
+        monitor_type="ping",
+        asset_type="host",
+        target=payload.host,
+        config={"timeout_seconds": payload.timeout_seconds},
+        alert_label="ping failure",
+    )
+
+
+@router.post("/tcp", response_model=MonitorRead)
+def create_tcp_monitor(
+    payload: TcpMonitorCreate,
+    _: User = Depends(require_role("admin")),
+    db: Session = Depends(get_db),
+) -> Monitor:
+    """Create a TCP port monitor, optionally with a ``tcp_service`` asset and an alert rule."""
+    return _create_network_monitor(
+        db,
+        payload,
+        monitor_type="tcp",
+        asset_type="tcp_service",
+        # The asset address and the monitor target share the same "host:port" string.
+        target=f"{payload.host}:{payload.port}",
+        config={"host": payload.host, "port": payload.port, "timeout_seconds": payload.timeout_seconds},
+        alert_label="TCP connection failure",
+    )
+
+
 @router.get("/{monitor_id}", response_model=MonitorRead)
 def get_monitor(monitor_id: int, _: User = Depends(get_current_user), db: Session = Depends(get_db)) -> Monitor:
     monitor = db.get(Monitor, monitor_id)
@@ -110,7 +202,9 @@ def update_monitor(
 @router.delete("/{monitor_id}", status_code=204)
 def delete_monitor(
     monitor_id: int,
-    cleanup_orphan_website_asset: bool = True,
+    cleanup_orphan_asset: bool = True,
+    # Deprecated alias of ``cleanup_orphan_asset``; undeclared query parameters are ignored, so keep accepting it.
+    cleanup_orphan_website_asset: bool | None = None,
     _: User = Depends(require_role("admin")),
     db: Session = Depends(get_db),
 ) -> None:
@@ -129,10 +223,13 @@ def delete_monitor(
     db.delete(monitor)
     db.flush()
 
-    if cleanup_orphan_website_asset and asset_id is not None:
+    # Honour the deprecated query parameter name when a client still sends it.
+    if cleanup_orphan_website_asset is not None:
+        cleanup_orphan_asset = cleanup_orphan_website_asset
+    if cleanup_orphan_asset and asset_id is not None:
         remaining = db.scalar(select(func.count(Monitor.id)).where(Monitor.asset_id == asset_id))
         asset = db.get(Asset, asset_id)
-        if remaining == 0 and asset is not None and asset.asset_type == "website":
+        if remaining == 0 and asset is not None and asset.asset_type in {"website", "host", "tcp_service"}:
             db.delete(asset)
 
     db.commit()
diff --git a/backend/app/schemas/core.py
b/backend/app/schemas/core.py index a71fbaa..1d22681 100644 --- a/backend/app/schemas/core.py +++ b/backend/app/schemas/core.py @@ -73,6 +73,29 @@ class WebsiteMonitorCreate(BaseModel): failure_threshold: int = Field(default=3, ge=1, le=20) +class PingMonitorCreate(BaseModel): + name: str = Field(min_length=1, max_length=160) + host: str = Field(min_length=1, max_length=255) + timeout_seconds: int = Field(default=5, ge=1, le=60) + interval_seconds: int = Field(default=60, ge=10) + create_asset: bool = True + alert_enabled: bool = True + alert_severity: str = "warning" + failure_threshold: int = Field(default=3, ge=1, le=20) + + +class TcpMonitorCreate(BaseModel): + name: str = Field(min_length=1, max_length=160) + host: str = Field(min_length=1, max_length=255) + port: int = Field(ge=1, le=65535) + timeout_seconds: int = Field(default=5, ge=1, le=60) + interval_seconds: int = Field(default=60, ge=10) + create_asset: bool = True + alert_enabled: bool = True + alert_severity: str = "warning" + failure_threshold: int = Field(default=3, ge=1, le=20) + + class CheckResultRead(BaseModel): id: int monitor_id: int diff --git a/docs/agent-handoff.md b/docs/agent-handoff.md index 4a4c291..99f2399 100644 --- a/docs/agent-handoff.md +++ b/docs/agent-handoff.md @@ -30,6 +30,7 @@ OrbitalWard is a secure monitoring appliance focused on the v0.1 vertical slice: - Website monitor create/edit/delete flow. - HTTP status and expected-text checks. - Optional TLS certificate expiry checks for HTTPS monitors. +- Ping and TCP port monitor create/edit/delete flow. - Alert rules, incident opening/resolution, acknowledge, silence, and webhook notifications. - Generic webhook, Mattermost, and Zoom Team Chat notification channels. - Saved webhook URLs encrypted at rest and not returned to the UI. 
@@ -43,6 +44,8 @@ After the rename and TLS expiry work, these checks passed in Docker: - `docker compose -f docker-compose.dev.yml exec -T frontend npm run typecheck` - `docker compose -f docker-compose.dev.yml exec -T worker python -m compileall app` - Backend health returned `{"status":"ok","service":"orbitalward-backend"}`. +- Direct worker probes for TCP and ICMP ping checks passed inside the Docker network. +- API probe created and deleted one ping monitor and one TCP monitor successfully. The final Compose project uses `orbitalward-*` containers, images, network, and volumes. @@ -72,7 +75,7 @@ Issue source docs: - `docs/progress.md` - `docs/roadmap.md` -Current completed items include TLS expiry monitor support, HTTP/website checks, basic alert evaluation, incident actions, and webhook notification channels. Next recommended work starts with ping and TCP port monitors. +Current completed items include TLS expiry monitor support, HTTP/website checks, ping and TCP port checks, basic alert evaluation, incident actions, and webhook notification channels. The next recommended implementation issue is alert rule editing UI. ## Guardrails diff --git a/docs/gitea-issues.md b/docs/gitea-issues.md index c880ee3..877f569 100644 --- a/docs/gitea-issues.md +++ b/docs/gitea-issues.md @@ -57,6 +57,7 @@ Completed in the initial scaffold: - React frontend skeleton with authenticated layout. - Worker skeleton with working HTTP website monitor polling. - Website monitor create/edit/delete UI. +- Ping and TCP port monitor collectors and UI. - Basic alert evaluation, incidents, acknowledge, and silence actions. - Generic webhook, Mattermost, and Zoom Team Chat channel foundations. - Encrypted webhook URL storage. diff --git a/docs/progress.md b/docs/progress.md index f99ccd6..5080976 100644 --- a/docs/progress.md +++ b/docs/progress.md @@ -24,6 +24,13 @@ Implemented website-monitor slice: - Incidents can be acknowledged and silenced from the UI. 
- Deleting a monitor resolves any open incidents tied to that monitor. +Implemented network-monitor slice: + +- Create, edit, delete ping and TCP port monitors from the UI. +- Worker performs ICMP ping checks and TCP connection checks. +- Ping and TCP monitors use the same alert rule, incident, recovery, and notification flow as website monitors. +- Dashboard monitor status includes website, ping, and TCP monitors. + Implemented notification slice: - Create, edit, test, and delete notification channels from the UI. @@ -43,7 +50,6 @@ Implemented notification slice: - Alert rule editing UI is not implemented. - Notification routing/policies are not implemented; all enabled webhook channels receive incident notifications. - Email/SMTP notifications are not implemented yet. -- Ping and TCP checks are not implemented yet. - Graphing exists only as placeholders; metric visualization is not implemented. - Worker scheduling is simple polling, not a Redis queue yet. - Tests are still minimal and need meaningful backend/worker/frontend coverage. @@ -51,15 +57,14 @@ Implemented notification slice: ## Recommended Next Work -1. Add ping and TCP port monitors. -2. Add alert rule editing UI and richer alert conditions. -3. Add notification policy/routing controls. -4. Add email/SMTP notification channel. -5. Add audit event writes for auth, monitor, credential, notification, and incident actions. -6. Build credential vault UI with masked secret handling. -7. Add user administration UI. -8. Add graphs for website response time and monitor status history. -9. Add backend and worker tests for the website-monitor and notification flows. +1. Add alert rule editing UI and richer alert conditions. +2. Add notification policy/routing controls. +3. Add email/SMTP notification channel. +4. Add audit event writes for auth, monitor, credential, notification, and incident actions. +5. Build credential vault UI with masked secret handling. +6. Add user administration UI. +7. 
Add graphs for website response time and monitor status history. +8. Add backend and worker tests for the website-monitor and notification flows. ## Operational Notes diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index bf55c67..dcf99a9 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -6,6 +6,8 @@ import type { NotificationChannel, NotificationChannelCreate, NotificationChannelUpdate, + PingMonitorCreate, + TcpMonitorCreate, User, WebsiteMonitorCreate, } from "../types/api"; @@ -61,6 +63,16 @@ export const api = { method: "POST", body: JSON.stringify(payload), }), + createPingMonitor: (token: string, payload: PingMonitorCreate) => + request("/monitors/ping", token, { + method: "POST", + body: JSON.stringify(payload), + }), + createTcpMonitor: (token: string, payload: TcpMonitorCreate) => + request("/monitors/tcp", token, { + method: "POST", + body: JSON.stringify(payload), + }), updateMonitor: (token: string, monitorId: number, payload: MonitorUpdate) => request(`/monitors/${monitorId}`, token, { method: "PATCH", diff --git a/frontend/src/app/App.tsx b/frontend/src/app/App.tsx index e88dce2..2fbabb8 100644 --- a/frontend/src/app/App.tsx +++ b/frontend/src/app/App.tsx @@ -7,6 +7,7 @@ import { AlertsPage } from "../pages/AlertsPage"; import { DashboardPage } from "../pages/DashboardPage"; import { ListPage } from "../pages/ListPage"; import { LoginPage } from "../pages/LoginPage"; +import { NetworkChecksPage } from "../pages/NetworkChecksPage"; import { NotificationsPage } from "../pages/NotificationsPage"; import { WebsitesPage } from "../pages/WebsitesPage"; import type { Asset, Incident, Monitor } from "../types/api"; @@ -79,6 +80,9 @@ export function App() { {page === "websites" ? ( ) : null} + {page === "network-checks" ? ( + + ) : null} {page === "alerts" ? 
( ) : null} diff --git a/frontend/src/components/Shell.tsx b/frontend/src/components/Shell.tsx index f098fa3..40c9d60 100644 --- a/frontend/src/components/Shell.tsx +++ b/frontend/src/components/Shell.tsx @@ -7,6 +7,7 @@ import { KeyRound, LogOut, Network, + PlugZap, Radar, Settings, Shield, @@ -20,6 +21,7 @@ const navigation = [ { id: "dashboard", label: "Dashboard", icon: Gauge }, { id: "assets", label: "Assets", icon: Network }, { id: "websites", label: "Websites", icon: Globe }, + { id: "network-checks", label: "Network Checks", icon: PlugZap }, { id: "alerts", label: "Alerts", icon: Bell }, { id: "discovery", label: "Discovery", icon: Radar }, { id: "graphs", label: "Graphs", icon: Activity }, diff --git a/frontend/src/pages/DashboardPage.tsx b/frontend/src/pages/DashboardPage.tsx index 7cc6ea0..b3ac76c 100644 --- a/frontend/src/pages/DashboardPage.tsx +++ b/frontend/src/pages/DashboardPage.tsx @@ -11,7 +11,6 @@ interface DashboardPageProps { export function DashboardPage({ assets, monitors, incidents }: DashboardPageProps) { const attentionMonitors = monitors.filter((monitor) => monitor.status !== "up" && monitor.status !== "unknown").length; const activeIncidents = incidents.filter((incident) => incident.status === "open").length; - const websites = monitors.filter((monitor) => monitor.monitor_type === "http"); return (
@@ -26,29 +25,30 @@ export function DashboardPage({ assets, monitors, incidents }: DashboardPageProp - +
-

Website Monitors

+

Monitor Status

{attentionMonitors} need attention
- {websites.length ? ( - websites.map((monitor) => ( -
+ {monitors.length ? ( + monitors.map((monitor) => ( +
{monitor.name}
{monitor.target}
+
{monitor.monitor_type}
{monitor.interval_seconds}s interval
)) ) : ( -
No website monitors yet.
+
No monitors yet.
)}
diff --git a/frontend/src/pages/NetworkChecksPage.tsx b/frontend/src/pages/NetworkChecksPage.tsx new file mode 100644 index 0000000..dc0a0d5 --- /dev/null +++ b/frontend/src/pages/NetworkChecksPage.tsx @@ -0,0 +1,250 @@ +import { FormEvent, useState } from "react"; +import { Activity, Edit3, PlugZap, Plus, RefreshCw, Trash2, X } from "lucide-react"; + +import { api } from "../api/client"; +import { Button } from "../components/Button"; +import type { Monitor } from "../types/api"; + +interface NetworkChecksPageProps { + token: string; + monitors: Monitor[]; + onChanged: () => Promise; +} + +type NetworkCheckType = "ping" | "tcp"; + +export function NetworkChecksPage({ token, monitors, onChanged }: NetworkChecksPageProps) { + const networkChecks = monitors.filter((monitor) => monitor.monitor_type === "ping" || monitor.monitor_type === "tcp"); + const [checkType, setCheckType] = useState("ping"); + const [name, setName] = useState(""); + const [host, setHost] = useState(""); + const [port, setPort] = useState(443); + const [timeoutSeconds, setTimeoutSeconds] = useState(5); + const [intervalSeconds, setIntervalSeconds] = useState(60); + const [failureThreshold, setFailureThreshold] = useState(3); + const [alertEnabled, setAlertEnabled] = useState(true); + const [editingMonitorId, setEditingMonitorId] = useState(null); + const [submitting, setSubmitting] = useState(false); + const [deletingId, setDeletingId] = useState(null); + const [error, setError] = useState(null); + + async function handleSubmit(event: FormEvent) { + event.preventDefault(); + setSubmitting(true); + setError(null); + try { + if (editingMonitorId) { + await api.updateMonitor(token, editingMonitorId, { + name, + target: checkType === "tcp" ? `${host}:${port}` : host, + interval_seconds: intervalSeconds, + config: checkType === "tcp" ? 
{ host, port, timeout_seconds: timeoutSeconds } : { timeout_seconds: timeoutSeconds }, + }); + } else if (checkType === "tcp") { + await api.createTcpMonitor(token, { + name, + host, + port, + timeout_seconds: timeoutSeconds, + interval_seconds: intervalSeconds, + create_asset: true, + alert_enabled: alertEnabled, + alert_severity: "warning", + failure_threshold: failureThreshold, + }); + } else { + await api.createPingMonitor(token, { + name, + host, + timeout_seconds: timeoutSeconds, + interval_seconds: intervalSeconds, + create_asset: true, + alert_enabled: alertEnabled, + alert_severity: "warning", + failure_threshold: failureThreshold, + }); + } + resetForm(); + await onChanged(); + } catch (err) { + setError(err instanceof Error ? err.message : "Could not save network check"); + } finally { + setSubmitting(false); + } + } + + function startEdit(monitor: Monitor) { + const nextType = monitor.monitor_type === "tcp" ? "tcp" : "ping"; + setEditingMonitorId(monitor.id); + setCheckType(nextType); + setName(monitor.name); + setHost(nextType === "tcp" ? String(monitor.config?.host ?? monitor.target.split(":")[0] ?? "") : monitor.target); + setPort(Number(monitor.config?.port ?? 443)); + setTimeoutSeconds(Number(monitor.config?.timeout_seconds ?? 5)); + setIntervalSeconds(monitor.interval_seconds); + setAlertEnabled(true); + setFailureThreshold(3); + setError(null); + } + + function resetForm() { + setEditingMonitorId(null); + setCheckType("ping"); + setName(""); + setHost(""); + setPort(443); + setTimeoutSeconds(5); + setIntervalSeconds(60); + setFailureThreshold(3); + setAlertEnabled(true); + } + + async function deleteMonitor(monitorId: number) { + setDeletingId(monitorId); + setError(null); + try { + await api.deleteMonitor(token, monitorId); + await onChanged(); + } catch (err) { + setError(err instanceof Error ? err.message : "Could not delete network check"); + } finally { + setDeletingId(null); + } + } + + return ( +
+
+
+

Network Checks

+

ICMP ping checks and TCP port availability checks.

+
+ +
+ +
+
+
+ {checkType === "tcp" ? : } +

{editingMonitorId ? "Edit Network Check" : "Add Network Check"}

+
+ +
+ + +
+ + + + + +
+ {checkType === "tcp" ? ( + + ) : null} + + +
+ + {!editingMonitorId ? ( +
+ Alert on repeated failures + setAlertEnabled(event.target.checked)} type="checkbox" /> +
+ ) : null} + + {!editingMonitorId ? ( + + ) : null} + + {error ?
{error}
: null} + +
+ {editingMonitorId ? ( + + ) : null} + +
+
+ +
+
+

Configured Network Checks

+
+
+ {networkChecks.length ? ( + networkChecks.map((monitor) => ( +
+
+
{monitor.name}
+
{monitor.target}
+
+ {monitor.monitor_type} + +
+
{monitor.last_checked_at ? new Date(monitor.last_checked_at).toLocaleTimeString() : "Not checked"}
+ + +
+
+ )) + ) : ( +
No network checks yet.
+ )} +
+
+
+
+ ); +} + +function modeClass(active: boolean) { + return `h-8 rounded-md text-sm transition disabled:opacity-70 ${active ? "bg-slate-800 text-white" : "text-slate-400 hover:bg-slate-900 hover:text-white"}`; +} + +function Status({ status }: { status: string }) { + const classes = + status === "up" + ? "border-teal-500/40 bg-teal-950/40 text-teal-200" + : status === "down" + ? "border-red-500/40 bg-red-950/40 text-red-200" + : status === "warning" + ? "border-amber-500/40 bg-amber-950/40 text-amber-200" + : "border-slate-600 bg-slate-900 text-slate-300"; + return {status}; +} diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index bec6ab3..174f551 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -94,3 +94,26 @@ export interface WebsiteMonitorCreate { alert_severity: string; failure_threshold: number; } + +export interface PingMonitorCreate { + name: string; + host: string; + timeout_seconds: number; + interval_seconds: number; + create_asset: boolean; + alert_enabled: boolean; + alert_severity: string; + failure_threshold: number; +} + +export interface TcpMonitorCreate { + name: string; + host: string; + port: number; + timeout_seconds: number; + interval_seconds: number; + create_asset: boolean; + alert_enabled: boolean; + alert_severity: string; + failure_threshold: number; +} diff --git a/worker/app/collectors/network.py b/worker/app/collectors/network.py new file mode 100644 index 0000000..f51948d --- /dev/null +++ b/worker/app/collectors/network.py @@ -0,0 +1,108 @@ +import asyncio +import os +import socket +import struct +from dataclasses import dataclass +from time import perf_counter + + +@dataclass(frozen=True) +class NetworkCheckResult: + status: str + response_time_ms: int | None + message: str + + +@dataclass(frozen=True) +class PingCheckConfig: + host: str + timeout_seconds: float = 5.0 + + +@dataclass(frozen=True) +class TcpCheckConfig: + host: str + port: int + timeout_seconds: float = 5.0 + + +async def 
run_ping_check(config: PingCheckConfig) -> NetworkCheckResult:
+    """Send one ICMP echo request to ``config.host`` and report the outcome.
+
+    The blocking raw-socket probe runs in a worker thread via
+    ``asyncio.to_thread``. Permission errors, timeouts and other ``OSError``s
+    are converted into a ``down`` result instead of being raised.
+    """
+    try:
+        response_time_ms = await asyncio.to_thread(_run_ping_check_sync, config.host, config.timeout_seconds)
+    except PermissionError:
+        return NetworkCheckResult(status="down", response_time_ms=None, message="ICMP ping requires raw socket permission")
+    except TimeoutError:
+        return NetworkCheckResult(status="down", response_time_ms=None, message="Ping timed out")
+    except OSError as exc:
+        return NetworkCheckResult(status="down", response_time_ms=None, message=f"Ping failed: {exc}")
+    return NetworkCheckResult(status="up", response_time_ms=response_time_ms, message="Ping check passed")
+
+
+async def run_tcp_check(config: TcpCheckConfig) -> NetworkCheckResult:
+    """Open a TCP connection to ``config.host:config.port`` and report the outcome.
+
+    ``response_time_ms`` covers connection establishment only. Once the
+    connection has been accepted the check is ``up``; problems while closing
+    the socket are ignored.
+    """
+    started = perf_counter()
+    try:
+        _reader, writer = await asyncio.wait_for(
+            asyncio.open_connection(config.host, config.port), timeout=config.timeout_seconds
+        )
+    except TimeoutError:
+        # The timeout exception usually carries no text, so spell the reason out.
+        return NetworkCheckResult(status="down", response_time_ms=None, message="TCP connection failed: timed out")
+    except OSError as exc:
+        return NetworkCheckResult(status="down", response_time_ms=None, message=f"TCP connection failed: {exc}")
+
+    # Stop the clock as soon as the connection is established; teardown is not measured.
+    response_time_ms = int((perf_counter() - started) * 1000)
+
+    writer.close()
+    try:
+        # Bound the wait so a peer that never finishes closing cannot stall the check.
+        await asyncio.wait_for(writer.wait_closed(), timeout=config.timeout_seconds)
+    except OSError:
+        # TimeoutError is an OSError subclass. The port accepted the connection,
+        # so an error while closing does not make the service "down".
+        pass
+
+    return NetworkCheckResult(status="up", response_time_ms=response_time_ms, message="TCP connection succeeded")
+
+
+def _run_ping_check_sync(host: str, timeout_seconds: float) -> int:
+    """Send one ICMP echo request and return the round-trip time in milliseconds.
+
+    Blocking; ``run_ping_check`` runs it in a worker thread.
+
+    Raises:
+        TimeoutError: if no matching reply arrives within ``timeout_seconds``.
+        OSError: on name-resolution or socket errors (``run_ping_check`` reports
+            ``PermissionError`` as missing raw-socket permission).
+    """
+    address = _resolve_ipv4(host)
+    identifier = os.getpid() & 0xFFFF
+    sequence = 1
+    packet = _build_icmp_echo_request(identifier, sequence)
+
+    with socket.socket(socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP) as sock:
+        started = perf_counter()
+        deadline = started + timeout_seconds
+        sock.settimeout(timeout_seconds)
+        sock.sendto(packet, (address, 0))
+
+        while True:
+            # A raw ICMP socket also receives unrelated ICMP traffic. Shrink the
+            # socket timeout to the time left so a stream of non-matching packets
+            # cannot keep this loop alive past the overall deadline.
+            remaining = deadline - perf_counter()
+            if remaining <= 0:
+                raise TimeoutError("timed out waiting for ICMP echo reply")
+            sock.settimeout(remaining)
+            response, sender = sock.recvfrom(1024)
+            # Identifier and sequence are identical for every probe sent by this
+            # process, so also require the reply to come from the pinged address.
+            if sender[0] == address and _matches_icmp_echo_reply(response, identifier, sequence):
+                return int((perf_counter() - started) * 1000)
+
+
+def _resolve_ipv4(host: str) -> str:
+    """Return the first IPv4 address ``host`` resolves to, as a string."""
+    results = socket.getaddrinfo(host, None, socket.AF_INET, socket.SOCK_RAW, socket.IPPROTO_ICMP)
+    if not results:
+        raise OSError("Could not resolve an IPv4 address")
+    return str(results[0][4][0])
+
+
+def _build_icmp_echo_request(identifier: int, sequence: int) -> bytes:
+    """Build an ICMP echo request (type 8, code 0) with a fixed payload and valid checksum."""
+    payload = b"OrbitalWard ping"
+    # The checksum is computed over the packet with the checksum field zeroed, then packed in.
+    header = struct.pack("!BBHHH", 8, 0, 0, identifier, sequence)
+    checksum = _icmp_checksum(header + payload)
+    header = struct.pack("!BBHHH", 8, 0, checksum, identifier, sequence)
+    return header + payload
+
+
+def _matches_icmp_echo_reply(response: bytes, identifier: int, sequence: int) -> bool:
+    """Return True if ``response`` is an echo reply (type 0) with this identifier and sequence.
+
+    ``response`` is expected to start with the IPv4 header; its length is read
+    from the low nibble of the first byte.
+    """
+    if len(response) < 28:
+        return False
+    ip_header_length = (response[0] & 0x0F) * 4
+    icmp_header = response[ip_header_length : ip_header_length + 8]
+    if len(icmp_header) < 8:
+        return False
+    icmp_type, _, _, reply_identifier, reply_sequence = struct.unpack("!BBHHH", icmp_header)
+    return icmp_type == 0 and reply_identifier == identifier and reply_sequence == sequence
+
+
+def _icmp_checksum(data: bytes) -> int:
+    """Return the 16-bit one's-complement checksum of ``data`` (zero-padded to an even length)."""
+    if len(data) % 2:
+        data += b"\x00"
+
+    checksum = 0
+    for index in range(0, len(data), 2):
+        checksum += (data[index] << 8) + data[index + 1]
+        # Fold the carry back in after every word so the sum stays within 16 bits.
+        checksum = (checksum & 0xFFFF) + (checksum >> 16)
+
+    return ~checksum & 0xFFFF
diff --git a/worker/app/scheduler.py b/worker/app/scheduler.py
index 47d2305..4a66380 100644
--- a/worker/app/scheduler.py
+++ b/worker/app/scheduler.py
@@ -8,6 +8,7 @@ from sqlalchemy.orm import Session
 import httpx
 
 from app.collectors.website import WebsiteCheckConfig, run_website_check
+from app.collectors.network import PingCheckConfig, TcpCheckConfig, run_ping_check, run_tcp_check
 from app.config import settings
 from app.db import session_scope
 from app.models import AlertRule, Asset, CheckResult, Incident, Monitor, NotificationChannel
@@ -33,7 +34,7 @@ class Scheduler:
     async def tick(self) -> None:
         try:
             with session_scope() as db:
-                due_monitors = self._load_due_website_monitors(db)
+                due_monitors = self._load_due_monitors(db)
                 for monitor in due_monitors:
                     await self._run_monitor(db, monitor)
                     db.commit()
@@ -43,9 +44,11 @@ class Scheduler:
     def stop(self) -> None:
         self._stopped.set()
 
-    def _load_due_website_monitors(self, db: Session) -> list[Monitor]:
+    def _load_due_monitors(self, db: Session) -> list[Monitor]:
         now = datetime.now(UTC)
-        monitors = db.scalars(select(Monitor).where(Monitor.monitor_type == "http").order_by(Monitor.id).limit(50)).all()
+        monitors = db.scalars(
+            select(Monitor).where(Monitor.monitor_type.in_(["http", "ping", "tcp"])).order_by(Monitor.id).limit(50)
+        ).all()
         due: list[Monitor] = []
         for monitor in monitors:
             if monitor.last_checked_at is None:
@@ -57,16 +60,7 @@ class Scheduler:
         return due
 
     async def _run_monitor(self, db: Session, monitor: Monitor) -> None:
-        config = WebsiteCheckConfig(
-            url=monitor.target,
-            expected_status=int(monitor.config.get("expected_status", 200)),
-            expected_text=monitor.config.get("expected_text") or None,
-            unexpected_text=monitor.config.get("unexpected_text") or None,
-            timeout_seconds=float(monitor.config.get("timeout_seconds", 10)),
-            check_tls_expiry=bool(monitor.config.get("check_tls_expiry", False)),
-            tls_warning_days=int(monitor.config.get("tls_warning_days", 30)),
-        )
-        result = await run_website_check(config)
+        result = await self._collect_monitor_result(monitor)
         now = datetime.now(UTC)
 
         monitor.status = result.status
@@ -93,6 +87,62 @@ class Scheduler:
 
         logger.info("Checked %s: %s (%s ms)", monitor.name, result.status, result.response_time_ms)
 
+    async def _collect_monitor_result(self, monitor: Monitor):
+        """Run the collector matching ``monitor.monitor_type`` and return its result.
+
+        A TCP monitor whose port cannot be determined yields a ``down`` result
+        instead of an exception; an unsupported monitor type raises ``ValueError``.
+        """
+        options = monitor.config or {}
+
+        if monitor.monitor_type == "http":
+            config = WebsiteCheckConfig(
+                url=monitor.target,
+                expected_status=int(options.get("expected_status", 200)),
+                expected_text=options.get("expected_text") or None,
+                unexpected_text=options.get("unexpected_text") or None,
+                timeout_seconds=float(options.get("timeout_seconds", 10)),
+                check_tls_expiry=bool(options.get("check_tls_expiry", False)),
+                tls_warning_days=int(options.get("tls_warning_days", 30)),
+            )
+            return await run_website_check(config)
+
+        if monitor.monitor_type == "ping":
+            config = PingCheckConfig(
+                host=monitor.target,
+                timeout_seconds=float(options.get("timeout_seconds", 5)),
+            )
+            return await run_ping_check(config)
+
+        if monitor.monitor_type == "tcp":
+            # Local import keeps the module-level import list unchanged.
+            from app.collectors.network import NetworkCheckResult
+
+            # Prefer the explicit config values; otherwise split the "host:port"
+            # target so a monitor without them still gets a real host and port.
+            target_host, _, target_port = monitor.target.rpartition(":")
+            host = str(options.get("host") or target_host or monitor.target)
+            try:
+                port = int(options.get("port") or target_port)
+            except (TypeError, ValueError):
+                port = 0
+            if not 0 < port <= 65535:
+                # Raising here would propagate out of the monitor loop in tick(),
+                # so report the misconfiguration as a failed check instead.
+                return NetworkCheckResult(
+                    status="down",
+                    response_time_ms=None,
+                    message=f"TCP monitor has no usable port (target {monitor.target!r})",
+                )
+            config = TcpCheckConfig(
+                host=host,
+                port=port,
+                timeout_seconds=float(options.get("timeout_seconds", 5)),
+            )
+            return await run_tcp_check(config)
+
+        raise ValueError(f"Unsupported monitor type: {monitor.monitor_type}")
+
     async def _evaluate_rule(self, db: Session, monitor: Monitor, rule: AlertRule, now: datetime, message: str) -> None:
         open_incident = db.scalar(
             select(Incident).where(