Add ping and TCP monitor types

Adds ping and TCP monitor creation APIs, worker collectors, network checks UI, dashboard monitor status support, and progress documentation.
This commit is contained in:
Keith Smith
2026-05-23 15:01:57 -06:00
parent 597ff18c2a
commit 16932957b2
13 changed files with 577 additions and 35 deletions
+91 -4
View File
@@ -7,7 +7,7 @@ from sqlalchemy.orm import Session
from app.auth.dependencies import get_current_user, require_role
from app.db.session import get_db
from app.models import AlertRule, Asset, CheckResult, Incident, Monitor, User
from app.schemas.core import CheckResultRead, MonitorCreate, MonitorRead, MonitorUpdate, WebsiteMonitorCreate
from app.schemas.core import CheckResultRead, MonitorCreate, MonitorRead, MonitorUpdate, PingMonitorCreate, TcpMonitorCreate, WebsiteMonitorCreate
router = APIRouter(prefix="/monitors", tags=["monitors"])
@@ -80,6 +80,93 @@ def create_website_monitor(
return monitor
@router.post("/ping", response_model=MonitorRead)
def create_ping_monitor(
    payload: PingMonitorCreate,
    _: User = Depends(require_role("admin")),
    db: Session = Depends(get_db),
) -> Monitor:
    """Create a ping monitor targeting ``payload.host``.

    When ``payload.create_asset`` is set, a ``host`` asset is created first
    and the monitor is attached to it. When ``payload.alert_enabled`` is set,
    an enabled alert rule with a ``status_not_up`` condition is added for the
    new monitor. The refreshed monitor row is returned.
    """
    # Each insert is flushed before its id is read by a dependent row; the
    # only explicit commit is the one at the end of the function.
    linked_asset_id: int | None = None
    if payload.create_asset:
        host_asset = Asset(
            name=payload.name,
            asset_type="host",
            address=payload.host,
            status="unknown",
            extra={},
        )
        db.add(host_asset)
        db.flush()
        linked_asset_id = host_asset.id

    ping_monitor = Monitor(
        asset_id=linked_asset_id,
        name=payload.name,
        monitor_type="ping",
        target=payload.host,
        config={"timeout_seconds": payload.timeout_seconds},
        interval_seconds=payload.interval_seconds,
        status="unknown",
    )
    db.add(ping_monitor)
    db.flush()

    if payload.alert_enabled:
        failure_rule = AlertRule(
            monitor_id=ping_monitor.id,
            name=f"{payload.name} ping failure",
            severity=payload.alert_severity,
            condition={"type": "status_not_up"},
            failure_threshold=payload.failure_threshold,
            cooldown_seconds=300,
            is_enabled=True,
        )
        db.add(failure_rule)

    db.commit()
    db.refresh(ping_monitor)
    return ping_monitor
@router.post("/tcp", response_model=MonitorRead)
def create_tcp_monitor(
    payload: TcpMonitorCreate,
    _: User = Depends(require_role("admin")),
    db: Session = Depends(get_db),
) -> Monitor:
    """Create a TCP monitor targeting ``payload.host``:``payload.port``.

    When ``payload.create_asset`` is set, a ``tcp_service`` asset is created
    first and the monitor is attached to it. When ``payload.alert_enabled`` is
    set, an enabled alert rule with a ``status_not_up`` condition is added for
    the new monitor. The refreshed monitor row is returned.
    """
    # The combined "host:port" string is used as the monitor target and the
    # asset address; host and port are also stored separately in the config.
    # NOTE(review): an IPv6 literal host makes this string ambiguous - confirm
    # how consumers of ``target`` parse it.
    endpoint = f"{payload.host}:{payload.port}"

    # Each insert is flushed before its id is read by a dependent row; the
    # only explicit commit is the one at the end of the function.
    linked_asset_id: int | None = None
    if payload.create_asset:
        service_asset = Asset(
            name=payload.name,
            asset_type="tcp_service",
            address=endpoint,
            status="unknown",
            extra={},
        )
        db.add(service_asset)
        db.flush()
        linked_asset_id = service_asset.id

    connection_settings = {
        "host": payload.host,
        "port": payload.port,
        "timeout_seconds": payload.timeout_seconds,
    }
    tcp_monitor = Monitor(
        asset_id=linked_asset_id,
        name=payload.name,
        monitor_type="tcp",
        target=endpoint,
        config=connection_settings,
        interval_seconds=payload.interval_seconds,
        status="unknown",
    )
    db.add(tcp_monitor)
    db.flush()

    if payload.alert_enabled:
        failure_rule = AlertRule(
            monitor_id=tcp_monitor.id,
            name=f"{payload.name} TCP connection failure",
            severity=payload.alert_severity,
            condition={"type": "status_not_up"},
            failure_threshold=payload.failure_threshold,
            cooldown_seconds=300,
            is_enabled=True,
        )
        db.add(failure_rule)

    db.commit()
    db.refresh(tcp_monitor)
    return tcp_monitor
@router.get("/{monitor_id}", response_model=MonitorRead)
def get_monitor(monitor_id: int, _: User = Depends(get_current_user), db: Session = Depends(get_db)) -> Monitor:
monitor = db.get(Monitor, monitor_id)
@@ -110,7 +197,7 @@ def update_monitor(
@router.delete("/{monitor_id}", status_code=204)
def delete_monitor(
monitor_id: int,
cleanup_orphan_website_asset: bool = True,
cleanup_orphan_asset: bool = True,
_: User = Depends(require_role("admin")),
db: Session = Depends(get_db),
) -> None:
@@ -129,10 +216,10 @@ def delete_monitor(
db.delete(monitor)
db.flush()
if cleanup_orphan_website_asset and asset_id is not None:
if cleanup_orphan_asset and asset_id is not None:
remaining = db.scalar(select(func.count(Monitor.id)).where(Monitor.asset_id == asset_id))
asset = db.get(Asset, asset_id)
if remaining == 0 and asset is not None and asset.asset_type == "website":
if remaining == 0 and asset is not None and asset.asset_type in {"website", "host", "tcp_service"}:
db.delete(asset)
db.commit()
+23
View File
@@ -73,6 +73,29 @@ class WebsiteMonitorCreate(BaseModel):
failure_threshold: int = Field(default=3, ge=1, le=20)
class PingMonitorCreate(BaseModel):
    """Request body for creating a ping monitor."""

    # Required, non-empty identification of the monitor and its target.
    name: str = Field(..., min_length=1, max_length=160)
    host: str = Field(..., min_length=1, max_length=255)

    # Check timing: timeout is limited to 1-60, interval has a floor of 10.
    timeout_seconds: int = Field(5, ge=1, le=60)
    interval_seconds: int = Field(60, ge=10)

    # Whether to create a backing asset alongside the monitor.
    create_asset: bool = True

    # Alerting options; the failure threshold is limited to 1-20.
    alert_enabled: bool = True
    alert_severity: str = "warning"
    failure_threshold: int = Field(3, ge=1, le=20)
class TcpMonitorCreate(BaseModel):
    """Request body for creating a TCP connection monitor."""

    # Required, non-empty identification of the monitor and its target.
    name: str = Field(..., min_length=1, max_length=160)
    host: str = Field(..., min_length=1, max_length=255)
    # Required port, limited to the range 1-65535.
    port: int = Field(..., ge=1, le=65535)

    # Check timing: timeout is limited to 1-60, interval has a floor of 10.
    timeout_seconds: int = Field(5, ge=1, le=60)
    interval_seconds: int = Field(60, ge=10)

    # Whether to create a backing asset alongside the monitor.
    create_asset: bool = True

    # Alerting options; the failure threshold is limited to 1-20.
    alert_enabled: bool = True
    alert_severity: str = "warning"
    failure_threshold: int = Field(3, ge=1, le=20)
class CheckResultRead(BaseModel):
id: int
monitor_id: int