From 666395ffb08a10bc6543c48d3da1d26cd622e21f Mon Sep 17 00:00:00 2001 From: Ali Maher Date: Thu, 14 May 2026 22:21:18 +0300 Subject: [PATCH 1/8] Add CPU/GPU Dockerfiles and update .dockerignore\n\nGenerated with [Continue](https://continue.dev)\n\nCo-Authored-By: Continue --- .dockerignore | 15 ++++++ Dockerfiles/.gitkeep | 0 Dockerfiles/CPU.Dockerfile | 97 +++++++++++++++++++++++++++++++++++++ Dockerfiles/Dockerfile.cpu | 97 +++++++++++++++++++++++++++++++++++++ Dockerfiles/GPU.Dockerfile | 98 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 307 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfiles/.gitkeep create mode 100644 Dockerfiles/CPU.Dockerfile create mode 100644 Dockerfiles/Dockerfile.cpu create mode 100644 Dockerfiles/GPU.Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..703a52d --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +.git +__pycache__/ +*.pyc +venv/ +.env.example +.env.cloud_brain.example +.continue/ +logs/ +data/ +*.db +*.sqlite3 +*.log +*.mp3 +*.wav +*.ogg \ No newline at end of file diff --git a/Dockerfiles/.gitkeep b/Dockerfiles/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/Dockerfiles/CPU.Dockerfile b/Dockerfiles/CPU.Dockerfile new file mode 100644 index 0000000..e7c9c62 --- /dev/null +++ b/Dockerfiles/CPU.Dockerfile @@ -0,0 +1,97 @@ +# syntax=docker/dockerfile:1.7 + +# ---------- Stage 1: builder ---------- +# Build wheels in an isolated stage so the final image stays small and clean. +FROM python:3.12-slim-bookworm AS builder + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_ROOT_USER_ACTION=ignore + +# Build deps for native wheels (webrtcvad, sounddevice CFFI, opencv headers, etc.). 
+RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + gcc \ + g++ \ + python3-dev \ + portaudio19-dev \ + libsndfile1 \ + libgl1 \ + libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Install Python deps into an isolated virtualenv we will copy across. +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:${PATH}" + +WORKDIR /build + +# Install CPU-only torch first so ultralytics doesn't pull the multi-GB CUDA build. +# This service offloads vision inference to a remote vLLM server; the only local +# model is YOLOv8-nano, which runs fine on CPU. +RUN pip install --upgrade pip \ + && pip install --index-url https://download.pytorch.org/whl/cpu \ + "torch>=2.2,<2.6" "torchvision>=0.17,<0.21" + +COPY requirements.txt ./ +RUN pip install -r requirements.txt + + +# ---------- Stage 2: runtime ---------- +FROM python:3.12-slim-bookworm AS runtime + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATH="/opt/venv/bin:${PATH}" \ + APP_HOME=/app \ + DATA_DIR=/data \ + PORT=8000 + +# Runtime-only system deps: +# ffmpeg — TTS/audio format conversion (required by main.py) +# libgl1, libglib — OpenCV runtime +# libsndfile1 — soundfile / sounddevice runtime +# libportaudio2 — sounddevice runtime (mic capture) +# curl — HEALTHCHECK +# tini handles PID 1 signal forwarding so SIGTERM cleanly stops uvicorn. +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + libsndfile1 \ + libportaudio2 \ + tini \ + curl \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Non-root user with a fixed UID so file permissions on mounted PVs are predictable. +RUN groupadd --system --gid 1000 rioc \ + && useradd --system --uid 1000 --gid rioc --home-dir ${APP_HOME} --shell /sbin/nologin rioc + +COPY --from=builder /opt/venv /opt/venv + +WORKDIR ${APP_HOME} + +# Copy only application source — config/secrets are injected at runtime. 
+COPY --chown=rioc:rioc *.py ./ +COPY --chown=rioc:rioc scripts/ ./scripts/ +COPY --chown=rioc:rioc mediamtx.yml ./ + +# Persistent state lives outside the image so it survives restarts and rebuilds. +# Mount a PV at /data in k8s; the app's SQLite DB and audio logs are symlinked in. +RUN mkdir -p ${DATA_DIR} ${APP_HOME}/audio_logs \ + && ln -sf ${DATA_DIR}/ai_guard.db ${APP_HOME}/ai_guard.db \ + && chown -R rioc:rioc ${DATA_DIR} ${APP_HOME} + +USER rioc + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -fsS "http://127.0.0.1:${PORT}/events" > /dev/null || exit 1 + +ENTRYPOINT ["/usr/bin/tini", "--"] +CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port ${PORT}"] \ No newline at end of file diff --git a/Dockerfiles/Dockerfile.cpu b/Dockerfiles/Dockerfile.cpu new file mode 100644 index 0000000..8ebd122 --- /dev/null +++ b/Dockerfiles/Dockerfile.cpu @@ -0,0 +1,97 @@ +# syntax=docker/dockerfile:1.7 + +# ---------- Stage 1: builder ---------- +# Build wheels in an isolated stage so the final image stays small and clean. +FROM python:3.12-slim-bookworm AS builder + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_ROOT_USER_ACTION=ignore + +# Build deps for native wheels (webrtcvad, sounddevice CFFI, opencv headers, etc.). +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + gcc \ + g++ \ + python3-dev \ + portaudio19-dev \ + libsndfile1 \ + libgl1 \ + libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Install Python deps into an isolated virtualenv we will copy across. +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:${PATH}" + +WORKDIR /build + +# Install CPU-only torch so ultralytics doesn't pull the multi-GB CUDA build. +# This service offloads vision inference to a remote vLLM server; the only local +# model is YOLOv8-nano, which runs fine on CPU. 
+RUN pip install --upgrade pip \ + && pip install --index-url https://download.pytorch.org/whl/cpu \ + "torch>=2.2,<2.6" "torchvision>=0.17,<0.21" + +COPY requirements.txt ./ +RUN pip install -r requirements.txt + + +# ---------- Stage 2: runtime ---------- +FROM python:3.12-slim-bookworm AS runtime + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATH="/opt/venv/bin:${PATH}" \ + APP_HOME=/app \ + DATA_DIR=/data \ + PORT=8000 + +# Runtime-only system deps: +# ffmpeg — TTS/audio format conversion (required by main.py) +# libgl1, libglib — OpenCV runtime +# libsndfile1 — soundfile / sounddevice runtime +# libportaudio2 — sounddevice runtime (mic capture) +# curl — HEALTHCHECK +# tini handles PID 1 signal forwarding so SIGTERM cleanly stops uvicorn. +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + libsndfile1 \ + libportaudio2 \ + tini \ + curl \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Non-root user with a fixed UID so file permissions on mounted PVs are predictable. +RUN groupadd --system --gid 1000 rioc \ + && useradd --system --uid 1000 --gid rioc --home-dir ${APP_HOME} --shell /sbin/nologin rioc + +COPY --from=builder /opt/venv /opt/venv + +WORKDIR ${APP_HOME} + +# Copy only application source — config/secrets are injected at runtime. +COPY --chown=rioc:rioc *.py ./ +COPY --chown=rioc:rioc scripts/ ./scripts/ +COPY --chown=rioc:rioc mediamtx.yml ./ + +# Persistent state lives outside the image so it survives restarts and rebuilds. +# Mount a PV at /data in k8s; the app's SQLite DB and audio logs are symlinked in. 
+RUN mkdir -p ${DATA_DIR} ${APP_HOME}/audio_logs \ + && ln -sf ${DATA_DIR}/ai_guard.db ${APP_HOME}/ai_guard.db \ + && chown -R rioc:rioc ${DATA_DIR} ${APP_HOME} + +USER rioc + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -fsS "http://127.0.0.1:${PORT}/events" >/dev/null || exit 1 + +ENTRYPOINT ["/usr/bin/tini", "--"] +CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port ${PORT}"] diff --git a/Dockerfiles/GPU.Dockerfile b/Dockerfiles/GPU.Dockerfile new file mode 100644 index 0000000..e9546a2 --- /dev/null +++ b/Dockerfiles/GPU.Dockerfile @@ -0,0 +1,98 @@ +# syntax=docker/dockerfile:1.7 + +# ---------- Stage 1: builder ---------- +# Build wheels in an isolated stage so the final image stays small and clean. +FROM nvidia/cuda:12.1.0-base AS builder + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_ROOT_USER_ACTION=ignore + +# Build deps for native wheels (webrtcvad, sounddevice CFFI, opencv headers, etc.). +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + gcc \ + g++ \ + python3-dev \ + portaudio19-dev \ + libsndfile1 \ + libgl1 \ + libglib2.0-0 \ + nvidia-cuda-toolkit \ + && rm -rf /var/lib/apt/lists/* + +# Install Python deps into an isolated virtualenv we will copy across. 
+RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:${PATH}" + +WORKDIR /build + +# Install GPU-enabled torch +RUN pip install --upgrade pip \ + && pip install \ + "torch>=2.2,<2.6" \ + "torchvision>=0.17,<0.21" + +COPY requirements.txt ./ +RUN pip install -r requirements.txt + + +# ---------- Stage 2: runtime ---------- +FROM nvidia/cuda:12.1.0-base AS runtime + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATH="/opt/venv/bin:${PATH}" \ + APP_HOME=/app \ + DATA_DIR=/data \ + PORT=8000 + +# Runtime-only system deps: +# ffmpeg — TTS/audio format conversion (required by main.py) +# libgl1, libglib — OpenCV runtime +# libsndfile1 — soundfile / sounddevice runtime +# libportaudio2 — sounddevice runtime (mic capture) +# curl — HEALTHCHECK +# tini handles PID 1 signal forwarding so SIGTERM cleanly stops uvicorn. +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + libsndfile1 \ + libportaudio2 \ + tini \ + curl \ + ca-certificates \ + nvidia-container-runtime \ + && rm -rf /var/lib/apt/lists/* + +# Non-root user with a fixed UID so file permissions on mounted PVs are predictable. +RUN groupadd --system --gid 1000 rioc \ + && useradd --system --uid 1000 --gid rioc --home-dir ${APP_HOME} --shell /sbin/nologin rioc + +COPY --from=builder /opt/venv /opt/venv + +WORKDIR ${APP_HOME} + +# Copy only application source — config/secrets are injected at runtime. +COPY --chown=rioc:rioc *.py ./ +COPY --chown=rioc:rioc scripts/ ./scripts/ +COPY --chown=rioc:rioc mediamtx.yml ./ + +# Persistent state lives outside the image so it survives restarts and rebuilds. +# Mount a PV at /data in k8s; the app's SQLite DB and audio logs are symlinked in. 
+RUN mkdir -p ${DATA_DIR} ${APP_HOME}/audio_logs \ + && ln -sf ${DATA_DIR}/ai_guard.db ${APP_HOME}/ai_guard.db \ + && chown -R rioc:rioc ${DATA_DIR} ${APP_HOME} + +USER rioc + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -fsS "http://127.0.0.1:${PORT}/events" > /dev/null || exit 1 + +ENTRYPOINT ["/usr/bin/tini", "--"] +CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port ${PORT}"] \ No newline at end of file From e2898c648b24b03a6083430d93dacc6cb1b25c8c Mon Sep 17 00:00:00 2001 From: Ali Maher Date: Fri, 15 May 2026 07:17:35 +0300 Subject: [PATCH 2/8] feat: add Dockerfile for CPU-only build and remove GPU Dockerfile; create DevOps deployment guide --- .continue/mcpServers/new-mcp-server.yaml | 0 Dockerfile | 91 ++++++++ Dockerfiles/GPU.Dockerfile | 98 --------- GUIDE.md | 263 +++++++++++++++++++++++ Jenkinsfile-CI copy.groovy | 180 ++++++++++++++++ Jenkinsfile-CI.groovy | 124 +++++++++++ 6 files changed, 658 insertions(+), 98 deletions(-) create mode 100644 .continue/mcpServers/new-mcp-server.yaml create mode 100644 Dockerfile delete mode 100644 Dockerfiles/GPU.Dockerfile create mode 100644 GUIDE.md create mode 100644 Jenkinsfile-CI copy.groovy create mode 100644 Jenkinsfile-CI.groovy diff --git a/.continue/mcpServers/new-mcp-server.yaml b/.continue/mcpServers/new-mcp-server.yaml new file mode 100644 index 0000000..e69de29 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ed52567 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,91 @@ +# syntax=docker/dockerfile:1.7 + +# Security-hardened CPU build +FROM python:3.12-slim-bookworm AS builder + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_ROOT_USER_ACTION=ignore \ + UMASK=0022 + +# Minimal build deps. Unpinned on purpose: the bookworm base tag fixes the release, and the former "=x.y" pins matched no bookworm package, so apt-get install failed. +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + gcc \ + python3-dev \ +
portaudio19-dev \ + libsndfile1 \ + libgl1 \ + libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Isolated virtualenv with secure permissions +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:${PATH}" + +WORKDIR /build + +# CPU-only PyTorch, pinned. "+cpu" wheels exist only on the PyTorch CPU index; requirements.txt is bind-mounted from the build context (BuildKit) for this step. +RUN --mount=type=bind,source=requirements.txt,target=/build/requirements.txt pip install --upgrade pip \ + && pip install --index-url https://download.pytorch.org/whl/cpu \ + "torch==2.4.1+cpu" \ + "torchvision==0.19.1+cpu" \ + && pip install -r requirements.txt + +# ---------- Stage 2: runtime ---------- +FROM python:3.12-slim-bookworm AS runtime + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATH="/opt/venv/bin:${PATH}" \ + APP_HOME=/app \ + DATA_DIR=/data \ + PORT=8000 \ + UMASK=0022 + +# Minimal runtime deps; versions follow the pinned bookworm base image (explicit pins must be exact Debian versions or apt fails) +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + libsndfile1 \ + libportaudio2 \ + tini \ + curl \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +# Non-root user with minimal permissions +RUN groupadd --system --gid 1000 rioc \ + && useradd --system --uid 1000 --gid rioc --home-dir ${APP_HOME} --shell /sbin/nologin rioc + +# App files are copied to ${APP_HOME} explicitly because WORKDIR is only set further down; uvicorn imports main from there +COPY --from=builder /opt/venv /opt/venv +COPY --chown=rioc:rioc *.py ${APP_HOME}/ +COPY --chown=rioc:rioc scripts/ ${APP_HOME}/scripts/ +COPY --chown=rioc:rioc mediamtx.yml ${APP_HOME}/ 
+ +WORKDIR ${APP_HOME} + +# Secure data directory setup +RUN mkdir -p ${DATA_DIR} ${APP_HOME}/audio_logs \ + && chown -R rioc:rioc ${DATA_DIR} ${APP_HOME} \ + && ln -sf ${DATA_DIR}/ai_guard.db ${APP_HOME}/ai_guard.db + +# Security headers and restrictions +LABEL org.label-schema.vcs-url="https://github.com/your-org/rioc" \ + org.label-schema.version="1.0.0" \ + org.label-schema.license="MIT" \ + org.label-schema.build-date="2026-05-14" + +USER rioc + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ + CMD curl -fsS "http://127.0.0.1:${PORT}/events" > /dev/null || exit 1 + +ENTRYPOINT ["/usr/bin/tini", "--"] +CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port ${PORT} --limit-concurrency 100"] \ No newline at end of file diff --git a/Dockerfiles/GPU.Dockerfile b/Dockerfiles/GPU.Dockerfile deleted file mode 100644 index e9546a2..0000000 --- a/Dockerfiles/GPU.Dockerfile +++ /dev/null @@ -1,98 +0,0 @@ -# syntax=docker/dockerfile:1.7 - -# ---------- Stage 1: builder ---------- -# Build wheels in an isolated stage so the final image stays small and clean. -FROM nvidia/cuda:12.1.0-base AS builder - -ENV PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 \ - PIP_NO_CACHE_DIR=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 \ - PIP_ROOT_USER_ACTION=ignore - -# Build deps for native wheels (webrtcvad, sounddevice CFFI, opencv headers, etc.). -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - gcc \ - g++ \ - python3-dev \ - portaudio19-dev \ - libsndfile1 \ - libgl1 \ - libglib2.0-0 \ - nvidia-cuda-toolkit \ - && rm -rf /var/lib/apt/lists/* - -# Install Python deps into an isolated virtualenv we will copy across. 
-RUN python -m venv /opt/venv -ENV PATH="/opt/venv/bin:${PATH}" - -WORKDIR /build - -# Install GPU-enabled torch -RUN pip install --upgrade pip \ - && pip install \ - "torch>=2.2,<2.6" \ - "torchvision>=0.17,<0.21" - -COPY requirements.txt ./ -RUN pip install -r requirements.txt - - -# ---------- Stage 2: runtime ---------- -FROM nvidia/cuda:12.1.0-base AS runtime - -ENV PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 \ - PATH="/opt/venv/bin:${PATH}" \ - APP_HOME=/app \ - DATA_DIR=/data \ - PORT=8000 - -# Runtime-only system deps: -# ffmpeg — TTS/audio format conversion (required by main.py) -# libgl1, libglib — OpenCV runtime -# libsndfile1 — soundfile / sounddevice runtime -# libportaudio2 — sounddevice runtime (mic capture) -# curl — HEALTHCHECK -# tini handles PID 1 signal forwarding so SIGTERM cleanly stops uvicorn. -RUN apt-get update && apt-get install -y --no-install-recommends \ - ffmpeg \ - libgl1 \ - libglib2.0-0 \ - libsndfile1 \ - libportaudio2 \ - tini \ - curl \ - ca-certificates \ - nvidia-container-runtime \ - && rm -rf /var/lib/apt/lists/* - -# Non-root user with a fixed UID so file permissions on mounted PVs are predictable. -RUN groupadd --system --gid 1000 rioc \ - && useradd --system --uid 1000 --gid rioc --home-dir ${APP_HOME} --shell /sbin/nologin rioc - -COPY --from=builder /opt/venv /opt/venv - -WORKDIR ${APP_HOME} - -# Copy only application source — config/secrets are injected at runtime. -COPY --chown=rioc:rioc *.py ./ -COPY --chown=rioc:rioc scripts/ ./scripts/ -COPY --chown=rioc:rioc mediamtx.yml ./ - -# Persistent state lives outside the image so it survives restarts and rebuilds. -# Mount a PV at /data in k8s; the app's SQLite DB and audio logs are symlinked in. 
-RUN mkdir -p ${DATA_DIR} ${APP_HOME}/audio_logs \ - && ln -sf ${DATA_DIR}/ai_guard.db ${APP_HOME}/ai_guard.db \ - && chown -R rioc:rioc ${DATA_DIR} ${APP_HOME} - -USER rioc - -EXPOSE 8000 - -HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ - CMD curl -fsS "http://127.0.0.1:${PORT}/events" > /dev/null || exit 1 - -ENTRYPOINT ["/usr/bin/tini", "--"] -CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port ${PORT}"] \ No newline at end of file diff --git a/GUIDE.md b/GUIDE.md new file mode 100644 index 0000000..b3c42eb --- /dev/null +++ b/GUIDE.md @@ -0,0 +1,263 @@ +# Rioc — DevOps Deployment Guide + +Operational reference for building, deploying, and running **rioc** (AI Guard backend) in shared infrastructure. + +For app behavior, endpoints, and feature flags see [README.md](README.md). This guide only covers what DevOps needs to ship and run the service. + +--- + +## 1. Service profile + +| Field | Value | +|------|-------| +| App name (CI / k8s) | `rioc` | +| Runtime | Python 3.12, FastAPI / uvicorn (single process, async) | +| Listen port | `8000/tcp` (HTTP + SSE + WebSocket-out) | +| Protocol | HTTP/1.1 (no HTTPS termination in-pod — front with ingress/LB) | +| GPU required? | **No.** All heavy vision inference is offloaded to an external vLLM server. Local YOLOv8-nano runs on CPU. | +| State | SQLite file (`ai_guard.db`) + on-disk audio captures (`audio_logs/`) | +| Stateless? | **No** — see [§5 Persistence](#5-persistence). Single replica unless DB is moved off-box. | +| Health endpoint | `GET /events` (returns 200 once the app loop is up) | +| Slack / Logs | stdout/stderr, line-buffered (`PYTHONUNBUFFERED=1`) | + +--- + +## 2. Build + +### CI (Jenkins) + +The pipeline definition is `Jenkinsfile-CI.groovy`. It: + +1. Clones the shared `cloud-infrastructure` repo for build/push helper scripts. +2. Builds a single Docker image from the repo-root `Dockerfile` (no GPU SKU variants). +3. 
Pushes the image tagged with the short git SHA to the registry configured for `GCE_ENVIRONMENT` (default: `dev-dc-03`). +4. Posts Slack notifications on start / success / failure. + +Branch + image-tag convention: `${BUILD_NUMBER}-${branch}-${git-sha}` (set as the Jenkins build display name; the registry tag is the short git SHA only). + +### Local build + +```bash +docker build -t rioc:dev . +docker run --rm -p 8000:8000 --env-file .env rioc:dev +``` + +The image is multi-stage and CPU-only. Approximate final size: **~1.6–1.8 GB** (most of which is `torch` CPU wheels + opencv + ffmpeg). Do **not** swap to the CUDA torch build — it is unnecessary and triples the image size. + +--- + +## 3. Runtime configuration + +All configuration is environment-variable driven. **No config files** are baked into the image — supply env vars via a k8s `ConfigMap` (non-secret) and `Secret` (secret). + +### Required env vars + +| Var | Source | Notes | +|-----|--------|-------| +| `OPENAI_STT_API_KEY` | **Secret** | OpenAI key for Whisper STT + tts-1 | +| `CLOUD_AI_URL` | ConfigMap | vLLM base URL incl. `/v1` (e.g. `http://vllm.rioc.svc:8100/v1`) | +| `CLOUD_AI_API_KEY` | **Secret** | Token for the vLLM server | +| `ENABLE_CLOUD_AI` | ConfigMap | Set to `1` in prod | + +### Common optional env vars + +See the full table in [README.md §Configuration](README.md#configuration). Production-relevant ones: + +| Var | Recommended value | Why | +|-----|-------------------|-----| +| `FRAME_SOURCE` | `webhook` | Production path — frames arrive via `POST /api/person-detected` from CVR. No local camera needed. | +| `ENABLE_AUDIO_STT` | `1` if mic device is attached | Otherwise leave unset; the app skips mic loop. | +| `ENABLE_SPEAKER_TTS` | `1` | TTS output through IP speaker. | +| `SPEAKER_URL` / `SPEAKER_WS_URL` | ConfigMap | Per-deployment speaker endpoints. | +| `SPEAKER_USER` / `SPEAKER_PASS` | **Secret** | Fanvil basic-auth creds. 
| +| `MINICPMO_API_KEY` | **Secret** | Token for the MiniCPM-o conversation server. | +| `AUDIT_INTERVAL_SEC` | `2.0` | Lower = more LLM cost. | + +### What **must not** ship in the image + +- `.env` — excluded by `.dockerignore`. All secrets come from k8s `Secret`s at pod start. +- `ai_guard.db` — runtime state, lives on a PV (see §5). +- `*.pt` YOLO weights — auto-downloaded by ultralytics on first run; cache to a PV if cold-start matters. + +--- + +## 4. External dependencies + +The pod has hard runtime dependencies on these services. Network policies must allow the traffic listed for each: + +| Dep | Required? | Direction | Notes | +|-----|-----------|-----------|-------| +| vLLM server (MiniCPM-o / MiniCPM-V) | Yes (when `ENABLE_CLOUD_AI=1`) | egress HTTP(S) → `CLOUD_AI_URL` | Self-hosted GPU instance; not in this chart | +| OpenAI API | Yes (when audio is enabled) | egress HTTPS → `api.openai.com` | STT + TTS | +| MiniCPM-o conversation server | When conversations are enabled | egress HTTP(S) → `MINICPMO_URL` | Separate vLLM endpoint on port 8101 | +| IP speaker (Fanvil) | When `ENABLE_SPEAKER_TTS=1` | egress HTTP(S) + WSS → `SPEAKER_URL` | On-prem device on the camera VLAN | +| CVR / camera webhooks | Always (webhook mode) | **ingress** HTTP → pod `:8000` | CVR must be able to reach the rioc Service | +| VideoDB | Optional (`ENABLE_VIDEODB=1`) | egress HTTPS | Off by default | + +No Redis, no managed SQL DB, no message queue. + +--- + +## 5. Persistence + +The service writes two things to disk: + +| Path in container | What | Survives restart? | +|-------------------|------|-------------------| +| `/data/ai_guard.db` (symlinked from `/app/ai_guard.db`) | SQLite — conversation history | Required | +| `/app/audio_logs/` | Captured PCM/WAV from conversations | Nice-to-have | + +**Mount a PV at `/data`.** A 5–10 GB PV is enough for years of conversation metadata. 
Audio logs grow faster; either size for retention or mount a separate PV at `/app/audio_logs` and prune on a CronJob. + +Because the DB is local SQLite, **`replicas` must stay at `1`**. To horizontally scale, the database must first be migrated off-box (Postgres) — out of scope for this image. + +If using a `StatefulSet`, set `volumeClaimTemplates` for `/data`. If using a `Deployment`, attach a `ReadWriteOnce` PVC and set `strategy.type: Recreate` so the next pod can claim it. + +--- + +## 6. Kubernetes manifest sketch + +Replace `<IMAGE>` with the registry path and tag produced by the CI job (`rioc:<git-sha>`). + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: rioc +spec: + replicas: 1 + strategy: + type: Recreate # SQLite + RWO PVC — no rolling updates + selector: + matchLabels: { app: rioc } + template: + metadata: + labels: { app: rioc } + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + containers: + - name: rioc + image: <IMAGE> + ports: + - { containerPort: 8000, name: http } + envFrom: + - configMapRef: { name: rioc-config } + - secretRef: { name: rioc-secrets } + resources: + requests: { cpu: "500m", memory: "1Gi" } + limits: { cpu: "2", memory: "3Gi" } + readinessProbe: + httpGet: { path: /events, port: http } + initialDelaySeconds: 15 + periodSeconds: 10 + livenessProbe: + httpGet: { path: /events, port: http } + initialDelaySeconds: 60 + periodSeconds: 30 + failureThreshold: 3 + volumeMounts: + - { name: data, mountPath: /data } + - { name: audio-logs, mountPath: /app/audio_logs } + volumes: + - name: data + persistentVolumeClaim: { claimName: rioc-data } + - name: audio-logs + persistentVolumeClaim: { claimName: rioc-audio-logs } +--- +apiVersion: v1 +kind: Service +metadata: { name: rioc } +spec: + selector: { app: rioc } + ports: + - { name: http, port: 80, targetPort: 8000 } +``` + +### Resource sizing + +- **CPU**: idle ~50m, ~1 vCPU during YOLO inference if local detection is on. 
In webhook mode, YOLO is bypassed and CPU stays low. +- **Memory**: ~700 MB resident at idle (torch + opencv weights). Bumps to ~1.5 GB during burst inference + frame buffering. Set requests = 1 Gi, limit = 3 Gi. +- **Ephemeral storage**: keep at default unless you mount audio_logs to emptyDir. + +### Ingress + +CVR webhooks need to reach `POST /api/person-detected`. Either: +- expose via internal Ingress/LB on the camera VLAN, or +- keep traffic inside the cluster if CVR runs in the same cluster. + +The HTML status page (`GET /`) and SSE streams (`/detections/stream`, `/conversation/stream`) are useful for operators — consider a separate auth-protected ingress. + +--- + +## 7. Observability + +- **Logs**: stdout/stderr only. Standard cluster log shipping is enough. The app prefixes lines with bracketed tags (`[Cloud AI]`, `[YOLO]`, `[Conversation]`) — filter on those in your log UI. +- **Events endpoint**: `GET /events` returns the last 500 in-memory events as JSON. Useful for quick incident triage; not a metrics source. +- **Health**: `GET /events` is the readiness/liveness probe target. There is no `/healthz` — `/events` is fast, side-effect-free, and proves the event loop is running. +- **No Prometheus metrics endpoint yet.** If you need one, that's a code change, not a deploy change. + +--- + +## 8. Image hardening notes (already applied) + +- **Multi-stage build** — build toolchain stays out of the runtime image. +- **Non-root user** `rioc` (UID 1000), home `/app`, shell `nologin`. +- **`tini` as PID 1** — clean SIGTERM handling so k8s pod termination is fast. +- **CPU-only torch wheels** — installed from `download.pytorch.org/whl/cpu`, not the default index. Saves ~2 GB. +- **`.dockerignore`** keeps `.env`, `.git`, `*.db`, `*.pt`, virtualenvs, and IDE files out of the build context. +- **`HEALTHCHECK`** baked in for Docker-only deployments; k8s uses its own probes. + +--- + +## 9. 
Common operational tasks + +### Roll a new image + +```bash +# CI does this automatically on merge to main. +# Manual override: +docker build -t <registry>/rioc:<tag> . +docker push <registry>/rioc:<tag> +kubectl set image deploy/rioc rioc=<registry>/rioc:<tag> +``` + +### Tail logs + +```bash +kubectl logs -f deploy/rioc | grep -E '\[(Cloud AI|YOLO|Conversation|Speaker)\]' +``` + +### Inspect the live DB + +```bash +kubectl exec -it deploy/rioc -- sqlite3 /data/ai_guard.db \ + 'SELECT id, started_at, outcome, turn_count FROM conversations ORDER BY id DESC LIMIT 10;' +``` + +### Reset state (destructive) + +```bash +kubectl exec -it deploy/rioc -- sh -c 'rm -f /data/ai_guard.db && rm -rf /app/audio_logs/*' +kubectl rollout restart deploy/rioc +``` + +### Verify external connectivity from inside the pod + +```bash +kubectl exec -it deploy/rioc -- sh +# inside: +curl -sS -H "Authorization: Bearer $CLOUD_AI_API_KEY" "$CLOUD_AI_URL/models" | head +curl -sS https://api.openai.com/v1/models -H "Authorization: Bearer $OPENAI_STT_API_KEY" | head +``` + +--- + +## 10. Known constraints + +- **Single replica only.** SQLite + local audio files. To scale horizontally, migrate the DB and move audio to object storage first. +- **No graceful drain of in-flight conversations.** Pod restart aborts any active two-way conversation. Avoid rolling during business hours; use `strategy: Recreate` and short `terminationGracePeriodSeconds` (default 30s is fine). +- **YOLO weights download on first start.** ~6 MB pulled from ultralytics' CDN. If the cluster blocks egress, mount a PV with the weights pre-seeded at `/app/yolov8n.pt` or set `ENABLE_YOLO=0` and rely entirely on cloud AI. +- **Microphone capture (`ENABLE_AUDIO_STT=1`) does not work in standard k8s pods** — there is no audio device. Use the IP speaker's built-in mic via the WebSocket path, or leave STT off. 
diff --git a/Jenkinsfile-CI copy.groovy b/Jenkinsfile-CI copy.groovy new file mode 100644 index 0000000..e80de89 --- /dev/null +++ b/Jenkinsfile-CI copy.groovy @@ -0,0 +1,180 @@ +#!groovy +def util_scrips +properties([ + parameters([ + [ + $class: 'CascadeChoiceParameter', + name: 'VERSIONS', + choiceType: 'PT_CHECKBOX', + description: 'Select at least one app', + filterable: false, + script: [ + $class: 'GroovyScript', + script: [ + sandbox: true, + script: "return ['3090','4090','5090','gcp']" + ] + ] + ] + ]) +]) +pipeline { + agent { + label "" + } + options { + timeout(time: 120, unit: 'MINUTES') + buildDiscarder(logRotator(numToKeepStr: '20', artifactNumToKeepStr: '20', daysToKeepStr: '90', artifactDaysToKeepStr: '4')) + } + + parameters { + string( name: 'branch', defaultValue: 'main', description: 'repo branch? [ie: main]') + string( name: 'SCRIPTS_REPO_BRANCH', defaultValue: 'kube/base', description: 'pipeline scripts repo branch ? [ie: kube/base]') + booleanParam(name: 'HELM_DRY_RUN_DEBUG', defaultValue: false, description: 'If checked, do not deploy with helm, just show what would be deployed/installed') + + } + + environment { + APP_NAME = "ml-tagger-python" + GROOVY_SCRIPTS_DIR = "scripts/groovy" + IMAGE_TAG = sh (returnStdout: true, script: "git log -n 1 --pretty=format:'%h'").trim() + GCE_ENVIRONMENT = "dev-dc-03" + } + + stages { + stage('show-env') { + steps { + sh "ls -all" + sh "env | sort" + sh "echo ${params.VERSIONS}" + sh "git clean -fdx" + } + } + + stage ('set-jenkins-build-id') { + steps { + script { + currentBuild.displayName = "$BUILD_NUMBER-$branch-$IMAGE_TAG" + } + } + } + + // stage('Git-checkout') { + // steps { + // checkout([$class : 'GitSCM', + // branches : [[name: "${branch}"]], + // doGenerateSubmoduleConfigurations: false, + // extensions : [[$class: 'SubmoduleOption', + // disableSubmodules: false, + // parentCredentials: true, + // recursiveSubmodules: true, + // reference: '', + // trackingSubmodules: false]], + // 
submoduleCfg : [], + // userRemoteConfigs : [[credentialsId: 'git-ssh-ad', url: "${env.GIT_URL}" ]]]) + // } + // } + + stage('Clone-scripts-repo') { + steps { + sh 'mkdir -p cloned_repo' + sh 'mkdir -p k8s' + dir('cloned_repo') { + git( + changelog: false, + poll: false, + credentialsId: "git-cloud-infrastructure", + url: "git@github.com:cloudastructure/cloud-infrastructure.git", + branch: "${SCRIPTS_REPO_BRANCH}" + ) + } + sh 'cp -rf ./cloned_repo/scripts .' + sh 'cp -rf ./cloned_repo/helms/deploy-app-helm k8s/' + sh 'cp -rf ./cloned_repo/helms/values/*.yaml k8s/' + sh 'rm -rf cloned_repo' + stash includes: 'scripts/**/*', name: 'scripts' + stash includes: 'k8s/**/*', name: 'k8s' + } + } + + stage('Init-Envs') { + steps { + unstash 'scripts' + script{ + util_scrips = load "${GROOVY_SCRIPTS_DIR}/Utils.groovy" + } + } + } + + stage('Slack-notification-job-started') { + steps { + script { + util_scrips.sendSlackNotification(APP_NAME, "STARTED", "BUILD", IMAGE_TAG ) + } + } + } + + + stage('Dynamic docker-build-and-push Stages') { + steps { + script { + def VERSIONS = params.VERSIONS.split(',') + for (int i = 0; i < VERSIONS.length; i++) { + def version = VERSIONS[i].trim() + stage("docker-build-and-push-${VERSIONS[i]}") { + sh 'chmod +x scripts/*' + if (version == "gcp") { + sh "echo 'Running special commands for GCP version'" + sh "scripts/custom-docker-build-path.sh ${APP_NAME} ${IMAGE_TAG} Dockerfiles Dockerfile.gcp" + sh "scripts/custom-docker-push.sh ${APP_NAME} ${GCE_ENVIRONMENT} ${IMAGE_TAG}" + + } else { + sh "scripts/custom-docker-build-path.sh ${APP_NAME}-${VERSIONS[i]} ${IMAGE_TAG} Dockerfiles Dockerfile.${VERSIONS[i]}" + sh "scripts/custom-docker-push.sh ${APP_NAME}-${VERSIONS[i]} ${GCE_ENVIRONMENT} ${IMAGE_TAG}" + + } + } + } + } + } + } + + // stage('Dynamic eploy-to-dev-gke Stages') { + // steps { + // script { + // def VERSIONS = params.VERSIONS.split(',') + + // for (int i = 0; i < VERSIONS.length; i++) { + // 
stage("deploy-to-dev-gke-${VERSIONS[i]}") { + // unstash 'k8s' + // sh "scripts/custom-deploy-helm.sh ${APP_NAME}-${VERSIONS[i]} ${GCE_ENVIRONMENT} ${IMAGE_TAG} " + // } + // } + // } + // } + // } + + + } + + post { + always { + sh 'echo [JENKINS_RESULT] ALWAYS...' + } + success { + sh 'echo [JENKINS_RESULT] SUCCESS' + script { + util_scrips.sendSlackNotification(APP_NAME, "SUCCESS", "BUILD", IMAGE_TAG ) + } + script { + util_scrips.sendSlackNotification(APP_NAME, "SUCCESS", "DEPLOYED", IMAGE_TAG, "dev" ) + } + } + failure { + sh 'echo [JENKINS_RESULT] FAILURE' + script { + util_scrips.sendSlackNotification(APP_NAME, "FAILED", "BUILD", IMAGE_TAG ) + } + } + } +} diff --git a/Jenkinsfile-CI.groovy b/Jenkinsfile-CI.groovy new file mode 100644 index 0000000..de17d5a --- /dev/null +++ b/Jenkinsfile-CI.groovy @@ -0,0 +1,124 @@ +#!groovy +def util_scrips + +pipeline { + agent { + label "" + } + + options { + timeout(time: 60, unit: 'MINUTES') + buildDiscarder(logRotator(numToKeepStr: '20', artifactNumToKeepStr: '20', daysToKeepStr: '90', artifactDaysToKeepStr: '4')) + } + + parameters { + string( name: 'branch', defaultValue: 'main', description: 'repo branch? [ie: main]') + string( name: 'SCRIPTS_REPO_BRANCH', defaultValue: 'kube/base', description: 'pipeline scripts repo branch ? 
[ie: kube/base]') + booleanParam(name: 'HELM_DRY_RUN_DEBUG', defaultValue: false, description: 'If checked, do not deploy with helm, just show what would be deployed/installed') + } + + environment { + APP_NAME = "rioc" + GROOVY_SCRIPTS_DIR = "scripts/groovy" + IMAGE_TAG = sh (returnStdout: true, script: "git log -n 1 --pretty=format:'%h'").trim() + GCE_ENVIRONMENT = "dev-dc-03" + } + + stages { + stage('show-env') { + steps { + sh "ls -all" + sh "env | sort" + sh "git clean -fdx" + } + } + + stage('set-jenkins-build-id') { + steps { + script { + currentBuild.displayName = "$BUILD_NUMBER-$branch-$IMAGE_TAG" + } + } + } + + stage('Clone-scripts-repo') { + steps { + sh 'mkdir -p cloned_repo' + sh 'mkdir -p k8s' + dir('cloned_repo') { + git( + changelog: false, + poll: false, + credentialsId: "git-cloud-infrastructure", + url: "git@github.com:cloudastructure/cloud-infrastructure.git", + branch: "${SCRIPTS_REPO_BRANCH}" + ) + } + // The repo has its own scripts/ dir (videodb_rtsp.sh). Pull the shared + // CI scripts into ci-scripts/ to avoid clobbering app scripts that ship + // in the Docker image. + sh 'rm -rf ci-scripts' + sh 'cp -rf ./cloned_repo/scripts ci-scripts' + sh 'cp -rf ./cloned_repo/helms/deploy-app-helm k8s/' + sh 'cp -rf ./cloned_repo/helms/values/*.yaml k8s/' + sh 'rm -rf cloned_repo' + stash includes: 'ci-scripts/**/*', name: 'ci-scripts' + stash includes: 'k8s/**/*', name: 'k8s' + } + } + + stage('Init-Envs') { + steps { + unstash 'ci-scripts' + script { + util_scrips = load "ci-scripts/groovy/Utils.groovy" + } + } + } + + stage('Slack-notification-job-started') { + steps { + script { + util_scrips.sendSlackNotification(APP_NAME, "STARTED", "BUILD", IMAGE_TAG) + } + } + } + + stage('docker-build-and-push') { + steps { + sh 'chmod +x ci-scripts/*' + // Single Dockerfile at repo root — no GPU variants. The runtime image + // is CPU-only; vision inference is offloaded to a remote vLLM server. 
+ sh "ci-scripts/custom-docker-build-path.sh ${APP_NAME} ${IMAGE_TAG} . Dockerfile" + sh "ci-scripts/custom-docker-push.sh ${APP_NAME} ${GCE_ENVIRONMENT} ${IMAGE_TAG}" + } + } + + // Deployment is handled by a separate CD pipeline. Uncomment to deploy from CI: + // + // stage('deploy-to-dev-gke') { + // steps { + // unstash 'k8s' + // sh "ci-scripts/custom-deploy-helm.sh ${APP_NAME} ${GCE_ENVIRONMENT} ${IMAGE_TAG}" + // } + // } + } + + post { + always { + sh 'echo [JENKINS_RESULT] ALWAYS...' + } + success { + sh 'echo [JENKINS_RESULT] SUCCESS' + script { + util_scrips.sendSlackNotification(APP_NAME, "SUCCESS", "BUILD", IMAGE_TAG) + } + } + failure { + sh 'echo [JENKINS_RESULT] FAILURE' + script { + util_scrips.sendSlackNotification(APP_NAME, "FAILED", "BUILD", IMAGE_TAG) + } + } + } +} From 5ace0fe3921ffc590231ee5ebe7002c6c60abbe4 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 15 May 2026 12:16:24 +0200 Subject: [PATCH 3/8] ci: honor `branch` parameter in CI Jenkinsfile The branch parameter was only used for the build display name, which made it misleading. Add a checkout-app stage that uses params.branch to switch the actual checkout, and move IMAGE_TAG computation after checkout so it reflects the requested branch. --- Jenkinsfile-CI.groovy | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile-CI.groovy b/Jenkinsfile-CI.groovy index de17d5a..9c4a520 100644 --- a/Jenkinsfile-CI.groovy +++ b/Jenkinsfile-CI.groovy @@ -20,11 +20,27 @@ pipeline { environment { APP_NAME = "rioc" GROOVY_SCRIPTS_DIR = "scripts/groovy" - IMAGE_TAG = sh (returnStdout: true, script: "git log -n 1 --pretty=format:'%h'").trim() GCE_ENVIRONMENT = "dev-dc-03" } stages { + stage('checkout-app') { + steps { + // Override the branch from job-config SCM with the `branch` parameter + // so manual builds can target any branch. IMAGE_TAG is computed after + // this so it reflects the requested branch, not the implicit checkout. 
+ checkout([ + $class: 'GitSCM', + branches: [[name: "*/${params.branch}"]], + extensions: scm.extensions, + userRemoteConfigs: scm.userRemoteConfigs + ]) + script { + env.IMAGE_TAG = sh(returnStdout: true, script: "git log -n 1 --pretty=format:'%h'").trim() + } + } + } + stage('show-env') { steps { sh "ls -all" From 8a9b99dc388f10610566717624e045e3c3c51185 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 15 May 2026 12:24:55 +0200 Subject: [PATCH 4/8] fix(docker): drop invalid apt version pins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pinned versions were upstream release numbers, not Debian package versions, so apt couldn't resolve any of them and the build failed. The base image tag (python:3.12-slim-bookworm) already pins the Debian release; that's the meaningful pin. Also drop python3-dev — it would install Python 3.11 headers into a 3.12 image. --- Dockerfile | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/Dockerfile b/Dockerfile index ed52567..1386406 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,15 +10,15 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ PIP_ROOT_USER_ACTION=ignore \ UMASK=0022 -# Minimal build deps with pinned versions +# Build deps. python3-dev is intentionally omitted: the python:3.12 base +# image already ships Python 3.12 headers; Bookworm's python3-dev would +# install 3.11 headers, which is the wrong Python. 
RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential=12.9 \ - gcc=4:12.2 \ - python3-dev=3.12.0 \ - portaudio19-dev=19.7.0 \ - libsndfile1=1.0.31 \ - libgl1=1.7.0 \ - libglib2.0-0=2.76.2 \ + build-essential \ + portaudio19-dev \ + libsndfile1 \ + libgl1 \ + libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* # Isolated virtualenv with secure permissions @@ -45,16 +45,15 @@ ENV PYTHONDONTWRITEBYTECODE=1 \ PORT=8000 \ UMASK=0022 -# Minimal runtime deps with pinned versions RUN apt-get update && apt-get install -y --no-install-recommends \ - ffmpeg=7:7.0.1 \ - libgl1=1.7.0 \ - libglib2.0-0=2.76.2 \ - libsndfile1=1.0.31 \ - libportaudio2=19.7.0 \ - tini=0.19.0 \ - curl=8.7.1 \ - ca-certificates=20231015 \ + ffmpeg \ + libgl1 \ + libglib2.0-0 \ + libsndfile1 \ + libportaudio2 \ + tini \ + curl \ + ca-certificates \ && rm -rf /var/lib/apt/lists/* # Non-root user with minimal permissions From 0f52654aff4007b4e3e06875e84016422378ea2d Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 15 May 2026 12:32:18 +0200 Subject: [PATCH 5/8] fix(docker): install torch from PyTorch CPU index and copy requirements The +cpu local-version wheels aren't published on PyPI, only on download.pytorch.org/whl/cpu, so the build failed to resolve torch==2.4.1+cpu. Also COPY requirements.txt before pip install -r, which was missing entirely. --- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1386406..09764b1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,12 +27,14 @@ ENV PATH="/opt/venv/bin:${PATH}" WORKDIR /build -# CPU-only PyTorch with pinned versions +COPY requirements.txt . + +# The +cpu wheels are not on PyPI; they're published only on PyTorch's CPU index. 
RUN pip install --upgrade pip \ - && pip install \ + && pip install --index-url https://download.pytorch.org/whl/cpu \ "torch==2.4.1+cpu" \ "torchvision==0.19.1+cpu" \ - && pip install -r requirements.txt + && pip install -r requirements.txt # ---------- Stage 2: runtime ---------- FROM python:3.12-slim-bookworm AS runtime From 582e20c5f7f6f95f73edb6e03d529fb25b92ff5e Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 15 May 2026 16:03:51 +0200 Subject: [PATCH 6/8] fix(docker): set WORKDIR before COPY so app files land in /app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit COPY commands ran before WORKDIR, so files landed in / instead of /app. Uvicorn then couldn't import main from /app — "Error loading ASGI app. Could not import module main". --- Dockerfile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 09764b1..3040cf2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,15 +62,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ RUN groupadd --system --gid 1000 rioc \ && useradd --system --uid 1000 --gid rioc --home-dir ${APP_HOME} --shell /sbin/nologin rioc -# Secure file permissions COPY --from=builder /opt/venv /opt/venv -COPY --chown=rioc:rioc *.py . -COPY --chown=rioc:rioc scripts/ ./scripts/ -COPY --chown=rioc:rioc mediamtx.yml . 
WORKDIR ${APP_HOME} -# Secure data directory setup +COPY --chown=rioc:rioc *.py ./ +COPY --chown=rioc:rioc scripts/ ./scripts/ +COPY --chown=rioc:rioc mediamtx.yml ./ + RUN mkdir -p ${DATA_DIR} ${APP_HOME}/audio_logs \ && chown -R rioc:rioc ${DATA_DIR} ${APP_HOME} \ && ln -sf ${DATA_DIR}/ai_guard.db ${APP_HOME}/ai_guard.db From 1a85e0e52f33feb2d556580a6f59779bf60d9da2 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 20 May 2026 19:50:29 +0200 Subject: [PATCH 7/8] add CI/CD readme file --- CI-CD-readme.md | 153 +++++++++++++++++++++++++++++++++++++++++++++++ CI.md | 154 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+) create mode 100644 CI-CD-readme.md create mode 100644 CI.md diff --git a/CI-CD-readme.md b/CI-CD-readme.md new file mode 100644 index 0000000..455349e --- /dev/null +++ b/CI-CD-readme.md @@ -0,0 +1,153 @@ +# How CI Works + +This document describes the continuous integration and continuous delivery pipelines for the `rioc` service. The CI pipeline is defined in [`Jenkinsfile-CI.groovy`](./Jenkinsfile-CI.groovy) and runs on Jenkins. + +CI in this repo does **build** only. Deployment to **Kubernetes** is handled by a separate CD pipeline - **CD-general-job**. + +--- + +## TL;DR + +On every triggered build, Jenkins: + +1. Checks out the requested branch of the `rioc` repo. +2. Clones a shared scripts repo (`cloudastructure/cloud-infrastructure`) to get build scripts and Helm charts. +3. Builds the Docker image from the repo-root `Dockerfile`. +4. Pushes the image to the GCE container registry, tagged with the short git SHA. +5. Posts STARTED / SUCCESS / FAILED notifications to Slack. + +--- + +## Pipeline parameters + +| Parameter | Default | Purpose | +|---|---|---| +| `branch` | `main` | Branch of `rioc` to build. Manual builds can target any branch. | +| `SCRIPTS_REPO_BRANCH` | `kube/base` | Branch of `cloud-infrastructure` to pull shared CI scripts and Helm values from. 
| +| `HELM_DRY_RUN_DEBUG` | `false` | Only relevant if/when the (currently commented-out) Helm deploy stage is re-enabled. | + +--- + +## What gets built (the image itself) + +The repo-root [`Dockerfile`](./Dockerfile) is a hardened multi-stage build: + +**Builder stage** (`python:3.12-slim-bookworm`): +- Installs build deps: `build-essential`, `portaudio19-dev`, `libsndfile1`, `libgl1`, `libglib2.0-0`. +- Creates `/opt/venv` and installs CPU-only PyTorch from the PyTorch CPU index (`torch==2.4.1+cpu`, `torchvision==0.19.1+cpu`), then `requirements.txt` on top. +- The `+cpu` wheels are not on PyPI — they live only on `download.pytorch.org/whl/cpu`, so the `--index-url` flag is required. + +**Runtime stage** (`python:3.12-slim-bookworm`): +- Runtime libs only: `ffmpeg`, `libgl1`, `libglib2.0-0`, `libsndfile1`, `libportaudio2`, `tini`, `curl`, `ca-certificates`. +- Creates a non-root `rioc` user/group with UID/GID 1000. +- Copies `/opt/venv` from the builder, then app sources (`*.py`, `scripts/`, `mediamtx.yml`). +- Symlinks `/data/ai_guard.db` into the app dir so SQLite state lives on a mounted PV in k8s. +- `tini` is PID 1 to forward SIGTERM cleanly to uvicorn. +- Exposes port 8000; `HEALTHCHECK` polls `GET /events`. +- Default command: `uvicorn main:app --host 0.0.0.0 --port ${PORT} --limit-concurrency 100`. + +Vision inference (MiniCPM-o / MiniCPM-V) is offloaded to a remote vLLM server, which is why the image is CPU-only and stays small. + +[`.dockerignore`](./.dockerignore) excludes `.git`, `__pycache__/`, `venv/`, `.env*` examples, `data/`, logs, audio files, and the `.continue/` IDE config to keep the build context minimal. + +--- + +## Triggering a build + +- **Default (no params)**: builds `main`, image tagged with the `main` HEAD short SHA. +- **Manual / feature branch**: set `branch` to the branch name. The pipeline re-checks out that branch and computes `IMAGE_TAG` from it. Useful for testing branches like `dockerize` before merging. 
+- **Scripts repo override**: bump `SCRIPTS_REPO_BRANCH` if testing changes to shared CI tooling in `cloud-infrastructure`. + +The resulting image lives in the GCE registry and is tagged with the short SHA, so the deploying CD pipeline can pin to a specific commit. + +--- + +## Files involved + +| Path | Role | +|---|---| +| `Jenkinsfile-CI.groovy` | The active CI pipeline (this document describes it). | +| `Dockerfile` | Built and pushed by CI. | +| `.dockerignore` | Trims the build context. | +| `scripts/` | Ships inside the image (e.g. `videodb_rtsp.sh`). Do not confuse with `ci-scripts/`, which is pulled from the shared repo at build time. | + +External: + +| Repo | Used for | +|---|---| +| `cloudastructure/cloud-infrastructure` (`kube/base` by default) | `scripts/groovy/Utils.groovy` (Slack helpers), `scripts/custom-docker-*.sh` (build + push), `helms/` (used only by the disabled deploy stage). | + +--- + +# How CD Works + +Deploy is handled by the **`CD-general-job`** Jenkins pipeline. It is a shared job — the same pipeline deploys every Cloudastructure service (account-data, catalog-service, rioc, video-aggregator, …); the `APP_NAME` parameter picks which one. + +Pipeline definition: [`JenkinsPipelines/CD-general.groovy`](../cloud-infrastructure/JenkinsPipelines/CD-general.groovy) in the `cloudastructure/cloud-infrastructure` repo. + +--- + +## TL;DR + +On each deploy, Jenkins: + +1. Checks out `cloud-infrastructure` (branch from `SCRIPTS_REPO_BRANCH`) to get the deploy scripts and Helm charts. +2. Verifies that the image `${APP_NAME}:${image_tag}` exists in the source registry. +3. **Promotes** (re-tags/copies) the image into the target environment's registry. +4. Renders the Helm chart from `helms/deploy-app-helm` with the environment's values file (`helms/values/<environment>.yaml`) and deploys it to the target Kubernetes cluster. +5. Posts STARTED / SUCCESS / FAILED Slack notifications (with action label `PROMOTE`). 
+ +There is **no rebuild** here — CD reuses the exact image that CI pushed. That's why `image_tag` (the short git SHA produced by CI) is the linkage between the two pipelines. + +--- + +## Pipeline parameters + +| Parameter | Choices / Default | Purpose | +|---|---|---| +| `APP_NAME` | choice from a fixed list (includes `rioc`) | Which service to deploy. Picks both the image name and the Helm release. | +| `environment` | `dev-ovh`, `qa-ovh`, `demo-new`, `prod-new` | Target environment. Selects the destination registry, the Helm values file, and the Kubernetes cluster context. | +| `image_tag` | string, no default | The short git SHA from a successful CI build. Required — must match a tag that already exists in the source registry. | +| `SCRIPTS_REPO_BRANCH` | `kube/base` | Branch of `cloud-infrastructure` to use for scripts + Helm charts. | +| `HELM_DRY_RUN_DEBUG` | `false` | If true, `custom-deploy-helm.sh` should `helm --dry-run` instead of applying. Useful for previewing rendered manifests. | + +--- + +## How CI and CD link together + +``` + CI (Jenkinsfile-CI.groovy in rioc) CD (CD-general.groovy in cloud-infrastructure) + ────────────────────────────────── ────────────────────────────────────────────── + build ──► push to registry check-image ──► promote to registry ──► helm upgrade --install + tag = <short-sha> ▲ + │ + image_tag parameter (same short SHA) +``` + +The contract is just the image tag: CI produces `${APP_NAME}:<short-sha>` in the registry; CD takes that same SHA as input, promotes the image to the target environment, and rolls it out via Helm. + +--- + +## Triggering a deploy + +1. Wait for a CI build of `rioc` to succeed and note the `image_tag` from the build name or Slack message. +2. Open `CD-general-job` in Jenkins and **Build with Parameters**: + - `APP_NAME` = `rioc` + - `environment` = target environment (e.g. 
`qa-ovh`) + - `image_tag` = the short SHA from CI + - leave `SCRIPTS_REPO_BRANCH` at `kube/base` unless testing deploy-script changes + - set `HELM_DRY_RUN_DEBUG` = true to preview without applying +3. Watch the Slack channel for `PROMOTE STARTED` → `SUCCESS` / `FAILED`. + +--- + +## Result + +Once the CD pipeline finishes successfully, the application starts running on one of the Kubernetes clusters mapped to the selected `environment`: + +- `dev-ovh` / `qa-ovh` → the OVH-hosted Kubernetes cluster +- `demo-new` / `prod-new` → the main cluster in GCP + +The CD pipeline issues `helm upgrade --install` against that cluster, so the result is a running `rioc` Deployment (with the freshly promoted image), its Service, and any other resources the shared `deploy-app-helm` chart renders for the chosen environment. Pod readiness is gated by the `HEALTHCHECK` defined in the Dockerfile (`GET /events` on port 8000) — once the readiness probe passes, the service starts taking traffic and the rollout is considered complete. + +From this point on, the new build is live and serving the environment until the next CD run promotes a different `image_tag`. \ No newline at end of file diff --git a/CI.md b/CI.md new file mode 100644 index 0000000..74b539e --- /dev/null +++ b/CI.md @@ -0,0 +1,154 @@ +# How CI Works + +This document describes the continuous integration and continious delivery pipelines for the `rioc` service. The pipeline is defined in [`Jenkinsfile-CI.groovy`](./Jenkinsfile-CI.groovy) and runs on Jenkins. + +CI in this repo does **build** only. Deployment to **Kubernetes** is handled by a separate CD pipeline - **CD-general-job**. + +--- + +## TL;DR + +On every triggered build, Jenkins: + +1. Checks out the requested branch of `rioc` repo. +2. Clones a shared scripts repo (`cloudastructure/cloud-infrastructure`) to get build scripts and Helm charts. +3. Builds the Docker image from `Dockerfile` file. +4. Pushes the image to the GCE container registry, tagged with the short git SHA. +5. 
Posts STARTED / SUCCESS / FAILED notifications to Slack. + +--- + +## Pipeline parameters + +| Parameter | Default | Purpose | +|---|---|---| +| `branch` | `main` | Branch of `rioc` to build. Manual builds can target any branch. | +| `SCRIPTS_REPO_BRANCH` | `kube/base` | Branch of `cloud-infrastructure` to pull shared CI scripts and Helm values from. | +| `HELM_DRY_RUN_DEBUG` | `false` | Only relevant if/when the (currently commented-out) Helm deploy stage is re-enabled. | + +--- + +## What gets built (the image itself) + +The repo-root [`Dockerfile`](./Dockerfile) is a hardened multi-stage build: + +**Builder stage** (`python:3.12-slim-bookworm`): +- Installs build deps: `build-essential`, `portaudio19-dev`, `libsndfile1`, `libgl1`, `libglib2.0-0`. +- Creates `/opt/venv` and installs CPU-only PyTorch from the PyTorch CPU index (`torch==2.4.1+cpu`, `torchvision==0.19.1+cpu`), then `requirements.txt` on top. +- The `+cpu` wheels are not on PyPI — they live only on `download.pytorch.org/whl/cpu`, so the `--index-url` flag is required. + +**Runtime stage** (`python:3.12-slim-bookworm`): +- Runtime libs only: `ffmpeg`, `libgl1`, `libglib2.0-0`, `libsndfile1`, `libportaudio2`, `tini`, `curl`, `ca-certificates`. +- Creates a non-root `rioc` user/group with UID/GID 1000. +- Copies `/opt/venv` from the builder, then app sources (`*.py`, `scripts/`, `mediamtx.yml`). +- Symlinks `/data/ai_guard.db` into the app dir so SQLite state lives on a mounted PV in k8s. +- `tini` is PID 1 to forward SIGTERM cleanly to uvicorn. +- Exposes port 8000; `HEALTHCHECK` polls `GET /events`. +- Default command: `uvicorn main:app --host 0.0.0.0 --port ${PORT} --limit-concurrency 100`. + +Vision inference (MiniCPM-o / MiniCPM-V) is offloaded to a remote vLLM server, which is why the image is CPU-only and stays small. 
+ +[`.dockerignore`](./.dockerignore) excludes `.git`, `__pycache__/`, `venv/`, `.env*` examples, `data/`, logs, audio files, and the `.continue/` IDE config to keep the build context minimal. + +--- + +## Triggering a build + +- **Default (no params)**: builds `main`, image tagged with the `main` HEAD short SHA. +- **Manual / feature branch**: set `branch` to the branch name. The pipeline re-checks out that branch and computes `IMAGE_TAG` from it. Useful for testing branches like `dockerize` before merging. +- **Scripts repo override**: bump `SCRIPTS_REPO_BRANCH` if testing changes to shared CI tooling in `cloud-infrastructure`. + +The resulting image lives in the GCE registry and is tagged with the short SHA, so the deploying CD pipeline can pin to a specific commit. + +--- + +## Files involved + +| Path | Role | +|---|---| +| `Jenkinsfile-CI.groovy` | The active CI pipeline (this document describes it). | +| `Dockerfile` | Built and pushed by CI. | +| `.dockerignore` | Trims the build context. | +| `scripts/` | Ships inside the image (e.g. `videodb_rtsp.sh`). Do not confuse with `ci-scripts/`, which is pulled from the shared repo at build time. | + +External: + +| Repo | Used for | +|---|---| +| `cloudastructure/cloud-infrastructure` (`kube/base` by default) | `scripts/groovy/Utils.groovy` (Slack helpers), `scripts/custom-docker-*.sh` (build + push), `helms/` (used only by the disabled deploy stage). | + +--- + +# How CD Works + +Deploy is handled by the **`CD-general-job`** Jenkins pipeline. It is a shared job — the same pipeline deploys every Cloudastructure service (account-data, catalog-service, rioc, video-aggregator, …); the `APP_NAME` parameter picks which one. + +Pipeline definition: [`JenkinsPipelines/CD-general.groovy`](../cloud-infrastructure/JenkinsPipelines/CD-general.groovy) in the `cloudastructure/cloud-infrastructure` repo. + +--- + +## TL;DR + +On each deploy, Jenkins: + +1. 
Checks out `cloud-infrastructure` (branch from `SCRIPTS_REPO_BRANCH`) to get the deploy scripts and Helm charts. +2. Verifies that the image `${APP_NAME}:${image_tag}` exists in the source registry. +3. **Promotes** (re-tags/copies) the image into the target environment's registry. +4. Renders the Helm chart from `helms/deploy-app-helm` with the environment's values file (`helms/values/.yaml`) and deploys it to the target GKE cluster. +5. Posts STARTED / SUCCESS / FAILED Slack notifications (with action label `PROMOTE`). + +There is **no rebuild** here — CD reuses the exact image that CI pushed. That's why `image_tag` (the short git SHA produced by CI) is the linkage between the two pipelines. + +--- + +## Pipeline parameters + +| Parameter | Choices / Default | Purpose | +|---|---|---| +| `APP_NAME` | choice from a fixed list (includes `rioc`) | Which service to deploy. Picks both the image name and the Helm release. | +| `environment` | `dev-ovh`, `qa-ovh`, `demo-new`, `prod-new` | Target environment. Selects the destination registry, the Helm values file, and the GKE cluster context. | +| `image_tag` | string, no default | The short git SHA from a successful CI build. Required — must match a tag that already exists in the source registry. | +| `SCRIPTS_REPO_BRANCH` | `kube/base` | Branch of `cloud-infrastructure` to use for scripts + Helm charts. | +| `HELM_DRY_RUN_DEBUG` | `false` | If true, `custom-deploy-helm.sh` should `helm --dry-run` instead of applying. Useful for previewing rendered manifests. 
| + +--- + +## How CI and CD link together + +``` + CI (Jenkinsfile-CI.groovy in rioc) CD (CD-general.groovy in cloud-infrastructure) + ────────────────────────────────── ────────────────────────────────────────────── + build ──► push to registry check-image ──► promote to registry ──► helm upgrade --install + tag = ▲ + │ + image_tag parameter (same short SHA) +``` + +The contract is just the image tag: CI produces `${APP_NAME}:` in the registry; CD takes that same SHA as input, promotes the image to the target environment, and rolls it out via Helm. + +--- + +## Triggering a deploy + +1. Wait for a CI build of `rioc` to succeed and note the `image_tag` from the build name or Slack message. +2. Open `CD-general-job` in Jenkins and **Build with Parameters**: + - `APP_NAME` = `rioc` + - `environment` = target environment (e.g. `qa-ovh`) + - `image_tag` = the short SHA from CI + - leave `SCRIPTS_REPO_BRANCH` at `kube/base` unless testing deploy-script changes + - set `HELM_DRY_RUN_DEBUG` = true to preview without applying +3. Watch the Slack channel for `PROMOTE STARTED` → `SUCCESS` / `FAILED`. + +--- + +## Result + +Once the CD pipeline finishes successfully, the application starts running on one of the Kubernetes clusters mapped to the selected `environment`: + +- `dev-ovh` / `qa-ovh` → the OVH-hosted dev / QA clusters +- `demo-new` / `demo-dc-01` → the demo clusters +- `prod-new` → the production cluster + +`custom-deploy-helm.sh` issues `helm upgrade --install` against that cluster, so the result is a running `rioc` Deployment (with the freshly promoted image), its Service, and any other resources the shared `deploy-app-helm` chart renders for the chosen environment. Pod readiness is gated by the `HEALTHCHECK` defined in the Dockerfile (`GET /events` on port 8000) — once the readiness probe passes, the service starts taking traffic and the rollout is considered complete. 
+ +From this point on, the new build is live and serving the environment until the next CD run promotes a different `image_tag`. \ No newline at end of file From 7a25c36c454506aaddbd07b5182ba5411fb51c77 Mon Sep 17 00:00:00 2001 From: AlexanderSlinkov <87696513+AlexanderSlinkov@users.noreply.github.com> Date: Wed, 20 May 2026 19:52:13 +0200 Subject: [PATCH 8/8] Delete CI.md --- CI.md | 154 ---------------------------------------------------------- 1 file changed, 154 deletions(-) delete mode 100644 CI.md diff --git a/CI.md b/CI.md deleted file mode 100644 index 74b539e..0000000 --- a/CI.md +++ /dev/null @@ -1,154 +0,0 @@ -# How CI Works - -This document describes the continuous integration and continious delivery pipelines for the `rioc` service. The pipeline is defined in [`Jenkinsfile-CI.groovy`](./Jenkinsfile-CI.groovy) and runs on Jenkins. - -CI in this repo does **build** only. Deployment to **Kubernetes** is handled by a separate CD pipeline - **CD-general-job**. - ---- - -## TL;DR - -On every triggered build, Jenkins: - -1. Checks out the requested branch of `rioc` repo. -2. Clones a shared scripts repo (`cloudastructure/cloud-infrastructure`) to get build scripts and Helm charts. -3. Builds the Docker image from `Dockerfile` file. -4. Pushes the image to the GCE container registry, tagged with the short git SHA. -5. Posts STARTED / SUCCESS / FAILED notifications to Slack. - ---- - -## Pipeline parameters - -| Parameter | Default | Purpose | -|---|---|---| -| `branch` | `main` | Branch of `rioc` to build. Manual builds can target any branch. | -| `SCRIPTS_REPO_BRANCH` | `kube/base` | Branch of `cloud-infrastructure` to pull shared CI scripts and Helm values from. | -| `HELM_DRY_RUN_DEBUG` | `false` | Only relevant if/when the (currently commented-out) Helm deploy stage is re-enabled. 
| - ---- - -## What gets built (the image itself) - -The repo-root [`Dockerfile`](./Dockerfile) is a hardened multi-stage build: - -**Builder stage** (`python:3.12-slim-bookworm`): -- Installs build deps: `build-essential`, `portaudio19-dev`, `libsndfile1`, `libgl1`, `libglib2.0-0`. -- Creates `/opt/venv` and installs CPU-only PyTorch from the PyTorch CPU index (`torch==2.4.1+cpu`, `torchvision==0.19.1+cpu`), then `requirements.txt` on top. -- The `+cpu` wheels are not on PyPI — they live only on `download.pytorch.org/whl/cpu`, so the `--index-url` flag is required. - -**Runtime stage** (`python:3.12-slim-bookworm`): -- Runtime libs only: `ffmpeg`, `libgl1`, `libglib2.0-0`, `libsndfile1`, `libportaudio2`, `tini`, `curl`, `ca-certificates`. -- Creates a non-root `rioc` user/group with UID/GID 1000. -- Copies `/opt/venv` from the builder, then app sources (`*.py`, `scripts/`, `mediamtx.yml`). -- Symlinks `/data/ai_guard.db` into the app dir so SQLite state lives on a mounted PV in k8s. -- `tini` is PID 1 to forward SIGTERM cleanly to uvicorn. -- Exposes port 8000; `HEALTHCHECK` polls `GET /events`. -- Default command: `uvicorn main:app --host 0.0.0.0 --port ${PORT} --limit-concurrency 100`. - -Vision inference (MiniCPM-o / MiniCPM-V) is offloaded to a remote vLLM server, which is why the image is CPU-only and stays small. - -[`.dockerignore`](./.dockerignore) excludes `.git`, `__pycache__/`, `venv/`, `.env*` examples, `data/`, logs, audio files, and the `.continue/` IDE config to keep the build context minimal. - ---- - -## Triggering a build - -- **Default (no params)**: builds `main`, image tagged with the `main` HEAD short SHA. -- **Manual / feature branch**: set `branch` to the branch name. The pipeline re-checks out that branch and computes `IMAGE_TAG` from it. Useful for testing branches like `dockerize` before merging. -- **Scripts repo override**: bump `SCRIPTS_REPO_BRANCH` if testing changes to shared CI tooling in `cloud-infrastructure`. 
- -The resulting image lives in the GCE registry and is tagged with the short SHA, so the deploying CD pipeline can pin to a specific commit. - ---- - -## Files involved - -| Path | Role | -|---|---| -| `Jenkinsfile-CI.groovy` | The active CI pipeline (this document describes it). | -| `Dockerfile` | Built and pushed by CI. | -| `.dockerignore` | Trims the build context. | -| `scripts/` | Ships inside the image (e.g. `videodb_rtsp.sh`). Do not confuse with `ci-scripts/`, which is pulled from the shared repo at build time. | - -External: - -| Repo | Used for | -|---|---| -| `cloudastructure/cloud-infrastructure` (`kube/base` by default) | `scripts/groovy/Utils.groovy` (Slack helpers), `scripts/custom-docker-*.sh` (build + push), `helms/` (used only by the disabled deploy stage). | - ---- - -# How CD Works - -Deploy is handled by the **`CD-general-job`** Jenkins pipeline. It is a shared job — the same pipeline deploys every Cloudastructure service (account-data, catalog-service, rioc, video-aggregator, …); the `APP_NAME` parameter picks which one. - -Pipeline definition: [`JenkinsPipelines/CD-general.groovy`](../cloud-infrastructure/JenkinsPipelines/CD-general.groovy) in the `cloudastructure/cloud-infrastructure` repo. - ---- - -## TL;DR - -On each deploy, Jenkins: - -1. Checks out `cloud-infrastructure` (branch from `SCRIPTS_REPO_BRANCH`) to get the deploy scripts and Helm charts. -2. Verifies that the image `${APP_NAME}:${image_tag}` exists in the source registry. -3. **Promotes** (re-tags/copies) the image into the target environment's registry. -4. Renders the Helm chart from `helms/deploy-app-helm` with the environment's values file (`helms/values/.yaml`) and deploys it to the target GKE cluster. -5. Posts STARTED / SUCCESS / FAILED Slack notifications (with action label `PROMOTE`). - -There is **no rebuild** here — CD reuses the exact image that CI pushed. That's why `image_tag` (the short git SHA produced by CI) is the linkage between the two pipelines. 
- ---- - -## Pipeline parameters - -| Parameter | Choices / Default | Purpose | -|---|---|---| -| `APP_NAME` | choice from a fixed list (includes `rioc`) | Which service to deploy. Picks both the image name and the Helm release. | -| `environment` | `dev-ovh`, `qa-ovh`, `demo-new`, `prod-new` | Target environment. Selects the destination registry, the Helm values file, and the Kubernetes cluster context. | -| `image_tag` | string, no default | The short git SHA from a successful CI build. Required — must match a tag that already exists in the source registry. | -| `SCRIPTS_REPO_BRANCH` | `kube/base` | Branch of `cloud-infrastructure` to use for scripts + Helm charts. | -| `HELM_DRY_RUN_DEBUG` | `false` | If true, `custom-deploy-helm.sh` is expected to run Helm with `--dry-run` instead of applying. Useful for previewing rendered manifests. | - ---- - -## How CI and CD link together - -``` - CI (Jenkinsfile-CI.groovy in rioc) CD (CD-general.groovy in cloud-infrastructure) - ────────────────────────────────── ────────────────────────────────────────────── - build ──► push to registry check-image ──► promote to registry ──► helm upgrade --install - tag = short SHA ▲ - │ - image_tag parameter (same short SHA) -``` - -The contract is just the image tag: CI produces `${APP_NAME}:${image_tag}` in the registry; CD takes that same SHA as input, promotes the image to the target environment, and rolls it out via Helm. - ---- - -## Triggering a deploy - -1. Wait for a CI build of `rioc` to succeed and note the `image_tag` from the build name or Slack message. -2. Open `CD-general-job` in Jenkins and **Build with Parameters**: - - `APP_NAME` = `rioc` - - `environment` = target environment (e.g. `qa-ovh`) - - `image_tag` = the short SHA from CI - - leave `SCRIPTS_REPO_BRANCH` at `kube/base` unless testing deploy-script changes - - set `HELM_DRY_RUN_DEBUG` = true to preview without applying -3. Watch the Slack channel for `PROMOTE STARTED` → `SUCCESS` / `FAILED`.
- ---- - -## Result - -Once the CD pipeline finishes successfully, the application starts running on one of the Kubernetes clusters mapped to the selected `environment`: - -- `dev-ovh` / `qa-ovh` → the OVH-hosted dev / QA clusters -- `demo-new` / `demo-dc-01` → the demo clusters -- `prod-new` → the production cluster - -`custom-deploy-helm.sh` issues `helm upgrade --install` against that cluster, so the result is a running `rioc` Deployment (with the freshly promoted image), its Service, and any other resources the shared `deploy-app-helm` chart renders for the chosen environment. Kubernetes does not run the `HEALTHCHECK` defined in the Dockerfile, so pod readiness is gated by the readiness probe configured in the Helm chart (which should target the same `GET /events` endpoint on port 8000 that the `HEALTHCHECK` polls) — once the readiness probe passes, the service starts taking traffic and the rollout is considered complete. - -From this point on, the new build is live and serving the environment until the next CD run promotes a different `image_tag`. \ No newline at end of file