xerrors · xiangfei258 · Jun 12, 2026 · Jun 12, 2026 · Jun 22, 2026 · Jun 23, 2026
diff --git a/.env.template b/.env.template
@@ -72,3 +72,9 @@ YUXI_CORS_ORIGINS=
 # KUBECONFIG_PATH=/root/.kube/config
 # THREAD_PVC=yuxi-thread
 # SKILLS_PVC=yuxi-skills  # 当前代码会读取，但 Pod 挂载实际仍只使用 THREAD_PVC
+
+# ===== Docker Compose Profiles =====
+# GPU 文档解析服务 (mineru-api / paddlex) 的启动开关。
+# 有 NVIDIA GPU：保持 gpu，docker compose up -d 会自动带上它们。
+# 无 GPU 机器：置空 (COMPOSE_PROFILES=) 即可跳过这两个服务，避免启动失败。
+COMPOSE_PROFILES=gpu
diff --git a/MinerU/docker/Dockerfile b/MinerU/docker/Dockerfile
@@ -0,0 +1,27 @@
+# Base image: DaoCloud mirror (for the China region) of the vllm-openai image, for GPUs with Volta, Turing, Ampere, Ada Lovelace, Hopper or Blackwell architecture (7.0 <= Compute Capability <= 12.1)
+# The default base image uses vLLM 0.21.0 with CUDA 13.0. For CUDA 12.9 environments, switch to the commented cu129 image below.
+# Look up a GPU's Compute Capability at https://developer.nvidia.com/cuda-gpus
+# Supports both the x86_64 and ARM (AArch64) architectures
+FROM docker.m.daocloud.io/vllm/vllm-openai:v0.21.0
+# FROM docker.m.daocloud.io/vllm/vllm-openai:v0.21.0-cu129
+
+# Install libgl1 (for OpenCV support), Noto fonts (core + CJK) and fontconfig, then rebuild the font cache and remove the apt package lists
+RUN apt-get update && \
+    apt-get install -y \
+        fonts-noto-core \
+        fonts-noto-cjk \
+        fontconfig \
+        libgl1 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install or upgrade mineru[core] (>= 3.2.1) from the Aliyun PyPI mirror, then purge the pip cache
+RUN python3 -m pip install -U 'mineru[core]>=3.2.1' -i https://mirrors.aliyun.com/pypi/simple --break-system-packages && \
+    python3 -m pip cache purge
+
+# Download all models from ModelScope at build time and update the configuration file
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
+
+# Entrypoint: export MINERU_MODEL_SOURCE=local, then exec the command passed to the container (no virtual environment is activated here)
+ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
diff --git a/MinerU/docker/compose.yaml b/MinerU/docker/compose.yaml
@@ -0,0 +1,122 @@
+services:
+  mineru-openai-server:
+    image: mineru:latest
+    container_name: mineru-openai-server
+    restart: always
+    profiles: ["openai-server"]
+    ports:
+      - 30000:30000
+    environment:
+      MINERU_MODEL_SOURCE: local
+    entrypoint: mineru-openai-server
+    command:
+      --host 0.0.0.0
+      --port 30000
+      --gpu-memory-utilization 0.5  # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # Modify for multiple GPUs: ["0", "1"]
+              capabilities: [gpu]
+
+  mineru-api:
+    image: mineru:latest
+    container_name: mineru-api
+    restart: always
+    profiles: ["api"]
+    ports:
+      - 8000:8000
+    environment:
+      MINERU_MODEL_SOURCE: local
+    entrypoint: mineru-api
+    command:
+      --host 0.0.0.0
+      --port 8000
+      # --allow-public-http-client  # Disabled by default; when binding to 0.0.0.0 or ::, this re-enables *-http-client backends and server_url. Enable only if you accept the SSRF risk.
+      # parameters for vllm-engine
+      # --gpu-memory-utilization 0.5  # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # Modify for multiple GPUs: ["0", "1"]
+              capabilities: [gpu]
+
+  mineru-router:
+    image: mineru:latest
+    container_name: mineru-router
+    restart: always
+    profiles: ["router"]
+    ports:
+      - 8002:8002
+    environment:
+      MINERU_MODEL_SOURCE: local
+    entrypoint: mineru-router
+    command:
+      --host 0.0.0.0
+      --port 8002
+      --local-gpus auto
+      # --allow-public-http-client  # Disabled by default; when binding to 0.0.0.0 or ::, this re-enables *-http-client backends and server_url. Enable only if you accept the SSRF risk.
+      # To aggregate existing mineru-api services instead of starting local workers:
+      # --local-gpus none
+      # --upstream-url http://mineru-api:8000
+      # --upstream-url http://mineru-api-2:8000
+      # parameters for vllm-engine
+      # --gpu-memory-utilization 0.5  # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8002/health || exit 1"]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # Modify for multiple GPUs: ["0", "1"]
+              capabilities: [gpu]
+
+  mineru-gradio:
+    image: mineru:latest
+    container_name: mineru-gradio
+    restart: always
+    profiles: ["gradio"]
+    ports:
+      - 7860:7860
+    environment:
+      MINERU_MODEL_SOURCE: local
+    entrypoint: mineru-gradio
+    command:
+      --server-name 0.0.0.0
+      --server-port 7860
+      # --enable-api false  # If you want to disable the API, set this to false
+      # --max-convert-pages 20  # If you want to limit the number of pages for conversion, set this to a specific number
+      # parameters for vllm-engine
+      # --gpu-memory-utilization 0.5  # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    ipc: host
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # Modify for multiple GPUs: ["0", "1"]
+              capabilities: [gpu]
diff --git a/MinerU/docker/mineru.compose.yml b/MinerU/docker/mineru.compose.yml
@@ -0,0 +1,122 @@
+services:
+  mineru-openai-server:
+    image: mineru:latest
+    container_name: mineru-openai-server
+    restart: always
+    profiles: ["openai-server"]
+    ports:
+      - 30000:30000
+    environment:
+      MINERU_MODEL_SOURCE: local
+    entrypoint: mineru-openai-server
+    command:
+      --host 0.0.0.0
+      --port 30000
+      # --gpu-memory-utilization 0.5  # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # Modify for multiple GPUs: ["0", "1"]
+              capabilities: [gpu]
+
+  mineru-api:
+    image: mineru:latest
+    container_name: mineru-api
+    restart: always
+    profiles: ["api"]
+    ports:
+      - 8000:8000
+    environment:
+      MINERU_MODEL_SOURCE: local
+    entrypoint: mineru-api
+    command:
+      --host 0.0.0.0
+      --port 8000
+      # --allow-public-http-client  # Disabled by default; when binding to 0.0.0.0 or ::, this re-enables *-http-client backends and server_url. Enable only if you accept the SSRF risk.
+      # parameters for vllm-engine
+      # --gpu-memory-utilization 0.5  # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # Modify for multiple GPUs: ["0", "1"]
+              capabilities: [gpu]
+
+  mineru-router:
+    image: mineru:latest
+    container_name: mineru-router
+    restart: always
+    profiles: ["router"]
+    ports:
+      - 8002:8002
+    environment:
+      MINERU_MODEL_SOURCE: local
+    entrypoint: mineru-router
+    command:
+      --host 0.0.0.0
+      --port 8002
+      --local-gpus auto
+      # --allow-public-http-client  # Disabled by default; when binding to 0.0.0.0 or ::, this re-enables *-http-client backends and server_url. Enable only if you accept the SSRF risk.
+      # To aggregate existing mineru-api services instead of starting local workers:
+      # --local-gpus none
+      # --upstream-url http://mineru-api:8000
+      # --upstream-url http://mineru-api-2:8000
+      # parameters for vllm-engine
+      # --gpu-memory-utilization 0.5  # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8002/health || exit 1"]
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # Modify for multiple GPUs: ["0", "1"]
+              capabilities: [gpu]
+
+  mineru-gradio:
+    image: mineru:latest
+    container_name: mineru-gradio
+    restart: always
+    profiles: ["gradio"]
+    ports:
+      - 7860:7860
+    environment:
+      MINERU_MODEL_SOURCE: local
+    entrypoint: mineru-gradio
+    command:
+      --server-name 0.0.0.0
+      --server-port 7860
+      # --enable-api false  # If you want to disable the API, set this to false
+      # --max-convert-pages 20  # If you want to limit the number of pages for conversion, set this to a specific number
+      # parameters for vllm-engine
+      # --gpu-memory-utilization 0.5  # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    ipc: host
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              device_ids: ["0"]  # Modify for multiple GPUs: ["0", "1"]
+              capabilities: [gpu]
diff --git a/backend/package/yuxi/config/static/info.template.yaml b/backend/package/yuxi/config/static/info.template.yaml
@@ -3,7 +3,7 @@
 
 # 组织信息
 organization:
-  name: "江南语析"           # 完整组织名称
+  name: "楚天智航"           # 完整组织名称
   logo: "/favicon.svg"       # Logo文件路径（放在 web/public 目录下）
   avatar: "/avatar.jpg"      # 头像文件路径（放在 web/public 目录下）
   login_bg: "/login-bg.jpg"  # 登录背景图片路径（放在 web/public 目录下）

diff --git a/backend/package/yuxi/models/providers/builtin.py b/backend/package/yuxi/models/providers/builtin.py
@@ -46,6 +46,7 @@
                 "type": "embedding",
                 "display_name": "text-embedding-v4",
                 "dimension": 1024,
+                "batch_size": 40,
             },
             {
                 "id": "qwen3-rerank",

diff --git a/backend/package/yuxi/services/agent_run_service.py b/backend/package/yuxi/services/agent_run_service.py
@@ -146,6 +146,8 @@ def _compact_stream_chunk(chunk: dict) -> dict:
             "interrupt_info",
             "source",
             "agent_state",
+            "action_requests",
+            "review_configs",
         )
         if chunk.get(key) is not None and chunk.get(key) != ""
     }

diff --git a/backend/package/yuxi/services/chat_service.py b/backend/package/yuxi/services/chat_service.py
@@ -593,6 +593,45 @@ def _build_ask_user_question_payload(info: Any, thread_id: str) -> dict[str, Any
     }
 
 
+def _is_human_approval_payload(payload: dict) -> bool:
+    """Return whether an interrupt payload is a HumanInTheLoopMiddleware tool-approval request.
+
+    Such payloads carry ``action_requests`` (tool calls awaiting approval) and ``review_configs``
+    (decision types allowed per tool) instead of the ``questions`` used by ask_user_question;
+    a non-empty ``action_requests`` list is the discriminator.
+    """
+    pending = payload.get("action_requests")
+    return bool(pending) if isinstance(pending, list) else False
+
+
+def _build_human_approval_payload(info: Any, thread_id: str) -> dict[str, Any]:
+    """Normalize a HIL tool-approval interrupt into a ``human_approval_required`` payload.
+
+    Non-dict ``action_requests`` entries are dropped; every kept entry is shallow-copied (original
+    fields preserved) and gets a fallback ``description`` for frontend display when it has none.
+    """
+    raw = _coerce_interrupt_payload(info)
+
+    approvals: list[dict[str, Any]] = []
+    for request in raw.get("action_requests") or []:
+        if isinstance(request, dict):
+            entry = dict(request)
+            if not entry.get("description"):
+                entry["description"] = "操作需要确认\n\nTool: {name}\nArgs: {args}".format(
+                    name=entry.get("name", ""),
+                    args=entry.get("args", {}),
+                )
+            approvals.append(entry)
+
+    normalized: dict[str, Any] = {
+        "action_requests": approvals,
+        "review_configs": raw.get("review_configs") or [],
+        "source": "human_approval",
+        "thread_id": thread_id,
+    }
+    return normalized
+
+
 def _ensure_full_msg(full_msg: AIMessage | None, accumulated_content: list[str]) -> AIMessage | None:
     """如果 full_msg 为空且有累积内容，构建 AIMessage"""
     if not full_msg and accumulated_content:
@@ -673,9 +712,15 @@ async def check_and_handle_interrupts(
 
         interrupt_info = _extract_interrupt_info(state)
         if interrupt_info:
-            question_payload = _build_ask_user_question_payload(interrupt_info, thread_id)
-            meta["interrupt"] = question_payload
-            yield make_chunk(status="ask_user_question_required", meta=meta, **question_payload)
+            payload = _coerce_interrupt_payload(interrupt_info)
+            if _is_human_approval_payload(payload):
+                approval_payload = _build_human_approval_payload(interrupt_info, thread_id)
+                meta["interrupt"] = approval_payload
+                yield make_chunk(status="human_approval_required", meta=meta, **approval_payload)
+            else:
+                question_payload = _build_ask_user_question_payload(interrupt_info, thread_id)
+                meta["interrupt"] = question_payload
+                yield make_chunk(status="ask_user_question_required", meta=meta, **question_payload)
 
     except Exception as e:
         logger.exception(f"Error checking interrupts: {e}")