Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,9 @@ YUXI_CORS_ORIGINS=
# KUBECONFIG_PATH=/root/.kube/config
# THREAD_PVC=yuxi-thread
# SKILLS_PVC=yuxi-skills # 当前代码会读取,但 Pod 挂载实际仍只使用 THREAD_PVC

# ===== Docker Compose Profiles =====
# GPU 文档解析服务 (mineru-api / paddlex) 的启动开关。
# 有 NVIDIA GPU:保持 gpu,docker compose up -d 会自动带上它们。
# 无 GPU 机器:置空 (COMPOSE_PROFILES=) 即可跳过这两个服务,避免启动失败。
COMPOSE_PROFILES=gpu
27 changes: 27 additions & 0 deletions MinerU/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Base image: DaoCloud-mirrored vLLM OpenAI image (for the China region), for GPUs of the Volta, Turing, Ampere, Ada Lovelace, Hopper and Blackwell architectures (7.0 <= Compute Capability <= 12.1)
# The default base image uses vLLM 0.21.0 with CUDA 13.0. For CUDA 12.9 environments, switch to the commented cu129 image below.
# Compute Capability version query (https://developer.nvidia.com/cuda-gpus)
# Supports both x86_64 and ARM (AArch64) architectures
FROM docker.m.daocloud.io/vllm/vllm-openai:v0.21.0
# FROM docker.m.daocloud.io/vllm/vllm-openai:v0.21.0-cu129

# Install libgl for OpenCV support and Noto fonts (including CJK) for Chinese characters,
# refresh the font cache, then remove the apt lists in the same layer.
RUN apt-get update && \
apt-get install -y \
fonts-noto-core \
fonts-noto-cjk \
fontconfig \
libgl1 && \
fc-cache -fv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Install or upgrade mineru[core] (minimum version 3.2.1) from the Aliyun PyPI mirror,
# then purge the pip cache in the same layer.
# --break-system-packages lets pip install into the image's system Python environment.
RUN python3 -m pip install -U 'mineru[core]>=3.2.1' -i https://mirrors.aliyun.com/pypi/simple --break-system-packages && \
python3 -m pip cache purge

# Download all models from ModelScope at build time.
# NOTE(review): upstream states this step also updates the MinerU configuration file; not visible here, confirm.
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"

# Entrypoint wrapper: export MINERU_MODEL_SOURCE=local, then exec the container's command arguments.
# No virtual environment is activated here. The trailing "--" fills $0 so the arguments arrive in "$@".
ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
122 changes: 122 additions & 0 deletions MinerU/docker/compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
services:
mineru-openai-server:
image: mineru:latest
container_name: mineru-openai-server
restart: always
profiles: ["openai-server"]
ports:
- 30000:30000
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-openai-server
command:
--host 0.0.0.0
--port 30000
--gpu-memory-utilization 0.5 # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"]
capabilities: [gpu]

mineru-api:
image: mineru:latest
container_name: mineru-api
restart: always
profiles: ["api"]
ports:
- 8000:8000
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-api
command:
--host 0.0.0.0
--port 8000
# --allow-public-http-client # Disabled by default; when binding to 0.0.0.0 or ::, this re-enables *-http-client backends and server_url. Enable only if you accept the SSRF risk.
# parameters for vllm-engine
# --gpu-memory-utilization 0.5 # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"]
capabilities: [gpu]

mineru-router:
image: mineru:latest
container_name: mineru-router
restart: always
profiles: ["router"]
ports:
- 8002:8002
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-router
command:
--host 0.0.0.0
--port 8002
--local-gpus auto
# --allow-public-http-client # Disabled by default; when binding to 0.0.0.0 or ::, this re-enables *-http-client backends and server_url. Enable only if you accept the SSRF risk.
# To aggregate existing mineru-api services instead of starting local workers:
# --local-gpus none
# --upstream-url http://mineru-api:8000
# --upstream-url http://mineru-api-2:8000
# parameters for vllm-engine
# --gpu-memory-utilization 0.5 # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8002/health || exit 1"]
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"]
capabilities: [gpu]

mineru-gradio:
image: mineru:latest
container_name: mineru-gradio
restart: always
profiles: ["gradio"]
ports:
- 7860:7860
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-gradio
command:
--server-name 0.0.0.0
--server-port 7860
# --enable-api false # If you want to disable the API, set this to false
# --max-convert-pages 20 # If you want to limit the number of pages for conversion, set this to a specific number
# parameters for vllm-engine
# --gpu-memory-utilization 0.5 # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
ipc: host
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"]
capabilities: [gpu]
122 changes: 122 additions & 0 deletions MinerU/docker/mineru.compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
services:
mineru-openai-server:
image: mineru:latest
container_name: mineru-openai-server
restart: always
profiles: ["openai-server"]
ports:
- 30000:30000
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-openai-server
command:
--host 0.0.0.0
--port 30000
# --gpu-memory-utilization 0.5 # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"]
capabilities: [gpu]

mineru-api:
image: mineru:latest
container_name: mineru-api
restart: always
profiles: ["api"]
ports:
- 8000:8000
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-api
command:
--host 0.0.0.0
--port 8000
# --allow-public-http-client # Disabled by default; when binding to 0.0.0.0 or ::, this re-enables *-http-client backends and server_url. Enable only if you accept the SSRF risk.
# parameters for vllm-engine
# --gpu-memory-utilization 0.5 # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"]
capabilities: [gpu]

mineru-router:
image: mineru:latest
container_name: mineru-router
restart: always
profiles: ["router"]
ports:
- 8002:8002
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-router
command:
--host 0.0.0.0
--port 8002
--local-gpus auto
# --allow-public-http-client # Disabled by default; when binding to 0.0.0.0 or ::, this re-enables *-http-client backends and server_url. Enable only if you accept the SSRF risk.
# To aggregate existing mineru-api services instead of starting local workers:
# --local-gpus none
# --upstream-url http://mineru-api:8000
# --upstream-url http://mineru-api-2:8000
# parameters for vllm-engine
# --gpu-memory-utilization 0.5 # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
ipc: host
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8002/health || exit 1"]
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"]
capabilities: [gpu]

mineru-gradio:
image: mineru:latest
container_name: mineru-gradio
restart: always
profiles: ["gradio"]
ports:
- 7860:7860
environment:
MINERU_MODEL_SOURCE: local
entrypoint: mineru-gradio
command:
--server-name 0.0.0.0
--server-port 7860
# --enable-api false # If you want to disable the API, set this to false
# --max-convert-pages 20 # If you want to limit the number of pages for conversion, set this to a specific number
# parameters for vllm-engine
# --gpu-memory-utilization 0.5 # If encountering VRAM shortage, reduce the KV cache size by this parameter; if VRAM issues persist, try lowering it further to `0.4` or below.
ulimits:
memlock: -1
stack: 67108864
ipc: host
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"]
capabilities: [gpu]
2 changes: 1 addition & 1 deletion backend/package/yuxi/config/static/info.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# 组织信息
organization:
name: "江南语析" # 完整组织名称
name: "楚天智航" # 完整组织名称
logo: "/favicon.svg" # Logo文件路径(放在 web/public 目录下)
avatar: "/avatar.jpg" # 头像文件路径(放在 web/public 目录下)
login_bg: "/login-bg.jpg" # 登录背景图片路径(放在 web/public 目录下)
Expand Down
1 change: 1 addition & 0 deletions backend/package/yuxi/models/providers/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"type": "embedding",
"display_name": "text-embedding-v4",
"dimension": 1024,
"batch_size": 40,
},
{
"id": "qwen3-rerank",
Expand Down
2 changes: 2 additions & 0 deletions backend/package/yuxi/services/agent_run_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ def _compact_stream_chunk(chunk: dict) -> dict:
"interrupt_info",
"source",
"agent_state",
"action_requests",
"review_configs",
)
if chunk.get(key) is not None and chunk.get(key) != ""
}
Expand Down
51 changes: 48 additions & 3 deletions backend/package/yuxi/services/chat_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,45 @@ def _build_ask_user_question_payload(info: Any, thread_id: str) -> dict[str, Any
}


def _is_human_approval_payload(payload: dict) -> bool:
"""判断 interrupt 是否为 HumanInTheLoopMiddleware 的工具审批载荷。

HIL 中间件产生的 interrupt value 含 ``action_requests``(待审批的工具调用)
与 ``review_configs``(每个工具允许的决策类型),与 ask_user_question 的
``questions`` 结构不同。用 ``action_requests`` 作为判别依据。
"""
action_requests = payload.get("action_requests")
return isinstance(action_requests, list) and len(action_requests) > 0


def _build_human_approval_payload(info: Any, thread_id: str) -> dict[str, Any]:
    """Normalize a HIL tool-approval interrupt into a ``human_approval_required`` payload.

    Args:
        info: Raw interrupt info; it is converted with ``_coerce_interrupt_payload``.
        thread_id: Thread identifier echoed back in the returned payload.

    Returns:
        A dict with four keys: ``action_requests`` (copies of the dict entries,
        each with a non-empty ``description``), ``review_configs`` (passed
        through, or ``[]`` when missing/empty), ``source`` (always
        ``"human_approval"``) and ``thread_id``.
    """
    payload = _coerce_interrupt_payload(info)

    action_requests = payload.get("action_requests") or []
    review_configs = payload.get("review_configs") or []

    # Fill in a description for every action request (shown by the frontend)
    # while keeping all original fields.
    normalized_actions: list[dict[str, Any]] = []
    for raw_action in action_requests:
        if not isinstance(raw_action, dict):
            continue
        # Work on a shallow copy so the interrupt value itself is not mutated.
        action = dict(raw_action)
        if not action.get("description"):
            # Use `or` instead of dict.get defaults: a key that is present with
            # a None value would otherwise render as "Tool: None" / "Args: None".
            action["description"] = "操作需要确认\n\nTool: {name}\nArgs: {args}".format(
                name=action.get("name") or "",
                args=action.get("args") or {},
            )
        normalized_actions.append(action)

    return {
        "action_requests": normalized_actions,
        "review_configs": review_configs,
        "source": "human_approval",
        "thread_id": thread_id,
    }


def _ensure_full_msg(full_msg: AIMessage | None, accumulated_content: list[str]) -> AIMessage | None:
"""如果 full_msg 为空且有累积内容,构建 AIMessage"""
if not full_msg and accumulated_content:
Expand Down Expand Up @@ -673,9 +712,15 @@ async def check_and_handle_interrupts(

interrupt_info = _extract_interrupt_info(state)
if interrupt_info:
question_payload = _build_ask_user_question_payload(interrupt_info, thread_id)
meta["interrupt"] = question_payload
yield make_chunk(status="ask_user_question_required", meta=meta, **question_payload)
payload = _coerce_interrupt_payload(interrupt_info)
if _is_human_approval_payload(payload):
approval_payload = _build_human_approval_payload(interrupt_info, thread_id)
meta["interrupt"] = approval_payload
yield make_chunk(status="human_approval_required", meta=meta, **approval_payload)
else:
question_payload = _build_ask_user_question_payload(interrupt_info, thread_id)
meta["interrupt"] = question_payload
yield make_chunk(status="ask_user_question_required", meta=meta, **question_payload)

except Exception as e:
logger.exception(f"Error checking interrupts: {e}")
Expand Down
Loading
Loading