From 4f9e5e0d191794b2595ea00b58d0e9db7a8da869 Mon Sep 17 00:00:00 2001
From: HAHAJN <14188146+HAHAJN@user.noreply.gitee.com>
Date: Sun, 31 May 2026 15:18:17 +0800
Subject: [PATCH] feat: add data-flow-skill

---
 README.md                                     |   13 +-
 data/last_update.txt                          |    2 +-
 data/local_skills.json                        |   30 +-
 public/data/last_update.txt                   |    2 +-
 public/data/local_skills.json                 |   30 +-
 .../data-flow-skill/data-flow-skill/README.md |   53 +
 .../data-flow-skill/data-flow-skill/SKILL.md  |  266 +++
 .../data-flow-skill/references/data-types.md  |   21 +
 .../data-flow-skill/references/reporting.md   |   19 +
 .../data-flow-skill/references/slides.md      |   23 +
 .../data-flow-skill/references/validation.md  |   26 +
 .../references/visualization.md               |   24 +
 .../data-flow-skill/references/workflow.md    |   16 +
 .../data-flow-skill/requirements.txt          |    6 +
 .../data-flow-skill/scripts/README.md         |   34 +
 .../scripts/analysis/__init__.py              |    2 +
 .../analysis/analyzer_data_understanding.py   |  145 ++
 .../analysis/analyzer_dataset_detection.py    |  195 +++
 .../analysis/analyzer_findings_generation.py  | 1503 +++++++++++++++++
 .../analysis/analyzer_preprocessing.py        |   61 +
 .../analysis/analyzer_statistical_analysis.py |   36 +
 .../analysis/analyzer_strategy_registry.py    |   81 +
 .../analysis/analyzer_time_series_analysis.py |   56 +
 .../scripts/image_gen/image_generator.py      |  202 +++
 .../scripts/mermaid/__init__.py               |    5 +
 .../data-flow-skill/scripts/mermaid/cli.py    |  117 ++
 .../scripts/mermaid/echarts_export.py         |  239 +++
 .../scripts/mermaid/flowchart.py              |  314 ++++
 .../visualization/matplotlib/bar_memevolve.py |  113 ++
 .../visualization/matplotlib/bar_spice.py     |  169 ++
 .../visualization/matplotlib/box_plot.py      |  113 ++
 .../visualization/matplotlib/bubble_chart.py  |  149 ++
 .../matplotlib/calendar_heatmap.py            |  208 +++
 .../visualization/matplotlib/line_aime.py     |   98 ++
 .../matplotlib/line_loss_inset.py             |  161 ++
 .../matplotlib/line_selfdistill.py            |  173 ++
 .../matplotlib/parallel_coordinates.py        |  173 ++
 .../visualization/matplotlib/radar_dora.py    |  155 ++
 .../visualization/matplotlib/scatter_break.py |  173 ++
 .../visualization/matplotlib/scatter_tsne.py  |  137 ++
 .../visualization/matplotlib/stacked_bar.py   |  121 ++
 .../visualization/matplotlib/violin_plot.py   |  139 ++
 ...00\350\203\275\346\270\205\345\215\225.md" |   51 +-
 43 files changed, 5607 insertions(+), 47 deletions(-)
 create mode 100644 skills/data-flow-skill/data-flow-skill/README.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/SKILL.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/references/data-types.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/references/reporting.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/references/slides.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/references/validation.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/references/visualization.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/references/workflow.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/requirements.txt
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/README.md
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/analysis/__init__.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_data_understanding.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_dataset_detection.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_findings_generation.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_preprocessing.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_statistical_analysis.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_strategy_registry.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_time_series_analysis.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/image_gen/image_generator.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/mermaid/__init__.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/mermaid/cli.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/mermaid/echarts_export.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/mermaid/flowchart.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bar_memevolve.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bar_spice.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/box_plot.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bubble_chart.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/calendar_heatmap.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_aime.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_loss_inset.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_selfdistill.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/parallel_coordinates.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/radar_dora.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/scatter_break.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/scatter_tsne.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/stacked_bar.py
 create mode 100644 skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/violin_plot.py

diff --git a/README.md b/README.md
index 8b20ab9..cf5ab33 100644
--- a/README.md
+++ b/README.md
@@ -3,15 +3,15 @@
 收录最全、更新最快的AI Agent技能库，涵盖**文档处理、内容创作、编程开发、机器学习、自动化工作流**等多个领域的精选技能包。
 
 [![官方技能](https://img.shields.io/badge/官方技能-182-blue?style=flat-square)](https://github.com/anbeime/skill)
-[![本地技能](https://img.shields.io/badge/本地技能-61-green?style=flat-square)](https://github.com/anbeime/skill)
+[![本地技能](https://img.shields.io/badge/本地技能-62-green?style=flat-square)](https://github.com/anbeime/skill)
 [![备份覆盖](https://img.shields.io/badge/备份覆盖-100%25-success?style=flat-square)](https://github.com/anbeime/skill)
 [![自动更新](https://img.shields.io/badge/更新-每24小时-orange?style=flat-square)](https://github.com/anbeime/skill)
 
 ## 📊 统计数据
 
 - **官方技能**: 182 个（来自 awesome-agent-skills，自动爬取）
-- **本地技能**: 61 个（25核心 + 30子技能 + 6系统内置）
-- **技能总数**: 243 个（官方 + 本地）
+- **本地技能**: 62 个（26核心 + 30子技能 + 6系统内置）
+- **技能总数**: 244 个（官方 + 本地）
 - **备份覆盖率**: 100%（71个压缩包，完整备份）
 - **自动更新**: 每24小时自动爬取最新技能
 
@@ -22,7 +22,7 @@
 
 ### 📦 双重技能库
 - **官方技能**：182个来自Anthropic、Vercel、Cloudflare、Google Labs、Hugging Face等顶级团队
-- **本地技能**：61个精选中文技能，涵盖内容创作、视频制作、电商营销等垂直领域
+- **本地技能**：62个精选中文技能，涵盖内容创作、视频制作、电商营销、数据分析等垂直领域
 
 ### 🏷️ 智能分类
 按照功能、来源、Star数量等多维度标签进行分类整理。
@@ -47,7 +47,7 @@
 - **Better Auth** (3个) - best-practices, commands, create-auth
 - **其他团队** - Tinybird, Remotion, Inngest 等
 
-## 💾 本地技能库（61个）
+## 💾 本地技能库（62个）
 
 ### 📝 内容创作与发布（10个）
 - **content-creation-publisher** ⭐⭐⭐⭐⭐ - 内容创作与发布全流程
@@ -99,7 +99,8 @@
 - **dream-video-prompt-generator** ⭐⭐ - 即梦视频提示词
 - **agentkit-multimedia-shopping** ⭐⭐ - 多媒体带货视频
 
-### 📄 文档与分析（4个）
+### 📄 文档与分析（5个）
+- **data-flow-skill** ⭐⭐⭐⭐⭐ - 数据分析全流程：数据类型检测、统计分析、可视化、报告与幻灯片生成
 - **paper-analysis-assistant** ⭐⭐⭐⭐ - arXiv论文分析
 - **contract-review** ⭐⭐⭐ - 合同审核
 - **law-to-markdown** ⭐⭐ - 法律文档转换
diff --git a/data/last_update.txt b/data/last_update.txt
index 6f1fcba..22bec20 100644
--- a/data/last_update.txt
+++ b/data/last_update.txt
@@ -1 +1 @@
-2026-02-02T17:07:33.826738
\ No newline at end of file
+2026-05-31T00:00:00
diff --git a/data/local_skills.json b/data/local_skills.json
index 1714c8d..82baa23 100644
--- a/data/local_skills.json
+++ b/data/local_skills.json
@@ -1,10 +1,10 @@
 {
   "metadata": {
-    "total_skills": 61,
-    "core_skills": 25,
+    "total_skills": 62,
+    "core_skills": 26,
     "sub_skills": 30,
     "system_builtin": 6,
-    "last_updated": "2026-02-11T15:30:00",
+    "last_updated": "2026-05-31T00:00:00",
     "location": "D:\\tool\\skills",
     "backup_location": "D:\\tool\\skills\\skill压缩文件",
     "backup_coverage": "100%",
@@ -80,12 +80,13 @@
       ]
     },
     "文档与分析": {
-      "count": 4,
+      "count": 5,
       "skills": [
         "paper-analysis-assistant",
         "contract-review",
         "law-to-markdown",
-        "stock-analysis"
+        "stock-analysis",
+        "data-flow-skill"
       ]
     },
     "智能体协作": {
@@ -148,10 +149,10 @@
     "by_api_requirement": {
       "必需API": 15,
       "可选API": 20,
-      "完全免费": 26
+      "完全免费": 27
     },
     "by_usage_frequency": {
-      "⭐⭐⭐⭐⭐": 10,
+      "⭐⭐⭐⭐⭐": 11,
       "⭐⭐⭐⭐": 18,
       "⭐⭐⭐": 21,
       "⭐⭐": 12
@@ -188,10 +189,25 @@
         "category": "数字人与视频配音",
         "rating": 5,
         "description": "音频驱动的稀疏帧视频配音工具，支持音频驱动的Video-to-Video和Image-to-Video"
+      },
+      {
+        "name": "data-flow-skill",
+        "category": "文档与分析",
+        "rating": 5,
+        "description": "数据分析全流程 Agent Skill，覆盖数据类型检测、预处理、统计分析、可视化、报告和幻灯片生成"
       }
     ]
   },
   "update_log": [
+    {
+      "date": "2026-05-31",
+      "version": "2.1",
+      "changes": [
+        "新增 data-flow-skill：数据类型检测、统计分析、可视化、报告和幻灯片生成一体化技能",
+        "本地技能总数从61个增加到62个",
+        "文档与分析分类从4个增加到5个"
+      ]
+    },
     {
       "date": "2026-02-11",
       "version": "2.0",
diff --git a/public/data/last_update.txt b/public/data/last_update.txt
index 6f1fcba..22bec20 100644
--- a/public/data/last_update.txt
+++ b/public/data/last_update.txt
@@ -1 +1 @@
-2026-02-02T17:07:33.826738
\ No newline at end of file
+2026-05-31T00:00:00
diff --git a/public/data/local_skills.json b/public/data/local_skills.json
index 1714c8d..82baa23 100644
--- a/public/data/local_skills.json
+++ b/public/data/local_skills.json
@@ -1,10 +1,10 @@
 {
   "metadata": {
-    "total_skills": 61,
-    "core_skills": 25,
+    "total_skills": 62,
+    "core_skills": 26,
     "sub_skills": 30,
     "system_builtin": 6,
-    "last_updated": "2026-02-11T15:30:00",
+    "last_updated": "2026-05-31T00:00:00",
     "location": "D:\\tool\\skills",
     "backup_location": "D:\\tool\\skills\\skill压缩文件",
     "backup_coverage": "100%",
@@ -80,12 +80,13 @@
       ]
     },
     "文档与分析": {
-      "count": 4,
+      "count": 5,
       "skills": [
         "paper-analysis-assistant",
         "contract-review",
         "law-to-markdown",
-        "stock-analysis"
+        "stock-analysis",
+        "data-flow-skill"
       ]
     },
     "智能体协作": {
@@ -148,10 +149,10 @@
     "by_api_requirement": {
       "必需API": 15,
       "可选API": 20,
-      "完全免费": 26
+      "完全免费": 27
     },
     "by_usage_frequency": {
-      "⭐⭐⭐⭐⭐": 10,
+      "⭐⭐⭐⭐⭐": 11,
       "⭐⭐⭐⭐": 18,
       "⭐⭐⭐": 21,
       "⭐⭐": 12
@@ -188,10 +189,25 @@
         "category": "数字人与视频配音",
         "rating": 5,
         "description": "音频驱动的稀疏帧视频配音工具，支持音频驱动的Video-to-Video和Image-to-Video"
+      },
+      {
+        "name": "data-flow-skill",
+        "category": "文档与分析",
+        "rating": 5,
+        "description": "数据分析全流程 Agent Skill，覆盖数据类型检测、预处理、统计分析、可视化、报告和幻灯片生成"
       }
     ]
   },
   "update_log": [
+    {
+      "date": "2026-05-31",
+      "version": "2.1",
+      "changes": [
+        "新增 data-flow-skill：数据类型检测、统计分析、可视化、报告和幻灯片生成一体化技能",
+        "本地技能总数从61个增加到62个",
+        "文档与分析分类从4个增加到5个"
+      ]
+    },
     {
       "date": "2026-02-11",
       "version": "2.0",
diff --git a/skills/data-flow-skill/data-flow-skill/README.md b/skills/data-flow-skill/data-flow-skill/README.md
new file mode 100644
index 0000000..3fdabbd
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/README.md
@@ -0,0 +1,53 @@
+# data-flow-skill
+
+面向数据分析任务的一站式 Agent Skill，覆盖“数据类型检测 → 数据理解 → 预处理 → 统计分析 → 可视化图表生成 → 正式报告生成 → 幻灯片生成”的端到端链路。
+
+## 适用场景
+
+- 课程作业/论文实验的可复现分析管线
+- CSV、Excel、JSON、文本语料等数据集的探索性分析
+- 问卷、量表、时间序列、文学语料等专项分析
+- SEO/GEO、内容表现、营销数据和业务指标报告
+- 报告与幻灯片的结构化生成
+
+## 核心特性
+
+- 数据类型检测与策略分派：`tabular_generic`、`questionnaire`、`time_series`、`literary`
+- 结构化产物契约：检测、画像、预处理日志、图表计划、分析发现和报告上下文
+- 可视化规划：围绕问题选择趋势、分布、比较、关系、构成和异常类图表
+- 报告与幻灯片生成：从结构化发现中组织正式报告和演示材料
+- 质量约束：子任务拆分、源数据保护、预处理留痕、结论证据化
+- Python 脚本支持：`scripts/analysis/`、`scripts/visualization/`、`scripts/mermaid/`、`scripts/image_gen/`
+
+## 脚本与依赖
+
+可复用脚本已迁移到 `scripts/` 目录。运行前建议安装依赖：
+
+```bash
+pip install -r requirements.txt
+```
+
+详细说明见 `scripts/README.md`。
+
+## 推荐输出结构
+
+```text
+output/
+  figures/
+  tables/
+  report/
+  slides/
+  artifacts/
+    dataset_detection.json
+    data_profile.json
+    preprocessing_log.json
+    visualization_plan.json
+    analysis_findings.json
+    report_context.json
+```
+
+## 使用方式
+
+在 Agent 环境中，当用户提供数据文件并要求分析、绘图、报告或幻灯片时，调用本 Skill。正式分析前应先生成 `plan.md` 并等待用户确认。
+
+详细流程见 `SKILL.md` 与 `references/` 目录。
\ No newline at end of file
diff --git a/skills/data-flow-skill/data-flow-skill/SKILL.md b/skills/data-flow-skill/data-flow-skill/SKILL.md
new file mode 100644
index 0000000..674c37e
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/SKILL.md
@@ -0,0 +1,266 @@
+---
+name: data-flow-skill
+description: 面向数据分析任务的一站式 Agent Skill，覆盖数据类型检测、数据理解、预处理、统计分析、可视化图表生成、正式报告和幻灯片输出。适用于课程作业、论文实验、业务分析、SEO/GEO 数据报告和可追溯数据分析工作流。
+dependency:
+  python:
+    - pandas>=2.0.0
+    - numpy>=1.24.0
+    - matplotlib>=3.7.0
+    - scipy>=1.10.0
+  optional:
+    - Node.js 与 @mermaid-js/mermaid-cli 用于 Mermaid 本地渲染
+    - LaTeX 用于报告或幻灯片 PDF 编译
+    - DASHSCOPE_API_KEY 用于主题示意图生成
+---
+
+# Data Flow Skill
+
+## 任务目标
+
+本 Skill 用于将用户提供的数据集转化为可追溯的数据分析产物，覆盖“数据类型检测 → 数据理解 → 预处理 → 统计分析 → 可视化图表 → 报告 → 幻灯片”的端到端流程。
+
+适用场景包括：
+
+- 课程作业或论文实验中的可复现数据分析
+- CSV、Excel、JSON、文本语料等数据集的探索性分析
+- 问卷、量表、时间序列和文学语料的专项分析
+- SEO/GEO、内容表现、营销数据和业务指标报告
+- 将分析结果整理为正式报告、PPT 或演示材料
+
+## 触发条件
+
+当用户提出以下需求时使用本 Skill：
+
+- “分析这个数据集”
+- “根据 CSV/Excel 生成图表和报告”
+- “帮我判断数据类型并做统计分析”
+- “把分析结果整理成论文/课程报告”
+- “根据数据生成 PPT 或幻灯片”
+- “分析 SEO/GEO 指标、Search Console 导出或业务表现数据”
+
+如果用户需要实时网页抓取、SERP 采集、API 拉取或站点爬虫，应先使用相应的数据收集技能，再使用本 Skill 进行分析。
+
+## 核心能力
+
+1. **数据类型检测**：识别 `tabular_generic`、`questionnaire`、`time_series`、`literary` 四类主要数据策略。
+2. **数据画像**：检查字段类型、缺失值、重复值、异常值、时间覆盖、类别分布和指标含义。
+3. **透明预处理**：保留源数据，不静默修改文件，所有清洗动作写入日志。
+4. **分任务分析**：将数据处理、统计分析、图表生成和发现提炼拆成可检查的小任务。
+5. **可视化生成**：根据问题选择趋势、分布、比较、关系、构成和异常类图表。
+6. **结构化发现**：为每条发现记录证据、来源、限制、置信度和建议动作。
+7. **报告与幻灯片**：从结构化产物生成正式报告和 slide-ready 输出。
+
+## 标准工作流
+
+### 1. 确认任务上下文
+
+先确认以下信息：
+
+- 数据文件或目录路径
+- 分析目标和要回答的问题
+- 受众：课程、论文、业务汇报、SEO 团队或管理层
+- 交付形式：探索性分析、图表包、正式报告、PPT 或全部产物
+- 语言、风格和格式要求
+- 是否允许自动预处理、是否有字段含义或指标公式说明
+
+### 2. 检测数据类型
+
+对输入数据进行策略识别，并将结果写入：
+
+```text
+output/artifacts/dataset_detection.json
+```
+
+检测结果应包含：
+
+- `strategy`：主策略
+- `confidence`：置信度
+- `evidence`：判断依据
+- `alternatives`：备选策略
+- `assumptions`：假设
+- `fallback_plan`：回退方案
+
+### 3. 制定并确认计划
+
+正式分析前生成 `plan.md`，内容包括：
+
+- 分析目标
+- 数据摘要
+- 数据类型策略
+- 开放问题和假设
+- 预处理规则
+- 分析任务拆分
+- 可视化计划
+- 预期输出
+- 风险与校验点
+
+在用户确认计划前，不进入正式分析、报告生成或幻灯片生成。
+
+### 4. 数据理解与画像
+
+读取数据后生成：
+
+```text
+output/artifacts/data_profile.json
+```
+
+至少检查：
+
+- 文件类型、编码和解析问题
+- 行数、列数、字段类型
+- 缺失值、重复值、异常值
+- 数值范围和类别分布
+- 时间字段、时间粒度和覆盖范围
+- 指标定义、单位和方向
+
+### 5. 透明预处理
+
+不得直接覆盖原始数据。所有预处理动作写入：
+
+```text
+output/artifacts/preprocessing_log.json
+```
+
+常见动作包括：
+
+- 字段名标准化
+- 日期解析
+- 数值格式转换
+- 缺失值处理
+- 重复记录处理
+- 类别归一化
+- 派生指标计算
+
+### 6. 分策略分析
+
+- `tabular_generic`：描述统计、分组比较、相关性、异常点、业务含义。
+- `questionnaire`：量表方向、选项分布、组间差异、开放题归纳、信度检查。
+- `time_series`：趋势、季节性、峰值、下降点、同比/环比、异常时段。
+- `literary`：篇章结构、人物/地点/主题、词频、共现关系、情绪和风格特征。
+
+### 7. 可视化规划与生成
+
+先写入：
+
+```text
+output/artifacts/visualization_plan.json
+```
+
+每张图需要说明：
+
+- 图表标题
+- 回答的问题
+- 输入数据
+- 变量和筛选条件
+- 图表类型
+- 选择原因
+- 输出路径
+- 解读要点
+
+图表输出到：
+
+```text
+output/figures/
+```
+
+### 8. 生成结构化发现
+
+将结论写入：
+
+```text
+output/artifacts/analysis_findings.json
+```
+
+每条发现应包含：
+
+- 结论 claim
+- 证据 evidence
+- 来源图表或表格路径
+- 适用范围 scope
+- 限制 limitation
+- 置信度 confidence
+- 建议动作 recommendation
+
+### 9. 生成报告和幻灯片
+
+报告应基于结构化产物，而不是重新从原始数据开始分析。建议报告结构：
+
+- 执行摘要
+- 数据来源与质量说明
+- 方法与预处理说明
+- 关键指标与趋势
+- 分组/策略分析
+- 图表证据
+- 结论与建议
+- 局限性和附录
+
+幻灯片应基于 `analysis_findings.json` 和 `report_context.json`，围绕受众、演示目标、叙事主线和关键图表组织。
+
+## 输出目录约定
+
+推荐输出结构：
+
+```text
+output/
+  figures/
+  tables/
+  report/
+  slides/
+  artifacts/
+    dataset_detection.json
+    data_profile.json
+    preprocessing_log.json
+    visualization_plan.json
+    analysis_findings.json
+    report_context.json
+```
+
+## 质量检查
+
+交付前确认：
+
+- 数据类型已检测并记录
+- `plan.md` 已经用户确认
+- 字段含义、时间窗口和指标方向已明确或列为假设
+- 源数据未被静默修改
+- 预处理动作已记录
+- 分析任务已拆分
+- 图表有明确问题和解读
+- 发现包含证据、限制和置信度
+- 报告和幻灯片基于结构化产物
+- 结论没有超出数据证据
+
+## 资源索引
+
+- 工作流说明：见 [references/workflow.md](references/workflow.md)
+- 数据类型策略：见 [references/data-types.md](references/data-types.md)
+- 可视化规范：见 [references/visualization.md](references/visualization.md)
+- 报告生成规范：见 [references/reporting.md](references/reporting.md)
+- 幻灯片生成规范：见 [references/slides.md](references/slides.md)
+- 质量校验清单：见 [references/validation.md](references/validation.md)
+
+## 使用示例
+
+用户请求：
+
+```text
+请分析这个 Google Search Console 导出的 CSV，并生成 SEO 表现报告和几张关键图表。
+```
+
+执行摘要：
+
+1. 确认文件路径、日期范围、站点、受众和报告格式。
+2. 检测数据类型为 `tabular_generic` 或 `time_series`。
+3. 创建 `plan.md` 并等待确认。
+4. 分析 query、page、country、device、clicks、impressions、CTR 和 average position。
+5. 检查缺失值、时间覆盖、重复记录和分组覆盖。
+6. 生成趋势图、页面贡献图、查询机会图和设备/国家对比图。
+7. 保存结构化发现并生成正式报告。
+
+## 注意事项
+
+- 不要把描述性相关关系写成因果结论。
+- 不要在未说明的情况下修改源数据。
+- 不要用一个脚本完成读取、清洗、分析、绘图和报告的所有步骤。
+- 不要堆叠图表而不写解释。
+- 对 SEO/GEO 数据，应区分“数据观察”“可能解释”“优化建议”和“需要额外爬取/审计验证的事项”。
diff --git a/skills/data-flow-skill/data-flow-skill/references/data-types.md b/skills/data-flow-skill/data-flow-skill/references/data-types.md
new file mode 100644
index 0000000..5229318
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/references/data-types.md
@@ -0,0 +1,21 @@
+# Dataset Type Reference
+
+## `tabular_generic`
+
+Use for structured CSV, TSV, XLSX, JSON tables, analytics exports, keyword sheets, ranking tables, content inventories, and business metrics.
+
+## `questionnaire`
+
+Use for surveys, scales, feedback forms, and mixed closed/open-ended responses. Confirm scale direction, response coding, skip logic, and grouping variables.
+
+## `time_series`
+
+Use when date/time is central: daily metrics, ranking trends, traffic logs, revenue by period, or repeated observations. Confirm time zone, granularity, gaps, and period comparability.
+
+## `literary`
+
+Use for novels, poems, scripts, dialogues, essays, lyrics, and other text corpora. Confirm corpus boundaries, metadata, segmentation units, and interpretation scope.
+
+## Detection Artifact
+
+`dataset_detection.json` should record `strategy`, `confidence`, `evidence`, `alternatives`, `assumptions`, and `fallback_plan`.
\ No newline at end of file
diff --git a/skills/data-flow-skill/data-flow-skill/references/reporting.md b/skills/data-flow-skill/data-flow-skill/references/reporting.md
new file mode 100644
index 0000000..4f11621
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/references/reporting.md
@@ -0,0 +1,19 @@
+# Reporting Reference
+
+Reports must be assembled from validated artifacts, not from untracked memory.
+
+Recommended structure:
+
+1. Executive summary.
+2. Objective, audience, and scope.
+3. Data sources and quality notes.
+4. Methodology and preprocessing summary.
+5. Key metrics and descriptive statistics.
+6. Segment, trend, comparison, or model results.
+7. Visual evidence with interpretations.
+8. Findings, recommendations, limitations, and next questions.
+9. Appendix with artifact paths, formulas, and assumptions.
+
+Each finding should include claim, evidence, artifact path, scope, limitation, confidence level, and recommended action.
+
+For SEO/GEO reporting, separate observed data, plausible explanations, optimization opportunities, and follow-up work requiring crawl, SERP, rank, content, or authority review.
\ No newline at end of file
diff --git a/skills/data-flow-skill/data-flow-skill/references/slides.md b/skills/data-flow-skill/data-flow-skill/references/slides.md
new file mode 100644
index 0000000..4210d88
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/references/slides.md
@@ -0,0 +1,23 @@
+# Slides Reference
+
+Slides should summarize validated findings for a specific audience and decision moment.
+
+Before creating slides, derive or save `output/artifacts/report_context.json` with audience, presentation goal, narrative arc, key messages, figure/table references, known limitations, and speaker-note preference.
+
+Recommended deck shape:
+
+1. Title and decision question.
+2. Executive takeaway.
+3. Data scope and method in one slide.
+4. Three to five evidence slides with clear chart references.
+5. Recommendation or action-priority slide.
+6. Risks, limitations, and next steps.
+7. Appendix for detailed tables or definitions.
+
+Rules:
+
+- Do not restart analysis from raw data unless asked.
+- Use `analysis_findings.json` as source of truth.
+- Keep one primary message per slide.
+- Pair charts with a takeaway and optional speaker notes.
+- Do not hide caveats that affect interpretation.
\ No newline at end of file
diff --git a/skills/data-flow-skill/data-flow-skill/references/validation.md b/skills/data-flow-skill/data-flow-skill/references/validation.md
new file mode 100644
index 0000000..bdaa0da
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/references/validation.md
@@ -0,0 +1,26 @@
+# Validation Reference
+
+Before handoff, verify:
+
+- Dataset strategy selected and documented.
+- User confirmed `plan.md` before formal analysis.
+- Field semantics, metric definitions, time windows, and scale directions are confirmed or listed as assumptions.
+- Source data preserved.
+- Preprocessing actions logged.
+- Analysis split into small, inspectable tasks.
+- Visualization plan created before final chart generation.
+- Key charts include interpretation and limitations.
+- Findings include claim, evidence, artifact path, scope, limitation, confidence, and action.
+- Reports and slides are built from structured artifacts.
+- Claims do not exceed evidence.
+- Output paths are predictable and included in the handoff summary.
+
+Common failure modes:
+
+- Treating descriptive correlations as causal proof.
+- Cleaning data without recording changes.
+- Running one monolithic script for the whole workflow.
+- Generating chart galleries without narrative.
+- Ignoring scale direction in questionnaire data.
+- Comparing time periods with different coverage or leakage.
+- Mixing live web collection into analysis without explicit permission.
\ No newline at end of file
diff --git a/skills/data-flow-skill/data-flow-skill/references/visualization.md b/skills/data-flow-skill/data-flow-skill/references/visualization.md
new file mode 100644
index 0000000..d641b97
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/references/visualization.md
@@ -0,0 +1,24 @@
+# Visualization Reference
+
+Create `output/artifacts/visualization_plan.json` before final chart generation.
+
+Each chart should define:
+
+- Title and question answered.
+- Input artifact or table.
+- Variables and filters.
+- Chart type and reason for choosing it.
+- Output path.
+- Interpretation notes and limitations.
+
+Chart families:
+
+- Overview: KPI cards, summary tables, bars.
+- Trend: line charts, rolling averages, annotated events.
+- Distribution: histograms, box plots, violin plots.
+- Comparison: grouped bars, dot plots, slope charts.
+- Relationship: scatter plots, bubble charts, heatmaps.
+- Composition: stacked bars, treemaps, area charts.
+- Anomaly: highlighted outliers and before/after panels.
+
+Every key chart needs a plain-language takeaway. Avoid chart galleries without narrative.
\ No newline at end of file
diff --git a/skills/data-flow-skill/data-flow-skill/references/workflow.md b/skills/data-flow-skill/data-flow-skill/references/workflow.md
new file mode 100644
index 0000000..20d186a
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/references/workflow.md
@@ -0,0 +1,16 @@
+# Workflow Reference
+
+Data Flow Skill follows this stage order:
+
+1. Confirm dataset path, task goal, audience, deliverables, language, and style.
+2. Detect dataset strategy and save `output/artifacts/dataset_detection.json`.
+3. Draft `plan.md` and wait for user confirmation.
+4. Profile raw data and save `output/artifacts/data_profile.json`.
+5. Preprocess transparently and log changes.
+6. Run analysis in small, inspectable task units.
+7. Create `output/artifacts/visualization_plan.json` before final charts.
+8. Save evidence-backed findings to `output/artifacts/analysis_findings.json`.
+9. Build reports and slides from validated artifacts.
+10. End with a concise handoff summary.
+
+`plan.md` should include objective, audience, dataset summary, detected strategy, assumptions, preprocessing plan, analysis tasks, visualization outline, expected outputs, risks, and validation checkpoints.
\ No newline at end of file
diff --git a/skills/data-flow-skill/data-flow-skill/requirements.txt b/skills/data-flow-skill/data-flow-skill/requirements.txt
new file mode 100644
index 0000000..9c95b6a
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/requirements.txt
@@ -0,0 +1,6 @@
+pandas>=2.0.0
+numpy>=1.24.0
+matplotlib>=3.7.0
+scipy>=1.10.0
+scikit-learn>=1.3.0
+requests>=2.31.0
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/README.md b/skills/data-flow-skill/data-flow-skill/scripts/README.md
new file mode 100644
index 0000000..30bff1d
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/README.md
@@ -0,0 +1,34 @@
+# Scripts
+
+本目录包含从原 dataflow 项目迁移来的可复用 Python 脚本。
+
+## 目录结构
+
+```text
+scripts/
+  analysis/                    # 数据检测、画像、预处理、统计分析、发现生成
+  visualization/matplotlib/     # Matplotlib 静态图模板
+  mermaid/                      # Mermaid 流程图生成与渲染辅助
+  image_gen/                    # 主题示意图生成 CLI
+```
+
+## 安装依赖
+
+在 `data-flow-skill` 目录下运行：
+
+```bash
+pip install -r requirements.txt
+```
+
+可选依赖：
+
+- Node.js 与 `@mermaid-js/mermaid-cli`：用于 Mermaid 本地渲染。
+- LaTeX：用于启用 `text.usetex` 的 Matplotlib 模板或 PDF 编译。
+- `DASHSCOPE_API_KEY`：用于 `image_gen/image_generator.py` 调用图片生成服务。
+
+## 使用约束
+
+- 不要用一个脚本完成读取、清洗、分析、绘图和报告的完整链路。
+- 每个脚本应服务一个明确子任务。
+- 运行脚本前确认输入路径和输出路径。
+- 输出建议写入 `output/` 下的对应子目录。
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/analysis/__init__.py b/skills/data-flow-skill/data-flow-skill/scripts/analysis/__init__.py
new file mode 100644
index 0000000..5896671
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/analysis/__init__.py
@@ -0,0 +1,2 @@
+"""Analysis-stage script modules."""
+
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_data_understanding.py b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_data_understanding.py
new file mode 100644
index 0000000..4e81811
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_data_understanding.py
@@ -0,0 +1,145 @@
+"""Dataset profiling helpers."""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from datetime import datetime
+from typing import Any
+
+
+TARGET_MARKERS = ("target", "label", "\u76ee\u6807")  # substrings that flag a candidate target column; the escape is Chinese for "target"
+CLUSTER_MARKERS = ("cluster", "\u805a\u7c7b")  # task-description keywords; the escape is Chinese for "clustering"
+DIMENSION_REDUCTION_MARKERS = ("dimension reduction", "\u964d\u7ef4")  # task-description keywords; the escape is Chinese for "dimensionality reduction"
+
+
+@dataclass(slots=True)
+class FieldSummary:  # Per-column profile record; build_data_profile serializes it with asdict().
+    field_name: str  # column name taken from the sample rows
+    inferred_type: str  # label returned by infer_type() for the column
+    missing_rate: float  # missing values / sampled rows, rounded to 4 decimal places
+    is_candidate_target: bool = False  # column name contains one of TARGET_MARKERS
+    is_candidate_group: bool = False  # inferred_type is "categorical"
+    is_candidate_time: bool = False  # inferred_type is "datetime"
+    is_high_cardinality: bool = False  # categorical with >= max(20, row_count // 5) distinct values
+
+
+def is_missing_value(value: Any) -> bool:
+    """Return True for None, float NaN, and empty or whitespace-only strings."""
+    # NaN is the only float that compares unequal to itself, so no math import is needed.
+    if value is None or (isinstance(value, float) and value != value):
+        return True
+    return isinstance(value, str) and value.strip() == ""
+
+
+def infer_type(sample_values: list[Any]) -> str:
+    """Classify a column sample as unknown, boolean, numeric, datetime, categorical or text."""
+    present = [item for item in sample_values if not is_missing_value(item)]
+    if not present:
+        return "unknown"
+
+    # Native Python types are settled first; bool is excluded from numeric because it subclasses int.
+    if all(isinstance(item, bool) for item in present):
+        return "boolean"
+    if all(isinstance(item, (int, float)) and not isinstance(item, bool) for item in present):
+        return "numeric"
+
+    def parses_as_number(text: str) -> bool:
+        try:
+            float(text)
+        except ValueError:
+            return False
+        return True
+
+    def parses_as_date(text: str) -> bool:
+        for pattern in ("%Y-%m-%d", "%Y/%m/%d", "%Y-%m", "%Y/%m", "%d/%m/%Y"):
+            try:
+                datetime.strptime(text, pattern)
+            except ValueError:
+                continue
+            return True
+        return False
+
+    texts = [str(item).strip() for item in present]
+    if all(text.lower() in {"true", "false", "yes", "no", "0", "1"} for text in texts):
+        return "boolean"
+    if sum(map(parses_as_number, texts)) == len(texts):
+        return "numeric"
+    if sum(map(parses_as_date, texts)) >= max(1, len(texts) // 2):
+        return "datetime"
+    if len(set(texts)) <= max(12, len(texts) // 3):
+        return "categorical"
+    return "text"
+
+
+def build_data_profile(
+    sample_rows: list[dict[str, Any]] | None = None,
+    task_description: str = "",
+) -> dict[str, Any]:
+    """Profile sampled rows: per-field type, missing rate, flags and analysis hints.
+
+    Fields are the union of keys across all rows (first-seen order), not just the first row.
+    """
+    sample_rows = sample_rows or []
+    if not sample_rows:
+        return {
+            "schema": [],
+            "data_profile": {
+                "row_count": 0,
+                "column_count": 0,
+                "field_summaries": [],
+                "missing_rate_overview": {},
+                "constant_columns": [],
+                "high_missing_fields": [],
+                "analysis_opportunities": [],
+            },
+        }
+
+    field_names = list(dict.fromkeys(key for row in sample_rows for key in row))  # ordered union
+    row_count = len(sample_rows)  # non-zero here: the empty case returned above
+    schema: list[dict[str, Any]] = []
+    constant_columns: list[str] = []
+    high_missing_fields: list[str] = []
+
+    for field_name in field_names:
+        column_values = [row.get(field_name) for row in sample_rows]
+        non_missing_values = [value for value in column_values if not is_missing_value(value)]
+        missing_rate = (row_count - len(non_missing_values)) / row_count
+        inferred_type = infer_type(column_values)
+        unique_value_count = len({str(value) for value in non_missing_values})
+
+        if non_missing_values and unique_value_count == 1:
+            constant_columns.append(field_name)
+        if missing_rate >= 0.3:
+            high_missing_fields.append(field_name)
+
+        summary = FieldSummary(
+            field_name=field_name,
+            inferred_type=inferred_type,
+            missing_rate=round(missing_rate, 4),
+            is_candidate_target=any(marker in field_name.lower() for marker in TARGET_MARKERS),  # markers are lowercase/caseless
+            is_candidate_group=inferred_type == "categorical",
+            is_candidate_time=inferred_type == "datetime",
+            is_high_cardinality=inferred_type == "categorical" and unique_value_count >= max(20, row_count // 5),
+        )
+        schema.append(asdict(summary))
+
+    analysis_opportunities: list[str] = []
+    if any(field["inferred_type"] == "datetime" for field in schema):
+        analysis_opportunities.append("Trend analysis is available.")
+    if sum(field["inferred_type"] == "numeric" for field in schema) >= 2:
+        analysis_opportunities.append("Correlation analysis is available.")
+    if any(marker in task_description.lower() for marker in CLUSTER_MARKERS + DIMENSION_REDUCTION_MARKERS):
+        analysis_opportunities.append("The task explicitly requests clustering or dimensionality reduction.")
+
+    return {
+        "schema": schema,
+        "data_profile": {
+            "row_count": row_count,
+            "column_count": len(field_names),
+            "field_summaries": schema,
+            "missing_rate_overview": {field["field_name"]: field["missing_rate"] for field in schema},
+            "constant_columns": constant_columns,
+            "high_missing_fields": high_missing_fields,
+            "analysis_opportunities": analysis_opportunities,
+        },
+    }
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_dataset_detection.py b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_dataset_detection.py
new file mode 100644
index 0000000..889b916
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_dataset_detection.py
@@ -0,0 +1,195 @@
+"""Dataset type detection helpers."""
+
+from __future__ import annotations
+
+import csv
+import re
+from dataclasses import asdict, dataclass
+from pathlib import Path
+from typing import Any
+
+
+SURVEY_COLUMN_PATTERN = re.compile(  # header keywords that hint at a questionnaire column
+    r"(\u6ee1\u610f|\u95ee\u5377|\u9898\u76ee|\u8bc4\u5206|\u5206\u503c|\u5efa\u8bae|\u53cd\u9988|likert|score|survey|question|q\d+|\u662f\u5426|\u5e74\u7ea7|\u73ed\u7ea7|\u90e8\u95e8)",  # CJK terms: satisfied, questionnaire, question item, rating, score value, suggestion, feedback, whether, grade, class, department
+    re.IGNORECASE,
+)
+TIME_COLUMN_PATTERN = re.compile(  # header keywords that hint at a date/time column
+    r"(date|time|year|month|day|week|timestamp|\u65e5\u671f|\u65f6\u95f4|\u5e74\u6708|\u5b63\u5ea6|\u5468|\u65f6\u70b9)",  # CJK terms: date, time, year-month, quarter, week, time point
+    re.IGNORECASE,
+)
+LITERARY_TITLE_PATTERN = re.compile(  # numbered chapter headings (two CJK chapter words) or couplet labels
+    r"(\u7b2c[\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u767e\u96f6\u3007\u4e24\d]+\u56de|\u7b2c[\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u767e\u96f6\u3007\u4e24\d]+\u7ae0|\u5bf9\u4e0b\u8054|\u4e0a\u8054|\u4e0b\u8054)"
+)
+POETRY_PATTERN = re.compile(  # names of classical verse forms and tune titles
+    r"(\u4e03\u5f8b|\u4e94\u5f8b|\u7edd\u53e5|\u8bcd\u724c|\u6d63\u6eaa\u6c99|\u6c34\u8c03\u6b4c\u5934|\u9e67\u9e2a\u5929|\u8776\u604b\u82b1)"
+)
+SURVEY_VALUE_MARKERS = (  # substrings typical of questionnaire answers
+    "\u6ee1\u610f",  # satisfied
+    "\u4e00\u822c",  # average
+    "\u4e0d\u540c\u610f",  # disagree
+    "\u540c\u610f",  # agree
+    "\u975e\u5e38",  # very
+    "\u662f",  # yes
+    "\u5426",  # no
+    "\u7537",  # male
+    "\u5973",  # female
+)
+COUPLET_MARKERS = ("\u5bf9\u4e0b\u8054", "\u4e0a\u8054", "\u4e0b\u8054")  # couplet labels: "match the second line", "first line", "second line"
+SALES_MARKERS = ("\u9500\u91cf", "\u9500\u552e", "price", "amount")  # CJK "sales volume" and "sales", plus English price/amount
+
+
+@dataclass(slots=True)
+class DetectionResult:  # outcome of detect_dataset(); returned to callers via asdict()
+    primary_type: str  # "literary", "questionnaire", "time_series", "tabular_generic" or "unknown"
+    subtype: str | None  # finer label such as "couplet" or "structured_questionnaire"; None for unknown files
+    confidence: float  # fixed heuristic score attached by the rule that matched
+    fallback_type: str  # presumably the type to use when the primary one does not work out; verify against consumers
+    signals: list[str]  # human-readable reasons behind the decision
+    file_extension: str  # lower-cased suffix of the inspected path
+    detected_strategy: str  # strategy name selected for the file
+
+
+def read_text_sample(data_path: Path, max_characters: int = 4000) -> str:  # decode a leading byte sample as text
+    sample = data_path.read_bytes()[: 4 * max_characters]  # byte budget: four bytes per requested character
+    for codec_name in ("utf-8", "gb18030", "gbk", "big5"):  # the first codec that decodes strictly wins
+        try:
+            return sample.decode(codec_name)
+        except UnicodeDecodeError:
+            pass
+    return sample.decode("utf-8", errors="ignore")  # last resort: UTF-8 with undecodable bytes dropped
+
+
+def read_table_sample(data_path: Path, max_rows: int = 30) -> tuple[list[str], list[dict[str, Any]]]:
+    """Return ``(headers, rows)`` holding at most ``max_rows`` rows of a CSV; other extensions give ``([], [])``."""
+    if data_path.suffix.lower() != ".csv":
+        return [], []
+    # "utf-8-sig" drops an optional BOM so the first header is not prefixed with U+FEFF.
+    with data_path.open("r", encoding="utf-8-sig", newline="") as file:
+        # restval="" fills short rows with empty strings instead of None (which str() would render as "None").
+        reader = csv.DictReader(file, restval="")
+        rows: list[dict[str, Any]] = [dict(row) for _, row in zip(range(max_rows), reader)]
+        headers = list(reader.fieldnames or [])
+    return headers, rows
+
+
+def is_low_cardinality_survey_column(sample_rows: list[dict[str, Any]], column_name: str) -> bool:
+    """Return True when a column's sampled values look like 1-7 scale codes or a short set of survey answers."""
+    raw_values = (row.get(column_name) for row in sample_rows)  # a cell may be None (e.g. a short csv.DictReader row)
+    values = {text for text in (str(raw).strip() for raw in raw_values if raw is not None) if text}
+    if not values:
+        return False
+    if values <= {"1", "2", "3", "4", "5", "6", "7"}:
+        return True
+    return len(values) <= 7 and any(marker in value for value in values for marker in SURVEY_VALUE_MARKERS)
+
+
+def detect_dataset(data_path: str, task_description: str = "") -> dict[str, Any]:
+    """Classify a data file as literary text, questionnaire, time series or generic table.
+
+    Only the file extension and a small leading sample are inspected.
+
+    Args:
+        data_path: Path of the file to classify.
+        task_description: Not consulted by the detection rules; kept so existing
+            callers that pass it keep working.
+
+    Returns:
+        The fields of a ``DetectionResult`` as a plain dict.
+    """
+    path = Path(data_path)
+    extension = path.suffix.lower()
+    if extension in {".txt", ".md", ".jsonl"}:
+        return _detect_text_dataset(path, extension)
+    if extension in {".csv", ".xlsx"}:
+        return _detect_table_dataset(path, extension)
+    signals = [f"Unsupported extension encountered: {extension or 'no_extension'}"]
+    return _detection_dict("unknown", None, 0.3, signals, extension, "tabular_generic")
+
+
+def _detection_dict(
+    primary_type: str,
+    subtype: str | None,
+    confidence: float,
+    signals: list[str],
+    file_extension: str,
+    detected_strategy: str,
+    fallback_type: str = "tabular_generic",
+) -> dict[str, Any]:
+    """Build a ``DetectionResult`` and return it as a plain dict.
+
+    ``fallback_type`` defaults to the value shared by every non-literary result.
+    """
+    result = DetectionResult(
+        primary_type=primary_type,
+        subtype=subtype,
+        confidence=confidence,
+        fallback_type=fallback_type,
+        signals=signals,
+        file_extension=file_extension,
+        detected_strategy=detected_strategy,
+    )
+    return asdict(result)
+
+
+def _detect_text_dataset(path: Path, extension: str) -> dict[str, Any]:
+    """Classify a text file by the literary markers found in its leading sample.
+
+    Files without chapter or couplet markers are reported as ``unknown`` with a
+    ``tabular_generic`` fallback.
+    """
+    text_sample = read_text_sample(path)
+    if not LITERARY_TITLE_PATTERN.search(text_sample):
+        signals = ["The text file does not expose clear literary structure cues."]
+        return _detection_dict("unknown", None, 0.45, signals, extension, "tabular_generic")
+    # NOTE(review): the poetry check is only reached by text that already matched a
+    # chapter or couplet marker; confirm poetry-only files should stay "unknown".
+    if any(marker in text_sample for marker in COUPLET_MARKERS):
+        subtype, signal = "couplet", "The text contains couplet markers."
+    elif POETRY_PATTERN.search(text_sample):
+        subtype, signal = "poetry", "The text contains poetry-form clues."
+    else:
+        subtype, signal = "novel_or_couplet", "The text contains literary chapter markers."
+    return _detection_dict(
+        "literary", subtype, 0.9, [signal], extension, "literary", fallback_type="unknown"
+    )
+
+
+def _detect_table_dataset(path: Path, extension: str) -> dict[str, Any]:
+    """Classify a table file as questionnaire, time series or generic.
+
+    The rules are tried in that order on the headers and sampled rows returned
+    by ``read_table_sample``; the first rule that matches decides the result.
+    """
+    headers, sample_rows = read_table_sample(path)
+    survey_match_count = sum(bool(SURVEY_COLUMN_PATTERN.search(header)) for header in headers)
+    time_match_columns = [header for header in headers if TIME_COLUMN_PATTERN.search(header)]
+    # Both sample-based counts are naturally 0 when no rows could be sampled.
+    low_cardinality_survey_columns = sum(
+        is_low_cardinality_survey_column(sample_rows, header) for header in headers
+    )
+    long_text_column_count = sum(
+        any(len(str(row.get(header, "")).strip()) >= 24 for row in sample_rows) for header in headers
+    )
+    signals: list[str] = []
+
+    if survey_match_count >= 2 or low_cardinality_survey_columns >= max(3, len(headers) // 3):
+        signals.append(f"Matched {survey_match_count} survey-related header columns.")
+        signals.append(f"Detected {low_cardinality_survey_columns} low-cardinality survey columns in samples.")
+        if long_text_column_count:
+            signals.append(f"Detected {long_text_column_count} possible free-text response columns.")
+        subtype = "mixed_questionnaire" if long_text_column_count else "structured_questionnaire"
+        return _detection_dict("questionnaire", subtype, 0.88, signals, extension, "questionnaire")
+
+    if time_match_columns:
+        signals.append(f"Detected candidate time columns: {time_match_columns}")
+        # SALES_MARKERS are lower-case or CJK, so checking the lower-cased header suffices.
+        if any(marker in header.lower() for header in headers for marker in SALES_MARKERS):
+            signals.append("Candidate time columns co-occur with numeric business metric columns.")
+        return _detection_dict(
+            "time_series", "tabular_time_series", 0.82, signals, extension, "time_series"
+        )
+
+    signals.append("Detected a standard table file without strong questionnaire or time-series signals.")
+    return _detection_dict(
+        "tabular_generic", "generic_spreadsheet", 0.72, signals, extension, "tabular_generic"
+    )
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_findings_generation.py b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_findings_generation.py
new file mode 100644
index 0000000..d8d7357
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_findings_generation.py
@@ -0,0 +1,1503 @@
+import re
+import sys
+import copy
+import types
+import inspect
+import keyword
+import builtins
+import functools
+import itertools
+import abc
+import _thread
+from types import FunctionType, GenericAlias
+
+
+__all__ = ['dataclass',
+           'field',
+           'Field',
+           'FrozenInstanceError',
+           'InitVar',
+           'KW_ONLY',
+           'MISSING',
+
+           # Helper functions.
+           'fields',
+           'asdict',
+           'astuple',
+           'make_dataclass',
+           'replace',
+           'is_dataclass',
+           ]
+
+# Conditions for adding methods.  The boxes indicate what action the
+# dataclass decorator takes.  For all of these tables, when I talk
+# about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm
+# referring to the arguments to the @dataclass decorator.  When
+# checking if a dunder method already exists, I mean check for an
+# entry in the class's __dict__.  I never check to see if an attribute
+# is defined in a base class.
+
+# Key:
+# +=========+=========================================+
+# + Value   | Meaning                                 |
+# +=========+=========================================+
+# | <blank> | No action: no method is added.          |
+# +---------+-----------------------------------------+
+# | add     | Generated method is added.              |
+# +---------+-----------------------------------------+
+# | raise   | TypeError is raised.                    |
+# +---------+-----------------------------------------+
+# | None    | Attribute is set to None.               |
+# +=========+=========================================+
+
+# __init__
+#
+#   +--- init= parameter
+#   |
+#   v     |       |       |
+#         |  no   |  yes  |  <--- class has __init__ in __dict__?
+# +=======+=======+=======+
+# | False |       |       |
+# +-------+-------+-------+
+# | True  | add   |       |  <- the default
+# +=======+=======+=======+
+
+# __repr__
+#
+#    +--- repr= parameter
+#    |
+#    v    |       |       |
+#         |  no   |  yes  |  <--- class has __repr__ in __dict__?
+# +=======+=======+=======+
+# | False |       |       |
+# +-------+-------+-------+
+# | True  | add   |       |  <- the default
+# +=======+=======+=======+
+
+
+# __setattr__
+# __delattr__
+#
+#    +--- frozen= parameter
+#    |
+#    v    |       |       |
+#         |  no   |  yes  |  <--- class has __setattr__ or __delattr__ in __dict__?
+# +=======+=======+=======+
+# | False |       |       |  <- the default
+# +-------+-------+-------+
+# | True  | add   | raise |
+# +=======+=======+=======+
+# Raise because not adding these methods would break the "frozen-ness"
+# of the class.
+
+# __eq__
+#
+#    +--- eq= parameter
+#    |
+#    v    |       |       |
+#         |  no   |  yes  |  <--- class has __eq__ in __dict__?
+# +=======+=======+=======+
+# | False |       |       |
+# +-------+-------+-------+
+# | True  | add   |       |  <- the default
+# +=======+=======+=======+
+
+# __lt__
+# __le__
+# __gt__
+# __ge__
+#
+#    +--- order= parameter
+#    |
+#    v    |       |       |
+#         |  no   |  yes  |  <--- class has any comparison method in __dict__?
+# +=======+=======+=======+
+# | False |       |       |  <- the default
+# +-------+-------+-------+
+# | True  | add   | raise |
+# +=======+=======+=======+
+# Raise because to allow this case would interfere with using
+# functools.total_ordering.
+
+# __hash__
+
+#    +------------------- unsafe_hash= parameter
+#    |       +----------- eq= parameter
+#    |       |       +--- frozen= parameter
+#    |       |       |
+#    v       v       v    |        |        |
+#                         |   no   |  yes   |  <--- class has explicitly defined __hash__
+# +=======+=======+=======+========+========+
+# | False | False | False |        |        | No __eq__, use the base class __hash__
+# +-------+-------+-------+--------+--------+
+# | False | False | True  |        |        | No __eq__, use the base class __hash__
+# +-------+-------+-------+--------+--------+
+# | False | True  | False | None   |        | <-- the default, not hashable
+# +-------+-------+-------+--------+--------+
+# | False | True  | True  | add    |        | Frozen, so hashable, allows override
+# +-------+-------+-------+--------+--------+
+# | True  | False | False | add    | raise  | Has no __eq__, but hashable
+# +-------+-------+-------+--------+--------+
+# | True  | False | True  | add    | raise  | Has no __eq__, but hashable
+# +-------+-------+-------+--------+--------+
+# | True  | True  | False | add    | raise  | Not frozen, but hashable
+# +-------+-------+-------+--------+--------+
+# | True  | True  | True  | add    | raise  | Frozen, so hashable
+# +=======+=======+=======+========+========+
+# For boxes that are blank, __hash__ is untouched and therefore
+# inherited from the base class.  If the base is object, then
+# id-based hashing is used.
+#
+# Note that a class may already have __hash__=None if it specified an
+# __eq__ method in the class body (not one that was created by
+# @dataclass).
+#
+# See _hash_action (below) for a coded version of this table.
+
+# __match_args__
+#
+#    +--- match_args= parameter
+#    |
+#    v    |       |       |
+#         |  no   |  yes  |  <--- class has __match_args__ in __dict__?
+# +=======+=======+=======+
+# | False |       |       |
+# +-------+-------+-------+
+# | True  | add   |       |  <- the default
+# +=======+=======+=======+
+# __match_args__ is always added unless the class already defines it. It is a
+# tuple of __init__ parameter names; non-init fields must be matched by keyword.
+
+
+class FrozenInstanceError(AttributeError):
+    """Raised when an attempt is made to modify a frozen class."""
+
+# Sentinel used as the default of an __init__ parameter whose field has a
+# default factory.  Its repr() ('<factory>') is what shows up in the
+# signature of the generated constructor.
+class _HAS_DEFAULT_FACTORY_CLASS:
+    def __repr__(self):
+        return '<factory>'
+_HAS_DEFAULT_FACTORY = _HAS_DEFAULT_FACTORY_CLASS()
+
+# Sentinel meaning "no value was supplied"; it is tested by identity
+# (``is MISSING``).  A dedicated class makes its repr name the sentinel.
+class _MISSING_TYPE:
+    pass
+MISSING = _MISSING_TYPE()
+
+# Sentinel indicating that the following fields are keyword-only by
+# default; _is_kw_only() recognises it by identity.
+class _KW_ONLY_TYPE:
+    pass
+KW_ONLY = _KW_ONLY_TYPE()
+
+# Since most per-field metadata will be unused, create an empty
+# read-only proxy that can be shared among all fields.
+_EMPTY_METADATA = types.MappingProxyType({})
+
+# Markers for the various kinds of fields and pseudo-fields.
+class _FIELD_BASE:  # a named marker object; instances are compared by identity
+    def __init__(self, name):
+        self.name = name  # text returned by repr()
+    def __repr__(self):
+        return self.name
+_FIELD = _FIELD_BASE('_FIELD')
+_FIELD_CLASSVAR = _FIELD_BASE('_FIELD_CLASSVAR')
+_FIELD_INITVAR = _FIELD_BASE('_FIELD_INITVAR')  # checked in _field_init and _init_fn
+
+# The name of an attribute on the class where we store the Field
+# objects.  Also used to check if a class is a Data Class.
+_FIELDS = '__dataclass_fields__'
+
+# The name of an attribute on the class that stores the parameters to
+# @dataclass.
+_PARAMS = '__dataclass_params__'
+
+# The name of the function, that if it exists, is called at the end of
+# __init__.
+_POST_INIT_NAME = '__post_init__'
+
+# String regex that string annotations for ClassVar or InitVar must match.
+# Allows "identifier.identifier[" or "identifier[".
+# https://bugs.python.org/issue33453 for details.
+_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
+
+# This function's logic is copied from "recursive_repr" function in
+# reprlib module to avoid dependency.
+def _recursive_repr(user_function):
+    """Wrap a repr function so a re-entrant call on the same object yields '...'."""
+    # (object id, thread id) pairs whose repr is currently being computed.
+    active = set()
+
+    @functools.wraps(user_function)
+    def wrapper(self):
+        marker = (id(self), _thread.get_ident())
+        if marker in active:
+            # Already inside this object's repr on this thread: cut the cycle.
+            return '...'
+        active.add(marker)
+        try:
+            return user_function(self)
+        finally:
+            active.discard(marker)
+    return wrapper
+
+class InitVar:
+    """Wrapper recording the declared type of an init-only pseudo-field."""
+    __slots__ = ('type', )
+
+    def __init__(self, type):
+        self.type = type
+
+    def __repr__(self):
+        wrapped = self.type
+        # Plain classes show their __name__; typing objects (e.g. List[int]) show their repr.
+        type_name = wrapped.__name__ if isinstance(wrapped, type) else repr(wrapped)
+        return f'dataclasses.InitVar[{type_name}]'
+
+    def __class_getitem__(cls, type):
+        # Subscripting the class (InitVar[int]) builds an InitVar instance.
+        return InitVar(type)
+
+# Instances of Field are only ever created from within this module,
+# and only from the field() function, although Field instances are
+# exposed externally as (conceptually) read-only objects.
+#
+# name and type are filled in after the fact, not in __init__.
+# They're not known at the time this class is instantiated, but it's
+# convenient if they're available later.
+#
+# When cls._FIELDS is filled in with a list of Field objects, the name
+# and type fields will have been populated.
+class Field:
+    """Description of a single dataclass field, created through field().
+
+    __init__ leaves name, type and _field_type as None; the remaining
+    attributes are taken from the constructor arguments.  A metadata
+    mapping is exposed through a read-only proxy.
+    """
+    __slots__ = ('name',
+                 'type',
+                 'default',
+                 'default_factory',
+                 'repr',
+                 'hash',
+                 'init',
+                 'compare',
+                 'metadata',
+                 'kw_only',
+                 '_field_type',  # Private: not to be used by user code.
+                 )
+
+    def __init__(self, default, default_factory, init, repr, hash, compare,
+                 metadata, kw_only):
+        # Not known yet when the Field is created.
+        self.name = None
+        self.type = None
+        self._field_type = None
+        self.default = default
+        self.default_factory = default_factory
+        self.init = init
+        self.repr = repr
+        self.hash = hash
+        self.compare = compare
+        self.kw_only = kw_only
+        # Fields without metadata all share one empty read-only mapping.
+        if metadata is None:
+            self.metadata = _EMPTY_METADATA
+        else:
+            self.metadata = types.MappingProxyType(metadata)
+
+    @_recursive_repr
+    def __repr__(self):
+        shown = ('name', 'type', 'default', 'default_factory', 'init', 'repr',
+                 'hash', 'compare', 'metadata', 'kw_only')
+        parts = [f'{attr}={getattr(self, attr)!r}' for attr in shown]
+        # _field_type is the one attribute rendered without !r.
+        parts.append(f'_field_type={self._field_type}')
+        return 'Field(' + ','.join(parts) + ')'
+
+    # Support for the PEP 487 __set_name__ protocol when the field's
+    # default value is a descriptor: the call is forwarded to the
+    # __set_name__ found on the default's type.  For details, see
+    # https://peps.python.org/pep-0487/#implementation-details.
+    #
+    # Note that in _process_class, this Field object is overwritten
+    # with the default value, so the end result is a descriptor that
+    # had __set_name__ called on it at the right time.
+    def __set_name__(self, owner, name):
+        set_name = getattr(type(self.default), '__set_name__', None)
+        if set_name:
+            set_name(self.default, owner, name)
+
+    __class_getitem__ = classmethod(GenericAlias)
+
+
+class _DataclassParams:
+    """Record of the arguments given to the @dataclass decorator.
+
+    One slot per argument; the repr lists them in declaration order.
+    """
+    __slots__ = ('init',
+                 'repr',
+                 'eq',
+                 'order',
+                 'unsafe_hash',
+                 'frozen',
+                 )
+
+    def __init__(self, init, repr, eq, order, unsafe_hash, frozen):
+        self.init = init
+        self.repr = repr
+        self.eq = eq
+        self.order = order
+        self.unsafe_hash = unsafe_hash
+        self.frozen = frozen
+
+    def __repr__(self):
+        # Every slot is rendered as "name=value", joined without spaces.
+        rendered = ','.join(f'{slot}={getattr(self, slot)!r}'
+                            for slot in _DataclassParams.__slots__)
+        return f'_DataclassParams({rendered})'
+
+
+# This function is used instead of exposing Field creation directly,
+# so that a type checker can be told (via overloads) that this is a
+# function whose type depends on its parameters.
+def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True,
+          hash=None, compare=True, metadata=None, kw_only=MISSING):
+    """Create the Field object describing one dataclass attribute.
+
+    default: the field's default value.
+    default_factory: zero-argument callable producing the default.
+    init: include the field as a parameter of the generated __init__().
+    repr: include the field in the generated repr().
+    hash: include the field in the generated hash().
+    compare: use the field in the generated comparison methods.
+    metadata: optional mapping, stored but not otherwise examined.
+    kw_only: make the field a keyword-only parameter of __init__().
+
+    Raises ValueError when both default and default_factory are given.
+    """
+
+    has_default = default is not MISSING
+    has_factory = default_factory is not MISSING
+    if has_default and has_factory:
+        raise ValueError('cannot specify both default and default_factory')
+    return Field(default, default_factory, init, repr, hash, compare,
+                 metadata, kw_only)
+
+
+def _fields_in_init_order(fields):
+    """Split __init__ fields into (positional, keyword-only) tuples."""
+    # Fields with init=False are left out; each tuple keeps the relative
+    # order of ``fields``.
+    positional = tuple(f for f in fields if f.init and not f.kw_only)
+    keyword_only = tuple(f for f in fields if f.init and f.kw_only)
+    return (positional, keyword_only)
+
+
+def _tuple_str(obj_name, fields):
+    """Return source text for a tuple of ``obj_name``'s field attributes.
+
+    Fields x and y with obj_name "self" give "(self.x,self.y,)"; the
+    trailing comma keeps a single field a 1-tuple.  No fields gives "()".
+    """
+    if not fields:
+        return '()'
+    members = ','.join(f'{obj_name}.{f.name}' for f in fields)
+    return f'({members},)'
+
+
+def _create_fn(name, args, body, *, globals=None, locals=None,
+               return_type=MISSING):
+    # Build and return function `name` from source text: `args` holds the
+    # parameter strings and `body` the body lines.  Note that `locals` may
+    # be mutated (the return type is stored in it); callers are internal.
+    if locals is None:
+        locals = {}
+    return_annotation = ''
+    if return_type is not MISSING:
+        locals['_return_type'] = return_type
+        return_annotation = '->_return_type'
+    args = ','.join(args)
+    body = '\n'.join(f'  {b}' for b in body)  # two spaces: nested inside the wrapper below
+
+    # Text of the function itself, indented one space inside the wrapper.
+    txt = f' def {name}({args}){return_annotation}:\n{body}'
+
+    local_vars = ', '.join(locals.keys())  # every key of `locals` becomes a wrapper parameter
+    txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}"
+    ns = {}
+    exec(txt, globals, ns)  # defines __create_fn__ in ns
+    return ns['__create_fn__'](**locals)  # calling the wrapper returns the generated function
+
+
+def _field_assign(frozen, name, value, self_name):
+    """Return the source line assigning ``value`` to field ``name``.
+
+    ``self_name`` is the identifier used for "self" in the generated
+    function; it is a parameter because "self" may be a field name.
+    """
+    if not frozen:
+        return f'{self_name}.{name}={value}'
+    # A frozen class is assigned to through object.__setattr__ instead.
+    return f'__dataclass_builtins_object__.__setattr__({self_name},{name!r},{value})'
+
+
+def _field_init(f, frozen, globals, self_name, slots):
+    # Return the source line for the body of __init__ that initializes
+    # field f, or None when no line is needed.  May add entries to `globals`.
+
+    default_name = f'_dflt_{f.name}'  # name under which the default/factory is stored
+    if f.default_factory is not MISSING:
+        if f.init:
+            # This field has a default factory.  If a parameter is
+            # given, use it.  If not, call the factory.
+            globals[default_name] = f.default_factory
+            value = (f'{default_name}() '
+                     f'if {f.name} is _HAS_DEFAULT_FACTORY '
+                     f'else {f.name}')
+        else:
+            # This is a field that's not in the __init__ params, but
+            # has a default factory function.  It needs to be
+            # initialized here by calling the factory function,
+            # because there's no other way to initialize it.
+
+            # For a field initialized with a default=defaultvalue, the
+            # class dict just has the default value
+            # (cls.fieldname=defaultvalue).  But that won't work for a
+            # default factory, the factory must be called in __init__
+            # and we must assign that to self.fieldname.  We can't
+            # fall back to the class dict's value, both because it's
+            # not set, and because it might be different per-class
+            # (which, after all, is why we have a factory function!).
+
+            globals[default_name] = f.default_factory
+            value = f'{default_name}()'
+    else:
+        # No default factory.
+        if f.init:
+            if f.default is MISSING:
+                # There's no default, just do an assignment.
+                value = f.name
+            elif f.default is not MISSING:
+                globals[default_name] = f.default  # referenced by the parameter default in _init_param
+                value = f.name
+        else:
+            # If the class has slots, then initialize this field.
+            if slots and f.default is not MISSING:
+                globals[default_name] = f.default
+                value = default_name
+            else:
+                # This field does not need initialization: reading from it will
+                # just use the class attribute that contains the default.
+                # Signify that to the caller by returning None.
+                return None
+
+    # Only test this now, so that we can create variables for the
+    # default.  However, return None to signify that we're not going
+    # to actually do the assignment statement for InitVars.
+    if f._field_type is _FIELD_INITVAR:
+        return None
+
+    # Now, actually generate the field assignment.
+    return _field_assign(frozen, f.name, value, self_name)
+
+
+def _init_param(f):
+    """Return the __init__ parameter source text for field ``f``.
+
+    The text has the shape 'x:_type_x=_dflt_x': the annotation and the
+    default are names of variables that hold the real objects.  A
+    field with neither default nor factory has no '=...' part.
+    """
+    if f.default is not MISSING:
+        # Plain default: looked up through this generated name.
+        default = f'=_dflt_{f.name}'
+    elif f.default_factory is not MISSING:
+        # Factory default: set a marker instead of a value.
+        default = '=_HAS_DEFAULT_FACTORY'
+    else:
+        # Neither default nor factory: just the name and type.
+        default = ''
+    return f'{f.name}:_type_{f.name}{default}'
+
+
+def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init,
+             self_name, globals, slots):
+    # fields contains both real fields and InitVar pseudo-fields.
+
+    # Make sure we don't have fields without defaults following fields
+    # with defaults.  This actually would be caught when exec-ing the
+    # function source code, but catching it here gives a better error
+    # message, and future-proofs us in case we build up the function
+    # using ast.
+
+    seen_default = False
+    for f in std_fields:
+        # Only consider the non-kw-only fields in the __init__ call.
+        if f.init:
+            if not (f.default is MISSING and f.default_factory is MISSING):
+                seen_default = True
+            elif seen_default:
+                raise TypeError(f'non-default argument {f.name!r} '
+                                'follows default argument')
+
+    locals = {f'_type_{f.name}': f.type for f in fields}  # names used as annotations by _init_param
+    locals.update({
+        'MISSING': MISSING,
+        '_HAS_DEFAULT_FACTORY': _HAS_DEFAULT_FACTORY,
+        '__dataclass_builtins_object__': object,
+    })
+
+    body_lines = []
+    for f in fields:
+        line = _field_init(f, frozen, locals, self_name, slots)  # may add _dflt_<name> entries to locals
+        # line is None means that this field doesn't require an
+        # assignment statement (e.g. an InitVar pseudo-field).  Skip it.
+        if line:
+            body_lines.append(line)
+
+    # Does this class have a post-init function?
+    if has_post_init:
+        params_str = ','.join(f.name for f in fields
+                              if f._field_type is _FIELD_INITVAR)
+        body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})')
+
+    # If no body lines, use 'pass'.
+    if not body_lines:
+        body_lines = ['pass']
+
+    _init_params = [_init_param(f) for f in std_fields]
+    if kw_only_fields:
+        # Add the keyword-only args.  Because the * can only be added if
+        # there's at least one keyword-only arg, there needs to be a test here
+        # (instead of just concatenating the lists together).
+        _init_params += ['*']
+        _init_params += [_init_param(f) for f in kw_only_fields]
+    return _create_fn('__init__',
+                      [self_name] + _init_params,
+                      body_lines,
+                      locals=locals,
+                      globals=globals,
+                      return_type=None)
+
+
+def _repr_fn(fields, globals):
+    """Create a __repr__ rendering 'QualName(field=value, ...)'.
+
+    The generated function is wrapped with _recursive_repr.
+    """
+    shown = ', '.join(f"{f.name}={{self.{f.name}!r}}" for f in fields)
+    body = ['return self.__class__.__qualname__ + f"(' + shown + ')"']
+    fn = _create_fn('__repr__', ('self',), body, globals=globals)
+    return _recursive_repr(fn)
+
+
+def _frozen_get_del_attr(cls, fields, globals):  # returns the pair (__setattr__, __delattr__)
+    locals = {'cls': cls,
+              'FrozenInstanceError': FrozenInstanceError}
+    if fields:
+        fields_str = '(' + ','.join(repr(f.name) for f in fields) + ',)'  # source text of a tuple of field names
+    else:
+        # Special case for the zero-length tuple.
+        fields_str = '()'
+    return (_create_fn('__setattr__',
+                      ('self', 'name', 'value'),
+                      (f'if type(self) is cls or name in {fields_str}:',  # exact class, or a known field name
+                        ' raise FrozenInstanceError(f"cannot assign to field {name!r}")',
+                       f'super(cls, self).__setattr__(name, value)'),
+                       locals=locals,
+                       globals=globals),
+            _create_fn('__delattr__',
+                      ('self', 'name'),
+                      (f'if type(self) is cls or name in {fields_str}:',  # same test as in __setattr__
+                        ' raise FrozenInstanceError(f"cannot delete field {name!r}")',
+                       f'super(cls, self).__delattr__(name)'),
+                       locals=locals,
+                       globals=globals),
+            )
+
+
+def _cmp_fn(name, op, self_tuple, other_tuple, globals):
+    """Create comparison method ``name`` applying ``op`` to two field tuples.
+
+    self_tuple and other_tuple are source strings such as
+    '(self.x,self.y,)' and '(other.x,other.y,)'.
+    """
+    # The generated method compares only objects of the very same
+    # class; anything else gets NotImplemented.
+    body = ['if other.__class__ is self.__class__:',
+            f' return {self_tuple}{op}{other_tuple}',
+            'return NotImplemented']
+    return _create_fn(name, ('self', 'other'), body, globals=globals)
+
+
+def _hash_fn(fields, globals):  # Build __hash__ that hashes a tuple of the fields.
+    self_tuple = _tuple_str('self', fields)  # presumably source text like '(self.x,self.y)'
+    return _create_fn('__hash__',
+                      ('self',),
+                      [f'return hash({self_tuple})'],
+                      globals=globals)
+
+
+def _is_classvar(a_type, typing):
+    # Match bare ClassVar or an alias whose __origin__ is ClassVar.  This
+    # uses a typing internal class, but it's the best way to test for it.
+    return (a_type is typing.ClassVar
+            or (type(a_type) is typing._GenericAlias
+                and a_type.__origin__ is typing.ClassVar))
+
+
+def _is_initvar(a_type, dataclasses):
+    # Match the InitVar class itself or an instance of it.  The module
+    # we're checking against is the one we're in (dataclasses.py).
+    return (a_type is dataclasses.InitVar
+            or type(a_type) is dataclasses.InitVar)
+
+def _is_kw_only(a_type, dataclasses):  # identity test against the KW_ONLY marker
+    return a_type is dataclasses.KW_ONLY
+
+
+def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
+    # Given a type annotation string, does it refer to a_type in
+    # a_module?  For example, when checking that annotation denotes a
+    # ClassVar, then a_module is typing, and a_type is
+    # typing.ClassVar.
+
+    # It's possible to look up a_module given a_type, but it involves
+    # looking in sys.modules (again!), and seems like a waste since
+    # the caller already knows a_module.
+
+    # - annotation is a string type annotation
+    # - cls is the class that this annotation was found in
+    # - a_module is the module we want to match
+    # - a_type is the type in that module we want to match
+    # - is_type_predicate is a function called with (obj, a_module)
+    #   that determines if obj is of the desired type.
+
+    # Since this test does not do a local namespace lookup (and
+    # instead only a module (global) lookup), there are some things it
+    # gets wrong.
+
+    # With string annotations, cv0 will be detected as a ClassVar:
+    #   CV = ClassVar
+    #   @dataclass
+    #   class C0:
+    #     cv0: CV
+
+    # But in this example cv1 will not be detected as a ClassVar:
+    #   @dataclass
+    #   class C1:
+    #     CV = ClassVar
+    #     cv1: CV
+
+    # In C1, the code in this function (_is_type) will look up "CV" in
+    # the module and not find it, so it will not consider cv1 as a
+    # ClassVar.  This is a fairly obscure corner case, and the best
+    # way to fix it would be to eval() the string "CV" with the
+    # correct global and local namespaces.  However that would involve
+    # an eval() penalty for every single field of every dataclass
+    # that's defined.  It was judged not worth it.
+
+    match = _MODULE_IDENTIFIER_RE.match(annotation)
+    if match:
+        ns = None
+        module_name = match.group(1)
+        # The class's module may be absent from sys.modules (custom __module__).
+        module = sys.modules.get(cls.__module__)
+        if not module_name:
+            # No module name, assume the class's module did
+            # "from dataclasses import InitVar".
+            ns = module.__dict__ if module else None
+        elif module and module.__dict__.get(module_name) is a_module:
+            # module_name is bound to a_module in the class's module.
+            ns = sys.modules.get(a_type.__module__).__dict__
+        if ns and is_type_predicate(ns.get(match.group(2)), a_module):
+            return True
+    return False
+
+
+def _get_field(cls, a_name, a_type, default_kw_only):
+    # Return a Field object for this field name and type.  ClassVars and
+    # InitVars are also returned, but marked as such (see f._field_type).
+    # default_kw_only is the value of kw_only to use if there isn't a field()
+    # that defines it.
+
+    # If the default value isn't derived from Field, then it's only a
+    # normal default value.  Convert it to a Field().
+    default = getattr(cls, a_name, MISSING)
+    if isinstance(default, Field):
+        f = default
+    else:
+        if isinstance(default, types.MemberDescriptorType):
+            # This is a field in __slots__, so it has no default value.
+            default = MISSING
+        f = field(default=default)
+
+    # Only at this point do we know the name and the type.  Set them.
+    f.name = a_name
+    f.type = a_type
+
+    # Assume it's a normal field until proven otherwise.  We're next
+    # going to decide if it's a ClassVar or InitVar, everything else
+    # is just a normal field.
+    f._field_type = _FIELD
+
+    # In addition to checking for actual types here, also check for
+    # string annotations.  get_type_hints() won't always work for us
+    # (see https://github.com/python/typing/issues/508 for example),
+    # plus it's expensive and would require an eval for every string
+    # annotation.  So, make a best effort to see if this is a ClassVar
+    # or InitVar using regexes and checking that the thing referenced
+    # is actually of the correct type.
+
+    # For the complete discussion, see https://bugs.python.org/issue33453
+
+    # If typing has not been imported, then it's impossible for any
+    # annotation to be a ClassVar.  So, only look for ClassVar if
+    # typing has been imported by any module (not necessarily cls's
+    # module).
+    typing = sys.modules.get('typing')
+    if typing:
+        if (_is_classvar(a_type, typing)
+            or (isinstance(f.type, str)
+                and _is_type(f.type, cls, typing, typing.ClassVar,
+                             _is_classvar))):
+            f._field_type = _FIELD_CLASSVAR
+
+    # If the type is InitVar, or if it's a matching string annotation,
+    # then it's an InitVar.
+    if f._field_type is _FIELD:
+        # The module we're checking against is the module we're
+        # currently in (dataclasses.py).
+        dataclasses = sys.modules[__name__]
+        if (_is_initvar(a_type, dataclasses)
+            or (isinstance(f.type, str)
+                and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
+                             _is_initvar))):
+            f._field_type = _FIELD_INITVAR
+
+    # Validations for individual fields.  This is delayed until now,
+    # instead of in the Field() constructor, since only here do we
+    # know the field name, which allows for better error reporting.
+
+    # Special restrictions for ClassVar and InitVar.
+    if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
+        if f.default_factory is not MISSING:
+            raise TypeError(f'field {f.name} cannot have a '
+                            'default factory')
+        # Should I check for other field settings? default_factory
+        # seems the most serious to check for.  Maybe add others.  For
+        # example, how about init=False (or really,
+        # init=<not-the-default-init-value>)?  It makes no sense for
+        # ClassVar and InitVar to specify init=<anything>.
+
+    # kw_only validation and assignment.
+    if f._field_type in (_FIELD, _FIELD_INITVAR):
+        # For real and InitVar fields, if kw_only wasn't specified use the
+        # default value.
+        if f.kw_only is MISSING:
+            f.kw_only = default_kw_only
+    else:
+        # Make sure kw_only isn't set for ClassVars
+        assert f._field_type is _FIELD_CLASSVAR
+        if f.kw_only is not MISSING:
+            raise TypeError(f'field {f.name} is a ClassVar but specifies '
+                            'kw_only')
+
+    # For real fields, disallow mutable defaults.  Use unhashable as a proxy
+    # indicator for mutability.  Read the __hash__ attribute from the class,
+    # not the instance.
+    if f._field_type is _FIELD and f.default.__class__.__hash__ is None:
+        raise ValueError(f'mutable default {type(f.default)} for field '
+                         f'{f.name} is not allowed: use default_factory')
+
+    return f
+
+def _set_qualname(cls, value):
+    # Ensure that the functions returned from _create_fn use the proper
+    # __qualname__ (the class they belong to).
+    if isinstance(value, FunctionType):
+        value.__qualname__ = f"{cls.__qualname__}.{value.__name__}"
+    return value
+
+def _set_new_attribute(cls, name, value):
+    # Never overwrites an existing attribute.  Returns True if the
+    # attribute already exists in cls's own __dict__, False if it was set.
+    if name in cls.__dict__:
+        return True  # left untouched; some callers treat this as an error
+    _set_qualname(cls, value)
+    setattr(cls, name, value)
+    return False
+
+
+# Decide if/how we're going to create a hash function.  Key is
+# (unsafe_hash, eq, frozen, does-hash-exist).  Value is the action to
+# take.  The common case is to do nothing, so instead of providing a
+# function that is a no-op, use None to signify that.
+
+def _hash_set_none(cls, fields, globals):  # hash action: result becomes cls.__hash__
+    return None  # _process_class assigns this, i.e. sets __hash__ = None
+
+def _hash_add(cls, fields, globals):  # hash action: generate __hash__ from fields
+    flds = [f for f in fields if (f.compare if f.hash is None else f.hash)]
+    return _set_qualname(cls, _hash_fn(flds, globals))  # f.hash=None defers to f.compare
+
+def _hash_exception(cls, fields, globals):
+    # Hash action for unsafe_hash=True on a class with an explicit __hash__.
+    raise TypeError(f'Cannot overwrite attribute __hash__ '
+                    f'in class {cls.__name__}')
+
+# Indexed in _process_class by (unsafe_hash, eq, frozen, has_explicit_hash).
+#                +-------------------------------------- unsafe_hash?
+#                |      +------------------------------- eq?
+#                |      |      +------------------------ frozen?
+#                |      |      |      +----------------  has-explicit-hash?
+#                |      |      |      |
+#                |      |      |      |        +-------  action
+#                |      |      |      |        |
+#                v      v      v      v        v
+_hash_action = {(False, False, False, False): None,
+                (False, False, False, True ): None,
+                (False, False, True,  False): None,
+                (False, False, True,  True ): None,
+                (False, True,  False, False): _hash_set_none,
+                (False, True,  False, True ): None,
+                (False, True,  True,  False): _hash_add,
+                (False, True,  True,  True ): None,
+                (True,  False, False, False): _hash_add,
+                (True,  False, False, True ): _hash_exception,
+                (True,  False, True,  False): _hash_add,
+                (True,  False, True,  True ): _hash_exception,
+                (True,  True,  False, False): _hash_add,
+                (True,  True,  False, True ): _hash_exception,
+                (True,  True,  True,  False): _hash_add,
+                (True,  True,  True,  True ): _hash_exception,
+                }
+# See https://bugs.python.org/issue32929#msg312829 for an if-statement
+# version of this table.
+
+
+def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen,
+                   match_args, kw_only, slots, weakref_slot):  # worker behind @dataclass
+    # Now that dicts retain insertion order, there's no reason to use
+    # an ordered dict.  I am leveraging that ordering here, because
+    # derived class fields overwrite base class fields, but the order
+    # is defined by the base class, which is found first.
+    fields = {}
+
+    if cls.__module__ in sys.modules:
+        globals = sys.modules[cls.__module__].__dict__
+    else:
+        # Theoretically this can happen if someone writes
+        # a custom string to cls.__module__.  In which case
+        # such dataclass won't be fully introspectable
+        # (w.r.t. typing.get_type_hints) but will still function
+        # correctly.
+        globals = {}
+
+    setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order,
+                                           unsafe_hash, frozen))
+
+    # Find our base classes in reverse MRO order, and exclude
+    # ourselves.  In reversed order so that more derived classes
+    # override earlier field definitions in base classes.  As long as
+    # we're iterating over them, see if any are frozen.
+    any_frozen_base = False
+    has_dataclass_bases = False
+    for b in cls.__mro__[-1:0:-1]:
+        # Only process classes that have been processed by our
+        # decorator.  That is, they have a _FIELDS attribute.
+        base_fields = getattr(b, _FIELDS, None)
+        if base_fields is not None:
+            has_dataclass_bases = True
+            for f in base_fields.values():
+                fields[f.name] = f
+            if getattr(b, _PARAMS).frozen:
+                any_frozen_base = True
+
+    # Annotations that are defined in this class (not in base
+    # classes).  If __annotations__ isn't present, then this class
+    # adds no new annotations.  We use this to compute fields that are
+    # added by this class.
+    #
+    # Fields are found from cls_annotations, which is guaranteed to be
+    # ordered.  Default values are from class attributes, if a field
+    # has a default.  If the default value is a Field(), then it
+    # contains additional info beyond (and possibly including) the
+    # actual default value.  Pseudo-fields ClassVars and InitVars are
+    # included, despite the fact that they're not real fields.  That's
+    # dealt with later.
+    cls_annotations = cls.__dict__.get('__annotations__', {})
+
+    # Now find fields in our class.  While doing so, validate some
+    # things, and set the default values (as class attributes) where
+    # we can.
+    cls_fields = []
+    KW_ONLY_seen = False
+    # Get a reference to this module for the _is_kw_only() test.
+    dataclasses = sys.modules[__name__]
+    for name, a_type in cls_annotations.items():
+        # See if this is a marker to change the value of kw_only.
+        if (_is_kw_only(a_type, dataclasses)
+            or (isinstance(a_type, str)
+                and _is_type(a_type, cls, dataclasses, dataclasses.KW_ONLY,
+                             _is_kw_only))):
+            # Switch the default to kw_only=True, and ignore this
+            # annotation: it's not a real field.
+            if KW_ONLY_seen:
+                raise TypeError(f'{name!r} is KW_ONLY, but KW_ONLY '
+                                'has already been specified')
+            KW_ONLY_seen = True
+            kw_only = True
+        else:
+            # Otherwise it's a field of some type.
+            cls_fields.append(_get_field(cls, name, a_type, kw_only))
+
+    for f in cls_fields:
+        fields[f.name] = f
+
+        # If the class attribute (which is the default value for this
+        # field) exists and is of type 'Field', replace it with the
+        # real default.  This is so that normal class introspection
+        # sees a real default value, not a Field.
+        if isinstance(getattr(cls, f.name, None), Field):
+            if f.default is MISSING:
+                # If there's no default, delete the class attribute.
+                # This happens if we specify field(repr=False), for
+                # example (that is, we specified a field object, but
+                # no default value).  Also if we're using a default
+                # factory.  The class attribute should not be set at
+                # all in the post-processed class.
+                delattr(cls, f.name)
+            else:
+                setattr(cls, f.name, f.default)
+
+    # Do we have any Field members that don't also have annotations?
+    for name, value in cls.__dict__.items():
+        if isinstance(value, Field) and name not in cls_annotations:
+            raise TypeError(f'{name!r} is a field but has no type annotation')
+
+    # Check rules that apply if we are derived from any dataclasses.
+    if has_dataclass_bases:
+        # Raise an exception if any of our bases are frozen, but we're not.
+        if any_frozen_base and not frozen:
+            raise TypeError('cannot inherit non-frozen dataclass from a '
+                            'frozen one')
+
+        # Raise an exception if we're frozen, but none of our bases are.
+        if not any_frozen_base and frozen:
+            raise TypeError('cannot inherit frozen dataclass from a '
+                            'non-frozen one')
+
+    # Remember all of the fields on our class (including bases).  This
+    # also marks this class as being a dataclass.
+    setattr(cls, _FIELDS, fields)
+
+    # Was this class defined with an explicit __hash__?  Note that if
+    # __eq__ is defined in this class, then python will automatically
+    # set __hash__ to None.  This is a heuristic, as it's possible
+    # that such a __hash__ == None was not auto-generated, but it's
+    # close enough.
+    class_hash = cls.__dict__.get('__hash__', MISSING)
+    has_explicit_hash = not (class_hash is MISSING or
+                             (class_hash is None and '__eq__' in cls.__dict__))
+
+    # If we're generating ordering methods, we must be generating the
+    # eq methods.
+    if order and not eq:
+        raise ValueError('eq must be true if order is true')
+
+    # Include InitVars and regular fields (so, not ClassVars).  This is
+    # initialized here, outside of the "if init:" test, because std_init_fields
+    # is used with match_args, below.
+    all_init_fields = [f for f in fields.values()
+                       if f._field_type in (_FIELD, _FIELD_INITVAR)]
+    (std_init_fields,
+     kw_only_init_fields) = _fields_in_init_order(all_init_fields)
+
+    if init:
+        # Does this class have a post-init function?
+        has_post_init = hasattr(cls, _POST_INIT_NAME)
+
+        _set_new_attribute(cls, '__init__',
+                           _init_fn(all_init_fields,
+                                    std_init_fields,
+                                    kw_only_init_fields,
+                                    frozen,
+                                    has_post_init,
+                                    # The name to use for the "self"
+                                    # param in __init__.  Use "self"
+                                    # if possible.
+                                    '__dataclass_self__' if 'self' in fields
+                                            else 'self',
+                                    globals,
+                                    slots,
+                          ))
+
+    # Get the fields as a list, and include only real fields.  This is
+    # used in all of the following methods.
+    field_list = [f for f in fields.values() if f._field_type is _FIELD]
+
+    if repr:
+        flds = [f for f in field_list if f.repr]
+        _set_new_attribute(cls, '__repr__', _repr_fn(flds, globals))
+
+    if eq:
+        # Create __eq__ method.  There's no need for a __ne__ method,
+        # since python will call __eq__ and negate it.
+        flds = [f for f in field_list if f.compare]
+        self_tuple = _tuple_str('self', flds)
+        other_tuple = _tuple_str('other', flds)
+        _set_new_attribute(cls, '__eq__',
+                           _cmp_fn('__eq__', '==',
+                                   self_tuple, other_tuple,
+                                   globals=globals))
+
+    if order:
+        # Create and set the ordering methods.
+        flds = [f for f in field_list if f.compare]
+        self_tuple = _tuple_str('self', flds)
+        other_tuple = _tuple_str('other', flds)
+        for name, op in [('__lt__', '<'),
+                         ('__le__', '<='),
+                         ('__gt__', '>'),
+                         ('__ge__', '>='),
+                         ]:
+            if _set_new_attribute(cls, name,
+                                  _cmp_fn(name, op, self_tuple, other_tuple,
+                                          globals=globals)):
+                raise TypeError(f'Cannot overwrite attribute {name} '
+                                f'in class {cls.__name__}. Consider using '
+                                'functools.total_ordering')
+
+    if frozen:
+        for fn in _frozen_get_del_attr(cls, field_list, globals):
+            if _set_new_attribute(cls, fn.__name__, fn):
+                raise TypeError(f'Cannot overwrite attribute {fn.__name__} '
+                                f'in class {cls.__name__}')
+
+    # Decide if/how we're going to create a hash function.
+    hash_action = _hash_action[bool(unsafe_hash),
+                               bool(eq),
+                               bool(frozen),
+                               has_explicit_hash]
+    if hash_action:
+        # No need to call _set_new_attribute here, since by the time
+        # we're here the overwriting is unconditional.
+        cls.__hash__ = hash_action(cls, field_list, globals)
+
+    if not getattr(cls, '__doc__'):
+        # Create a class doc-string.
+        try:
+            # In some cases fetching a signature is not possible.
+            # But, we surely should not fail in this case.
+            text_sig = str(inspect.signature(cls)).replace(' -> None', '')
+        except (TypeError, ValueError):
+            text_sig = ''
+        cls.__doc__ = (cls.__name__ + text_sig)
+
+    if match_args:
+        # I could probably compute this once
+        _set_new_attribute(cls, '__match_args__',
+                           tuple(f.name for f in std_init_fields))
+
+    # It's an error to specify weakref_slot if slots is False.
+    if weakref_slot and not slots:
+        raise TypeError('weakref_slot is True but slots is False')
+    if slots:
+        cls = _add_slots(cls, frozen, weakref_slot)
+
+    abc.update_abstractmethods(cls)
+
+    return cls
+
+
+# _dataclass_getstate and _dataclass_setstate are needed for pickling frozen
+# classes with slots.  These could be slightly more performant if we generated
+# the code instead of iterating over fields.  But that can be a project for
+# another day, if performance becomes an issue.
+def _dataclass_getstate(self):  # pickle state: field values in fields() order
+    return [getattr(self, f.name) for f in fields(self)]
+
+
+def _dataclass_setstate(self, state):
+    for field, value in zip(fields(self), state):
+        # use object.__setattr__ because the dataclass may be frozen
+        object.__setattr__(self, field.name, value)
+
+
+def _get_slots(cls):  # Generator over the slot names declared in cls's own __dict__.
+    match cls.__dict__.get('__slots__'):
+        # A class which does not define __slots__ at all is equivalent
+        # to a class defining __slots__ = ('__dict__', '__weakref__')
+        case None:
+            yield from ('__dict__', '__weakref__')
+        case str(slot):  # a single slot given as a bare string
+            yield slot
+        # Slots may be any iterable, but we cannot handle an iterator
+        # because it will already be (partially) consumed.
+        case iterable if not hasattr(iterable, '__next__'):
+            yield from iterable
+        case _:
+            raise TypeError(f"Slots of '{cls.__name__}' cannot be determined")
+
+
+def _add_slots(cls, is_frozen, weakref_slot):
+    # Need to create a new class, since we can't set __slots__
+    #  after a class has been created.
+
+    # Make sure __slots__ isn't already set.
+    if '__slots__' in cls.__dict__:
+        raise TypeError(f'{cls.__name__} already specifies __slots__')
+
+    # Create a new dict for our new class.
+    cls_dict = dict(cls.__dict__)
+    field_names = tuple(f.name for f in fields(cls))
+    # Collect slots of the bases (MRO minus cls and its last entry) to avoid overlap.
+    inherited_slots = set(
+        itertools.chain.from_iterable(map(_get_slots, cls.__mro__[1:-1]))
+    )
+    # The slots for our class.  Remove slots from our base classes.  Add
+    # '__weakref__' if weakref_slot was given, unless it is already present.
+    cls_dict["__slots__"] = tuple(
+        itertools.filterfalse(
+            inherited_slots.__contains__,
+            itertools.chain(
+                # gh-93521: '__weakref__' also needs to be filtered out if
+                # already present in inherited_slots
+                field_names, ('__weakref__',) if weakref_slot else ()
+            )
+        ),
+    )
+
+    for field_name in field_names:
+        # Remove our attributes, if present. They'll still be
+        #  available in _MARKER.
+        cls_dict.pop(field_name, None)
+
+    # Remove __dict__ itself.
+    cls_dict.pop('__dict__', None)
+
+    # Clear existing `__weakref__` descriptor, it belongs to a previous type:
+    cls_dict.pop('__weakref__', None)  # gh-102069
+
+    # And finally create the class.
+    qualname = getattr(cls, '__qualname__', None)
+    cls = type(cls)(cls.__name__, cls.__bases__, cls_dict)  # same metaclass as the original
+    if qualname is not None:
+        cls.__qualname__ = qualname
+
+    if is_frozen:
+        # Need this for pickling frozen classes with slots.
+        if '__getstate__' not in cls_dict:
+            cls.__getstate__ = _dataclass_getstate
+        if '__setstate__' not in cls_dict:
+            cls.__setstate__ = _dataclass_setstate
+
+    return cls
+
+
+def dataclass(cls=None, /, *, init=True, repr=True, eq=True, order=False,
+              unsafe_hash=False, frozen=False, match_args=True,
+              kw_only=False, slots=False, weakref_slot=False):
+    """Add dunder methods based on the fields defined in the class.
+
+    Examines PEP 526 __annotations__ to determine fields.
+
+    If init is true, an __init__() method is added to the class. If repr
+    is true, a __repr__() method is added. If eq is true, an __eq__()
+    method is added. If order is true, rich comparison dunder methods
+    are added. If unsafe_hash is true, a __hash__() method is added. If
+    frozen is true, fields may not be assigned to after instance
+    creation. If match_args is true, the __match_args__ tuple is added.
+    If kw_only is true, then by default all fields are keyword-only. If
+    slots is true, a new class with a __slots__ attribute is returned.
+    """
+
+    def wrap(cls):
+        return _process_class(cls, init, repr, eq, order, unsafe_hash,
+                              frozen, match_args, kw_only, slots,
+                              weakref_slot)
+
+    # See if we're being called as @dataclass or @dataclass().
+    if cls is None:
+        # We're called with parens.
+        return wrap
+
+    # We're called as @dataclass without parens.
+    return wrap(cls)
+
+
+def fields(class_or_instance):
+    """Return a tuple describing the fields of this dataclass.
+
+    Accepts a dataclass or an instance of one (TypeError otherwise).
+    Tuple elements are of type Field.
+    """
+
+    # Might it be worth caching this, per class?
+    try:
+        fields = getattr(class_or_instance, _FIELDS)
+    except AttributeError:
+        raise TypeError('must be called with a dataclass type or instance') from None
+
+    # Exclude pseudo-fields.  Note that fields is sorted by insertion
+    # order, so the order of the tuple is as the fields were defined.
+    return tuple(f for f in fields.values() if f._field_type is _FIELD)
+
+
+def _is_dataclass_instance(obj):
+    """Returns True if obj is an instance of a dataclass."""
+    return hasattr(type(obj), _FIELDS)  # looks at type(obj), not at obj itself
+
+
+def is_dataclass(obj):
+    """Returns True if obj is a dataclass or an instance of a
+    dataclass."""
+    cls = obj if isinstance(obj, type) else type(obj)  # normalise an instance to its class
+    return hasattr(cls, _FIELDS)
+
+
+def asdict(obj, *, dict_factory=dict):
+    """Return the fields of a dataclass instance as a new dictionary mapping
+    field names to field values.
+
+    Example usage::
+
+      @dataclass
+      class C:
+          x: int
+          y: int
+
+      c = C(1, 2)
+      assert asdict(c) == {'x': 1, 'y': 2}
+
+    If given, 'dict_factory' will be used instead of built-in dict.
+    The function applies recursively to field values that are
+    dataclass instances. This will also look into built-in containers:
+    tuples, lists, and dicts. Other objects are copied with copy.deepcopy().
+    """
+    if not _is_dataclass_instance(obj):
+        raise TypeError("asdict() should be called on dataclass instances")
+    return _asdict_inner(obj, dict_factory)
+
+
+def _asdict_inner(obj, dict_factory):
+    # Recursive worker for asdict(): convert obj and everything it contains.
+    if _is_dataclass_instance(obj):
+        result = []
+        for f in fields(obj):
+            value = _asdict_inner(getattr(obj, f.name), dict_factory)
+            result.append((f.name, value))
+        return dict_factory(result)
+    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
+        # obj is a namedtuple.  Recurse into it, but the returned
+        # object is another namedtuple of the same type.  It is built
+        # with positional args because a namedtuple's __init__ needs
+        # to be called differently from list/tuple (see bpo-34363).
+        #
+        # namedtuple's _asdict() is not used because it does not
+        # recurse into the fields (using dict_factory), and because
+        # returning a dict here would break asdict() on structures
+        # where a namedtuple is used as a dict key.  The main use case
+        # is json.dumps, which converts namedtuples to lists; some
+        # information is admittedly lost in that conversion.
+        return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
+    elif isinstance(obj, (list, tuple)):
+        # Assume we can create an object of this type by passing in a
+        # generator (which is not true for namedtuples, handled
+        # above).
+        return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
+    elif isinstance(obj, dict):
+        if hasattr(type(obj), 'default_factory'):
+            # obj is a defaultdict, whose constructor takes the
+            # default_factory as its first argument, not the items.
+            result = type(obj)(obj.default_factory)
+            for k, v in obj.items():
+                result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory)
+            return result
+        return type(obj)((_asdict_inner(k, dict_factory),
+                          _asdict_inner(v, dict_factory))
+                         for k, v in obj.items())
+    else:
+        return copy.deepcopy(obj)
+
+
+def astuple(obj, *, tuple_factory=tuple):
+    """Return the fields of a dataclass instance as a new tuple of field values.
+
+    Example usage::
+
+      @dataclass
+      class C:
+          x: int
+          y: int
+
+      c = C(1, 2)
+      assert astuple(c) == (1, 2)
+
+    If given, 'tuple_factory' will be used instead of built-in tuple.
+    The function applies recursively to field values that are
+    dataclass instances. This will also look into built-in containers:
+    tuples, lists, and dicts. Other objects are copied with copy.deepcopy().
+    """
+
+    if not _is_dataclass_instance(obj):
+        raise TypeError("astuple() should be called on dataclass instances")
+    return _astuple_inner(obj, tuple_factory)
+
+
+def _astuple_inner(obj, tuple_factory):  # recursive worker for astuple()
+    if _is_dataclass_instance(obj):
+        result = []
+        for f in fields(obj):
+            value = _astuple_inner(getattr(obj, f.name), tuple_factory)
+            result.append(value)
+        return tuple_factory(result)
+    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
+        # obj is a namedtuple.  Recurse into it and rebuild the same type from
+        # positional args; its __init__ differs from list/tuple (bpo-34363).
+        return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj])
+    elif isinstance(obj, (list, tuple)):
+        # Assume this type accepts a generator (namedtuples are handled above).
+        return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
+    elif isinstance(obj, dict):
+        if hasattr(type(obj), 'default_factory'):
+            # A defaultdict's constructor takes default_factory first.
+            result = type(obj)(obj.default_factory)
+            for k, v in obj.items():
+                result[_astuple_inner(k, tuple_factory)] = _astuple_inner(v, tuple_factory)
+            return result
+        return type(obj)((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory))
+                          for k, v in obj.items())
+    else:
+        return copy.deepcopy(obj)
+
+
+def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True,
+                   repr=True, eq=True, order=False, unsafe_hash=False,
+                   frozen=False, match_args=True, kw_only=False, slots=False,
+                   weakref_slot=False):
+    """Return a new dynamically created dataclass.
+
+    The dataclass name will be 'cls_name'.  'fields' is an iterable
+    of either (name), (name, type) or (name, type, Field) objects. If type is
+    omitted, use the string 'typing.Any'.  Field objects are created by
+    the equivalent of calling 'field(name, type [, Field-info])'.::
+
+      C = make_dataclass('C', ['x', ('y', int), ('z', int, field(init=False))], bases=(Base,))
+
+    is equivalent to::
+
+      @dataclass
+      class C(Base):
+          x: 'typing.Any'
+          y: int
+          z: int = field(init=False)
+
+    For the bases and namespace parameters, see the builtin type() function.
+
+    The parameters init, repr, eq, order, unsafe_hash, frozen, match_args,
+    kw_only, slots, and weakref_slot are passed to dataclass().
+    """
+
+    if namespace is None:
+        namespace = {}
+
+    # While we're looking through the field names, validate that they
+    # are identifiers, are not keywords, and not duplicates.
+    seen = set()
+    annotations = {}
+    defaults = {}
+    for item in fields:
+        if isinstance(item, str):
+            name = item
+            tp = 'typing.Any'  # the annotation is this string, not the typing object
+        elif len(item) == 2:
+            name, tp, = item
+        elif len(item) == 3:
+            name, tp, spec = item
+            defaults[name] = spec  # later installed as the class attribute for this field
+        else:
+            raise TypeError(f'Invalid field: {item!r}')
+
+        if not isinstance(name, str) or not name.isidentifier():
+            raise TypeError(f'Field names must be valid identifiers: {name!r}')
+        if keyword.iskeyword(name):
+            raise TypeError(f'Field names must not be keywords: {name!r}')
+        if name in seen:
+            raise TypeError(f'Field name duplicated: {name!r}')
+
+        seen.add(name)
+        annotations[name] = tp
+
+    # Update 'ns' with the user-supplied namespace plus our calculated values.
+    def exec_body_callback(ns):
+        ns.update(namespace)
+        ns.update(defaults)  # applied after 'namespace', so field specs win on a name clash
+        ns['__annotations__'] = annotations
+
+    # We use `types.new_class()` instead of simply `type()` to allow dynamic creation
+    # of generic dataclasses.
+    cls = types.new_class(cls_name, bases, {}, exec_body_callback)
+
+    # Apply the normal decorator.
+    return dataclass(cls, init=init, repr=repr, eq=eq, order=order,
+                     unsafe_hash=unsafe_hash, frozen=frozen,
+                     match_args=match_args, kw_only=kw_only, slots=slots,
+                     weakref_slot=weakref_slot)
+
+
+def replace(obj, /, **changes):
+    """Return a new object replacing specified fields with new values.
+
+    This is especially useful for frozen classes.  Example usage::
+
+      @dataclass(frozen=True)
+      class C:
+          x: int
+          y: int
+
+      c = C(1, 2)
+      c1 = replace(c, x=3)
+      assert c1.x == 3 and c1.y == 2
+    """
+
+    # We're going to mutate 'changes', but that's okay because it's a
+    # new dict, even if called with 'replace(obj, **my_changes)'.
+
+    if not _is_dataclass_instance(obj):
+        raise TypeError("replace() should be called on dataclass instances")
+
+    # It's an error to have init=False fields in 'changes'.
+    # If a field is not in 'changes', read its value from the provided obj.
+
+    for f in getattr(obj, _FIELDS).values():
+        # Only consider normal fields or InitVars.
+        if f._field_type is _FIELD_CLASSVAR:
+            continue
+
+        if not f.init:
+            # Error if this field is specified in changes.
+            if f.name in changes:
+                raise ValueError(f'field {f.name} is declared with '
+                                 'init=False, it cannot be specified with '
+                                 'replace()')
+            continue  # init=False fields are never passed to the constructor call below
+
+        if f.name not in changes:
+            if f._field_type is _FIELD_INITVAR and f.default is MISSING:
+                raise ValueError(f"InitVar {f.name!r} "
+                                 'must be specified with replace()')
+            changes[f.name] = getattr(obj, f.name)  # carry over the value read from obj
+
+    # Create the new object, which calls __init__() and
+    # __post_init__() (if defined), using all of the init fields we've
+    # added and/or left in 'changes'.  If there are values supplied in
+    # changes that aren't fields, this will correctly raise a
+    # TypeError.
+    return obj.__class__(**changes)
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_preprocessing.py b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_preprocessing.py
new file mode 100644
index 0000000..265af57
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_preprocessing.py
@@ -0,0 +1,61 @@
+"""Preprocessing recommendation helpers."""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import Any
+
+
+@dataclass(slots=True)
+class PreprocessingAction:  # One preprocessing recommendation; build_preprocessing_log() serializes it with asdict().
+    action_name: str  # short label of the recommended action
+    target_fields: list[str]  # names of the fields the action applies to
+    reason: str  # why the action is recommended
+    result: str  # outcome/status text, e.g. "Recommended for removal."
+
+
+def build_preprocessing_log(
+    should_run: bool, actions: list[PreprocessingAction] | None = None,
+    raw_snapshot_path: str = "", processed_snapshot_path: str = "",
+) -> dict[str, Any]:
+    """Return the preprocessing log dict; every action is flattened with asdict()."""
+    serialized = list(map(asdict, actions)) if actions else []
+    return dict(
+        should_run_preprocessing=should_run,
+        raw_snapshot_path=raw_snapshot_path,
+        processed_snapshot_path=processed_snapshot_path,
+        actions=serialized,
+    )
+
+
+def generate_preprocessing_recommendations(
+    data_profile: dict[str, Any],
+    preprocessing_preference: str,
+) -> list[PreprocessingAction]:
+    """Turn profile findings into preprocessing recommendations.
+
+    Reads ``high_missing_fields`` and ``constant_columns`` from the nested
+    ``data_profile["data_profile"]`` mapping and emits one action per non-empty
+    list, in that order. Returns an empty list when ``preprocessing_preference``
+    is exactly ``"no preprocessing"``.
+    """
+    if preprocessing_preference == "no preprocessing":
+        return []
+    # (profile key, action name, reason, result), in emission order.
+    rules = (
+        ("high_missing_fields", "Review missing-value treatment",
+         "High-missing fields may distort statistics and figures.",
+         "Wait for user confirmation before dropping or imputing values."),
+        ("constant_columns", "Remove constant columns",
+         "Constant columns do not provide useful analytical signal.",
+         "Recommended for removal."),
+    )
+    recommendations: list[PreprocessingAction] = []
+    for profile_key, label, why, outcome in rules:
+        flagged = data_profile.get("data_profile", {}).get(profile_key, [])
+        if flagged:
+            action = PreprocessingAction(
+                action_name=label, target_fields=list(flagged), reason=why, result=outcome
+            )
+            recommendations.append(action)
+    return recommendations
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_statistical_analysis.py b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_statistical_analysis.py
new file mode 100644
index 0000000..93c311b
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_statistical_analysis.py
@@ -0,0 +1,36 @@
+"""Statistical summary helpers."""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import Any
+
+
+@dataclass(slots=True)
+class NumericSummary:  # Summary of one numeric field; generate_statistical_summary() serializes it with asdict().
+    field_name: str  # name of the summarized field
+    sample_size: int  # presumably the number of values summarized; confirm with the producer
+    minimum: float | None  # None is allowed; presumably "not available"
+    maximum: float | None
+    mean_value: float | None
+    median_value: float | None
+
+
+@dataclass(slots=True)
+class CategoricalSummary:  # Summary of one categorical field; generate_statistical_summary() serializes it with asdict().
+    field_name: str  # name of the summarized field
+    unique_value_count: int  # number of distinct categories
+    most_frequent_category: str | None  # None is allowed; presumably when no category exists
+    most_frequent_count: int  # presumably the occurrence count of most_frequent_category
+
+
+def generate_statistical_summary(
+    numeric_fields: list[NumericSummary] | None = None,
+    categorical_fields: list[CategoricalSummary] | None = None,
+    notes: list[str] | None = None,
+) -> dict[str, Any]:
+    """Bundle numeric/categorical summaries (flattened with asdict) and notes into one dict."""
+    numeric_rows = list(map(asdict, numeric_fields)) if numeric_fields else []
+    categorical_rows = list(map(asdict, categorical_fields)) if categorical_fields else []
+    free_notes = notes if notes else []
+    return {"numeric_summary": numeric_rows, "categorical_summary": categorical_rows, "notes": free_notes}
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_strategy_registry.py b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_strategy_registry.py
new file mode 100644
index 0000000..5a4fca5
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_strategy_registry.py
@@ -0,0 +1,81 @@
+"""Strategy registry for dataset analysis."""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import Any
+
+
+@dataclass(slots=True)
+class StrategyDefinition:  # One entry of STRATEGY_REGISTRY; select_strategy() returns it via asdict().
+    strategy_id: str  # same string as the entry's key in STRATEGY_REGISTRY
+    display_name: str  # human-readable strategy name
+    supported_types: list[str]  # dataset types checked against "primary_type" in select_strategy()
+    core_tasks: list[str]  # analysis tasks listed for the strategy
+    recommended_artifacts: list[str]  # file names of the artifacts the strategy recommends
+    risk_notes: list[str]  # cautions attached to the strategy
+
+
+STRATEGY_REGISTRY: dict[str, StrategyDefinition] = {  # keyed by strategy_id; looked up by select_strategy()
+    "tabular_generic": StrategyDefinition(  # also the default/fallback entry in select_strategy()
+        strategy_id="tabular_generic",
+        display_name="Generic tabular analysis",
+        supported_types=["tabular_generic", "unknown"],
+        core_tasks=["schema/profile", "missing and duplicate checks", "distribution analysis", "group comparison", "correlation analysis"],
+        recommended_artifacts=["schema.json", "data_profile.json", "visualization_plan.json", "analysis_findings.json"],
+        risk_notes=["Do not invent business semantics when field meaning is unclear."],
+    ),
+    "questionnaire": StrategyDefinition(
+        strategy_id="questionnaire",
+        display_name="Questionnaire analysis",
+        supported_types=["questionnaire"],
+        core_tasks=["question-type identification", "scale normalization", "invalid response detection", "group comparison", "open-response analysis"],
+        recommended_artifacts=[
+            "questionnaire_profile.json",
+            "questionnaire_scoring.json",
+            "group_comparison.csv",
+            "open_response_keywords.csv",
+        ],
+        risk_notes=["Confirm reverse-coded items, dimension grouping, and scale direction before execution."],
+    ),
+    "literary": StrategyDefinition(
+        strategy_id="literary",
+        display_name="Literary corpus analysis",
+        supported_types=["literary"],
+        core_tasks=["genre refinement", "token statistics", "character and imagery analysis", "structural pattern mining", "corpus limitation summary"],
+        recommended_artifacts=[
+            "character_frequency.csv",
+            "imagery_frequency.csv",
+            "character_cooccurrence.csv",
+            "analysis_findings.json",
+        ],
+        risk_notes=["Do not present rule-based extraction outputs as human-annotated ground truth."],
+    ),
+    "time_series": StrategyDefinition(
+        strategy_id="time_series",
+        display_name="Time-series analysis",
+        supported_types=["time_series"],
+        core_tasks=["time index identification", "frequency checks", "trend analysis", "anomaly detection", "rolling metrics and seasonality review"],
+        recommended_artifacts=[
+            "time_series_profile.json",
+            "time_series_summary.csv",
+            "time_series_trend.png",
+            "rolling_summary.csv",
+        ],
+        risk_notes=["Confirm time columns, frequency, and aggregation granularity before execution."],
+    ),
+}
+
+
+def select_strategy(detection_result: dict[str, Any]) -> dict[str, Any]:
+    """Return the registry entry (as a dict) matching a detection result, else a fallback."""
+    default = STRATEGY_REGISTRY["tabular_generic"]
+    requested = detection_result.get("detected_strategy", "tabular_generic")
+    chosen = STRATEGY_REGISTRY[requested] if requested in STRATEGY_REGISTRY else default
+    primary = detection_result.get("primary_type", "unknown")
+    # A known primary type that the chosen strategy does not list triggers the fallback.
+    if primary != "unknown" and primary not in chosen.supported_types:
+        chosen = STRATEGY_REGISTRY.get(
+            detection_result.get("fallback_type", "tabular_generic"), default
+        )
+    return asdict(chosen)
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_time_series_analysis.py b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_time_series_analysis.py
new file mode 100644
index 0000000..7b15ec2
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/analysis/analyzer_time_series_analysis.py
@@ -0,0 +1,56 @@
+"""Time-series profiling helpers."""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from statistics import mean
+from typing import Any
+
+
+@dataclass(slots=True)
+class TimeSeriesSummary:  # Profile of one time/metric column pair, as filled by build_time_series_profile().
+    time_field: str  # name of the time column
+    metric_field: str  # name of the metric column
+    sample_size: int  # the smaller of the usable time-value and metric-value counts
+    earliest_time: str | None  # min() of the stringified time values; None when there are none
+    latest_time: str | None  # max() of the stringified time values; None when there are none
+    gap_count: int  # number of time entries that were missing
+    candidate_frequency: str  # build_time_series_profile() always sets "unknown"
+    is_univariate: bool  # build_time_series_profile() always sets True
+
+
+def build_time_series_profile(
+    time_field: str,
+    metric_field: str,
+    time_values: list[str],
+    metric_values: list[float | int],
+) -> dict[str, Any]:
+    """Profile one time column and one metric column.
+
+    ``None``/``""`` entries count as missing; NaN metrics are dropped as well, since NaN
+    would poison the mean and make ``min``/``max`` order-dependent. Earliest/latest times
+    compare the stringified values; ``gap_count`` is the number of missing time entries.
+    """
+    times = [str(value) for value in time_values if value not in (None, "")]
+    converted = (float(value) for value in metric_values if value not in (None, ""))
+    metrics = [number for number in converted if number == number]  # NaN != NaN
+    summary = TimeSeriesSummary(
+        time_field=time_field, metric_field=metric_field,
+        sample_size=min(len(times), len(metrics)),
+        earliest_time=min(times, default=None), latest_time=max(times, default=None),
+        gap_count=max(len(time_values) - len(times), 0),
+        candidate_frequency="unknown", is_univariate=True,
+    )
+    return {
+        "time_series_profile": asdict(summary),
+        "summary_statistics": {
+            "mean": round(mean(metrics), 4) if metrics else None,
+            "minimum": min(metrics, default=None), "maximum": max(metrics, default=None),
+        },
+        "recommended_checks": [
+            "Check whether the time index is continuous.",
+            "Check whether daily, weekly, or monthly aggregation is needed.",
+            "Check anomaly spikes and missing intervals.",
+            "Add rolling, period-over-period, or seasonal views when appropriate.",
+        ],
+    }
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/image_gen/image_generator.py b/skills/data-flow-skill/data-flow-skill/scripts/image_gen/image_generator.py
new file mode 100644
index 0000000..97e8b43
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/image_gen/image_generator.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""命令行图片生成脚本。
+
+该脚本只负责:
+1. 接收已经写好的图片生成 prompt
+2. 调用图像模型生成图片
+3. 将图片和元数据保存到本地
+
+不负责:
+1. 根据任务内容猜测主题
+2. 根据数据类型硬编码匹配场景
+3. 自动编写主题插画 prompt
+
+示例:
+  python image_gen/image_generator.py generate \
+    --prompt "简约动画风格，三位运动员站在领奖台上，背景留白，无文字" \
+    --output output/figures/theme_illustration.png
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+
+def _load_env_key() -> str | None:
+    """Return DASHSCOPE_API_KEY from the ``.env`` in the parent of this file's directory, or None."""
+    env_path = Path(__file__).parent.parent / ".env"
+    if not env_path.is_file():
+        return None
+    for raw in env_path.read_text(encoding="utf-8-sig").splitlines():  # explicit codec; tolerates a BOM
+        name, sep, value = raw.strip().partition("=")
+        if sep and name.strip() == "DASHSCOPE_API_KEY":  # "#" comment lines never match the name
+            return value.strip().strip("'\"") or None  # drop optional surrounding quotes
+    return None
+
+
+DASHSCOPE_URL = "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation"  # endpoint request_image() POSTs to
+DEFAULT_MODEL = "qwen-image-2.0-pro"  # default for the --model option
+DEFAULT_SIZE = "1328*1328"  # default for the --size option
+DEFAULT_NEGATIVE_PROMPT = (  # sent as parameters.negative_prompt unless the caller overrides it
+    "低分辨率，低画质，肢体畸形，手指错误，画面过饱和，文字，水印，logo，AI感过强，"
+    "构图混乱，背景杂乱，模糊，重影，写实照片风，恐怖风。"
+)
+
+
+def ensure_parent(path: Path) -> None:  # create path's parent directory tree if it is missing
+    os.makedirs(path.parent, exist_ok=True)
+
+
+def parse_image_url(response_data: dict[str, Any]) -> str:  # extract the image URL from a generation response
+    try:  # walks output -> choices[0] -> message -> content[0] -> image
+        return response_data["output"]["choices"][0]["message"]["content"][0]["image"]
+    except (KeyError, IndexError, TypeError) as exc:  # any missing or mis-shaped level
+        raise RuntimeError(f"未在接口响应中找到图片 URL: {response_data}") from exc
+
+
+def request_image(
+    prompt: str,
+    api_key: str,
+    *,
+    model: str = DEFAULT_MODEL,
+    size: str = DEFAULT_SIZE,
+    negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
+    prompt_extend: bool = True,
+    watermark: bool = False,
+    timeout: int = 300,
+) -> str:
+    """POST a generation request to DASHSCOPE_URL and return the generated image's URL.
+
+    The prompt is sent as a single user message; ``negative_prompt``,
+    ``prompt_extend``, ``watermark`` and ``size`` are sent under ``parameters``.
+
+    Raises:
+        RuntimeError: on an HTTP error status, on a URL/connection error, or
+            when parse_image_url() finds no image URL in the response.
+    """
+    user_message = {"role": "user", "content": [{"text": prompt}]}
+    parameters = {
+        "negative_prompt": negative_prompt,
+        "prompt_extend": prompt_extend,
+        "watermark": watermark,
+        "size": size,
+    }
+    payload = {"model": model, "input": {"messages": [user_message]}, "parameters": parameters}
+    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
+    request = urllib.request.Request(
+        DASHSCOPE_URL,
+        # ensure_ascii=False sends non-ASCII prompt text as UTF-8 rather than as escapes.
+        data=json.dumps(payload, ensure_ascii=False).encode("utf-8"),
+        headers=headers,
+        method="POST",
+    )
+
+    try:
+        with urllib.request.urlopen(request, timeout=timeout) as response:
+            raw_body = response.read()
+    except urllib.error.HTTPError as exc:
+        # Handled before URLError: HTTPError subclasses it and carries a response body.
+        detail = exc.read().decode("utf-8", errors="ignore")
+        raise RuntimeError(f"图片生成请求失败: HTTP {exc.code} {detail}") from exc
+    except urllib.error.URLError as exc:
+        raise RuntimeError(f"图片生成请求失败: {exc.reason}") from exc
+    return parse_image_url(json.loads(raw_body.decode("utf-8")))
+
+
+def download_image(url: str, output_path: Path, *, timeout: int = 300) -> None:
+    output_path.parent.mkdir(parents=True, exist_ok=True)  # what ensure_parent() does, inlined
+    try:  # save the body of url to output_path; URLError (incl. HTTPError) becomes RuntimeError
+        with urllib.request.urlopen(url, timeout=timeout) as remote:
+            output_path.write_bytes(remote.read())
+    except urllib.error.URLError as failure:
+        raise RuntimeError(f"下载图片失败: {failure.reason}") from failure
+
+
+def generate_image(
+    prompt: str,
+    output_path: str | Path,
+    *,
+    api_key=None,
+    model: str = DEFAULT_MODEL,
+    size: str = DEFAULT_SIZE,
+    negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
+    prompt_extend: bool = True,
+    watermark: bool = False,
+    timeout: int = 300,
+) -> dict[str, Any]:
+    """Generate one image, save it, and write a JSON metadata sidecar.
+
+    The API key is resolved from ``api_key``, then the ``DASHSCOPE_API_KEY``
+    environment variable, then ``_load_env_key()``. The sidecar is written next
+    to the image, named after the image file with ``.json`` appended.
+
+    Returns:
+        The metadata dict, including ``metadata_path`` (which the sidecar file
+        itself does not contain).
+
+    Raises:
+        RuntimeError: if no API key can be resolved, or from the request/download.
+    """
+    resolved_key = api_key or os.getenv("DASHSCOPE_API_KEY") or _load_env_key()
+    if not resolved_key:
+        raise RuntimeError("缺少 DASHSCOPE_API_KEY，无法生成图片。")
+
+    target = Path(output_path)
+    request_options = dict(model=model, size=size, negative_prompt=negative_prompt,
+                           prompt_extend=prompt_extend, watermark=watermark, timeout=timeout)
+    image_url = request_image(prompt=prompt, api_key=resolved_key, **request_options)
+    download_image(image_url, target, timeout=timeout)
+
+    metadata = dict(status="success", prompt=prompt, model=model, size=size,
+                    image_url=image_url, output_path=str(target.resolve()))
+    sidecar = target.with_suffix(target.suffix + ".json")
+    sidecar.write_text(json.dumps(metadata, ensure_ascii=False, indent=2), encoding="utf-8")
+    metadata["metadata_path"] = str(sidecar.resolve())
+    return metadata
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the CLI parser with its single required ``generate`` subcommand."""
+    parser = argparse.ArgumentParser(description="图片生成脚本")
+    commands = parser.add_subparsers(dest="command", required=True)
+    generate = commands.add_parser("generate", help="按给定 prompt 生成图片")
+    generate.add_argument("--prompt", required=True, help="图片生成 prompt")
+    generate.add_argument("--output", required=True, help="输出图片路径")
+    generate.add_argument("--size", default=DEFAULT_SIZE, help="图片尺寸，例如 1328*1328")
+    generate.add_argument("--model", default=DEFAULT_MODEL, help="模型名")
+    generate.add_argument("--api-key", default=None, help="可选，直接传入 API Key")
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Run the image CLI.
+
+    Prints the metadata JSON to stdout and returns 0 on success; on any
+    exception prints ``{"status": "failed", "error": ...}`` to stderr and
+    returns 1. Arguments are parsed before that handling starts.
+    """
+    args = build_parser().parse_args(argv)
+    try:
+        if args.command != "generate":
+            raise RuntimeError(f"不支持的命令: {args.command}")
+        options = {"api_key": args.api_key, "model": args.model, "size": args.size}
+        result = generate_image(prompt=args.prompt, output_path=args.output, **options)
+        print(json.dumps(result, ensure_ascii=False, indent=2))
+    except Exception as exc:  # top-level boundary: report as JSON instead of a traceback
+        failure = {"status": "failed", "error": str(exc)}
+        print(json.dumps(failure, ensure_ascii=False, indent=2), file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/mermaid/__init__.py b/skills/data-flow-skill/data-flow-skill/scripts/mermaid/__init__.py
new file mode 100644
index 0000000..414a19e
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/mermaid/__init__.py
@@ -0,0 +1,5 @@
+"""Utilities for building and rendering Mermaid flowcharts."""
+
+from .flowchart import FlowStep, MermaidFlowchart, render_mermaid_file
+
+__all__ = ["FlowStep", "MermaidFlowchart", "render_mermaid_file"]
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/mermaid/cli.py b/skills/data-flow-skill/data-flow-skill/scripts/mermaid/cli.py
new file mode 100644
index 0000000..7f15c82
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/mermaid/cli.py
@@ -0,0 +1,117 @@
+"""CLI for generating and exporting Mermaid flowcharts."""
+
+from __future__ import annotations
+
+import argparse
+
+try:
+    from .flowchart import MermaidFlowchart, render_mermaid_file
+    from .echarts_export import render_echarts_file
+except ImportError:
+    from flowchart import MermaidFlowchart, render_mermaid_file
+    from echarts_export import render_echarts_file
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the parser for the ``build``, ``render``, ``export`` and ``echarts`` subcommands."""
+    parser = argparse.ArgumentParser(description="Generate Mermaid flowcharts and export images.")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+
+    def add_chart_options(sub: argparse.ArgumentParser, *outputs: tuple[str, str]) -> None:
+        # Options that describe the flowchart itself, shared by "build" and "export".
+        sub.add_argument("--title", required=True, help="Chart title.")
+        sub.add_argument("--step", action="append", required=True, help="Ordered step text. Repeat this flag.")
+        for flag, help_text in outputs:
+            sub.add_argument(flag, required=True, help=help_text)
+        sub.add_argument("--direction", default="TD", help="Mermaid direction, e.g. TD or LR.")
+
+    def add_render_options(sub: argparse.ArgumentParser) -> None:
+        # Rendering options shared by "render" and "export".
+        sub.add_argument("--theme", default="default", help="Mermaid theme.")
+        sub.add_argument("--background", default="white", help="Background color.")
+        sub.add_argument("--scale", type=int, default=2, help="Render scale.")
+        sub.add_argument("--width", type=int, help="Optional canvas width.")
+        sub.add_argument("--height", type=int, help="Optional canvas height.")
+
+    # Subparser variables end in "_cmd" so that none of them shadows this function's own name.
+    build_cmd = subparsers.add_parser("build", help="Generate a Mermaid flowchart from ordered steps.")
+    add_chart_options(build_cmd, ("--output", "Output .mmd file path."))
+
+    render_cmd = subparsers.add_parser("render", help="Render a Mermaid file to an image or PDF.")
+    render_cmd.add_argument("--input", required=True, help="Input .mmd file path.")
+    render_cmd.add_argument("--output", required=True, help="Output file path, e.g. .png/.svg/.pdf.")
+    add_render_options(render_cmd)
+
+    export_cmd = subparsers.add_parser("export", help="Generate a Mermaid file and immediately render it.")
+    add_chart_options(
+        export_cmd,
+        ("--mmd-output", "Output .mmd file path."),
+        ("--image-output", "Rendered output path."),
+    )
+    add_render_options(export_cmd)
+
+    echarts_cmd = subparsers.add_parser("echarts", help="Render an ECharts JSON config to an image.")
+    echarts_cmd.add_argument("--input", required=True, help="Input ECharts JSON config file path.")
+    echarts_cmd.add_argument("--output", required=True, help="Output image file path.")
+    # "jpg" is accepted because render_echarts_file() normalizes it to "jpeg".
+    echarts_cmd.add_argument("--format", choices=["png", "jpeg", "jpg", "svg", "pdf"],
+                             help="Output format. Inferred from output path if not specified.")
+    echarts_cmd.add_argument("--timeout", type=int, default=60,
+                             help="Rendering timeout in seconds (default: 60).")
+    return parser
+
+
+def main() -> int:
+    """Run the Mermaid/ECharts CLI and print the path returned by the chosen subcommand.
+
+    * ``build`` builds a flowchart from the steps and calls its ``write()``.
+    * ``render`` passes an existing Mermaid file to ``render_mermaid_file()``.
+    * ``export`` does both: it renders the file returned by ``write()``.
+    * ``echarts`` passes an ECharts JSON config to ``render_echarts_file()``.
+
+    Returns:
+        0 on success. An unknown command is passed to ``parser.error``; the
+        ``return 2`` after it is reached only if that call returns.
+    """
+
+    parser = build_parser()
+    args = parser.parse_args()
+    command = args.command
+
+    def render_mermaid(source, destination):
+        # Shared by "render" and "export": both take the same rendering options.
+        return render_mermaid_file(
+            source,
+            destination,
+            theme=args.theme,
+            background_color=args.background,
+            scale=args.scale,
+            width=args.width,
+            height=args.height,
+        )
+
+    if command == "build":
+        chart = MermaidFlowchart.from_steps(args.title, args.step, direction=args.direction)
+        produced = chart.write(args.output)
+    elif command == "render":
+        produced = render_mermaid(args.input, args.output)
+    elif command == "export":
+        chart = MermaidFlowchart.from_steps(args.title, args.step, direction=args.direction)
+        produced = render_mermaid(chart.write(args.mmd_output), args.image_output)
+    elif command == "echarts":
+        produced = render_echarts_file(
+            args.input,
+            args.output,
+            format=args.format,
+            cli_timeout_seconds=args.timeout,
+        )
+    else:
+        parser.error("Unsupported command.")
+        return 2
+
+    print(produced)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/mermaid/echarts_export.py b/skills/data-flow-skill/data-flow-skill/scripts/mermaid/echarts_export.py
new file mode 100644
index 0000000..5ce6080
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/mermaid/echarts_export.py
@@ -0,0 +1,239 @@
+"""Export ECharts configurations to image formats."""
+
+from __future__ import annotations
+
+import json
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+
+
+def is_echarts_cli_available() -> bool:  # True when an "npx" executable is found on PATH
+    return shutil.which("npx") is not None  # NOTE(review): rendering in this module runs "node", not "npx"; confirm intent
+
+
+def render_echarts_file(
+    input_path: str | Path,
+    output_path: str | Path,
+    *,
+    format: str | None = None,
+    cli_timeout_seconds: int = 60,
+) -> Path:
+    """Render an ECharts JSON config file to an image.
+
+    Args:
+        input_path: Path to ECharts JSON config file; it must hold a JSON object.
+        output_path: Output image file path; missing parent directories are created.
+        format: Output format (png, jpeg, jpg, svg, pdf). Inferred from the
+            output_path suffix if None, falling back to png when there is no suffix.
+        cli_timeout_seconds: Timeout for CLI rendering.
+
+    Returns:
+        Path to the rendered output file.
+
+    Raises:
+        ValueError: Unsupported format, or the config is not a JSON object.
+        FileNotFoundError: The input file does not exist.
+        RuntimeError: Node.js is not installed, or rendering failed or timed out.
+    """
+    input_file = Path(input_path)
+    output_file = Path(output_path)
+
+    if format is None:
+        format = output_file.suffix.lstrip(".").lower() or "png"
+    if format not in {"png", "jpeg", "jpg", "svg", "pdf"}:
+        raise ValueError(f"Unsupported output format: {format}")
+    if format == "jpg":
+        format = "jpeg"
+
+    if not input_file.exists():
+        raise FileNotFoundError(f"Input file not found: {input_file}")
+    config = json.loads(input_file.read_text(encoding="utf-8"))
+    if not isinstance(config, dict):
+        # _build_echarts_html() reads keys with config.get(); fail early with a clear message.
+        raise ValueError(f"ECharts config must be a JSON object: {input_file}")
+    html_content = _build_echarts_html(config, format)
+
+    # Created only after validation, so a rejected call leaves no new directory behind.
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".html", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(html_content)
+        temp_html = Path(f.name)
+
+    try:
+        _render_via_puppeteer(temp_html, output_file, format=format, timeout=cli_timeout_seconds)
+    except FileNotFoundError:
+        # Raised by subprocess when the "node" executable cannot be started.
+        if shutil.which("node") is None:
+            raise RuntimeError(
+                "Node.js is not installed. Please install Node.js to render ECharts images."
+            ) from None
+        raise
+    except subprocess.TimeoutExpired as exc:
+        raise RuntimeError(f"Rendering timed out after {cli_timeout_seconds} seconds.") from exc
+    finally:
+        temp_html.unlink(missing_ok=True)
+
+    return output_file
+
+
+def _build_echarts_html(config: dict, format: str) -> str:
+    """Return a standalone HTML page that loads ECharts 5.4.3 from jsDelivr and draws ``config``."""
+    width = config.get("width", 800)
+    height = config.get("height", 600)
+    background_color = config.get("backgroundColor", "#ffffff")
+    renderer = "svg" if format == "svg" else "canvas"  # a canvas chart has no <svg> element to export
+    # Escape "<" so a string such as "</script>" in the config cannot end the inline script early.
+    config_json = json.dumps(config, ensure_ascii=False, separators=(",", ":")).replace("<", "\\u003c")
+    return f"""<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
+  <style>
+    * {{ margin: 0; padding: 0; box-sizing: border-box; }}
+    body {{
+      background: {background_color};
+      width: {width}px;
+      height: {height}px;
+      overflow: hidden;
+    }}
+    #chart {{
+      width: {width}px;
+      height: {height}px;
+    }}
+  </style>
+</head>
+<body>
+  <div id="chart"></div>
+  <script>
+    var chart = echarts.init(document.getElementById('chart'), null, {{
+      renderer: '{renderer}',
+      width: {width},
+      height: {height}
+    }});
+    var option = {config_json};
+    chart.setOption(option);
+
+    // Export after render completes
+    setTimeout(function() {{
+      window._echarts_export_format = '{format}';
+      window._echarts_export_done = true;
+    }}, 500);
+  </script>
+</body>
+</html>"""
+
+
+def _render_via_puppeteer(
+    html_path: Path,
+    output_path: Path,
+    *,
+    format: str,
+    timeout: int,
+) -> None:
+    """Render ``html_path`` to ``output_path`` with a generated Puppeteer script run by ``node``.
+
+    ``svg`` serializes the ``<svg>`` element inside ``#chart``, ``pdf`` goes through
+    ``page.pdf()`` sized to the chart, and every other format is a clipped
+    ``page.screenshot()``. Paths and format are embedded as JSON string literals so
+    quotes or backslashes (e.g. Windows paths) cannot corrupt the JavaScript.
+
+    Raises:
+        RuntimeError: The node process exited with a non-zero status.
+        FileNotFoundError: The ``node`` executable could not be started.
+        subprocess.TimeoutExpired: The node process ran longer than ``timeout`` seconds.
+    """
+    script = f"""
+const puppeteer = require('puppeteer');
+const fs = require('fs');
+const {{ pathToFileURL }} = require('url');
+
+const htmlPath = {json.dumps(str(html_path))};
+const outputPath = {json.dumps(str(output_path))};
+const format = {json.dumps(format)};
+
+(async () => {{
+  const browser = await puppeteer.launch({{
+    headless: 'new',
+    args: ['--no-sandbox', '--disable-setuid-sandbox']
+  }});
+  const page = await browser.newPage();
+
+  await page.goto(pathToFileURL(htmlPath).href, {{ waitUntil: 'networkidle0', timeout: {timeout * 1000} }});
+
+  // Wait for chart to render
+  await new Promise(r => setTimeout(r, 1000));
+
+  const chart = await page.$('#chart');
+  if (!chart) throw new Error('Chart element not found');
+
+  const boundingBox = await chart.boundingBox();
+  const width = Math.ceil(boundingBox.width);
+  const height = Math.ceil(boundingBox.height);
+
+  if (format === 'svg') {{
+    // Only a chart drawn with the SVG renderer has an <svg> element to serialize.
+    const svg = await page.evaluate(() => {{
+      const node = document.querySelector('#chart svg');
+      return node ? new XMLSerializer().serializeToString(node) : null;
+    }});
+    if (!svg) throw new Error('SVG export not supported for this chart type');
+    fs.writeFileSync(outputPath, svg);
+  }} else {{
+    await page.setViewport({{ width, height }});
+
+    await page.evaluate(() => {{
+      const chartInst = document.querySelector('#chart canvas');
+      if (chartInst && chartInst.style) {{
+        chartInst.style.background = 'white';
+      }}
+    }});
+
+    if (format === 'pdf') {{
+      // page.screenshot() only produces images; PDF output needs page.pdf().
+      await page.pdf({{
+        path: outputPath,
+        width: width + 'px',
+        height: height + 'px',
+        printBackground: true,
+        pageRanges: '1'
+      }});
+    }} else {{
+      const imageType = format === 'jpg' ? 'jpeg' : format;
+      const screenshotOptions = {{
+        path: outputPath,
+        type: imageType,
+        fullPage: false,
+        clip: {{
+          x: boundingBox.x,
+          y: boundingBox.y,
+          width: boundingBox.width,
+          height: boundingBox.height
+        }}
+      }};
+      if (imageType === 'jpeg') screenshotOptions.quality = 95;
+      await page.screenshot(screenshotOptions);
+    }}
+  }}
+
+  await browser.close();
+  process.exit(0);
+}})().catch(err => {{
+  console.error(err);
+  process.exit(1);
+}});
+"""
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".js", delete=False, encoding="utf-8") as f:
+        f.write(script)
+        script_path = Path(f.name)
+    try:
+        result = subprocess.run(["node", str(script_path)], capture_output=True, text=True, timeout=timeout)
+        if result.returncode != 0:
+            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
+            raise RuntimeError(f"Puppeteer rendering failed: {error_msg}")
+    finally:
+        script_path.unlink(missing_ok=True)
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/mermaid/flowchart.py b/skills/data-flow-skill/data-flow-skill/scripts/mermaid/flowchart.py
new file mode 100644
index 0000000..16938f1
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/mermaid/flowchart.py
@@ -0,0 +1,314 @@
+"""Build and render Mermaid flowcharts."""
+
+from __future__ import annotations
+
+import base64
+import json
+import re
+import shutil
+import subprocess
+import zlib
+import urllib.error
+import urllib.parse
+import urllib.request
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Iterable
+
+
+def _slugify(value: str) -> str:
+    """Replace each run outside 0-9/A-Z/a-z/U+4E00-U+9FFF with ``_``; return ``node`` if nothing remains."""
+    slug = re.sub(r"[^0-9A-Za-z\u4e00-\u9fff]+", "_", value.strip()).strip("_")
+    return slug if slug else "node"
+
+
+def _escape_mermaid_text(value: str) -> str:
+    return "<br/>".join(value.replace('"', "'").split("\n"))  # double quotes -> apostrophes, newlines -> <br/>
+
+
+@dataclass(slots=True)
+class FlowStep:
+    """One flowchart node: its label text, an optional explicit id and a shape name."""
+
+    text: str  # label shown inside the node
+    node_id: str | None = None  # explicit node id; generated when unset or empty
+    shape: str = "rect"  # shape name, looked up by the flowchart builder
+    metadata: dict[str, str] = field(default_factory=dict)  # free-form extras; not read in this class
+
+    def resolved_node_id(self, index: int) -> str:
+        """Return ``node_id`` when set, otherwise ``step_<index>_<slug of text>``."""
+        # ``or`` keeps the slug lazy: it is only computed when no id was given.
+        return self.node_id or f"step_{index}_{_slugify(self.text)}"
+
+
+class MermaidFlowchart:
+    """Composable Mermaid flowchart builder.
+
+    Labels are emitted inside double quotes so that brackets, braces and other
+    Mermaid syntax characters in step text cannot break the diagram.
+    """
+
+    # str.format templates: doubled braces come out as literal ``{`` / ``}``.
+    _SHAPE_TEMPLATES = {
+        "rect": '["{label}"]',
+        "round": '("{label}")',
+        "stadium": '(["{label}"])',
+        "subroutine": '[["{label}"]]',
+        "cylindrical": '[("{label}")]',
+        "circle": '(("{label}"))',
+        "diamond": '{{"{label}"}}',
+        "hexagon": '{{{{"{label}"}}}}',
+        "parallelogram": '[/"{label}"/]',
+    }
+
+    def __init__(
+        self, title: str, direction: str = "TD", class_definitions: Iterable[str] | None = None
+    ) -> None:
+        """Store the title, layout direction and extra lines appended verbatim to the diagram."""
+        self.title = title
+        self.direction = direction
+        self.class_definitions = list(class_definitions or [])
+        self.steps: list[FlowStep] = []
+        self.links: list[tuple[str, str, str | None]] = []
+
+    def add_step(self, text: str, *, node_id: str | None = None, shape: str = "rect") -> FlowStep:
+        """Append a step and return it; unknown ``shape`` names render as ``rect``."""
+        step = FlowStep(text=text, node_id=node_id, shape=shape)
+        self.steps.append(step)
+        return step
+
+    def add_link(self, source_id: str, target_id: str, label: str | None = None) -> None:
+        """Record an explicit edge; once any link exists, steps are no longer auto-chained."""
+        self.links.append((source_id, target_id, label))
+
+    @classmethod
+    def from_steps(
+        cls, title: str, steps: Iterable[str], *, direction: str = "TD",
+        first_shape: str = "round", last_shape: str = "stadium",
+    ) -> "MermaidFlowchart":
+        """Build a linear flowchart from step texts, skipping blank entries."""
+        flowchart = cls(title=title, direction=direction)
+        step_items = [step for step in steps if step.strip()]
+        last_index = len(step_items) - 1
+        for index, step_text in enumerate(step_items):
+            shape = first_shape if index == 0 else last_shape if index == last_index else "rect"
+            flowchart.add_step(step_text, shape=shape)
+        return flowchart
+
+    def to_mermaid(self) -> str:
+        """Return the diagram source: front-matter title, header, nodes, edges, extra lines."""
+        lines = [
+            "---",
+            # A JSON string is a valid YAML double-quoted scalar, so ``:`` or ``#`` in a title is safe.
+            f"title: {json.dumps(self.title, ensure_ascii=False)}",
+            "---",
+            f"flowchart {self.direction}",
+        ]
+
+        node_ids: list[str] = []
+        for index, step in enumerate(self.steps, start=1):
+            node_id = step.resolved_node_id(index)
+            label = _escape_mermaid_text(step.text)
+            template = self._SHAPE_TEMPLATES.get(step.shape, self._SHAPE_TEMPLATES["rect"])
+            lines.append(f"    {node_id}{template.format(label=label)}")
+            node_ids.append(node_id)
+
+        if self.links:
+            for source_id, target_id, label in self.links:
+                connector = f' -->|"{_escape_mermaid_text(label)}"| ' if label else " --> "
+                lines.append(f"    {source_id}{connector}{target_id}")
+        else:
+            for source_id, target_id in zip(node_ids, node_ids[1:]):
+                lines.append(f"    {source_id} --> {target_id}")
+
+        lines.extend(f"    {class_definition}" for class_definition in self.class_definitions)
+        return "\n".join(lines) + "\n"
+
+    def write(self, output_path: str | Path) -> Path:
+        """Write the diagram to ``output_path`` as UTF-8, creating parent directories."""
+        path = Path(output_path)
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(self.to_mermaid(), encoding="utf-8")
+        return path
+
+
+def render_mermaid_file(
+    input_path: str | Path,
+    output_path: str | Path,
+    *,
+    theme: str = "default",
+    background_color: str = "white",
+    scale: int = 2,
+    width: int | None = None,
+    height: int | None = None,
+    cli_timeout_seconds: int = 45,
+    prefer_remote: bool = False,
+) -> Path:
+    """Render a Mermaid source file to ``output_path``, locally when possible.
+
+    The local renderer runs ``@mermaid-js/mermaid-cli`` through ``npx``. The
+    remote renderer (mermaid.ink) is used when ``prefer_remote`` is set, when
+    ``npx`` is not on ``PATH``, or when the local run cannot be started, times
+    out or exits with a non-zero status.
+
+    For a local run, a ``.mermaid-config.json`` file is written next to the
+    output (same stem) and removed again before returning.
+
+    Args:
+        input_path: Mermaid source file, read as UTF-8.
+        output_path: Destination file; missing parent directories are created.
+        theme: Theme name handed to both renderers.
+        background_color: Background colour handed to both renderers.
+        scale: Scale factor handed to both renderers.
+        width: Optional width; left out of the request when ``None``.
+        height: Optional height; left out of the request when ``None``.
+        cli_timeout_seconds: Timeout in seconds for the local ``npx`` process.
+        prefer_remote: Skip the local renderer entirely.
+
+    Returns:
+        ``output_path`` as a ``Path``.
+
+    Raises:
+        RuntimeError: Remote rendering failed. After a non-zero local exit the
+            message carries both the local and the remote error text.
+    """
+    input_file = Path(input_path)
+    output_file = Path(output_path)
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+    diagram_text = input_file.read_text(encoding="utf-8")
+
+    def render_remotely() -> Path:
+        """Render through mermaid.ink with the caller's options."""
+        _render_via_mermaid_ink(
+            diagram_text,
+            output_file,
+            theme=theme,
+            background_color=background_color,
+            scale=scale,
+            width=width,
+            height=height,
+        )
+        return output_file
+
+    if prefer_remote:
+        return render_remotely()
+
+    # Launch the resolved executable: on Windows ``npx`` is a ``.cmd`` shim that
+    # a bare "npx" argv entry cannot start without a shell.
+    npx_path = shutil.which("npx")
+    if npx_path is None:
+        return render_remotely()
+
+    # CLI configuration file; deleted in the ``finally`` block below.
+    config = {
+        "theme": theme,
+        "flowchart": {"curve": "basis"},
+        "fontFamily": "PingFang SC, Hiragino Sans GB, Microsoft YaHei, sans-serif",
+    }
+    config_path = output_file.with_suffix(".mermaid-config.json")
+    config_path.write_text(json.dumps(config, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    command = [
+        npx_path,
+        "-y",
+        "@mermaid-js/mermaid-cli",
+        "-i",
+        str(input_file),
+        "-o",
+        str(output_file),
+        "-t",
+        theme,
+        "-b",
+        background_color,
+        "-s",
+        str(scale),
+        "-c",
+        str(config_path),
+    ]
+    if width is not None:
+        command.extend(["-w", str(width)])
+    if height is not None:
+        command.extend(["-H", str(height)])
+
+    try:
+        subprocess.run(
+            command,
+            check=True,
+            capture_output=True,
+            text=True,
+            timeout=cli_timeout_seconds,
+        )
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        # The CLI could not be started or hung: fall back; a remote failure propagates.
+        return render_remotely()
+    except subprocess.CalledProcessError as exc:
+        stderr = (exc.stderr or "").strip()
+        stdout = (exc.stdout or "").strip()
+        local_error = stderr or stdout or "未知错误"
+        try:
+            return render_remotely()
+        except RuntimeError as remote_exc:
+            raise RuntimeError(f"Mermaid 导出失败。本地错误: {local_error}；远程错误: {remote_exc}") from exc
+    finally:
+        # Runs on every path above, including the fallbacks.
+        config_path.unlink(missing_ok=True)
+
+    return output_file
+
+
+def _render_via_mermaid_ink(
+    diagram_text: str,
+    output_file: Path,
+    *,
+    theme: str,
+    background_color: str,
+    scale: int,
+    width: int | None,
+    height: int | None,
+) -> None:
+    """Render ``diagram_text`` through https://mermaid.ink and write the bytes to ``output_file``.
+
+    The endpoint is picked from the output suffix (png, svg or pdf).
+
+    Raises:
+        RuntimeError: Unsupported suffix, or the request failed or timed out.
+    """
+    extension = output_file.suffix.lower().lstrip(".")
+    if extension not in {"png", "svg", "pdf"}:
+        raise RuntimeError(f"远程渲染暂不支持输出格式: {output_file.suffix or '<none>'}")
+
+    state = {"code": diagram_text, "mermaid": {"theme": theme}}
+    raw = json.dumps(state, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
+    # "pako:" token = zlib-compressed JSON state, URL-safe base64 without padding.
+    token = base64.urlsafe_b64encode(zlib.compress(raw, 9)).decode("ascii").rstrip("=")
+    encoded = "pako:" + urllib.parse.quote(token, safe=":_-")
+
+    endpoint, query_params = {
+        "svg": ("svg", {}),
+        "png": ("img", {"type": "png"}),
+        "pdf": ("pdf", {"fit": ""}),
+    }[extension]
+
+    if background_color:
+        # Named colours are sent as "!name"; hex colours must not carry the leading "#".
+        named = background_color.isalpha()
+        query_params["bgColor"] = f"!{background_color}" if named else background_color.lstrip("#")
+    if width is not None:
+        query_params["width"] = str(width)
+    if height is not None:
+        query_params["height"] = str(height)
+    if width is not None or height is not None:
+        query_params["scale"] = str(max(1, min(scale, 3)))
+
+    url = f"https://mermaid.ink/{endpoint}/{encoded}"
+    if query_params:
+        url = f"{url}?{urllib.parse.urlencode(query_params, doseq=False)}"
+
+    request = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
+    try:
+        with urllib.request.urlopen(request, timeout=30) as response:
+            image_bytes = response.read()
+    except OSError as exc:  # URLError is an OSError; this also covers timeouts while reading the body
+        raise RuntimeError(f"远程渲染服务不可用: {exc}") from exc
+
+    output_file.write_bytes(image_bytes)
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bar_memevolve.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bar_memevolve.py
new file mode 100644
index 0000000..6ea069c
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bar_memevolve.py
@@ -0,0 +1,113 @@
+"""
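+Reproduction of image1: bar chart in the style of the MemEvolve paper.
+Features: paired bars (baseline vs method) + arrows + red percentage labels + dashed reference lines.
+Source: MemEvolve: Meta-Evolution of Agent Memory Systems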
+"""
+
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches  # NOTE(review): not referenced in this script
+import matplotlib.ticker as ticker  # NOTE(review): not referenced; the locator below comes from plt
+import numpy as np
+
+# ── Pre-analysis notes ─────────────────────────────────────
+# Font: serif, double-storey 'a', fine serifs → Computer Modern look;
+#       closest fonts available to matplotlib: STIXGeneral / DejaVu Serif
+# Weight: title (bold) | gain labels (bold) | tick and axis labels (set bold in the loop below)
+# Spacing: the two bars of a pair nearly touch (GAP = 0.01); each bar is BAR_W = 0.28 wide
+# Resolution: 300 dpi
+plt.rcParams.update({
+    'font.family': 'serif',
+    'font.serif': ['STIXGeneral', 'DejaVu Serif', 'Times New Roman'],
+    'mathtext.fontset': 'stix',
+})
+
+# ── Colours ───────────────────────────────────────────────
+COLOR_BASELINE = '#A8C8E8'   # light steel blue, baseline bars
+COLOR_METHOD   = '#1B3D6E'   # dark navy blue, method bars
+COLOR_DELTA    = '#CC2200'   # red, gain labels
+
+# ── Data ─────────────────────────────────────────────────
+panels = [
+    {
+        'title': 'OWL-Workforce',   # emoji is not supported by the font, so plain text is used
+        'groups': ['Web', 'xBench', 'TaskCraft', 'GAIA'],
+        'baseline': [58.1, 55.2, 58.7, 59.3],
+        'method':   [62.3, 61.2, 65.5, 61.0],
+        'delta':    ['+7.1%', '+10.9%', '+11.9%', '+2.7%'],
+        'ylim':     (40, 71),   # left panel of the original spans 40-70 on the Y axis
+    },
+    {
+        'title': 'CK-Pro',
+        'groups': ['Web', 'xBench', 'TaskCraft', 'GAIA'],
+        'baseline': [61.2, 55.8, 63.8, 58.1],
+        'method':   [63.8, 64.8, 67.8, 63.1],
+        'delta':    ['+4.2%', '+16.1%', '+4.8%', '+8.4%'],
+        'ylim':     (40, 76),   # right panel of the original spans 40-75 on the Y axis
+    },
+]
+
+# ── Figure ───────────────────────────────────────────────
+fig, axes = plt.subplots(1, 2, figsize=(10, 4.5), sharey=False)
+fig.subplots_adjust(wspace=0.35)
+
+BAR_W    = 0.28
+GAP      = 0.01      # the two bars nearly touch (about 0 in the original)
+ARROW_KW = dict(arrowstyle='->', color='black', lw=1.2)
+
+for ax, panel in zip(axes, panels):
+    groups   = panel['groups']
+    baseline = np.array(panel['baseline'])
+    method   = np.array(panel['method'])
+    delta    = panel['delta']
+    n        = len(groups)
+    x        = np.arange(n)
+
+    # Bars: baseline left of the group centre, method right of it
+    bars_b = ax.bar(x - (BAR_W + GAP) / 2, baseline, width=BAR_W,
+                    color=COLOR_BASELINE, zorder=3)
+    bars_m = ax.bar(x + (BAR_W + GAP) / 2, method,   width=BAR_W,
+                    color=COLOR_METHOD,   zorder=3)
+
+    # Dashed reference line at the baseline height
+    for i, (bl, me) in enumerate(zip(baseline, method)):
+        # Horizontal dashed line at the baseline value, from x[i] - BAR_W to x[i] + BAR_W + GAP / 2
+        ax.plot([x[i] - BAR_W, x[i] + BAR_W + GAP / 2],
+                [bl, bl], color='black', lw=0.9, ls='--', zorder=4)
+
+        # Arrow from the baseline level up to the method bar top (inset by 0.3 at both ends)
+        ax.annotate('', xy=(x[i] + (BAR_W + GAP) / 2, me - 0.3),
+                    xytext=(x[i] + (BAR_W + GAP) / 2, bl + 0.3),
+                    arrowprops=ARROW_KW, zorder=5)
+
+        # Red percentage label above the method bar
+        ax.text(x[i] + (BAR_W + GAP) / 2, me + 0.6,
+                delta[i], color=COLOR_DELTA,
+                ha='center', va='bottom', fontsize=9.5, fontweight='bold')
+
+    # Axis styling: tick and axis labels are clearly bold in the original
+    ax.set_xticks(x)
+    ax.set_xticklabels(groups, fontsize=10.5, fontweight='bold')
+    ax.set_ylabel('Accuracy (Pass@1)', fontsize=10.5, fontweight='bold')
+    ax.set_ylim(*panel['ylim'])
+    ax.yaxis.set_major_locator(plt.MultipleLocator(5))
+
+    # All four spines visible with a fairly thick line (the original has a pronounced frame)
+    for spine in ax.spines.values():
+        spine.set_linewidth(1.5)
+        spine.set_color('black')
+    ax.tick_params(length=0)
+    ax.set_axisbelow(True)
+
+    # Panel title in the top-left corner: bold serif, dark blue as in the original
+    ax.text(0.04, 0.97, panel['title'], transform=ax.transAxes,
+            fontsize=12, fontweight='bold', va='top', ha='left',
+            color='#003F6C', fontfamily='serif')
+
+from pathlib import Path
+
+output_path = Path('output/figures/bar_memevolve_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+plt.savefig(output_path,
+            dpi=300, bbox_inches='tight', facecolor='white')
+plt.close()
+print('saved: bar_memevolve_repro.png')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bar_spice.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bar_spice.py
new file mode 100644
index 0000000..f4432b2
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bar_spice.py
@@ -0,0 +1,169 @@
+"""
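+Reproduction of image5: bar chart in the style of the SPICE paper.
+Features: grouped bars + hatched fill (main method) + value labels on top of the bars (best one bold) + grey grid.
+Source: SPICE: Self-play in corpus environments improves reasoning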
+"""
+
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import numpy as np
+
+# ── Pre-analysis notes ─────────────────────────────────────
+# Font: the original looks closer to LaTeX/Computer Modern than to Times;
+#       usetex is enabled here to get the TeX font look common in paper figures
+# Weight: panel title (normal) | SPICE legend entry (bold) | other legend entries (normal)
+#       main-method values (bold) | other values (normal); draw_panel colours all values black
+# Spacing: three fairly thin bars per group, clear gaps between groups, flat panels close to the original aspect ratio
+# Frame: draw_panel hides the top/right spines and draws the remaining ones above the bars (zorder 4 vs 2)
+# Resolution: 300 dpi
+plt.rcParams.update({
+    'text.usetex': True,
+    'font.family': 'serif',
+    'font.serif': ['Computer Modern Roman', 'STIX Two Text', 'DejaVu Serif'],
+    'axes.unicode_minus': False,
+    'hatch.color': 'white',     # white hatch lines over the red fill, as in the original
+    'hatch.linewidth': 1.4,
+})
+
+# ── Colours & hatching ────────────────────────────────────
+# Left panel (ablation)
+COLORS_ABL   = ['#FFB695', '#FF7F5E', '#D00000']   # light orange / mid orange / pure red (matched to the original)
+HATCHES_ABL  = ['', '', '//']
+LABELS_ABL   = ['SPICE (Fixed Challenger)', 'SPICE (No Corpus)', 'SPICE']
+
+# Right panel (comparison)
+COLORS_CMP   = ['#D3D3D3', '#A9A9A9', '#D00000']   # light grey / mid grey / pure red (matched to the original)
+HATCHES_CMP  = ['', '', '//']
+LABELS_CMP   = ['R-Zero', 'Absolute Zero', 'SPICE']
+
+# ── Data ─────────────────────────────────────────────────
+benchmarks = ['MATH500', "AIME'25", 'GPQA-Diamond', 'MMLU-Pro']
+
+data_abl = {
+    'SPICE (Fixed Challenger)': [68.2,  6.7, 26.3, 51.6],
+    'SPICE (No Corpus)':        [72.6, 12.3, 31.8, 53.7],
+    'SPICE':                    [78.0, 19.1, 39.4, 58.1],
+}
+data_cmp = {
+    'R-Zero':        [72.6,  5.2, 27.8, 53.7],
+    'Absolute Zero': [76.2, 13.4, 35.3, 52.6],
+    'SPICE':         [78.0, 19.1, 39.4, 58.1],
+}
+
+BEST_METHOD = 'SPICE'   # method whose bar-top values are emphasised in bold
+
+
+def draw_panel(ax, data_dict, colors, hatches, labels, title,
+               x_positions, total_w, xlim, legend_anchor):
+    """Draw one grouped-bar panel on ``ax``: bars, value labels, axes styling and legend.
+
+    ``data_dict[label]`` is plotted at ``x_positions`` with the module-level
+    ``benchmarks`` as tick labels; ``total_w`` is the combined width of one group.
+    ``BEST_METHOD`` gets bold value labels and a bold legend entry.
+    """
+    n_methods = len(labels)
+    x         = np.array(x_positions)
+    bar_w     = total_w / n_methods
+    # ``fontweight`` does not affect usetex-rendered text, so bold needs \textbf there.
+    use_tex   = plt.rcParams['text.usetex']
+
+    for i, (label, color, hatch) in enumerate(zip(labels, colors, hatches)):
+        vals   = data_dict[label]
+        offset = (i - n_methods / 2 + 0.5) * bar_w
+        bars   = ax.bar(x + offset, vals, width=bar_w,
+                        color=color, hatch=hatch,
+                        edgecolor='white',
+                        linewidth=0.8, zorder=2, label=label)
+
+        is_best = (label == BEST_METHOD)
+        for bar, v in zip(bars, vals):
+            value_text = rf'\textbf{{{v}}}' if is_best and use_tex else f'{v}'
+            ax.text(bar.get_x() + bar.get_width() / 2,
+                    v + 0.5, value_text,
+                    ha='center', va='bottom',
+                    fontsize=8.7,
+                    fontweight='bold' if is_best else 'normal',
+                    color='black',   # all value labels are black; only the weight differs
+                    zorder=3)
+
+    # Axes
+    ax.set_xticks(x)
+    ax.set_xticklabels(benchmarks, fontsize=10.8)
+    ax.set_xlabel('Benchmark', fontsize=11.2)
+    ax.set_ylabel(r'Accuracy (\%)', fontsize=11.2)
+    ax.set_ylim(0, 85)
+    ax.set_xlim(*xlim)
+    ax.set_title(title, fontsize=13.2, pad=5)
+
+    # Grid: light grey dashed lines on the y axis only
+    ax.yaxis.grid(True, color='#EBEBEB', linewidth=0.7, linestyle='--', zorder=0)
+    ax.set_axisbelow(True)
+
+    # Open-style axes as in the original: keep only the left/bottom spines
+    for side, spine in ax.spines.items():
+        if side in ('top', 'right'):
+            spine.set_visible(False)
+        else:
+            spine.set_linewidth(0.9)
+            spine.set_color('#333333')
+            spine.set_zorder(4)
+
+    ax.tick_params(length=0, labelsize=10.2)
+
+    # Legend
+    handles = [mpatches.Patch(facecolor=c, hatch=h,
+                               edgecolor='white', linewidth=0.8,
+                               label=l)
+               for l, c, h in zip(labels, colors, hatches)]
+    leg = ax.legend(handles=handles, fontsize=8.9, loc='upper right',
+                    bbox_to_anchor=legend_anchor,
+                    framealpha=1.0, facecolor='white',
+                    edgecolor='#C8C8C8', fancybox=False,
+                    borderpad=0.28, labelspacing=0.26,
+                    handlelength=1.7, handletextpad=0.45,
+                    borderaxespad=0.28)
+    # Embolden only the BEST_METHOD legend entry
+    for text in leg.get_texts():
+        if text.get_text() == BEST_METHOD:
+            text.set_fontweight('bold')
+            if use_tex:
+                text.set_text(rf'\textbf{{{BEST_METHOD}}}')
+
+
+# ── Figure: two panels side by side ──────────────────────
+fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12.8, 4.35))
+fig.subplots_adjust(left=0.05, right=0.985, bottom=0.15, top=0.86, wspace=0.16)
+
+draw_panel(
+    ax1,
+    data_abl,
+    COLORS_ABL,
+    HATCHES_ABL,
+    LABELS_ABL,
+    '(a) SPICE Ablations',
+    x_positions=[0.00, 1.00, 2.00, 3.00],
+    total_w=0.78,  # combined width of the three bars of one group
+    xlim=(-0.56, 3.56),
+    legend_anchor=(0.992, 0.986),
+)
+draw_panel(
+    ax2,
+    data_cmp,
+    COLORS_CMP,
+    HATCHES_CMP,
+    LABELS_CMP,
+    '(b) SPICE vs Baselines',
+    x_positions=[0.00, 1.00, 2.00, 3.00],
+    total_w=0.78,  # same geometry as panel (a)
+    xlim=(-0.56, 3.56),
+    legend_anchor=(0.992, 0.986),
+)
+
+from pathlib import Path
+
+output_path = Path('output/figures/bar_spice_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)  # create output/figures on first run
+plt.savefig(output_path,
+            dpi=300, facecolor='white')
+plt.close()
+print('saved: bar_spice_repro.png')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/box_plot.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/box_plot.py
new file mode 100644
index 0000000..9b4eef4
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/box_plot.py
@@ -0,0 +1,113 @@
+﻿"""
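+Box plot
+Features: distribution comparison across groups, boxes outlined on all four sides, light-grey y-axis grid, thick red median line
+Source: academic statistical chart style, reusing the bar_grouped_hatch colour scheme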
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+from pathlib import Path
+
+# ── Global style (same serif + usetex style as bar_grouped_hatch) ───────
+plt.rcParams.update({
+    'text.usetex': True,
+    'font.family': 'serif',
+    'font.serif': ['Computer Modern Roman', 'STIX Two Text', 'DejaVu Serif'],
+    'axes.unicode_minus': False,
+})
+
+# ── Colours ───────────────────────────────────────────────────
+C_BOX   = '#5499C7'   # main box colour (blue)
+C_MED   = '#CC2200'   # median line (red, same as the delta colour in bar_paired_delta)
+C_OUTL  = '#D651A0'   # outliers (pink, same as scatter_tsne)
+C_WHISK = '#333333'   # whisker colour
+
+# ── Data (replace with your own) ──────────────────────────────
+# Each group may be a list or an np.array
+data = {
+    'Method A': [23.5, 25.1, 24.8, 26.2, 27.0, 25.5, 24.9, 26.8, 25.0, 24.3],
+    'Method B': [28.3, 29.1, 27.8, 30.2, 29.5, 28.9, 30.1, 29.0, 28.7, 29.3],
+    'Ours':     [31.2, 32.5, 31.8, 33.1, 32.0, 31.5, 32.8, 33.4, 31.9, 32.2],
+}
+
+labels = list(data.keys())
+values = [np.array(v) for v in data.values()]
+
+# ── Parameters ───────────────────────────────────────────────
+TITLE   = r'\textbf{Distribution Comparison}'
+XLABEL  = r'\textbf{Method}'
+YLABEL  = r'\textit{Accuracy (\%)}'
+YLIM    = (15, 40)
+BOX_W   = 0.35
+
+# ── Figure ───────────────────────────────────────────────────
+fig, ax = plt.subplots(figsize=(7.5, 5.0))
+
+x_pos = np.arange(len(labels))
+
+# Draw the box plot
+bp = ax.boxplot(
+    values,
+    positions=x_pos,
+    widths=BOX_W,
+    patch_artist=True,      # boxes become patches so they can be filled
+    showmeans=True,        # show the mean
+    meanline=False,        # mean drawn as a marker, not a line
+    whiskerprops=dict(color=C_WHISK, linewidth=1.2),
+    capprops=dict(color=C_WHISK, linewidth=1.2),
+    flierprops=dict(marker='o', markerfacecolor=C_OUTL, markersize=5,
+                    markeredgecolor=C_WHISK, markeredgewidth=0.5),
+    medianprops=dict(color=C_MED, linewidth=2.0),
+    meanprops=dict(marker='D', markerfacecolor='white', markeredgecolor=C_WHISK,
+                   markersize=5, markeredgewidth=0.8),
+)
+
+# Fill and outline the boxes
+for patch in bp['boxes']:
+    patch.set_facecolor(C_BOX)
+    patch.set_alpha(0.75)
+    patch.set_edgecolor(C_WHISK)
+    patch.set_linewidth(1.0)
+
+# Hide the top/right spines (academic style)
+ax.spines['top'].set_visible(False)
+ax.spines['right'].set_visible(False)
+ax.spines['left'].set_color('#333333')
+ax.spines['bottom'].set_color('#333333')
+ax.spines['left'].set_linewidth(1.2)
+ax.spines['bottom'].set_linewidth(1.2)
+
+# y-axis grid (same as bar_grouped_hatch)
+ax.yaxis.grid(True, color='#EBEBEB', linewidth=0.7, linestyle='--', zorder=0)
+ax.set_axisbelow(True)
+
+# Ticks
+ax.tick_params(length=4, direction='in', labelsize=10)
+
+# Labels
+ax.set_xticks(x_pos)
+ax.set_xticklabels(labels, fontsize=11)
+ax.set_xlabel(XLABEL, fontsize=11)
+ax.set_ylabel(YLABEL, fontsize=11)
+ax.set_ylim(*YLIM)
+
+# Title
+ax.set_title(TITLE, fontsize=13, pad=8)
+
+# Legend (explains the mean marker and the median line)
+from matplotlib.lines import Line2D
+legend_elements = [
+    Line2D([0], [0], marker='D', color='white', markerfacecolor='white',
+           markeredgecolor=C_WHISK, markersize=5, markeredgewidth=0.8,
+           label=r'\textit{Mean}'),
+    Line2D([0], [0], color=C_MED, linewidth=2.0, label=r'\textit{Median}'),
+]
+ax.legend(handles=legend_elements, fontsize=9, loc='upper right',
+          frameon=True, facecolor='white', edgecolor='#CCCCCC')
+
+# ── Save ─────────────────────────────────────────────────────
+output_path = Path('output/figures/box_plot_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(output_path, dpi=300, facecolor='white', bbox_inches='tight')
+plt.close(fig)
+print(f'✅ saved: {output_path}')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bubble_chart.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bubble_chart.py
new file mode 100644
index 0000000..1858a01
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/bubble_chart.py
@@ -0,0 +1,149 @@
+﻿"""
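+Bubble chart
+Features: x/y plane with marker size encoding a third dimension, circular bubbles, serif + usetex style
+Source: builds on the cluster presentation of scatter_tsne_cluster, adding size as a third dimension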
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+from pathlib import Path
+
+# ── Global style (same as scatter_tsne_cluster) ───────────────
+plt.rcParams.update({
+    'text.usetex': True,
+    'font.family': 'serif',
+    'font.serif': ['Computer Modern Roman', 'STIX Two Text', 'DejaVu Serif'],
+    'axes.unicode_minus': False,
+})
+
+# ── Colours (semantic palette carried over from scatter_tsne) ─
+C_MAIN  = '#1B3D6E'   # main entry (dark blue)
+C_CLASS = ['#6A4C93', '#D651A0', '#FF8A65', '#FFB74D', '#C888E8']
+
+# ── Data (replace with your own) ──────────────────────────────
+# x, y = position, s = size (third dimension), label = legend label; colours are assigned when plotting
+bubbles = [
+    {'x': 10,  'y': 20,  's': 80,  'label': 'Method A'},
+    {'x': 15,  'y': 35,  's': 120, 'label': 'Method B'},
+    {'x': 25,  'y': 15,  's': 200, 'label': 'Method C'},
+    {'x': 30,  'y': 45,  's': 60,  'label': 'Method D'},
+    {'x': 40,  'y': 30,  's': 150, 'label': 'Method E'},
+    {'x': 35,  'y': 22,  's': 90,  'label': 'Method F'},
+]
+
+# Name of the main method (its legend entry is set bold)
+MAIN_METHOD = 'Method C'
+
+# ── Parameters ───────────────────────────────────────────────
+TITLE    = r'\textbf{Bubble Chart: Accuracy vs Efficiency}'
+XLABEL   = r'\textit{Accuracy (\%)}'
+YLABEL   = r'\textit{Efficiency (speedup)}'
+XLIM     = (5, 50)
+YLIM     = (10, 55)
+SIZE_MIN = 60
+SIZE_MAX = 600
+SIZE_SCALE = 'sqrt'   # sqrt recommended so large values do not dominate
+
+# ── Size mapping function ────────────────────────────────────
+def scale_size(s, vmin, vmax, smin, smax, mode='sqrt'):
+    """Map ``s`` from [vmin, vmax] onto [smin, smax]; ``mode='sqrt'`` square-roots the fraction first."""
+    frac = 0.5 if vmax == vmin else (s - vmin) / (vmax - vmin)
+    shaped = np.sqrt(frac) if mode == 'sqrt' else frac
+    return smin + shaped * (smax - smin)
+
+vmin = min(b['s'] for b in bubbles)
+vmax = max(b['s'] for b in bubbles)
+
+# ── Figure ───────────────────────────────────────────────────
+fig, ax = plt.subplots(figsize=(8.0, 6.0))
+
+# ── Draw the bubbles ───────────────────────────────────────
+for idx, b in enumerate(bubbles):
+    color = C_MAIN if b['label'] == MAIN_METHOD else C_CLASS[idx % len(C_CLASS)]
+    size  = scale_size(b['s'], vmin, vmax, SIZE_MIN, SIZE_MAX, SIZE_SCALE)
+    is_main = b['label'] == MAIN_METHOD
+
+    ax.scatter(
+        b['x'], b['y'],
+        s=size,
+        c=color,
+        alpha=0.65,
+        edgecolors='white',
+        linewidths=1.0,
+        zorder=3,
+        label=b['label'],
+    )
+    # Label inside the bubble (only when the scaled size exceeds 120)
+    if size > 120:
+        ax.text(
+            b['x'], b['y'],
+            b['label'].replace('Method ', 'M'),
+            fontsize=7.5, ha='center', va='center',
+            color='white', fontweight='bold' if is_main else 'normal',
+            zorder=4,
+        )
+
+# ── Style ────────────────────────────────────────────────────
+ax.spines['top'].set_visible(False)
+ax.spines['right'].set_visible(False)
+ax.spines['left'].set_color('#333333')
+ax.spines['bottom'].set_color('#333333')
+ax.spines['left'].set_linewidth(0.9)
+ax.spines['bottom'].set_linewidth(0.9)
+
+# Dotted grid (same as scatter_tsne)
+ax.grid(True, color='#E0E0E0', linewidth=0.6, linestyle=':', zorder=0)
+ax.set_axisbelow(True)
+
+# Ticks
+ax.tick_params(length=4, direction='in', labelsize=10, color='#333333')
+
+# Limits, axis labels and title
+ax.set_xlim(*XLIM)
+ax.set_ylim(*YLIM)
+ax.set_xlabel(XLABEL, fontsize=12)
+ax.set_ylabel(YLABEL, fontsize=12)
+ax.set_title(TITLE, fontsize=13, pad=10)
+
+# ── Legend ───────────────────────────────────────────────────
+# One handle per distinct label, in data order (the first occurrence wins; nothing is reordered)
+from matplotlib.lines import Line2D
+legend_elements = []
+labels_seen = []
+for b in bubbles:
+    if b['label'] in labels_seen:
+        continue
+    labels_seen.append(b['label'])
+    color = C_MAIN if b['label'] == MAIN_METHOD else C_CLASS[list(bubbles).index(b) % len(C_CLASS)]
+    is_main = b['label'] == MAIN_METHOD
+    size = scale_size(b['s'], vmin, vmax, SIZE_MIN, SIZE_MAX, SIZE_SCALE)
+    legend_elements.append(
+        Line2D([0], [0], marker='o', color='white',
+               markerfacecolor=color, markersize=np.sqrt(size) / 3.5,
+               markeredgecolor='white', markeredgewidth=0.5,
+               label=b['label'],
+               linewidth=0)
+    )
+
+leg = ax.legend(
+    handles=legend_elements,
+    fontsize=9,
+    loc='upper left',
+    bbox_to_anchor=(1.01, 1.0),
+    frameon=True,
+    facecolor='white',
+    edgecolor='#CCCCCC',
+    labelspacing=0.4,
+    handlelength=1.0,
+    borderaxespad=0.3,
+)
+for text in leg.get_texts():
+    if text.get_text() == MAIN_METHOD:
+        text.set_fontweight('bold')  # NOTE(review): may have no effect on usetex-rendered text — confirm
+
+# ── Save ─────────────────────────────────────────────────────
+output_path = Path('output/figures/bubble_chart_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(output_path, dpi=300, facecolor='white', bbox_inches='tight')
+plt.close(fig)
+print(f'✅ saved: {output_path}')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/calendar_heatmap.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/calendar_heatmap.py
new file mode 100644
index 0000000..2213768
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/calendar_heatmap.py
@@ -0,0 +1,208 @@
+"""
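+Calendar heatmap
+Features: hexagonal grid (hexbin) showing time-series activity by day/week, cool blue color scale
+Source: modeled on the GitHub contribution graph style; suited to daily activity, commit counts, etc.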
+"""
+
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import numpy as np
+import pandas as pd
+from matplotlib.colors import BoundaryNorm
+from matplotlib.cm import ScalarMappable
+import matplotlib.colorbar as cbar
+
+# ── Global style ─────────────────────────────────────────────────
+plt.rcParams.update({
+    'font.family': 'sans-serif',
+    'font.sans-serif': ['DejaVu Sans', 'Arial', 'Helvetica'],
+    'text.usetex': False,
+})
+
+# ── Colors (C_LOW/C_MID/C_HIGH are not referenced below; the heatmap uses C_MAP) ──
+C_LOW   = '#EBF5FB'   # lightest blue (near 0)
+C_MID   = '#5499C7'   # mid blue
+C_HIGH  = '#1B3D6E'   # dark blue (highest value)
+C_WEEKEND = '#F5F5F5'  # background color for weekend cells
+
+# ── Data (replace with your own) ─────────────────────────────
+# Format: { 'YYYY-MM-DD': value }  or a pd.DataFrame with 'date' and 'value' columns
+raw_data = {
+    '2024-01-02': 3, '2024-01-03': 7, '2024-01-04': 12, '2024-01-05': 5,
+    '2024-01-08': 8, '2024-01-09': 14, '2024-01-10': 6, '2024-01-11': 9,
+    '2024-01-12': 4, '2024-01-15': 11, '2024-01-16': 18, '2024-01-17': 7,
+    '2024-01-18': 5, '2024-01-19': 3, '2024-01-22': 9, '2024-01-23': 15,
+    '2024-01-24': 22, '2024-01-25': 11, '2024-01-26': 6,
+    '2024-01-29': 8, '2024-01-30': 13, '2024-01-31': 7,
+    '2024-02-01': 5, '2024-02-02': 9, '2024-02-05': 14, '2024-02-06': 18,
+    '2024-02-07': 11, '2024-02-08': 6, '2024-02-09': 4, '2024-02-12': 8,
+    '2024-02-13': 16, '2024-02-14': 21, '2024-02-15': 9, '2024-02-16': 5,
+    '2024-02-19': 7, '2024-02-20': 12, '2024-02-21': 19, '2024-02-22': 14,
+    '2024-02-23': 8, '2024-02-26': 6, '2024-02-27': 11, '2024-02-28': 17,
+    '2024-02-29': 9,
+    '2024-03-01': 4, '2024-03-04': 8, '2024-03-05': 13, '2024-03-06': 16,
+    '2024-03-07': 10, '2024-03-08': 5, '2024-03-11': 7, '2024-03-12': 14,
+    '2024-03-13': 20, '2024-03-14': 12, '2024-03-15': 6, '2024-03-18': 9,
+    '2024-03-19': 15, '2024-03-20': 18, '2024-03-21': 11, '2024-03-22': 7,
+    '2024-03-25': 5, '2024-03-26': 10, '2024-03-27': 14, '2024-03-28': 19,
+    '2024-03-29': 8,
+}
+
+# ── Parameters ────────────────────────────────────────────────
+START_DATE = '2024-01-01'
+END_DATE   = '2024-03-31'
+TITLE      = 'Daily Activity'
+CBAR_LABEL = 'Contributions'
+C_MAP      = 'Blues'          # built-in matplotlib white-to-blue colormap
+
+# Hexbin parameters: one cell per day
+# x = week index of the date (relative to START_DATE)
+# y = day of week (0=Mon, 1=Tue, ..., 6=Sun)
+# So each column is one week, with one hexagon cell per day
+CELL_SIZE  = 0.65    # inches
+
+# ── Data parsing ──────────────────────────────────────────────
+if isinstance(raw_data, dict):
+    df = pd.DataFrame([
+        {'date': pd.to_datetime(d, errors='coerce'), 'value': v}
+        for d, v in raw_data.items()
+    ]).dropna()
+else:
+    df = raw_data.copy()
+    df['date'] = pd.to_datetime(df['date'], errors='coerce')
+
+# Keep only rows inside [START_DATE, END_DATE]
+start = pd.to_datetime(START_DATE)
+end   = pd.to_datetime(END_DATE)
+df = df[(df['date'] >= start) & (df['date'] <= end)].copy()
+
+# Fill in missing dates (value=0); duplicate dates are summed so the lookup index is unique
+full_index = pd.date_range(start, end, freq='D')
+df_full = pd.DataFrame({'date': full_index})
+df_full['value'] = df_full['date'].map(
+    df.groupby('date')['value'].sum()
+).fillna(0).values
+
+# x = week index, y = day of week (0=Mon); shift by start's weekday so every column is a Mon..Sun week
+df_full['weekday'] = df_full['date'].dt.weekday  # Mon=0, Sun=6
+df_full['days_since_start'] = (df_full['date'] - start).dt.days
+df_full['week'] = (df_full['days_since_start'] + start.weekday()) // 7
+
+# Extract coordinates and values
+x  = df_full['week'].values.astype(float)
+y  = df_full['weekday'].values.astype(float)
+C  = df_full['value'].values.astype(float)
+
+# Compute the color-level boundaries
+vmin = 0
+vmax = max(C.max() * 1.05, 1)
+N_LEVELS = 5
+levels = np.linspace(vmin, vmax, N_LEVELS + 1)
+cmap = plt.get_cmap(C_MAP)
+norm = BoundaryNorm(levels, cmap.N)   # ncolors = colormap size; N_LEVELS here would select only the lightest LUT entries
+
+# Compute the figure size
+num_weeks = int(df_full['week'].max()) + 1
+fig_w = num_weeks * CELL_SIZE + 1.2   # left-side labels + colorbar
+fig_h = 7 * CELL_SIZE + 0.8            # 7 days + month labels on top
+
+# ── Figure ───────────────────────────────────────────────────
+fig, ax = plt.subplots(figsize=(fig_w, fig_h))
+
+# Draw the hexagonal heatmap. NOTE(review): hexbin bins points into its own hexagon lattice, so one cell per day is not guaranteed — confirm
+hb = ax.hexbin(
+    x, y, C,
+    gridsize=int(num_weeks),
+    cmap=cmap,
+    norm=norm,
+    linewidths=0.3,
+    edgecolors='white',
+    mincnt=0,
+    zorder=2,
+)
+
+# Light-gray background behind weekend cells (Sat=5, Sun=6 on the y axis), one square per week
+for week_i in range(num_weeks):
+    for dy, day_label in enumerate(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']):
+        y_pos = dy
+        if y_pos in (5, 6):  # Sat or Sun
+            col_color = C_WEEKEND
+            rect = mpatches.FancyBboxPatch(
+                (week_i - 0.5, y_pos - 0.5),
+                1, 1,
+                boxstyle='square,pad=0',
+                facecolor=col_color,
+                edgecolor='none',
+                zorder=1
+            )
+            ax.add_patch(rect)
+
+# ── Styling ──────────────────────────────────────────────────
+# Month labels (along the top); keyed by (year, month) so ranges longer than 12 months label every month
+months_shown = {}
+for _, row in df_full.iterrows():
+    month = (row['date'].year, row['date'].month)
+    week_i = row['week']
+    if month not in months_shown:
+        months_shown[month] = week_i
+
+month_names = ['', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+for (_, month_num), week_start in sorted(months_shown.items(), key=lambda x: x[1]):
+    ax.text(
+        week_start, 7.1,
+        month_names[month_num],
+        fontsize=9.5, ha='left', va='bottom',
+        color='#333333', fontweight='normal',
+    )
+    ax.axvline(week_start, color='#CCCCCC', lw=0.5, zorder=0)
+
+# "Week" caption on the left
+ax.text(-0.7, 3, 'Week', fontsize=8, ha='right', va='center', color='#666666')
+
+# Day-of-week labels on the left (every other day)
+DAY_LABELS = ['Mon', '', 'Wed', '', 'Fri', '', '']
+for i, lbl in enumerate(DAY_LABELS):
+    if lbl:
+        ax.text(-0.02, i, lbl, fontsize=8, ha='right', va='center', color='#666666')
+
+# Hide the top/right spines (L-shaped frame)
+ax.spines['top'].set_visible(False)
+ax.spines['right'].set_visible(False)
+ax.spines['left'].set_color('#333333')
+ax.spines['bottom'].set_color('#333333')
+ax.spines['left'].set_linewidth(0.9)
+ax.spines['bottom'].set_linewidth(0.9)
+
+# Ticks
+ax.tick_params(length=3, direction='out', labelsize=8)
+ax.set_xticks([])
+ax.set_yticks(range(7))
+ax.set_yticklabels([])
+ax.set_xlim(-0.5, num_weeks + 0.5)
+ax.set_ylim(-0.5, 7.0)
+ax.set_aspect('equal')
+ax.grid(False)
+
+# ── Colorbar ─────────────────────────────────────────────────
+cbar_ax = fig.add_axes([0.93, 0.15, 0.025, 0.7])
+cb = fig.colorbar(
+    ScalarMappable(norm=norm, cmap=cmap),
+    cax=cbar_ax,
+    orientation='vertical',
+    shrink=0.7,  # NOTE(review): probably has no effect when an explicit cax is passed — confirm
+)
+cb.set_label(CBAR_LABEL, fontsize=9)
+cb.ax.tick_params(labelsize=8)
+
+# ── Title ────────────────────────────────────────────────────
+ax.set_title(TITLE, fontsize=12, fontweight='bold', color='#333333', pad=6)
+
+# ── Save ─────────────────────────────────────────────────────
+from pathlib import Path
+
+output_path = Path('output/figures/calendar_heatmap_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(output_path, dpi=300, facecolor='white', bbox_inches='tight')
+plt.close(fig)
+print(f'✅ saved: {output_path}')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_aime.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_aime.py
new file mode 100644
index 0000000..1d94921
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_aime.py
@@ -0,0 +1,98 @@
+"""
+Reproduce: image6.png — AIME avg@32 training curve
+Two lines with vertical breakpoint markers + horizontal reference line.
+Style: sans-serif, 4-spine box, no grid, right-bottom legend.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.lines as mlines
+
+plt.rcParams.update({
+    'font.family': 'sans-serif',
+    'font.sans-serif': ['DejaVu Sans', 'Arial', 'Helvetica'],
+    'text.usetex': False,
+})
+
+rng = np.random.default_rng(42)
+
+# ---- Simulated data ----
+# w/ Dynamic Sampling (purple): 0-2200 steps, rises fast, ends ~0.42
+steps_dyn_a = np.arange(0, 2300, 50)
+y_dyn_a = 0.43 * (1 - np.exp(-steps_dyn_a / 600))
+y_dyn_a += rng.normal(0, 0.012, len(steps_dyn_a))
+y_dyn_a = np.clip(y_dyn_a, 0, 0.45)
+y_dyn_a[:2] = [0.03, 0.03]   # cold start
+
+# w/o Dynamic Sampling (cyan): 0-9000, slower rise, peaks ~0.42 at step 6000, then drops
+steps_nodyn = np.arange(0, 9100, 100)
+y_nodyn = 0.38 * (1 - np.exp(-steps_nodyn / 1200))
+y_nodyn += rng.normal(0, 0.012, len(steps_nodyn))
+y_nodyn = np.clip(y_nodyn, 0, 0.44)
+y_nodyn[:2] = [0.01, 0.02]
+# After step 6000, add gradual decline
+mask = steps_nodyn > 6000
+y_nodyn[mask] -= 0.06 * (steps_nodyn[mask] - 6000) / 3000
+
+C_DYN   = '#5B0DAD'   # deeper purple, close to the original figure
+C_NODYN = '#5BBCCA'   # soft teal
+C_REF   = '#3D78C2'   # separate blue (reference line, distinct from the curve colors)
+STEP_DYN  = 2200      # x position of the purple vertical line
+STEP_NODYN = 6050     # x position of the cyan vertical line
+REF_Y = 0.43          # y position of the horizontal reference line
+
+fig, ax = plt.subplots(figsize=(9.0, 4.8))
+
+# ---- The two main curves ----
+ax.plot(steps_dyn_a, y_dyn_a, color=C_DYN, lw=1.4, zorder=3, label='w/ Dynamic Sampling')
+ax.plot(steps_nodyn, y_nodyn, color=C_NODYN, lw=1.4, zorder=3, label='w/o Dynamic Sampling')
+
+# ---- Horizontal reference line ----
+ax.axhline(REF_Y, color=C_REF, lw=1.5, linestyle='--', zorder=2)
+
+# ---- Two vertical dashed lines ----
+ax.axvline(STEP_DYN, color=C_DYN, lw=1.5, linestyle='--', alpha=0.85, zorder=2)
+ax.axvline(STEP_NODYN, color=C_NODYN, lw=1.5, linestyle='--', alpha=0.85, zorder=2)
+
+# ---- Axes styling ----
+ax.set_xlim(-100, 9200)
+ax.set_ylim(-0.01, 0.47)
+ax.set_xticks([0, 2000, 4000, 6000, 8000])
+ax.set_xticklabels(['0', '2000', '4000', '6000', '8000'], fontsize=10)
+ax.set_yticks([0.0, 0.1, 0.2, 0.3, 0.4])
+ax.tick_params(labelsize=10, direction='out', length=4, width=0.8)
+ax.set_xlabel('Step', fontsize=12)
+ax.set_ylabel('AIME avg@32', fontsize=12)
+
+# Full box frame (all spines visible)
+for sp in ax.spines.values():
+    sp.set_visible(True)
+    sp.set_linewidth(1.0)
+
+ax.grid(False)
+
+# ---- Legend (only the two curves carry labels) ----
+leg = ax.legend(
+    loc='lower right',
+    fontsize=9.5,
+    frameon=True,
+    facecolor='white',
+    edgecolor='#AAAAAA',
+    framealpha=1.0,
+    borderpad=0.5,
+    labelspacing=0.3,
+    handlelength=2.0,
+    handletextpad=0.5,
+)
+
+from pathlib import Path
+
+output_path = Path('output/figures/line_aime_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.tight_layout(pad=0.8)
+fig.savefig(
+    output_path,
+    dpi=300, facecolor='white',
+)
+plt.close(fig)
+print('saved: line_aime_repro.png')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_loss_inset.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_loss_inset.py
new file mode 100644
index 0000000..2a875b9
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_loss_inset.py
@@ -0,0 +1,161 @@
+"""
+Reproduce: image10.png — Loss curve with zoom inset (SiameseNorm paper style)
+Main plot: L-shaped spine (left+bottom) + axis arrows, 3 lines.
+Inset: zoomed blue+green in right panel.
+Style: serif, tab10 colors, black dashed connection lines.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.patches import ConnectionPatch, FancyArrowPatch
+from mpl_toolkits.axes_grid1.inset_locator import mark_inset, inset_axes
+
+plt.rcParams.update({
+    'text.usetex': True,
+    'font.family': 'serif',
+    'font.serif': ['Computer Modern Roman', 'STIX Two Text', 'DejaVu Serif'],
+    'axes.unicode_minus': False,
+})
+
+rng = np.random.default_rng(7)
+
+# ---- Simulated data ----
+steps = np.arange(0, 5600, 20)
+
+# HybridNorm (orange): exponential decay to ~8, spike at ~1900, then flat ~8
+y_hybrid = 7.5 * np.exp(-steps / 450) + 2.8
+y_hybrid += rng.normal(0, 0.06, len(steps))
+spike_idx = np.searchsorted(steps, 1880)
+spike_end = np.searchsorted(steps, 1940)
+y_hybrid[spike_idx:spike_end] = np.linspace(y_hybrid[spike_idx - 1], 15.5, spike_end - spike_idx)
+after_spike = np.searchsorted(steps, 1940)
+y_hybrid[after_spike:] = 7.8 + rng.normal(0, 0.07, len(steps[after_spike:]))
+
+# HybridNorm-ResiDual (blue): rapid decay + noisy with prominent spikes
+y_blue = 7.8 * np.exp(-steps / 380) + 2.3
+y_blue += rng.normal(0, 0.18, len(steps))
+mask_noisy = steps > 2300
+# Give the blue line pronounced peaks after step 2300 (matches the original figure)
+noise_large = rng.normal(0, 1.5, mask_noisy.sum())
+# A few extreme spikes
+for idx_offset in rng.integers(10, mask_noisy.sum() - 10, size=8):
+    noise_large[idx_offset] += rng.uniform(4, 9)
+y_blue[mask_noisy] += noise_large
+y_blue = np.clip(y_blue, 1.8, 13.5)
+
+# SiameseNorm/Ours (green): smooth rapid decay
+y_green = 7.2 * np.exp(-steps / 360) + 2.1
+y_green += rng.normal(0, 0.05, len(steps))
+y_green = np.clip(y_green, 1.8, 9.0)
+
+# tab10 colors
+C_ORANGE = '#FF7F0E'
+C_BLUE   = '#1F77B4'
+C_GREEN  = '#2CA02C'
+
+# ---- Main plot ----
+# Original is 952×368 → aspect ratio 2.59; reproduction target is 10.5×4.05"
+fig = plt.figure(figsize=(10.5, 4.05))
+ax_main = fig.add_axes([0.08, 0.16, 0.50, 0.78])
+
+ax_main.plot(steps, y_hybrid, color=C_ORANGE, lw=1.3, label='HybridNorm', zorder=3)
+ax_main.plot(steps, y_blue,   color=C_BLUE,   lw=1.0, label='HybridNorm-ResiDual', zorder=3)
+ax_main.plot(steps, y_green,  color=C_GREEN,  lw=1.3, label='SiameseNorm (Ours)', zorder=4)
+
+ax_main.set_xlim(-50, 5600)
+ax_main.set_ylim(1.5, 14.5)   # aligned with the original's ~2-14 range
+ax_main.set_xlabel(r'Step', fontsize=10)
+ax_main.set_ylabel(r'Loss', fontsize=10)
+ax_main.set_xticks([0, 1000, 2000, 3000, 4000, 5000])
+ax_main.tick_params(labelsize=9.0, direction='out', length=3.5, width=0.8)
+
+# L-shaped spines (left + bottom); no top/right
+ax_main.spines['top'].set_visible(False)
+ax_main.spines['right'].set_visible(False)
+ax_main.spines['left'].set_linewidth(1.0)
+ax_main.spines['bottom'].set_linewidth(1.0)
+
+# Arrowheads at the spine ends, in axes coordinates: the spines sit on the axes edges, not at data 0
+ax_main.plot(1, 0, '>k', transform=ax_main.transAxes,
+             clip_on=False, markersize=5)
+ax_main.plot(0, 1, '^k', transform=ax_main.transAxes,
+             clip_on=False, markersize=5)
+
+ax_main.grid(True, color='#E0E0E0', linewidth=0.5, linestyle=':')
+ax_main.set_axisbelow(True)
+
+leg = ax_main.legend(
+    loc='upper right',
+    fontsize=9.0,
+    frameon=True,
+    facecolor='white',
+    edgecolor='#DDDDDD',
+    borderpad=0.4,
+    labelspacing=0.25,
+    handlelength=1.8,
+    framealpha=1.0,
+)
+
+# ---- Zoom region (dashed rectangle) ----
+zoom_x1, zoom_x2 = 2400, 5500
+zoom_y1, zoom_y2 = 1.8, 4.5
+rect = mpatches.FancyBboxPatch(
+    (zoom_x1, zoom_y1),
+    zoom_x2 - zoom_x1, zoom_y2 - zoom_y1,
+    boxstyle='square,pad=0',
+    linewidth=1.0, edgecolor='#333333',
+    facecolor='none', linestyle='--',
+    zorder=5,
+)
+ax_main.add_patch(rect)
+
+# ---- Inset (separate axes on the right; ~40% of the total width in the original, compact) ----
+ax_inset = fig.add_axes([0.61, 0.10, 0.36, 0.86])
+
+mask_z = (steps >= zoom_x1) & (steps <= zoom_x2)
+steps_z = steps[mask_z]
+
+ax_inset.plot(steps_z, y_blue[mask_z],  color=C_BLUE,  lw=1.0, zorder=3)
+ax_inset.plot(steps_z, y_green[mask_z], color=C_GREEN, lw=1.2, zorder=4)
+
+ax_inset.set_xlim(zoom_x1 - 50, zoom_x2 + 50)
+ax_inset.set_ylim(zoom_y1 - 0.1, zoom_y2 + 4.0)   # original inset y range: ~1.8 to ~8.5
+ax_inset.set_xticks([3000, 4000, 5000])
+ax_inset.tick_params(labelsize=8.5, direction='out', length=3.5, width=0.8)
+
+for sp in ax_inset.spines.values():
+    sp.set_visible(True)
+    sp.set_linewidth(1.5)
+    sp.set_color('#2A6073')   # the original inset frame is dark blue-gray
+
+ax_inset.grid(False)
+
+# ---- Dashed connector lines (from two corners of the zoom box to the inset edge) ----
+# Zoom box top-right corner → inset top-left corner
+con1 = ConnectionPatch(
+    xyA=(zoom_x2, zoom_y2), coordsA=ax_main.transData,
+    xyB=(ax_inset.get_xlim()[0], ax_inset.get_ylim()[1]),
+    coordsB=ax_inset.transData,
+    color='#333333', lw=0.8, linestyle='--',
+)
+# Zoom box bottom-right corner → inset bottom-left corner
+con2 = ConnectionPatch(
+    xyA=(zoom_x2, zoom_y1), coordsA=ax_main.transData,
+    xyB=(ax_inset.get_xlim()[0], ax_inset.get_ylim()[0]),
+    coordsB=ax_inset.transData,
+    color='#333333', lw=0.8, linestyle='--',
+)
+fig.add_artist(con1)
+fig.add_artist(con2)
+
+from pathlib import Path
+
+output_path = Path('output/figures/line_loss_inset_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(
+    output_path,
+    dpi=300, facecolor='white',
+)
+plt.close(fig)
+print('saved: line_loss_inset_repro.png')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_selfdistill.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_selfdistill.py
new file mode 100644
index 0000000..3b220fb
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/line_selfdistill.py
@@ -0,0 +1,173 @@
+"""
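+Reproduce image2 & image3: line charts from the self-distillation paper
+image2: continuous training curves + confidence-band shading + horizontal reference line
+image3: discrete-point line chart + confidence-band shading (model-size scaling)
+Source: Reinforcement learning via self-distillation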
+"""
+
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+import numpy as np
+
+# ── Pre-analysis notes on the original figure ──────────────
+# Font: serif, close to LaTeX Computer Modern, usetex enabled
+# Bold: title normal | legend entry SDPO bold | everything else normal
+# Spine: notes said left/bottom only (open frame); the code below draws all four spines
+# Grid: none
+# Colors: notes said green #3A8B3A (SDPO) | blue #3B6BB5 (GRPO) | gray #999999 (base); the constants below differ
+# Shading: translucent fill in the line color (notes said alpha=0.15; the code uses 0.20 / 0.18)
+plt.rcParams.update({
+    'text.usetex': True,
+    'font.family': 'serif',
+    'font.serif': ['Computer Modern Roman', 'STIX Two Text', 'DejaVu Serif'],
+    'axes.unicode_minus': False,
+})
+
+C_SDPO  = '#2CA02C'   # matplotlib tab green
+C_GRPO  = '#1F77B4'   # matplotlib tab blue
+C_BASE  = '#BCBCBC'   # light gray, base model in the original figure
+
+# ══════════════════════════════════════════════════════════
+# Figure 2: continuous training curves (LiveCodeBench v6)
+# ══════════════════════════════════════════════════════════
+np.random.seed(42)
+
+steps = np.linspace(0, 20000, 400)   # more points, so the curve is smoother after EMA
+
+def raw_curve(start, end, steps, noise=0.008):
+    t = steps / steps[-1]  # progress normalized by the last step
+    curve = start + (end - start) * (1 - np.exp(-4 * t))  # saturating rise from start toward end
+    curve += np.random.normal(0, noise, len(steps)) * (1 - t * 0.7)  # noise amplitude shrinks to 30% at the last step
+    return curve
+
+def ema(arr, alpha=0.96):
+    """Exponential moving average: out[0] = arr[0]; out[i] = alpha*out[i-1] + (1-alpha)*arr[i]."""
+    # Float copy: integer input is not truncated and empty input returns empty instead of raising
+    out = np.array(arr, dtype=float)
+    for i in range(1, len(out)):
+        out[i] = alpha * out[i - 1] + (1 - alpha) * out[i]  # out[i] still holds arr[i] here
+    return out
+
+# Center lines: generate a noisy curve first, then smooth it with EMA (as in the paper)
+sdpo_mean = ema(raw_curve(0.285, 0.490, steps, noise=0.006))
+sdpo_std  = 0.012 * np.exp(-2 * steps / steps[-1]) + 0.007
+grpo_mean = ema(raw_curve(0.285, 0.415, steps, noise=0.006))
+grpo_std  = 0.010 * np.exp(-2 * steps / steps[-1]) + 0.006
+
+fig2, ax2 = plt.subplots(figsize=(6.5, 4.4))
+
+ax2.fill_between(steps, sdpo_mean - sdpo_std, sdpo_mean + sdpo_std,
+                 color=C_SDPO, alpha=0.20)
+ax2.fill_between(steps, grpo_mean - grpo_std, grpo_mean + grpo_std,
+                 color=C_GRPO, alpha=0.20)
+ax2.plot(steps, sdpo_mean, color=C_SDPO, lw=2.5, label=r'\textbf{SDPO}')
+ax2.plot(steps, grpo_mean, color=C_GRPO, lw=2.5, label='GRPO')
+# The Claude reference line in the original is a sparse dotted line, not a dashed one
+ax2.axhline(0.406, color='#AAAAAA', lw=1.8,
+            linestyle=(0, (1, 2)), label='Claude Sonnet 4')
+
+ax2.set_xlim(0, 20000)
+ax2.set_ylim(0.28, 0.52)
+ax2.set_xlabel('Number of Generations', fontsize=13)
+ax2.set_ylabel('Accuracy', fontsize=13)
+ax2.set_title('LiveCodeBench v6', fontsize=15, pad=7)
+
+ax2.xaxis.set_major_formatter(ticker.FuncFormatter(
+    lambda x, _: f'{int(x/1000)}k' if x > 0 else '0'))
+ax2.xaxis.set_major_locator(ticker.MultipleLocator(5000))
+ax2.yaxis.set_major_locator(ticker.MultipleLocator(0.05))
+
+leg2 = ax2.legend(fontsize=11, loc='lower right',
+                  framealpha=0, edgecolor='none',
+                  handlelength=2.2, borderaxespad=0.5, labelspacing=0.3)
+for text in leg2.get_texts():
+    if 'SDPO' in text.get_text():
+        text.set_fontweight('bold')
+
+# Full box frame + inward ticks (matches the original figure)
+for sp in ax2.spines.values():
+    sp.set_visible(True)
+    sp.set_linewidth(1.5)
+ax2.tick_params(direction='in', length=5, width=1.2, labelsize=11)
+ax2.grid(False)
+
+from pathlib import Path
+
+OUTPUT_DIR = Path("output/figures")
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+fig2.tight_layout(pad=0.9)
+fig2.savefig(OUTPUT_DIR / 'line_selfdistill_v6_repro.png',
+             dpi=300, facecolor='white')
+plt.close(fig2)
+print('saved: line_selfdistill_v6_repro.png')
+
+# ══════════════════════════════════════════════════════════
+# Figure 3: model-scaling line chart (Model scaling Qwen3)
+# ══════════════════════════════════════════════════════════
+param_labels = ['0.6', '1.7', '4', '8']
+param_x      = [0.6,   1.7,   4,   8]
+x_pos        = [0, 1, 2, 3]   # evenly spaced positions; the x axis is labeled with param_labels
+
+sdpo_pts = [0.215, 0.333, 0.450, 0.490]
+grpo_pts = [0.212, 0.295, 0.400, 0.414]
+base_pts = [0.095, 0.150, 0.233, 0.284]
+
+sdpo_std3 = [0.005, 0.006, 0.008, 0.006]
+grpo_std3 = [0.005, 0.006, 0.007, 0.006]
+
+fig3, ax3 = plt.subplots(figsize=(10, 5))   # 2:1 aspect ratio, matching the original figure
+
+ax3.fill_between(x_pos,
+                 [v - s for v, s in zip(sdpo_pts, sdpo_std3)],
+                 [v + s for v, s in zip(sdpo_pts, sdpo_std3)],
+                 color=C_SDPO, alpha=0.18)
+ax3.fill_between(x_pos,
+                 [v - s for v, s in zip(grpo_pts, grpo_std3)],
+                 [v + s for v, s in zip(grpo_pts, grpo_std3)],
+                 color=C_GRPO, alpha=0.18)
+
+MEC = 'black'   # markers in the original figure have a black outline
+ax3.plot(x_pos, sdpo_pts, color=C_SDPO, lw=2.5,
+         marker='o', ms=7, mfc=C_SDPO,
+         markeredgecolor=MEC, markeredgewidth=1.0,
+         label=r'\textbf{SDPO}')
+ax3.plot(x_pos, grpo_pts, color=C_GRPO, lw=2.5,
+         marker='o', ms=7, mfc=C_GRPO,
+         markeredgecolor=MEC, markeredgewidth=1.0,
+         label='GRPO')
+ax3.plot(x_pos, base_pts, color=C_BASE, lw=2.5,   # same line width as the main curves
+         marker='o', ms=7, mfc=C_BASE,
+         markeredgecolor=MEC, markeredgewidth=1.0,
+         label='base model')
+
+ax3.set_xticks(x_pos)
+ax3.set_xticklabels(param_labels, fontsize=11)
+ax3.set_xlim(-0.35, 3.35)
+ax3.set_ylim(0.08, 0.51)   # aligned with the original's 0.1-0.5 ticks, with minimal headroom on top
+ax3.set_xlabel('Model parameters (B)', fontsize=13)
+ax3.set_ylabel(r'\textit{Accuracy}', fontsize=13)
+ax3.set_title('Model scaling (Qwen3)', fontsize=15, pad=7)
+ax3.yaxis.set_major_locator(ticker.MultipleLocator(0.1))
+
+# Legend in the lower right (its position in the original figure)
+leg3 = ax3.legend(fontsize=11, loc='lower right',
+                  bbox_to_anchor=(0.98, 0.02),
+                  framealpha=0, edgecolor='none',
+                  handlelength=2.2, borderaxespad=0.5, labelspacing=0.3)
+for text in leg3.get_texts():
+    if 'SDPO' in text.get_text():
+        text.set_fontweight('bold')
+
+# Full box frame + inward ticks
+for sp in ax3.spines.values():
+    sp.set_visible(True)
+    sp.set_linewidth(1.5)
+ax3.tick_params(direction='in', length=5, width=1.2, labelsize=11)
+ax3.grid(False)
+
+fig3.tight_layout(pad=0.9)
+fig3.savefig(OUTPUT_DIR / 'line_selfdistill_scaling_repro.png',
+             dpi=300, facecolor='white')
+plt.close(fig3)
+print('saved: line_selfdistill_scaling_repro.png')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/parallel_coordinates.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/parallel_coordinates.py
new file mode 100644
index 0000000..e3a30a1
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/parallel_coordinates.py
@@ -0,0 +1,173 @@
+"""
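+Parallel coordinates plot
+Features: multiple vertical axes; each line shows one method's performance across several dimensions
+Source: a classic multi-dimensional method-comparison chart, usable in both academic and engineering settings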
+"""
+
+import matplotlib.pyplot as plt
+import matplotlib.lines as mlines
+import numpy as np
+
+# ── Global style ─────────────────────────────────────────────────
+plt.rcParams.update({
+    'font.family': 'sans-serif',
+    'font.sans-serif': ['DejaVu Sans', 'Arial', 'Helvetica'],
+    'text.usetex': False,
+})
+
+# ── Colors ────────────────────────────────────────────────────
+C_MAIN   = '#1B3D6E'   # main method (dark blue)
+C_BASE   = '#A8C8E8'   # baseline (light steel blue)
+C_OTHERS = ['#5499C7', '#2CA02C', '#D651A0', '#FF7F0E']
+
+# ── Data (replace with your own) ─────────────────────────────
+# Dimension names
+dimensions = ['Accuracy', 'Latency', 'Memory', 'FLOPs', 'Robustness']
+
+# method -> value per dimension (preferably pre-normalized to [0, 1]; raw values are normalized by the script when NORMALIZED is False)
+data = {
+    'Method A': [0.82, 0.45, 0.60, 0.55, 0.70],
+    'Method B': [0.75, 0.60, 0.50, 0.65, 0.80],
+    'Ours':     [0.88, 0.35, 0.70, 0.40, 0.85],
+    'Baseline': [0.70, 0.80, 0.40, 0.80, 0.60],
+}
+
+# Set to False when passing raw (non-normalized) values
+NORMALIZED = True
+
+# Name of the main method (highlighted in the legend)
+MAIN_METHOD = 'Ours'
+
+# ── Parameters ────────────────────────────────────────────────
+TITLE     = r'Method Comparison on Multiple Dimensions'
+YLABEL    = 'Normalized Score'
+LEFT      = 0.10
+RIGHT     = 0.92
+TOP       = 0.14
+AXIS_COL  = '#333333'
+AXIS_LW   = 0.8
+GRID_COL  = '#EBEBEB'
+GRID_LW   = 0.6
+TICK_LEN  = 3
+LINE_LW   = 1.8
+ALPHA     = 0.75
+
+# ── Normalization (when the data is not already in [0, 1]) ──
+if not NORMALIZED:
+    # Min-max scale each dimension (column) on its own so axes with different units stay
+    # comparable; a constant column maps to 0.5 instead of dividing by zero.
+    cols = list(zip(*data.values()))
+    lo, span = [min(c) for c in cols], [max(c) - min(c) for c in cols]
+    data = {k: [(v - l) / s if s else 0.5 for v, l, s in zip(vals, lo, span)] for k, vals in data.items()}
+
+# ── Layout ───────────────────────────────────────────────────
+n_dims = len(dimensions)
+n_methods = len(data)
+x_pos = np.linspace(0, 1, n_dims)   # x position of each dimension's axis (normalized to 0..1)
+
+# Color assignment: main method and 'Baseline' get fixed colors, the rest cycle through C_OTHERS by index
+def get_color(method_name, idx):
+    if method_name == MAIN_METHOD:
+        return C_MAIN
+    elif method_name == 'Baseline':
+        return C_BASE
+    else:
+        return C_OTHERS[idx % len(C_OTHERS)]
+
+# ── Figure ───────────────────────────────────────────────────
+fig_w = 9.0
+fig_h = 5.5
+fig, ax = plt.subplots(figsize=(fig_w, fig_h))
+
+ax.set_xlim(0, 1)
+ax.set_ylim(0, 1)
+
+# ── Draw the vertical axes ───────────────────────────────────
+for xi in x_pos:
+    ax.plot([xi, xi], [0, 1], color=AXIS_COL, lw=AXIS_LW, zorder=1)
+
+# Horizontal reference lines (y-axis grid)
+for y_ref in np.arange(0.2, 1.0, 0.2):
+    ax.axhline(y_ref, color=GRID_COL, lw=GRID_LW, linestyle='--', zorder=0)
+
+# ── Draw one line per method ─────────────────────────────────
+colors = []  # collected per method; not read again in this script
+for idx, (method, vals) in enumerate(data.items()):
+    color = get_color(method, idx)
+    colors.append(color)
+    lw = LINE_LW if method == MAIN_METHOD else LINE_LW - 0.4
+    ax.plot(
+        x_pos, vals,
+        color=color,
+        lw=lw,
+        alpha=ALPHA if method != MAIN_METHOD else 1.0,
+        zorder=3,
+        label=method,
+    )
+    # Data points
+    ax.scatter(
+        x_pos, vals,
+        color=color,
+        s=30 if method == MAIN_METHOD else 18,
+        zorder=4,
+        edgecolors='white',
+        linewidths=0.5,
+    )
+
+# ── Axis (dimension) labels ──────────────────────────────────
+for xi, dim in zip(x_pos, dimensions):
+    ax.text(xi, -0.06, dim, fontsize=10, ha='center', va='top', color='#333333')
+
+ax.set_ylabel(YLABEL, fontsize=10, color='#333333')
+ax.set_title(TITLE, fontsize=12, fontweight='bold', color='#333333', pad=10)
+
+# ── Styling ──────────────────────────────────────────────────
+ax.spines['top'].set_visible(False)
+ax.spines['right'].set_visible(False)
+ax.spines['left'].set_visible(False)
+ax.spines['bottom'].set_visible(False)
+
+ax.tick_params(length=TICK_LEN, direction='out', labelsize=8)
+ax.set_xticks([])
+ax.set_yticks(np.arange(0.2, 1.0, 0.2))
+ax.set_yticklabels([f'{int(v*100)}%' for v in np.arange(0.2, 1.0, 0.2)], fontsize=8)
+ax.grid(False)
+
+# ── Legend (custom handles; idx matches the plotting loop so colors agree) ──
+legend_elements = []
+for idx, method in enumerate(data):
+    color = get_color(method, idx)
+    is_main = (method == MAIN_METHOD)
+    lw = LINE_LW if is_main else LINE_LW - 0.4
+    legend_elements.append(
+        mlines.Line2D([0], [0], color=color, lw=lw,
+                      alpha=ALPHA if not is_main else 1.0,
+                      label=method,
+                      marker='o', markersize=4,
+                      markerfacecolor=color, markeredgecolor='white',
+                      markeredgewidth=0.3)
+    )
+
+leg = ax.legend(
+    handles=legend_elements,
+    fontsize=9,
+    loc='upper right',
+    bbox_to_anchor=(1.01, 1.0),
+    frameon=True,
+    facecolor='white',
+    edgecolor='#CCCCCC',
+    labelspacing=0.4,
+    handlelength=2.0,
+)
+for text in leg.get_texts():
+    if text.get_text() == MAIN_METHOD:
+        text.set_fontweight('bold')
+
+# ── Save ─────────────────────────────────────────────────────
+from pathlib import Path
+
+output_path = Path('output/figures/parallel_coordinates_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(output_path, dpi=300, facecolor='white', bbox_inches='tight')
+plt.close(fig)
+print(f'✅ saved: {output_path}')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/radar_dora.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/radar_dora.py
new file mode 100644
index 0000000..2cf7264
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/radar_dora.py
@@ -0,0 +1,155 @@
+"""
+Reproduce: image8.png — DoRA vs LoRA Radar Chart
+Style: sans-serif, dashed octagonal grid, white-bg value annotations,
+       semi-transparent fill, legend: black text + colored line segment.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.lines as mlines
+
+plt.rcParams.update({
+    'font.family': 'sans-serif',
+    'font.sans-serif': ['DejaVu Sans', 'Arial', 'Helvetica'],
+    'text.usetex': False,
+})
+
+# ---- Data (clockwise, starting from the top) ----
+CATEGORIES = [
+    'CommonSense\n(LLaMA)',
+    'MT-bench\n(LLaMA2)',
+    'MT-bench\n(LLaMA)',
+    'visual instruction\ntuning (LLaVA-1.5)',
+    'video-text\n(VL-BART)',
+    'image-text\n(VL-BART)',
+    'CommonSense\n(LLaMA3)',
+    'CommonSense\n(LLaMA2)',
+]
+
+DORA_raw = np.array([78.40, 6.00, 5.50, 67.60, 85.40, 77.40, 85.20, 79.70])
+LORA_raw = np.array([76.30, 5.70, 5.10, 66.90, 83.50, 76.50, 80.80, 77.60])
+
+N = len(CATEGORIES)
+
+RANGES = [  # per-category (vmin, vmax) used by nrm(), in CATEGORIES order
+    (74.0, 80.0),
+    (5.4,  6.2),
+    (4.8,  5.7),
+    (65.0, 70.0),
+    (81.0, 87.0),
+    (74.0, 79.0),
+    (78.0, 87.0),
+    (75.0, 81.0),
+]
+RMIN, RMAX = 0.35, 1.0  # radius that vmin / vmax map to
+
+def nrm(v, vmin, vmax):
+    return RMIN + (RMAX - RMIN) * (v - vmin) / (vmax - vmin)  # linear map [vmin, vmax] -> [RMIN, RMAX]
+
+dora_r = np.array([nrm(v, r[0], r[1]) for v, r in zip(DORA_raw, RANGES)])
+lora_r = np.array([nrm(v, r[0], r[1]) for v, r in zip(LORA_raw, RANGES)])
+
+angles = np.linspace(0, 2 * np.pi, N, endpoint=False)
+
+def close(arr):
+    return np.concatenate([arr, [arr[0]]])  # append the first element so the polygon closes
+
+# Original is 1032×850 → aspect ratio 1.21
+fig, ax = plt.subplots(figsize=(7.0, 5.8),
+                       subplot_kw=dict(projection='polar'))
+
+ax.set_theta_zero_location('N')
+ax.set_theta_direction(-1)
+ax.set_yticks([])
+ax.set_xticks([])
+
+# ---- Concentric regular-octagon grid (dashed), not circles ----
+for r in [0.4, 0.55, 0.7, 0.85, 1.0]:
+    ax.plot(close(angles), close(np.full(N, r)),
+            color='#CCCCCC', lw=0.8, linestyle='--', zorder=1)
+
+# Spokes
+for ang in angles:
+    ax.plot([ang, ang], [0, 1.0],
+            color='#CCCCCC', lw=0.8, linestyle='--', zorder=1)
+
+C_DORA = '#5A8A5A'   # dark green, matching the original figure
+C_LORA = '#4169E1'   # royal blue; LoRA is a true blue in the original
+
+# ---- Fills (same alpha for both) ----
+ax.fill(close(angles), close(dora_r), color=C_DORA, alpha=0.18, zorder=3)
+ax.fill(close(angles), close(lora_r), color=C_LORA, alpha=0.18, zorder=3)
+
+# ---- Outlines (DoRA visibly thicker than LoRA) ----
+ax.plot(close(angles), close(dora_r),
+        color=C_DORA, lw=3.0, solid_capstyle='round', zorder=4)
+ax.plot(close(angles), close(lora_r),
+        color=C_LORA, lw=1.5, solid_capstyle='round', zorder=4)
+
+# ---- Value annotations (white background for readability) ----
+def fmt(v):
+    # The original figure keeps two decimal places
+    return f'{v:.2f}'
+
+for i, ang in enumerate(angles):
+    # DoRA value (outside its outline)
+    r_d = dora_r[i] + 0.08
+    ax.text(ang, r_d, fmt(DORA_raw[i]),
+            ha='center', va='center',
+            fontsize=7.8, color=C_DORA, zorder=6,
+            bbox=dict(boxstyle='round,pad=0.12',
+                      facecolor='white', edgecolor='none', alpha=0.85))
+    # LoRA value (inside its outline)
+    r_l = lora_r[i] - 0.09
+    ax.text(ang, r_l, fmt(LORA_raw[i]),
+            ha='center', va='center',
+            fontsize=7.8, color=C_LORA, zorder=6,
+            bbox=dict(boxstyle='round,pad=0.12',
+                      facecolor='white', edgecolor='none', alpha=0.85))
+
+# ---- Axis labels ----
+# In the original the labels hug the outer polygon (about r = 1.13), in a font that is small relative to the figure
+label_r = 1.13
+for i, (ang, cat) in enumerate(zip(angles, CATEGORIES)):
+    if abs(np.sin(ang)) < 0.15:
+        ha = 'center'
+    elif np.sin(ang) > 0:
+        ha = 'left'
+    else:
+        ha = 'right'
+    ax.text(ang, label_r, cat,
+            ha=ha, va='center',
+            fontsize=8.5, color='#333333',
+            multialignment='center')
+
+# ---- Legend: black series name + colored line segment ----
+# DoRA legend row (thick green line + bold black text)
+fig.text(0.09, 0.91,
+         '────  ', color=C_DORA, fontsize=11,
+         fontweight='bold', va='center', ha='left')
+fig.text(0.155, 0.91,
+         'DoRA', color='black', fontsize=10,
+         fontweight='bold', va='center', ha='left')
+
+# LoRA legend row (thin blue line + regular black text)
+fig.text(0.09, 0.875,
+         '─────', color=C_LORA, fontsize=8.5,
+         va='center', ha='left')
+fig.text(0.155, 0.875,
+         'LoRA', color='black', fontsize=10,
+         va='center', ha='left')
+
+ax.set_ylim(0, 1.32)   # trims the top/bottom margins so the polygon fills the axes
+ax.set_frame_on(False)
+
+fig.subplots_adjust(left=0.10, right=0.90, top=0.86, bottom=0.06)
+from pathlib import Path
+
+output_path = Path('output/figures/radar_dora_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(
+    output_path,
+    dpi=300, facecolor='white',
+)
+plt.close(fig)
+print('saved: radar_dora_repro.png')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/scatter_break.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/scatter_break.py
new file mode 100644
index 0000000..8119504
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/scatter_break.py
@@ -0,0 +1,173 @@
+"""
+Reproduce: image9.png — Broken-axis scatter plot (Meta-Harness style)
+X-axis has a break between ~50k and 115k.
+Uses two side-by-side axes with identical y-limits (set manually, not via sharey).
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import matplotlib.lines as mlines
+from scipy.interpolate import make_interp_spline
+
+plt.rcParams.update({
+    'font.family': 'sans-serif',
+    'font.sans-serif': ['DejaVu Sans', 'Arial', 'Helvetica'],
+    'text.usetex': False,
+})
+
+# ---- Synthetic data (seeded, so the figure is reproducible) ----
+rng = np.random.default_rng(42)
+
+# Ours (Pareto) — red stars + dashed pink line
+pareto_x = np.array([0, 5000, 20000, 30000, 35000, 40000, 45000, 48000, 50000])
+pareto_y = np.array([40.3, 40.3, 40.7, 44.3, 45.0, 47.3, 48.3, 48.8, 49.1])
+
+# Ours (non-Pareto) — scattered light-pink circles
+np_x = rng.uniform(25000, 50000, 32)
+np_y = 35 + 14 * (np_x - 25000) / 25000 + rng.normal(0, 1.8, 32)
+np_y = np.clip(np_y, 34, 50)
+
+# Few-shot — purple circles joined by a smooth cubic spline (make_interp_spline in the ax1 section)
+few_x = np.array([4000, 8000, 15000, 25000, 38000, 48000])
+few_y = np.array([32.5, 34.2, 34.0, 35.7, 40.5, 41.0])
+
+# Zero-shot — single purple X at origin
+zs_x, zs_y = 0, 27.0
+
+# MCE — orange triangle (right panel)
+mce_x, mce_y = 115000, 39.6
+
+# ACE — blue diamond (right panel)
+ace_x, ace_y = 200000, 41.0
+
+
+# ---- Colours ----
+C_PARETO   = '#E53935'   # bright red (matches the reference figure)
+C_NONPARETO= '#F4B8B8'   # paler, hazy pink
+C_FEW      = '#6B4FA0'   # dark purple
+C_FEW_LINE = '#B8A8D8'   # light purple (spline curve)
+C_MCE      = '#E69B00'   # orange
+C_ACE      = '#2E86C1'   # blue
+C_ZS       = '#5B2D8E'   # dark purple (zero-shot)
+C_DASH     = '#F0A0A0'   # pink dashed line
+
+# ---- Layout: wide left panel (0-50k), narrow right panel (115k, 200k) ----
+fig, (ax1, ax2) = plt.subplots(
+    1, 2,
+    figsize=(9.5, 5.5),
+    gridspec_kw={'width_ratios': [5, 1.3], 'wspace': 0.05},
+)
+fig.subplots_adjust(left=0.09, right=0.97, top=0.93, bottom=0.13)
+
+YLIM = (25, 51)
+
+for ax in [ax1, ax2]:
+    ax.set_ylim(*YLIM)
+
+# ---- Left axes (ax1: 0 - 50k) ----
+ax1.set_xlim(-3000, 53000)
+
+# Few-shot cubic spline (the reference figure clearly shows a smooth S-shaped curve)
+spl = make_interp_spline(few_x, few_y, k=3)
+spl_x = np.linspace(few_x[0], few_x[-1], 300)
+spl_y = spl(spl_x)
+ax1.plot(spl_x, spl_y, color=C_FEW_LINE, lw=1.8, zorder=2)
+ax1.scatter(few_x, few_y,
+            marker='o', s=70, color=C_FEW,
+            zorder=4, linewidths=0.8, edgecolors='black')
+
+# Zero-shot X
+ax1.scatter([zs_x], [zs_y], marker='X', s=120, color=C_ZS, zorder=5,
+            linewidths=0.8, edgecolors='black')
+
+# non-Pareto circles (pale pink, no edge)
+ax1.scatter(np_x, np_y, marker='o', s=28, color=C_NONPARETO, alpha=0.85,
+            zorder=3, linewidths=0)
+
+# Pareto dashed line + stars
+ax1.plot(pareto_x, pareto_y, color=C_DASH, lw=1.8, linestyle='--', zorder=3)
+ax1.scatter(pareto_x, pareto_y,
+            marker='*', s=200, color=C_PARETO,
+            zorder=5, linewidths=0.8, edgecolors='black')
+
+ax1.set_xlabel('Additional context (chars)', fontsize=13, fontweight='bold', labelpad=4)
+ax1.set_ylabel('Test accuracy', fontsize=13, fontweight='bold')
+ax1.set_xticks([0, 10000, 20000, 30000, 40000, 50000])
+ax1.set_xticklabels(['0', '10k', '20k', '30k', '40k', '50k'], fontsize=10)
+ax1.tick_params(labelsize=10)
+
+# spines: keep left and bottom only
+ax1.spines['top'].set_visible(False)
+ax1.spines['right'].set_visible(False)
+ax1.spines['left'].set_linewidth(1.0)
+ax1.spines['bottom'].set_linewidth(1.0)
+
+# ---- Right axes (ax2: 115k, 200k) ----
+ax2.set_xlim(95000, 220000)
+
+ax2.scatter([mce_x], [mce_y], marker='^', s=130, color=C_MCE, zorder=5,
+            linewidths=0.8, edgecolors='black')
+ax2.scatter([ace_x], [ace_y], marker='D', s=90, color=C_ACE, zorder=5,
+            linewidths=0.8, edgecolors='black')
+
+ax2.set_xticks([115000, 200000])
+ax2.set_xticklabels(['115k', '200k'], fontsize=10)
+ax2.tick_params(labelsize=10)
+ax2.set_yticks([])
+
+# spines: keep the bottom one only
+ax2.spines['top'].set_visible(False)
+ax2.spines['right'].set_visible(False)
+ax2.spines['left'].set_visible(False)
+ax2.spines['bottom'].set_linewidth(1.0)
+
+# ---- Axis-break marks (on the bottom x-axis only, none at the top) ----
+d = 0.015
+kwargs = dict(transform=ax1.transAxes, color='k', clip_on=False, lw=1.2)
+ax1.plot((1 - d, 1 + d), (-d, +d), **kwargs)   # bottom slash at ax1's right end
+
+kwargs2 = dict(transform=ax2.transAxes, color='k', clip_on=False, lw=1.2)
+ax2.plot((-d, +d), (-d, +d), **kwargs2)         # bottom slash; NOTE(review): d is an axes fraction, so on the narrower ax2 this slash is steeper than ax1's
+
+# ---- Legend (lower right, light-grey frame), built from proxy Line2D handles ----
+legend_elements = [
+    mlines.Line2D([], [], marker='*', color=C_DASH, markerfacecolor=C_PARETO,
+                  markersize=11, label='Ours (Pareto)',
+                  linestyle='--', linewidth=1.2,   # line colour must be C_DASH: a white dash on the white legend is invisible
+                  markeredgewidth=0.3, markeredgecolor='white'),
+    mlines.Line2D([], [], marker='o', color='w', markerfacecolor=C_NONPARETO,
+                  markersize=7, label='Ours (non-Pareto)', linestyle='None'),
+    mlines.Line2D([], [], marker='^', color='w', markerfacecolor=C_MCE,
+                  markersize=9, label='MCE', linestyle='None'),
+    mlines.Line2D([], [], marker='D', color='w', markerfacecolor=C_ACE,
+                  markersize=8, label='ACE', linestyle='None'),
+    mlines.Line2D([], [], marker='X', color='w', markerfacecolor=C_ZS,
+                  markersize=9, label='Zero-shot', linestyle='None'),
+    mlines.Line2D([], [], marker='o', color='w', markerfacecolor=C_FEW,
+                  markersize=8, label='Few-shot', linestyle='None'),
+]
+
+leg = ax1.legend(
+    handles=legend_elements,
+    loc='lower right',
+    fontsize=9.0,
+    frameon=True,
+    facecolor='white',
+    edgecolor='#CCCCCC',
+    framealpha=1.0,
+    borderpad=0.5,
+    labelspacing=0.3,
+    handletextpad=0.4,
+)
+
+from pathlib import Path
+
+output_path = Path('output/figures/scatter_break_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(
+    output_path,
+    dpi=300, facecolor='white',
+)
+plt.close(fig)
+print('saved: scatter_break_repro.png')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/scatter_tsne.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/scatter_tsne.py
new file mode 100644
index 0000000..3369121
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/scatter_tsne.py
@@ -0,0 +1,137 @@
+"""
+Reproduce: image7.png — t-SNE Latent Memory Visualization
+Style: serif (Computer Modern via usetex), light gray grid,
+       4-spine box, annotation boxes with a cluster-tinted fill and one shared dark edge.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+
+plt.rcParams.update({
+    'text.usetex': True,   # text is typeset by LaTeX, so a working LaTeX installation is required
+    'font.family': 'serif',
+    'font.serif': ['Computer Modern Roman', 'STIX Two Text', 'DejaVu Serif'],
+    'axes.unicode_minus': False,
+})
+
+rng = np.random.default_rng(42)   # fixed seed: every run draws the same point clouds
+
+def cluster(cx, cy, n, rx=8, ry=8, shape='round'):
+    """Draw n points around (cx, cy) from the module-level rng; shape is 'round' or 'elongated'."""
+    if shape != 'elongated':
+        # Default branch: independent normal draws, x first and then y.
+        return rng.normal(cx, rx, n), rng.normal(cy, ry, n)
+    # 'elongated': uniform angle, then a Rayleigh(1.0) radius, scaled per axis by rx / ry.
+    theta = rng.uniform(0, 2 * np.pi, n)
+    radius = rng.rayleigh(1.0, n)
+    px = cx + rx * radius * np.cos(theta)
+    py = cy + ry * radius * np.sin(theta)
+    return px, py
+
+
+# ---- Per-dataset colour, point count and cluster centre/spread (colours follow the reference figure) ----
+DS = {
+    'GSM8K':    {'color': '#6A4C93', 'n': 900,  'cx':  10, 'cy':  12, 'rx': 9,  'ry': 12},
+    'MATH':     {'color': '#D651A0', 'n': 700,  'cx':  8,  'cy':  32, 'rx': 7,  'ry': 8},
+    'GPQA':     {'color': '#F06292', 'n': 300,  'cx':  18, 'cy':  50, 'rx': 5,  'ry': 6},
+    'KodCode':  {'color': '#FF8A65', 'n': 500,  'cx':  38, 'cy': -10, 'rx': 9,  'ry': 10},
+    'BCB':      {'color': '#FFB74D', 'n': 600,  'cx':  18, 'cy': -30, 'rx': 10, 'ry': 9},
+    'ALFWorld': {'color': '#FFF176', 'n': 280,  'cx': -10, 'cy': -42, 'rx': 12, 'ry': 10},  # yellow!
+    'TriviaQA': {'color': '#C888E8', 'n': 700,  'cx': -42, 'cy':   5, 'rx': 14, 'ry': 22},
+}
+
+# ---- Annotation boxes (one shared dark edge, as in the reference figure; GPQA is labelled too) ----
+ANNOTS = [
+    {'name': 'MATH',     'xy': (8,  32),  'xytext': (8,  32)},
+    {'name': 'GSM8K',    'xy': (10, 10),  'xytext': (10, 10)},
+    {'name': 'GPQA',     'xy': (18, 52),  'xytext': (18, 52)},
+    {'name': 'KodCode',  'xy': (38,-10),  'xytext': (38,-10)},
+    {'name': 'BCB',      'xy': (18,-30),  'xytext': (18,-30)},
+    {'name': 'ALFWorld', 'xy': (-10,-42), 'xytext': (-10,-42)},
+    {'name': 'TriviaQA', 'xy': (-42,  5), 'xytext': (-42,  5)},
+]
+BBOX_EDGECOLOR = '#2C3E50'   # shared dark blue-grey edge colour
+
+fig, ax = plt.subplots(figsize=(7.5, 6.2))
+
+for name, cfg in DS.items():
+    x, y = cluster(cfg['cx'], cfg['cy'], cfg['n'], cfg['rx'], cfg['ry'])
+    ax.scatter(x, y, c=cfg['color'], s=14, alpha=0.55,
+               linewidths=0, rasterized=True, label=name, zorder=2)
+
+# ---- Annotation boxes: one bold label per dataset, centred on its xy ----
+import matplotlib.colors as mcolors   # imported once, not on every loop iteration
+for ann in ANNOTS:
+    color = DS[ann['name']]['color']
+    # Box fill: the cluster's own colour at low opacity (reference-figure style)
+    rgba = list(mcolors.to_rgba(color))
+    rgba[3] = 0.28   # alpha for facecolor
+    ax.annotate(
+        r'\textbf{' + ann['name'] + r'}',
+        xy=ann['xy'], xytext=ann['xytext'],
+        fontsize=10.0,
+        bbox=dict(
+            boxstyle='round,pad=0.30',
+            facecolor=tuple(rgba),
+            edgecolor=BBOX_EDGECOLOR,
+            linewidth=0.9,
+        ),
+        ha='center', va='center', zorder=5,
+    )
+
+# ---- Axes styling: labels, title, limits, tick spacing ----
+ax.set_xlabel(r'\textbf{t-SNE Component 1}', fontsize=12)
+ax.set_ylabel(r'\textbf{t-SNE Component 2}', fontsize=12)
+ax.set_title(
+    r'\textbf{Latent Memory Visualization}' + '\n'
+    r'\textbf{(across all benchmarks)}',
+    fontsize=13.5, pad=8, linespacing=1.4,
+)
+
+ax.set_xlim(-88, 70)
+ax.set_ylim(-75, 80)
+ax.xaxis.set_major_locator(plt.MultipleLocator(20))
+ax.yaxis.set_major_locator(plt.MultipleLocator(20))
+
+# All four spines visible, dark grey to approximate the reference figure
+for sp in ax.spines.values():
+    sp.set_visible(True)
+    sp.set_linewidth(0.9)
+    sp.set_color('#333333')
+
+ax.tick_params(direction='in', length=4, width=0.8, labelsize=10,
+               color='#333333')
+
+# Light-grey dotted grid (reference-figure style), drawn below the data
+ax.grid(True, color='#E0E0E0', linewidth=0.6, linestyle=':', zorder=0)
+ax.set_axisbelow(True)
+
+# ---- Legend (white background with a light-grey frame, as in the reference figure) ----
+leg = ax.legend(
+    loc='upper right',
+    fontsize=9.5,
+    frameon=True,
+    facecolor='white',
+    edgecolor='#CCCCCC',
+    framealpha=1.0,
+    markerscale=1.0,
+    handlelength=0.8,
+    handleheight=0.8,
+    handletextpad=0.5,
+    labelspacing=0.25,
+    borderpad=0.5,
+    borderaxespad=0.5,
+)
+
+from pathlib import Path
+
+output_path = Path('output/figures/scatter_tsne_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.tight_layout(pad=0.9)
+fig.savefig(
+    output_path,
+    dpi=300, facecolor='white',
+)
+plt.close(fig)
+print('saved: scatter_tsne_repro.png')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/stacked_bar.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/stacked_bar.py
new file mode 100644
index 0000000..c586e56
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/stacked_bar.py
@@ -0,0 +1,121 @@
+﻿"""
+堆叠柱状图（stacked bar chart）
+特征：每列堆叠多个组分，展示结构占比，蓝灰递进色阶
+来源：学术论文中展示组成结构或任务分解
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+from pathlib import Path
+
+# ── Global style ─────────────────────────────────────────────────
+plt.rcParams.update({
+    'font.family': 'sans-serif',
+    'font.sans-serif': ['DejaVu Sans', 'Arial', 'Helvetica'],
+    'text.usetex': False,
+})
+
+# ── Colours (grey-to-blue graded scale) ───────────────────────
+C_LAYERS = [
+    '#D3D3D3',   # bottom layer (lightest grey)
+    '#A8C8E8',   # second layer (light steel blue)
+    '#5499C7',   # third layer (medium blue)
+    '#1B3D6E',   # top layer (dark blue, main component)
+]
+
+# ── Data (replace with your own) ──────────────────────────────
+categories = ['Task A', 'Task B', 'Task C', 'Task D', 'Task E']
+
+# One dict entry per layer: key = component name, value = that component's value for each category
+components = {
+    'Base Model':  [20, 25, 15, 30, 22],
+    'Feature Ex':  [35, 30, 40, 25, 33],
+    'Fusion':      [45, 45, 45, 45, 45],
+}
+
+# ── Parameters ───────────────────────────────────────────────
+TITLE   = r'Performance Breakdown by Task'
+YLABEL  = r'Score'
+YLIM    = (0, 110)
+BAR_W   = 0.5
+BOTTOM_GAP = 0.05   # NOTE(review): not referenced anywhere else in this script
+
+# ── Data parsing ─────────────────────────────────────────────
+component_names = list(components.keys())
+n_groups  = len(categories)
+x_center = np.arange(n_groups)
+
+# One numpy array per component, in stacking order
+vals_list = [np.array(components[name]) for name in component_names]
+
+col_sums = sum(vals_list)   # per-category stacked totals
+if col_sums.max() > YLIM[1]:   # the tallest stack would be clipped by ax.set_ylim(*YLIM)
+    import warnings
+    warnings.warn(f'堆叠总量 {col_sums.max()} 超过 YLIM[1]={YLIM[1]}，已自动扩展 YLIM')
+    YLIM = (YLIM[0], float(col_sums.max()) * 1.05)   # perform the expansion the warning announces (5% headroom)
+
+# ── Canvas ───────────────────────────────────────────────────
+fig, ax = plt.subplots(figsize=(8.0, 5.0))
+
+# ── Stacked bars ─────────────────────────────────────────────
+# bottom starts at 0 and accumulates the layers drawn so far
+bottom = np.zeros(n_groups)
+# Cycle through C_LAYERS so that more components than palette entries cannot raise IndexError
+colors = [C_LAYERS[k % len(C_LAYERS)] for k in range(len(component_names))]
+
+for i, (name, vals) in enumerate(zip(component_names, vals_list)):
+    ax.bar(
+        x_center, vals,
+        width=BAR_W,
+        bottom=bottom,
+        color=colors[i],
+        edgecolor='white',
+        linewidth=0.5,
+        label=name,
+        zorder=2,
+    )
+    # raise the baseline for the next layer
+    bottom += vals
+
+# ── Styling ──────────────────────────────────────────────────
+ax.spines['top'].set_visible(False)
+ax.spines['right'].set_visible(False)
+ax.spines['left'].set_color('#333333')
+ax.spines['bottom'].set_color('#333333')
+ax.spines['left'].set_linewidth(0.9)
+ax.spines['bottom'].set_linewidth(0.9)
+
+# y-axis grid (same settings as bar_grouped_hatch)
+ax.yaxis.grid(True, color='#EBEBEB', linewidth=0.7, linestyle='--', zorder=0)
+ax.set_axisbelow(True)
+
+# Ticks
+ax.tick_params(length=3, direction='out', labelsize=9)
+
+# Labels, limits and title
+ax.set_xticks(x_center)
+ax.set_xticklabels(categories, fontsize=10)
+ax.set_ylabel(YLABEL, fontsize=10)
+ax.set_ylim(*YLIM)
+ax.set_title(TITLE, fontsize=12, fontweight='bold', color='#333333', pad=8)
+
+# Legend
+leg = ax.legend(
+    fontsize=9,
+    loc='upper right',
+    bbox_to_anchor=(1.01, 1.0),
+    frameon=True,
+    facecolor='white',
+    edgecolor='#CCCCCC',
+    labelspacing=0.35,
+    handlelength=1.5,
+    handletextpad=0.5,
+    borderaxespad=0.3,
+)
+
+# ── Save ─────────────────────────────────────────────────────
+output_path = Path('output/figures/stacked_bar_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(output_path, dpi=300, facecolor='white', bbox_inches='tight')
+plt.close(fig)
+print(f'✅ saved: {output_path}')
diff --git a/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/violin_plot.py b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/violin_plot.py
new file mode 100644
index 0000000..c1e17ed
--- /dev/null
+++ b/skills/data-flow-skill/data-flow-skill/scripts/visualization/matplotlib/violin_plot.py
@@ -0,0 +1,139 @@
+﻿"""
+小提琴图（violin plot）
+特征：数据密度分布可视化，内部叠加 mini box plot（中位线+四分位线）
+来源：学术统计图表风格，与 box_plot 共用配色体系
+"""
+
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import numpy as np
+from pathlib import Path
+
+# ── Global style (stated to be identical to box_plot) ────────
+plt.rcParams.update({
+    'text.usetex': True,
+    'font.family': 'serif',
+    'font.serif': ['Computer Modern Roman', 'STIX Two Text', 'DejaVu Serif'],
+    'axes.unicode_minus': False,
+})
+
+# ── Colours (same palette as box_plot) ───────────────────────
+C_VIOLIN = '#5499C7'   # violin body (blue)
+C_BOX    = '#1B3D6E'   # inner box overlay (dark blue)
+C_MED    = '#CC2200'   # median line (red)
+C_OUTL   = '#D651A0'   # outliers (pink); NOTE(review): not referenced anywhere else in this script
+
+# ── Data (replace with your own) ──────────────────────────────
+data = {
+    'Method A': [23.5, 25.1, 24.8, 26.2, 27.0, 25.5, 24.9, 26.8, 25.0, 24.3],
+    'Method B': [28.3, 29.1, 27.8, 30.2, 29.5, 28.9, 30.1, 29.0, 28.7, 29.3],
+    'Ours':     [31.2, 32.5, 31.8, 33.1, 32.0, 31.5, 32.8, 33.4, 31.9, 32.2],
+}
+
+labels = list(data.keys())
+values = [np.array(v) for v in data.values()]
+
+# ── Parameters ───────────────────────────────────────────────
+TITLE   = r'\textbf{Distribution Comparison (Violin)}'
+XLABEL  = r'\textbf{Method}'
+YLABEL  = r'\textit{Accuracy (\%)}'
+YLIM    = (15, 40)
+VIOLIN_ALPHA = 0.6
+
+# ── Canvas ───────────────────────────────────────────────────
+fig, ax = plt.subplots(figsize=(7.5, 5.0))
+
+x_pos = np.arange(len(labels))
+
+# Draw the violins, one per entry of values, at integer x positions
+vp = ax.violinplot(
+    values,
+    positions=x_pos,
+    widths=0.5,
+    showmeans=False,
+    showmedians=False,
+)
+
+# Set the fill colour, opacity and outline of every violin body
+for i, body in enumerate(vp['bodies']):
+    body.set_facecolor(C_VIOLIN)
+    body.set_alpha(VIOLIN_ALPHA)
+    body.set_edgecolor('#333333')
+    body.set_linewidth(1.0)
+
+# Hide violinplot's own centre bar and min/max caps (only the violin shape is kept)
+for partname in ('cbars', 'cmins', 'cmaxes'):
+    parts = vp.get(partname)
+    if parts is not None:
+        parts.set_visible(False)
+
+# ── Mini box plot overlaid inside each violin ───────────────
+# Quartiles, median and whisker ends are computed by hand
+def get_stats(arr):
+    """Return (lo, q1, med, q3, hi): quartiles plus Tukey whisker ends of a non-empty 1-D array."""
+    arr = np.asarray(arr, dtype=float)
+    q1, med, q3 = np.percentile(arr, [25, 50, 75])
+    iqr = q3 - q1
+    # Whiskers end at the most extreme *data points* inside the 1.5*IQR fences,
+    # not at the fence values themselves (the usual box-plot convention).
+    inside = arr[(arr >= q1 - 1.5 * iqr) & (arr <= q3 + 1.5 * iqr)]
+    return inside.min(), q1, med, q3, inside.max()
+
+for i, (xi, vals) in enumerate(zip(x_pos, values)):
+    lo, q1, med, q3, hi = get_stats(vals)
+    bw = 0.12  # half-width of the mini box (it spans xi - bw .. xi + bw)
+
+    # Box body from q1 to q3 (semi-transparent dark blue)
+    ax.fill_between(
+        [xi - bw, xi + bw], [q1, q1], [q3, q3],
+        color=C_BOX, alpha=0.6, zorder=4
+    )
+    # Median line (thick, red)
+    ax.plot([xi - bw, xi + bw], [med, med],
+            color=C_MED, linewidth=2.0, zorder=5)
+    # Whiskers (from the box edges out to lo and hi)
+    ax.plot([xi, xi], [lo, q1], color='#333333', linewidth=1.2, zorder=4)
+    ax.plot([xi, xi], [q3, hi], color='#333333', linewidth=1.2, zorder=4)
+    # Short horizontal caps at the whisker ends
+    ax.plot([xi - bw * 0.6, xi + bw * 0.6], [lo, lo], color='#333333', lw=1.2, zorder=4)
+    ax.plot([xi - bw * 0.6, xi + bw * 0.6], [hi, hi], color='#333333', lw=1.2, zorder=4)
+
+# ── Styling ──────────────────────────────────────────────────
+ax.spines['top'].set_visible(False)
+ax.spines['right'].set_visible(False)
+ax.spines['left'].set_color('#333333')
+ax.spines['bottom'].set_color('#333333')
+ax.spines['left'].set_linewidth(1.2)
+ax.spines['bottom'].set_linewidth(1.2)
+
+# y-axis grid (same settings as bar_grouped_hatch / box_plot)
+ax.yaxis.grid(True, color='#EBEBEB', linewidth=0.7, linestyle='--', zorder=0)
+ax.set_axisbelow(True)
+
+# Ticks
+ax.tick_params(length=4, direction='in', labelsize=10)
+
+# Labels, limits and title
+ax.set_xticks(x_pos)
+ax.set_xticklabels(labels, fontsize=11)
+ax.set_xlabel(XLABEL, fontsize=11)
+ax.set_ylabel(YLABEL, fontsize=11)
+ax.set_ylim(*YLIM)
+ax.set_title(TITLE, fontsize=13, pad=8)
+
+# Legend built from proxy artists (median line + violin density patch)
+from matplotlib.lines import Line2D
+legend_elements = [
+    Line2D([0], [0], color=C_MED, linewidth=2.0, label=r'\textit{Median}'),
+    mpatches.Patch(facecolor=C_VIOLIN, alpha=VIOLIN_ALPHA, edgecolor='#333333',
+                   label=r'\textit{Density}'),
+]
+ax.legend(handles=legend_elements, fontsize=9, loc='upper right',
+          frameon=True, facecolor='white', edgecolor='#CCCCCC')
+
+# ── Save ─────────────────────────────────────────────────────
+output_path = Path('output/figures/violin_plot_repro.png')
+output_path.parent.mkdir(parents=True, exist_ok=True)
+fig.savefig(output_path, dpi=300, facecolor='white', bbox_inches='tight')
+plt.close(fig)
+print(f'✅ saved: {output_path}')
diff --git "a/\346\212\200\350\203\275\346\270\205\345\215\225.md" "b/\346\212\200\350\203\275\346\270\205\345\215\225.md"
index cca0f7c..626eea4 100644
--- "a/\346\212\200\350\203\275\346\270\205\345\215\225.md"
+++ "b/\346\212\200\350\203\275\346\270\205\345\215\225.md"
@@ -54,43 +54,44 @@
 37. dream-video-prompt-generator ⭐⭐
 38. agentkit-multimedia-shopping ⭐⭐
 
-### 文档与分析（4个）
-39. paper-analysis-assistant ⭐⭐⭐⭐
-40. contract-review ⭐⭐⭐
-41. law-to-markdown ⭐⭐
-42. stock-analysis ⭐⭐⭐
+### 文档与分析（5个）
+39. data-flow-skill ⭐⭐⭐⭐⭐
+40. paper-analysis-assistant ⭐⭐⭐⭐
+41. contract-review ⭐⭐⭐
+42. law-to-markdown ⭐⭐
+43. stock-analysis ⭐⭐⭐
 
 ### 智能体协作（3个）
-43. agent-team ⭐⭐⭐
-44. multi-agent-meeting ⭐⭐
-45. peers-advisory-group ⭐⭐
+44. agent-team ⭐⭐⭐
+45. multi-agent-meeting ⭐⭐
+46. peers-advisory-group ⭐⭐
 
 ### 产品与项目管理（2个）
-46. product-manager-toolkit ⭐⭐⭐
-47. sales-ai-assistant ⭐⭐
+47. product-manager-toolkit ⭐⭐⭐
+48. sales-ai-assistant ⭐⭐
 
 ### 设计与可视化（5个）
-48. frontend-design ⭐⭐⭐
-49. ai-drawio ⭐⭐⭐⭐
-50. pop-up-book-illustration ⭐⭐
-51. web-to-app ⭐⭐
-52. web-design-analyzer (待补充)
+49. frontend-design ⭐⭐⭐
+50. ai-drawio ⭐⭐⭐⭐
+51. pop-up-book-illustration ⭐⭐
+52. web-to-app ⭐⭐
+53. web-design-analyzer (待补充)
 
 ### 文档处理（4个 - 系统内置）
-53. pptx ⭐⭐⭐⭐⭐
-54. xlsx ⭐⭐⭐⭐⭐
-55. pdf ⭐⭐⭐⭐⭐
-56. docx ⭐⭐⭐⭐⭐
+54. pptx ⭐⭐⭐⭐⭐
+55. xlsx ⭐⭐⭐⭐⭐
+56. pdf ⭐⭐⭐⭐⭐
+57. docx ⭐⭐⭐⭐⭐
 
 ### 技能管理（2个 - 系统内置）
-57. find-skill ⭐⭐⭐
-58. skill-creator ⭐⭐⭐
+58. find-skill ⭐⭐⭐
+59. skill-creator ⭐⭐⭐
 
 ### 财务分析（2个）
-59. creating-financial-models ⭐⭐⭐⭐
-60. market-research-reports ⭐⭐⭐⭐
+60. creating-financial-models ⭐⭐⭐⭐
+61. market-research-reports ⭐⭐⭐⭐
 
 ### 文化创作（1个）
-61. poetry-music-visual ⭐⭐
+62. poetry-music-visual ⭐⭐
 
-总计：61个技能
+总计：62个技能