From 52819205eb1ac0f5b3f0557ed3069af1c477e605 Mon Sep 17 00:00:00 2001
From: sufubao
Date: Tue, 30 Dec 2025 11:21:51 +0000
Subject: [PATCH 1/6] add function call and reasoning docs

---
 docs/CN/source/index.rst                     |   2 +
 docs/CN/source/tutorial/function_calling.rst | 287 ++++++++++++++++
 docs/CN/source/tutorial/reasoning_parser.rst | 342 +++++++++++++++++++
 docs/EN/source/index.rst                     |   2 +
 docs/EN/source/tutorial/function_calling.rst | 287 ++++++++++++++++
 docs/EN/source/tutorial/reasoning_parser.rst | 342 +++++++++++++++++++
 6 files changed, 1262 insertions(+)
 create mode 100644 docs/CN/source/tutorial/function_calling.rst
 create mode 100644 docs/CN/source/tutorial/reasoning_parser.rst
 create mode 100644 docs/EN/source/tutorial/function_calling.rst
 create mode 100644 docs/EN/source/tutorial/reasoning_parser.rst

diff --git a/docs/CN/source/index.rst b/docs/CN/source/index.rst
index 348ec0238..b97b2c759 100755
--- a/docs/CN/source/index.rst
+++ b/docs/CN/source/index.rst
@@ -53,6 +53,8 @@ Lightllm 整合了众多的开源方案的优点,包括但不限于 FasterTran
    多模态部署
    奖励模型部署
    OpenAI 接口使用
+   工具调用(Function Calling)
+   思考解析(Reasoning Parser)
    APIServer 参数详解
    lightllm api介绍
 
diff --git a/docs/CN/source/tutorial/function_calling.rst b/docs/CN/source/tutorial/function_calling.rst
new file mode 100644
index 000000000..728b95d86
--- /dev/null
+++ b/docs/CN/source/tutorial/function_calling.rst
@@ -0,0 +1,287 @@
+.. _function_calling:
+
+工具调用(Function Calling)
+============================
+
+LightLLM 支持多种主流模型的工具调用功能,提供 OpenAI 兼容的 API。
+
+支持的模型
+----------
+
+Qwen2.5/Qwen3
+~~~~~~~~~~~~~
+
+**解析器**: ``qwen25``
+
+**格式**:
+
+.. code-block:: xml
+
+    <tool_call>
+    {"name": "function_name", "arguments": {"param": "value"}}
+    </tool_call>
+
+**启动**:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/qwen2.5 \
+        --tool_call_parser qwen25 \
+        --tp 1
+
+Llama 3.2
+~~~~~~~~~
+
+**解析器**: ``llama3``
+
+**格式**: ``<|python_tag|>{"name": "func", "arguments": {...}}``
+
+**启动**:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/llama-3.2 \
+        --tool_call_parser llama3 \
+        --tp 1
+
+Mistral
+~~~~~~~
+
+**解析器**: ``mistral``
+
+**格式**: ``[TOOL_CALLS] [{"name": "func", "arguments": {...}}, ...]``
+
+DeepSeek-V3
+~~~~~~~~~~~
+
+**解析器**: ``deepseekv3``
+
+**格式**:
+
+.. code-block:: xml
+
+    <|tool▁calls▁begin|>
+    <|tool▁call▁begin|>function<|tool▁sep|>func_name
+    ```json
+    {"param": "value"}
+    ```
+    <|tool▁call▁end|>
+    <|tool▁calls▁end|>
+
+DeepSeek-V3.1
+~~~~~~~~~~~~~
+
+**解析器**: ``deepseekv31``
+
+**格式**: 简化的 V3 格式,参数直接内联,无代码块包围
+
+Kimi K2
+~~~~~~~
+
+**解析器**: ``kimi_k2``
+
+**格式**:
+
+.. code-block:: xml
+
+    <|tool_calls_section_begin|>
+    <|tool_call_begin|>functions.func_name:0
+    <|tool_call_argument_begin|>{"param": "value"}
+    <|tool_call_end|>
+    <|tool_calls_section_end|>
+
+基本使用
+--------
+
+定义工具
+~~~~~~~~
+
+.. code-block:: python
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "获取指定城市的天气信息",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {
+                            "type": "string",
+                            "description": "城市名称"
+                        }
+                    },
+                    "required": ["city"]
+                }
+            }
+        }
+    ]
+
+非流式调用
+~~~~~~~~~~
+
+.. 
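code-block:: python
+
+    # 示意代码(假设性实现,非 LightLLM 提供的接口):一个本地的
+    # get_weather 实现与简单的派发函数;下文示例收到 tool_calls 后,
+    # 即可按 name 字段派发到这里执行。
+    import json
+
+    def get_weather(city: str) -> dict:
+        # 假设的实现:实际应用中应调用真实的天气服务
+        return {"city": city, "temperature": 15, "condition": "晴朗"}
+
+    TOOL_REGISTRY = {"get_weather": get_weather}
+
+    def dispatch_tool_call(tool_call: dict) -> str:
+        # arguments 是 JSON 字符串,需先反序列化再调用本地函数
+        func = TOOL_REGISTRY[tool_call["function"]["name"]]
+        args = json.loads(tool_call["function"]["arguments"])
+        return json.dumps(func(**args), ensure_ascii=False)
+
+.. 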
code-block:: python
+
+    import requests
+    import json
+
+    url = "http://localhost:8088/v1/chat/completions"
+    data = {
+        "model": "model_name",
+        "messages": [
+            {"role": "user", "content": "北京今天天气怎么样?"}
+        ],
+        "tools": tools,
+        "tool_choice": "auto"  # "auto" | "none" | "required"
+    }
+
+    response = requests.post(url, json=data).json()
+    message = response["choices"][0]["message"]
+
+    if message.get("tool_calls"):
+        for tc in message["tool_calls"]:
+            print(f"工具: {tc['function']['name']}")
+            print(f"参数: {tc['function']['arguments']}")
+
+流式调用
+~~~~~~~~
+
+.. code-block:: python
+
+    data = {
+        "model": "model_name",
+        "messages": [{"role": "user", "content": "查询北京和上海的天气"}],
+        "tools": tools,
+        "stream": True
+    }
+
+    response = requests.post(url, json=data, stream=True)
+    tool_calls = {}
+
+    for line in response.iter_lines():
+        if line and line.startswith(b"data: ") and line[6:] != b"[DONE]":
+            chunk = json.loads(line[6:])
+            delta = chunk["choices"][0]["delta"]
+
+            if delta.get("tool_calls"):
+                for tc in delta["tool_calls"]:
+                    idx = tc.get("index", 0)
+                    if idx not in tool_calls:
+                        tool_calls[idx] = {"function": {"name": "", "arguments": ""}}
+
+                    if tc["function"].get("name"):
+                        tool_calls[idx]["function"]["name"] = tc["function"]["name"]
+                    if tc["function"].get("arguments"):
+                        tool_calls[idx]["function"]["arguments"] += tc["function"]["arguments"]
+
+多轮对话
+~~~~~~~~
+
+.. code-block:: python
+
+    # 1. 用户提问
+    messages = [{"role": "user", "content": "北京天气如何?"}]
+
+    # 2. 模型调用工具
+    response1 = requests.post(url, json={
+        "messages": messages,
+        "tools": tools
+    }).json()
+
+    tool_call = response1["choices"][0]["message"]["tool_calls"][0]
+    messages.append(response1["choices"][0]["message"])
+
+    # 3. 返回工具结果
+    weather_result = {"temperature": 15, "condition": "晴朗"}
+    messages.append({
+        "role": "tool",
+        "tool_call_id": tool_call["id"],
+        "name": tool_call["function"]["name"],
+        "content": json.dumps(weather_result, ensure_ascii=False)
+    })
+
+    # 4. 生成最终回答
+    response2 = requests.post(url, json={"messages": messages}).json()
+    print(response2["choices"][0]["message"]["content"])
+
+高级功能
+--------
+
+并行工具调用
+~~~~~~~~~~~~
+
+.. code-block:: python
+
+    data = {
+        "messages": messages,
+        "tools": tools,
+        "parallel_tool_calls": True  # 启用并行调用
+    }
+
+强制调用特定工具
+~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    data = {
+        "tools": tools,
+        "tool_choice": {
+            "type": "function",
+            "function": {"name": "get_weather"}
+        }
+    }
+
+与推理模型集成
+~~~~~~~~~~~~~~
+
+.. 
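code-block:: python
+
+    # 示意:推理模型返回的 tool_calls 结构与普通模型一致,
+    # 前文流式示例组装出的 tool_calls 字典可按 index 排序后统一派发
+    # (沿用上文假设的 dispatch_tool_call);下文示例演示如何同时
+    # 取得推理内容与工具调用。
+    def run_streamed_tool_calls(tool_calls: dict) -> list:
+        return [dispatch_tool_call(tool_calls[idx]) for idx in sorted(tool_calls)]
+
+.. 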
code-block:: python
+
+    data = {
+        "model": "deepseek-r1",
+        "tools": tools,
+        "chat_template_kwargs": {"enable_thinking": True},
+        "separate_reasoning": True  # 分离推理内容
+    }
+
+    response = requests.post(url, json=data).json()
+    message = response["choices"][0]["message"]
+
+    print("推理:", message.get("reasoning_content"))
+    print("工具调用:", message.get("tool_calls"))
+
+常见问题
+--------
+
+**工具调用未触发**
+    检查 ``--tool_call_parser`` 参数和工具描述是否清晰
+
+**参数解析错误**
+    确认使用了正确的解析器,检查模型输出格式
+
+**流式模式不完整**
+    正确处理所有 chunks,使用 ``index`` 字段组装多个工具调用
+
+**与推理模型集成失败**
+    确保使用最新版本,正确配置 ``separate_reasoning`` 和 ``chat_template_kwargs``
+
+技术细节
+--------
+
+**核心文件**:
+- ``lightllm/server/function_call_parser.py`` - 解析器实现(1267行)
+- ``lightllm/server/api_openai.py`` - API 集成
+- ``lightllm/server/build_prompt.py`` - 工具注入
+- ``test/test_api/test_openai_api.py`` - 测试示例
+
+**相关 PR**:
+- PR #1158: 支持推理内容中的函数调用
+
+参考资料
+--------
+
+- OpenAI Function Calling: https://platform.openai.com/docs/guides/function-calling
+- JSON Schema: https://json-schema.org/
+- LightLLM GitHub: https://github.com/ModelTC/lightllm
diff --git a/docs/CN/source/tutorial/reasoning_parser.rst b/docs/CN/source/tutorial/reasoning_parser.rst
new file mode 100644
index 000000000..7176dc697
--- /dev/null
+++ b/docs/CN/source/tutorial/reasoning_parser.rst
@@ -0,0 +1,342 @@
+.. _reasoning_parser:
+
+思考解析(Reasoning Parser)
+=============================
+
+LightLLM 支持推理模型的思考过程解析,将模型内部推理与最终答案分离,提高 AI 系统透明度。
+
+支持的模型
+----------
+
+DeepSeek-R1
+~~~~~~~~~~~
+
+**解析器**: ``deepseek-r1``
+
+**格式**:
+
+.. code-block:: text
+
+    <think>
+    推理过程...
+    </think>
+    最终答案
+
+**特点**: 强制推理模式,部分变体可能省略 ``<think>`` 起始标签
+
+**启动**:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/DeepSeek-R1 \
+        --reasoning_parser deepseek-r1 \
+        --tp 8 \
+        --enable_fa3
+
+DeepSeek-V3
+~~~~~~~~~~~
+
+**解析器**: ``deepseek-v3``
+
+**格式**: 与 Qwen3 相同
+
+**启动**:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/DeepSeek-V3 \
+        --reasoning_parser deepseek-v3 \
+        --tp 8
+
+**请求配置**:
+
+.. code-block:: python
+
+    data = {
+        "chat_template_kwargs": {"thinking": True}  # 启用推理
+    }
+
+Qwen3
+~~~~~
+
+**解析器**: ``qwen3``
+
+**格式**: ``<think>推理内容</think>回答``
+
+**特点**: 可选推理模式,支持动态切换
+
+.. code-block:: python
+
+    # 启用推理
+    data = {"chat_template_kwargs": {"enable_thinking": True}}
+
+GLM-4.5
+~~~~~~~
+
+**解析器**: ``glm45``
+
+**格式**: 同 Qwen3
+
+Kimi
+~~~~
+
+**Kimi Thinking**: ``kimi`` - 使用 ``◁think▷`` 和 ``◁/think▷`` 标记
+
+**Kimi K2**: ``kimi_k2`` - 使用 DeepSeek-R1 格式
+
+GPT-OSS
+~~~~~~~
+
+**解析器**: ``gpt-oss``
+
+**格式**:
+
+.. code-block:: xml
+
+    <|start|><|channel|>analysis<|message|>
+    推理分析...
+    <|end|>
+    <|channel|>final<|message|>
+    最终回答
+    <|return|>
+
+**特点**: 复杂状态机解析,支持多通道(analysis, commentary, final)
+
+其他模型
+~~~~~~~~
+
+- **MiniMax**: ``minimax``, ``minimax-append-think``
+- **Step3**: ``step3``
+- **NanoV3**: ``nano_v3``
+- **InternS1**: ``interns1``
+
+基本使用
+--------
+
+非流式
+~~~~~~
+
+.. code-block:: python
+
+    import requests
+    import json
+
+    url = "http://localhost:8088/v1/chat/completions"
+    data = {
+        "model": "deepseek-r1",
+        "messages": [
+            {"role": "user", "content": "单词 'strawberry' 中有多少个字母 'r'?"}
+        ],
+        "max_tokens": 2000,
+        "separate_reasoning": True,  # 分离推理内容
+        "chat_template_kwargs": {"enable_thinking": True}
+    }
+
+    response = requests.post(url, json=data).json()
+    message = response["choices"][0]["message"]
+
+    print("推理:", message.get("reasoning_content"))
+    print("答案:", message.get("content"))
+
+流式
+~~~~
+
+.. 
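code-block:: python
+
+    # 示意:一个小的 SSE 行解析辅助函数(假设性封装,非 LightLLM 提供)。
+    # 下文的流式示例按同样的约定逐行解析 "data: " 前缀的事件。
+    import json
+
+    def parse_sse_line(line: bytes):
+        """解析一行 SSE 数据;遇到 [DONE] 或非数据行时返回 None。"""
+        if not line or not line.startswith(b"data: "):
+            return None
+        payload = line[6:].decode("utf-8")
+        if payload == "[DONE]":
+            return None
+        return json.loads(payload)
+
+.. 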
code-block:: python + + data = { + "model": "deepseek-r1", + "messages": [{"role": "user", "content": "解释量子纠缠"}], + "stream": True, + "separate_reasoning": True, + "stream_reasoning": True # 实时流式传输推理内容 + } + + response = requests.post(url, json=data, stream=True) + + for line in response.iter_lines(): + if line and line.startswith(b"data: "): + data_str = line[6:].decode('utf-8') + if data_str == '[DONE]': + break + + chunk = json.loads(data_str) + delta = chunk["choices"][0]["delta"] + + # 推理内容 + if "reasoning_content" in delta: + print(delta["reasoning_content"], end="", flush=True) + + # 答案内容 + if "content" in delta: + print(delta["content"], end="", flush=True) + +响应格式 +-------- + +**非流式**: + +.. code-block:: json + + { + "choices": [{ + "message": { + "content": "最终答案", + "reasoning_content": "推理过程" + } + }] + } + +**流式**: + +.. code-block:: json + + // 推理块 + {"choices": [{"delta": {"reasoning_content": "推理片段"}}]} + + // 答案块 + {"choices": [{"delta": {"content": "答案片段"}}]} + +高级功能 +-------- + +动态切换推理模式 +~~~~~~~~~~~~~~~~ + +.. code-block:: python + + # 启用推理 + data = { + "chat_template_kwargs": {"enable_thinking": True}, + "separate_reasoning": True + } + + # 禁用推理 + data = { + "chat_template_kwargs": {"enable_thinking": False} + } + +控制推理显示 +~~~~~~~~~~~~ + +.. code-block:: python + + # 隐藏推理流式传输 + data = { + "separate_reasoning": True, + "stream_reasoning": False # reasoning_content 字段仍存在 + } + + # 合并推理和答案 + data = { + "separate_reasoning": False # 推理和答案合并在 content 中 + } + +与工具调用集成 +~~~~~~~~~~~~~~ + +.. code-block:: python + + data = { + "model": "deepseek-r1", + "tools": tools, + "tool_choice": "auto", + "separate_reasoning": True, + "chat_template_kwargs": {"enable_thinking": True} + } + + response = requests.post(url, json=data).json() + message = response["choices"][0]["message"] + + # 同时获得推理、工具调用和答案 + print("推理:", message.get("reasoning_content")) + print("工具:", message.get("tool_calls")) + print("答案:", message.get("content")) + +多轮推理对话 +~~~~~~~~~~~~ + +.. 
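code-block:: python
+
+    # 示意:把一轮响应折叠进对话历史的辅助函数(假设性写法),
+    # 下文的多轮示例手工展开了同样的步骤。
+    def append_assistant_turn(messages: list, response: dict) -> None:
+        message = response["choices"][0]["message"]
+        messages.append({
+            "role": "assistant",
+            "content": message["content"],
+            "reasoning_content": message.get("reasoning_content"),
+        })
+
+.. 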
code-block:: python
+
+    messages = [{"role": "user", "content": "什么是质数?"}]
+
+    # 第一轮
+    response1 = requests.post(url, json={
+        "messages": messages,
+        "separate_reasoning": True
+    }).json()
+
+    message1 = response1["choices"][0]["message"]
+    messages.append({
+        "role": "assistant",
+        "content": message1["content"],
+        "reasoning_content": message1.get("reasoning_content")
+    })
+
+    # 第二轮
+    messages.append({"role": "user", "content": "17 是质数吗?"})
+    response2 = requests.post(url, json={
+        "messages": messages,
+        "separate_reasoning": True
+    }).json()
+
+配置参数
+--------
+
+**separate_reasoning** (布尔, 默认 True)
+    是否分离推理内容到 ``reasoning_content`` 字段
+
+**stream_reasoning** (布尔, 默认 False)
+    是否实时流式传输推理内容
+
+**chat_template_kwargs** (对象)
+    - ``enable_thinking``: 启用推理(Qwen3, GLM45)
+    - ``thinking``: 启用推理(DeepSeek-V3)
+
+**--reasoning_parser** (启动参数)
+    指定解析器类型:``deepseek-r1``, ``qwen3``, ``glm45``, ``gpt-oss`` 等
+
+常见问题
+--------
+
+**推理内容未分离**
+    检查 ``--reasoning_parser``, ``separate_reasoning: true``, ``chat_template_kwargs``
+
+**模型不生成推理**
+    确认模型支持推理模式,检查是否启用了推理参数
+
+**流式模式不完整**
+    处理所有 chunks,等待 ``[DONE]`` 信号
+
+**与工具调用冲突**
+    使用最新版本(包含 PR #1158),正确配置参数
+
+性能考虑
+--------
+
+**Token 消耗**: 推理模式可能增加 3-5 倍 token 消耗
+
+**延迟影响**: TTFB 可能从 200ms 增加到 800ms
+
+**优化建议**:
+- 使用 ``stream_reasoning: true`` 降低感知延迟
+- 非关键任务禁用推理模式
+
+技术细节
+--------
+
+**核心文件**:
+- ``lightllm/server/reasoning_parser.py`` - 解析器实现(910行)
+- ``lightllm/server/api_openai.py`` - API 集成
+- ``test/test_api/test_openai_api.py`` - 测试示例(752-794行)
+
+**相关 PR**:
+- PR #1154: 添加推理解析器
+- PR #1158: 推理内容中的函数调用支持
+
+参考资料
+--------
+
+- DeepSeek-R1 技术报告
+- LightLLM GitHub: https://github.com/ModelTC/lightllm
diff --git a/docs/EN/source/index.rst b/docs/EN/source/index.rst
index 9aa65038c..07eaaa42e 100755
--- a/docs/EN/source/index.rst
+++ b/docs/EN/source/index.rst
@@ -52,6 +52,8 @@ Documentation List
    Multimodal Deployment
    Reward Model Deployment
    OpenAI api Usage
+   Function Calling
+   Reasoning Parser
    APIServer Parameters
    Lightllm API Introduction
 
diff --git a/docs/EN/source/tutorial/function_calling.rst b/docs/EN/source/tutorial/function_calling.rst
new file mode 100644
index 000000000..42f3d5b3d
--- /dev/null
+++ b/docs/EN/source/tutorial/function_calling.rst
@@ -0,0 +1,287 @@
+.. _function_calling:
+
+Function Calling
+================
+
+LightLLM supports function calling for multiple mainstream models, providing an OpenAI-compatible API.
+
+Supported Models
+----------------
+
+Qwen2.5/Qwen3
+~~~~~~~~~~~~~
+
+**Parser**: ``qwen25``
+
+**Format**:
+
+.. code-block:: xml
+
+    <tool_call>
+    {"name": "function_name", "arguments": {"param": "value"}}
+    </tool_call>
+
+**Startup**:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/qwen2.5 \
+        --tool_call_parser qwen25 \
+        --tp 1
+
+Llama 3.2
+~~~~~~~~~
+
+**Parser**: ``llama3``
+
+**Format**: ``<|python_tag|>{"name": "func", "arguments": {...}}``
+
+**Startup**:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/llama-3.2 \
+        --tool_call_parser llama3 \
+        --tp 1
+
+Mistral
+~~~~~~~
+
+**Parser**: ``mistral``
+
+**Format**: ``[TOOL_CALLS] [{"name": "func", "arguments": {...}}, ...]``
+
+DeepSeek-V3
+~~~~~~~~~~~
+
+**Parser**: ``deepseekv3``
+
+**Format**:
+
+.. 
code-block:: xml
+
+    <|tool▁calls▁begin|>
+    <|tool▁call▁begin|>function<|tool▁sep|>func_name
+    ```json
+    {"param": "value"}
+    ```
+    <|tool▁call▁end|>
+    <|tool▁calls▁end|>
+
+DeepSeek-V3.1
+~~~~~~~~~~~~~
+
+**Parser**: ``deepseekv31``
+
+**Format**: Simplified V3 format, parameters directly inlined without code blocks
+
+Kimi K2
+~~~~~~~
+
+**Parser**: ``kimi_k2``
+
+**Format**:
+
+.. code-block:: xml
+
+    <|tool_calls_section_begin|>
+    <|tool_call_begin|>functions.func_name:0
+    <|tool_call_argument_begin|>{"param": "value"}
+    <|tool_call_end|>
+    <|tool_calls_section_end|>
+
+Basic Usage
+-----------
+
+Define Tools
+~~~~~~~~~~~~
+
+.. code-block:: python
+
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get weather information for a city",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {
+                            "type": "string",
+                            "description": "City name"
+                        }
+                    },
+                    "required": ["city"]
+                }
+            }
+        }
+    ]
+
+Non-Streaming
+~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    import requests
+    import json
+
+    url = "http://localhost:8088/v1/chat/completions"
+    data = {
+        "model": "model_name",
+        "messages": [
+            {"role": "user", "content": "What's the weather in Beijing?"}
+        ],
+        "tools": tools,
+        "tool_choice": "auto"  # "auto" | "none" | "required"
+    }
+
+    response = requests.post(url, json=data).json()
+    message = response["choices"][0]["message"]
+
+    if message.get("tool_calls"):
+        for tc in message["tool_calls"]:
+            print(f"Tool: {tc['function']['name']}")
+            print(f"Args: {tc['function']['arguments']}")
+
+Streaming
+~~~~~~~~~
+
+.. code-block:: python
+
+    data = {
+        "model": "model_name",
+        "messages": [{"role": "user", "content": "Check weather for Beijing and Shanghai"}],
+        "tools": tools,
+        "stream": True
+    }
+
+    response = requests.post(url, json=data, stream=True)
+    tool_calls = {}
+
+    for line in response.iter_lines():
+        if line and line.startswith(b"data: ") and line[6:] != b"[DONE]":
+            chunk = json.loads(line[6:])
+            delta = chunk["choices"][0]["delta"]
+
+            if delta.get("tool_calls"):
+                for tc in delta["tool_calls"]:
+                    idx = tc.get("index", 0)
+                    if idx not in tool_calls:
+                        tool_calls[idx] = {"function": {"name": "", "arguments": ""}}
+
+                    if tc["function"].get("name"):
+                        tool_calls[idx]["function"]["name"] = tc["function"]["name"]
+                    if tc["function"].get("arguments"):
+                        tool_calls[idx]["function"]["arguments"] += tc["function"]["arguments"]
+
+Multi-Turn Conversation
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    # 1. User question
+    messages = [{"role": "user", "content": "How's the weather in Beijing?"}]
+
+    # 2. Model calls tool
+    response1 = requests.post(url, json={
+        "messages": messages,
+        "tools": tools
+    }).json()
+
+    tool_call = response1["choices"][0]["message"]["tool_calls"][0]
+    messages.append(response1["choices"][0]["message"])
+
+    # 3. Return tool result
+    weather_result = {"temperature": 15, "condition": "sunny"}
+    messages.append({
+        "role": "tool",
+        "tool_call_id": tool_call["id"],
+        "name": tool_call["function"]["name"],
+        "content": json.dumps(weather_result)
+    })
+
+    # 4. Generate final answer
+    response2 = requests.post(url, json={"messages": messages}).json()
+    print(response2["choices"][0]["message"]["content"])
+
+Advanced Features
+-----------------
+
+Parallel Tool Calls
+~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    data = {
+        "messages": messages,
+        "tools": tools,
+        "parallel_tool_calls": True  # Enable parallel calls
+    }
+
+Force Specific Tool
+~~~~~~~~~~~~~~~~~~~
+
+.. 
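code-block:: python
+
+    # Illustrative sketch (an assumption, not a LightLLM API): when a
+    # specific tool is forced as in the request below, the response can
+    # be sanity-checked before anything is executed.
+    def assert_forced_tool(response: dict, expected_name: str) -> dict:
+        tool_calls = response["choices"][0]["message"].get("tool_calls") or []
+        assert tool_calls, "model returned no tool call"
+        assert tool_calls[0]["function"]["name"] == expected_name
+        return tool_calls[0]
+
+.. 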
code-block:: python
+
+    data = {
+        "tools": tools,
+        "tool_choice": {
+            "type": "function",
+            "function": {"name": "get_weather"}
+        }
+    }
+
+Integration with Reasoning Models
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: python
+
+    data = {
+        "model": "deepseek-r1",
+        "tools": tools,
+        "chat_template_kwargs": {"enable_thinking": True},
+        "separate_reasoning": True  # Separate reasoning content
+    }
+
+    response = requests.post(url, json=data).json()
+    message = response["choices"][0]["message"]
+
+    print("Reasoning:", message.get("reasoning_content"))
+    print("Tool calls:", message.get("tool_calls"))
+
+Common Issues
+-------------
+
+**Tool calls not triggered**
+    Check ``--tool_call_parser`` parameter and tool descriptions
+
+**Parameter parsing errors**
+    Confirm correct parser is used, check model output format
+
+**Incomplete streaming**
+    Process all chunks correctly, use ``index`` field to assemble multiple calls
+
+**Integration with reasoning models fails**
+    Use latest version, configure ``separate_reasoning`` and ``chat_template_kwargs``
+
+Technical Details
+-----------------
+
+**Core Files**:
+- ``lightllm/server/function_call_parser.py`` - Parser implementation (1267 lines)
+- ``lightllm/server/api_openai.py`` - API integration
+- ``lightllm/server/build_prompt.py`` - Tool injection
+- ``test/test_api/test_openai_api.py`` - Test examples
+
+**Related PRs**:
+- PR #1158: Function call in reasoning content support
+
+References
+----------
+
+- OpenAI Function Calling: https://platform.openai.com/docs/guides/function-calling
+- JSON Schema: https://json-schema.org/
+- LightLLM GitHub: https://github.com/ModelTC/lightllm
diff --git a/docs/EN/source/tutorial/reasoning_parser.rst b/docs/EN/source/tutorial/reasoning_parser.rst
new file mode 100644
index 000000000..12682f83b
--- /dev/null
+++ b/docs/EN/source/tutorial/reasoning_parser.rst
@@ -0,0 +1,342 @@
+.. _reasoning_parser:
+
+Reasoning Parser
+================
+
+LightLLM supports parsing a reasoning model's thinking process, separating internal reasoning from the final answer to improve AI system transparency.
+
+Supported Models
+----------------
+
+DeepSeek-R1
+~~~~~~~~~~~
+
+**Parser**: ``deepseek-r1``
+
+**Format**:
+
+.. code-block:: text
+
+    <think>
+    Reasoning process...
+    </think>
+    Final answer
+
+**Features**: Forced reasoning mode, some variants may omit the ``<think>`` opening tag
+
+**Startup**:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/DeepSeek-R1 \
+        --reasoning_parser deepseek-r1 \
+        --tp 8 \
+        --enable_fa3
+
+DeepSeek-V3
+~~~~~~~~~~~
+
+**Parser**: ``deepseek-v3``
+
+**Format**: Same as Qwen3
+
+**Startup**:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/DeepSeek-V3 \
+        --reasoning_parser deepseek-v3 \
+        --tp 8
+
+**Request Config**:
+
+.. code-block:: python
+
+    data = {
+        "chat_template_kwargs": {"thinking": True}  # Enable reasoning
+    }
+
+Qwen3
+~~~~~
+
+**Parser**: ``qwen3``
+
+**Format**: ``<think>Reasoning content</think>Answer``
+
+**Features**: Optional reasoning mode, supports dynamic switching
+
+.. code-block:: python
+
+    # Enable reasoning
+    data = {"chat_template_kwargs": {"enable_thinking": True}}
+
+GLM-4.5
+~~~~~~~
+
+**Parser**: ``glm45``
+
+**Format**: Same as Qwen3
+
+Kimi
+~~~~
+
+**Kimi Thinking**: ``kimi`` - Uses ``◁think▷`` and ``◁/think▷`` tokens
+
+**Kimi K2**: ``kimi_k2`` - Uses DeepSeek-R1 format
+
+GPT-OSS
+~~~~~~~
+
+**Parser**: ``gpt-oss``
+
+**Format**:
+
+.. 
code-block:: xml + + <|start|><|channel|>analysis<|message|> + Reasoning analysis... + <|end|> + <|channel|>final<|message|> + Final answer + <|return|> + +**Features**: Complex state machine parsing, supports multiple channels (analysis, commentary, final) + +Other Models +~~~~~~~~~~~~ + +- **MiniMax**: ``minimax``, ``minimax-append-think`` +- **Step3**: ``step3`` +- **NanoV3**: ``nano_v3`` +- **InternS1**: ``interns1`` + +Basic Usage +----------- + +Non-Streaming +~~~~~~~~~~~~~ + +.. code-block:: python + + import requests + import json + + url = "http://localhost:8088/v1/chat/completions" + data = { + "model": "deepseek-r1", + "messages": [ + {"role": "user", "content": "How many 'r's in 'strawberry'?"} + ], + "max_tokens": 2000, + "separate_reasoning": True, # Separate reasoning content + "chat_template_kwargs": {"enable_thinking": True} + } + + response = requests.post(url, json=data).json() + message = response["choices"][0]["message"] + + print("Reasoning:", message.get("reasoning_content")) + print("Answer:", message.get("content")) + +Streaming +~~~~~~~~~ + +.. code-block:: python + + data = { + "model": "deepseek-r1", + "messages": [{"role": "user", "content": "Explain quantum entanglement"}], + "stream": True, + "separate_reasoning": True, + "stream_reasoning": True # Stream reasoning content in real-time + } + + response = requests.post(url, json=data, stream=True) + + for line in response.iter_lines(): + if line and line.startswith(b"data: "): + data_str = line[6:].decode('utf-8') + if data_str == '[DONE]': + break + + chunk = json.loads(data_str) + delta = chunk["choices"][0]["delta"] + + # Reasoning content + if "reasoning_content" in delta: + print(delta["reasoning_content"], end="", flush=True) + + # Answer content + if "content" in delta: + print(delta["content"], end="", flush=True) + +Response Format +--------------- + +**Non-Streaming**: + +.. code-block:: json + + { + "choices": [{ + "message": { + "content": "Final answer", + "reasoning_content": "Reasoning process" + } + }] + } + +**Streaming**: + +.. code-block:: json + + // Reasoning chunk + {"choices": [{"delta": {"reasoning_content": "Reasoning fragment"}}]} + + // Answer chunk + {"choices": [{"delta": {"content": "Answer fragment"}}]} + +Advanced Features +----------------- + +Dynamic Reasoning Mode +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + # Enable reasoning + data = { + "chat_template_kwargs": {"enable_thinking": True}, + "separate_reasoning": True + } + + # Disable reasoning + data = { + "chat_template_kwargs": {"enable_thinking": False} + } + +Control Reasoning Display +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + # Hide reasoning streaming + data = { + "separate_reasoning": True, + "stream_reasoning": False # reasoning_content field still exists + } + + # Merge reasoning and answer + data = { + "separate_reasoning": False # Merged in content field + } + +Integration with Tool Calling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + data = { + "model": "deepseek-r1", + "tools": tools, + "tool_choice": "auto", + "separate_reasoning": True, + "chat_template_kwargs": {"enable_thinking": True} + } + + response = requests.post(url, json=data).json() + message = response["choices"][0]["message"] + + # Get reasoning, tool calls, and answer simultaneously + print("Reasoning:", message.get("reasoning_content")) + print("Tools:", message.get("tool_calls")) + print("Answer:", message.get("content")) + +Multi-Turn Reasoning +~~~~~~~~~~~~~~~~~~~~ + +.. 
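code-block:: python
+
+    # Illustrative helper (an assumption, not part of LightLLM): fold one
+    # response into the running history; the example below spells out the
+    # same steps inline.
+    def append_assistant_turn(messages: list, response: dict) -> None:
+        message = response["choices"][0]["message"]
+        messages.append({
+            "role": "assistant",
+            "content": message["content"],
+            "reasoning_content": message.get("reasoning_content"),
+        })
+
+.. 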
code-block:: python + + messages = [{"role": "user", "content": "What is a prime number?"}] + + # First turn + response1 = requests.post(url, json={ + "messages": messages, + "separate_reasoning": True + }).json() + + message1 = response1["choices"][0]["message"] + messages.append({ + "role": "assistant", + "content": message1["content"], + "reasoning_content": message1.get("reasoning_content") + }) + + # Second turn + messages.append({"role": "user", "content": "Is 17 a prime number?"}) + response2 = requests.post(url, json={ + "messages": messages, + "separate_reasoning": True + }).json() + +Configuration +------------- + +**separate_reasoning** (bool, default: True) + Whether to separate reasoning content into ``reasoning_content`` field + +**stream_reasoning** (bool, default: False) + Whether to stream reasoning content in real-time + +**chat_template_kwargs** (object) + - ``enable_thinking``: Enable reasoning (Qwen3, GLM45) + - ``thinking``: Enable reasoning (DeepSeek-V3) + +**--reasoning_parser** (startup parameter) + Specify parser type: ``deepseek-r1``, ``qwen3``, ``glm45``, ``gpt-oss``, etc. + +Common Issues +------------- + +**Reasoning content not separated** + Check ``--reasoning_parser``, ``separate_reasoning: true``, ``chat_template_kwargs`` + +**Model not generating reasoning** + Confirm model supports reasoning mode, check if reasoning parameters are enabled + +**Incomplete streaming** + Process all chunks, wait for ``[DONE]`` signal + +**Conflict with tool calling** + Use latest version (includes PR #1158), configure parameters correctly + +Performance +----------- + +**Token Consumption**: Reasoning mode may increase token usage by 3-5x + +**Latency Impact**: TTFB may increase from 200ms to 800ms + +**Optimization**: +- Use ``stream_reasoning: true`` to reduce perceived latency +- Disable reasoning mode for non-critical tasks + +Technical Details +----------------- + +**Core Files**: +- ``lightllm/server/reasoning_parser.py`` - Parser implementation (910 lines) +- ``lightllm/server/api_openai.py`` - API integration +- ``test/test_api/test_openai_api.py`` - Test examples (lines 752-794) + +**Related PRs**: +- PR #1154: Add reasoning parser +- PR #1158: Function call in reasoning content support + +References +---------- + +- DeepSeek-R1 Technical Report +- LightLLM GitHub: https://github.com/ModelTC/lightllm From c53c52a6d4f792902fa5118d489427409a8b89ac Mon Sep 17 00:00:00 2001 From: sufubao Date: Sun, 4 Jan 2026 03:52:54 +0000 Subject: [PATCH 2/6] clean --- docs/CN/source/tutorial/function_calling.rst | 15 ------------- docs/CN/source/tutorial/reasoning_parser.rst | 22 -------------------- docs/EN/source/tutorial/function_calling.rst | 15 ------------- docs/EN/source/tutorial/reasoning_parser.rst | 22 -------------------- 4 files changed, 74 deletions(-) diff --git a/docs/CN/source/tutorial/function_calling.rst b/docs/CN/source/tutorial/function_calling.rst index 728b95d86..eea2855f3 100644 --- a/docs/CN/source/tutorial/function_calling.rst +++ b/docs/CN/source/tutorial/function_calling.rst @@ -77,21 +77,6 @@ DeepSeek-V3.1 **格式**: 简化的 V3 格式,参数直接内联,无代码块包围 -Kimi K2 -~~~~~~~ - -**解析器**: ``kimi_k2`` - -**格式**: - -.. 
code-block:: xml - - <|tool_calls_section_begin|> - <|tool_call_begin|>functions.func_name:0 - <|tool_call_argument_begin|>{"param": "value"} - <|tool_call_end|> - <|tool_calls_section_end|> - 基本使用 -------- diff --git a/docs/CN/source/tutorial/reasoning_parser.rst b/docs/CN/source/tutorial/reasoning_parser.rst index 7176dc697..f80e06b90 100644 --- a/docs/CN/source/tutorial/reasoning_parser.rst +++ b/docs/CN/source/tutorial/reasoning_parser.rst @@ -72,20 +72,6 @@ Qwen3 # 启用推理 data = {"chat_template_kwargs": {"enable_thinking": True}} -GLM-4.5 -~~~~~~~ - -**解析器**: ``glm45`` - -**格式**: 同 Qwen3 - -Kimi -~~~~ - -**Kimi Thinking**: ``kimi`` - 使用 ``◁think▷`` 和 ``◁/think▷`` 标记 - -**Kimi K2**: ``kimi_k2`` - 使用 DeepSeek-R1 格式 - GPT-OSS ~~~~~~~ @@ -104,14 +90,6 @@ GPT-OSS **特点**: 复杂状态机解析,支持多通道(analysis, commentary, final) -其他模型 -~~~~~~~~ - -- **MiniMax**: ``minimax``, ``minimax-append-think`` -- **Step3**: ``step3`` -- **NanoV3**: ``nano_v3`` -- **InternS1**: ``interns1`` - 基本使用 -------- diff --git a/docs/EN/source/tutorial/function_calling.rst b/docs/EN/source/tutorial/function_calling.rst index 42f3d5b3d..d7f798140 100644 --- a/docs/EN/source/tutorial/function_calling.rst +++ b/docs/EN/source/tutorial/function_calling.rst @@ -77,21 +77,6 @@ DeepSeek-V3.1 **Format**: Simplified V3 format, parameters directly inlined without code blocks -Kimi K2 -~~~~~~~ - -**Parser**: ``kimi_k2`` - -**Format**: - -.. code-block:: xml - - <|tool_calls_section_begin|> - <|tool_call_begin|>functions.func_name:0 - <|tool_call_argument_begin|>{"param": "value"} - <|tool_call_end|> - <|tool_calls_section_end|> - Basic Usage ----------- diff --git a/docs/EN/source/tutorial/reasoning_parser.rst b/docs/EN/source/tutorial/reasoning_parser.rst index 12682f83b..539136633 100644 --- a/docs/EN/source/tutorial/reasoning_parser.rst +++ b/docs/EN/source/tutorial/reasoning_parser.rst @@ -72,20 +72,6 @@ Qwen3 # Enable reasoning data = {"chat_template_kwargs": {"enable_thinking": True}} -GLM-4.5 -~~~~~~~ - -**Parser**: ``glm45`` - -**Format**: Same as Qwen3 - -Kimi -~~~~ - -**Kimi Thinking**: ``kimi`` - Uses ``◁think▷`` and ``◁/think▷`` tokens - -**Kimi K2**: ``kimi_k2`` - Uses DeepSeek-R1 format - GPT-OSS ~~~~~~~ @@ -104,14 +90,6 @@ GPT-OSS **Features**: Complex state machine parsing, supports multiple channels (analysis, commentary, final) -Other Models -~~~~~~~~~~~~ - -- **MiniMax**: ``minimax``, ``minimax-append-think`` -- **Step3**: ``step3`` -- **NanoV3**: ``nano_v3`` -- **InternS1**: ``interns1`` - Basic Usage ----------- From fb9f071ca237839324818b3c257f5d613e222b33 Mon Sep 17 00:00:00 2001 From: shihaobai <42648726+shihaobai@users.noreply.github.com> Date: Sun, 4 Jan 2026 15:25:50 +0800 Subject: [PATCH 3/6] Update docs/CN/source/tutorial/function_calling.rst Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- docs/CN/source/tutorial/function_calling.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CN/source/tutorial/function_calling.rst b/docs/CN/source/tutorial/function_calling.rst index eea2855f3..afa588572 100644 --- a/docs/CN/source/tutorial/function_calling.rst +++ b/docs/CN/source/tutorial/function_calling.rst @@ -256,7 +256,7 @@ DeepSeek-V3.1 -------- **核心文件**: -- ``lightllm/server/function_call_parser.py`` - 解析器实现(1267行) +- ``lightllm/server/function_call_parser.py`` - 解析器实现 - ``lightllm/server/api_openai.py`` - API 集成 - ``lightllm/server/build_prompt.py`` - 工具注入 - ``test/test_api/test_openai_api.py`` - 测试示例 From 
45b6f1c1c61256c9e8d2b3d75df14d23639d8991 Mon Sep 17 00:00:00 2001 From: shihaobai <42648726+shihaobai@users.noreply.github.com> Date: Sun, 4 Jan 2026 15:26:59 +0800 Subject: [PATCH 4/6] Update docs/CN/source/tutorial/reasoning_parser.rst Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- docs/CN/source/tutorial/reasoning_parser.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/CN/source/tutorial/reasoning_parser.rst b/docs/CN/source/tutorial/reasoning_parser.rst index f80e06b90..547eb05d1 100644 --- a/docs/CN/source/tutorial/reasoning_parser.rst +++ b/docs/CN/source/tutorial/reasoning_parser.rst @@ -305,9 +305,9 @@ GPT-OSS -------- **核心文件**: -- ``lightllm/server/reasoning_parser.py`` - 解析器实现(910行) +- ``lightllm/server/reasoning_parser.py`` - 解析器实现 - ``lightllm/server/api_openai.py`` - API 集成 -- ``test/test_api/test_openai_api.py`` - 测试示例(752-794行) +- ``test/test_api/test_openai_api.py`` - 测试示例 **相关 PR**: - PR #1154: 添加推理解析器 From 4e4d6cbd60a0d432a15f2315d018b9064b47b705 Mon Sep 17 00:00:00 2001 From: shihaobai <42648726+shihaobai@users.noreply.github.com> Date: Sun, 4 Jan 2026 15:27:06 +0800 Subject: [PATCH 5/6] Update docs/EN/source/tutorial/function_calling.rst Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- docs/EN/source/tutorial/function_calling.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/EN/source/tutorial/function_calling.rst b/docs/EN/source/tutorial/function_calling.rst index d7f798140..d038eb902 100644 --- a/docs/EN/source/tutorial/function_calling.rst +++ b/docs/EN/source/tutorial/function_calling.rst @@ -256,7 +256,7 @@ Technical Details ----------------- **Core Files**: -- ``lightllm/server/function_call_parser.py`` - Parser implementation (1267 lines) +- ``lightllm/server/function_call_parser.py`` - Parser implementation - ``lightllm/server/api_openai.py`` - API integration - ``lightllm/server/build_prompt.py`` - Tool injection - ``test/test_api/test_openai_api.py`` - Test examples From d530914b6ad745474ebf44c7e4ed985584b72d9a Mon Sep 17 00:00:00 2001 From: shihaobai <42648726+shihaobai@users.noreply.github.com> Date: Sun, 4 Jan 2026 15:27:15 +0800 Subject: [PATCH 6/6] Update docs/EN/source/tutorial/reasoning_parser.rst Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- docs/EN/source/tutorial/reasoning_parser.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/EN/source/tutorial/reasoning_parser.rst b/docs/EN/source/tutorial/reasoning_parser.rst index 539136633..e76e093d6 100644 --- a/docs/EN/source/tutorial/reasoning_parser.rst +++ b/docs/EN/source/tutorial/reasoning_parser.rst @@ -305,9 +305,9 @@ Technical Details ----------------- **Core Files**: -- ``lightllm/server/reasoning_parser.py`` - Parser implementation (910 lines) +- ``lightllm/server/reasoning_parser.py`` - Parser implementation - ``lightllm/server/api_openai.py`` - API integration -- ``test/test_api/test_openai_api.py`` - Test examples (lines 752-794) +- ``test/test_api/test_openai_api.py`` - Test examples **Related PRs**: - PR #1154: Add reasoning parser