From 58edd915750fac8634ef8e8a0598b565470d74bb Mon Sep 17 00:00:00 2001
From: dansc <dansc@yandex.ru>
Date: Mon, 29 Jun 2026 22:01:33 +0300
Subject: [PATCH] fix(server): honest finish_reason + accurate prompt token
 usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- finish_reason: пробрасываем усечение по длине из DeepSeek. Раньше всегда отдавали
  'stop', даже когда ответ обрезан по длине — клиенты, которые дозапрашивают
  продолжение по 'length' (агентные циклы, Codex-style), молча получали усечённый
  ответ. Теперь усечение помечается finish_reason='length'; любое другое значение
  finish по-прежнему отдаётся как 'stop'. Меняется только buildTextResponse.
- usage.prompt_tokens: считаем по исходным сообщениям клиента, а не по
  раздутому fullPrompt (system + инжект tools + история). Раньше любой, кто
  считает токены/биллинг по usage, получал цифры, не совпадающие с его запросом.

Эвристика оценки токенов (длина/4) не меняется — правится только ИСТОЧНИК текста.
---
 server.js | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/server.js b/server.js
index 442feef..3836007 100755
--- a/server.js
+++ b/server.js
@@ -722,7 +722,7 @@ function buildToolCallResponse(toolCall, model = 'deepseek-default', prompt = ''
     };
 }
 
-function buildTextResponse(content, prompt, model = 'deepseek-default', reasoningContent = '') {
+function buildTextResponse(content, prompt, model = 'deepseek-default', reasoningContent = '', finishReason = null) {
     const message = { role: 'assistant', content };
     if (reasoningContent) message.reasoning_content = reasoningContent;
     return {
@@ -733,7 +733,9 @@ function buildTextResponse(content, prompt, model = 'deepseek-default', reasonin
         choices: [{
             index: 0,
             message,
-            finish_reason: 'stop'
+            // Surface truncation: a 'length' finish lets length-aware clients re-request
+            // instead of silently treating a cut-off answer as a clean stop.
+            finish_reason: finishReason === 'length' ? 'length' : 'stop'
         }],
         usage: buildUsage(prompt, content, reasoningContent),
         watermark: FORGETMEAI_WATERMARK
@@ -1266,6 +1268,10 @@ const server = http.createServer(async (req, res) => {
             }
 
             const { prompt, systemPrompt } = formatMessages(messages, tools);
+            // For usage accounting, count the CLIENT's original input — not the
+            // proxy-expanded fullPrompt (system + injected tools + history) — so
+            // prompt_tokens reflects what the caller actually sent.
+            const clientPromptText = messages.map(m => normalizeMessageContent(m.content)).join('\n');
 
             const session = getOrCreateAgentSession(agentId);
 
@@ -1502,8 +1508,8 @@ const server = http.createServer(async (req, res) => {
             storeHistory(agentId, prompt, fullContent, toolCall);
 
             const openaiResponse = toolCall
-                ? buildToolCallResponse(toolCall, requestedModel, fullPrompt, reasoningContent)
-                : buildTextResponse(fullContent, fullPrompt, requestedModel, reasoningContent);
+                ? buildToolCallResponse(toolCall, requestedModel, clientPromptText, reasoningContent)
+                : buildTextResponse(fullContent, clientPromptText, requestedModel, reasoningContent, finishReason);
 
             if (stream) {
                 if (apiMode === 'anthropic') {