From 1f459d8d26a59d9be05e8ef9ba5079015375ed76 Mon Sep 17 00:00:00 2001
From: claude_code <claude_code@vvzvlad.xyz>
Date: Thu, 25 Jun 2026 23:51:41 +0300
Subject: [PATCH 1/2] feat(ai-chat): load full transcript for model history
 (drop 50-msg window)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The per-turn model conversation was rebuilt via findRecent(chatId, ws, 50),
a sliding window that dropped the beginning of any chat longer than ~50 stored
rows. Switch streamChat to the existing findAllByChat, which loads the full
non-deleted transcript chronologically with a 5000-row memory-safety backstop
(keeps the newest rows and logs a warning on overflow) — a safety net, not a
conversational limit. Remove the now-unused findRecent method and update the
comments/log text that referenced it (findAllByChat now feeds both the Markdown
export and the model history).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../src/core/ai-chat/ai-chat.service.spec.ts  |  2 +-
 .../src/core/ai-chat/ai-chat.service.ts       | 12 +++--
 .../repos/ai-chat/ai-chat-message.repo.ts     | 51 ++++++-------------
 3 files changed, 23 insertions(+), 42 deletions(-)

diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
index a52aaf5b..0a957175 100644
--- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
@@ -240,7 +240,7 @@ describe('prepareAgentStep', () => {
  * write path. It runs identically for the upfront insert (empty steps,
  * 'streaming'), every per-step update, and the terminal finalize — so a future
  * background worker can call the same function. These tests pin the four status
- * shapes and the `metadata.parts` shape that rowToUiMessage/findRecent depend on
+ * shapes and the `metadata.parts` shape that rowToUiMessage/findAllByChat depend on
  * (per-step text + tool parts via assistantParts, in-progress text appended).
  */
 describe('flushAssistant', () => {
diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts
index 492ce9f6..85e9ad1b 100644
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -322,12 +322,14 @@ export class AiChatService implements OnModuleInit {
 
     // Rebuild the conversation from persisted history (not the client payload),
     // so the model always sees the authoritative server-side transcript. Load
-    // the most RECENT tail (oldest -> newest) so chats longer than one page do
-    // not drop recent turns (incl. the user message just inserted above).
-    const history = await this.aiChatMessageRepo.findRecent(
+    // the FULL history in chronological order (oldest -> newest, incl. the user
+    // message just inserted above) so NO turns are dropped — there is no
+    // recent-tail window anymore. `findAllByChat` keeps a 5000-row memory-safety
+    // backstop (on overflow it keeps the NEWEST rows and logs a warning); that
+    // is a safety net far above any realistic chat, not a conversational limit.
+    const history = await this.aiChatMessageRepo.findAllByChat(
       chatId,
       workspace.id,
-      50,
     );
     const uiMessages = history.map(rowToUiMessage);
     // convertToModelMessages is async in ai@6.0.134 (returns Promise<ModelMessage[]>).
@@ -1215,7 +1217,7 @@ export async function applyFinalize(
  *
  * `metadata.parts` is built by assistantParts over the finished steps, then the
  * in-progress text appended as a trailing text part, so rowToUiMessage /
- * findRecent keep replaying the turn unchanged. `metadata.finishReason`,
+ * findAllByChat keep replaying the turn unchanged. `metadata.finishReason`,
  * `metadata.error`, `metadata.usage`, `metadata.contextTokens` and
  * `metadata.maxContextTokens` are attached only when provided/relevant, matching
  * the pre-#183 onFinish/onError records.
diff --git a/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts b/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts
index fc283792..c9352e31 100644
--- a/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts
+++ b/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts
@@ -18,7 +18,8 @@ import { executeWithCursorPagination } from '@docmost/db/pagination/cursor-pagin
 // (multi-instance deploy).
 const SWEEP_STREAMING_STALE_MS = 10 * 60 * 1000; // 10 minutes
 
-// Hard upper bound on the rows materialized by `findAllByChat` (export path).
+// Hard upper bound on the rows materialized by `findAllByChat`, which now feeds
+// BOTH the Markdown export and the per-turn model history.
 // A generous cap so a pathologically huge chat cannot load an unbounded result
 // into memory; far above any realistic transcript length.
 const FIND_ALL_BY_CHAT_LIMIT = 5000;
@@ -78,14 +79,17 @@ export class AiChatMessageRepo {
   }
 
   // Load ALL (non-deleted) messages of a chat in ascending chronological order
-  // (oldest -> newest), unpaginated. Used by the server-side Markdown export
-  // (#183), where the DB is the single source of truth and the whole transcript
-  // must be rendered in one pass (findByChat is cursor-paginated and would only
-  // return the first page).
+  // (oldest -> newest), unpaginated. Two callers, both treating the DB as the
+  // single source of truth and needing the whole transcript in one pass
+  // (findByChat is cursor-paginated and would only return the first page):
+  //   - the server-side Markdown export (#183);
+  //   - the per-turn model history, rebuilt fresh on every turn so the model
+  //     sees the full authoritative transcript.
   //
   // Hard-capped at FIND_ALL_BY_CHAT_LIMIT rows (a generous bound, far above any
-  // realistic transcript) so exporting a pathologically huge chat cannot
-  // materialize an unbounded result set in memory.
+  // realistic transcript) — a shared memory-safety backstop for BOTH paths so a
+  // pathologically huge chat cannot materialize an unbounded result set in
+  // memory. On overflow the NEWEST rows are kept and a warning is logged.
   async findAllByChat(
     chatId: string,
     workspaceId: string,
@@ -93,9 +97,9 @@ export class AiChatMessageRepo {
     limit: number = FIND_ALL_BY_CHAT_LIMIT,
   ): Promise<AiChatMessage[]> {
     // Fetch newest-first (+1 to DETECT truncation), so on overflow we keep the
-    // NEWEST `limit` messages — the recent conversation matters most for an
-    // export — rather than silently dropping the tail (#183 review). Reverse back
-    // to chronological for rendering, like findRecent.
+    // NEWEST `limit` messages — the recent conversation matters most — rather
+    // than silently dropping the tail (#183 review). Then reverse back to
+    // chronological order (oldest -> newest) for rendering / model replay.
     const rows = await this.db
       .selectFrom('aiChatMessages')
       .select(this.baseFields)
@@ -110,38 +114,13 @@ export class AiChatMessageRepo {
     if (rows.length > limit) {
       rows.length = limit; // keep the newest `limit` (rows are newest-first here)
       this.logger.warn(
-        `Chat ${chatId} export truncated to the newest ${limit} messages ` +
+        `Chat ${chatId} truncated to the newest ${limit} messages ` +
           `(older messages omitted).`,
       );
     }
     return rows.reverse();
   }
 
-  // Load the most RECENT `limit` messages for a chat and return them in
-  // ascending chronological order (oldest -> newest), as the model expects.
-  // `findByChat` returns the FIRST page ASC (the OLDEST messages), which loses
-  // recent turns once a chat grows beyond a page; this rebuilds the model
-  // history from the tail instead. Plain query (no cursor pagination).
-  async findRecent(
-    chatId: string,
-    workspaceId: string,
-    limit: number,
-  ): Promise<AiChatMessage[]> {
-    const rows = await this.db
-      .selectFrom('aiChatMessages')
-      .select(this.baseFields)
-      .where('chatId', '=', chatId)
-      .where('workspaceId', '=', workspaceId)
-      .where('deletedAt', 'is', null)
-      .orderBy('createdAt', 'desc')
-      .orderBy('id', 'desc')
-      .limit(limit)
-      .execute();
-
-    // Selected newest-first for the limit; reverse to oldest-first for the model.
-    return rows.reverse();
-  }
-
   async insert(
     insertable: InsertableAiChatMessage,
     trx?: KyselyTransaction,
-- 
2.49.1


From e99c00a9eeeb379ba6c07cd12f632f8d53ba3638 Mon Sep 17 00:00:00 2001
From: claude code agent 227 <claude_code@vvzvlad.xyz>
Date: Fri, 26 Jun 2026 17:19:14 +0300
Subject: [PATCH 2/2] test(review): pin full-transcript history past 50 rows +
 changelog (PR #202)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address the PR #202 review (approve-with-comments). The only actionable
non-blocking item was the test-coverage suggestion: the source switch in
AiChatService.handle from findRecent(chatId, ws, 50) to findAllByChat(chatId,
ws) was not pinned by a test. handle() is a streaming method the project marks
as not unit-testable, so cover the behavioral guarantee it now relies on at the
repo/integration level — seed a chat of 60 messages and assert the default
findAllByChat (exactly how handle calls it) returns the FULL transcript in
chronological order, including the first turn the old 50-window would have
dropped.

Also document the behavior change under CHANGELOG [Unreleased] -> Changed.

The two stability items (token-budget trim before streamText; O(N) history
rebuild per turn) are deferred: the reviewer flagged both as non-blocking
conscious trade-offs aligned with the PR's stated goal, and the trim is a
larger architecture change out of scope for this follow-up.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                                  |  9 ++++++
 .../ai-chat-message-status.int-spec.ts        | 32 +++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 77fe9718..b8e28530 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -75,6 +75,15 @@ per-workspace rolling-day token budget.
 
 ### Changed
 
+- **AI chat now feeds the model the full stored transcript.** The per-turn model
+  conversation was rebuilt from a sliding window of the 50 most recent stored
+  rows, which silently dropped the beginning of any longer chat. It is now
+  rebuilt from the complete non-deleted transcript in chronological order, so
+  the model sees every turn (a 5000-row backstop guards process memory — a
+  safety net far above any realistic chat, not a conversational limit). On a
+  very long chat this can eventually exceed the model's context window; the
+  client already surfaces that as "start a new chat". (#202)
+
 - **AI chat default provider is now `openai-compatible` (reasoning surfaced).**
   For the `openai` driver the chat provider defaults to the openai-compatible
   implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the
diff --git a/apps/server/test/integration/ai-chat-message-status.int-spec.ts b/apps/server/test/integration/ai-chat-message-status.int-spec.ts
index 5e7eba1b..b73a815d 100644
--- a/apps/server/test/integration/ai-chat-message-status.int-spec.ts
+++ b/apps/server/test/integration/ai-chat-message-status.int-spec.ts
@@ -267,4 +267,36 @@ describe('AiChatMessageRepo.update + sweepStreaming [integration]', () => {
     const all = await repo.findAllByChat(cappedChat, workspaceId, 100);
     expect(all.map((r) => r.content)).toEqual(['m1-oldest', 'm2', 'm3-newest']);
   });
+
+  it('default findAllByChat returns the FULL transcript past 50 rows — no recent-tail window (#202)', async () => {
+    // PR #202 swapped the model-history rebuild in AiChatService.handle from
+    // findRecent(chatId, ws, 50) to findAllByChat(chatId, ws) WITHOUT a limit
+    // arg. This pins the behavioral guarantee that switch relies on: a chat
+    // longer than the old 50-msg window comes back in FULL (oldest -> newest),
+    // so no early turns are silently dropped from what the model sees. The old
+    // 50-cap would have returned only the last 50 of these 60 rows.
+    const longChat = (
+      await createChat(db, { workspaceId, creatorId: userId })
+    ).id;
+    const base = Date.now();
+    const total = 60;
+    for (let i = 0; i < total; i++) {
+      await createMessage(db, {
+        workspaceId,
+        chatId: longChat,
+        content: `msg-${i}`,
+        // Strictly increasing timestamps so ordering is deterministic.
+        createdAt: new Date(base + i * 1000),
+      });
+    }
+
+    // Default args == exactly how handle() calls it now.
+    const history = await repo.findAllByChat(longChat, workspaceId);
+    expect(history).toHaveLength(total);
+    expect(history.map((r) => r.content)).toEqual(
+      Array.from({ length: total }, (_, i) => `msg-${i}`),
+    );
+    // The very first turn (which the old 50-window would have dropped) is present.
+    expect(history[0]!.content).toBe('msg-0');
+  });
 });
-- 
2.49.1