From 1f459d8d26a59d9be05e8ef9ba5079015375ed76 Mon Sep 17 00:00:00 2001 From: claude_code Date: Thu, 25 Jun 2026 23:51:41 +0300 Subject: [PATCH 1/2] feat(ai-chat): load full transcript for model history (drop 50-msg window) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-turn model conversation was rebuilt via findRecent(chatId, ws, 50), a sliding window that dropped the beginning of any chat longer than ~50 stored rows. Switch streamChat to the existing findAllByChat, which loads the full non-deleted transcript chronologically with a 5000-row memory-safety backstop (keeps the newest rows + logs a warning on overflow) — a safety net, not a conversational limit. Remove the now-unused findRecent method and update the comments/log text that referenced it (findAllByChat now feeds both the Markdown export and the model history). Co-Authored-By: Claude Opus 4.8 --- .../src/core/ai-chat/ai-chat.service.spec.ts | 2 +- .../src/core/ai-chat/ai-chat.service.ts | 12 +++-- .../repos/ai-chat/ai-chat-message.repo.ts | 51 ++++++------------- 3 files changed, 23 insertions(+), 42 deletions(-) diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts index a52aaf5b..0a957175 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts @@ -240,7 +240,7 @@ describe('prepareAgentStep', () => { * write path. It runs identically for the upfront insert (empty steps, * 'streaming'), every per-step update, and the terminal finalize — so a future * background worker can call the same function. These tests pin the four status - * shapes and the `metadata.parts` shape that rowToUiMessage/findRecent depend on + * shapes and the `metadata.parts` shape that rowToUiMessage/findAllByChat depend on * (per-step text + tool parts via assistantParts, in-progress text appended). */ describe('flushAssistant', () => { diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 492ce9f6..85e9ad1b 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -322,12 +322,14 @@ export class AiChatService implements OnModuleInit { // Rebuild the conversation from persisted history (not the client payload), // so the model always sees the authoritative server-side transcript. Load - // the most RECENT tail (oldest -> newest) so chats longer than one page do - // not drop recent turns (incl. the user message just inserted above). - const history = await this.aiChatMessageRepo.findRecent( + // the FULL history in chronological order (oldest -> newest, incl. the user + // message just inserted above) so NO turns are dropped — there is no + // recent-tail window anymore. `findAllByChat` keeps a 5000-row memory-safety + // backstop (on overflow it keeps the NEWEST rows and logs a warning); that + // is a safety net far above any realistic chat, not a conversational limit. + const history = await this.aiChatMessageRepo.findAllByChat( chatId, workspace.id, - 50, ); const uiMessages = history.map(rowToUiMessage); // convertToModelMessages is async in ai@6.0.134 (returns Promise). @@ -1215,7 +1217,7 @@ export async function applyFinalize( * * `metadata.parts` is built by assistantParts over the finished steps, then the * in-progress text appended as a trailing text part, so rowToUiMessage / - * findRecent keep replaying the turn unchanged. `metadata.finishReason`, + * findAllByChat keep replaying the turn unchanged. `metadata.finishReason`, * `metadata.error`, `metadata.usage`, `metadata.contextTokens` and * `metadata.maxContextTokens` are attached only when provided/relevant, matching * the pre-#183 onFinish/onError records. diff --git a/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts b/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts index fc283792..c9352e31 100644 --- a/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts +++ b/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts @@ -18,7 +18,8 @@ import { executeWithCursorPagination } from '@docmost/db/pagination/cursor-pagin // (multi-instance deploy). const SWEEP_STREAMING_STALE_MS = 10 * 60 * 1000; // 10 minutes -// Hard upper bound on the rows materialized by `findAllByChat` (export path). +// Hard upper bound on the rows materialized by `findAllByChat`, which now feeds +// BOTH the Markdown export and the per-turn model history. // A generous cap so a pathologically huge chat cannot load an unbounded result // into memory; far above any realistic transcript length. const FIND_ALL_BY_CHAT_LIMIT = 5000; @@ -78,14 +79,17 @@ export class AiChatMessageRepo { } // Load ALL (non-deleted) messages of a chat in ascending chronological order - // (oldest -> newest), unpaginated. Used by the server-side Markdown export - // (#183), where the DB is the single source of truth and the whole transcript - // must be rendered in one pass (findByChat is cursor-paginated and would only - // return the first page). + // (oldest -> newest), unpaginated. Two callers, both treating the DB as the + // single source of truth and needing the whole transcript in one pass + // (findByChat is cursor-paginated and would only return the first page): + // - the server-side Markdown export (#183); + // - the per-turn model history, rebuilt fresh on every turn so the model + // sees the full authoritative transcript. // // Hard-capped at FIND_ALL_BY_CHAT_LIMIT rows (a generous bound, far above any - // realistic transcript) so exporting a pathologically huge chat cannot - // materialize an unbounded result set in memory. + // realistic transcript) — a shared memory-safety backstop for BOTH paths so a + // pathologically huge chat cannot materialize an unbounded result set in + // memory. On overflow the NEWEST rows are kept and a warning is logged. async findAllByChat( chatId: string, workspaceId: string, @@ -93,9 +97,9 @@ export class AiChatMessageRepo { limit: number = FIND_ALL_BY_CHAT_LIMIT, ): Promise { // Fetch newest-first (+1 to DETECT truncation), so on overflow we keep the - // NEWEST `limit` messages — the recent conversation matters most for an - // export — rather than silently dropping the tail (#183 review). Reverse back - // to chronological for rendering, like findRecent. + // NEWEST `limit` messages — the recent conversation matters most — rather + // than silently dropping the tail (#183 review). Then reverse back to + // chronological order (oldest -> newest) for rendering / model replay. const rows = await this.db .selectFrom('aiChatMessages') .select(this.baseFields) @@ -110,38 +114,13 @@ export class AiChatMessageRepo { if (rows.length > limit) { rows.length = limit; // keep the newest `limit` (rows are newest-first here) this.logger.warn( - `Chat ${chatId} export truncated to the newest ${limit} messages ` + + `Chat ${chatId} truncated to the newest ${limit} messages ` + `(older messages omitted).`, ); } return rows.reverse(); } - // Load the most RECENT `limit` messages for a chat and return them in - // ascending chronological order (oldest -> newest), as the model expects. - // `findByChat` returns the FIRST page ASC (the OLDEST messages), which loses - // recent turns once a chat grows beyond a page; this rebuilds the model - // history from the tail instead. Plain query (no cursor pagination). - async findRecent( - chatId: string, - workspaceId: string, - limit: number, - ): Promise { - const rows = await this.db - .selectFrom('aiChatMessages') - .select(this.baseFields) - .where('chatId', '=', chatId) - .where('workspaceId', '=', workspaceId) - .where('deletedAt', 'is', null) - .orderBy('createdAt', 'desc') - .orderBy('id', 'desc') - .limit(limit) - .execute(); - - // Selected newest-first for the limit; reverse to oldest-first for the model. - return rows.reverse(); - } - async insert( insertable: InsertableAiChatMessage, trx?: KyselyTransaction, -- 2.49.1 From e99c00a9eeeb379ba6c07cd12f632f8d53ba3638 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Fri, 26 Jun 2026 17:19:14 +0300 Subject: [PATCH 2/2] test(review): pin full-transcript history past 50 rows + changelog (PR #202) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address the PR #202 review (approve-with-comments). The only actionable non-blocking item was the test-coverage suggestion: the source switch in AiChatService.handle from findRecent(chatId, ws, 50) to findAllByChat(chatId, ws) was not pinned by a test. handle() is a streaming method the project marks as not unit-testable, so cover the behavioral guarantee it now relies on at the repo/integration level — seed a chat of 60 messages and assert the default findAllByChat (exactly how handle calls it) returns the FULL transcript in chronological order, including the first turn the old 50-window would have dropped. Also document the behavior change under CHANGELOG [Unreleased] -> Changed. The two stability items (token-budget trim before streamText; O(N) history rebuild per turn) are deferred: the reviewer flagged both as non-blocking conscious trade-offs aligned with the PR's stated goal, and the trim is a larger architecture change out of scope for this follow-up. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 9 ++++++ .../ai-chat-message-status.int-spec.ts | 32 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77fe9718..b8e28530 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,6 +75,15 @@ per-workspace rolling-day token budget. ### Changed +- **AI chat now feeds the model the full stored transcript.** The per-turn model + conversation was rebuilt from a sliding window of the 50 most recent stored + rows, which silently dropped the beginning of any longer chat. It is now + rebuilt from the complete non-deleted transcript in chronological order, so + the model sees every turn (a 5000-row backstop guards process memory — a + safety net far above any realistic chat, not a conversational limit). On a + very long chat this can eventually reach the model's context window; the + client already surfaces that as "start a new chat". (#202) + - **AI chat default provider is now `openai-compatible` (reasoning surfaced).** For the `openai` driver the chat provider defaults to the openai-compatible implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the diff --git a/apps/server/test/integration/ai-chat-message-status.int-spec.ts b/apps/server/test/integration/ai-chat-message-status.int-spec.ts index 5e7eba1b..b73a815d 100644 --- a/apps/server/test/integration/ai-chat-message-status.int-spec.ts +++ b/apps/server/test/integration/ai-chat-message-status.int-spec.ts @@ -267,4 +267,36 @@ describe('AiChatMessageRepo.update + sweepStreaming [integration]', () => { const all = await repo.findAllByChat(cappedChat, workspaceId, 100); expect(all.map((r) => r.content)).toEqual(['m1-oldest', 'm2', 'm3-newest']); }); + + it('default findAllByChat returns the FULL transcript past 50 rows — no recent-tail window (#202)', async () => { + // PR #202 swapped the model-history rebuild in AiChatService.handle from + // findRecent(chatId, ws, 50) to findAllByChat(chatId, ws) WITHOUT a limit + // arg. This pins the behavioral guarantee that switch relies on: a chat + // longer than the old 50-msg window comes back in FULL (oldest -> newest), + // so no early turns are silently dropped from what the model sees. The old + // 50-cap would have returned only the last 50 of these 60 rows. + const longChat = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + const base = Date.now(); + const total = 60; + for (let i = 0; i < total; i++) { + await createMessage(db, { + workspaceId, + chatId: longChat, + content: `msg-${i}`, + // Strictly increasing timestamps so ordering is deterministic. + createdAt: new Date(base + i * 1000), + }); + } + + // Default args == exactly how handle() calls it now. + const history = await repo.findAllByChat(longChat, workspaceId); + expect(history).toHaveLength(total); + expect(history.map((r) => r.content)).toEqual( + Array.from({ length: total }, (_, i) => `msg-${i}`), + ); + // The very first turn (which the old 50-window would have dropped) is present. + expect(history[0]!.content).toBe('msg-0'); + }); }); -- 2.49.1