Compare commits
2 Commits
feat/199-a
...
feat/ai-ch
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e99c00a9ee | ||
|
|
1f459d8d26 |
@@ -75,6 +75,15 @@ per-workspace rolling-day token budget.
|
|||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
|
- **AI chat now feeds the model the full stored transcript.** The per-turn model
|
||||||
|
conversation was rebuilt from a sliding window of the 50 most recent stored
|
||||||
|
rows, which silently dropped the beginning of any longer chat. It is now
|
||||||
|
rebuilt from the complete non-deleted transcript in chronological order, so
|
||||||
|
the model sees every turn (a 5000-row backstop guards process memory — a
|
||||||
|
safety net far above any realistic chat, not a conversational limit). On a
|
||||||
|
very long chat this can eventually reach the model's context window; the
|
||||||
|
client already surfaces that as "start a new chat". (#202)
|
||||||
|
|
||||||
- **AI chat default provider is now `openai-compatible` (reasoning surfaced).**
|
- **AI chat default provider is now `openai-compatible` (reasoning surfaced).**
|
||||||
For the `openai` driver the chat provider defaults to the openai-compatible
|
For the `openai` driver the chat provider defaults to the openai-compatible
|
||||||
implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the
|
implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the
|
||||||
|
|||||||
@@ -240,7 +240,7 @@ describe('prepareAgentStep', () => {
|
|||||||
* write path. It runs identically for the upfront insert (empty steps,
|
* write path. It runs identically for the upfront insert (empty steps,
|
||||||
* 'streaming'), every per-step update, and the terminal finalize — so a future
|
* 'streaming'), every per-step update, and the terminal finalize — so a future
|
||||||
* background worker can call the same function. These tests pin the four status
|
* background worker can call the same function. These tests pin the four status
|
||||||
* shapes and the `metadata.parts` shape that rowToUiMessage/findRecent depend on
|
* shapes and the `metadata.parts` shape that rowToUiMessage/findAllByChat depend on
|
||||||
* (per-step text + tool parts via assistantParts, in-progress text appended).
|
* (per-step text + tool parts via assistantParts, in-progress text appended).
|
||||||
*/
|
*/
|
||||||
describe('flushAssistant', () => {
|
describe('flushAssistant', () => {
|
||||||
|
|||||||
@@ -322,12 +322,14 @@ export class AiChatService implements OnModuleInit {
|
|||||||
|
|
||||||
// Rebuild the conversation from persisted history (not the client payload),
|
// Rebuild the conversation from persisted history (not the client payload),
|
||||||
// so the model always sees the authoritative server-side transcript. Load
|
// so the model always sees the authoritative server-side transcript. Load
|
||||||
// the most RECENT tail (oldest -> newest) so chats longer than one page do
|
// the FULL history in chronological order (oldest -> newest, incl. the user
|
||||||
// not drop recent turns (incl. the user message just inserted above).
|
// message just inserted above) so NO turns are dropped — there is no
|
||||||
const history = await this.aiChatMessageRepo.findRecent(
|
// recent-tail window anymore. `findAllByChat` keeps a 5000-row memory-safety
|
||||||
|
// backstop (on overflow it keeps the NEWEST rows and logs a warning); that
|
||||||
|
// is a safety net far above any realistic chat, not a conversational limit.
|
||||||
|
const history = await this.aiChatMessageRepo.findAllByChat(
|
||||||
chatId,
|
chatId,
|
||||||
workspace.id,
|
workspace.id,
|
||||||
50,
|
|
||||||
);
|
);
|
||||||
const uiMessages = history.map(rowToUiMessage);
|
const uiMessages = history.map(rowToUiMessage);
|
||||||
// convertToModelMessages is async in ai@6.0.134 (returns Promise<ModelMessage[]>).
|
// convertToModelMessages is async in ai@6.0.134 (returns Promise<ModelMessage[]>).
|
||||||
@@ -1215,7 +1217,7 @@ export async function applyFinalize(
|
|||||||
*
|
*
|
||||||
* `metadata.parts` is built by assistantParts over the finished steps, then the
|
* `metadata.parts` is built by assistantParts over the finished steps, then the
|
||||||
* in-progress text appended as a trailing text part, so rowToUiMessage /
|
* in-progress text appended as a trailing text part, so rowToUiMessage /
|
||||||
* findRecent keep replaying the turn unchanged. `metadata.finishReason`,
|
* findAllByChat keep replaying the turn unchanged. `metadata.finishReason`,
|
||||||
* `metadata.error`, `metadata.usage`, `metadata.contextTokens` and
|
* `metadata.error`, `metadata.usage`, `metadata.contextTokens` and
|
||||||
* `metadata.maxContextTokens` are attached only when provided/relevant, matching
|
* `metadata.maxContextTokens` are attached only when provided/relevant, matching
|
||||||
* the pre-#183 onFinish/onError records.
|
* the pre-#183 onFinish/onError records.
|
||||||
|
|||||||
@@ -18,7 +18,8 @@ import { executeWithCursorPagination } from '@docmost/db/pagination/cursor-pagin
|
|||||||
// (multi-instance deploy).
|
// (multi-instance deploy).
|
||||||
const SWEEP_STREAMING_STALE_MS = 10 * 60 * 1000; // 10 minutes
|
const SWEEP_STREAMING_STALE_MS = 10 * 60 * 1000; // 10 minutes
|
||||||
|
|
||||||
// Hard upper bound on the rows materialized by `findAllByChat` (export path).
|
// Hard upper bound on the rows materialized by `findAllByChat`, which now feeds
|
||||||
|
// BOTH the Markdown export and the per-turn model history.
|
||||||
// A generous cap so a pathologically huge chat cannot load an unbounded result
|
// A generous cap so a pathologically huge chat cannot load an unbounded result
|
||||||
// into memory; far above any realistic transcript length.
|
// into memory; far above any realistic transcript length.
|
||||||
const FIND_ALL_BY_CHAT_LIMIT = 5000;
|
const FIND_ALL_BY_CHAT_LIMIT = 5000;
|
||||||
@@ -78,14 +79,17 @@ export class AiChatMessageRepo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Load ALL (non-deleted) messages of a chat in ascending chronological order
|
// Load ALL (non-deleted) messages of a chat in ascending chronological order
|
||||||
// (oldest -> newest), unpaginated. Used by the server-side Markdown export
|
// (oldest -> newest), unpaginated. Two callers, both treating the DB as the
|
||||||
// (#183), where the DB is the single source of truth and the whole transcript
|
// single source of truth and needing the whole transcript in one pass
|
||||||
// must be rendered in one pass (findByChat is cursor-paginated and would only
|
// (findByChat is cursor-paginated and would only return the first page):
|
||||||
// return the first page).
|
// - the server-side Markdown export (#183);
|
||||||
|
// - the per-turn model history, rebuilt fresh on every turn so the model
|
||||||
|
// sees the full authoritative transcript.
|
||||||
//
|
//
|
||||||
// Hard-capped at FIND_ALL_BY_CHAT_LIMIT rows (a generous bound, far above any
|
// Hard-capped at FIND_ALL_BY_CHAT_LIMIT rows (a generous bound, far above any
|
||||||
// realistic transcript) so exporting a pathologically huge chat cannot
|
// realistic transcript) — a shared memory-safety backstop for BOTH paths so a
|
||||||
// materialize an unbounded result set in memory.
|
// pathologically huge chat cannot materialize an unbounded result set in
|
||||||
|
// memory. On overflow the NEWEST rows are kept and a warning is logged.
|
||||||
async findAllByChat(
|
async findAllByChat(
|
||||||
chatId: string,
|
chatId: string,
|
||||||
workspaceId: string,
|
workspaceId: string,
|
||||||
@@ -93,9 +97,9 @@ export class AiChatMessageRepo {
|
|||||||
limit: number = FIND_ALL_BY_CHAT_LIMIT,
|
limit: number = FIND_ALL_BY_CHAT_LIMIT,
|
||||||
): Promise<AiChatMessage[]> {
|
): Promise<AiChatMessage[]> {
|
||||||
// Fetch newest-first (+1 to DETECT truncation), so on overflow we keep the
|
// Fetch newest-first (+1 to DETECT truncation), so on overflow we keep the
|
||||||
// NEWEST `limit` messages — the recent conversation matters most for an
|
// NEWEST `limit` messages — the recent conversation matters most — rather
|
||||||
// export — rather than silently dropping the tail (#183 review). Reverse back
|
// than silently dropping the tail (#183 review). Then reverse back to
|
||||||
// to chronological for rendering, like findRecent.
|
// chronological order (oldest -> newest) for rendering / model replay.
|
||||||
const rows = await this.db
|
const rows = await this.db
|
||||||
.selectFrom('aiChatMessages')
|
.selectFrom('aiChatMessages')
|
||||||
.select(this.baseFields)
|
.select(this.baseFields)
|
||||||
@@ -110,38 +114,13 @@ export class AiChatMessageRepo {
|
|||||||
if (rows.length > limit) {
|
if (rows.length > limit) {
|
||||||
rows.length = limit; // keep the newest `limit` (rows are newest-first here)
|
rows.length = limit; // keep the newest `limit` (rows are newest-first here)
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
`Chat ${chatId} export truncated to the newest ${limit} messages ` +
|
`Chat ${chatId} truncated to the newest ${limit} messages ` +
|
||||||
`(older messages omitted).`,
|
`(older messages omitted).`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return rows.reverse();
|
return rows.reverse();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load the most RECENT `limit` messages for a chat and return them in
|
|
||||||
// ascending chronological order (oldest -> newest), as the model expects.
|
|
||||||
// `findByChat` returns the FIRST page ASC (the OLDEST messages), which loses
|
|
||||||
// recent turns once a chat grows beyond a page; this rebuilds the model
|
|
||||||
// history from the tail instead. Plain query (no cursor pagination).
|
|
||||||
async findRecent(
|
|
||||||
chatId: string,
|
|
||||||
workspaceId: string,
|
|
||||||
limit: number,
|
|
||||||
): Promise<AiChatMessage[]> {
|
|
||||||
const rows = await this.db
|
|
||||||
.selectFrom('aiChatMessages')
|
|
||||||
.select(this.baseFields)
|
|
||||||
.where('chatId', '=', chatId)
|
|
||||||
.where('workspaceId', '=', workspaceId)
|
|
||||||
.where('deletedAt', 'is', null)
|
|
||||||
.orderBy('createdAt', 'desc')
|
|
||||||
.orderBy('id', 'desc')
|
|
||||||
.limit(limit)
|
|
||||||
.execute();
|
|
||||||
|
|
||||||
// Selected newest-first for the limit; reverse to oldest-first for the model.
|
|
||||||
return rows.reverse();
|
|
||||||
}
|
|
||||||
|
|
||||||
async insert(
|
async insert(
|
||||||
insertable: InsertableAiChatMessage,
|
insertable: InsertableAiChatMessage,
|
||||||
trx?: KyselyTransaction,
|
trx?: KyselyTransaction,
|
||||||
|
|||||||
@@ -267,4 +267,36 @@ describe('AiChatMessageRepo.update + sweepStreaming [integration]', () => {
|
|||||||
const all = await repo.findAllByChat(cappedChat, workspaceId, 100);
|
const all = await repo.findAllByChat(cappedChat, workspaceId, 100);
|
||||||
expect(all.map((r) => r.content)).toEqual(['m1-oldest', 'm2', 'm3-newest']);
|
expect(all.map((r) => r.content)).toEqual(['m1-oldest', 'm2', 'm3-newest']);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('default findAllByChat returns the FULL transcript past 50 rows — no recent-tail window (#202)', async () => {
|
||||||
|
// PR #202 swapped the model-history rebuild in AiChatService.handle from
|
||||||
|
// findRecent(chatId, ws, 50) to findAllByChat(chatId, ws) WITHOUT a limit
|
||||||
|
// arg. This pins the behavioral guarantee that switch relies on: a chat
|
||||||
|
// longer than the old 50-msg window comes back in FULL (oldest -> newest),
|
||||||
|
// so no early turns are silently dropped from what the model sees. The old
|
||||||
|
// 50-cap would have returned only the last 50 of these 60 rows.
|
||||||
|
const longChat = (
|
||||||
|
await createChat(db, { workspaceId, creatorId: userId })
|
||||||
|
).id;
|
||||||
|
const base = Date.now();
|
||||||
|
const total = 60;
|
||||||
|
for (let i = 0; i < total; i++) {
|
||||||
|
await createMessage(db, {
|
||||||
|
workspaceId,
|
||||||
|
chatId: longChat,
|
||||||
|
content: `msg-${i}`,
|
||||||
|
// Strictly increasing timestamps so ordering is deterministic.
|
||||||
|
createdAt: new Date(base + i * 1000),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default args == exactly how handle() calls it now.
|
||||||
|
const history = await repo.findAllByChat(longChat, workspaceId);
|
||||||
|
expect(history).toHaveLength(total);
|
||||||
|
expect(history.map((r) => r.content)).toEqual(
|
||||||
|
Array.from({ length: total }, (_, i) => `msg-${i}`),
|
||||||
|
);
|
||||||
|
// The very first turn (which the old 50-window would have dropped) is present.
|
||||||
|
expect(history[0]!.content).toBe('msg-0');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user