Merge pull request 'feat(comments): attribute MCP agent comments as AI (unspoofable provenance)' (#143) from feat/mcp-comments-ai-attribution into develop

Reviewed-on: #143
2026-06-24 02:05:06 +03:00
parent aeea315618 cbaa120037
commit f11c8d7bf1
27 changed files with 965 additions and 361 deletions
--- a/apps/server/src/collaboration/extensions/authentication.extension.spec.ts
+++ b/apps/server/src/collaboration/extensions/authentication.extension.spec.ts
@@ -207,5 +207,28 @@ describe('AuthenticationExtension.onAuthenticate', () => {

    expect(ctx.actor).toBe('user');
    expect(ctx.aiChatId).toBeNull();
+    // Wiring guard (#143): the collab seam MUST opt into the isAgent flag —
+    // it is not in baseFields, so without this option findById omits it and a
+    // flagged service account's collab edits would silently persist as 'user'.
+    expect(userRepo.findById).toHaveBeenCalledWith(
+      USER_ID,
+      WORKSPACE_ID,
+      expect.objectContaining({ includeIsAgent: true }),
+    );
+  });
+
+  it('is_agent user with NO claim → actor=agent (collab seam consults the signed identity)', async () => {
+    // Arch A regression guard: a flagged service account editing page CONTENT
+    // over the collab websocket carries a plain COLLAB token (no actor claim).
+    // Before the shared resolveProvenance() wiring this seam derived actor from
+    // the claim alone, so such edits persisted as lastUpdatedSource='user' —
+    // drifting from the REST seam. The seam must now stamp 'agent' from the
+    // is_agent flag, matching jwt.strategy.
+    userRepo.findById.mockResolvedValue(buildUser({ isAgent: true }));
+    const ctx = await ext.onAuthenticate(buildData() as any);
+
+    expect(ctx.actor).toBe('agent');
+    // No internal ai_chats row for an MCP/service-account collab edit → null.
+    expect(ctx.aiChatId).toBeNull();
  });
 });
--- a/apps/server/src/collaboration/extensions/authentication.extension.ts
+++ b/apps/server/src/collaboration/extensions/authentication.extension.ts
@@ -15,6 +15,7 @@ import { SpaceRole } from '../../common/helpers/types/permission';
 import { isUserDisabled } from '../../common/helpers';
 import { getPageId } from '../collaboration.util';
 import { JwtCollabPayload, JwtType } from '../../core/auth/dto/jwt-payload';
+import { resolveProvenance } from '../../common/decorators/auth-provenance.decorator';

@Injectable()
 export class AuthenticationExtension implements Extension {
@@ -43,7 +44,9 @@ export class AuthenticationExtension implements Extension {
    const userId = jwtPayload.sub;
    const workspaceId = jwtPayload.workspaceId;

-    const user = await this.userRepo.findById(userId, workspaceId);
+    const user = await this.userRepo.findById(userId, workspaceId, {
+      includeIsAgent: true,
+    });

    if (!user) {
      throw new UnauthorizedException();
@@ -103,13 +106,17 @@ export class AuthenticationExtension implements Extension {

    this.logger.debug(`Authenticated user ${user.id} on page ${pageId}`);

-    // Carry the signed agent-edit provenance claim into the hocuspocus
-    // connection context (§6.6 / §15 C2). The human collab path omits these
-    // claims, so it resolves to actor='user' / aiChatId=null.
+    // Carry the agent-edit provenance into the hocuspocus connection context
+    // (§6.6 / §15 C2), derived via the SAME resolver as the REST seam so the two
+    // can't drift. An is_agent service account (e.g. the MCP bot) is attributed
+    // 'agent' here too, so its page-content edits over collab persist as
+    // lastUpdatedSource='agent' (#143 review Arch A) — not just its REST writes.
+    // The human collab path carries no claim and is not flagged → actor='user'.
+    const provenance = resolveProvenance(user, jwtPayload);
    return {
      user,
-      actor: jwtPayload.actor ?? 'user',
-      aiChatId: jwtPayload.aiChatId ?? null,
+      actor: provenance.actor,
+      aiChatId: provenance.aiChatId,
    };
  }
 }
--- a/apps/server/src/collaboration/extensions/persistence.extension.ts
+++ b/apps/server/src/collaboration/extensions/persistence.extension.ts
@@ -21,6 +21,7 @@ import { KyselyDB } from '@docmost/db/types/kysely.types';
 import { executeTx } from '@docmost/db/utils';
 import { InjectQueue } from '@nestjs/bullmq';
 import { QueueJob, QueueName } from '../../integrations/queue/constants';
+import { ProvenanceSource } from '../../core/auth/dto/jwt-payload';
 import { Queue } from 'bullmq';
 import {
  extractMentions,
@@ -50,7 +51,7 @@ import { TransclusionService } from '../../core/page/transclusion/transclusion.s
 export function resolveSource(
  stickyTouched: boolean,
  contextActor?: string,
-): 'agent' | 'user' {
+): ProvenanceSource {
  return stickyTouched || contextActor === 'agent' ? 'agent' : 'user';
 }

--- a/apps/server/src/common/decorators/auth-provenance.decorator.spec.ts
+++ b/apps/server/src/common/decorators/auth-provenance.decorator.spec.ts
@@ -0,0 +1,91 @@
+import {
+  resolveProvenance,
+  agentSourceFields,
+} from './auth-provenance.decorator';
+
+/**
+ * Unit tests for the shared provenance helpers (#143 review, Arch A & follow-up
+ * 5). resolveProvenance is the single source of truth wired into BOTH transport
+ * seams (REST jwt.strategy + collab authentication.extension) — testing it here
+ * pins the derivation matrix so the seams can't silently drift. agentSourceFields
+ * is the one-place write-stamp idiom reused at every insert/update site.
+ */
+describe('resolveProvenance', () => {
+  it("flags an is_agent user as 'agent' even with no claim (the closed collab gap)", () => {
+    expect(resolveProvenance({ isAgent: true }, undefined)).toEqual({
+      actor: 'agent',
+      aiChatId: null,
+    });
+  });
+
+  it("an is_agent user keeps the claim's aiChatId when present", () => {
+    expect(
+      resolveProvenance({ isAgent: true }, { aiChatId: 'chat-1' }),
+    ).toEqual({ actor: 'agent', aiChatId: 'chat-1' });
+  });
+
+  it("honors a signed actor='agent' claim on a non-agent user (internal AI-chat token)", () => {
+    expect(
+      resolveProvenance(
+        { isAgent: false },
+        { actor: 'agent', aiChatId: 'chat-2' },
+      ),
+    ).toEqual({ actor: 'agent', aiChatId: 'chat-2' });
+  });
+
+  it("a plain user with no claim resolves to 'user' with null chat", () => {
+    expect(resolveProvenance({ isAgent: false }, undefined)).toEqual({
+      actor: 'user',
+      aiChatId: null,
+    });
+  });
+
+  it('tolerates a null/undefined user (defaults to the claim, else user)', () => {
+    expect(resolveProvenance(null, null)).toEqual({
+      actor: 'user',
+      aiChatId: null,
+    });
+    expect(resolveProvenance(undefined, { actor: 'agent' })).toEqual({
+      actor: 'agent',
+      aiChatId: null,
+    });
+  });
+});
+
+describe('agentSourceFields', () => {
+  it('stamps the configured source + chat columns for an agent write', () => {
+    expect(
+      agentSourceFields(
+        { actor: 'agent', aiChatId: 'chat-1' },
+        'createdSource',
+        'aiChatId',
+      ),
+    ).toEqual({ createdSource: 'agent', aiChatId: 'chat-1' });
+  });
+
+  it('uses the per-table column names passed in (page update variant)', () => {
+    expect(
+      agentSourceFields(
+        { actor: 'agent', aiChatId: null },
+        'lastUpdatedSource',
+        'lastUpdatedAiChatId',
+      ),
+    ).toEqual({ lastUpdatedSource: 'agent', lastUpdatedAiChatId: null });
+  });
+
+  it('returns {} for a user write so the column keeps its default', () => {
+    expect(
+      agentSourceFields(
+        { actor: 'user', aiChatId: null },
+        'createdSource',
+        'aiChatId',
+      ),
+    ).toEqual({});
+  });
+
+  it('returns {} when provenance is undefined', () => {
+    expect(
+      agentSourceFields(undefined, 'createdSource', 'aiChatId'),
+    ).toEqual({});
+  });
+});
--- a/apps/server/src/common/decorators/auth-provenance.decorator.ts
+++ b/apps/server/src/common/decorators/auth-provenance.decorator.ts
@@ -1,4 +1,5 @@
 import { createParamDecorator, ExecutionContext } from '@nestjs/common';
+import { ProvenanceSource } from '../../core/auth/dto/jwt-payload';

 /**
 * The agent-edit provenance carried by the request, read from the SIGNED access
@@ -8,10 +9,64 @@ import { createParamDecorator, ExecutionContext } from '@nestjs/common';
 * cannot fake an 'agent' marker.
 */
 export interface AuthProvenanceData {
-  actor: 'user' | 'agent';
+  actor: ProvenanceSource;
  aiChatId: string | null;
 }

+/**
+ * Single source of truth for deriving a write's provenance from the SIGNED
+ * server-side identity (#143 review, Arch A). Used by BOTH transport seams — the
+ * REST access-token strategy and the collab websocket auth — so they can't drift:
+ *
+ *   - A `user.isAgent` service account (e.g. the MCP bot) stamps 'agent' on every
+ *     write. It has no internal ai_chats row, so aiChatId comes from the claim
+ *     (usually null).
+ *   - Otherwise honor the actor claim minted into the internal AI agent's token
+ *     (actor='agent' + aiChatId); a normal user token carries no claim → 'user'.
+ *
+ * Precedence: a truthy `user.isAgent` wins over any `claim.actor`, including an
+ * explicit 'user'. `aiChatId` is always taken from the claim, independently of
+ * the resolved actor, and a missing value is normalized to null.
+ *
+ * Provenance is NEVER read from a client body field, so a normal user cannot fake
+ * an 'agent' marker.
+ *
+ * @param user - The authenticated user; only `isAgent` is read. null/undefined is
+ *   tolerated and treated as "not flagged".
+ * @param claim - The decoded token payload; only `actor` and `aiChatId` are read.
+ *   null/undefined is tolerated and treated as "no claim".
+ * @returns The resolved `{ actor, aiChatId }` pair; `aiChatId` is never undefined.
+ */
+export function resolveProvenance(
+  user: { isAgent?: boolean | null } | null | undefined,
+  claim: { actor?: ProvenanceSource; aiChatId?: string | null } | null | undefined,
+): AuthProvenanceData {
+  const actor: ProvenanceSource = user?.isAgent
+    ? 'agent'
+    : (claim?.actor ?? 'user');
+  return { actor, aiChatId: claim?.aiChatId ?? null };
+}
+
+/**
+ * Agent-edit write-stamp fields for a repository insert/update (#143 review).
+ * Spread into the row being written: for an agent it stamps the `*Source`
+ * column 'agent' and the AI-chat id; for a normal user it returns `{}` — on an
+ * INSERT the omitted column falls back to its DB default ('user'); on an UPDATE
+ * the column simply keeps its existing stored value (Kysely only writes the keys
+ * present). The only per-table variation is the column names, passed as
+ * `sourceKey`/`chatKey`, so the agent-stamp idiom lives in ONE place instead of
+ * being hand-reimplemented at every write site (where a wrong literal or a
+ * forgotten `aiChatId` could drift).
+ *
+ *   insertComment({ ..., ...agentSourceFields(p, 'createdSource', 'aiChatId') })
+ *   updatePage({ ..., ...agentSourceFields(p, 'lastUpdatedSource', 'lastUpdatedAiChatId') })
+ *
+ * Does NOT cover sites that must CLEAR the source on a non-agent action (e.g.
+ * comment un-resolve, which writes an explicit null) — those keep their own
+ * conditional; nor the collab persistence path (its own sticky-window logic).
+ *
+ * The two keys are always emitted together or not at all: the result is either
+ * `{}` or an object carrying both `sourceKey` and `chatKey`. The `as` cast on
+ * the return is presumably needed because an object literal with computed
+ * generic keys is not inferred as the precise keyed type — confirm before
+ * removing it.
+ *
+ * @param provenance - Resolved provenance for the write; undefined is treated
+ *   the same as a non-agent actor.
+ * @param sourceKey - Property name that receives the literal 'agent'.
+ * @param chatKey - Property name that receives `provenance.aiChatId` (may be null).
+ * @returns `{ [sourceKey]: 'agent', [chatKey]: aiChatId }` for an agent, else `{}`.
+ */
+export function agentSourceFields<S extends string, C extends string>(
+  provenance: AuthProvenanceData | undefined,
+  sourceKey: S,
+  chatKey: C,
+): Partial<Record<S, ProvenanceSource> & Record<C, string | null>> {
+  if (provenance?.actor !== 'agent') return {};
+  return {
+    [sourceKey]: 'agent',
+    [chatKey]: provenance.aiChatId,
+  } as Partial<Record<S, ProvenanceSource> & Record<C, string | null>>;
+}
+
 /**
 * Resolve the request's provenance. Defaults to a 'user' actor when the claim
 * is absent (e.g. an endpoint reached without going through the access-token
--- a/apps/server/src/core/auth/dto/jwt-payload.ts
+++ b/apps/server/src/core/auth/dto/jwt-payload.ts
@@ -1,3 +1,11 @@
+/**
+ * Provenance actor for a write: who the action is attributed to. Derived only
+ * server-side (is_agent flag or SIGNED token claim, never a request body), so
+ * 'agent' is unspoofable. Single source of truth so a typo like 'agnet' can't
+ * slip through as a bare string (#143 review). Distinct from `ActorType` (auth principal kind).
+ */
+export type ProvenanceSource = 'user' | 'agent';
+
 export enum JwtType {
  ACCESS = 'access',
  COLLAB = 'collab',
@@ -19,8 +27,10 @@ export type JwtPayload = {
  // mints a provenance access token so REST writes (create/rename/move page,
  // comment create/resolve) record a non-spoofable 'agent' marker (§6.5 / §15
  // C3 / §14 N2).
-  actor?: 'user' | 'agent';
-  aiChatId?: string;
+  actor?: ProvenanceSource;
+  // Nullable: an external MCP agent has no internal ai_chats row, so it carries
+  // an 'agent' actor with a null aiChatId.
+  aiChatId?: string | null;
 };

 export type JwtCollabPayload = {
@@ -30,8 +40,10 @@ export type JwtCollabPayload = {
  // Optional agent-edit provenance, signed into the collab token. Absent for
  // the human collab path (treated as 'user'); set only when the internal agent
  // mints a provenance collab token (§6.6 / §15 C2).
-  actor?: 'user' | 'agent';
-  aiChatId?: string;
+  actor?: ProvenanceSource;
+  // Nullable: an external MCP agent has no internal ai_chats row, so it carries
+  // an 'agent' actor with a null aiChatId.
+  aiChatId?: string | null;
 };

 export type JwtExchangePayload = {
--- a/apps/server/src/core/auth/services/token.service.ts
+++ b/apps/server/src/core/auth/services/token.service.ts
@@ -34,7 +34,9 @@ export class TokenService {
    // token carries no actor/aiChatId and is treated as 'user' downstream. The
    // internal agent passes { actor:'agent', aiChatId } so REST writes record a
    // non-spoofable 'agent' marker off the signed claim (§6.5 / §15 C3 / §14 N2).
-    provenance?: { actor: 'agent'; aiChatId: string },
+    // aiChatId is nullable: an external MCP agent has no internal ai_chats row,
+    // so it stamps 'agent' with a null aiChatId.
+    provenance?: { actor: 'agent'; aiChatId: string | null },
  ): Promise<string> {
    if (isUserDisabled(user)) {
      throw new ForbiddenException();
@@ -58,7 +60,8 @@ export class TokenService {
    workspaceId: string,
    // Optional agent-edit provenance. When omitted (the human collab path), the
    // token carries no actor/aiChatId and is treated as 'user' downstream.
-    provenance?: { actor: 'agent'; aiChatId: string },
+    // aiChatId is nullable for an external agent with no internal ai_chats row.
+    provenance?: { actor: 'agent'; aiChatId: string | null },
  ): Promise<string> {
    if (isUserDisabled(user)) {
      throw new ForbiddenException();
--- a/apps/server/src/core/auth/strategies/jwt.strategy.spec.ts
+++ b/apps/server/src/core/auth/strategies/jwt.strategy.spec.ts
@@ -0,0 +1,122 @@
+import { UnauthorizedException } from '@nestjs/common';
+import { JwtStrategy } from './jwt.strategy';
+import { JwtType } from '../dto/jwt-payload';
+
+/**
+ * Provenance derivation in JwtStrategy.validate (jwt.strategy.ts).
+ *
+ * The strategy must derive the agent-edit provenance from the SIGNED server-side
+ * identity, never from a client-controlled field. The security invariant under
+ * test: a user flagged is_agent stamps 'agent'; an ordinary user resolves to
+ * 'user'; and a signed actor='agent' claim is honored only because it can exist
+ * solely on a server-minted internal-AI-chat token (anti-spoof — a plain user
+ * cannot forge it, so cannot obtain created_source='agent').
+ *
+ * The strategy is constructed directly with stub deps. The PassportStrategy base
+ * only needs a secret at construction time; validate() is exercised on its own.
+ */
+describe('JwtStrategy — provenance derivation', () => {
+  function makeStrategy(user: any) {
+    const userRepo: any = { findById: jest.fn(async () => user) };
+    const workspaceRepo: any = { findById: jest.fn(async () => ({ id: 'ws-1' })) };
+    const userSessionRepo: any = { findActiveById: jest.fn() };
+    const sessionActivityService: any = { trackActivity: jest.fn() };
+    const environmentService: any = { getAppSecret: () => 'test-secret' };
+    const moduleRef: any = {};
+
+    const strategy = new JwtStrategy(
+      userRepo,
+      workspaceRepo,
+      userSessionRepo,
+      sessionActivityService,
+      environmentService,
+      moduleRef,
+    );
+    return { strategy, userRepo };
+  }
+
+  // A bare request whose `raw` collects the provenance the strategy stamps.
+  const makeReq = () => ({ raw: {} as Record<string, any> });
+
+  const accessPayload = (over?: Record<string, any>) => ({
+    sub: 'user-1',
+    email: 'u@test.local',
+    workspaceId: 'ws-1',
+    type: JwtType.ACCESS,
+    ...over,
+  });
+
+  it("stamps actor='agent' for an is_agent user (derived from the signed identity)", async () => {
+    const { strategy, userRepo } = makeStrategy({
+      id: 'user-1',
+      isAgent: true,
+      deactivatedAt: null,
+      deletedAt: null,
+    });
+    const req = makeReq();
+
+    await strategy.validate(req, accessPayload() as any);
+
+    expect(req.raw.actor).toBe('agent');
+    // External MCP agent: no internal ai_chats row → null.
+    expect(req.raw.aiChatId).toBeNull();
+    // Wiring guard (#143): the seam MUST opt into the isAgent flag, otherwise
+    // findById omits it (it is not in baseFields) and provenance silently
+    // degrades to 'user'.
+    expect(userRepo.findById).toHaveBeenCalledWith(
+      'user-1',
+      'ws-1',
+      expect.objectContaining({ includeIsAgent: true }),
+    );
+  });
+
+  it("stamps actor='user' for an ordinary user", async () => {
+    const { strategy } = makeStrategy({
+      id: 'user-1',
+      isAgent: false,
+      deactivatedAt: null,
+      deletedAt: null,
+    });
+    const req = makeReq();
+
+    await strategy.validate(req, accessPayload() as any);
+
+    expect(req.raw.actor).toBe('user');
+    expect(req.raw.aiChatId).toBeNull();
+  });
+
+  it("honors a SIGNED actor='agent' claim on a non-agent user's token (the internal AI-chat path)", async () => {
+    // A non-agent user (the plain no-claim → 'user' case is covered above). A
+    // token that DOES carry actor='agent' resolves to 'agent' — BY DESIGN: that
+    // claim can only exist on a SERVER-MINTED provenance token (the internal AI
+    // chat), never on a plain login token, because the token is signed with the
+    // app secret. The guarantee is that a client cannot FORGE this signed claim,
+    // not that the strategy ignores it. (A plain user still cannot obtain
+    // 'agent' — they have no way to get such a token.)
+    const { strategy } = makeStrategy({
+      id: 'user-1',
+      isAgent: false,
+      deactivatedAt: null,
+      deletedAt: null,
+    });
+    const req2 = makeReq();
+    await strategy.validate(req2, accessPayload({ actor: 'agent', aiChatId: 'chat-1' }) as any);
+    expect(req2.raw.actor).toBe('agent');
+    expect(req2.raw.aiChatId).toBe('chat-1');
+  });
+
+  it('rejects a disabled is_agent user (Unauthorized) before stamping provenance', async () => {
+    const { strategy } = makeStrategy({
+      id: 'user-1',
+      isAgent: true,
+      deactivatedAt: new Date('2026-01-01'),
+      deletedAt: null,
+    });
+    const req = makeReq();
+
+    await expect(strategy.validate(req, accessPayload() as any)).rejects.toThrow(
+      UnauthorizedException,
+    );
+    expect(req.raw.actor).toBeUndefined();
+  });
+});
--- a/apps/server/src/core/auth/strategies/jwt.strategy.ts
+++ b/apps/server/src/core/auth/strategies/jwt.strategy.ts
@@ -10,6 +10,7 @@ import { SessionActivityService } from '../../session/session-activity.service';
 import { FastifyRequest } from 'fastify';
 import { extractBearerTokenFromHeader, isUserDisabled } from '../../../common/helpers';
 import { ModuleRef } from '@nestjs/core';
+import { resolveProvenance } from '../../../common/decorators/auth-provenance.decorator';

@Injectable()
 export class JwtStrategy extends PassportStrategy(Strategy, 'jwt') {
@@ -55,7 +56,9 @@ export class JwtStrategy extends PassportStrategy(Strategy, 'jwt') {
    if (!workspace) {
      throw new UnauthorizedException();
    }
-    const user = await this.userRepo.findById(payload.sub, payload.workspaceId);
+    const user = await this.userRepo.findById(payload.sub, payload.workspaceId, {
+      includeIsAgent: true,
+    });

    if (!user || isUserDisabled(user)) {
      throw new UnauthorizedException();
@@ -71,14 +74,15 @@ export class JwtStrategy extends PassportStrategy(Strategy, 'jwt') {
      this.sessionActivityService.trackActivity(sessionId, payload.sub, payload.workspaceId);
    }

-    // Propagate the signed agent-edit provenance claim onto the request so REST
-    // services/controllers can set the 'agent' marker off it. A normal user
-    // token carries no actor claim and resolves to 'user' (unchanged behaviour);
-    // only the internal agent's minted token sets actor='agent' + aiChatId. This
-    // is read server-side from the SIGNED token, never from a client body field,
-    // so a normal user cannot fake an 'agent' badge.
-    req.raw.actor = (payload as JwtPayload).actor ?? 'user';
-    req.raw.aiChatId = (payload as JwtPayload).aiChatId ?? null;
+    // Propagate the agent-edit provenance onto the request so REST
+    // services/controllers can set the 'agent' marker off it. Derived from the
+    // SIGNED server-side identity via the shared resolver (also used by the
+    // collab seam, so the two never drift), never from a client body field — so
+    // an is_agent service account stamps every REST write made with an access
+    // token, and a normal user cannot fake an 'agent' badge.
+    const provenance = resolveProvenance(user, payload as JwtPayload);
+    req.raw.actor = provenance.actor;
+    req.raw.aiChatId = provenance.aiChatId;

    return { user, workspace };
  }
--- a/apps/server/src/core/comment/comment.service.behavior.spec.ts
+++ b/apps/server/src/core/comment/comment.service.behavior.spec.ts
@@ -147,6 +147,24 @@ describe('CommentService — behavior', () => {
      expect(insertArg.creatorId).toBe('user-1');
    });

+    it('stamps createdSource:"agent" with a null aiChatId (external MCP agent) without breaking insert', async () => {
+      const { service, commentRepo } = makeService();
+
+      // An external MCP agent is flagged is_agent server-side but has no
+      // internal ai_chats row, so provenance carries actor='agent' + a null
+      // aiChatId. The insert must still record the agent marker.
+      await service.create(
+        { page: page(), workspaceId: 'ws-1', user: user() },
+        { content: JSON.stringify(docMentioning()) } as any,
+        { actor: 'agent', aiChatId: null },
+      );
+
+      const insertArg = commentRepo.insertComment.mock.calls[0][0];
+      expect(insertArg.createdSource).toBe('agent');
+      expect(insertArg.aiChatId).toBeNull();
+      expect(insertArg.creatorId).toBe('user-1');
+    });
+
    it('leaves source default (no agent stamp) for a normal user', async () => {
      const { service, commentRepo } = makeService();

--- a/apps/server/src/core/comment/comment.service.ts
+++ b/apps/server/src/core/comment/comment.service.ts
@@ -22,7 +22,10 @@ import {
  ICommentResolvedNotificationJob,
 } from '../../integrations/queue/constants/queue.interface';
 import { WsService } from '../../ws/ws.service';
-import { AuthProvenanceData } from '../../common/decorators/auth-provenance.decorator';
+import {
+  AuthProvenanceData,
+  agentSourceFields,
+} from '../../common/decorators/auth-provenance.decorator';

@Injectable()
 export class CommentService {
@@ -60,7 +63,6 @@ export class CommentService {
  ) {
    const { page, workspaceId, user } = opts;
    const commentContent = JSON.parse(createCommentDto.content);
-    const isAgent = provenance?.actor === 'agent';

    if (createCommentDto.parentCommentId) {
      const parentComment = await this.commentRepo.findById(
@@ -87,9 +89,7 @@ export class CommentService {
      spaceId: page.spaceId,
      // Agent-edit provenance: the user stays creatorId; this only annotates the
      // source. Normal user requests leave the column default ('user').
-      ...(isAgent
-        ? { createdSource: 'agent', aiChatId: provenance.aiChatId }
-        : {}),
+      ...agentSourceFields(provenance, 'createdSource', 'aiChatId'),
    });

    if (createCommentDto.yjsSelection) {
--- a/apps/server/src/core/page/services/page.service.spec.ts
+++ b/apps/server/src/core/page/services/page.service.spec.ts
@@ -147,4 +147,246 @@ describe('PageService', () => {
      expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
    });
  });
+
+  describe('agent provenance stamping (#143)', () => {
+    // Provenance handed to the four write sites. The agent case must surface the
+    // signed source marker + chat id on the persisted payload; the user case must
+    // leave both keys absent so the column keeps its INSERT default / existing
+    // UPDATE value (agentSourceFields returns {} for a non-agent).
+    const AGENT = { actor: 'agent', aiChatId: 'chat-7' } as any;
+    const USER = { actor: 'user', aiChatId: null } as any;
+
+    // A general-queue stub whose `.add(...)` returns a bare `{ catch }` object —
+    // the service does `generalQueue.add(...).catch(...)` and never awaits it.
+    const makeGeneralQueue = () =>
+      ({ add: jest.fn().mockReturnValue({ catch: jest.fn() }) }) as any;
+
+    // Build a PageService where only the deps a given site touches are real
+    // stubs; everything else stays a bare object. db is supplied per-test.
+    const makeSvc = (overrides: {
+      pageRepo?: any;
+      generalQueue?: any;
+      db?: any;
+    }) =>
+      new PageService(
+        (overrides.pageRepo ?? {}) as any, // pageRepo
+        {} as any, // pagePermissionRepo
+        {} as any, // attachmentRepo
+        (overrides.db ?? {}) as any, // db
+        {} as any, // storageService
+        {} as any, // attachmentQueue
+        {} as any, // aiQueue
+        (overrides.generalQueue ?? makeGeneralQueue()) as any, // generalQueue
+        {} as any, // eventEmitter
+        {} as any, // collaborationGateway
+        {} as any, // watcherService
+        {} as any, // transclusionService
+      );
+
+    describe('create() → insertPage', () => {
+      const run = async (provenance: any) => {
+        const pageRepo = {
+          insertPage: jest.fn().mockResolvedValue({ id: 'p1' }),
+        };
+        const svc = makeSvc({ pageRepo, generalQueue: makeGeneralQueue() });
+        // nextPagePosition runs a real db query; stub it out.
+        jest.spyOn(svc, 'nextPagePosition').mockResolvedValue('a0' as any);
+        // No content/format → the prosemirror parse branch is skipped. No
+        // parentPageId → no parent lookup.
+        await svc.create(
+          'u1',
+          'w1',
+          { title: 't', spaceId: 's1' } as any,
+          provenance,
+        );
+        return pageRepo.insertPage.mock.calls[0][0];
+      };
+
+      it('stamps lastUpdatedSource/lastUpdatedAiChatId for an agent', async () => {
+        const payload = await run(AGENT);
+        expect(payload).toEqual(
+          expect.objectContaining({
+            lastUpdatedSource: 'agent',
+            lastUpdatedAiChatId: 'chat-7',
+          }),
+        );
+      });
+
+      it('omits the source columns for a normal user', async () => {
+        const payload = await run(USER);
+        expect(payload).not.toHaveProperty('lastUpdatedSource');
+        expect(payload).not.toHaveProperty('lastUpdatedAiChatId');
+      });
+    });
+
+    describe('update() → updatePage', () => {
+      const run = async (provenance: any) => {
+        const pageRepo = {
+          updatePage: jest.fn().mockResolvedValue(undefined),
+          findById: jest.fn().mockResolvedValue({ id: 'p1' }),
+        };
+        const svc = makeSvc({ pageRepo, generalQueue: makeGeneralQueue() });
+        const page = {
+          id: 'p1',
+          contributorIds: [],
+          spaceId: 's1',
+          workspaceId: 'w1',
+          slugId: 'sl1',
+          title: 't',
+          parentPageId: null,
+        } as any;
+        // dto carries no content/operation/format → updatePageContent skipped.
+        await svc.update(page, {} as any, { id: 'u1' } as any, provenance);
+        return pageRepo.updatePage.mock.calls[0][0];
+      };
+
+      it('stamps lastUpdatedSource/lastUpdatedAiChatId for an agent', async () => {
+        const payload = await run(AGENT);
+        expect(payload).toEqual(
+          expect.objectContaining({
+            lastUpdatedSource: 'agent',
+            lastUpdatedAiChatId: 'chat-7',
+          }),
+        );
+      });
+
+      it('omits the source columns for a normal user', async () => {
+        const payload = await run(USER);
+        expect(payload).not.toHaveProperty('lastUpdatedSource');
+        expect(payload).not.toHaveProperty('lastUpdatedAiChatId');
+      });
+    });
+
+    describe('movePage() → updatePage', () => {
+      const VALID_POSITION = 'a0';
+      const run = async (provenance: any) => {
+        const pageRepo = {
+          findById: jest.fn().mockResolvedValue({
+            id: 'dest-parent',
+            deletedAt: null,
+            spaceId: 'space-1',
+          }),
+          updatePage: jest.fn().mockResolvedValue({ numUpdatedRows: 1n }),
+        };
+        const svc = makeSvc({
+          pageRepo,
+          db: {} as any,
+        });
+        // Legitimate move: destination ancestors do NOT include the moved page.
+        jest
+          .spyOn(svc, 'getPageBreadCrumbs')
+          .mockResolvedValue([{ id: 'dest-parent' }, { id: 'root' }] as any);
+        // eventEmitter is a bare {} stub; movePage emits PAGE_MOVED, so give it
+        // an emit. Re-wire via the private field to avoid threading it through.
+        (svc as any).eventEmitter = { emit: jest.fn() };
+        const movedPage = {
+          id: 'page-1',
+          parentPageId: 'old-parent',
+          spaceId: 'space-1',
+          workspaceId: 'ws-1',
+          slugId: 'slug-1',
+          title: 'Page 1',
+          icon: null,
+        } as any;
+        const dto = {
+          pageId: 'page-1',
+          position: VALID_POSITION,
+          parentPageId: 'dest-parent',
+        } as any;
+        await svc.movePage(dto, movedPage, provenance);
+        return pageRepo.updatePage.mock.calls[0][0];
+      };
+
+      it('stamps lastUpdatedSource/lastUpdatedAiChatId for an agent', async () => {
+        const payload = await run(AGENT);
+        expect(payload).toEqual(
+          expect.objectContaining({
+            lastUpdatedSource: 'agent',
+            lastUpdatedAiChatId: 'chat-7',
+          }),
+        );
+      });
+
+      it('omits the source columns for a normal user', async () => {
+        const payload = await run(USER);
+        expect(payload).not.toHaveProperty('lastUpdatedSource');
+        expect(payload).not.toHaveProperty('lastUpdatedAiChatId');
+      });
+    });
+
+    describe('movePageToSpace() → root-page updatePage', () => {
+      // movePageToSpace runs its writes inside executeTx(this.db, cb), which
+      // calls this.db.transaction().execute(fn => fn(trx)). A permissive
+      // chainable Proxy stands in for the Kysely trx so arbitrary chains resolve.
+      const makeChain = () => {
+        const c: any = new Proxy(function () {}, {
+          get: (_t, p) =>
+            p === 'then'
+              ? undefined
+              : p === 'execute' || p === 'executeTakeFirst'
+                ? () => Promise.resolve([])
+                : () => c,
+        });
+        return c;
+      };
+
+      const run = async (provenance: any) => {
+        const trxStub = makeChain();
+        const db = {
+          transaction: () => ({ execute: (fn: any) => fn(trxStub) }),
+        } as any;
+        const rootPage = {
+          id: 'root',
+          spaceId: 'src-space',
+          parentPageId: null,
+          workspaceId: 'ws-1',
+        } as any;
+        const pageRepo = {
+          getPageAndDescendants: jest.fn().mockResolvedValue([rootPage]),
+          updatePage: jest.fn().mockResolvedValue(undefined),
+          updatePages: jest.fn().mockResolvedValue(undefined),
+        };
+        const svc = makeSvc({ pageRepo, db });
+        // The single-accessible-page path still runs the bulk side-effect writes
+        // (attachments/watchers/ai-queue) AFTER the root updatePage we assert on;
+        // stub them so the transaction completes without throwing.
+        (svc as any).attachmentRepo = {
+          updateAttachmentsByPageId: jest.fn().mockResolvedValue(undefined),
+        };
+        (svc as any).watcherService = {
+          movePageWatchersToSpace: jest.fn().mockResolvedValue(undefined),
+        };
+        (svc as any).aiQueue = { add: jest.fn().mockResolvedValue(undefined) };
+        // Single accessible page (the root) → pagesToOrphan is empty, so the
+        // root updatePage is the first/only provenance-carrying updatePage call.
+        // filterAccessibleTreePages is private; spy via an `any` cast.
+        jest
+          .spyOn(svc as any, 'filterAccessibleTreePages')
+          .mockResolvedValue([rootPage] as any);
+        jest.spyOn(svc, 'nextPagePosition').mockResolvedValue('a0' as any);
+        await svc.movePageToSpace(rootPage, 'dst-space', 'u1', provenance);
+        return pageRepo.updatePage.mock.calls[0][0];
+      };
+
+      it('stamps the moved root with the agent source + chat id', async () => {
+        const payload = await run(AGENT);
+        expect(payload).toEqual(
+          expect.objectContaining({
+            spaceId: 'dst-space',
+            lastUpdatedSource: 'agent',
+            lastUpdatedAiChatId: 'chat-7',
+          }),
+        );
+      });
+
+      it('omits the source columns on the moved root for a normal user', async () => {
+        const payload = await run(USER);
+        expect(payload).toEqual(
+          expect.objectContaining({ spaceId: 'dst-space' }),
+        );
+        expect(payload).not.toHaveProperty('lastUpdatedSource');
+        expect(payload).not.toHaveProperty('lastUpdatedAiChatId');
+      });
+    });
+  });
 });
--- a/apps/server/src/core/page/services/page.service.ts
+++ b/apps/server/src/core/page/services/page.service.ts
@@ -57,7 +57,10 @@ import { WatcherService } from '../../watcher/watcher.service';
 import { sql } from 'kysely';
 import { TransclusionService } from '../transclusion/transclusion.service';
 import { remapPageEmbedSourceId } from '../transclusion/utils/transclusion-prosemirror.util';
-import { AuthProvenanceData } from '../../../common/decorators/auth-provenance.decorator';
+import {
+  AuthProvenanceData,
+  agentSourceFields,
+} from '../../../common/decorators/auth-provenance.decorator';

@Injectable()
 export class PageService {
@@ -135,8 +138,6 @@ export class PageService {
      ydoc = createYdocFromJson(prosemirrorJson);
    }

-    const isAgent = provenance?.actor === 'agent';
-
    const page = await this.pageRepo.insertPage({
      slugId: generateSlugId(),
      title: createPageDto.title,
@@ -153,12 +154,7 @@ export class PageService {
      // Agent-edit provenance. The human stays the responsible author
      // (creatorId/lastUpdatedById); these only annotate the source. A normal
      // user request leaves the column default ('user').
-      ...(isAgent
-        ? {
-            lastUpdatedSource: 'agent',
-            lastUpdatedAiChatId: provenance.aiChatId,
-          }
-        : {}),
+      ...agentSourceFields(provenance, 'lastUpdatedSource', 'lastUpdatedAiChatId'),
      content,
      textContent,
      ydoc,
@@ -231,8 +227,6 @@ export class PageService {
    contributors.add(user.id);
    const contributorIds = Array.from(contributors);

-    const isAgent = provenance?.actor === 'agent';
-
    // Detect a real title/icon change so the WS tree listener can broadcast an
    // `updateOne` to the space (rename / icon swap) WITHOUT re-broadcasting on a
    // content-only save. Only treat a field as changed when the DTO actually
@@ -250,13 +244,9 @@ export class PageService {
        icon: updatePageDto.icon,
        lastUpdatedById: user.id,
        // Agent-edit provenance: annotate the source without changing the
-        // responsible author. A normal user request leaves the column default.
-        ...(isAgent
-          ? {
-              lastUpdatedSource: 'agent',
-              lastUpdatedAiChatId: provenance.aiChatId,
-            }
-          : {}),
+        // responsible author. A normal user request leaves the existing source
+        // value unchanged.
+        ...agentSourceFields(provenance, 'lastUpdatedSource', 'lastUpdatedAiChatId'),
        updatedAt: new Date(),
        contributorIds: contributorIds,
      },
@@ -443,7 +433,6 @@ export class PageService {
    provenance?: AuthProvenanceData,
  ) {
    let childPageIds: string[] = [];
-    const isAgent = provenance?.actor === 'agent';

    const allPages = await this.pageRepo.getPageAndDescendants(rootPage.id, {
      includeContent: false,
@@ -490,12 +479,7 @@ export class PageService {
          // Agent-edit provenance on the moved root page. Child pages are bulk
          // re-parented to the new space (no content change), so the marker is
          // stamped on the root the agent acted on. Normal user: no change.
-          ...(isAgent
-            ? {
-                lastUpdatedSource: 'agent',
-                lastUpdatedAiChatId: provenance.aiChatId,
-              }
-            : {}),
+          ...agentSourceFields(provenance, 'lastUpdatedSource', 'lastUpdatedAiChatId'),
        },
        rootPage.id,
        trx,
@@ -949,20 +933,13 @@ export class PageService {
      }
    }

-    const isAgent = provenance?.actor === 'agent';
-
    const updateResult = await this.pageRepo.updatePage(
      {
        position: dto.position,
        parentPageId: parentPageId,
        // Agent-edit provenance: annotate the source on an agent move. A normal
-        // user request leaves the column default ('user').
-        ...(isAgent
-          ? {
-              lastUpdatedSource: 'agent',
-              lastUpdatedAiChatId: provenance.aiChatId,
-            }
-          : {}),
+        // user request leaves the existing source value unchanged.
+        ...agentSourceFields(provenance, 'lastUpdatedSource', 'lastUpdatedAiChatId'),
      },
      dto.pageId,
    );
--- a/apps/server/src/database/migrations/20260623T120000-user-is-agent.ts
+++ b/apps/server/src/database/migrations/20260623T120000-user-is-agent.ts
@@ -0,0 +1,23 @@
+import { type Kysely } from 'kysely';
+
+/**
+ * Agent identity flag on users (MCP comment/page AI attribution).
+ *
+ * Adds `users.is_agent`: NOT NULL boolean, default false (ordinary users are
+ * unaffected), marking a service account as an AI agent. The JWT strategy and
+ * the collab auth seam derive 'agent' provenance from this SIGNED server-side
+ * identity, never from a client-supplied field, so every write by the account
+ * is attributed to AI unspoofably. A dedicated column (not `role`, which has
+ * authorization semantics, nor `settings`) keeps it explicit and filterable.
+ */
+export async function up(db: Kysely<any>): Promise<void> {
+  await db.schema
+    .alterTable('users')
+    .addColumn('is_agent', 'boolean', (col) => col.notNull().defaultTo(false))
+    .execute();
+}
+
+/** Reverts `up`: drops `users.is_agent`, discarding any stored flags. */
+export async function down(db: Kysely<any>): Promise<void> {
+  await db.schema.alterTable('users').dropColumn('is_agent').execute();
+}
--- a/apps/server/src/database/repos/user/user.repo.ts
+++ b/apps/server/src/database/repos/user/user.repo.ts
@@ -45,6 +45,12 @@ export class UserRepo {
      includePassword?: boolean;
      includeUserMfa?: boolean;
      includeScimExternalId?: boolean;
+      // Opt-in: `isAgent` is internal provenance state, not part of the generic
+      // user payload. Keeping it out of `baseFields` stops it from leaking into
+      // the workspace member list / `/users/me` (an enumeration leak). Only the
+      // JWT + collab auth seams opt in, because they derive a non-spoofable
+      // 'agent' provenance from the signed server-side identity.
+      includeIsAgent?: boolean;
      trx?: KyselyTransaction;
    },
  ): Promise<User> {
@@ -55,6 +61,7 @@ export class UserRepo {
      .$if(opts?.includePassword, (qb) => qb.select('password'))
      .$if(opts?.includeUserMfa, (qb) => qb.select(this.withUserMfa))
      .$if(opts?.includeScimExternalId, (qb) => qb.select('scimExternalId'))
+      .$if(opts?.includeIsAgent, (qb) => qb.select('isAgent'))
      .where('id', '=', userId)
      .where('workspaceId', '=', workspaceId)
      .executeTakeFirst();
--- a/apps/server/src/database/types/db.d.ts
+++ b/apps/server/src/database/types/db.d.ts
@@ -368,6 +368,7 @@ export interface Users {
  emailVerifiedAt: Timestamp | null;
  id: Generated<string>;
  invitedById: string | null;
+  isAgent: Generated<boolean>;
  lastActiveAt: Timestamp | null;
  lastLoginAt: Timestamp | null;
  locale: string | null;