2 changes: 0 additions & 2 deletions .github/workflows/lambda.yml
@@ -2,8 +2,6 @@ name: Build lambdas

on:
pull_request:
branches:
- main
paths:
- 'lambdas/**'
- '.github/workflows/lambda.yml'
@@ -1698,6 +1698,7 @@ describe('Retry mechanism tests', () => {

it('calls publishRetryMessage for each valid message when job is queued', async () => {
const messages = createTestMessages(3);
mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890', 'i-abcdef']); // Create all requested runners
Member:
@copilot why is this added?

Contributor Author:
This ensures all 3 requested runners are created successfully. With the new behavior, publishRetryMessage is only called for messages that aren't rejected. The default mock only creates 1 runner, so when 3 are requested, 2 messages would be marked rejected (creation failed) and wouldn't get retry messages. By mocking all 3 runners, we test that retry messages are published for all successfully processed messages.
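For context, a minimal sketch of the relationship the reply describes, using the identifiers visible in this diff; `runnerParameters` and the way `failedInstanceCount` is derived are assumptions for illustration, not the exact implementation:

```typescript
// createRunner resolves with the ids of the instances it actually launched,
// so the number of failed creations is "requested minus created".
const instances = await createRunner(runnerParameters); // default mock: ['i-12345'] only
const failedInstanceCount = newRunners - instances.length; // 3 requested, 1 created -> 2 failed

// The first `failedInstanceCount` messages are marked rejected and are returned
// to the SQS queue instead of getting an explicit retry message.
messages.slice(0, failedInstanceCount).forEach(({ messageId }) => rejectedMessageIds.add(messageId));

// Only the remaining, non-rejected messages are published for retry.
for (const message of queuedMessages) {
  if (!rejectedMessageIds.has(message.messageId)) {
    await publishRetryMessage(message as ActionRequestMessageRetry);
  }
}
```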


await scaleUpModule.scaleUp(messages);

@@ -1749,15 +1750,16 @@ describe('Retry mechanism tests', () => {
);
});

it('calls publishRetryMessage even when maximum runners is reached', async () => {
it('does not call publishRetryMessage when maximum runners is reached and messages are marked invalid', async () => {
process.env.RUNNERS_MAXIMUM_COUNT = '0'; // No runners can be created

const messages = createTestMessages(2);

await scaleUpModule.scaleUp(messages);

// publishRetryMessage should still be called even though no runners will be created
expect(mockPublishRetryMessage).toHaveBeenCalledTimes(2);
// publishRetryMessage should NOT be called because messages are marked as invalid
// Invalid messages go back to the SQS queue and will be retried there
expect(mockPublishRetryMessage).not.toHaveBeenCalled();
expect(createRunner).not.toHaveBeenCalled();
});

@@ -1781,6 +1783,7 @@ describe('Retry mechanism tests', () => {

it('calls publishRetryMessage when ENABLE_JOB_QUEUED_CHECK is false', async () => {
process.env.ENABLE_JOB_QUEUED_CHECK = 'false';
mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890']); // Create all requested runners

const messages = createTestMessages(2);

@@ -1792,6 +1795,7 @@ describe('Retry mechanism tests', () => {
});

it('calls publishRetryMessage for each message in a multi-runner scenario', async () => {
mockCreateRunner.mockResolvedValue(['i-12345', 'i-67890', 'i-abcdef', 'i-11111', 'i-22222']); // Create all requested runners
Member:
@copilot why this line?

Contributor Author:
Same reason: it ensures all 5 requested runners are created so no messages are marked rejected. This tests that publishRetryMessage is called for each message when runner creation succeeds.

const messages = createTestMessages(5);

await scaleUpModule.scaleUp(messages);
@@ -1808,8 +1812,9 @@ describe('Retry mechanism tests', () => {
});
});

it('calls publishRetryMessage before runner creation', async () => {
it('calls publishRetryMessage after runner creation', async () => {
const messages = createTestMessages(1);
mockCreateRunner.mockResolvedValue(['i-12345']); // Create the requested runner
Member:
@copilot why this line?

Contributor Author:
Ensures the requested runner is created successfully. This test verifies the order of operations (runner creation before retry message publishing), and needs a successful creation to ensure the retry message is actually published.
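A sketch of how the ordering check presumably fits together; the mock bodies are collapsed in the diff, so everything beyond the visible lines (the pushes into callOrder and the resolved instance id) is an assumption:

```typescript
const callOrder: string[] = [];
mockPublishRetryMessage.mockImplementation(async () => {
  callOrder.push('publishRetryMessage');
});
mockCreateRunner.mockImplementation(async () => {
  callOrder.push('createRunner');
  return ['i-12345']; // creation must succeed for the retry message to be published
});

await scaleUpModule.scaleUp(messages);

// New behavior: the runner is created first, then the retry message is published.
expect(callOrder).toEqual(['createRunner', 'publishRetryMessage']);
```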


const callOrder: string[] = [];
mockPublishRetryMessage.mockImplementation(() => {
@@ -1823,7 +1828,7 @@ describe('Retry mechanism tests', () => {

await scaleUpModule.scaleUp(messages);

expect(callOrder).toEqual(['publishRetryMessage', 'createRunner']);
expect(callOrder).toEqual(['createRunner', 'publishRetryMessage']);
});
});

28 changes: 22 additions & 6 deletions lambdas/functions/control-plane/src/scale-runners/scale-up.ts
@@ -275,7 +275,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
};

const validMessages = new Map<string, MessagesWithClient>();
const invalidMessages: string[] = [];
const rejectedMessageIds = new Set<string>();
for (const payload of payloads) {
const { eventType, messageId, repositoryName, repositoryOwner } = payload;
if (ephemeralEnabled && eventType !== 'workflow_job') {
@@ -284,7 +284,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
{ eventType, messageId },
);

invalidMessages.push(messageId);
rejectedMessageIds.add(messageId);

continue;
}
@@ -339,6 +339,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
for (const [group, { githubInstallationClient, messages }] of validMessages.entries()) {
// Work out how much we want to scale up by.
let scaleUp = 0;
const queuedMessages: ActionRequestMessageSQS[] = [];

for (const message of messages) {
const messageLogger = logger.createChild({
@@ -357,7 +358,7 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
}

scaleUp++;
await publishRetryMessage(message as ActionRequestMessageRetry);
queuedMessages.push(message);
}

if (scaleUp === 0) {
@@ -393,11 +394,18 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
if (ephemeralEnabled) {
// This removes `missingInstanceCount` items from the start of the array
// so that, if we retry more messages later, we pick fresh ones.
invalidMessages.push(...messages.splice(0, missingInstanceCount).map(({ messageId }) => messageId));
const removedMessages = messages.splice(0, missingInstanceCount);
removedMessages.forEach(({ messageId }) => rejectedMessageIds.add(messageId));
}

// No runners will be created, so skip calling the EC2 API.
if (newRunners <= 0) {
// Publish retry messages for messages that are not rejected
for (const message of queuedMessages) {
if (!rejectedMessageIds.has(message.messageId)) {
await publishRetryMessage(message as ActionRequestMessageRetry);
}
}
continue;
}
}
@@ -449,11 +457,19 @@ export async function scaleUp(payloads: ActionRequestMessageSQS[]): Promise<stri
failedInstanceCount,
});

invalidMessages.push(...messages.slice(0, failedInstanceCount).map(({ messageId }) => messageId));
const failedMessages = messages.slice(0, failedInstanceCount);
failedMessages.forEach(({ messageId }) => rejectedMessageIds.add(messageId));
}

// Publish retry messages for messages that are not rejected
for (const message of queuedMessages) {
if (!rejectedMessageIds.has(message.messageId)) {
await publishRetryMessage(message as ActionRequestMessageRetry);
}
}
}

return invalidMessages;
return Array.from(rejectedMessageIds);
}

export function getGitHubEnterpriseApiUrl() {