Skip to content

Commit fe2d206

Browse files
authored
fix: respect autoscaledPoolOptions.isTaskReadyFunction option (#2948)
Closes #2922
1 parent c9f7f54 commit fe2d206

File tree

2 files changed

+13
-5
lines changed

2 files changed

+13
-5
lines changed

packages/basic-crawler/src/internals/basic-crawler.ts

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -54,8 +54,6 @@ import type { Awaitable, BatchAddRequestsResult, Dictionary, SetStatusMessageOpt
5454
import { RobotsTxtFile, ROTATE_PROXY_ERRORS } from '@crawlee/utils';
5555
import { stringify } from 'csv-stringify/sync';
5656
import { ensureDir, writeFile, writeJSON } from 'fs-extra';
57-
// @ts-expect-error This throws a compilation error due to got-scraping being ESM only but we only import types, so its alllll gooooood
58-
import type { GotResponse, Method, OptionsInit } from 'got-scraping';
5957
import ow, { ArgumentError } from 'ow';
6058
import { getDomain } from 'tldts';
6159
import type { SetRequired } from 'type-fest';
@@ -741,7 +739,8 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
741739
let shouldLogMaxPagesExceeded = true;
742740
const isMaxPagesExceeded = () => maxRequestsPerCrawl && maxRequestsPerCrawl <= this.handledRequestsCount;
743741

744-
let { isFinishedFunction } = autoscaledPoolOptions;
742+
// eslint-disable-next-line prefer-const
743+
let { isFinishedFunction, isTaskReadyFunction } = autoscaledPoolOptions;
745744

746745
// override even if `isFinishedFunction` provided by user - `keepAlive` has higher priority
747746
if (keepAlive) {
@@ -765,7 +764,7 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
765764
return false;
766765
}
767766

768-
return this._isTaskReadyFunction();
767+
return isTaskReadyFunction ? await isTaskReadyFunction() : await this._isTaskReadyFunction();
769768
},
770769
isFinishedFunction: async () => {
771770
if (isMaxPagesExceeded()) {

test/core/crawlers/basic_crawler.test.ts

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -734,20 +734,27 @@ describe('BasicCrawler', () => {
734734
expect(await crawler._isTaskReadyFunction()).toBe(false);
735735
});
736736

737-
test('should be possible to override isFinishedFunction of underlying AutoscaledPool', async () => {
737+
test('should be possible to override isFinishedFunction and isTaskReadyFunction of underlying AutoscaledPool', async () => {
738738
const requestQueue = new RequestQueue({ id: 'xxx', client: Configuration.getStorageClient() });
739739
const processed: Request[] = [];
740740
const queue: Request[] = [];
741741
let isFinished = false;
742+
let isFinishedFunctionCalled = false;
743+
let isTaskReadyFunctionCalled = false;
742744

743745
const basicCrawler = new BasicCrawler({
744746
requestQueue,
745747
autoscaledPoolOptions: {
746748
minConcurrency: 1,
747749
maxConcurrency: 1,
748750
isFinishedFunction: async () => {
751+
isFinishedFunctionCalled = true;
749752
return Promise.resolve(isFinished);
750753
},
754+
isTaskReadyFunction: async () => {
755+
isTaskReadyFunctionCalled = true;
756+
return Promise.resolve(!isFinished);
757+
},
751758
},
752759
requestHandler: async ({ request }) => {
753760
await sleep(10);
@@ -783,6 +790,8 @@ describe('BasicCrawler', () => {
783790
expect(markRequestHandled).toBeCalledWith(request0);
784791
expect(markRequestHandled).toBeCalledWith(request1);
785792
expect(isFinishedOrig).not.toBeCalled();
793+
expect(isFinishedFunctionCalled).toBe(true);
794+
expect(isTaskReadyFunctionCalled).toBe(true);
786795

787796
// TODO: see why the request1 was passed as a second parameter to includes
788797
expect(processed.includes(request0)).toBe(true);

0 commit comments

Comments
 (0)