Skip to content

Commit 12210bd

Browse files
authored
perf: optimize RequestList memory footprint (#2466)
The request list now delays the conversion of the source items into `Request` objects, resulting in a significantly smaller memory footprint. Related: https://apify.slack.com/archives/C0L33UM7Z/p1715109984834079
1 parent 38c0942 commit 12210bd

File tree

3 files changed

+41
-18
lines changed

3 files changed

+41
-18
lines changed

packages/core/src/request.ts

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ export class Request<UserData extends Dictionary = Dictionary> {
193193
this.id = id;
194194
this.url = url;
195195
this.loadedUrl = loadedUrl;
196-
this.uniqueKey = uniqueKey || this._computeUniqueKey({ url, method, payload, keepUrlFragment, useExtendedUniqueKey });
196+
this.uniqueKey = uniqueKey || Request.computeUniqueKey({ url, method, payload, keepUrlFragment, useExtendedUniqueKey });
197197
this.method = method;
198198
this.payload = payload;
199199
this.noRetry = noRetry;
@@ -378,7 +378,18 @@ export class Request<UserData extends Dictionary = Dictionary> {
378378
this.errorMessages.push(message);
379379
}
380380

381-
protected _computeUniqueKey({ url, method, payload, keepUrlFragment, useExtendedUniqueKey }: ComputeUniqueKeyOptions) {
381+
// TODO: only for better BC, remove in v4
382+
protected _computeUniqueKey(options: ComputeUniqueKeyOptions) {
383+
return Request.computeUniqueKey(options);
384+
}
385+
386+
// TODO: only for better BC, remove in v4
387+
protected _hashPayload(payload: BinaryLike): string {
388+
return Request.hashPayload(payload);
389+
}
390+
391+
/** @internal */
392+
static computeUniqueKey({ url, method = 'GET', payload, keepUrlFragment = false, useExtendedUniqueKey = false }: ComputeUniqueKeyOptions) {
382393
const normalizedMethod = method.toUpperCase();
383394
const normalizedUrl = normalizeUrl(url, keepUrlFragment) || url; // It returns null when url is invalid, causing weird errors.
384395
if (!useExtendedUniqueKey) {
@@ -390,11 +401,12 @@ export class Request<UserData extends Dictionary = Dictionary> {
390401
}
391402
return normalizedUrl;
392403
}
393-
const payloadHash = payload ? this._hashPayload(payload) : '';
404+
const payloadHash = payload ? Request.hashPayload(payload) : '';
394405
return `${normalizedMethod}(${payloadHash}):${normalizedUrl}`;
395406
}
396407

397-
protected _hashPayload(payload: BinaryLike): string {
408+
/** @internal */
409+
static hashPayload(payload: BinaryLike): string {
398410
return crypto
399411
.createHash('sha256')
400412
.update(payload)

packages/core/src/storages/request_list.ts

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@ import type { EventManager } from '../events';
99
import { EventType } from '../events';
1010
import { log } from '../log';
1111
import type { ProxyConfiguration } from '../proxy_configuration';
12-
import type { InternalSource, RequestOptions, Source } from '../request';
13-
import { Request } from '../request';
12+
import { type InternalSource, type RequestOptions, Request, type Source } from '../request';
1413
import { createDeserialize, serializeArray } from '../serialization';
1514

1615
/** @internal */
@@ -238,7 +237,7 @@ export class RequestList {
238237
* All requests in the array have distinct uniqueKey!
239238
* @internal
240239
*/
241-
requests: Request[] = [];
240+
requests: (Request | RequestOptions)[] = [];
242241

243242
/** Index to the next item in requests array to fetch. All previous requests are either handled or in progress. */
244243
private nextIndex = 0;
@@ -551,7 +550,7 @@ export class RequestList {
551550
return {
552551
nextIndex: this.nextIndex,
553552
nextUniqueKey: this.nextIndex < this.requests.length
554-
? this.requests[this.nextIndex].uniqueKey
553+
? this.requests[this.nextIndex].uniqueKey!
555554
: null,
556555
inProgress: [...this.inProgress],
557556
};
@@ -593,21 +592,31 @@ export class RequestList {
593592
if (uniqueKey) {
594593
this.reclaimed.delete(uniqueKey);
595594
const index = this.uniqueKeyToIndex[uniqueKey];
596-
return this.requests[index];
595+
return this.ensureRequest(this.requests[index], index);
597596
}
598597

599598
// Otherwise return next request.
600599
if (this.nextIndex < this.requests.length) {
601-
const request = this.requests[this.nextIndex];
602-
this.inProgress.add(request.uniqueKey);
600+
const index = this.nextIndex;
601+
const request = this.requests[index];
602+
this.inProgress.add(request.uniqueKey!);
603603
this.nextIndex++;
604604
this.isStatePersisted = false;
605-
return request;
605+
return this.ensureRequest(request, index);
606606
}
607607

608608
return null;
609609
}
610610

611+
private ensureRequest(requestLike: Request | RequestOptions, index: number): Request {
612+
if (requestLike instanceof Request) {
613+
return requestLike;
614+
}
615+
616+
this.requests[index] = new Request(requestLike);
617+
return this.requests[index] as Request;
618+
}
619+
611620
/**
612621
* Marks request as handled after successful processing.
613622
*/
@@ -694,19 +703,21 @@ export class RequestList {
694703
* of a `Request`, then the function creates a `Request` instance.
695704
*/
696705
protected _addRequest(source: RequestListSource) {
697-
let request;
706+
let request: Request | RequestOptions;
698707
const type = typeof source;
708+
699709
if (type === 'string') {
700-
request = new Request({ url: source as string });
710+
request = { url: source as string };
701711
} else if (source instanceof Request) {
702712
request = source;
703713
} else if (source && type === 'object') {
704-
request = new Request(source as RequestOptions);
714+
request = source as RequestOptions;
705715
} else {
706716
throw new Error(`Cannot create Request from type: ${type}`);
707717
}
708718

709719
const hasUniqueKey = Reflect.has(Object(source), 'uniqueKey');
720+
request.uniqueKey ??= Request.computeUniqueKey(request as any);
710721

711722
// Add index to uniqueKey if duplicates are to be kept
712723
if (this.keepDuplicateUrls && !hasUniqueKey) {

test/core/request_list.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,7 @@ describe('RequestList', () => {
728728
const name = 'xxx';
729729
const SDK_KEY = `SDK_${name}`;
730730
const sources = ['https://example.com'];
731-
const requests = sources.map((url) => new Request({ url }));
731+
const requests = sources.map((url) => ({ url, uniqueKey: url }));
732732

733733
const rl = await RequestList.open(name, sources);
734734
expect(rl).toBeInstanceOf(RequestList);
@@ -752,7 +752,7 @@ describe('RequestList', () => {
752752
const SDK_KEY = `SDK_${name}`;
753753
let counter = 0;
754754
const sources = [{ url: 'https://example.com' }];
755-
const requests = sources.map(({ url }) => new Request({ url, uniqueKey: `${url}-${counter++}` }));
755+
const requests = sources.map(({ url }) => ({ url, uniqueKey: `${url}-${counter++}` }));
756756
const options = {
757757
keepDuplicateUrls: true,
758758
persistStateKey: 'yyy',
@@ -780,7 +780,7 @@ describe('RequestList', () => {
780780

781781
const name: string = null;
782782
const sources = [{ url: 'https://example.com' }];
783-
const requests = sources.map(({ url }) => new Request({ url }));
783+
const requests = sources.map(({ url }) => ({ url, uniqueKey: url }));
784784

785785
const rl = await RequestList.open(name, sources);
786786
expect(rl).toBeInstanceOf(RequestList);

0 commit comments

Comments
 (0)