@@ -20,6 +20,80 @@ export const REQUESTS_PERSISTENCE_KEY = 'REQUEST_LIST_REQUESTS';
2020
2121const CONTENT_TYPE_BINARY = 'application/octet-stream' ;
2222
23+ /**
24+ * Contract for a static list of requests (URLs) to crawl; implemented by {@apilink RequestList}.
25+ */
26+ export interface IRequestList {
27+ /**
28+ * Returns the total number of unique requests present in the list.
29+ */
30+ length ( ) : number ;
31+
32+ /**
33+ * Resolves to `true` if all requests were already handled and there are no more left.
34+ */
35+ isFinished ( ) : Promise < boolean > ;
36+
37+ /**
38+ * Resolves to `true` if the next call to {@apilink IRequestList.fetchNextRequest} function
39+ * would return `null`, otherwise it resolves to `false`.
40+ * Note that even if the list is empty, there might be some pending requests currently being processed.
41+ */
42+ isEmpty ( ) : Promise < boolean > ;
43+
44+ /**
45+ * Returns the number of handled requests.
46+ */
47+ handledCount ( ) : number ;
48+
49+ /**
50+ * Persists the current state of the `IRequestList` into the default {@apilink KeyValueStore}.
51+ * The state is persisted automatically in regular intervals, but calling this method manually
52+ * is useful in cases where you want to have the most current state available after you pause
53+ * or stop fetching its requests, for example after you pause or abort a crawl, or just before
54+ * a server migration.
55+ */
56+ persistState ( ) : Promise < void > ;
57+
58+ /**
59+ * Gets the next {@apilink Request} to process. First, the function gets a request previously reclaimed
60+ * using the {@apilink IRequestList.reclaimRequest} function, if there is any.
61+ * Otherwise it gets the next request from sources.
62+ *
63+ * The function's `Promise` resolves to `null` if there are no more
64+ * requests to process.
65+ */
66+ fetchNextRequest ( ) : Promise < Request | null > ;
67+
68+ /**
69+ * Iterates over the requests to process. Like {@apilink IRequestList.fetchNextRequest}, it first yields
70+ * requests previously reclaimed using the {@apilink IRequestList.reclaimRequest} function, if there are any,
71+ * and otherwise the next requests from sources.
72+ *
73+ * The iteration ends (no `null` is ever yielded) once there are no more requests to process.
74+ *
75+ * Can be used to iterate over the `IRequestList` instance in a `for await...of` loop.
76+ * Provides an alternative for the repeated use of `fetchNextRequest`.
77+ */
78+ [ Symbol . asyncIterator ] ( ) : AsyncGenerator < Request > ;
79+
80+ /**
81+ * Reclaims a request to the list if its processing failed.
82+ * The request will become available again in the next call to `fetchNextRequest()`.
83+ */
84+ reclaimRequest ( request : Request ) : Promise < void > ;
85+
86+ /**
87+ * Marks a request as handled after successful processing.
88+ */
89+ markRequestHandled ( request : Request ) : Promise < void > ;
90+
91+ /**
92+ * @internal
93+ */
94+ inProgress : Set < string > ;
95+ }
96+
2397export interface RequestListOptions {
2498 /**
2599 * An array of sources of URLs for the {@apilink RequestList}. It can be either an array of strings,
@@ -229,7 +303,7 @@ export interface RequestListOptions {
229303 * ```
230304 * @category Sources
231305 */
232- export class RequestList {
306+ export class RequestList implements IRequestList {
233307 private log = log . child ( { prefix : 'RequestList' } ) ;
234308
235309 /**
@@ -431,11 +505,7 @@ export class RequestList {
431505 }
432506
433507 /**
434- * Persists the current state of the `RequestList` into the default {@apilink KeyValueStore}.
435- * The state is persisted automatically in regular intervals, but calling this method manually
436- * is useful in cases where you want to have the most current state available after you pause
437- * or stop fetching its requests. For example after you pause or abort a crawl. Or just before
438- * a server migration.
508+ * @inheritDoc
439509 */
440510 async persistState ( ) : Promise < void > {
441511 if ( ! this . persistStateKey ) {
@@ -570,9 +640,7 @@ export class RequestList {
570640 }
571641
572642 /**
573- * Resolves to `true` if the next call to {@apilink RequestList.fetchNextRequest} function
574- * would return `null`, otherwise it resolves to `false`.
575- * Note that even if the list is empty, there might be some pending requests currently being processed.
643+ * @inheritDoc
576644 */
577645 async isEmpty ( ) : Promise < boolean > {
578646 this . _ensureIsInitialized ( ) ;
@@ -581,7 +649,7 @@ export class RequestList {
581649 }
582650
583651 /**
584- * Returns `true` if all requests were already handled and there are no more left.
652+ * @inheritDoc
585653 */
586654 async isFinished ( ) : Promise < boolean > {
587655 this . _ensureIsInitialized ( ) ;
@@ -590,12 +658,7 @@ export class RequestList {
590658 }
591659
592660 /**
593- * Gets the next {@apilink Request} to process. First, the function gets a request previously reclaimed
594- * using the {@apilink RequestList.reclaimRequest} function, if there is any.
595- * Otherwise it gets the next request from sources.
596- *
597- * The function's `Promise` resolves to `null` if there are no more
598- * requests to process.
661+ * @inheritDoc
599662 */
600663 async fetchNextRequest ( ) : Promise < Request | null > {
601664 this . _ensureIsInitialized ( ) ;
@@ -621,6 +684,17 @@ export class RequestList {
621684 return null ;
622685 }
623686
687+ /**
688+ * @inheritDoc
689+ */
690+ async * [ Symbol . asyncIterator ] ( ) {
691+ while ( true ) { // loop is left only through the `break` below
692+ const req = await this . fetchNextRequest ( ) ; // one request per iteration step
693+ if ( ! req ) break ; // falsy result (`null`) means nothing is left: end the iteration instead of yielding it
694+ yield req ;
695+ }
696+ }
697+
624698 private ensureRequest ( requestLike : Request | RequestOptions , index : number ) : Request {
625699 if ( requestLike instanceof Request ) {
626700 return requestLike ;
@@ -631,7 +705,7 @@ export class RequestList {
631705 }
632706
633707 /**
634- * Marks request as handled after successful processing.
708+ * @inheritDoc
635709 */
636710 async markRequestHandled ( request : Request ) : Promise < void > {
637711 const { uniqueKey } = request ;
@@ -645,8 +719,7 @@ export class RequestList {
645719 }
646720
647721 /**
648- * Reclaims request to the list if its processing failed.
649- * The request will become available in the next `this.fetchNextRequest()`.
722+ * @inheritDoc
650723 */
651724 async reclaimRequest ( request : Request ) : Promise < void > {
652725 const { uniqueKey } = request ;
@@ -798,7 +871,7 @@ export class RequestList {
798871 }
799872
800873 /**
801- * Returns number of handled requests.
874+ * @inheritDoc
802875 */
803876 handledCount ( ) : number {
804877 this . _ensureIsInitialized ( ) ;
0 commit comments