Skip to content

Commit 48a6e0e

Browse files
authored
Fix fromURL baseURI issues (#4696)
Fixes #4690
1 parent e1db419 commit 48a6e0e

File tree

3 files changed

+15
-11
lines changed

3 files changed

+15
-11
lines changed

src/index.spec.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ function getPromise() {
1717
return { promise, cb };
1818
}
1919

20-
const TEST_HTML = '<h1>Hello World</h1>';
20+
const TEST_HTML = '<h1>Hello World</h1><a href="link">Example</a>';
2121
const TEST_HTML_UTF16 = Buffer.from(TEST_HTML, 'utf16le');
2222
const TEST_HTML_UTF16_BOM = Buffer.from([
2323
// UTF16-LE BOM
@@ -97,6 +97,7 @@ describe('decodeStream', () => {
9797
expect($.html()).toBe(
9898
`<html><head></head><body>${TEST_HTML}</body></html>`,
9999
);
100+
expect($('a').prop('href')).toBe('link');
100101
});
101102

102103
it('should use htmlparser2 for XML', async () => {
@@ -201,19 +202,21 @@ describe('fromURL', () => {
201202
let redirected = false;
202203
const port = await createTestServer('text/html', TEST_HTML, (req, res) => {
203204
if (redirected) {
204-
expect(req.url).toBe('/final');
205+
expect(req.url).toBe('/final/path');
205206
res.writeHead(200, { 'Content-Type': 'text/html' });
206207
res.end(TEST_HTML);
207208
} else {
209+
expect(req.url).toBe('/first');
208210
redirected = true;
209-
res.writeHead(302, { Location: `http://localhost:${port}/final` });
211+
res.writeHead(302, { Location: `http://localhost:${port}/final/path` });
210212
res.end();
211213
}
212214
});
213215

214-
const $ = await cheerio.fromURL(`http://localhost:${port}`);
216+
const $ = await cheerio.fromURL(`http://localhost:${port}/first`);
215217
expect($.html()).toBe(
216218
`<html><head></head><body>${TEST_HTML}</body></html>`,
217219
);
220+
expect($('a').prop('href')).toBe(`http://localhost:${port}/final/link`);
218221
});
219222
});

src/index.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ function _stringStream(
6969
): Writable {
7070
if (options?._useHtmlParser2) {
7171
const parser = htmlparser2.createDocumentStream(
72-
(err, document) => cb(err, load(document)),
72+
(err, document) => cb(err, load(document, options)),
7373
options,
7474
);
7575

@@ -99,7 +99,7 @@ function _stringStream(
9999

100100
const stream = new Parse5Stream(options);
101101

102-
finished(stream, (err) => cb(err, load(stream.document)));
102+
finished(stream, (err) => cb(err, load(stream.document, options)));
103103

104104
return stream;
105105
}
@@ -231,7 +231,7 @@ export async function fromURL(
231231
};
232232

233233
const promise = new Promise<CheerioAPI>((resolve, reject) => {
234-
undiciStream = new undici.Client(url)
234+
undiciStream = new undici.Client(urlObject.origin)
235235
.compose(undici.interceptors.redirect({ maxRedirections: 5 }))
236236
.stream(streamOptions, (res) => {
237237
if (res.statusCode < 200 || res.statusCode >= 300) {
@@ -272,13 +272,14 @@ export async function fromURL(
272272
}
273273
| undefined
274274
)?.history;
275+
// Set the `baseURI` to the final URL.
276+
const baseURI = history ? history[history.length - 1] : urlObject;
275277

276-
const opts = {
278+
const opts: DecodeStreamOptions = {
277279
encoding,
278280
// Set XML mode based on the MIME type.
279281
xmlMode: mimeType.isXML(),
280-
// Set the `baseURL` to the final URL.
281-
baseURL: history ? history[history.length - 1] : url,
282+
baseURI,
282283
...cheerioOptions,
283284
};
284285

website/docs/advanced/extract.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ element.
8181
As an attribute with special logic inside the `prop` method, `href`s will be
8282
resolved relative to the document's URL. The document's URL will be set
8383
automatically when using `fromURL` to load the document. Otherwise, use the
84-
`baseURL` option to specify the documents URL.
84+
`baseURI` option to specify the document's URL.
8585

8686
There are many props available here; have a look at the
8787
[`prop` method](/docs/api/classes/Cheerio#prop) for details. For example, to

0 commit comments

Comments
 (0)