Skip to content

Commit 3ff398a

Browse files
authored
feat: make the CLI download directly from GitHub (#1540)
1 parent 09133ff commit 3ff398a

File tree

6 files changed

+228
-40
lines changed

6 files changed

+228
-40
lines changed

packages/cli/src/commands/CreateProjectCommand.ts

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
import { mkdirSync } from 'node:fs';
2-
import { join } from 'node:path';
2+
import { dirname, join } from 'node:path';
33
import { execSync } from 'node:child_process';
44
import type { ArgumentsCamelCase, Argv, CommandModule } from 'yargs';
55
import { prompt } from 'inquirer';
66
import colors from 'ansi-colors';
7+
import type { Template } from '@crawlee/templates';
78
import { fetchManifest } from '@crawlee/templates';
8-
import { copy } from 'fs-extra';
9-
import { readFileSync, writeFileSync } from 'fs';
109
import { resolve } from 'path';
10+
import { readFile, writeFile } from 'node:fs/promises';
11+
import { get } from 'node:https';
12+
import { ensureDir } from 'fs-extra';
1113

1214
interface CreateProjectArgs {
1315
projectName?: string;
@@ -20,16 +22,52 @@ function validateProjectName(name: string) {
2022
}
2123
}
2224

/**
 * Reads the file at `path` as UTF-8, passes its contents through `replacer`
 * and writes the result back to the same path.
 *
 * A missing file is tolerated (the function resolves without doing anything),
 * because not every template has to ship the file being rewritten. Any other
 * failure (unreadable/unwritable file, an exception thrown by `replacer`) is
 * rethrown so that it is not silently mistaken for "not found".
 *
 * @param path Path of the file to rewrite in place.
 * @param replacer Receives the current file contents and returns the new contents.
 */
async function rewrite(path: string, replacer: (from: string) => string): Promise<void> {
    try {
        const file = await readFile(path, 'utf8');
        const replaced = replacer(file);
        await writeFile(path, replaced);
    } catch (err: unknown) {
        // Only "file does not exist" is an expected, ignorable condition.
        const code = typeof err === 'object' && err !== null && 'code' in err
            ? (err as { code?: unknown }).code
            : undefined;

        if (code !== 'ENOENT') {
            throw err;
        }
    }
}
3234

/**
 * Downloads every file listed in `template.files` and stores it under
 * `destinationDirectory`, keeping each file's relative `path`.
 *
 * All downloads are started at once; the returned promise settles when every
 * file has been written, or rejects as soon as any download or write fails.
 *
 * @param template Template whose `files` entries (`path` + `url`) should be fetched.
 * @param destinationDirectory Directory the files are written into.
 */
async function downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) {
    const pendingWrites = template.files.map(async ({ url, path }) => {
        const contents = await downloadFile(url);

        // The target folder (e.g. `src/`) may not exist yet, create it first.
        const parentFolder = resolve(destinationDirectory, dirname(path));
        await ensureDir(parentFolder);

        // Now the file itself can be written.
        const targetPath = resolve(destinationDirectory, path);
        await writeFile(targetPath, contents);
    });

    await Promise.all(pendingWrites);
}
54+
55+
async function downloadFile(url: string) {
56+
return new Promise<Buffer>((promiseResolve, reject) => {
57+
get(url, async (res) => {
58+
const bytes: Buffer[] = [];
59+
60+
res.on('error', (err) => reject(err));
61+
62+
for await (const byte of res) {
63+
bytes.push(byte);
64+
}
65+
66+
promiseResolve(Buffer.concat(bytes));
67+
}).on('error', (err) => reject(err));
68+
});
69+
}
70+
3371
export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectArgs> {
3472
command = 'create [project-name]';
3573
describe = 'Creates a new Crawlee project directory from a selected boilerplate template.';
@@ -105,8 +143,10 @@ export class CreateProjectCommand<T> implements CommandModule<T, CreateProjectAr
105143
throw err;
106144
}
107145

108-
await copy(require.resolve('@crawlee/templates').replace('index.js', `templates/${template}`), projectDir);
109-
rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`));
146+
const templateData = manifest.templates.find((item) => item.name === template)!;
147+
148+
await downloadTemplateFilesToDisk(templateData, projectDir);
149+
await rewrite(resolve(projectDir, 'package.json'), (pkg) => pkg.replace(/"name": "[\w-]+"/, `"name": "${projectName}"`));
110150

111151
// Run npm install in project dir.
112152
const npm = /^win/.test(process.platform) ? 'npm.cmd' : 'npm';

packages/templates/manifest.json

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,35 +2,109 @@
22
"templates": [
33
{
44
"name": "getting-started-ts",
5-
"description": "Getting started example [TypeScript]"
5+
"description": "Getting started example [TypeScript]",
6+
"files": [
7+
"src/main.ts",
8+
".dockerignore",
9+
".gitignore",
10+
"Dockerfile",
11+
"package.json",
12+
"README.md",
13+
"tsconfig.json"
14+
]
615
},
716
{
817
"name": "getting-started-js",
9-
"description": "Getting started example [JavaScript]"
18+
"description": "Getting started example [JavaScript]",
19+
"files": [
20+
"src/main.js",
21+
".dockerignore",
22+
".gitignore",
23+
"Dockerfile",
24+
"package.json",
25+
"README.md"
26+
]
1027
},
1128
{
1229
"name": "cheerio-ts",
13-
"description": "CheerioCrawler template project [TypeScript]"
30+
"description": "CheerioCrawler template project [TypeScript]",
31+
"files": [
32+
"src/main.ts",
33+
"src/routes.ts",
34+
".dockerignore",
35+
".gitignore",
36+
"Dockerfile",
37+
"package.json",
38+
"README.md",
39+
"tsconfig.json"
40+
]
1441
},
1542
{
1643
"name": "playwright-ts",
17-
"description": "PlaywrightCrawler template project [TypeScript]"
44+
"description": "PlaywrightCrawler template project [TypeScript]",
45+
"files": [
46+
"src/main.ts",
47+
"src/routes.ts",
48+
".dockerignore",
49+
".gitignore",
50+
"Dockerfile",
51+
"package.json",
52+
"README.md",
53+
"tsconfig.json"
54+
]
1855
},
1956
{
2057
"name": "puppeteer-ts",
21-
"description": "PuppeteerCrawler template project [TypeScript]"
58+
"description": "PuppeteerCrawler template project [TypeScript]",
59+
"files": [
60+
"src/main.ts",
61+
"src/routes.ts",
62+
".dockerignore",
63+
".gitignore",
64+
"Dockerfile",
65+
"package.json",
66+
"README.md",
67+
"tsconfig.json"
68+
]
2269
},
2370
{
2471
"name": "cheerio-js",
25-
"description": "CheerioCrawler template project [JavaScript]"
72+
"description": "CheerioCrawler template project [JavaScript]",
73+
"files": [
74+
"src/main.js",
75+
"src/routes.js",
76+
".dockerignore",
77+
".gitignore",
78+
"Dockerfile",
79+
"package.json",
80+
"README.md"
81+
]
2682
},
2783
{
2884
"name": "playwright-js",
29-
"description": "PlaywrightCrawler template project [JavaScript]"
85+
"description": "PlaywrightCrawler template project [JavaScript]",
86+
"files": [
87+
"src/main.js",
88+
"src/routes.js",
89+
".dockerignore",
90+
".gitignore",
91+
"Dockerfile",
92+
"package.json",
93+
"README.md"
94+
]
3095
},
3196
{
3297
"name": "puppeteer-js",
33-
"description": "PuppeteerCrawler template project [JavaScript]"
98+
"description": "PuppeteerCrawler template project [JavaScript]",
99+
"files": [
100+
"src/main.js",
101+
"src/routes.js",
102+
".dockerignore",
103+
".gitignore",
104+
"Dockerfile",
105+
"package.json",
106+
"README.md"
107+
]
34108
}
35109
]
36110
}

packages/templates/package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@
3939
},
4040
"homepage": "https://crawlee.dev",
4141
"scripts": {
42-
"build": "npm run clean && npm run compile && npm run copy-templates && npm run copy",
42+
"build": "npm run clean && npm run validate && npm run compile && npm run copy",
4343
"clean": "rimraf ./dist",
4444
"compile": "tsc -p tsconfig.build.json && gen-esm-wrapper ./dist/index.js ./dist/index.mjs",
45-
"copy-templates": "node ./scripts/copy-templates.mjs",
46-
"copy": "ts-node -T ../../scripts/copy.ts"
45+
"copy": "ts-node -T ../../scripts/copy.ts",
46+
"validate": "node ./scripts/validate-manifest.mjs"
4747
},
4848
"publishConfig": {
4949
"access": "public"

packages/templates/scripts/copy-templates.mjs

Lines changed: 0 additions & 18 deletions
This file was deleted.
packages/templates/scripts/validate-manifest.mjs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
// Validates manifest.json against the templates directory: every template
// listed in the manifest must have a folder of the same name, and every file
// the manifest lists for it must exist inside that folder.
// Sets a non-zero exit code when validation fails.
import { readFile, readdir, access } from 'node:fs/promises';
import { URL } from 'node:url';

// Minimal ANSI colour helpers so the script needs no dependencies.
const colors = {
    red: (text) => `\x1B[31m${text}\x1B[39m`,
    green: (text) => `\x1B[32m${text}\x1B[39m`,
    grey: (text) => `\x1B[90m${text}\x1B[39m`,
    yellow: (text) => `\x1B[33m${text}\x1B[39m`,
};

const templatesDirectory = new URL('../templates/', import.meta.url);

// Only directories can be templates. Without this filter a stray *file* named
// like a template would pass the folder check, and the per-file checks below
// would then fail with ENOTDIR, which is merely warned about.
const templateNames = (await readdir(templatesDirectory, { withFileTypes: true }))
    .filter((entry) => entry.isDirectory())
    .map((entry) => entry.name);

/** @type {{ templates: Array<{ name: string; description: string; files: string[] }>; }} */
const manifest = JSON.parse(await readFile(new URL('../manifest.json', import.meta.url), 'utf8'));

console.log(`Validating ${colors.green(manifest.templates.length)} templates`);

let hasError = false;

for (const manifestTemplate of manifest.templates) {
    // Check if the folder it points to actually exists
    if (!templateNames.includes(manifestTemplate.name)) {
        console.error(colors.red(`Failed to find folder for template called ${colors.yellow(manifestTemplate.name)}`));
        hasError = true;
        // Skipping the rest of the validation as the template is missing
        continue;
    }

    console.log(colors.grey(`Validating template ${colors.yellow(manifestTemplate.name)}`));

    const prefix = colors.grey(`[${colors.yellow(manifestTemplate.name)}]:`);

    // A manifest entry without a file list cannot be downloaded by the CLI;
    // report it instead of crashing on the loop below.
    if (!Array.isArray(manifestTemplate.files)) {
        console.error(`${prefix} Manifest entry is missing its ${colors.yellow('files')} array`);
        hasError = true;
        continue;
    }

    // Check that all files it requires exist
    for (const requiredFile of manifestTemplate.files) {
        try {
            await access(new URL(`./${manifestTemplate.name}/${requiredFile}`, templatesDirectory));
        } catch (err) {
            if (err.code === 'ENOENT') {
                hasError = true;
                console.error(`${prefix} Failed to find file ${colors.yellow(requiredFile)}`);
                console.error(err);
            } else {
                // Other access problems are reported but do not fail validation.
                console.warn(`${prefix} Failed to read file ${colors.yellow(requiredFile)}`, err);
            }
        }
    }

    console.log(colors.green(`Finished validating ${colors.yellow(manifestTemplate.name)}`));
}

process.exitCode = hasError ? 1 : 0;

packages/templates/src/index.ts

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,47 @@ import https from 'https';
22

33
export const MANIFEST_URL = 'https://raw.githubusercontent.com/apify/crawlee/master/packages/templates/manifest.json';
44

/**
 * Builds the raw.githubusercontent.com URL of a single template file.
 *
 * Each path segment is percent-encoded so that file names containing
 * characters with a special meaning in URLs (spaces, `#`, `?`, ...) still
 * resolve to the right file; `/` separators inside `path` are preserved.
 *
 * @param templateName Name of the template folder, e.g. `cheerio-ts`.
 * @param path File path relative to the template folder, using `/` separators.
 * @returns Absolute URL of the file on the `master` branch.
 */
function templateFileUrl(templateName: string, path: string): string {
    const encodedPath = path
        .split('/')
        .map((segment) => encodeURIComponent(segment))
        .join('/');

    return `https://raw.githubusercontent.com/apify/crawlee/master/packages/templates/templates/${encodeURIComponent(templateName)}/${encodedPath}`;
}
8+
9+
interface SharedTemplateData {
10+
name: string;
11+
description: string;
12+
}
13+
14+
// Data received from the github file
15+
interface RawTemplate extends SharedTemplateData {
16+
files: string[];
17+
}
18+
19+
interface RawManifest {
20+
templates: RawTemplate[];
21+
}
22+
23+
// Data returned for the CLI or users to consume
524
export interface Manifest {
6-
templates: { name: string; description: string }[];
25+
templates: Template[];
26+
}
27+
28+
export interface Template extends SharedTemplateData {
29+
files: TemplateFile[];
30+
}
31+
32+
export interface TemplateFile {
33+
path: string;
34+
url: string;
735
}
836

937
export async function fetchManifest(): Promise<Manifest> {
10-
return new Promise((resolve, reject) => {
38+
const rawManifest = await new Promise<RawManifest>((resolve, reject) => {
1139
https.get(MANIFEST_URL, (res) => {
1240
let json = '';
1341
res
1442
.on('data', (chunk) => {
1543
json += chunk;
1644
})
17-
.on('end', () => {
45+
.once('end', () => {
1846
if (res.statusCode === 200) {
1947
try {
2048
const data = JSON.parse(json);
@@ -30,4 +58,19 @@ export async function fetchManifest(): Promise<Manifest> {
3058
})
3159
.on('error', (err) => reject(err));
3260
});
61+
62+
const newTemplates: Template[] = rawManifest.templates.map((original) => {
63+
return {
64+
name: original.name,
65+
description: original.description,
66+
files: original.files.map((file) => ({
67+
path: file,
68+
url: templateFileUrl(original.name, file),
69+
})),
70+
};
71+
});
72+
73+
return {
74+
templates: newTemplates,
75+
};
3376
}

0 commit comments

Comments
 (0)