|
1 | | -const cheerio = require('cheerio'); |
2 | | -const fs = require('fs'); |
3 | | -const Entities = require('html-entities').XmlEntities; |
4 | | -const request = require('node-fetch'); |
| 1 | +const cheerio = require('cheerio') |
| 2 | +const fs = require('fs') |
| 3 | +const Entities = require('html-entities').XmlEntities |
| 4 | +const request = require('node-fetch') |
5 | 5 |
|
// File that receives a dump of every processed item when NODE_ENV is 'dev'
// (it is emptied at the start of each fetch and appended to per item).
const debugFile = './assets/debug.json'

// Lookup tables filled while pages are processed. They live at module level,
// so entries persist across fetch() calls. Keys are URNs taken from scraped
// JSON, hence the null prototype: a key such as "constructor" or "__proto__"
// must never resolve to an inherited Object member.
// entityUrn -> industry name
const industries = Object.create(null)
// entityUrn -> follower count (written by the company page processor)
const followingItems = Object.create(null)
9 | 9 |
|
10 | 10 | module.exports = class LinkedinClient { |
11 | | -constructor(cookie) { |
12 | | -if(!cookie) |
13 | | -throw new Error('The Linkedin cookie is required.'); |
| 11 | + constructor(cookie) { |
| 12 | + if (!cookie) throw new Error('The Linkedin cookie is required.') |
| 13 | + this.entities = new Entities() |
| 14 | + this.cookie = cookie |
| 15 | + } |
14 | 16 |
|
15 | | -this.entities = new Entities(); |
16 | | -this.cookie = cookie; |
17 | | -} |
18 | | - |
19 | | -async fetch(url) { |
20 | | -let processMethod; |
21 | | -if (url.match(/^https:\/\/www.linkedin.com\/in\//)) processMethod = processPeopleProfile; |
22 | | -else if (url.match(/^https:\/\/www.linkedin.com\/company\//)) { |
23 | | -url += url[url.length - 1] == '/' ? 'about/' : '/about/'; |
24 | | -processMethod = processCompanyPage; |
25 | | -} else throw new Error(`Invalid URL provided ("${url}"), it must be a people profile URL or a company page URL.`); |
26 | | - |
27 | | -if(process.env.NODE_ENV == 'dev') |
28 | | -fs.writeFileSync(debugFile, ''); |
29 | | - |
30 | | -const res = await request(url, { headers: { Cookie: `li_at=${this.cookie}` } }); |
31 | | -const html = await res.text(); |
32 | | -const $ = cheerio.load(html); |
33 | | -let data, result = { linkedinUrl: url.replace('/about/', '') }; |
34 | | -while (!result.name && !result.firstName) { |
35 | | -// this loop allows to fix a bug with random missing <code> tags |
36 | | -for (let elt of $('code').get()) { |
37 | | -try { |
38 | | -data = JSON.parse(this.entities.decode($(elt).html())); |
39 | | -} catch (e) { |
40 | | -continue; |
41 | | -} |
42 | | -if (!data.included) |
43 | | -continue; |
44 | | -for (let item of data.included) { |
45 | | -processMethod(item, result); |
46 | | -if (process.env.NODE_ENV == 'dev') |
47 | | -fs.appendFileSync(debugFile, JSON.stringify(item, null, 4) + '\n'); |
48 | | -} |
49 | | -} |
50 | | - |
51 | | -// this company or people does not exist |
52 | | -if (!result.firstName && !result.name) |
53 | | -return null; |
54 | | -} |
55 | | - |
56 | | -return result; |
57 | | -} |
58 | | -}; |
| 17 | + async fetch(url) { |
| 18 | + let processMethod |
| 19 | + if (url.match(/^https:\/\/www.linkedin.com\/in\//)) processMethod = processPeopleProfile |
| 20 | + else if (url.match(/^https:\/\/www.linkedin.com\/company\//)) { |
| 21 | + url += url[url.length - 1] == '/' ? 'about/' : '/about/' |
| 22 | + processMethod = processCompanyPage |
| 23 | + } else throw new Error(`Invalid URL provided ("${url}"), it must be a people profile URL or a company page URL.`) |
| 24 | + |
| 25 | + if (process.env.NODE_ENV == 'dev') fs.writeFileSync(debugFile, '') |
| 26 | + |
| 27 | + const res = await request(url, { headers: { Cookie: `li_at=${this.cookie}` } }) |
| 28 | + const html = await res.text() |
| 29 | + const $ = cheerio.load(html) |
| 30 | + let data, |
| 31 | + result = { linkedinUrl: url.replace('/about/', ''), related: [] } |
| 32 | + while (!result.name && !result.firstName) { |
| 33 | + // this loop allows to fix a bug with random missing <code> tags |
| 34 | + for (let elt of $('code').get()) { |
| 35 | + try { |
| 36 | + data = JSON.parse(this.entities.decode($(elt).html())) |
| 37 | + } catch (e) { |
| 38 | + continue |
| 39 | + } |
| 40 | + if (!data.included) continue |
| 41 | + for (let item of data.included) { |
| 42 | + processMethod(item, result) |
| 43 | + if (process.env.NODE_ENV == 'dev') fs.appendFileSync(debugFile, JSON.stringify(item, null, 4) + '\n') |
| 44 | + } |
| 45 | + } |
| 46 | + |
| 47 | + // this company or people does not exist |
| 48 | + if (!result.firstName && !result.name) return null |
| 49 | + } |
| 50 | + |
| 51 | + return result |
| 52 | + } |
| 53 | +} |
59 | 54 |
|
/**
 * Private helper: returns a shallow copy of `value` without its `$type` key.
 * Non-object values are returned unchanged.
 */
function omitType(value) {
  if (!value || typeof value !== 'object') return value
  const copy = Object.assign({}, value)
  delete copy.$type
  return copy
}

/**
 * Private helper: copies a date range and drops the `$type` markers from the
 * range itself and from its `start` / `end` entries.
 */
function cleanDateRange(dateRange) {
  if (!dateRange) return dateRange
  const cleaned = omitType(dateRange)
  if (cleaned.start) cleaned.start = omitType(cleaned.start)
  if (cleaned.end) cleaned.end = omitType(cleaned.end)
  return cleaned
}

/**
 * Private method: folds one item of a people profile page into `result`.
 *
 * Depending on `item.$type` this records an industry name in the shared
 * `industries` lookup, fills the profile fields, sets the connection count, or
 * appends to `positions`, `education`, `skills` or `languages` (each array is
 * created on first use). Items of any other type are ignored. The item itself
 * is left untouched; `$type` markers are removed from copies only.
 *
 * @param {Object} item One entry of a page's `included` array.
 * @param {Object} result Accumulator that is mutated in place.
 */
function processPeopleProfile(item, result) {
  switch (item.$type) {
    case 'com.linkedin.voyager.dash.common.Industry':
      if (item.name) industries[item.entityUrn] = item.name
      break

    case 'com.linkedin.voyager.dash.identity.profile.Profile':
      if (!item.objectUrn) break
      result.firstName = item.firstName
      result.lastName = item.lastName
      result.headline = item.headline
      result.location = item.locationName
      result.address = item.address
      result.industry = industries[item['*industry']]
      result.summary = item.summary
      // The guard must look at the item: result.birthDate is not set yet here.
      if (item.birthDate) result.birthDate = omitType(item.birthDate)
      break

    case 'com.linkedin.voyager.common.FollowingInfo':
      if (item.followerCount) result.connections = item.followerCount
      break

    case 'com.linkedin.voyager.dash.identity.profile.Position':
      if (!result.positions) result.positions = []
      result.positions.push({
        title: item.title,
        company: item.companyName,
        location: item.location,
        description: item.description,
        dateRange: cleanDateRange(item.dateRange),
      })
      break

    case 'com.linkedin.voyager.dash.identity.profile.Education':
      if (!result.education) result.education = []
      result.education.push({
        degree: item.degreeName,
        school: item.schoolName,
        field: item.fieldOfStudy,
        dateRange: cleanDateRange(item.dateRange),
      })
      break

    case 'com.linkedin.voyager.dash.identity.profile.Skill':
      if (!result.skills) result.skills = []
      result.skills.push(item.name)
      break

    case 'com.linkedin.voyager.dash.identity.profile.Language':
      if (!result.languages) result.languages = []
      result.languages.push({ language: item.name, proficiency: item.proficiency })
      break

    default:
      break
  }
}
119 | 109 |
|
/**
 * Private method: folds one item of a company page into `result`.
 *
 * Industry and FollowingInfo items only feed the shared `industries` and
 * `followingItems` lookups. A Company item with a truthy `staffCount` fills the
 * company fields. Any other item that has a `url` different from
 * `result.linkedinUrl` is appended as-is to `result.related`.
 *
 * Optional parts of a company record (company type, staff range, headquarter,
 * industries) may be absent; the matching fields are then left `null` or
 * `undefined` instead of throwing.
 *
 * @param {Object} item One entry of a page's `included` array.
 * @param {Object} result Accumulator that is mutated in place.
 */
function processCompanyPage(item, result) {
  if (item.$type === 'com.linkedin.voyager.common.Industry') {
    industries[item.entityUrn] = item.localizedName
  } else if (item.$type === 'com.linkedin.voyager.common.FollowingInfo') {
    followingItems[item.entityUrn] = item.followerCount
  } else if (item.$type === 'com.linkedin.voyager.organization.Company' && item.staffCount) {
    const industryUrns = item['*companyIndustries']
    const companyType = item.companyType ? item.companyType.localizedName : null
    const range = item.staffCountRange

    result.name = item.name
    result.tagline = item.tagline
    result.description = item.description
    result.industry = industryUrns ? industries[industryUrns[0]] : undefined
    result.type = companyType
    result.website = item.companyPageUrl
    // "11-50 employees" for a closed range, "10001+ employees" for an open one.
    result.companySize = range ? `${range.start}${range.end ? `-${range.end}` : '+'} employees` : undefined
    result.membersOnLinkedin = item.staffCount

    // Copy before dropping the $type marker so the source item stays intact.
    if (item.headquarter && typeof item.headquarter === 'object') {
      const headquarters = Object.assign({}, item.headquarter)
      delete headquarters.$type
      result.headquarters = headquarters
    } else {
      result.headquarters = item.headquarter
    }

    result.companyType = companyType
    result.foundedYear = item.foundedOn && item.foundedOn.year
    result.specialties = item.specialities
    result.followers = followingItems[item['*followingInfo']]
  } else if (item.url && item.url !== result.linkedinUrl) {
    // Callers normally pre-create the array; tolerate a bare accumulator too.
    if (!result.related) result.related = []
    result.related.push(item)
  }
}
0 commit comments