Skip to content

Commit 3a22b07

Browse files
committed
2 parents 340e650 + e2a34f8 commit 3a22b07

File tree

1 file changed

+61
-148
lines changed

1 file changed

+61
-148
lines changed

00_GET_POST/01_POST_thsrc_time_table.ipynb

Lines changed: 61 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@
1616
{
1717
"cell_type": "code",
1818
"execution_count": 1,
19-
"metadata": {
20-
"collapsed": true
21-
},
19+
"metadata": {},
2220
"outputs": [],
2321
"source": [
2422
"import os\n",
@@ -34,15 +32,13 @@
3432
{
3533
"cell_type": "code",
3634
"execution_count": 2,
37-
"metadata": {
38-
"collapsed": false
39-
},
35+
"metadata": {},
4036
"outputs": [
4137
{
4238
"name": "stdout",
4339
"output_type": "stream",
4440
"text": [
45-
"The date after one week - 2018/02/09\n"
41+
"The date after one week - 2018/02/28\n"
4642
]
4743
}
4844
],
@@ -65,9 +61,7 @@
6561
{
6662
"cell_type": "code",
6763
"execution_count": 3,
68-
"metadata": {
69-
"collapsed": true
70-
},
64+
"metadata": {},
7165
"outputs": [],
7266
"source": [
7367
"resp = requests.post(url, data=form_data)\n",
@@ -78,14 +72,25 @@
7872
{
7973
"cell_type": "code",
8074
"execution_count": 4,
81-
"metadata": {
82-
"collapsed": false
83-
},
75+
"metadata": {},
8476
"outputs": [
8577
{
8678
"data": {
8779
"text/html": [
8880
"<div>\n",
81+
"<style scoped>\n",
82+
" .dataframe tbody tr th:only-of-type {\n",
83+
" vertical-align: middle;\n",
84+
" }\n",
85+
"\n",
86+
" .dataframe tbody tr th {\n",
87+
" vertical-align: top;\n",
88+
" }\n",
89+
"\n",
90+
" .dataframe thead th {\n",
91+
" text-align: right;\n",
92+
" }\n",
93+
"</style>\n",
8994
"<table border=\"1\" class=\"dataframe\">\n",
9095
" <thead>\n",
9196
" <tr style=\"text-align: right;\">\n",
@@ -104,157 +109,77 @@
104109
" <td>14:11</td>\n",
105110
" <td>16:11</td>\n",
106111
" <td>02:00</td>\n",
107-
" <td>65折起</td>\n",
108-
" </tr>\n",
109-
" <tr>\n",
110-
" <th>1</th>\n",
111-
" <td>0833</td>\n",
112-
" <td>14:11</td>\n",
113-
" <td>16:11</td>\n",
114-
" <td>02:00</td>\n",
115-
" <td>65折起</td>\n",
116-
" </tr>\n",
117-
" <tr>\n",
118-
" <th>2</th>\n",
119-
" <td>1649</td>\n",
120-
" <td>14:21</td>\n",
121-
" <td>16:06</td>\n",
122-
" <td>01:45</td>\n",
123112
" <td>8折起</td>\n",
124113
" </tr>\n",
125114
" <tr>\n",
126-
" <th>3</th>\n",
127-
" <td>1649</td>\n",
128-
" <td>14:21</td>\n",
129-
" <td>16:06</td>\n",
130-
" <td>01:45</td>\n",
131-
" <td>8折起</td>\n",
132-
" </tr>\n",
133-
" <tr>\n",
134-
" <th>4</th>\n",
135-
" <td>0651</td>\n",
136-
" <td>14:46</td>\n",
137-
" <td>16:32</td>\n",
138-
" <td>01:46</td>\n",
139-
" <td></td>\n",
140-
" </tr>\n",
141-
" <tr>\n",
142-
" <th>5</th>\n",
115+
" <th>1</th>\n",
143116
" <td>0651</td>\n",
144117
" <td>14:46</td>\n",
145118
" <td>16:32</td>\n",
146119
" <td>01:46</td>\n",
147120
" <td></td>\n",
148121
" </tr>\n",
149122
" <tr>\n",
150-
" <th>6</th>\n",
151-
" <td>0837</td>\n",
152-
" <td>15:11</td>\n",
153-
" <td>17:11</td>\n",
154-
" <td>02:00</td>\n",
155-
" <td>65折起</td>\n",
156-
" </tr>\n",
157-
" <tr>\n",
158-
" <th>7</th>\n",
123+
" <th>2</th>\n",
159124
" <td>0837</td>\n",
160125
" <td>15:11</td>\n",
161126
" <td>17:11</td>\n",
162127
" <td>02:00</td>\n",
163-
" <td>65折起</td>\n",
164-
" </tr>\n",
165-
" <tr>\n",
166-
" <th>8</th>\n",
167-
" <td>1655</td>\n",
168-
" <td>15:21</td>\n",
169-
" <td>17:06</td>\n",
170-
" <td>01:45</td>\n",
171-
" <td>8折起</td>\n",
172-
" </tr>\n",
173-
" <tr>\n",
174-
" <th>9</th>\n",
175-
" <td>1655</td>\n",
176-
" <td>15:21</td>\n",
177-
" <td>17:06</td>\n",
178-
" <td>01:45</td>\n",
179-
" <td>8折起</td>\n",
180-
" </tr>\n",
181-
" <tr>\n",
182-
" <th>10</th>\n",
183-
" <td>0657</td>\n",
184-
" <td>15:46</td>\n",
185-
" <td>17:32</td>\n",
186-
" <td>01:46</td>\n",
187128
" <td>8折起</td>\n",
188129
" </tr>\n",
189130
" <tr>\n",
190-
" <th>11</th>\n",
131+
" <th>3</th>\n",
191132
" <td>0657</td>\n",
192133
" <td>15:46</td>\n",
193134
" <td>17:32</td>\n",
194135
" <td>01:46</td>\n",
195-
" <td>8折起</td>\n",
196-
" </tr>\n",
197-
" <tr>\n",
198-
" <th>12</th>\n",
199-
" <td>1237</td>\n",
200-
" <td>15:51</td>\n",
201-
" <td>17:17</td>\n",
202-
" <td>01:26</td>\n",
203-
" <td></td>\n",
204-
" </tr>\n",
205-
" <tr>\n",
206-
" <th>13</th>\n",
207-
" <td>1237</td>\n",
208-
" <td>15:51</td>\n",
209-
" <td>17:17</td>\n",
210-
" <td>01:26</td>\n",
211136
" <td></td>\n",
212137
" </tr>\n",
213138
" <tr>\n",
214-
" <th>14</th>\n",
215-
" <td>0841</td>\n",
216-
" <td>16:11</td>\n",
217-
" <td>18:11</td>\n",
218-
" <td>02:00</td>\n",
219-
" <td>8折起</td>\n",
220-
" </tr>\n",
221-
" <tr>\n",
222-
" <th>15</th>\n",
139+
" <th>4</th>\n",
223140
" <td>0841</td>\n",
224141
" <td>16:11</td>\n",
225142
" <td>18:11</td>\n",
226143
" <td>02:00</td>\n",
227-
" <td>8折起</td>\n",
228-
" </tr>\n",
229-
" <tr>\n",
230-
" <th>16</th>\n",
231-
" <td>0661</td>\n",
232-
" <td>16:21</td>\n",
233-
" <td>18:06</td>\n",
234-
" <td>01:45</td>\n",
235-
" <td></td>\n",
144+
" <td>65折起</td>\n",
236145
" </tr>\n",
237146
" <tr>\n",
238-
" <th>17</th>\n",
147+
" <th>5</th>\n",
239148
" <td>0661</td>\n",
240149
" <td>16:21</td>\n",
241150
" <td>18:06</td>\n",
242151
" <td>01:45</td>\n",
243-
" <td></td>\n",
152+
" <td>8折起</td>\n",
244153
" </tr>\n",
245154
" <tr>\n",
246-
" <th>18</th>\n",
155+
" <th>6</th>\n",
247156
" <td>0663</td>\n",
248157
" <td>16:46</td>\n",
249158
" <td>18:32</td>\n",
250159
" <td>01:46</td>\n",
251160
" <td></td>\n",
252161
" </tr>\n",
253162
" <tr>\n",
254-
" <th>19</th>\n",
255-
" <td>0663</td>\n",
256-
" <td>16:46</td>\n",
257-
" <td>18:32</td>\n",
163+
" <th>7</th>\n",
164+
" <td>0845</td>\n",
165+
" <td>17:11</td>\n",
166+
" <td>19:11</td>\n",
167+
" <td>02:00</td>\n",
168+
" <td>65折起</td>\n",
169+
" </tr>\n",
170+
" <tr>\n",
171+
" <th>8</th>\n",
172+
" <td>0667</td>\n",
173+
" <td>17:21</td>\n",
174+
" <td>19:06</td>\n",
175+
" <td>01:45</td>\n",
176+
" <td>8折起</td>\n",
177+
" </tr>\n",
178+
" <tr>\n",
179+
" <th>9</th>\n",
180+
" <td>0669</td>\n",
181+
" <td>17:46</td>\n",
182+
" <td>19:32</td>\n",
258183
" <td>01:46</td>\n",
259184
" <td></td>\n",
260185
" </tr>\n",
@@ -263,27 +188,17 @@
263188
"</div>"
264189
],
265190
"text/plain": [
266-
" 車次 出發時間 抵達時間 行車時間 早鳥\n",
267-
"0 0833 14:11 16:11 02:00 65折起\n",
268-
"1 0833 14:11 16:11 02:00 65折起\n",
269-
"2 1649 14:21 16:06 01:45 8折起\n",
270-
"3 1649 14:21 16:06 01:45 8折起\n",
271-
"4 0651 14:46 16:32 01:46 \n",
272-
"5 0651 14:46 16:32 01:46 \n",
273-
"6 0837 15:11 17:11 02:00 65折起\n",
274-
"7 0837 15:11 17:11 02:00 65折起\n",
275-
"8 1655 15:21 17:06 01:45 8折起\n",
276-
"9 1655 15:21 17:06 01:45 8折起\n",
277-
"10 0657 15:46 17:32 01:46 8折起\n",
278-
"11 0657 15:46 17:32 01:46 8折起\n",
279-
"12 1237 15:51 17:17 01:26 \n",
280-
"13 1237 15:51 17:17 01:26 \n",
281-
"14 0841 16:11 18:11 02:00 8折起\n",
282-
"15 0841 16:11 18:11 02:00 8折起\n",
283-
"16 0661 16:21 18:06 01:45 \n",
284-
"17 0661 16:21 18:06 01:45 \n",
285-
"18 0663 16:46 18:32 01:46 \n",
286-
"19 0663 16:46 18:32 01:46 "
191+
" 車次 出發時間 抵達時間 行車時間 早鳥\n",
192+
"0 0833 14:11 16:11 02:00 8折起\n",
193+
"1 0651 14:46 16:32 01:46 \n",
194+
"2 0837 15:11 17:11 02:00 8折起\n",
195+
"3 0657 15:46 17:32 01:46 \n",
196+
"4 0841 16:11 18:11 02:00 65折起\n",
197+
"5 0661 16:21 18:06 01:45 8折起\n",
198+
"6 0663 16:46 18:32 01:46 \n",
199+
"7 0845 17:11 19:11 02:00 65折起\n",
200+
"8 0667 17:21 19:06 01:45 8折起\n",
201+
"9 0669 17:46 19:32 01:46 "
287202
]
288203
},
289204
"execution_count": 4,
@@ -292,7 +207,7 @@
292207
}
293208
],
294209
"source": [
295-
"rows = soup.table.find_all('tr')\n",
210+
"rows = soup.table.find_all('tr', recursive=False)\n",
296211
"\n",
297212
"colname, rows = rows[1], rows[2:]\n",
298213
"colname = list(colname.stripped_strings)\n",
@@ -312,23 +227,21 @@
312227
" early_ticket = early_ticket[0] if early_ticket else ''\n",
313228
" \n",
314229
" rows[i] = [trips, t_departure, t_arrive, duration, early_ticket]\n",
315-
" \n",
230+
"\n",
316231
"df = pd.DataFrame(rows, columns=colname)\n",
317232
"df"
318233
]
319234
},
320235
{
321236
"cell_type": "code",
322237
"execution_count": 5,
323-
"metadata": {
324-
"collapsed": false
325-
},
238+
"metadata": {},
326239
"outputs": [
327240
{
328241
"name": "stdout",
329242
"output_type": "stream",
330243
"text": [
331-
"Save csv to /home/afun/github/Python-Crawling-Tutorial/results/thsrc_20180209.csv\n"
244+
"Save csv to /home/afun/github/Python-Crawling-Tutorial/results/thsrc_20180228.csv\n"
332245
]
333246
}
334247
],

0 commit comments

Comments
 (0)