Skip to content

Commit 699ebab

Browse files
committed
Merge branch 'dev'
2 parents efef51b + e2a34f8 commit 699ebab

File tree

1 file changed

+132
-20
lines changed

1 file changed

+132
-20
lines changed

00_GET_POST/01_POST_thsrc_time_table.ipynb

Lines changed: 132 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -71,30 +71,143 @@
7171
},
7272
{
7373
"cell_type": "code",
74-
"execution_count": 18,
74+
"execution_count": 4,
7575
"metadata": {},
7676
"outputs": [
7777
{
78-
"name": "stdout",
79-
"output_type": "stream",
80-
"text": [
81-
"20\n"
82-
]
83-
},
84-
{
85-
"ename": "TypeError",
86-
"evalue": "unhashable type: 'list'",
87-
"output_type": "error",
88-
"traceback": [
89-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
90-
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
91-
"\u001b[0;32m<ipython-input-18-c28c428f364f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrows\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
92-
"\u001b[0;31mTypeError\u001b[0m: unhashable type: 'list'"
93-
]
78+
"data": {
79+
"text/html": [
80+
"<div>\n",
81+
"<style scoped>\n",
82+
" .dataframe tbody tr th:only-of-type {\n",
83+
" vertical-align: middle;\n",
84+
" }\n",
85+
"\n",
86+
" .dataframe tbody tr th {\n",
87+
" vertical-align: top;\n",
88+
" }\n",
89+
"\n",
90+
" .dataframe thead th {\n",
91+
" text-align: right;\n",
92+
" }\n",
93+
"</style>\n",
94+
"<table border=\"1\" class=\"dataframe\">\n",
95+
" <thead>\n",
96+
" <tr style=\"text-align: right;\">\n",
97+
" <th></th>\n",
98+
" <th>車次</th>\n",
99+
" <th>出發時間</th>\n",
100+
" <th>抵達時間</th>\n",
101+
" <th>行車時間</th>\n",
102+
" <th>早鳥</th>\n",
103+
" </tr>\n",
104+
" </thead>\n",
105+
" <tbody>\n",
106+
" <tr>\n",
107+
" <th>0</th>\n",
108+
" <td>0833</td>\n",
109+
" <td>14:11</td>\n",
110+
" <td>16:11</td>\n",
111+
" <td>02:00</td>\n",
112+
" <td>8折起</td>\n",
113+
" </tr>\n",
114+
" <tr>\n",
115+
" <th>1</th>\n",
116+
" <td>0651</td>\n",
117+
" <td>14:46</td>\n",
118+
" <td>16:32</td>\n",
119+
" <td>01:46</td>\n",
120+
" <td></td>\n",
121+
" </tr>\n",
122+
" <tr>\n",
123+
" <th>2</th>\n",
124+
" <td>0837</td>\n",
125+
" <td>15:11</td>\n",
126+
" <td>17:11</td>\n",
127+
" <td>02:00</td>\n",
128+
" <td>8折起</td>\n",
129+
" </tr>\n",
130+
" <tr>\n",
131+
" <th>3</th>\n",
132+
" <td>0657</td>\n",
133+
" <td>15:46</td>\n",
134+
" <td>17:32</td>\n",
135+
" <td>01:46</td>\n",
136+
" <td></td>\n",
137+
" </tr>\n",
138+
" <tr>\n",
139+
" <th>4</th>\n",
140+
" <td>0841</td>\n",
141+
" <td>16:11</td>\n",
142+
" <td>18:11</td>\n",
143+
" <td>02:00</td>\n",
144+
" <td>65折起</td>\n",
145+
" </tr>\n",
146+
" <tr>\n",
147+
" <th>5</th>\n",
148+
" <td>0661</td>\n",
149+
" <td>16:21</td>\n",
150+
" <td>18:06</td>\n",
151+
" <td>01:45</td>\n",
152+
" <td>8折起</td>\n",
153+
" </tr>\n",
154+
" <tr>\n",
155+
" <th>6</th>\n",
156+
" <td>0663</td>\n",
157+
" <td>16:46</td>\n",
158+
" <td>18:32</td>\n",
159+
" <td>01:46</td>\n",
160+
" <td></td>\n",
161+
" </tr>\n",
162+
" <tr>\n",
163+
" <th>7</th>\n",
164+
" <td>0845</td>\n",
165+
" <td>17:11</td>\n",
166+
" <td>19:11</td>\n",
167+
" <td>02:00</td>\n",
168+
" <td>65折起</td>\n",
169+
" </tr>\n",
170+
" <tr>\n",
171+
" <th>8</th>\n",
172+
" <td>0667</td>\n",
173+
" <td>17:21</td>\n",
174+
" <td>19:06</td>\n",
175+
" <td>01:45</td>\n",
176+
" <td>8折起</td>\n",
177+
" </tr>\n",
178+
" <tr>\n",
179+
" <th>9</th>\n",
180+
" <td>0669</td>\n",
181+
" <td>17:46</td>\n",
182+
" <td>19:32</td>\n",
183+
" <td>01:46</td>\n",
184+
" <td></td>\n",
185+
" </tr>\n",
186+
" </tbody>\n",
187+
"</table>\n",
188+
"</div>"
189+
],
190+
"text/plain": [
191+
" 車次 出發時間 抵達時間 行車時間 早鳥\n",
192+
"0 0833 14:11 16:11 02:00 8折起\n",
193+
"1 0651 14:46 16:32 01:46 \n",
194+
"2 0837 15:11 17:11 02:00 8折起\n",
195+
"3 0657 15:46 17:32 01:46 \n",
196+
"4 0841 16:11 18:11 02:00 65折起\n",
197+
"5 0661 16:21 18:06 01:45 8折起\n",
198+
"6 0663 16:46 18:32 01:46 \n",
199+
"7 0845 17:11 19:11 02:00 65折起\n",
200+
"8 0667 17:21 19:06 01:45 8折起\n",
201+
"9 0669 17:46 19:32 01:46 "
202+
]
203+
},
204+
"execution_count": 4,
205+
"metadata": {},
206+
"output_type": "execute_result"
94207
}
95208
],
96209
"source": [
97-
"rows = soup.table.find_all('tr')\n",
210+
"rows = soup.table.find_all('tr', recursive=False)\n",
98211
"\n",
99212
"colname, rows = rows[1], rows[2:]\n",
100213
"colname = list(colname.stripped_strings)\n",
@@ -115,7 +228,6 @@
115228
" \n",
116229
" rows[i] = [trips, t_departure, t_arrive, duration, early_ticket]\n",
117230
"\n",
118-
"print(len(rows))\n",
119231
"df = pd.DataFrame(rows, columns=colname)\n",
120232
"df"
121233
]
@@ -129,7 +241,7 @@
129241
"name": "stdout",
130242
"output_type": "stream",
131243
"text": [
132-
"Save csv to /home/afun/github/Python-Crawling-Tutorial/results/thsrc_20180209.csv\n"
244+
"Save csv to /home/afun/github/Python-Crawling-Tutorial/results/thsrc_20180228.csv\n"
133245
]
134246
}
135247
],

0 commit comments

Comments
 (0)