|
14 | 14 | }, |
15 | 15 | { |
16 | 16 | "cell_type": "code", |
17 | | - "execution_count": 2, |
| 17 | + "execution_count": 1, |
18 | 18 | "metadata": { |
19 | 19 | "collapsed": true |
20 | 20 | }, |
21 | 21 | "outputs": [], |
22 | 22 | "source": [ |
| 23 | + "import os\n", |
23 | 24 | "import requests\n", |
24 | 25 | "import pandas as pd\n", |
25 | 26 | "\n", |
|
30 | 31 | }, |
31 | 32 | { |
32 | 33 | "cell_type": "code", |
33 | | - "execution_count": 5, |
| 34 | + "execution_count": 2, |
34 | 35 | "metadata": { |
35 | 36 | "collapsed": true |
36 | 37 | }, |
|
43 | 44 | }, |
44 | 45 | { |
45 | 46 | "cell_type": "code", |
46 | | - "execution_count": 7, |
47 | | - "metadata": {}, |
| 47 | + "execution_count": 3, |
| 48 | + "metadata": { |
| 49 | + "collapsed": false |
| 50 | + }, |
| 51 | + "outputs": [ |
| 52 | + { |
| 53 | + "data": { |
| 54 | + "text/html": [ |
| 55 | + "<div>\n", |
| 56 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 57 | + " <thead>\n", |
| 58 | + " <tr style=\"text-align: right;\">\n", |
| 59 | + " <th></th>\n", |
| 60 | + " <th>週次</th>\n", |
| 61 | + " <th>日期</th>\n", |
| 62 | + " <th>週末票房總和</th>\n", |
| 63 | + " <th>漲跌幅</th>\n", |
| 64 | + " <th>冠軍片名</th>\n", |
| 65 | + " <th>英文片名</th>\n", |
| 66 | + " <th>週末票房冠軍</th>\n", |
| 67 | + " <th>冠軍比例*</th>\n", |
| 68 | + " </tr>\n", |
| 69 | + " </thead>\n", |
| 70 | + " <tbody>\n", |
| 71 | + " <tr>\n", |
| 72 | + " <th>0</th>\n", |
| 73 | + " <td>4</td>\n", |
| 74 | + " <td>01/27 - 28</td>\n", |
| 75 | + " <td>30,022,225</td>\n", |
| 76 | + " <td>-28.6%</td>\n", |
| 77 | + " <td>移動迷宮:死亡解藥</td>\n", |
| 78 | + " <td>The Maze Runner: The Death Cure</td>\n", |
| 79 | + " <td>7,492,613</td>\n", |
| 80 | + " <td>24.96%</td>\n", |
| 81 | + " </tr>\n", |
| 82 | + " <tr>\n", |
| 83 | + " <th>1</th>\n", |
| 84 | + " <td>3</td>\n", |
| 85 | + " <td>01/20 - 21</td>\n", |
| 86 | + " <td>42,028,838</td>\n", |
| 87 | + " <td>+24.6%</td>\n", |
| 88 | + " <td>移動迷宮:死亡解藥</td>\n", |
| 89 | + " <td>The Maze Runner: The Death Cure</td>\n", |
| 90 | + " <td>15,483,826</td>\n", |
| 91 | + " <td>36.84%</td>\n", |
| 92 | + " </tr>\n", |
| 93 | + " <tr>\n", |
| 94 | + " <th>2</th>\n", |
| 95 | + " <td>2</td>\n", |
| 96 | + " <td>01/13 - 14</td>\n", |
| 97 | + " <td>33,733,791</td>\n", |
| 98 | + " <td>-17.8%</td>\n", |
| 99 | + " <td>與神同行</td>\n", |
| 100 | + " <td>Along with the Gods: The Two Worlds</td>\n", |
| 101 | + " <td>13,298,668</td>\n", |
| 102 | + " <td>39.42%</td>\n", |
| 103 | + " </tr>\n", |
| 104 | + " <tr>\n", |
| 105 | + " <th>3</th>\n", |
| 106 | + " <td>1</td>\n", |
| 107 | + " <td>01/06 - 07</td>\n", |
| 108 | + " <td>41,026,264</td>\n", |
| 109 | + " <td>-29.7%</td>\n", |
| 110 | + " <td>與神同行</td>\n", |
| 111 | + " <td>Along with the Gods: The Two Worlds</td>\n", |
| 112 | + " <td>18,739,629</td>\n", |
| 113 | + " <td>45.68%</td>\n", |
| 114 | + " </tr>\n", |
| 115 | + " </tbody>\n", |
| 116 | + "</table>\n", |
| 117 | + "</div>" |
| 118 | + ], |
| 119 | + "text/plain": [ |
| 120 | + " 週次 日期 週末票房總和 漲跌幅 冠軍片名 \\\n", |
| 121 | + "0 4 01/27 - 28 30,022,225 -28.6% 移動迷宮:死亡解藥 \n", |
| 122 | + "1 3 01/20 - 21 42,028,838 +24.6% 移動迷宮:死亡解藥 \n", |
| 123 | + "2 2 01/13 - 14 33,733,791 -17.8% 與神同行 \n", |
| 124 | + "3 1 01/06 - 07 41,026,264 -29.7% 與神同行 \n", |
| 125 | + "\n", |
| 126 | + " 英文片名 週末票房冠軍 冠軍比例* \n", |
| 127 | + "0 The Maze Runner: The Death Cure 7,492,613 24.96% \n", |
| 128 | + "1 The Maze Runner: The Death Cure 15,483,826 36.84% \n", |
| 129 | + "2 Along with the Gods: The Two Worlds 13,298,668 39.42% \n", |
| 130 | + "3 Along with the Gods: The Two Worlds 18,739,629 45.68% " |
| 131 | + ] |
| 132 | + }, |
| 133 | + "execution_count": 3, |
| 134 | + "metadata": {}, |
| 135 | + "output_type": "execute_result" |
| 136 | + } |
| 137 | + ], |
| 138 | + "source": [ |
| 139 | + "rows = soup.table.find_all('tr')\n", |
| 140 | + "\n", |
| 141 | + "colname = rows.pop(0)\n", |
| 142 | + "colname = list(colname.stripped_strings)\n", |
| 143 | + "rows = [list(row.stripped_strings) for row in rows]\n", |
| 144 | + "\n", |
| 145 | + "df = pd.DataFrame(rows, columns=colname)\n", |
| 146 | + "df" |
| 147 | + ] |
| 148 | + }, |
| 149 | + { |
| 150 | + "cell_type": "code", |
| 151 | + "execution_count": 4, |
| 152 | + "metadata": { |
| 153 | + "collapsed": false |
| 154 | + }, |
48 | 155 | "outputs": [ |
49 | 156 | { |
50 | 157 | "name": "stdout", |
51 | 158 | "output_type": "stream", |
52 | 159 | "text": [ |
53 | | - "5\n", |
54 | | - "4\n", |
55 | | - "<tr class=\"tb-top\">\n", |
56 | | - "<th>週次</th>\n", |
57 | | - "<th>日期</th>\n", |
58 | | - "<th>週末票房總和</th>\n", |
59 | | - "<th>漲跌幅</th>\n", |
60 | | - "<th>冠軍片名</th>\n", |
61 | | - "<th>英文片名</th>\n", |
62 | | - "<th class=\"import\">週末票房冠軍</th>\n", |
63 | | - "<th>冠軍比例*</th>\n", |
64 | | - "</tr>\n" |
| 160 | + "Save csv to /home/afun/github/Python-Crawling-Tutorial/results/taipeibo.csv\n" |
65 | 161 | ] |
66 | 162 | } |
67 | 163 | ], |
68 | 164 | "source": [ |
69 | | - "rows = soup.table.find_all('tr')\n", |
70 | | - "colname = rows.pop(0)" |
| 165 | + "results = os.path.abspath('../results')\n", |
| 166 | + "if not os.path.exists(results):\n", |
| 167 | + " os.makedirs(results)\n", |
| 168 | + "\n", |
| 169 | + "filename = os.path.join(results, 'taipeibo.csv')\n", |
| 170 | + "df.to_csv(filename, index=False)\n", |
| 171 | + "print('Save csv to {}'.format(filename))" |
71 | 172 | ] |
72 | 173 | } |
73 | 174 | ], |
|
0 commit comments