|
25 | 25 |
|
26 | 26 |
|
def main(_):
    """Monitor a roster of webpages, emailing a diff when any of them change.

    Loops forever (or until --exit_after seconds have elapsed), and every
    --check_every seconds snapshots each URL in the roster, diffs it against
    the previous snapshot, and emails the aggregated deltas to oneself.

    Args:
        _: Unused positional argument supplied by absl's app.run().
    """
    from time import sleep  # local import: file header is outside this view

    # Run forever unless --exit_after (seconds) is given.
    exit_after = np.inf if FLAGS.exit_after is None else FLAGS.exit_after

    # Roster maps each URL to its per-URL options (e.g. 'ignore_prefix').
    with open(FLAGS.roster_json, 'rb') as file_handle:
        roster = json.load(file_handle)

    start_t = time()
    last_check_t = 0

    if FLAGS.clear_cached and exists(FLAGS.snapshot_dir):
        rmtree(FLAGS.snapshot_dir)

    while True:
        if time() - last_check_t > FLAGS.check_every:
            changed, deltas = [], []

            pbar = tqdm(roster.items())
            for url, opt in pbar:
                pbar.set_description(f'Checking {url}')

                # Snapshot the current webpage; skip this URL on failure so
                # one flaky site doesn't block the rest of the roster.
                out_dir = join(FLAGS.snapshot_dir, util.folder_name_from_url(url))
                success = snapshot(url, out_dir)
                if not success:
                    continue

                # Compare with the previous snapshot (filenames are
                # timestamped, so lexicographic sort is chronological).
                snapshot_paths = sorted(
                    glob(join(out_dir, '????_??_??_??_??_??.html')))
                if len(snapshot_paths) > 1:
                    delta = diff_snapshots(snapshot_paths[-2], snapshot_paths[-1],
                                           out_dir, opt)
                    if delta != '':
                        changed.append(url)
                        deltas.append(delta)

                # Remove earlier snapshots to avoid storage explosion; keep
                # the latest two so the next cycle still has a baseline.
                if len(snapshot_paths) > 2:
                    for snapshot_path in snapshot_paths[:-2]:
                        remove(snapshot_path)

            last_check_t = time()

            # Email myself the results.
            if changed:
                msg = ''
                for url, delta in zip(changed, deltas):
                    msg += f'------\n{url}\n\n{delta}\n\n\n'
                util.email_oneself(msg, FLAGS.gmail, subject='Webpage Monitor')

                logging.info('Change detected; email sent')
            else:
                logging.info('No change detected')

        if time() - start_t > exit_after:
            break

        # Fix: the original loop busy-waited at 100% CPU between checks.
        # A one-second nap keeps the idle loop cheap while leaving the
        # exit_after / check_every conditions responsive to within a second.
        sleep(1)
84 | 82 |
|
85 | 83 |
|
def diff_snapshots(html0_path, html1_path, out_dir, opt):
    """Diff two HTML snapshots of a webpage, filtering ignorable lines.

    Args:
        html0_path: Path to the earlier HTML snapshot.
        html1_path: Path to the later HTML snapshot.
        out_dir: Directory where the filtered diff is written as 'delta.html'.
        opt: Per-URL options dict; 'ignore_prefix' may be a single string or a
            list of strings naming line prefixes to exclude from the diff.

    Returns:
        The filtered diff as one string; '' when nothing relevant changed.
    """
    # Parse URL-specific options.
    ignore_prefices = opt.get('ignore_prefix')
    if ignore_prefices is None:
        ignore_prefices = []
    if isinstance(ignore_prefices, str):
        ignore_prefices = [ignore_prefices]
    # str.startswith accepts a tuple of alternatives; an empty tuple never
    # matches, so no prefix is ignored when the option is absent.
    ignore_prefices = tuple(ignore_prefices)
    # Diff the two HTMLs.
    html0_content = util.read_file(html0_path)
    html1_content = util.read_file(html1_path)
    delta = difflib.ndiff(html0_content.split('\n'), html1_content.split('\n'))
    # Keep added/removed lines only.
    delta = [x for x in delta if x.startswith(('+ ', '- '))]
    # Ignore specified patterns. Bug fix: the original chained
    # .lstrip('+ ').lstrip('- '), but lstrip strips a character *set*, so it
    # also ate leading '+', '-', and space characters belonging to the content
    # itself (e.g. a changed line starting with '--' could falsely match an
    # ignore prefix). The ndiff marker is exactly two characters, so slice it
    # off; keep a plain lstrip() to preserve leading-whitespace tolerance.
    filtered_delta = [
        x for x in delta
        if not x[2:].lstrip().startswith(ignore_prefices)
    ]
    filtered_delta = '\n'.join(filtered_delta)
    delta_path = join(out_dir, 'delta.html')
    util.write_file(filtered_delta, delta_path)
    return filtered_delta
109 | 107 |
|
110 | 108 |
|
def snapshot(url, out_dir):
    """Fetch a URL and save its HTML under a timestamped filename.

    Args:
        url: Webpage URL to fetch.
        out_dir: Directory receiving the snapshot; created if missing.

    Returns:
        True on success; False when the request failed (logged and skipped so
        the monitoring loop can proceed with the remaining URLs).
    """
    try:
        # Fix: the original call had no timeout, so one hung server could
        # stall the whole monitor forever. RequestException is the root of
        # the requests exception hierarchy, covering timeouts and other
        # transport errors, not just the ConnectionError originally caught.
        response = requests.get(url, timeout=60)
    except requests.exceptions.RequestException as error:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('Request failed for %s (%s); ignored', url, error)
        return False
    html_src = response.content.decode()
    if not exists(out_dir):
        makedirs(out_dir)
    # Timestamped names sort chronologically, which the caller relies on.
    timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    html_path = join(out_dir, timestamp + '.html')
    util.write_file(html_src, html_path)
    return True
124 | 122 |
|
125 | 123 |
|
if __name__ == '__main__':
    # absl entry point: parses command-line flags, then invokes main().
    app.run(main)
0 commit comments