Skip to content

Commit a23ec46

Browse files
committed
Switch to google style
1 parent 279f862 commit a23ec46

File tree

3 files changed

+114
-119
lines changed

3 files changed

+114
-119
lines changed

environment.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
name: webpage-monitor
22
dependencies:
3-
- python=3.6
4-
- absl-py
5-
- numpy
6-
- tqdm
7-
- requests
8-
- ipython
3+
- python=3.6
4+
- absl-py
5+
- numpy
6+
- tqdm
7+
- requests
8+
- ipython

main.py

Lines changed: 77 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -25,103 +25,101 @@
2525

2626

2727
def main(_):
  """Periodically snapshots every URL in the roster and emails any changes.

  Reads the roster JSON named by `FLAGS.roster_json` (a mapping from URL to a
  per-URL options dict), then loops: whenever more than `FLAGS.check_every`
  seconds have passed since the last check, every URL is snapshotted and
  diffed against its previous snapshot, and one email summarising all
  non-empty diffs is sent. The loop ends once more than `FLAGS.exit_after`
  seconds have elapsed (never, if that flag is None).

  Args:
    _: Unused positional argument supplied by `app.run`.
  """
  # Imported locally because the file-level imports only bring in `time()`.
  from time import sleep

  exit_after = np.inf if FLAGS.exit_after is None else FLAGS.exit_after

  with open(FLAGS.roster_json, 'rb') as file_handle:
    roster = json.load(file_handle)

  start_t = time()
  last_check_t = 0  # Epoch 0 forces a check on the first iteration.

  if FLAGS.clear_cached and exists(FLAGS.snapshot_dir):
    rmtree(FLAGS.snapshot_dir)

  while True:
    if time() - last_check_t > FLAGS.check_every:
      changes = []  # (url, delta) pairs for pages whose diff is non-empty.

      pbar = tqdm(roster.items())
      for url, opt in pbar:
        pbar.set_description(f'Checking {url}')

        # Snapshot the current webpage; skip this URL if the fetch failed.
        out_dir = join(FLAGS.snapshot_dir, util.folder_name_from_url(url))
        if not snapshot(url, out_dir):
          continue

        # Compare with the previous snapshot. File names are timestamps, so
        # lexicographic order is chronological order.
        snapshot_paths = sorted(glob(join(out_dir, '????_??_??_??_??_??.html')))
        if len(snapshot_paths) > 1:
          delta = diff_snapshots(snapshot_paths[-2], snapshot_paths[-1],
                                 out_dir, opt)
          if delta:
            changes.append((url, delta))

        # Remove all but the two latest snapshots to avoid storage explosion.
        # The slice is empty when there are two or fewer.
        for snapshot_path in snapshot_paths[:-2]:
          remove(snapshot_path)

      last_check_t = time()

      # Email myself the results.
      if changes:
        msg = ''.join(
            f'------\n{url}\n\n{delta}\n\n\n' for url, delta in changes)
        util.email_oneself(msg, FLAGS.gmail, subject='Webpage Monitor')
        logging.info('Change detected; email sent')
      else:
        logging.info('No change detected')

    if time() - start_t > exit_after:
      break

    # Pause instead of busy-waiting until the next check is due. The pause is
    # capped at one second so the exit deadline above is still honoured
    # promptly.
    until_next_check = FLAGS.check_every - (time() - last_check_t)
    sleep(min(1.0, max(0.0, until_next_check)))
8482

8583

8684
def diff_snapshots(html0_path, html1_path, out_dir, opt):
  """Diffs two HTML snapshots, drops ignored lines, and saves the result.

  Args:
    html0_path: Path to the first snapshot (`main` passes the previous one).
    html1_path: Path to the second snapshot (`main` passes the latest one).
    out_dir: Directory in which `delta.html` is written.
    opt: URL-specific options mapping. Its optional `ignore_prefix` entry may
      be None, a single string, or an iterable of strings; changed lines whose
      content starts with any of them are dropped.

  Returns:
    The remaining `+ `/`- ` diff lines joined by newlines; an empty string
    when there is no reportable change.
  """
  # Parse URL-specific options into the tuple form `str.startswith` accepts.
  ignore_prefices = opt.get('ignore_prefix')
  if ignore_prefices is None:
    ignore_prefices = ()
  elif isinstance(ignore_prefices, str):
    ignore_prefices = (ignore_prefices,)
  else:
    ignore_prefices = tuple(ignore_prefices)

  # Diff the two HTMLs.
  html0_lines = util.read_file(html0_path).split('\n')
  html1_lines = util.read_file(html1_path).split('\n')

  kept = []
  for line in difflib.ndiff(html0_lines, html1_lines):
    # Keep differences only (drops unchanged lines and '? ' hint lines).
    if not line.startswith(('+ ', '- ')):
      continue
    # Remove exactly the two-character ndiff marker, then leading spaces.
    # Chained lstrip('+ ').lstrip('- ') strips character *sets*, which also
    # ate leading '+'/'-' characters belonging to the page content itself
    # (e.g. '-->' became '>'), so such prefixes could never be matched.
    content = line[2:].lstrip(' ')
    if content.startswith(ignore_prefices):
      continue
    kept.append(line)

  filtered_delta = '\n'.join(kept)
  delta_path = join(out_dir, 'delta.html')
  util.write_file(filtered_delta, delta_path)
  return filtered_delta
109107

110108

111109
def snapshot(url, out_dir, timeout=30):
  """Downloads a webpage and stores it as a timestamped HTML file.

  Args:
    url: Address of the page to fetch.
    out_dir: Directory receiving the snapshot; created if missing.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout a stalled server would block the monitor indefinitely.

  Returns:
    True if the page was fetched and written, False if the request failed.
  """
  try:
    response = requests.get(url, timeout=timeout)
  except requests.exceptions.RequestException as err:
    # RequestException is the base class of ConnectionError and Timeout, so a
    # single unreachable or slow site cannot take the whole monitor down.
    logging.warning('Connection Error: %s; ignored (%s)', url, err)
    return False

  # Pages that are not valid UTF-8 get replacement characters instead of
  # raising UnicodeDecodeError.
  html_src = response.content.decode(errors='replace')

  makedirs(out_dir, exist_ok=True)

  # The name format must match the glob pattern used to list snapshots.
  timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
  html_path = join(out_dir, timestamp + '.html')
  util.write_file(html_src, html_path)
  return True
124122

125123

126124
# Script entry point: hand `main` to `app.run`.
if __name__ == '__main__':
  app.run(main)

util.py

Lines changed: 31 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,43 +3,40 @@
33

44

55
def read_file(path):
  """Reads a file and returns its content decoded as UTF-8 text.

  The file is opened in binary mode, so no newline translation takes place.

  Args:
    path: Path of the file to read.

  Returns:
    The decoded file content as a `str`.
  """
  with open(path, 'rb') as file_handle:
    return file_handle.read().decode('utf-8')
99

1010

1111
def write_file(str_, path):
  """Writes a string to a file as UTF-8, replacing any existing content.

  The file is opened in binary mode, so no newline translation takes place.

  Args:
    str_: Text to write.
    path: Destination file path.
  """
  with open(path, 'wb') as file_handle:
    file_handle.write(str_.encode('utf-8'))
1414

1515

1616
def folder_name_from_url(url):
  """Derives a folder name from a URL.

  Trailing slashes are stripped, every 'http://' and 'https://' occurrence is
  removed, and each of '/', '?', '&' and ':' becomes an underscore. The
  mapping is kept exactly as it was so that folders created by earlier runs
  are still found.

  Args:
    url: The URL to convert.

  Returns:
    The folder name as a `str`.
  """
  folder_name = url.rstrip('/')
  for scheme in ('http://', 'https://'):
    folder_name = folder_name.replace(scheme, '')
  # One pass over the string instead of four chained replace() calls.
  return folder_name.translate(str.maketrans('/?&:', '____'))
24+
25+
26+
def email_oneself(msg, gmail, subject='Untitled', pswd_path='./gmail_app_pswd'):
  """Sends a plain-text email from a Gmail account to that same account.

  Args:
    msg: Body text of the email.
    gmail: Gmail address used as login name, sender and sole recipient.
    subject: Subject line of the email.
    pswd_path: Path to a file holding the password used for the SMTP login;
      surrounding whitespace in the file is ignored.
  """
  pswd = read_file(pswd_path).strip()

  to_emails = [gmail]
  from_email = gmail

  # Use a separate name rather than rebinding the `msg` parameter.
  mime_msg = MIMEText(msg)
  mime_msg['Subject'] = subject
  mime_msg['To'] = ', '.join(to_emails)
  mime_msg['From'] = from_email

  # The context manager issues QUIT and closes the connection even when
  # starttls(), login() or sendmail() raises, so the socket cannot leak.
  with smtplib.SMTP('smtp.gmail.com', 587) as mail:
    mail.starttls()
    mail.login(gmail, pswd)
    mail.sendmail(from_email, to_emails, mime_msg.as_string())

0 commit comments

Comments
 (0)