|
25 | 25 |
|
26 | 26 |
|
def main(_):
    """Monitor a roster of webpages, emailing a diff when any of them change.

    Loops forever (or until --exit_after seconds have elapsed), and every
    --check_every seconds snapshots each URL in the roster, diffs it against
    the previous snapshot, and emails the aggregated deltas to oneself.

    Args:
        _: Unused positional argument supplied by absl's app.run().
    """
    from time import sleep  # local import: file header is outside this view

    # Run forever unless --exit_after (seconds) is given.
    exit_after = np.inf if FLAGS.exit_after is None else FLAGS.exit_after

    # Roster maps each URL to its per-URL options (e.g. 'ignore_prefix').
    with open(FLAGS.roster_json, 'rb') as file_handle:
        roster = json.load(file_handle)

    start_t = time()
    last_check_t = 0

    if FLAGS.clear_cached and exists(FLAGS.snapshot_dir):
        rmtree(FLAGS.snapshot_dir)

    while True:
        if time() - last_check_t > FLAGS.check_every:
            changed, deltas = [], []

            pbar = tqdm(roster.items())
            for url, opt in pbar:
                pbar.set_description(f'Checking {url}')

                # Snapshot the current webpage; skip this URL on failure so
                # one flaky site doesn't block the rest of the roster.
                out_dir = join(FLAGS.snapshot_dir, util.folder_name_from_url(url))
                success = snapshot(url, out_dir)
                if not success:
                    continue

                # Compare with the previous snapshot (filenames are
                # timestamped, so lexicographic sort is chronological).
                snapshot_paths = sorted(
                    glob(join(out_dir, '????_??_??_??_??_??.html')))
                if len(snapshot_paths) > 1:
                    delta = diff_snapshots(snapshot_paths[-2], snapshot_paths[-1],
                                           out_dir, opt)
                    if delta != '':
                        changed.append(url)
                        deltas.append(delta)

                # Remove earlier snapshots to avoid storage explosion; keep
                # the latest two so the next cycle still has a baseline.
                if len(snapshot_paths) > 2:
                    for snapshot_path in snapshot_paths[:-2]:
                        remove(snapshot_path)

            last_check_t = time()

            # Email myself the results.
            if changed:
                msg = ''
                for url, delta in zip(changed, deltas):
                    msg += f'------\n{url}\n\n{delta}\n\n\n'
                util.email_oneself(msg, FLAGS.gmail, subject='Webpage Monitor')

                logging.info('Change detected; email sent')
            else:
                logging.info('No change detected')

        if time() - start_t > exit_after:
            break

        # Fix: the original loop busy-waited at 100% CPU between checks.
        # A one-second nap keeps the idle loop cheap while leaving the
        # exit_after / check_every conditions responsive to within a second.
        sleep(1)
84 | 82 |
|
85 | 83 |
|
def diff_snapshots(html0_path, html1_path, out_dir, opt):
    """Diff two HTML snapshots of a webpage, filtering ignorable lines.

    Args:
        html0_path: Path to the earlier HTML snapshot.
        html1_path: Path to the later HTML snapshot.
        out_dir: Directory where the filtered diff is written as 'delta.html'.
        opt: Per-URL options dict; 'ignore_prefix' may be a single string or a
            list of strings naming line prefixes to exclude from the diff.

    Returns:
        The filtered diff as one string; '' when nothing relevant changed.
    """
    # Parse URL-specific options.
    ignore_prefices = opt.get('ignore_prefix')
    if ignore_prefices is None:
        ignore_prefices = []
    if isinstance(ignore_prefices, str):
        ignore_prefices = [ignore_prefices]
    # str.startswith accepts a tuple of alternatives; an empty tuple never
    # matches, so no prefix is ignored when the option is absent.
    ignore_prefices = tuple(ignore_prefices)
    # Diff the two HTMLs.
    html0_content = util.read_file(html0_path)
    html1_content = util.read_file(html1_path)
    delta = difflib.ndiff(html0_content.split('\n'), html1_content.split('\n'))
    # Keep added/removed lines only.
    delta = [x for x in delta if x.startswith(('+ ', '- '))]
    # Ignore specified patterns. Bug fix: the original chained
    # .lstrip('+ ').lstrip('- '), but lstrip strips a character *set*, so it
    # also ate leading '+', '-', and space characters belonging to the content
    # itself (e.g. a changed line starting with '--' could falsely match an
    # ignore prefix). The ndiff marker is exactly two characters, so slice it
    # off; keep a plain lstrip() to preserve leading-whitespace tolerance.
    filtered_delta = [
        x for x in delta
        if not x[2:].lstrip().startswith(ignore_prefices)
    ]
    filtered_delta = '\n'.join(filtered_delta)
    delta_path = join(out_dir, 'delta.html')
    util.write_file(filtered_delta, delta_path)
    return filtered_delta
109 | 107 |
|
110 | 108 |
|
def snapshot(url, out_dir):
    """Fetch a URL and save its HTML under a timestamped filename.

    Args:
        url: Webpage URL to fetch.
        out_dir: Directory receiving the snapshot; created if missing.

    Returns:
        True on success; False when the request failed (logged and skipped so
        the monitoring loop can proceed with the remaining URLs).
    """
    try:
        # Fix: the original call had no timeout, so one hung server could
        # stall the whole monitor forever. RequestException is the root of
        # the requests exception hierarchy, covering timeouts and other
        # transport errors, not just the ConnectionError originally caught.
        response = requests.get(url, timeout=60)
    except requests.exceptions.RequestException as error:
        # logging.warn is a deprecated alias of logging.warning.
        logging.warning('Request failed for %s (%s); ignored', url, error)
        return False
    html_src = response.content.decode()
    if not exists(out_dir):
        makedirs(out_dir)
    # Timestamped names sort chronologically, which the caller relies on.
    timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    html_path = join(out_dir, timestamp + '.html')
    util.write_file(html_src, html_path)
    return True
124 | 122 |
|
125 | 123 |
|
if __name__ == '__main__':
    # absl entry point: parses command-line flags, then invokes main().
    app.run(main)
0 commit comments