
Commit c252226

Fix error
AttributeError: module 'os' has no attribute 'TimeoutError' at line 27 of scrapexpathlist.py
1 parent 591fcb6 commit c252226
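The failing line caught `os.TimeoutError`, but the `os` module has no such attribute, so the `except` clause itself raised `AttributeError` as soon as a timeout occurred; the built-in `TimeoutError` is the exception to catch. A minimal sketch of the corrected pattern (the `fetch_page` helper below is hypothetical, not part of this module, and assumes Python 3.10+, where `socket.timeout` is an alias of the built-in `TimeoutError`):

import urllib.request
import urllib.error


def fetch_page(url: str, timeout: float = 30) -> bytes:
    """Hypothetical helper showing the corrected exception handling."""
    try:
        # A socket-level timeout surfaces as the built-in TimeoutError
        # (or wrapped in URLError); there is no os.TimeoutError to catch.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.read()
    except TimeoutError:
        raise RuntimeError('HTTP request timed out')
    except urllib.error.URLError as e:
        raise RuntimeError(f'Fetch error: {e.reason}')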

1 file changed (+21, -18 lines)

scrapexpathlist.py

Lines changed: 21 additions & 18 deletions
@@ -1,23 +1,23 @@
 #!/usr/bin/env python3
 
-import io
-import os
+from typing import Callable, List, Tuple
 import urllib.request
 import urllib.error
 from lxml import etree
 from lxml.html import html5parser
 from pandas import DataFrame
-from typing import Callable, List, Tuple
 from http.client import HTTPResponse
-import re
 
 
 def fetch(params):
     url = params['url']
     selector_string = params['selector']
 
-    if not url: return (None, 'Missing URL')
-    if not selector_string: return (None, 'Missing selector')
+    if not url:
+        return (None, 'Missing URL')
+
+    if not selector_string:
+        return (None, 'Missing selector')
 
     try:
         selector = xpath(selector_string)
@@ -40,7 +40,7 @@ def xpath(s: str) -> etree.XPath:
     """
     return etree.XPath(
         s,
-        smart_strings=True, # so result strings don't ref XML doc
+        smart_strings=True,  # so result strings don't ref XML doc
         namespaces={
             'svg': 'http://www.w3.org/2000/svg',
         }
@@ -92,10 +92,10 @@ def select(tree: etree._Element, selector: etree.XPath) -> List[str]:
         return list(_item_to_string(item) for item in result)
     else:
         # count(//a) => float. Return list of float.
-        return [ result ]
+        return [result]
 
 
-def do_fetch(url: str, selector: etree.XPath, 
+def do_fetch(url: str, selector: etree.XPath,
              urlopen: Callable[[str], HTTPResponse]=urllib.request.urlopen,
              max_n_bytes: int=5*1024*1024,
              timeout: float=30) -> Tuple[DataFrame, str]:
@@ -108,23 +108,24 @@ def do_fetch(url: str, selector: etree.XPath,
     timeout -- number of seconds before we abort
     """
     try:
-        (response_info, text) = fetch_text(url, urlopen=urlopen, timeout=timeout)
+        (response_info, text) = fetch_text(url, urlopen=urlopen,
+                                           timeout=timeout)
     except urllib.error.URLError as e:
         return (None, f'Fetch error: {e.msg}')
-    except os.TimeoutError:
+    except TimeoutError:
         return (None, 'HTTP request timed out')
     except ValueError as e:
-        return (None, str(e)) # Exceeded max_n_bytes
+        return (None, str(e))  # Exceeded max_n_bytes
     except UnicodeDecodeError:
         return (None, 'HTML or XML has invalid charset')
 
     is_html = response_info.get_content_type() == 'text/html'
 
-    tree = parse_document(text, is_html) # FIXME handle errors
+    tree = parse_document(text, is_html)  # FIXME handle errors
 
-    values = select(tree, selector) # FIXME handle errors?
+    values = select(tree, selector)  # FIXME handle errors?
 
-    table = DataFrame({ 'XPath result': values })
+    table = DataFrame({'XPath result': values})
 
     return (table, None)
 
@@ -136,21 +137,23 @@ def fetch_text(url: str, max_n_bytes: int=5*1024*1024, timeout: float=30,
     This will never read more than `max_n_bytes` bytes from the response.
     It will also return before `timeout`s expire.
 
-    Throw `os.TimeoutError` if `timeout` expires.
+    Throw `TimeoutError` if `timeout` expires.
 
     Throw `ValueError` if the `max_n_bytes` is exceeded.
 
     Throw `URLError` if anything fails at the HTTP level or below.
 
     Throw `UnicodeDecodeError` if we cannot understand URL's encoding.
     """
-    # Throws os.URLError or os.TimeoutError
+    # Throws URLError or TimeoutError
     with urlopen(url, timeout=timeout) as response:
         # TODO avoid DoS. The above timeout is the _socket_ timeout: one
         # byte from the server resets it.
         b = response.read(max_n_bytes + 1)
         if (len(b) == max_n_bytes + 1):
-            raise ValueError(f'HTTP response is larger than {max_n_bytes} bytes')
+            raise ValueError(
+                f'HTTP response is larger than {max_n_bytes} bytes'
+            )
 
         text = b.decode(response.info().get_content_charset() or 'utf-8')
         return (response.info(), text)
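
A quick way to exercise the new `except TimeoutError:` branch is to pass a stub `urlopen` that times out; `do_fetch` should then report the timeout instead of crashing with `AttributeError`. A sketch under the assumption that the module is importable as `scrapexpathlist` (the stub below is a hypothetical test double):

from scrapexpathlist import do_fetch, xpath


def timing_out_urlopen(url, timeout=None):
    # Hypothetical test double: simulate a socket-level timeout.
    raise TimeoutError('timed out')


table, error = do_fetch('http://example.com/', xpath('//a/@href'),
                        urlopen=timing_out_urlopen)
assert table is None
assert error == 'HTTP request timed out'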
