Skip to content

Commit 13f7ef1

Browse files
committed
Replace a call to cgi.parse_header()
Fixes #330
1 parent 32adbe1 commit 13f7ef1

File tree

2 files changed

+30
-5
lines changed

2 files changed

+30
-5
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fixed
2+
-----
3+
4+
* Replace a call to ``cgi.parse_header()``, which causes deprecation warnings in Python 3.11.

feedparser/encodings.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2727
# POSSIBILITY OF SUCH DAMAGE.
2828

29-
import cgi
3029
import codecs
3130
import re
31+
import typing as t
3232

3333
try:
3434
try:
@@ -68,6 +68,30 @@ def lazy_chardet_encoding(data):
6868
RE_XML_PI_ENCODING = re.compile(br'^<\?.*encoding=[\'"](.*?)[\'"].*\?>')
6969

7070

71+
def parse_content_type(line: str) -> t.Tuple[str, str]:
    """Parse an HTTP Content-Type header.

    The return value will be a tuple of strings:
    the MIME type, and the value of the "charset" (if any).

    The header is split on ";". The first piece, stripped of surrounding
    whitespace, is the MIME type. Each remaining piece is treated as a
    "name=value" parameter; the name is matched case-insensitively, and
    if "charset" appears more than once the last occurrence wins.
    Surrounding whitespace and single/double quotes are removed from the
    charset value. If no charset parameter exists, an empty string is
    returned in its place.

    This is a custom replacement for Python's cgi.parse_header().
    The cgi module will be removed in Python 3.13.
    """

    # str.split() with an explicit separator always yields at least one
    # element (even for an empty string), so no emptiness guard is needed.
    mime_type, *parameters = line.split(";")

    charset_value = ""
    for parameter in parameters:
        key, _, value = parameter.partition("=")
        if key.strip().lower() == "charset":
            # Strip whitespace first so that quotes adjacent to padding
            # are also removed.
            charset_value = value.strip().strip("\"'")

    return mime_type.strip(), charset_value
93+
94+
7195
def convert_to_utf8(http_headers, data, result):
7296
"""Detect and convert the character encoding to UTF-8.
7397
@@ -181,10 +205,7 @@ def convert_to_utf8(http_headers, data, result):
181205
# XML declaration encoding, and HTTP encoding, following the
182206
# heuristic defined in RFC 3023.
183207
http_content_type = http_headers.get('content-type') or ''
184-
http_content_type, params = cgi.parse_header(http_content_type)
185-
http_encoding = params.get('charset', '').replace("'", "")
186-
if isinstance(http_encoding, bytes):
187-
http_encoding = http_encoding.decode('utf-8', 'ignore')
208+
http_content_type, http_encoding = parse_content_type(http_content_type)
188209

189210
acceptable_content_type = 0
190211
application_content_types = ('application/xml', 'application/xml-dtd',

0 commit comments

Comments
 (0)