Skip to content

Commit 25fbe33

Browse files
authored
bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-14375)
1 parent 8d6668c commit 25fbe33

File tree

4 files changed

+188
-120
lines changed

4 files changed

+188
-120
lines changed

Doc/library/mimetypes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ behavior of the module.
9393
Specifying an empty list for *files* will prevent the system defaults from
9494
being applied: only the well-known values will be present from a built-in list.
9595

96+
If *files* is ``None`` the internal data structure is completely rebuilt to its
97+
initial default value. This is a stable operation and will produce the same results
98+
when called multiple times.
99+
96100
.. versionchanged:: 3.2
97101
Previously, Windows registry settings were ignored.
98102

Lib/mimetypes.py

Lines changed: 131 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,13 @@ class MimeTypes:
6666
def __init__(self, filenames=(), strict=True):
6767
if not inited:
6868
init()
69-
self.encodings_map = encodings_map.copy()
70-
self.suffix_map = suffix_map.copy()
69+
self.encodings_map = _encodings_map_default.copy()
70+
self.suffix_map = _suffix_map_default.copy()
7171
self.types_map = ({}, {}) # dict for (non-strict, strict)
7272
self.types_map_inv = ({}, {})
73-
for (ext, type) in types_map.items():
73+
for (ext, type) in _types_map_default.items():
7474
self.add_type(type, ext, True)
75-
for (ext, type) in common_types.items():
75+
for (ext, type) in _common_types_default.items():
7676
self.add_type(type, ext, False)
7777
for name in filenames:
7878
self.read(name, strict)
@@ -346,11 +346,19 @@ def init(files=None):
346346
global suffix_map, types_map, encodings_map, common_types
347347
global inited, _db
348348
inited = True # so that MimeTypes.__init__() doesn't call us again
349-
db = MimeTypes()
350-
if files is None:
349+
350+
if files is None or _db is None:
351+
db = MimeTypes()
351352
if _winreg:
352353
db.read_windows_registry()
353-
files = knownfiles
354+
355+
if files is None:
356+
files = knownfiles
357+
else:
358+
files = knownfiles + list(files)
359+
else:
360+
db = _db
361+
354362
for file in files:
355363
if os.path.isfile(file):
356364
db.read(file)
@@ -374,12 +382,12 @@ def read_mime_types(file):
374382

375383

376384
def _default_mime_types():
377-
global suffix_map
378-
global encodings_map
379-
global types_map
380-
global common_types
385+
global suffix_map, _suffix_map_default
386+
global encodings_map, _encodings_map_default
387+
global types_map, _types_map_default
388+
global common_types, _common_types_default
381389

382-
suffix_map = {
390+
suffix_map = _suffix_map_default = {
383391
'.svgz': '.svg.gz',
384392
'.tgz': '.tar.gz',
385393
'.taz': '.tar.gz',
@@ -388,7 +396,7 @@ def _default_mime_types():
388396
'.txz': '.tar.xz',
389397
}
390398

391-
encodings_map = {
399+
encodings_map = _encodings_map_default = {
392400
'.gz': 'gzip',
393401
'.Z': 'compress',
394402
'.bz2': 'bzip2',
@@ -399,152 +407,155 @@ def _default_mime_types():
399407
# at http://www.iana.org/assignments/media-types
400408
# or extensions, i.e. using the x- prefix
401409

402-
# If you add to these, please keep them sorted!
403-
types_map = {
410+
# If you add to these, please keep them sorted by mime type.
411+
# Make sure the entry with the preferred file extension for a particular mime type
412+
# appears before any others of the same mimetype.
413+
types_map = _types_map_default = {
414+
'.js' : 'application/javascript',
415+
'.mjs' : 'application/javascript',
416+
'.json' : 'application/json',
417+
'.doc' : 'application/msword',
418+
'.dot' : 'application/msword',
419+
'.wiz' : 'application/msword',
420+
'.bin' : 'application/octet-stream',
404421
'.a' : 'application/octet-stream',
422+
'.dll' : 'application/octet-stream',
423+
'.exe' : 'application/octet-stream',
424+
'.o' : 'application/octet-stream',
425+
'.obj' : 'application/octet-stream',
426+
'.so' : 'application/octet-stream',
427+
'.oda' : 'application/oda',
428+
'.pdf' : 'application/pdf',
429+
'.p7c' : 'application/pkcs7-mime',
430+
'.ps' : 'application/postscript',
405431
'.ai' : 'application/postscript',
406-
'.aif' : 'audio/x-aiff',
407-
'.aifc' : 'audio/x-aiff',
408-
'.aiff' : 'audio/x-aiff',
409-
'.au' : 'audio/basic',
410-
'.avi' : 'video/x-msvideo',
411-
'.bat' : 'text/plain',
432+
'.eps' : 'application/postscript',
433+
'.m3u' : 'application/vnd.apple.mpegurl',
434+
'.m3u8' : 'application/vnd.apple.mpegurl',
435+
'.xls' : 'application/vnd.ms-excel',
436+
'.xlb' : 'application/vnd.ms-excel',
437+
'.ppt' : 'application/vnd.ms-powerpoint',
438+
'.pot' : 'application/vnd.ms-powerpoint',
439+
'.ppa' : 'application/vnd.ms-powerpoint',
440+
'.pps' : 'application/vnd.ms-powerpoint',
441+
'.pwz' : 'application/vnd.ms-powerpoint',
442+
'.wasm' : 'application/wasm',
412443
'.bcpio' : 'application/x-bcpio',
413-
'.bin' : 'application/octet-stream',
414-
'.bmp' : 'image/bmp',
415-
'.c' : 'text/plain',
416-
'.cdf' : 'application/x-netcdf',
417444
'.cpio' : 'application/x-cpio',
418445
'.csh' : 'application/x-csh',
419-
'.css' : 'text/css',
420-
'.csv' : 'text/csv',
421-
'.dll' : 'application/octet-stream',
422-
'.doc' : 'application/msword',
423-
'.dot' : 'application/msword',
424446
'.dvi' : 'application/x-dvi',
425-
'.eml' : 'message/rfc822',
426-
'.eps' : 'application/postscript',
427-
'.etx' : 'text/x-setext',
428-
'.exe' : 'application/octet-stream',
429-
'.gif' : 'image/gif',
430447
'.gtar' : 'application/x-gtar',
431-
'.h' : 'text/plain',
432448
'.hdf' : 'application/x-hdf',
433-
'.htm' : 'text/html',
434-
'.html' : 'text/html',
435-
'.ico' : 'image/vnd.microsoft.icon',
436-
'.ief' : 'image/ief',
437-
'.jpe' : 'image/jpeg',
438-
'.jpeg' : 'image/jpeg',
439-
'.jpg' : 'image/jpeg',
440-
'.js' : 'application/javascript',
441-
'.json' : 'application/json',
442-
'.ksh' : 'text/plain',
443449
'.latex' : 'application/x-latex',
444-
'.m1v' : 'video/mpeg',
445-
'.m3u' : 'application/vnd.apple.mpegurl',
446-
'.m3u8' : 'application/vnd.apple.mpegurl',
447-
'.man' : 'application/x-troff-man',
448-
'.me' : 'application/x-troff-me',
449-
'.mht' : 'message/rfc822',
450-
'.mhtml' : 'message/rfc822',
451450
'.mif' : 'application/x-mif',
452-
'.mjs' : 'application/javascript',
453-
'.mov' : 'video/quicktime',
454-
'.movie' : 'video/x-sgi-movie',
455-
'.mp2' : 'audio/mpeg',
456-
'.mp3' : 'audio/mpeg',
457-
'.mp4' : 'video/mp4',
458-
'.mpa' : 'video/mpeg',
459-
'.mpe' : 'video/mpeg',
460-
'.mpeg' : 'video/mpeg',
461-
'.mpg' : 'video/mpeg',
462-
'.ms' : 'application/x-troff-ms',
451+
'.cdf' : 'application/x-netcdf',
463452
'.nc' : 'application/x-netcdf',
464-
'.nws' : 'message/rfc822',
465-
'.o' : 'application/octet-stream',
466-
'.obj' : 'application/octet-stream',
467-
'.oda' : 'application/oda',
468453
'.p12' : 'application/x-pkcs12',
469-
'.p7c' : 'application/pkcs7-mime',
470-
'.pbm' : 'image/x-portable-bitmap',
471-
'.pdf' : 'application/pdf',
472454
'.pfx' : 'application/x-pkcs12',
473-
'.pgm' : 'image/x-portable-graymap',
474-
'.pl' : 'text/plain',
475-
'.png' : 'image/png',
476-
'.pnm' : 'image/x-portable-anymap',
477-
'.pot' : 'application/vnd.ms-powerpoint',
478-
'.ppa' : 'application/vnd.ms-powerpoint',
479-
'.ppm' : 'image/x-portable-pixmap',
480-
'.pps' : 'application/vnd.ms-powerpoint',
481-
'.ppt' : 'application/vnd.ms-powerpoint',
482-
'.ps' : 'application/postscript',
483-
'.pwz' : 'application/vnd.ms-powerpoint',
484-
'.py' : 'text/x-python',
455+
'.ram' : 'application/x-pn-realaudio',
485456
'.pyc' : 'application/x-python-code',
486457
'.pyo' : 'application/x-python-code',
487-
'.qt' : 'video/quicktime',
488-
'.ra' : 'audio/x-pn-realaudio',
489-
'.ram' : 'application/x-pn-realaudio',
490-
'.ras' : 'image/x-cmu-raster',
491-
'.rdf' : 'application/xml',
492-
'.rgb' : 'image/x-rgb',
493-
'.roff' : 'application/x-troff',
494-
'.rtx' : 'text/richtext',
495-
'.sgm' : 'text/x-sgml',
496-
'.sgml' : 'text/x-sgml',
497458
'.sh' : 'application/x-sh',
498459
'.shar' : 'application/x-shar',
499-
'.snd' : 'audio/basic',
500-
'.so' : 'application/octet-stream',
501-
'.src' : 'application/x-wais-source',
460+
'.swf' : 'application/x-shockwave-flash',
502461
'.sv4cpio': 'application/x-sv4cpio',
503462
'.sv4crc' : 'application/x-sv4crc',
504-
'.svg' : 'image/svg+xml',
505-
'.swf' : 'application/x-shockwave-flash',
506-
'.t' : 'application/x-troff',
507463
'.tar' : 'application/x-tar',
508464
'.tcl' : 'application/x-tcl',
509465
'.tex' : 'application/x-tex',
510466
'.texi' : 'application/x-texinfo',
511467
'.texinfo': 'application/x-texinfo',
512-
'.tif' : 'image/tiff',
513-
'.tiff' : 'image/tiff',
468+
'.roff' : 'application/x-troff',
469+
'.t' : 'application/x-troff',
514470
'.tr' : 'application/x-troff',
515-
'.tsv' : 'text/tab-separated-values',
516-
'.txt' : 'text/plain',
471+
'.man' : 'application/x-troff-man',
472+
'.me' : 'application/x-troff-me',
473+
'.ms' : 'application/x-troff-ms',
517474
'.ustar' : 'application/x-ustar',
518-
'.vcf' : 'text/x-vcard',
519-
'.wasm' : 'application/wasm',
520-
'.wav' : 'audio/x-wav',
521-
'.webm' : 'video/webm',
522-
'.wiz' : 'application/msword',
475+
'.src' : 'application/x-wais-source',
476+
'.xsl' : 'application/xml',
477+
'.rdf' : 'application/xml',
523478
'.wsdl' : 'application/xml',
524-
'.xbm' : 'image/x-xbitmap',
525-
'.xlb' : 'application/vnd.ms-excel',
526-
'.xls' : 'application/vnd.ms-excel',
527-
'.xml' : 'text/xml',
528479
'.xpdl' : 'application/xml',
480+
'.zip' : 'application/zip',
481+
'.au' : 'audio/basic',
482+
'.snd' : 'audio/basic',
483+
'.mp3' : 'audio/mpeg',
484+
'.mp2' : 'audio/mpeg',
485+
'.aif' : 'audio/x-aiff',
486+
'.aifc' : 'audio/x-aiff',
487+
'.aiff' : 'audio/x-aiff',
488+
'.ra' : 'audio/x-pn-realaudio',
489+
'.wav' : 'audio/x-wav',
490+
'.bmp' : 'image/bmp',
491+
'.gif' : 'image/gif',
492+
'.ief' : 'image/ief',
493+
'.jpg' : 'image/jpeg',
494+
'.jpe' : 'image/jpeg',
495+
'.jpeg' : 'image/jpeg',
496+
'.png' : 'image/png',
497+
'.svg' : 'image/svg+xml',
498+
'.tiff' : 'image/tiff',
499+
'.tif' : 'image/tiff',
500+
'.ico' : 'image/vnd.microsoft.icon',
501+
'.ras' : 'image/x-cmu-raster',
502+
'.bmp' : 'image/x-ms-bmp',
503+
'.pnm' : 'image/x-portable-anymap',
504+
'.pbm' : 'image/x-portable-bitmap',
505+
'.pgm' : 'image/x-portable-graymap',
506+
'.ppm' : 'image/x-portable-pixmap',
507+
'.rgb' : 'image/x-rgb',
508+
'.xbm' : 'image/x-xbitmap',
529509
'.xpm' : 'image/x-xpixmap',
530-
'.xsl' : 'application/xml',
531510
'.xwd' : 'image/x-xwindowdump',
532-
'.zip' : 'application/zip',
511+
'.eml' : 'message/rfc822',
512+
'.mht' : 'message/rfc822',
513+
'.mhtml' : 'message/rfc822',
514+
'.nws' : 'message/rfc822',
515+
'.css' : 'text/css',
516+
'.csv' : 'text/csv',
517+
'.html' : 'text/html',
518+
'.htm' : 'text/html',
519+
'.txt' : 'text/plain',
520+
'.bat' : 'text/plain',
521+
'.c' : 'text/plain',
522+
'.h' : 'text/plain',
523+
'.ksh' : 'text/plain',
524+
'.pl' : 'text/plain',
525+
'.rtx' : 'text/richtext',
526+
'.tsv' : 'text/tab-separated-values',
527+
'.py' : 'text/x-python',
528+
'.etx' : 'text/x-setext',
529+
'.sgm' : 'text/x-sgml',
530+
'.sgml' : 'text/x-sgml',
531+
'.vcf' : 'text/x-vcard',
532+
'.xml' : 'text/xml',
533+
'.mp4' : 'video/mp4',
534+
'.mpeg' : 'video/mpeg',
535+
'.m1v' : 'video/mpeg',
536+
'.mpa' : 'video/mpeg',
537+
'.mpe' : 'video/mpeg',
538+
'.mpg' : 'video/mpeg',
539+
'.mov' : 'video/quicktime',
540+
'.qt' : 'video/quicktime',
541+
'.webm' : 'video/webm',
542+
'.avi' : 'video/x-msvideo',
543+
'.movie' : 'video/x-sgi-movie',
533544
}
534545

535546
# These are non-standard types, commonly found in the wild. They will
536547
# only match if strict=0 flag is given to the API methods.
537548

538549
# Please sort these too
539-
common_types = {
540-
'.jpg' : 'image/jpg',
541-
'.mid' : 'audio/midi',
550+
common_types = _common_types_default = {
551+
'.rtf' : 'application/rtf',
542552
'.midi': 'audio/midi',
553+
'.mid' : 'audio/midi',
554+
'.jpg' : 'image/jpg',
555+
'.pict': 'image/pict',
543556
'.pct' : 'image/pict',
544557
'.pic' : 'image/pict',
545-
'.pict': 'image/pict',
546-
'.rtf' : 'application/rtf',
547-
'.xul' : 'text/xul'
558+
'.xul' : 'text/xul',
548559
}
549560

550561

Lib/test/test_mimetypes.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,57 @@ def test_encoding(self):
7979
strict=True)
8080
self.assertEqual(exts, ['.g3', '.g\xb3'])
8181

82+
def test_init_reinitializes(self):
83+
# Issue 4963: make sure an init starts clean
84+
# First, put some poison into the types table
85+
mimetypes.add_type('foo/bar', '.foobar')
86+
self.assertEqual(mimetypes.guess_extension('foo/bar'), '.foobar')
87+
# Reinitialize
88+
mimetypes.init()
89+
# Poison should be gone.
90+
self.assertEqual(mimetypes.guess_extension('foo/bar'), None)
91+
92+
def test_preferred_extension(self):
93+
def check_extensions():
94+
self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin')
95+
self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps')
96+
self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u')
97+
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls')
98+
self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt')
99+
self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi')
100+
self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff')
101+
self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl')
102+
self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3')
103+
self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg')
104+
self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff')
105+
self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml')
106+
self.assertEqual(mimetypes.guess_extension('text/html'), '.html')
107+
self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt')
108+
self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg')
109+
self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov')
110+
111+
check_extensions()
112+
mimetypes.init()
113+
check_extensions()
114+
115+
def test_init_stability(self):
116+
mimetypes.init()
117+
118+
suffix_map = mimetypes.suffix_map
119+
encodings_map = mimetypes.encodings_map
120+
types_map = mimetypes.types_map
121+
common_types = mimetypes.common_types
122+
123+
mimetypes.init()
124+
self.assertIsNot(suffix_map, mimetypes.suffix_map)
125+
self.assertIsNot(encodings_map, mimetypes.encodings_map)
126+
self.assertIsNot(types_map, mimetypes.types_map)
127+
self.assertIsNot(common_types, mimetypes.common_types)
128+
self.assertEqual(suffix_map, mimetypes.suffix_map)
129+
self.assertEqual(encodings_map, mimetypes.encodings_map)
130+
self.assertEqual(types_map, mimetypes.types_map)
131+
self.assertEqual(common_types, mimetypes.common_types)
132+
82133
def test_path_like_ob(self):
83134
filename = "LICENSE.txt"
84135
filepath = pathlib.Path(filename)

0 commit comments

Comments
 (0)