Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Include/fileutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ PyAPI_FUNC(wchar_t *) Py_DecodeLocale(
PyAPI_FUNC(char*) Py_EncodeLocale(
const wchar_t *text,
size_t *error_pos);

/* Similar to Py_EncodeLocale(), but the returned buffer must be released
   with PyMem_RawFree() instead of PyMem_Free(). */
PyAPI_FUNC(char*) _Py_EncodeLocaleRaw(
const wchar_t *text,
size_t *error_pos);
#endif

#ifndef Py_LIMITED_API

PyAPI_FUNC(PyObject *) _Py_device_encoding(int);

#ifdef MS_WINDOWS
Expand Down
4 changes: 2 additions & 2 deletions Modules/getpath.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,13 +140,13 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
{
int err;
char *fname;
fname = Py_EncodeLocale(path, NULL);
fname = _Py_EncodeLocaleRaw(path, NULL);
if (fname == NULL) {
errno = EINVAL;
return -1;
}
err = stat(fname, buf);
PyMem_Free(fname);
PyMem_RawFree(fname);
return err;
}

Expand Down
4 changes: 2 additions & 2 deletions Modules/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1490,14 +1490,14 @@ pymain_open_filename(_PyMain *pymain)
char *cfilename_buffer;
const char *cfilename;
int err = errno;
cfilename_buffer = Py_EncodeLocale(pymain->filename, NULL);
cfilename_buffer = _Py_EncodeLocaleRaw(pymain->filename, NULL);
if (cfilename_buffer != NULL)
cfilename = cfilename_buffer;
else
cfilename = "<unprintable file name>";
fprintf(stderr, "%ls: can't open file '%s': [Errno %d] %s\n",
pymain->config.program, cfilename, err, strerror(err));
PyMem_Free(cfilename_buffer);
PyMem_RawFree(cfilename_buffer);
pymain->status = 2;
return NULL;
}
Expand Down
25 changes: 21 additions & 4 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5158,7 +5158,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
On memory allocation failure, return NULL and write (size_t)-1 into
*error_pos (if error_pos is set). */
char*
_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
int raw_malloc)
{
const Py_ssize_t max_char_size = 4;
Py_ssize_t len = wcslen(text);
Expand All @@ -5167,7 +5168,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)

char *bytes;
if (len <= PY_SSIZE_T_MAX / max_char_size - 1) {
bytes = PyMem_Malloc((len + 1) * max_char_size);
if (raw_malloc) {
bytes = PyMem_RawMalloc((len + 1) * max_char_size);
}
else {
bytes = PyMem_Malloc((len + 1) * max_char_size);
}
}
else {
bytes = NULL;
Expand Down Expand Up @@ -5221,7 +5227,13 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
*p++ = '\0';

size_t final_size = (p - bytes);
char *bytes2 = PyMem_Realloc(bytes, final_size);
char *bytes2;
if (raw_malloc) {
bytes2 = PyMem_RawRealloc(bytes, final_size);
}
else {
bytes2 = PyMem_Realloc(bytes, final_size);
}
if (bytes2 == NULL) {
if (error_pos != NULL) {
*error_pos = (size_t)-1;
Expand All @@ -5231,7 +5243,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
return bytes2;

error:
PyMem_Free(bytes);
if (raw_malloc) {
PyMem_RawFree(bytes);
}
else {
PyMem_Free(bytes);
}
return NULL;
}

Expand Down
104 changes: 72 additions & 32 deletions Python/fileutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ extern int winerror_to_errno(int);
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size,
size_t *p_wlen);
extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text,
size_t *error_pos);
size_t *error_pos, int raw_malloc);

#ifdef O_CLOEXEC
/* Does open() support the O_CLOEXEC flag? Possible values:
Expand Down Expand Up @@ -183,7 +183,7 @@ check_force_ascii(void)
}

static char*
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_malloc)
{
char *result = NULL, *out;
size_t len, i;
Expand All @@ -194,7 +194,13 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)

len = wcslen(text);

result = PyMem_Malloc(len + 1); /* +1 for NUL byte */
/* +1 for NUL byte */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to your change, but should it be "NULL byte"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, I only saw your review after I merged my PR. Anyway, I fixed this typo in my following PR: PR #4963.

if (raw_malloc) {
result = PyMem_RawMalloc(len + 1);
}
else {
result = PyMem_Malloc(len + 1);
}
if (result == NULL)
return NULL;

Expand All @@ -211,9 +217,15 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
*out++ = (char)(ch - 0xdc00);
}
else {
if (error_pos != NULL)
if (error_pos != NULL) {
*error_pos = i;
PyMem_Free(result);
}
if (raw_malloc) {
PyMem_RawFree(result);
}
else {
PyMem_Free(result);
}
return NULL;
}
}
Expand Down Expand Up @@ -423,7 +435,7 @@ Py_DecodeLocale(const char* arg, size_t *size)

#if !defined(__APPLE__) && !defined(__ANDROID__)
static char*
encode_locale(const wchar_t *text, size_t *error_pos)
encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
{
const size_t len = wcslen(text);
char *result = NULL, *bytes = NULL;
Expand Down Expand Up @@ -455,8 +467,14 @@ encode_locale(const wchar_t *text, size_t *error_pos)
else
converted = wcstombs(NULL, buf, 0);
if (converted == (size_t)-1) {
if (result != NULL)
PyMem_Free(result);
if (result != NULL) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once again, not quite related to your change, but if PyMem_RawFree and PyMem_Free behave like free, it should be valid to pass a NULL argument, so the test is not needed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto, fixed in PR #4963.

if (raw_malloc) {
PyMem_RawFree(result);
}
else {
PyMem_Free(result);
}
}
if (error_pos != NULL)
*error_pos = i;
return NULL;
Expand All @@ -475,10 +493,16 @@ encode_locale(const wchar_t *text, size_t *error_pos)
}

size += 1; /* nul byte at the end */
result = PyMem_Malloc(size);
if (raw_malloc) {
result = PyMem_RawMalloc(size);
}
else {
result = PyMem_Malloc(size);
}
if (result == NULL) {
if (error_pos != NULL)
if (error_pos != NULL) {
*error_pos = (size_t)-1;
}
return NULL;
}
bytes = result;
Expand All @@ -487,6 +511,28 @@ encode_locale(const wchar_t *text, size_t *error_pos)
}
#endif

/* Pick the encoder used to convert a wide character string to bytes and
   forward text, error_pos and raw_malloc to it unchanged:

   - on Apple and Android builds: always UTF-8 with surrogateescape;
   - otherwise, UTF-8 with surrogateescape when Py_UTF8Mode is 1;
   - otherwise, on non-Windows builds, ASCII with surrogateescape when
     force_ascii is set (it is computed by check_force_ascii() on first
     use, while it still holds -1);
   - otherwise, the current locale encoder.

   raw_malloc is passed through to the selected encoder, which uses it to
   choose between the PyMem_Raw* and PyMem_* allocator families. */
static char*
encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
{
#if defined(__APPLE__) || defined(__ANDROID__)
    return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
#else   /* !(__APPLE__ || __ANDROID__) */
    if (Py_UTF8Mode == 1) {
        return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
    }

#  ifndef MS_WINDOWS
    /* -1 means "not checked yet": resolve the flag lazily. */
    if (force_ascii == -1) {
        force_ascii = check_force_ascii();
    }

    if (force_ascii != 0) {
        return encode_ascii_surrogateescape(text, error_pos, raw_malloc);
    }
#  endif  /* !MS_WINDOWS */

    return encode_current_locale(text, error_pos, raw_malloc);
#endif  /* __APPLE__ || __ANDROID__ */
}

/* Encode a wide character string to the locale encoding with the
surrogateescape error handler: surrogate characters in the range
U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
Expand All @@ -502,23 +548,16 @@ encode_locale(const wchar_t *text, size_t *error_pos)
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
#if defined(__APPLE__) || defined(__ANDROID__)
return _Py_EncodeUTF8_surrogateescape(text, error_pos);
#else /* __APPLE__ */
if (Py_UTF8Mode == 1) {
return _Py_EncodeUTF8_surrogateescape(text, error_pos);
}

#ifndef MS_WINDOWS
if (force_ascii == -1)
force_ascii = check_force_ascii();
return encode_locale(text, error_pos, 0);
}

if (force_ascii)
return encode_ascii_surrogateescape(text, error_pos);
#endif

return encode_locale(text, error_pos);
#endif /* __APPLE__ or __ANDROID__ */
/* Variant of Py_EncodeLocale() whose result must be released with
   PyMem_RawFree() rather than PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* Non-zero asks encode_locale() for the raw allocator variant. */
    const int use_raw_malloc = 1;

    return encode_locale(text, error_pos, use_raw_malloc);
}


Expand Down Expand Up @@ -1029,11 +1068,12 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode)
errno = EINVAL;
return NULL;
}
cpath = Py_EncodeLocale(path, NULL);
if (cpath == NULL)
cpath = _Py_EncodeLocaleRaw(path, NULL);
if (cpath == NULL) {
return NULL;
}
f = fopen(cpath, cmode);
PyMem_Free(cpath);
PyMem_RawFree(cpath);
#else
f = _wfopen(path, mode);
#endif
Expand Down Expand Up @@ -1341,13 +1381,13 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
int res;
size_t r1;

cpath = Py_EncodeLocale(path, NULL);
cpath = _Py_EncodeLocaleRaw(path, NULL);
if (cpath == NULL) {
errno = EINVAL;
return -1;
}
res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
PyMem_Free(cpath);
PyMem_RawFree(cpath);
if (res == -1)
return -1;
if (res == Py_ARRAY_LENGTH(cbuf)) {
Expand Down Expand Up @@ -1386,13 +1426,13 @@ _Py_wrealpath(const wchar_t *path,
wchar_t *wresolved_path;
char *res;
size_t r;
cpath = Py_EncodeLocale(path, NULL);
cpath = _Py_EncodeLocaleRaw(path, NULL);
if (cpath == NULL) {
errno = EINVAL;
return NULL;
}
res = realpath(cpath, cresolved_path);
PyMem_Free(cpath);
PyMem_RawFree(cpath);
if (res == NULL)
return NULL;

Expand Down