Skip to content

Commit 58b7b01

Browse files
committed
extmod/modure: If input string is bytes, return bytes results too.
This applies to match.group() and split(). For ARM Thumb2, this increased code size by 12 bytes.
1 parent 871a45d commit 58b7b01

File tree

3 files changed

+14
-3
lines changed

3 files changed

+14
-3
lines changed

extmod/modure.c

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
#include "py/nlr.h"
3232
#include "py/runtime.h"
3333
#include "py/binary.h"
34+
#include "py/objstr.h"
3435

3536
#if MICROPY_PY_URE
3637

@@ -69,7 +70,8 @@ STATIC mp_obj_t match_group(mp_obj_t self_in, mp_obj_t no_in) {
6970
// no match for this group
7071
return mp_const_none;
7172
}
72-
return mp_obj_new_str(start, self->caps[no * 2 + 1] - start, false);
73+
return mp_obj_new_str_of_type(mp_obj_get_type(self->str),
74+
(const byte*)start, self->caps[no * 2 + 1] - start);
7375
}
7476
MP_DEFINE_CONST_FUN_OBJ_2(match_group_obj, match_group);
7577

@@ -129,6 +131,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
129131
mp_obj_re_t *self = MP_OBJ_TO_PTR(args[0]);
130132
Subject subj;
131133
size_t len;
134+
const mp_obj_type_t *str_type = mp_obj_get_type(args[1]);
132135
subj.begin = mp_obj_str_get_data(args[1], &len);
133136
subj.end = subj.begin + len;
134137
int caps_num = (self->re.sub + 1) * 2;
@@ -150,7 +153,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
150153
break;
151154
}
152155

153-
mp_obj_t s = mp_obj_new_str(subj.begin, caps[0] - subj.begin, false);
156+
mp_obj_t s = mp_obj_new_str_of_type(str_type, (const byte*)subj.begin, caps[0] - subj.begin);
154157
mp_obj_list_append(retval, s);
155158
if (self->re.sub > 0) {
156159
mp_not_implemented("Splitting with sub-captures");
@@ -161,7 +164,7 @@ STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
161164
}
162165
}
163166

164-
mp_obj_t s = mp_obj_new_str(subj.begin, subj.end - subj.begin, false);
167+
mp_obj_t s = mp_obj_new_str_of_type(str_type, (const byte*)subj.begin, subj.end - subj.begin);
165168
mp_obj_list_append(retval, s);
166169
return retval;
167170
}

tests/extmod/ure1.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,3 +80,6 @@
8080
re.compile("*")
8181
except:
8282
print("Caught invalid regex")
83+
84+
# bytes objects
85+
m = re.match(rb'a+?', b'ab'); print(m.group(0))

tests/extmod/ure_split.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,3 +26,8 @@
2626
r = re.compile("[a-f]+")
2727
s = r.split("0a3b9")
2828
print(s)
29+
30+
# bytes objects
31+
r = re.compile(b"x")
32+
s = r.split(b"fooxbar")
33+
print(s)

0 commit comments

Comments
 (0)