Feature #7368 » patch.diff
| string.c | ||
|---|---|---|
| return rb_str_split_m(1, &sep, str); | ||
| } | ||
| /* Forward declaration: the each_line code below calls this to pick between str_each_line_valid and str_each_line_invalid. */ static VALUE rb_str_valid_encoding_p(VALUE str); | ||
| static void | ||
| line_yield(VALUE str, const char *sub, const char *subend) | ||
| { | ||
| long len = RSTRING_LEN(str); | ||
| const char *ptr = RSTRING_PTR(str); | ||
| VALUE line = rb_str_new5(str, sub, subend - sub); | ||
| OBJ_INFECT(line, str); | ||
| rb_enc_cr_str_copy_for_substr(line, str); | ||
| rb_yield(line); | ||
| str_mod_check(str, ptr, len); | ||
| return; | ||
| } | ||
| static void | ||
| str_each_line_valid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc) | ||
| { | ||
| int n, rspara = 0; | ||
| long index, rslen; | ||
| const char *ptr, *pend, *sub, *subend; | ||
| const char *adjusted, *rsptr; | ||
| ptr = sub = subend = RSTRING_PTR(str); | ||
| pend = RSTRING_END(str); | ||
| rslen = RSTRING_LEN(rs); | ||
| if (rslen == 0) { | ||
| rspara = 1; | ||
| rs = rb_usascii_str_new("\n\n", 2); | ||
| if (!rb_enc_asciicompat(enc)) | ||
| rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil); | ||
| rslen = RSTRING_LEN(rs); | ||
| } | ||
| rsptr = RSTRING_PTR(rs); | ||
| while (sub < pend) { | ||
| index = rb_memsearch(rsptr, rslen, sub, pend - sub, enc); | ||
| if (index < 0) break; | ||
| subend = sub + index + rslen; | ||
| if (rspara) { | ||
| rb_enc_codepoint_len(subend, pend, &n, enc); | ||
| while (subend < pend && rb_enc_codepoint(subend, pend, enc) == newline) | ||
| subend += n; | ||
| } | ||
| adjusted = rb_enc_left_char_head(ptr, subend, pend, enc); | ||
| if (subend == adjusted) line_yield(str, sub, subend); | ||
| sub = subend; | ||
| } | ||
| if (subend < pend) line_yield(str, subend, pend); | ||
| return; | ||
| } | ||
| static void | ||
| str_each_line_invalid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc) | ||
| { | ||
| int n; | ||
| long rslen; | ||
| const char *sub, *subend, *pend, *rsptr; | ||
| sub = subend = RSTRING_PTR(str); | ||
| pend = RSTRING_END(str); | ||
| rsptr = RSTRING_PTR(rs); | ||
| rslen = RSTRING_LEN(rs); | ||
| while (sub < pend) { | ||
| unsigned int c = rb_enc_codepoint_len(sub, pend, &n, enc); | ||
| again: | ||
| if (rslen == 0 && c == newline) { | ||
| subend += n; | ||
| if (subend < pend && (c = rb_enc_codepoint_len(subend, pend, &n, enc)) != newline) { | ||
| goto again; | ||
| } | ||
| while (subend < pend && rb_enc_codepoint(subend, pend, enc) == newline) { | ||
| subend += n; | ||
| } | ||
| subend -= n; | ||
| } | ||
| if (c == newline && | ||
| (rslen <= 1 || | ||
| (pend - subend >= rslen && memcmp(rsptr, subend, rslen) == 0))) { | ||
| subend += rslen ? rslen : n; | ||
| line_yield(str, sub, subend); | ||
| sub = subend; | ||
| } | ||
| subend += n; | ||
| } | ||
| if (subend < pend) line_yield(str, subend, pend); | ||
| return; | ||
| } | ||
| /* | ||
| * call-seq: | ||
| ... | ... | |
| rb_encoding *enc; | ||
| VALUE rs; | ||
| unsigned int newline; | ||
| const char *p, *pend, *s, *ptr; | ||
| long len, rslen; | ||
| VALUE line; | ||
| int n; | ||
| VALUE orig = str; | ||
| if (argc == 0) { | ||
| if (argc == 0) | ||
| rs = rb_rs; | ||
| } | ||
| else { | ||
| else | ||
| rb_scan_args(argc, argv, "01", &rs); | ||
| } | ||
| RETURN_ENUMERATOR(str, argc, argv); | ||
| if (NIL_P(rs)) { | ||
| rb_yield(str); | ||
| return orig; | ||
| } | ||
| str = rb_str_new4(str); | ||
| ptr = p = s = RSTRING_PTR(str); | ||
| pend = p + RSTRING_LEN(str); | ||
| len = RSTRING_LEN(str); | ||
| StringValue(rs); | ||
| if (rs == rb_default_rs) { | ||
| enc = rb_enc_get(str); | ||
| while (p < pend) { | ||
| char *p0; | ||
| p = memchr(p, '\n', pend - p); | ||
| if (!p) break; | ||
| p0 = rb_enc_left_char_head(s, p, pend, enc); | ||
| if (!rb_enc_is_newline(p0, pend, enc)) { | ||
| p++; | ||
| continue; | ||
| } | ||
| p = p0 + rb_enc_mbclen(p0, pend, enc); | ||
| line = rb_str_new5(str, s, p - s); | ||
| OBJ_INFECT(line, str); | ||
| rb_enc_cr_str_copy_for_substr(line, str); | ||
| rb_yield(line); | ||
| str_mod_check(str, ptr, len); | ||
| s = p; | ||
| } | ||
| goto finish; | ||
| } | ||
| str = rb_str_new4(str); | ||
| enc = rb_enc_check(str, rs); | ||
| rslen = RSTRING_LEN(rs); | ||
| if (rslen == 0) { | ||
| newline = '\n'; | ||
| if (rs == rb_rs) { | ||
| enc = rb_enc_get(str); | ||
| rs = rb_str_encode(rb_rs, rb_enc_from_encoding(enc), 0, Qnil); | ||
| } | ||
| else { | ||
| newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc); | ||
| enc = rb_enc_check(str, rs); | ||
| } | ||
| while (p < pend) { | ||
| unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc); | ||
| again: | ||
| if (rslen == 0 && c == newline) { | ||
| p += n; | ||
| if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) { | ||
| goto again; | ||
| } | ||
| while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) { | ||
| p += n; | ||
| } | ||
| p -= n; | ||
| } | ||
| if (c == newline && | ||
| (rslen <= 1 || | ||
| (pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) { | ||
| line = rb_str_new5(str, s, p - s + (rslen ? rslen : n)); | ||
| OBJ_INFECT(line, str); | ||
| rb_enc_cr_str_copy_for_substr(line, str); | ||
| rb_yield(line); | ||
| str_mod_check(str, ptr, len); | ||
| s = p + (rslen ? rslen : n); | ||
| } | ||
| p += n; | ||
| } | ||
| if (RSTRING_LEN(rs) == 0) | ||
| newline = '\n'; | ||
| else | ||
| newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc); | ||
| finish: | ||
| if (s != pend) { | ||
| line = rb_str_new5(str, s, pend - s); | ||
| OBJ_INFECT(line, str); | ||
| rb_enc_cr_str_copy_for_substr(line, str); | ||
| rb_yield(line); | ||
| RB_GC_GUARD(str); | ||
| } | ||
| if (rb_str_valid_encoding_p(str) && rb_str_valid_encoding_p(rs)) | ||
| str_each_line_valid(str, rs, newline, enc); | ||
| else | ||
| str_each_line_invalid(str, rs, newline, enc); | ||
| return orig; | ||
| } | ||