Skip to content

Commit 27985b0

Browse files
committed
Provide ability to format errors
1 parent 5ee9aeb commit 27985b0

File tree

5 files changed

+350
-0
lines changed

5 files changed

+350
-0
lines changed

ext/prism/extension.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,36 @@ inspect_node(VALUE self, VALUE source) {
984984
return string;
985985
}
986986

987+
/**
988+
* call-seq:
989+
* Debug::format_errors(source) -> String
990+
*
991+
* Format the errors that are found when parsing the given source string.
992+
*/
993+
static VALUE
994+
format_errors(VALUE self, VALUE source) {
995+
pm_string_t input;
996+
input_load_string(&input, source);
997+
998+
pm_parser_t parser;
999+
pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
1000+
1001+
pm_node_t *node = pm_parse(&parser);
1002+
pm_buffer_t buffer = { 0 };
1003+
1004+
pm_parser_errors_format(&parser, &buffer, true);
1005+
1006+
rb_encoding *encoding = rb_enc_find(parser.encoding->name);
1007+
VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
1008+
1009+
pm_buffer_free(&buffer);
1010+
pm_node_destroy(&parser, node);
1011+
pm_parser_free(&parser);
1012+
pm_string_free(&input);
1013+
1014+
return result;
1015+
}
1016+
9871017
/******************************************************************************/
9881018
/* Initialization of the extension */
9891019
/******************************************************************************/
@@ -1062,6 +1092,7 @@ Init_prism(void) {
10621092
rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
10631093
rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
10641094
rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
1095+
rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 1);
10651096

10661097
// Next, initialize the other APIs.
10671098
Init_prism_api_node();

include/prism.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,15 @@ PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t si
170170
*/
171171
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
172172

173+
/**
174+
* Format the errors on the parser into the given buffer.
* The formatted text is appended to whatever the buffer already holds. Errors
* are emitted in source order (by line, then by starting column), each one
* shown beneath the source line it refers to along with nearby context lines.
175+
*
176+
* @param parser The parser to format the errors for.
177+
* @param buffer The buffer to format the errors into.
178+
* @param colorize Whether or not to colorize the errors with ANSI escape sequences.
179+
*/
180+
PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize);
181+
173182
/**
174183
* @mainpage
175184
*

include/prism/util/pm_buffer.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,15 @@ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
128128
*/
129129
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
130130

131+
/**
132+
* Prepend the given string to the buffer.
* The bytes already stored in the buffer are shifted back by `length` bytes
* and the new bytes are copied in front of them.
133+
*
134+
* @param buffer The buffer to prepend to.
135+
* @param value The string to prepend.
136+
* @param length The length of the string to prepend.
137+
*/
138+
void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
139+
131140
/**
132141
* Concatenate one buffer onto another.
133142
*

src/prism.c

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17659,3 +17659,293 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
1765917659
#undef PM_LOCATION_NODE_VALUE
1766017660
#undef PM_LOCATION_NULL_VALUE
1766117661
#undef PM_LOCATION_TOKEN_VALUE
17662+
17663+
/**
 * An error that is going to be formatted into the output. This pairs a
 * diagnostic with the line/column span that the formatter underlines.
 */
17664+
typedef struct {
17665+
/** A pointer to the diagnostic that was generated during parsing. */
17666+
pm_diagnostic_t *error;
17667+
17668+
/**
 * The start line of the diagnostic message. This is zero-based: it is used
 * directly as an index into the newline list offsets and is printed as
 * `line + 1`.
 */
17669+
size_t line;
17670+
17671+
/**
 * The column start of the diagnostic message, used as a byte offset from the
 * start of the line.
 */
17672+
size_t column_start;
17673+
17674+
/**
 * The column end of the diagnostic message (exclusive). For diagnostics that
 * span multiple lines this is clamped to the first line, and it is always at
 * least one column past column_start.
 */
17675+
size_t column_end;
17676+
} pm_error_t;
17677+
17678+
/**
 * The format that will be used to format the errors into the output. The
 * strings are chosen based on how many digits the largest displayed line
 * number needs and on whether or not the output is colorized.
 */
17679+
typedef struct {
17680+
/**
 * The prefix that will be used for line numbers. This is a printf-style
 * format string that consumes a single size_t (the one-based line number).
 */
17681+
const char *number_prefix;
17682+
17683+
/**
 * The prefix that will be used for blank lines, i.e. the lines that hold the
 * carets and the error message rather than source code.
 */
17684+
const char *blank_prefix;
17685+
17686+
/** The divider that will be used between sections of source code. */
17687+
const char *divider;
17688+
17689+
/** The length of the blank prefix, in bytes (computed with strlen). */
17690+
size_t blank_prefix_length;
17691+
17692+
/** The length of the divider, in bytes (computed with strlen). */
17693+
size_t divider_length;
17694+
} pm_error_format_t;
17695+
17696+
/*
 * Escape sequences used by the error formatter when colorized output is
 * requested. GRAY wraps the line-number gutter and dividers, RED wraps the
 * error marker and carets, and RESET ends each colored run.
 */
#define PM_COLOR_GRAY "\033[38;5;102m"
17697+
#define PM_COLOR_RED "\033[1;31m"
17698+
#define PM_COLOR_RESET "\033[0m"
17699+
17700+
static inline pm_error_t *
17701+
pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
17702+
pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
17703+
17704+
for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
17705+
pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
17706+
pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
17707+
17708+
// We're going to insert this error into the array in sorted order. We
17709+
// do this by finding the first error that has a line number greater
17710+
// than the current error and then inserting the current error before
17711+
// that one.
17712+
size_t index = 0;
17713+
while (
17714+
(index < error_list->size) &&
17715+
(errors[index].error != NULL) &&
17716+
(
17717+
(errors[index].line < start.line) ||
17718+
(errors[index].line == start.line && errors[index].column_start < start.column)
17719+
)
17720+
) index++;
17721+
17722+
// Now we're going to shift all of the errors after this one down one
17723+
// index to make room for the new error.
17724+
memcpy(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
17725+
17726+
// Finally, we'll insert the error into the array.
17727+
size_t column_end;
17728+
if (start.line == end.line) {
17729+
column_end = end.column;
17730+
} else {
17731+
column_end = newline_list->offsets[start.line + 1] - newline_list->offsets[start.line] - 1;
17732+
}
17733+
17734+
// Ensure we have at least one column of error.
17735+
if (start.column == column_end) column_end++;
17736+
17737+
errors[index] = (pm_error_t) {
17738+
.error = error,
17739+
.line = start.line,
17740+
.column_start = start.column,
17741+
.column_end = column_end
17742+
};
17743+
}
17744+
17745+
return errors;
17746+
}
17747+
17748+
static inline void
17749+
pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
17750+
const uint8_t *start = &parser->start[newline_list->offsets[line]];
17751+
const uint8_t *end;
17752+
17753+
if (line + 1 > newline_list->size) {
17754+
end = parser->end;
17755+
} else {
17756+
end = &parser->start[newline_list->offsets[line + 1]];
17757+
}
17758+
17759+
pm_buffer_append_format(buffer, number_prefix, line + 1);
17760+
pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
17761+
}
17762+
17763+
/**
17764+
* Format the errors on the parser into the given buffer.
17765+
*/
17766+
PRISM_EXPORTED_FUNCTION void
17767+
pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
17768+
const pm_list_t *error_list = &parser->error_list;
17769+
assert(error_list->size != 0);
17770+
17771+
// First, we're going to sort all of the errors by line number using an
17772+
// insertion sort into a newly allocated array.
17773+
const pm_newline_list_t *newline_list = &parser->newline_list;
17774+
pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
17775+
17776+
// Now we're going to determine how we're going to format line numbers and
17777+
// blank lines based on the maximum number of digits in the line numbers
17778+
// that are going to be displayed.
17779+
pm_error_format_t error_format;
17780+
size_t max_line_number = errors[error_list->size - 1].line + 1;
17781+
17782+
if (max_line_number < 10) {
17783+
if (colorize) {
17784+
error_format = (pm_error_format_t) {
17785+
.number_prefix = PM_COLOR_GRAY "%1zu | " PM_COLOR_RESET,
17786+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
17787+
.divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
17788+
};
17789+
} else {
17790+
error_format = (pm_error_format_t) {
17791+
.number_prefix = "%1zu | ",
17792+
.blank_prefix = " | ",
17793+
.divider = " ~~~~~\n"
17794+
};
17795+
}
17796+
} else if (max_line_number < 100) {
17797+
if (colorize) {
17798+
error_format = (pm_error_format_t) {
17799+
.number_prefix = PM_COLOR_GRAY "%2zu | " PM_COLOR_RESET,
17800+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
17801+
.divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
17802+
};
17803+
} else {
17804+
error_format = (pm_error_format_t) {
17805+
.number_prefix = "%2zu | ",
17806+
.blank_prefix = " | ",
17807+
.divider = " ~~~~~~\n"
17808+
};
17809+
}
17810+
} else if (max_line_number < 1000) {
17811+
if (colorize) {
17812+
error_format = (pm_error_format_t) {
17813+
.number_prefix = PM_COLOR_GRAY "%3zu | " PM_COLOR_RESET,
17814+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
17815+
.divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
17816+
};
17817+
} else {
17818+
error_format = (pm_error_format_t) {
17819+
.number_prefix = "%3zu | ",
17820+
.blank_prefix = " | ",
17821+
.divider = " ~~~~~~~\n"
17822+
};
17823+
}
17824+
} else if (max_line_number < 10000) {
17825+
if (colorize) {
17826+
error_format = (pm_error_format_t) {
17827+
.number_prefix = PM_COLOR_GRAY "%4zu | " PM_COLOR_RESET,
17828+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
17829+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
17830+
};
17831+
} else {
17832+
error_format = (pm_error_format_t) {
17833+
.number_prefix = "%4zu | ",
17834+
.blank_prefix = " | ",
17835+
.divider = " ~~~~~~~~\n"
17836+
};
17837+
}
17838+
} else {
17839+
if (colorize) {
17840+
error_format = (pm_error_format_t) {
17841+
.number_prefix = PM_COLOR_GRAY "%5zu | " PM_COLOR_RESET,
17842+
.blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
17843+
.divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
17844+
};
17845+
} else {
17846+
error_format = (pm_error_format_t) {
17847+
.number_prefix = "%5zu | ",
17848+
.blank_prefix = " | ",
17849+
.divider = " ~~~~~~~~\n"
17850+
};
17851+
}
17852+
}
17853+
17854+
error_format.blank_prefix_length = strlen(error_format.blank_prefix);
17855+
error_format.divider_length = strlen(error_format.divider);
17856+
17857+
// Now we're going to iterate through every error in our error list and
17858+
// display it. While we're iterating, we will display some padding lines of
17859+
// the source before the error to give some context. We'll be careful not to
17860+
// display the same line twice in case the errors are close enough in the
17861+
// source.
17862+
size_t last_line = (size_t) -1;
17863+
const pm_encoding_t *encoding = parser->encoding;
17864+
17865+
for (size_t index = 0; index < error_list->size; index++) {
17866+
pm_error_t *error = &errors[index];
17867+
17868+
// Here we determine how many lines of padding of the source to display,
17869+
// based on the difference from the last line that was displayed.
17870+
if (error->line - last_line > 1) {
17871+
if (error->line - last_line > 2) {
17872+
if ((index != 0) && (error->line - last_line > 3)) {
17873+
pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
17874+
}
17875+
17876+
pm_buffer_append_string(buffer, " ", 2);
17877+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
17878+
}
17879+
17880+
pm_buffer_append_string(buffer, " ", 2);
17881+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
17882+
}
17883+
17884+
// If this is the first error or we're on a new line, then we'll display
17885+
// the line that has the error in it.
17886+
if ((index == 0) || (error->line != last_line)) {
17887+
if (colorize) {
17888+
pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
17889+
} else {
17890+
pm_buffer_append_string(buffer, "> ", 2);
17891+
}
17892+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
17893+
}
17894+
17895+
// Now we'll display the actual error message. We'll do this by first
17896+
// putting the prefix to the line, then a bunch of blank spaces
17897+
// depending on the column, then as many carets as we need to display
17898+
// the width of the error, then the error message itself.
17899+
//
17900+
// Note that this doesn't take into account the width of the actual
17901+
// character when displayed in the terminal. For some east-asian
17902+
// languages or emoji, this means it can be thrown off pretty badly. We
17903+
// will need to solve this eventually.
17904+
pm_buffer_append_string(buffer, " ", 2);
17905+
pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
17906+
17907+
size_t column = 0;
17908+
const uint8_t *start = &parser->start[newline_list->offsets[error->line]];
17909+
17910+
while (column < error->column_end) {
17911+
if (column < error->column_start) {
17912+
pm_buffer_append_byte(buffer, ' ');
17913+
} else if (colorize) {
17914+
pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
17915+
} else {
17916+
pm_buffer_append_byte(buffer, '^');
17917+
}
17918+
17919+
size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
17920+
column += (char_width == 0 ? 1 : char_width);
17921+
}
17922+
17923+
pm_buffer_append_byte(buffer, ' ');
17924+
17925+
const char *message = error->error->message;
17926+
pm_buffer_append_string(buffer, message, strlen(message));
17927+
pm_buffer_append_byte(buffer, '\n');
17928+
17929+
// Here we determine how many lines of padding to display after the
17930+
// error, depending on where the next error is in source.
17931+
last_line = error->line;
17932+
size_t next_line = (index == error_list->size - 1) ? newline_list->size - 1 : errors[index + 1].line;
17933+
17934+
if (next_line - last_line > 1) {
17935+
pm_buffer_append_string(buffer, " ", 2);
17936+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
17937+
}
17938+
17939+
if (next_line - last_line > 1) {
17940+
pm_buffer_append_string(buffer, " ", 2);
17941+
pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
17942+
}
17943+
}
17944+
17945+
// Finally, we'll free the array of errors that we allocated.
17946+
free(errors);
17947+
}
17948+
17949+
/* The color escape sequences are only needed by the error formatting code. */
#undef PM_COLOR_GRAY
17950+
#undef PM_COLOR_RED
17951+
#undef PM_COLOR_RESET

src/util/pm_buffer.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,17 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
160160
pm_buffer_append_varuint(buffer, unsigned_int);
161161
}
162162

163+
/**
164+
* Prepend the given string to the buffer.
165+
*/
166+
void
167+
pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
168+
size_t cursor = buffer->length;
169+
pm_buffer_append_length(buffer, length);
170+
memmove(buffer->value + length, buffer->value, cursor);
171+
memcpy(buffer->value, value, length);
172+
}
173+
163174
/**
164175
* Concatenate one buffer onto another.
165176
*/

0 commit comments

Comments
 (0)