ruby
diff --git a/‎ext/prism/extension.c‎
Lines changed: 31 additions & 0 deletions b/‎ext/prism/extension.c‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎include/prism.h‎
Lines changed: 9 additions & 0 deletions b/‎include/prism.h‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎include/prism/util/pm_buffer.h‎
Lines changed: 9 additions & 0 deletions b/‎include/prism/util/pm_buffer.h‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎src/prism.c‎
Lines changed: 290 additions & 0 deletions b/‎src/prism.c‎
Lines changed: 290 additions & 0 deletions
diff --git a/‎src/util/pm_buffer.c‎
Lines changed: 11 additions & 0 deletions b/‎src/util/pm_buffer.c‎
Lines changed: 11 additions & 0 deletions
@@ -984,6 +984,36 @@ inspect_node(VALUE self, VALUE source) {
  return string;
 }
 
+/**
+ * call-seq:
+ *   Debug::format_errors(source) -> String
+ *
+ * Format the errors that are found when parsing the given source string. The
+ * output is colorized with ANSI escape sequences and is tagged with the
+ * encoding that the parser settled on. If the source parses without any
+ * errors, the returned string is empty.
+ */
+static VALUE
+format_errors(VALUE self, VALUE source) {
+    pm_string_t input;
+    input_load_string(&input, source);
+
+    pm_parser_t parser;
+    pm_parser_init(&parser, pm_string_source(&input), pm_string_length(&input), NULL);
+
+    pm_node_t *node = pm_parse(&parser);
+    pm_buffer_t buffer = { 0 };
+
+    // The formatter indexes the last entry of the error list, so it must only
+    // be invoked when the parse actually produced at least one error.
+    if (parser.error_list.size > 0) {
+        pm_parser_errors_format(&parser, &buffer, true);
+    }
+
+    rb_encoding *encoding = rb_enc_find(parser.encoding->name);
+    VALUE result = rb_enc_str_new(pm_buffer_value(&buffer), pm_buffer_length(&buffer), encoding);
+
+    // The Ruby string owns a copy of the bytes, so every native resource can
+    // be released before returning.
+    pm_buffer_free(&buffer);
+    pm_node_destroy(&parser, node);
+    pm_parser_free(&parser);
+    pm_string_free(&input);
+
+    return result;
+}
+
 /******************************************************************************/
 /* Initialization of the extension */
 /******************************************************************************/
@@ -1062,6 +1092,7 @@ Init_prism(void) {
  rb_define_singleton_method(rb_cPrismDebug, "memsize", memsize, 1);
  rb_define_singleton_method(rb_cPrismDebug, "profile_file", profile_file, 1);
  rb_define_singleton_method(rb_cPrismDebug, "inspect_node", inspect_node, 1);
+ rb_define_singleton_method(rb_cPrismDebug, "format_errors", format_errors, 1);
 
  // Next, initialize the other APIs.
  Init_prism_api_node();
 
@@ -170,6 +170,15 @@ PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t si
  */
 PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type);
 
+/**
+ * Format the errors on the parser into the given buffer. Each error is
+ * rendered underneath the source line it occurred on, with up to two lines of
+ * surrounding source shown for context.
+ *
+ * @param parser The parser to format the errors for. It is intended to be a
+ *     parser that has recorded at least one error.
+ * @param buffer The buffer to format the errors into. Output is appended to
+ *     whatever the buffer already contains.
+ * @param colorize Whether or not to colorize the errors with ANSI escape sequences.
+ */
+PRISM_EXPORTED_FUNCTION void pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize);
+
 /**
  * @mainpage
  *
 
@@ -128,6 +128,15 @@ void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
  */
 void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
 
+/**
+ * Prepend the given string to the buffer. The bytes that are already in the
+ * buffer are kept and end up immediately after the prepended string.
+ *
+ * @param buffer The buffer to prepend to.
+ * @param value The string to prepend.
+ * @param length The length of the string to prepend, in bytes.
+ */
+void pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length);
+
 /**
  * Concatenate one buffer onto another.
  *
 
@@ -17659,3 +17659,293 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
 #undef PM_LOCATION_NODE_VALUE
 #undef PM_LOCATION_NULL_VALUE
 #undef PM_LOCATION_TOKEN_VALUE
+
+/**
+ * An error that is going to be formatted into the output. One of these is
+ * built for every diagnostic in the parser's error list, and they are kept in
+ * an array ordered by line and then by starting column.
+ */
+typedef struct {
+ /** A pointer to the diagnostic that was generated during parsing. */
+ pm_diagnostic_t *error;
+
+ /**
+  * The start line of the diagnostic message. This is used directly as an
+  * index into the newline list's offsets, and 1 is added to it when it is
+  * displayed.
+  */
+ size_t line;
+
+ /** The column start of the diagnostic message (first column marked). */
+ size_t column_start;
+
+ /**
+  * The column end of the diagnostic message. Markers are drawn for columns
+  * strictly below this value, so it is exclusive.
+  */
+ size_t column_end;
+} pm_error_t;
+
+/**
+ * The format that will be used to format the errors into the output. The
+ * strings are chosen once per call based on how many digits the largest
+ * displayed line number has and on whether output is colorized.
+ */
+typedef struct {
+ /**
+  * The prefix that will be used for line numbers. This is a printf-style
+  * format string that receives the line number as its only argument.
+  */
+ const char *number_prefix;
+
+ /**
+  * The prefix that will be used for blank lines, i.e. the lines that hold
+  * the error markers and message rather than source code.
+  */
+ const char *blank_prefix;
+
+ /** The divider that will be used between sections of source code. */
+ const char *divider;
+
+ /** The length of the blank prefix, in bytes, as computed by strlen. */
+ size_t blank_prefix_length;
+
+ /** The length of the divider, in bytes, as computed by strlen. */
+ size_t divider_length;
+} pm_error_format_t;
+
+/**
+ * ANSI escape sequences used when colorized output is requested: a gray
+ * foreground from the 256-color palette, bold red, and a reset of all
+ * attributes. Some call sites pass hard-coded byte lengths for string literals
+ * built from these macros, so changing the length of a sequence requires
+ * updating those lengths as well.
+ */
+#define PM_COLOR_GRAY "\033[38;5;102m"
+#define PM_COLOR_RED "\033[1;31m"
+#define PM_COLOR_RESET "\033[0m"
+
+/**
+ * Build an array holding one pm_error_t per diagnostic in the given list,
+ * ordered by line and then by starting column. Diagnostics that compare equal
+ * on both are placed ahead of the ones that were inserted before them.
+ *
+ * @param error_list The list of diagnostics to sort.
+ * @param newline_list The newline list used to turn source locations into
+ *     line and column numbers.
+ * @return A newly allocated array of error_list->size entries that the caller
+ *     must release with free, or NULL if the allocation failed.
+ */
+static inline pm_error_t *
+pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
+    // The array is zero-initialized so that a NULL error pointer marks the
+    // slots that have not been filled in yet.
+    pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
+    if (errors == NULL) return NULL;
+
+    for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+        pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
+        pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
+
+        // We're going to insert this error into the array in sorted order. We
+        // do this by finding the first slot that is either empty or holds an
+        // error that does not sort before the current one.
+        size_t index = 0;
+        while (
+            (index < error_list->size) &&
+            (errors[index].error != NULL) &&
+            (
+                (errors[index].line < start.line) ||
+                (errors[index].line == start.line && errors[index].column_start < start.column)
+            )
+        ) index++;
+
+        // Now we're going to shift all of the errors after this one down one
+        // index to make room for the new error. The source and destination
+        // ranges overlap, so this has to be memmove and not memcpy.
+        if (index + 1 < error_list->size) {
+            memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
+        }
+
+        // An error that spans multiple lines is only marked on its first
+        // line, from its starting column to the end of that line (excluding
+        // the final byte of the line, which is its terminator).
+        size_t column_end;
+        if (start.line == end.line) {
+            column_end = end.column;
+        } else {
+            column_end = newline_list->offsets[start.line + 1] - newline_list->offsets[start.line] - 1;
+        }
+
+        // Ensure we have at least one column of error.
+        if (start.column == column_end) column_end++;
+
+        // Finally, we'll insert the error into the array.
+        errors[index] = (pm_error_t) {
+            .error = error,
+            .line = start.line,
+            .column_start = start.column,
+            .column_end = column_end
+        };
+    }
+
+    return errors;
+}
+
+/**
+ * Append a single line of the source to the buffer, preceded by its line
+ * number. The appended output always ends with a newline.
+ *
+ * @param parser The parser whose source is being displayed.
+ * @param newline_list The newline list holding the offset at which each line
+ *     of the source starts.
+ * @param number_prefix A printf-style format string that receives the line
+ *     number as its only argument.
+ * @param line The zero-based index of the line to display. It is displayed as
+ *     line + 1.
+ * @param buffer The buffer to append to.
+ */
+static inline void
+pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
+    const uint8_t *start = &parser->start[newline_list->offsets[line]];
+    const uint8_t *end;
+
+    // The last valid index into the offsets is size - 1, so the final line has
+    // no following offset to read and instead runs to the end of the source.
+    if (line + 1 >= newline_list->size) {
+        end = parser->end;
+    } else {
+        end = &parser->start[newline_list->offsets[line + 1]];
+    }
+
+    pm_buffer_append_format(buffer, number_prefix, line + 1);
+    pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
+
+    // A final line that is empty or that lacks a trailing newline would
+    // otherwise cause whatever is appended next to land on the same row.
+    if ((end == start) || (end[-1] != '\n')) {
+        pm_buffer_append_byte(buffer, '\n');
+    }
+}
+
+/**
+ * Format the errors on the parser into the given buffer.
+ *
+ * The errors are displayed in source order. Each one is shown as a marker line
+ * underneath the line of source that it occurred on, and up to two lines of
+ * source are shown before and after it for context. When two consecutively
+ * displayed sections are far enough apart, a divider is placed between them.
+ *
+ * If the parser has no errors, or if the working memory for sorting the errors
+ * cannot be allocated, nothing is appended to the buffer.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool colorize) {
+    const pm_list_t *error_list = &parser->error_list;
+
+    // Everything below relies on there being a last error to look at, so bail
+    // out early when there is nothing to format.
+    if (error_list->size == 0) return;
+
+    // First, we're going to sort all of the errors by line number using an
+    // insertion sort into a newly allocated array.
+    const pm_newline_list_t *newline_list = &parser->newline_list;
+    pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
+    if (errors == NULL) return;
+
+    // Now we're going to determine how we're going to format line numbers and
+    // blank lines based on the maximum number of digits in the line numbers
+    // that are going to be displayed. The blank prefix is as wide as a
+    // rendered line number prefix so that the error markers line up with the
+    // source displayed above them.
+    pm_error_format_t error_format;
+    size_t max_line_number = errors[error_list->size - 1].line + 1;
+
+    if (max_line_number < 10) {
+        if (colorize) {
+            error_format = (pm_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%1zu | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "  | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_error_format_t) {
+                .number_prefix = "%1zu | ",
+                .blank_prefix = "  | ",
+                .divider = " ~~~~~\n"
+            };
+        }
+    } else if (max_line_number < 100) {
+        if (colorize) {
+            error_format = (pm_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%2zu | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "   | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_error_format_t) {
+                .number_prefix = "%2zu | ",
+                .blank_prefix = "   | ",
+                .divider = " ~~~~~~\n"
+            };
+        }
+    } else if (max_line_number < 1000) {
+        if (colorize) {
+            error_format = (pm_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%3zu | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "    | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_error_format_t) {
+                .number_prefix = "%3zu | ",
+                .blank_prefix = "    | ",
+                .divider = " ~~~~~~~\n"
+            };
+        }
+    } else if (max_line_number < 10000) {
+        if (colorize) {
+            error_format = (pm_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%4zu | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "     | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_error_format_t) {
+                .number_prefix = "%4zu | ",
+                .blank_prefix = "     | ",
+                .divider = " ~~~~~~~~\n"
+            };
+        }
+    } else {
+        if (colorize) {
+            error_format = (pm_error_format_t) {
+                .number_prefix = PM_COLOR_GRAY "%5zu | " PM_COLOR_RESET,
+                .blank_prefix = PM_COLOR_GRAY "      | " PM_COLOR_RESET,
+                .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
+            };
+        } else {
+            error_format = (pm_error_format_t) {
+                .number_prefix = "%5zu | ",
+                .blank_prefix = "      | ",
+                .divider = " ~~~~~~~~\n"
+            };
+        }
+    }
+
+    error_format.blank_prefix_length = strlen(error_format.blank_prefix);
+    error_format.divider_length = strlen(error_format.divider);
+
+    // Now we're going to iterate through every error in our error list and
+    // display it. While we're iterating, we will display some padding lines of
+    // the source before the error to give some context. We'll be careful not to
+    // display the same line twice in case the errors are close enough in the
+    // source.
+    //
+    // The last displayed line starts out as the maximum size_t value so that,
+    // through unsigned wraparound, the distance from it to the first error is
+    // that error's line index plus one.
+    size_t last_line = (size_t) -1;
+    const pm_encoding_t *encoding = parser->encoding;
+
+    for (size_t index = 0; index < error_list->size; index++) {
+        pm_error_t *error = &errors[index];
+
+        // Here we determine how many lines of padding of the source to display,
+        // based on the difference from the last line that was displayed.
+        if (error->line - last_line > 1) {
+            if (error->line - last_line > 2) {
+                if ((index != 0) && (error->line - last_line > 3)) {
+                    pm_buffer_append_string(buffer, error_format.divider, error_format.divider_length);
+                }
+
+                pm_buffer_append_string(buffer, "  ", 2);
+                pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 2, buffer);
+            }
+
+            pm_buffer_append_string(buffer, "  ", 2);
+            pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line - 1, buffer);
+        }
+
+        // If this is the first error or we're on a new line, then we'll display
+        // the line that has the error in it.
+        if ((index == 0) || (error->line != last_line)) {
+            if (colorize) {
+                pm_buffer_append_string(buffer, PM_COLOR_RED "> " PM_COLOR_RESET, 13);
+            } else {
+                pm_buffer_append_string(buffer, "> ", 2);
+            }
+            pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, error->line, buffer);
+        }
+
+        // Now we'll display the actual error message. We'll do this by first
+        // putting the prefix to the line, then a bunch of blank spaces
+        // depending on the column, then as many carets as we need to display
+        // the width of the error, then the error message itself.
+        //
+        // Note that this doesn't take into account the width of the actual
+        // character when displayed in the terminal. For some east-asian
+        // languages or emoji, this means it can be thrown off pretty badly. We
+        // will need to solve this eventually.
+        pm_buffer_append_string(buffer, "  ", 2);
+        pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
+
+        size_t column = 0;
+        const uint8_t *start = &parser->start[newline_list->offsets[error->line]];
+
+        while (column < error->column_end) {
+            if (column < error->column_start) {
+                pm_buffer_append_byte(buffer, ' ');
+            } else if (colorize) {
+                pm_buffer_append_string(buffer, PM_COLOR_RED "^" PM_COLOR_RESET, 12);
+            } else {
+                pm_buffer_append_byte(buffer, '^');
+            }
+
+            // Advance by a whole character so that a multi-byte character only
+            // gets a single marker. A width of 0 is treated as a single byte
+            // so that the loop always makes progress.
+            size_t char_width = encoding->char_width(start + column, parser->end - (start + column));
+            column += (char_width == 0 ? 1 : char_width);
+        }
+
+        pm_buffer_append_byte(buffer, ' ');
+
+        const char *message = error->error->message;
+        pm_buffer_append_string(buffer, message, strlen(message));
+        pm_buffer_append_byte(buffer, '\n');
+
+        // Here we determine how many lines of padding to display after the
+        // error, depending on where the next error is in source.
+        last_line = error->line;
+        size_t next_line = (index == error_list->size - 1) ? newline_list->size - 1 : errors[index + 1].line;
+
+        if (next_line - last_line > 1) {
+            pm_buffer_append_string(buffer, "  ", 2);
+            pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
+        }
+
+        if (next_line - last_line > 1) {
+            pm_buffer_append_string(buffer, "  ", 2);
+            pm_parser_errors_format_line(parser, newline_list, error_format.number_prefix, ++last_line, buffer);
+        }
+    }
+
+    // Finally, we'll free the array of errors that we allocated.
+    free(errors);
+}
+
+#undef PM_COLOR_GRAY
+#undef PM_COLOR_RED
+#undef PM_COLOR_RESET
@@ -160,6 +160,17 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
  pm_buffer_append_varuint(buffer, unsigned_int);
 }
 
+/**
+ * Insert the given string at the front of the buffer, ahead of the content
+ * that the buffer already holds.
+ */
+void
+pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
+    // Remember how many bytes are currently stored, since that is how much has
+    // to be moved out of the way.
+    size_t existing_length = buffer->length;
+
+    // Account for the incoming bytes first (presumably this also makes sure
+    // the buffer has room for them).
+    pm_buffer_append_length(buffer, length);
+
+    // Slide the existing content toward the end. The two ranges can overlap,
+    // which is why this is a memmove. Then drop the new string into the gap
+    // that was opened up at the front.
+    memmove(buffer->value + length, buffer->value, existing_length);
+    memcpy(buffer->value, value, length);
+}
+
 /**
  * Concatenate one buffer onto another.
  */