heiytor
diff --git a/‎src/lexer/actions.h‎
Lines changed: 0 additions & 28 deletions b/‎src/lexer/actions.h‎
Lines changed: 0 additions & 28 deletions
diff --git a/‎src/lexer/def.h‎
Lines changed: 25 additions & 0 deletions b/‎src/lexer/def.h‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎src/lexer/init.c‎
Lines changed: 34 additions & 0 deletions b/‎src/lexer/init.c‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎src/lexer/lib.h‎
Lines changed: 16 additions & 0 deletions b/‎src/lexer/lib.h‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎src/lexer/actions.c‎ renamed to ‎src/lexer/nav.c‎
Lines changed: 24 additions & 45 deletions b/‎src/lexer/actions.c‎ renamed to ‎src/lexer/nav.c‎
Lines changed: 24 additions & 45 deletions
diff --git a/‎src/lexer/tokenization.c‎
Lines changed: 53 additions & 0 deletions b/‎src/lexer/tokenization.c‎
Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,25 @@
+#ifndef LEXER_DEF_H
+#define LEXER_DEF_H
+
+#include <langdef.h>
+#include <token/def.h>
+
+struct Lexer { /* Scanner state plus the operations new_lexer() binds to it. */
+ char* input; /* text being scanned; new_lexer() stores the caller's pointer, it does not copy */
+ int input_length; /* strlen(input), computed once in new_lexer() */
+ int position; /* starts at 0; read_sequence uses it as the start/end offset of the slice it extracts */
+ int read_position; /* starts at 0; compared against input_length before a read and incremented by next_char */
+ byte ch; /* current byte; set to '\0' once read_position reaches input_length */
+ int line; /* initialised to 1 by new_lexer() */
+ int column; /* initialised to 0 by new_lexer() */
+
+ void (*next_char)(struct Lexer *self); /* bound to __next_char */
+ char *(*read_sequence)(struct Lexer *self); /* bound to __read_sequence */
+ void (*jump_whitespace)(struct Lexer *self); /* bound to __jump_whitespace */
+ byte (*peek_prev_char)(struct Lexer *self); /* bound to __peek_prev_char */
+ byte (*peek_next_char)(struct Lexer *self); /* bound to __peek_next_char */
+
+ struct Token *(*next_token)(struct Lexer *lex); /* bound to __next_token */
+};
+
+#endif /* LEXER_DEF_H */
@@ -0,0 +1,34 @@
+#include <string.h>
+#include <stdlib.h>
+
+#include <lexer/lib.h>
+
+/**
+ * Creates a new lexer object for tokenizing the input string.
+ *
+ * The lexer stores the `input` pointer itself rather than a copy, so the
+ * string must stay valid for as long as the lexer is used. The returned
+ * object is allocated with malloc(); the caller is responsible for free()ing it.
+ *
+ * @param input A pointer to the NUL-terminated input string to be tokenized
+ *
+ * @return A pointer to the newly created lexer object, or NULL when `input`
+ *         is NULL or the allocation fails
+ */
+struct Lexer* new_lexer(char* input) {
+ /* strlen(NULL) is undefined behaviour, so reject a missing input up front */
+ if (input == NULL) {
+   return NULL;
+ }
+
+ struct Lexer *lex = malloc(sizeof *lex);
+ if (lex == NULL) {
+   return NULL;
+ }
+
+ lex->input = input;
+ lex->position = 0;
+ lex->read_position = 0;
+ lex->ch = 0;
+ lex->input_length = strlen(input);
+ lex->line = 1;
+ lex->column = 0;
+
+ lex->next_char = __next_char;
+ lex->jump_whitespace = __jump_whitespace;
+ lex->peek_next_char = __peek_next_char;
+ lex->peek_prev_char = __peek_prev_char;
+ lex->read_sequence = __read_sequence;
+ lex->next_token = __next_token;
+
+ /* Prime the lexer so `ch` holds the first byte before any token is requested */
+ lex->next_char(lex);
+
+ return lex;
+}
@@ -0,0 +1,16 @@
+#ifndef LEXER_LIB_H
+#define LEXER_LIB_H
+
+#include <lexer/def.h>
+#include <token/def.h>
+
+struct Lexer* new_lexer(char* input); /* allocates a Lexer over `input` and loads its first character */
+
+void __next_char(struct Lexer* l); /* advances the lexer to the next character of the input */
+char* __read_sequence(struct Lexer* l); /* reads a run of letters/digits; dynamically allocated string, or NULL on error */
+void __jump_whitespace(struct Lexer* l); /* skips ' ', '\t', '\n' and '\r' */
+byte __peek_prev_char(struct Lexer *l); /* looks at input[read_position - 2] without advancing */
+byte __peek_next_char(struct Lexer *l); /* looks ahead without advancing; 0 once read_position reaches input_length */
+struct Token *__next_token(struct Lexer *l); /* skips whitespace and returns the next token */
+
+#endif /* LEXER_LIB_H */
@@ -1,33 +1,9 @@
 #include <string.h>
-#include <stdio.h>
 #include <stdlib.h>
 
-#include <helpers/characters.h>
+#include <utils/chardef.h>
 
-#include <lexer/actions.h>
-
-/**
- * Creates a new lexer object for tokenizing the input string.
- *
- * @param input A pointer to the input string to be tokenized
- *
- * @return A pointer to the newly created lexer object
- */
-struct Lexer* new_lexer(char* input) {
- struct Lexer *lex = malloc(sizeof(struct Lexer));
-
- lex->input = input;
- lex->position = 0;
- lex->read_position = 0;
- lex->ch = 0;
- lex->input_length = strlen(input);
- lex->line = 1;
- lex->column = 0;
-
- next_char(lex);
-
- return lex;
-}
+#include <lexer/lib.h>
 
 /**
  * Advances the lexer to the next character in the input stream.
@@ -39,7 +15,7 @@ struct Lexer* new_lexer(char* input) {
  * 
  * @return void
  */
-void next_char(struct Lexer* lex) {
+void __next_char(struct Lexer* lex) {
  if (lex->read_position >= lex->input_length) {
  lex->ch = '\0';
  }
@@ -57,7 +33,15 @@ void next_char(struct Lexer* lex) {
  ++lex->read_position;
 }
 
-byte peek_next_char(struct Lexer *lexer) {
+/**
+ * Returns the byte that precedes the current character without moving the lexer.
+ *
+ * The previous byte is addressed as `input[read_position - 2]`. The index is
+ * range-checked so that no read happens before the start of the buffer (when
+ * fewer than two characters have been consumed) or past the end of the input.
+ *
+ * @param lexer A pointer to a lexer object
+ *
+ * @return The previous byte, or 0 when there is no previous byte to read
+ */
+byte __peek_prev_char(struct Lexer *lexer) {
+ int prev_index = lexer->read_position - 2;
+
+ if (prev_index < 0 || prev_index >= lexer->input_length) {
+   return 0;
+ }
+
+ return lexer->input[prev_index];
+}
+
+byte __peek_next_char(struct Lexer *lexer) {
  if (lexer->read_position >= lexer->input_length) {
  return 0;
  }
@@ -72,11 +56,13 @@ byte peek_next_char(struct Lexer *lexer) {
  * 
  * @return A dynamically allocated string containing the read sequence, or NULL if there was an error.
  */
-char* read_sequence(struct Lexer *lex) {
+char* __read_sequence(struct Lexer *lex) {
  int position = lex->position;
 
- while (is_letter(lex->ch) || is_numeric(lex->ch)) {
- next_char(lex);
+ while ((is_letter(lex->ch) || is_numeric(lex->ch)) ||
+ // signed numbers will match with this condition
+ (lex->ch == '-' && is_numeric(__peek_next_char(lex)))) {
+ __next_char(lex);
  }
 
  int length = lex->position - position;
@@ -87,32 +73,25 @@ char* read_sequence(struct Lexer *lex) {
  }
 
  memcpy(result, lex->input + position, length + 1);
+ 
+ result[length] = '\0';
 
- // The parser will throw an error if token->literal contains both letters and digits
- if (is_letter(result[length]) == false && is_numeric(result[length]) == false) {
- // result == `[x-byte]\0\0`
- result[length] = '\0';
- // Remove the last null terminator
- result = realloc(result, length);
-
- if (result == NULL) {
- return NULL;
- }
- }
+ // if (!is_valid_char_in_sequence(result[length])) {
+ // result[length] = '\0';
+ // }
 
  return result;
 }
 
-
 /**
  * Skips over whitespace characters in the lexer input stream.
  *
  * @param lex A pointer to a lexer object
  * 
  * @return void
  */
-void jump_whitespace(struct Lexer* lex) {
+void __jump_whitespace(struct Lexer* lex) { /* consumes a run of ' ', '\t', '\n' and '\r' starting at lex->ch */
  while (lex->ch == ' ' || lex->ch == '\t' || lex->ch == '\n' || lex->ch == '\r') {
- next_char(lex);
+ __next_char(lex); /* advance one character; the loop then re-tests lex->ch */
  }
 }
@@ -0,0 +1,53 @@
+#include <string.h>
+
+#include <utils/chardef.h>
+
+#include <lexer/lib.h>
+#include <token/lib.h>
+
+/**
+ * Gets the next token from the lexer.
+ *
+ * Leading whitespace is skipped first. A token that starts with a digit, a
+ * letter, or a sign followed by a digit is read as a multi-character sequence
+ * and classified as NUMBER or as an identifier/keyword. Anything else is
+ * treated as a one- or two-character symbol.
+ *
+ * @param lex The lexer.
+ *
+ * @return A pointer to the next token, or NULL when the sequence could not be
+ *         read (read_sequence returned NULL).
+ */
+struct Token *__next_token(struct Lexer *lex) {
+ lex->jump_whitespace(lex);
+
+ bool is_char_allowed_for_start = is_allowed_as_first_char(lex->ch);
+ byte next_literal = lex->peek_next_char(lex);
+
+ // e.g. 312 | 312542.423
+ if ((is_numeric(lex->ch) && is_char_allowed_for_start) ||
+ // e.g my_var | my_var1
+ (is_letter(lex->ch) && is_char_allowed_for_start) ||
+ // e.g +312 | -312542.423
+ (is_signed_number(lex->ch, next_literal))) {
+   // Classify from the FIRST character, before read_sequence() consumes the
+   // literal: afterwards lex->ch is the byte that follows the token.
+   bool starts_number = is_numeric(lex->ch) || is_signed_number(lex->ch, next_literal);
+
+   // NOTE(review): confirm read_sequence consumes a leading '+'; its loop only
+   // special-cases '-' followed by a digit.
+   char *curr_literal = lex->read_sequence(lex);
+   if (curr_literal == NULL) {
+     // read_sequence reports failure with NULL; strlen(NULL) would be undefined
+     return NULL;
+   }
+
+   uint8 code = starts_number ? NUMBER : get_ident_code(curr_literal);
+
+   // get correct column value: lex->column is already past the literal
+   int column = lex->column;
+   if (column != 1) {
+     column -= (int)strlen(curr_literal);
+   }
+
+   // NOTE(review): curr_literal is heap-allocated by read_sequence; whether
+   // new_token copies it or takes ownership is not visible here - confirm.
+   return new_token(curr_literal, code, lex->line, column);
+ }
+
+ // Symbol path: one byte, optionally extended to a two-byte compound symbol
+ char curr_literal[3] = {lex->ch, '\0', '\0'};
+ int column = lex->column;
+
+ if (is_compound_symbol(curr_literal[0], next_literal)) {
+   curr_literal[1] = next_literal;
+   lex->next_char(lex);
+ }
+
+ uint8 code = get_symbol_code(curr_literal);
+ struct Token *tok = new_token(curr_literal, code, lex->line, column);
+
+ // Step past the (last byte of the) symbol so the next call starts on fresh input
+ lex->next_char(lex);
+
+ return tok;
+}