| author | jdlugosz963 <jdlugosz963@gmail.com> | 2023-03-01 23:30:31 +0100 |
|---|---|---|
| committer | jdlugosz963 <jdlugosz963@gmail.com> | 2023-03-01 23:30:31 +0100 |
| commit | f44f1f8c7ef7b6266667dce76db686af3258adfc (patch) | |
| tree | e0775e2a07713f1ac23d37b5271340fe8cbebd16 /lexer.c | |
| download | jadl-f44f1f8c7ef7b6266667dce76db686af3258adfc.tar.gz jadl-f44f1f8c7ef7b6266667dce76db686af3258adfc.zip | |
Build simple abstract syntax tree
Diffstat (limited to 'lexer.c')
| -rw-r--r-- | lexer.c | 243 |
1 file changed, 243 insertions, 0 deletions
| @@ -0,0 +1,243 @@ | |||
| 1 | #include "lexer.h" | ||
| 2 | #include <stdio.h> | ||
| 3 | #include <stdlib.h> | ||
| 4 | #include <string.h> | ||
| 5 | #include <ctype.h> | ||
| 6 | Lexer *lexer_make() | ||
| 7 | { | ||
| 8 | Lexer *lexer = jadl_malloc(sizeof(Lexer)); | ||
| 9 | lexer->tokens = NULL; /* start with an empty token list */ | ||
| 10 | return lexer; | ||
| 11 | } | ||
| 12 | void lexer_free(Lexer *lexer) | ||
| 13 | { | ||
| 14 | Token *token = lexer->tokens; | ||
| 15 | Token *token_next = NULL; | ||
| 16 | |||
| 17 | while (token) | ||
| 18 | { | ||
| 19 | token_next = token->next; | ||
| 20 | jadl_free(token->value); | ||
| 21 | jadl_free(token); | ||
| 22 | token = token_next; | ||
| 23 | } | ||
| 24 | jadl_free(lexer); | ||
| 25 | } | ||
| 26 | |||
| 27 | Token *lexer_token_make(char *value, int value_len, TokenType type) | ||
| 28 | { | ||
| 29 | Token *token = jadl_malloc(sizeof(Token)); | ||
| 30 | token->value = jadl_malloc(value_len + 1); | ||
| 31 | token->value[value_len] = '\0'; | ||
| 32 | token->type = type; | ||
| 33 | strncpy(token->value, value, value_len); | ||
| 34 | token->is_decimal_point = 0; | ||
| 35 | return token; | ||
| 36 | } | ||
| 37 | |||
| 38 | Lexer *lexer_token_push(Lexer *lexer, Token *token) | ||
| 39 | { | ||
| 40 | token->next = lexer->tokens; | ||
| 41 | lexer->tokens = token; | ||
| 42 | return lexer; | ||
| 43 | } | ||
| 44 | |||
| 45 | Lexer *lexer_tokens_reverse(Lexer *lexer) | ||
| 46 | { | ||
| 47 | Token *token = lexer->tokens; | ||
| 48 | Token *token_next = NULL; | ||
| 49 | lexer->tokens = NULL; | ||
| 50 | |||
| 51 | while (token) | ||
| 52 | { | ||
| 53 | token_next = token->next; | ||
| 54 | |||
| 55 | lexer_token_push(lexer, token); | ||
| 56 | |||
| 57 | token = token_next; | ||
| 58 | } | ||
| 59 | return lexer; | ||
| 60 | } | ||
| 61 | |||
| 62 | char *lexer_token_make_string(char *str, Token **token) | ||
| 63 | { | ||
| 64 | char *end = str; | ||
| 65 | int is_string_read = 1; | ||
| 66 | str += 1; | ||
| 67 | |||
| 68 | do | ||
| 69 | { | ||
| 70 | end = strchr(end+1, '"'); | ||
| 71 | if (!end) | ||
| 72 | { | ||
| 73 | lexer_token_make_error("Cannot find end of string!", token); | ||
| 74 | is_string_read = 0; | ||
| 75 | break; | ||
| 76 | } | ||
| 77 | is_string_read = 1; | ||
| 78 | } while(*(end-1) == '\\'); | ||
| 79 | |||
| 80 | if(is_string_read) | ||
| 81 | { | ||
| 82 | int str_len = (end - str); | ||
| 83 | *token = lexer_token_make(str, str_len, TOKEN_TYPE_STRING); | ||
| 84 | } | ||
| 85 | |||
| 86 | return is_string_read ? end + 1 : NULL; | ||
| 87 | } | ||
| 88 | |||
| 89 | char *lexer_token_make_number(char *str, Token **token) | ||
| 90 | { | ||
| 91 | char *end = lexer_token_terminated_symbol(str); | ||
| 92 | |||
| 93 | char *next = str; | ||
| 94 | int is_decimal_point = 0; | ||
| 95 | int is_number_read = 1; | ||
| 96 | |||
| 97 | while (next <= end) | ||
| 98 | { | ||
| 99 | if (!is_decimal_point && *next == '.') | ||
| 100 | is_decimal_point=1; | ||
| 101 | else if(*next == '.') { | ||
| 102 | lexer_token_make_error( | ||
| 103 | "Cannot read number, becouse it has too many decimal points!", token); | ||
| 104 | is_number_read = 0; | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | else if(!isdigit((unsigned char)*next)) { | ||
| 108 | lexer_token_make_error("Cannot read number, because it contains a non-digit character!", token); | ||
| 109 | is_number_read = 0; | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | |||
| 113 | is_number_read = 1; | ||
| 114 | next += 1; | ||
| 115 | } | ||
| 116 | |||
| 117 | if(is_number_read) | ||
| 118 | { | ||
| 119 | int str_len = (end - str + 1); | ||
| 120 | *token = lexer_token_make(str, str_len, TOKEN_TYPE_NUMBER); | ||
| 121 | (*token)->is_decimal_point = is_decimal_point; | ||
| 122 | } | ||
| 123 | return (is_number_read) ? end + 1 : NULL; | ||
| 124 | } | ||
| 125 | |||
| 126 | char *lexer_token_terminated_symbol(char *str) | ||
| 127 | { | ||
| 128 | static const char *chars_to_terminate = " \t\n\r()[],;\"`"; | ||
| 129 | char *terminated = strpbrk(str, chars_to_terminate); | ||
| 130 | terminated = (terminated == NULL) ? &str[strlen(str) - 1] : terminated - 1; | ||
| 131 | return terminated; | ||
| 132 | } | ||
| 133 | |||
| 134 | char *lexer_token_make_symbol(char *str, Token **token) | ||
| 135 | { | ||
| 136 | char *end = lexer_token_terminated_symbol(str); | ||
| 137 | |||
| 138 | int str_len = (end - str + 1); | ||
| 139 | |||
| 140 | *token = lexer_token_make(str, str_len, TOKEN_TYPE_SYMBOL); | ||
| 141 | |||
| 142 | if(strcmp((*token)->value, SYMBOL_NIL) == 0) | ||
| 143 | (*token)->type = TOKEN_TYPE_NIL; | ||
| 144 | else if(strcmp((*token)->value, SYMBOL_FALSE) == 0 || | ||
| 145 | strcmp((*token)->value, SYMBOL_FALSE_SHORT) == 0) | ||
| 146 | (*token)->type = TOKEN_TYPE_FALSE; | ||
| 147 | else if(strcmp((*token)->value, SYMBOL_TRUE) == 0 || | ||
| 148 | strcmp((*token)->value, SYMBOL_TRUE_SHORT) == 0) | ||
| 149 | (*token)->type = TOKEN_TYPE_TRUE; | ||
| 150 | |||
| 151 | return end + 1; | ||
| 152 | } | ||
| 153 | |||
| 154 | char *lexer_token_make_special(char *str, Token **token) | ||
| 155 | { | ||
| 156 | *token = lexer_token_make(str, 1, TOKEN_TYPE_SPECIAL); | ||
| 157 | return str + 1; | ||
| 158 | } | ||
| 159 | |||
| 160 | |||
| 161 | void lexer_token_make_error(char *message, Token **token) | ||
| 162 | { | ||
| 163 | unsigned long message_len = strlen(message); | ||
| 164 | *token = lexer_token_make(message, message_len, TOKEN_TYPE_ERROR); | ||
| 165 | } | ||
| 166 | |||
| 167 | |||
| 168 | Lexer *lexer_tokenize(char *str) { | ||
| 169 | Lexer *lexer = lexer_make(); | ||
| 170 | Token *token = NULL; | ||
| 171 | |||
| 172 | while (str && *str) { | ||
| 173 | /* if(!*str) return lexer; */ | ||
| 174 | |||
| 175 | switch (*str) { | ||
| 176 | case ' ': case '\t': | ||
| 177 | case '\n': case '\r': case ';': | ||
| 178 | token = NULL; | ||
| 179 | str += 1; | ||
| 180 | break; | ||
| 181 | case '(': | ||
| 182 | case ')': | ||
| 183 | case '[': | ||
| 184 | case ']': | ||
| 185 | str = lexer_token_make_special(str, &token); | ||
| 186 | break; | ||
| 187 | case '0': | ||
| 188 | case '1': | ||
| 189 | case '2': | ||
| 190 | case '3': | ||
| 191 | case '4': | ||
| 192 | case '5': | ||
| 193 | case '6': | ||
| 194 | case '7': | ||
| 195 | case '8': | ||
| 196 | case '9': | ||
| 197 | str = lexer_token_make_number(str, &token); | ||
| 198 | break; | ||
| 199 | case '"': | ||
| 200 | str = lexer_token_make_string(str, &token); | ||
| 201 | break; | ||
| 202 | default: | ||
| 203 | str = lexer_token_make_symbol(str, &token); | ||
| 204 | break; | ||
| 205 | } | ||
| 206 | if (token) | ||
| 207 | lexer_token_push(lexer, token); | ||
| 208 | } | ||
| 209 | |||
| 210 | return lexer; | ||
| 211 | } | ||
| 212 | |||
| 213 | void lexer_tokens_print(Lexer *lexer) | ||
| 214 | { | ||
| 215 | Token *token = lexer->tokens; | ||
| 216 | while (token) { | ||
| 217 | switch(token->type) { | ||
| 218 | case TOKEN_TYPE_STRING: | ||
| 219 | printf("String: "); | ||
| 220 | break; | ||
| 221 | case TOKEN_TYPE_SYMBOL: | ||
| 222 | printf("Symbol: "); | ||
| 223 | break; | ||
| 224 | case TOKEN_TYPE_SPECIAL: | ||
| 225 | printf("Special: "); | ||
| 226 | break; | ||
| 227 | case TOKEN_TYPE_NUMBER: | ||
| 228 | printf("Number: "); | ||
| 229 | break; | ||
| 230 | case TOKEN_TYPE_TRUE: | ||
| 231 | printf("True: "); | ||
| 232 | break; | ||
| 233 | case TOKEN_TYPE_FALSE: | ||
| 234 | printf("False: "); | ||
| 235 | break; | ||
| 236 | case TOKEN_TYPE_NIL: | ||
| 237 | printf("Nil: "); | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | printf("%s\n", token->value); | ||
| 241 | token = token->next; | ||
| 242 | } | ||
| 243 | } | ||
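
As a quick orientation for reviewers, below is a minimal usage sketch of the lexer API introduced in this file. It is not part of the commit: the `main` driver and the sample input string are made up for illustration, and it assumes `lexer.h` declares the functions shown in the diff above.

```c
#include "lexer.h"

/* Hypothetical driver, not part of this commit: tokenize a small
 * expression, print the tokens, then free the lexer. */
int main(void)
{
	Lexer *lexer = lexer_tokenize("(+ 1 2.5 \"hello\" nil)");

	/* lexer_token_push prepends to a singly linked list, so the tokens
	 * come out in reverse; restore source order before printing. */
	lexer_tokens_reverse(lexer);
	lexer_tokens_print(lexer);

	lexer_free(lexer);
	return 0;
}
```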
