diff options
author | jdlugosz963 <jdlugosz963@gmail.com> | 2023-03-01 23:30:31 +0100 |
---|---|---|
committer | jdlugosz963 <jdlugosz963@gmail.com> | 2023-03-01 23:30:31 +0100 |
commit | f44f1f8c7ef7b6266667dce76db686af3258adfc (patch) | |
tree | e0775e2a07713f1ac23d37b5271340fe8cbebd16 /lexer.c | |
download | jadl-f44f1f8c7ef7b6266667dce76db686af3258adfc.tar.gz jadl-f44f1f8c7ef7b6266667dce76db686af3258adfc.zip |
Build simple abstract syntax tree
Diffstat (limited to 'lexer.c')
-rw-r--r-- | lexer.c | 243 |
1 files changed, 243 insertions, 0 deletions
@@ -0,0 +1,243 @@ | |||
#include "lexer.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
5 | |||
6 | Lexer *lexer_make() | ||
7 | { | ||
8 | Lexer *lexer = (Lexer *)malloc(sizeof(Lexer)); | ||
9 | return lexer; | ||
10 | } | ||
11 | |||
12 | void lexer_free(Lexer *lexer) | ||
13 | { | ||
14 | Token *token = lexer->tokens; | ||
15 | Token *token_next = NULL; | ||
16 | |||
17 | while (token) | ||
18 | { | ||
19 | token_next = token->next; | ||
20 | jadl_free(token->value); | ||
21 | jadl_free(token); | ||
22 | token = token_next; | ||
23 | } | ||
24 | jadl_free(lexer); | ||
25 | } | ||
26 | |||
27 | Token *lexer_token_make(char *value, int value_len, TokenType type) | ||
28 | { | ||
29 | Token *token = jadl_malloc(sizeof(token)); | ||
30 | token->value = jadl_malloc(sizeof(char) * value_len + 1); | ||
31 | token->value[value_len] = '\0'; | ||
32 | token->type = type; | ||
33 | strncpy(token->value, value, value_len); | ||
34 | token->is_decimal_point = 0; | ||
35 | return token; | ||
36 | } | ||
37 | |||
38 | Lexer *lexer_token_push(Lexer *lexer, Token *token) | ||
39 | { | ||
40 | token->next = lexer->tokens; | ||
41 | lexer->tokens = token; | ||
42 | return lexer; | ||
43 | } | ||
44 | |||
45 | Lexer *lexer_tokens_reverse(Lexer *lexer) | ||
46 | { | ||
47 | Token *token = lexer->tokens; | ||
48 | Token *token_next = NULL; | ||
49 | lexer->tokens=NULL; | ||
50 | |||
51 | while (token) | ||
52 | { | ||
53 | token_next = token->next; | ||
54 | |||
55 | lexer_token_push(lexer, token); | ||
56 | |||
57 | token = token_next; | ||
58 | } | ||
59 | return lexer; | ||
60 | } | ||
61 | |||
62 | char *lexer_token_make_string(char *str, Token **token) | ||
63 | { | ||
64 | char *end = str; | ||
65 | int is_string_read = 1; | ||
66 | str += 1; | ||
67 | |||
68 | do | ||
69 | { | ||
70 | end = strchr(end+1, '"'); | ||
71 | if (!end) | ||
72 | { | ||
73 | lexer_token_make_error("Cannot find end of string!", token); | ||
74 | is_string_read = 0; | ||
75 | break; | ||
76 | } | ||
77 | is_string_read = 1; | ||
78 | } while(*(end-1) == '\\'); | ||
79 | |||
80 | if(is_string_read) | ||
81 | { | ||
82 | int str_len = (end - str); | ||
83 | *token = lexer_token_make(str, str_len, TOKEN_TYPE_STRING); | ||
84 | } | ||
85 | |||
86 | return (!is_string_read) ? NULL : end + 1; | ||
87 | } | ||
88 | |||
89 | char *lexer_token_make_number(char *str, Token **token) | ||
90 | { | ||
91 | char *end = lexer_token_terminated_symbol(str); | ||
92 | |||
93 | char *next = str; | ||
94 | int is_decimal_point=0; | ||
95 | int is_number_read=1; | ||
96 | |||
97 | while(next<end) | ||
98 | { | ||
99 | if (!is_decimal_point && *next == '.') | ||
100 | is_decimal_point=1; | ||
101 | else if(*next == '.') { | ||
102 | lexer_token_make_error( | ||
103 | "Cannot read number, becouse it has too many decimal points!", token); | ||
104 | is_number_read = 0; | ||
105 | break; | ||
106 | } | ||
107 | else if(!isdigit(*next)) { | ||
108 | lexer_token_make_error("Cannot read number, becouse it isnt digit!", token); | ||
109 | is_number_read = 0; | ||
110 | break; | ||
111 | } | ||
112 | |||
113 | is_number_read = 1; | ||
114 | next+=1; | ||
115 | } | ||
116 | |||
117 | if(is_number_read) | ||
118 | { | ||
119 | int str_len = (end - str + 1); *token = lexer_token_make(str, str_len, TOKEN_TYPE_NUMBER); | ||
120 | (*token)->is_decimal_point = is_decimal_point; | ||
121 | } | ||
122 | |||
123 | return (is_number_read) ? end + 1 : NULL; | ||
124 | } | ||
125 | |||
/*
 * Locate the last character of the symbol that begins at str.
 *
 * A symbol ends just before the first of the characters
 * space ( ) [ ] , ; " ` or at the end of the string when none occurs.
 * The returned pointer is inclusive: it addresses the final character of
 * the symbol, i.e. one position before the terminator.
 */
char *lexer_token_terminated_symbol(char *str)
{
    static const char terminators[] = " ()[],;\"`";
    size_t symbol_len = strcspn(str, terminators);

    return str + symbol_len - 1;
}
133 | |||
134 | char *lexer_token_make_symbol(char *str, Token **token) | ||
135 | { | ||
136 | char *end = lexer_token_terminated_symbol(str); | ||
137 | |||
138 | int str_len = (end - str + 1); | ||
139 | |||
140 | *token = lexer_token_make(str, str_len, TOKEN_TYPE_SYMBOL); | ||
141 | |||
142 | if(strncmp(str, SYMBOL_NIL, str_len) == 0) | ||
143 | (*token)->type = TOKEN_TYPE_NIL; | ||
144 | else if(strncmp(str, SYMBOL_FALSE, str_len) == 0 || | ||
145 | strncmp(str, SYMBOL_FALSE_SHORT, str_len) == 0) | ||
146 | (*token)->type = TOKEN_TYPE_FALSE; | ||
147 | else if(strncmp(str, SYMBOL_TRUE, str_len) == 0 || | ||
148 | strncmp(str, SYMBOL_TRUE_SHORT, str_len == 0)) | ||
149 | (*token)->type = TOKEN_TYPE_TRUE; | ||
150 | |||
151 | return end + 1; | ||
152 | } | ||
153 | |||
154 | char *lexer_token_make_special(char *str, Token **token) | ||
155 | { | ||
156 | *token = lexer_token_make(str, 1, TOKEN_TYPE_SPECIAL); | ||
157 | return str + 1; | ||
158 | } | ||
159 | |||
160 | |||
161 | void lexer_token_make_error(char *message, Token **token) | ||
162 | { | ||
163 | unsigned long message_len = strlen(message); | ||
164 | *token = lexer_token_make(message, message_len, TOKEN_TYPE_ERROR); | ||
165 | } | ||
166 | |||
167 | |||
168 | Lexer *lexer_tokenize(char *str) { | ||
169 | Lexer *lexer = lexer_make(); | ||
170 | Token *token = NULL; | ||
171 | |||
172 | while (str && *str) { | ||
173 | /* if(!*str) return lexer; */ | ||
174 | |||
175 | switch (*str) { | ||
176 | case ' ': | ||
177 | case ';': | ||
178 | token = NULL; | ||
179 | str += 1; | ||
180 | break; | ||
181 | case '(': | ||
182 | case ')': | ||
183 | case '[': | ||
184 | case ']': | ||
185 | str = lexer_token_make_special(str, &token); | ||
186 | break; | ||
187 | case '0': | ||
188 | case '1': | ||
189 | case '2': | ||
190 | case '3': | ||
191 | case '4': | ||
192 | case '5': | ||
193 | case '6': | ||
194 | case '7': | ||
195 | case '8': | ||
196 | case '9': | ||
197 | str = lexer_token_make_number(str, &token); | ||
198 | break; | ||
199 | case '"': | ||
200 | str = lexer_token_make_string(str, &token); | ||
201 | break; | ||
202 | default: | ||
203 | str = lexer_token_make_symbol(str, &token); | ||
204 | break; | ||
205 | } | ||
206 | if (token) | ||
207 | lexer_token_push(lexer, token); | ||
208 | } | ||
209 | |||
210 | return lexer; | ||
211 | } | ||
212 | |||
213 | void lexer_tokens_print(Lexer *lexer) | ||
214 | { | ||
215 | Token *token = lexer->tokens; | ||
216 | while (token) { | ||
217 | switch(token->type) { | ||
218 | case TOKEN_TYPE_STRING: | ||
219 | printf("String: "); | ||
220 | break; | ||
221 | case TOKEN_TYPE_SYMBOL: | ||
222 | printf("Symbol: "); | ||
223 | break; | ||
224 | case TOKEN_TYPE_SPECIAL: | ||
225 | printf("Special: "); | ||
226 | break; | ||
227 | case TOKEN_TYPE_NUMBER: | ||
228 | printf("Number: "); | ||
229 | break; | ||
230 | case TOKEN_TYPE_TRUE: | ||
231 | printf("True: "); | ||
232 | break; | ||
233 | case TOKEN_TYPE_FALSE: | ||
234 | printf("False: "); | ||
235 | break; | ||
236 | case TOKEN_TYPE_NIL: | ||
237 | printf("Nil: "); | ||
238 | break; | ||
239 | } | ||
240 | printf("%s\n", token->value); | ||
241 | token = token->next; | ||
242 | } | ||
243 | } | ||