summaryrefslogtreecommitdiffstats
path: root/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.c')
-rw-r--r--lexer.c243
1 files changed, 243 insertions, 0 deletions
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..0c5e649
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,243 @@
#include "lexer.h"
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
5
6Lexer *lexer_make()
7{
8 Lexer *lexer = (Lexer *)malloc(sizeof(Lexer));
9 return lexer;
10}
11
12void lexer_free(Lexer *lexer)
13{
14 Token *token = lexer->tokens;
15 Token *token_next = NULL;
16
17 while (token)
18 {
19 token_next = token->next;
20 jadl_free(token->value);
21 jadl_free(token);
22 token = token_next;
23 }
24 jadl_free(lexer);
25}
26
27Token *lexer_token_make(char *value, int value_len, TokenType type)
28{
29 Token *token = jadl_malloc(sizeof(token));
30 token->value = jadl_malloc(sizeof(char) * value_len + 1);
31 token->value[value_len] = '\0';
32 token->type = type;
33 strncpy(token->value, value, value_len);
34 token->is_decimal_point = 0;
35 return token;
36}
37
38Lexer *lexer_token_push(Lexer *lexer, Token *token)
39{
40 token->next = lexer->tokens;
41 lexer->tokens = token;
42 return lexer;
43}
44
45Lexer *lexer_tokens_reverse(Lexer *lexer)
46{
47 Token *token = lexer->tokens;
48 Token *token_next = NULL;
49 lexer->tokens=NULL;
50
51 while (token)
52 {
53 token_next = token->next;
54
55 lexer_token_push(lexer, token);
56
57 token = token_next;
58 }
59 return lexer;
60}
61
62char *lexer_token_make_string(char *str, Token **token)
63{
64 char *end = str;
65 int is_string_read = 1;
66 str += 1;
67
68 do
69 {
70 end = strchr(end+1, '"');
71 if (!end)
72 {
73 lexer_token_make_error("Cannot find end of string!", token);
74 is_string_read = 0;
75 break;
76 }
77 is_string_read = 1;
78 } while(*(end-1) == '\\');
79
80 if(is_string_read)
81 {
82 int str_len = (end - str);
83 *token = lexer_token_make(str, str_len, TOKEN_TYPE_STRING);
84 }
85
86 return (!is_string_read) ? NULL : end + 1;
87}
88
89char *lexer_token_make_number(char *str, Token **token)
90{
91 char *end = lexer_token_terminated_symbol(str);
92
93 char *next = str;
94 int is_decimal_point=0;
95 int is_number_read=1;
96
97 while(next<end)
98 {
99 if (!is_decimal_point && *next == '.')
100 is_decimal_point=1;
101 else if(*next == '.') {
102 lexer_token_make_error(
103 "Cannot read number, becouse it has too many decimal points!", token);
104 is_number_read = 0;
105 break;
106 }
107 else if(!isdigit(*next)) {
108 lexer_token_make_error("Cannot read number, becouse it isnt digit!", token);
109 is_number_read = 0;
110 break;
111 }
112
113 is_number_read = 1;
114 next+=1;
115 }
116
117 if(is_number_read)
118 {
119 int str_len = (end - str + 1); *token = lexer_token_make(str, str_len, TOKEN_TYPE_NUMBER);
120 (*token)->is_decimal_point = is_decimal_point;
121 }
122
123 return (is_number_read) ? end + 1 : NULL;
124}
125
/*
 * Find the last character of the token that starts at str.
 *
 * A token ends right before the first of the characters  ( ) [ ] , ; " `
 * or a space, or at the end of the string when none of them occurs.
 *
 * Returns a pointer to the token's final character (inclusive). When str is
 * empty or itself starts with a terminator the token has no characters and
 * the result is str - 1, so that `result - str + 1` evaluates to 0.
 */
char *lexer_token_terminated_symbol(char *str)
{
    static const char chars_to_terminate[] = " ()[],;\"`";
    /* Length of the leading run that contains no terminator; equals the
     * whole string length when no terminator is present. */
    size_t token_len = strcspn(str, chars_to_terminate);

    return str + token_len - 1;
}
133
134char *lexer_token_make_symbol(char *str, Token **token)
135{
136 char *end = lexer_token_terminated_symbol(str);
137
138 int str_len = (end - str + 1);
139
140 *token = lexer_token_make(str, str_len, TOKEN_TYPE_SYMBOL);
141
142 if(strncmp(str, SYMBOL_NIL, str_len) == 0)
143 (*token)->type = TOKEN_TYPE_NIL;
144 else if(strncmp(str, SYMBOL_FALSE, str_len) == 0 ||
145 strncmp(str, SYMBOL_FALSE_SHORT, str_len) == 0)
146 (*token)->type = TOKEN_TYPE_FALSE;
147 else if(strncmp(str, SYMBOL_TRUE, str_len) == 0 ||
148 strncmp(str, SYMBOL_TRUE_SHORT, str_len == 0))
149 (*token)->type = TOKEN_TYPE_TRUE;
150
151 return end + 1;
152}
153
154char *lexer_token_make_special(char *str, Token **token)
155{
156 *token = lexer_token_make(str, 1, TOKEN_TYPE_SPECIAL);
157 return str + 1;
158}
159
160
161void lexer_token_make_error(char *message, Token **token)
162{
163 unsigned long message_len = strlen(message);
164 *token = lexer_token_make(message, message_len, TOKEN_TYPE_ERROR);
165}
166
167
168Lexer *lexer_tokenize(char *str) {
169 Lexer *lexer = lexer_make();
170 Token *token = NULL;
171
172 while (str && *str) {
173 /* if(!*str) return lexer; */
174
175 switch (*str) {
176 case ' ':
177 case ';':
178 token = NULL;
179 str += 1;
180 break;
181 case '(':
182 case ')':
183 case '[':
184 case ']':
185 str = lexer_token_make_special(str, &token);
186 break;
187 case '0':
188 case '1':
189 case '2':
190 case '3':
191 case '4':
192 case '5':
193 case '6':
194 case '7':
195 case '8':
196 case '9':
197 str = lexer_token_make_number(str, &token);
198 break;
199 case '"':
200 str = lexer_token_make_string(str, &token);
201 break;
202 default:
203 str = lexer_token_make_symbol(str, &token);
204 break;
205 }
206 if (token)
207 lexer_token_push(lexer, token);
208 }
209
210 return lexer;
211}
212
213void lexer_tokens_print(Lexer *lexer)
214{
215 Token *token = lexer->tokens;
216 while (token) {
217 switch(token->type) {
218 case TOKEN_TYPE_STRING:
219 printf("String: ");
220 break;
221 case TOKEN_TYPE_SYMBOL:
222 printf("Symbol: ");
223 break;
224 case TOKEN_TYPE_SPECIAL:
225 printf("Special: ");
226 break;
227 case TOKEN_TYPE_NUMBER:
228 printf("Number: ");
229 break;
230 case TOKEN_TYPE_TRUE:
231 printf("True: ");
232 break;
233 case TOKEN_TYPE_FALSE:
234 printf("False: ");
235 break;
236 case TOKEN_TYPE_NIL:
237 printf("Nil: ");
238 break;
239 }
240 printf("%s\n", token->value);
241 token = token->next;
242 }
243}