From f44f1f8c7ef7b6266667dce76db686af3258adfc Mon Sep 17 00:00:00 2001 From: jdlugosz963 Date: Wed, 1 Mar 2023 23:30:31 +0100 Subject: Build simple abstract syntax tree --- lexer.c | 243 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lexer.h | 65 +++++++++++++++++ memory.c | 27 +++++++ memory.h | 15 ++++ parser.c | 174 +++++++++++++++++++++++++++++++++++++++++++++ parser.h | 18 +++++ types.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ types.h | 65 +++++++++++++++++ 8 files changed, 839 insertions(+) create mode 100644 lexer.c create mode 100644 lexer.h create mode 100644 memory.c create mode 100644 memory.h create mode 100644 parser.c create mode 100644 parser.h create mode 100644 types.c create mode 100644 types.h diff --git a/lexer.c b/lexer.c new file mode 100644 index 0000000..0c5e649 --- /dev/null +++ b/lexer.c @@ -0,0 +1,243 @@ +#include "lexer.h" +#include +#include +#include + +Lexer *lexer_make() +{ + Lexer *lexer = (Lexer *)malloc(sizeof(Lexer)); + return lexer; +} + +void lexer_free(Lexer *lexer) +{ + Token *token = lexer->tokens; + Token *token_next = NULL; + + while (token) + { + token_next = token->next; + jadl_free(token->value); + jadl_free(token); + token = token_next; + } + jadl_free(lexer); +} + +Token *lexer_token_make(char *value, int value_len, TokenType type) +{ + Token *token = jadl_malloc(sizeof(token)); + token->value = jadl_malloc(sizeof(char) * value_len + 1); + token->value[value_len] = '\0'; + token->type = type; + strncpy(token->value, value, value_len); + token->is_decimal_point = 0; + return token; +} + +Lexer *lexer_token_push(Lexer *lexer, Token *token) +{ + token->next = lexer->tokens; + lexer->tokens = token; + return lexer; +} + +Lexer *lexer_tokens_reverse(Lexer *lexer) +{ + Token *token = lexer->tokens; + Token *token_next = NULL; + lexer->tokens=NULL; + + while (token) + { + token_next = token->next; + + lexer_token_push(lexer, token); + + token = token_next; + } + return lexer; +} + +char *lexer_token_make_string(char *str, Token **token) +{ + char *end = str; + int is_string_read = 1; + str += 1; + + do + { + end = strchr(end+1, '"'); + if (!end) + { + lexer_token_make_error("Cannot find end of string!", token); + is_string_read = 0; + break; + } + is_string_read = 1; + } while(*(end-1) == '\\'); + + if(is_string_read) + { + int str_len = (end - str); + *token = lexer_token_make(str, str_len, TOKEN_TYPE_STRING); + } + + return (!is_string_read) ? NULL : end + 1; +} + +char *lexer_token_make_number(char *str, Token **token) +{ + char *end = lexer_token_terminated_symbol(str); + + char *next = str; + int is_decimal_point=0; + int is_number_read=1; + + while(nextis_decimal_point = is_decimal_point; + } + + return (is_number_read) ? end + 1 : NULL; +} + +char *lexer_token_terminated_symbol(char *str) +{ + static char *chars_to_terminate = " ()[],;\"`"; + char *terminated = strpbrk(str, chars_to_terminate); + terminated = (terminated == NULL) ? &str[strlen(str) - 1] : terminated - 1; + return terminated; +} + +char *lexer_token_make_symbol(char *str, Token **token) +{ + char *end = lexer_token_terminated_symbol(str); + + int str_len = (end - str + 1); + + *token = lexer_token_make(str, str_len, TOKEN_TYPE_SYMBOL); + + if(strncmp(str, SYMBOL_NIL, str_len) == 0) + (*token)->type = TOKEN_TYPE_NIL; + else if(strncmp(str, SYMBOL_FALSE, str_len) == 0 || + strncmp(str, SYMBOL_FALSE_SHORT, str_len) == 0) + (*token)->type = TOKEN_TYPE_FALSE; + else if(strncmp(str, SYMBOL_TRUE, str_len) == 0 || + strncmp(str, SYMBOL_TRUE_SHORT, str_len == 0)) + (*token)->type = TOKEN_TYPE_TRUE; + + return end + 1; +} + +char *lexer_token_make_special(char *str, Token **token) +{ + *token = lexer_token_make(str, 1, TOKEN_TYPE_SPECIAL); + return str + 1; +} + + +void lexer_token_make_error(char *message, Token **token) +{ + unsigned long message_len = strlen(message); + *token = lexer_token_make(message, message_len, TOKEN_TYPE_ERROR); +} + + +Lexer *lexer_tokenize(char *str) { + Lexer *lexer = lexer_make(); + Token *token = NULL; + + while (str && *str) { + /* if(!*str) return lexer; */ + + switch (*str) { + case ' ': + case ';': + token = NULL; + str += 1; + break; + case '(': + case ')': + case '[': + case ']': + str = lexer_token_make_special(str, &token); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + str = lexer_token_make_number(str, &token); + break; + case '"': + str = lexer_token_make_string(str, &token); + break; + default: + str = lexer_token_make_symbol(str, &token); + break; + } + if (token) + lexer_token_push(lexer, token); + } + + return lexer; +} + +void lexer_tokens_print(Lexer *lexer) +{ + Token *token = lexer->tokens; + while (token) { + switch(token->type) { + case TOKEN_TYPE_STRING: + printf("String: "); + break; + case TOKEN_TYPE_SYMBOL: + printf("Symbol: "); + break; + case TOKEN_TYPE_SPECIAL: + printf("Special: "); + break; + case TOKEN_TYPE_NUMBER: + printf("Number: "); + break; + case TOKEN_TYPE_TRUE: + printf("True: "); + break; + case TOKEN_TYPE_FALSE: + printf("False: "); + break; + case TOKEN_TYPE_NIL: + printf("Nil: "); + break; + } + printf("%s\n", token->value); + token = token->next; + } +} diff --git a/lexer.h b/lexer.h new file mode 100644 index 0000000..87fc008 --- /dev/null +++ b/lexer.h @@ -0,0 +1,65 @@ +#ifndef _LEXER +#define _LEXER + +#include +#include +#include +#include +#include + +#include "types.h" +#include "memory.h" + +#define SYMBOL_NIL "nil" +#define SYMBOL_TRUE_SHORT "#t" +#define SYMBOL_TRUE "#true" +#define SYMBOL_FALSE_SHORT "#f" +#define SYMBOL_FALSE "#false" + + +typedef enum _TokenType { + TOKEN_TYPE_SYMBOL, + TOKEN_TYPE_NUMBER, + TOKEN_TYPE_STRING, + TOKEN_TYPE_TRUE, + TOKEN_TYPE_FALSE, + TOKEN_TYPE_NIL, + TOKEN_TYPE_SPECIAL, + TOKEN_TYPE_ERROR +} TokenType; + + +typedef struct _Token { + TokenType type; + int is_decimal_point; + char *value; + struct _Token *next; +} Token; + +typedef struct _Lexer { + int error; + Token *tokens; +} Lexer; + +int lexer_tokens_count(char *str); + +Lexer *lexer_make(); +void lexer_free(Lexer *lexer); +Token *lexer_token_make(char *value, int value_len, TokenType type); +Lexer *lexer_token_push(Lexer *lexer, Token *token); + +char *lexer_token_terminated_symbol(char *str); +char *lexer_token_make_string(char *str, Token **token); +char *lexer_token_make_symbol(char *str, Token **token); +char *lexer_token_make_special(char *str, Token **token); +char *lexer_token_make_number(char *str, Token **token); + +void lexer_token_make_error(char *message, Token **token); + +Lexer *lexer_tokenize(char *str); + + +void lexer_tokens_print(Lexer *lexer); +Lexer *lexer_tokens_reverse(Lexer *lexer); + +#endif diff --git a/memory.c b/memory.c new file mode 100644 index 0000000..387268b --- /dev/null +++ b/memory.c @@ -0,0 +1,27 @@ +#include "memory.h" + +#include + +void *jadl_malloc(size_t size) { + void *ptr = malloc(size); + if(ptr == NULL) + { + printf("malloc: ERROR"); + exit(1); + } + return ptr; +} + +void jadl_free(void *ptr) { + return free(ptr); +} + +long usage() { + struct rusage r_usage; + getrusage(RUSAGE_SELF, &r_usage); + return r_usage.ru_maxrss; +} + +void usage_print() { + printf("Mem usage: %ld kilobytes\n", usage()); +} diff --git a/memory.h b/memory.h new file mode 100644 index 0000000..ba78b22 --- /dev/null +++ b/memory.h @@ -0,0 +1,15 @@ +#ifndef _MEMORY +#define _MEMORY + +#include +#include +#include +#include + +void *jadl_malloc(size_t size); +void jadl_free(void *ptr); + +long usage(); +void usage_print(); + +#endif diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..574d2ac --- /dev/null +++ b/parser.c @@ -0,0 +1,174 @@ +#include "parser.h" +#include "lexer.h" +#include "types.h" +#include + + +LISP_OBJECT *parser_parse_str(char *str) +{ + Lexer *lexer = lexer_tokenize(str); + lexer_tokens_reverse(lexer); + + Token *token = lexer->tokens; + LISP_OBJECT *lisp_obj = NULL; + lisp_obj = parser_parse_tokens(&token); + + lexer_free(lexer); + + return lisp_obj; +} + +LISP_OBJECT *parser_parse_tokens(Token **token) +{ + LISP_OBJECT *lisp_obj = NULL; + switch((*token)->type) + { + case TOKEN_TYPE_SPECIAL: + lisp_obj = parser_make_list(token); + break; + case TOKEN_TYPE_NUMBER: + lisp_obj = parser_make_number(token); + break; + case TOKEN_TYPE_STRING: + lisp_obj = parser_make_string(token); + break; + case TOKEN_TYPE_SYMBOL: + lisp_obj = parser_make_symbol(token); + break; + case TOKEN_TYPE_TRUE: + lisp_obj = parser_make_true(token); + break; + case TOKEN_TYPE_FALSE: + lisp_obj = parser_make_false(token); + break; + case TOKEN_TYPE_NIL: + lisp_obj = parser_make_nil(token); + break; + case TOKEN_TYPE_ERROR: + lisp_obj = lisp_object_make_error((*token)->value); + *token = (*token)->next; + break; + default: + *token = (*token)->next; + lisp_obj = lisp_object_make_error("Something bad happenend!"); + } + return lisp_obj; +} + +LISP_OBJECT *parser_make_list(Token **token) +{ + LISP_OBJECT *lisp_list_obj = lisp_object_make_list(); + char close_delimiter = ( *(*token)->value == '(' ) ? ')' : ']'; + *token = (*token)->next; + int is_close_delimiter = 1; + + while(*token) + { + if((*token)->type==TOKEN_TYPE_SPECIAL && + *(*token)->value==close_delimiter) + { + is_close_delimiter=0; + *token=(*token)->next; + break; + } + + lisp_object_list_push(lisp_list_obj, + (void *)parser_parse_tokens(token)); + + } + + + List *list = list_reverse(lisp_list_obj->value.list); + lisp_list_obj->value.list = list; + + if(is_close_delimiter != 0) + { + LISP_OBJECT *err_msg = lisp_object_make_error("Unbalanced parenthesis!"); + lisp_object_list_push(lisp_list_obj, err_msg); + } + + return lisp_list_obj; +} + + +LISP_OBJECT *parser_make_number(Token **token) +{ + LISP_OBJECT *lisp_obj = NULL; + char *number_str = (*token)->value; + union + { + long long natural; + double floating; + + } number; + + if((*token)->is_decimal_point) + { + sscanf(number_str, "%lf", &number.floating); + lisp_obj = lisp_object_make_number_float(number.floating); + } + else + { + sscanf(number_str, "%lld", &number.natural); + lisp_obj = lisp_object_make_number_natural(number.natural); + } + + + *token=(*token)->next; + return lisp_obj; +} + +LISP_OBJECT *parser_make_string(Token **token) +{ + char *str = (*token)->value; + LISP_OBJECT *lisp_obj = lisp_object_make_string(str); + + *token=(*token)->next; + return lisp_obj; +} + +LISP_OBJECT *parser_make_symbol(Token **token) +{ + char *str = (*token)->value; + LISP_OBJECT *lisp_obj = lisp_object_make_symbol(str); + + *token=(*token)->next; + return lisp_obj; +} + +LISP_OBJECT *parser_make_true(Token **token) +{ + LISP_OBJECT *lisp_obj = lisp_object_make_true(); + + *token=(*token)->next; + return lisp_obj; +} + +LISP_OBJECT *parser_make_false(Token **token) +{ + LISP_OBJECT *lisp_obj = lisp_object_make_false(); + + *token=(*token)->next; + return lisp_obj; +} + + +LISP_OBJECT *parser_make_nil(Token **token) +{ + LISP_OBJECT *lisp_obj = lisp_object_make_nil(); + + *token=(*token)->next; + return lisp_obj; +} + +int main() +{ + char *test = "(display (string-append \"4*3+2=\" (number->string (+ (* 4 3) 1))))"; + + + LISP_OBJECT *obj = NULL; + + obj = parser_parse_str(test); + lisp_object_print(obj, 0); + lisp_object_free(obj); +} diff --git a/parser.h b/parser.h new file mode 100644 index 0000000..2a4c523 --- /dev/null +++ b/parser.h @@ -0,0 +1,18 @@ +#ifndef _PARSER +#define _PARSER + +#include "types.h" +#include "lexer.h" + +LISP_OBJECT *parser_parse_str(char *str); +LISP_OBJECT *parser_parse_tokens(Token **token); + +LISP_OBJECT *parser_make_list(Token **token); +LISP_OBJECT *parser_make_number(Token **token); +LISP_OBJECT *parser_make_string(Token **token); +LISP_OBJECT *parser_make_symbol(Token **token); +LISP_OBJECT *parser_make_true(Token **token); +LISP_OBJECT *parser_make_false(Token **token); +LISP_OBJECT *parser_make_nil(Token **token); + +#endif diff --git a/types.c b/types.c new file mode 100644 index 0000000..42359bb --- /dev/null +++ b/types.c @@ -0,0 +1,232 @@ +#include "types.h" +#include "memory.h" +#include + + +List *list_make(void *ptr) +{ + return list_push(NULL, ptr); +} + +List *list_push(List *list, void *ptr) +{ + List *new_head = (List*)malloc(sizeof(List)); + new_head->next=list; + new_head->ptr=ptr; + return new_head; +} + + +List *list_reverse(List *list) +{ + List *current = list; + List *next = NULL; + List *new_list = NULL; + while (current) { + next = current->next; + + current->next = new_list; + new_list = current; + + current = next; + } + return new_list; +} + +void list_free(List *list) +{ + List *next = NULL; + while (list) + { + next = list->next; + + jadl_free(list->ptr); + jadl_free(list); + + list = next; + } + +} + +void lisp_object_list_push(LISP_OBJECT *lisp_list_obj, void *ptr) +{ + List *new_head = list_push(lisp_list_obj->value.list, ptr); + lisp_list_obj->value.list = new_head; +} + +LISP_OBJECT *lisp_object_make() +{ + LISP_OBJECT *lisp_obj = jadl_malloc(sizeof(LISP_OBJECT)); + lisp_obj->is_decimal_point = 0; + return lisp_obj; +} + +void lisp_object_list_free(LISP_OBJECT *lisp_obj_list) +{ + if(lisp_obj_list->type != LISP_TYPE_LIST) + return; + + List *list = lisp_obj_list->value.list; + List *next = NULL; + + while (list) + { + next = list->next; + + LISP_OBJECT *lisp_obj = list->ptr; + + lisp_object_free(lisp_obj); + + jadl_free(list); + list = next; + } + jadl_free(lisp_obj_list); +} + +void lisp_object_free(LISP_OBJECT *lisp_obj) +{ + if (lisp_obj->type == LISP_TYPE_LIST) + lisp_object_list_free(lisp_obj); + else + { + if (lisp_obj->type != LISP_TYPE_FALSE && + lisp_obj->type != LISP_TYPE_TRUE && + lisp_obj->type != LISP_TYPE_NIL) + { + jadl_free(lisp_obj->value.ptr); + } + jadl_free(lisp_obj); + } +} + +LISP_OBJECT *lisp_object_make_list() +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_LIST; + lisp_obj->value.ptr = NULL; + return lisp_obj; +} + +LISP_OBJECT *lisp_object_make_number_natural(long long number) +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_NUMBER; + lisp_obj->value.number_natural = jadl_malloc(sizeof(long long)); + *lisp_obj->value.number_natural = number; + return lisp_obj; +} + +LISP_OBJECT *lisp_object_make_number_float(double number) +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_NUMBER; + lisp_obj->value.number_float = jadl_malloc(sizeof(double)); + *lisp_obj->value.number_float = number; + lisp_obj->is_decimal_point = 1; + return lisp_obj; +} + +LISP_OBJECT *lisp_object_make_string(char *str) +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_STRING; + lisp_obj->value.string = jadl_malloc(sizeof(char)*strlen(str)); + strcpy(lisp_obj->value.string, str); + return lisp_obj; +} + +LISP_OBJECT *lisp_object_make_symbol(char *str) +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_SYMBOL; + lisp_obj->value.symbol = jadl_malloc(sizeof(char)*strlen(str)); + strcpy(lisp_obj->value.symbol, str); + return lisp_obj; +} + +LISP_OBJECT *lisp_object_make_error(char *str) +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_ERROR; + lisp_obj->value.error = jadl_malloc(sizeof(char)*strlen(str)); + strcpy(lisp_obj->value.error, str); + return lisp_obj; +} + +LISP_OBJECT *lisp_object_make_nil() +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_NIL; + return lisp_obj; +} + +LISP_OBJECT *lisp_object_make_true() +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_TRUE; + return lisp_obj; +} + +LISP_OBJECT *lisp_object_make_false() +{ + LISP_OBJECT *lisp_obj = lisp_object_make(); + lisp_obj->type = LISP_TYPE_FALSE; + return lisp_obj; +} + +int lisp_object_print(LISP_OBJECT *obj, int indent) +{ + int count = 0; + for(int i=0; itype) + { + case LISP_TYPE_LIST: + printf(" │\n"); + break; + default: + printf("└─ "); + break; + } + + switch(obj->type) + { + case LISP_TYPE_LIST: + List *lst = obj->value.list; + int temp = 0; + while (lst) + { + count ++; + temp = lisp_object_print((LISP_OBJECT *)lst->ptr, + indent+1); + count += temp; + lst = lst->next; + } + break; + case LISP_TYPE_STRING: + printf("STRING: %s\n", obj->value.string); + break; + case LISP_TYPE_ERROR: + printf("ERROR: %s\n", obj->value.error); + break; + case LISP_TYPE_SYMBOL: + printf("SYMBOL: %s\n", obj->value.symbol); + break; + case LISP_TYPE_NUMBER: + if(obj->is_decimal_point) + printf("NUMBER: %lf\n", *obj->value.number_float); + else + printf("NUMBER: %lld\n", *obj->value.number_natural); + break; + case LISP_TYPE_TRUE: + printf("TRUE\n"); + break; + case LISP_TYPE_FALSE: + printf("FALSE\n"); + break; + case LISP_TYPE_NIL: + printf("NIL\n"); + break; + } + return count; +} diff --git a/types.h b/types.h new file mode 100644 index 0000000..632e283 --- /dev/null +++ b/types.h @@ -0,0 +1,65 @@ +#ifndef _TYPES +#define _TYPES + +#include +#include +#include + +#include "memory.h" + + +typedef enum _LispType { + LISP_TYPE_SYMBOL, + LISP_TYPE_LIST, + LISP_TYPE_STRING, + LISP_TYPE_NUMBER, + LISP_TYPE_NIL, + LISP_TYPE_TRUE, + LISP_TYPE_FALSE, + LISP_TYPE_ERROR +} LispType; + + +typedef struct _List { + void *ptr; + struct _List *next; +} List; + +List *list_make(void *ptr); +List *list_push(List *list, void *ptr); +List *list_reverse(List *list); + +typedef struct _LispObject { + LispType type; + int is_decimal_point; + + union { + char *string; + char *symbol; + char *error; + long long *number_natural; + double *number_float; + struct _LispObject (*function)(struct _LispObject*); + List *list; + void *ptr; + } value; +} LISP_OBJECT; + +void lisp_object_free(LISP_OBJECT *lisp_obj); + +void lisp_object_list_push(LISP_OBJECT *lisp_list_obj, void *ptr); + +LISP_OBJECT *lisp_object_make(); +LISP_OBJECT *lisp_object_make_list(); +LISP_OBJECT *lisp_object_make_number_natural(long long number); +LISP_OBJECT *lisp_object_make_number_float(double number); +LISP_OBJECT *lisp_object_make_string(char *str); +LISP_OBJECT *lisp_object_make_symbol(char *str); +LISP_OBJECT *lisp_object_make_error(char *str); +LISP_OBJECT *lisp_object_make_nil(); +LISP_OBJECT *lisp_object_make_true(); +LISP_OBJECT *lisp_object_make_false(); + +int lisp_object_print(LISP_OBJECT *obj, int indent); + +#endif -- cgit v1.2.3