/* MEGATRON */

/* LOG | FILES | OVERVIEW */
#ifndef LEXER_C
#define LEXER_C
#include <lexer.h>

/*
 *	Character classification helpers used by the lexer.
 *	Every use of the argument is parenthesised so that compound expressions
 *	(for example a conditional expression) classify correctly. The argument is
 *	still evaluated more than once, so it must not have side effects.
 */
#define IS_ID_CHAR(x) ( ((x) <= 'z' && (x) >= 'a') || ((x) <= 'Z' && (x) >= 'A') || (x) == '_' )
#define IS_DIGIT(x) ( (x) <= '9' && (x) >= '0' )
#define IS_ID_THING(x) ( IS_ID_CHAR(x) || IS_DIGIT(x) )
/*
 *	Pushes the lexing error message x and returns a zero-length KW_NOP token
 *	from the enclosing function. It relies on `src` and `translation_data`
 *	being in scope at the point of use. Wrapped in do { } while(0) so that
 *	`LEX_ERROR(...);` behaves as a single statement, even in an unbraced if/else.
 */
#define LEX_ERROR(x) do { push_lexing_error((x),src,translation_data); return get_token(src->src+src->where_in_src,0,KW_NOP,src->current_row,src->current_column); } while(0)

/*
 *	Placeholder lexer: very slow (and very, very bad), but I will probably not replace it.
 */
/*
 *	Tokenises src from its current position and appends every token produced
 *	by lex_step() to token_destination.
 *
 *	White space is skipped before the first token and after each token.
 *	If has_new_errors() reports an error after a token has been pushed, lexing
 *	stops immediately and no KW_EOF token is appended. Otherwise the queue is
 *	finished off with a KW_EOF token that has no data and zero size.
 */
void lex(struct Queue *token_destination,struct Source *src,struct Translation_Data *translation_data)
{
	struct token *current_token;

	for(skip_white_space(src); src->where_in_src<src->src_size; skip_white_space(src))
	{
		current_token=lex_step(src,translation_data);
		Queue_Push(token_destination,current_token);

		/* the token that triggered the error stays in the queue */
		if(has_new_errors(translation_data))
			return;
	}

	current_token=get_token(NULL,0,KW_EOF,src->current_row,src->current_column);
	Queue_Push(token_destination,current_token);
}


struct token* lex_step(struct Source *src,struct Translation_Data *translation_data)
{
	if(check_and_move_if_on_word("machine",sizeof("machine")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("machine")+1,sizeof("machine")-1,KW_MACHINE,src->current_row,src->current_column);
	if(check_and_move_if_on_word("from",sizeof("from")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("from")+1,sizeof("from")-1,KW_FROM,src->current_row,src->current_column);
	if(check_and_move_if_on_word("to",sizeof("to")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("to")+1,sizeof("to")-1,KW_TO,src->current_row,src->current_column);
	if(check_and_move_if_on_word("on",sizeof("on")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("on")+1,sizeof("on")-1,KW_ON,src->current_row,src->current_column);
	if(check_and_move_if_on_word("[",sizeof("[")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof("[")+1,sizeof("[")-1,KW_OPEN_SQUARE,src->current_row,src->current_column);
	if(check_and_move_if_on_word("(",sizeof("(")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof("(")+1,sizeof("(")-1,KW_OPEN_NORMAL,src->current_row,src->current_column);
	if(check_and_move_if_on_word(")",sizeof(")")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof(")")+1,sizeof(")")-1,KW_CLOSE_NORMAL,src->current_row,src->current_column);
	if(check_and_move_if_on_word(",",sizeof(",")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof(",")+1,sizeof(",")-1,KW_COMMA,src->current_row,src->current_column);
	if(check_and_move_if_on_word(".",sizeof(".")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof(".")+1,sizeof(".")-1,KW_DOT,src->current_row,src->current_column);
	if(check_and_move_if_on_word("]",sizeof("]")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof("]")+1,sizeof("]")-1,KW_CLOSE_SQUARE,src->current_row,src->current_column);
	if(check_and_move_if_on_word(";",sizeof(";")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof(";")+1,sizeof(";")-1,KW_SEMI_COLUMN,src->current_row,src->current_column);
	if(check_and_move_if_on_word("||",sizeof("||")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof("||")+1,sizeof("||")-1,KW_OR,src->current_row,src->current_column);
	if(check_and_move_if_on_word("|",sizeof("|")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof("|")+1,sizeof("|")-1,KW_PIPE,src->current_row,src->current_column);
	if(check_and_move_if_on_word("&&",sizeof("&&")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof("&&")+1,sizeof("&&")-1,KW_AND,src->current_row,src->current_column);
	if(check_and_move_if_on_word("!",sizeof("!")-1,src,0))
		return get_token(src->src+src->where_in_src-sizeof("!")+1,sizeof("!")-1,KW_NOT,src->current_row,src->current_column);
	if(check_and_move_if_on_word("starting",sizeof("starting")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("starting")+1,sizeof("starting")-1,KW_STARTING,src->current_row,src->current_column);
	if(check_and_move_if_on_word("states",sizeof("states")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("states")+1,sizeof("states")-1,KW_STATES,src->current_row,src->current_column);
	if(check_and_move_if_on_word("events",sizeof("events")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("events")+1,sizeof("events")-1,KW_EVENTS,src->current_row,src->current_column);
	if(check_and_move_if_on_word("execute",sizeof("execute")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("execute")+1,sizeof("execute")-1,KW_EXECUTE,src->current_row,src->current_column);
	if(check_and_move_if_on_word("event",sizeof("event")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("event")+1,sizeof("event")-1,KW_EVENT,src->current_row,src->current_column);
	if(check_and_move_if_on_word("transitions",sizeof("transitions")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("transitions")+1,sizeof("transitions")-1,KW_TRANSITIONS,src->current_row,src->current_column);
	if(check_and_move_if_on_word("if",sizeof("if")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("if")+1,sizeof("if")-1,KW_IF,src->current_row,src->current_column);
	if(check_and_move_if_on_word("else",sizeof("else")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("else")+1,sizeof("else")-1,KW_ELSE,src->current_row,src->current_column);
	if(check_and_move_if_on_word("granted",sizeof("granted")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("granted")+1,sizeof("granted")-1,KW_GRANTED,src->current_row,src->current_column);
	if(check_and_move_if_on_word("entering",sizeof("entering")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("entering")+1,sizeof("entering")-1,KW_ENTERING,src->current_row,src->current_column);
	if(check_and_move_if_on_word("exiting",sizeof("exiting")-1,src,1))
		return get_token(src->src+src->where_in_src-sizeof("exiting")+1,sizeof("exiting")-1,KW_EXITING,src->current_row,src->current_column);



	if(IS_ID_CHAR(src->src[src->where_in_src])) /*check for id*/
	{
		size_t i;

		++src->where_in_src;
		for( i=src->where_in_src ; 
			i < src->src_size && IS_ID_THING(src->src[i]);
			++i);


		i-=src->where_in_src;
		src->where_in_src+=i;
		return get_token(src->src + src->where_in_src - i - 1, i + 1, KW_ID,src->current_row,src->current_column);
	}else if(src->src[src->where_in_src]=='"') /*check for string literal*/
	{
		size_t i;
		++src->where_in_src;
		for( i=src->where_in_src ; 
			src->src[i]!='"' && i< src->src_size;	
			++i);
		
		if(i==src->src_size)
		{
			LEX_ERROR("Unexpected end of file");
		}else
		{
			i-=src->where_in_src;
			src->where_in_src+=i+1;
			return get_token(src->src + src->where_in_src-i-1, i, KW_STRING,src->current_row,src->current_column);
		}

	}else
	{
		LEX_ERROR("Unexpected symbol");
	}
}
/*
 *	Allocates a token and fills in its fields.
 *
 *	data	pointer stored in the token as-is; the bytes are not copied
 *	size	number of bytes of data that belong to the token
 *	type	kind of the token
 *	row	row stored in the token
 *	column	column stored in the token
 *
 *	Returns the new token, or NULL if the allocation fails.
 *	The token itself is released with delete_token().
 */
struct token* get_token(char *data,size_t size,enum Keyword type,size_t row,size_t column)
{
	struct token *ret;

	ret=malloc(sizeof *ret);
	if(ret==NULL)
		return NULL;

	ret->data=data;
	ret->size=size;
	ret->type=type;
	ret->row=row;
	ret->column=column;

	return ret;
}
/*
 *	Releases a token with free().
 *	Only the token structure is freed; the memory token->data points to
 *	is left untouched by this function.
 */
void delete_token(struct token *token)
{
	free(token);
}

/*
 *	Checks whether the input at src->where_in_src starts with word and, if it
 *	does, moves src past it.
 *
 *	word			the spelling to look for
 *	word_size		length of word without the ending '\0'
 *	needs_space_after	when non-zero the word only counts if it is followed
 *				by a white space character or by the end of the input
 *
 *	Returns 1 and advances src->where_in_src and src->current_column by
 *	word_size on a match; returns 0 and leaves src untouched otherwise.
 */
static char check_and_move_if_on_word(char *word,size_t word_size,struct Source *src,char needs_space_after)
{
	size_t i;
	size_t after;

	if(src->where_in_src + word_size > src->src_size)
		return 0;

	for(i=0;i<word_size && word[i]==src->src[src->where_in_src+i];++i);

	if(i<word_size)
		return 0;

	/*
	 *	Only look at the following character when there is one: a word that
	 *	ends exactly at the end of the input is treated as properly delimited
	 *	instead of reading one element past the buffer.
	 *	The cast keeps isspace() defined for characters with negative values.
	 */
	after=src->where_in_src+word_size;
	if(needs_space_after && after<src->src_size && !isspace((unsigned char)src->src[after]))
		return 0;

	src->where_in_src+=word_size;
	src->current_column+=word_size;
	return 1;
}
/*
 *	Moves src->where_in_src forward past any white space, stopping at the
 *	first non-space character or at the end of the input.
 *
 *	A '\n' increments src->current_row and resets src->current_column to 0;
 *	every other white space character advances src->current_column by one so
 *	that the column keeps following the read position.
 */
void skip_white_space(struct Source *src)
{
	/* the cast keeps isspace() defined for characters with negative values */
	while(src->where_in_src<src->src_size && isspace((unsigned char)src->src[src->where_in_src]))
	{
		if(src->src[src->where_in_src]=='\n')
		{
			++src->current_row;
			src->current_column=0;
		}else
		{
			++src->current_column;
		}
		++src->where_in_src;
	}
}

/*
 *	Forwards to Map_Push() with the token's data pointer and size, together
 *	with thing.
 *	NOTE(review): presumably the token text is the key and thing is the value
 *	stored under it - confirm against Map_Push.
 */
void push_token_into_map(struct token *token,struct Map *map,void *thing)
{
	Map_Push(map,token->data,token->size,thing);
}
/*
 *	Converts the token's text to upper case in place.
 *	The first token->size bytes that token->data points to are overwritten
 *	with their toupper() value.
 */
void id_token_to_upper_case(struct token *token)
{
	size_t i;

	/* toupper() is only defined for unsigned char values and EOF, hence the cast */
	for(i=0;i<token->size;++i)
		token->data[i]=(char)toupper((unsigned char)token->data[i]);
}
#endif