labwc/src/xbm/tokenize.c

126 lines
2.3 KiB
C
Raw Normal View History

2020-06-29 19:27:59 +01:00
/*
* XBM file tokenizer
*
* Copyright Johan Malm 2020
*/
2020-06-22 19:03:02 +01:00
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
2020-08-31 20:01:08 +01:00
#include "xbm/tokenize.h"
2020-06-22 19:03:02 +01:00
/* Read cursor into the buffer handed to tokenize_xbm() */
static char *current_buffer_position;

/* Growable token vector built up by add_token() */
static struct token *tokens;

/* Number of tokens in use */
static int nr_tokens;

/* Number of token slots currently allocated */
static int alloc_tokens;
static void
add_token(enum token_type token_type)
2020-06-22 19:03:02 +01:00
{
if (nr_tokens == alloc_tokens) {
alloc_tokens = (alloc_tokens + 16) * 2;
tokens = realloc(tokens, alloc_tokens * sizeof(struct token));
}
struct token *token = tokens + nr_tokens;
memset(token, 0, sizeof(*token));
nr_tokens++;
token->type = token_type;
}
static void
get_identifier_token()
2020-06-22 19:03:02 +01:00
{
struct token *token = tokens + nr_tokens - 1;
token->name[token->pos] = current_buffer_position[0];
token->pos++;
if (token->pos == MAX_TOKEN_SIZE - 1) {
2020-06-22 19:03:02 +01:00
return;
}
2020-06-22 19:03:02 +01:00
current_buffer_position++;
switch (current_buffer_position[0]) {
case '\0':
return;
2020-06-23 07:17:07 +01:00
case 'a' ... 'z':
case 'A' ... 'Z':
case '0' ... '9':
2020-06-22 19:03:02 +01:00
case '_':
case '#':
get_identifier_token();
break;
default:
break;
}
}
static void
get_number_token(void)
2020-06-22 19:03:02 +01:00
{
struct token *token = tokens + nr_tokens - 1;
token->name[token->pos] = current_buffer_position[0];
token->pos++;
if (token->pos == MAX_TOKEN_SIZE - 1) {
2020-06-22 19:03:02 +01:00
return;
}
2020-06-22 19:03:02 +01:00
current_buffer_position++;
switch (current_buffer_position[0]) {
case '\0':
return;
2020-06-23 07:17:07 +01:00
case '0' ... '9':
case 'a' ... 'f':
case 'A' ... 'F':
2020-06-22 19:03:02 +01:00
case 'x':
get_number_token();
break;
default:
break;
}
}
static void
get_special_char_token()
2020-06-22 19:03:02 +01:00
{
struct token *token = tokens + nr_tokens - 1;
token->name[0] = current_buffer_position[0];
current_buffer_position++;
}
struct token *
tokenize_xbm(char *buffer)
2020-06-22 19:03:02 +01:00
{
tokens = NULL;
nr_tokens = 0;
alloc_tokens = 0;
2020-06-22 19:03:02 +01:00
current_buffer_position = buffer;
for (;;) {
switch (current_buffer_position[0]) {
case '\0':
goto out;
2020-06-23 07:17:07 +01:00
case 'a' ... 'z':
case 'A' ... 'Z':
2020-06-22 19:03:02 +01:00
case '_':
case '#':
add_token(TOKEN_IDENT);
get_identifier_token();
continue;
2020-06-23 07:17:07 +01:00
case '0' ... '9':
2020-06-22 19:03:02 +01:00
add_token(TOKEN_INT);
get_number_token();
struct token *token = tokens + nr_tokens - 1;
token->value = (int)strtol(token->name, NULL, 0);
2020-06-22 19:03:02 +01:00
continue;
case '{':
add_token(TOKEN_SPECIAL);
get_special_char_token();
continue;
default:
break;
}
++current_buffer_position;
}
out:
add_token(TOKEN_NONE); /* vector end marker */
return tokens;
}