/*
* Mimas conversion tools
*
* Copyright (C) 2010 Benjamin Moody
*
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#include
#include
#include
#include "utils.h"
#include "symtab.h"
#define MAX_SYMBOL_LENGTH 32
typedef struct _symbol_node {
symbol sym;
char *key; /* xform()ed name */
struct _symbol_node *left; /* Left subtree */
struct _symbol_node *right; /* Right subtree */
short int lheight, rheight; /* Height of subtrees */
} symbol_node;
struct _symbol_tab {
int nsymbols; /* Total number of symbols */
symbol_node *root; /* Root of tree */
};
symbol_tab *symbol_tab_new()
{
symbol_tab *tab = xnew(symbol_tab, 1);
tab->nsymbols = 0;
tab->root = NULL;
return tab;
}
static void free_tree(symbol_node *node)
{
if (!node)
return;
xfree(node->sym.realname);
xfree(node->key);
free_tree(node->left);
free_tree(node->right);
xfree(node);
}
void symbol_tab_free(symbol_tab *tab)
{
free_tree(tab->root);
xfree(tab);
}
static void xform(char *buf, const char *str)
{
static const char map[256] =
"********************************"
"**************.*0123456789******"
"*ABCDEFGHIJKLMNOPQRSTUVWXYZ[***!"
"*ABCDEFGHIJKLMNOPQRSTUVWXYZ*****"
"********************************"
"********************************"
"********************************"
"********************************";
int n, i;
/* Translate string into a form that can be strcmp()ed to yield the
correct (semi-case-independent) ordering used by Mimas */
n = strlen(str);
for (i = 0; i < n; i++)
buf[i] = map[(unsigned char) str[i]];
buf[n] = ' ';
memcpy(buf + n + 1, str, n + 1);
}
static char *getname(char *key)
{
char *p = strchr(key, ' ');
if (p)
return p + 1;
else
return NULL;
}
static const char *prev_subword_start(const char *p, const char *start)
{
while (p != start && (p[-1] == '_' || p[-1] == '.'))
p--;
if (p == start)
return NULL;
p--;
if (p[0] >= '0' && p[0] <= '9') {
while (p != start && p[-1] >= '0' && p[-1] <= '9')
p--;
return p;
}
else if (p[0] >= 'A' && p[0] <= 'Z') {
while (p != start && p[-1] >= 'A' && p[-1] <= 'Z')
p--;
return p;
}
else if (p[0] >= 'a' && p[0] <= 'z') {
while (p != start && p[-1] >= 'a' && p[-1] <= 'z')
p--;
if (p != start && p[-1] >= 'A' && p[-1] <= 'Z')
return p - 1;
else
return p;
}
else {
return p;
}
}
static void namebuf_pack_start(char **nbufp, int *nbufcount,
const char *text, int n)
{
if (!*nbufcount)
return;
if (n > *nbufcount)
n = *nbufcount;
memcpy(*nbufp, text, n);
(*nbufp) += n;
(*nbufcount) -= n;
}
static void namebuf_pack_end(char **nbufp, int *nbufcount,
const char *text, int n)
{
if (!*nbufcount)
return;
if (n > *nbufcount)
n = *nbufcount;
memcpy(*nbufp + *nbufcount - n, text, n);
(*nbufcount) -= n;
}
static void shorten_symbol(char *buf, int len, const char *name)
{
char *nbufp;
int nbufcount;
const char *p, *end;
nbufp = buf;
nbufcount = len;
namebuf_pack_end(&nbufp, &nbufcount, "", 1);
end = name + strlen(name);
p = prev_subword_start(end, name);
if (!p || !prev_subword_start(p, name)) {
/* Single long word: just preserve as much as we can */
namebuf_pack_start(&nbufp, &nbufcount, name, strlen(name));
}
else {
/* Always preserve start of first word */
if (name[0] == '_' || name[0] == '.' || name[0] == '[')
namebuf_pack_start(&nbufp, &nbufcount, name, 2);
else
namebuf_pack_start(&nbufp, &nbufcount, name, 1);
/* Reserve space for __ to represent omitted words */
namebuf_pack_start(&nbufp, &nbufcount, "__", 2);
/* Preserve as much of final word as we can */
namebuf_pack_end(&nbufp, &nbufcount, p, end - p);
nbufp--;
nbufcount++;
/* Add first char of each remaining word, from right to left,
separated by underscores */
while ((p = prev_subword_start(p, name)) && p != name) {
if (nbufcount >= 2) {
namebuf_pack_end(&nbufp, &nbufcount, "_", 1);
namebuf_pack_end(&nbufp, &nbufcount, p, 1);
}
else {
if (!nbufcount)
nbufcount += 2;
namebuf_pack_end(&nbufp, &nbufcount, "_", 1);
break;
}
}
}
if (nbufcount) {
while (nbufp != buf + (len - nbufcount)) {
nbufp[0] = nbufp[nbufcount];
nbufp++;
}
}
}
static void rotate_right(symbol_node **nptr)
{
symbol_node *a, *b;
b = *nptr;
a = b->left;
b->left = a->right;
b->lheight = a->rheight;
a->right = b;
if (b->lheight > b->rheight)
a->rheight = b->lheight + 1;
else
a->rheight = b->rheight + 1;
*nptr = a;
}
static void rotate_left(symbol_node **nptr)
{
symbol_node *a, *b;
a = *nptr;
b = a->right;
a->right = b->left;
a->rheight = b->lheight;
b->left = a;
if (a->lheight > a->rheight)
b->lheight = a->lheight + 1;
else
b->lheight = a->rheight + 1;
*nptr = b;
}
static int insert_sym(symbol_tab *tab, symbol_node **nptr, const char *key,
symbol_node **result)
{
symbol_node *node = *nptr;
int d;
if (!node) {
node = xnew(symbol_node, 1);
node->left = node->right = NULL;
node->lheight = node->rheight = 0;
node->key = xstrdup(key);
node->sym.name = getname(node->key);
node->sym.realname = NULL;
node->sym.id = tab->nsymbols;
tab->nsymbols++;
*result = *nptr = node;
return 1; /* adding node increases height */
}
d = strcmp(node->key, key);
if (d == 0) {
/* symbol matches */
*result = node;
return 0; /* existing node - no change */
}
else if (d < 0) {
if (!insert_sym(tab, &node->right, key, result))
return 0; /* height of subtree didn't change -
we're done */
node->rheight++;
if (node->rheight <= node->lheight) {
return 0; /* right subtree no taller than left -
total height is the same */
}
else if (node->rheight == node->lheight + 1) {
return 1; /* right subtree one taller than left
- total height increased */
}
else {
/* rebalance */
if (node->right->rheight < node->right->lheight)
rotate_right(&node->right);
rotate_left(nptr);
return 0;
}
}
else {
if (!insert_sym(tab, &node->left, key, result))
return 0;
node->lheight++;
if (node->lheight <= node->rheight) {
return 0;
}
else if (node->lheight == node->rheight + 1) {
return 1;
}
else {
/* rebalance */
if (node->left->lheight < node->left->rheight)
rotate_left(&node->left);
rotate_right(nptr);
return 0;
}
}
}
static int check_insert_symbol(symbol_tab *tab, const char *calcname,
const char *realname, symbol_node **result)
{
char key[MAX_SYMBOL_LENGTH * 2 + 2];
xform(key, calcname);
insert_sym(tab, &tab->root, key, result);
if (!(*result)->sym.realname) {
(*result)->sym.realname = xstrdup(realname);
return 1;
}
else if (!strcmp((*result)->sym.realname, realname)) {
return 1;
}
else {
return 0;
}
}
static int is_valid_start_char(char c)
{
if (c >= 'a' && c <= 'z')
return 1;
else if (c >= 'A' && c <= 'Z')
return 1;
else if (c == '_' || c == '[' || c == '.')
return 1;
else
return 0;
}
static int is_valid_symbol_char(char c)
{
if (c >= 'a' && c <= 'z')
return 1;
else if (c >= 'A' && c <= 'Z')
return 1;
else if (c >= '0' && c <= '9')
return 1;
else if (c == '_' || c == '[')
return 1;
else
return 0;
}
symbol *symbol_tab_add_symbol(symbol_tab *tab, const char *name)
{
char nbuf[MAX_SYMBOL_LENGTH + 1], *oldname;
symbol_node *node;
symbol *nsym;
int oldid;
char *suffixp;
int i, invalidchars = 0;
if (!name || !name[0])
return NULL;
if (!is_valid_start_char(name[0]))
invalidchars = 1;
for (i = 1; !invalidchars && name[i]; i++)
if (!is_valid_symbol_char(name[i]))
invalidchars = 1;
if (!invalidchars && strlen(name) <= MAX_SYMBOL_LENGTH) {
/* legal symbol name -> use this name exactly */
if (!check_insert_symbol(tab, name, name, &node)) {
/* rename the older symbol */
oldname = node->sym.realname;
oldid = node->sym.id;
node->sym.realname = xstrdup(name);
nsym = symbol_tab_add_symbol(tab, oldname);
node->sym.id = nsym->id;
nsym->id = oldid;
xfree(oldname);
}
/* printf("(%s) -> #%d = (%s)\n", name, node->sym.id, node->key); */
return &node->sym;
}
else {
/* not a legal symbol name -> shorten if necessary, replace
disallowed characters with theta, and append a numeric
suffix */
if (strlen(name) <= MAX_SYMBOL_LENGTH - 4)
strcpy(nbuf, name);
else
shorten_symbol(nbuf, MAX_SYMBOL_LENGTH - 4, name);
if (!is_valid_start_char(nbuf[0]))
nbuf[0] = '[';
for (i = 1; nbuf[i]; i++)
if (!is_valid_symbol_char(nbuf[i]))
nbuf[i] = '[';
suffixp = nbuf + strlen(nbuf);
for (i = 0; i < 1000; i++) {
sprintf(suffixp, "_%03d", i);
if (check_insert_symbol(tab, nbuf, name, &node)) {
/* printf("(%s) -> #%d = (%s)\n", name, node->sym.id, node->key); */
return &node->sym;
}
}
fprintf(stderr, "ERROR: can't generate a unique name for '%s'\n", name);
abort();
}
}
unsigned int symbol_tab_count(symbol_tab *tab)
{
return tab->nsymbols;
}
static void subtree_foreach(symbol_node *node,
void (*func)(symbol *sym, void *data),
void *data)
{
if (node->left)
subtree_foreach(node->left, func, data);
(*func)(&node->sym, data);
if (node->right)
subtree_foreach(node->right, func, data);
}
void symbol_tab_foreach(symbol_tab *tab,
void (*func)(symbol *sym, void *data),
void *data)
{
if (tab->root)
subtree_foreach(tab->root, func, data);
}
/*
static void dumptree(symbol_node *node, int depth, int d, char *dbuf)
{
int i;
if (!node)
return;
dbuf[depth] = " |"[d];
dumptree(node->left, depth + 1, 1, dbuf);
dbuf[depth] = "-,`"[d];
for (i = 0; i <= depth; i++) {
putchar(' ');
putchar(dbuf[i]);
}
printf("-+ \t%d %s (%s)\n", node->sym.id, node->sym.realname, node->key);
dbuf[depth] = " | "[d];
dumptree(node->right, depth + 1, 2, dbuf);
}
int main()
{
char buf[1000];
int n;
symbol_tab *tab;
tab = symbol_tab_new();
while (fgets(buf, sizeof(buf), stdin)) {
n = strlen(buf);
while (n > 0 && buf[n - 1] <= ' ')
buf[--n] = 0;
symbol_tab_add_symbol(tab, buf);
dumptree(tab->root, 0, 0, buf);
}
symbol_tab_free(tab);
return 0;
}
*/