/* * Mimas conversion tools * * Copyright (C) 2010 Benjamin Moody * * This program is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include "utils.h" #include "symtab.h" #define MAX_SYMBOL_LENGTH 32 typedef struct _symbol_node { symbol sym; char *key; /* xform()ed name */ struct _symbol_node *left; /* Left subtree */ struct _symbol_node *right; /* Right subtree */ short int lheight, rheight; /* Height of subtrees */ } symbol_node; struct _symbol_tab { int nsymbols; /* Total number of symbols */ symbol_node *root; /* Root of tree */ }; symbol_tab *symbol_tab_new() { symbol_tab *tab = xnew(symbol_tab, 1); tab->nsymbols = 0; tab->root = NULL; return tab; } static void free_tree(symbol_node *node) { if (!node) return; xfree(node->sym.realname); xfree(node->key); free_tree(node->left); free_tree(node->right); xfree(node); } void symbol_tab_free(symbol_tab *tab) { free_tree(tab->root); xfree(tab); } static void xform(char *buf, const char *str) { static const char map[256] = "********************************" "**************.*0123456789******" "*ABCDEFGHIJKLMNOPQRSTUVWXYZ[***!" "*ABCDEFGHIJKLMNOPQRSTUVWXYZ*****" "********************************" "********************************" "********************************" "********************************"; int n, i; /* Translate string into a form that can be strcmp()ed to yield the correct (semi-case-independent) ordering used by Mimas */ n = strlen(str); for (i = 0; i < n; i++) buf[i] = map[(unsigned char) str[i]]; buf[n] = ' '; memcpy(buf + n + 1, str, n + 1); } static char *getname(char *key) { char *p = strchr(key, ' '); if (p) return p + 1; else return NULL; } static const char *prev_subword_start(const char *p, const char *start) { while (p != start && (p[-1] == '_' || p[-1] == '.')) p--; if (p == start) return NULL; p--; if (p[0] >= '0' && p[0] <= '9') { while (p != start && p[-1] >= '0' && p[-1] <= '9') p--; return p; } else if (p[0] >= 'A' && p[0] <= 'Z') { while (p != start && p[-1] >= 'A' && p[-1] <= 'Z') p--; return p; } else if (p[0] >= 'a' && p[0] <= 'z') { while (p != start && p[-1] >= 'a' && p[-1] <= 'z') p--; if (p != start && p[-1] >= 'A' && p[-1] <= 'Z') return p - 1; else return p; } else { return p; } } static void namebuf_pack_start(char **nbufp, int *nbufcount, const char *text, int n) { if (!*nbufcount) return; if (n > *nbufcount) n = *nbufcount; memcpy(*nbufp, text, n); (*nbufp) += n; (*nbufcount) -= n; } static void namebuf_pack_end(char **nbufp, int *nbufcount, const char *text, int n) { if (!*nbufcount) return; if (n > *nbufcount) n = *nbufcount; memcpy(*nbufp + *nbufcount - n, text, n); (*nbufcount) -= n; } static void shorten_symbol(char *buf, int len, const char *name) { char *nbufp; int nbufcount; const char *p, *end; nbufp = buf; nbufcount = len; namebuf_pack_end(&nbufp, &nbufcount, "", 1); end = name + strlen(name); p = prev_subword_start(end, name); if (!p || !prev_subword_start(p, name)) { /* Single long word: just preserve as much as we can */ namebuf_pack_start(&nbufp, &nbufcount, name, strlen(name)); } else { /* Always preserve start of first word */ if (name[0] == '_' || name[0] == '.' || name[0] == '[') namebuf_pack_start(&nbufp, &nbufcount, name, 2); else namebuf_pack_start(&nbufp, &nbufcount, name, 1); /* Reserve space for __ to represent omitted words */ namebuf_pack_start(&nbufp, &nbufcount, "__", 2); /* Preserve as much of final word as we can */ namebuf_pack_end(&nbufp, &nbufcount, p, end - p); nbufp--; nbufcount++; /* Add first char of each remaining word, from right to left, separated by underscores */ while ((p = prev_subword_start(p, name)) && p != name) { if (nbufcount >= 2) { namebuf_pack_end(&nbufp, &nbufcount, "_", 1); namebuf_pack_end(&nbufp, &nbufcount, p, 1); } else { if (!nbufcount) nbufcount += 2; namebuf_pack_end(&nbufp, &nbufcount, "_", 1); break; } } } if (nbufcount) { while (nbufp != buf + (len - nbufcount)) { nbufp[0] = nbufp[nbufcount]; nbufp++; } } } static void rotate_right(symbol_node **nptr) { symbol_node *a, *b; b = *nptr; a = b->left; b->left = a->right; b->lheight = a->rheight; a->right = b; if (b->lheight > b->rheight) a->rheight = b->lheight + 1; else a->rheight = b->rheight + 1; *nptr = a; } static void rotate_left(symbol_node **nptr) { symbol_node *a, *b; a = *nptr; b = a->right; a->right = b->left; a->rheight = b->lheight; b->left = a; if (a->lheight > a->rheight) b->lheight = a->lheight + 1; else b->lheight = a->rheight + 1; *nptr = b; } static int insert_sym(symbol_tab *tab, symbol_node **nptr, const char *key, symbol_node **result) { symbol_node *node = *nptr; int d; if (!node) { node = xnew(symbol_node, 1); node->left = node->right = NULL; node->lheight = node->rheight = 0; node->key = xstrdup(key); node->sym.name = getname(node->key); node->sym.realname = NULL; node->sym.id = tab->nsymbols; tab->nsymbols++; *result = *nptr = node; return 1; /* adding node increases height */ } d = strcmp(node->key, key); if (d == 0) { /* symbol matches */ *result = node; return 0; /* existing node - no change */ } else if (d < 0) { if (!insert_sym(tab, &node->right, key, result)) return 0; /* height of subtree didn't change - we're done */ node->rheight++; if (node->rheight <= node->lheight) { return 0; /* right subtree no taller than left - total height is the same */ } else if (node->rheight == node->lheight + 1) { return 1; /* right subtree one taller than left - total height increased */ } else { /* rebalance */ if (node->right->rheight < node->right->lheight) rotate_right(&node->right); rotate_left(nptr); return 0; } } else { if (!insert_sym(tab, &node->left, key, result)) return 0; node->lheight++; if (node->lheight <= node->rheight) { return 0; } else if (node->lheight == node->rheight + 1) { return 1; } else { /* rebalance */ if (node->left->lheight < node->left->rheight) rotate_left(&node->left); rotate_right(nptr); return 0; } } } static int check_insert_symbol(symbol_tab *tab, const char *calcname, const char *realname, symbol_node **result) { char key[MAX_SYMBOL_LENGTH * 2 + 2]; xform(key, calcname); insert_sym(tab, &tab->root, key, result); if (!(*result)->sym.realname) { (*result)->sym.realname = xstrdup(realname); return 1; } else if (!strcmp((*result)->sym.realname, realname)) { return 1; } else { return 0; } } static int is_valid_start_char(char c) { if (c >= 'a' && c <= 'z') return 1; else if (c >= 'A' && c <= 'Z') return 1; else if (c == '_' || c == '[' || c == '.') return 1; else return 0; } static int is_valid_symbol_char(char c) { if (c >= 'a' && c <= 'z') return 1; else if (c >= 'A' && c <= 'Z') return 1; else if (c >= '0' && c <= '9') return 1; else if (c == '_' || c == '[') return 1; else return 0; } symbol *symbol_tab_add_symbol(symbol_tab *tab, const char *name) { char nbuf[MAX_SYMBOL_LENGTH + 1], *oldname; symbol_node *node; symbol *nsym; int oldid; char *suffixp; int i, invalidchars = 0; if (!name || !name[0]) return NULL; if (!is_valid_start_char(name[0])) invalidchars = 1; for (i = 1; !invalidchars && name[i]; i++) if (!is_valid_symbol_char(name[i])) invalidchars = 1; if (!invalidchars && strlen(name) <= MAX_SYMBOL_LENGTH) { /* legal symbol name -> use this name exactly */ if (!check_insert_symbol(tab, name, name, &node)) { /* rename the older symbol */ oldname = node->sym.realname; oldid = node->sym.id; node->sym.realname = xstrdup(name); nsym = symbol_tab_add_symbol(tab, oldname); node->sym.id = nsym->id; nsym->id = oldid; xfree(oldname); } /* printf("(%s) -> #%d = (%s)\n", name, node->sym.id, node->key); */ return &node->sym; } else { /* not a legal symbol name -> shorten if necessary, replace disallowed characters with theta, and append a numeric suffix */ if (strlen(name) <= MAX_SYMBOL_LENGTH - 4) strcpy(nbuf, name); else shorten_symbol(nbuf, MAX_SYMBOL_LENGTH - 4, name); if (!is_valid_start_char(nbuf[0])) nbuf[0] = '['; for (i = 1; nbuf[i]; i++) if (!is_valid_symbol_char(nbuf[i])) nbuf[i] = '['; suffixp = nbuf + strlen(nbuf); for (i = 0; i < 1000; i++) { sprintf(suffixp, "_%03d", i); if (check_insert_symbol(tab, nbuf, name, &node)) { /* printf("(%s) -> #%d = (%s)\n", name, node->sym.id, node->key); */ return &node->sym; } } fprintf(stderr, "ERROR: can't generate a unique name for '%s'\n", name); abort(); } } unsigned int symbol_tab_count(symbol_tab *tab) { return tab->nsymbols; } static void subtree_foreach(symbol_node *node, void (*func)(symbol *sym, void *data), void *data) { if (node->left) subtree_foreach(node->left, func, data); (*func)(&node->sym, data); if (node->right) subtree_foreach(node->right, func, data); } void symbol_tab_foreach(symbol_tab *tab, void (*func)(symbol *sym, void *data), void *data) { if (tab->root) subtree_foreach(tab->root, func, data); } /* static void dumptree(symbol_node *node, int depth, int d, char *dbuf) { int i; if (!node) return; dbuf[depth] = " |"[d]; dumptree(node->left, depth + 1, 1, dbuf); dbuf[depth] = "-,`"[d]; for (i = 0; i <= depth; i++) { putchar(' '); putchar(dbuf[i]); } printf("-+ \t%d %s (%s)\n", node->sym.id, node->sym.realname, node->key); dbuf[depth] = " | "[d]; dumptree(node->right, depth + 1, 2, dbuf); } int main() { char buf[1000]; int n; symbol_tab *tab; tab = symbol_tab_new(); while (fgets(buf, sizeof(buf), stdin)) { n = strlen(buf); while (n > 0 && buf[n - 1] <= ' ') buf[--n] = 0; symbol_tab_add_symbol(tab, buf); dumptree(tab->root, 0, 0, buf); } symbol_tab_free(tab); return 0; } */