/* * Mimas conversion tools * * Copyright (C) 2010 Benjamin Moody * * This program is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include "mimas.h" #include "symtab.h" #include "parse.h" #include "convert.h" #include "utils.h" #include "byname.h" extern void print_error(const char *fmt, ...); parse_node *new_parse_node(unsigned int type) { parse_node *n = xnew(parse_node, 1); n->type = type; n->longval = 0; n->strval = NULL; n->symval = NULL; n->left = n->right = NULL; return n; } parse_node *new_parse_node_long(unsigned int type, long int longval) { parse_node *n = new_parse_node(type); n->longval = longval; return n; } static parse_node *new_parse_node_str(unsigned int type, char *strval, int length) { parse_node *n = new_parse_node(type); n->longval = length; n->strval = strval; return n; } static parse_node *new_parse_node_sym(unsigned int type, symbol *symval) { parse_node *n = new_parse_node(type); n->symval = symval; return n; } static parse_node *new_parse_node_1(unsigned int type, parse_node *a) { parse_node *n = new_parse_node(type); n->left = a; return n; } parse_node *new_parse_node_regval(unsigned int reg) { return new_parse_node_1(X_REGVAL, new_parse_node_long(X_DEC6, reg & 0x3f)); } void free_parse_tree(parse_node *n) { if (!n) return; free_parse_tree(n->left); free_parse_tree(n->right); xfree(n->strval); xfree(n); } parse_node *dup_parse_tree(const parse_node *n) { parse_node *m; if (!n) return NULL; m = new_parse_node(n->type); m->left = dup_parse_tree(n->left); m->right = dup_parse_tree(n->right); m->longval = n->longval; m->symval = n->symval; if (n->strval) { m->strval = xnew(char, n->longval); memcpy(m->strval, n->strval, n->longval); } return m; } int is_whitespace(int c) { if (c == ' ' || c == '\t' || c == '\f' || c == '\n' || c == '\r') return 1; else return 0; } int is_wc(int c) { if (c == '@' || c == '_' || c == '.' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) return 1; else return 0; } static int is_xdigit(int c) { if ((c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') || (c >= '0' && c <= '9')) return 1; else return 0; } static int compare_builtins(const void *a, const void *b) { const builtin_symbol *sa = a; const builtin_symbol *sb = b; return strcmp(sa->name, sb->name); } static parse_node *find_builtin_by_name(const char *name, unsigned int flags) { const builtin_symbol *b; builtin_symbol k; char *lcname; int i; lcname = xstrdup(name); for (i = 0; lcname[i]; i++) if (lcname[i] >= 'A' && lcname[i] <= 'Z') lcname[i] += 'a' - 'A'; k.name = lcname; if ((b = bsearch(&k, sysAddrByNameTable, sizeof(sysAddrByNameTable) / sizeof(builtin_symbol), sizeof(builtin_symbol), &compare_builtins))) { xfree(lcname); return new_parse_node_long(X_SYSADDR, b->value); } if ((b = bsearch(&k, sysEnumByNameTable, sizeof(sysEnumByNameTable) / sizeof(builtin_symbol), sizeof(builtin_symbol), &compare_builtins))) { xfree(lcname); return new_parse_node_long(b->value >> 8, b->value & 0xff); } if ((b = bsearch(&k, sysFlagByNameTable, sizeof(sysFlagByNameTable) / sizeof(builtin_symbol), sizeof(builtin_symbol), &compare_builtins))) { xfree(lcname); return new_parse_node_long(X_DEC6, b->value & 7); } if (flags & PARSE_ROMCALLS_NO_UNDERSCORE) { if ((b = bsearch(&k, romCallByNameTable, sizeof(romCallByNameTable) / sizeof(builtin_symbol), sizeof(builtin_symbol), &compare_builtins))) { xfree(lcname); return new_parse_node_long(X_ROMCALL, b->value); } } if (lcname[0] == '_') { k.name = lcname + 1; if ((b = bsearch(&k, romCallByNameTable, sizeof(romCallByNameTable) / sizeof(builtin_symbol), sizeof(builtin_symbol), &compare_builtins))) { xfree(lcname); return new_parse_node_long(X_ROMCALL, b->value); } } xfree(lcname); return NULL; } static parse_node *parse_quoted_string(const char **in) { char delim = **in; int n; const char *p; char *s, *e; n = 0; for (p = *in + 1; *p != delim; p++) { if (*p == '\\' && p[1]) { p++; n++; } else if (*p == 0) { print_error("Unterminated character constant"); break; } else { n++; } } s = xnew(char, n); n = 0; p = *in + 1; while (*p && *p != delim) { if (*p == '\\' && p[1]) { p++; if (*p == 'x' || *p == 'X') { p++; s[n++] = (char) (unsigned char) strtol(p, &e, 16); p = e; } else if (*p >= '0' && *p <= '7') { s[n++] = (char) (unsigned char) strtol(p, &e, 8); p = e; } else { if (*p == 'a' || *p == 'A') s[n++] = 0x07; else if (*p == 'b' || *p == 'B') s[n++] = 0x08; else if (*p == 'e' || *p == 'E') s[n++] = 0x1B; else if (*p == 'f' || *p == 'F') s[n++] = 0x0C; else if (*p == 'n' || *p == 'N') s[n++] = 0x0A; else if (*p == 'r' || *p == 'R') s[n++] = 0x0D; else if (*p == 't' || *p == 'T') s[n++] = 0x09; else if (*p == 'v' || *p == 'V') s[n++] = 0x0B; else s[n++] = *p; p++; } } else { s[n++] = *p; p++; } } if (*p == delim) *in = p + 1; else *in = p; return new_parse_node_str(XX_STRING, s, n); } static parse_node *parse_number(const char **in, int base, int suffix) { long int lv; char *numend; lv = strtol(*in, &numend, base); *in = numend; if (suffix ? is_wc(numend[1]) : is_wc(numend[0])) { print_error("Unexpected '%c' in base-%d numeric constant", *numend, base); return NULL; } if (suffix) (*in)++; if (base == 16) { if (lv > 0xff) return new_parse_node_long(X_HEX16, lv); else return new_parse_node_long(X_HEX8, lv); } else if (base == 8) { if (lv > 0xff) return new_parse_node_long(X_OCT16, lv); else return new_parse_node_long(X_OCT8, lv); } else if (base == 2) { if (lv > 0xff) return new_parse_node_long(X_BIN16, lv); else return new_parse_node_long(X_BIN8, lv); } else { if (lv > 0xff) return new_parse_node_long(X_DEC16, lv); else if (lv > 0x3f) return new_parse_node_long(X_DEC8, lv); else return new_parse_node_long(X_DEC6, lv); } } static parse_node *parse_token(const char **in, int is_unary, symbol_tab *stab, unsigned int flags) { const char *symstart, *symend, *p; char *name, *converted; symbol *sym; parse_node *n; while (is_whitespace(**in)) (*in)++; switch (**in) { case 0: case ';': case '\\': return new_parse_node(XX_EOL); case ',': (*in)++; return new_parse_node(XX_COMMA); case '(': (*in)++; return new_parse_node(XX_LPAREN); case ')': (*in)++; return new_parse_node(XX_RPAREN); case '*': (*in)++; return new_parse_node(X_MUL); case '+': (*in)++; return new_parse_node(X_ADD); case '/': (*in)++; return new_parse_node(X_DIV); case '[': (*in)++; return new_parse_node(XX_LBRACK); case ']': (*in)++; return new_parse_node(XX_RBRACK); case '^': (*in)++; return new_parse_node(X_XOR); case '~': (*in)++; return new_parse_node(X_COMPLEMENT); case '!': (*in)++; if (**in == '=') { (*in)++; return new_parse_node(X_NE); } else { return new_parse_node(X_NOT); } case '"': return parse_quoted_string(in); case '$': (*in)++; if ((flags & PARSE_ZMASM_LABELS) && (**in == 'f' || **in == 'F' || **in == 'b' || **in == 'B') && !is_xdigit((*in)[1])) { (*in)++; if ((*in)[-1] == 'f' || (*in)[-1] == 'F') return new_parse_node(X_NEXT_ANON); else return new_parse_node(X_PREV_ANON); } else if (is_xdigit(**in)) { return parse_number(in, 16, 0); } else if (**in == '$') { (*in)++; return new_parse_node(X_LOADPC); } else { return new_parse_node(X_EXECPC); } case '%': (*in)++; if (is_unary && (**in == '0' || **in == '1')) { return parse_number(in, 2, 0); } else { return new_parse_node(X_MOD); } case '&': (*in)++; if (**in == '&') { (*in)++; return new_parse_node(XX_LOGAND); } else { return new_parse_node(X_AND); } case '|': (*in)++; if (**in == '|') { (*in)++; return new_parse_node(XX_LOGOR); } else { return new_parse_node(X_OR); } case '\'': return parse_quoted_string(in); case '-': (*in)++; if (is_unary) return new_parse_node(X_MINUS); else return new_parse_node(X_SUB); case '<': (*in)++; if (**in == '<') { (*in)++; return new_parse_node(X_LSHIFT); } else if (**in == '=') { (*in)++; return new_parse_node(X_LE); } else if (**in == '>') { (*in)++; return new_parse_node(X_NE); } else { return new_parse_node(X_LESS); } case '=': (*in)++; if (**in == '=') (*in)++; return new_parse_node(X_EQUAL); case '>': (*in)++; if (**in == '>') { (*in)++; return new_parse_node(X_RSHIFT); } else if (**in == '=') { (*in)++; return new_parse_node(X_GE); } else { return new_parse_node(X_GREATER); } break; default: if (is_wc(**in)) { symstart = symend = *in; while (is_wc(*symend)) symend++; /* check for numbers */ if (**in == '@' && (flags & PARSE_OCTAL_AT) && symend != *in + 1) { p = *in + 1; while (*p >= '0' && *p <= '7') p++; if (p == symend) { (*in)++; return parse_number(in, 8, 0); } } if (**in == '0') { if ((*in)[1] == 'X' || (*in)[1] == 'x' || (*in)[1] == 'H' || (*in)[1] == 'h') { (*in) += 2; return parse_number(in, 16, 0); } else if ((*in)[1] == 'O' || (*in)[1] == 'o') { (*in) += 2; return parse_number(in, 8, 0); } } if (**in >= '0' && **in <= '9') { if (symend[-1] == 'H' || symend[-1] == 'h') { return parse_number(in, 16, 1); } else if (symend[-1] == 'O' || symend[-1] == 'o') { return parse_number(in, 8, 1); } else if (symend[-1] == 'B' || symend[-1] == 'b') { return parse_number(in, 2, 1); } else { return parse_number(in, 10, 0); } } if (**in == '0') { if ((*in)[1] == 'B' || (*in)[1] == 'b') { (*in) += 2; return parse_number(in, 2, 0); } } *in = symend; if (symend == symstart + 1) { if (symstart[0] == 'a' || symstart[0] == 'A') return new_parse_node_regval(X_REG_A); if (symstart[0] == 'b' || symstart[0] == 'B') return new_parse_node_regval(X_REG_B); if (symstart[0] == 'c' || symstart[0] == 'C') return new_parse_node_regval(X_REG_C); if (symstart[0] == 'd' || symstart[0] == 'D') return new_parse_node_regval(X_REG_D); if (symstart[0] == 'e' || symstart[0] == 'E') return new_parse_node_regval(X_REG_E); if (symstart[0] == 'h' || symstart[0] == 'H') return new_parse_node_regval(X_REG_H); if (symstart[0] == 'i' || symstart[0] == 'I') return new_parse_node_regval(X_REG_I); if (symstart[0] == 'l' || symstart[0] == 'L') return new_parse_node_regval(X_REG_L); if (symstart[0] == 'm' || symstart[0] == 'M') return new_parse_node_regval(X_COND_M); if (symstart[0] == 'p' || symstart[0] == 'P') return new_parse_node_regval(X_COND_P); if (symstart[0] == 'r' || symstart[0] == 'R') return new_parse_node_regval(X_REG_R); if (symstart[0] == 'z' || symstart[0] == 'Z') return new_parse_node_regval(X_COND_Z); if (symstart[0] == '@') return new_parse_node(X_NEXT_ANON); } else if (symend == symstart + 2) { if ((symstart[0] == 'a' || symstart[0] == 'A') && (symstart[1] == 'f' || symstart[1] == 'F')) { if (symstart[2] == '\'') { (*in)++; return new_parse_node_regval(X_REG_AF2); } else return new_parse_node_regval(X_REG_AF); } if ((symstart[0] == 'b' || symstart[0] == 'B') && (symstart[1] == 'c' || symstart[1] == 'C')) return new_parse_node_regval(X_REG_BC); if ((symstart[0] == 'd' || symstart[0] == 'D') && (symstart[1] == 'e' || symstart[1] == 'E')) return new_parse_node_regval(X_REG_DE); if ((symstart[0] == 'h' || symstart[0] == 'H') && (symstart[1] == 'l' || symstart[1] == 'L')) return new_parse_node_regval(X_REG_HL); if ((symstart[0] == 's' || symstart[0] == 'S') && (symstart[1] == 'p' || symstart[1] == 'P')) return new_parse_node_regval(X_REG_SP); if ((symstart[0] == 'i' || symstart[0] == 'I') && (symstart[1] == 'x' || symstart[1] == 'X')) return new_parse_node_regval(X_REG_IX); if ((symstart[0] == 'i' || symstart[0] == 'I') && (symstart[1] == 'y' || symstart[1] == 'Y')) return new_parse_node_regval(X_REG_IY); if ((symstart[0] == 'p' || symstart[0] == 'P') && (symstart[1] == 'c' || symstart[1] == 'C')) return new_parse_node(X_EXECPC); if ((symstart[0] == 'n' || symstart[0] == 'N') && (symstart[1] == 'c' || symstart[1] == 'C')) return new_parse_node_regval(X_COND_NC); if ((symstart[0] == 'n' || symstart[0] == 'N') && (symstart[1] == 'z' || symstart[1] == 'Z')) return new_parse_node_regval(X_COND_NZ); if ((symstart[0] == 'p' || symstart[0] == 'P') && (symstart[1] == 'o' || symstart[1] == 'O')) return new_parse_node_regval(X_COND_PO); if ((symstart[0] == 'p' || symstart[0] == 'P') && (symstart[1] == 'e' || symstart[1] == 'E')) return new_parse_node_regval(X_COND_PE); if (symstart[0] == '@' && (symstart[1] == 'b' || symstart[1] == 'B')) return new_parse_node(X_PREV_ANON); if (symstart[0] == '@' && (symstart[1] == 'f' || symstart[1] == 'F')) return new_parse_node(X_NEXT_ANON); } else if (symend == symstart + 3) { if (symstart[0] == 'i' || symstart[0] == 'I') { if (symstart[1] == 'x' || symstart[1] == 'X') { if (symstart[2] == 'h' || symstart[2] == 'H') return new_parse_node_regval(X_REG_IXH); if (symstart[2] == 'l' || symstart[2] == 'L') return new_parse_node_regval(X_REG_IXL); } else if (symstart[1] == 'y' || symstart[1] == 'Y') { if (symstart[2] == 'h' || symstart[2] == 'H') return new_parse_node_regval(X_REG_IYH); if (symstart[2] == 'l' || symstart[2] == 'L') return new_parse_node_regval(X_REG_IYL); } } else if (symstart[0] == 'l' || symstart[0] == 'L') { if ((symstart[1] == 'p' || symstart[1] == 'P') && (symstart[2] == 'c' || symstart[2] == 'C')) return new_parse_node(X_LOADPC); else if ((symstart[1] == 's' || symstart[1] == 'S') && (symstart[2] == 'b' || symstart[2] == 'B')) return new_parse_node(X_LSB); else if ((symstart[1] == 'o' || symstart[1] == 'O') && (symstart[2] == 'w' || symstart[2] == 'W')) return new_parse_node(X_LSB); } else if (symstart[0] == 'm' || symstart[0] == 'M') { if ((symstart[1] == 's' || symstart[1] == 'S') && (symstart[2] == 'b' || symstart[2] == 'B')) return new_parse_node(X_MSB); } } else if (symend == symstart + 4) { if ((symstart[0] == 'h' || symstart[0] == 'H') && (symstart[1] == 'i' || symstart[1] == 'I') && (symstart[2] == 'g' || symstart[2] == 'G') && (symstart[3] == 'h' || symstart[3] == 'H')) return new_parse_node(X_MSB); } name = xstrndup(symstart, symend - symstart); converted = ascii_to_ti83p(name); if ((n = find_builtin_by_name(converted, flags))) { xfree(name); xfree(converted); return n; } sym = symbol_tab_add_symbol(stab, converted); xfree(name); xfree(converted); return new_parse_node_sym(X_SYMBOL, sym); } else { print_error("Unexpected '%c' in expression", **in); return NULL; } } } static const char *token_desc(unsigned int type) { switch (type) { case X_SYMBOL: case X_PREV_ANON: case X_NEXT_ANON: case X_SYSFLAG: case X_SCANCODE: case X_KEY: case X_KEY_FB: case X_KEY_FC: case X_ROMCALL: case X_SYSADDR: return "symbol"; case X_EXECPC: case X_LOADPC: case X_REGVAL: return "register name"; case X_DEC6: case X_DEC8: case X_HEX8: case X_OCT8: case X_BIN8: case X_DEC16: case X_HEX16: case X_OCT16: case X_BIN16: return "numeric constant"; case X_CHAR: return "character constant"; case X_LSB: return "'lsb'"; case X_MSB: return "'msb'"; case X_MINUS: return "'-'"; case X_COMPLEMENT: return "'~'"; case X_NOT: return "'!'"; case X_MUL: return "'*'"; case X_DIV: return "'/'"; case X_MOD: return "'%'"; case X_ADD: return "'+'"; case X_SUB: return "'-'"; case X_LSHIFT: return "'<<'"; case X_RSHIFT: return "'>>'"; case X_GREATER: return "'>'"; case X_LESS: return "'<'"; case X_GE: return "'>='"; case X_LE: return "'<='"; case X_EQUAL: return "'='"; case X_NE: return "'!='"; case X_AND: return "'&'"; case X_XOR: return "'^'"; case X_OR: return "'|'"; case XX_LOGAND: return "'&&'"; case XX_LOGOR: return "'||'"; case XX_EOL: return "end of line"; case XX_LPAREN: return "'('"; case XX_RPAREN: return "')'"; case XX_LBRACK: return "'['"; case XX_RBRACK: return "']'"; case XX_COMMA: return "','"; case XX_STRING: return "string constant"; default: return "unknown token"; } } static int token_precedence(unsigned int type, unsigned int flags) { switch (type) { case X_MUL: case X_DIV: case X_MOD: return 1; case X_ADD: case X_SUB: return (flags & PARSE_NO_PRECEDENCE ? 1 : 2); case X_LSHIFT: case X_RSHIFT: return (flags & PARSE_NO_PRECEDENCE ? 1 : 3); case X_LESS: case X_GREATER: case X_LE: case X_GE: return (flags & PARSE_NO_PRECEDENCE ? 1 : 4); case X_EQUAL: case X_NE: return (flags & PARSE_NO_PRECEDENCE ? 1 : 5); case X_AND: return (flags & PARSE_NO_PRECEDENCE ? 1 : 6); case X_XOR: return (flags & PARSE_NO_PRECEDENCE ? 1 : 7); case X_OR: return (flags & PARSE_NO_PRECEDENCE ? 1 : 8); case XX_LOGAND: return (flags & PARSE_NO_PRECEDENCE ? 1 : 9); case XX_LOGOR: return (flags & PARSE_NO_PRECEDENCE ? 1 : 10); default: return 0; } } #define MAX_PRECEDENCE (flags & PARSE_NO_PRECEDENCE ? 1 : 11) static parse_node *parse_subexpr(const char **in, int prec, symbol_tab *stab, unsigned int flags) { parse_node *tok, *na, *nb; unsigned int m; int tprec; const char *end; if (prec == 0) { /* unary */ if (!(tok = parse_token(in, 1, stab, flags))) return NULL; if (tok->type == XX_LPAREN || tok->type == XX_LBRACK) { if (tok->type == XX_LPAREN) m = XX_RPAREN; else m = XX_RBRACK; free_parse_tree(tok); na = parse_subexpr(in, MAX_PRECEDENCE, stab, flags); if (!na) return NULL; if (!(tok = parse_token(in, 1, stab, flags))) { free_parse_tree(na); return NULL; } else if (tok->type != m) { print_error("Unexpected %s (expected '%c')", token_desc(tok->type), m); free_parse_tree(na); free_parse_tree(tok); return NULL; } free_parse_tree(tok); return new_parse_node_1(X_PAREN, na); } if (tok->type == X_DEC6 || tok->type < X_UNARY || tok->type == XX_STRING || (tok->type == X_REGVAL && tok->left)) { return tok; } if (tok->type >= X_UNARY && tok->type < X_BINARY) { na = parse_subexpr(in, 0, stab, flags); if (!na) { free_parse_tree(tok); return NULL; } tok->left = na; return tok; } print_error("Unexpected %s (expected value or unary operator)", token_desc(tok->type)); free_parse_tree(tok); return NULL; } else { na = parse_subexpr(in, prec - 1, stab, flags); if (!na) return NULL; while (1) { end = *in; if (!(tok = parse_token(&end, 0, stab, flags))) { free_parse_tree(na); return NULL; } if (tok->type == XX_EOL || tok->type == XX_COMMA || tok->type == XX_LPAREN || tok->type == XX_RPAREN) { free_parse_tree(tok); return na; } tprec = token_precedence(tok->type, flags); if (!tprec) { print_error("Unexpected %s (expected binary operator)", token_desc(tok->type)); free_parse_tree(tok); free_parse_tree(na); return NULL; } else if (tprec != prec) { free_parse_tree(tok); return na; } *in = end; nb = parse_subexpr(in, prec - 1, stab, flags); if (!nb) { free_parse_tree(na); free_parse_tree(tok); return NULL; } tok->left = na; tok->right = nb; na = tok; if (tok->type == XX_LOGAND) tok->type = X_AND; else if (tok->type == XX_LOGOR) tok->type = X_OR; } } } parse_node *parse_expr_list(const char **in, symbol_tab *stab, unsigned int flags) { parse_node *exp, *tok; exp = parse_subexpr(in, MAX_PRECEDENCE, stab, flags); if (!exp) return NULL; tok = parse_token(in, 0, stab, flags); if (!tok) { free_parse_tree(exp); return NULL; } if (tok->type == XX_COMMA) { tok->left = exp; exp = parse_expr_list(in, stab, flags); if (!exp) { free_parse_tree(tok); return NULL; } tok->right = exp; return tok; } else if (tok->type != XX_EOL) { print_error("Unexpected %s at end of expression", token_desc(tok->type)); } free_parse_tree(tok); return exp; } parse_node *parse_label(const char *name, symbol_tab *stab, unsigned int flags) { parse_node *node; if (!strcmp(name, "$$")) { if (!(flags & PARSE_ZMASM_LABELS)) print_error("warning: this looks like ZMASM source (try -f zmasm)"); return new_parse_node(X_NEXT_ANON); } else { node = parse_expr_list(&name, stab, flags); if (node->type == X_NEXT_ANON || node->type == X_SYMBOL) return node; if (node->type == X_ROMCALL || node->type == X_SYSADDR || (node->type >= X_ENUM8 && node->type < X_WORD)) { print_error("cannot use builtin constant as a label"); } else { print_error("invalid label syntax"); } free_parse_tree(node); return NULL; } }