| home
| contents
| previous
| next page
| send comment
| send link
| add bookmark |
PRS_EXPR.CPP
expressions
/*-------------------------------------------------------------------*
Expression Parser
File: prs_expr.cpp
Module: parser
by: Stephen R. Schmitt
*-------------------------------------------------------------------*/
#include "tpl_data.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
* External variables
*/
/* in scn_main.cpp */
extern token_struct Token;
extern TOKEN_CODE Stmt_start_list[];
extern TOKEN_CODE Decl_start_list[];
extern TOKEN_CODE Rel_op_list[];
extern TOKEN_CODE Function_list[];
/* in sym_tabl.cpp */
extern TYPE_STRUCT_PTR Integer_typep, Real_typep, String_typep,
Boolean_typep, Char_typep, Null_typep;
/*-------------------------------------------------------------------*
Code Section
*-------------------------------------------------------------------*/
/*
* "string_expression" parses:
*
* <factor> {& <factor>}
*
* returns: a pointer to the type structure of the expression
*/
TYPE_STRUCT_PTR string_expression()
{
TYPE_STRUCT_PTR tp0, tp1;
tp0 = factor(); // get first string
if (tp0->form != STRING_FORM)
compile_error(M_INVALID, M_TYPE, M_0);
while (Token.code == AMPERSAND) // get next factor(s)
{
get_token(); // after "&"
tp1 = factor(); // get next string
if (tp1->form != STRING_FORM)
compile_error(M_INVALID, M_TYPE, M_0);
else
emit_operator(STRCAT, NO_CODE, 0, 0);
}
return String_typep;
}
/*
* "boolean_expression" parses:
*
* <boolean_term> { or, nor, xor <boolean_term> }
*
* returns: a pointer to the type structure of the expression
*/
TYPE_STRUCT_PTR boolean_expression()
{
TOKEN_CODE op;
bool saw_not = false;
if (Token.code == NOT) // look for unary operator
{
saw_not = true;
get_token(); // after unary operator
}
boolean_term(); // get first term
if (saw_not)
emit_operator(LOGIC_NOT, NO_CODE, 0, 0);
while ((Token.code == OR )|| // get next term(s)
(Token.code == NOR)||
(Token.code == XOR))
{
op = Token.code; // save op
get_token(); // after op
saw_not = false;
if (Token.code == NOT) // look for unary operator
{
saw_not = true;
get_token(); // after unary operator
}
boolean_term(); // get next term
if (saw_not)
emit_operator(LOGIC_NOT, NO_CODE, 0, 0);
if (op == OR)
emit_operator(LOGIC_OR, NO_CODE, 0, 0);
else if (op == NOR)
emit_operator(LOGIC_NOR, NO_CODE, 0, 0);
else
emit_operator(LOGIC_XOR, NO_CODE, 0, 0);
}
return Boolean_typep;
}
/*
* "boolean_term" parses:
*
* <boolean_factor> { and, nand <boolean_factor> }
*
* returns: a pointer to the type structure of the term
*/
TYPE_STRUCT_PTR boolean_term()
{
TOKEN_CODE op;
TYPE_STRUCT_PTR tp;
bool saw_not;
tp = boolean_factor(); // get first factor
if (tp != Boolean_typep)
compile_error(M_INVALID, M_TYPE, M_0);
while ((Token.code == AND )|| // get next factor(s)
(Token.code == NAND))
{
op = Token.code; // save operator
get_token(); // after operator
saw_not = false;
if (Token.code == NOT) // look for unary operator
{
saw_not = true;
get_token(); // after unary operator
}
tp = boolean_factor(); // get next factor
if (tp != Boolean_typep)
compile_error(M_INVALID, M_TYPE, M_0);
if (saw_not)
emit_operator(LOGIC_NOT, NO_CODE, 0, 0);
if (op == AND)
emit_operator(LOGIC_AND, NO_CODE, 0, 0);
else
emit_operator(LOGIC_NAND, NO_CODE, 0, 0);
}
return Boolean_typep;
}
/*
* "boolean_factor" parses:
*
* <expression> [relop <expression>]
*
* in which relop is one of the relational operators:
*
* = EQ
* ~= NE
* > GT
* >= GE
* < LT
* <= LE
*
* If there is no relational operator, then <expression> must be of
* boolean type.
*
* returns: a pointer to the type structure of the factor
*/
TYPE_STRUCT_PTR boolean_factor()
{
TYPE_STRUCT_PTR tp1, tp2;
TOKEN_CODE op;
OPCODE code;
if (Token.code == L_PAREN)
{
get_token(); // after "("
tp1 = boolean_expression(); // and get next token
if_code_get_token_else_error(R_PAREN);
}
else
{
tp1 = expression(); // get first expression
if (token_in(Rel_op_list)) // relational operator?
{
op = Token.code; // save operator
get_token(); // after rel op
tp2 = expression(); // get second expression
// check relational operator types
if ((tp1 == tp2) && (tp1->form == ENUM_FORM))
code = TEST_INDEX;
else if (((tp1 == Real_typep)||(tp1 == Integer_typep)) &&
((tp2 == Real_typep)||(tp2 == Integer_typep)))
code = TEST_NUMBER;
else if ((tp1 == Char_typep)&&(tp2 == Char_typep))
code = TEST_CHAR;
else if ((tp1->form == STRING_FORM)&&(tp2->form == STRING_FORM))
code = TEST_STRING;
else
compile_error(M_INCOMPATIBLE, M_TYPES, M_0);
// compare and evaluate
switch (op)
{
case EQ:
emit_operator(code, TEST_EQ, 0, 0);
break;
case NE:
emit_operator(code, TEST_NE, 0, 0);
break;
case GT:
emit_operator(code, TEST_GT, 0, 0);
break;
case GE:
emit_operator(code, TEST_GE, 0, 0);
break;
case LT:
emit_operator(code, TEST_LT, 0, 0);
break;
case LE:
emit_operator(code, TEST_LE, 0, 0);
break;
default:
assert(0);
break;
}
tp1 = Boolean_typep;
}
}
return tp1;
}
/*
* "expression" parses:
*
* <term> { +,- <term> }
*
* and the next "Token" is obtained.
*
* returns: a pointer to the type structure of the expression
*/
TYPE_STRUCT_PTR expression()
{
TOKEN_CODE op; // an operator token
TYPE_STRUCT_PTR tp1, tp2;
TOKEN_CODE unary_op = PLUS;
bool saw_unary = false;
// look for a unary operator
if ((Token.code == PLUS)||(Token.code == MINUS))
{
unary_op = Token.code;
saw_unary = true;
get_token(); // after unary operator
}
tp1 = term(); // get first term
if (unary_op == MINUS)
{
if (tp1 == Integer_typep)
emit_operator(INEG, NO_CODE, 0, 0);
else
emit_operator(RNEG, NO_CODE, 0, 0);
}
// if there was a unary operator, term must be integer or real
if ((saw_unary == true)&&(tp1 != Integer_typep)&&(tp1 != Real_typep))
compile_error(M_INCOMPATIBLE, M_TYPES, M_0);
// get next term(s)
while ((Token.code == PLUS)||(Token.code == MINUS))
{
op = Token.code; // save operator
get_token(); // after <op>
tp2 = term();
switch (op) // perform the operation
{
case PLUS:
case MINUS:
if ((tp1 == Integer_typep)&&(tp2 == Integer_typep))
{
if (op == PLUS)
emit_operator(IADD, NO_CODE, 0, 0);
else
emit_operator(ISUB, NO_CODE, 0, 0);
tp1 = Integer_typep;
}
else if (((tp1 == Real_typep)&&(tp2 == Real_typep)) ||
((tp1 == Real_typep)&&(tp2 == Integer_typep))||
((tp1 == Integer_typep)&&(tp2 == Real_typep)))
{
if (op == PLUS)
emit_operator(RADD, NO_CODE, 0, 0);
else
emit_operator(RSUB, NO_CODE, 0, 0);
tp1 = Real_typep;
}
else
{
compile_error(M_INCOMPATIBLE, M_TYPES, M_0);
tp1 = Integer_typep; // default type
}
break;
default:
assert(0);
break;
}
}
return tp1;
}
/*
* "term" parses:
*
* <factor> { *,/,div,mod <factor> }
*
* returns: a pointer to the type structure of the term
*/
TYPE_STRUCT_PTR term()
{
TOKEN_CODE op; // an operator token
TYPE_STRUCT_PTR tp1, tp2;
tp1 = factor(); // get first factor
while ((Token.code == STAR ) || // get next factor(s)
(Token.code == SLASH) ||
(Token.code == DIV ) ||
(Token.code == MOD ))
{
op = Token.code; // save operator
get_token(); // after operator
tp2 = factor(); // get next factor
switch (op) // perform the operation
{
case STAR:
if ((tp1 == Integer_typep)&&(tp2 == Integer_typep))
{
tp1 = Integer_typep;
emit_operator(IMUL, NO_CODE, 0, 0);
}
else if (((tp1 == Real_typep )&&(tp2 == Real_typep ))||
((tp1 == Real_typep )&&(tp2 == Integer_typep))||
((tp1 == Integer_typep)&&(tp2 == Real_typep )))
{
tp1 = Real_typep;
emit_operator(RMUL, NO_CODE, 0, 0);
}
else
{
compile_error(M_INCOMPATIBLE, M_TYPES, M_0);
tp1 = Real_typep; // default type
}
break;
case SLASH:
if ((tp1 == Real_typep)||(tp1 == Integer_typep)||
(tp2 == Real_typep)||(tp2 == Integer_typep))
emit_operator(RDIV, NO_CODE, 0, 0);
else
compile_error(M_INCOMPATIBLE, M_TYPES, M_0);
tp1 = Real_typep;
break;
case DIV:
case MOD:
if ((tp1 == Integer_typep)&&(tp2 == Integer_typep))
{
if (op == DIV)
emit_operator(IDIV, NO_CODE, 0, 0);
else
emit_operator(IMOD, NO_CODE, 0, 0);
tp1 = Integer_typep;
}
else
{
compile_error(M_INCOMPATIBLE, M_TYPES, M_0);
tp1 = Integer_typep;
}
break;
default:
assert(0);
break;
}
}
return tp1;
}
/*
* "factor" processes any of:
*
* an identifier
* a literal number
* a literal string
* "true" or "false"
* a parenthesized sub-expression
* an exponentiation
*
* and the next "Token" is obtained.
*
* returns: a pointer to the type structure of the factor
*/
TYPE_STRUCT_PTR factor()
{
int length;
TYPE_STRUCT_PTR tp1, tp2;
SYMTAB_NODE_PTR idp;
if (token_in(Function_list))
tp1 = standard_routine_call();
else switch (Token.code)
{
case ID_TOKEN:
idp = search_symbol_tables(Token.lexeme);
if (idp == NULL)
{
compile_error(M_UNDEFINED, M_IDENTIFIER, M_0);
get_token(); // after <id>
tp1 = Integer_typep; // default type
}
else switch (idp->definition)
{
case FUNC_DEFN:
get_token(); // after <id>
tp1 = declared_routine_call(idp);
break;
case PROC_DEFN:
compile_error(M_INVALID, M_IDENTIFIER, M_USAGE, M_0);
get_token(); // after <id>
tp1 = declared_routine_call(idp);
break;
default: // variable or constant
tp1 = value(idp); // and get next token
break;
}
break;
case CHAR_TOKEN:
emit_push_imm_char(Token.lexeme[0]);
tp1 = Char_typep;
get_token(); // after <character>
break;
case INT_TOKEN:
emit_push_imm_integer(Token.integer);
tp1 = Integer_typep;
get_token(); // after <number>
break;
case REAL_TOKEN:
emit_push_imm_real(Token.real);
tp1 = Real_typep;
get_token(); // after <number>
break;
case TRUE:
emit_push_imm_index(1);
tp1 = Boolean_typep;
get_token(); // after <boolean>
break;
case FALSE:
emit_push_imm_index(0);
tp1 = Boolean_typep;
get_token(); // after <boolean>
break;
case STR_TOKEN:
length = strlen(Token.lexeme);
tp1 = make_string_type_ptr(length);
emit_push_imm_string(Token.lexeme);
get_token(); // after <str>
break;
case L_PAREN:
get_token(); // after "("
tp1 = expression(); // and get next token
if_code_get_token_else_error(R_PAREN);
break;
default:
compile_error(M_SYNTAX, M_ERROR, M_0);
tp1 = Integer_typep;
break;
}
// process an exponentiation: <term> ^ <term>
// and return a real or integer type pointer.
if (Token.code == CARET) // exponentiate?
{
// valid base?
if ((tp1 != Real_typep)&&(tp1 != Integer_typep))
compile_error(M_INVALID, M_TYPE, M_0);
get_token(); // after "^"
tp2 = factor(); // get exponent
// valid exponent?
if ((tp2 != Real_typep)&&(tp2 != Integer_typep))
compile_error(M_INVALID, M_TYPE, M_0);
// determine return type
if ((tp1 == Integer_typep)&&(tp2 == Integer_typep))
{
emit_operator(IPOW, NO_CODE, 0, 0);
tp1 = Integer_typep;
}
else
{
emit_operator(RPOW, NO_CODE, 0, 0);
tp1 = Real_typep;
}
}
return tp1;
}
/*
* "value" checks the usage of an identifier in an expression.
*
* returns: a pointer to the type structure of the variable
*/
TYPE_STRUCT_PTR value(SYMTAB_NODE_PTR val_idp) // of the identifier
{
TYPE_STRUCT_PTR val_tp;
DEFN_KEY defn_key;
TOKEN_CODE code;
val_tp = val_idp->typep;
defn_key = val_idp->definition;
get_token(); // after <id>
code = Token.code;
// subscripts and/or field designators?
if ((Token.code == L_PAREN)||(Token.code == PERIOD))
{
if (val_tp->form != ENUM_FORM)
emit_push_imm_index(0);
}
if ((Token.code == L_PAREN)&&(val_tp->form == STRING_FORM))
val_tp = string_subscript(val_tp);
else while ((Token.code == L_PAREN)||(Token.code == PERIOD ))
{
if ((Token.code == L_PAREN) &&(val_tp->form == ARRAY_FORM))
val_tp = array_subscript_list(val_tp);
else if ((Token.code == PERIOD)&&
((val_tp->form == RECORD_FORM)||
(val_tp->form == UNION_FORM )||
(val_tp->form == ENUM_FORM )))
val_tp = dot_field(val_tp);
else
{
compile_error(M_INVALID, M_IDENTIFIER, M_USAGE, M_0);
while ((Token.code != R_PAREN )&&
(!token_in(Decl_start_list))&&
(!token_in(Stmt_start_list)))
get_token(); // until synchronized
val_tp = Real_typep; // avoid run time error
}
}
switch (defn_key) // check the definition
{
case CONST_DEFN:
case VAR_DEFN:
case VALPARM_DEFN:
case UNDEFINED: // stop extra error msg
emit_mem_item(PUSH, val_idp, val_tp);
break;
case VARPARM_DEFN:
emit_var_item(PUSH, val_idp, val_tp);
break;
case TYPE_DEFN:
if (val_idp->typep->form == ENUM_FORM)
{
if (code != PERIOD)
compile_error(M_MISSING, M_PERIOD, M_0);
}
else
compile_error(M_INVALID, M_IDENTIFIER, M_USAGE, M_0);
break;
default:
compile_error(M_INVALID, M_IDENTIFIER, M_USAGE, M_0);
break;
}
return val_tp;
}
/*
* "string_subscript" processes a subscript following
* a string identifier. The next "Token" is obtained.
*
* returns: a pointer element type structure
*/
TYPE_STRUCT_PTR string_subscript(TYPE_STRUCT_PTR tp)
{
get_token(); // after "("
get_integer_expression();
emit_op_reg_imm(MOV_ax_imm, tp->array.max_index);
emit_op_reg_imm(MOV_bx_imm, tp->array.val_index);
emit_operator(ADD_xi_offset, NO_CODE, 0, 0);
if_code_get_token_else_error(R_PAREN);
return tp->array.elmt_typep;
}
/*
* "array_subscript_list" processes a list of subscripts following
* an array identifier. The next "Token" is obtained.
*
* returns: a pointer element type structure
*/
TYPE_STRUCT_PTR array_subscript_list(TYPE_STRUCT_PTR tp)
{
// get elmt type from id type
TYPE_STRUCT_PTR elmt_tp = tp->array.elmt_typep;
do
{
if (tp->form == ARRAY_FORM) // if an indexed array
{
get_token(); // after "(" or ","
get_integer_expression();
emit_op_reg_imm(MOV_ax_imm, tp->array.max_index);
emit_op_reg_imm(MOV_bx_imm, tp->array.val_index);
emit_operator(ADD_xi_offset, NO_CODE, 0, 0);
}
else
{
compile_error(M_TOO, M_MANY, M_SUBSCRIPTS, M_0);
while ((Token.code != R_PAREN )&&
(!token_in(Decl_start_list))&&
(!token_in(Stmt_start_list)))
get_token(); // until synchronized
break;
}
// if the next subscript is valid, tp->form will be
// set to ARRAY_FORM
tp = tp->array.next_indexp; // for next dimension
}
while (Token.code == COMMA);
if_code_get_token_else_error(R_PAREN);
return elmt_tp;
}
/*
* "dot_field" processes a field designation following a dot operator
*
* returns: a pointer to the type structure of the identifier
* found on the right of a "."
*/
TYPE_STRUCT_PTR dot_field(TYPE_STRUCT_PTR tp) // left type ptr
{
SYMTAB_NODE_PTR field_idp;
TYPE_STRUCT_PTR field_tp;
get_token(); // after "."
if (((tp->form == RECORD_FORM)||(tp->form == UNION_FORM))&&(Token.code == ID_TOKEN))
{
// search for field in structure type's symbol table
if (tp->form == RECORD_FORM)
{
field_idp = lookup(Token.lexeme, tp->record_struct.field_symtab);
}
else
{
field_idp = lookup(Token.lexeme, tp->union_struct.field_symtab);
}
if (field_idp == NULL)
{
compile_error(M_INVALID, M_FIELD, M_0);
field_tp = Null_typep;
}
else
field_tp = field_idp->typep;
emit_operator(ADD_xi_imm, NO_CODE, 0, field_idp->data_offset);
get_token(); // after <id>
}
else if ((Token.code == ID_TOKEN)&&(tp->form == ENUM_FORM))
{
// search for field in enumeration type's symbol table
field_idp = lookup(Token.lexeme, tp->enumeration.field_symtab);
if (field_idp == NULL)
{
compile_error(M_INVALID, M_FIELD, M_0);
field_tp = Null_typep;
}
else
{
field_tp = field_idp->typep;
emit_push_imm_index(field_idp->integer);
}
get_token(); // after <id>
}
else
{
compile_error(M_INVALID, M_FIELD, M_0);
field_tp = Null_typep;
get_token(); // after <id>
}
return field_tp;
}
/*
* "get_integer_expression" processes an expression;
* it the expression type in not integer, a compile error is
* generated
*
* returns: nothing
*/
void get_integer_expression()
{
TYPE_STRUCT_PTR tp = expression(); // and get next token
if (tp != Integer_typep)
compile_error(M_INVALID, M_TYPE, M_0);
}
/*
* "get_number_expression" processes an expression;
* it the expression type in not integer or real, a compile error is
* generated
*
* returns: pointer to expression type
*/
TYPE_STRUCT_PTR get_number_expression()
{
TYPE_STRUCT_PTR tp = expression(); // and get next token
if ((tp != Integer_typep) && (tp != Real_typep))
compile_error(M_INVALID, M_TYPE, M_0);
return tp;
}
/*
* "int_expression" parses:
*
* <term> { +,- <term> }
*
* used in a type or constant declaration.
*
* returns: a pointer to the type structure of the expression
*/
TYPE_STRUCT_PTR int_expression()
{
int term1, term2;
TOKEN_CODE op; // an operator token
TOKEN_CODE unary_op;
TYPE_STRUCT_PTR tp1, tp2;
unary_op = PLUS;
// look for a unary operator
if ((Token.code == PLUS)||(Token.code == MINUS))
{
unary_op = Token.code;
get_token(); // after unary operator
}
tp1 = int_term(); // get first term
if (unary_op == MINUS)
{
term1 = pop_int_stack();
push_int_stack(-term1);
}
// get next term(s)
while ((Token.code == PLUS)||(Token.code == MINUS))
{
op = Token.code; // save operator
get_token(); // after <op>
tp2 = int_term();
switch (op) // perform the operation
{
case PLUS:
case MINUS:
if ((tp1 == Integer_typep)&&(tp2 == Integer_typep))
{
term2 = pop_int_stack();
term1 = pop_int_stack();
if (op == PLUS)
push_int_stack(term1 + term2);
else
push_int_stack(term1 - term2);
}
break;
default:
assert(0);
break;
}
}
return tp1;
}
/*
* "int_term" parses:
*
* <factor> { *,/,div,mod <factor> }
*
* used in a type declaration.
*
* returns: a pointer to the type structure of the term
*/
TYPE_STRUCT_PTR int_term()
{
int factor1, factor2;
TOKEN_CODE op; // an operator token
TYPE_STRUCT_PTR tp1, tp2;
tp1 = int_factor(); // get first factor
while ((Token.code == STAR )|| // get next factor(s)
(Token.code == SLASH)||
(Token.code == DIV )||
(Token.code == MOD ))
{
op = Token.code; // save operator
get_token(); // after operator
tp2 = int_factor(); // get next factor
switch (op) // perform the operation
{
case STAR:
if ((tp1 == Integer_typep)&&(tp2 == Integer_typep))
{
factor2 = pop_int_stack();
factor1 = pop_int_stack();
push_int_stack(factor1 * factor2);
}
break;
case SLASH:
compile_error(M_CONTEXT, M_ERROR, M_0);
break;
case DIV:
case MOD:
if ((tp1 == Integer_typep)&&(tp2 == Integer_typep))
{
factor2 = pop_int_stack();
factor1 = pop_int_stack();
if (factor2 == 0)
compile_error(M_DIVISION, M_BY, M_ZERO, M_0);
else
{
if (op == DIV)
push_int_stack(factor1 / factor2);
else
push_int_stack(factor1 % factor2);
}
}
break;
default:
assert(0);
break;
}
}
return tp1;
}
/*
* "int_factor" processes any of:
*
* an identifier
* a literal integer
*
* used in a type declaration.
*
* returns: a pointer to the type structure of the factor
*/
TYPE_STRUCT_PTR int_factor()
{
int int_value, i, factor1, factor2;
TYPE_STRUCT_PTR tp1, tp2;
SYMTAB_NODE_PTR idp;
switch (Token.code)
{
case ID_TOKEN:
idp = search_symbol_tables(Token.lexeme);
if (idp == NULL)
{
tp1 = Integer_typep; // default type
compile_error(M_UNDEFINED, M_IDENTIFIER, M_0);
}
else // get constant
{
tp1 = idp->typep;
if (idp->definition == CONST_DEFN)
{
if (tp1 == Integer_typep)
push_int_stack(idp->integer);
else
compile_error(M_INVALID, M_TYPE, M_0);
}
else
compile_error(M_NOT, M_CONSTANT, M_IDENTIFIER, M_0);
}
get_token(); // after <id>
break;
case INT_TOKEN:
push_int_stack((int) Token.integer);
tp1 = Integer_typep;
get_token(); // after <number>
break;
case L_PAREN:
get_token(); // after "("
tp1 = int_expression(); // and get next token
if_code_get_token_else_error(R_PAREN);
break;
default:
compile_error(M_SYNTAX, M_ERROR, M_0);
tp1 = Integer_typep;
get_token();
break;
}
// process an exponentiation: <term> ^ <term>
// and return an integer type pointer.
if (Token.code == CARET) // exponentiate?
{
get_token(); // after "^"
tp2 = int_factor(); // get exponent
if (tp2 != Integer_typep) // valid exponent?
compile_error(M_INVALID, M_TYPE, M_0);
// determine return type
if ((tp1 == Integer_typep)&&(tp2 == Integer_typep))
{
// both are integers and exponent >= 0
factor2 = pop_int_stack();
factor1 = pop_int_stack();
if (factor2 >= 0)
{
int_value = 1;
for (i = factor2; i > 0; i--)
int_value = int_value * factor1;
push_int_stack(int_value);
}
else
compile_error(M_INTEGER, M_OUT, M_OF, M_RANGE, M_0);
}
}
return tp1;
}
| home
| contents
| previous
| next page
| send comment
| send link
| add bookmark |
Copyright © 2004, Stephen R. Schmitt