Wednesday, June 23, 2010

(A)Yacc-based parser for ParaSail

Here is the ayacc-compatible grammar for ParaSail, along with a simple driver program adapted from an old Ada parser.  As before, the commenting conventions and the code are in Ada, but it should be straightforward to translate them into some other language.


--------------------------------------
-- Tentative YACC Grammar for ParaSail
--------------------------------------

-- Single-character delimiters --
%token ',' ';' ':' '.'
%token '+' '-' '*' '/' 
%token '~' '?'
%token '(' ')' '[' ']' '<' '>' '{' '}'
%token '|' 
%token PRIME -- '''

-- Compound delimiters --
%token COMPARE -- "=?"
%token EQ   -- "=="
%token NEQ  -- "!="
%token GEQ  -- ">="
%token LEQ  -- "<="
%token POWER  -- "**"
%token ASSIGN -- ":="
%token SWAP   -- ":=:"
%token DOT_DOT -- ".."
%token DOUBLE_COLON  -- "::"
%token DOUBLE_LEFT_BRACKET  -- "[["
%token DOUBLE_RIGHT_BRACKET -- "]]"
%token REFERS_TO  -- "=>"
%token GIVES    -- "->"
%token IMPLIES    -- "==>"
%token SEQUENCE   -- ";;"
%token PARALLEL   -- "||"

-- Literals --
%token Char_Literal
%token Enum_Literal
%token Integer_Literal 
%token Real_Literal
%token String_Literal

-- Identifier --
%token Identifier 

-- Reserved words --
%token ABS_kw
%token ABSTRACT_kw
%token ALL_kw
%token AND_kw
%token BLOCK_kw
%token CASE_kw
%token CLASS_kw
%token CONCURRENT_kw
%token CONST_kw
%token CONTINUE_kw
%token EACH_kw
%token ELSE_kw
%token ELSIF_kw
%token END_kw
%token EXIT_kw
%token EXPORTS_kw
%token EXTENDS_kw
%token FOR_kw
%token FORWARD_kw
%token FUNCTION_kw
%token IF_kw
%token IMPORT_kw
%token IN_kw
%token INTERFACE_kw
%token IS_kw
%token LOCKED_kw
%token LOOP_kw
%token MOD_kw
%token MUTABLE_kw
%token NEW_kw
%token NOT_kw
%token NULL_kw
%token OF_kw
%token OPERATOR_kw
%token OPTIONAL_kw
%token OR_kw
%token PROCEDURE_kw
%token QUEUED_kw
%token REF_kw
%token REM_kw
%token RETURN_kw
%token REVERSE_kw
%token SELECT_kw
%token SOME_kw
%token THEN_kw
%token TYPE_kw
%token VAR_kw
%token WHILE_kw
%token WITH_kw
%token XOR_kw

-- Start symbol of the grammar: a complete parse reduces to module_list.
%start module_list

-- Ada declarations section: the semantic-value type yystype is declared
-- as a plain integer.  No rule in this grammar carries a semantic action,
-- so the value is never inspected here.
{ 
   subtype yystype is integer;
}


%%

-- A compilation is a sequence of one or more modules.
module_list : 
    module
  | module_list module
  ;

-- A module is either an interface declaration or a class definition,
-- preceded by its (possibly empty) import clauses and terminated by ';'.
module : 
    import_clauses interface_declaration ';' 
  | import_clauses class_definition ';' 
  ;

-- Zero or more "import name {, name} ;" clauses
-- (the first alternative is the empty one).
import_clauses : 
  | import_clauses IMPORT_kw qualified_name_list ';'
  ;

-- Comma-separated list of one or more qualified names.
qualified_name_list : 
    qualified_name
  | qualified_name_list ',' qualified_name
  ;

-- [abstract] [concurrent] interface Name <formals> [extends ...] is
--    elements
-- end interface Name
-- The grammar only requires a module_defining_name after END INTERFACE;
-- it does not itself check that the two names agree.
interface_declaration : 
   opt_ABSTRACT_kw opt_CONCURRENT_kw INTERFACE_kw module_defining_name 
     formals_and_implemented_interfaces
     IS_kw
      interface_element_list
   END_kw INTERFACE_kw module_defining_name ;
   
-- Optional ABSTRACT keyword (second alternative is empty).
opt_ABSTRACT_kw :  ABSTRACT_kw | ;

-- Optional CONCURRENT keyword (second alternative is empty).
opt_CONCURRENT_kw : CONCURRENT_kw | ;

-- Module formal parameters, enclosed in '<' ... '>'; the list may be empty.
formals : '<' opt_module_formal_list '>' ;

-- Either the formals alone, or optional formals followed by
-- EXTENDS and a list of interface names.  The formals may be
-- omitted only when an EXTENDS part is present.
formals_and_implemented_interfaces :
    formals
  | opt_formals EXTENDS_kw interface_name_list
  ; 

-- Optional formals (second alternative is empty).
opt_formals : formals | ;

-- Comma-separated list of one or more interface names.
interface_name_list :
    interface_name
  | interface_name_list ',' interface_name
  ;

-- An interface is named either by a bare module name or by an instantiation.
interface_name : module_name | module_instantiation ;
   
-- The name introduced by (and repeated at the end of) a module.
module_defining_name : qualified_name ;

-- Optional list of module formals (second alternative is empty).
opt_module_formal_list : module_formal_list | ;

-- One or more annotated module formals, separated by ';'.
module_formal_list : 
    annotated_module_formal 
  | module_formal_list ';' annotated_module_formal 
  ;

-- A module formal may carry an annotation before and/or after it.
annotated_module_formal : opt_annotation module_formal opt_annotation ;

-- Optional annotation (second alternative is empty).
opt_annotation : annotation | ;

-- A module formal is either a type formal or a value formal.
module_formal : type_formal | value_formal ;

-- A type formal is a module instantiation, optionally named
-- with "Identifier is".
type_formal : 
    Identifier IS_kw module_instantiation 
  | module_instantiation 
  ;

-- A value formal is "ids : type_name" with an optional ":=" default.
-- The default is restricted to simple_expression so that a '>' in it
-- cannot be confused with the '>' closing the formals.
value_formal : 
    id_list ':' type_name 
  | id_list ':' type_name ASSIGN simple_expression  -- to avoid use of '>'
  ;

-- Comma-separated list of one or more identifiers.
id_list : 
    Identifier
  | id_list ',' Identifier
  ;

-- A type is named by a qualified name.
type_name : qualified_name ;

-- One or more identifiers joined by "::".
qualified_name : 
    Identifier 
  | qualified_name DOUBLE_COLON Identifier ;


-- Module name followed by a (possibly empty) actual list in '<' ... '>'.
module_instantiation : 
  module_name '<' opt_module_actual_list '>' ;


-- A module is referred to by a qualified name.
module_name : qualified_name ;

-- Optional list of module actuals (second alternative is empty).
opt_module_actual_list : module_actual_list | ;

-- Comma-separated list of one or more module actuals.
module_actual_list :
    module_actual 
  | module_actual_list ',' module_actual 
  ;

-- A module actual is positional, or named with "Identifier =>".
module_actual : 
    type_specifier_or_expression
  | Identifier REFERS_TO type_specifier_or_expression
  ;

-- simple_expression subsumes type_name in this rule
-- (so, unlike type_specifier, there is no bare type_name alternative).
type_specifier_or_expression : 
    type_name annotation
  | simple_expression              -- to avoid problems with '>'
  | module_instantiation
  ;
  
-- A type specifier is a type name (optionally annotated)
-- or a module instantiation.
type_specifier : 
    type_name annotation 
  | type_name 
  | module_instantiation 
  ;

-- Zero or more interface elements, each terminated by ';'.
interface_element_list : 
  | interface_element_list interface_element ';'
  ;

-- Declarations permitted inside an interface (interfaces may nest).
interface_element : 
    operation_declaration 
  | object_declaration 
  | interface_declaration 
  | type_declaration 
  ;


-- [concurrent] class Name <formals> [extends ...] is
--    local elements
-- exports
--    exported elements
-- end class Name
-- As with interfaces, the grammar does not check that the two names agree.
class_definition :
   opt_CONCURRENT_kw CLASS_kw module_defining_name 
      formals_and_extended_interface 
   IS_kw
      class_element_list
   END_kw CLASS_kw module_defining_name ;

-- Either the formals alone, or optional formals followed by EXTENDS and
-- a single interface name, which may be prefixed with "Identifier :".
formals_and_extended_interface :
    formals
  | opt_formals EXTENDS_kw interface_name
  | opt_formals EXTENDS_kw Identifier ':' interface_name
  ; 


-- Local elements, then the EXPORTS keyword (always required by this
-- rule), then exported elements.  Either list may be empty.
class_element_list : 
    local_class_element_list
  EXPORTS_kw 
    exported_class_element_list ;

-- Zero or more local class elements, each terminated by ';'.
local_class_element_list :
  | local_class_element_list local_class_element ';'
  ;

-- A local element may be a declaration or a definition.
local_class_element : interface_element | exported_class_element ;
  
-- Zero or more exported class elements, each terminated by ';'.
exported_class_element_list :
  | exported_class_element_list exported_class_element ';'
  ;

-- Exported elements are definitions only (classes may nest).
exported_class_element : 
    operation_definition 
  | object_definition 
  | class_definition 
  ;
  
   
-- An annotation is a non-empty, ';'-separated element list in braces.
annotation : '{' annotation_element_list '}' ;

-- One or more annotation elements separated by ';'
-- (';' is a separator here, not a terminator).
annotation_element_list : 
    annotation_element
  | annotation_element_list ';' annotation_element
  ;

-- An annotation element is a declaration or a condition.
annotation_element : interface_element | condition ;

-- A condition is syntactically just an expression.
condition : expression ;


-- An operation is a function, a procedure, or an operator.
operation_declaration : 
    function_declaration 
  | procedure_declaration 
  | operator_declaration 
  ;

-- function Name inputs -> outputs   (outputs are mandatory)
function_declaration :
  FUNCTION_kw Identifier operation_inputs GIVES operation_outputs ;

-- procedure Name inputs   (no outputs)
procedure_declaration :
  PROCEDURE_kw Identifier operation_inputs ;
  
-- operator "designator" inputs [-> outputs]
operator_declaration :
  OPERATOR_kw operator_designator operation_inputs opt_gives_operation_outputs ;

-- Optional "-> outputs" part (second alternative is empty).
opt_gives_operation_outputs : GIVES operation_outputs | ;
  
-- An operator is designated by a string literal.
operator_designator : String_Literal ;
  
-- Inputs are either a single unparenthesized input, or a parenthesized
-- (possibly empty) list optionally followed by an annotation.
operation_inputs :
    annotated_operation_input
  | '(' opt_annotated_operation_input_list ')' opt_annotation 
  ;

-- Optional input list (second alternative is empty).
opt_annotated_operation_input_list : annotated_operation_input_list | ;

-- One or more annotated inputs separated by ';'.
annotated_operation_input_list : 
    annotated_operation_input
  | annotated_operation_input_list ';' annotated_operation_input
  ;

-- An input may carry an annotation before and/or after it.
annotated_operation_input : opt_annotation operation_input opt_annotation ;

-- An input is "ids : [modifier] type", or an unnamed "[modifier] type".
operation_input : 
    id_list ':' opt_input_modifier operand_type_specifier 
  | input_modifier operand_type_specifier 
  | operand_type_specifier 
  ;

-- Optional input modifier (second alternative is empty).
opt_input_modifier : input_modifier | ;
  
-- The type of an operand is a type name or a type formal.
operand_type_specifier : type_name | type_formal ;

-- Input modifiers: any output modifier, or QUEUED / LOCKED optionally
-- followed by an output modifier.
input_modifier : 
    output_modifier 
  | QUEUED_kw opt_output_modifier
  | LOCKED_kw opt_output_modifier
  ;


-- Outputs are either a single unparenthesized output, or a parenthesized
-- non-empty list optionally followed by an annotation.
operation_outputs : 
    annotated_operation_output
  | '(' annotated_operation_output_list ')' opt_annotation 
  ;

-- One or more annotated outputs separated by ';'.
annotated_operation_output_list :
    annotated_operation_output
  | annotated_operation_output_list ';' annotated_operation_output
  ;

-- An output may carry an annotation before and/or after it.
annotated_operation_output : opt_annotation operation_output opt_annotation ;

-- An output is "ids : [modifier] type", or an unnamed "[modifier] type".
operation_output : 
    id_list ':' opt_output_modifier operand_type_specifier
  | output_modifier operand_type_specifier 
  | operand_type_specifier 
  ;

-- Optional output modifier (second alternative is empty).
opt_output_modifier : output_modifier | ;

-- Output modifiers always begin with REF (see REF_opt_optional_mutable)
-- and may end with VAR or CONST.
output_modifier :  
    REF_opt_optional_mutable 
  | REF_opt_optional_mutable VAR_kw 
  | REF_opt_optional_mutable CONST_kw 
  ;

-- REF, optionally followed by OPTIONAL and/or MUTABLE, in that order.
REF_opt_optional_mutable :
    REF_kw
  | REF_kw OPTIONAL_kw
  | REF_kw MUTABLE_kw
  | REF_kw OPTIONAL_kw MUTABLE_kw
  ;

-- (var | const) Name : type [:= expression]
object_declaration : 
   var_or_const Identifier ':' type_specifier opt_ASSIGN_expression ;

-- Optional ":= expression" initializer (second alternative is empty).
opt_ASSIGN_expression : ASSIGN expression | ;
   
-- Either VAR or CONST.
var_or_const : VAR_kw | CONST_kw ;

-- Object definitions:
--   const Name [: type] := expression        (initializer required)
--   var Name : [optional] [mutable] type [:= expression]
--   var Name := expression                   (type omitted)
object_definition :
    CONST_kw Identifier opt_colon_type_specifier ASSIGN expression
  | VAR_kw Identifier ':' opt_OPTIONAL_kw opt_MUTABLE_kw type_specifier 
      opt_ASSIGN_expression
  | VAR_kw Identifier ASSIGN expression 
  ;

-- Optional ": type" part (second alternative is empty).
opt_colon_type_specifier : ':' type_specifier | ;

-- Optional OPTIONAL / MUTABLE keywords (second alternatives are empty).
opt_OPTIONAL_kw : OPTIONAL_kw | ;
opt_MUTABLE_kw : MUTABLE_kw | ;

-- type Name is [new] type_specifier
type_declaration : TYPE_kw Identifier IS_kw opt_NEW_kw type_specifier ;

-- Optional NEW keyword (second alternative is empty).
opt_NEW_kw : NEW_kw | ;

-- An operation definition is a declaration together with its body.
operation_definition : 
    function_definition 
  | procedure_definition 
  | operator_definition 
  ;

-- function declaration, IS, a ';'-terminated statement list, then
-- "end function Name".  The grammar does not check that the trailing
-- Identifier matches the declared name.
function_definition : 
  function_declaration IS_kw statement_list ';' END_kw FUNCTION_kw Identifier ;

-- procedure declaration, IS, a ';'-terminated statement list, then
-- "end procedure Name".  The grammar does not check that the trailing
-- Identifier matches the declared name.
procedure_definition : 
  procedure_declaration IS_kw statement_list ';' END_kw PROCEDURE_kw Identifier ;

-- operator declaration, IS, a ';'-terminated statement list, then
-- "end operator <designator>".  An operator is named by
-- operator_designator (a string literal) in operator_declaration, so the
-- closing name must also be an operator_designator; an Identifier here
-- would make it impossible to repeat the operator's own name.
-- The grammar does not check that the two designators match.
operator_definition : 
  operator_declaration IS_kw statement_list ';' END_kw OPERATOR_kw operator_designator  ;


-- Statements combined with the separators ";;" (SEQUENCE), ";",
-- "||" (PARALLEL), or ";" followed by "||".
-- NOTE(review): the combining alternatives are recursive on both sides and
-- no %left/%right precedence is declared in this file, so the rule is
-- ambiguous as written; presumably acceptable for a tentative grammar.
statement_list : 
    annotated_statement
  | statement_list SEQUENCE statement_list
  | statement_list ';' statement_list
  | statement_list PARALLEL statement_list
  | statement_list ';' PARALLEL statement_list
  ;
  
-- Optional ';' (second alternative is empty).
opt_semi : ';' | ;

-- A statement may carry an annotation before and/or after it.
annotated_statement : opt_annotation statement opt_annotation ;

-- A statement is a local declaration or definition, a simple statement,
-- a compound statement (optionally labeled), or a parenthesized
-- statement list with an optional trailing ';'.
statement : 
    local_declaration 
  | local_definition 
  | simple_statement 
  | label compound_statement
  | compound_statement
  | '(' statement_list opt_semi ')' 
  ;

-- NOTE(review): opt_label is not referenced by any rule in this grammar;
-- the statement rule spells out the labeled and unlabeled cases instead.
opt_label : label | ;

-- Simple statements: assignment, swap, call, return,
-- "continue loop [id]", and "exit <kind> [id]"; the last three may
-- be followed by WITH values.
simple_statement :
    name ASSIGN expression
  | name SWAP name
  | name '(' opt_operation_actual_list ')'
  | RETURN_kw opt_WITH_values 
  | CONTINUE_kw LOOP_kw opt_id opt_WITH_values 
  | EXIT_kw compound_statement_kind opt_id opt_WITH_values
  ;

-- Optional actual-parameter list (second alternative is empty).
opt_operation_actual_list : operation_actual_list | ;

-- Optional WITH values (second alternative is empty).
opt_WITH_values : WITH_values | ;

-- with ( actuals )   -- the actual list must be non-empty
WITH_values : WITH_kw '(' operation_actual_list ')' ;

-- Optional identifier (second alternative is empty).
opt_id : Identifier | ;

-- The keyword naming the kind of compound statement being exited.
compound_statement_kind : LOOP_kw | IF_kw | CASE_kw | SELECT_kw | BLOCK_kw ;

-- Declarations allowed among statements (object declarations are not
-- included; objects appear via object_definition in local_definition).
local_declaration : operation_declaration | type_declaration ;

-- Definitions allowed among statements.
local_definition :
    object_definition 
  | operation_definition 
  ;

-- A label is an identifier enclosed in '*' characters.
label : '*' Identifier '*' ;

-- The compound statement forms.
compound_statement :
    if_statement
  | case_statement
  | while_loop_statement
  | for_loop_statement
  | block_statement 
  | select_statement
  ;

-- if condition then statements ;
-- followed by elsif_list and opt_else, and closed by
-- "end if [id] [with (values)]".
if_statement : 
  IF_kw condition THEN_kw 
     statement_list ';'
  elsif_list
  opt_else
  END_kw IF_kw opt_id opt_WITH_values ;

-- Zero or more elsif clauses (the first alternative is empty).
-- The list must be allowed to be empty: if_statement always includes an
-- elsif_list, so a non-empty list would force every "if" to contain at
-- least one ELSIF and a plain "if ... then ... end if" could not be parsed.
elsif_list : 
  | elsif_list elsif_clause
  ;

-- elsif condition then statements ;
elsif_clause :
  ELSIF_kw condition THEN_kw
     statement_list ';' ;

-- Optional "else statements ;" part (second alternative is empty).
opt_else : ELSE_kw statement_list ';' | ;

-- case expression of
--    one or more alternatives, then an optional default alternative
-- end case [id] [with (values)]
case_statement : 
  CASE_kw expression OF_kw
    case_alt_list
    opt_default_alt
  END_kw CASE_kw opt_id opt_WITH_values ;

-- One or more case alternatives.
case_alt_list : 
    case_alt
  | case_alt_list case_alt
  ;

-- [ choices ] => statements ;
case_alt :
    '[' choice_list ']' REFERS_TO statement_list ';' ;

-- One or more choices separated by '|'.
choice_list : 
    choice  
  | choice_list '|' choice 
  ;

-- A choice is an expression or a range.
choice : expression_or_range ;

-- Optional default alternative, written "[..] => statements ;"
-- (second alternative is empty).
opt_default_alt : '[' DOT_DOT ']' REFERS_TO statement_list ';' | ;

-- while condition loop statements ; end loop [id] [with (values)]
while_loop_statement :
  WHILE_kw condition LOOP_kw
    statement_list ';'
  END_kw LOOP_kw opt_id opt_WITH_values ;

-- for iterators [direction] loop statements ; end loop [id] [with (values)]
for_loop_statement :
  FOR_kw iterator_list opt_direction LOOP_kw
    statement_list ';'
  END_kw LOOP_kw opt_id opt_WITH_values ;

-- Comma-separated list of one or more iterators.
iterator_list : 
    iterator 
  | iterator_list ',' iterator
  ;

-- Iterator forms:
--   Id in choices
--   each Id of expression
--   Id := expression then next-values while condition
--   Id => name then next-names while condition
iterator :
    Identifier IN_kw choice_list
  | EACH_kw Identifier OF_kw expression
  | Identifier ASSIGN expression THEN_kw next_value_list WHILE_kw condition
  | Identifier REFERS_TO name THEN_kw next_name_list WHILE_kw condition
  ;

-- One or more expressions separated by "||".
next_value_list : 
    expression 
  | next_value_list PARALLEL expression 
  ;

-- One or more names separated by "||".
next_name_list : 
    name 
  | next_name_list PARALLEL name 
  ;

-- Optional loop direction (second alternative is empty).
opt_direction : direction | ;

-- Loop direction keywords.
direction : CONCURRENT_kw | FORWARD_kw | REVERSE_kw ;

-- select alternatives end select [id] [with (values)]
select_statement :
  SELECT_kw 
     select_alt_list
  END_kw SELECT_kw opt_id opt_WITH_values ;

-- One or more select alternatives separated by "||".
select_alt_list : 
    select_alt
  | select_alt_list PARALLEL select_alt
  ;

-- [ statements [;] ] => statements [;]
select_alt : '[' statement_list opt_semi ']' REFERS_TO statement_list opt_semi ;

-- block statements ; end block [id] [with (values)]
block_statement :
  BLOCK_kw
    statement_list ';'
  END_kw BLOCK_kw opt_id opt_WITH_values ;
 
-- Full expressions: binary operators, membership tests ("in" and
-- "not in"), and the "? :" conditional form.
-- NOTE(review): all binary operators share the single recursive alternative
-- below and no %left/%right precedence is declared in this file, so operator
-- precedence and associativity are not expressed by this tentative grammar.
expression : 
    simple_expression
  | expression binary_operator expression
  | expression IN_kw expression_or_range
  | expression NOT_kw IN_kw expression_or_range
  | expression '?' expression ':' expression 
  ;

-- A primary preceded by zero or more unary operators.
simple_expression :  -- used to avoid use of '>' in module instantiation
    primary
  | unary_operator simple_expression  -- makes unary ops higher precedence
  ;

-- An expression, or a range "expression .. expression".
expression_or_range : 
    expression
  | expression DOT_DOT expression ;
  
-- Primaries: names, literals, null, parenthesized expressions
-- (including conditional and quantified expressions, which are only
-- allowed inside parentheses), aggregates, and "[[ expression ]]".
primary :
    name
  | Integer_Literal
  | Real_Literal
  | Char_Literal
  | String_Literal
  | Enum_Literal
  | NULL_kw
  | '(' expression ')'
  | '(' conditional_expression ')'
  | '(' quantified_expression ')'
  | aggregate
  | DOUBLE_LEFT_BRACKET expression DOUBLE_RIGHT_BRACKET 
  ;
  
-- Names: a qualified name, optionally followed by attributes and/or
-- PRIME, then any sequence of "( actuals )", "[ actuals ]" and
-- ".selector" suffixes.
name :
    qualified_name attribute_list opt_PRIME
  | qualified_name PRIME
  | qualified_name 
  | name '(' opt_operation_actual_list ')'
  | name '[' opt_operation_actual_list ']'
  | name '.' selector
  ;

-- One or more attributes; each is lexically an Enum_Literal token.
attribute_list :
    Enum_Literal
  | attribute_list Enum_Literal 
  ;

-- Optional PRIME token (second alternative is empty).
opt_PRIME : PRIME | ;

-- Comma-separated list of one or more actuals.
operation_actual_list : 
    operation_actual 
  | operation_actual_list ',' operation_actual 
  ;

-- An actual is positional, or named with "Identifier =>".
operation_actual : 
    expression
  | Identifier REFERS_TO expression 
  ;

-- A component selector is an identifier.
selector : Identifier ;

-- Prefix operators.
unary_operator : '+' | '-' | ABS_kw | NOT_kw ;

-- Infix operators, including the two-token forms "and then" and
-- "or else".  MOD_kw and REM_kw are declared as tokens but do not
-- appear here.
binary_operator :
    '+'  | '-' | '*'  | '/' | POWER | '~' 
  | COMPARE | EQ | NEQ | '<' | LEQ | '>' | GEQ
  | AND_kw | OR_kw | XOR_kw  
  | AND_kw THEN_kw | OR_kw ELSE_kw | IMPLIES
  ;


-- An aggregate is a class aggregate or a container aggregate.
aggregate : class_aggregate | container_aggregate ;

-- ( [actuals] )   -- parenthesized, possibly empty, actual list
class_aggregate : '(' opt_operation_actual_list ')' ;

-- [ elements ]   -- bracketed, non-empty element list
container_aggregate : '[' container_element_list ']' ;
  
-- Comma-separated list of one or more container elements.
container_element_list : 
    container_element 
  | container_element_list ',' container_element 
  ;

-- An element is a plain expression, or "choices => stream".
container_element : 
    expression
  | choice_list REFERS_TO filtered_expression_stream ;

-- NOTE(review): container_key is not referenced by any rule in this grammar.
container_key : expression ;

-- An expression stream, optionally filtered by ": condition".
filtered_expression_stream : 
    expression_stream
  | expression_stream ':' condition
  ;

-- One or more expressions separated by '~'.
-- NOTE(review): '~' is also listed in binary_operator, so
-- "a ~ b" can be derived in two ways here.
expression_stream : 
    expression 
  | expression_stream '~' expression
  ;

-- Conditional expressions; the primary rule only admits them
-- inside parentheses.
conditional_expression :
    if_expression
  | case_expression
  ;

-- if condition then expression, followed by elsif_expr_list and
-- opt_else_expr.  Unlike if_statement there is no closing "end if".
if_expression : 
  IF_kw condition THEN_kw 
     expression
  elsif_expr_list
  opt_else_expr ;

-- Zero or more elsif clauses of an if_expression (the first alternative
-- is empty).  The list must be allowed to be empty: if_expression always
-- includes an elsif_expr_list, so a non-empty list would force every
-- if-expression to contain at least one ELSIF and
-- "(if c then a else b)" could not be parsed.
elsif_expr_list : 
  | elsif_expr_list elsif_expr_clause
  ;

-- elsif condition then expression
elsif_expr_clause :
  ELSIF_kw condition THEN_kw expression ;

-- Optional "else expression" part (second alternative is empty).
opt_else_expr : ELSE_kw expression | ;

-- case expression of
--    one or more alternatives, then an optional default alternative.
-- Unlike case_statement there is no closing "end case".
case_expression : 
  CASE_kw expression OF_kw
    case_expr_alt_list
    opt_default_expr_alt ;

-- One or more case-expression alternatives.
case_expr_alt_list : 
    case_expr_alt
  | case_expr_alt_list case_expr_alt
  ;

-- [ choices ] => expression
case_expr_alt : '[' choice_list ']' REFERS_TO expression ;

-- Optional default alternative, written "[..] => expression"
-- (second alternative is empty).
opt_default_expr_alt : '[' DOT_DOT ']' REFERS_TO expression | ;

-- for (all | some) iterator : condition
quantified_expression :
    FOR_kw ALL_or_SOME_kw iterator ':' condition ;

-- The quantifier keyword.
ALL_or_SOME_kw : ALL_kw | SOME_kw ;

%%


-- Public face of the ParaSail parser: the parse entry point plus two
-- package-level variables visible to clients.
package parasail_parser is

    -- Boolean flag, initially False.
    -- NOTE(review): not read anywhere in the code visible in this file;
    -- presumably consulted by the lexer or generated parser -- confirm.
    echo : Boolean := False;

    -- Running count of reported errors; starts at zero and is
    -- incremented by yyerror in the package body.
    number_of_errors : Natural := 0;

    -- Entry point of the parser.  Its body is supplied in the package
    -- body at the ##%procedure_parse marker.
    procedure yyparse;

end parasail_parser;

with parasail_tokens, parasail_lex_io, parasail_goto, parasail_shift_reduce;
with parasail_lex, text_io;

use  parasail_tokens, parasail_lex_io, parasail_goto, parasail_shift_reduce;
use  parasail_lex, text_io;
package body parasail_parser is

    -- Report one error: count it, then print the message s on a single
    -- line prefixed with "<<< *** ".  Called with no argument, the
    -- message defaults to "syntax error".
    procedure yyerror(s: in string := "syntax error") is
        prefix : constant string := "<<< *** ";
    begin
        -- count first, exactly as before, so the tally is updated even
        -- if the output call fails
        number_of_errors := natural'succ(number_of_errors);
        put_line(prefix & s);
    end yyerror;


-- The line below is the placeholder for the generated yyparse body;
-- it must be kept exactly as written.
##%procedure_parse

end parasail_parser;


------------------

And here is the driver program, also written in Ada:


with parasail_parser, parasail_lex_io, parasail_lex, text_io;
use  parasail_parser, text_io;

with parasail_lex_dfa;
-- Driver: prompts for an input file name, opens it through
-- parasail_lex_io, runs the parser, closes the files and prints the
-- number of errors counted in parasail_parser.number_of_errors.
procedure parasail_main is
  -- Longest file name accepted at the prompt.  The original 80-character
  -- buffer silently cut longer names short.
  max_name_length : constant := 1024;

  in_file_name: string(1..max_name_length);
  last        : natural;
begin
    text_io.put("Enter input file: ");
    text_io.get_line(in_file_name, last);

    if last = 0 then
        -- An empty line was entered: there is nothing to open.
        put_line("*** No input file name given");
        return;
    elsif last = in_file_name'last and then not text_io.end_of_line then
        -- get_line stops once the buffer is full; if the line has not
        -- ended at that point the name was truncated, so refuse it
        -- rather than opening the wrong file.
        put_line("*** Input file name too long");
        return;
    end if;

    parasail_lex_io.open_input(in_file_name(1..last));
    parasail_lex_io.create_output;

    put_line("---- Starting parse ----");

    -- parasail_lex_dfa.aflex_debug := True;

    parasail_lex.linenum;
    yyparse;


    parasail_lex_io.close_input;
    parasail_lex_io.close_output;

    put_line("---- Finished parse ----");
    new_line;
    put(integer'image(number_of_errors));
    put_line(" errors found");


end parasail_main;

No comments:

Post a Comment