Skip to content

Commit

Permalink
Add YACC generation from LALR grammars.
Browse files Browse the repository at this point in the history
  • Loading branch information
mingodad committed Jul 17, 2023
1 parent 8aa81d2 commit 8539fed
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 11 deletions.
8 changes: 7 additions & 1 deletion playground/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
<option>Ada parser case insensitive</option>
<option>XML parser</option>
<option>C++ parser (bug)</option>
<option>LALR parser (not working)</option>
<option>Bison parser (not working)</option>
<option>DParser parser (not working)</option>
<option>Parse_gen parser (not working)</option>
Expand All @@ -52,7 +53,12 @@
<li><span>Input source</span></li>
<li class="editor-options">
<ul class="editor-header-options">
<li class="option"><label><input id="gen-ebnf" type="checkbox">Gen. EBNF</label></li>
<li class="option"><label>Generate </label>
<select id="generate-action">
<option>none</option>
<option value="ebnf">EBNF grammar</option>
<option value="yacc">YACC grammar</option>
</select></li>
<!--<li class="option"><label><input id="auto-refresh" type="checkbox">Auto Refresh</label></li>-->
<li class="option"><button id="parse" class="parse">Parse</button></li>
</ul>
Expand Down
18 changes: 13 additions & 5 deletions playground/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@ function loadLalr_sample(self) {
codeEditor.getSession().setMode("ace/mode/text");
});
break;
case "LALR parser (not working)":
$.get(base_url + "lalr.g", function( data ) {
grammarEditor.setValue( data );
});
$.get(base_url + "lalr.g", function( data ) {
codeEditor.setValue( data );
codeEditor.getSession().setMode("ace/mode/yaml");
});
break;
case "Bison parser (not working)":
$.get(base_url + "bison.g", function( data ) {
grammarEditor.setValue( data );
Expand Down Expand Up @@ -240,7 +249,6 @@ function loadLalr_sample(self) {
}

//$('#ast-mode').val(localStorage.getItem('optimizationMode') || '2');
$('#gen-ebnf').prop('checked', localStorage.getItem('gen-ebnf') === 'true');
$('#auto-refresh').prop('checked', localStorage.getItem('autoRefresh') === 'true');
$('#parse').prop('disabled', $('#auto-refresh').prop('checked'));

Expand Down Expand Up @@ -294,7 +302,6 @@ function updateLocalStorage() {
localStorage.setItem('grammarText', grammarEditor.getValue());
localStorage.setItem('codeText', codeEditor.getValue());
//localStorage.setItem('optimizationMode', $('#opt-mode').val());
localStorage.setItem('gen-ebnf', $('#gen-ebnf').prop('checked'));
localStorage.setItem('autoRefresh', $('#auto-refresh').prop('checked'));
}

Expand All @@ -310,7 +317,8 @@ function parse() {
const codeText = codeEditor.getValue();

const astMode = $('#ast-mode').val();
const generate_ebnf = $('#gen-ebnf').prop('checked');
const generate_ebnf = $('#generate-action').val() == 'ebnf';
const generate_yacc = $('#generate-action').val() == 'yacc';
const lexer = $('#show-lexer').prop('checked');
let generate_ast = $('#show-ast').prop('checked');
if(generate_ast && astMode == 2)
Expand Down Expand Up @@ -342,7 +350,7 @@ function parse() {

window.setTimeout(() => {
parse_start_time = new Date().getTime();
lalr_parse(grammarText, codeText, lexer, generate_ebnf, generate_ast);
lalr_parse(grammarText, codeText, lexer, generate_ebnf, generate_yacc, generate_ast);

$('#overlay').css({
'z-index': '-1',
Expand Down Expand Up @@ -424,7 +432,7 @@ $('#code-info').on('click', 'li[data-ln]', makeOnClickInInfo(codeEditor));

// Event handling in the AST optimization
$('#opt-mode').on('change', setupTimer);
$('#gen-ebnf').on('change', setupTimer);
$('#generate-action').on('change', setupTimer);
$('#show-lexer').on('change', setupTimer);
$('#auto-refresh').on('change', () => {
updateLocalStorage();
Expand Down
6 changes: 3 additions & 3 deletions playground/lalr_playground.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ static void print_parsetree( const ParseTreeUserData& ast, int level )
}
}

extern "C" int parse(const char *grammar, const unsigned char *input, int dumpLexer, int generate_ebnf, int generate_parsetree)
extern "C" int parse(const char *grammar, const unsigned char *input, int dumpLexer, int generate_ebnf, int generate_yacc, int generate_parsetree)
{
int err = 0; // currently, zero is always returned; result codes for each part
// are sent to JS via set_result()
Expand All @@ -209,7 +209,7 @@ extern "C" int parse(const char *grammar, const unsigned char *input, int dumpLe

lalr::GrammarCompiler compiler;
lalr::ErrorPolicy error_policy;
int errors = compiler.compile( grammar, grammar + strlen(grammar), &error_policy, generate_ebnf != 0);
int errors = compiler.compile( grammar, grammar + strlen(grammar), &error_policy, generate_ebnf, generate_yacc);

if (errors != 0) {
fprintf(stderr, "Error compiling grammar. "
Expand All @@ -218,7 +218,7 @@ extern "C" int parse(const char *grammar, const unsigned char *input, int dumpLe
goto done;
}
else {
if(generate_ebnf) {
if(generate_ebnf || generate_yacc) {
err = -3;
parse_result = 0;
goto done;
Expand Down
164 changes: 164 additions & 0 deletions src/lalr/Grammar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,11 @@ static bool isTerminalRegex(const GrammarSymbol *symbol)
&& symbol->lexeme_type() == LexemeType::LEXEME_REGULAR_EXPRESSION;
}

/// Reports whether `symbol` has the symbol type SYMBOL_TERMINAL.
/// The pointer is dereferenced unconditionally, so it must not be null.
static bool isTerminal(const GrammarSymbol *symbol)
{
    const bool is_terminal = ( SymbolType::SYMBOL_TERMINAL == symbol->symbol_type() );
    return is_terminal;
}

void Grammar::genEBNF()
{
printf(
Expand Down Expand Up @@ -390,6 +395,165 @@ void Grammar::genEBNF()
printf("\n\n// end EBNF\n");
}

void Grammar::genYACC()
{
const char *prefix = "";
printf(
"//\n"
"// YACC grammar for %s"
"//\n"
, identifier().c_str()
);
printf("\n/*Tokens*/\n");
GrammarSymbol* last_production_symbol = NULL;
bool production_continuation = false;
for ( vector<unique_ptr<GrammarProduction>>::const_iterator i = productions().begin(); i != productions().end(); ++i )
{
GrammarProduction* production = i->get();
LALR_ASSERT( production );
GrammarSymbol *curr_symbol = production->symbol();
bool same_production = last_production_symbol && last_production_symbol == curr_symbol;
vector<unique_ptr<GrammarProduction>>::const_iterator production_next = (i+1);
bool hasMoreProductions = same_production || (production_next != productions().end() && production_next->get()->symbol() == curr_symbol);
if(!same_production)
{
if( isTerminalRegex(curr_symbol)
|| (!hasMoreProductions && production->length() == 1 && isTerminal(production->symbol_by_position(0))) )
{
printf("%%token %s ;\n", curr_symbol->lexeme().c_str());
}
}
last_production_symbol = curr_symbol;
}

int max_prec = 0;
for ( vector<unique_ptr<GrammarSymbol>>::const_iterator i = symbols().begin(); i != symbols().end(); ++i )
{
GrammarSymbol* curr_symbol = i->get();
LALR_ASSERT( curr_symbol );

if(curr_symbol->precedence() > max_prec)
{
max_prec = curr_symbol->precedence();
}
}

if(max_prec > 0)
{
printf("\n/*Asociativity/Precedence*/\n");
for(int ip = 1; ip <= max_prec; ++ip)
{
production_continuation = false;
int last_prec = 0;
for ( vector<unique_ptr<GrammarSymbol>>::const_iterator i = symbols().begin(); i != symbols().end(); ++i )
{
GrammarSymbol* curr_symbol = i->get();
LALR_ASSERT( curr_symbol );

if(curr_symbol->precedence() == ip)
{
if(curr_symbol->precedence() > max_prec)
{
max_prec = curr_symbol->precedence();
}
if(last_prec != ip )
{
switch(curr_symbol->associativity())
{
case lalr::Associativity::ASSOCIATE_NONE:
printf("\n%%nonassoc ");
break;
case lalr::Associativity::ASSOCIATE_LEFT:
printf("\n%%left ");
break;
case lalr::Associativity::ASSOCIATE_RIGHT:
printf("\n%%right ");
break;
case lalr::Associativity::ASSOCIATE_PREC:
printf("\n%%precedence ");
break;
}
last_prec = ip;
printf(" /*%d*/ ", ip);
}
if(last_prec == curr_symbol->precedence())
{
ouptputTerminal(curr_symbol);
printf(" ");
}
}
}
if(last_prec > 0)
printf(";");
}
}

printf("\n\n%%%%\n");
last_production_symbol = NULL;
production_continuation = false;
for ( vector<unique_ptr<GrammarProduction>>::const_iterator i = productions().begin(); i != productions().end(); ++i )
{
GrammarProduction* production = i->get();
LALR_ASSERT( production );
GrammarSymbol *curr_symbol = production->symbol();
bool same_production = last_production_symbol && last_production_symbol == curr_symbol;
vector<unique_ptr<GrammarProduction>>::const_iterator production_next = (i+1);
bool hasMoreProductions = same_production || (production_next != productions().end() && production_next->get()->symbol() == curr_symbol);
if (isTerminalRegex(curr_symbol)
|| (!hasMoreProductions && production->length() == 1 && isTerminal(production->symbol_by_position(0))))
{
continue;
}
if(same_production) {
production_continuation = true;
//printf(" //%s ::= %d", production->symbol()->lexeme().c_str(), production->length());
}
else {
production_continuation = false;
const char *sym_prefix = "";
const char *sym_name = curr_symbol->lexeme().c_str();
if(sym_name[0] == '.')
{
continue;
}
//printf("\n\n%s%s ::= // %d\n\t", sym_prefix, sym_name, production->length());
if(last_production_symbol)
{
printf("\n%s\t;", prefix);
}
printf("\n\n%s%s%s :\n%s\t", prefix, sym_prefix, sym_name, prefix);
}
if(production->length() > 0) {
for(int elm=0; elm < production->length(); ++elm) {
const GrammarSymbol *sym = production->symbol_by_position(elm);
if(production_continuation) {
production_continuation = false;
printf("\n%s\t| ", prefix);
}
else printf(" ");
ouptputTerminal(sym);
}
}
else {
if(production_continuation) {
production_continuation = false;
printf("\n%s\t| ", prefix);
}
printf("/*empty*/");
}
if(production->precedence_symbol()) {
printf(" %%prec /*%d*/ ", production->precedence_symbol()->precedence());
ouptputTerminal(production->precedence_symbol());
}
last_production_symbol = curr_symbol;
}
if(last_production_symbol)
{
printf("\n%s\t;", prefix);
}
printf("\n\n// end YACC\n");
}

void Grammar::genNakedGrammar()
{
printf( "%s {\n", identifier_.c_str());
Expand Down
1 change: 1 addition & 0 deletions src/lalr/Grammar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class Grammar
Grammar& identifier( const char* identifier, int line, int column );
bool is_case_insensitive() const {return active_case_insensitive_;}
void genEBNF();
void genYACC();
void genNakedGrammar();

private:
Expand Down
7 changes: 6 additions & 1 deletion src/lalr/GrammarCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void GrammarCompiler::labels_enabled( bool enabled )
labels_enabled_ = enabled;
}

int GrammarCompiler::compile( const char* begin, const char* end, ErrorPolicy* error_policy, bool genEBNF )
int GrammarCompiler::compile( const char* begin, const char* end, ErrorPolicy* error_policy, bool genEBNF, bool genYACC )
{
Grammar grammar;

Expand All @@ -87,6 +87,11 @@ int GrammarCompiler::compile( const char* begin, const char* end, ErrorPolicy* e
//grammar.genNakedGrammar();
return errors;
}
if(genYACC)
{
grammar.genYACC();
return errors;
}
bool isCaseInsensitive = grammar.is_case_insensitive();
GrammarGenerator generator;
errors = generator.generate( grammar, error_policy );
Expand Down
2 changes: 1 addition & 1 deletion src/lalr/GrammarCompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class GrammarCompiler
const RegexCompiler* whitespace_lexer() const;
const ParserStateMachine* parser_state_machine() const;
void labels_enabled( bool enabled );
int compile( const char* begin, const char* end, ErrorPolicy* error_policy = nullptr, bool genEBNF = false );
int compile( const char* begin, const char* end, ErrorPolicy* error_policy = nullptr, bool genEBNF = false, bool genYACC = false );
void showStats();

private:
Expand Down

0 comments on commit 8539fed

Please sign in to comment.