From 3ac85d5c44bc46e2f6d44b9145c8d3adadf42b39 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Fri, 29 Sep 2023 12:47:04 +0200 Subject: [PATCH] added test for #1868 and fixed it both by changing external function signatures and the internal type function signature of the generated parser functions for firstAmbiguity --- src/org/rascalmpl/library/ParseTree.rsc | 47 ++++++++++++++----- src/org/rascalmpl/library/Prelude.java | 24 ++++++---- .../rascal/tests/concrete/FirstAmbiguity.rsc | 11 +++++ .../values/RascalFunctionValueFactory.java | 11 +++-- 4 files changed, 69 insertions(+), 24 deletions(-) create mode 100644 src/org/rascalmpl/library/lang/rascal/tests/concrete/FirstAmbiguity.rsc diff --git a/src/org/rascalmpl/library/ParseTree.rsc b/src/org/rascalmpl/library/ParseTree.rsc index 49323a348d0..3289f88fd2c 100644 --- a/src/org/rascalmpl/library/ParseTree.rsc +++ b/src/org/rascalmpl/library/ParseTree.rsc @@ -416,19 +416,27 @@ which leads to the prefix of the `src` fields of the resulting tree. The parse function behaves differently depending of the given keyword parameters: * `allowAmbiguity`: if true then no exception is thrown in case of ambiguity and a parse forest is returned. if false, - * the parser throws an exception during tree building and produces only the first ambiguous subtree in its message. - * if set to `false`, the parse constructs trees in linear time. if set to `true` the parser constructs trees in polynomial time. + the parser throws an exception during tree building and produces only the first ambiguous subtree in its message. + if set to `false`, the parse constructs trees in linear time. if set to `true` the parser constructs trees in polynomial time. * * `hasSideEffects`: if false then the parser is a lot faster when constructing trees, since it does not execute the parse _actions_ in an - * interpreted environment to make side effects (like a symbol table) and it can share more intermediate results as a result. - * - * `firstAmbiguity`: if true, then the parser returns the subforest for the first (left-most innermost) ambiguity instead of a parse tree for - * the entire input string. This is for grammar debugging purposes a much faster solution then waiting for an entire - * parse forest to be constructed in polynomial time. + interpreted environment to make side effects (like a symbol table) and it can share more intermediate results as a result. } @javaClass{org.rascalmpl.library.Prelude} -java &T (value input, loc origin) parser(type[&T] grammar, bool allowAmbiguity=false, bool hasSideEffects=false, bool firstAmbiguity=false, set[Tree(Tree)] filters={}); +java &T (value input, loc origin) parser(type[&T] grammar, bool allowAmbiguity=false, bool hasSideEffects=false, set[Tree(Tree)] filters={}); +@javaClass{org.rascalmpl.library.Prelude} +@synopsis{Generates a parser function that can be used to find the left-most deepest ambiguous sub-sentence.} +@benefits{ +* Instead of trying to build a polynomially sized parse forest, this function only builds the smallest part of +the tree that exhibits ambiguity. This can be done very quickly, while the whole forest could take minutes to hours to construct. +* Use this function for ambiguity diagnostics and regression testing for ambiguity. +} +@pitfalls{ +* The returned sub-tree usually has a different type than the parameter of the type[] symbol that was passed in. +The reason is that sub-trees typically have a different non-terminal than the start non-terminal of a grammar. +} +java Tree (value input, loc origin) firstAmbiguityFinder(type[Tree] grammar, bool hasSideEffects=false, set[Tree(Tree)] filters={}); @synopsis{Generates parsers from a grammar (reified type), where all non-terminals in the grammar can be used as start-symbol.} @description{ @@ -436,7 +444,20 @@ This parser generator behaves the same as the `parser` function, but it produces nonterminal parameter. This can be used to select a specific non-terminal from the grammar to use as start-symbol for parsing. } @javaClass{org.rascalmpl.library.Prelude} -java &U (type[&U] nonterminal, value input, loc origin) parsers(type[&T] grammar, bool allowAmbiguity=false, bool hasSideEffects=false, bool firstAmbiguity=false, set[Tree(Tree)] filters={}); +java &U (type[&U] nonterminal, value input, loc origin) parsers(type[&T] grammar, bool allowAmbiguity=false, bool hasSideEffects=false, set[Tree(Tree)] filters={}); + +@javaClass{org.rascalmpl.library.Prelude} +@synopsis{Generates a parser function that can be used to find the left-most deepest ambiguous sub-sentence.} +@benefits{ +* Instead of trying to build a polynomially sized parse forest, this function only builds the smallest part of +the tree that exhibits ambiguity. This can be done very quickly, while the whole forest could take minutes to hours to construct. +* Use this function for ambiguity diagnostics and regression testing for ambiguity. +} +@pitfalls{ +* The returned sub-tree usually has a different type than the parameter of the type[] symbol that was passed in. +The reason is that sub-trees typically have a different non-terminal than the start non-terminal of a grammar. +} +java Tree (type[Tree] nonterminal, value input, loc origin) firstAmbiguityFinders(type[Tree] grammar, bool hasSideEffects=false, set[Tree(Tree)] filters={}); @synopsis{Parse the input but instead of returning the entire tree, return the trees for the first ambiguous substring.} @description{ @@ -447,10 +468,10 @@ the cost of constructing nested ambiguity clusters. If the input sentence is not ambiguous after all, simply the entire tree is returned. } Tree firstAmbiguity(type[Tree] begin, str input) - = parser(begin, firstAmbiguity=true)(input, |unknown:///|); + = firstAmbiguityFinder(begin)(input, |unknown:///|); Tree firstAmbiguity(type[Tree] begin, loc input) - = parser(begin, firstAmbiguity=true)(input, input); + = firstAmbiguityFinder(begin)(input, input); @javaClass{org.rascalmpl.library.Prelude} @synopsis{Generate a parser and store it in serialized form for later reuse.} @@ -514,7 +535,7 @@ p(type(sort("E"), ()), "e+e", |src:///|); * reifiying types (use of `#`) will trigger the loading of a parser generator anyway. You have to use this notation for types to avoid that: `type(\start(sort("MySort")), ())` to avoid the computation for `#start[A]` } -java &U (type[&U] nonterminal, value input, loc origin) loadParsers(loc savedParsers, bool allowAmbiguity=false, bool hasSideEffects=false, bool firstAmbiguity=false, set[Tree(Tree)] filters={}); +java &U (type[&U] nonterminal, value input, loc origin) loadParsers(loc savedParsers, bool allowAmbiguity=false, bool hasSideEffects=false, set[Tree(Tree)] filters={}); @synopsis{Load a previously serialized parser, for a specific non-terminal, from disk for usage} @description{ @@ -522,7 +543,7 @@ This loader behaves just like ((loadParsers)), except that the resulting parser bound to a specific non-terminal. } @javaClass{org.rascalmpl.library.Prelude} -java &U (value input, loc origin) loadParser(type[&U] nonterminal, loc savedParsers, bool allowAmbiguity=false, bool hasSideEffects=false, bool firstAmbiguity=false, set[Tree(Tree)] filters={}); +java &U (value input, loc origin) loadParser(type[&U] nonterminal, loc savedParsers, bool allowAmbiguity=false, bool hasSideEffects=false, set[Tree(Tree)] filters={}); @synopsis{Yield the string of characters that form the leafs of the given parse tree.} @description{ diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index 66d2bd3e303..7e559004339 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -2343,12 +2343,20 @@ public INode arbNode() { protected final TypeReifier tr; - public IFunction parser(IValue start, IBool allowAmbiguity, IBool hasSideEffects, IBool firstAmbiguity, ISet filters) { - return rascalValues.parser(start, allowAmbiguity, hasSideEffects, firstAmbiguity, filters); + public IFunction parser(IValue start, IBool allowAmbiguity, IBool hasSideEffects, ISet filters) { + return rascalValues.parser(start, allowAmbiguity, hasSideEffects, values.bool(false), filters); + } + + public IFunction firstAmbiguityFinder(IValue start, IBool hasSideEffects, ISet filters) { + return rascalValues.parser(start, values.bool(true), hasSideEffects, values.bool(true), filters); } - public IFunction parsers(IValue start, IBool allowAmbiguity, IBool hasSideEffects, IBool firstAmbiguity, ISet filters) { - return rascalValues.parsers(start, allowAmbiguity, hasSideEffects, firstAmbiguity, filters); + public IFunction parsers(IValue start, IBool allowAmbiguity, IBool hasSideEffects, ISet filters) { + return rascalValues.parsers(start, allowAmbiguity, hasSideEffects, values.bool(false), filters); + } + + public IFunction firstAmbiguityFinders(IValue start, IBool hasSideEffects, ISet filters) { + return rascalValues.parsers(start, values.bool(true), hasSideEffects, values.bool(true), filters); } public void storeParsers(IValue start, ISourceLocation saveLocation) { @@ -2363,18 +2371,18 @@ public void storeParsers(IValue start, ISourceLocation saveLocation) { } } - public IFunction loadParsers(ISourceLocation savedLocation, IBool allowAmbiguity, IBool hasSideEffects, IBool firstAmbiguity, ISet filters) { + public IFunction loadParsers(ISourceLocation savedLocation, IBool allowAmbiguity, IBool hasSideEffects, ISet filters) { try { - return rascalValues.loadParsers(savedLocation, allowAmbiguity, hasSideEffects, firstAmbiguity, filters); + return rascalValues.loadParsers(savedLocation, allowAmbiguity, hasSideEffects, values.bool(false), filters); } catch (IOException | ClassNotFoundException e) { throw RuntimeExceptionFactory.io(e.getMessage()); } } - public IFunction loadParser(IValue grammar, ISourceLocation savedLocation, IBool allowAmbiguity, IBool hasSideEffects, IBool firstAmbiguity, ISet filters) { + public IFunction loadParser(IValue grammar, ISourceLocation savedLocation, IBool allowAmbiguity, IBool hasSideEffects, ISet filters) { try { - return rascalValues.loadParser(grammar, savedLocation, allowAmbiguity, hasSideEffects, firstAmbiguity, filters); + return rascalValues.loadParser(grammar, savedLocation, allowAmbiguity, hasSideEffects, values.bool(false), filters); } catch (IOException | ClassNotFoundException e) { throw RuntimeExceptionFactory.io(e.getMessage()); diff --git a/src/org/rascalmpl/library/lang/rascal/tests/concrete/FirstAmbiguity.rsc b/src/org/rascalmpl/library/lang/rascal/tests/concrete/FirstAmbiguity.rsc new file mode 100644 index 00000000000..b36ee049c22 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/tests/concrete/FirstAmbiguity.rsc @@ -0,0 +1,11 @@ +module lang::rascal::tests::concrete::FirstAmbiguity + +syntax P = E; +syntax E = "e" | E "+" E; + +import ParseTree; + +@issue{1868} +test bool firstAmbDoesNotThrowStaticErrors() { + return amb({E _,E _}) := firstAmbiguity(#P, "e+e+e"); +} \ No newline at end of file diff --git a/src/org/rascalmpl/values/RascalFunctionValueFactory.java b/src/org/rascalmpl/values/RascalFunctionValueFactory.java index 40a89717b74..5f462421c07 100644 --- a/src/org/rascalmpl/values/RascalFunctionValueFactory.java +++ b/src/org/rascalmpl/values/RascalFunctionValueFactory.java @@ -217,9 +217,14 @@ public IFunction parser(IValue reifiedGrammar, IBool allowAmbiguity, IBool hasSi // the return type of the generated parse function is instantiated here to the start nonterminal of // the provided grammar: - Type functionType = tf.functionType(reifiedGrammar.getType().getTypeParameters().getFieldType(0), - tf.tupleType(tf.valueType(), tf.sourceLocationType()), - tf.tupleEmpty()); + Type functionType = !firstAmbiguity.getValue() + ? tf.functionType(reifiedGrammar.getType().getTypeParameters().getFieldType(0), + tf.tupleType(tf.valueType(), tf.sourceLocationType()), + tf.tupleEmpty()) + : tf.functionType(RascalFunctionValueFactory.Tree, + tf.tupleType(tf.valueType(), tf.sourceLocationType()), + tf.tupleEmpty()) + ; Class>parser = getParserClass((IMap) ((IConstructor) reifiedGrammar).get("definitions")); IConstructor startSort = (IConstructor) ((IConstructor) reifiedGrammar).get("symbol");