fixes and doc additions

usethesource · Nov 12, 2024 · 7a4d959 · 7a4d959
1 parent 6606117
commit 7a4d959
Showing 1 changed file with 59 additions and 15 deletions.
diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.rsc b/src/org/rascalmpl/library/util/ErrorRecovery.rsc
@@ -46,9 +46,23 @@ If you want the text of the whole error tree, you can just use string interpolat
 str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringChars([c | char(c) <- chars]);
 
 @javaClass{org.rascalmpl.library.util.ErrorRecovery}
-@synopsis{Error recovery often produces ambiguous trees where errors can be recovered in multiple ways.
-This filter removes error trees until no ambiguities caused by error recovery are left.
-Note that regular ambiguous trees remain in the parse forest unless `allowAmbiguity` is set to false in which case an error is thrown.
+@synopsis{Removes error trees until no ambiguities caused by error recovery are left.}
+@description{
+Error recovery often produces ambiguous trees, because an error can be recovered in multiple ways. Ambiguity
+clusters (`amb`) represent the choices between all the valid prefixes. This filter removes choices until
+only the last one is left.
+
+Note that regular ambiguous trees remain in the parse forest, unless `allowAmbiguity` is set to `false`,
+in which case an error is thrown.
+}
+@benefits{
+* After this filter, only one error is left at every input position that has an error. Downstream
+functionality no longer has to deal with ambiguity caused by error recovery, which makes that code more robust.
+}
+@pitfalls{
+* This filter removes valid prefixes based on heuristics like "shortest error", which may
+remove prefixes that are interesting for downstream processing. In particular, the accuracy of error repair and
+auto-complete may be damaged by this function. It is therefore best used for error recovery, and not for error repair.
 }
 java Tree disambiguateErrors(Tree t, bool allowAmbiguity=true);
 
@@ -85,17 +99,17 @@ then ((filterOptionalErrorTrees)) can be.
 @pitfalls{
 * this algorithm may cut off entire branches which are otherwise fine to extract more information from.
 }
-Tree filterOptionalIndirectErrorTrees(Tree x) = visit(addErrorStats(x)) {
+Tree filterOptionalIndirectErrorTrees(Tree x) = bottom-up visit(addErrorStats(x)) { // drops erroneous elements from lists and optionals, innermost nodes first
     case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_])),[*pre, _sep, appl(_,_, erroneous=true), *post])
-        => appl(p, [*pre, *post])[@\loc=t@\loc]
+        => addStats(appl(p, [*pre, *post])[@\loc=t@\loc])
     case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_])),[appl(_,_, erroneous=true), _sep, *post])
-        => appl(p, post)[@\loc=t@\loc]
+        => addStats(appl(p, post)[@\loc=t@\loc])
     case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_,_,_])),[*pre, _sep1, _sep2, _sep3, appl(_,_, erroneous=true), *post])
-        => appl(p, [*pre, *post])[@\loc=t@\loc]
+        => addStats(appl(p, [*pre, *post])[@\loc=t@\loc])
     case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_,_,_])),[appl(_,_, erroneous=true), _sep1, _sep2, _sep3, *post])
-        => appl(p, post)[@\loc=t@\loc]
+        => addStats(appl(p, post)[@\loc=t@\loc])
     case t:appl(p:regular(/iter|iter-star/(_)),[*pre, appl(_,_, erroneous=true), *post])
-        => appl(p, [*pre, *post])[@\loc=t@\loc]
+        => addStats(appl(p, [*pre, *post])[@\loc=t@\loc])
-    case t:appl(p:regular(opt(_)), appl(_, _, erroneous=true)) 
+    case t:appl(p:regular(opt(_)), [appl(_, _, erroneous=true)]) // the arguments of `appl` are a list: match the single erroneous child inside it
         => appl(p, [])[@\loc=t@\loc]
 } 
@@ -115,11 +129,41 @@ parse forests
 }
 @pitfalls{
 * statistics do not tell the whole truth about sub-trees. Filtering based on these numbers
-must be seen as a heuristic that sometimes pays-off, but sometimes hides crucial information.
+must be seen as a heuristic that sometimes pays off, and often hides crucial information.
 }
 Tree addErrorStats(Tree x) = bottom-up visit(x) {
-    case t:appl(skipped(_), args)   => t[skipped = size(args)][erroneous = true]
-    case t:appl(error(_,_,_), args) => t[skipped = (0 | it + a.skipped | a <- args)][erroneous = true]
-    case t:appl(prod(_,_,_), args)  => t[skipped = (0 | it + a.skipped | a <- args)][erroneous = (false | it || a.erroneous | a <- args)]
-    case t:amb(alts)                => t[skipped = (0 | min([it, a.skipped]) | a <- alts)][erroneous = (false | it && a.erroneous | a <- alts)]
-};
+    case Tree t => addStats(t) // bottom-up: the children of a node have already been through addStats when the node itself is visited
+};
+
+@synopsis{Reusable utility for (re-)computing the `skipped` and `erroneous` statistics of a single Tree node from its direct children.}
+private Tree addStats(t:appl(Production p, args)) = t[skipped = (0 | it + a.skipped | a <- args)][erroneous = (false | it || a.erroneous | a <- args)] when p is prod || p is regular; // `regular` is included so that statistics also propagate through lists and optionals
+private Tree addStats(t:appl(skipped(_), args))  = t[skipped = size(args)][erroneous = true];
+private Tree addStats(t:appl(error(_,_,_), args))= t[skipped = (0 | it + a.skipped | a <- args)][erroneous = true];
+private Tree addStats(t:amb(alts))               = t[skipped = (alts == {} ? 0 : min([a.skipped | a <- alts]))][erroneous = (alts != {} | it && a.erroneous | a <- alts)]; // smallest skip over the alternatives; erroneous only if every alternative is (folding `min` from 0 or `&&` from false always yields 0/false)
+default private Tree addStats(Tree t) = t; // any other node is returned unchanged
+
+@synopsis{Disambiguates error ambiguity clusters by keeping only the alternatives with the smallest number of skipped characters.}
+@benefits{
+* This is an aggressive filter that can greatly reduce the complexity of dealing with recovered parse trees.
+* Chances are that all ambiguity is gone after this filter (alternatives that tie on skipped characters are all kept), which makes downstream processing easier.
+}
+@pitfalls{
+* The trees with the shortest skips are not always the most relevant ones to consider for repair or recovery.
+}
+Tree selectShortestSkips(Tree x) = visit(addErrorStats(x)) {
+    case amb(alts) => amb({ alt | alt <- alts, alt.skipped == fewest })
+        when int fewest := min([ alt.skipped | alt <- alts ])
+}
+
+@synopsis{Disambiguates error ambiguity clusters by keeping only the alternatives with the largest number of skipped characters.}
+@benefits{
+* This is an aggressive filter that can greatly reduce the complexity of dealing with recovered parse trees.
+* Chances are that all ambiguity is gone after this filter (alternatives that tie on skipped characters are all kept), which makes downstream processing easier.
+}
+@pitfalls{
+* The trees with the longest skips are not always the most relevant ones to consider for repair or recovery.
+}
+Tree selectLongestSkips(Tree x) = visit(addErrorStats(x)) {
+    case amb(alts) => amb({ alt | alt <- alts, alt.skipped == most })
+        when int most := max([ alt.skipped | alt <- alts ])
+}