From 7a4d95909bf439af0f722394b0587086048fa928 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Tue, 12 Nov 2024 09:41:17 +0100 Subject: [PATCH] fixes and doc additions --- .../rascalmpl/library/util/ErrorRecovery.rsc | 74 +++++++++++++++---- 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/src/org/rascalmpl/library/util/ErrorRecovery.rsc b/src/org/rascalmpl/library/util/ErrorRecovery.rsc index e502592c5bf..a4dffa21367 100644 --- a/src/org/rascalmpl/library/util/ErrorRecovery.rsc +++ b/src/org/rascalmpl/library/util/ErrorRecovery.rsc @@ -46,9 +46,23 @@ If you want the text of the whole error tree, you can just use string interpolat str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringChars([c | char(c) <- chars]); @javaClass{org.rascalmpl.library.util.ErrorRecovery} -@synopsis{Error recovery often produces ambiguous trees where errors can be recovered in multiple ways. -This filter removes error trees until no ambiguities caused by error recovery are left. -Note that regular ambiguous trees remain in the parse forest unless `allowAmbiguity` is set to false in which case an error is thrown. +@synopsis{This filter removes error trees until no ambiguities caused by error recovery are left.} +@description{ +Error recovery often produces ambiguous trees where errors can be recovered in multiple ways. Ambiguity +clusters (`amb`) represent the choices between all the valid prefixes. This filter removes choices until +the last one is left. + +Note that regular ambiguous trees remain in the parse forest unless `allowAmbiguity` is set to false in +which case an error is thrown. +} +@benefits{ +* after this algorithm only one error is left at every input position with an error. Downstream +functionality does not have to deal with ambiguity anymore, making the code robust. 
+} +@pitfalls{ +* this algorithm removes valid prefixes based on heuristics like "shortest error", which may +remove interesting prefixes for downstream processing. In particular the accuracy of error repair and auto-complete +may be damaged by this function. So it is best to use it for error recovery, and not for error repair. } java Tree disambiguateErrors(Tree t, bool allowAmbiguity=true); @@ -85,17 +99,17 @@ then ((filterOptionalErrorTrees)) can be. @pitfalls{ * this algorithm may cut off entire branches which are otherwise fine to extract more information from. } -Tree filterOptionalIndirectErrorTrees(Tree x) = visit(addErrorStats(x)) { +Tree filterOptionalIndirectErrorTrees(Tree x) = bottom-up visit(addErrorStats(x)) { case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_])),[*pre, _sep, appl(_,_, erroneous=true), *post]) - => appl(p, [*pre, *post])[@\loc=t@\loc] + => addStats(appl(p, [*pre, *post])[@\loc=t@\loc]) case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_])),[appl(_,_, erroneous=true), _sep, *post]) - => appl(p, post)[@\loc=t@\loc] + => addStats(appl(p, post)[@\loc=t@\loc]) case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_,_,_])),[*pre, _sep1, _sep2, _sep3, appl(_,_, erroneous=true), *post]) - => appl(p, [*pre, *post])[@\loc=t@\loc] + => addStats(appl(p, [*pre, *post])[@\loc=t@\loc]) case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_,_,_])),[appl(_,_, erroneous=true), _sep1, _sep2, _sep3, *post]) - => appl(p, post)[@\loc=t@\loc] + => addStats(appl(p, post)[@\loc=t@\loc]) case t:appl(p:regular(/iter|iter-star/(_)),[*pre, appl(_,_, erroneous=true), *post]) - => appl(p, [*pre, *post])[@\loc=t@\loc] + => addStats(appl(p, [*pre, *post])[@\loc=t@\loc]) case t:appl(p:regular(opt(_)), appl(_, _, erroneous=true)) => appl(p, [])[@\loc=t@\loc] } @@ -115,11 +129,41 @@ parse forests } @pitfalls{ * statistics do not tell the whole truth about sub-trees. 
Filtering based on these numbers -must be seen as a heuristic that sometimes pays-off, but sometimes hides crucial information. +must be seen as a heuristic that sometimes pays off, and often hides crucial information. } Tree addErrorStats(Tree x) = bottom-up visit(x) { - case t:appl(skipped(_), args) => t[skipped = size(args)][erroneous = true] - case t:appl(error(_,_,_), args) => t[skipped = (0 | it + a.skipped | a <- args)][erroneous = true] - case t:appl(prod(_,_,_), args) => t[skipped = (0 | it + a.skipped | a <- args)][erroneous = (false | it || a.erroneous | a <- args)] - case t:amb(alts) => t[skipped = (0 | min([it, a.skipped]) | a <- alts)][erroneous = (false | it && a.erroneous | a <- alts)] -}; \ No newline at end of file + case Tree t => addStats(t) +}; + +@synopsis{Reusable utility for re-computing error statistics per Tree node; for an `amb` cluster `skipped` is the minimum over its alternatives and `erroneous` holds only if all alternatives are erroneous.} +private Tree addStats(t:appl(prod(_,_,_), args)) = t[skipped = (0 | it + a.skipped | a <- args)][erroneous = (false | it || a.erroneous | a <- args)]; +private Tree addStats(t:appl(skipped(_), args)) = t[skipped = size(args)][erroneous = true]; +private Tree addStats(t:appl(error(_,_,_), args))= t[skipped = (0 | it + a.skipped | a <- args)][erroneous = true]; +private Tree addStats(t:amb(alts)) = t[skipped = min([a.skipped | a <- alts])][erroneous = (true | it && a.erroneous | a <- alts)]; +default private Tree addStats(Tree t) = t; + +@synopsis{Disambiguates error ambiguity clusters by selecting the alternatives with the smallest number of skipped characters} +@benefits{ +* this is an aggressive filter that can greatly reduce the complexity of dealing with recovered parse trees. +* chances are that after this filter all ambiguity has been removed, making downstream processing easier. +} +@pitfalls{ +* the trees with the shortest skips are not always the most relevant trees to consider for repair or recovery. 
+} +Tree selectShortestSkips(Tree x) = visit(addErrorStats(x)) { + case amb(alts) => amb({ a | a <- alts, a.skipped == minimum}) + when int minimum := min([a.skipped | a <- alts]) +} + +@synopsis{Disambiguates error ambiguity clusters by selecting the alternatives with the largest amount of skipped characters} +@benefits{ +* this is an aggressive filter that can greatly reduce the complexity of dealing with recovered parse trees. +* chances are that after this filter all ambiguity has been removed, making downstream processing easier. +} +@pitfalls{ +* the trees with the longest skips are not always the most relevant trees to consider for repair or recovery; alternatives that tie for the longest skip are all kept, so a cluster can remain ambiguous. +} +Tree selectLongestSkips(Tree x) = visit(addErrorStats(x)) { + case amb(alts) => amb({ a | a <- alts, a.skipped == maximum}) + when int maximum := max([a.skipped | a <- alts]) +} \ No newline at end of file