Skip to content

Commit

Permalink
fixes and doc additions
Browse files Browse the repository at this point in the history
  • Loading branch information
jurgenvinju committed Nov 12, 2024
1 parent 6606117 commit 7a4d959
Showing 1 changed file with 59 additions and 15 deletions.
74 changes: 59 additions & 15 deletions src/org/rascalmpl/library/util/ErrorRecovery.rsc
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,23 @@ If you want the text of the whole error tree, you can just use string interpolat
str getErrorText(appl(error(_, _, _), [*_, appl(skipped(_), chars)])) = stringChars([c | char(c) <- chars]);

@javaClass{org.rascalmpl.library.util.ErrorRecovery}
@synopsis{This filter removes error trees until no ambiguities caused by error recovery are left.}
@description{
Error recovery often produces ambiguous trees where errors can be recovered in multiple ways. Ambiguity
clusters (`amb`) represent the choices between all the valid prefixes. This filter removes choices until
only one is left.

Note that regular ambiguous trees remain in the parse forest, unless `allowAmbiguity` is set to false, in
which case an error is thrown.
}
@benefits{
* After this algorithm only one error is left at every input position with an error. Downstream
functionality does not have to deal with ambiguity anymore, making the code robust.
}
@pitfalls{
* This algorithm removes valid prefixes based on heuristics like "shortest error", which may
remove interesting prefixes for downstream processing. In particular the accuracy of error repair and auto-complete
may be damaged by this function. So it is best to use it for error recovery, and not for error repair.
}
java Tree disambiguateErrors(Tree t, bool allowAmbiguity=true);

Expand Down Expand Up @@ -85,17 +99,17 @@ then ((filterOptionalErrorTrees)) can be.
@pitfalls{
* this algorithm may cut off entire branches which are otherwise fine to extract more information from.
}
Tree filterOptionalIndirectErrorTrees(Tree x) = bottom-up visit(addErrorStats(x)) {
  // The input is first annotated by addErrorStats, so every case below can test the
  // `erroneous` keyword field of a child. Each rewritten list node keeps the source
  // location of the node it replaces and has its statistics re-computed by addStats.

  // Separated lists with a single separator symbol: drop an erroneous element
  // together with the separator in front of it ...
  case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_])),[*pre, _sep, appl(_,_, erroneous=true), *post])
    => addStats(appl(p, [*pre, *post])[@\loc=t@\loc])
  // ... or, when the erroneous element is the first one, together with the separator after it.
  case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_])),[appl(_,_, erroneous=true), _sep, *post])
    => addStats(appl(p, post)[@\loc=t@\loc])

  // Separated lists with three separator symbols: same two situations as above,
  // but three separator children are dropped along with the erroneous element.
  case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_,_,_])),[*pre, _sep1, _sep2, _sep3, appl(_,_, erroneous=true), *post])
    => addStats(appl(p, [*pre, *post])[@\loc=t@\loc])
  case t:appl(p:regular(/iter-sep|iter-star-sep/(_,[_,_,_])),[appl(_,_, erroneous=true), _sep1, _sep2, _sep3, *post])
    => addStats(appl(p, post)[@\loc=t@\loc])

  // Lists without separators: simply drop the erroneous element.
  case t:appl(p:regular(/iter|iter-star/(_)),[*pre, appl(_,_, erroneous=true), *post])
    => addStats(appl(p, [*pre, *post])[@\loc=t@\loc])

  // Optionals: an erroneous optional becomes an empty optional. The children of an
  // `appl` node are a list, so the erroneous child has to be matched as a
  // one-element list; matching it as a bare tree can never succeed.
  case t:appl(p:regular(opt(_)), [appl(_, _, erroneous=true)])
    => appl(p, [])[@\loc=t@\loc]
}
Expand All @@ -115,11 +129,41 @@ parse forests
}
@pitfalls{
* statistics do not tell the whole truth about sub-trees. Filtering based on these numbers
must be seen as a heuristic that sometimes pays off, and often hides crucial information.
}
Tree addErrorStats(Tree x) = bottom-up visit(x) {
  // Children are visited before their parents, so when addStats computes the
  // statistics of a node, the statistics of its children are already in place.
  case Tree t => addStats(t)
};

@synopsis{Reusable utility for re-computing error statistics per Tree node.}
@description{
Sets the `skipped` and `erroneous` keyword fields of a single node, based on the values
that are already stored on its direct children. It does not recurse; callers are expected
to apply it bottom-up.
}
// Normal productions: sum the skipped counts of the children; erroneous if any child is.
private Tree addStats(t:appl(prod(_,_,_), args)) = t[skipped = (0 | it + a.skipped | a <- args)][erroneous = (false | it || a.erroneous | a <- args)];
// NOTE(review): lists and optionals (`regular`) used to fall through to the default below,
// which made the explicit addStats calls on rewritten list nodes no-ops and stopped error
// statistics from propagating through lists. They are treated like normal productions here;
// confirm this matches the intended semantics.
private Tree addStats(t:appl(regular(_), args)) = t[skipped = (0 | it + a.skipped | a <- args)][erroneous = (false | it || a.erroneous | a <- args)];
// Skipped nodes: the number of children is the skipped count.
private Tree addStats(t:appl(skipped(_), args)) = t[skipped = size(args)][erroneous = true];
// Error nodes: always erroneous; skipped count is the sum over the children.
private Tree addStats(t:appl(error(_,_,_), args))= t[skipped = (0 | it + a.skipped | a <- args)][erroneous = true];
// Ambiguity clusters: the smallest skipped count among the alternatives, and erroneous only
// if every alternative is erroneous. The reducers must not start from 0 and false: a minimum
// seeded with 0 is always 0, and a conjunction seeded with false is always false.
// An empty cluster keeps the values 0 and false.
private Tree addStats(t:amb(alts)) = t[skipped = (alts == {} ? 0 : min([a.skipped | a <- alts]))][erroneous = (alts != {} && (true | it && a.erroneous | a <- alts))];
// Everything else is returned unchanged.
private default Tree addStats(Tree t) = t;

@synopsis{Keeps, in every ambiguity cluster, only the alternatives with the smallest number of skipped characters.}
@benefits{
* This is an aggressive filter that can greatly reduce the complexity of dealing with recovered parse trees.
* Chances are that after this filter all ambiguity has been removed, making downstream processing easier.
}
@pitfalls{
* The trees with the shortest skips are not always the most relevant trees to consider for repair or recovery.
}
Tree selectShortestSkips(Tree x) = visit(addErrorStats(x)) {
  case amb(alts): {
    // the statistics were added by addErrorStats, so every alternative carries `skipped`
    int fewest = min([alt.skipped | alt <- alts]);
    insert amb({ alt | alt <- alts, alt.skipped == fewest });
  }
}

@synopsis{Keeps, in every ambiguity cluster, only the alternatives with the largest number of skipped characters.}
@benefits{
* This is an aggressive filter that can greatly reduce the complexity of dealing with recovered parse trees.
* Chances are that after this filter all ambiguity has been removed, making downstream processing easier.
}
@pitfalls{
* The trees with the longest skips are not always the most relevant trees to consider for repair or recovery.
}
Tree selectLongestSkips(Tree x) = visit(addErrorStats(x)) {
  case amb(alts): {
    // the statistics were added by addErrorStats, so every alternative carries `skipped`
    int most = max([alt.skipped | alt <- alts]);
    insert amb({ alt | alt <- alts, alt.skipped == most });
  }
}

0 comments on commit 7a4d959

Please sign in to comment.