SWAT-engineering · sungshik · Oct 22, 2024 · Oct 4, 2024 · Oct 4, 2024 · Oct 4, 2024
diff --git a/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/oniguruma/Conversion.rsc
@@ -60,15 +60,15 @@ RegExp toRegExp(Grammar g, list[Symbol] symbols, set[Attr] attributes) {
 RegExp toRegExp(Grammar g, \label(_, symbol))
     = toRegExp(g, symbol);
 RegExp toRegExp(Grammar g, \parameter(_, _)) {
-    throw "Presumably unreachable..."; } // Covered by `lookup` (which substitutes actuals for formals)
+    throw "Presumably unreachable..."; } // Covered by `prodsOf` (which substitutes actuals for formals)
 
 // `ParseTree`: Start
 RegExp toRegExp(Grammar g, \start(symbol))
     = toRegExp(g, symbol);
 
 // `ParseTree`: Non-terminals
 RegExp toRegExp(Grammar g, Symbol s)
-    = infix("|", [toRegExp(g, p) | p <- lookup(g, s)]) when isNonTerminalType(s);
+    = infix("|", [toRegExp(g, p) | p <- prodsOf(g, s)]) when isNonTerminalType(s);
 
 // `ParseTree`: Terminals
 RegExp toRegExp(Grammar _, \lit(string))
@@ -103,7 +103,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
     prefixConditions = [c | c <- conditions, isPrefixCondition(c)];
     suffixConditions = [c | c <- conditions, isSuffixCondition(c)];
     deleteConditions = [c | c <- conditions, isDeleteCondition(c)];
-    
+
     // Convert except conditions (depends on previous conversion)
     if (_ <- exceptConditions) {
         if (/\choice(symbol, alternatives) := g) {
@@ -112,7 +112,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
                 = \label(l, _) := def
                 ? \except(l) notin exceptConditions
                 : true;
-            
+
             re = infix("|", toRegExps(g, {a | a <- alternatives, keep(a)}));
         }
     }
@@ -130,7 +130,7 @@ RegExp toRegExp(Grammar g, \conditional(symbol, conditions)) {
     // Convert delete conditions (depends on previous conversions)
     if (_ <- deleteConditions) {
         RegExp delete = infix("|", [toRegExp(g, s) | \delete(s) <- deleteConditions]);
-            
+
         // TODO: Explain this complicated conversion...
         str string = "(?=(?\<head\><re.string>)(?\<tail\>.*)$)(?!(?:<delete.string>)\\k\<tail\>$)\\k\<head\>";
         list[str] categories = ["", *re.categories, "", *delete.categories];
@@ -196,7 +196,7 @@ str encode(int char) = preEncoded[char] ? "\\x{<toHex(char)>}";
 private set[int] charRange(str from, str to) = {*[charAt(from, 0)..charAt(to, 0) + 1]};
 
 private str toHex(int i)
-    = i < 16 
+    = i < 16
     ? hex[i]
     : toHex(i / 16) + toHex(i % 16);
 

diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc
@@ -37,18 +37,20 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) {
     Checks if symbol `s` is recursive in grammar `g`
 }
 
-bool isRecursive(Grammar g, Symbol s) {
-    set[Symbol] getChildren(Symbol s) 
-        = {s | p <- lookup(g, s), /Symbol s := p.symbols};
+bool isRecursive(Grammar g, Symbol s, set[Symbol] checking = {})
+    = s in checking
+    ? true
+    : any(p <- prodsOf(g, delabel(s)),
+          /Symbol child := p.symbols,
+          isRecursive(g, child, checking = checking + s));
 
-    bool check(set[Symbol] checking, Symbol s)
-        = s in checking
-        ? true
-        : any(child <- getChildren(s), check(checking + s, child));
-
-    return check({}, s);
+@synopsis{
+    Checks if production `p` is recursive in grammar `g`
 }
 
+bool isRecursive(Grammar g, Production p)
+    = any(/Symbol s := p.symbols, isRecursive(g, s));
+
 @synopsis{
     Representation of a pointer to a symbol in (the list of symbols of) a
     production. This is useful to distinguish between different occurrences of
@@ -70,7 +72,7 @@ alias Pointer = tuple[Production p, int index];
 
     ```
     lexical X  = Y;
-    lexical Y  = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">"; 
+    lexical Y  = alt1: "[" "[" "[" Z1 "]" "]" "]" | alt2: "<" Z2 ">";
     lexical Z1 = "foo" "bar";
     lexical Z2 = "baz";
     ```
@@ -80,7 +82,7 @@ alias Pointer = tuple[Production p, int index];
       - `<X,0>`
       - `<Y.alt1,3>`
       - `<Z1,1>`
-    
+
     The list of pointers to `"qux"` is just empty.
 }
 
@@ -92,7 +94,7 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward())
             if (ith == needle) {
                 return [<haystack, i>];
             }
-            for (isNonTerminalType(ith), child <- lookup(g, ith)) {
+            for (isNonTerminalType(ith), child <- prodsOf(g, ith)) {
                 if (list[Pointer] l: [_, *_] := doFind(doing + haystack, child, s)) {
                     return [<haystack, i>] + l;
                 }
@@ -106,19 +108,26 @@ list[Pointer] find(Grammar g, Production p, Symbol s, Direction dir = forward())
 }
 
 @synopsis{
-    Lookups a list of productions for symbol `s` in grammar `g`, replacing
+    Gets the set of productions that contain symbol `s` in grammar `g`
+}
+
+set[Production] prodsWith(Grammar g, Symbol s)
+    = {parent | /parent: prod(_, /Symbol _: s, _) := g};
+
+@synopsis{
+    Gets the list of productions of symbol `s` in grammar `g`, replacing
     formal parameters with actual parameters when needed
 }
 
-list[Production] lookup(Grammar g, s: \parameterized-sort(name, actual))
+list[Production] prodsOf(Grammar g, s: \parameterized-sort(name, actual))
     = [subst(p, formal, actual) | /p: prod(\parameterized-sort(name, formal), _, _) := g.rules[s] ? []]
     + [subst(p, formal, actual) | /p: prod(label(_, \parameterized-sort(name, formal)), _, _) := g.rules[s] ? []];
 
-list[Production] lookup(Grammar g, s: \parameterized-lex(name, actual))
+list[Production] prodsOf(Grammar g, s: \parameterized-lex(name, actual))
     = [subst(p, formal, actual) | /p: prod(\parameterized-lex(name, formal), _, _) := g.rules[s] ? []]
     + [subst(p, formal, actual) | /p: prod(label(_, \parameterized-lex(name, formal)), _, _) := g.rules[s] ? []];
 
-default list[Production] lookup(Grammar g, Symbol s)
+default list[Production] prodsOf(Grammar g, Symbol s)
     = [p | /p: prod(s, _, _) := g.rules[s] ? []]
     + [p | /p: prod(label(_, s), _, _) := g.rules[s] ? []];
 
@@ -130,7 +139,7 @@ default list[Production] lookup(Grammar g, Symbol s)
 &T subst(&T t, list[Symbol] from, list[Symbol] to)
     = subst(t, toMapUnique(zip2(from, to)))
     when size(from) == size(to);
-    
+
 private &T subst(&T t, map[Symbol, Symbol] m)
     = visit (t) { case Symbol s => m[s] when s in m };
 

diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Categories.rsc
@@ -0,0 +1,47 @@
+module lang::rascal::grammar::analyze::Categories
+
+import Grammar;
+import ParseTree;
+
+import lang::rascal::grammar::Util;
+
+@synopsis{
+    Special value to indicate that a production has no category
+}
+
+public str NO_CATEGORY = "";
+
+@synopsis{
+    Gets a set of categories such that, for each category, there exists a string
+    with that category produced by production `p`, as part of a string produced
+    by a start production of grammar `g`
+}
+
+set[str] getCategories(Grammar g, Production p)
+    = getCategoriesByProduction(g)[p];
+
+@memo
+private map[Production, set[str]] getCategoriesByProduction(Grammar g) {
+    map[Production, set[str]] ret = (p: {} | /p: prod(_, _, _) := g);
+
+    void doGet(Production p, set[str] parentCategories) {
+        set[str] categories = {c | /\tag("category"(str c)) := p};
+
+        set[str] old = ret[p];
+        set[str] new = _ <- categories ? categories : old + parentCategories;
+        ret[p] = new;
+
+        // If the new categories of `p` are different from the old ones, then
+        // propagate these changes to the children of `p`
+        for (old != new, /Symbol s := p.symbols, child <- prodsOf(g, delabel(s))) {
+            doGet(child, new);
+        }
+    }
+
+    // Propagate categories from the roots of the grammar
+    for (root: prod(\start(_), _, _) <- ret) {
+        doGet(root, {NO_CATEGORY});
+    }
+
+    return ret;
+}
diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc
@@ -49,7 +49,7 @@ DelimiterPair getInnerDelimiterPair(Grammar g, Symbol s, bool getOnlyFirst = fal
     ```
     lexical X  = Y;
     lexical Y  = Y1 | Y2;
-    lexical Y1 = "[" Z "]"; 
+    lexical Y1 = "[" Z "]";
     lexical Y2 = "[" Z ")" [a-z];
     lexical Z  = [a-z];
     ```
@@ -83,7 +83,7 @@ private map[Symbol, Maybe[Symbol]] getInnerDelimiterBySymbol(Grammar g, Directio
 @memo
 private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g, Direction direction, bool getOnlyFirst = false) {
     map[Production, Maybe[Symbol]] ret = (p: nothing() | /p: prod(_, _, _) := g);
-        
+
     solve (ret) {
         for (p <- ret, ret[p] == nothing()) {
             for (s <- reorder(p.symbols, direction)) {
@@ -108,7 +108,7 @@ private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g,
 }
 
 private set[Production] getChildren(Grammar g, Symbol s)
-    = {*lookup(g, s)};
+    = {*prodsOf(g, s)};
 
 @synopsis{
     Gets the unique rightmost delimiter (`begin`) and the unique leftmost
@@ -122,7 +122,7 @@ private set[Production] getChildren(Grammar g, Symbol s)
     ```
     lexical X  = Y;
     lexical Y  = Y1 | Y2;
-    lexical Y1 = "[" Z "]"; 
+    lexical Y1 = "[" Z "]";
     lexical Y2 = "[" Z ")" [a-z];
     lexical Z  = [a-z];
     ```
@@ -166,7 +166,7 @@ private map[Symbol, Maybe[Symbol]] getOuterDelimiterBySymbol(Grammar g, Directio
             ret[s] = unique(delimiters);
         }
     }
-    
+
     return ret;
 }
 

diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Newlines.rsc
@@ -55,7 +55,7 @@ private map[Production, Maybe[set[Segment]]] getSegmentsByProduction(Grammar g)
 }
 
 private Maybe[set[Segment]] getSegmentsWithEnvironment(
-        Grammar g, list[Symbol] symbols, 
+        Grammar g, list[Symbol] symbols,
         map[Production, Maybe[set[Segment]]] env) {
 
     // General idea: Recursively traverse `symbols` from left to right, while
@@ -73,9 +73,9 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
         set[Symbol] nested = {s | /Symbol s := head};
 
         Maybe[set[Segment]] finished = get(running, [], final = tail == []);
-        
+
         // If the head contains a non-terminal, then: (1) finish the running
-        // segment; (2) lookup the segments of the non-terminals in the
+        // segment; (2) look up the segments of the non-terminals in the
         // environment, if any; (3) compute the segments of the tail. Return the
         // union of 1-3.
         if (any(s <- nested, isNonTerminalType(s))) {
@@ -85,15 +85,15 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
             sets += finished;
 
             // (2)
-            sets += for (s <- nested, isNonTerminalType(s), p <- lookup(g, s)) {
+            sets += for (s <- nested, isNonTerminalType(s), p <- prodsOf(g, s)) {
 
                 bool isInitial(Segment seg)
                     = seg.initial && running.initial && running.symbols == [];
                 bool isFinal(Segment seg)
                     = seg.final && tail == [];
                 Segment update(Segment seg)
                     = seg[initial = isInitial(seg)][final = isFinal(seg)];
-                
+
                 append just(segs) := env[p] ? just({update(seg) | seg <- segs}) : nothing();
             }
 
@@ -103,21 +103,21 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
             // Return union
             return (sets[0] | union(it, \set) | \set <- sets[1..]);
         }
-        
+
         // If the head doesn't contain a non-terminal, but it has a newline,
         // then: (1) finish the running segment; (2) compute the segments of the
         // tail. Return the union of 1-2. Note: the head, as it has a newline,
         // is ignored and won't be part of any segment.
         else if (any(s <- nested, hasNewline(g, s))) {
             return union(finished, get(segment([]), tail));
         }
-        
+
         // If the head doesn't contain a non-terminal, and if it doesn't have a
         // newline, then add the head to the running segment and proceed with
         // the tail.
         else {
             Segment old = running;
-            Segment new = old[symbols = old.symbols + head]; 
+            Segment new = old[symbols = old.symbols + head];
             return get(new, tail);
         }
     }
@@ -130,7 +130,7 @@ private Maybe[set[Segment]] getSegmentsWithEnvironment(
 }
 
 bool hasNewline(Grammar g, Symbol s) {
-    return any(p <- lookup(g, delabel(s)), hasNewline(g, p));
+    return any(p <- prodsOf(g, delabel(s)), hasNewline(g, p));
 }
 
 @synopsis{
@@ -149,7 +149,7 @@ private map[Production, bool] hasNewlineByProduction(Grammar g) {
         for (p <- ret, !ret[p]) {
             set[Symbol] nonTerminals = {s | /Symbol s := p.symbols, isNonTerminalType(s)};
             ret[p] = ret[p] || any(/r: range(_, _) := p.symbols, hasNewline(r))
-                            || any(s <- nonTerminals, Production child <- lookup(g, s), ret[child]);
+                            || any(s <- nonTerminals, Production child <- prodsOf(g, s), ret[child]);
         }
     }
 
@@ -165,7 +165,7 @@ private map[Production, bool] hasNewlineByProduction(Grammar g) {
 
 bool hasNewline(str s)
     = LF in chars(s);
-    
+
 bool hasNewline(range(begin, end))
     = begin <= LF && LF <= end;
 

diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Symbols.rsc
@@ -56,7 +56,7 @@ private map[Symbol, Maybe[set[Symbol]]] firstBySymbol(Grammar g, bool(Symbol) pr
         for (s <- ret, nothing() == ret[s]) {
             if (predicate(s)) {
                 ret[s] = just({s});
-            } else if (list[Production] prods: [_, *_] := lookup(g, s)) {
+            } else if (list[Production] prods: [_, *_] := prodsOf(g, s)) {
                 ret[s] = (just({}) | union(it, firstOf(reorder(p.symbols, dir))) | p <- prods);
             } else {
                 ret[s] = just({\empty()});
@@ -84,7 +84,7 @@ set[Symbol] follow(Grammar g, Symbol s)
 @memo
 private map[Symbol, Maybe[set[Symbol]]] followBySymbol(Grammar g, bool(Symbol) predicate, Direction dir) {
     map[Symbol, Maybe[set[Symbol]]] ret = (delabel(s): nothing() | s <- g.rules); // Non-terminals
-    
+
     Maybe[set[Symbol]] followOf(Symbol parent, [])
         = ret[delabel(parent)];
     Maybe[set[Symbol]] followOf(Symbol parent, [h, *t])
@@ -142,6 +142,8 @@ private default Maybe[int] max(Maybe[int] _, Maybe[int] _) = nothing();
     Computes the length of a terminal symbol as a range
 }
 
+Range length(label(_, symbol)) = length(symbol);
+
 Range length(\lit(string))   = <size(string), just(size(string))>;
 Range length(\cilit(string)) = <size(string), just(size(string))>;
 Range length(\char-class(_)) = <1, just(1)>;