Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tokenization testing #4

Merged
merged 22 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ jobs:
java-version: 11
distribution: 'temurin'
cache: 'maven'
- uses: actions/setup-node@v4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specify which version of Node we need, and enable caching to improve CI times.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

- name: install-tokenizer
working-directory: rascal-textmate-core
run: npm install vscode-tmgrammar-test
- name: run-tests
working-directory: rascal-textmate-core
run: mvn test -B -Drascal.compile.skip -Drascal.tutor.skip
Expand Down
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,18 @@ TextMate grammars, this project applies partial conversion. Alternatively, a
previous [project](https://github.com/TarVK/syntax-highlighter) by
[@TarVK](https://github.com/TarVK) applies total conversion.

The [walkthrough](src/main/rascal/lang/textmate/conversiontests/Walkthrough.rsc)
## Documentation

The [walkthrough](rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Walkthrough.rsc)
explains the main ideas behind the conversion algorithm in this project.

## Tests

To test tokenization (as part of the conversion
[tests](rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests)),
the [`vscode-tmgrammar-test`](https://github.com/PanAeon/vscode-tmgrammar-test)
tool is used. Install it locally in directory `rascal-textmate-core` as follows:

```
npm install vscode-tmgrammar-test
```
7 changes: 6 additions & 1 deletion rascal-textmate-core/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
target
target

# Only used to run tests:
node_modules
package-lock.json
package.json
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normally package.json is not in the .gitignore

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,9 @@ default RegExp toRegExp(Grammar _, Condition c) {
Converts a character range to a regular expression.
}

RegExp toRegExp(Grammar _, range(begin, end))
RegExp toRegExp(Grammar _, range(char, char))
= regExp("<encode(char)>", []);
default RegExp toRegExp(Grammar _, range(begin, end))
sungshik marked this conversation as resolved.
Show resolved Hide resolved
= regExp("[<encode(begin)>-<encode(end)>]", []);

@synopsis{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,24 @@
module lang::textmate::ConversionTests

import Grammar;
import IO;
import List;
import Map;
import ParseTree;
import String;
import util::ShellExec;
import util::SystemAPI;

import lang::textmate::Conversion;
import lang::textmate::Grammar;

bool doAnalyzeTest(RscGrammar rsc, list[ConversionUnit] expect) {
bool doAnalyzeTest(RscGrammar rsc, list[ConversionUnit] expect, bool printActual = false) {
list[ConversionUnit] actual = analyze(rsc);

for (printActual, u <- actual) {
println("unit(rsc, <u.prod>),");
}

for (u <- actual) {
assert u in expect : "Actual but not expected: <u.prod>";
}
Expand All @@ -32,11 +40,13 @@ bool doAnalyzeTest(RscGrammar rsc, list[ConversionUnit] expect) {
return true;
}

bool doTransformTest(list[ConversionUnit] units, RepositoryStats expect) {
TmGrammar tm = transform(units);
bool doTransformTest(list[ConversionUnit] units, RepositoryStats expect, str name = "") {
TmGrammar tm = transform(units)[scopeName = "<name>"];
Repository repo = tm.repository;
list[TmRule] pats = tm.patterns;

// Test structural properties of the TextMate grammar

RepositoryStats actual = <
(0 | it + 1 | s <- repo, repo[s] is match),
(0 | it + 1 | s <- repo, repo[s] is beginEnd),
Expand All @@ -59,10 +69,38 @@ bool doTransformTest(list[ConversionUnit] units, RepositoryStats expect) {
assert size(pats) == size(repo) : "Actual patterns list size: <size(pats)>. Expected: <size(repo)>.";
assert (true | it && r is include | r <- pats) : "Patterns list contains pattern(s) of unexpected kind";
assert (true | it && s in repo | r <- pats, include(/#<s:.*>$/) := r) : "Patterns list contains pattern(s) outside repository";

// Test behavioral properties of the TextMate grammar

loc lProject = |project://rascal-textmate-core|;
loc lGrammar = lProject + "/target/generated-test-grammars/<name>.tmLanguage.json";
toJSON(tm, l = resolveLocation(lGrammar));

loc lTest = lProject + "/src/main/rascal/lang/textmate/conversiontests/<name>.test";
loc lTester = lProject + "/node_modules/vscode-tmgrammar-test";
if (!exists(lTest)) {
println("[LOG] No tokenization tests available for `<name>` (`<resolveLocation(lTest).path>` does not exist)");
} elseif (!exists(lTester)) {
println("[LOG] No tokenizer available (`<resolveLocation(lTester).path>` does not exist)");
} else {
bool windows = startsWith(getSystemProperty("os.name"), "Windows");
loc lExec = lProject + "/src/test/sh/lang/textmate/conversion-tests.<windows ? "bat" : "sh">";

if (<output, exitCode> := execWithCode(lExec, args = [name]) && exitCode != 0) {
println(output);
assert false : "Actual tokenization does not match expected tokenization (see output above for details)";
}
}

return true;
}

alias RepositoryStats = tuple[int match, int beginEnd, int include];
alias RepositoryStats = tuple[
int match,
int beginEnd,
int include];

int sum(RepositoryStats stats)
= stats.match + stats.beginEnd + stats.include;
= stats.match
+ stats.beginEnd
+ stats.include;
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ list[ConversionUnit] units = [
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
test bool transformTest() = doTransformTest(units, <3, 0, 0>);
test bool transformTest() = doTransformTest(units, <3, 0, 0>, name = "Emoji");
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# SYNTAX TEST "Emoji"

🌊
# <-- constant.language

foo 🌊 bar
# ^ -constant.language
# ^^ constant.language
# ^ -constant.language

🙂🙁
# <---- constant.language

foo 🙂 bar 🙁
# ^^ constant.language
# ^^^^^ -constant.language
# ^^ constant.language
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ list[ConversionUnit] units = [
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
test bool transformTest() = doTransformTest(units, <3, 0, 0>);
test bool transformTest() = doTransformTest(units, <3, 0, 0>, name = "Pico");
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# SYNTAX TEST "Pico"

%% foo bar
# <-- Comment
# ^^^^^^^ Comment

%% do
# ^^ Comment

do
# <-- keyword.control
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,4 @@ list[ConversionUnit] units = [
];

test bool analyzeTest() = doAnalyzeTest(rsc, units);
test bool transformTest() = doTransformTest(units, <8, 0, 0>);
test bool transformTest() = doTransformTest(units, <8, 0, 0>, name = "PicoWithCategories");
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# SYNTAX TEST "PicoWithCategories"

%% foo bar
# <-- comment.line
# ^^^^^^^ comment.line

%% do
# ^^ comment.line

do
# <-- variable.other
# Note: Keywords can be identifiers in Pico

123
# <--- constant.numeric

foo123
# <------- variable.other

foo_123
# <--- variable.other
# ^ -variable.other
# ^ -constant.numeric
# ^^^ constant.numeric
# Note: Underscores cannot occur in identifiers in Pico

#begin
## <----- keyword.control
## TODO: `begin` outside `begin`-`end` can never be an identifier in Pico

#"foo" "bar"
## <----- string.quoted.double
## ^ -string.quoted.double
## ^^^^^ string.quoted.double
## TODO: Support multiline productions

#"foo\"bar"
## <---- string.quoted.double
## ^^ string.quoted.double
## ^^^^ string.quoted.double
## TODO: Support multiline productions with escaping
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
npx vscode-tmgrammar-test ^
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the `.bat` script is exactly the same as the `.sh` script.

How about we move the command to the `scripts` block in `package.json`, and in Rascal just call `npm run conversion-tests`?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed:

  • Call npx directly from Rascal instead of via the scripts
  • Remove the scripts

--grammar target/generated-test-grammars/%1.tmLanguage.json ^
"./src/main/rascal/lang/textmate/conversiontests/%1.test"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
npx vscode-tmgrammar-test \
--grammar target/generated-test-grammars/$1.tmLanguage.json \
"./src/main/rascal/lang/textmate/conversiontests/$1.test"
8 changes: 4 additions & 4 deletions vscode-extension/syntaxes/rascal.tmLanguage.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes in this file are due to the "extra" change in this PR

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, this is going to cause a big merge conflict with my other small PR. That might be a reason to pull those changes out of this PR and apply them on main after merging that one?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merged main into this branch, then resolved all conflicts in favor of main, then regenerated the file

"repository": {
"prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})": {
"match": "((?:\\x{2F}\\x{2F})(?:(?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{10FFFF}])*?(?!(?:[\\x{9}-\\x{9}]|[\\x{D}-\\x{D}]|[\\x{20}-\\x{20}]|[\\x{A0}-\\x{A0}]|[\\x{1680}-\\x{1680}]|[\\x{2000}-\\x{200A}]|[\\x{202F}-\\x{202F}]|[\\x{205F}-\\x{205F}]|[\\x{3000}-\\x{3000}]))(?:$)))",
"match": "((?:\\x{2F}\\x{2F})(?:(?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{10FFFF}])*?(?!(?:\\x{9}|\\x{D}|\\x{20}|\\x{A0}|\\x{1680}|[\\x{2000}-\\x{200A}]|\\x{202F}|\\x{205F}|\\x{3000}))(?:$)))",
"name": "prod(lex(\"Comment\"),[lit(\"//\"),conditional(\\iter-star(\\char-class([range(1,9),range(11,1114111)])),{\\not-follow(\\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\\end-of-line()})],{tag(\"category\"(\"comment\"))})",
"captures": {
"1": {
Expand Down Expand Up @@ -46,7 +46,7 @@
}
},
{
"match": "((?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{3B}]|[\\x{3D}-\\x{3D}]|[\\x{3F}-\\x{5B}]|[\\x{5D}-\\x{5F}]|[a-\\x{10FFFF}])+?(?!(?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{3B}]|[\\x{3D}-\\x{3D}]|[\\x{3F}-\\x{5B}]|[\\x{5D}-\\x{5F}]|[a-\\x{10FFFF}])))",
"match": "((?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{3B}]|\\x{3D}|[\\x{3F}-\\x{5B}]|[\\x{5D}-\\x{5F}]|[a-\\x{10FFFF}])+?(?!(?:[\\x{1}-\\x{9}]|[\\x{B}-\\x{3B}]|\\x{3D}|[\\x{3F}-\\x{5B}]|[\\x{5D}-\\x{5F}]|[a-\\x{10FFFF}])))",
"name": "prod(label(\"text\",lex(\"ConcretePart\")),[conditional(iter(\\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\\not-follow(\\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{tag(\"category\"(\"string\"))})",
"captures": {
"1": {
Expand Down Expand Up @@ -80,7 +80,7 @@
"end": "(?:\\x{5D})",
"patterns": [
{
"match": "((?:\\x{5C})(?:[\\x{20}-\\x{20}]|[\\x{22}-\\x{22}]|[\\x{27}-\\x{27}]|[\\x{2D}-\\x{2D}]|[\\x{3C}-\\x{3C}]|[\\x{3E}-\\x{3E}]|[\\x{5B}-\\x{5D}]|[b-b]|[f-f]|[n-n]|[r-r]|[t-t]))",
"match": "((?:\\x{5C})(?:\\x{20}|\\x{22}|\\x{27}|\\x{2D}|\\x{3C}|\\x{3E}|[\\x{5B}-\\x{5D}]|b|f|n|r|t))",
"name": "prod(lex(\"Char\"),[lit(\"\\\\\"),\\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{tag(\"category\"(\"constant\"))})",
"captures": {
"1": {
Expand All @@ -89,7 +89,7 @@
}
},
{
"match": "((?:(?:\\x{5C})[U-U](?:(?:\\b10\\b)|(?:(?:\\b0\\b)(?:[0-9]|[A-F]|[a-f])))(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\x{5C})[u-u](?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\x{5C})[a-a][0-7](?:[0-9]|[A-F]|[a-f])))",
"match": "((?:(?:\\x{5C})U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)(?:[0-9]|[A-F]|[a-f])))(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\x{5C})u(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f])(?:[0-9]|[A-F]|[a-f]))|(?:(?:\\x{5C})a[0-7](?:[0-9]|[A-F]|[a-f])))",
"name": "prod(lex(\"Char\"),[lex(\"UnicodeEscape\")],{tag(\"category\"(\"constant\"))})",
"captures": {
"1": {
Expand Down