From 133da04a07b94bc8738dbcaecaf79c6454670534 Mon Sep 17 00:00:00 2001
From: Edgar Luque
Date: Fri, 5 Jan 2024 12:05:08 +0100
Subject: [PATCH] add tokens to parser

---
Review notes (not part of the commit message):
- The "integer" terminal now maps to Token::Integer. It previously pointed at
  Token::String, which left Token::Integer without any terminal.
- Token::String still has no terminal in the extern block; one is needed
  before the grammar can consume string literals.
- The <String>/<bool> payload captures and the parse::<bool>() turbofish were
  reconstructed from the variant declarations in tokens.rs (angle-bracketed
  text was lost in transit) -- confirm against the tree before applying.
- Indentation inside the hunks was rebuilt by hand; apply with
  --ignore-whitespace if the context does not match.
- The blob ids on the `index` lines are those of the original commit and do
  not reflect the edits described above.

 Cargo.lock                                 |  1 +
 crates/concrete_parser/Cargo.toml          |  1 +
 crates/concrete_parser/src/grammar.lalrpop | 50 ++++++++++++-
 crates/concrete_parser/src/tokens.rs       | 84 +++++++++++++++++++++-
 4 files changed, 133 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b14312d..95c98c0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -88,6 +88,7 @@ version = "0.1.0"
 name = "concrete_parser"
 version = "0.1.0"
 dependencies = [
+ "concrete_ast",
  "lalrpop",
  "lalrpop-util",
  "logos",
diff --git a/crates/concrete_parser/Cargo.toml b/crates/concrete_parser/Cargo.toml
index 2f232e0..93abf1a 100644
--- a/crates/concrete_parser/Cargo.toml
+++ b/crates/concrete_parser/Cargo.toml
@@ -9,6 +9,7 @@ edition = "2021"
 lalrpop-util = { version = "0.20.0", features = ["unicode"] }
 logos = "0.13.0"
 tracing.workspace = true
+concrete_ast = { path = "../concrete_ast"}
 
 [build-dependencies]
 lalrpop = "0.20.0"
diff --git a/crates/concrete_parser/src/grammar.lalrpop b/crates/concrete_parser/src/grammar.lalrpop
index 9d4b0b0..7647c2d 100644
--- a/crates/concrete_parser/src/grammar.lalrpop
+++ b/crates/concrete_parser/src/grammar.lalrpop
@@ -1,4 +1,5 @@
 use crate::tokens::Token;
+use concrete_ast as ast;
 
 grammar;
 
@@ -7,10 +8,55 @@ extern {
     type Error = LexicalError;
 
     enum Token {
-        "ident" => Token::Identifier(<String>),
+        // keywords
+        "let" => Token::KeywordLet,
+        "const" => Token::KeywordConst,
+        "fn" => Token::KeywordFn,
+        "return" => Token::KeywordReturn,
+        "struct" => Token::KeywordStruct,
+        "if" => Token::KeywordIf,
+        "else" => Token::KeywordElse,
+        "while" => Token::KeywordWhile,
+        "for" => Token::KeywordFor,
+        "match" => Token::KeywordMatch,
+        "mod" => Token::KeywordMod,
+        "pub" => Token::KeywordPub,
+
+        // literals
+        "identifier" => Token::Identifier(<String>),
+        "integer" => Token::Integer(<String>),
+        "boolean" => Token::Boolean(<bool>),
+
+        // Other
+
+        "(" => Token::LeftParen,
+        ")" => Token::RightParen,
+        "{" => Token::LeftBracket,
+        "}" => Token::RightBracket,
+        "[" => Token::LeftSquareBracket,
+        "]" => Token::RightSquareBracket,
+        "=" => Token::Assign,
+        ";" => Token::Semicolon,
+        ":" => Token::Colon,
+        "->" => Token::Arrow,
+        "," => Token::Coma,
+        "<" => Token::LessThanSign,
+        ">" => Token::MoreThanSign,
+
+        // operators
+        "+" => Token::OperatorAdd,
+        "-" => Token::OperatorSub,
+        "*" => Token::OperatorMul,
+        "/" => Token::OperatorDiv,
+        "%" => Token::OperatorRem,
+        "&&" => Token::OperatorAnd,
+        "||" => Token::OperatorOr,
+        "==" => Token::OperatorEq,
+        "!=" => Token::OperatorNe,
+        "!" => Token::OperatorNot,
     }
 }
 
 pub Term: () = {
-    "ident" => (),
+    "identifier" => (),
 }
diff --git a/crates/concrete_parser/src/tokens.rs b/crates/concrete_parser/src/tokens.rs
index b787487..f8bfcc8 100644
--- a/crates/concrete_parser/src/tokens.rs
+++ b/crates/concrete_parser/src/tokens.rs
@@ -23,6 +23,88 @@ impl From for LexingError {
 #[derive(Logos, Debug, PartialEq, Clone)]
 #[logos(error = LexingError, skip r"[ \t\n\f]+", skip r"//.*\n?", skip r"/\*(?:[^*]|\*[^/])*\*/")]
 pub enum Token {
-    #[regex(r"[a-zA-Z][a-zA-Z\d]*", |lex| lex.slice().parse())]
+    #[token("let")]
+    KeywordLet,
+    #[token("const")]
+    KeywordConst,
+    #[token("fn")]
+    KeywordFn,
+    #[token("return")]
+    KeywordReturn,
+    #[token("struct")]
+    KeywordStruct,
+    #[token("if")]
+    KeywordIf,
+    #[token("else")]
+    KeywordElse,
+    #[token("while")]
+    KeywordWhile,
+    #[token("for")]
+    KeywordFor,
+    #[token("match")]
+    KeywordMatch,
+    #[token("mod")]
+    KeywordMod,
+    #[token("pub")]
+    KeywordPub,
+
+    // Modern way of allowing identifiers, read: https://unicode.org/reports/tr31/
+    #[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().to_string())]
     Identifier(String),
+
+    // Literals
+    #[regex(r"\d+", |lex| lex.slice().to_string())]
+    Integer(String),
+    #[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())]
+    String(String),
+    #[regex(r"(true|false)", |lex| lex.slice().parse::<bool>().unwrap())]
+    Boolean(bool),
+
+    #[token("(")]
+    LeftParen,
+    #[token(")")]
+    RightParen,
+    #[token("{")]
+    LeftBracket,
+    #[token("}")]
+    RightBracket,
+    #[token("[")]
+    LeftSquareBracket,
+    #[token("]")]
+    RightSquareBracket,
+    #[token("=")]
+    Assign,
+    #[token(";")]
+    Semicolon,
+    #[token(":")]
+    Colon,
+    #[token("->")]
+    Arrow,
+    #[token(",")]
+    Coma,
+    #[token("<")]
+    LessThanSign,
+    #[token(">")]
+    MoreThanSign,
+
+    #[token("+")]
+    OperatorAdd,
+    #[token("-")]
+    OperatorSub,
+    #[token("*")]
+    OperatorMul,
+    #[token("/")]
+    OperatorDiv,
+    #[token("%")]
+    OperatorRem,
+    #[token("&&")]
+    OperatorAnd,
+    #[token("||")]
+    OperatorOr,
+    #[token("==")]
+    OperatorEq,
+    #[token("!=")]
+    OperatorNe,
+    #[token("!")]
+    OperatorNot,
 }