Skip to content

Commit

Permalink
add tokens to parser
Browse files Browse the repository at this point in the history
  • Loading branch information
edg-l committed Jan 5, 2024
1 parent e512062 commit 133da04
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 3 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/concrete_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ edition = "2021"
lalrpop-util = { version = "0.20.0", features = ["unicode"] }
logos = "0.13.0"
tracing.workspace = true
concrete_ast = { path = "../concrete_ast"}

[build-dependencies]
lalrpop = "0.20.0"
50 changes: 48 additions & 2 deletions crates/concrete_parser/src/grammar.lalrpop
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::tokens::Token;
use concrete_ast as ast;

grammar;

Expand All @@ -7,10 +8,55 @@ extern {
type Error = LexicalError;

// Terminal table: binds the quoted terminal names used in grammar rules to
// the `Token` variants emitted by the external lexer.
enum Token {
    // NOTE(review): "ident" and "identifier" are both bound to
    // Token::Identifier. Presumably "ident" is the older spelling kept while
    // rules migrate to "identifier" -- confirm and drop one of them.
    "ident" => Token::Identifier(<String>),

    // keywords
    "let" => Token::KeywordLet,
    "const" => Token::KeywordConst,
    "fn" => Token::KeywordFn,
    "return" => Token::KeywordReturn,
    "struct" => Token::KeywordStruct,
    "if" => Token::KeywordIf,
    "else" => Token::KeywordElse,
    "while" => Token::KeywordWhile,
    "for" => Token::KeywordFor,
    "match" => Token::KeywordMatch,
    "mod" => Token::KeywordMod,
    "pub" => Token::KeywordPub,

    // literals
    "identifier" => Token::Identifier(<String>),
    // Integer literals are carried by `Token::Integer`; binding this terminal
    // to `Token::String` would make string literals parse as integers.
    "integer" => Token::Integer(<String>),
    "string" => Token::String(<String>),
    "boolean" => Token::Boolean(<bool>),

    // delimiters and punctuation
    "(" => Token::LeftParen,
    ")" => Token::RightParen,
    "{" => Token::LeftBracket,
    "}" => Token::RightBracket,
    "[" => Token::LeftSquareBracket,
    "]" => Token::RightSquareBracket,
    "=" => Token::Assign,
    ";" => Token::Semicolon,
    ":" => Token::Colon,
    "->" => Token::Arrow,
    "," => Token::Coma,
    "<" => Token::LessThanSign,
    ">" => Token::MoreThanSign,

    // operators
    "+" => Token::OperatorAdd,
    "-" => Token::OperatorSub,
    "*" => Token::OperatorMul,
    "/" => Token::OperatorDiv,
    "%" => Token::OperatorRem,
    "&&" => Token::OperatorAnd,
    "||" => Token::OperatorOr,
    "==" => Token::OperatorEq,
    "!=" => Token::OperatorNe,
    "!" => Token::OperatorNot,
}
}

// Public entry rule `Term`: matches exactly one terminal and yields the unit
// value `()`, so no AST node is built from it.
// Accepts either the "ident" or the "identifier" terminal; the extern token
// table binds both names to `Token::Identifier`.
// NOTE(review): presumably only one of the two alternatives is meant to
// remain -- confirm which terminal name the grammar should use.
pub Term: () = {
"ident" => (),
"identifier" => (),
}
84 changes: 83 additions & 1 deletion crates/concrete_parser/src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,88 @@ impl From<Infallible> for LexingError {
/// Lexical tokens of the language, produced by the Logos-derived lexer.
///
/// Runs of spaces, tabs, newlines and form feeds, `//` line comments and
/// `/* ... */` block comments are skipped and never surface as tokens.
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(error = LexingError, skip r"[ \t\n\f]+", skip r"//.*\n?", skip r"/\*(?:[^*]|\*[^/])*\*/")]
pub enum Token {
    // Keywords
    /// The `let` keyword.
    #[token("let")]
    KeywordLet,
    /// The `const` keyword.
    #[token("const")]
    KeywordConst,
    /// The `fn` keyword.
    #[token("fn")]
    KeywordFn,
    /// The `return` keyword.
    #[token("return")]
    KeywordReturn,
    /// The `struct` keyword.
    #[token("struct")]
    KeywordStruct,
    /// The `if` keyword.
    #[token("if")]
    KeywordIf,
    /// The `else` keyword.
    #[token("else")]
    KeywordElse,
    /// The `while` keyword.
    #[token("while")]
    KeywordWhile,
    /// The `for` keyword.
    #[token("for")]
    KeywordFor,
    /// The `match` keyword.
    #[token("match")]
    KeywordMatch,
    /// The `mod` keyword.
    #[token("mod")]
    KeywordMod,
    /// The `pub` keyword.
    #[token("pub")]
    KeywordPub,

    /// An identifier: an optional single leading `_`, one `XID_Start`
    /// character, then any number of `XID_Continue` characters.
    ///
    /// Follows the Unicode identifier recommendation, see
    /// <https://unicode.org/reports/tr31/>. The matched text is stored as is.
    #[regex(r"_?\p{XID_Start}\p{XID_Continue}*", |lex| lex.slice().to_string())]
    Identifier(String),

    // Literals
    /// An integer literal; the digits are kept as text and not parsed here.
    #[regex(r"\d+", |lex| lex.slice().to_string())]
    Integer(String),
    /// A string literal, stored exactly as written: the surrounding quotes
    /// are included and escape sequences are left unprocessed.
    ///
    /// A backslash always consumes the character after it, so `\"` does not
    /// terminate the literal and `\\` cannot swallow the closing quote.
    #[regex(r#""(?:[^"\\]|\\(?:.|\n))*""#, |lex| lex.slice().to_string())]
    String(String),
    /// A boolean literal, `true` or `false`.
    // The pattern only matches the two spellings `bool::from_str` accepts,
    // so the `unwrap` cannot fail.
    #[regex(r"(true|false)", |lex| lex.slice().parse::<bool>().unwrap())]
    Boolean(bool),

    // Delimiters and punctuation
    /// `(`
    #[token("(")]
    LeftParen,
    /// `)`
    #[token(")")]
    RightParen,
    /// `{`
    #[token("{")]
    LeftBracket,
    /// `}`
    #[token("}")]
    RightBracket,
    /// `[`
    #[token("[")]
    LeftSquareBracket,
    /// `]`
    #[token("]")]
    RightSquareBracket,
    /// `=`
    #[token("=")]
    Assign,
    /// `;`
    #[token(";")]
    Semicolon,
    /// `:`
    #[token(":")]
    Colon,
    /// `->`
    #[token("->")]
    Arrow,
    /// `,`
    #[token(",")]
    Coma,
    /// `<`
    #[token("<")]
    LessThanSign,
    /// `>`
    #[token(">")]
    MoreThanSign,

    // Operators
    /// `+`
    #[token("+")]
    OperatorAdd,
    /// `-`
    #[token("-")]
    OperatorSub,
    /// `*`
    #[token("*")]
    OperatorMul,
    /// `/`
    #[token("/")]
    OperatorDiv,
    /// `%`
    #[token("%")]
    OperatorRem,
    /// `&&`
    #[token("&&")]
    OperatorAnd,
    /// `||`
    #[token("||")]
    OperatorOr,
    /// `==`
    #[token("==")]
    OperatorEq,
    /// `!=`
    #[token("!=")]
    OperatorNe,
    /// `!`
    #[token("!")]
    OperatorNot,
}

0 comments on commit 133da04

Please sign in to comment.