From 3916339b8678664e059c70d33ad86eddbacce7e5 Mon Sep 17 00:00:00 2001 From: Edgar Luque Date: Fri, 19 Jan 2024 16:57:05 +0100 Subject: [PATCH] parse floats, unescape strings and chars --- Cargo.lock | 10 +++++++ crates/concrete_ast/src/expressions.rs | 4 +-- crates/concrete_codegen_mlir/src/codegen.rs | 28 +++++++++++++++-- crates/concrete_driver/tests/programs.rs | 33 +++++++++++++++++++++ crates/concrete_parser/Cargo.toml | 1 + crates/concrete_parser/src/grammar.lalrpop | 6 +++- crates/concrete_parser/src/lib.rs | 8 ++--- crates/concrete_parser/src/tokens.rs | 19 ++++++++++-- examples/chars.con | 13 ++++++++ examples/floats.con | 21 +++++++++++++ 10 files changed, 130 insertions(+), 13 deletions(-) create mode 100644 examples/chars.con create mode 100644 examples/floats.con diff --git a/Cargo.lock b/Cargo.lock index 4523aa7..dba65aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,6 +400,7 @@ dependencies = [ "logos", "salsa-2022", "tracing", + "unescaper", ] [[package]] @@ -1712,6 +1713,15 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" +[[package]] +name = "unescaper" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8f0f68e58d297ba8b22b8b5a96a87b863ba6bb46aaf51e19a4b02c5a6dd5b7f" +dependencies = [ + "thiserror", +] + [[package]] name = "unicode-ident" version = "1.0.12" diff --git a/crates/concrete_ast/src/expressions.rs b/crates/concrete_ast/src/expressions.rs index 0abd876..603d834 100644 --- a/crates/concrete_ast/src/expressions.rs +++ b/crates/concrete_ast/src/expressions.rs @@ -15,8 +15,8 @@ pub enum Expression { pub enum SimpleExpr { ConstBool(bool), ConstChar(char), - ConstInt(u64), - ConstFloat(()), + ConstInt(u128), + ConstFloat(String), ConstStr(String), Path(PathOp), } diff --git a/crates/concrete_codegen_mlir/src/codegen.rs b/crates/concrete_codegen_mlir/src/codegen.rs index a4f10bd..a9b3961 100644 --- a/crates/concrete_codegen_mlir/src/codegen.rs +++ b/crates/concrete_codegen_mlir/src/codegen.rs @@ -18,7 +18,10 @@ use melior::{ cf, func, memref, }, ir::{ - attribute::{FlatSymbolRefAttribute, IntegerAttribute, StringAttribute, TypeAttribute}, + attribute::{ + FlatSymbolRefAttribute, FloatAttribute, IntegerAttribute, StringAttribute, + TypeAttribute, + }, r#type::{FunctionType, IntegerType, MemRefType}, Block, BlockRef, Location, Module as MeliorModule, Operation, Region, Type, Value, ValueLike, @@ -133,6 +136,7 @@ impl<'ctx, 'parent> ScopeContext<'ctx, 'parent> { "u32" | "i32" => IntegerType::new(context, 32).into(), "u16" | "i16" => IntegerType::new(context, 16).into(), "u8" | "i8" => IntegerType::new(context, 8).into(), + "char" => IntegerType::new(context, 32).into(), "f32" => Type::float32(context), "f64" => Type::float64(context), "bool" => IntegerType::new(context, 1).into(), @@ -658,13 +662,31 @@ fn compile_expression<'ctx, 'parent: 'ctx>( } else { IntegerType::new(context, 64).into() }; - let value = IntegerAttribute::new((*value) as i64, int_type); + let value = IntegerAttribute::new( + (*value).try_into().expect("integer is too big"), + int_type, + ); + Ok(block + .append_operation(arith::constant(context, value.into(), location)) + .result(0)? + .into()) + } + SimpleExpr::ConstFloat(value) => { + let float_type = if let Some(type_info) = type_info { + scope_ctx.resolve_type_spec(context, type_info)? + } else { + Type::float64(context) + }; + let value = FloatAttribute::new( + context, + value.parse().expect("failed to parse float"), + float_type, + ); Ok(block .append_operation(arith::constant(context, value.into(), location)) .result(0)? .into()) } - SimpleExpr::ConstFloat(_) => todo!(), SimpleExpr::ConstStr(_) => todo!(), SimpleExpr::Path(value) => compile_path_op(session, context, scope_ctx, block, value), }, diff --git a/crates/concrete_driver/tests/programs.rs b/crates/concrete_driver/tests/programs.rs index 14cee5e..ee2fa63 100644 --- a/crates/concrete_driver/tests/programs.rs +++ b/crates/concrete_driver/tests/programs.rs @@ -130,3 +130,36 @@ fn test_import() { let code = output.status.code().unwrap(); assert_eq!(code, 8); } + +#[test] +fn test_floats() { + let source = r#" + mod Simple { + fn main() -> i64 { + let a: f32 = my_f32(2.0, 4.0); + let b: f64 = my_f64(2.0, 4.0); + return 1; + } + + fn my_f32(x: f32, y: f32) -> f32 { + let literal: f32 = 2.0; + let literal2: f32 = 2.; + let literal3: f32 = .1; + return x + y + literal2 + literal3; + } + + fn my_f64(x: f64, y: f64) -> f64 { + let literal: f64 = 2.0; + let literal2: f64 = 2.; + let literal3: f64 = .1; + return x + y + literal2 + literal3; + } + } + "#; + + let result = compile_program(source, "floats", false).expect("failed to compile"); + + let output = run_program(&result.binary_file).expect("failed to run"); + let code = output.status.code().unwrap(); + assert_eq!(code, 1); +} diff --git a/crates/concrete_parser/Cargo.toml b/crates/concrete_parser/Cargo.toml index 239eba6..f0422e6 100644 --- a/crates/concrete_parser/Cargo.toml +++ b/crates/concrete_parser/Cargo.toml @@ -13,6 +13,7 @@ concrete_ast = { path = "../concrete_ast"} salsa = { git = "https://github.com/salsa-rs/salsa.git", package = "salsa-2022" } ariadne = { version = "0.4.0", features = ["auto-color"] } itertools = "0.12.0" +unescaper = "0.1.3" [build-dependencies] lalrpop = "0.20.0" diff --git a/crates/concrete_parser/src/grammar.lalrpop b/crates/concrete_parser/src/grammar.lalrpop index d8b1cbe..1fe6f28 100644 --- a/crates/concrete_parser/src/grammar.lalrpop +++ b/crates/concrete_parser/src/grammar.lalrpop @@ -30,8 +30,10 @@ extern { // literals "identifier" => Token::Identifier(), - "integer" => Token::Integer(), + "integer" => Token::Integer(), + "float" => Token::Float(), "string" => Token::String(), + "char" => Token::Char(), "boolean" => Token::Boolean(), // Other @@ -341,8 +343,10 @@ pub UnaryOp: ast::expressions::UnaryOp = { pub(crate) SimpleExpr: ast::expressions::SimpleExpr = { <"integer"> => ast::expressions::SimpleExpr::ConstInt(<>), + <"float"> => ast::expressions::SimpleExpr::ConstFloat(<>), <"boolean"> => ast::expressions::SimpleExpr::ConstBool(<>), <"string"> => ast::expressions::SimpleExpr::ConstStr(<>), + <"char"> => ast::expressions::SimpleExpr::ConstChar(<>), => ast::expressions::SimpleExpr::Path(<>), } diff --git a/crates/concrete_parser/src/lib.rs b/crates/concrete_parser/src/lib.rs index d0b4e1b..52c0a51 100644 --- a/crates/concrete_parser/src/lib.rs +++ b/crates/concrete_parser/src/lib.rs @@ -89,7 +89,7 @@ mod ModuleName { "##; let lexer = Lexer::new(source); let parser = grammar::ProgramParser::new(); - dbg!(parser.parse(lexer).unwrap()); + parser.parse(lexer).unwrap(); } #[test] @@ -104,7 +104,7 @@ mod ModuleName { }"##; let lexer = Lexer::new(source); let parser = grammar::ProgramParser::new(); - dbg!(parser.parse(lexer).unwrap()); + parser.parse(lexer).unwrap(); } #[test] @@ -116,7 +116,7 @@ mod ModuleName { }"##; let lexer = Lexer::new(source); let parser = grammar::ProgramParser::new(); - dbg!(parser.parse(lexer).unwrap()); + parser.parse(lexer).unwrap(); } #[test] @@ -128,6 +128,6 @@ mod ModuleName { }"##; let lexer = Lexer::new(source); let parser = grammar::ProgramParser::new(); - dbg!(parser.parse(lexer).unwrap()); + parser.parse(lexer).unwrap(); } } diff --git a/crates/concrete_parser/src/tokens.rs b/crates/concrete_parser/src/tokens.rs index 9e06b3f..968a04a 100644 --- a/crates/concrete_parser/src/tokens.rs +++ b/crates/concrete_parser/src/tokens.rs @@ -57,12 +57,25 @@ pub enum Token { Identifier(String), // Literals - #[regex(r"\d+", |lex| lex.slice().parse::().unwrap())] - Integer(u64), - #[regex(r#""(?:[^"]|\\")*""#, |lex| lex.slice().to_string())] + #[regex(r"\d+", |lex| lex.slice().parse::().unwrap(), priority = 2)] + Integer(u128), + #[regex(r"([0-9]+([.][0-9]*)?|[.][0-9]+)", |lex| lex.slice().to_string(), priority = 1)] + Float(String), + #[regex(r#""(?:[^"]|\\")*""#, |lex| { + let slice = lex.slice(); + let len = slice.len(); + unescaper::unescape(&slice[1..(len-1)]).expect("failed to unescape string") + })] String(String), #[regex(r"(true|false)", |lex| lex.slice().parse::().unwrap())] Boolean(bool), + #[regex(r#"'(?:[^']|\\')*'"#, |lex| { + let slice = lex.slice(); + let len = slice.len(); + let real_char = unescaper::unescape(&slice[1..(len-1)]).expect("failed to unescape char").to_string(); + real_char.chars().next().unwrap() + })] + Char(char), #[token("(")] LeftParen, diff --git a/examples/chars.con b/examples/chars.con new file mode 100644 index 0000000..cb878d5 --- /dev/null +++ b/examples/chars.con @@ -0,0 +1,13 @@ + mod Simple { + fn main() -> i64 { + let a: char = hello_chars('\t'); + return 1; + } + + fn hello_chars(a: char) -> char { + let x: char = 'b'; + let newline: char = '\n'; + + return x + newline + a; + } +} diff --git a/examples/floats.con b/examples/floats.con new file mode 100644 index 0000000..cc6f61f --- /dev/null +++ b/examples/floats.con @@ -0,0 +1,21 @@ + mod Simple { + fn main() -> i64 { + let a: f32 = my_f32(2.0, 4.0); + let b: f64 = my_f64(2.0, 4.0); + return 1; + } + + fn my_f32(x: f32, y: f32) -> f32 { + let literal: f32 = 2.0; + let literal2: f32 = 2.; + let literal3: f32 = .1; + return x + y + literal2 + literal3; + } + + fn my_f64(x: f64, y: f64) -> f64 { + let literal: f64 = 2.0; + let literal2: f64 = 2.; + let literal3: f64 = .1; + return x + y + literal2 + literal3; + } +}