From ca9ff002c8da18765724a567cbcb5635da4e25ac Mon Sep 17 00:00:00 2001 From: Sam Zhou Date: Sat, 2 Mar 2024 11:32:43 -0800 Subject: [PATCH 1/2] [ast] Document various IRs --- crates/samlang-ast/src/lib.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/crates/samlang-ast/src/lib.rs b/crates/samlang-ast/src/lib.rs index 06b71bd4..fa5c46fc 100644 --- a/crates/samlang-ast/src/lib.rs +++ b/crates/samlang-ast/src/lib.rs @@ -5,12 +5,23 @@ pub use loc::{Location, Position}; mod reason; pub use reason::{Description, Reason}; +/// HIR is the result of direct lowering from source. +/// Generics are still preserved. pub mod hir; mod hir_tests; +/// LIR is the first IR where we start to lose track of accurate types. +/// In this stage, we start to have GC specific instructions. pub mod lir; mod lir_tests; +/// MIR is the result of generics specialization. +/// Within generics specialization, some representations of enum types are also optimized. +/// Most of the optimizations run on MIR. pub mod mir; mod mir_tests; +/// The full-fidelity representation of the source code. +/// All of the LSP logic runs on source AST. pub mod source; mod source_tests; +/// The final stage AST that closely models parts of WASM that are relevant to samlang. +/// For now, this is the only supported backend. 
pub mod wasm; From cb292b75ab9dc16eaeb209d4b80d993921041865 Mon Sep 17 00:00:00 2001 From: Sam Zhou Date: Sat, 2 Mar 2024 12:39:11 -0800 Subject: [PATCH 2/2] [compiler] Document all casts during generics specialization --- .../src/mir_generics_specialization.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/crates/samlang-compiler/src/mir_generics_specialization.rs b/crates/samlang-compiler/src/mir_generics_specialization.rs index 83cb6f3a..2c3bf703 100644 --- a/crates/samlang-compiler/src/mir_generics_specialization.rs +++ b/crates/samlang-compiler/src/mir_generics_specialization.rs @@ -120,6 +120,8 @@ impl Rewriter { mir::Expression::int(i32::try_from(*tag * 2 + 1).unwrap()), )); let mut nested_stmts = vec![]; + // Once we pass the check, we can cast the general enum type to a + // more specific subtype. nested_stmts.push(mir::Statement::Cast { name: casted_collector, type_: subtype, @@ -147,6 +149,8 @@ impl Rewriter { ), }); } + // If we are pattern matching on an unboxed case, it means that `unboxed_t` must be a + // pointer type. Therefore, we only need to check whether the test expression is a pointer. mir::EnumTypeDefinition::Unboxed(unboxed_t) => { debug_assert!(bindings.len() == 1); let binded_name = bindings[0].as_ref().unwrap().0; @@ -155,30 +159,38 @@ impl Rewriter { let comparison_temp_2 = heap.alloc_temp_str(); let comparison_temp_3 = heap.alloc_temp_str(); let comparison_temp_4 = heap.alloc_temp_str(); + // We need to cast the high level expression into int so we can + // do a low-level bitwise is-pointer check. collector.push(mir::Statement::Cast { name: casted_int_collector, type_: mir::INT_TYPE, assigned_expression: test_expr, }); // Here we test whether this is a pointer + // We got lucky in the JS output, since Number([1,2]) == NaN, + // and the rest happens to work out. 
+ // i < 1024 (small int is not a pointer) collector.push(mir::Statement::binary( comparison_temp_1, hir::Operator::LT, mir::Expression::var_name(casted_int_collector, mir::INT_TYPE), mir::Expression::int(1024), )); + // i & 1 (LSB == 1 is not a pointer) collector.push(mir::Statement::binary( comparison_temp_2, hir::Operator::LAND, mir::Expression::var_name(casted_int_collector, mir::INT_TYPE), mir::ONE, )); + // (i < 1024) || (i & 1) -> not a pointer collector.push(mir::Statement::binary( comparison_temp_3, hir::Operator::LOR, mir::Expression::var_name(comparison_temp_1, mir::INT_TYPE), mir::Expression::var_name(comparison_temp_2, mir::INT_TYPE), )); + // invert the previous check, is a pointer collector.push(mir::Statement::binary( comparison_temp_4, hir::Operator::XOR, @@ -186,6 +198,8 @@ impl Rewriter { mir::ONE, )); let mut nested_stmts = vec![]; + // Once we pass the is-pointer check, we can cast the test expression to the underlying + // unboxed pointer type. nested_stmts.push(mir::Statement::Cast { name: binded_name, type_: *unboxed_t, @@ -206,6 +220,8 @@ impl Rewriter { mir::EnumTypeDefinition::Int => { let casted_collector = heap.alloc_temp_str(); let comparison_temp = heap.alloc_temp_str(); + // We cast the test expression to int to perform the test. + // Once we have i31 type, the test should be performed on i31. collector.push(mir::Statement::Cast { name: casted_collector, type_: mir::INT_TYPE, @@ -320,6 +336,7 @@ impl Rewriter { ) .collect(), }); + // Cast from more specific subtype to the general enum type. collector.push(mir::Statement::Cast { name: *enum_variable_name, type_: mir::Type::Id(enum_type), @@ -328,6 +345,7 @@ impl Rewriter { } mir::EnumTypeDefinition::Unboxed(_) => { debug_assert_eq!(associated_data_list.len(), 1); + // Cast from more specific subtype to the general enum type. 
collector.push(mir::Statement::Cast { name: *enum_variable_name, type_: mir::Type::Id(enum_type), @@ -340,6 +358,7 @@ impl Rewriter { } mir::EnumTypeDefinition::Int => { debug_assert!(associated_data_list.is_empty()); + // Cast from more specific subtype to the general enum type. collector.push(mir::Statement::Cast { name: *enum_variable_name, type_: mir::Type::Id(enum_type),