diff --git a/crates/samlang-parser/src/lib.rs b/crates/samlang-parser/src/lib.rs index e5904908..c0ecaa01 100644 --- a/crates/samlang-parser/src/lib.rs +++ b/crates/samlang-parser/src/lib.rs @@ -28,7 +28,7 @@ pub fn parse_source_module_from_text( module_reference, builtins, ); - parser.parse_module() + source_parser::parse_module(parser) } pub fn parse_source_expression_from_text( @@ -45,7 +45,7 @@ pub fn parse_source_expression_from_text( module_reference, builtins, ); - parser.parse_expression_with_comment_store() + source_parser::parse_expression_with_comment_store(parser) } pub fn builtin_std_raw_sources(heap: &mut Heap) -> HashMap { diff --git a/crates/samlang-parser/src/source_parser.rs b/crates/samlang-parser/src/source_parser.rs index a7aa03fc..ea8394b7 100644 --- a/crates/samlang-parser/src/source_parser.rs +++ b/crates/samlang-parser/src/source_parser.rs @@ -1,25 +1,16 @@ use super::lexer::{Keyword, Token, TokenContent, TokenOp}; -use itertools::Itertools; -use samlang_ast::{ - source::{expr::IfElseCondition, *}, - Location, Position, -}; +use samlang_ast::{source::*, Location, Position}; use samlang_errors::ErrorSet; use samlang_heap::{Heap, ModuleReference, PStr}; use std::{ cmp, collections::{HashMap, HashSet}, - rc::Rc, vec, }; const MAX_STRUCT_SIZE: usize = 16; const MAX_VARIANT_SIZE: usize = 15; -fn unescape_quotes(source: &str) -> String { - source.replace("\\\"", "\"") -} - pub(super) struct SourceParser<'a> { tokens: Vec, comments_store: CommentStore, @@ -33,7 +24,25 @@ pub(super) struct SourceParser<'a> { } impl<'a> SourceParser<'a> { - // SECTION 1: Base Methods + pub(super) fn new( + tokens: Vec, + heap: &'a mut Heap, + error_set: &'a mut ErrorSet, + module_reference: ModuleReference, + builtin_classes: HashSet, + ) -> SourceParser<'a> { + SourceParser { + tokens, + comments_store: CommentStore::new(), + module_reference, + heap, + error_set, + builtin_classes, + position: 0, + class_source_map: HashMap::new(), + available_tparams: HashSet::new(), + } + } fn last_location(&self) -> Location { if self.position == 0 { @@ -223,164 +232,201 @@ impl<'a> SourceParser<'a> { collector } - // SECTION 2: Source Parser + fn parse_upper_id(&mut self) -> Id { + let associated_comments = self.collect_preceding_comments(); + let (loc, name) = self.assert_and_peek_upper_id(); + Id { + loc, + associated_comments: self.comments_store.create_comment_reference(associated_comments), + name, + } + } - pub(super) fn new( - tokens: Vec, - heap: &'a mut Heap, - error_set: &'a mut ErrorSet, - module_reference: ModuleReference, - builtin_classes: HashSet, - ) -> SourceParser<'a> { - SourceParser { - tokens, - comments_store: CommentStore::new(), - module_reference, - heap, - error_set, - builtin_classes, - position: 0, - class_source_map: HashMap::new(), - available_tparams: HashSet::new(), + fn parse_lower_id(&mut self) -> Id { + let associated_comments = self.collect_preceding_comments(); + let (loc, name) = self.assert_and_peek_lower_id(); + Id { + loc, + associated_comments: self.comments_store.create_comment_reference(associated_comments), + name, } } - pub(super) fn parse_module(mut self) -> Module<()> { - let mut imports = vec![]; - while let Token(import_start, TokenContent::Keyword(Keyword::IMPORT)) = self.peek() { - self.consume(); - self.assert_and_consume_operator(TokenOp::LBRACE); - let imported_members = self.parse_comma_separated_list_with_end_token( - TokenOp::RBRACE, - &mut SourceParser::parse_upper_id, - ); - self.assert_and_consume_operator(TokenOp::RBRACE); - self.assert_and_consume_keyword(Keyword::FROM); - let import_loc_start = self.peek().0; - let imported_module_parts = { - let mut collector = vec![self.assert_and_consume_identifier().1]; - while let Token(_, TokenContent::Operator(TokenOp::DOT)) = self.peek() { + fn collect_preceding_comments(&mut self) -> Vec { + self.unconsume_comments(); + let mut comments = vec![]; + loop { + match self.simple_peek() { + Token(location, TokenContent::LineComment(text)) => { self.consume(); - collector.push(self.assert_and_consume_identifier().1); + comments.push(Comment { location, kind: CommentKind::LINE, text }); } - collector - }; - let imported_module = self.heap.alloc_module_reference(imported_module_parts); - let imported_module_loc = import_loc_start.union(&self.last_location()); - for variable in imported_members.iter() { - self.class_source_map.insert(variable.name, imported_module); - } - let loc = - if let Token(semicolon_loc, TokenContent::Operator(TokenOp::SEMICOLON)) = self.peek() { + Token(location, TokenContent::BlockComment(text)) => { self.consume(); - import_start.union(&semicolon_loc) - } else { - import_start.union(&imported_module_loc) - }; - imports.push(ModuleMembersImport { - loc, - imported_members, - imported_module, - imported_module_loc, - }); + comments.push(Comment { location, kind: CommentKind::BLOCK, text }) + } + Token(location, TokenContent::DocComment(text)) => { + self.consume(); + comments.push(Comment { location, kind: CommentKind::DOC, text }) + } + _ => break, + } } + comments + } +} - let mut toplevels = vec![]; - 'outer: loop { - if let TokenContent::EOF = self.peek().1 { - break; +pub fn parse_module(mut parser: SourceParser) -> Module<()> { + let mut imports = vec![]; + while let Token(import_start, TokenContent::Keyword(Keyword::IMPORT)) = parser.peek() { + parser.consume(); + parser.assert_and_consume_operator(TokenOp::LBRACE); + let imported_members = parser.parse_comma_separated_list_with_end_token( + TokenOp::RBRACE, + &mut SourceParser::parse_upper_id, + ); + parser.assert_and_consume_operator(TokenOp::RBRACE); + parser.assert_and_consume_keyword(Keyword::FROM); + let import_loc_start = parser.peek().0; + let imported_module_parts = { + let mut collector = vec![parser.assert_and_consume_identifier().1]; + while let Token(_, TokenContent::Operator(TokenOp::DOT)) = parser.peek() { + parser.consume(); + collector.push(parser.assert_and_consume_identifier().1); } - loop { - match self.peek() { - Token( - _, - TokenContent::Keyword(Keyword::CLASS | Keyword::INTERFACE | Keyword::PRIVATE), - ) => break, - Token(_, TokenContent::EOF) => break 'outer, - Token(loc, content) => { - self.consume(); - self.report( - loc, - format!( - "Unexpected token among the classes and interfaces: {}", - content.pretty_print(self.heap) - ), - ) - } + collector + }; + let imported_module = parser.heap.alloc_module_reference(imported_module_parts); + let imported_module_loc = import_loc_start.union(&parser.last_location()); + for variable in imported_members.iter() { + parser.class_source_map.insert(variable.name, imported_module); + } + let loc = + if let Token(semicolon_loc, TokenContent::Operator(TokenOp::SEMICOLON)) = parser.peek() { + parser.consume(); + import_start.union(&semicolon_loc) + } else { + import_start.union(&imported_module_loc) + }; + imports.push(ModuleMembersImport { + loc, + imported_members, + imported_module, + imported_module_loc, + }); + } + + let mut toplevels = vec![]; + 'outer: loop { + if let TokenContent::EOF = parser.peek().1 { + break; + } + loop { + match parser.peek() { + Token(_, TokenContent::Keyword(Keyword::CLASS | Keyword::INTERFACE | Keyword::PRIVATE)) => { + break + } + Token(_, TokenContent::EOF) => break 'outer, + Token(loc, content) => { + parser.consume(); + parser.report( + loc, + format!( + "Unexpected token among the classes and interfaces: {}", + content.pretty_print(parser.heap) + ), + ) } } - toplevels.push(self.parse_toplevel()); } - let comments = self.collect_preceding_comments(); - let trailing_comments = self.comments_store.create_comment_reference(comments); - - Module { comment_store: self.comments_store, imports, toplevels, trailing_comments } + toplevels.push(toplevel_parser::parse_toplevel(&mut parser)); } + let comments = parser.collect_preceding_comments(); + let trailing_comments = parser.comments_store.create_comment_reference(comments); - fn parse_toplevel(&mut self) -> Toplevel<()> { - self.unconsume_comments(); - let is_private = if let TokenContent::Keyword(Keyword::PRIVATE) = self.peek().1 { - self.consume(); + Module { comment_store: parser.comments_store, imports, toplevels, trailing_comments } +} + +pub(super) fn parse_expression_with_comment_store( + mut parser: SourceParser, +) -> (CommentStore, expr::E<()>) { + let e = expression_parser::parse_expression(&mut parser); + (parser.comments_store, e) +} + +mod toplevel_parser { + use super::{ + super::lexer::{Keyword, Token, TokenContent, TokenOp}, + MAX_STRUCT_SIZE, MAX_VARIANT_SIZE, + }; + use itertools::Itertools; + use samlang_ast::{source::*, Location}; + use std::collections::HashSet; + + pub(super) fn parse_toplevel(parser: &mut super::SourceParser) -> Toplevel<()> { + parser.unconsume_comments(); + let is_private = if let TokenContent::Keyword(Keyword::PRIVATE) = parser.peek().1 { + parser.consume(); true } else { false }; - let is_class = matches!(self.peek().1, TokenContent::Keyword(Keyword::CLASS)); + let is_class = matches!(parser.peek().1, TokenContent::Keyword(Keyword::CLASS)); if is_private { - self.unconsume(); + parser.unconsume(); } if is_class { - Toplevel::Class(self.parse_class()) + Toplevel::Class(parse_class(parser)) } else { - Toplevel::Interface(self.parse_interface()) + Toplevel::Interface(parse_interface(parser)) } } - pub(super) fn parse_class(&mut self) -> ClassDefinition<()> { - let associated_comments = self.collect_preceding_comments(); + pub(super) fn parse_class(parser: &mut super::SourceParser) -> ClassDefinition<()> { + let associated_comments = parser.collect_preceding_comments(); let (mut loc, private) = - if let Token(loc, TokenContent::Keyword(Keyword::PRIVATE)) = self.peek() { - self.consume(); - self.assert_and_consume_keyword(Keyword::CLASS); + if let Token(loc, TokenContent::Keyword(Keyword::PRIVATE)) = parser.peek() { + parser.consume(); + parser.assert_and_consume_keyword(Keyword::CLASS); (loc, true) } else { - (self.assert_and_consume_keyword(Keyword::CLASS), false) + (parser.assert_and_consume_keyword(Keyword::CLASS), false) }; - let name = self.parse_upper_id(); + let name = parser.parse_upper_id(); loc = loc.union(&name.loc); let (type_param_loc_start, type_param_loc_end, mut type_parameters) = - if let Token(loc_start, TokenContent::Operator(TokenOp::LT)) = self.peek() { - self.consume(); - let type_params = self.parse_comma_separated_list_with_end_token( + if let Token(loc_start, TokenContent::Operator(TokenOp::LT)) = parser.peek() { + parser.consume(); + let type_params = parser.parse_comma_separated_list_with_end_token( TokenOp::GT, - &mut SourceParser::parse_type_parameter, + &mut super::type_parser::parse_type_parameter, ); - let loc_end = self.assert_and_consume_operator(TokenOp::GT); + let loc_end = parser.assert_and_consume_operator(TokenOp::GT); (Some(loc_start), Some(loc_end), type_params) } else { (None, None, vec![]) }; - self.available_tparams = type_parameters.iter().map(|it| it.name.name).collect(); - self.fix_tparams_with_generic_annot(&mut type_parameters); - let (type_definition, extends_or_implements_nodes) = match self.peek().1 { + parser.available_tparams = type_parameters.iter().map(|it| it.name.name).collect(); + super::type_parser::fix_tparams_with_generic_annot(parser, &mut type_parameters); + let (type_definition, extends_or_implements_nodes) = match parser.peek().1 { TokenContent::Operator(TokenOp::LBRACE | TokenOp::COLON) | TokenContent::Keyword(Keyword::CLASS | Keyword::INTERFACE | Keyword::PRIVATE) => { - let nodes = if let TokenContent::Operator(TokenOp::COLON) = self.peek().1 { - self.consume(); - let nodes = self.parse_extends_or_implements_nodes(); + let nodes = if let TokenContent::Operator(TokenOp::COLON) = parser.peek().1 { + parser.consume(); + let nodes = parse_extends_or_implements_nodes(parser); loc = loc.union(&nodes.last().unwrap().location); nodes } else { vec![] }; loc = if let Some(loc_end) = type_param_loc_end { loc.union(&loc_end) } else { loc }; - let type_def = TypeDefinition::Struct { loc: self.peek().0, fields: vec![] }; + let type_def = TypeDefinition::Struct { loc: parser.peek().0, fields: vec![] }; (type_def, nodes) } _ => { - let type_def_loc_start = self.assert_and_consume_operator(TokenOp::LPAREN); - let mut type_def = self.parse_type_definition_inner(); - let type_def_loc_end = self.assert_and_consume_operator(TokenOp::RPAREN); + let type_def_loc_start = parser.assert_and_consume_operator(TokenOp::LPAREN); + let mut type_def = parse_type_definition_inner(parser); + let type_def_loc_end = parser.assert_and_consume_operator(TokenOp::RPAREN); let type_def_loc = type_param_loc_start.unwrap_or(type_def_loc_start).union(&type_def_loc_end); match &mut type_def { @@ -388,9 +434,9 @@ impl<'a> SourceParser<'a> { TypeDefinition::Enum { loc, variants: _ } => *loc = type_def_loc, } loc = loc.union(&type_def_loc_end); - let nodes = if let TokenContent::Operator(TokenOp::COLON) = self.peek().1 { - self.consume(); - let nodes = self.parse_extends_or_implements_nodes(); + let nodes = if let TokenContent::Operator(TokenOp::COLON) = parser.peek().1 { + parser.consume(); + let nodes = parse_extends_or_implements_nodes(parser); loc = loc.union(&nodes.last().unwrap().location); nodes } else { @@ -400,20 +446,20 @@ impl<'a> SourceParser<'a> { } }; let mut members = vec![]; - if !self.peeked_class_or_interface_start() { - self.assert_and_consume_operator(TokenOp::LBRACE); + if !peeked_class_or_interface_start(parser) { + parser.assert_and_consume_operator(TokenOp::LBRACE); while let TokenContent::Keyword(Keyword::FUNCTION | Keyword::METHOD | Keyword::PRIVATE) = - self.peek().1 + parser.peek().1 { - let saved_upper_type_parameters = self.available_tparams.clone(); - members.push(self.parse_class_member_definition()); - self.available_tparams = saved_upper_type_parameters; + let saved_upper_type_parameters = parser.available_tparams.clone(); + members.push(parse_class_member_definition(parser)); + parser.available_tparams = saved_upper_type_parameters; } - loc = loc.union(&self.assert_and_consume_operator(TokenOp::RBRACE)); + loc = loc.union(&parser.assert_and_consume_operator(TokenOp::RBRACE)); } InterfaceDeclarationCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), private, name, type_parameters, @@ -423,54 +469,54 @@ impl<'a> SourceParser<'a> { } } - pub(super) fn parse_interface(&mut self) -> InterfaceDeclaration { - let associated_comments = self.collect_preceding_comments(); + pub(super) fn parse_interface(parser: &mut super::SourceParser) -> InterfaceDeclaration { + let associated_comments = parser.collect_preceding_comments(); let (mut loc, private) = - if let Token(loc, TokenContent::Keyword(Keyword::PRIVATE)) = self.peek() { - self.consume(); - self.assert_and_consume_keyword(Keyword::INTERFACE); + if let Token(loc, TokenContent::Keyword(Keyword::PRIVATE)) = parser.peek() { + parser.consume(); + parser.assert_and_consume_keyword(Keyword::INTERFACE); (loc, true) } else { - (self.assert_and_consume_keyword(Keyword::INTERFACE), false) + (parser.assert_and_consume_keyword(Keyword::INTERFACE), false) }; - let name = self.parse_upper_id(); - let mut type_parameters = if let TokenContent::Operator(TokenOp::LT) = self.peek().1 { - self.consume(); - let type_params = self.parse_comma_separated_list_with_end_token( + let name = parser.parse_upper_id(); + let mut type_parameters = if let TokenContent::Operator(TokenOp::LT) = parser.peek().1 { + parser.consume(); + let type_params = parser.parse_comma_separated_list_with_end_token( TokenOp::GT, - &mut SourceParser::parse_type_parameter, + &mut super::type_parser::parse_type_parameter, ); - loc = loc.union(&self.assert_and_consume_operator(TokenOp::GT)); + loc = loc.union(&parser.assert_and_consume_operator(TokenOp::GT)); type_params } else { vec![] }; - self.available_tparams = type_parameters.iter().map(|it| it.name.name).collect(); - self.fix_tparams_with_generic_annot(&mut type_parameters); - let extends_or_implements_nodes = if let TokenContent::Operator(TokenOp::COLON) = self.peek().1 - { - self.consume(); - let nodes = self.parse_extends_or_implements_nodes(); - loc = loc.union(&nodes.last().unwrap().location); - nodes - } else { - vec![] - }; + parser.available_tparams = type_parameters.iter().map(|it| it.name.name).collect(); + super::type_parser::fix_tparams_with_generic_annot(parser, &mut type_parameters); + let extends_or_implements_nodes = + if let TokenContent::Operator(TokenOp::COLON) = parser.peek().1 { + parser.consume(); + let nodes = parse_extends_or_implements_nodes(parser); + loc = loc.union(&nodes.last().unwrap().location); + nodes + } else { + vec![] + }; let mut members = vec![]; - if let TokenContent::Operator(TokenOp::LBRACE) = self.peek().1 { - self.consume(); + if let TokenContent::Operator(TokenOp::LBRACE) = parser.peek().1 { + parser.consume(); while let TokenContent::Keyword(Keyword::FUNCTION | Keyword::METHOD | Keyword::PRIVATE) = - self.peek().1 + parser.peek().1 { - let saved_upper_type_parameters = self.available_tparams.clone(); - members.push(self.parse_class_member_declaration()); - self.available_tparams = saved_upper_type_parameters; + let saved_upper_type_parameters = parser.available_tparams.clone(); + members.push(parse_class_member_declaration(parser)); + parser.available_tparams = saved_upper_type_parameters; } - loc = loc.union(&self.assert_and_consume_operator(TokenOp::RBRACE)); + loc = loc.union(&parser.assert_and_consume_operator(TokenOp::RBRACE)); } InterfaceDeclarationCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), private, name, type_parameters, @@ -480,33 +526,29 @@ impl<'a> SourceParser<'a> { } } - fn parse_extends_or_implements_nodes(&mut self) -> Vec { - let id = self.parse_upper_id(); - let mut collector = vec![self.parse_identifier_annot(id)]; - while let Token(_, TokenContent::Operator(TokenOp::COMMA)) = self.peek() { - self.consume(); - let id = self.parse_upper_id(); - collector.push(self.parse_identifier_annot(id)); + fn parse_extends_or_implements_nodes(parser: &mut super::SourceParser) -> Vec { + let id = parser.parse_upper_id(); + let mut collector = vec![super::type_parser::parse_identifier_annot(parser, id)]; + while let Token(_, TokenContent::Operator(TokenOp::COMMA)) = parser.peek() { + parser.consume(); + let id = parser.parse_upper_id(); + collector.push(super::type_parser::parse_identifier_annot(parser, id)); } collector } - fn parse_type_definition_inner(&mut self) -> TypeDefinition { - if let Token(_, TokenContent::UpperId(_)) = self.peek() { - let mut variants = self.parse_comma_separated_list_with_end_token( - TokenOp::RPAREN, - &mut SourceParser::parse_variant_definition, - ); + fn parse_type_definition_inner(parser: &mut super::SourceParser) -> TypeDefinition { + if let Token(_, TokenContent::UpperId(_)) = parser.peek() { + let mut variants = parser + .parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut parse_variant_definition); variants.truncate(MAX_VARIANT_SIZE); // Location is later patched by the caller TypeDefinition::Enum { loc: Location::dummy(), variants } } else { - let mut fields = self.parse_comma_separated_list_with_end_token( - TokenOp::RPAREN, - &mut Self::parse_field_definition, - ); + let mut fields = parser + .parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut parse_field_definition); if let Some(node) = fields.get(MAX_STRUCT_SIZE) { - self.error_set.report_invalid_syntax_error( + parser.error_set.report_invalid_syntax_error( node.name.loc, format!("Maximum allowed field size is {MAX_STRUCT_SIZE}"), ); @@ -517,159 +559,158 @@ impl<'a> SourceParser<'a> { } } - fn parse_field_definition(&mut self) -> FieldDefinition { + fn parse_field_definition(parser: &mut super::SourceParser) -> FieldDefinition { let mut is_public = true; - if let TokenContent::Keyword(Keyword::PRIVATE) = self.peek().1 { + if let TokenContent::Keyword(Keyword::PRIVATE) = parser.peek().1 { is_public = false; - self.consume(); + parser.consume(); } - self.assert_and_consume_keyword(Keyword::VAL); - let name = self.parse_lower_id(); - self.assert_and_consume_operator(TokenOp::COLON); - let annotation = self.parse_annotation(); + parser.assert_and_consume_keyword(Keyword::VAL); + let name = parser.parse_lower_id(); + parser.assert_and_consume_operator(TokenOp::COLON); + let annotation = super::type_parser::parse_annotation(parser); FieldDefinition { name, annotation, is_public } } - fn parse_variant_definition(&mut self) -> VariantDefinition { - let name = self.parse_upper_id(); - if let Token(_, TokenContent::Operator(TokenOp::LPAREN)) = self.peek() { - self.consume(); - let associated_data_types = self.parse_comma_separated_list_with_end_token( + fn parse_variant_definition(parser: &mut super::SourceParser) -> VariantDefinition { + let name = parser.parse_upper_id(); + if let Token(_, TokenContent::Operator(TokenOp::LPAREN)) = parser.peek() { + parser.consume(); + let associated_data_types = parser.parse_comma_separated_list_with_end_token( TokenOp::RPAREN, - &mut SourceParser::parse_annotation, + &mut super::type_parser::parse_annotation, ); if let Some(node) = associated_data_types.get(MAX_VARIANT_SIZE) { - self.error_set.report_invalid_syntax_error( + parser.error_set.report_invalid_syntax_error( node.location(), format!("Maximum allowed field size is {MAX_VARIANT_SIZE}"), ); } - self.assert_and_consume_operator(TokenOp::RPAREN); + parser.assert_and_consume_operator(TokenOp::RPAREN); VariantDefinition { name, associated_data_types } } else { VariantDefinition { name, associated_data_types: vec![] } } } - fn peeked_class_or_interface_start(&mut self) -> bool { + fn peeked_class_or_interface_start(parser: &mut super::SourceParser) -> bool { matches!( - self.peek().1, + parser.peek().1, TokenContent::Keyword(Keyword::CLASS | Keyword::INTERFACE | Keyword::PRIVATE) ) } - pub(super) fn parse_class_member_definition(&mut self) -> ClassMemberDefinition<()> { - let mut decl = self.parse_class_member_declaration_common(true); - self.assert_and_consume_operator(TokenOp::ASSIGN); - let body = self.parse_expression(); + pub(super) fn parse_class_member_definition( + parser: &mut super::SourceParser, + ) -> ClassMemberDefinition<()> { + let mut decl = parse_class_member_declaration_common(parser, true); + parser.assert_and_consume_operator(TokenOp::ASSIGN); + let body = super::expression_parser::parse_expression(parser); decl.loc = decl.loc.union(&body.loc()); ClassMemberDefinition { decl, body } } - pub(super) fn parse_class_member_declaration(&mut self) -> ClassMemberDeclaration { - self.parse_class_member_declaration_common(false) + pub(super) fn parse_class_member_declaration( + parser: &mut super::SourceParser, + ) -> ClassMemberDeclaration { + parse_class_member_declaration_common(parser, false) } fn parse_class_member_declaration_common( - &mut self, + parser: &mut super::SourceParser, allow_private: bool, ) -> ClassMemberDeclaration { - let associated_comments = self.collect_preceding_comments(); + let associated_comments = parser.collect_preceding_comments(); let mut is_public = true; let mut is_method = true; - let mut peeked = self.peek(); + let mut peeked = parser.peek(); if let Token(peeked_loc, TokenContent::Keyword(Keyword::PRIVATE)) = peeked { if allow_private { is_public = false; } else { - self.report(peeked_loc, "Unexpected `private`".to_string()); + parser.report(peeked_loc, "Unexpected `private`".to_string()); } - self.consume(); - peeked = self.peek(); + parser.consume(); + peeked = parser.peek(); } let start_loc = &peeked.0; if let Token(_, TokenContent::Keyword(Keyword::FUNCTION)) = &peeked { is_method = false; - self.consume(); + parser.consume(); } else { - self.assert_and_consume_keyword(Keyword::METHOD); + parser.assert_and_consume_keyword(Keyword::METHOD); } if !is_method { - self.available_tparams = HashSet::new(); + parser.available_tparams = HashSet::new(); } - let mut type_parameters = if let TokenContent::Operator(TokenOp::LT) = self.peek().1 { - self.consume(); - let type_params = self.parse_comma_separated_list_with_end_token( + let mut type_parameters = if let TokenContent::Operator(TokenOp::LT) = parser.peek().1 { + parser.consume(); + let type_params = parser.parse_comma_separated_list_with_end_token( TokenOp::GT, - &mut SourceParser::parse_type_parameter, + &mut super::type_parser::parse_type_parameter, ); - self.assert_and_consume_operator(TokenOp::GT); + parser.assert_and_consume_operator(TokenOp::GT); type_params } else { vec![] }; - self.available_tparams.extend(type_parameters.iter().map(|it| it.name.name)); - self.fix_tparams_with_generic_annot(&mut type_parameters); - let name = self.parse_lower_id(); - let fun_type_loc_start = self.assert_and_consume_operator(TokenOp::LPAREN); - let parameters = if let TokenContent::Operator(TokenOp::RPAREN) = self.peek().1 { + parser.available_tparams.extend(type_parameters.iter().map(|it| it.name.name)); + super::type_parser::fix_tparams_with_generic_annot(parser, &mut type_parameters); + let name = parser.parse_lower_id(); + let fun_type_loc_start = parser.assert_and_consume_operator(TokenOp::LPAREN); + let parameters = if let TokenContent::Operator(TokenOp::RPAREN) = parser.peek().1 { vec![] } else { - self.parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut Self::parse_annotated_id) + parser.parse_comma_separated_list_with_end_token( + TokenOp::RPAREN, + &mut super::type_parser::parse_annotated_id, + ) }; - self.assert_and_consume_operator(TokenOp::RPAREN); - self.assert_and_consume_operator(TokenOp::COLON); - let return_type = self.parse_annotation(); + parser.assert_and_consume_operator(TokenOp::RPAREN); + parser.assert_and_consume_operator(TokenOp::COLON); + let return_type = super::type_parser::parse_annotation(parser); let fun_type_loc = fun_type_loc_start.union(&return_type.location()); ClassMemberDeclaration { loc: start_loc.union(&fun_type_loc), - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), is_public, is_method, name, - type_parameters: Rc::new(type_parameters), + type_parameters: std::rc::Rc::new(type_parameters), type_: annotation::Function { location: fun_type_loc, associated_comments: NO_COMMENT_REFERENCE, argument_types: parameters.iter().map(|it| it.annotation.clone()).collect_vec(), return_type: Box::new(return_type), }, - parameters: Rc::new(parameters), + parameters: std::rc::Rc::new(parameters), } } +} - fn parse_type_parameter(&mut self) -> TypeParameter { - let name = &self.parse_upper_id(); - let (bound, loc) = if let Token(_, TokenContent::Operator(TokenOp::COLON)) = self.peek() { - self.consume(); - let id = self.parse_upper_id(); - let bound = self.parse_identifier_annot(id); - let loc = name.loc.union(&bound.location); - (Some(bound), loc) - } else { - (None, name.loc) - }; - TypeParameter { loc, name: *name, bound } - } - - pub(super) fn parse_expression_with_comment_store(mut self) -> (CommentStore, expr::E<()>) { - let e = self.parse_expression(); - (self.comments_store, e) - } - - fn parse_expression(&mut self) -> expr::E<()> { - self.parse_match() +mod expression_parser { + use super::{ + super::lexer::{Keyword, Token, TokenContent, TokenOp}, + MAX_STRUCT_SIZE, + }; + use itertools::Itertools; + use samlang_ast::{source::*, Location}; + use samlang_heap::PStr; + use std::collections::HashMap; + + pub(super) fn parse_expression(parser: &mut super::SourceParser) -> expr::E<()> { + parse_match(parser) } - fn parse_expression_with_ending_comments(&mut self) -> expr::E<()> { - let mut expr = self.parse_expression(); - let mut new_comments = self.collect_preceding_comments(); + fn parse_expression_with_ending_comments(parser: &mut super::SourceParser) -> expr::E<()> { + let mut expr = parse_expression(parser); + let mut new_comments = parser.collect_preceding_comments(); let common = expr.common_mut(); - let associated_comments = self.comments_store.get_mut(common.associated_comments); + let associated_comments = parser.comments_store.get_mut(common.associated_comments); match associated_comments { CommentsNode::NoComment => { - common.associated_comments = self.comments_store.create_comment_reference(new_comments); + common.associated_comments = parser.comments_store.create_comment_reference(new_comments); } CommentsNode::Comments(existing_loc, existing_comments) => { let new_loc = new_comments.iter().fold(*existing_loc, |l1, c| l1.union(&c.location)); @@ -680,71 +721,73 @@ impl<'a> SourceParser<'a> { expr } - fn parse_match(&mut self) -> expr::E<()> { - let associated_comments = self.collect_preceding_comments(); - if let Token(peeked_loc, TokenContent::Keyword(Keyword::MATCH)) = self.peek() { - self.consume(); - let match_expression = self.parse_expression_with_ending_comments(); - self.assert_and_consume_operator(TokenOp::LBRACE); - let mut matching_list = vec![self.parse_pattern_to_expression()]; + fn parse_match(parser: &mut super::SourceParser) -> expr::E<()> { + let associated_comments = parser.collect_preceding_comments(); + if let Token(peeked_loc, TokenContent::Keyword(Keyword::MATCH)) = parser.peek() { + parser.consume(); + let match_expression = parse_expression_with_ending_comments(parser); + parser.assert_and_consume_operator(TokenOp::LBRACE); + let mut matching_list = vec![parse_pattern_to_expression(parser)]; while matches!( - self.peek().1, + parser.peek().1, TokenContent::Operator(TokenOp::LBRACE | TokenOp::LPAREN | TokenOp::UNDERSCORE) | TokenContent::LowerId(_) | TokenContent::UpperId(_) ) { - matching_list.push(self.parse_pattern_to_expression()); + matching_list.push(parse_pattern_to_expression(parser)); } - let loc = peeked_loc.union(&self.assert_and_consume_operator(TokenOp::RBRACE)); + let loc = peeked_loc.union(&parser.assert_and_consume_operator(TokenOp::RBRACE)); expr::E::Match(expr::Match { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), type_: (), }, matched: Box::new(match_expression), cases: matching_list, }) } else { - self.parse_if_else() + parse_if_else(parser) } } - fn parse_pattern_to_expression(&mut self) -> expr::VariantPatternToExpression<()> { - let pattern = self.parse_matching_pattern(); - self.assert_and_consume_operator(TokenOp::ARROW); - let expression = self.parse_expression(); - let loc = if matches!(self.peek().1, TokenContent::Operator(TokenOp::RBRACE)) { + fn parse_pattern_to_expression( + parser: &mut super::SourceParser, + ) -> expr::VariantPatternToExpression<()> { + let pattern = super::pattern_parser::parse_matching_pattern(parser); + parser.assert_and_consume_operator(TokenOp::ARROW); + let expression = parse_expression(parser); + let loc = if matches!(parser.peek().1, TokenContent::Operator(TokenOp::RBRACE)) { pattern.loc().union(&expression.loc()) } else { - pattern.loc().union(&self.assert_and_consume_operator(TokenOp::COMMA)) + pattern.loc().union(&parser.assert_and_consume_operator(TokenOp::COMMA)) }; expr::VariantPatternToExpression { loc, pattern, body: Box::new(expression) } } - fn parse_if_else(&mut self) -> expr::E<()> { - let associated_comments = self.collect_preceding_comments(); - if let Token(peeked_loc, TokenContent::Keyword(Keyword::IF)) = self.peek() { - self.consume(); + fn parse_if_else(parser: &mut super::SourceParser) -> expr::E<()> { + let associated_comments = parser.collect_preceding_comments(); + if let Token(peeked_loc, TokenContent::Keyword(Keyword::IF)) = parser.peek() { + parser.consume(); let condition = - if let Token(_peeked_let_loc, TokenContent::Keyword(Keyword::LET)) = self.peek() { - self.consume(); - let pattern = self.parse_matching_pattern(); - self.assert_and_consume_operator(TokenOp::ASSIGN); - let expr = self.parse_expression(); - IfElseCondition::Guard(pattern, expr) + if let Token(_peeked_let_loc, TokenContent::Keyword(Keyword::LET)) = parser.peek() { + parser.consume(); + let pattern = super::pattern_parser::parse_matching_pattern(parser); + parser.assert_and_consume_operator(TokenOp::ASSIGN); + let expr = parse_expression(parser); + expr::IfElseCondition::Guard(pattern, expr) } else { - IfElseCondition::Expression(self.parse_expression()) + expr::IfElseCondition::Expression(parse_expression(parser)) }; - self.assert_and_consume_keyword(Keyword::THEN); - let e1 = self.parse_expression(); - self.assert_and_consume_keyword(Keyword::ELSE); - let e2 = self.parse_expression(); + parser.assert_and_consume_keyword(Keyword::THEN); + let e1 = parse_expression(parser); + parser.assert_and_consume_keyword(Keyword::ELSE); + let e2 = parse_expression(parser); let loc = peeked_loc.union(&e2.loc()); return expr::E::IfElse(expr::IfElse { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), type_: (), }, condition: Box::new(condition), @@ -752,22 +795,22 @@ impl<'a> SourceParser<'a> { e2: Box::new(e2), }); } - self.parse_disjunction() + parse_disjunction(parser) } - fn parse_disjunction(&mut self) -> expr::E<()> { - let mut e = self.parse_conjunction(); - while let TokenContent::Operator(TokenOp::OR) = self.peek().1 { - let concrete_comments = self.collect_preceding_comments(); + fn parse_disjunction(parser: &mut super::SourceParser) -> expr::E<()> { + let mut e = parse_conjunction(parser); + while let TokenContent::Operator(TokenOp::OR) = parser.peek().1 { + let concrete_comments = parser.collect_preceding_comments(); let operator_preceding_comments = - self.comments_store.create_comment_reference(concrete_comments); - self.consume(); - let e2 = self.parse_conjunction(); + parser.comments_store.create_comment_reference(concrete_comments); + parser.consume(); + let e2 = parse_conjunction(parser); let loc = e.loc().union(&e2.loc()); e = expr::E::Binary(expr::Binary { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), type_: (), }, operator_preceding_comments, @@ -779,19 +822,19 @@ impl<'a> SourceParser<'a> { e } - fn parse_conjunction(&mut self) -> expr::E<()> { - let mut e = self.parse_comparison(); - while let TokenContent::Operator(TokenOp::AND) = self.peek().1 { - let concrete_comments = self.collect_preceding_comments(); + fn parse_conjunction(parser: &mut super::SourceParser) -> expr::E<()> { + let mut e = parse_comparison(parser); + while let TokenContent::Operator(TokenOp::AND) = parser.peek().1 { + let concrete_comments = parser.collect_preceding_comments(); let operator_preceding_comments = - self.comments_store.create_comment_reference(concrete_comments); - self.consume(); - let e2 = self.parse_comparison(); + parser.comments_store.create_comment_reference(concrete_comments); + parser.consume(); + let e2 = parse_comparison(parser); let loc = e.loc().union(&e2.loc()); e = expr::E::Binary(expr::Binary { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), type_: (), }, operator_preceding_comments, @@ -803,13 +846,13 @@ impl<'a> SourceParser<'a> { e } - fn parse_comparison(&mut self) -> expr::E<()> { - let mut e = self.parse_term(); + fn parse_comparison(parser: &mut super::SourceParser) -> expr::E<()> { + let mut e = parse_term(parser); loop { - let concrete_comments = self.collect_preceding_comments(); + let concrete_comments = parser.collect_preceding_comments(); let operator_preceding_comments = - self.comments_store.create_comment_reference(concrete_comments); - let operator = match self.peek().1 { + parser.comments_store.create_comment_reference(concrete_comments); + let operator = match parser.peek().1 { TokenContent::Operator(TokenOp::LT) => expr::BinaryOperator::LT, TokenContent::Operator(TokenOp::LE) => expr::BinaryOperator::LE, TokenContent::Operator(TokenOp::GT) => expr::BinaryOperator::GT, @@ -818,13 +861,13 @@ impl<'a> SourceParser<'a> { TokenContent::Operator(TokenOp::NE) => expr::BinaryOperator::NE, _ => break, }; - self.consume(); - let e2 = self.parse_term(); + parser.consume(); + let e2 = parse_term(parser); let loc = e.loc().union(&e2.loc()); e = expr::E::Binary(expr::Binary { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), type_: (), }, operator_preceding_comments, @@ -836,24 +879,24 @@ impl<'a> SourceParser<'a> { e } - fn parse_term(&mut self) -> expr::E<()> { - let mut e = self.parse_factor(); + fn parse_term(parser: &mut super::SourceParser) -> expr::E<()> { + let mut e = parse_factor(parser); loop { - let concrete_comments = self.collect_preceding_comments(); + let concrete_comments = parser.collect_preceding_comments(); let operator_preceding_comments = - self.comments_store.create_comment_reference(concrete_comments); - let operator = match self.peek().1 { + parser.comments_store.create_comment_reference(concrete_comments); + let operator = match parser.peek().1 { TokenContent::Operator(TokenOp::PLUS) => expr::BinaryOperator::PLUS, TokenContent::Operator(TokenOp::MINUS) => expr::BinaryOperator::MINUS, _ => break, }; - self.consume(); - let e2 = self.parse_factor(); + parser.consume(); + let e2 = parse_factor(parser); let loc = e.loc().union(&e2.loc()); e = expr::E::Binary(expr::Binary { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), type_: (), }, operator_preceding_comments, @@ -865,25 +908,25 @@ impl<'a> SourceParser<'a> { e } - fn parse_factor(&mut self) -> expr::E<()> { - let mut e = self.parse_concat(); + fn parse_factor(parser: &mut super::SourceParser) -> expr::E<()> { + let mut e = parse_concat(parser); loop { - let concrete_comments = self.collect_preceding_comments(); + let concrete_comments = parser.collect_preceding_comments(); let operator_preceding_comments = - self.comments_store.create_comment_reference(concrete_comments); - let operator = match self.peek().1 { + parser.comments_store.create_comment_reference(concrete_comments); + let operator = match parser.peek().1 { TokenContent::Operator(TokenOp::MUL) => expr::BinaryOperator::MUL, TokenContent::Operator(TokenOp::DIV) => expr::BinaryOperator::DIV, TokenContent::Operator(TokenOp::MOD) => expr::BinaryOperator::MOD, _ => break, }; - self.consume(); - let e2 = self.parse_concat(); + parser.consume(); + let e2 = parse_concat(parser); let loc = e.loc().union(&e2.loc()); e = expr::E::Binary(expr::Binary { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), type_: (), }, operator_preceding_comments, @@ -895,19 +938,19 @@ impl<'a> SourceParser<'a> { e } - fn parse_concat(&mut self) -> expr::E<()> { - let mut e = self.parse_unary_expression(); - while let TokenContent::Operator(TokenOp::COLONCOLON) = self.peek().1 { - let concrete_comments = self.collect_preceding_comments(); + fn parse_concat(parser: &mut super::SourceParser) -> expr::E<()> { + let mut e = parse_unary_expression(parser); + while let TokenContent::Operator(TokenOp::COLONCOLON) = parser.peek().1 { + let concrete_comments = parser.collect_preceding_comments(); let operator_preceding_comments = - self.comments_store.create_comment_reference(concrete_comments); - self.consume(); - let e2 = self.parse_unary_expression(); + parser.comments_store.create_comment_reference(concrete_comments); + parser.consume(); + let e2 = parse_unary_expression(parser); let loc = e.loc().union(&e2.loc()); e = expr::E::Binary(expr::Binary { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), type_: (), }, operator_preceding_comments, @@ -919,18 +962,20 @@ impl<'a> SourceParser<'a> { e } - fn parse_unary_expression(&mut self) -> expr::E<()> { - let associated_comments = self.collect_preceding_comments(); - let Token(peeked_loc, content) = self.peek(); + fn parse_unary_expression(parser: &mut super::SourceParser) -> expr::E<()> { + let associated_comments = parser.collect_preceding_comments(); + let Token(peeked_loc, content) = parser.peek(); match content { TokenContent::Operator(TokenOp::NOT) => { - self.consume(); - let argument = self.parse_function_call_or_field_access(); + parser.consume(); + let argument = parse_function_call_or_field_access(parser); let loc = peeked_loc.union(&argument.loc()); expr::E::Unary(expr::Unary { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, operator: expr::UnaryOperator::NOT, @@ -938,53 +983,56 @@ impl<'a> SourceParser<'a> { }) } TokenContent::Operator(TokenOp::MINUS) => { - self.consume(); - let argument = self.parse_function_call_or_field_access(); + parser.consume(); + let argument = parse_function_call_or_field_access(parser); let loc = peeked_loc.union(&argument.loc()); expr::E::Unary(expr::Unary { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, operator: expr::UnaryOperator::NEG, argument: Box::new(argument), }) } - _ => self.parse_function_call_or_field_access(), + _ => parse_function_call_or_field_access(parser), } } - fn parse_function_call_or_field_access(&mut self) -> expr::E<()> { + fn parse_function_call_or_field_access(parser: &mut super::SourceParser) -> expr::E<()> { // Treat function arguments or field name as postfix. // Then use Kleene star trick to parse. - let mut function_expression = self.parse_base_expression(); + let mut function_expression = parse_base_expression(parser); loop { - match self.peek() { + match parser.peek() { Token(dot_loc, TokenContent::Operator(TokenOp::DOT)) => { - let mut field_preceding_comments = self.collect_preceding_comments(); - self.consume(); - field_preceding_comments.append(&mut self.collect_preceding_comments()); - let (field_loc, field_name) = match self.peek() { + let mut field_preceding_comments = parser.collect_preceding_comments(); + parser.consume(); + field_preceding_comments.append(&mut parser.collect_preceding_comments()); + let (field_loc, field_name) = match parser.peek() { Token(l, TokenContent::LowerId(id) | TokenContent::UpperId(id)) => { - self.consume(); + parser.consume(); (l, id) } Token(l, t) => { - self.report(l, format!("Expected identifier, but get {}", t.pretty_print(self.heap))); + parser + .report(l, format!("Expected identifier, but get {}", t.pretty_print(parser.heap))); (Location { end: l.start, ..dot_loc }, PStr::MISSING) } }; let mut loc = function_expression.loc().union(&field_loc); let explicit_type_arguments = - if let Token(_, TokenContent::Operator(TokenOp::LT)) = self.peek() { - field_preceding_comments.append(&mut self.collect_preceding_comments()); - self.assert_and_consume_operator(TokenOp::LT); - let type_args = self.parse_comma_separated_list_with_end_token( + if let Token(_, TokenContent::Operator(TokenOp::LT)) = parser.peek() { + field_preceding_comments.append(&mut parser.collect_preceding_comments()); + parser.assert_and_consume_operator(TokenOp::LT); + let type_args = parser.parse_comma_separated_list_with_end_token( TokenOp::GT, - &mut SourceParser::parse_annotation, + &mut super::type_parser::parse_annotation, ); - loc = loc.union(&self.assert_and_consume_operator(TokenOp::GT)); + loc = loc.union(&parser.assert_and_consume_operator(TokenOp::GT)); type_args } else { vec![] @@ -992,7 +1040,7 @@ impl<'a> SourceParser<'a> { function_expression = expr::E::FieldAccess(expr::FieldAccess { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), type_: (), }, explicit_type_arguments, @@ -1000,7 +1048,7 @@ impl<'a> SourceParser<'a> { object: Box::new(function_expression), field_name: Id { loc: field_loc, - associated_comments: self + associated_comments: parser .comments_store .create_comment_reference(field_preceding_comments), name: field_name, @@ -1009,22 +1057,22 @@ impl<'a> SourceParser<'a> { }); } Token(_, TokenContent::Operator(TokenOp::LPAREN)) => { - self.consume(); + parser.consume(); let function_arguments = - if let Token(_, TokenContent::Operator(TokenOp::RPAREN)) = self.peek() { + if let Token(_, TokenContent::Operator(TokenOp::RPAREN)) = parser.peek() { vec![] } else { - self.parse_comma_separated_list_with_end_token( + parser.parse_comma_separated_list_with_end_token( TokenOp::RPAREN, - &mut SourceParser::parse_expression_with_ending_comments, + &mut parse_expression_with_ending_comments, ) }; let loc = - function_expression.loc().union(&self.assert_and_consume_operator(TokenOp::RPAREN)); + function_expression.loc().union(&parser.assert_and_consume_operator(TokenOp::RPAREN)); function_expression = expr::E::Call(expr::Call { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), type_: (), }, callee: Box::new(function_expression), @@ -1036,99 +1084,114 @@ impl<'a> SourceParser<'a> { } } - fn parse_base_expression(&mut self) -> expr::E<()> { - let associated_comments = self.collect_preceding_comments(); - let peeked = self.peek(); + fn parse_base_expression(parser: &mut super::SourceParser) -> expr::E<()> { + let associated_comments = parser.collect_preceding_comments(); + let peeked = parser.peek(); match peeked { Token(peeked_loc, TokenContent::Keyword(Keyword::TRUE)) => { - self.consume(); + parser.consume(); return expr::E::Literal( expr::ExpressionCommon { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, Literal::Bool(true), ); } Token(peeked_loc, TokenContent::Keyword(Keyword::FALSE)) => { - self.consume(); + parser.consume(); return expr::E::Literal( expr::ExpressionCommon { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, Literal::Bool(false), ); } Token(peeked_loc, TokenContent::IntLiteral(i)) => { - self.consume(); + parser.consume(); return expr::E::Literal( expr::ExpressionCommon { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, - Literal::Int(i.as_str(self.heap).parse::().unwrap_or(0)), + Literal::Int(i.as_str(parser.heap).parse::().unwrap_or(0)), ); } Token(peeked_loc, TokenContent::StringLiteral(s)) => { - self.consume(); - let chars = s.as_str(self.heap).chars().collect_vec(); - let str_lit = unescape_quotes(&chars[1..(chars.len() - 1)].iter().collect::()); + parser.consume(); + let chars = s.as_str(parser.heap).chars().collect_vec(); + let str_lit = + super::utils::unescape_quotes(&chars[1..(chars.len() - 1)].iter().collect::()); return expr::E::Literal( expr::ExpressionCommon { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, - Literal::String(self.heap.alloc_string(str_lit)), + Literal::String(parser.heap.alloc_string(str_lit)), ); } Token(peeked_loc, TokenContent::Keyword(Keyword::THIS)) => { - self.consume(); + parser.consume(); return expr::E::LocalId( expr::ExpressionCommon { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, Id { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), name: PStr::THIS, }, ); } Token(peeked_loc, TokenContent::LowerId(name)) => { - self.consume(); + parser.consume(); return expr::E::LocalId( expr::ExpressionCommon { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, Id { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), name, }, ); } Token(peeked_loc, TokenContent::UpperId(name)) => { - self.consume(); + parser.consume(); return expr::E::ClassId( expr::ExpressionCommon { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, - self.resolve_class(name), + super::utils::resolve_class(parser, name), Id { loc: peeked_loc, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), name, }, ); @@ -1138,19 +1201,19 @@ impl<'a> SourceParser<'a> { // Lambda or tuple or nested expression if let Token(peeked_loc, TokenContent::Operator(TokenOp::LPAREN)) = peeked { - self.consume(); + parser.consume(); // () -> ... - if let Token(_, TokenContent::Operator(TokenOp::RPAREN)) = self.peek() { + if let Token(_, TokenContent::Operator(TokenOp::RPAREN)) = parser.peek() { let mut comments = associated_comments; - self.consume(); - comments.append(&mut self.collect_preceding_comments()); - self.assert_and_consume_operator(TokenOp::ARROW); - let body = self.parse_expression(); + parser.consume(); + comments.append(&mut parser.collect_preceding_comments()); + parser.assert_and_consume_operator(TokenOp::ARROW); + let body = parse_expression(parser); let loc = peeked_loc.union(&body.loc()); return expr::E::Lambda(expr::Lambda { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(comments), + associated_comments: parser.comments_store.create_comment_reference(comments), type_: (), }, parameters: vec![], @@ -1160,25 +1223,25 @@ impl<'a> SourceParser<'a> { } // (id ... - if let Token(loc_id_for_lambda, TokenContent::LowerId(id_for_lambda)) = self.peek() { - self.consume(); - let next = self.peek(); + if let Token(loc_id_for_lambda, TokenContent::LowerId(id_for_lambda)) = parser.peek() { + parser.consume(); + let next = parser.peek(); match next.1 { // (id: ... definitely a lambda TokenContent::Operator(TokenOp::COLON) => { - self.unconsume(); - let parameters = self.parse_comma_separated_list_with_end_token( + parser.unconsume(); + let parameters = parser.parse_comma_separated_list_with_end_token( TokenOp::RPAREN, - &mut Self::parse_optionally_annotated_id, + &mut super::type_parser::parse_optionally_annotated_id, ); - self.assert_and_consume_operator(TokenOp::RPAREN); - self.assert_and_consume_operator(TokenOp::ARROW); - let body = self.parse_expression(); + parser.assert_and_consume_operator(TokenOp::RPAREN); + parser.assert_and_consume_operator(TokenOp::ARROW); + let body = parse_expression(parser); let loc = peeked_loc.union(&body.loc()); return expr::E::Lambda(expr::Lambda { common: expr::ExpressionCommon { loc, - associated_comments: self + associated_comments: parser .comments_store .create_comment_reference(associated_comments), type_: (), @@ -1190,22 +1253,22 @@ impl<'a> SourceParser<'a> { } // (id, ..., might be lambda, might be tuple TokenContent::Operator(TokenOp::COMMA) => { - self.unconsume(); + parser.unconsume(); // Advance as far as possible for a comma separated lower id. // This is common for both arrow function and tuple. - let mut parameters_or_tuple_elements_cover = vec![self.parse_lower_id()]; - while let Token(_, TokenContent::Operator(TokenOp::COMMA)) = self.peek() { - self.consume(); - if let Token(_, TokenContent::LowerId(_)) = self.peek() { - self.consume(); - match self.peek() { + let mut parameters_or_tuple_elements_cover = vec![parser.parse_lower_id()]; + while let Token(_, TokenContent::Operator(TokenOp::COMMA)) = parser.peek() { + parser.consume(); + if let Token(_, TokenContent::LowerId(_)) = parser.peek() { + parser.consume(); + match parser.peek() { Token(_, TokenContent::Operator(TokenOp::COMMA)) | Token(_, TokenContent::Operator(TokenOp::RPAREN)) => { - self.unconsume(); // unconsume lower id - parameters_or_tuple_elements_cover.push(self.parse_lower_id()); + parser.unconsume(); // unconsume lower id + parameters_or_tuple_elements_cover.push(parser.parse_lower_id()); } _ => { - self.unconsume(); // unconsume lower id + parser.unconsume(); // unconsume lower id break; } } @@ -1215,17 +1278,17 @@ impl<'a> SourceParser<'a> { } // If we see ), it means that the cover is complete and still ambiguous. if let Token(right_parenthesis_loc, TokenContent::Operator(TokenOp::RPAREN)) = - self.peek() + parser.peek() { - self.consume(); - if let Token(_, TokenContent::Operator(TokenOp::ARROW)) = self.peek() { - self.consume(); - let body = self.parse_expression(); + parser.consume(); + if let Token(_, TokenContent::Operator(TokenOp::ARROW)) = parser.peek() { + parser.consume(); + let body = parse_expression(parser); let loc = peeked_loc.union(&body.loc()); return expr::E::Lambda(expr::Lambda { common: expr::ExpressionCommon { loc, - associated_comments: self + associated_comments: parser .comments_store .create_comment_reference(associated_comments), type_: (), @@ -1255,7 +1318,7 @@ impl<'a> SourceParser<'a> { return expr::E::Tuple( expr::ExpressionCommon { loc, - associated_comments: self + associated_comments: parser .comments_store .create_comment_reference(associated_comments), type_: (), @@ -1264,27 +1327,27 @@ impl<'a> SourceParser<'a> { ); } } - if let Token(_, TokenContent::LowerId(_)) = self.peek() { - self.consume(); - if let Token(_, TokenContent::Operator(TokenOp::COLON)) = self.peek() { - self.unconsume(); - let rest_parameters = self.parse_comma_separated_list_with_end_token( + if let Token(_, TokenContent::LowerId(_)) = parser.peek() { + parser.consume(); + if let Token(_, TokenContent::Operator(TokenOp::COLON)) = parser.peek() { + parser.unconsume(); + let rest_parameters = parser.parse_comma_separated_list_with_end_token( TokenOp::RPAREN, - &mut Self::parse_optionally_annotated_id, + &mut super::type_parser::parse_optionally_annotated_id, ); let parameters = parameters_or_tuple_elements_cover .into_iter() .map(|name| OptionallyAnnotatedId { name, type_: (), annotation: None }) .chain(rest_parameters) .collect_vec(); - self.assert_and_consume_operator(TokenOp::RPAREN); - self.assert_and_consume_operator(TokenOp::ARROW); - let body = self.parse_expression(); + parser.assert_and_consume_operator(TokenOp::RPAREN); + parser.assert_and_consume_operator(TokenOp::ARROW); + let body = parse_expression(parser); let loc = peeked_loc.union(&body.loc()); return expr::E::Lambda(expr::Lambda { common: expr::ExpressionCommon { loc, - associated_comments: self + associated_comments: parser .comments_store .create_comment_reference(associated_comments), type_: (), @@ -1294,12 +1357,10 @@ impl<'a> SourceParser<'a> { body: Box::new(body), }); } - self.unconsume(); + parser.unconsume(); } - let rest_tuple_elements = self.parse_comma_separated_list_with_end_token( - TokenOp::RPAREN, - &mut SourceParser::parse_expression, - ); + let rest_tuple_elements = parser + .parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut parse_expression); let mut tuple_elements = parameters_or_tuple_elements_cover .into_iter() .map(|name| { @@ -1315,17 +1376,17 @@ impl<'a> SourceParser<'a> { .chain(rest_tuple_elements) .collect_vec(); if let Some(node) = tuple_elements.get(MAX_STRUCT_SIZE) { - self.error_set.report_invalid_syntax_error( + parser.error_set.report_invalid_syntax_error( node.loc(), format!("Maximum allowed tuple size is {MAX_STRUCT_SIZE}"), ); } tuple_elements.truncate(MAX_STRUCT_SIZE); - let end_loc = self.assert_and_consume_operator(TokenOp::RPAREN); + let end_loc = parser.assert_and_consume_operator(TokenOp::RPAREN); let loc = peeked_loc.union(&end_loc); let common = expr::ExpressionCommon { loc, - associated_comments: self + associated_comments: parser .comments_store .create_comment_reference(associated_comments), type_: (), @@ -1335,23 +1396,23 @@ impl<'a> SourceParser<'a> { } // (id) -> ... OR (id) TokenContent::Operator(TokenOp::RPAREN) => { - self.consume(); - if let Token(_, TokenContent::Operator(TokenOp::ARROW)) = self.peek() { + parser.consume(); + if let Token(_, TokenContent::Operator(TokenOp::ARROW)) = parser.peek() { let mut comments = associated_comments; - comments.append(&mut self.collect_preceding_comments()); - self.consume(); - let body = self.parse_expression(); + comments.append(&mut parser.collect_preceding_comments()); + parser.consume(); + let body = parse_expression(parser); let loc = peeked_loc.union(&body.loc()); return expr::E::Lambda(expr::Lambda { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(comments), + associated_comments: parser.comments_store.create_comment_reference(comments), type_: (), }, parameters: vec![OptionallyAnnotatedId { name: Id { loc: loc_id_for_lambda, - associated_comments: self.comments_store.create_comment_reference(vec![]), + associated_comments: parser.comments_store.create_comment_reference(vec![]), name: id_for_lambda, }, type_: (), @@ -1362,29 +1423,27 @@ impl<'a> SourceParser<'a> { }); } else { // (id) - self.unconsume(); + parser.unconsume(); } } _ => {} } - self.unconsume(); + parser.unconsume(); } - let mut expressions = self.parse_comma_separated_list_with_end_token( - TokenOp::RPAREN, - &mut SourceParser::parse_expression, - ); + let mut expressions = + parser.parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut parse_expression); if let Some(node) = expressions.get(MAX_STRUCT_SIZE) { - self.error_set.report_invalid_syntax_error( + parser.error_set.report_invalid_syntax_error( node.loc(), format!("Maximum allowed tuple size is {MAX_STRUCT_SIZE}"), ); } expressions.truncate(MAX_STRUCT_SIZE); - let end_loc = self.assert_and_consume_operator(TokenOp::RPAREN); + let end_loc = parser.assert_and_consume_operator(TokenOp::RPAREN); let loc = peeked_loc.union(&end_loc); let common = expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), type_: (), }; debug_assert!(!expressions.is_empty()); @@ -1396,21 +1455,23 @@ impl<'a> SourceParser<'a> { // Statement Block: { ... } if let Token(peeked_loc, TokenContent::Operator(TokenOp::LBRACE)) = peeked { - self.consume(); + parser.consume(); let mut statements = vec![]; - while let Token(_, TokenContent::Keyword(Keyword::LET)) = self.peek() { - statements.push(self.parse_statement()); + while let Token(_, TokenContent::Keyword(Keyword::LET)) = parser.peek() { + statements.push(parse_statement(parser)); } // No final expression - if let Token(end_loc, TokenContent::Operator(TokenOp::RBRACE)) = self.peek() { - self.consume(); + if let Token(end_loc, TokenContent::Operator(TokenOp::RBRACE)) = parser.peek() { + parser.consume(); let loc = peeked_loc.union(&end_loc); return expr::E::Block(expr::Block { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser + .comments_store + .create_comment_reference(associated_comments), type_: (), }, statements, @@ -1419,12 +1480,12 @@ impl<'a> SourceParser<'a> { } // Has final expression - let expression = self.parse_expression_with_ending_comments(); - let loc = peeked_loc.union(&self.assert_and_consume_operator(TokenOp::RBRACE)); + let expression = parse_expression_with_ending_comments(parser); + let loc = peeked_loc.union(&parser.assert_and_consume_operator(TokenOp::RBRACE)); return expr::E::Block(expr::Block { common: expr::ExpressionCommon { loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), type_: (), }, statements, @@ -1433,34 +1494,36 @@ impl<'a> SourceParser<'a> { } // Error case - self.report( + parser.report( peeked.0, - format!("Expected: expression, actual: {}", peeked.1.pretty_print(self.heap)), + format!("Expected: expression, actual: {}", peeked.1.pretty_print(parser.heap)), ); expr::E::Literal( expr::ExpressionCommon { loc: peeked.0, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), type_: (), }, Literal::Int(0), ) } - pub(super) fn parse_statement(&mut self) -> expr::DeclarationStatement<()> { - let concrete_comments = self.collect_preceding_comments(); - let associated_comments = self.comments_store.create_comment_reference(concrete_comments); - let start_loc = self.assert_and_consume_keyword(Keyword::LET); - let pattern = self.parse_matching_pattern(); - let annotation = if let Token(_, TokenContent::Operator(TokenOp::COLON)) = self.peek() { - self.consume(); - Some(self.parse_annotation()) + pub(super) fn parse_statement( + parser: &mut super::SourceParser, + ) -> expr::DeclarationStatement<()> { + let concrete_comments = parser.collect_preceding_comments(); + let associated_comments = parser.comments_store.create_comment_reference(concrete_comments); + let start_loc = parser.assert_and_consume_keyword(Keyword::LET); + let pattern = super::pattern_parser::parse_matching_pattern(parser); + let annotation = if let Token(_, TokenContent::Operator(TokenOp::COLON)) = parser.peek() { + parser.consume(); + Some(super::type_parser::parse_annotation(parser)) } else { None }; - self.assert_and_consume_operator(TokenOp::ASSIGN); - let assigned_expression = Box::new(self.parse_expression()); - let loc = start_loc.union(&self.assert_and_consume_operator(TokenOp::SEMICOLON)); + parser.assert_and_consume_operator(TokenOp::ASSIGN); + let assigned_expression = Box::new(parse_expression(parser)); + let loc = start_loc.union(&parser.assert_and_consume_operator(TokenOp::SEMICOLON)); expr::DeclarationStatement { loc, associated_comments, @@ -1469,31 +1532,44 @@ impl<'a> SourceParser<'a> { assigned_expression, } } +} + +mod pattern_parser { + use super::super::lexer::{Keyword, Token, TokenContent, TokenOp}; + use samlang_ast::source::*; - fn parse_matching_pattern_with_unit(&mut self) -> (pattern::MatchingPattern<()>, ()) { - (self.parse_matching_pattern(), ()) + pub(super) fn parse_matching_pattern_with_unit( + parser: &mut super::SourceParser, + ) -> (pattern::MatchingPattern<()>, ()) { + (parse_matching_pattern(parser), ()) } - pub(super) fn parse_matching_pattern(&mut self) -> pattern::MatchingPattern<()> { - let peeked = self.peek(); + pub(super) fn parse_matching_pattern( + parser: &mut super::SourceParser, + ) -> pattern::MatchingPattern<()> { + let peeked = parser.peek(); if let Token(peeked_loc, TokenContent::Operator(TokenOp::LPAREN)) = peeked { - self.consume(); - let destructured_names = - self.parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut |s: &mut Self| { - pattern::TuplePatternElement { pattern: Box::new(s.parse_matching_pattern()), type_: () } - }); - let end_location = self.assert_and_consume_operator(TokenOp::RPAREN); + parser.consume(); + let destructured_names = parser.parse_comma_separated_list_with_end_token( + TokenOp::RPAREN, + &mut |s: &mut super::SourceParser| pattern::TuplePatternElement { + pattern: Box::new(parse_matching_pattern(s)), + type_: (), + }, + ); + let end_location = parser.assert_and_consume_operator(TokenOp::RPAREN); return pattern::MatchingPattern::Tuple(peeked_loc.union(&end_location), destructured_names); } if let Token(peeked_loc, TokenContent::Operator(TokenOp::LBRACE)) = peeked { - self.consume(); - let destructured_names = - self.parse_comma_separated_list_with_end_token(TokenOp::RBRACE, &mut |s: &mut Self| { + parser.consume(); + let destructured_names = parser.parse_comma_separated_list_with_end_token( + TokenOp::RBRACE, + &mut |s: &mut super::SourceParser| { let field_name = s.parse_lower_id(); let (pattern, loc, shorthand) = if let Token(_, TokenContent::Keyword(Keyword::AS)) = s.peek() { s.consume(); - let nested = Box::new(s.parse_matching_pattern()); + let nested = Box::new(parse_matching_pattern(s)); let loc = field_name.loc.union(nested.loc()); (nested, loc, false) } else { @@ -1507,21 +1583,22 @@ impl<'a> SourceParser<'a> { shorthand, type_: (), } - }); - let end_location = self.assert_and_consume_operator(TokenOp::RBRACE); + }, + ); + let end_location = parser.assert_and_consume_operator(TokenOp::RBRACE); return pattern::MatchingPattern::Object(peeked_loc.union(&end_location), destructured_names); } if let Token(peeked_loc, TokenContent::UpperId(id)) = peeked { - self.consume(); + parser.consume(); let tag = Id { loc: peeked_loc, associated_comments: NO_COMMENT_REFERENCE, name: id }; let (data_variables, loc) = - if let Token(_, TokenContent::Operator(TokenOp::LPAREN)) = self.peek() { - self.consume(); - let data_variables = self.parse_comma_separated_list_with_end_token( + if let Token(_, TokenContent::Operator(TokenOp::LPAREN)) = parser.peek() { + parser.consume(); + let data_variables = parser.parse_comma_separated_list_with_end_token( TokenOp::RPAREN, - &mut Self::parse_matching_pattern_with_unit, + &mut parse_matching_pattern_with_unit, ); - let end_loc = self.assert_and_consume_operator(TokenOp::RPAREN); + let end_loc = parser.assert_and_consume_operator(TokenOp::RPAREN); (data_variables, peeked_loc.union(&end_loc)) } else { (Vec::with_capacity(0), peeked_loc) @@ -1535,117 +1612,97 @@ impl<'a> SourceParser<'a> { }); } if let Token(peeked_loc, TokenContent::Operator(TokenOp::UNDERSCORE)) = peeked { - self.consume(); + parser.consume(); return pattern::MatchingPattern::Wildcard(peeked_loc); } pattern::MatchingPattern::Id( Id { loc: peeked.0, associated_comments: NO_COMMENT_REFERENCE, - name: self.assert_and_peek_lower_id().1, + name: parser.assert_and_peek_lower_id().1, }, (), ) } +} - fn parse_upper_id(&mut self) -> Id { - let associated_comments = self.collect_preceding_comments(); - let (loc, name) = self.assert_and_peek_upper_id(); - Id { - loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), - name, - } - } - - fn parse_lower_id(&mut self) -> Id { - let associated_comments = self.collect_preceding_comments(); - let (loc, name) = self.assert_and_peek_lower_id(); - Id { - loc, - associated_comments: self.comments_store.create_comment_reference(associated_comments), - name, - } - } +mod type_parser { + use super::super::lexer::{Keyword, Token, TokenContent, TokenOp}; + use samlang_ast::source::*; - fn collect_preceding_comments(&mut self) -> Vec { - self.unconsume_comments(); - let mut comments = vec![]; - loop { - match self.simple_peek() { - Token(location, TokenContent::LineComment(text)) => { - self.consume(); - comments.push(Comment { location, kind: CommentKind::LINE, text }); - } - Token(location, TokenContent::BlockComment(text)) => { - self.consume(); - comments.push(Comment { location, kind: CommentKind::BLOCK, text }) - } - Token(location, TokenContent::DocComment(text)) => { - self.consume(); - comments.push(Comment { location, kind: CommentKind::DOC, text }) - } - _ => break, - } - } - comments + pub(super) fn parse_type_parameter(parser: &mut super::SourceParser) -> TypeParameter { + let name = &parser.parse_upper_id(); + let (bound, loc) = if let Token(_, TokenContent::Operator(TokenOp::COLON)) = parser.peek() { + parser.consume(); + let id = parser.parse_upper_id(); + let bound = super::type_parser::parse_identifier_annot(parser, id); + let loc = name.loc.union(&bound.location); + (Some(bound), loc) + } else { + (None, name.loc) + }; + TypeParameter { loc, name: *name, bound } } - fn parse_annotated_id(&mut self) -> AnnotatedId<()> { - let name = self.parse_lower_id(); - self.assert_and_consume_operator(TokenOp::COLON); - let annotation = self.parse_annotation(); + pub(super) fn parse_annotated_id(parser: &mut super::SourceParser) -> AnnotatedId<()> { + let name = parser.parse_lower_id(); + parser.assert_and_consume_operator(TokenOp::COLON); + let annotation = parse_annotation(parser); AnnotatedId { name, type_: (), annotation } } - fn parse_optionally_annotated_id(&mut self) -> OptionallyAnnotatedId<()> { - let name = self.parse_lower_id(); - let annotation = self.parse_optional_annotation(); + pub(super) fn parse_optionally_annotated_id( + parser: &mut super::SourceParser, + ) -> OptionallyAnnotatedId<()> { + let name = parser.parse_lower_id(); + let annotation = parse_optional_annotation(parser); OptionallyAnnotatedId { name, type_: (), annotation } } - fn parse_optional_annotation(&mut self) -> Option { - if let Token(_, TokenContent::Operator(TokenOp::COLON)) = self.peek() { - self.consume(); - Some(self.parse_annotation()) + fn parse_optional_annotation(parser: &mut super::SourceParser) -> Option { + if let Token(_, TokenContent::Operator(TokenOp::COLON)) = parser.peek() { + parser.consume(); + Some(parse_annotation(parser)) } else { None } } - pub(super) fn parse_annotation(&mut self) -> annotation::T { - let associated_comments = self.collect_preceding_comments(); - let peeked = self.peek(); + pub(super) fn parse_annotation(parser: &mut super::SourceParser) -> annotation::T { + let associated_comments = parser.collect_preceding_comments(); + let peeked = parser.peek(); match peeked.1 { TokenContent::Keyword(Keyword::UNIT) => { - self.consume(); + parser.consume(); annotation::T::Primitive( peeked.0, - self.comments_store.create_comment_reference(associated_comments), + parser.comments_store.create_comment_reference(associated_comments), annotation::PrimitiveTypeKind::Unit, ) } TokenContent::Keyword(Keyword::BOOL) => { - self.consume(); + parser.consume(); annotation::T::Primitive( peeked.0, - self.comments_store.create_comment_reference(associated_comments), + parser.comments_store.create_comment_reference(associated_comments), annotation::PrimitiveTypeKind::Bool, ) } TokenContent::Keyword(Keyword::INT) => { - self.consume(); + parser.consume(); annotation::T::Primitive( peeked.0, - self.comments_store.create_comment_reference(associated_comments), + parser.comments_store.create_comment_reference(associated_comments), annotation::PrimitiveTypeKind::Int, ) } TokenContent::UpperId(name) => { - self.consume(); - let associated_comments = self.comments_store.create_comment_reference(vec![]); - let id_annot = self.parse_identifier_annot(Id { loc: peeked.0, associated_comments, name }); - if id_annot.type_arguments.is_empty() && self.available_tparams.contains(&id_annot.id.name) + parser.consume(); + let associated_comments = parser.comments_store.create_comment_reference(vec![]); + let id_annot = + parse_identifier_annot(parser, Id { loc: peeked.0, associated_comments, name }); + if id_annot.type_arguments.is_empty() + && parser.available_tparams.contains(&id_annot.id.name) { annotation::T::Generic(id_annot.location, id_annot.id) } else { @@ -1653,99 +1710,114 @@ impl<'a> SourceParser<'a> { } } TokenContent::Operator(TokenOp::LPAREN) => { - self.consume(); - let argument_types = if let Token(_, TokenContent::Operator(TokenOp::RPAREN)) = self.peek() - { - self.consume(); - vec![] - } else { - let types = self.parse_comma_separated_list_with_end_token( - TokenOp::RPAREN, - &mut SourceParser::parse_annotation, - ); - self.assert_and_consume_operator(TokenOp::RPAREN); - types - }; - self.assert_and_consume_operator(TokenOp::ARROW); - let return_type = self.parse_annotation(); + parser.consume(); + let argument_types = + if let Token(_, TokenContent::Operator(TokenOp::RPAREN)) = parser.peek() { + parser.consume(); + vec![] + } else { + let types = parser + .parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut parse_annotation); + parser.assert_and_consume_operator(TokenOp::RPAREN); + types + }; + parser.assert_and_consume_operator(TokenOp::ARROW); + let return_type = parse_annotation(parser); let location = peeked.0.union(&return_type.location()); annotation::T::Fn(annotation::Function { location, - associated_comments: self.comments_store.create_comment_reference(associated_comments), + associated_comments: parser.comments_store.create_comment_reference(associated_comments), argument_types, return_type: Box::new(return_type), }) } content => { - self.report( + parser.report( peeked.0, - format!("Expecting: type, actual: {}", content.pretty_print(self.heap)), + format!("Expecting: type, actual: {}", content.pretty_print(parser.heap)), ); annotation::T::Primitive( peeked.0, - self.comments_store.create_comment_reference(associated_comments), + parser.comments_store.create_comment_reference(associated_comments), annotation::PrimitiveTypeKind::Any, ) } } } - fn fix_tparams_with_generic_annot(&self, tparams: &mut [TypeParameter]) { + pub(super) fn fix_tparams_with_generic_annot( + parser: &mut super::SourceParser, + tparams: &mut [TypeParameter], + ) { for tparam in tparams { if let Some(bound) = &mut tparam.bound { for annot in &mut bound.type_arguments { - self.fix_annot_with_generic_annot(annot); + fix_annot_with_generic_annot(parser, annot); } } } } - fn fix_annot_with_generic_annot(&self, annot: &mut annotation::T) { + pub(super) fn fix_annot_with_generic_annot( + parser: &mut super::SourceParser, + annot: &mut annotation::T, + ) { match annot { annotation::T::Primitive(_, _, _) | annotation::T::Generic(_, _) => {} annotation::T::Id(id_annot) => { - if id_annot.type_arguments.is_empty() && self.available_tparams.contains(&id_annot.id.name) + if id_annot.type_arguments.is_empty() + && parser.available_tparams.contains(&id_annot.id.name) { *annot = annotation::T::Generic(id_annot.location, id_annot.id) } } annotation::T::Fn(t) => { for annot in &mut t.argument_types { - self.fix_annot_with_generic_annot(annot); + fix_annot_with_generic_annot(parser, annot); } - self.fix_annot_with_generic_annot(&mut t.return_type); + fix_annot_with_generic_annot(parser, &mut t.return_type); } } } - fn parse_identifier_annot(&mut self, identifier: Id) -> annotation::Id { + pub(super) fn parse_identifier_annot( + parser: &mut super::SourceParser, + identifier: Id, + ) -> annotation::Id { let (type_arguments, location) = - if let Token(_, TokenContent::Operator(TokenOp::LT)) = self.peek() { - self.consume(); - let types = self.parse_comma_separated_list_with_end_token( - TokenOp::GT, - &mut SourceParser::parse_annotation, - ); - let location = identifier.loc.union(&self.assert_and_consume_operator(TokenOp::GT)); + if let Token(_, TokenContent::Operator(TokenOp::LT)) = parser.peek() { + parser.consume(); + let types = + parser.parse_comma_separated_list_with_end_token(TokenOp::GT, &mut parse_annotation); + let location = identifier.loc.union(&parser.assert_and_consume_operator(TokenOp::GT)); (types, location) } else { (vec![], identifier.loc) }; annotation::Id { location, - module_reference: self.resolve_class(identifier.name), + module_reference: super::utils::resolve_class(parser, identifier.name), id: identifier, type_arguments, } } +} - fn resolve_class(&mut self, class_name: PStr) -> ModuleReference { - if self.builtin_classes.contains(&class_name) { - ModuleReference::ROOT +mod utils { + pub(super) fn resolve_class( + parser: &super::SourceParser, + class_name: samlang_heap::PStr, + ) -> samlang_heap::ModuleReference { + if parser.builtin_classes.contains(&class_name) { + samlang_heap::ModuleReference::ROOT } else { - *self.class_source_map.get(&class_name).unwrap_or(&self.module_reference) + *parser.class_source_map.get(&class_name).unwrap_or(&parser.module_reference) } } + + pub(super) fn unescape_quotes(source: &str) -> String { + source.replace("\\\"", "\"") + } } #[cfg(test)] @@ -1801,15 +1873,15 @@ mod tests { let mut parser = SourceParser::new(tokens, heap, &mut error_set, ModuleReference::DUMMY, HashSet::new()); - parser.parse_interface(); - parser.parse_class(); - parser.parse_class_member_definition(); - parser.parse_class_member_declaration(); - parser.parse_expression(); - parser.parse_matching_pattern(); - parser.parse_statement(); - parser.parse_annotation(); - parser.parse_module(); + super::pattern_parser::parse_matching_pattern(&mut parser); + super::expression_parser::parse_expression(&mut parser); + super::expression_parser::parse_statement(&mut parser); + super::type_parser::parse_annotation(&mut parser); + super::toplevel_parser::parse_interface(&mut parser); + super::toplevel_parser::parse_class(&mut parser); + super::toplevel_parser::parse_class_member_definition(&mut parser); + super::toplevel_parser::parse_class_member_declaration(&mut parser); + super::parse_module(parser); } #[test]