From 7401865944ace47da4846c471ebe56421ef30def Mon Sep 17 00:00:00 2001 From: Sam Zhou Date: Sat, 30 Dec 2023 22:47:59 -0800 Subject: [PATCH] [parser][printer] Consistent comment attachment and printing for toplevels --- crates/samlang-ast/src/source.rs | 1 + crates/samlang-ast/src/source_tests.rs | 2 + crates/samlang-parser/src/source_parser.rs | 86 ++++++++++----- crates/samlang-printer/src/lib.rs | 1 + crates/samlang-printer/src/source_printer.rs | 105 ++++++++++++------- crates/samlang-services/src/ast_differ.rs | 1 + crates/samlang-services/src/lib.rs | 1 + 7 files changed, 134 insertions(+), 63 deletions(-) diff --git a/crates/samlang-ast/src/source.rs b/crates/samlang-ast/src/source.rs index 2584f63c..30d48f34 100644 --- a/crates/samlang-ast/src/source.rs +++ b/crates/samlang-ast/src/source.rs @@ -873,6 +873,7 @@ impl Toplevel { #[derive(Clone, PartialEq, Eq)] pub struct ModuleMembersImport { pub loc: Location, + pub associated_comments: CommentReference, pub imported_members: Vec, pub imported_module: ModuleReference, pub imported_module_loc: Location, diff --git a/crates/samlang-ast/src/source_tests.rs b/crates/samlang-ast/src/source_tests.rs index c173a099..e8ef8fbb 100644 --- a/crates/samlang-ast/src/source_tests.rs +++ b/crates/samlang-ast/src/source_tests.rs @@ -608,6 +608,7 @@ mod tests { .is_none()); assert!(ModuleMembersImport { loc: Location::dummy(), + associated_comments: NO_COMMENT_REFERENCE, imported_members: vec![], imported_module: ModuleReference::DUMMY, imported_module_loc: Location::dummy(), @@ -772,6 +773,7 @@ mod tests { let one_import = ModuleMembersImport { loc: Location::dummy(), + associated_comments: NO_COMMENT_REFERENCE, imported_members: vec![], imported_module: ModuleReference::DUMMY, imported_module_loc: Location::dummy(), diff --git a/crates/samlang-parser/src/source_parser.rs b/crates/samlang-parser/src/source_parser.rs index f4e55f4b..9c3524e9 100644 --- a/crates/samlang-parser/src/source_parser.rs +++ b/crates/samlang-parser/src/source_parser.rs @@ -213,21 +213,22 @@ impl<'a> SourceParser<'a> { self.error_set.report_invalid_syntax_error(loc, reason) } - fn parse_comma_separated_list_with_end_token T>( + fn parse_comma_separated_list_with_end_token) -> T>( &mut self, end_token: TokenOp, parser: &mut F, ) -> Vec { - let mut collector = vec![parser(self)]; + let mut collector = vec![parser(self, vec![])]; while let Token(_, TokenContent::Operator(op)) = self.peek() { if op != TokenOp::COMMA { break; } + let additional_comments = self.collect_preceding_comments(); self.consume(); if self.peek().1 == TokenContent::Operator(end_token) { return collector; } - collector.push(parser(self)); + collector.push(parser(self, additional_comments)); } collector } @@ -287,19 +288,26 @@ impl<'a> SourceParser<'a> { pub fn parse_module(mut parser: SourceParser) -> Module<()> { let mut imports = vec![]; while let Token(import_start, TokenContent::Keyword(Keyword::IMPORT)) = parser.peek() { + let mut associated_comments = parser.collect_preceding_comments(); parser.consume(); + associated_comments.append(&mut parser.collect_preceding_comments()); parser.assert_and_consume_operator(TokenOp::LBRACE); let imported_members = parser.parse_comma_separated_list_with_end_token( TokenOp::RBRACE, - &mut SourceParser::parse_upper_id, + &mut SourceParser::parse_upper_id_with_comments, ); + associated_comments.append(&mut parser.collect_preceding_comments()); parser.assert_and_consume_operator(TokenOp::RBRACE); + associated_comments.append(&mut parser.collect_preceding_comments()); parser.assert_and_consume_keyword(Keyword::FROM); let import_loc_start = parser.peek().0; let imported_module_parts = { + associated_comments.append(&mut parser.collect_preceding_comments()); let mut collector = vec![parser.assert_and_consume_identifier().1]; while let Token(_, TokenContent::Operator(TokenOp::DOT)) = parser.peek() { + associated_comments.append(&mut parser.collect_preceding_comments()); parser.consume(); + associated_comments.append(&mut parser.collect_preceding_comments()); collector.push(parser.assert_and_consume_identifier().1); } collector @@ -311,6 +319,7 @@ pub fn parse_module(mut parser: SourceParser) -> Module<()> { } let loc = if let Token(semicolon_loc, TokenContent::Operator(TokenOp::SEMICOLON)) = parser.peek() { + associated_comments.append(&mut parser.collect_preceding_comments()); parser.consume(); import_start.union(&semicolon_loc) } else { @@ -318,6 +327,7 @@ pub fn parse_module(mut parser: SourceParser) -> Module<()> { }; imports.push(ModuleMembersImport { loc, + associated_comments: parser.comments_store.create_comment_reference(associated_comments), imported_members, imported_module, imported_module_loc, @@ -349,8 +359,10 @@ pub fn parse_module(mut parser: SourceParser) -> Module<()> { } toplevels.push(toplevel_parser::parse_toplevel(&mut parser)); } - let comments = parser.collect_preceding_comments(); - let trailing_comments = parser.comments_store.create_comment_reference(comments); + let trailing_comments = { + let comments = parser.collect_preceding_comments(); + parser.comments_store.create_comment_reference(comments) + }; Module { comment_store: parser.comments_store, imports, toplevels, trailing_comments } } @@ -574,9 +586,11 @@ mod toplevel_parser { } } - fn parse_field_definition(parser: &mut super::SourceParser) -> FieldDefinition { + fn parse_field_definition( + parser: &mut super::SourceParser, + mut comments: Vec, + ) -> FieldDefinition { let mut is_public = true; - let mut comments = vec![]; if let TokenContent::Keyword(Keyword::PRIVATE) = parser.peek().1 { is_public = false; comments.append(&mut parser.collect_preceding_comments()); @@ -589,14 +603,17 @@ mod toplevel_parser { FieldDefinition { name, annotation, is_public } } - fn parse_variant_definition(parser: &mut super::SourceParser) -> VariantDefinition { - let name = parser.parse_upper_id(); + fn parse_variant_definition( + parser: &mut super::SourceParser, + addtional_preceding_comments: Vec, + ) -> VariantDefinition { + let name = parser.parse_upper_id_with_comments(addtional_preceding_comments); if let Token(left_paren_loc, TokenContent::Operator(TokenOp::LPAREN)) = parser.peek() { let start_comments = parser.collect_preceding_comments(); parser.consume(); let annotations = parser.parse_comma_separated_list_with_end_token( TokenOp::RPAREN, - &mut super::type_parser::parse_annotation, + &mut super::type_parser::parse_annotation_with_additional_comments, ); if let Some(node) = annotations.get(MAX_VARIANT_SIZE) { @@ -1311,8 +1328,10 @@ mod expression_parser { } parser.unconsume(); } - let rest_tuple_elements = parser - .parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut parse_expression); + let rest_tuple_elements = parser.parse_comma_separated_list_with_end_token( + TokenOp::RPAREN, + &mut parse_expression_with_additional_preceding_comments, + ); let mut tuple_elements = parameters_or_tuple_elements_cover .into_iter() .map(|name| { @@ -1553,8 +1572,10 @@ mod expression_parser { { vec![] } else { - let mut expressions = - parser.parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut parse_expression); + let mut expressions = parser.parse_comma_separated_list_with_end_token( + TokenOp::RPAREN, + &mut parse_expression_with_additional_preceding_comments, + ); if let Some(node) = expressions.get(max_size) { parser.error_set.report_invalid_syntax_error( node.loc(), @@ -1668,8 +1689,8 @@ mod pattern_parser { parser.consume(); let destructured_names = parser.parse_comma_separated_list_with_end_token( TokenOp::RBRACE, - &mut |s: &mut super::SourceParser| { - let field_name = s.parse_lower_id(); + &mut |s: &mut super::SourceParser, id_comments| { + let field_name = s.parse_lower_id_with_comments(id_comments); let (pattern, loc, shorthand) = if let Token(_, TokenContent::Keyword(Keyword::AS)) = s.peek() { let comments_before_as = s.collect_preceding_comments(); @@ -1741,8 +1762,8 @@ mod pattern_parser { let start_loc = parser.assert_and_consume_operator(TokenOp::LPAREN); let destructured_names = parser.parse_comma_separated_list_with_end_token( TokenOp::RPAREN, - &mut |s: &mut super::SourceParser| pattern::TuplePatternElement { - pattern: Box::new(parse_matching_pattern(s, vec![])), + &mut |s: &mut super::SourceParser, comments| pattern::TuplePatternElement { + pattern: Box::new(parse_matching_pattern(s, comments)), type_: (), }, ); @@ -1788,8 +1809,9 @@ mod type_parser { pub(super) fn parse_type_parameter( parser: &mut super::SourceParser, + associated_comments: Vec, ) -> annotation::TypeParameter { - let name = &parser.parse_upper_id(); + let name = parser.parse_upper_id_with_comments(associated_comments); let (bound, loc) = if let Token(_, TokenContent::Operator(TokenOp::COLON)) = parser.peek() { let id_comments = parser.collect_preceding_comments(); parser.consume(); @@ -1800,19 +1822,23 @@ mod type_parser { } else { (None, name.loc) }; - annotation::TypeParameter { loc, name: *name, bound } + annotation::TypeParameter { loc, name, bound } } - pub(super) fn parse_annotated_id(parser: &mut super::SourceParser) -> AnnotatedId<()> { - let name = parser.parse_lower_id(); + pub(super) fn parse_annotated_id( + parser: &mut super::SourceParser, + associated_comments: Vec, + ) -> AnnotatedId<()> { + let name = parser.parse_lower_id_with_comments(associated_comments); let annotation = parse_annotation_with_colon(parser); AnnotatedId { name, type_: (), annotation } } pub(super) fn parse_optionally_annotated_id( parser: &mut super::SourceParser, + associated_comments: Vec, ) -> OptionallyAnnotatedId<()> { - let name = parser.parse_lower_id(); + let name = parser.parse_lower_id_with_comments(associated_comments); let annotation = parse_optional_annotation(parser); OptionallyAnnotatedId { name, type_: (), annotation } } @@ -1892,8 +1918,10 @@ mod type_parser { annotations: Vec::with_capacity(0), } } else { - let parameters = parser - .parse_comma_separated_list_with_end_token(TokenOp::RPAREN, &mut parse_annotation); + let parameters = parser.parse_comma_separated_list_with_end_token( + TokenOp::RPAREN, + &mut parse_annotation_with_additional_comments, + ); let mut comments = parser.collect_preceding_comments(); let location = peeked.0.union(&parser.assert_and_consume_operator(TokenOp::RPAREN)); comments.append(&mut parser.collect_preceding_comments()); @@ -1991,8 +2019,10 @@ mod type_parser { parser.comments_store.create_comment_reference(comments) }; parser.assert_and_consume_operator(TokenOp::LT); - let arguments = - parser.parse_comma_separated_list_with_end_token(TokenOp::GT, &mut parse_annotation); + let arguments = parser.parse_comma_separated_list_with_end_token( + TokenOp::GT, + &mut parse_annotation_with_additional_comments, + ); let ending_associated_comments = { let comments = parser.collect_preceding_comments(); parser.comments_store.create_comment_reference(comments) diff --git a/crates/samlang-printer/src/lib.rs b/crates/samlang-printer/src/lib.rs index b5e860c6..87676b82 100644 --- a/crates/samlang-printer/src/lib.rs +++ b/crates/samlang-printer/src/lib.rs @@ -48,6 +48,7 @@ pub fn pretty_print_import( source_printer::import_to_document( heap, comment_store, + vec![import.associated_comments], import.imported_module, &import.imported_members, ), diff --git a/crates/samlang-printer/src/source_printer.rs b/crates/samlang-printer/src/source_printer.rs index 9dc3afe7..7d1419ad 100644 --- a/crates/samlang-printer/src/source_printer.rs +++ b/crates/samlang-printer/src/source_printer.rs @@ -33,23 +33,25 @@ enum DocumentGrouping { fn associated_comments_doc( heap: &Heap, comment_store: &CommentStore, - associated_comments: CommentReference, + associated_comments: Vec, group: DocumentGrouping, add_final_line_break: bool, ) -> Option { let mut documents = vec![]; - for comment in comment_store.get(associated_comments).iter() { - documents.append(&mut match comment.kind { - CommentKind::LINE => { - vec![Document::line_comment(comment.text.as_str(heap)), Document::LineHard] - } - CommentKind::BLOCK => { - vec![Document::multiline_comment("/*", comment.text.as_str(heap)), Document::Line] - } - CommentKind::DOC => { - vec![Document::multiline_comment("/**", comment.text.as_str(heap)), Document::Line] - } - }); + for associated_comments in associated_comments { + for comment in comment_store.get(associated_comments).iter() { + documents.append(&mut match comment.kind { + CommentKind::LINE => { + vec![Document::line_comment(comment.text.as_str(heap)), Document::LineHard] + } + CommentKind::BLOCK => { + vec![Document::multiline_comment("/*", comment.text.as_str(heap)), Document::Line] + } + CommentKind::DOC => { + vec![Document::multiline_comment("/**", comment.text.as_str(heap)), Document::Line] + } + }); + } } if documents.is_empty() { None @@ -85,7 +87,7 @@ fn create_opt_preceding_comment_doc( if let Some(comment_doc) = associated_comments_doc( heap, comment_store, - associated_comments, + vec![associated_comments], DocumentGrouping::Grouped, true, ) { @@ -105,7 +107,7 @@ fn comma_sep_list Document>( let comment_doc_opt = associated_comments_doc( heap, comment_store, - ending_comments, + vec![ending_comments], DocumentGrouping::Expanded, false, ); @@ -418,7 +420,7 @@ fn create_member_preceding_comment_docs( if let Some(doc) = associated_comments_doc( heap, comment_store, - comments, + vec![comments], if flattened { DocumentGrouping::Flattened } else { DocumentGrouping::Expanded }, !flattened, ) { @@ -546,7 +548,7 @@ fn create_doc_without_preceding_comment( let operator_preceding_comments_docs = if let Some(doc) = associated_comments_doc( heap, comment_store, - e.operator_preceding_comments, + vec![e.operator_preceding_comments], DocumentGrouping::Grouped, false, ) { @@ -676,7 +678,7 @@ fn create_doc_without_preceding_comment( if let Some(comments) = associated_comments_doc( heap, comment_store, - e.ending_associated_comments, + vec![e.ending_associated_comments], DocumentGrouping::Expanded, false, ) { @@ -828,7 +830,7 @@ pub(super) fn statement_to_document( associated_comments_doc( heap, comment_store, - stmt.associated_comments, + vec![stmt.associated_comments], DocumentGrouping::Grouped, true, ) @@ -922,7 +924,7 @@ fn create_doc_for_interface_member( associated_comments_doc( heap, comment_store, - member.associated_comments, + vec![member.associated_comments], DocumentGrouping::Grouped, true, ) @@ -999,7 +1001,7 @@ fn interface_to_doc( associated_comments_doc( heap, comment_store, - interface.associated_comments, + vec![interface.associated_comments], DocumentGrouping::Grouped, true, ) @@ -1033,7 +1035,7 @@ fn interface_to_doc( if let Some(comments) = associated_comments_doc( heap, comment_store, - interface.members.ending_associated_comments, + vec![interface.members.ending_associated_comments], DocumentGrouping::Expanded, true, ) { @@ -1054,7 +1056,7 @@ fn class_to_doc( associated_comments_doc( heap, comment_store, - class.associated_comments, + vec![class.associated_comments], DocumentGrouping::Grouped, true, ) @@ -1159,7 +1161,7 @@ fn class_to_doc( if let Some(comments) = associated_comments_doc( heap, comment_store, - class.members.ending_associated_comments, + vec![class.members.ending_associated_comments], DocumentGrouping::Expanded, true, ) { @@ -1174,10 +1176,20 @@ fn class_to_doc( pub(super) fn import_to_document( heap: &Heap, comment_store: &CommentStore, + associated_comments: Vec, imported_module: ModuleReference, imported_members: &[Id], ) -> Document { let mut documents = vec![]; + if let Some(comments) = associated_comments_doc( + heap, + comment_store, + associated_comments, + DocumentGrouping::Expanded, + true, + ) { + documents.push(comments); + } documents.push(Document::Text(rcs("import "))); documents.push(braces_surrounded_doc(comma_sep_list( heap, @@ -1206,31 +1218,39 @@ pub(super) fn toplevel_to_document( pub(super) fn source_module_to_document(heap: &Heap, module: &Module<()>) -> Document { let mut documents = vec![]; - let mut organized_imports = HashMap::>::new(); + let mut organized_imports = HashMap::, Vec)>::new(); for import in &module.imports { - if let Some(list) = organized_imports.get_mut(&import.imported_module) { - list.append(&mut import.imported_members.clone()); + if let Some((c, m)) = organized_imports.get_mut(&import.imported_module) { + c.push(import.associated_comments); + m.append(&mut import.imported_members.clone()); } else { - organized_imports.insert(import.imported_module, import.imported_members.clone()); + organized_imports.insert( + import.imported_module, + (vec![import.associated_comments], import.imported_members.clone()), + ); } } - for (imported_module, imported_members) in organized_imports + for (imported_module, (associated_comments, imported_members)) in organized_imports .into_iter() .sorted_by_key(|(mod_ref, _)| mod_ref.pretty_print(heap)) - .map(|(mod_ref, members)| { + .map(|(mod_ref, (comments, members))| { ( mod_ref, - members - .into_iter() - .sorted_by(|x, y| x.name.as_str(heap).cmp(y.name.as_str(heap))) - .collect_vec(), + ( + comments, + members + .into_iter() + .sorted_by(|x, y| x.name.as_str(heap).cmp(y.name.as_str(heap))) + .collect_vec(), + ), ) }) { documents.push(import_to_document( heap, &module.comment_store, + associated_comments, imported_module, &imported_members, )) @@ -1245,6 +1265,16 @@ pub(super) fn source_module_to_document(heap: &Heap, module: &Module<()>) -> Doc documents.push(Document::LineHard); } + if let Some(comments) = associated_comments_doc( + heap, + &module.comment_store, + vec![module.trailing_comments], + DocumentGrouping::Expanded, + true, + ) { + documents.push(comments); + } + Document::concat(documents) } @@ -1694,6 +1724,7 @@ class Main { assert_reprint_module( r#" import {Foo} from Foo.Baz +// a import {F1,F2,F3,F4,F5,F6,F7,F8} from Bar.Baz import {F9,F10} from Bar.Baz @@ -1729,8 +1760,10 @@ private class A(val a: int) {} * long document string */ class Main {} +// f "#, r#" +// a import { F1, F10, @@ -1828,7 +1861,9 @@ private class A(val a: int) {} * very very very very very long * document string */ -class Main {}"#, +class Main {} + +// f"#, ); } } diff --git a/crates/samlang-services/src/ast_differ.rs b/crates/samlang-services/src/ast_differ.rs index 6fef9f85..ef2f6975 100644 --- a/crates/samlang-services/src/ast_differ.rs +++ b/crates/samlang-services/src/ast_differ.rs @@ -516,6 +516,7 @@ mod tests { let import = ModuleMembersImport { loc: Location::dummy(), + associated_comments: NO_COMMENT_REFERENCE, imported_members: vec![Id::from(PStr::UPPER_A)], imported_module: ModuleReference::DUMMY, imported_module_loc: Location::dummy(), diff --git a/crates/samlang-services/src/lib.rs b/crates/samlang-services/src/lib.rs index 5cd2d001..92f0d336 100644 --- a/crates/samlang-services/src/lib.rs +++ b/crates/samlang-services/src/lib.rs @@ -562,6 +562,7 @@ pub mod rewrite { let mut changed_ast = ast.clone(); changed_ast.imports.push(ModuleMembersImport { loc: dummy_location, + associated_comments: NO_COMMENT_REFERENCE, imported_members: vec![Id { loc: dummy_location, associated_comments: NO_COMMENT_REFERENCE,