diff --git a/prqlc/prqlc/src/codegen/ast.rs b/prqlc/prqlc/src/codegen/ast.rs index 885f15915cd1..e0e03d465259 100644 --- a/prqlc/prqlc/src/codegen/ast.rs +++ b/prqlc/prqlc/src/codegen/ast.rs @@ -10,35 +10,36 @@ use crate::codegen::SeparatedExprs; use super::{WriteOpt, WriteSource}; pub(crate) fn write_expr(expr: &Expr) -> String { - expr.write(WriteOpt::new_width(u16::MAX)).unwrap() + expr.write(&mut WriteOpt::new_width(u16::MAX)).unwrap() } -fn write_within(node: &T, parent: &ExprKind, mut opt: WriteOpt) -> Option { +fn write_within(node: &T, parent: &ExprKind, opt: &mut WriteOpt) -> Option { let parent_strength = binding_strength(parent); opt.context_strength = opt.context_strength.max(parent_strength); - // FIXME: this is extremely hacky. Our issue is that in: // // from a.b # comment // // ...we're writing both `from a.b` and `a.b`, so we need to know which of // these to write comments for. I'm sure there are better ways to do it. - let enable_comments = opt.enable_comments; - opt.enable_comments = false; - let out = node.write(opt.clone()); - opt.enable_comments = enable_comments; + let enable_comments = opt.enable_comments_after; + opt.enable_comments_after = false; + let out = node.write(opt); + opt.enable_comments_after = enable_comments; out } impl WriteSource for Expr { - fn write(&self, mut opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { let mut r = String::new(); - - // if let Some(span) = self.span { - // if let Some(comment) = find_comment_before(span, &opt.tokens) { - // r += &comment.to_string(); - // } - // } + if opt.enable_comments_before { + if let Some(span) = self.span { + find_comment_before(span, &opt.tokens) + .into_iter() + .for_each(|t| r += write_comment(t).as_str()) + } + opt.enable_comments_before = false; + } if let Some(alias) = &self.alias { r += opt.consume(alias)?; r += opt.consume(" = ")?; @@ -49,18 +50,18 @@ impl WriteSource for Expr { || (opt.context_strength >= binding_strength(&self.kind)); if !needs_parenthesis { - r += &self.kind.write(opt.clone())?; + r += &self.kind.write(opt)?; } else { - let value = self.kind.write_between("(", ")", opt.clone()); + let value = self.kind.write_between("(", ")", opt); if let Some(value) = value { r += &value; } else { - r += &break_line_within_parenthesis(&self.kind, &mut opt)?; + r += &break_line_within_parenthesis(&self.kind, opt)?; } }; - if opt.enable_comments { + if opt.enable_comments_after { if let Some(span) = self.span { // TODO: change underlying function so we can remove this if opt.tokens.0.is_empty() { @@ -81,31 +82,35 @@ impl WriteSource for Expr { r += " "; } - for c in comments { - match c.kind { - // TODO: these are defined here since the debug - // representations aren't quite right (NewLine is `new - // line` as is used in error messages). But we should - // probably move them onto the Struct. - TokenKind::Comment(s) => r += format!("#{}", s).as_str(), - TokenKind::NewLine => r += "\n", - _ => unreachable!(), - } - } + comments + .into_iter() + .for_each(|x| r += write_comment(x).as_str()) } } Some(r) } } +fn write_comment(token: Token) -> String { + match &token.kind { + // TODO: these are defined here since the debug + // representations aren't quite right (NewLine is `new + // line` as is used in error messages). But we should + // probably move them onto the Struct. + TokenKind::Comment(s) => format!("#{}", s), + TokenKind::NewLine => "\n".to_string(), + _ => unreachable!(), + } +} + impl WriteSource for ExprKind { - fn write(&self, mut opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { use ExprKind::*; match &self { Ident(ident) => Some(write_ident_part(ident)), Indirection { base, field } => { - let mut r = base.write(opt.clone())?; + let mut r = base.write(opt)?; opt.consume_width(r.len() as u16)?; r += opt.consume(".")?; @@ -145,7 +150,7 @@ impl WriteSource for ExprKind { Range(range) => { let mut r = String::new(); if let Some(start) = &range.start { - let start = write_within(start.as_ref(), self, opt.clone())?; + let start = write_within(start.as_ref(), self, opt)?; r += opt.consume(&start)?; } @@ -159,7 +164,7 @@ impl WriteSource for ExprKind { Binary(BinaryExpr { op, left, right }) => { let mut r = String::new(); - let left = write_within(left.as_ref(), self, opt.clone())?; + let left = write_within(left.as_ref(), self, opt)?; r += opt.consume(&left)?; r += opt.consume(" ")?; @@ -179,7 +184,7 @@ impl WriteSource for ExprKind { FuncCall(func_call) => { let mut r = String::new(); - let name = write_within(func_call.name.as_ref(), self, opt.clone())?; + let name = write_within(func_call.name.as_ref(), self, opt)?; r += opt.consume(&name)?; opt.unbound_expr = true; @@ -190,13 +195,13 @@ impl WriteSource for ExprKind { r += opt.consume(":")?; - let arg = write_within(arg, self, opt.clone())?; + let arg = write_within(arg, self, opt)?; r += opt.consume(&arg)?; } for arg in &func_call.args { r += opt.consume(" ")?; - let arg = write_within(arg, self, opt.clone())?; + let arg = write_within(arg, self, opt)?; r += opt.consume(&arg)?; } Some(r) @@ -208,14 +213,14 @@ impl WriteSource for ExprKind { for generic_param in &c.generic_type_params { r += opt.consume(&write_ident_part(&generic_param.name))?; r += opt.consume(": ")?; - r += &opt.consume( - SeparatedExprs { - exprs: &generic_param.domain, - inline: " | ", - line_end: "|", - } - .write(opt.clone())?, - )?; + // Needs splitting for the borrow checker + let x = (SeparatedExprs { + exprs: &generic_param.domain, + inline: " | ", + line_end: "|", + }) + .write(opt)?; + r += &opt.consume(x)?; } r += opt.consume("> ")?; } @@ -224,7 +229,7 @@ impl WriteSource for ExprKind { r += opt.consume(&write_ident_part(¶m.name))?; r += opt.consume(" ")?; if let Some(ty) = ¶m.ty { - let ty = ty.write_between("<", ">", opt.clone())?; + let ty = ty.write_between("<", ">", opt)?; r += opt.consume(&ty)?; r += opt.consume(" ")?; } @@ -232,22 +237,24 @@ impl WriteSource for ExprKind { for param in &c.named_params { r += opt.consume(&write_ident_part(¶m.name))?; r += opt.consume(":")?; - r += opt.consume(¶m.default_value.as_ref().unwrap().write(opt.clone())?)?; + // Needs splitting for the borrow checker + let x = ¶m.default_value.as_ref().unwrap().write(opt)?; + r += opt.consume(x)?; r += opt.consume(" ")?; } r += opt.consume("-> ")?; if let Some(ty) = &c.return_ty { - let ty = ty.write_between("<", ">", opt.clone())?; + let ty = ty.write_between("<", ">", opt)?; r += opt.consume(&ty)?; r += opt.consume(" ")?; } // try a single line - if let Some(body) = c.body.write(opt.clone()) { + if let Some(body) = c.body.write(opt) { r += &body; } else { - r += &break_line_within_parenthesis(c.body.as_ref(), &mut opt)?; + r += &break_line_within_parenthesis(c.body.as_ref(), opt)?; } Some(r) @@ -277,7 +284,7 @@ fn break_line_within_parenthesis(expr: &T, opt: &mut WriteOpt) - opt.indent += 1; r += &opt.write_indent(); opt.reset_line()?; - r += &expr.write(opt.clone())?; + r += &expr.write(opt)?; r += "\n"; opt.indent -= 1; r += &opt.write_indent(); @@ -337,7 +344,7 @@ fn can_bind_left(expr: &ExprKind) -> bool { } impl WriteSource for Ident { - fn write(&self, mut opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { let width = self.path.iter().map(|p| p.len() + 1).sum::() + self.name.len(); opt.consume_width(width as u16)?; @@ -371,30 +378,22 @@ pub fn write_ident_part(s: &str) -> String { } } -// impl WriteSource for ModuleDef { -// fn write(&self, mut opt: WriteOpt) -> Option { -// codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap() -// }} - -// /// Find a comment before a span. If there's exactly one newline prior, then the -// /// comment is included here. Any further above are included with the prior token. -// fn find_comment_before(span: Span, tokens: &TokenVec) -> Option { -// // index of the span in the token vec -// let index = tokens -// .0 -// .iter() -// .position(|t| t.span.start == span.start && t.span.end == span.end)?; -// if index <= 1 { -// return None; -// } -// let prior_token = &tokens.0[index - 1].kind; -// let prior_2_token = &tokens.0[index - 2].kind; -// if matches!(prior_token, TokenKind::NewLine) && matches!(prior_2_token, TokenKind::Comment(_)) { -// Some(prior_2_token.clone()) -// } else { -// None -// } -// } +/// Find a comment before a span. If there's exactly one newline prior, then the +/// comment is included here. Any further above are included with the prior token. +fn find_comment_before(span: Span, tokens: &TokenVec) -> Vec { + // index of the span in the token vec + let Some(index) = tokens.0.iter().position(|t| t.span.start == span.start) else { + return vec![]; + }; + let mut out = vec![]; + for index in (0..index).rev() { + match tokens.0[index].kind { + TokenKind::NewLine | TokenKind::Comment(_) => out.push(tokens.0[index].clone()), + _ => break, + } + } + out.into_iter().rev().collect() +} /// Find comments after a given span. fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { @@ -402,7 +401,8 @@ fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { let index = tokens .0 .iter() - // FIXME: why isn't this working? + // We use the end because an expr can be bigger than the token, so + // including the start causes it to fail // .position(|t| t.1.start == span.start && t.1.end == span.end) .position(|t| t.span.end == span.end) .unwrap_or_else(|| panic!("{:?}, {:?}", &tokens, &span)); @@ -418,7 +418,7 @@ fn find_comments_after(span: Span, tokens: &TokenVec) -> Vec { } impl WriteSource for Vec { - fn write(&self, mut opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { opt.reset_line()?; let mut r = String::new(); @@ -428,19 +428,19 @@ impl WriteSource for Vec { } r += &opt.write_indent(); - r += &stmt.write_or_expand(opt.clone()); + r += &stmt.write_or_expand(opt); } Some(r) } } impl WriteSource for Stmt { - fn write(&self, mut opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { let mut r = String::new(); for annotation in &self.annotations { r += "@"; - r += &annotation.expr.write(opt.clone())?; + r += &annotation.expr.write(opt)?; r += "\n"; r += &opt.write_indent(); opt.reset_line()?; @@ -457,50 +457,58 @@ impl WriteSource for Stmt { } r += "\n"; } - StmtKind::VarDef(var_def) => match var_def.kind { - _ if var_def.value.is_none() || var_def.ty.is_some() => { - let typ = if let Some(ty) = &var_def.ty { - format!("<{}> ", ty.write(opt.clone())?) - } else { - "".to_string() - }; - - r += opt.consume(&format!("let {} {}", var_def.name, typ))?; - - if let Some(val) = &var_def.value { - r += opt.consume("= ")?; - r += &val.write(opt)?; + StmtKind::VarDef(var_def) => { + // Our comment writer only looks at comments after the + // first expr unless this is enabled, so we enable it here and + // then disable it after over Expr write (not good code; need to + // replace) + opt.enable_comments_before = true; + + match var_def.kind { + _ if var_def.value.is_none() || var_def.ty.is_some() => { + let typ = if let Some(ty) = &var_def.ty { + format!("<{}> ", ty.write(opt)?) + } else { + "".to_string() + }; + + r += opt.consume(&format!("let {} {}", var_def.name, typ))?; + + if let Some(val) = &var_def.value { + r += opt.consume("= ")?; + r += &val.write(opt)?; + } + r += "\n"; } - r += "\n"; - } - VarDefKind::Let => { - r += opt.consume(&format!("let {} = ", var_def.name))?; + VarDefKind::Let => { + r += opt.consume(&format!("let {} = ", var_def.name))?; - r += &var_def.value.as_ref().unwrap().write(opt)?; - r += "\n"; - } - VarDefKind::Into | VarDefKind::Main => { - let val = var_def.value.as_ref().unwrap(); - match &val.kind { - ExprKind::Pipeline(pipeline) => { - for expr in &pipeline.exprs { - r += &expr.write(opt.clone())?; + r += &var_def.value.as_ref().unwrap().write(opt)?; + r += "\n"; + } + VarDefKind::Into | VarDefKind::Main => { + let val = var_def.value.as_ref().unwrap(); + match &val.kind { + ExprKind::Pipeline(pipeline) => { + for expr in &pipeline.exprs { + r += &expr.write(opt)?; + r += "\n"; + } + } + _ => { + r += &val.write(opt)?; r += "\n"; } } - _ => { - r += &val.write(opt)?; + + if var_def.kind == VarDefKind::Into { + r += &format!("into {}", var_def.name); r += "\n"; } } - - if var_def.kind == VarDefKind::Into { - r += &format!("into {}", var_def.name); - r += "\n"; - } } - }, + } StmtKind::TypeDef(type_def) => { r += opt.consume(&format!("type {}", type_def.name))?; @@ -514,7 +522,7 @@ impl WriteSource for Stmt { r += &format!("module {} {{\n", module_def.name); opt.indent += 1; - r += &module_def.stmts.write(opt.clone())?; + r += &module_def.stmts.write(opt)?; opt.indent -= 1; r += &opt.write_indent(); @@ -534,7 +542,11 @@ impl WriteSource for Stmt { } } -fn display_interpolation(prefix: &str, parts: &[InterpolateItem], opt: WriteOpt) -> Option { +fn display_interpolation( + prefix: &str, + parts: &[InterpolateItem], + opt: &mut WriteOpt, +) -> Option { let mut r = String::new(); r += prefix; r += "\""; @@ -544,7 +556,7 @@ fn display_interpolation(prefix: &str, parts: &[InterpolateItem], opt: WriteOpt) InterpolateItem::String(s) => r += s.replace('{', "{{").replace('}', "}}").as_str(), InterpolateItem::Expr { expr, .. } => { r += "{"; - r += &expr.write(opt.clone())?; + r += &expr.write(opt)?; r += "}" } } @@ -554,9 +566,9 @@ fn display_interpolation(prefix: &str, parts: &[InterpolateItem], opt: WriteOpt) } impl WriteSource for SwitchCase { - fn write(&self, opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { let mut r = String::new(); - r += &self.condition.write(opt.clone())?; + r += &self.condition.write(opt)?; r += " => "; r += &self.value.write(opt)?; Some(r) @@ -584,7 +596,7 @@ mod test { .into_iter() .exactly_one() .unwrap(); - stmt.write(WriteOpt::default()).unwrap() + stmt.write(&mut WriteOpt::default()).unwrap() } // #[test] @@ -661,14 +673,14 @@ mod test { let pipeline = Expr::new(ExprKind::Pipeline(Pipeline { exprs: vec![short.clone(), short.clone(), short.clone()], })); - assert_snapshot!(pipeline.write(opt.clone()).unwrap(), @"(short | short | short)"); + assert_snapshot!(pipeline.write(&mut opt).unwrap(), @"(short | short | short)"); // long pipelines should be indented let pipeline = Expr::new(ExprKind::Pipeline(Pipeline { exprs: vec![short.clone(), long.clone(), long, short.clone()], })); // colons are a workaround to avoid trimming - assert_snapshot!(pipeline.write(opt.clone()).unwrap(), @r###" + assert_snapshot!(pipeline.write(&mut opt).unwrap(), @r###" ( short some_really_long_and_really_long_name @@ -681,7 +693,7 @@ mod test { opt.rem_width = 4; opt.indent = 100; let pipeline = Expr::new(ExprKind::Pipeline(Pipeline { exprs: vec![short] })); - assert!(pipeline.write(opt).is_none()); + assert!(pipeline.write(&mut opt).is_none()); } #[test] diff --git a/prqlc/prqlc/src/codegen/mod.rs b/prqlc/prqlc/src/codegen/mod.rs index 68459772711b..624906e4c0a7 100644 --- a/prqlc/prqlc/src/codegen/mod.rs +++ b/prqlc/prqlc/src/codegen/mod.rs @@ -11,20 +11,20 @@ pub trait WriteSource: std::fmt::Debug { /// options. /// /// Returns `None` if source does not fit into [WriteOpt::rem_width]. - fn write(&self, opt: WriteOpt) -> Option; + fn write(&self, opt: &mut WriteOpt) -> Option; fn write_between( &self, prefix: S, suffix: &str, - mut opt: WriteOpt, + opt: &mut WriteOpt, ) -> Option { let mut r = String::new(); r += opt.consume(&prefix.to_string())?; opt.context_strength = 0; opt.unbound_expr = false; - let source = self.write(opt.clone())?; + let source = self.write(opt)?; r += opt.consume(&source)?; r += opt.consume(suffix)?; @@ -32,9 +32,9 @@ pub trait WriteSource: std::fmt::Debug { } /// Attempts to write the current item, expanding the maximum width where necessary. - fn write_or_expand(&self, mut opt: WriteOpt) -> String { + fn write_or_expand(&self, opt: &mut WriteOpt) -> String { loop { - if let Some(s) = self.write(opt.clone()) { + if let Some(s) = self.write(opt) { return s; } else { // TODO: could we just set the max width rather than increasing @@ -47,7 +47,7 @@ pub trait WriteSource: std::fmt::Debug { } impl WriteSource for &T { - fn write(&self, opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { (*self).write(opt) } } @@ -77,13 +77,15 @@ pub struct WriteOpt { /// For example: /// `join foo` has an unbound expr, since `join foo ==bar` produced a binary op. pub unbound_expr: bool, - /// The lexer tokens that were used to produce this source; used for /// comments. pub tokens: TokenVec, - // TODO: remove - pub enable_comments: bool, + // TODO: really hacky, see notes elsewhere; we should remove this + pub enable_comments_after: bool, + + // TODO: same here + pub enable_comments_before: bool, } impl Default for WriteOpt { @@ -97,7 +99,8 @@ impl Default for WriteOpt { context_strength: 0, unbound_expr: false, tokens: TokenVec(vec![]), - enable_comments: true, + enable_comments_after: true, + enable_comments_before: true, } } } @@ -148,7 +151,7 @@ struct SeparatedExprs<'a, T: WriteSource> { } impl<'a, T: WriteSource> WriteSource for SeparatedExprs<'a, T> { - fn write(&self, mut opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { // try inline if let Some(inline) = self.write_inline(opt.clone()) { return Some(inline); @@ -166,7 +169,7 @@ impl<'a, T: WriteSource> WriteSource for SeparatedExprs<'a, T> { opt.reset_line()?; opt.rem_width.checked_sub(self.line_end.len() as u16)?; - r += &expr.write(opt.clone())?; + r += &expr.write(opt)?; r += self.line_end; } opt.indent -= 1; @@ -182,7 +185,7 @@ impl<'a, T: WriteSource> SeparatedExprs<'a, T> { fn write_inline(&self, mut opt: WriteOpt) -> Option { let mut exprs = Vec::new(); for expr in self.exprs { - let expr = expr.write(opt.clone())?; + let expr = expr.write(&mut opt)?; if expr.contains('\n') { return None; diff --git a/prqlc/prqlc/src/codegen/types.rs b/prqlc/prqlc/src/codegen/types.rs index 49772f20cfce..7e4dedac79e1 100644 --- a/prqlc/prqlc/src/codegen/types.rs +++ b/prqlc/prqlc/src/codegen/types.rs @@ -4,15 +4,15 @@ use crate::codegen::SeparatedExprs; use super::{WriteOpt, WriteSource}; pub(crate) fn write_ty(ty: &Ty) -> String { - ty.write(WriteOpt::new_width(u16::MAX)).unwrap() + ty.write(&mut WriteOpt::new_width(u16::MAX)).unwrap() } pub(crate) fn write_ty_kind(ty: &TyKind) -> String { - ty.write(WriteOpt::new_width(u16::MAX)).unwrap() + ty.write(&mut WriteOpt::new_width(u16::MAX)).unwrap() } impl WriteSource for Ty { - fn write(&self, opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { if let Some(name) = &self.name { Some(name.clone()) } else { @@ -22,7 +22,7 @@ impl WriteSource for Ty { } impl WriteSource for Option<&Ty> { - fn write(&self, opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { match self { Some(ty) => ty.write(opt), None => Some("infer".to_string()), @@ -31,7 +31,7 @@ impl WriteSource for Option<&Ty> { } impl WriteSource for TyKind { - fn write(&self, opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { use TyKind::*; match &self { @@ -70,7 +70,7 @@ impl WriteSource for TyKind { let mut r = "func ".to_string(); for t in &func.args { - r += &t.as_ref().write(opt.clone())?; + r += &t.as_ref().write(opt)?; r += " "; } r += "-> "; @@ -79,8 +79,8 @@ impl WriteSource for TyKind { } Any => Some("anytype".to_string()), Difference { base, exclude } => { - let base = base.write(opt.clone())?; - let exclude = exclude.write(opt.clone())?; + let base = base.write(opt)?; + let exclude = exclude.write(opt)?; Some(format!("{base} - {exclude}")) } GenericArg(_) => Some("?".to_string()), @@ -89,7 +89,7 @@ impl WriteSource for TyKind { } impl WriteSource for TyTupleField { - fn write(&self, opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { match self { Self::Wildcard(generic_el) => match generic_el { Some(el) => Some(format!("{}..", el.write(opt)?)), @@ -117,7 +117,7 @@ impl WriteSource for TyTupleField { struct UnionVariant<'a>(&'a Option, &'a Ty); impl WriteSource for UnionVariant<'_> { - fn write(&self, mut opt: WriteOpt) -> Option { + fn write(&self, opt: &mut WriteOpt) -> Option { let mut r = String::new(); if let Some(name) = &self.0 { r += name; diff --git a/prqlc/prqlc/src/lib.rs b/prqlc/prqlc/src/lib.rs index 80b822440d71..a1b33dcac7b5 100644 --- a/prqlc/prqlc/src/lib.rs +++ b/prqlc/prqlc/src/lib.rs @@ -352,7 +352,7 @@ pub fn rq_to_sql(rq: ir::rq::RelationalQuery, options: &Options) -> Result Result { - Ok(codegen::WriteSource::write(&pl.stmts, codegen::WriteOpt::default()).unwrap()) + Ok(codegen::WriteSource::write(&pl.stmts, &mut codegen::WriteOpt::default()).unwrap()) } pub fn format_prql(prql: &str) -> Result { @@ -361,7 +361,7 @@ pub fn format_prql(prql: &str) -> Result { let pl = prql_to_pl(prql)?; Ok(codegen::WriteSource::write( &pl.stmts, - codegen::WriteOpt { + &mut codegen::WriteOpt { tokens, ..Default::default() }, @@ -401,17 +401,23 @@ fn test_format_prql() { select {name} "###); +} +#[test] +fn test_format_prql_comments() { + use insta::assert_snapshot; assert_snapshot!(format_prql( r#" # test comment from employees # inline comment # another test comment select {name}"# ).unwrap(), @r###" + + # test comment from employees # inline comment # another test comment - select {name} + (select {name}) "###); }