Skip to content

Commit

Permalink
feat(fmt): An attempt at aesthetic items into PL
Browse files Browse the repository at this point in the history
This is an attempt to get around the complications of managing lexer + parser output, which PRQL#4397 has hit in a few incarnations by just adding comments ('aesthetics') to PL.

This very very nearly works -- with chumsky we can create a function that wraps anything that might have a comment, implement a trait on the AST items that contain it, and away we go (though it did require a lot of debugging in the end). This would then be really easy to write back out.

I think there's literally a single case where it doesn't work -- where a comment doesn't come directly before or directly after an AST item -- in the final trailing comma of a tuple or array. So tests fail at the moment.

Next we need to consider:
- Can we work around that one case? We don't actually care about whether there's a trailing comma, so we could likely hack around it...
- Are there actually other cases of this model failing? I know this approach -- of putting aesthetic items into AST -- is not generally favored, and it's really rare that there's even a single case of something not working.
  • Loading branch information
max-sixty committed Jun 20, 2024
1 parent a8d29b8 commit 063159d
Show file tree
Hide file tree
Showing 33 changed files with 370 additions and 62 deletions.
24 changes: 22 additions & 2 deletions prqlc/prqlc-ast/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,17 @@ pub use self::ident::Ident;
pub use self::ops::{BinOp, UnOp};
pub use self::token::{Literal, ValueAndUnit};
use super::token;
use crate::span::Span;
use crate::Ty;
use crate::{span::Span, WithAesthetics};
use crate::{TokenKind, Ty};

impl Expr {
pub fn new<K: Into<ExprKind>>(kind: K) -> Self {
Expr {
kind: kind.into(),
span: None,
alias: None,
aesthetics_before: Vec::new(),
aesthetics_after: Vec::new(),
}
}
}
Expand All @@ -38,6 +40,24 @@ pub struct Expr {

#[serde(skip_serializing_if = "Option::is_none")]
pub alias: Option<String>,

// Maybe should be Token?
#[serde(skip_serializing_if = "Vec::is_empty")]
pub aesthetics_before: Vec<TokenKind>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub aesthetics_after: Vec<TokenKind>,
}

impl WithAesthetics for Expr {
fn with_aesthetics(
mut self,
aesthetics_before: Vec<TokenKind>,
aesthetics_after: Vec<TokenKind>,
) -> Self {
self.aesthetics_before = aesthetics_before;
self.aesthetics_after = aesthetics_after;
self
}
}

#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr)]
Expand Down
8 changes: 8 additions & 0 deletions prqlc/prqlc-ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,11 @@ pub use span::*;
pub use stmt::*;
pub use token::*;
pub use types::*;

/// Implemented by AST nodes that can carry "aesthetic" tokens — comments,
/// line wraps, and doc comments — which don't affect the semantics of the
/// program but are retained so they can be written back out when formatting.
pub trait WithAesthetics {
    /// Returns `self` with the given aesthetic tokens attached before and
    /// after the node.
    fn with_aesthetics(
        self,
        aesthetics_before: Vec<TokenKind>,
        // Fixed typo: was `aethetics_after`.
        aesthetics_after: Vec<TokenKind>,
    ) -> Self;
}
42 changes: 41 additions & 1 deletion prqlc/prqlc-ast/src/stmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use enum_as_inner::EnumAsInner;
use semver::VersionReq;
use serde::{Deserialize, Serialize};

use crate::{expr::Expr, Ident, Span, Ty};
use crate::{expr::Expr, Ident, Span, TokenKind, Ty, WithAesthetics};

#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Default)]
pub struct QueryDef {
Expand All @@ -31,6 +31,26 @@ pub struct Stmt {

#[serde(skip_serializing_if = "Vec::is_empty", default)]
pub annotations: Vec<Annotation>,

// Maybe should be Token?
#[serde(skip_serializing_if = "Vec::is_empty")]
pub aesthetics_before: Vec<TokenKind>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub aesthetics_after: Vec<TokenKind>,
}

impl WithAesthetics for Stmt {
    /// Attach the surrounding aesthetic tokens to this statement.
    fn with_aesthetics(
        mut self,
        aesthetics_before: Vec<TokenKind>,
        aesthetics_after: Vec<TokenKind>,
    ) -> Self {
        self.aesthetics_before = aesthetics_before;
        self.aesthetics_after = aesthetics_after;
        self
    }
}

#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize)]
Expand Down Expand Up @@ -73,6 +93,24 @@ pub struct ImportDef {
/// An annotation attached to a statement (e.g. `@{binding_strength=1}`),
/// together with any aesthetic tokens lexed around it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Annotation {
    pub expr: Box<Expr>,
    // `default` is required alongside `skip_serializing_if`: without it, a
    // value serialized with the field skipped cannot be deserialized again.
    // (Matches the pattern used for `Stmt::annotations`.)
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub aesthetics_before: Vec<TokenKind>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub aesthetics_after: Vec<TokenKind>,
}

impl WithAesthetics for Annotation {
    /// Attach the surrounding aesthetic tokens to this annotation.
    fn with_aesthetics(
        mut self,
        aesthetics_before: Vec<TokenKind>,
        aesthetics_after: Vec<TokenKind>,
    ) -> Self {
        self.aesthetics_before = aesthetics_before;
        self.aesthetics_after = aesthetics_after;
        self
    }
}

impl Stmt {
Expand All @@ -81,6 +119,8 @@ impl Stmt {
kind,
span: None,
annotations: Vec::new(),
aesthetics_before: Vec::new(),
aesthetics_after: Vec::new(),
}
}
}
44 changes: 24 additions & 20 deletions prqlc/prqlc-parser/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use super::interpolation;
use crate::err::parse_error::PError;
use crate::types::type_expr;

pub fn expr_call() -> impl Parser<TokenKind, Expr, Error = PError> {
pub fn expr_call() -> impl Parser<TokenKind, Expr, Error = PError> + Clone {
let expr = expr();

lambda_func(expr.clone()).or(func_call(expr))
Expand All @@ -27,7 +27,9 @@ pub fn expr() -> impl Parser<TokenKind, Expr, Error = PError> + Clone {
.map(|x| x.to_string())
.map(ExprKind::Internal);

let nested_expr = pipeline(lambda_func(expr.clone()).or(func_call(expr.clone()))).boxed();
let nested_expr = with_aesthetics(
pipeline(lambda_func(expr.clone()).or(func_call(expr.clone()))).boxed(),
);

let tuple = ident_part()
.then_ignore(ctrl('='))
Expand Down Expand Up @@ -122,18 +124,20 @@ pub fn expr() -> impl Parser<TokenKind, Expr, Error = PError> + Clone {

let param = select! { TokenKind::Param(id) => ExprKind::Param(id) };

let term = choice((
literal,
internal,
tuple,
array,
interpolation,
ident_kind,
case,
param,
))
.map_with_span(into_expr)
.or(pipeline)
let term = with_aesthetics(
choice((
literal,
internal,
tuple,
array,
interpolation,
ident_kind,
case,
param,
))
.map_with_span(into_expr)
.or(pipeline),
)
.boxed();

// indirections
Expand Down Expand Up @@ -229,9 +233,9 @@ pub fn expr() -> impl Parser<TokenKind, Expr, Error = PError> + Clone {
})
}

pub fn pipeline<E>(expr: E) -> impl Parser<TokenKind, Expr, Error = PError>
pub fn pipeline<E>(expr: E) -> impl Parser<TokenKind, Expr, Error = PError> + Clone
where
E: Parser<TokenKind, Expr, Error = PError>,
E: Parser<TokenKind, Expr, Error = PError> + Clone,
{
// expr has to be a param, because it can be either a normal expr() or
// a recursive expr called from within expr()
Expand Down Expand Up @@ -264,7 +268,7 @@ where
pub fn binary_op_parser<'a, Term, Op>(
term: Term,
op: Op,
) -> impl Parser<TokenKind, Expr, Error = PError> + 'a
) -> impl Parser<TokenKind, Expr, Error = PError> + 'a + Clone
where
Term: Parser<TokenKind, Expr, Error = PError> + 'a,
Op: Parser<TokenKind, BinOp, Error = PError> + 'a,
Expand All @@ -290,7 +294,7 @@ where
.boxed()
}

fn func_call<E>(expr: E) -> impl Parser<TokenKind, Expr, Error = PError>
fn func_call<E>(expr: E) -> impl Parser<TokenKind, Expr, Error = PError> + Clone
where
E: Parser<TokenKind, Expr, Error = PError> + Clone,
{
Expand Down Expand Up @@ -342,7 +346,7 @@ where
.labelled("function call")
}

fn lambda_func<E>(expr: E) -> impl Parser<TokenKind, Expr, Error = PError>
fn lambda_func<E>(expr: E) -> impl Parser<TokenKind, Expr, Error = PError> + Clone
where
E: Parser<TokenKind, Expr, Error = PError> + Clone + 'static,
{
Expand Down Expand Up @@ -402,7 +406,7 @@ where
.labelled("function definition")
}

pub fn ident() -> impl Parser<TokenKind, Ident, Error = PError> {
pub fn ident() -> impl Parser<TokenKind, Ident, Error = PError> + Clone {
ident_part()
.separated_by(ctrl('.'))
.at_least(1)
Expand Down
4 changes: 4 additions & 0 deletions prqlc/prqlc-parser/src/interpolation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ fn parse_interpolate() {
0:8-9,
),
alias: None,
aesthetics_before: [],
aesthetics_after: [],
},
format: None,
},
Expand Down Expand Up @@ -144,6 +146,8 @@ fn parse_interpolate() {
0:14-15,
),
alias: None,
aesthetics_before: [],
aesthetics_after: [],
},
format: None,
},
Expand Down
63 changes: 47 additions & 16 deletions prqlc/prqlc-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub fn parse_source(source: &str, source_id: u16) -> Result<Vec<Stmt>, Vec<Error
let mut errors = Vec::new();

let (tokens, lex_errors) = ::chumsky::Parser::parse_recovery(&lexer::lexer(), source);
// let (tokens, lex_errors) = ::chumsky::Parser::parse_recovery_verbose(&lexer::lexer(), source);

log::debug!("Lex errors: {:?}", lex_errors);
errors.extend(
Expand All @@ -31,21 +32,12 @@ pub fn parse_source(source: &str, source_id: u16) -> Result<Vec<Stmt>, Vec<Error
.map(|e| convert_lexer_error(source, e, source_id)),
);

// We don't want comments in the AST (but we do intend to use them as part of
// formatting)
let semantic_tokens: Option<_> = tokens.map(|tokens| {
tokens.into_iter().filter(|token| {
!matches!(
token.kind,
TokenKind::Comment(_) | TokenKind::LineWrap(_) | TokenKind::DocComment(_)
)
})
});

let ast = if let Some(semantic_tokens) = semantic_tokens {
let stream = prepare_stream(semantic_tokens, source, source_id);
let ast = if let Some(tokens) = tokens {
let stream = prepare_stream(tokens.into_iter(), source, source_id);

let (ast, parse_errors) = ::chumsky::Parser::parse_recovery(&stmt::source(), stream);
let (ast, parse_errors) =
// ::chumsky::Parser::parse_recovery_verbose(&stmt::source(), stream);
::chumsky::Parser::parse_recovery(&stmt::source(), stream);

log::debug!("parse errors: {:?}", parse_errors);
errors.extend(parse_errors.into_iter().map(|e| e.into()));
Expand All @@ -72,16 +64,16 @@ pub fn lex_source(source: &str) -> Result<TokenVec, Vec<Error>> {

mod common {
use chumsky::prelude::*;
use prqlc_ast::expr::*;
use prqlc_ast::stmt::*;
use prqlc_ast::token::*;
use prqlc_ast::Span;
use prqlc_ast::Ty;
use prqlc_ast::TyKind;
use prqlc_ast::{expr::*, WithAesthetics};

use crate::err::parse_error::PError;

pub fn ident_part() -> impl Parser<TokenKind, String, Error = PError> {
pub fn ident_part() -> impl Parser<TokenKind, String, Error = PError> + Clone {
return select! {
TokenKind::Ident(ident) => ident,
TokenKind::Keyword(ident) if &ident == "module" => ident,
Expand Down Expand Up @@ -112,6 +104,8 @@ mod common {
kind,
span: Some(span),
annotations,
aesthetics_before: Vec::new(),
aesthetics_after: Vec::new(),
}
}

Expand All @@ -128,6 +122,43 @@ mod common {
..Ty::new(kind)
}
}

/// Parses a single "aesthetic" token — a comment, line wrap, or doc comment.
/// These don't affect the semantics of the program; we capture them so they
/// can be attached to AST nodes for formatting. The matched token is passed
/// through unchanged.
pub fn aesthetic() -> impl Parser<TokenKind, TokenKind, Error = PError> + Clone {
    select! {
        TokenKind::Comment(comment) => TokenKind::Comment(comment),
        TokenKind::LineWrap(lw) => TokenKind::LineWrap(lw),
        TokenKind::DocComment(dc) => TokenKind::DocComment(dc),
    }
}

/// Wraps `parser` so that any aesthetic tokens (comments, line wraps, doc
/// comments) immediately before or after the parsed item are captured and
/// attached to it via [WithAesthetics::with_aesthetics].
pub fn with_aesthetics<P, O>(parser: P) -> impl Parser<TokenKind, O, Error = PError> + Clone
where
    P: Parser<TokenKind, O, Error = PError> + Clone,
    O: WithAesthetics,
{
    // We allow newlines between the preceding aesthetics and the actual
    // token, to cover a case like `# foo` here:
    //
    // ```prql
    // # foo
    //
    // from bar
    // # baz
    // select artists
    // ```
    //
    // ...but we don't consume newlines after the trailing aesthetics, since
    // we don't want to eat the newline after `from bar`.
    //
    let aesthetics_before = aesthetic().then_ignore(new_line().repeated()).repeated();
    let aesthetics_after = aesthetic().separated_by(new_line());

    aesthetics_before.then(parser).then(aesthetics_after).map(
        |((aesthetics_before, inner), aesthetics_after)| {
            inner.with_aesthetics(aesthetics_before, aesthetics_after)
        },
    )
}
}

/// Convert the output of the lexer into the input of the parser. Requires
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\"
right:
Literal:
String: USA
aesthetics_after:
- Comment: " Each line transforms the previous result."
- FuncCall:
name:
Ident: derive
Expand All @@ -36,12 +38,16 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\"
right:
Ident: payroll_tax
alias: gross_salary
aesthetics_before:
- Comment: " This adds columns / variables."
- Binary:
left:
Ident: gross_salary
op: Add
right:
Ident: benefits_cost
aesthetics_after:
- Comment: " Variables can use other variables."
alias: gross_cost
- FuncCall:
name:
Expand Down Expand Up @@ -71,6 +77,8 @@ expression: "parse_single(r#\"\nfrom employees\nfilter country == \"USA\"
Ident: average
args:
- Ident: salary
aesthetics_before:
- Comment: " Aggregate each group to a single row"
- FuncCall:
name:
Ident: average
Expand Down
Loading

0 comments on commit 063159d

Please sign in to comment.