Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(#331): Literate Haskell Support #344

Merged
merged 10 commits into from
Jan 10, 2025
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]
members = [ "harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html"]
members = [ "harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell"]
resolver = "2"

[profile.release]
Expand Down
1 change: 1 addition & 0 deletions harper-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ repository = "https://github.com/automattic/harper"
anyhow = "1.0.95"
ariadne = "0.4.1"
clap = { version = "4.5.23", features = ["derive"] }
harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.15.0" }
harper-core = { path = "../harper-core", version = "0.15.0" }
harper-comments = { path = "../harper-comments", version = "0.15.0" }
serde_json = "1.0.133"
11 changes: 6 additions & 5 deletions harper-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use harper_comments::CommentParser;
use harper_core::linting::{LintGroup, LintGroupConfig, Linter};
use harper_core::parsers::Markdown;
use harper_core::{remove_overlaps, Dictionary, Document, FstDictionary, TokenKind};
use harper_literate_haskell::LiterateHaskellParser;

#[derive(Debug, Parser)]
enum Args {
Expand Down Expand Up @@ -169,14 +170,14 @@ fn load_file(file: &Path) -> anyhow::Result<(Document, String)> {
let source = std::fs::read_to_string(file)?;

let mut parser: Box<dyn harper_core::parsers::Parser> =
if let Some("md") = file.extension().map(|v| v.to_str().unwrap()) {
Box::new(Markdown)
} else {
Box::new(
match file.extension().map(|v| v.to_str().unwrap()) {
Some("md") => Box::new(Markdown),
Some("lhs") => Box::new(LiterateHaskellParser),
_ => Box::new(
CommentParser::new_from_filename(file)
.map(Box::new)
.ok_or(format_err!("Could not detect language ID."))?,
)
),
};

Ok((Document::new_curated(&source, &mut parser), source))
Expand Down
14 changes: 14 additions & 0 deletions harper-literate-haskell/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "harper-literate-haskell"
elijah-potter marked this conversation as resolved.
Show resolved Hide resolved
version = "0.15.0"
edition = "2021"
description = "The language checker for developers."
license = "Apache-2.0"
repository = "https://github.com/automattic/harper"

[dependencies]
harper-core = { path = "../harper-core", version = "0.15.0" }
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.15.0" }
harper-comments = { path = "../harper-comments", version = "0.15.0" }
itertools = "0.13.0"
paste = "1.0.14"
31 changes: 31 additions & 0 deletions harper-literate-haskell/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use harper_comments::CommentParser;
use harper_core::{
parsers::{Markdown, Mask, Parser},
FullDictionary, Masker, Token,
};

mod masker;
use itertools::Itertools;
use masker::LiterateHaskellMasker;

/// Parses a Literate Haskell document by masking out the code and considering text as Markdown.
pub struct LiterateHaskellParser;

impl LiterateHaskellParser {
    /// Builds an identifier dictionary from the code portions of `source`.
    ///
    /// The code is selected with [`LiterateHaskellMasker::code_only`], stitched
    /// back together, and handed to the Haskell [`CommentParser`]. The return
    /// value is whatever that parser's `create_ident_dict` produces for the
    /// extracted code.
    ///
    /// # Panics
    ///
    /// Panics if `harper-comments` does not recognize the `haskell` language id.
    pub fn create_ident_dict(&self, source: &[char]) -> Option<FullDictionary> {
        let parser = CommentParser::new_from_language_id("haskell")
            .expect("harper-comments should recognize the `haskell` language id");
        let mask = LiterateHaskellMasker::code_only().create_mask(source);

        // The masked chunks do not carry the newline (nor the Bird prefix) that
        // separated them in the document. Re-insert a newline after every chunk
        // so that the last token of one line is not glued onto the first token
        // of the next (e.g. `Integer` + `fact` becoming `Integerfact`).
        let code = mask
            .iter_allowed(source)
            .flat_map(|(_, chunk)| chunk.iter().copied().chain(std::iter::once('\n')))
            .collect_vec();
        parser.create_ident_dict(&code)
    }
}

impl Parser for LiterateHaskellParser {
    /// Tokenizes `source` by running the Markdown parser over the regions
    /// selected by the text-only Literate Haskell masker.
    fn parse(&mut self, source: &[char]) -> Vec<Token> {
        let mut prose_parser = Mask::new(LiterateHaskellMasker::text_only(), Markdown);
        prose_parser.parse(source)
    }
}
148 changes: 148 additions & 0 deletions harper-literate-haskell/src/masker.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
use harper_core::{CharStringExt, Mask, Masker, Span};

/// Masker for selecting portions of Literate Haskell documents.
///
/// Based on the specifications outlined at <https://wiki.haskell.org/Literate_programming>.
#[derive(Debug, Clone, Copy)]
pub struct LiterateHaskellMasker {
    /// When `true`, lines outside of code environments are included in the mask.
    text: bool,
    /// When `true`, lines inside of code environments are included in the mask.
    code: bool,
}

impl LiterateHaskellMasker {
    /// Creates a masker that selects the text of a document and leaves out the code.
    pub fn text_only() -> Self {
        let (text, code) = (true, false);
        Self { text, code }
    }

    /// Creates a masker that selects the code of a document and leaves out the text.
    pub fn code_only() -> Self {
        let (text, code) = (false, true);
        Self { text, code }
    }
}

impl Masker for LiterateHaskellMasker {
fn create_mask(&mut self, source: &[char]) -> harper_core::Mask {
elijah-potter marked this conversation as resolved.
Show resolved Hide resolved
let mut mask = Mask::new_blank();

let mut location = 0;
let mut in_code_env = false;
let mut last_line_blank = false;

for line in source.split(|c| *c == '\n') {
let string_form = line.to_string();
let trimmed = string_form.trim();
let line_is_bird = line.first().is_some_and(|c| *c == '>');

// Code fencing
let latex_style = matches!(trimmed, r"\begin{code}" | r"\end{code}");
let code_start = trimmed == r"\begin{code}" || (last_line_blank && line_is_bird);
let code_end = trimmed == r"\end{code}" || trimmed.is_empty();

// Toggle on fence
if (!in_code_env && code_start) || (in_code_env && code_end) {
in_code_env = !in_code_env;

// Exclude latex-style fence
if latex_style {
location += line.len() + 1; // +1 for the newline split on
last_line_blank = trimmed.is_empty();
continue;
}

// Exclude newline after code for bird style
if trimmed.is_empty() {
location += line.len() + 1; // +1 for the newline split on
last_line_blank = true;
continue;
}
}

let end_loc = location + line.len();
if (!in_code_env && self.text) || (in_code_env && self.code) {
let start_loc = if line_is_bird { location + 2 } else { location };
mask.push_allowed(Span::new(start_loc, end_loc));
}

location = end_loc + 1; // +1 for the newline split on
last_line_blank = trimmed.is_empty();
}

mask.merge_whitespace_sep(source);
mask
}
}

#[cfg(test)]
mod tests {
    use harper_core::{Masker, Span};
    use itertools::Itertools;

    use super::LiterateHaskellMasker;

    /// Runs `masker` over `source` and returns the spans it allows, in order.
    fn allowed_spans(mut masker: LiterateHaskellMasker, source: &[char]) -> Vec<Span> {
        let mask = masker.create_mask(source);
        mask.iter_allowed(source).map(|(span, _)| span).collect_vec()
    }

    /// Bird-style code (`>` lines fenced by blank lines) is split from the text.
    #[test]
    fn bird_format() {
        let source = r"Text here

> fact :: Integer -> Integer
> fact 0 = 1
> fact n = n * fact (n-1)

Text here
"
        .chars()
        .collect_vec();

        let text = allowed_spans(LiterateHaskellMasker::text_only(), &source);
        assert_eq!(text, vec![Span::new(0, 10), Span::new(80, 90)]);

        let code = allowed_spans(LiterateHaskellMasker::code_only(), &source);
        assert_eq!(
            code,
            vec![Span::new(13, 39), Span::new(42, 52), Span::new(55, 78)],
        );
    }

    /// LaTeX-style code (`\begin{code}` … `\end{code}`) is split from the text.
    #[test]
    fn latex_format() {
        let source = r#"Text here
\begin{code}
main :: IO ()
main = print "just an example"
\end{code}
Text here
"#
        .chars()
        .collect_vec();

        let text = allowed_spans(LiterateHaskellMasker::text_only(), &source);
        assert_eq!(text, vec![Span::new(0, 9), Span::new(79, 89)]);

        let code = allowed_spans(LiterateHaskellMasker::code_only(), &source);
        assert_eq!(code, vec![Span::new(23, 67)]);
    }
}
42 changes: 42 additions & 0 deletions harper-literate-haskell/tests/run_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use harper_core::linting::{LintGroup, LintGroupConfig, Linter};
use harper_core::{Document, FstDictionary};
use harper_literate_haskell::LiterateHaskellParser;

/// Generates a `#[test]` named `lints_<name>_correctly` that lints the Literate
/// Haskell fixture `test_sources/<name>.lhs` and checks that the expected
/// number of lints is produced.
macro_rules! create_test {
    ($filename:ident.lhs, $correct_expected:expr) => {
        paste::paste! {
            #[test]
            fn [<lints_ $filename _correctly>]() {
                let source = include_str!(concat!(
                    "./test_sources/",
                    stringify!($filename),
                    ".lhs"
                ));

                let dictionary = FstDictionary::curated();
                let document = Document::new_curated(source, &mut LiterateHaskellParser);

                let mut linter = LintGroup::new(LintGroupConfig::default(), dictionary);
                let lints = linter.lint(&document);

                // Printed so that a failing count can be diagnosed from the test output.
                dbg!(&lints);
                assert_eq!(lints.len(), $correct_expected);

                // Every generated token has to cover real characters of the source.
                for token in document.tokens() {
                    assert!(token.span.try_get_content(document.get_source()).is_some());
                }
            }
        }
    };
}

create_test!(bird_format.lhs, 2);
create_test!(latex_format.lhs, 2);
create_test!(mixed_format.lhs, 4);
9 changes: 9 additions & 0 deletions harper-literate-haskell/tests/test_sources/bird_format.lhs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Sourced from https://wiki.haskell.org/Literate_programming.

In Bird-style you have to leave a blnk before the code.

> fact :: Integer -> Integer
> fact 0 = 1
> fact n = n * fact (n-1)

And you have to leave a blnk line after the code as well.
12 changes: 12 additions & 0 deletions harper-literate-haskell/tests/test_sources/latex_format.lhs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Sourced from https://wiki.haskell.org/Literate_programming.

And the definition of the following function
would totally screw up my program, so I'm not
definining it:

\begin{code}
main :: IO ()
main = print "just an example"
\end{code}

Seee?
20 changes: 20 additions & 0 deletions harper-literate-haskell/tests/test_sources/mixed_format.lhs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Sourced from https://wiki.haskell.org/Literate_programming.

In Bird-style you have to leave a blnk before the code.

> fact :: Integer -> Integer
> fact 0 = 1
> fact n = n * fact (n-1)

And you have to leave a blnk line after the code as well.

And the definition of the following function
would totally screw up my program, so I'm not
definining it:

\begin{code}
main :: IO ()
main = print "just an example"
\end{code}

Seee?
1 change: 1 addition & 0 deletions harper-ls/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ readme = "README.md"
repository = "https://github.com/automattic/harper"

[dependencies]
harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.15.0" }
harper-core = { path = "../harper-core", version = "0.15.0", features = ["concurrent"] }
harper-comments = { path = "../harper-comments", version = "0.15.0" }
harper-html = { path = "../harper-html", version = "0.15.0" }
Expand Down
Loading
Loading