
Commit

Implement Optional Footnote References Ignore
petervaro authored and drahnr committed Nov 7, 2023
1 parent 5a1d955 commit 351f12e
Showing 9 changed files with 108 additions and 32 deletions.
6 changes: 3 additions & 3 deletions doc-chunks/src/chunk.rs
@@ -9,11 +9,11 @@ use std::convert::TryFrom;
use std::fmt;
use std::path::Path;

-use crate::PlainOverlay;
use crate::{
util::{sub_char_range, sub_chars},
Range, Span,
};
+use crate::{Ignores, PlainOverlay};

/// Definition of the source of a checkable chunk
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
@@ -364,8 +364,8 @@ impl CheckableChunk {

/// Obtain an accessor object containing mapping and string representation,
/// removing the markdown annotations.
-pub fn erase_cmark(&self) -> PlainOverlay {
-PlainOverlay::erase_cmark(self)
+pub fn erase_cmark(&self, ignores: &Ignores) -> PlainOverlay {
+PlainOverlay::erase_cmark(self, ignores)
}

/// Obtain the length in characters.
19 changes: 15 additions & 4 deletions doc-chunks/src/markdown.rs
@@ -136,7 +136,10 @@ impl<'a> PlainOverlay<'a> {
}

/// Ranges are mapped `cmark reduced/plain -> raw`.
-pub fn extract_plain_with_mapping(cmark: &str) -> (String, IndexMap<Range, SourceRange>) {
+pub fn extract_plain_with_mapping(
+cmark: &str,
+ignores: &Ignores,
+) -> (String, IndexMap<Range, SourceRange>) {
let mut plain = String::with_capacity(cmark.len());
let mut mapping = indexmap::IndexMap::with_capacity(128);

@@ -322,7 +325,7 @@
}
}
Event::FootnoteReference(s) => {
-if !s.is_empty() {
+if !ignores.footnote_references && !s.is_empty() {
let char_range = Range {
start: char_range.start + 2,
end: char_range.end - 1,
@@ -368,8 +371,8 @@
/// reference.
// TODO consider returning a `Vec<PlainOverlay<'a>>` to account for list items
// or other non-linear information which might not pass a grammar check as a whole
-pub fn erase_cmark(chunk: &'a CheckableChunk) -> Self {
-let (plain, mapping) = Self::extract_plain_with_mapping(chunk.as_str());
+pub fn erase_cmark(chunk: &'a CheckableChunk, ignores: &Ignores) -> Self {
+let (plain, mapping) = Self::extract_plain_with_mapping(chunk.as_str(), ignores);
Self {
raw: chunk,
plain,
@@ -518,3 +521,11 @@ impl<'a> fmt::Debug for PlainOverlay<'a> {
Ok(())
}
}

+/// Explicitly ignored markdown entities. The `Default` implementation means we
+/// do not ignore anything, which is the backwards compatible configuration.
+#[derive(Clone, Default)]
+pub struct Ignores {
+/// Ignore [footnote references](Event::FootnoteReference).
+pub footnote_references: bool,
+}
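For illustration, a minimal sketch of what the new `Ignores` toggle does to the plain-text reduction, mirroring the `check_footnote_references` test added to `src/tests.rs` further down. The `doc_chunks` import path and the `main` wrapper are assumptions for the sketch, not part of this commit:

    use doc_chunks::{Ignores, PlainOverlay};

    fn main() {
        const SOURCE: &str = "Hello[^xyz].\n\n[^xyz]: World.";

        // Backwards-compatible default: the reference label is kept and ends up
        // glued to the preceding word, which a spellchecker then flags.
        let (plain, _mapping) =
            PlainOverlay::extract_plain_with_mapping(SOURCE, &Ignores::default());
        assert_eq!(plain, "Helloxyz.\n\nWorld.");

        // With the toggle enabled, the reference is dropped from the plain text
        // and only the surrounding words remain.
        let ignores = Ignores { footnote_references: true };
        let (plain, _mapping) =
            PlainOverlay::extract_plain_with_mapping(SOURCE, &ignores);
        assert_eq!(plain, "Hello.\n\nWorld.");
    }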
8 changes: 8 additions & 0 deletions docs/configuration.md
@@ -97,6 +97,14 @@ allow_concatenation = true
# And the counterpart, which accepts words with dashes, when the suggestion has
# recommendations without the dashes. This is less common.
allow_dashed = false
+# Check the expressions in the footnote references. By default this is turned on
+# to remain backwards compatible, but disabling it can be particularly useful
+# when abbreviations are used instead of numbers as footnote references. For
+# instance, by default the fragment `hello[^xyz]` would be spellchecked as
+# `helloxyz`, which is obviously a misspelled word; with this check turned
+# off, the reference is skipped altogether and only the word `hello` is
+# checked.
+check_footnote_references = false

[NlpRules]
# Allows the user to override the default included
2 changes: 1 addition & 1 deletion src/checker/dummy.rs
@@ -39,7 +39,7 @@ impl Checker for DummyChecker {
let chunk = chunks
.first()
.expect("DummyChecker expects at least one chunk");
-let plain = chunk.erase_cmark();
+let plain = chunk.erase_cmark(&Default::default());
let txt = plain.as_str();
for (index, range) in apply_tokenizer(&tokenizer, txt).enumerate() {
log::trace!("****Token[{}]: >{}<", index, sub_chars(txt, range.clone()));
31 changes: 21 additions & 10 deletions src/checker/hunspell.rs
@@ -23,6 +23,8 @@ use std::sync::Arc;

use hunspell_rs::{CheckResult, Hunspell};

+use doc_chunks::Ignores;

use crate::errors::*;

use super::quirks::{
@@ -133,22 +135,28 @@ pub struct HunspellCheckerInner {
allow_concatenated: bool,
allow_dashed: bool,
allow_emojis: bool,
+check_footnote_references: bool,
ignorelist: String,
}

impl HunspellCheckerInner {
fn new(config: &<HunspellChecker as Checker>::Config) -> Result<Self> {
// TODO allow override
-let (transform_regex, allow_concatenated, allow_dashed, allow_emojis) = {
+let (
+transform_regex,
+allow_concatenated,
+allow_dashed,
+allow_emojis,
+check_footnote_references,
+) = {
let quirks = &config.quirks;
-{
-(
-quirks.transform_regex().to_vec(),
-quirks.allow_concatenated(),
-quirks.allow_dashed(),
-quirks.allow_emojis(),
-)
-}
+(
+quirks.transform_regex().to_vec(),
+quirks.allow_concatenated(),
+quirks.allow_dashed(),
+quirks.allow_emojis(),
+quirks.check_footnote_references(),
+)
};
// FIXME rename the config option
let ignorelist = config.tokenization_splitchars.clone();
@@ -257,6 +265,7 @@ impl HunspellCheckerInner {
allow_concatenated,
allow_dashed,
allow_emojis,
+check_footnote_references,
ignorelist,
})
}
@@ -299,7 +308,9 @@ impl Checker for HunspellChecker {
let mut acc = Vec::with_capacity(chunks.len());

for chunk in chunks {
-let plain = chunk.erase_cmark();
+let plain = chunk.erase_cmark(&Ignores {
+footnote_references: !self.0.check_footnote_references,
+});
log::trace!("{plain:?}");
let txt = plain.as_str();
let hunspell = &*self.hunspell.0;
7 changes: 6 additions & 1 deletion src/checker/nlprules.rs
@@ -114,7 +114,12 @@ fn check_chunk<'a>(
tokenizer: &Tokenizer,
rules: &Rules,
) -> Vec<Suggestion<'a>> {
-let plain = chunk.erase_cmark();
+// TODO We should control which parts of the markdown entities need to be
+// ignored; however, the `NlpRulesConfig`, which is the only configuration
+// we receive in the constructor, does not contain the same quirks (or in
+// fact any other similar settings) as the Hunspell one, so we cannot obtain
+// this setting and therefore fall back to the default.
+let plain = chunk.erase_cmark(&Default::default());
log::trace!("{plain:?}");
let txt = plain.as_str();

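Spelled out, the fallback used here relies on the derived `Default` for `Ignores`: nothing is ignored, so footnote references are still folded into the plain text for this checker regardless of the Hunspell quirk setting. A tiny sketch (the import path is an assumption):

    use doc_chunks::Ignores;

    fn main() {
        // `Ignores::default()` ignores nothing, so the NlpRules checker
        // behaves exactly as it did before this commit.
        let fallback = Ignores::default();
        assert!(!fallback.footnote_references);
    }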
16 changes: 15 additions & 1 deletion src/config/hunspell.rs
@@ -30,15 +30,25 @@ pub struct Quirks {
/// Treats sequences of emojis as OK.
#[serde(default = "yes")]
pub allow_emojis: bool,
+/// Check the expressions in the footnote references. By default this is
+/// turned on to remain backwards compatible, but disabling it can be
+/// particularly useful when abbreviations are used instead of numbers as
+/// footnote references. For instance, by default the fragment `hello[^xyz]`
+/// would be spellchecked as `helloxyz`, which is obviously a misspelled
+/// word; with this check turned off, the reference is skipped altogether
+/// and only the word `hello` is checked.
+#[serde(default = "yes")]
+pub check_footnote_references: bool,
}

impl Default for Quirks {
fn default() -> Self {
Self {
-transform_regex: vec![],
+transform_regex: Vec::new(),
allow_concatenation: false,
allow_dashes: false,
allow_emojis: true,
+check_footnote_references: true,
}
}
}
@@ -59,6 +69,10 @@ impl Quirks {
pub(crate) fn transform_regex(&self) -> &[WrappedRegex] {
&self.transform_regex
}

+pub(crate) fn check_footnote_references(&self) -> bool {
+self.check_footnote_references
+}
}

fn default_tokenization_splitchars() -> String {
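For completeness, a sketch of the backwards-compatible defaults set by the `Default` impl above, written as a hypothetical in-crate test (the test name is made up, and it assumes it sits next to the `Quirks` definition so the public fields shown in the diff are in scope):

    #[test]
    fn quirks_defaults_are_backwards_compatible() {
        // References are checked by default; emojis are allowed by default.
        let quirks = Quirks::default();
        assert!(quirks.check_footnote_references);
        assert!(quirks.allow_emojis);
    }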
6 changes: 3 additions & 3 deletions src/reflow/tests.rs
@@ -92,7 +92,7 @@ macro_rules! reflow_content {
let chunks = docs.get(&$content_type).expect("Contains test data. qed");
assert_eq!(dbg!(chunks).len(), 1);
let chunk = &chunks[0];
-let _plain = chunk.erase_cmark();
+let _plain = chunk.erase_cmark(&Default::default());
let suggestions = reflow(&$content_type, chunk, &CFG).expect("Reflow is working. qed");

let patches = suggestions
@@ -134,7 +134,7 @@ macro_rules! reflow_content {
let chunks = docs.get(&$content_type).expect("Contains test data. qed");
assert_eq!(dbg!(chunks).len(), 1);
let chunk = &chunks[0];
-let _plain = chunk.erase_cmark();
+let _plain = chunk.erase_cmark(&Default::default());
let suggestions = reflow(&$content_type, chunk, &CFG).expect("Reflow is working. qed");

assert_eq!(
@@ -159,7 +159,7 @@ macro_rules! reflow_content {
let chunks = docs.get(&$content_type).expect("Contains test data. qed");
assert_eq!(dbg!(chunks).len(), 1);
let chunk = &chunks[0];
-let _plain = chunk.erase_cmark();
+let _plain = chunk.erase_cmark(&Default::default());
println!("reflow content:\n {:?}", $content);
let suggestions = reflow(&$content_type, chunk, &CFG).expect("Reflow is working. qed");
let patches = suggestions
45 changes: 36 additions & 9 deletions src/tests.rs
@@ -2,7 +2,7 @@ use super::*;
use crate::checker::Checker;
use crate::util::{load_span_from, sub_char_range, sub_chars};
use crate::{chyrp_up, fluff_up};
-use doc_chunks::literalset::testhelper::gen_literal_set;
+use doc_chunks::{literalset::testhelper::gen_literal_set, Ignores};

use crate::documentation::{testcase::annotated_literals, SourceRange};
use indexmap::IndexMap;
@@ -31,7 +31,7 @@ fn parse_and_construct() {
// TODO
let chunk = &chunks[0];
assert_eq!(chunk.as_str(), TEST_RAW.to_owned());
-let plain = chunk.erase_cmark();
+let plain = chunk.erase_cmark(&Default::default());
println!("{plain:?}");

assert_eq!(TEST_PLAIN, plain.as_str());
@@ -98,7 +98,7 @@ macro_rules! end2end {
let chunks = docs.get(&origin).expect("Must contain dummy path");
assert_eq!(dbg!(chunks).len(), 1);
let chunk = &chunks[0];
-let _plain = chunk.erase_cmark();
+let _plain = chunk.erase_cmark(&Default::default());
let cfg = $cfg;
dbg!(std::any::type_name::<$checker>());
let checker = <$checker>::new(&cfg).expect("Checker construction works");
@@ -259,7 +259,7 @@
#[test]
fn issue_281() {
let dict_path = temp_dir().join(uuid::Uuid::new_v4().to_string() + ".dic");
-// Any of the two hypthens cause havoc
+// Any of the two hyphens cause havoc
const EXTRA_DICT: &str = r###"2
@@ -326,7 +326,7 @@
assert_eq!(chunks.len(), 1);
assert_eq!(RAW, chunk.as_str());

-let plain = dbg!(chunk.erase_cmark());
+let plain = dbg!(chunk.erase_cmark(&Default::default()));
assert_eq!(dbg!($plain), plain.as_str());

let mut it = suggestions.into_iter();
@@ -600,6 +600,31 @@ Ref4"#;
}
}

+#[test]
+fn check_footnote_references() {
+const SOURCE: &str = "Hello[^xyz].\n\n[^xyz]: World.";
+let origin = ContentOrigin::TestEntityCommonMark;
+
+let documentation = Documentation::load_from_str(origin.clone(), SOURCE, false, false);
+assert_eq!(documentation.len(), 1);
+
+let chunks = documentation.get(&origin).expect("Must contain dummy path");
+assert_eq!(dbg!(chunks).len(), 1);
+
+let chunk = &chunks[0];
+assert_eq!(chunk.as_str(), SOURCE);
+
+let plain = chunk.erase_cmark(&Ignores {
+footnote_references: false,
+});
+assert_eq!(plain.as_str(), "Helloxyz.\n\nWorld.");
+
+let plain = chunk.erase_cmark(&Ignores {
+footnote_references: true,
+});
+assert_eq!(plain.as_str(), "Hello.\n\nWorld.");
+}
+
#[test]
fn find_spans_emoji() {
const TEST: &str = r##"ab **🐡** xy"##;
@@ -1050,7 +1075,7 @@ fn drill_span() {
CommentVariant::CommonMark,
);

-let plain = chunk.erase_cmark();
+let plain = chunk.erase_cmark(&Default::default());
assert_eq!(plain.find_spans(0..2).len(), 1);
assert_eq!(plain.find_spans(3..4).len(), 1);
assert_eq!(plain.find_spans(5..7).len(), 1);
@@ -1100,7 +1125,8 @@
And a line, or a rule."##;
-let (reduced, mapping) = PlainOverlay::extract_plain_with_mapping(MARKDOWN);
+let (reduced, mapping) =
+PlainOverlay::extract_plain_with_mapping(MARKDOWN, &Default::default());

assert_eq!(dbg!(&reduced).as_str(), PLAIN);
assert_eq!(dbg!(&mapping).len(), 22);
@@ -1117,7 +1143,8 @@
const MARKDOWN: &str = r#" Some __underlined__ **bold** text."#;
const PLAIN: &str = r#"Some underlined bold text."#;

-let (reduced, mapping) = PlainOverlay::extract_plain_with_mapping(MARKDOWN);
+let (reduced, mapping) =
+PlainOverlay::extract_plain_with_mapping(MARKDOWN, &Default::default());

assert_eq!(dbg!(&reduced).as_str(), PLAIN);
assert_eq!(dbg!(&mapping).len(), 5);
@@ -1158,7 +1185,7 @@ fn range_test() {
}

fn cmark_reduction_test(input: &'static str, expected: &'static str, expected_mapping_len: usize) {
-let (plain, mapping) = PlainOverlay::extract_plain_with_mapping(input);
+let (plain, mapping) = PlainOverlay::extract_plain_with_mapping(input, &Default::default());
assert_eq!(dbg!(&plain).as_str(), expected);
assert_eq!(dbg!(&mapping).len(), expected_mapping_len);
for (reduced_range, markdown_range) in mapping.into_iter() {
