From 6a68e5885bdbedf45e0ed84a931a057e32ac4c03 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Mon, 22 Jan 2024 01:12:42 -0500 Subject: [PATCH 01/24] remove extra quote --- randfacts/safe.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/randfacts/safe.txt b/randfacts/safe.txt index 04c880a..89a05ca 100644 --- a/randfacts/safe.txt +++ b/randfacts/safe.txt @@ -6773,7 +6773,7 @@ Turns out, a dog's paw print is just as unique as a human's. Good news for dog d A camel can drink up to 40 gallons of water in one go. That's seriously impressive! Don't bring your crystal ball to Maryland! Fortune telling is illegal in the state. Speaking of ferrets, did you know they used to be used to protect grain stores from rodents? -The technical term for a fear of long words is ""hippopotomonstrosesquippedaliophobia." No way you can self-diagnose yourself with that one! +The technical term for a fear of long words is "hippopotomonstrosesquippedaliophobia." No way you can self-diagnose yourself with that one! The White House has 35 bathrooms. So every bathroom break can be different for POTUS! Greyhounds can run up to 45 mph. So don't challenge one to a race! Hiking naked is illegal in Switzerland. Though we must say, it doesn't seem like a good idea in general! 
From 7f2d2ef888bec418eb10631a261b7c3d9c024316 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sat, 16 Nov 2024 00:27:57 -0500 Subject: [PATCH 02/24] first iteration of rust checkduplicates test --- tests/wagfisch/Cargo.toml | 20 +++++ tests/wagfisch/src/main.rs | 174 +++++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 tests/wagfisch/Cargo.toml create mode 100644 tests/wagfisch/src/main.rs diff --git a/tests/wagfisch/Cargo.toml b/tests/wagfisch/Cargo.toml new file mode 100644 index 0000000..de96cba --- /dev/null +++ b/tests/wagfisch/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "wagfisch" +version = "0.1.0" +edition = "2021" +authors = ["Connor Sample"] +description = "randfacts check duplicates test" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.4.18", features = ["cargo"] } +indicatif = { version = "0.17.9", features = ["rayon"] } +itertools = "0.13.0" +num-integer = "0.1.46" +rayon = "1.10.0" + +[profile.release] +codegen-units = 1 +lto = "fat" +panic = "abort" diff --git a/tests/wagfisch/src/main.rs b/tests/wagfisch/src/main.rs new file mode 100644 index 0000000..cc120d2 --- /dev/null +++ b/tests/wagfisch/src/main.rs @@ -0,0 +1,174 @@ +use std::{ + fs::File, + io::{BufRead, BufReader}, + path::PathBuf, + process::Command, +}; + +use clap::{command, Arg, ArgAction}; +use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle}; +use itertools::Itertools; +use rayon::iter::ParallelIterator; +use rayon::prelude::*; + +#[inline(always)] +fn token_sort_ratio(str1: &str, str2: &str) -> f64 { + // Preallocate vectors with capacity + let mut vec1 = Vec::with_capacity(str1.len()); + let mut vec2 = Vec::with_capacity(str2.len()); + + // Filter and collect characters in one pass + str1.chars() + .filter(|c| c.is_ascii_alphanumeric()) + .for_each(|c| vec1.push(c)); + str2.chars() + .filter(|c| c.is_ascii_alphanumeric()) + 
.for_each(|c| vec2.push(c)); + + // Calculate Levenshtein distance directly on character vectors + let dist = wagner_fischer_2row(&vec1, &vec2) as f64; + let maximum = vec1.len() + vec2.len(); + + if maximum == 0 { + return 0.0; + } + + (1.0 - (dist / maximum as f64)) * 100.0 - 5.0 +} + +// Custom Levenshtein implementation optimized for our use case +#[inline(always)] +fn levenshtein(s1: &[char], s2: &[char]) -> usize { + if s1.len() > s2.len() { + return levenshtein(s2, s1); + } + + let len1 = s1.len(); + let len2 = s2.len(); + + let mut column = Vec::with_capacity(len1 + 1); + for i in 0..=len1 { + column.push(i); + } + + for j in 1..=len2 { + let mut previous = column[0]; + column[0] = j; + + for i in 1..=len1 { + let old = column[i]; + column[i] = if s1[i - 1] == s2[j - 1] { + previous + } else { + 1 + previous.min(column[i - 1]).min(column[i]) + }; + previous = old; + } + } + + column[len1] +} + +#[inline(always)] +fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { + let len1 = s1.len(); + let len2 = s2.len(); + + if len1 == 0 { + return len2; + } + if len2 == 0 { + return len1; + } + + let mut prev_row = (0..=len2).collect::>(); + let mut curr_row = vec![0; len2 + 1]; + + for i in 1..=len1 { + curr_row[0] = i; + for j in 1..=len2 { + curr_row[j] = if s1[i - 1] == s2[j - 1] { + prev_row[j - 1] + } else { + 1 + prev_row[j - 1].min(prev_row[j]).min(curr_row[j - 1]) + }; + } + std::mem::swap(&mut prev_row, &mut curr_row); + } + + prev_row[len2] +} + +fn lines_from_file(filename: &PathBuf, comment: &str) -> Vec<(String, String)> { + let file = File::open(filename).expect("no such file"); + let buf = BufReader::new(file); + buf.lines() + .map(|l| (l.expect("Could not parse line"), comment.to_string())) + .collect() +} + +fn main() { + let m = command!() + .arg( + Arg::new("fix_duplicates") + .long("fix-duplicates") + .action(ArgAction::SetTrue) + .help("Remove duplicate facts"), + ) + .get_matches(); + + // get project's top level + let output = 
Command::new("git") + .args(["rev-parse", "--show-toplevel"]) + .output() + .expect("failed to execute git process"); + + if !output.status.success() { + panic!("Error: {}", String::from_utf8_lossy(&output.stderr)); + } + + // read safe.txt and unsafe.txt into lists + let mut project_root: PathBuf = PathBuf::from(String::from_utf8(output.stdout).unwrap().trim()); + project_root.push("randfacts"); + project_root.push("safe.txt"); + + let mut all_facts = lines_from_file(&project_root, "safe"); + + project_root.pop(); + project_root.push("unsafe.txt"); + + let mut unsafe_contents = lines_from_file(&project_root, "unsafe"); + + all_facts.append(&mut unsafe_contents); + + // Generate all possible pairs of the facts from safe.txt and unsafe.txt + // combined + let total_facts = all_facts.len() as u64; + let total_combinations = num_integer::binomial(total_facts, 2); + println!("facts: {}, comb: {}", total_facts, total_combinations); + + let pb = ProgressBar::new(total_combinations); + pb.set_style( + ProgressStyle::default_bar() + .template( + "{percent}% |{wide_bar}| {pos}/{len} [{elapsed_precise}<{eta_precise} {per_sec}]", + ) + .unwrap(), + ); + + // iterate through all the combinations + let matches: Vec<_> = all_facts + .into_iter() + .combinations(2) + .par_bridge() + .progress_with(pb) + .filter_map(|facts| { + let ratio = token_sort_ratio(&facts[0].0, &facts[1].0); + if ratio > 82.5 { + Some((facts[0].clone(), facts[1].clone(), ratio)) + } else { + None + } + }) + .collect(); +} From e880cf736ab55637199972f9ae9091de55a17af9 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sat, 16 Nov 2024 00:54:22 -0500 Subject: [PATCH 03/24] second iteration speedup --- tests/wagfisch/src/main.rs | 69 ++++++++++++++------------------------ 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/tests/wagfisch/src/main.rs b/tests/wagfisch/src/main.rs index cc120d2..6e1b9f3 100644 --- a/tests/wagfisch/src/main.rs +++ b/tests/wagfisch/src/main.rs @@ -11,11 +11,13 @@ use 
itertools::Itertools; use rayon::iter::ParallelIterator; use rayon::prelude::*; +const INITIAL_VEC_CAPACITY: usize = 1000; + #[inline(always)] fn token_sort_ratio(str1: &str, str2: &str) -> f64 { // Preallocate vectors with capacity - let mut vec1 = Vec::with_capacity(str1.len()); - let mut vec2 = Vec::with_capacity(str2.len()); + let mut vec1 = Vec::with_capacity(INITIAL_VEC_CAPACITY); + let mut vec2 = Vec::with_capacity(INITIAL_VEC_CAPACITY); // Filter and collect characters in one pass str1.chars() @@ -36,41 +38,13 @@ fn token_sort_ratio(str1: &str, str2: &str) -> f64 { (1.0 - (dist / maximum as f64)) * 100.0 - 5.0 } -// Custom Levenshtein implementation optimized for our use case -#[inline(always)] -fn levenshtein(s1: &[char], s2: &[char]) -> usize { - if s1.len() > s2.len() { - return levenshtein(s2, s1); - } - - let len1 = s1.len(); - let len2 = s2.len(); - - let mut column = Vec::with_capacity(len1 + 1); - for i in 0..=len1 { - column.push(i); - } - - for j in 1..=len2 { - let mut previous = column[0]; - column[0] = j; - - for i in 1..=len1 { - let old = column[i]; - column[i] = if s1[i - 1] == s2[j - 1] { - previous - } else { - 1 + previous.min(column[i - 1]).min(column[i]) - }; - previous = old; - } - } - - column[len1] -} - #[inline(always)] fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { + let (s1, s2) = if s1.len() < s2.len() { + (s1, s2) + } else { + (s2, s1) + }; let len1 = s1.len(); let len2 = s2.len(); @@ -84,16 +58,24 @@ fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { let mut prev_row = (0..=len2).collect::>(); let mut curr_row = vec![0; len2 + 1]; - for i in 1..=len1 { - curr_row[0] = i; - for j in 1..=len2 { - curr_row[j] = if s1[i - 1] == s2[j - 1] { - prev_row[j - 1] + // Initialize first row + for i in 0..=len2 { + prev_row[i] = i; + } + + for (i, c1) in s1.iter().enumerate() { + curr_row[0] = i + 1; + + for (j, c2) in s2.iter().enumerate() { + curr_row[j + 1] = if c1 == c2 { + prev_row[j] } else { - 1 + prev_row[j - 
1].min(prev_row[j]).min(curr_row[j - 1]) + 1 + prev_row[j].min(prev_row[j + 1]).min(curr_row[j]) }; } - std::mem::swap(&mut prev_row, &mut curr_row); + + // Swap rows using copy_from_slice for better performance + prev_row[..=len2].copy_from_slice(&curr_row[..=len2]); } prev_row[len2] @@ -144,7 +126,7 @@ fn main() { // Generate all possible pairs of the facts from safe.txt and unsafe.txt // combined let total_facts = all_facts.len() as u64; - let total_combinations = num_integer::binomial(total_facts, 2); + let total_combinations = num_integer::binomial(total_facts as u64, 2); println!("facts: {}, comb: {}", total_facts, total_combinations); let pb = ProgressBar::new(total_combinations); @@ -171,4 +153,5 @@ fn main() { } }) .collect(); + println!("{:?}", matches); } From 26841b4d3bbe74d61f4e1342745f020b8ced7a54 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sat, 16 Nov 2024 02:14:51 -0500 Subject: [PATCH 04/24] add fix duplicates functionality --- tests/wagfisch/Cargo.toml | 5 +- tests/wagfisch/src/main.rs | 145 ++++++++++++++++++++++++++++++------- 2 files changed, 121 insertions(+), 29 deletions(-) diff --git a/tests/wagfisch/Cargo.toml b/tests/wagfisch/Cargo.toml index de96cba..08d9f78 100644 --- a/tests/wagfisch/Cargo.toml +++ b/tests/wagfisch/Cargo.toml @@ -16,5 +16,8 @@ rayon = "1.10.0" [profile.release] codegen-units = 1 -lto = "fat" +lto = true +opt-level = 3 +split-debuginfo = 'off' +incremental = false panic = "abort" diff --git a/tests/wagfisch/src/main.rs b/tests/wagfisch/src/main.rs index 6e1b9f3..9625488 100644 --- a/tests/wagfisch/src/main.rs +++ b/tests/wagfisch/src/main.rs @@ -1,6 +1,7 @@ use std::{ + fmt, fs::File, - io::{BufRead, BufReader}, + io::{BufRead, BufReader, Write}, path::PathBuf, process::Command, }; @@ -11,8 +12,41 @@ use itertools::Itertools; use rayon::iter::ParallelIterator; use rayon::prelude::*; +type DuplicateFactMatch = (Fact, Fact, f64); const INITIAL_VEC_CAPACITY: usize = 1000; +#[derive(Debug, Clone, Copy, PartialEq, 
Eq)] +enum FactClass { + Safe, + Unsafe, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct Fact { + fact: String, + class: FactClass, + line_number: usize, +} + +impl Fact { + pub fn new(fact: &str, class: FactClass, line_number: usize) -> Self { + Self { + fact: fact.to_owned(), + class, + line_number, + } + } +} + +impl fmt::Display for FactClass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FactClass::Safe => write!(f, "Safe"), + FactClass::Unsafe => write!(f, "Unsafe"), + } + } +} + #[inline(always)] fn token_sort_ratio(str1: &str, str2: &str) -> f64 { // Preallocate vectors with capacity @@ -40,11 +74,13 @@ fn token_sort_ratio(str1: &str, str2: &str) -> f64 { #[inline(always)] fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { + // Always make s1 the shorter string let (s1, s2) = if s1.len() < s2.len() { (s1, s2) } else { (s2, s1) }; + let len1 = s1.len(); let len2 = s2.len(); @@ -81,24 +117,7 @@ fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { prev_row[len2] } -fn lines_from_file(filename: &PathBuf, comment: &str) -> Vec<(String, String)> { - let file = File::open(filename).expect("no such file"); - let buf = BufReader::new(file); - buf.lines() - .map(|l| (l.expect("Could not parse line"), comment.to_string())) - .collect() -} - -fn main() { - let m = command!() - .arg( - Arg::new("fix_duplicates") - .long("fix-duplicates") - .action(ArgAction::SetTrue) - .help("Remove duplicate facts"), - ) - .get_matches(); - +fn get_project_path(filename: &str) -> PathBuf { // get project's top level let output = Command::new("git") .args(["rev-parse", "--show-toplevel"]) @@ -109,17 +128,36 @@ fn main() { panic!("Error: {}", String::from_utf8_lossy(&output.stderr)); } - // read safe.txt and unsafe.txt into lists let mut project_root: PathBuf = PathBuf::from(String::from_utf8(output.stdout).unwrap().trim()); + project_root.push("randfacts"); - project_root.push("safe.txt"); + project_root.push(filename); + 
project_root +} - let mut all_facts = lines_from_file(&project_root, "safe"); +fn write_facts_to_file(filename: &str, facts: Vec) { + let mut file = File::create(get_project_path(filename)).expect("no such file"); + for fact in facts { + writeln!(file, "{}", fact.fact).expect("error writing file"); + } +} - project_root.pop(); - project_root.push("unsafe.txt"); +fn load_fact_list(filename: &str, comment: FactClass) -> Vec { + let file = File::open(get_project_path(filename)).expect("no such file"); + let buf = BufReader::new(file); + buf.lines() + .enumerate() + .map(|(line_number, line)| { + Fact::new(&line.expect("Could not parse line"), comment, line_number) + }) + .collect() +} - let mut unsafe_contents = lines_from_file(&project_root, "unsafe"); +fn find_duplicate_facts() -> Vec { + // read safe.txt and unsafe.txt into lists + let mut all_facts = load_fact_list("safe.txt", FactClass::Safe); + + let mut unsafe_contents = load_fact_list("unsafe.txt", FactClass::Unsafe); all_facts.append(&mut unsafe_contents); @@ -127,7 +165,6 @@ fn main() { // combined let total_facts = all_facts.len() as u64; let total_combinations = num_integer::binomial(total_facts as u64, 2); - println!("facts: {}, comb: {}", total_facts, total_combinations); let pb = ProgressBar::new(total_combinations); pb.set_style( @@ -145,7 +182,7 @@ fn main() { .par_bridge() .progress_with(pb) .filter_map(|facts| { - let ratio = token_sort_ratio(&facts[0].0, &facts[1].0); + let ratio = token_sort_ratio(&facts[0].fact, &facts[1].fact); if ratio > 82.5 { Some((facts[0].clone(), facts[1].clone(), ratio)) } else { @@ -153,5 +190,57 @@ fn main() { } }) .collect(); - println!("{:?}", matches); + matches +} + +fn main() { + let args = command!() + .arg( + Arg::new("fix_duplicates") + .long("fix-duplicates") + .action(ArgAction::SetTrue) + .help("Remove duplicate facts"), + ) + .get_matches(); + + let matches = find_duplicate_facts(); + + if !matches.is_empty() { + if !args.get_flag("fix_duplicates") { + 
println!("{:#?}", matches); + println!("\nNumber of similar facts: {}", matches.len()); + } else { + println!("Generating list of indicies to remove..."); + let mut indicies_to_remove = vec![]; + for fact_match in matches { + println!("{:#?}", fact_match); + + // keep unsafe facts over safe facts + if fact_match.0.class == FactClass::Unsafe { + indicies_to_remove.push((fact_match.0.line_number, fact_match.0.class)); + } else { + // first fact isn't unsafe so we don't need to prioritize it + indicies_to_remove.push((fact_match.1.line_number, fact_match.1.class)); + } + } + + // remove all indicies from combinations + let mut safe_facts = load_fact_list("safe.txt", FactClass::Safe); + let mut unsafe_facts = load_fact_list("unsafe.txt", FactClass::Unsafe); + + // sort removal indicies in reverse so that file lines dont get messed up + indicies_to_remove.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); + + // remove one of the duplicate facts from the files + for (index, class) in indicies_to_remove { + _ = match class { + FactClass::Safe => safe_facts.remove(index), + FactClass::Unsafe => unsafe_facts.remove(index), + } + } + + write_facts_to_file("safe_new.txt", safe_facts); + write_facts_to_file("unsafe_new.txt", unsafe_facts); + } + } } From 6f516ff04197d235f873c5880bc370f95387b8ef Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sat, 16 Nov 2024 23:07:48 -0500 Subject: [PATCH 05/24] fix fact typos --- randfacts/safe.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/randfacts/safe.txt b/randfacts/safe.txt index 89a05ca..2972193 100644 --- a/randfacts/safe.txt +++ b/randfacts/safe.txt @@ -1270,7 +1270,7 @@ Having an orgasm at least 3 times a week cuts in half the likelihood of coronary 30 million people in China live on less than US$1 per day, as of 2019. Brazil is defined as a "federal republic" composed of the Federal District, 26 states, and 5,570 municipalities. People with autism are less likely to catch yawns. 
The more severe their condition, the less common the behavior gets. -Mexico has68 official languages. +Mexico has 68 official languages. Girls who complete secondary school are 6 times less likely to become child brides La Paz, Bolivia, was the first South American city to get an electricity supply. It was powered by llama dung Penguins have an organ near the eye that filters salt from the water out of their system @@ -7298,4 +7298,4 @@ The giant stone heads on Easter Island have hidden bodies! Kleenex tissues were originally used as filters in gas masks. In 1998, Sony accidentally sold 700,000 camcorders that could see through people's clothes. During your lifetime, you will spend around seventy-nine days brushing your teeth. -Ronald McDonald is "Donald McDonald" in Japan because it makes pronunciation easier for the Japanese. \ No newline at end of file +Ronald McDonald is "Donald McDonald" in Japan because it makes pronunciation easier for the Japanese. From 5d765474ada1fe606ec24c5c8b2f7b59b3dd5f72 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sat, 16 Nov 2024 23:20:15 -0500 Subject: [PATCH 06/24] use arc for cheap cloning --- tests/wagfisch/src/main.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/wagfisch/src/main.rs b/tests/wagfisch/src/main.rs index 9625488..247c993 100644 --- a/tests/wagfisch/src/main.rs +++ b/tests/wagfisch/src/main.rs @@ -4,6 +4,7 @@ use std::{ io::{BufRead, BufReader, Write}, path::PathBuf, process::Command, + sync::Arc, }; use clap::{command, Arg, ArgAction}; @@ -23,15 +24,15 @@ enum FactClass { #[derive(Debug, Clone, PartialEq, Eq)] struct Fact { - fact: String, + fact: Arc, class: FactClass, line_number: usize, } impl Fact { - pub fn new(fact: &str, class: FactClass, line_number: usize) -> Self { + pub fn new(fact: String, class: FactClass, line_number: usize) -> Self { Self { - fact: fact.to_owned(), + fact: Arc::new(fact), class, line_number, } @@ -148,7 +149,7 @@ fn 
load_fact_list(filename: &str, comment: FactClass) -> Vec { buf.lines() .enumerate() .map(|(line_number, line)| { - Fact::new(&line.expect("Could not parse line"), comment, line_number) + Fact::new(line.expect("Could not parse line"), comment, line_number) }) .collect() } @@ -239,8 +240,8 @@ fn main() { } } - write_facts_to_file("safe_new.txt", safe_facts); - write_facts_to_file("unsafe_new.txt", unsafe_facts); + write_facts_to_file("safe.txt", safe_facts); + write_facts_to_file("unsafe.txt", unsafe_facts); } } } From 67b578ca7c3ddfbc733dc0075c1f533323daad93 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sat, 16 Nov 2024 23:42:37 -0500 Subject: [PATCH 07/24] performance enhancements --- tests/wagfisch/Cargo.toml | 1 - tests/wagfisch/src/main.rs | 57 +++++++++++++++++++++++++------------- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/tests/wagfisch/Cargo.toml b/tests/wagfisch/Cargo.toml index 08d9f78..52b2886 100644 --- a/tests/wagfisch/Cargo.toml +++ b/tests/wagfisch/Cargo.toml @@ -10,7 +10,6 @@ description = "randfacts check duplicates test" [dependencies] clap = { version = "4.4.18", features = ["cargo"] } indicatif = { version = "0.17.9", features = ["rayon"] } -itertools = "0.13.0" num-integer = "0.1.46" rayon = "1.10.0" diff --git a/tests/wagfisch/src/main.rs b/tests/wagfisch/src/main.rs index 247c993..5b6c83b 100644 --- a/tests/wagfisch/src/main.rs +++ b/tests/wagfisch/src/main.rs @@ -1,7 +1,7 @@ use std::{ fmt, fs::File, - io::{BufRead, BufReader, Write}, + io::{BufRead, BufReader, BufWriter, Write}, path::PathBuf, process::Command, sync::Arc, @@ -9,12 +9,11 @@ use std::{ use clap::{command, Arg, ArgAction}; use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle}; -use itertools::Itertools; use rayon::iter::ParallelIterator; use rayon::prelude::*; type DuplicateFactMatch = (Fact, Fact, f64); -const INITIAL_VEC_CAPACITY: usize = 1000; +const SIMILARITY_THRESHOLD: f64 = 82.5; #[derive(Debug, Clone, Copy, PartialEq, Eq)] 
enum FactClass { @@ -50,17 +49,27 @@ impl fmt::Display for FactClass { #[inline(always)] fn token_sort_ratio(str1: &str, str2: &str) -> f64 { + let len1 = str1.len(); + let len2 = str2.len(); + + // Early exit for obviously different strings + // if their lengths differ by more than half, they're most likely different enough + // this may lead to issues, but it lead to a ~23.33% performance improvement + if (len1 as f64 / len2 as f64) < 0.5 || (len2 as f64 / len1 as f64) < 0.5 { + return 0.0; + } + // Preallocate vectors with capacity - let mut vec1 = Vec::with_capacity(INITIAL_VEC_CAPACITY); - let mut vec2 = Vec::with_capacity(INITIAL_VEC_CAPACITY); + let mut vec1 = Vec::with_capacity(len1); + let mut vec2 = Vec::with_capacity(len2); // Filter and collect characters in one pass str1.chars() .filter(|c| c.is_ascii_alphanumeric()) - .for_each(|c| vec1.push(c)); + .for_each(|c| vec1.push(c.to_ascii_lowercase())); str2.chars() .filter(|c| c.is_ascii_alphanumeric()) - .for_each(|c| vec2.push(c)); + .for_each(|c| vec2.push(c.to_ascii_lowercase())); // Calculate Levenshtein distance directly on character vectors let dist = wagner_fischer_2row(&vec1, &vec2) as f64; @@ -137,9 +146,11 @@ fn get_project_path(filename: &str) -> PathBuf { } fn write_facts_to_file(filename: &str, facts: Vec) { - let mut file = File::create(get_project_path(filename)).expect("no such file"); + let file = File::create(get_project_path(filename)).expect("no such file"); + let mut writer = BufWriter::new(file); + for fact in facts { - writeln!(file, "{}", fact.fact).expect("error writing file"); + writeln!(writer, "{}", fact.fact).expect("error writing file"); } } @@ -176,22 +187,28 @@ fn find_duplicate_facts() -> Vec { .unwrap(), ); - // iterate through all the combinations - let matches: Vec<_> = all_facts - .into_iter() - .combinations(2) - .par_bridge() + // Generate all possible indices combinations + let indices: Vec<_> = (0..all_facts.len()) + .flat_map(|i| ((i + 
1)..all_facts.len()).map(move |j| (i, j))) + .collect(); + + // Process combinations in parallel + indices + .into_par_iter() .progress_with(pb) - .filter_map(|facts| { - let ratio = token_sort_ratio(&facts[0].fact, &facts[1].fact); - if ratio > 82.5 { - Some((facts[0].clone(), facts[1].clone(), ratio)) + .filter_map(|(i, j)| { + let facts = &all_facts; + let fact1 = &facts[i]; + let fact2 = &facts[j]; + + let ratio = token_sort_ratio(&fact1.fact, &fact2.fact); + if ratio > SIMILARITY_THRESHOLD { + Some((fact1.clone(), fact2.clone(), ratio)) } else { None } }) - .collect(); - matches + .collect() } fn main() { From f2b2d6a37beacdf1a39b9be6f1908f5a02e619c7 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sat, 16 Nov 2024 23:52:18 -0500 Subject: [PATCH 08/24] even more performance improvements --- tests/wagfisch/src/main.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/wagfisch/src/main.rs b/tests/wagfisch/src/main.rs index 5b6c83b..c4eea72 100644 --- a/tests/wagfisch/src/main.rs +++ b/tests/wagfisch/src/main.rs @@ -71,7 +71,7 @@ fn token_sort_ratio(str1: &str, str2: &str) -> f64 { .filter(|c| c.is_ascii_alphanumeric()) .for_each(|c| vec2.push(c.to_ascii_lowercase())); - // Calculate Levenshtein distance directly on character vectors + // Calculate wagner fischer directly on character vectors let dist = wagner_fischer_2row(&vec1, &vec2) as f64; let maximum = vec1.len() + vec2.len(); @@ -101,13 +101,13 @@ fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { return len1; } - let mut prev_row = (0..=len2).collect::>(); + let mut prev_row = vec![0; len2 + 1]; let mut curr_row = vec![0; len2 + 1]; // Initialize first row - for i in 0..=len2 { + (0..=len2).for_each(|i| { prev_row[i] = i; - } + }); for (i, c1) in s1.iter().enumerate() { curr_row[0] = i + 1; @@ -120,8 +120,8 @@ fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { }; } - // Swap rows using copy_from_slice for better performance - 
prev_row[..=len2].copy_from_slice(&curr_row[..=len2]); + // Swap rows using mem::swap for better performance + std::mem::swap(&mut prev_row, &mut curr_row); } prev_row[len2] @@ -145,7 +145,7 @@ fn get_project_path(filename: &str) -> PathBuf { project_root } -fn write_facts_to_file(filename: &str, facts: Vec) { +fn write_facts_to_file(filename: &str, facts: &[Fact]) { let file = File::create(get_project_path(filename)).expect("no such file"); let mut writer = BufWriter::new(file); @@ -247,18 +247,18 @@ fn main() { let mut unsafe_facts = load_fact_list("unsafe.txt", FactClass::Unsafe); // sort removal indicies in reverse so that file lines dont get messed up - indicies_to_remove.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); + indicies_to_remove.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); // remove one of the duplicate facts from the files for (index, class) in indicies_to_remove { - _ = match class { + match class { FactClass::Safe => safe_facts.remove(index), FactClass::Unsafe => unsafe_facts.remove(index), - } + }; } - write_facts_to_file("safe.txt", safe_facts); - write_facts_to_file("unsafe.txt", unsafe_facts); + write_facts_to_file("safe.txt", &safe_facts); + write_facts_to_file("unsafe.txt", &unsafe_facts); } } } From da2103bf4f6b200d723158f9e49f903e732e0db7 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sat, 16 Nov 2024 23:54:14 -0500 Subject: [PATCH 09/24] rename new rust test --- tests/{wagfisch => checkduplicates}/Cargo.toml | 2 +- tests/{wagfisch => checkduplicates}/src/main.rs | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/{wagfisch => checkduplicates}/Cargo.toml (95%) rename tests/{wagfisch => checkduplicates}/src/main.rs (100%) diff --git a/tests/wagfisch/Cargo.toml b/tests/checkduplicates/Cargo.toml similarity index 95% rename from tests/wagfisch/Cargo.toml rename to tests/checkduplicates/Cargo.toml index 52b2886..6d75d7e 100644 --- a/tests/wagfisch/Cargo.toml +++ b/tests/checkduplicates/Cargo.toml @@ -1,5 +1,5 @@ 
[package] -name = "wagfisch" +name = "checkduplicates" version = "0.1.0" edition = "2021" authors = ["Connor Sample"] diff --git a/tests/wagfisch/src/main.rs b/tests/checkduplicates/src/main.rs similarity index 100% rename from tests/wagfisch/src/main.rs rename to tests/checkduplicates/src/main.rs From 7ec19b9a56282e22d0664ae2e09b3422af0da3f0 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 00:06:38 -0500 Subject: [PATCH 10/24] refactor rust project --- tests/checkduplicates/src/main.rs | 96 +++---------------------- tests/checkduplicates/src/structures.rs | 43 +++++++++++ tests/checkduplicates/src/util.rs | 67 +++++++++++++++++ 3 files changed, 119 insertions(+), 87 deletions(-) create mode 100644 tests/checkduplicates/src/structures.rs create mode 100644 tests/checkduplicates/src/util.rs diff --git a/tests/checkduplicates/src/main.rs b/tests/checkduplicates/src/main.rs index c4eea72..000bd94 100644 --- a/tests/checkduplicates/src/main.rs +++ b/tests/checkduplicates/src/main.rs @@ -1,51 +1,11 @@ -use std::{ - fmt, - fs::File, - io::{BufRead, BufReader, BufWriter, Write}, - path::PathBuf, - process::Command, - sync::Arc, -}; - use clap::{command, Arg, ArgAction}; use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle}; use rayon::iter::ParallelIterator; use rayon::prelude::*; +use structures::{DuplicateFactMatch, FactClass, SIMILARITY_THRESHOLD}; -type DuplicateFactMatch = (Fact, Fact, f64); -const SIMILARITY_THRESHOLD: f64 = 82.5; - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum FactClass { - Safe, - Unsafe, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -struct Fact { - fact: Arc, - class: FactClass, - line_number: usize, -} - -impl Fact { - pub fn new(fact: String, class: FactClass, line_number: usize) -> Self { - Self { - fact: Arc::new(fact), - class, - line_number, - } - } -} - -impl fmt::Display for FactClass { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - FactClass::Safe => write!(f, 
"Safe"), - FactClass::Unsafe => write!(f, "Unsafe"), - } - } -} +mod structures; +mod util; #[inline(always)] fn token_sort_ratio(str1: &str, str2: &str) -> f64 { @@ -127,49 +87,11 @@ fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { prev_row[len2] } -fn get_project_path(filename: &str) -> PathBuf { - // get project's top level - let output = Command::new("git") - .args(["rev-parse", "--show-toplevel"]) - .output() - .expect("failed to execute git process"); - - if !output.status.success() { - panic!("Error: {}", String::from_utf8_lossy(&output.stderr)); - } - - let mut project_root: PathBuf = PathBuf::from(String::from_utf8(output.stdout).unwrap().trim()); - - project_root.push("randfacts"); - project_root.push(filename); - project_root -} - -fn write_facts_to_file(filename: &str, facts: &[Fact]) { - let file = File::create(get_project_path(filename)).expect("no such file"); - let mut writer = BufWriter::new(file); - - for fact in facts { - writeln!(writer, "{}", fact.fact).expect("error writing file"); - } -} - -fn load_fact_list(filename: &str, comment: FactClass) -> Vec { - let file = File::open(get_project_path(filename)).expect("no such file"); - let buf = BufReader::new(file); - buf.lines() - .enumerate() - .map(|(line_number, line)| { - Fact::new(line.expect("Could not parse line"), comment, line_number) - }) - .collect() -} - fn find_duplicate_facts() -> Vec { // read safe.txt and unsafe.txt into lists - let mut all_facts = load_fact_list("safe.txt", FactClass::Safe); + let mut all_facts = util::load_fact_list("safe.txt", FactClass::Safe); - let mut unsafe_contents = load_fact_list("unsafe.txt", FactClass::Unsafe); + let mut unsafe_contents = util::load_fact_list("unsafe.txt", FactClass::Unsafe); all_facts.append(&mut unsafe_contents); @@ -243,8 +165,8 @@ fn main() { } // remove all indicies from combinations - let mut safe_facts = load_fact_list("safe.txt", FactClass::Safe); - let mut unsafe_facts = load_fact_list("unsafe.txt", 
FactClass::Unsafe); + let mut safe_facts = util::load_fact_list("safe.txt", FactClass::Safe); + let mut unsafe_facts = util::load_fact_list("unsafe.txt", FactClass::Unsafe); // sort removal indicies in reverse so that file lines dont get messed up indicies_to_remove.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); @@ -257,8 +179,8 @@ fn main() { }; } - write_facts_to_file("safe.txt", &safe_facts); - write_facts_to_file("unsafe.txt", &unsafe_facts); + util::write_facts_to_file("safe.txt", &safe_facts); + util::write_facts_to_file("unsafe.txt", &unsafe_facts); } } } diff --git a/tests/checkduplicates/src/structures.rs b/tests/checkduplicates/src/structures.rs new file mode 100644 index 0000000..2d19c40 --- /dev/null +++ b/tests/checkduplicates/src/structures.rs @@ -0,0 +1,43 @@ +use std::{fmt, sync::Arc}; + +/// Type used for when a fact match is found +pub type DuplicateFactMatch = (Fact, Fact, f64); +/// Wagner-Fishcer similarity threshold +pub const SIMILARITY_THRESHOLD: f64 = 82.5; + +/// The classification of a Fact, safe or unsafe +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FactClass { + Safe, + Unsafe, +} + +/// Struct holding information about a fact in a fact file +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Fact { + /// The fact text + pub fact: Arc, + /// The class of the fact (safe or unsafe) + pub class: FactClass, + /// The line number of the fact in it's respective file + pub line_number: usize, +} + +impl Fact { + pub fn new(fact: String, class: FactClass, line_number: usize) -> Self { + Self { + fact: Arc::new(fact), + class, + line_number, + } + } +} + +impl fmt::Display for FactClass { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FactClass::Safe => write!(f, "Safe"), + FactClass::Unsafe => write!(f, "Unsafe"), + } + } +} diff --git a/tests/checkduplicates/src/util.rs b/tests/checkduplicates/src/util.rs new file mode 100644 index 0000000..77bdcba --- /dev/null +++ 
b/tests/checkduplicates/src/util.rs @@ -0,0 +1,67 @@ +use std::{ + fs::File, + io::{BufRead, BufReader, BufWriter, Write}, + path::PathBuf, + process::Command, +}; + +use crate::structures::{Fact, FactClass}; + +/// Get a file from the randfacts/ directory in the top level of the project +/// +/// # Arguments +/// +/// * `filename` - the filename to find. +/// +/// # Panics +/// +/// This function will panic if `git` cannot be run, exits with an error, or prints a non-UTF-8 path +fn get_project_path(filename: &str) -> PathBuf { + // get project's top level + let output = Command::new("git") + .args(["rev-parse", "--show-toplevel"]) + .output() + .expect("failed to execute git process"); + + if !output.status.success() { + panic!("Error: {}", String::from_utf8_lossy(&output.stderr)); + } + + let mut project_root: PathBuf = PathBuf::from(String::from_utf8(output.stdout).unwrap().trim()); + + project_root.push("randfacts"); + project_root.push(filename); + project_root +} + +/// Given an array of facts, write them separated with newlines to a file. +/// +/// # Arguments +/// +/// * `filename` - the filename in `randfacts/` to write to +/// * `facts` - The array of facts to write +pub fn write_facts_to_file(filename: &str, facts: &[Fact]) { + let file = File::create(get_project_path(filename)).expect("no such file"); + let mut writer = BufWriter::new(file); + + for fact in facts { + writeln!(writer, "{}", fact.fact).expect("error writing file"); + } +} + +/// Read facts from a file into a vector.
+/// +/// # Arguments +/// +/// * `filename` - the file in `randfacts/` to read from +/// * `fact_class` - The class of the facts (safe or unsafe) +pub fn load_fact_list(filename: &str, fact_class: FactClass) -> Vec { + let file = File::open(get_project_path(filename)).expect("no such file"); + let buf = BufReader::new(file); + buf.lines() + .enumerate() + .map(|(line_number, line)| { + Fact::new(line.expect("Could not parse line"), fact_class, line_number) + }) + .collect() +} From d567a82d7080754c3b2253bc726f70aab620b09d Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 00:12:52 -0500 Subject: [PATCH 11/24] add comments --- tests/checkduplicates/src/main.rs | 109 +++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 31 deletions(-) diff --git a/tests/checkduplicates/src/main.rs b/tests/checkduplicates/src/main.rs index 000bd94..0bfb4dc 100644 --- a/tests/checkduplicates/src/main.rs +++ b/tests/checkduplicates/src/main.rs @@ -1,3 +1,6 @@ +//! A test for finding and managing duplicate facts across files. +//! Disclaimer: comments mostly generated by AI + use clap::{command, Arg, ArgAction}; use indicatif::{ParallelProgressIterator, ProgressBar, ProgressStyle}; use rayon::iter::ParallelIterator; @@ -7,6 +10,20 @@ use structures::{DuplicateFactMatch, FactClass, SIMILARITY_THRESHOLD}; mod structures; mod util; +/// Calculates the similarity ratio between two strings using a token sort approach. +/// +/// This function implements a modified version of token sort ratio that: +/// 1. Performs early exit optimization for strings with significantly different lengths +/// 2. Filters out non-alphanumeric characters +/// 3. 
Converts all characters to lowercase for comparison +/// +/// # Arguments +/// * `str1` - First string to compare +/// * `str2` - Second string to compare +/// +/// # Returns +/// A float between 0 and 100 representing the similarity percentage, +/// with a -5 offset to reduce false positives #[inline(always)] fn token_sort_ratio(str1: &str, str2: &str) -> f64 { let len1 = str1.len(); @@ -39,12 +56,22 @@ fn token_sort_ratio(str1: &str, str2: &str) -> f64 { return 0.0; } + // Convert distance to similarity ratio and subtract 5 to reduce false positives (1.0 - (dist / maximum as f64)) * 100.0 - 5.0 } +/// Implements the Wagner-Fischer algorithm for calculating edit distance between two sequences, +/// optimized to use only two rows of memory. +/// +/// # Arguments +/// * `s1` - First sequence of characters +/// * `s2` - Second sequence of characters +/// +/// # Returns +/// The minimum number of single-character edits needed to transform one string into another #[inline(always)] fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { - // Always make s1 the shorter string + // Ensure s1 is the shorter sequence for optimization let (s1, s2) = if s1.len() < s2.len() { (s1, s2) } else { @@ -54,6 +81,7 @@ fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { let len1 = s1.len(); let len2 = s2.len(); + // handle empty string cases if len1 == 0 { return len2; } @@ -61,21 +89,25 @@ fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { return len1; } + // Initialize two rows for the dynamic programming matrix let mut prev_row = vec![0; len2 + 1]; let mut curr_row = vec![0; len2 + 1]; - // Initialize first row + // Initialize first row with incremental values (0..=len2).for_each(|i| { prev_row[i] = i; }); + // Fill the matrix using only two rows for (i, c1) in s1.iter().enumerate() { curr_row[0] = i + 1; for (j, c2) in s2.iter().enumerate() { curr_row[j + 1] = if c1 == c2 { + // No edit needed prev_row[j] } else { + // Take minimum of three possible operations 
(insert, delete, substitute) 1 + prev_row[j].min(prev_row[j + 1]).min(curr_row[j]) }; } @@ -87,6 +119,16 @@ fn wagner_fischer_2row(s1: &[char], s2: &[char]) -> usize { prev_row[len2] } +/// Finds duplicate facts across safe and unsafe fact files using parallel processing. +/// +/// This function: +/// 1. Loads facts from both safe.txt and unsafe.txt +/// 2. Generates all possible pairs of facts +/// 3. Calculates similarity ratios in parallel +/// 4. Returns matches above the similarity threshold +/// +/// # Returns +/// A vector of DuplicateFactMatch containing similar fact pairs and their similarity scores fn find_duplicate_facts() -> Vec { // read safe.txt and unsafe.txt into lists let mut all_facts = util::load_fact_list("safe.txt", FactClass::Safe); @@ -95,11 +137,11 @@ fn find_duplicate_facts() -> Vec { all_facts.append(&mut unsafe_contents); - // Generate all possible pairs of the facts from safe.txt and unsafe.txt - // combined + // Calculate total number of possible combinations for progress bar let total_facts = all_facts.len() as u64; let total_combinations = num_integer::binomial(total_facts as u64, 2); + // Initialize progress bar with custom style let pb = ProgressBar::new(total_combinations); pb.set_style( ProgressStyle::default_bar() @@ -149,38 +191,43 @@ fn main() { if !args.get_flag("fix_duplicates") { println!("{:#?}", matches); println!("\nNumber of similar facts: {}", matches.len()); - } else { - println!("Generating list of indicies to remove..."); - let mut indicies_to_remove = vec![]; - for fact_match in matches { - println!("{:#?}", fact_match); - - // keep unsafe facts over safe facts - if fact_match.0.class == FactClass::Unsafe { - indicies_to_remove.push((fact_match.0.line_number, fact_match.0.class)); - } else { - // first fact isn't unsafe so we don't need to prioritize it - indicies_to_remove.push((fact_match.1.line_number, fact_match.1.class)); - } - } + return; + } - // remove all indicies from combinations - let mut safe_facts = 
util::load_fact_list("safe.txt", FactClass::Safe); - let mut unsafe_facts = util::load_fact_list("unsafe.txt", FactClass::Unsafe); + // Fix mode: Remove duplicates + println!("Generating list of indicies to remove..."); + let mut indicies_to_remove = vec![]; - // sort removal indicies in reverse so that file lines dont get messed up - indicies_to_remove.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); + // Determine which facts to remove, prioritizing keeping unsafe facts + for fact_match in matches { + println!("{:#?}", fact_match); - // remove one of the duplicate facts from the files - for (index, class) in indicies_to_remove { - match class { - FactClass::Safe => safe_facts.remove(index), - FactClass::Unsafe => unsafe_facts.remove(index), - }; + // keep unsafe facts over safe facts + if fact_match.0.class == FactClass::Unsafe { + indicies_to_remove.push((fact_match.0.line_number, fact_match.0.class)); + } else { + // first fact isn't unsafe so we don't need to prioritize it + indicies_to_remove.push((fact_match.1.line_number, fact_match.1.class)); } + } - util::write_facts_to_file("safe.txt", &safe_facts); - util::write_facts_to_file("unsafe.txt", &unsafe_facts); + // Load current facts + let mut safe_facts = util::load_fact_list("safe.txt", FactClass::Safe); + let mut unsafe_facts = util::load_fact_list("unsafe.txt", FactClass::Unsafe); + + // sort removal indicies in reverse to maintain correct line numbers + indicies_to_remove.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); + + // Remove duplicates from respective files + for (index, class) in indicies_to_remove { + match class { + FactClass::Safe => safe_facts.remove(index), + FactClass::Unsafe => unsafe_facts.remove(index), + }; } + + // Write updated facts back to files + util::write_facts_to_file("safe.txt", &safe_facts); + util::write_facts_to_file("unsafe.txt", &unsafe_facts); } } From 1968a83eb2fe50ec6f5ea985d04ff93aaecf2697 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 
Nov 2024 00:13:32 -0500 Subject: [PATCH 12/24] remove new duplicate facts that were found --- randfacts/safe.txt | 9 --------- randfacts/unsafe.txt | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/randfacts/safe.txt b/randfacts/safe.txt index 2972193..89550a8 100644 --- a/randfacts/safe.txt +++ b/randfacts/safe.txt @@ -4398,7 +4398,6 @@ To escape the grip of a crocodile's jaws, push your thumbs into its eyeballs – Reindeer like to eat bananas. More people are killed annually by donkeys than airplane crashes. Because of the rotation of the earth, an object can be thrown farther if it is thrown west. -The average person spends 6 months of their life sitting at red lights. More Monopoly money is printed in a year, than real money throughout the world. Caesar salad has nothing to do with any of the Caesars. It was first concocted in a bar in Tijuana, Mexico, in the 1920′s. Seattle's Fremont Bridge rises up and down more than any drawbridge in the world. @@ -5067,7 +5066,6 @@ On a Canadian two-dollar bill, the American flag is flying over the Parliament B $283,200 is the absolute highest amount of money you can win on Jeopardy. Rats and horses can't vomit. Winston Churchill was born in a ladies room during a dance. -Venus is the only planet that rotates clockwise. Charlie Chaplin once won third prize in a Charlie Chaplin look-alike contest. You are more likely to be killed by a champagne cork than by a poisonous spider. Hedenophobic means fear of pleasure. @@ -5079,7 +5077,6 @@ Many insects can carry 50 times their own body weight. This would be like an adu There are over a million described species of insects. Some people estimate there are actually between 15 and 30 million species. Most insects are beneficial to people because they eat other insects, pollinate crops, are food for other animals, make products we use (like honey and silk) or have medical uses. 
Butterflies and insects have their skeletons on the outside of their bodies, called the exoskeleton. This protects the insect and keeps water inside their bodies so they don't dry out. -Elephants are the only mammals that cannot jump. 11% of the world is left-handed. A healthy (non-colorblind) human eye can distinguish between 500 shades of gray. Lizards can self-amputate their tails for protection. It grows back after a few months. @@ -5765,7 +5762,6 @@ Up to 20% of power outages in the U.S are due to squirrels. The Mayo Clinic made glow in the dark cats while trying to find a cure for AIDS. The Antarctic glaciers are made up of 3% penguin urine. The happiest prisoner on death row had an IQ of 46. -Violin bows are made from horsehair. IKEA is an acronym. Stephen Hawking held a reception for time travelers in 2009. A Norwegian Island made dying illegal. @@ -6222,7 +6218,6 @@ The largest known volcano in the solar system is Olympus Mons, located on Mars. On Mars, sunsets appear blue due to the way light is captured in the atmosphere. Because there is no atmosphere, wind, or water to erode them, astronaut footprints on the moon will likely remain there for hundreds of millions of years. One spacesuit for a NASA astronaut costs $12 million to make. -Uranus is the only planet to spin on its side. The diameter of Pluto is smaller than the horizontal length of the United States. The Kármán line, the invisible boundary that officially separates Earth from outer space, is located 62 miles above sea level. So you're only about 62 miles from space right now! It's impossible to burp in space. The lack of gravity in space prevents air in your stomach from separating and rising up from food you've eaten. @@ -6512,7 +6507,6 @@ If Barbie were life-size, her measurements would be 39-23-33. She would stand 7 On average, people fear spiders more than they do death. Thirty-five percent of the people who use personal ads for dating are already married. 
In Tokyo you can buy a toupee for your dog. -A dime has 118 ridges around the edge. The world's oldest wooden wheel has been around for more than 5,000 years Dead skin cells are a main ingredient in household dust Sudan has more pyramids than any country in the world @@ -6726,7 +6720,6 @@ NFL refs also get Super Bowl rings. President Hubert Hoover invented a game called "Hooverball" which was a cross between tennis and volleyball and was played with a medicine ball. Only one city has won three major championships in one year. In 1935, the Detroit Lions won the Super Bowl, the Tigers won the world series, and the Red Wings won the Stanley Cup. More than 100 baseballs are used during a typical professional baseball game. -You can't hum while plugging your nose. Tomatoes have more genes than humans. We're one to two centimeters taller in the morning than at night. One quarter of all our bones are in our feet. @@ -7036,7 +7029,6 @@ Rubber bands last longer when refrigerated. Since 1896, the beginning of the modern Olympics, only Greece and Australia have participated in every Games. The average person has over 1,460 dreams a year. The band Duran Duran got their name from an astronaut in the 1968 Jane Fonda movie Barbarella. -The Earth weighs around 6,588,000,000,000,000,000,000,000 tons. The first toilet ever seen on television was on "Leave It To Beaver." The international telephone dialing code for Antarctica is 672. The name "Jeep" came from the abbreviation used in the army for the "general purpose" vehicle, G.P. @@ -7181,7 +7173,6 @@ Only animal besides a human that can get sunburn: a pig. Proportional to their weight, men are stronger than horses. It is believed that Shakespeare was 46 around the time that the King James Version of the Bible was written. In Psalms 46, the 46th word from the first word is "shake," and the 46th word from the last word is "spear." The parachute was invented by Leonardo da Vinci in 1515. 
-"Canada" is a Native American word meaning "big village." The symbol on the "pound" key (#) is called an octothorpe. A full 7% of the entire Irish barley crop goes to the production of Guinness beer. Tigers have striped skin, not just striped fur. diff --git a/randfacts/unsafe.txt b/randfacts/unsafe.txt index 57de532..9699a7c 100644 --- a/randfacts/unsafe.txt +++ b/randfacts/unsafe.txt @@ -112,4 +112,4 @@ During the medieval times (circa 1400s), France had impotency trials. Which allo Dildos have been around for centuries. With the oldest potential dildo being 28,000 years old. Vibrators were created in the 19th century to reduce "hysteria" in women. Famous gangster, Al Capone, had undiagnosed syphillis until he went to prison. -According to a British law passed in 1845, attempting to commit suicide was a capital offense. Offenders could be hanged for trying. \ No newline at end of file +According to a British law passed in 1845, attempting to commit suicide was a capital offense. Offenders could be hanged for trying. 
From de5f66ff1eb4545de82c14c62405fd33c7cd07e7 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 01:43:10 -0500 Subject: [PATCH 13/24] refactor project to use poetry and pytest --- .github/workflows/check_duplicates.yml | 24 ++ .github/workflows/codeql-analysis.yml | 67 ---- .github/workflows/main.yml | 102 ++++-- .gitignore | 38 ++- LICENSE.txt => LICENSE | 0 MANIFEST.in | 1 - poetry.lock | 223 ++++++++++++ pyproject.toml | 45 +++ randfacts/__init__.py | 67 ++-- randfacts/__main__.py | 5 +- randfacts/__version__.py | 8 - randfacts/randfacts.py | 156 +++++---- randfacts/safe.txt | 2 +- setup.cfg | 2 - setup.py | 42 --- tests/checkduplicates/Cargo.lock | 449 +++++++++++++++++++++++++ tests/checkduplicates/src/main.rs | 2 +- tests/fix_encoding.py | 41 ++- tests/test.py | 45 --- tests/test_general.py | 94 ++++++ 20 files changed, 1097 insertions(+), 316 deletions(-) create mode 100644 .github/workflows/check_duplicates.yml delete mode 100644 .github/workflows/codeql-analysis.yml rename LICENSE.txt => LICENSE (100%) delete mode 100644 MANIFEST.in create mode 100644 poetry.lock create mode 100644 pyproject.toml delete mode 100644 randfacts/__version__.py delete mode 100644 setup.cfg delete mode 100644 setup.py create mode 100644 tests/checkduplicates/Cargo.lock delete mode 100644 tests/test.py create mode 100644 tests/test_general.py diff --git a/.github/workflows/check_duplicates.yml b/.github/workflows/check_duplicates.yml new file mode 100644 index 0000000..ee4d90e --- /dev/null +++ b/.github/workflows/check_duplicates.yml @@ -0,0 +1,24 @@ +name: Check for Duplicate Facts + +# Controls when the action will run. 
+on: + # Triggers the workflow on push or pull request events but only for the master branch + push: + branches: [ master ] + pull_request: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + checkduplicates: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Check for duplicate facts + run: | + cd tests/checkduplicates + cargo run --release diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index abd374a..0000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,67 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: "CodeQL" - -on: - push: - branches: [ master ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ master ] - schedule: - - cron: '26 0 * * 5' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - language: [ 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] - # Learn more: - # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - # Initializes the CodeQL tools for scanning. 
- - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - # queries: ./path/to/local/query, your-org/your-repo/queries@main - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 - - # ℹ️ Command-line programs to run using the OS shell. - # 📚 https://git.io/JvXDl - - # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines - # and modify them (or add more) to build your code if your project - # uses a compiled language - - #- run: | - # make bootstrap - # make release - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2cab971..11a9c3a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,28 +16,80 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: test: - # The type of runner that the job will run on - runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - - # Runs a set of commands using the runners shell - - name: Run a multi-line script - run: | - pip3 install setuptools wheel - python3 setup.py sdist - pip3 install dist/* - python3 tests/test.py - checkduplicates: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Run a multi-line script - run: | - pip3 install -U setuptools wheel pip - pip3 
install rapidfuzz tqdm - python3 tests/checkduplicates.py + name: Test code and coverage + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 + + # If you wanted to use multiple Python versions, you'd have to specify a matrix in the job and + # reference the matrix python version here. + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + + # Cache the installation of Poetry itself, i.e. the next step. This prevents the workflow + # from installing Poetry every time, which can be slow. Note the use of the Poetry version + # number in the cache key, and the "-0" suffix: this allows you to invalidate the cache + # manually if/when you want to upgrade Poetry, or if something goes wrong. This could be + # mildly cleaner by using an environment variable, but I don't really care. + - name: cache poetry install + uses: actions/cache@v4 + with: + path: ~/.local + key: poetry-1.1.12-0 + + # Install Poetry. You could do this manually, or there are several actions that do this. + # `snok/install-poetry` seems to be minimal yet complete, and really just calls out to + # Poetry's default install script, which feels correct. I pin the Poetry version here + # because Poetry does occasionally change APIs between versions and I don't want my + # actions to break if it does. + # + # The key configuration value here is `virtualenvs-in-project: true`: this creates the + # venv as a `.venv` in your testing directory, which allows the next step to easily + # cache it. + - uses: snok/install-poetry@v1 + with: + version: 1.5.1 + virtualenvs-create: true + virtualenvs-in-project: true + + # Cache your dependencies (i.e. all the stuff in your `pyproject.toml`).
Note the cache + # key: if you're using multiple Python versions, or multiple OSes, you'd need to include + # them in the cache key. I'm not, so it can be simple and just depend on the poetry.lock. + - name: cache deps + id: cache-deps + uses: actions/cache@v4 + with: + path: .venv + key: pydeps-${{ hashFiles('**/poetry.lock') }} + + # Install dependencies. `--no-root` means "install all dependencies but not the project + # itself", which is what you want to avoid caching _your_ code. The `if` statement + # ensures this only runs on a cache miss. + - run: poetry install --no-interaction --no-root + if: steps.cache-deps.outputs.cache-hit != 'true' + + # Now install _your_ project. This isn't necessary for many types of projects -- particularly + # things like Django apps don't need this. But it's a good idea since it fully-exercises the + # pyproject.toml and makes sure that if you add things like console-scripts at some point that + # they'll be installed and working. + - run: poetry install --no-interaction + + # run the tests and check for 100% coverage + - run: poetry run pytest . --cov=randfacts --cov-report=term-missing --cov-report=xml + + # check for code style errors + - run: poetry run ruff check + # disable code format checking until docstrings are sorted out + # https://github.com/astral-sh/ruff/issues/8430 + # - run: poetry run ruff format --check + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.gitignore b/.gitignore index fff6711..a7b5af8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,3 @@ -# MacOS development (added by PancakesWasTaken) -.DS_Store - # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -97,7 +94,22 @@ ipython_config.py # install all needed dependencies. #Pipfile.lock -# PEP 582; used by e.g. github.com/David-OConnor/pyflow +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff @@ -140,5 +152,19 @@ dmypy.json # Cython debug symbols cython_debug/ -# cargo -Cargo.lock +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + diff --git a/LICENSE.txt b/LICENSE similarity index 100% rename from LICENSE.txt rename to LICENSE diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index b786c31..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -include randfacts/*.txt \ No newline at end of file diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..33205de --- /dev/null +++ b/poetry.lock @@ -0,0 +1,223 @@ +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.6.7" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "coverage-7.6.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:108bb458827765d538abcbf8288599fee07d2743357bdd9b9dad456c287e121e"}, + {file = "coverage-7.6.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c973b2fe4dc445cb865ab369df7521df9c27bf40715c837a113edaa2aa9faf45"}, + {file = "coverage-7.6.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c6b24007c4bcd0b19fac25763a7cac5035c735ae017e9a349b927cfc88f31c1"}, + {file = "coverage-7.6.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:acbb8af78f8f91b3b51f58f288c0994ba63c646bc1a8a22ad072e4e7e0a49f1c"}, + {file = "coverage-7.6.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad32a981bcdedb8d2ace03b05e4fd8dace8901eec64a532b00b15217d3677dd2"}, + {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:34d23e28ccb26236718a3a78ba72744212aa383141961dd6825f6595005c8b06"}, + {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e25bacb53a8c7325e34d45dddd2f2fbae0dbc230d0e2642e264a64e17322a777"}, + {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af05bbba896c4472a29408455fe31b3797b4d8648ed0a2ccac03e074a77e2314"}, + {file = "coverage-7.6.7-cp310-cp310-win32.whl", hash = "sha256:796c9b107d11d2d69e1849b2dfe41730134b526a49d3acb98ca02f4985eeff7a"}, + {file = 
"coverage-7.6.7-cp310-cp310-win_amd64.whl", hash = "sha256:987a8e3da7da4eed10a20491cf790589a8e5e07656b6dc22d3814c4d88faf163"}, + {file = "coverage-7.6.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7e61b0e77ff4dddebb35a0e8bb5a68bf0f8b872407d8d9f0c726b65dfabe2469"}, + {file = "coverage-7.6.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1a5407a75ca4abc20d6252efeb238377a71ce7bda849c26c7a9bece8680a5d99"}, + {file = "coverage-7.6.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df002e59f2d29e889c37abd0b9ee0d0e6e38c24f5f55d71ff0e09e3412a340ec"}, + {file = "coverage-7.6.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:673184b3156cba06154825f25af33baa2671ddae6343f23175764e65a8c4c30b"}, + {file = "coverage-7.6.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e69ad502f1a2243f739f5bd60565d14a278be58be4c137d90799f2c263e7049a"}, + {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:60dcf7605c50ea72a14490d0756daffef77a5be15ed1b9fea468b1c7bda1bc3b"}, + {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9c2eb378bebb2c8f65befcb5147877fc1c9fbc640fc0aad3add759b5df79d55d"}, + {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3c0317288f032221d35fa4cbc35d9f4923ff0dfd176c79c9b356e8ef8ef2dff4"}, + {file = "coverage-7.6.7-cp311-cp311-win32.whl", hash = "sha256:951aade8297358f3618a6e0660dc74f6b52233c42089d28525749fc8267dccd2"}, + {file = "coverage-7.6.7-cp311-cp311-win_amd64.whl", hash = "sha256:5e444b8e88339a2a67ce07d41faabb1d60d1004820cee5a2c2b54e2d8e429a0f"}, + {file = "coverage-7.6.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f07ff574986bc3edb80e2c36391678a271d555f91fd1d332a1e0f4b5ea4b6ea9"}, + {file = "coverage-7.6.7-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:49ed5ee4109258973630c1f9d099c7e72c5c36605029f3a91fe9982c6076c82b"}, + {file = "coverage-7.6.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3e8796434a8106b3ac025fd15417315d7a58ee3e600ad4dbcfddc3f4b14342c"}, + {file = "coverage-7.6.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3b925300484a3294d1c70f6b2b810d6526f2929de954e5b6be2bf8caa1f12c1"}, + {file = "coverage-7.6.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c42ec2c522e3ddd683dec5cdce8e62817afb648caedad9da725001fa530d354"}, + {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0266b62cbea568bd5e93a4da364d05de422110cbed5056d69339bd5af5685433"}, + {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e5f2a0f161d126ccc7038f1f3029184dbdf8f018230af17ef6fd6a707a5b881f"}, + {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c132b5a22821f9b143f87446805e13580b67c670a548b96da945a8f6b4f2efbb"}, + {file = "coverage-7.6.7-cp312-cp312-win32.whl", hash = "sha256:7c07de0d2a110f02af30883cd7dddbe704887617d5c27cf373362667445a4c76"}, + {file = "coverage-7.6.7-cp312-cp312-win_amd64.whl", hash = "sha256:fd49c01e5057a451c30c9b892948976f5d38f2cbd04dc556a82743ba8e27ed8c"}, + {file = "coverage-7.6.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:46f21663e358beae6b368429ffadf14ed0a329996248a847a4322fb2e35d64d3"}, + {file = "coverage-7.6.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:40cca284c7c310d622a1677f105e8507441d1bb7c226f41978ba7c86979609ab"}, + {file = "coverage-7.6.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77256ad2345c29fe59ae861aa11cfc74579c88d4e8dbf121cbe46b8e32aec808"}, + {file = "coverage-7.6.7-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:87ea64b9fa52bf395272e54020537990a28078478167ade6c61da7ac04dc14bc"}, + {file = "coverage-7.6.7-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d608a7808793e3615e54e9267519351c3ae204a6d85764d8337bd95993581a8"}, + {file = "coverage-7.6.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdd94501d65adc5c24f8a1a0eda110452ba62b3f4aeaba01e021c1ed9cb8f34a"}, + {file = "coverage-7.6.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:82c809a62e953867cf57e0548c2b8464207f5f3a6ff0e1e961683e79b89f2c55"}, + {file = "coverage-7.6.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bb684694e99d0b791a43e9fc0fa58efc15ec357ac48d25b619f207c41f2fd384"}, + {file = "coverage-7.6.7-cp313-cp313-win32.whl", hash = "sha256:963e4a08cbb0af6623e61492c0ec4c0ec5c5cf74db5f6564f98248d27ee57d30"}, + {file = "coverage-7.6.7-cp313-cp313-win_amd64.whl", hash = "sha256:14045b8bfd5909196a90da145a37f9d335a5d988a83db34e80f41e965fb7cb42"}, + {file = "coverage-7.6.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f2c7a045eef561e9544359a0bf5784b44e55cefc7261a20e730baa9220c83413"}, + {file = "coverage-7.6.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dd4e4a49d9c72a38d18d641135d2fb0bdf7b726ca60a103836b3d00a1182acd"}, + {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c95e0fa3d1547cb6f021ab72f5c23402da2358beec0a8e6d19a368bd7b0fb37"}, + {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f63e21ed474edd23f7501f89b53280014436e383a14b9bd77a648366c81dce7b"}, + {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead9b9605c54d15be228687552916c89c9683c215370c4a44f1f217d2adcc34d"}, + {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:0573f5cbf39114270842d01872952d301027d2d6e2d84013f30966313cadb529"}, + {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e2c8e3384c12dfa19fa9a52f23eb091a8fad93b5b81a41b14c17c78e23dd1d8b"}, + {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:70a56a2ec1869e6e9fa69ef6b76b1a8a7ef709972b9cc473f9ce9d26b5997ce3"}, + {file = "coverage-7.6.7-cp313-cp313t-win32.whl", hash = "sha256:dbba8210f5067398b2c4d96b4e64d8fb943644d5eb70be0d989067c8ca40c0f8"}, + {file = "coverage-7.6.7-cp313-cp313t-win_amd64.whl", hash = "sha256:dfd14bcae0c94004baba5184d1c935ae0d1231b8409eb6c103a5fd75e8ecdc56"}, + {file = "coverage-7.6.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37a15573f988b67f7348916077c6d8ad43adb75e478d0910957394df397d2874"}, + {file = "coverage-7.6.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b6cce5c76985f81da3769c52203ee94722cd5d5889731cd70d31fee939b74bf0"}, + {file = "coverage-7.6.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ab9763d291a17b527ac6fd11d1a9a9c358280adb320e9c2672a97af346ac2c"}, + {file = "coverage-7.6.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6cf96ceaa275f071f1bea3067f8fd43bec184a25a962c754024c973af871e1b7"}, + {file = "coverage-7.6.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aee9cf6b0134d6f932d219ce253ef0e624f4fa588ee64830fcba193269e4daa3"}, + {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2bc3e45c16564cc72de09e37413262b9f99167803e5e48c6156bccdfb22c8327"}, + {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:623e6965dcf4e28a3debaa6fcf4b99ee06d27218f46d43befe4db1c70841551c"}, + {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:850cfd2d6fc26f8346f422920ac204e1d28814e32e3a58c19c91980fa74d8289"}, + {file = "coverage-7.6.7-cp39-cp39-win32.whl", hash = 
"sha256:c296263093f099da4f51b3dff1eff5d4959b527d4f2f419e16508c5da9e15e8c"}, + {file = "coverage-7.6.7-cp39-cp39-win_amd64.whl", hash = "sha256:90746521206c88bdb305a4bf3342b1b7316ab80f804d40c536fc7d329301ee13"}, + {file = "coverage-7.6.7-pp39.pp310-none-any.whl", hash = "sha256:0ddcb70b3a3a57581b450571b31cb774f23eb9519c2aaa6176d3a84c9fc57671"}, + {file = "coverage-7.6.7.tar.gz", hash = "sha256:d79d4826e41441c9a118ff045e4bccb9fdbdcb1d02413e7ea6eb5c87b5439d24"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "exceptiongroup" +version = "1.2.2" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "packaging" +version = "24.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, +] + +[[package]] +name = "pluggy" +version = "1.5.0" 
+description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pytest" +version = "8.3.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, + {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-cov" +version = "6.0.0" +description = "Pytest plugin for measuring coverage." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pytest-cov-6.0.0.tar.gz", hash = "sha256:fde0b595ca248bb8e2d76f020b465f3b107c9632e6a1d1705f17834c89dcadc0"}, + {file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"}, +] + +[package.dependencies] +coverage = {version = ">=7.5", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] + +[[package]] +name = "ruff" +version = "0.7.4" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.7.4-py3-none-linux_armv6l.whl", hash = "sha256:a4919925e7684a3f18e18243cd6bea7cfb8e968a6eaa8437971f681b7ec51478"}, + {file = "ruff-0.7.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cfb365c135b830778dda8c04fb7d4280ed0b984e1aec27f574445231e20d6c63"}, + {file = "ruff-0.7.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:63a569b36bc66fbadec5beaa539dd81e0527cb258b94e29e0531ce41bacc1f20"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d06218747d361d06fd2fdac734e7fa92df36df93035db3dc2ad7aa9852cb109"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e0cea28d0944f74ebc33e9f934238f15c758841f9f5edd180b5315c203293452"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80094ecd4793c68b2571b128f91754d60f692d64bc0d7272ec9197fdd09bf9ea"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:997512325c6620d1c4c2b15db49ef59543ef9cd0f4aa8065ec2ae5103cedc7e7"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00b4cf3a6b5fad6d1a66e7574d78956bbd09abfd6c8a997798f01f5da3d46a05"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:7dbdc7d8274e1422722933d1edddfdc65b4336abf0b16dfcb9dedd6e6a517d06"}, + {file = "ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e92dfb5f00eaedb1501b2f906ccabfd67b2355bdf117fea9719fc99ac2145bc"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3bd726099f277d735dc38900b6a8d6cf070f80828877941983a57bca1cd92172"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:2e32829c429dd081ee5ba39aef436603e5b22335c3d3fff013cd585806a6486a"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:662a63b4971807623f6f90c1fb664613f67cc182dc4d991471c23c541fee62dd"}, + {file = "ruff-0.7.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:876f5e09eaae3eb76814c1d3b68879891d6fde4824c015d48e7a7da4cf066a3a"}, + {file = "ruff-0.7.4-py3-none-win32.whl", hash = "sha256:75c53f54904be42dd52a548728a5b572344b50d9b2873d13a3f8c5e3b91f5cac"}, + {file = "ruff-0.7.4-py3-none-win_amd64.whl", hash = "sha256:745775c7b39f914238ed1f1b0bebed0b9155a17cd8bc0b08d3c87e4703b990d6"}, + {file = "ruff-0.7.4-py3-none-win_arm64.whl", hash = "sha256:11bff065102c3ae9d3ea4dc9ecdfe5a5171349cdd0787c1fc64761212fc9cf1f"}, + {file = "ruff-0.7.4.tar.gz", hash = "sha256:cd12e35031f5af6b9b93715d8c4f40360070b2041f81273d0527683d5708fce2"}, +] + +[[package]] +name = "tomli" +version = "2.1.0" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomli-2.1.0-py3-none-any.whl", hash = "sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"}, + {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.6" +content-hash = "6ac9b46da212e9f0849c52dde5608990c3e0e8beee141b901493d3161be1ba77" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..98bf1bc --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,45 @@ +[tool.poetry] 
+name = "randfacts" +version = "0.21.0" +description = "Package to generate random facts" +authors = ["TabulateJarl8 <tabulatejarl8@gmail.com>"] +license = "MIT" +readme = "README.md" +include = ["randfacts/*.txt"] +homepage = "https://tabulate.tech/software/randfacts/" +repository = "https://github.com/TabulateJarl8/randfacts" +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Natural Language :: English", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", +] +packages = [{ include = 'randfacts' }] + +[tool.poetry.scripts] +randfacts = 'randfacts.randfacts:_cli_entrypoint' + +[tool.pyright] +reportUnusedCallResult = false + +[tool.poetry.dependencies] +python = "^3.6" + +[tool.poetry.group.dev.dependencies] +ruff = { version = "^0.7.4", python = "^3.9" } +pytest = { version = "^8.3.3", python = "^3.9" } +pytest-cov = { version = "^6.0.0", python = "^3.9" } + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/randfacts/__init__.py b/randfacts/__init__.py index c4d6643..7cad44c 100644 --- a/randfacts/__init__.py +++ b/randfacts/__init__.py @@ -5,49 +5,66 @@ execution via the command line. See the examples section for more details. Code Examples: - Example usage of randfacts in code. + Example usage of randfacts in code. - Generate a random SFW (safe for work) fact. + Generate a random SFW (safe for work) fact. - >>> randfacts.get_fact() + >>> randfacts.get_fact() - Generate a random NSFW (not safe for work) fact.
+ Generate a random NSFW (not safe for work) fact. - >>> randfacts.get_fact(only_unsafe=True) + >>> randfacts.get_fact(only_unsafe=True) - Generate a random mixed fact (possibility of both SFW and NSFW facts) + Generate a random mixed fact (possibility of both SFW and NSFW facts) - >>> randfacts.get_fact(False) - >>> # or - >>> randfacts.get_fact(filter_enabled=False) + >>> randfacts.get_fact(False) + >>> # or + >>> randfacts.get_fact(filter_enabled=False) CLI Examples: - randfacts can be executed via the command line with the following commands: + randfacts can be executed via the command line with the following commands: - Normal execution; only safe facts + Normal execution; only safe facts - $ python3 -m randfacts + $ python3 -m randfacts - The unsafe argument can be supplied to provide only unsafe facts + The unsafe argument can be supplied to provide only unsafe facts - $ python3 -m randfacts --unsafe + $ python3 -m randfacts --unsafe - The mixed argument can be provided to provide both SFW and NSFW facts. + The mixed argument can be provided to provide both SFW and NSFW facts. - $ python3 -m randfacts --mixed + $ python3 -m randfacts --mixed - More help. + More help. - $ python3 -m randfacts --help + $ python3 -m randfacts --help """ -from .__version__ import __title__, __description__, __url__, __version__, __author__, __author_email__, __license__, __copyright__ -from randfacts.randfacts import get_fact, safe_facts, unsafe_facts, all_facts - import warnings as _warnings + +from randfacts.randfacts import ( + __version__, + all_facts, + get_fact, + safe_facts, + unsafe_facts, +) + +__all__ = [ + "all_facts", + "get_fact", + "safe_facts", + "unsafe_facts", + "__version__", +] + + # Deprecated methods -def getFact(filter_enabled=True, only_unsafe=False): - """This method is deprecated. Please use get_fact""" - _warnings.warn("getFact is deprecated.
Please use get_fact", DeprecationWarning, stacklevel=2) - return get_fact(filter_enabled, only_unsafe) +def getFact(filter_enabled: bool = True, only_unsafe: bool = False): + """This method is deprecated. Please use get_fact""" + _warnings.warn( + "getFact is deprecated. Please use get_fact", DeprecationWarning, stacklevel=2 + ) + return get_fact(filter_enabled, only_unsafe) diff --git a/randfacts/__main__.py b/randfacts/__main__.py index 5e670ad..4fca42c 100644 --- a/randfacts/__main__.py +++ b/randfacts/__main__.py @@ -1,2 +1,3 @@ -from .randfacts import _cli_entrypoint -_cli_entrypoint() \ No newline at end of file +from .randfacts import _cli_entrypoint # pyright: ignore[reportPrivateUsage] + +_cli_entrypoint() diff --git a/randfacts/__version__.py b/randfacts/__version__.py deleted file mode 100644 index c8f338e..0000000 --- a/randfacts/__version__.py +++ /dev/null @@ -1,8 +0,0 @@ -__title__ = "randfacts" -__description__ = "Package to generate random facts" -__url__ = "https://github.com/TabulateJarl8/randfacts" -__version__ = "0.20.2" -__author__ = "Tabulate" -__author_email__ = "tabulatejarl8@gmail.com" -__license__ = "MIT" -__copyright__ = "Copyright 2020-2023 Connor Sample" diff --git a/randfacts/randfacts.py b/randfacts/randfacts.py index ff23538..43beafd 100644 --- a/randfacts/randfacts.py +++ b/randfacts/randfacts.py @@ -1,84 +1,92 @@ -from random import choice -import os import argparse +import importlib.metadata +import os +import sys +from random import choice dir_path = os.path.dirname(os.path.realpath(__file__)) -with open(os.path.join(dir_path, 'safe.txt'), encoding='utf-8') as f: - safe_facts = [ - fact.rstrip('\r\n ') - for fact in f.readlines() - if fact.rstrip('\r\n ') != '' - ] +__version__ = "" +try: + __version__: str = importlib.metadata.version("randfacts") +except Exception: + pass + +with open(os.path.join(dir_path, "safe.txt"), encoding="utf-8") as f: + safe_facts = [ + fact.rstrip("\r\n ") for fact in f.readlines() if 
fact.rstrip("\r\n ") != "" + ] -with open(os.path.join(dir_path, 'unsafe.txt'), encoding='utf-8') as f: - unsafe_facts = [ - fact.rstrip('\r\n ') - for fact in f.readlines() - if fact.rstrip('\r\n ') != '' - ] +with open(os.path.join(dir_path, "unsafe.txt"), encoding="utf-8") as f: + unsafe_facts = [ + fact.rstrip("\r\n ") for fact in f.readlines() if fact.rstrip("\r\n ") != "" + ] all_facts = safe_facts + unsafe_facts def get_fact(filter_enabled: bool = True, only_unsafe: bool = False) -> str: - """This function returns a random fact. - - Parameters - ---------- - filter_enabled : bool - The `filter_enabled` parameter determines if the function will filter - out potentially inappropriate facts. Defaults to True. - - only_unsafe : bool - The `only_unsafe` parameter determines if the function will only give - unsafe (NSFW) facts. Takes precedence over the `filter_enabled` argument. - - Returns - ------ - str - A random fact. - - """ - - if only_unsafe: - return choice(unsafe_facts) - if filter_enabled is False: - return choice(all_facts) - return choice(safe_facts) - - -def _cli_entrypoint(): - """Entrypoint for execution via command-line.""" - - parser = argparse.ArgumentParser( - description='Generate random facts from the command-line' - ) - - group = parser.add_mutually_exclusive_group() - group.add_argument( - '-m', - '--mixed', - action='store_true', - help='Include safe and unsafe facts' - ) - - group.add_argument( - '-u', - '--unsafe', - action='store_true', - help='Only include unsafe facts' - ) - - args = parser.parse_args() - - if args.mixed: - print(get_fact(False)) - elif args.unsafe: - print(get_fact(only_unsafe=True)) - else: - print(get_fact()) - - -if __name__ == '__main__': - _cli_entrypoint() + """This function returns a random fact. + + Parameters + ---------- + filter_enabled : bool + The `filter_enabled` parameter determines if the function will filter + out potentially inappropriate facts. Defaults to True. 
+ + only_unsafe : bool + The `only_unsafe` parameter determines if the function will only give + unsafe (NSFW) facts. Takes precedence over the `filter_enabled` argument. + + Returns + ------ + str + A random fact. + + """ + + if only_unsafe: + return choice(unsafe_facts) + if filter_enabled is False: + return choice(all_facts) + return choice(safe_facts) + + +def _cli_entrypoint() -> None: + """Entrypoint for execution via command-line.""" + + parser = argparse.ArgumentParser( + description="Generate random facts from the command-line" + ) + + parser.add_argument( + "-V", + "--version", + action="store_true", + help="Print the package version and exit", + ) + + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-m", "--mixed", action="store_true", help="Include safe and unsafe facts" + ) + + group.add_argument( + "-u", "--unsafe", action="store_true", help="Only include unsafe facts" + ) + + args = parser.parse_args() + + if args.version: + print(__version__) + sys.exit(0) + if args.mixed: + print(get_fact(False)) + elif args.unsafe: + print(get_fact(only_unsafe=True)) + else: + print(get_fact()) + + +if __name__ == "__main__": + _cli_entrypoint() diff --git a/randfacts/safe.txt b/randfacts/safe.txt index 89550a8..2a14eaa 100644 --- a/randfacts/safe.txt +++ b/randfacts/safe.txt @@ -379,7 +379,7 @@ Whales can suffer from sunburns In 2015, a U.S. journalist was sentenced to 5 years in jail for posting a link on the web South Koreans drink twice as much alcohol as Russians A newborn baby has about one cup of blood in his body. -4%of women in the U.S. are pregnant right now +4% of women in the U.S. are pregnant right now The world's longest hangover lasted 4 weeks after a Scotsman consumed 60 pints of beer. In 1962, John F Kennedy secretly installed a taping system in the White House. Men with shaved heads are perceived as an inch taller and 13% stronger than men with hair. 
diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 224a779..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -description-file = README.md \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 7027b5c..0000000 --- a/setup.py +++ /dev/null @@ -1,42 +0,0 @@ -import pathlib -import setuptools - -here = pathlib.Path(__file__).parent.resolve() - -with open(here / "README.md", "r") as fh: - long_description = fh.read() - -about = {} -with open(here / "randfacts/__version__.py", "r") as f: - exec(f.read(), about) - -packages = ['randfacts'] - -setuptools.setup( - name=about["__title__"], - version=about["__version__"], - author=about["__author__"], - author_email=about["__author_email__"], - description=about["__description__"], - long_description=long_description, - long_description_content_type="text/markdown", - url=about["__url__"], - packages=packages, - package_dir={'randfacts': 'randfacts'}, - classifiers=[ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Natural Language :: English", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers" - ], - python_requires='>=3.6', - include_package_data=True -) diff --git a/tests/checkduplicates/Cargo.lock b/tests/checkduplicates/Cargo.lock new file mode 100644 index 0000000..f4703fe --- /dev/null +++ b/tests/checkduplicates/Cargo.lock @@ -0,0 +1,449 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "anstream" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "checkduplicates" +version = "0.1.0" +dependencies = [ + "clap", + "indicatif", + "num-integer", + "rayon", +] + 
+[[package]] +name = "clap" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_lex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width 0.1.11", + "windows-sys", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "either" +version = "1.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + +[[package]] +name = "indicatif" +version = "0.17.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "rayon", + "unicode-width 0.2.0", + "web-time", +] + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + +[[package]] +name = 
"number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "2.0.87" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + 
"quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn", + 
"wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" 
+ +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/tests/checkduplicates/src/main.rs b/tests/checkduplicates/src/main.rs index 0bfb4dc..a7eef4b 100644 --- a/tests/checkduplicates/src/main.rs +++ b/tests/checkduplicates/src/main.rs @@ -191,7 +191,7 @@ fn main() { if !args.get_flag("fix_duplicates") { println!("{:#?}", matches); println!("\nNumber of similar facts: {}", matches.len()); - return; + std::process::exit(1); } // Fix mode: Remove duplicates diff --git a/tests/fix_encoding.py b/tests/fix_encoding.py index 8666052..7a81c74 100644 --- a/tests/fix_encoding.py +++ b/tests/fix_encoding.py @@ -1,26 +1,33 @@ -import os +from pathlib import Path -parent = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) +parent = Path(__file__).resolve().parent.parent -safe_path = os.path.join(parent, 'randfacts', 'safe.txt') -unsafe_path = os.path.join(parent, 'randfacts', 'unsafe.txt') +safe_path = parent / "randfacts" / "safe.txt" +unsafe_path = parent / "randfacts" / "unsafe.txt" -bad_characters = [("‘", "'"), ("’", "'"), ("“", '"'), ("”", '"'), ("…", "..."), ('—', '-')] +bad_characters = [ + ("‘", "'"), + ("’", "'"), + ("“", '"'), + ("”", '"'), + ("…", "..."), + ("—", "-"), +] -with open(safe_path, encoding="utf-8") as f: - safe = f.read() +with open(safe_path, "r+", encoding="utf-8") as f: + safe = f.read() -for char in bad_characters: - safe = safe.replace(char[0], char[1]) + for 
char in bad_characters: + safe = safe.replace(char[0], char[1]) -with open(safe_path, "w") as f: - f.write(safe) + f.seek(0) + f.write(safe) -with open(unsafe_path, encoding="utf-8") as f: - unsafe = f.read() +with open(unsafe_path, "r+", encoding="utf-8") as f: + unsafe = f.read() -for char in bad_characters: - unsafe = unsafe.replace(char[0], char[1]) + for char in bad_characters: + unsafe = unsafe.replace(char[0], char[1]) -with open(unsafe_path, "w") as f: - f.write(unsafe) \ No newline at end of file + f.seek(0) + f.write(unsafe) diff --git a/tests/test.py b/tests/test.py deleted file mode 100644 index 00ef8e5..0000000 --- a/tests/test.py +++ /dev/null @@ -1,45 +0,0 @@ -import unittest -import sys -import pathlib -import subprocess - -sys.path.insert(1, str(pathlib.Path(__file__).parents[1])) -from randfacts import randfacts # local randfacts instead of installed version - -class TestRandfacts(unittest.TestCase): - - def test_get_fact(self): - self.assertIsInstance(randfacts.get_fact(), str, 'get_fact() must return a string') - - def test_all_facts_list(self): - self.assertIsInstance(randfacts.all_facts, list, 'all_facts must be a list') - - def test_safe_facts_list(self): - self.assertIsInstance(randfacts.safe_facts, list, 'safe_facts must be a list') - - def test_unsafe_facts_list(self): - self.assertIsInstance(randfacts.unsafe_facts, list, 'unsafe_facts must be a list') - - def test_cli_no_args(self): - child = subprocess.Popen(['python3', '-m', 'randfacts'], stdout=subprocess.DEVNULL) - child.communicate() - self.assertEqual(child.returncode, 0, '`python3 -m randfacts` must return with exit code 0') - - def test_cli_unsafe_args(self): - child = subprocess.Popen(['python3', '-m', 'randfacts', '--unsafe'], stdout=subprocess.DEVNULL) - child.communicate() - self.assertEqual(child.returncode, 0, '`python3 -m randfacts --unsafe` must return with exit code 0') - - def test_cli_mixed_args(self): - child = subprocess.Popen(['python3', '-m', 'randfacts', 
'--mixed'], stdout=subprocess.DEVNULL) - child.communicate() - self.assertEqual(child.returncode, 0, '`python3 -m randfacts --mixed` must return with exit code 0') - - def test_invalid_characters(self): - bad_characters = ["‘", "’", "“", "”", "…", "—"] - for index, fact in enumerate(randfacts.all_facts): - for char in bad_characters: - self.assertNotIn(char, fact, f'Index: {index}') - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/tests/test_general.py b/tests/test_general.py new file mode 100644 index 0000000..7d12ff6 --- /dev/null +++ b/tests/test_general.py @@ -0,0 +1,94 @@ +import pathlib +import subprocess +import sys + +import pytest + +sys.path.insert(1, str(pathlib.Path(__file__).parents[1])) +from randfacts import ( + getFact, + randfacts, # local randfacts instead of installed version +) + + +def test_get_fact(): + assert isinstance(randfacts.get_fact(), str), "get_fact() must return a string" + + +def test_getFact_deprecated(): + with pytest.deprecated_call(): + _ = getFact() + + +def test_all_facts_list(): + assert isinstance(randfacts.all_facts, list), "all_facts must be a list" + + +def test_safe_facts_list(): + assert isinstance(randfacts.safe_facts, list), "safe_facts must be a list" + + +def test_unsafe_facts_list(): + assert isinstance(randfacts.unsafe_facts, list), "unsafe_facts must be a list" + + +def test_cli_no_args(): + child = subprocess.Popen(["python3", "-m", "randfacts"], stdout=subprocess.DEVNULL) + child.communicate() + assert child.returncode == 0, "`python3 -m randfacts` must return with exit code 0" + + +def test_cli_unsafe_args(): + child = subprocess.Popen( + ["python3", "-m", "randfacts", "--unsafe"], stdout=subprocess.DEVNULL + ) + child.communicate() + assert ( + child.returncode == 0 + ), "`python3 -m randfacts --unsafe` must return with exit code 0" + + +def test_cli_mixed_args(): + child = subprocess.Popen( + ["python3", "-m", "randfacts", "--mixed"], stdout=subprocess.DEVNULL + ) + 
child.communicate() + assert ( + child.returncode == 0 + ), "`python3 -m randfacts --mixed` must return with exit code 0" + + +def test_cli_version(): + child = subprocess.Popen( + ["python3", "-m", "randfacts", "--version"], stdout=subprocess.PIPE, text=True + ) + output, _ = child.communicate() + assert ( + output.strip() == randfacts.__version__ + ), f"`python3 -m randfacts --version` must return {randfacts.__version__}" + + +def test_main_entrypoint(): + # Path to the module or script you want to test + script_path = ( + pathlib.Path(__file__).resolve().parents[1] / "randfacts" / "randfacts.py" + ) + + # Run the script as a subprocess + result = subprocess.run( + ["python", str(script_path)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + # Assert the subprocess exits successfully + assert result.returncode == 0, f"Script failed with stderr: {result.stderr}" + + +@pytest.mark.parametrize("bad_char", ["‘", "’", "“", "”", "…", "—"]) +def test_invalid_characters(bad_char: str): + for index, fact in enumerate(randfacts.all_facts): + assert ( + bad_char not in fact + ), f"Bad character '{bad_char}' found in fact at index {index}" From d68ee0cb13918c4d010045bae93e83e9885774ab Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Mon, 22 Jan 2024 01:12:42 -0500 Subject: [PATCH 14/24] remove extra quote --- randfacts/safe.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/randfacts/safe.txt b/randfacts/safe.txt index 72940dd..6dcbe4f 100644 --- a/randfacts/safe.txt +++ b/randfacts/safe.txt @@ -6761,7 +6761,7 @@ Turns out, a dog's paw print is just as unique as a human's. Good news for dog d A camel can drink up to 40 gallons of water in one go. That's seriously impressive! Don't bring your crystal ball to Maryland! Fortune telling is illegal in the state. Speaking of ferrets, did you know they used to be used to protect grain stores from rodents? 
-The technical term for a fear of long words is ""hippopotomonstrosesquippedaliophobia." No way you can self-diagnose yourself with that one! +The technical term for a fear of long words is "hippopotomonstrosesquippedaliophobia." No way you can self-diagnose yourself with that one! The White House has 35 bathrooms. So every bathroom break can be different for POTUS! Greyhounds can run up to 45 mph. So don't challenge one to a race! Hiking naked is illegal in Switzerland. Though we must say, it doesn't seem like a good idea in general! From a1d08c8e446789dfe3c5843e983bf4780e339949 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 01:55:33 -0500 Subject: [PATCH 15/24] fix workflow file --- .github/workflows/main.yml | 144 ++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 75 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 11a9c3a..4ba4f78 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,14 +1,8 @@ -# This is a basic workflow to help you get started with Actions - -name: CI - -# Controls when the action will run. 
on: # Triggers the workflow on push or pull request events but only for the master branch push: branches: [ master ] pull_request: - branches: [ master ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -16,80 +10,80 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: test: - name: Test code and coverage - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v4 + name: Test code and coverage + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4 - # If you wanted to use multiple Python versions, you'd have specify a matrix in the job and - # reference the matrixe python version here. - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - allow-prereleases: true + # If you wanted to use multiple Python versions, you'd have specify a matrix in the job and + # reference the matrixe python version here. + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true - # Cache the installation of Poetry itself, e.g. the next step. This prevents the workflow - # from installing Poetry every time, which can be slow. Note the use of the Poetry version - # number in the cache key, and the "-0" suffix: this allows you to invalidate the cache - # manually if/when you want to upgrade Poetry, or if something goes wrong. This could be - # mildly cleaner by using an environment variable, but I don't really care. - - name: cache poetry install - uses: actions/cache@v4 - with: - path: ~/.local - key: poetry-1.1.12-0 + # Cache the installation of Poetry itself, e.g. 
the next step. This prevents the workflow + # from installing Poetry every time, which can be slow. Note the use of the Poetry version + # number in the cache key, and the "-0" suffix: this allows you to invalidate the cache + # manually if/when you want to upgrade Poetry, or if something goes wrong. This could be + # mildly cleaner by using an environment variable, but I don't really care. + - name: cache poetry install + uses: actions/cache@v4 + with: + path: ~/.local + key: poetry-1.1.12-0 - # Install Poetry. You could do this manually, or there are several actions that do this. - # `snok/install-poetry` seems to be minimal yet complete, and really just calls out to - # Poetry's default install script, which feels correct. I pin the Poetry version here - # because Poetry does occasionally change APIs between versions and I don't want my - # actions to break if it does. - # - # The key configuration value here is `virtualenvs-in-project: true`: this creates the - # venv as a `.venv` in your testing directory, which allows the next step to easily - # cache it. - - uses: snok/install-poetry@v1 - with: - version: 1.5.1 - virtualenvs-create: true - virtualenvs-in-project: true + # Install Poetry. You could do this manually, or there are several actions that do this. + # `snok/install-poetry` seems to be minimal yet complete, and really just calls out to + # Poetry's default install script, which feels correct. I pin the Poetry version here + # because Poetry does occasionally change APIs between versions and I don't want my + # actions to break if it does. + # + # The key configuration value here is `virtualenvs-in-project: true`: this creates the + # venv as a `.venv` in your testing directory, which allows the next step to easily + # cache it. + - uses: snok/install-poetry@v1 + with: + version: 1.5.1 + virtualenvs-create: true + virtualenvs-in-project: true - # Cache your dependencies (i.e. all the stuff in your `pyproject.toml`). 
Note the cache - # key: if you're using multiple Python versions, or multiple OSes, you'd need to include - # them in the cache key. I'm not, so it can be simple and just depend on the poetry.lock. - - name: cache deps - id: cache-deps - uses: actions/cache@v4 - with: - path: .venv - key: pydeps-${{ hashFiles('**/poetry.lock') }} + # Cache your dependencies (i.e. all the stuff in your `pyproject.toml`). Note the cache + # key: if you're using multiple Python versions, or multiple OSes, you'd need to include + # them in the cache key. I'm not, so it can be simple and just depend on the poetry.lock. + - name: cache deps + id: cache-deps + uses: actions/cache@v4 + with: + path: .venv + key: pydeps-${{ hashFiles('**/poetry.lock') }} - # Install dependencies. `--no-root` means "install all dependencies but not the project - # itself", which is what you want to avoid caching _your_ code. The `if` statement - # ensures this only runs on a cache miss. - - run: poetry install --no-interaction --no-root - if: steps.cache-deps.outputs.cache-hit != 'true' + # Install dependencies. `--no-root` means "install all dependencies but not the project + # itself", which is what you want to avoid caching _your_ code. The `if` statement + # ensures this only runs on a cache miss. + - run: poetry install --no-interaction --no-root + if: steps.cache-deps.outputs.cache-hit != 'true' - # Now install _your_ project. This isn't necessary for many types of projects -- particularly - # things like Django apps don't need this. But it's a good idea since it fully-exercises the - # pyproject.toml and makes that if you add things like console-scripts at some point that - # they'll be installed and working. - - run: poetry install --no-interaction + # Now install _your_ project. This isn't necessary for many types of projects -- particularly + # things like Django apps don't need this. 
But it's a good idea since it fully-exercises the + # pyproject.toml and makes that if you add things like console-scripts at some point that + # they'll be installed and working. + - run: poetry install --no-interaction - # run the tests and check for 100% coverage - - run: poetry run pytest . --cov=randfacts --cov-report=term-missing --cov-report=xml + # run the tests and check for 100% coverage + - run: poetry run pytest . --cov=randfacts --cov-report=term-missing --cov-report=xml - # check for code style errors - - run: poetry run ruff check - # disable code format checking until docstrings are sorted out - # https://github.com/astral-sh/ruff/issues/8430 - # - run: poetry run ruff format --check - - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4.0.1 - with: - token: ${{ secrets.CODECOV_TOKEN }} + # check for code style errors + - run: poetry run ruff check + # disable code format checking until docstrings are sorted out + # https://github.com/astral-sh/ruff/issues/8430 + # - run: poetry run ruff format --check + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} From 67d909c0d44f7f6f5583e260c032c448fcf8be80 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 01:59:40 -0500 Subject: [PATCH 16/24] fix version conflicts with older versions of python --- .github/workflows/main.yml | 2 +- poetry.lock | 150 ++++++++++++++++++++----------------- pyproject.toml | 6 +- 3 files changed, 84 insertions(+), 74 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4ba4f78..5c3ce6d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 diff --git a/poetry.lock b/poetry.lock index 
33205de..724875e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,73 +13,83 @@ files = [ [[package]] name = "coverage" -version = "7.6.7" +version = "7.6.1" description = "Code coverage measurement for Python" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" files = [ - {file = "coverage-7.6.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:108bb458827765d538abcbf8288599fee07d2743357bdd9b9dad456c287e121e"}, - {file = "coverage-7.6.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c973b2fe4dc445cb865ab369df7521df9c27bf40715c837a113edaa2aa9faf45"}, - {file = "coverage-7.6.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c6b24007c4bcd0b19fac25763a7cac5035c735ae017e9a349b927cfc88f31c1"}, - {file = "coverage-7.6.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:acbb8af78f8f91b3b51f58f288c0994ba63c646bc1a8a22ad072e4e7e0a49f1c"}, - {file = "coverage-7.6.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad32a981bcdedb8d2ace03b05e4fd8dace8901eec64a532b00b15217d3677dd2"}, - {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:34d23e28ccb26236718a3a78ba72744212aa383141961dd6825f6595005c8b06"}, - {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e25bacb53a8c7325e34d45dddd2f2fbae0dbc230d0e2642e264a64e17322a777"}, - {file = "coverage-7.6.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af05bbba896c4472a29408455fe31b3797b4d8648ed0a2ccac03e074a77e2314"}, - {file = "coverage-7.6.7-cp310-cp310-win32.whl", hash = "sha256:796c9b107d11d2d69e1849b2dfe41730134b526a49d3acb98ca02f4985eeff7a"}, - {file = "coverage-7.6.7-cp310-cp310-win_amd64.whl", hash = "sha256:987a8e3da7da4eed10a20491cf790589a8e5e07656b6dc22d3814c4d88faf163"}, - {file = "coverage-7.6.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:7e61b0e77ff4dddebb35a0e8bb5a68bf0f8b872407d8d9f0c726b65dfabe2469"}, - {file = "coverage-7.6.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1a5407a75ca4abc20d6252efeb238377a71ce7bda849c26c7a9bece8680a5d99"}, - {file = "coverage-7.6.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df002e59f2d29e889c37abd0b9ee0d0e6e38c24f5f55d71ff0e09e3412a340ec"}, - {file = "coverage-7.6.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:673184b3156cba06154825f25af33baa2671ddae6343f23175764e65a8c4c30b"}, - {file = "coverage-7.6.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e69ad502f1a2243f739f5bd60565d14a278be58be4c137d90799f2c263e7049a"}, - {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:60dcf7605c50ea72a14490d0756daffef77a5be15ed1b9fea468b1c7bda1bc3b"}, - {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9c2eb378bebb2c8f65befcb5147877fc1c9fbc640fc0aad3add759b5df79d55d"}, - {file = "coverage-7.6.7-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3c0317288f032221d35fa4cbc35d9f4923ff0dfd176c79c9b356e8ef8ef2dff4"}, - {file = "coverage-7.6.7-cp311-cp311-win32.whl", hash = "sha256:951aade8297358f3618a6e0660dc74f6b52233c42089d28525749fc8267dccd2"}, - {file = "coverage-7.6.7-cp311-cp311-win_amd64.whl", hash = "sha256:5e444b8e88339a2a67ce07d41faabb1d60d1004820cee5a2c2b54e2d8e429a0f"}, - {file = "coverage-7.6.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f07ff574986bc3edb80e2c36391678a271d555f91fd1d332a1e0f4b5ea4b6ea9"}, - {file = "coverage-7.6.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:49ed5ee4109258973630c1f9d099c7e72c5c36605029f3a91fe9982c6076c82b"}, - {file = "coverage-7.6.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3e8796434a8106b3ac025fd15417315d7a58ee3e600ad4dbcfddc3f4b14342c"}, - {file = 
"coverage-7.6.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3b925300484a3294d1c70f6b2b810d6526f2929de954e5b6be2bf8caa1f12c1"}, - {file = "coverage-7.6.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c42ec2c522e3ddd683dec5cdce8e62817afb648caedad9da725001fa530d354"}, - {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0266b62cbea568bd5e93a4da364d05de422110cbed5056d69339bd5af5685433"}, - {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e5f2a0f161d126ccc7038f1f3029184dbdf8f018230af17ef6fd6a707a5b881f"}, - {file = "coverage-7.6.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c132b5a22821f9b143f87446805e13580b67c670a548b96da945a8f6b4f2efbb"}, - {file = "coverage-7.6.7-cp312-cp312-win32.whl", hash = "sha256:7c07de0d2a110f02af30883cd7dddbe704887617d5c27cf373362667445a4c76"}, - {file = "coverage-7.6.7-cp312-cp312-win_amd64.whl", hash = "sha256:fd49c01e5057a451c30c9b892948976f5d38f2cbd04dc556a82743ba8e27ed8c"}, - {file = "coverage-7.6.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:46f21663e358beae6b368429ffadf14ed0a329996248a847a4322fb2e35d64d3"}, - {file = "coverage-7.6.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:40cca284c7c310d622a1677f105e8507441d1bb7c226f41978ba7c86979609ab"}, - {file = "coverage-7.6.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77256ad2345c29fe59ae861aa11cfc74579c88d4e8dbf121cbe46b8e32aec808"}, - {file = "coverage-7.6.7-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87ea64b9fa52bf395272e54020537990a28078478167ade6c61da7ac04dc14bc"}, - {file = "coverage-7.6.7-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d608a7808793e3615e54e9267519351c3ae204a6d85764d8337bd95993581a8"}, - {file = 
"coverage-7.6.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdd94501d65adc5c24f8a1a0eda110452ba62b3f4aeaba01e021c1ed9cb8f34a"}, - {file = "coverage-7.6.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:82c809a62e953867cf57e0548c2b8464207f5f3a6ff0e1e961683e79b89f2c55"}, - {file = "coverage-7.6.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bb684694e99d0b791a43e9fc0fa58efc15ec357ac48d25b619f207c41f2fd384"}, - {file = "coverage-7.6.7-cp313-cp313-win32.whl", hash = "sha256:963e4a08cbb0af6623e61492c0ec4c0ec5c5cf74db5f6564f98248d27ee57d30"}, - {file = "coverage-7.6.7-cp313-cp313-win_amd64.whl", hash = "sha256:14045b8bfd5909196a90da145a37f9d335a5d988a83db34e80f41e965fb7cb42"}, - {file = "coverage-7.6.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f2c7a045eef561e9544359a0bf5784b44e55cefc7261a20e730baa9220c83413"}, - {file = "coverage-7.6.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dd4e4a49d9c72a38d18d641135d2fb0bdf7b726ca60a103836b3d00a1182acd"}, - {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c95e0fa3d1547cb6f021ab72f5c23402da2358beec0a8e6d19a368bd7b0fb37"}, - {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f63e21ed474edd23f7501f89b53280014436e383a14b9bd77a648366c81dce7b"}, - {file = "coverage-7.6.7-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead9b9605c54d15be228687552916c89c9683c215370c4a44f1f217d2adcc34d"}, - {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0573f5cbf39114270842d01872952d301027d2d6e2d84013f30966313cadb529"}, - {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e2c8e3384c12dfa19fa9a52f23eb091a8fad93b5b81a41b14c17c78e23dd1d8b"}, - {file = "coverage-7.6.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:70a56a2ec1869e6e9fa69ef6b76b1a8a7ef709972b9cc473f9ce9d26b5997ce3"}, - {file = "coverage-7.6.7-cp313-cp313t-win32.whl", hash = "sha256:dbba8210f5067398b2c4d96b4e64d8fb943644d5eb70be0d989067c8ca40c0f8"}, - {file = "coverage-7.6.7-cp313-cp313t-win_amd64.whl", hash = "sha256:dfd14bcae0c94004baba5184d1c935ae0d1231b8409eb6c103a5fd75e8ecdc56"}, - {file = "coverage-7.6.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37a15573f988b67f7348916077c6d8ad43adb75e478d0910957394df397d2874"}, - {file = "coverage-7.6.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b6cce5c76985f81da3769c52203ee94722cd5d5889731cd70d31fee939b74bf0"}, - {file = "coverage-7.6.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ab9763d291a17b527ac6fd11d1a9a9c358280adb320e9c2672a97af346ac2c"}, - {file = "coverage-7.6.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6cf96ceaa275f071f1bea3067f8fd43bec184a25a962c754024c973af871e1b7"}, - {file = "coverage-7.6.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aee9cf6b0134d6f932d219ce253ef0e624f4fa588ee64830fcba193269e4daa3"}, - {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2bc3e45c16564cc72de09e37413262b9f99167803e5e48c6156bccdfb22c8327"}, - {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:623e6965dcf4e28a3debaa6fcf4b99ee06d27218f46d43befe4db1c70841551c"}, - {file = "coverage-7.6.7-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:850cfd2d6fc26f8346f422920ac204e1d28814e32e3a58c19c91980fa74d8289"}, - {file = "coverage-7.6.7-cp39-cp39-win32.whl", hash = "sha256:c296263093f099da4f51b3dff1eff5d4959b527d4f2f419e16508c5da9e15e8c"}, - {file = "coverage-7.6.7-cp39-cp39-win_amd64.whl", hash = "sha256:90746521206c88bdb305a4bf3342b1b7316ab80f804d40c536fc7d329301ee13"}, - {file = "coverage-7.6.7-pp39.pp310-none-any.whl", hash = 
"sha256:0ddcb70b3a3a57581b450571b31cb774f23eb9519c2aaa6176d3a84c9fc57671"}, - {file = "coverage-7.6.7.tar.gz", hash = "sha256:d79d4826e41441c9a118ff045e4bccb9fdbdcb1d02413e7ea6eb5c87b5439d24"}, + {file = "coverage-7.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b06079abebbc0e89e6163b8e8f0e16270124c154dc6e4a47b413dd538859af16"}, + {file = "coverage-7.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cf4b19715bccd7ee27b6b120e7e9dd56037b9c0681dcc1adc9ba9db3d417fa36"}, + {file = "coverage-7.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61c0abb4c85b095a784ef23fdd4aede7a2628478e7baba7c5e3deba61070a02"}, + {file = "coverage-7.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd21f6ae3f08b41004dfb433fa895d858f3f5979e7762d052b12aef444e29afc"}, + {file = "coverage-7.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f59d57baca39b32db42b83b2a7ba6f47ad9c394ec2076b084c3f029b7afca23"}, + {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a1ac0ae2b8bd743b88ed0502544847c3053d7171a3cff9228af618a068ed9c34"}, + {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e6a08c0be454c3b3beb105c0596ebdc2371fab6bb90c0c0297f4e58fd7e1012c"}, + {file = "coverage-7.6.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f5796e664fe802da4f57a168c85359a8fbf3eab5e55cd4e4569fbacecc903959"}, + {file = "coverage-7.6.1-cp310-cp310-win32.whl", hash = "sha256:7bb65125fcbef8d989fa1dd0e8a060999497629ca5b0efbca209588a73356232"}, + {file = "coverage-7.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:3115a95daa9bdba70aea750db7b96b37259a81a709223c8448fa97727d546fe0"}, + {file = "coverage-7.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7dea0889685db8550f839fa202744652e87c60015029ce3f60e006f8c4462c93"}, + {file = "coverage-7.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:ed37bd3c3b063412f7620464a9ac1314d33100329f39799255fb8d3027da50d3"}, + {file = "coverage-7.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d85f5e9a5f8b73e2350097c3756ef7e785f55bd71205defa0bfdaf96c31616ff"}, + {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bc572be474cafb617672c43fe989d6e48d3c83af02ce8de73fff1c6bb3c198d"}, + {file = "coverage-7.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6"}, + {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f4aa8219db826ce6be7099d559f8ec311549bfc4046f7f9fe9b5cea5c581c56"}, + {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:fc5a77d0c516700ebad189b587de289a20a78324bc54baee03dd486f0855d234"}, + {file = "coverage-7.6.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b48f312cca9621272ae49008c7f613337c53fadca647d6384cc129d2996d1133"}, + {file = "coverage-7.6.1-cp311-cp311-win32.whl", hash = "sha256:1125ca0e5fd475cbbba3bb67ae20bd2c23a98fac4e32412883f9bcbaa81c314c"}, + {file = "coverage-7.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:8ae539519c4c040c5ffd0632784e21b2f03fc1340752af711f33e5be83a9d6c6"}, + {file = "coverage-7.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:95cae0efeb032af8458fc27d191f85d1717b1d4e49f7cb226cf526ff28179778"}, + {file = "coverage-7.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5621a9175cf9d0b0c84c2ef2b12e9f5f5071357c4d2ea6ca1cf01814f45d2391"}, + {file = "coverage-7.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:260933720fdcd75340e7dbe9060655aff3af1f0c5d20f46b57f262ab6c86a5e8"}, + {file = "coverage-7.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:07e2ca0ad381b91350c0ed49d52699b625aab2b44b65e1b4e02fa9df0e92ad2d"}, + {file = "coverage-7.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44fee9975f04b33331cb8eb272827111efc8930cfd582e0320613263ca849ca"}, + {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877abb17e6339d96bf08e7a622d05095e72b71f8afd8a9fefc82cf30ed944163"}, + {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e0cadcf6733c09154b461f1ca72d5416635e5e4ec4e536192180d34ec160f8a"}, + {file = "coverage-7.6.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3c02d12f837d9683e5ab2f3d9844dc57655b92c74e286c262e0fc54213c216d"}, + {file = "coverage-7.6.1-cp312-cp312-win32.whl", hash = "sha256:e05882b70b87a18d937ca6768ff33cc3f72847cbc4de4491c8e73880766718e5"}, + {file = "coverage-7.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:b5d7b556859dd85f3a541db6a4e0167b86e7273e1cdc973e5b175166bb634fdb"}, + {file = "coverage-7.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a4acd025ecc06185ba2b801f2de85546e0b8ac787cf9d3b06e7e2a69f925b106"}, + {file = "coverage-7.6.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a6d3adcf24b624a7b778533480e32434a39ad8fa30c315208f6d3e5542aeb6e9"}, + {file = "coverage-7.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0c212c49b6c10e6951362f7c6df3329f04c2b1c28499563d4035d964ab8e08c"}, + {file = "coverage-7.6.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e81d7a3e58882450ec4186ca59a3f20a5d4440f25b1cff6f0902ad890e6748a"}, + {file = "coverage-7.6.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78b260de9790fd81e69401c2dc8b17da47c8038176a79092a89cb2b7d945d060"}, + {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:a78d169acd38300060b28d600344a803628c3fd585c912cacc9ea8790fe96862"}, + {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2c09f4ce52cb99dd7505cd0fc8e0e37c77b87f46bc9c1eb03fe3bc9991085388"}, + {file = "coverage-7.6.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6878ef48d4227aace338d88c48738a4258213cd7b74fd9a3d4d7582bb1d8a155"}, + {file = "coverage-7.6.1-cp313-cp313-win32.whl", hash = "sha256:44df346d5215a8c0e360307d46ffaabe0f5d3502c8a1cefd700b34baf31d411a"}, + {file = "coverage-7.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:8284cf8c0dd272a247bc154eb6c95548722dce90d098c17a883ed36e67cdb129"}, + {file = "coverage-7.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d3296782ca4eab572a1a4eca686d8bfb00226300dcefdf43faa25b5242ab8a3e"}, + {file = "coverage-7.6.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:502753043567491d3ff6d08629270127e0c31d4184c4c8d98f92c26f65019962"}, + {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a89ecca80709d4076b95f89f308544ec8f7b4727e8a547913a35f16717856cb"}, + {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a318d68e92e80af8b00fa99609796fdbcdfef3629c77c6283566c6f02c6d6704"}, + {file = "coverage-7.6.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13b0a73a0896988f053e4fbb7de6d93388e6dd292b0d87ee51d106f2c11b465b"}, + {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4421712dbfc5562150f7554f13dde997a2e932a6b5f352edcce948a815efee6f"}, + {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:166811d20dfea725e2e4baa71fffd6c968a958577848d2131f39b60043400223"}, + {file = "coverage-7.6.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:225667980479a17db1048cb2bf8bfb39b8e5be8f164b8f6628b64f78a72cf9d3"}, + {file = 
"coverage-7.6.1-cp313-cp313t-win32.whl", hash = "sha256:170d444ab405852903b7d04ea9ae9b98f98ab6d7e63e1115e82620807519797f"}, + {file = "coverage-7.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b9f222de8cded79c49bf184bdbc06630d4c58eec9459b939b4a690c82ed05657"}, + {file = "coverage-7.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6db04803b6c7291985a761004e9060b2bca08da6d04f26a7f2294b8623a0c1a0"}, + {file = "coverage-7.6.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f1adfc8ac319e1a348af294106bc6a8458a0f1633cc62a1446aebc30c5fa186a"}, + {file = "coverage-7.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a95324a9de9650a729239daea117df21f4b9868ce32e63f8b650ebe6cef5595b"}, + {file = "coverage-7.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b43c03669dc4618ec25270b06ecd3ee4fa94c7f9b3c14bae6571ca00ef98b0d3"}, + {file = "coverage-7.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8929543a7192c13d177b770008bc4e8119f2e1f881d563fc6b6305d2d0ebe9de"}, + {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:a09ece4a69cf399510c8ab25e0950d9cf2b42f7b3cb0374f95d2e2ff594478a6"}, + {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:9054a0754de38d9dbd01a46621636689124d666bad1936d76c0341f7d71bf569"}, + {file = "coverage-7.6.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0dbde0f4aa9a16fa4d754356a8f2e36296ff4d83994b2c9d8398aa32f222f989"}, + {file = "coverage-7.6.1-cp38-cp38-win32.whl", hash = "sha256:da511e6ad4f7323ee5702e6633085fb76c2f893aaf8ce4c51a0ba4fc07580ea7"}, + {file = "coverage-7.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:3f1156e3e8f2872197af3840d8ad307a9dd18e615dc64d9ee41696f287c57ad8"}, + {file = "coverage-7.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:abd5fd0db5f4dc9289408aaf34908072f805ff7792632250dcb36dc591d24255"}, + {file = 
"coverage-7.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:547f45fa1a93154bd82050a7f3cddbc1a7a4dd2a9bf5cb7d06f4ae29fe94eaf8"}, + {file = "coverage-7.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:645786266c8f18a931b65bfcefdbf6952dd0dea98feee39bd188607a9d307ed2"}, + {file = "coverage-7.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e0b2df163b8ed01d515807af24f63de04bebcecbd6c3bfeff88385789fdf75a"}, + {file = "coverage-7.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:609b06f178fe8e9f89ef676532760ec0b4deea15e9969bf754b37f7c40326dbc"}, + {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:702855feff378050ae4f741045e19a32d57d19f3e0676d589df0575008ea5004"}, + {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2bdb062ea438f22d99cba0d7829c2ef0af1d768d1e4a4f528087224c90b132cb"}, + {file = "coverage-7.6.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9c56863d44bd1c4fe2abb8a4d6f5371d197f1ac0ebdee542f07f35895fc07f36"}, + {file = "coverage-7.6.1-cp39-cp39-win32.whl", hash = "sha256:6e2cd258d7d927d09493c8df1ce9174ad01b381d4729a9d8d4e38670ca24774c"}, + {file = "coverage-7.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:06a737c882bd26d0d6ee7269b20b12f14a8704807a01056c80bb881a4b2ce6ca"}, + {file = "coverage-7.6.1-pp38.pp39.pp310-none-any.whl", hash = "sha256:e9a6e0eb86070e8ccaedfbd9d38fec54864f3125ab95419970575b42af7541df"}, + {file = "coverage-7.6.1.tar.gz", hash = "sha256:953510dfb7b12ab69d20135a0662397f077c59b1e6379a768e97c59d852ee51d"}, ] [package.dependencies] @@ -163,17 +173,17 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments [[package]] name = "pytest-cov" -version = "6.0.0" +version = "5.0.0" description = "Pytest plugin for measuring coverage." 
optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" files = [ - {file = "pytest-cov-6.0.0.tar.gz", hash = "sha256:fde0b595ca248bb8e2d76f020b465f3b107c9632e6a1d1705f17834c89dcadc0"}, - {file = "pytest_cov-6.0.0-py3-none-any.whl", hash = "sha256:eee6f1b9e61008bd34975a4d5bab25801eb31898b032dd55addc93e96fcaaa35"}, + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, ] [package.dependencies] -coverage = {version = ">=7.5", extras = ["toml"]} +coverage = {version = ">=5.2.1", extras = ["toml"]} pytest = ">=4.6" [package.extras] @@ -220,4 +230,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.6" -content-hash = "6ac9b46da212e9f0849c52dde5608990c3e0e8beee141b901493d3161be1ba77" +content-hash = "6c56ef90ed94143e1778caadc168f25265a86854273cd45c7ad53fcd08a47a08" diff --git a/pyproject.toml b/pyproject.toml index 98bf1bc..fb5f11e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,9 +36,9 @@ reportUnusedCallResult = false python = "^3.6" [tool.poetry.group.dev.dependencies] -ruff = { version = "^0.7.4", python = "^3.9" } -pytest = { version = "^8.3.3", python = "^3.9" } -pytest-cov = { version = "^6.0.0", python = "^3.9" } +ruff = { version = "^0.7.4", python = "^3.8" } +pytest = { version = "^8.3.3", python = "^3.8" } +pytest-cov = { version = "^5.0.0", python = "^3.8" } [build-system] requires = ["poetry-core"] From ddc45d5d1084c900c0550e86f1596c04d8a4641c Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 02:00:21 -0500 Subject: [PATCH 17/24] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fb5f11e..9bad5f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "randfacts" -version = "0.21.0" +version = 
"0.22.0" description = "Package to generate random facts" authors = ["TabulateJarl8 "] license = "MIT" From ea11d34294f0186a1291cad9ff597745dbfca336 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 02:07:20 -0500 Subject: [PATCH 18/24] remove old checkduplicates --- tests/checkduplicates.py | 107 --------------------------------------- tests/test_general.py | 2 +- 2 files changed, 1 insertion(+), 108 deletions(-) delete mode 100644 tests/checkduplicates.py diff --git a/tests/checkduplicates.py b/tests/checkduplicates.py deleted file mode 100644 index 83c0737..0000000 --- a/tests/checkduplicates.py +++ /dev/null @@ -1,107 +0,0 @@ -from math import factorial -import itertools -import argparse -import pathlib -import sys - -from rapidfuzz import fuzz -from tqdm import tqdm - - -def partial_match(x_fact, y_fact, x_index, y_index): - if x_index == y_index: - # dont compare same facts - return None - - # compare facts - ratio = fuzz.token_sort_ratio(x_fact[0], y_fact[0]) - if ratio > 80: - # facts are most likely a match - return (x_fact, y_fact), (x_index, y_index), ratio - - # facts are most likely not a match, return none - return None - - -def number_of_combinations(number_of_items, choose_amount): - # calculate binomial coefficient - return factorial(number_of_items) / (factorial(choose_amount) * factorial(number_of_items - choose_amount)) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--fix-duplicates', action='store_true', help='Remove duplicate facts') - args = parser.parse_args() - - # Get directory containing setup.py - parent = pathlib.Path(__file__).parents[1] - - # read safe.txt and unsafe.txt into lists - with open(parent / 'randfacts/safe.txt') as f: - safe = [(line.rstrip(), 'safe') for line in f.readlines()] - - with open(parent / 'randfacts/unsafe.txt') as f: - unsafe = [(line.rstrip(), 'unsafe') for line in f.readlines()] - - # Generate all possible pairs of the facts from safe.txt and unsafe.txt - # 
combined - print('Generating combinations...') - combinations = itertools.combinations(enumerate(safe + unsafe), 2) - - matches = [] - print() - # Iterate through all the combinations - with tqdm(total=int(number_of_combinations(len(safe + unsafe), 2))) as pbar: - for item in combinations: - - # Check if the two facts as similar enough to be flagged - match = partial_match(item[0][1], item[1][1], item[0][0], item[1][0]) - if match is not None: - # facts are similar enough, flag them - matches.append(match) - - # Update progress bar by 1 - pbar.update(1) - print() - - if matches: # there were flagged facts - if not args.fix_duplicates: # don't fix duplicate facts, just print them - print('\n'.join([str(match) for match in matches])) - print() - print('Number of similar facts: ' + str(len(matches))) - sys.exit(2) - else: - # iterate through matches and generate a list of indexes to remove - print('Generating list of indexes to remove...') - indexes_to_remove = [] - for match in matches: - print(match) - # keep unsafe facts over safe facts - if match[0][0][1] == 'unsafe': - indexes_to_remove.append(match[1][1]) - elif match[0][1][1] == 'unsafe': - indexes_to_remove.append(match[1][0]) - else: - indexes_to_remove.append(match[1][0]) - - # remove all indexes from combinations - print('Removing duplicates from facts...') - facts = safe + unsafe - for index in sorted(list(set(indexes_to_remove)), reverse=True): - # sort the list of indexes in reverse so that we don't have - # issues with the max index getting smaller as we delete things - del facts[index] - - # divide up the facts into their corresponding list - safe = [fact for fact, correct_list in facts if correct_list == 'safe'] - unsafe = [fact for fact, correct_list in facts if correct_list == 'unsafe'] - - # write the fixed facts back to the files - with open(parent / 'randfacts/safe.txt', 'w') as f: - f.write('\n'.join(safe)) - with open(parent / 'randfacts/unsafe.txt', 'w') as f: - f.write('\n'.join(unsafe)) - - 
-if __name__ == '__main__': - main() diff --git a/tests/test_general.py b/tests/test_general.py index 7d12ff6..f4c916d 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -5,7 +5,7 @@ import pytest sys.path.insert(1, str(pathlib.Path(__file__).parents[1])) -from randfacts import ( +from randfacts import ( # noqa: E402 getFact, randfacts, # local randfacts instead of installed version ) From ba533987aeafeddb51e4fd7b98d929e0fefbaebf Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 02:26:53 -0500 Subject: [PATCH 19/24] add more ruff lints and convert back to tabs --- pyproject.toml | 53 ++++++++++++++++ randfacts/__init__.py | 70 +++++++++++---------- randfacts/randfacts.py | 138 ++++++++++++++++++++--------------------- tests/fix_encoding.py | 32 +++++----- tests/test_general.py | 126 +++++++++++++++++++------------------ 5 files changed, 239 insertions(+), 180 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9bad5f4..bbe47c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,9 +29,62 @@ packages = [{ include = 'randfacts' }] [tool.poetry.scripts] randfacts = 'randfacts.randfacts:_cli_entrypoint' +[tool.pytest.ini_options] +asyncio_default_fixture_loop_scope = "function" + [tool.pyright] reportUnusedCallResult = false +[tool.ruff.lint] +preview = true +select = ["ALL"] + +ignore = [ + # complains about tab indentation + "W191", + "D206", + # adds a line break before a class docstring + "D203", + # puts the first line summary of a docstring on a different line than the """ + "D213", + # tries to add a blank line after the last docstring section + "D413", + # yells at you if you use a bool typed function argument + "FBT001", + "FBT002", + # yells at you for using try-except in a for loop + "PERF203", + # allow for the use of Any + "ANN401", + # false positives for overriding methods (i think) + "PLR6301", + # disable too many branches check + "PLR0912", + # copyright at top of file + "CPY", + # complains about 
random.choice() not being good for cryptography + "S311", +] + + +[tool.ruff.lint.per-file-ignores] +"tests/*" = ["S101", "ANN001", "ANN002", "PLC2701", "ARG002", "PLR2004", "DOC"] +"randfacts/randfacts.py" = ["T201"] +"randfacts/__main__.py" = ["D100"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.format] +quote-style = "single" +indent-style = "tab" +line-ending = "lf" + +[tool.ruff.lint.isort] +combine-as-imports = true +force-wrap-aliases = true +order-by-type = true + [tool.poetry.dependencies] python = "^3.6" diff --git a/randfacts/__init__.py b/randfacts/__init__.py index 7cad44c..f115bc5 100644 --- a/randfacts/__init__.py +++ b/randfacts/__init__.py @@ -5,66 +5,68 @@ execution via the command line. See the examples section for more details. Code Examples: - Example usage of randfacts in code. + Example usage of randfacts in code. - generate a random sfw (safe for work) fact. + generate a random sfw (safe for work) fact. - >>> randfacts.get_fact() + >>> randfacts.get_fact() - generate a random nsfw (not safe for work) fact. + generate a random nsfw (not safe for work) fact. 
- >>> randfacts.get_fact(only_unsafe=true) + >>> randfacts.get_fact(only_unsafe=true) - generate a random mixed fact (possibility of both sfw and nsfw facts) + generate a random mixed fact (possibility of both sfw and nsfw facts) - >>> randfacts.get_fact(false) - >>> # or - >>> randfacts.get_fact(filter_enabled=False) + >>> randfacts.get_fact(false) + >>> # or + >>> randfacts.get_fact(filter_enabled=False) CLI Examples: - randfacts can be executed via the command line with the following commands: + randfacts can be executed via the command line with the following commands: - Normal execution; only safe facts + Normal execution; only safe facts - $ python3 -m randfacts + $ python3 -m randfacts - The unsafe argument can be supplied to provide only unsafe facts + The unsafe argument can be supplied to provide only unsafe facts - $ python3 -m randfacts --unsafe + $ python3 -m randfacts --unsafe - The mixed argument can be provided to provide both SFW and NSFW facts. + The mixed argument can be provided to provide both SFW and NSFW facts. - $ python3 -m randfacts --mixed + $ python3 -m randfacts --mixed - More help. + More help. - $ python3 -m randfacts --help + $ python3 -m randfacts --help """ import warnings as _warnings from randfacts.randfacts import ( - __version__, - all_facts, - get_fact, - safe_facts, - unsafe_facts, + __version__, + all_facts, + get_fact, + safe_facts, + unsafe_facts, ) __all__ = [ - "all_facts", - "get_fact", - "safe_facts", - "unsafe_facts", - "__version__", + "__version__", + "all_facts", + "get_fact", + "safe_facts", + "unsafe_facts", ] # Deprecated methods -def getFact(filter_enabled: bool = True, only_unsafe: bool = False): - """This method is deprecated. Please use get_fact""" - _warnings.warn( - "getFact is deprecated. 
Please use get_fact", DeprecationWarning, stacklevel=2 - ) - return get_fact(filter_enabled, only_unsafe) +def getFact(filter_enabled: bool = True, only_unsafe: bool = False) -> str: # noqa: N802 + """This method is deprecated. Please use get_fact.""" + _warnings.warn( + "getFact is deprecated. Please use get_fact", + DeprecationWarning, + stacklevel=2, + ) + return get_fact(filter_enabled, only_unsafe) # noqa: DOC201 diff --git a/randfacts/randfacts.py b/randfacts/randfacts.py index 43beafd..f59dfe8 100644 --- a/randfacts/randfacts.py +++ b/randfacts/randfacts.py @@ -1,92 +1,92 @@ +"""Contains the core functionality of randfacts.""" + import argparse +import contextlib import importlib.metadata -import os -import sys +from pathlib import Path from random import choice -dir_path = os.path.dirname(os.path.realpath(__file__)) +dir_path = Path(__file__).resolve().parent __version__ = "" -try: - __version__: str = importlib.metadata.version("randfacts") -except Exception: - pass +with contextlib.suppress(Exception): + __version__: str = importlib.metadata.version("randfacts") -with open(os.path.join(dir_path, "safe.txt"), encoding="utf-8") as f: - safe_facts = [ - fact.rstrip("\r\n ") for fact in f.readlines() if fact.rstrip("\r\n ") != "" - ] +with (dir_path / "safe.txt").open(encoding="utf-8") as f: + safe_facts = [fact.rstrip("\r\n ") for fact in f if fact.rstrip("\r\n ")] -with open(os.path.join(dir_path, "unsafe.txt"), encoding="utf-8") as f: - unsafe_facts = [ - fact.rstrip("\r\n ") for fact in f.readlines() if fact.rstrip("\r\n ") != "" - ] +with (dir_path / "unsafe.txt").open(encoding="utf-8") as f: + unsafe_facts = [fact.rstrip("\r\n ") for fact in f if fact.rstrip("\r\n ")] all_facts = safe_facts + unsafe_facts def get_fact(filter_enabled: bool = True, only_unsafe: bool = False) -> str: - """This function returns a random fact. 
- - Parameters - ---------- - filter_enabled : bool - The `filter_enabled` parameter determines if the function will filter - out potentially inappropriate facts. Defaults to True. + """This function returns a random fact. - only_unsafe : bool - The `only_unsafe` parameter determines if the function will only give - unsafe (NSFW) facts. Takes precedence over the `filter_enabled` argument. + Parameters + ---------- + filter_enabled : bool + The `filter_enabled` parameter determines if the function will filter + out potentially inappropriate facts. Defaults to True. - Returns - ------ - str - A random fact. + only_unsafe : bool + The `only_unsafe` parameter determines if the function will only give + unsafe (NSFW) facts. Takes precedence over the `filter_enabled` argument. - """ + Returns: + ------ + str + A random fact. - if only_unsafe: - return choice(unsafe_facts) - if filter_enabled is False: - return choice(all_facts) - return choice(safe_facts) + """ + if only_unsafe: + return choice(unsafe_facts) + if filter_enabled is False: + return choice(all_facts) + return choice(safe_facts) def _cli_entrypoint() -> None: - """Entrypoint for execution via command-line.""" - - parser = argparse.ArgumentParser( - description="Generate random facts from the command-line" - ) - - parser.add_argument( - "-V", - "--version", - action="store_true", - help="Print the package version and exit", - ) - - group = parser.add_mutually_exclusive_group() - group.add_argument( - "-m", "--mixed", action="store_true", help="Include safe and unsafe facts" - ) - - group.add_argument( - "-u", "--unsafe", action="store_true", help="Only include unsafe facts" - ) - - args = parser.parse_args() - - if args.version: - print(__version__) - sys.exit(0) - if args.mixed: - print(get_fact(False)) - elif args.unsafe: - print(get_fact(only_unsafe=True)) - else: - print(get_fact()) + """Entrypoint for execution via command-line.""" + parser = argparse.ArgumentParser( + description="Generate random facts 
from the command-line", + ) + + parser.add_argument( + "-V", + "--version", + action="store_true", + help="Print the package version and exit", + ) + + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-m", + "--mixed", + action="store_true", + help="Include safe and unsafe facts", + ) + + group.add_argument( + "-u", + "--unsafe", + action="store_true", + help="Only include unsafe facts", + ) + + args = parser.parse_args() + + if args.version: # pyright: ignore[reportAny] + print(__version__) + return + if args.mixed: # pyright: ignore[reportAny] + print(get_fact(filter_enabled=False)) + elif args.unsafe: # pyright: ignore[reportAny] + print(get_fact(only_unsafe=True)) + else: + print(get_fact()) if __name__ == "__main__": - _cli_entrypoint() + _cli_entrypoint() diff --git a/tests/fix_encoding.py b/tests/fix_encoding.py index 7a81c74..393f6a9 100644 --- a/tests/fix_encoding.py +++ b/tests/fix_encoding.py @@ -6,28 +6,28 @@ unsafe_path = parent / "randfacts" / "unsafe.txt" bad_characters = [ - ("‘", "'"), - ("’", "'"), - ("“", '"'), - ("”", '"'), - ("…", "..."), - ("—", "-"), + ("‘", "'"), + ("’", "'"), + ("“", '"'), + ("”", '"'), + ("…", "..."), + ("—", "-"), ] with open(safe_path, "r+", encoding="utf-8") as f: - safe = f.read() + safe = f.read() - for char in bad_characters: - safe = safe.replace(char[0], char[1]) + for char in bad_characters: + safe = safe.replace(char[0], char[1]) - f.seek(0) - f.write(safe) + f.seek(0) + f.write(safe) with open(unsafe_path, "r+", encoding="utf-8") as f: - unsafe = f.read() + unsafe = f.read() - for char in bad_characters: - unsafe = unsafe.replace(char[0], char[1]) + for char in bad_characters: + unsafe = unsafe.replace(char[0], char[1]) - f.seek(0) - f.write(unsafe) + f.seek(0) + f.write(unsafe) diff --git a/tests/test_general.py b/tests/test_general.py index f4c916d..a95c010 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -5,90 +5,94 @@ import pytest sys.path.insert(1, 
str(pathlib.Path(__file__).parents[1])) -from randfacts import ( # noqa: E402 - getFact, - randfacts, # local randfacts instead of installed version +from randfacts import ( + getFact, + randfacts, # local randfacts instead of installed version ) -def test_get_fact(): - assert isinstance(randfacts.get_fact(), str), "get_fact() must return a string" +def test_get_fact() -> None: + assert isinstance(randfacts.get_fact(), str), "get_fact() must return a string" -def test_getFact_deprecated(): - with pytest.deprecated_call(): - _ = getFact() +def test_getFact_deprecated() -> None: + with pytest.deprecated_call(): + _ = getFact() -def test_all_facts_list(): - assert isinstance(randfacts.all_facts, list), "all_facts must be a list" +def test_all_facts_list() -> None: + assert isinstance(randfacts.all_facts, list), "all_facts must be a list" -def test_safe_facts_list(): - assert isinstance(randfacts.safe_facts, list), "safe_facts must be a list" +def test_safe_facts_list() -> None: + assert isinstance(randfacts.safe_facts, list), "safe_facts must be a list" -def test_unsafe_facts_list(): - assert isinstance(randfacts.unsafe_facts, list), "unsafe_facts must be a list" +def test_unsafe_facts_list() -> None: + assert isinstance(randfacts.unsafe_facts, list), "unsafe_facts must be a list" -def test_cli_no_args(): - child = subprocess.Popen(["python3", "-m", "randfacts"], stdout=subprocess.DEVNULL) - child.communicate() - assert child.returncode == 0, "`python3 -m randfacts` must return with exit code 0" +def test_cli_no_args() -> None: + child = subprocess.Popen(["python3", "-m", "randfacts"], stdout=subprocess.DEVNULL) + child.communicate() + assert child.returncode == 0, "`python3 -m randfacts` must return with exit code 0" -def test_cli_unsafe_args(): - child = subprocess.Popen( - ["python3", "-m", "randfacts", "--unsafe"], stdout=subprocess.DEVNULL - ) - child.communicate() - assert ( - child.returncode == 0 - ), "`python3 -m randfacts --unsafe` must return with exit code 
0" +def test_cli_unsafe_args() -> None: + child = subprocess.Popen( + ["python3", "-m", "randfacts", "--unsafe"], + stdout=subprocess.DEVNULL, + ) + child.communicate() + assert ( + child.returncode == 0 + ), "`python3 -m randfacts --unsafe` must return with exit code 0" -def test_cli_mixed_args(): - child = subprocess.Popen( - ["python3", "-m", "randfacts", "--mixed"], stdout=subprocess.DEVNULL - ) - child.communicate() - assert ( - child.returncode == 0 - ), "`python3 -m randfacts --mixed` must return with exit code 0" +def test_cli_mixed_args() -> None: + child = subprocess.Popen( + ["python3", "-m", "randfacts", "--mixed"], + stdout=subprocess.DEVNULL, + ) + child.communicate() + assert ( + child.returncode == 0 + ), "`python3 -m randfacts --mixed` must return with exit code 0" -def test_cli_version(): - child = subprocess.Popen( - ["python3", "-m", "randfacts", "--version"], stdout=subprocess.PIPE, text=True - ) - output, _ = child.communicate() - assert ( - output.strip() == randfacts.__version__ - ), f"`python3 -m randfacts --version` must return {randfacts.__version__}" +def test_cli_version() -> None: + child = subprocess.Popen( + ["python3", "-m", "randfacts", "--version"], + stdout=subprocess.PIPE, + text=True, + ) + output, _ = child.communicate() + assert ( + output.strip() == randfacts.__version__ + ), f"`python3 -m randfacts --version` must return {randfacts.__version__}" -def test_main_entrypoint(): - # Path to the module or script you want to test - script_path = ( - pathlib.Path(__file__).resolve().parents[1] / "randfacts" / "randfacts.py" - ) +def test_main_entrypoint() -> None: + # Path to the module or script you want to test + script_path = ( + pathlib.Path(__file__).resolve().parents[1] / "randfacts" / "randfacts.py" + ) - # Run the script as a subprocess - result = subprocess.run( - ["python", str(script_path)], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) + # Run the script as a subprocess + result = subprocess.run( 
+ ["python", str(script_path)], + capture_output=True, + text=True, + check=False, + ) - # Assert the subprocess exits successfully - assert result.returncode == 0, f"Script failed with stderr: {result.stderr}" + # Assert the subprocess exits successfully + assert result.returncode == 0, f"Script failed with stderr: {result.stderr}" @pytest.mark.parametrize("bad_char", ["‘", "’", "“", "”", "…", "—"]) -def test_invalid_characters(bad_char: str): - for index, fact in enumerate(randfacts.all_facts): - assert ( - bad_char not in fact - ), f"Bad character '{bad_char}' found in fact at index {index}" +def test_invalid_characters(bad_char: str) -> None: + for index, fact in enumerate(randfacts.all_facts): + assert ( + bad_char not in fact + ), f"Bad character '{bad_char}' found in fact at index {index}" From ff1ce58da0114e5de742f7efddec03a32a727d11 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 02:33:27 -0500 Subject: [PATCH 20/24] finish fixing all new ruff checks --- pyproject.toml | 12 +++++++++++- tests/fix_encoding.py | 10 ++++++---- tests/test_general.py | 27 +++++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bbe47c8..30d45c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,17 @@ ignore = [ [tool.ruff.lint.per-file-ignores] -"tests/*" = ["S101", "ANN001", "ANN002", "PLC2701", "ARG002", "PLR2004", "DOC"] +"tests/*" = [ + "S101", + "ANN001", + "ANN002", + "PLC2701", + "ARG002", + "PLR2004", + "DOC", + "INP001", + "S", +] "randfacts/randfacts.py" = ["T201"] "randfacts/__main__.py" = ["D100"] diff --git a/tests/fix_encoding.py b/tests/fix_encoding.py index 393f6a9..a43fecd 100644 --- a/tests/fix_encoding.py +++ b/tests/fix_encoding.py @@ -1,3 +1,5 @@ +"""Fixes common encoding errors that can get into the fact lists after web scraping.""" + from pathlib import Path parent = Path(__file__).resolve().parent.parent @@ -6,15 +8,15 @@ unsafe_path = parent / 
"randfacts" / "unsafe.txt" bad_characters = [ - ("‘", "'"), - ("’", "'"), + ("‘", "'"), # noqa: RUF001 + ("’", "'"), # noqa: RUF001 ("“", '"'), ("”", '"'), ("…", "..."), ("—", "-"), ] -with open(safe_path, "r+", encoding="utf-8") as f: +with safe_path.open("r+", encoding="utf-8") as f: safe = f.read() for char in bad_characters: @@ -23,7 +25,7 @@ f.seek(0) f.write(safe) -with open(unsafe_path, "r+", encoding="utf-8") as f: +with unsafe_path.open("r+", encoding="utf-8") as f: unsafe = f.read() for char in bad_characters: diff --git a/tests/test_general.py b/tests/test_general.py index a95c010..809f014 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -1,3 +1,5 @@ +"""General functionality unit tests.""" + import pathlib import subprocess import sys @@ -12,33 +14,47 @@ def test_get_fact() -> None: + """Make sure get_fact works without extra arguments.""" assert isinstance(randfacts.get_fact(), str), "get_fact() must return a string" -def test_getFact_deprecated() -> None: +def test_getFact_deprecated() -> None: # noqa: N802 + """Make sure getFact throws a deprecation warning.""" with pytest.deprecated_call(): _ = getFact() def test_all_facts_list() -> None: + """Test that all_facts list is present in the module.""" assert isinstance(randfacts.all_facts, list), "all_facts must be a list" def test_safe_facts_list() -> None: + """Test that safe_facts list is present in the module.""" assert isinstance(randfacts.safe_facts, list), "safe_facts must be a list" def test_unsafe_facts_list() -> None: + """Test that unsafe_facts list is present in the module.""" assert isinstance(randfacts.unsafe_facts, list), "unsafe_facts must be a list" def test_cli_no_args() -> None: + """Test that a basic randfacts CLI call will work.""" child = subprocess.Popen(["python3", "-m", "randfacts"], stdout=subprocess.DEVNULL) child.communicate() assert child.returncode == 0, "`python3 -m randfacts` must return with exit code 0" +def test_cli_script_installed() -> None: + """Test 
that the `randfacts` script is installed to the PATH.""" + child = subprocess.Popen(["randfacts"], stdout=subprocess.DEVNULL) + child.communicate() + assert child.returncode == 0, "`randfacts` must return with exit code 0" + + def test_cli_unsafe_args() -> None: + """Test that CLI with --unsafe works.""" child = subprocess.Popen( ["python3", "-m", "randfacts", "--unsafe"], stdout=subprocess.DEVNULL, @@ -50,6 +66,7 @@ def test_cli_unsafe_args() -> None: def test_cli_mixed_args() -> None: + """Test that CLI with --mixed works.""" child = subprocess.Popen( ["python3", "-m", "randfacts", "--mixed"], stdout=subprocess.DEVNULL, @@ -61,6 +78,7 @@ def test_cli_mixed_args() -> None: def test_cli_version() -> None: + """Test that CLI with --version returns the correct version.""" child = subprocess.Popen( ["python3", "-m", "randfacts", "--version"], stdout=subprocess.PIPE, @@ -73,6 +91,7 @@ def test_cli_version() -> None: def test_main_entrypoint() -> None: + """Test the main entrypoint in randfacts.py.""" # Path to the module or script you want to test script_path = ( pathlib.Path(__file__).resolve().parents[1] / "randfacts" / "randfacts.py" @@ -90,8 +109,12 @@ def test_main_entrypoint() -> None: assert result.returncode == 0, f"Script failed with stderr: {result.stderr}" -@pytest.mark.parametrize("bad_char", ["‘", "’", "“", "”", "…", "—"]) +@pytest.mark.parametrize("bad_char", ["‘", "’", "“", "”", "…", "—"]) # noqa: RUF001 def test_invalid_characters(bad_char: str) -> None: + """Make sure no invalid characters are present in the fact lists. 
+ + If this test fails, try running `fix_encoding.py` + """ for index, fact in enumerate(randfacts.all_facts): assert ( bad_char not in fact From 28dc2423aedef14cafc83f9cb85497e45369f414 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 21:38:03 -0500 Subject: [PATCH 21/24] update copyright dates --- LICENSE | 4 ++-- tests/fix_encoding.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/LICENSE b/LICENSE index 4306b1f..aa9a3b3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,5 @@ MIT License -Copyright (c) 2020-2021 Connor Sample +Copyright (c) 2020-2024 Connor Sample Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -14,4 +14,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. 
diff --git a/tests/fix_encoding.py b/tests/fix_encoding.py index a43fecd..17808b2 100644 --- a/tests/fix_encoding.py +++ b/tests/fix_encoding.py @@ -2,7 +2,7 @@ from pathlib import Path -parent = Path(__file__).resolve().parent.parent +parent = Path(__file__).resolve().parents[1] safe_path = parent / "randfacts" / "safe.txt" unsafe_path = parent / "randfacts" / "unsafe.txt" From b6e8f579c1ec61d03108f9e016e5de48a221db23 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 22:02:49 -0500 Subject: [PATCH 22/24] switch from single to double quotes in ruff --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 30d45c1..6299224 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ ignore = [ convention = "google" [tool.ruff.format] -quote-style = "single" +quote-style = "double" indent-style = "tab" line-ending = "lf" From 172731d910f53fd5443d7c3c443c684082a2b43d Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 22:13:23 -0500 Subject: [PATCH 23/24] add binary caching to checkduplicates CI --- .github/workflows/check_duplicates.yml | 19 +++++++++++++++++-- .github/workflows/main.yml | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/check_duplicates.yml b/.github/workflows/check_duplicates.yml index ee4d90e..02448c5 100644 --- a/.github/workflows/check_duplicates.yml +++ b/.github/workflows/check_duplicates.yml @@ -18,7 +18,22 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Check for duplicate facts + + - name: Cache checkduplicates binary + uses: actions/cache@v4 + id: cache + with: + path: | + tests/checkduplicates/target/release/checkduplicates + key: ${{ runner.os }}-cargo-${{ hashFiles('tests/checkduplicates/Cargo.lock', 'tests/checkduplicates/Cargo.toml', 'tests/checkduplicates/src/**') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Build checkduplicates test + if: steps.cache.outputs.cache-hit != 
'true' run: | cd tests/checkduplicates - cargo run --release + cargo build --release + + - name: Check for duplicate facts + run: ./tests/checkduplicates/target/release/checkduplicates diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5c3ce6d..e32142b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -84,6 +84,6 @@ jobs: # https://github.com/astral-sh/ruff/issues/8430 # - run: poetry run ruff format --check - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4.0.1 + uses: codecov/codecov-action@v5.0.2 with: token: ${{ secrets.CODECOV_TOKEN }} From 0a2c930367677fe2dee34ae842fcf3d897c95520 Mon Sep 17 00:00:00 2001 From: TabulateJarl8 Date: Sun, 17 Nov 2024 23:19:21 -0500 Subject: [PATCH 24/24] fix module docstring --- randfacts/__init__.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/randfacts/__init__.py b/randfacts/__init__.py index f115bc5..af38cd5 100644 --- a/randfacts/__init__.py +++ b/randfacts/__init__.py @@ -7,18 +7,16 @@ Code Examples: Example usage of randfacts in code. - generate a random sfw (safe for work) fact. + Generate a random SFW (safe for work) fact. >>> randfacts.get_fact() - generate a random nsfw (not safe for work) fact. + Generate a random NSFW (not safe for work) fact. - >>> randfacts.get_fact(only_unsafe=true) + >>> randfacts.get_fact(only_unsafe=True) - generate a random mixed fact (possibility of both sfw and nsfw facts) + Generate a random mixed fact (possibility of both SFW and NSFW facts) - >>> randfacts.get_fact(false) - >>> # or >>> randfacts.get_fact(filter_enabled=False) CLI Examples: