Skip to content

Commit

Permalink
Merge pull request #273 from Qrlew/fix_devide_by_zero_error
Browse files Browse the repository at this point in the history
Fix divide by zero error
  • Loading branch information
ngrislain authored Mar 20, 2024
2 parents aba5b5e + cb15b99 commit a09db14
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 14 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
## [0.9.16] - 2024-03-20
### Changed
- `Expr::divide` now guards the division so that a zero denominator yields 0 instead of a division by zero [#273](https://github.com/Qrlew/qrlew/pull/273)
- Make the display of floats more readable [#273](https://github.com/Qrlew/qrlew/pull/273)
### Fixed
- Clamp floats that can be ±inf between `f64::MIN` and `f64::MAX` [#273](https://github.com/Qrlew/qrlew/pull/273)

## [0.9.15] - 2024-02-20
### Changed
- SELECT * with JOINs preserve the column names when there is no ambiguity [#268](https://github.com/Qrlew/qrlew/pull/268)
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
authors = ["Nicolas Grislain <[email protected]>"]
name = "qrlew"
version = "0.9.15"
version = "0.9.16"
edition = "2021"
description = "Sarus Qrlew Engine"
documentation = "https://docs.rs/qrlew"
Expand Down
12 changes: 11 additions & 1 deletion src/data_type/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,17 @@ impl Deref for Float {

impl fmt::Display for Float {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
// Define magnitude thresholds
let lower_threshold = 1e-4;
let upper_threshold = 1e4;

if self.0.abs() >= lower_threshold && self.0.abs() < upper_threshold || self.0 == 0.0 {
// Standard decimal notation
write!(f, "{}", self.0)
} else {
// Scientific notation with up to 4 digits after the decimal
write!(f, "{:.4e}", self.0)
}
}
}

Expand Down
5 changes: 2 additions & 3 deletions src/differential_privacy/aggregates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ impl Relation {
// Cf. Theorem A.1. in (Dwork, Roth et al. 2014)
log::warn!("Warning, epsilon>1 the gaussian mechanism applied will not be exactly epsilon,delta-DP!")
}

let number_of_agg = bounds.len() as f64;
let (dp_relation, dp_event) = if number_of_agg > 0. {
let noise_multipliers = bounds
Expand Down Expand Up @@ -138,12 +137,12 @@ impl PupRelation {
(
name,
column,
self.schema()[column]
(self.schema()[column]
.data_type()
.absolute_upper_bound()
.unwrap_or(1.0)
// This may add a lot of noise depending on the parameters
* parameters.privacy_unit_multiplicity(),
* parameters.privacy_unit_multiplicity()).clamp(f64::MIN, f64::MAX),
)
})
.collect::<Vec<_>>();
Expand Down
3 changes: 2 additions & 1 deletion src/differential_privacy/dp_event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ impl From<Vec<DpEvent>> for DpEvent {
}

/// Computes `sqrt(2 * ln(1.25 / delta)) * sensitivity / epsilon`, clamped to `[0, f64::MAX]`.
///
/// NOTE(review): presumably the noise scale of the Gaussian mechanism for an
/// `(epsilon, delta)` budget -- confirm against the callers.
///
/// The raw value can be `+inf` (for instance when `epsilon` is `0`), so it is clamped
/// between `0.0` and `f64::MAX` to keep the result finite. A NaN intermediate value
/// (e.g. `delta > 1.25`, or `0 / 0`) is returned unchanged because `f64::clamp`
/// leaves NaN as is.
pub fn gaussian_noise(epsilon: f64, delta: f64, sensitivity: f64) -> f64 {
    let unclamped = (2. * (1.25_f64 / delta).ln()).sqrt() * sensitivity / epsilon;
    // it can be inf so we clamp the results between 0 and f64::MAX
    unclamped.clamp(0.0, f64::MAX)
}

pub fn gaussian_tau(epsilon: f64, delta: f64, sensitivity: f64) -> f64 {
Expand Down
7 changes: 5 additions & 2 deletions src/expr/dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,14 @@ impl<'a, T: Clone + fmt::Display, V: Visitor<'a, T>> dot::Labeller<'a, Node<'a,
dot::escape_html(&col.to_string()),
&node.1
),
Expr::Value(val) => format!(
Expr::Value(val) => {
println!("{}", &val.to_string());
format!(
"<b>{}</b><br/>{}",
dot::escape_html(&val.to_string()),
&node.1
),
)
},
Expr::Function(fun) => {
format!(
"<b>{}</b><br/>{}",
Expand Down
28 changes: 27 additions & 1 deletion src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ TODO
- Remove
*/

/// Smallest denominator magnitude that `Expr::divide` treats as non-zero.
///
/// `1.0 / f64::MAX` is roughly `5.56e-309` (a subnormal `f64`); a denominator strictly
/// between `-EPSILON` and `EPSILON` makes the guarded division evaluate to `0.0`.
static EPSILON: f64 = 1.0/f64::MAX;

// Error management

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -367,7 +369,6 @@ impl_binary_function_constructors!(
Plus,
Minus,
Multiply,
Divide,
Modulo,
StringConcat,
Gt,
Expand Down Expand Up @@ -404,6 +405,31 @@ impl_binary_function_constructors!(
IsBool
);

impl Function {
    /// Builds the raw `Divide` function node with `left` as first argument and `right`
    /// as second argument.
    ///
    /// No check is made on the denominator here.
    pub fn divide<L: Into<Expr>, R: Into<Expr>>(left: L, right: R) -> Function {
        let numerator: Expr = left.into();
        let denominator: Expr = right.into();
        Function::new(
            function::Function::Divide,
            vec![Arc::new(numerator), Arc::new(denominator)],
        )
    }
}

impl Expr {
    /// Builds a division guarded against a zero denominator.
    ///
    /// The resulting expression is equivalent to
    /// `CASE WHEN right >= EPSILON OR right <= -EPSILON THEN left / right ELSE 0.0 END`.
    ///
    /// `right` ends up three times in the resulting expression (twice in the guard,
    /// once in the division), hence the `Clone` bound.
    pub fn divide<L: Into<Expr>, R: Into<Expr> + Clone>(left: L, right: R) -> Expr {
        // The plain, unguarded quotient.
        let quotient = Expr::from(Function::divide(left, right.clone()));
        // The denominator is considered non-zero when it lies outside (-EPSILON, EPSILON).
        let at_least_epsilon = Expr::gt_eq(right.clone(), Expr::val(EPSILON));
        let at_most_minus_epsilon = Expr::lt_eq(right, -Expr::val(EPSILON));
        let denominator_is_nonzero = Expr::or(at_least_epsilon, at_most_minus_epsilon);
        // Fall back to 0 whenever the guard does not hold.
        Expr::case(denominator_is_nonzero, quotient, Expr::val(0.0))
    }
}

/// Implement ternary function constructors
macro_rules! impl_ternary_function_constructors {
($( $Function:ident ),*) => {
Expand Down
30 changes: 25 additions & 5 deletions src/expr/sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -947,21 +947,30 @@ mod tests {
println!("ast::expr = {gen_expr}");
assert_eq!(ast_expr, gen_expr);

let epsilon = 1./f64::MAX;

let str_expr = "log(b, x)";
let ast_expr: ast::Expr = parse_expr(str_expr).unwrap();
let expr = Expr::try_from(&ast_expr).unwrap();
println!("expr = {}", expr);
let gen_expr = ast::Expr::from(&expr);
println!("ast::expr = {gen_expr}");
let true_expr = parse_expr("(log(x)) / ((log(b)))").unwrap();
let true_expr = parse_expr(
format!("CASE WHEN ((log(b)) >= ({})) OR ((log(b)) <= (-({})))
THEN (log(x)) / ((log(b))) ELSE 0 END", epsilon, epsilon).as_str()
).unwrap();
assert_eq!(gen_expr, true_expr);


let str_expr = "log10(x)";
let ast_expr: ast::Expr = parse_expr(str_expr).unwrap();
let expr = Expr::try_from(&ast_expr).unwrap();
println!("expr = {}", expr);
let gen_expr = ast::Expr::from(&expr);
let true_expr = parse_expr("(log(10)) / ((log(x)))").unwrap();
let true_expr = parse_expr(
format!("CASE WHEN ((log(x)) >= ({})) OR ((log(x)) <= (-({})))
THEN (log(10)) / ((log(x))) ELSE 0 END", epsilon, epsilon).as_str()
).unwrap();
assert_eq!(gen_expr, true_expr);

let str_expr = "log2(x)";
Expand All @@ -970,7 +979,10 @@ mod tests {
println!("expr = {}", expr);
let gen_expr = ast::Expr::from(&expr);
println!("ast::expr = {gen_expr}");
let true_expr = parse_expr("(log(2)) / ((log(x)))").unwrap();
let true_expr = parse_expr(
format!("CASE WHEN ((log(x)) >= ({})) OR ((log(x)) <= (-({})))
THEN (log(2)) / ((log(x))) ELSE 0 END", epsilon, epsilon).as_str()
).unwrap();
assert_eq!(gen_expr, true_expr);
}

Expand All @@ -992,13 +1004,17 @@ mod tests {
println!("ast::expr = {gen_expr}");
assert_eq!(ast_expr, gen_expr);

let epsilon = 1./f64::MAX;
let str_expr = "tan(x)";
let ast_expr: ast::Expr = parse_expr(str_expr).unwrap();
let expr = Expr::try_from(&ast_expr).unwrap();
println!("expr = {}", expr);
let gen_expr = ast::Expr::from(&expr);
println!("ast::expr = {gen_expr}");
let true_expr = parse_expr("(sin(x)) / ((cos(x)))").unwrap();
let true_expr = parse_expr(
format!("CASE WHEN ((cos(x)) >= ({})) OR ((cos(x)) <= (-({})))
THEN (sin(x)) / ((cos(x))) ELSE 0 END", epsilon, epsilon).as_str()
).unwrap();
assert_eq!(gen_expr, true_expr);
}

Expand Down Expand Up @@ -1032,7 +1048,11 @@ mod tests {
println!("expr = {}", expr);
let gen_expr = ast::Expr::from(&expr);
println!("ast::expr = {gen_expr}");
let true_expr = parse_expr("(100) * (((180) / ((pi()))))").unwrap();
let epsilon = 1./f64::MAX;
let true_expr = parse_expr(
format!("(100) * ((CASE WHEN ((pi()) >= ({})) OR ((pi()) <= (-({})))
THEN (180) / ((pi())) ELSE 0 END))", epsilon, epsilon).as_str()
).unwrap();
assert_eq!(gen_expr, true_expr);
}

Expand Down
83 changes: 83 additions & 0 deletions src/rewriting/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,89 @@ mod tests {
println!("dp_event = {}", dp_relation.dp_event());
assert!(!dp_relation.dp_event().is_no_op());
}
}

/// Smoke test: a JOIN + GROUP BY query mixing COUNT, SUM and AVG over two tables can be
/// rewritten with differential privacy without any step returning an error.
#[test]
fn test_patients() {
    // Patients table.
    let patient_columns = vec![
        ("Id", DataType::text()),
        ("BIRTHDATE", DataType::text()),
        ("GENDER", DataType::text()),
        ("ZIP", DataType::integer()),
    ];
    let patients: Relation = Relation::table()
        .name("axa_patients")
        .schema(patient_columns.into_iter().collect::<Schema>())
        .size(10901)
        .build();

    // Encounters table; `TOTAL_CLAIM_COST` only has a lower bound (-1).
    let encounter_columns = vec![
        ("Id", DataType::text()),
        ("START", DataType::text()),
        ("STOP", DataType::text()),
        ("PATIENT", DataType::text()),
        ("ORGANIZATION", DataType::text()),
        ("PROVIDER", DataType::text()),
        ("PAYER", DataType::text()),
        ("ENCOUNTERCLASS", DataType::text()),
        ("CODE", DataType::integer()),
        ("DESCRIPTION", DataType::text()),
        ("BASE_ENCOUNTER_COST", DataType::float()),
        ("TOTAL_CLAIM_COST", DataType::float_min(-1.)),
        ("PAYER_COVERAGE", DataType::float()),
        ("REASON_CODE", DataType::integer()),
        ("REASONDESCRIPTION", DataType::integer()),
    ];
    let encounters: Relation = Relation::table()
        .name("axa_encounters")
        .schema(encounter_columns.into_iter().collect::<Schema>())
        .size(77727)
        .build();

    // Catalog of the tables, keyed by their names.
    let catalog: Hierarchy<Arc<Relation>> = vec![patients, encounters]
        .iter()
        .map(|table| (Identifier::from(table.name()), Arc::new(table.clone().into())))
        .collect();

    // Rewriting inputs: synthetic counterparts, privacy unit and privacy budget.
    let synthetic = Some(SyntheticData::new(Hierarchy::from([
        (vec!["axa_patients"], Identifier::from("synthetic_axa_patients")),
        (vec!["axa_encounters"], Identifier::from("synthetic_axa_encounters")),
    ])));
    let unit = PrivacyUnit::from(vec![
        ("axa_patients", vec![], "Id"),
        ("axa_encounters", vec![("PATIENT", "axa_patients", "Id")], "Id"),
    ]);
    let budget = DpParameters::from_epsilon_delta(1., 1e-3);

    let statements = [
        r#"
SELECT
"ENCOUNTERCLASS",
COUNT(p."Id") as patient_count,
SUM("TOTAL_CLAIM_COST") as sum_cost,
AVG("TOTAL_CLAIM_COST") as avg_cost
FROM axa_patients p
JOIN axa_encounters e
ON p."Id" = e."PATIENT"
GROUP BY "ENCOUNTERCLASS"
"#,
    ];
    for sql in statements {
        println!("\n{sql}");
        let parsed = parse(sql).unwrap();
        let plain = Relation::try_from(parsed.with(&catalog)).unwrap();
        plain.display_dot().unwrap();
        // The rewriting itself must succeed; the result is only rendered, not asserted on.
        let private = plain
            .rewrite_with_differential_privacy(
                &catalog,
                synthetic.clone(),
                unit.clone(),
                budget.clone(),
            )
            .unwrap();
        private.relation().display_dot().unwrap();
    }
}
}

0 comments on commit a09db14

Please sign in to comment.