Skip to content

Commit

Permalink
chore: avoid fuzzy searching garbage
Browse files Browse the repository at this point in the history
  • Loading branch information
notdodo committed Oct 20, 2024
1 parent cbaae6f commit 2450be0
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 130 deletions.
15 changes: 4 additions & 11 deletions app/bot/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use dptree::deps;
use lambda_runtime::{service_fn, Error as LambdaError, LambdaEvent};
use serde_json::{json, Value};
use station::fuzzy::get_station;
use station::search::get_station;
use teloxide::{
prelude::*,
types::{LinkPreviewOptions, Me, ParseMode},
Expand Down Expand Up @@ -71,28 +71,21 @@ async fn lambda_handler(event: LambdaEvent<Value>) -> Result<Value, LambdaError>
let shared_config = aws_config::load_defaults(BehaviorVersion::latest()).await;
let dynamodb_client = DynamoDbClient::new(&shared_config);
let message = msg.text().unwrap();
let stations = station::stations();
let closest_station = stations.iter().min_by_key(|&s| {
edit_distance::edit_distance(
&message.to_lowercase(),
&s.replace(" ", "").to_lowercase(),
)
});
let text = match get_station(
&dynamodb_client,
closest_station.unwrap().to_string(),
message.to_string(),
"Stazioni",
)
.await
{
Ok(item) => {
Ok(Some(item)) => {
if item.nomestaz != message {
format!("{}\nSe non è la stazione corretta prova ad affinare la ricerca.", item.create_station_message())
}else {
item.create_station_message().to_string()
}
}
Err(_) => "Nessuna stazione trovata con la parola di ricerca. \n
Err(_) | Ok(None) => "Nessuna stazione trovata con la parola di ricerca. \n
Inserisci esattamente il nome che vedi dalla pagina https://allertameteo.regione.emilia-romagna.it/livello-idrometrico \n
Ad esempio 'Cesena', 'Lavino di Sopra' o 'S. Carlo'. \n
Se non sai quale cercare prova con /stazioni".to_string(),
Expand Down
118 changes: 0 additions & 118 deletions app/bot/src/station/fuzzy.rs

This file was deleted.

2 changes: 1 addition & 1 deletion app/bot/src/station/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pub mod fuzzy;
pub mod search;

use chrono::{DateTime, TimeZone};
use chrono_tz::Europe::Rome;
Expand Down
178 changes: 178 additions & 0 deletions app/bot/src/station/search.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
use anyhow::{anyhow, Result};
use aws_sdk_dynamodb::{types::AttributeValue, Client as DynamoDbClient};
use std::collections::HashMap;

use super::{stations, Stazione, UNKNOWN_VALUE};

/// Returns the known station name closest to `search`, if any is close enough.
///
/// The query and every candidate station name are normalized the same way
/// (spaces removed, lowercased) before their edit distance is computed.
/// Candidates whose distance is `MAX_DISTANCE` or more are discarded; among
/// the remaining ones the candidate with the smallest distance wins.
///
/// Returns the station name as it appears in the station list (original
/// casing and spacing), or `None` when no candidate is close enough.
fn fuzzy_search(search: &str) -> Option<String> {
    // Distances at or above this bound are treated as "not the same station".
    const MAX_DISTANCE: usize = 4;

    // Normalize the query once, and in the same way as the station names
    // below. Without stripping spaces here, a correctly typed multi-word name
    // would be charged one edit per space and could be rejected by the bound.
    let needle = search.replace(" ", "").to_lowercase();

    let known = stations();
    known
        .iter()
        .map(|s: &String| {
            let candidate = s.replace(" ", "").to_lowercase();
            (s, edit_distance::edit_distance(&needle, &candidate))
        })
        .filter(|(_, score)| *score < MAX_DISTANCE)
        .min_by_key(|(_, score)| *score)
        .map(|(station, _)| station.clone())
}

/// Resolves a free-text station name and loads the matching record from DynamoDB.
///
/// `station_name` is first mapped to a known station via `fuzzy_search`; the
/// resulting name is then used as the `nomestaz` key of a `get_item` request
/// against `table_name`.
///
/// # Errors
///
/// Returns an error when:
/// - `station_name` does not fuzzy-match any known station,
/// - the DynamoDB request fails,
/// - the table holds no item for the matched station, or
/// - a required attribute of the item is missing or cannot be parsed.
///
/// Note: every "not found" condition is currently reported as `Err`; this
/// function never produces `Ok(None)`.
pub async fn get_station(
    client: &DynamoDbClient,
    station_name: String,
    table_name: &str,
) -> Result<Option<Stazione>> {
    // Guard: bail out before touching DynamoDB when the input is not a station.
    let Some(closest_match) = fuzzy_search(&station_name) else {
        return Err(anyhow!(
            "'{}' did not match any known station",
            station_name
        ));
    };

    let result = client
        .get_item()
        .table_name(table_name)
        .key("nomestaz", AttributeValue::S(closest_match.clone()))
        .send()
        .await?;

    let Some(item) = result.item else {
        return Err(anyhow!("Station '{}' not found", closest_match));
    };

    let idstazione = parse_string_field(&item, "idstazione")?;
    let timestamp = parse_number_field::<i64>(&item, "timestamp")?;
    let lon = parse_string_field(&item, "lon")?;
    let lat = parse_string_field(&item, "lat")?;
    let ordinamento = parse_number_field::<i32>(&item, "ordinamento")?;
    let nomestaz = parse_string_field(&item, "nomestaz")?;
    let soglia1 = parse_number_field::<f64>(&item, "soglia1")?;
    let soglia2 = parse_number_field::<f64>(&item, "soglia2")?;
    let soglia3 = parse_number_field::<f64>(&item, "soglia3")?;
    // `value` is the only optional attribute; fall back to the sentinel when
    // the parser reports it as absent.
    let value = parse_optional_number_field(&item, "value")?.unwrap_or(UNKNOWN_VALUE);

    Ok(Some(Stazione {
        timestamp,
        idstazione,
        ordinamento,
        nomestaz,
        lon,
        lat,
        soglia1,
        soglia2,
        soglia3,
        value,
    }))
}

/// Reads `field` from a DynamoDB item as a `String`.
///
/// A string attribute is returned as-is; a string-set attribute is flattened
/// into a single comma-separated string. Any other attribute type, or a
/// missing attribute, is an error.
fn parse_string_field(item: &HashMap<String, AttributeValue>, field: &str) -> Result<String> {
    let text = item.get(field).and_then(|attribute| match attribute {
        AttributeValue::S(single) => Some(single.clone()),
        AttributeValue::Ss(members) => Some(members.join(",")),
        _ => None,
    });

    text.ok_or_else(|| anyhow!("Missing or invalid '{}' field", field))
}

/// Reads `field` from a DynamoDB item and parses it as a number of type `T`.
///
/// Both number attributes and string attributes are accepted; in either case
/// the raw text is parsed with `T::from_str`. A missing attribute or any other
/// attribute type is an error, as is text that does not parse as `T`.
fn parse_number_field<T: std::str::FromStr>(
    item: &HashMap<String, AttributeValue>,
    field: &str,
) -> Result<T>
where
    <T as std::str::FromStr>::Err: std::fmt::Display,
{
    // `N` and `S` both carry the value as text, so they share one parse path.
    let raw = match item.get(field) {
        Some(AttributeValue::N(raw) | AttributeValue::S(raw)) => raw,
        _ => return Err(anyhow!("Missing or invalid '{}' field", field)),
    };

    raw.parse::<T>().map_err(|e| {
        anyhow!(
            "Failed to parse '{}' field with value '{}' as number: {}",
            field,
            raw,
            e
        )
    })
}

/// Reads an optional numeric `field` from a DynamoDB item.
///
/// Returns:
/// - `Ok(None)` when the attribute is absent or stored as a DynamoDB `NULL`,
///   so callers can substitute their own default;
/// - `Ok(Some(value))` when the attribute is a number or string that parses
///   as `T`;
/// - `Err` when the attribute is present with text that does not parse as
///   `T`, or has any other type.
fn parse_optional_number_field<T: std::str::FromStr>(
    item: &HashMap<String, AttributeValue>,
    field: &str,
) -> Result<Option<T>>
where
    <T as std::str::FromStr>::Err: std::fmt::Display,
{
    let raw = match item.get(field) {
        // An optional field that is simply not there is not an error.
        None | Some(AttributeValue::Null(_)) => return Ok(None),
        // `N` and `S` both carry the value as text, so they share one parse path.
        Some(AttributeValue::N(raw) | AttributeValue::S(raw)) => raw,
        Some(_) => return Err(anyhow!("Invalid type for '{}' field", field)),
    };

    raw.parse::<T>().map(Some).map_err(|_| {
        anyhow!(
            "Failed to parse '{}' field with value '{}' as number",
            field,
            raw
        )
    })
}

// Unit tests for `fuzzy_search`; they rely on the station list returned by
// `stations()` containing "Cesena" and "S. Carlo".
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fuzzy_search_cesena_yields_cesena_station() {
        // A lowercase query resolves to the station's original spelling.
        assert_eq!(fuzzy_search("cesena"), Some("Cesena".to_string()));
    }

    #[test]
    fn fuzzy_search_scarlo_yields_scarlo_station() {
        // Missing punctuation and spacing is tolerated.
        assert_eq!(fuzzy_search("scarlo"), Some("S. Carlo".to_string()));
    }

    #[test]
    fn fuzzy_search_nonexisting_yields_nonexisting_station() {
        // Input that is far from every station name yields no match at all.
        assert_eq!(fuzzy_search("thisdoesnotexists"), None);
    }

    #[test]
    fn fuzzy_search_ecsena_yields_cesena_station() {
        // Two swapped letters still resolve to the intended station.
        assert_eq!(fuzzy_search("ecsena"), Some("Cesena".to_string()));
    }
}

0 comments on commit 2450be0

Please sign in to comment.