Skip to content

Commit

Permalink
Renamed index_codepoints to slice_codepoints and updated to do it cle…
Browse files Browse the repository at this point in the history
…aner (#67)

* Renamed index_codepoints to slice_codepoints and updated to do it cleaner

Signed-off-by: declark1 <[email protected]>

* Update apply_masks to use slice_codepoints

Signed-off-by: declark1 <[email protected]>

---------

Signed-off-by: declark1 <[email protected]>
  • Loading branch information
declark1 authored Jun 7, 2024
1 parent 189a8e1 commit 2354acd
Showing 1 changed file with 9 additions and 10 deletions.
19 changes: 9 additions & 10 deletions src/orchestrator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ async fn handle_detection_task(
.filter_map(|resp| {
let mut result: TokenClassificationResult = resp.into();
result.word =
index_codepoints(&chunk.text, result.start as usize, result.end as usize);
slice_codepoints(&chunk.text, result.start as usize, result.end as usize);
result.start += chunk.offset as u32;
result.end += chunk.offset as u32;
(result.score >= threshold).then_some(result)
Expand Down Expand Up @@ -579,19 +579,18 @@ async fn generate(
}
}

/// Get codepoints of text between start and end indices
fn index_codepoints(text: &str, start: usize, end: usize) -> String {
let chars = text.chars().collect::<Vec<_>>();
chars[start..end].iter().collect()
/// Slices chars between start and end indices.
///
/// `start` (inclusive) and `end` (exclusive) are measured in `char`s
/// (Unicode scalar values), not bytes, so multi-byte text is never split
/// in the middle of a character.
///
/// Out-of-range indices are clamped rather than panicking: a `start` past
/// the end of `text` yields an empty string, and an `end` past the end of
/// `text` yields everything from `start` onwards. An inverted span
/// (`end < start`) yields an empty string.
fn slice_codepoints(text: &str, start: usize, end: usize) -> String {
    // `end - start` would overflow on an inverted span: a panic in debug
    // builds, and a wrapped (huge) length in release builds that silently
    // returns the whole tail of `text`. Saturate to zero instead.
    let len = end.saturating_sub(start);
    text.chars().skip(start).take(len).collect()
}

/// Applies masks to input text, returning (offset, masked_text) pairs.
fn apply_masks(text: &str, masks: &[(usize, usize)]) -> Vec<(usize, String)> {
let chars = text.chars().collect::<Vec<_>>();
masks
.iter()
.map(|(start, end)| {
let masked_text = chars[*start..*end].iter().cloned().collect();
let masked_text = slice_codepoints(text, *start, *end);
(*start, masked_text)
})
.collect()
Expand Down Expand Up @@ -704,10 +703,10 @@ mod tests {
}

#[test]
fn test_index_codepoints() {
fn test_slice_codepoints() {
let s = "Hello world";
assert_eq!(index_codepoints(s, 0, 5), "Hello");
assert_eq!(slice_codepoints(s, 0, 5), "Hello");
let s = "哈囉世界";
assert_eq!(index_codepoints(s, 3, 4), "界");
assert_eq!(slice_codepoints(s, 3, 4), "界");
}
}

0 comments on commit 2354acd

Please sign in to comment.