Skip to content

Commit

Permalink
add code for testing slicer
Browse files Browse the repository at this point in the history
  • Loading branch information
mmoskal committed Dec 23, 2024
1 parent 54a9b71 commit fd16d3d
Showing 1 changed file with 55 additions and 13 deletions.
68 changes: 55 additions & 13 deletions json_stats/src/json_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ pub struct CliOptions {
#[arg(long)]
llg_no_forcing: bool,

#[arg(long)]
llg_test_slicer: bool,

#[arg(long)]
csv: bool,

Expand Down Expand Up @@ -196,6 +199,7 @@ struct TestEnv {
cli: Arc<CliOptions>,
tok_env: TokEnv,
factory: Arc<ParserFactory>,
ref_factory: Arc<ParserFactory>,
file_name: String,
}

Expand Down Expand Up @@ -254,6 +258,7 @@ impl TestEnv {
&self,
stats: &mut LlgResult,
parser: &mut TokenParser,
mut ref_parser: Option<&mut TokenParser>,
t: &JsonTestSequence,
) -> Result<()> {
let dstr = serde_json::to_string(&t.data).unwrap();
Expand All @@ -276,6 +281,23 @@ impl TestEnv {
let us = t0.elapsed().as_micros() as usize;
let pstats = parser.last_step_stats();

if let Some(ref_parser) = &mut ref_parser {
let m2 = ref_parser.compute_mask()?;
if m != m2 {
let mut missing_slicer = m2.clone();
missing_slicer.sub(&m);
let mut missing_parser = m.clone();
missing_parser.sub(&m2);
eprintln!(
"{}:\n{}\n{}",
tidx,
trie.token_set_dbg(&missing_slicer),
trie.token_set_dbg(&missing_parser)
);
panic!("mismatch");
}
}

stats.all_mask_us.push(us);

// && pstats.lexer_cost < 7 * us as u64
Expand Down Expand Up @@ -345,6 +367,11 @@ impl TestEnv {
}
let bt = parser.consume_token(token)?;
assert!(bt == 0);

if let Some(ref_parser) = &mut ref_parser {
let bt = ref_parser.consume_token(token)?;
assert!(bt == 0);
}
}

if parser.is_accepting() {
Expand All @@ -369,6 +396,7 @@ impl TestEnv {
&self,
stats: &mut LlgResult,
parser: &TokenParser,
ref_parser: Option<&TokenParser>,
t: &JsonTestSequence,
) -> Result<()> {
// if self.cli.llg_masks && !t.valid {
Expand All @@ -378,7 +406,10 @@ impl TestEnv {
let mut parser = parser.deep_clone();
parser.start_without_prompt();

let r = self.run_llg_test_inner(stats, &mut parser, t);
let mut ref_parser = ref_parser.map(|p| p.deep_clone());
ref_parser.as_mut().map(|p| p.start_without_prompt());

let r = self.run_llg_test_inner(stats, &mut parser, ref_parser.as_mut(), t);

let m = parser.parser.metrics_mut();
stats.slicer_leftover_us += m.slicer_leftover_us;
Expand Down Expand Up @@ -410,6 +441,12 @@ impl TestEnv {
schema.grammars[0].no_forcing = true;
}

let ref_parser = if self.cli.llg_test_slicer {
Some(self.ref_factory.create_parser(schema.clone()))
} else {
None
};

let parser = self.factory.create_parser(schema);

let parser = match parser {
Expand All @@ -432,10 +469,12 @@ impl TestEnv {

res.lexer_stats = parser.parser.lexer_stats();

let ref_parser = ref_parser.map(|p| p.unwrap());

if self.cli.llg_test {
for (idx, t) in test_file.tests.iter().enumerate() {
let t0 = std::time::Instant::now();
if let Err(e) = self.run_llg_test(&mut res, &parser, t) {
if let Err(e) = self.run_llg_test(&mut res, &parser, ref_parser.as_ref(), t) {
res.validation_error = Some(format!("test #{idx}: {e}"));
limit_string(&mut res.validation_error);
} else {
Expand Down Expand Up @@ -626,19 +665,21 @@ fn main() {
slices.clear();
}

let mut factory = ParserFactory::new(
&tok_env,
InferenceCapabilities {
ff_tokens: false,
backtrack: false,
conditional_ff_tokens: false,
fork: false,
},
&slices,
)
.unwrap();
let caps = InferenceCapabilities {
ff_tokens: false,
backtrack: false,
conditional_ff_tokens: false,
fork: false,
};

let mut factory = ParserFactory::new(&tok_env, caps.clone(), &slices).unwrap();
factory.quiet();

let mut ref_factory = ParserFactory::new(&tok_env, caps.clone(), &vec![]).unwrap();
ref_factory.quiet();

let factory = Arc::new(factory);
let ref_factory = Arc::new(ref_factory);

save_text_to_file("tmp/slices.txt", &factory.slicer().stats(false));
save_text_to_file("tmp/slices_tokens.txt", &factory.slicer().stats(true));
Expand All @@ -649,6 +690,7 @@ fn main() {
let env = TestEnv {
tok_env: tok_env.clone(),
factory: factory.clone(),
ref_factory: ref_factory.clone(),
file_name: file.to_string(),
cli: options.clone(),
};
Expand Down

0 comments on commit fd16d3d

Please sign in to comment.