Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support lazy parsing in owned lazyvalue #133

Merged
merged 11 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion examples/lazyvalue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ fn main() {
}
let data: TestLazyValue = sonic_rs::from_str(input).unwrap();
assert_eq!(data.borrowed.as_raw_str(), "\"hello\"");
assert_eq!(data.owned.as_raw_str(), "\"world\"");

// use serde_json
#[derive(Debug, Deserialize, Serialize)]
Expand Down
275 changes: 2 additions & 273 deletions fuzz/fuzz_targets/from_slice.rs
Original file line number Diff line number Diff line change
@@ -1,277 +1,6 @@
#![no_main]
#![allow(clippy::mutable_key_type)]
use std::{borrow::Cow, collections::HashMap, hash::Hash, marker::PhantomData};

use faststr::FastStr;
use libfuzzer_sys::fuzz_target;
use serde::{Deserialize, Serialize};
use serde_json::Value as JValue;
use sonic_rs::{
from_slice, from_str, to_array_iter, to_array_iter_unchecked, to_object_iter,
to_object_iter_unchecked, value::JsonContainerTrait, Deserializer, JsonNumberTrait,
JsonValueTrait, Value,
};
use sonic_rs_fuzz::sonic_rs_fuzz_data;

macro_rules! test_type {
($data:expr, $($ty:ty),+) => {
$(
{
match serde_json::from_slice::<$ty>($data) {
Ok(jv) => {
let sv: $ty = sonic_rs::from_slice::<$ty>($data).expect(&format!(
"parse valid json {:?} failed for type {}",
$data,
stringify!($ty)
));
assert_eq!(sv, jv);

// Fuzz the struct to_string
let sout = sonic_rs::to_string(&sv).unwrap();
let jout = serde_json::to_string(&jv).unwrap();
let sv: $ty = sonic_rs::from_str::<$ty>(&sout).unwrap();
let jv: $ty = serde_json::from_str::<$ty>(&jout).unwrap();
assert_eq!(sv, jv);
}
Err(_) => {
let _ = sonic_rs::from_slice::<$ty>($data).expect_err(&format!(
"parse invalid json {:?} wrong for type {}",
$data,
stringify!($ty)
));
}
}
}
)*
};
}

fuzz_target!(|data: &[u8]| {
match serde_json::from_slice::<JValue>(data) {
Ok(jv) => {
// compare from_slice result
let sv: Value = from_slice(data).unwrap();
compare_value(&jv, &sv);

// compare to_string result
let sout = sonic_rs::to_string(&sv).unwrap();
let jv2 = serde_json::from_str::<JValue>(&sout).unwrap();
let sv2: Value = from_str(&sout).unwrap();
let eq = compare_value(&jv2, &sv2);

fuzz_use_raw(data, &sv);
fuzz_utf8_lossy(data, &sv);

if jv.is_object() && eq {
for ret in to_object_iter(data) {
let (k, lv) = ret.unwrap();
let jv = jv.get(k.as_str()).unwrap();
compare_lazyvalue(jv, &lv);

let gv = sonic_rs::get(data, &[k.as_str()]).unwrap();
compare_lazyvalue(jv, &gv);
}

// fuzzing unchecked apis
unsafe {
for ret in to_object_iter_unchecked(data) {
let (k, lv) = ret.unwrap();
let jv = jv.get(k.as_str()).unwrap();
compare_lazyvalue(jv, &lv);

let gv = sonic_rs::get_unchecked(data, &[k.as_str()]).unwrap();
compare_lazyvalue(jv, &gv);
}
}
} else if jv.is_array() {
for (i, ret) in to_array_iter(data).enumerate() {
let lv = ret.unwrap();
let jv = jv.get(i).unwrap();
compare_lazyvalue(jv, &lv);

let gv = sonic_rs::get(data, [i]).unwrap();
compare_lazyvalue(jv, &gv);
}

// fuzzing unchecked apis
unsafe {
for (i, ret) in to_array_iter_unchecked(data).enumerate() {
let lv = ret.unwrap();
let jv = jv.get(i).unwrap();
compare_lazyvalue(jv, &lv);

let gv = sonic_rs::get_unchecked(data, [i]).unwrap();
compare_lazyvalue(jv, &gv);
}
}
}
}
Err(_) => {
let _ = from_slice::<Value>(data)
.expect_err(&format!("parse invalid json {:?} failed", data));
}
}

test_type!(
data, TestStruct, Enum, Foo, String, f64, u8, u16, u32, u64, u128, i8, i16, i32, i64, i128
);
});

fn compare_lazyvalue(jv: &JValue, sv: &sonic_rs::LazyValue) {
let out = sv.as_raw_str().as_bytes();
let sv2: sonic_rs::Value = sonic_rs::from_slice(out).unwrap();
compare_value(jv, &sv2);
}

fn fuzz_use_raw(json: &[u8], sv: &sonic_rs::Value) {
let json = unsafe { std::str::from_utf8_unchecked(json) };
let mut de = Deserializer::from_str(json).use_raw();
let value: Value = Deserialize::deserialize(&mut de).unwrap();
let out = sonic_rs::to_string(&value).unwrap();
let got: Value = sonic_rs::from_str(&out).unwrap();
assert_eq!(&got, sv);
}

fn fuzz_utf8_lossy(json: &[u8], sv: &sonic_rs::Value) {
let json = unsafe { std::str::from_utf8_unchecked(json) };
let mut de = Deserializer::from_str(json).utf8_lossy();
let value: Value = Deserialize::deserialize(&mut de).unwrap();
let out = sonic_rs::to_string(&value).unwrap();
let got: Value = sonic_rs::from_str(&out).unwrap();
assert_eq!(&got, sv);
}

fn compare_value(jv: &JValue, sv: &sonic_rs::Value) -> bool {
match *jv {
JValue::Object(ref obj) => {
assert!(sv.is_object());
let sobj = sv.as_object().unwrap();
// because serde_json use a map to store object, and sonic_rs allows the repeated keys
if sobj.len() == obj.len() {
for (k, v) in obj {
let got = sobj.get(k).unwrap();
compare_value(v, got);
}
return true;
} else {
return false;
}
}
JValue::Array(ref arr) => {
assert!(sv.is_array());
let sarr = sv.as_array().unwrap();
assert!(arr.len() == sarr.len());

for (i, v) in arr.iter().enumerate() {
let got = sarr.get(i).unwrap();
compare_value(v, got);
}
}
JValue::Bool(b) => assert!(sv.is_boolean() && sv.as_bool().unwrap() == b),
JValue::Null => assert!(sv.is_null()),
JValue::Number(ref num) => {
let got = sv.as_number().unwrap();
if num.is_f64() {
let jf = num.as_f64().unwrap();
let sf = got.as_f64().unwrap();
assert_eq!(jf, sf, "jf {} sf {}", jf, sf);
}
if num.is_u64() {
assert!(num.as_u64().unwrap() == got.as_u64().unwrap());
}
if num.is_i64() {
assert!(num.as_i64().unwrap() == got.as_i64().unwrap());
}
}
JValue::String(ref s) => assert!(sv.is_str() && sv.as_str().unwrap() == s),
}
true
}

#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Foo {
name: FastStr,
id: u64,
}

#[derive(Debug, Deserialize, Serialize, Hash, Eq, PartialEq)]
enum Enum {
Zero = 0,
One = 1,
Two = 2,
}

#[derive(Debug, Deserialize, Serialize, PartialEq)]
enum FieldEnum {
Integer(i8),
Tuple((FastStr, i32)),
Struct(Foo),
Unit,
}

#[derive(Debug, Deserialize, Serialize, PartialEq)]
enum FieldlessEnum {
Tuple(),
Struct {},
Unit,
}

#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Wrapper<'a>(&'a str);

// A unit struct
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Unit;

// A uint struct
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Phan<T> {
phan: String,
_data: PhantomData<T>,
}

// A tuple struct
#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct Pair(i32, f32);

#[derive(Debug, Deserialize, Serialize, PartialEq)]
struct TestStruct<'a> {
fieldless: FieldlessEnum,
enummap: HashMap<Enum, FieldlessEnum>,
enum_: Enum,

// basic types
boolean: bool,
integer: i32,
float: f64,
int128: i128,
uint128: u128,
char_: char,

// string or bytes
str_: &'a str,
// bytes_: &'a [u8],
string: String,
faststr: FastStr,
#[serde(borrow)]
cow: Cow<'a, str>,

// containers
vector: Vec<u32>,
array: [u32; 1],
empty_array: [u8; 0],
map: HashMap<FastStr, f64>,
map_opkey: HashMap<Option<FastStr>, f64>,

// enum types
option: Option<String>,
fieldenum: FieldEnum,

// tuple or struct
tuple: (u64, String),
tuple_struct: Pair,
unit_struct: Unit,

#[serde(borrow)]
wrapper: Wrapper<'a>,
phan_struct: Phan<()>,
}
fuzz_target!(|data: &[u8]| sonic_rs_fuzz_data(data));
Loading
Loading