diff --git a/Cargo.toml b/Cargo.toml index 122e111d..9ff28c8b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ resolver = "2" [workspace.package] authors = ["Samuel Colvin "] -version = "0.7.0" +version = "0.7.1" edition = "2021" license = "MIT" keywords = ["JSON", "parsing", "deserialization", "iter"] diff --git a/crates/jiter/benches/main.rs b/crates/jiter/benches/main.rs index 383f507b..2429d7ab 100644 --- a/crates/jiter/benches/main.rs +++ b/crates/jiter/benches/main.rs @@ -4,7 +4,7 @@ use std::hint::black_box; use std::fs::File; use std::io::Read; -use jiter::{Jiter, JsonValue, LazyIndexMap, Peek}; +use jiter::{Jiter, JsonValue, LazyIndexMap, PartialMode, Peek}; use serde_json::Value; fn read_file(path: &str) -> String { @@ -258,7 +258,7 @@ fn string_array_jiter_value_owned(bench: &mut Bencher) { let json = read_file("./benches/string_array.json"); let json_data = json.as_bytes(); bench.iter(|| { - let v = JsonValue::parse_owned(black_box(json_data), false, false).unwrap(); + let v = JsonValue::parse_owned(black_box(json_data), false, PartialMode::Off).unwrap(); black_box(v) }) } @@ -267,7 +267,7 @@ fn medium_response_jiter_value_owned(bench: &mut Bencher) { let json = read_file("./benches/medium_response.json"); let json_data = json.as_bytes(); bench.iter(|| { - let v = JsonValue::parse_owned(black_box(json_data), false, false).unwrap(); + let v = JsonValue::parse_owned(black_box(json_data), false, PartialMode::Off).unwrap(); black_box(v) }) } diff --git a/crates/jiter/src/jiter.rs b/crates/jiter/src/jiter.rs index a547bd7a..fc923b74 100644 --- a/crates/jiter/src/jiter.rs +++ b/crates/jiter/src/jiter.rs @@ -3,7 +3,7 @@ use crate::number_decoder::{NumberAny, NumberFloat, NumberInt, NumberRange}; use crate::parse::{Parser, Peek}; use crate::string_decoder::{StringDecoder, StringDecoderRange, Tape}; use crate::value::{take_value_borrowed, take_value_owned, take_value_skip, JsonValue}; -use crate::{JsonError, JsonErrorType}; +use crate::{JsonError, JsonErrorType, PartialMode}; pub type JiterResult = Result; @@ -242,7 +242,7 @@ impl<'j> Jiter<'j> { &mut self.tape, DEFAULT_RECURSION_LIMIT, self.allow_inf_nan, - false, + PartialMode::Off, ) .map_err(Into::into) } @@ -289,7 +289,7 @@ impl<'j> Jiter<'j> { &mut self.tape, DEFAULT_RECURSION_LIMIT, self.allow_inf_nan, - false, + PartialMode::Off, ) .map_err(Into::into) } diff --git a/crates/jiter/src/lib.rs b/crates/jiter/src/lib.rs index dff79a68..4e46c6e0 100644 --- a/crates/jiter/src/lib.rs +++ b/crates/jiter/src/lib.rs @@ -28,4 +28,37 @@ pub use py_lossless_float::{FloatMode, LosslessFloat}; #[cfg(feature = "python")] pub use py_string_cache::{cache_clear, cache_usage, cached_py_string, pystring_fast_new, StringCacheMode}; #[cfg(feature = "python")] -pub use python::{map_json_error, PartialMode, PythonParse}; +pub use python::{map_json_error, PythonParse}; + +#[derive(Debug, Clone, Copy)] +pub enum PartialMode { + Off, + On, + TrailingStrings, +} + +impl Default for PartialMode { + fn default() -> Self { + Self::Off + } +} + +impl From for PartialMode { + fn from(mode: bool) -> Self { + if mode { + Self::On + } else { + Self::Off + } + } +} + +impl PartialMode { + pub fn is_active(self) -> bool { + !matches!(self, Self::Off) + } + + pub fn allow_trailing_str(self) -> bool { + matches!(self, Self::TrailingStrings) + } +} diff --git a/crates/jiter/src/python.rs b/crates/jiter/src/python.rs index f94c7b82..70b09042 100644 --- a/crates/jiter/src/python.rs +++ b/crates/jiter/src/python.rs @@ -15,7 +15,7 @@ use crate::parse::{Parser, Peek}; use crate::py_lossless_float::{get_decimal_type, FloatMode}; use crate::py_string_cache::{StringCacheAll, StringCacheKeys, StringCacheMode, StringMaybeCache, StringNoCache}; use crate::string_decoder::{StringDecoder, Tape}; -use crate::{JsonErrorType, LosslessFloat}; +use crate::{JsonErrorType, LosslessFloat, PartialMode}; #[derive(Default)] #[allow(clippy::struct_excessive_bools)] @@ -234,19 +234,6 @@ impl<'j, StringCache: StringMaybeCache, KeyCheck: MaybeKeyCheck, ParseNumber: Ma } } -#[derive(Debug, Clone, Copy)] -pub enum PartialMode { - Off, - On, - TrailingStrings, -} - -impl Default for PartialMode { - fn default() -> Self { - Self::Off - } -} - const PARTIAL_ERROR: &str = "Invalid partial mode, should be `'off'`, `'on'`, `'trailing-strings'` or a `bool`"; impl<'py> FromPyObject<'py> for PartialMode { @@ -266,26 +253,6 @@ impl<'py> FromPyObject<'py> for PartialMode { } } -impl From for PartialMode { - fn from(mode: bool) -> Self { - if mode { - Self::On - } else { - Self::Off - } - } -} - -impl PartialMode { - fn is_active(self) -> bool { - !matches!(self, Self::Off) - } - - fn allow_trailing_str(self) -> bool { - matches!(self, Self::TrailingStrings) - } -} - trait MaybeKeyCheck: Default { fn check(&mut self, key: &str, index: usize) -> JsonResult<()>; } diff --git a/crates/jiter/src/value.rs b/crates/jiter/src/value.rs index 510e6793..df695e41 100644 --- a/crates/jiter/src/value.rs +++ b/crates/jiter/src/value.rs @@ -10,6 +10,7 @@ use crate::lazy_index_map::LazyIndexMap; use crate::number_decoder::{NumberAny, NumberInt, NumberRange}; use crate::parse::{Parser, Peek}; use crate::string_decoder::{StringDecoder, StringDecoderRange, StringOutput, Tape}; +use crate::PartialMode; /// Enum representing a JSON value. #[derive(Clone, Debug, PartialEq)] @@ -56,10 +57,14 @@ impl<'j> JsonValue<'j> { /// Parse a JSON enum from a byte slice, returning a borrowed version of the enum - e.g. strings can be /// references into the original byte slice. pub fn parse(data: &'j [u8], allow_inf_nan: bool) -> Result { - Self::parse_with_config(data, allow_inf_nan, false) + Self::parse_with_config(data, allow_inf_nan, PartialMode::Off) } - pub fn parse_with_config(data: &'j [u8], allow_inf_nan: bool, allow_partial: bool) -> Result { + pub fn parse_with_config( + data: &'j [u8], + allow_inf_nan: bool, + allow_partial: PartialMode, + ) -> Result { let mut parser = Parser::new(data); let mut tape = Tape::default(); @@ -72,7 +77,7 @@ impl<'j> JsonValue<'j> { allow_inf_nan, allow_partial, )?; - if !allow_partial { + if !allow_partial.is_active() { parser.finish()?; } Ok(v) @@ -105,7 +110,7 @@ fn value_static(v: JsonValue<'_>) -> JsonValue<'static> { impl JsonValue<'static> { /// Parse a JSON enum from a byte slice, returning an owned version of the enum. - pub fn parse_owned(data: &[u8], allow_inf_nan: bool, allow_partial: bool) -> Result { + pub fn parse_owned(data: &[u8], allow_inf_nan: bool, allow_partial: PartialMode) -> Result { let mut parser = Parser::new(data); let mut tape = Tape::default(); @@ -129,7 +134,7 @@ pub(crate) fn take_value_borrowed<'j>( tape: &mut Tape, recursion_limit: u8, allow_inf_nan: bool, - allow_partial: bool, + allow_partial: PartialMode, ) -> JsonResult> { take_value( peek, @@ -148,7 +153,7 @@ pub(crate) fn take_value_owned<'j>( tape: &mut Tape, recursion_limit: u8, allow_inf_nan: bool, - allow_partial: bool, + allow_partial: PartialMode, ) -> JsonResult> { take_value( peek, @@ -167,9 +172,10 @@ fn take_value<'j, 's>( tape: &mut Tape, recursion_limit: u8, allow_inf_nan: bool, - allow_partial: bool, + allow_partial: PartialMode, create_cow: &impl Fn(StringOutput<'_, 'j>) -> Cow<'s, str>, ) -> JsonResult> { + let partial_active = allow_partial.is_active(); match peek { Peek::True => { parser.consume_true()?; @@ -184,14 +190,15 @@ fn take_value<'j, 's>( Ok(JsonValue::Null) } Peek::String => { - let s: StringOutput<'_, 'j> = parser.consume_string::(tape, allow_partial)?; + let s: StringOutput<'_, 'j> = + parser.consume_string::(tape, allow_partial.allow_trailing_str())?; Ok(JsonValue::Str(create_cow(s))) } Peek::Array => { let array = Arc::new(SmallVec::new()); let peek_first = match parser.array_first() { Ok(Some(peek)) => peek, - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), Ok(None) | Err(_) => return Ok(JsonValue::Array(array)), }; take_value_recursive( @@ -210,7 +217,7 @@ fn take_value<'j, 's>( let object = Arc::new(LazyIndexMap::new()); let first_key = match parser.object_first::(tape) { Ok(Some(first_key)) => first_key, - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => return Ok(JsonValue::Object(object)), }; let first_key = create_cow(first_key); @@ -228,7 +235,7 @@ fn take_value<'j, 's>( allow_partial, create_cow, ), - Err(e) if !(allow_partial && e.allowed_if_partial()) => Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => Err(e), _ => Ok(JsonValue::Object(object)), } } @@ -269,12 +276,13 @@ fn take_value_recursive<'j, 's>( tape: &mut Tape, recursion_limit: u8, allow_inf_nan: bool, - allow_partial: bool, + allow_partial: PartialMode, create_cow: &impl Fn(StringOutput<'_, 'j>) -> Cow<'s, str>, ) -> JsonResult> { let recursion_limit: usize = recursion_limit.into(); let mut recursion_stack: SmallVec<[RecursedValue; 8]> = SmallVec::new(); + let partial_active = allow_partial.is_active(); macro_rules! push_recursion { ($next_peek:expr, $value:expr) => { @@ -296,7 +304,7 @@ fn take_value_recursive<'j, 's>( Peek::False => parser.consume_false().map(|()| JsonValue::Bool(false)), Peek::Null => parser.consume_null().map(|()| JsonValue::Null), Peek::String => parser - .consume_string::(tape, allow_partial) + .consume_string::(tape, allow_partial.allow_trailing_str()) .map(|s| JsonValue::Str(create_cow(s))), Peek::Array => { let array = Arc::new(SmallVec::new()); @@ -306,7 +314,7 @@ fn take_value_recursive<'j, 's>( // immediately jump to process the first value in the array continue 'recursion; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; Ok(JsonValue::Array(array)) @@ -325,10 +333,10 @@ fn take_value_recursive<'j, 's>( ); continue 'recursion; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }, - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; Ok(JsonValue::Object(object)) @@ -360,7 +368,7 @@ fn take_value_recursive<'j, 's>( // array continuing continue; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; @@ -371,7 +379,7 @@ fn take_value_recursive<'j, 's>( Arc::get_mut(&mut array).expect("sole writer to value").push(value); array } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => { let RecursedValue::Array(array) = current_recursion else { unreachable!("known to be in array recursion"); @@ -391,7 +399,7 @@ fn take_value_recursive<'j, 's>( Peek::False => parser.consume_false().map(|()| JsonValue::Bool(false)), Peek::Null => parser.consume_null().map(|()| JsonValue::Null), Peek::String => parser - .consume_string::(tape, allow_partial) + .consume_string::(tape, allow_partial.allow_trailing_str()) .map(|s| JsonValue::Str(create_cow(s))), Peek::Array => { let array = Arc::new(SmallVec::new()); @@ -401,7 +409,7 @@ fn take_value_recursive<'j, 's>( // immediately jump to process the first value in the array continue 'recursion; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; Ok(JsonValue::Array(array)) @@ -420,10 +428,10 @@ fn take_value_recursive<'j, 's>( ); continue 'recursion; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }, - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; Ok(JsonValue::Object(object)) @@ -460,11 +468,11 @@ fn take_value_recursive<'j, 's>( peek = next_peek; continue; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), } @@ -475,7 +483,7 @@ fn take_value_recursive<'j, 's>( Arc::get_mut(&mut partial).expect("sole writer").insert(next_key, value); partial } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => { let RecursedValue::Object { partial, .. } = current_recursion else { unreachable!("known to be in object recursion"); @@ -506,7 +514,7 @@ fn take_value_recursive<'j, 's>( current_recursion = RecursedValue::Array(array); break next_peek; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), } JsonValue::Array(array) @@ -523,10 +531,10 @@ fn take_value_recursive<'j, 's>( }; break next_peek; } - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), }, - Err(e) if !(allow_partial && e.allowed_if_partial()) => return Err(e), + Err(e) if !(partial_active && e.allowed_if_partial()) => return Err(e), _ => (), } diff --git a/crates/jiter/tests/main.rs b/crates/jiter/tests/main.rs index 516b7892..c429a3f6 100644 --- a/crates/jiter/tests/main.rs +++ b/crates/jiter/tests/main.rs @@ -10,7 +10,7 @@ use smallvec::smallvec; use jiter::{ Jiter, JiterErrorType, JiterResult, JsonErrorType, JsonType, JsonValue, LazyIndexMap, LinePosition, NumberAny, - NumberInt, Peek, + NumberInt, PartialMode, Peek, }; fn json_vec(jiter: &mut Jiter, peek: Option) -> JiterResult> { @@ -1286,7 +1286,7 @@ fn jiter_invalid_numbers_expected_some_value() { fn value_owned() -> JsonValue<'static> { let s = r#" { "int": 1, "const": true, "float": 1.2, "array": [1, false, null]}"#.to_string(); - JsonValue::parse_owned(s.as_bytes(), false, false).unwrap() + JsonValue::parse_owned(s.as_bytes(), false, PartialMode::Off).unwrap() } #[test] @@ -1655,9 +1655,30 @@ fn test_unicode_roundtrip() { } #[test] -fn test_value_partial_array() { +fn test_value_partial_array_on() { let json_bytes = br#"["string", true, null, 1, "foo"#; - let value = JsonValue::parse_with_config(json_bytes, false, true).unwrap(); + let value = JsonValue::parse_with_config(json_bytes, false, PartialMode::On).unwrap(); + assert_eq!( + value, + JsonValue::Array(Arc::new(smallvec![ + JsonValue::Str("string".into()), + JsonValue::Bool(true), + JsonValue::Null, + JsonValue::Int(1), + ])) + ); + // test all position in the string + for i in 1..json_bytes.len() { + let partial_json = &json_bytes[..i]; + let value = JsonValue::parse_with_config(partial_json, false, PartialMode::On).unwrap(); + assert!(matches!(value, JsonValue::Array(_))); + } +} + +#[test] +fn test_value_partial_array_trailing_strings() { + let json_bytes = br#"["string", true, null, 1, "foo"#; + let value = JsonValue::parse_with_config(json_bytes, false, PartialMode::TrailingStrings).unwrap(); assert_eq!( value, JsonValue::Array(Arc::new(smallvec![ @@ -1671,7 +1692,7 @@ fn test_value_partial_array() { // test all position in the string for i in 1..json_bytes.len() { let partial_json = &json_bytes[..i]; - let value = JsonValue::parse_with_config(partial_json, false, true).unwrap(); + let value = JsonValue::parse_with_config(partial_json, false, PartialMode::TrailingStrings).unwrap(); assert!(matches!(value, JsonValue::Array(_))); } } @@ -1679,7 +1700,7 @@ fn test_value_partial_array() { #[test] fn test_value_partial_object() { let json_bytes = br#"{"a": "value", "b": true, "c": false, "d": null, "e": 1, "f": 2.22, "g": ["#; - let value = JsonValue::parse_with_config(json_bytes, false, true).unwrap(); + let value = JsonValue::parse_with_config(json_bytes, false, PartialMode::TrailingStrings).unwrap(); let obj = match value { JsonValue::Object(obj) => obj, _ => panic!("expected object"), @@ -1699,7 +1720,7 @@ fn test_value_partial_object() { // test all position in the string for i in 1..json_bytes.len() { let partial_json = &json_bytes[..i]; - let value = JsonValue::parse_with_config(partial_json, false, true).unwrap(); + let value = JsonValue::parse_with_config(partial_json, false, PartialMode::TrailingStrings).unwrap(); assert!(matches!(value, JsonValue::Object(_))); } } @@ -1712,7 +1733,7 @@ fn test_partial_pass1() { // test all position in the string for i in 1..json_bytes.len() { let partial_json = &json_bytes[..i]; - let value = JsonValue::parse_with_config(partial_json, false, true).unwrap(); + let value = JsonValue::parse_with_config(partial_json, false, PartialMode::TrailingStrings).unwrap(); assert!(matches!(value, JsonValue::Array(_))); } } @@ -1725,7 +1746,7 @@ fn test_partial_medium_response() { // test all position in the string for i in 1..json_bytes.len() { let partial_json = &json_bytes[..i]; - let value = JsonValue::parse_with_config(partial_json, false, true).unwrap(); + let value = JsonValue::parse_with_config(partial_json, false, PartialMode::TrailingStrings).unwrap(); assert!(matches!(value, JsonValue::Object(_))); } }