diff --git a/src/de.rs b/src/de.rs
index e3effc365..dfee5eb59 100644
--- a/src/de.rs
+++ b/src/de.rs
@@ -155,6 +155,24 @@ impl<'de, R: Read<'de>> Deserializer<R> {
         }
     }
 
+    /// Turn a JSON deserializer into an iterator over the elements of a
+    /// top-level JSON array, deserializing each element as a value of type `T`.
+    ///
+    /// Elements are parsed lazily, one per call to `next`, so the whole array
+    /// never has to be held in memory at once.
+    pub fn into_array_iter<T>(self) -> ArrayDeserializer<'de, R, T>
+    where
+        T: de::Deserialize<'de>,
+    {
+        ArrayDeserializer {
+            de: self,
+            started: false,
+            done: false,
+            output: PhantomData,
+            lifetime: PhantomData,
+        }
+    }
+
     /// Parse arbitrarily deep JSON structures without any consideration for
     /// overflowing the stack.
     ///
@@ -2149,6 +2167,155 @@ where
     }
 }
 
+//////////////////////////////////////////////////////////////////////////////
+
+/// Iterator that deserializes the elements of a JSON array one at a time.
+///
+/// An array deserializer can be created from any JSON deserializer using the
+/// `Deserializer::into_array_iter` method.
+///
+/// The top-level data must be a single JSON array, but each array element can
+/// be any JSON value. An array deserializer only needs to keep a single array
+/// element in memory, and is therefore preferable over deserializing into a
+/// container type such as `Vec` when the complete array is too large to fit
+/// in memory.
+///
+/// Iteration stops for good at the end of the array or at the first error:
+/// after yielding `None` or an `Err`, every later `next` call returns `None`.
+///
+/// ```edition2018
+/// use serde_json::{Deserializer, Value};
+///
+/// fn main() {
+///     let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]";
+///
+///     let iter = Deserializer::from_str(data).into_array_iter::<Value>();
+///
+///     for value in iter {
+///         println!("{}", value.unwrap());
+///     }
+/// }
+/// ```
+pub struct ArrayDeserializer<'de, R, T> {
+    de: Deserializer<R>,
+    /// True once the opening `[` has been consumed.
+    started: bool,
+    /// True once the array has ended or an error has been yielded.
+    done: bool,
+    output: PhantomData<T>,
+    lifetime: PhantomData<&'de ()>,
+}
+
+impl<'de, R, T> ArrayDeserializer<'de, R, T>
+where
+    R: read::Read<'de>,
+    T: de::Deserialize<'de>,
+{
+    /// Create a JSON array deserializer from one of the possible serde_json
+    /// input sources.
+    ///
+    /// Typically it is more convenient to use one of these methods instead:
+    ///
+    ///   - Deserializer::from_str(...).into_array_iter()
+    ///   - Deserializer::from_slice(...).into_array_iter()
+    ///   - Deserializer::from_reader(...).into_array_iter()
+    pub fn new(read: R) -> Self {
+        ArrayDeserializer {
+            de: Deserializer::new(read),
+            started: false,
+            done: false,
+            output: PhantomData,
+            lifetime: PhantomData,
+        }
+    }
+
+    /// Build the error item for `code` at the current peek position.
+    fn fail(&self, code: ErrorCode) -> Option<Result<T>> {
+        Some(Err(self.de.peek_error(code)))
+    }
+
+    /// Consume the closing `]` and check that only whitespace follows it.
+    fn end(&mut self) -> Option<Result<T>> {
+        self.de.eat_char();
+        self.de.end().err().map(Err)
+    }
+
+    /// Deserialize the array element that starts at the current position.
+    fn next_value(&mut self) -> Option<Result<T>> {
+        Some(de::Deserialize::deserialize(&mut self.de))
+    }
+
+    /// Consume the opening `[` and yield the first element, if there is one.
+    fn first_element(&mut self) -> Option<Result<T>> {
+        match self.de.parse_whitespace() {
+            Ok(Some(b'[')) => {
+                self.started = true;
+                self.de.eat_char();
+            }
+            Ok(Some(_)) => return self.fail(ErrorCode::ExpectedSomeValue),
+            Ok(None) => return self.fail(ErrorCode::EofWhileParsingValue),
+            Err(e) => return Some(Err(e)),
+        }
+
+        // Peek past the bracket so that an empty array ends the iteration
+        // instead of being handed to the element deserializer.
+        match self.de.parse_whitespace() {
+            Ok(Some(b']')) => self.end(),
+            Ok(Some(_)) => self.next_value(),
+            Ok(None) => self.fail(ErrorCode::EofWhileParsingList),
+            Err(e) => Some(Err(e)),
+        }
+    }
+
+    /// Consume the `,` or `]` that follows an element and yield the next
+    /// element, if there is one.
+    fn next_element(&mut self) -> Option<Result<T>> {
+        match self.de.parse_whitespace() {
+            Ok(Some(b']')) => return self.end(),
+            Ok(Some(b',')) => self.de.eat_char(),
+            Ok(Some(_)) => return self.fail(ErrorCode::ExpectedListCommaOrEnd),
+            Ok(None) => return self.fail(ErrorCode::EofWhileParsingList),
+            Err(e) => return Some(Err(e)),
+        }
+
+        // A `]` directly after the comma is a trailing comma, not the end.
+        match self.de.parse_whitespace() {
+            Ok(Some(b']')) => self.fail(ErrorCode::TrailingComma),
+            Ok(Some(_)) => self.next_value(),
+            Ok(None) => self.fail(ErrorCode::EofWhileParsingValue),
+            Err(e) => Some(Err(e)),
+        }
+    }
+}
+
+impl<'de, R, T> Iterator for ArrayDeserializer<'de, R, T>
+where
+    R: read::Read<'de>,
+    T: de::Deserialize<'de>,
+{
+    type Item = Result<T>;
+
+    fn next(&mut self) -> Option<Result<T>> {
+        if self.done {
+            return None;
+        }
+
+        let item = if self.started {
+            self.next_element()
+        } else {
+            self.first_element()
+        };
+
+        // Stop for good at the end of the array and after the first error;
+        // the parser position is not meaningful past either point.
+        match item {
+            Some(Ok(_)) => {}
+            _ => self.done = true,
+        }
+        item
+    }
+}
+
 //////////////////////////////////////////////////////////////////////////////
 
 fn from_trait<'de, R, T>(read: R) -> Result<T>
diff --git a/tests/array.rs b/tests/array.rs
new file mode 100644
index 000000000..0938a7995
--- /dev/null
+++ b/tests/array.rs
@@ -0,0 +1,146 @@
+#![cfg(not(feature = "preserve_order"))]
+
+extern crate serde;
+
+#[macro_use]
+extern crate serde_json;
+
+use serde_json::{Deserializer, Value};
+
+// Runs `$test` once per input source (`&str`, `&[u8]` and `io::Read`), with
+// `$stream` bound to a fresh array iterator over `$data`.
+//
+// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740
+#[cfg_attr(rustfmt, rustfmt_skip)]
+macro_rules! test_stream {
+    ($data:expr, $ty:ty, |$stream:ident| $test:block) => {
+        {
+            let de = Deserializer::from_str($data);
+            let mut $stream = de.into_array_iter::<$ty>();
+            $test
+        }
+        {
+            let de = Deserializer::from_slice($data.as_bytes());
+            let mut $stream = de.into_array_iter::<$ty>();
+            $test
+        }
+        {
+            let mut bytes = $data.as_bytes();
+            let de = Deserializer::from_reader(&mut bytes);
+            let mut $stream = de.into_array_iter::<$ty>();
+            $test
+        }
+    };
+}
+
+#[test]
+fn test_json_array_empty() {
+    let data = "[]";
+
+    test_stream!(data, Value, |stream| {
+        assert!(stream.next().is_none());
+    });
+}
+
+#[test]
+fn test_json_array_whitespace() {
+    let data = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n";
+
+    test_stream!(data, Value, |stream| {
+        assert_eq!(stream.next().unwrap().unwrap()["x"], 42);
+
+        assert_eq!(stream.next().unwrap().unwrap()["y"], 43);
+
+        assert!(stream.next().is_none());
+    });
+}
+
+#[test]
+fn test_json_array_truncated() {
+    let data = "[{\"x\":40},{\"x\":";
+
+    test_stream!(data, Value, |stream| {
+        assert_eq!(stream.next().unwrap().unwrap()["x"], 40);
+
+        assert!(stream.next().unwrap().unwrap_err().is_eof());
+    });
+}
+
+#[test]
+fn test_json_array_primitive() {
+    let data = "[{}, true, 1, [], 1.0, \"hey\", null]";
+
+    test_stream!(data, Value, |stream| {
+        assert_eq!(stream.next().unwrap().unwrap(), json!({}));
+
+        assert_eq!(stream.next().unwrap().unwrap(), true);
+
+        assert_eq!(stream.next().unwrap().unwrap(), 1);
+
+        assert_eq!(stream.next().unwrap().unwrap(), json!([]));
+
+        assert_eq!(stream.next().unwrap().unwrap(), 1.0);
+
+        assert_eq!(stream.next().unwrap().unwrap(), "hey");
+
+        assert_eq!(stream.next().unwrap().unwrap(), Value::Null);
+
+        assert!(stream.next().is_none());
+    });
+}
+
+#[test]
+fn test_json_array_trailing_data() {
+    let data = "[]e";
+
+    test_stream!(data, Value, |stream| {
+        let err = stream.next().unwrap().unwrap_err();
+        assert_eq!(err.to_string(), "trailing characters at line 1 column 3");
+    });
+}
+
+#[test]
+fn test_json_array_trailing_comma() {
+    let data = "[true,]";
+
+    test_stream!(data, Value, |stream| {
+        assert_eq!(stream.next().unwrap().unwrap(), true);
+
+        let err = stream.next().unwrap().unwrap_err();
+        assert_eq!(err.to_string(), "trailing comma at line 1 column 7");
+    });
+}
+
+#[test]
+fn test_json_array_eof() {
+    let data = "";
+
+    test_stream!(data, Value, |stream| {
+        let err = stream.next().unwrap().unwrap_err();
+        assert_eq!(err.to_string(), "EOF while parsing a value at line 1 column 0");
+    });
+}
+
+#[test]
+fn test_json_array_missing_comma() {
+    let data = "[1 2]";
+
+    test_stream!(data, Value, |stream| {
+        assert_eq!(stream.next().unwrap().unwrap(), 1);
+
+        assert!(stream.next().unwrap().unwrap_err().is_syntax());
+    });
+}
+
+#[test]
+fn test_json_array_fused() {
+    let data = "[1,]";
+
+    test_stream!(data, Value, |stream| {
+        assert_eq!(stream.next().unwrap().unwrap(), 1);
+
+        assert!(stream.next().unwrap().is_err());
+
+        assert!(stream.next().is_none());
+    });
+}