Skip to content

Commit

Permalink
Add an ArrayDeserializer to read a JSON array as a stream
Browse files Browse the repository at this point in the history
This mimics the StreamDeserializer API and implements issue serde-rs#404. Unlike
the StreamDeserializer, the ArrayDeserializer struct itself does not
keep track of the type of the array's elements; instead, the next()
method is generic, to support deserialization of arrays with values of
different types. Unfortunately, this means we can't implement the
Iterator trait.
  • Loading branch information
Yorhel committed Mar 25, 2019
1 parent bb58e6c commit 55f5929
Show file tree
Hide file tree
Showing 2 changed files with 231 additions and 0 deletions.
112 changes: 112 additions & 0 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,15 @@ impl<'de, R: Read<'de>> Deserializer<R> {
}
}

/// Consume this deserializer and wrap it in an `ArrayDeserializer`, which
/// hands out the elements of a JSON array one at a time.
pub fn into_array(self) -> ArrayDeserializer<'de, R> {
    // `started: false` records that the opening `[` has not been consumed
    // by the array deserializer yet.
    ArrayDeserializer {
        lifetime: PhantomData,
        started: false,
        de: self,
    }
}

/// Parse arbitrarily deep JSON structures without any consideration for
/// overflowing the stack.
///
Expand Down Expand Up @@ -2169,6 +2178,109 @@ where
}
}



//////////////////////////////////////////////////////////////////////////////

/// A streaming JSON array deserializer.
///
/// An array deserializer can be created from any JSON deserializer using the
/// `Deserializer::into_array` method.
///
/// The top-level data should be a JSON array, but each array element can consist of any JSON
/// value. An array deserializer only needs to keep a single array element in memory, and is
/// therefore preferable over deserializing into a container type such as `Vec` when the complete
/// array is too large to fit in memory.
///
/// The element type is chosen per call: `next` is generic over the type to
/// deserialize, so consecutive elements of one array may be read as different
/// types.
///
/// ```edition2018
/// use serde_json::{Deserializer, Value};
///
/// fn main() {
/// let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]";
///
/// let mut iter = Deserializer::from_str(data).into_array();
///
/// while let Some(value) = iter.next::<Value>() {
/// println!("{}", value.unwrap());
/// }
/// }
/// ```
pub struct ArrayDeserializer<'de, R> {
// The underlying JSON deserializer that every element is read from.
de: Deserializer<R>,
// True once the array's opening '[' has been consumed.
started: bool,
// Zero-sized marker: no other field mentions the `'de` lifetime parameter.
lifetime: PhantomData<&'de ()>,
}

impl<'de, R> ArrayDeserializer<'de, R>
where
    R: read::Read<'de>,
{
    /// Create a JSON array deserializer from one of the possible serde_json
    /// input sources.
    ///
    /// Typically it is more convenient to use one of these methods instead:
    ///
    ///   - Deserializer::from_str(...).into_array()
    ///   - Deserializer::from_slice(...).into_array()
    ///   - Deserializer::from_reader(...).into_array()
    pub fn new(read: R) -> Self {
        ArrayDeserializer {
            de: Deserializer::new(read),
            started: false,
            lifetime: PhantomData,
        }
    }

    /// Skip whitespace and return the next byte without consuming it.
    ///
    /// Running out of input is reported as an `EofWhileParsingValue` error so
    /// that callers only have to handle "a byte" or "an error".
    fn peek_non_whitespace(&mut self) -> Result<u8> {
        match self.de.parse_whitespace() {
            Ok(Some(byte)) => Ok(byte),
            Ok(None) => Err(self.de.peek_error(ErrorCode::EofWhileParsingValue)),
            Err(err) => Err(err),
        }
    }

    /// Finish the array: consume the `]` the caller has just peeked and let
    /// the inner deserializer check what follows it.
    ///
    /// Returns `None` when that check succeeds, or `Some(Err(..))` with the
    /// error it reported.
    fn end<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
        self.de.eat_char();
        self.de.end().err().map(Err)
    }

    /// Deserialize one element of type `T` from the current position.
    fn next_value<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
        Some(T::deserialize(&mut self.de))
    }

    /// Return the next element from the array. Returns None if there are no more elements.
    ///
    /// The element is deserialized as `T`, which may differ from call to call.
    ///
    /// # Errors
    ///
    /// `Some(Err(..))` is returned when the input ends early, when the input
    /// does not start with `[`, when elements are not separated by `,`, when
    /// a `,` is directly followed by `]` (trailing comma), when the element
    /// cannot be deserialized as `T`, or when the check performed after the
    /// closing `]` fails.
    ///
    /// NOTE(review): nothing records that the closing `]` has been consumed,
    /// so calling `next` again after it returned `None` inspects the input
    /// once more (at end of input this yields an EOF error rather than
    /// `None`). Callers should stop at the first `None`.
    pub fn next<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
        let byte = match self.peek_non_whitespace() {
            Ok(byte) => byte,
            Err(err) => return Some(Err(err)),
        };

        match byte {
            // First call: step into the array.
            b'[' if !self.started => {
                self.started = true;
                self.de.eat_char();

                // We have to peek at the next character here to handle an empty array.
                match self.peek_non_whitespace() {
                    Ok(b']') => self.end(),
                    Ok(_) => self.next_value(),
                    Err(err) => Some(Err(err)),
                }
            }
            b']' if self.started => self.end(),
            // A separator must be followed by another element, never by `]`.
            b',' if self.started => {
                self.de.eat_char();

                match self.peek_non_whitespace() {
                    Ok(b']') => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))),
                    Ok(_) => self.next_value(),
                    Err(err) => Some(Err(err)),
                }
            }
            // Anything else is neither the start of the array, a separator,
            // nor the end of the array.
            _ => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))),
        }
    }
}



//////////////////////////////////////////////////////////////////////////////

fn from_trait<'de, R, T>(read: R) -> Result<T>
Expand Down
119 changes: 119 additions & 0 deletions tests/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#![cfg(not(feature = "preserve_order"))]

extern crate serde;

#[macro_use]
extern crate serde_json;

use serde_json::{Deserializer, Value};

// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740
#[cfg_attr(rustfmt, rustfmt_skip)]
macro_rules! test_stream {
    // Runs `$body` three times, each time binding `$array` to an array
    // deserializer built from `$input` through a different constructor:
    // `from_str`, `from_slice` and `from_reader`.
    ($input:expr, |$array:ident| $body:block) => {
        {
            let mut $array = Deserializer::from_str($input).into_array();
            $body
        }
        {
            let slice = $input.as_bytes();
            let mut $array = Deserializer::from_slice(slice).into_array();
            $body
        }
        {
            let mut reader = $input.as_bytes();
            let mut $array = Deserializer::from_reader(&mut reader).into_array();
            $body
        }
    };
}

#[test]
fn test_json_array_empty() {
    // An empty array ends the stream on the very first call.
    let input = "[]";

    test_stream!(input, |array| {
        let first = array.next::<Value>();
        assert!(first.is_none());
    });
}

#[test]
fn test_json_array_whitespace() {
    // Whitespace before, between and after the array's tokens is accepted.
    let input = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n";

    test_stream!(input, |array| {
        let first: Value = array.next().unwrap().unwrap();
        assert_eq!(first["x"], 42);

        let second: Value = array.next().unwrap().unwrap();
        assert_eq!(second["y"], 43);

        let after_last = array.next::<Value>();
        assert!(after_last.is_none());
    });
}

#[test]
fn test_json_array_truncated() {
    // The input stops in the middle of the second element.
    let input = "[{\"x\":40},{\"x\":";

    test_stream!(input, |array| {
        let complete: Value = array.next().unwrap().unwrap();
        assert_eq!(complete["x"], 40);

        let err = array.next::<Value>().unwrap().unwrap_err();
        assert!(err.is_eof());
    });
}

#[test]
fn test_json_array_primitive() {
    // Every element is read with a different target type.
    let input = "[{}, true, 1, [], 1.0, \"hey\", null]";

    test_stream!(input, |array| {
        let object: Value = array.next().unwrap().unwrap();
        assert_eq!(object, json!({}));

        let flag: bool = array.next().unwrap().unwrap();
        assert_eq!(flag, true);

        let integer: u32 = array.next().unwrap().unwrap();
        assert_eq!(integer, 1);

        let nested: Value = array.next().unwrap().unwrap();
        assert_eq!(nested, json!([]));

        let float: f32 = array.next().unwrap().unwrap();
        assert_eq!(float, 1.0);

        let text: String = array.next().unwrap().unwrap();
        assert_eq!(text, "hey");

        let null: Value = array.next().unwrap().unwrap();
        assert_eq!(null, Value::Null);

        let after_last = array.next::<Value>();
        assert!(after_last.is_none());
    });
}

#[test]
fn test_json_array_tailing_data() {
    // Anything other than whitespace after the closing `]` is an error.
    let input = "[]e";

    test_stream!(input, |array| {
        let err = array.next::<Value>().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "trailing characters at line 1 column 3");
    });
}

#[test]
fn test_json_array_tailing_comma() {
    // A `,` directly followed by `]` is rejected after the first element.
    let input = "[true,]";

    test_stream!(input, |array| {
        let first: Value = array.next().unwrap().unwrap();
        assert_eq!(first, true);

        let err = array.next::<Value>().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "trailing comma at line 1 column 7");
    });
}

#[test]
fn test_json_array_eof() {
    // Empty input: there is not even an opening `[` to read.
    let input = "";

    test_stream!(input, |array| {
        let err = array.next::<Value>().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "EOF while parsing a value at line 1 column 0");
    });
}

0 comments on commit 55f5929

Please sign in to comment.