Skip to content

Commit

Permalink
Auto merge of #31 - behnam:serde, r=mbrubeck
Browse files Browse the repository at this point in the history
Add serde support and bump version to 0.3.0

Add `with_serde` feature, which implements serde for the new `struct
Level`, mainly used in `servo`, supporting serde `0.8`, `0.9` and `1.0`.

Add tests for the `with_serde` feature.  The `serde_tests` modules only
work for `serde:>=1.0`, though.

`servo` has a dependency on the loose implementation of `visual_runs()`,
which could not be improved without breaking the API, as it needs more
information to process the levels correctly, and the call-site in
`servo` does not have all the information needed and needs a non-trivial
change to work with the new improved version. Therefore, I have moved the
old version to a `deprecated` module, to be used for now until `servo`
is fixed and we drop the old implementation.

Bump version to `0.3.0`, as we are now ready for a release: can build
`servo` (patch ready) and `idna` crates.

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/unicode-bidi/31)
<!-- Reviewable:end -->
  • Loading branch information
bors-servo authored May 16, 2017
2 parents 0fa0cfe + efa3b2a commit 011d7cf
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 11 deletions.
9 changes: 8 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "unicode-bidi"
version = "0.2.6"
version = "0.3.0"
authors = ["The Servo Project Developers"]
license = "MIT / Apache-2.0"
description = "Implementation of the Unicode Bidirectional Algorithm"
Expand All @@ -15,3 +15,10 @@ name = "unicode_bidi"

[dependencies]
matches = "0.1"
serde = {version = ">=0.8, <2.0", optional = true}
serde_test = {version = ">=0.8, <2.0", optional = true}
serde_derive = {version = ">=0.8, <2.0", optional = true}

[features]
default = []
with_serde = ["serde", "serde_test", "serde_derive"]
88 changes: 88 additions & 0 deletions src/deprecated.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This module holds deprecated assets only.

use super::*;

/// Split a line into level runs and return those runs in visual (display) order.
///
/// NOTE: This implementation is incomplete. Correct reordering needs information about the text
/// itself, including the original BidiClass property of each character, which this function does
/// not receive. Please see
/// [`BidiInfo::visual_runs()`](../struct.BidiInfo.html#method.visual_runs) for the improved
/// implementation.
///
/// `line` is a range of byte indices within `levels`.
///
/// # Panics
///
/// Panics if `line.start` or `line.end` is greater than `levels.len()`, and also when
/// `line.start` equals `levels.len()`, because the level at `line.start` is always read.
///
/// http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels
#[deprecated(since="0.3.0", note="please use `BidiInfo::visual_runs()` instead.")]
pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> {
    assert!(line.start <= levels.len());
    assert!(line.end <= levels.len());

    // Phase 1: cut the line into maximal stretches of identical level, remembering the lowest
    // and highest level seen along the way.
    let mut runs = Vec::new();
    let mut run_start = line.start;
    let mut current = levels[run_start];
    let mut lowest = current;
    let mut highest = current;

    for idx in (run_start + 1)..line.end {
        let next = levels[idx];
        if next == current {
            continue;
        }

        // The level changed: close the run in progress and open a new one at `idx`.
        runs.push(run_start..idx);
        run_start = idx;
        current = next;

        lowest = min(current, lowest);
        highest = max(current, highest);
    }
    runs.push(run_start..line.end);

    let total = runs.len();

    // Phase 2: reverse sequences of runs, from the highest level down to the lowest *odd* level.
    // http://www.unicode.org/reports/tr9/#L2
    let floor = lowest.new_lowest_ge_rtl().expect("Level error");

    while highest >= floor {
        let mut cursor = 0;
        while cursor < total {
            if levels[runs[cursor].start] >= highest {
                // `cursor` opens a sequence of runs at `highest` or above; the sequence ends at
                // the first later run whose level drops below `highest` (or at the last run).
                let stop = runs[cursor + 1..]
                    .iter()
                    .position(|run| levels[run.start] < highest)
                    .map_or(total, |offset| cursor + 1 + offset);

                runs[cursor..stop].reverse();
                cursor = stop;
            } else {
                cursor += 1;
            }
        }

        // Step down one level for the next pass.
        highest
            .lower(1)
            .expect("Lowering embedding level below zero");
    }

    runs
}
30 changes: 30 additions & 0 deletions src/level.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

//! Bidi Embedding Level
//!
//! See [`Level`](struct.Level.html) for more details.
//!
//! http://www.unicode.org/reports/tr9/#BD2

use std::convert::{From, Into};
Expand All @@ -26,6 +28,7 @@ use super::char_data::BidiClass;
///
/// http://www.unicode.org/reports/tr9/#BD2
#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
// `Serialize`/`Deserialize` are derived only when the crate is built with the `with_serde`
// feature; without it this attribute expands to nothing.
#[cfg_attr(feature = "with_serde", derive(Serialize, Deserialize))]
pub struct Level(u8);

pub const LTR_LEVEL: Level = Level(0);
Expand Down Expand Up @@ -338,3 +341,30 @@ mod tests {
assert_ne!(Level::vec(&[0, 1, 4, 125]), vec!["0", "1", "5", "125"]);
}
}

// Serde round-trip tests for `Level`; compiled only for test builds with `with_serde` enabled.
#[cfg(all(feature = "with_serde", test))]
mod serde_tests {
    use serde_test::{Token, assert_tokens};
    use super::*;

    /// Check that `level` maps to and from a `Level` newtype wrapping the `u8` value `raw`.
    fn assert_level_tokens(level: Level, raw: u8) {
        assert_tokens(
            &level,
            &[Token::NewtypeStruct { name: "Level" }, Token::U8(raw)],
        );
    }

    #[test]
    fn test_statics() {
        assert_level_tokens(Level::ltr(), 0);
        assert_level_tokens(Level::rtl(), 1);
    }

    #[test]
    fn test_new() {
        let level = Level::new(42).unwrap();
        assert_level_tokens(level, 42);
    }
}
73 changes: 63 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@
#[macro_use]
extern crate matches;

#[cfg(feature = "with_serde")]
#[macro_use]
extern crate serde_derive;

#[cfg(all(feature = "with_serde", test))]
extern crate serde_test;

pub mod deprecated;
pub mod format_chars;
pub mod level;

Expand Down Expand Up @@ -156,8 +164,8 @@ impl<'text> InitialInfo<'text> {
match isolate_stack.last() {
Some(&start) => {
if original_classes[start] == FSI {
// X5c. If the first strong character between FSI and its matching PDI
// is R or AL, treat it as RLI. Otherwise, treat it as LRI.
// X5c. If the first strong character between FSI and its matching
// PDI is R or AL, treat it as RLI. Otherwise, treat it as LRI.
for j in 0..chars::FSI.len_utf8() {
original_classes[start + j] =
if class == L { LRI } else { RLI };
Expand All @@ -166,8 +174,9 @@ impl<'text> InitialInfo<'text> {
}
None => {
if para_level.is_none() {
// P2. Find the first character of type L, AL, or R, while skipping any
// characters between an isolate initiator and its matching PDI.
// P2. Find the first character of type L, AL, or R, while skipping
// any characters between an isolate initiator and its matching
// PDI.
para_level = Some(
if class != L {
Level::rtl()
Expand Down Expand Up @@ -208,8 +217,9 @@ impl<'text> InitialInfo<'text> {

/// Bidi information of the text
///
/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a character
/// is multiple bytes wide, then its class and level will appear multiple times in these vectors.
/// The `original_classes` and `levels` vectors are indexed by byte offsets into the text. If a
/// character is multiple bytes wide, then its class and level will appear multiple times in these
/// vectors.
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
#[derive(Debug, PartialEq)]
pub struct BidiInfo<'text> {
Expand All @@ -232,8 +242,8 @@ pub struct BidiInfo<'text> {
impl<'text> BidiInfo<'text> {
/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
///
/// TODO: In early steps, check for special cases that allow later steps to be skipped. like text
/// that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
///
/// TODO: Support auto-RTL base direction
pub fn new(text: &str, default_para_level: Option<Level>) -> BidiInfo {
Expand Down Expand Up @@ -358,9 +368,8 @@ impl<'text> BidiInfo<'text> {
}
}

let mut runs = Vec::new();

// Find consecutive level runs.
let mut runs = Vec::new();
let mut start = line.start;
let mut level = levels[start];
let mut min_level = level;
Expand Down Expand Up @@ -440,6 +449,7 @@ fn assign_levels_to_removed_chars(para_level: Level, classes: &[BidiClass], leve
}
}


#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -695,3 +705,46 @@ mod tests {
);
}
}


// Serde round-trip test for the `levels` vector of a `BidiInfo`; compiled only for test builds
// with `with_serde` enabled.
#[cfg(all(feature = "with_serde", test))]
mod serde_tests {
    use serde_test::{Token, assert_tokens};
    use super::*;

    #[test]
    fn test_levels() {
        let text = "abc אבג";
        let bidi_info = BidiInfo::new(text, None);
        let levels = bidi_info.levels;
        assert_eq!(text.as_bytes().len(), 10);
        assert_eq!(levels.len(), 10);

        // One level per byte of `text`: four at level 0 followed by six at level 1.
        let raw_levels: [u8; 10] = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1];

        // Expected stream: a 10-element sequence in which every element is a `Level` newtype
        // wrapping a single `u8`.
        let mut expected = vec![Token::Seq { len: Some(10) }];
        for &raw in raw_levels.iter() {
            expected.push(Token::NewtypeStruct { name: "Level" });
            expected.push(Token::U8(raw));
        }
        expected.push(Token::SeqEnd);

        assert_tokens(&levels, &expected);
    }
}

0 comments on commit 011d7cf

Please sign in to comment.