From 6a3366f4fec70a1af7f141d2003af35fe4f7e479 Mon Sep 17 00:00:00 2001 From: kirk Date: Wed, 20 Nov 2024 15:34:55 +0000 Subject: [PATCH 1/3] separate type bounds and apply clippy lint fixes --- Cargo.toml | 3 + derive/Cargo.toml | 3 + src/collections/ordered_array_like.rs | 18 ++- src/collections/rope.rs | 43 +++--- src/lib.rs | 187 +++++++++++++++----------- 5 files changed, 149 insertions(+), 105 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 45c99b3..b94bc27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,3 +28,6 @@ structdiff-derive = { path = "derive", version = "=0.7.1" } bincode = "1.3.3" assert_unordered = "0.3.5" nanorand = { version = "0.7.0" } + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(unused)'] } \ No newline at end of file diff --git a/derive/Cargo.toml b/derive/Cargo.toml index 8e31705..ad028f9 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -19,3 +19,6 @@ serde = { version = "^1.0.0", optional = true, features = ["derive "serde" = ["dep:serde"] "debug_diffs" = [] "generated_setters" = [] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(unused)'] } \ No newline at end of file diff --git a/src/collections/ordered_array_like.rs b/src/collections/ordered_array_like.rs index 0f7fc4f..9e3fc2c 100644 --- a/src/collections/ordered_array_like.rs +++ b/src/collections/ordered_array_like.rs @@ -24,7 +24,6 @@ pub fn hirschberg<'src, 'target: 'src, T: Clone + PartialEq + 'target>( source_end: source.len(), }, ) - .into_iter() .collect::>() { empty if empty.is_empty() => None, @@ -52,7 +51,6 @@ pub fn levenshtein<'src, 'target: 'src, T: Clone + PartialEq + 'target>( source_end: source.len(), }, ) - .into_iter() .collect::>() { empty if empty.is_empty() => None, @@ -314,8 +312,8 @@ fn hirschberg_impl<'src, 'target: 'src, T: Clone + PartialEq + 'target>( (false, true) => { let iter: Box> = Box::new( target[target_start..target_end] - .into_iter() - .map(|a| *a) + .iter() + .copied() .enumerate() .map(|(i, v)| { let idx = source_end + i; @@ -367,8 +365,8 @@ fn hirschberg_impl<'src, 'target: 'src, T: Clone + PartialEq + 'target>( .unwrap(); let left = hirschberg_impl( - &target, - &source, + target, + source, Indices { target_end: target_split_index, source_end: source_split_index, @@ -377,8 +375,8 @@ fn hirschberg_impl<'src, 'target: 'src, T: Clone + PartialEq + 'target>( ); let right = hirschberg_impl( - &target, - &source, + target, + source, Indices { target_start: target_split_index, source_start: source_split_index, @@ -561,8 +559,8 @@ fn levenshtein_impl<'src, 'target: 'src, T: Clone + PartialEq + 'target>( changelist_from_change_table( table, - &target, - &source, + target, + source, Indices { target_start, target_end, diff --git a/src/collections/rope.rs b/src/collections/rope.rs index c24691b..42622b9 100644 --- a/src/collections/rope.rs +++ b/src/collections/rope.rs @@ -82,15 +82,15 @@ impl From for Key { } } -impl Into for Key { - fn into(self) -> usize { - self.0.load(Relaxed) +impl From for usize { + fn from(val: Key) -> Self { + val.0.load(Relaxed) } } -impl Into for &Key { - fn into(self) -> usize { - self.0.load(Relaxed) +impl From<&Key> for usize { + fn from(val: &Key) -> Self { + val.0.load(Relaxed) } } @@ -127,8 +127,7 @@ impl Index for Rope { .iter() .skip_while(|(k, content)| Into::::into(*k) + content.len() < index + 1) .next() - .map(|(k, content)| content.get(index - Into::::into(k))) - .flatten() + .and_then(|(k, content)| content.get(index - Into::::into(k))) .unwrap() } } @@ -139,8 +138,7 @@ impl IndexMut for Rope { .iter_mut() .skip_while(|(k, content)| Into::::into(*k) + content.len() < index + 1) .next() - .map(|(k, content)| content.get_mut(index - Into::::into(k))) - .flatten() + .and_then(|(k, content)| content.get_mut(index - Into::::into(k))) .unwrap() } } @@ -173,7 +171,13 @@ impl<'rope, T: 'rope> Iterator for Iter<'rope, T> { .unwrap_or_default(); while new_in_key < max_in_slot { - if let Some(_) = self.self_ref.0.get(key).and_then(|v| v.get(new_in_key)) { + if self + .self_ref + .0 + .get(key) + .and_then(|v| v.get(new_in_key)) + .is_some() + { self.key = Into::::into(key); self.in_key = new_in_key; return ret; @@ -238,11 +242,11 @@ impl<'rope, T: 'rope> IntoIterator for &'rope Rope { impl FromIterator for Rope { fn from_iter>(iter: C) -> Self { - let mut iter = iter.into_iter(); + let iter = iter.into_iter(); let mut counter = 0; let mut current = VecDeque::with_capacity(MAX_SLOT_SIZE); let mut map = BTreeMap::new(); - while let Some(item) = iter.next() { + for item in iter { current.push_back(item); counter += 1; if counter % DEF_SLOT_SIZE == 0 { @@ -261,6 +265,12 @@ impl FromIterator for Rope { } } +impl Default for Rope { + fn default() -> Self { + Self::new() + } +} + impl Rope { pub fn new() -> Self { Self(BTreeMap::from([( @@ -309,8 +319,7 @@ impl Rope { let prev_high_index = self .0 .range(..key) - .rev() - .next() + .next_back() .map(|(k, _)| k.clone()) .unwrap_or_default(); let keys: Vec = self @@ -323,7 +332,7 @@ impl Rope { let mut hold = VecDeque::::with_capacity(0); for key in keys.iter() { - let entry = self.0.get_mut(&key).unwrap(); + let entry = self.0.get_mut(key).unwrap(); if entry.is_empty() { continue; } @@ -358,7 +367,7 @@ impl Rope { } // take the empty holder back and leave the values in the map entry - std::mem::swap(self.0.get_mut(&key).unwrap(), &mut hold); + std::mem::swap(self.0.get_mut(key).unwrap(), &mut hold); } self.0.retain(|_, v| !v.is_empty()); diff --git a/src/lib.rs b/src/lib.rs index 1ad6dc1..a8a94ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,92 +8,123 @@ pub use structdiff_derive::Difference; pub mod collections; +#[cfg(all(feature = "nanoserde", feature = "serde", feature = "debug_diffs"))] +pub(crate) mod __private { + use super::*; + pub trait StructDiffOwnedBound: + SerBin + DeBin + Serialize + DeserializeOwned + Clone + std::fmt::Debug + { + } + impl + StructDiffOwnedBound for T + { + } + + pub trait StructDiffRefBound: SerBin + Serialize + Clone + std::fmt::Debug {} + impl StructDiffRefBound for T {} +} + +#[cfg(all(feature = "nanoserde", not(feature = "serde"), feature = "debug_diffs"))] +pub(crate) mod __private { + use super::*; + + pub trait StructDiffOwnedBound: SerBin + DeBin + Clone + std::fmt::Debug {} + impl StructDiffOwnedBound for T {} + + pub trait StructDiffRefBound: SerBin + Clone + std::fmt::Debug {} + impl StructDiffRefBound for T {} +} + +#[cfg(all(feature = "serde", not(feature = "nanoserde"), feature = "debug_diffs"))] +pub(crate) mod __private { + use super::*; + + pub trait StructDiffOwnedBound: Serialize + DeserializeOwned + Clone + std::fmt::Debug {} + impl StructDiffOwnedBound for T {} + + pub trait StructDiffRefBound: Serialize + Clone + std::fmt::Debug {} + impl StructDiffRefBound for T {} +} + +#[cfg(all( + not(feature = "serde"), + not(feature = "nanoserde"), + feature = "debug_diffs" +))] +pub(crate) mod __private { + use super::*; + + pub trait StructDiffOwnedBound: Clone + std::fmt::Debug {} + impl StructDiffOwnedBound for T {} + + pub trait StructDiffRefBound: Clone + std::fmt::Debug {} + impl StructDiffRefBound for T {} +} + +#[cfg(all(feature = "nanoserde", feature = "serde", not(feature = "debug_diffs")))] +pub(crate) mod __private { + use super::*; + pub trait StructDiffOwnedBound: SerBin + DeBin + Serialize + DeserializeOwned + Clone {} + impl StructDiffOwnedBound for T {} + + pub trait StructDiffRefBound: SerBin + Serialize + Clone {} + impl StructDiffRefBound for T {} +} + +#[cfg(all( + feature = "nanoserde", + not(feature = "serde"), + not(feature = "debug_diffs") +))] +pub(crate) mod __private { + use super::*; + + pub trait StructDiffOwnedBound: SerBin + DeBin + Clone {} + impl StructDiffOwnedBound for T {} + + pub trait StructDiffRefBound: SerBin + Clone {} + impl StructDiffRefBound for T {} +} + +#[cfg(all( + feature = "serde", + not(feature = "nanoserde"), + not(feature = "debug_diffs") +))] +pub(crate) mod __private { + use super::*; + + pub trait StructDiffOwnedBound: Serialize + DeserializeOwned + Clone {} + impl StructDiffOwnedBound for T {} + + pub trait StructDiffRefBound: Serialize + Clone {} + impl StructDiffRefBound for T {} +} + +#[cfg(all( + not(feature = "serde"), + not(feature = "nanoserde"), + not(feature = "debug_diffs") +))] +pub(crate) mod __private { + + pub trait StructDiffOwnedBound: Clone {} + impl StructDiffOwnedBound for T {} + + pub trait StructDiffRefBound: Clone {} + impl StructDiffRefBound for T {} +} + pub trait StructDiff { /// A generated type used to represent the difference /// between two instances of a struct which implements /// the StructDiff trait. - #[cfg(all(feature = "nanoserde", feature = "serde", feature = "debug_diffs"))] - type Diff: SerBin + DeBin + Serialize + DeserializeOwned + Clone + std::fmt::Debug; - #[cfg(all(feature = "nanoserde", not(feature = "serde"), feature = "debug_diffs"))] - type Diff: SerBin + DeBin + Clone + std::fmt::Debug; - #[cfg(all(feature = "serde", not(feature = "nanoserde"), feature = "debug_diffs"))] - type Diff: Serialize + DeserializeOwned + Clone + std::fmt::Debug; - #[cfg(all( - not(feature = "serde"), - not(feature = "nanoserde"), - feature = "debug_diffs" - ))] - type Diff: Clone + std::fmt::Debug; - #[cfg(all(feature = "nanoserde", feature = "serde", not(feature = "debug_diffs")))] - type Diff: SerBin + DeBin + Serialize + DeserializeOwned + Clone; - #[cfg(all( - feature = "nanoserde", - not(feature = "serde"), - not(feature = "debug_diffs") - ))] - type Diff: SerBin + DeBin + Clone; - #[cfg(all( - feature = "serde", - not(feature = "nanoserde"), - not(feature = "debug_diffs") - ))] - type Diff: Serialize + DeserializeOwned + Clone; - #[cfg(all( - not(feature = "serde"), - not(feature = "nanoserde"), - not(feature = "debug_diffs") - ))] - type Diff: Clone; + type Diff: __private::StructDiffOwnedBound; /// A generated type used to represent the difference /// between two instances of a struct which implements /// the StructDiff trait (using references). - #[cfg(all(feature = "nanoserde", feature = "serde", feature = "debug_diffs"))] - type DiffRef<'target>: SerBin + Serialize + Clone + std::fmt::Debug + Into - where - Self: 'target; - #[cfg(all(feature = "nanoserde", not(feature = "serde"), feature = "debug_diffs"))] - type DiffRef<'target>: SerBin + Clone + std::fmt::Debug + Into - where - Self: 'target; - #[cfg(all(feature = "serde", not(feature = "nanoserde"), feature = "debug_diffs"))] - type DiffRef<'target>: Serialize + Clone + std::fmt::Debug + Into - where - Self: 'target; - #[cfg(all( - not(feature = "serde"), - not(feature = "nanoserde"), - feature = "debug_diffs" - ))] - type DiffRef<'target>: Clone + std::fmt::Debug + Into - where - Self: 'target; - #[cfg(all(feature = "nanoserde", feature = "serde", not(feature = "debug_diffs")))] - type DiffRef<'target>: SerBin + Serialize + Clone + Into - where - Self: 'target; - #[cfg(all( - feature = "nanoserde", - not(feature = "serde"), - not(feature = "debug_diffs") - ))] - type DiffRef<'target>: SerBin + Clone + Into - where - Self: 'target; - #[cfg(all( - feature = "serde", - not(feature = "nanoserde"), - not(feature = "debug_diffs") - ))] - type DiffRef<'target>: Serialize + Clone + Into - where - Self: 'target; - #[cfg(all( - not(feature = "serde"), - not(feature = "nanoserde"), - not(feature = "debug_diffs") - ))] - type DiffRef<'target>: Clone + Into + type DiffRef<'target>: __private::StructDiffRefBound + Into where Self: 'target; From 3a62e987967f6e0cdffce4919067fb9ab2beba6e Mon Sep 17 00:00:00 2001 From: kirk Date: Sat, 22 Feb 2025 17:49:16 +0000 Subject: [PATCH 2/3] remove DeBin bound on some DiffRef helpers --- src/collections/unordered_map_like_recursive.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/collections/unordered_map_like_recursive.rs b/src/collections/unordered_map_like_recursive.rs index 62f12a9..4be0e33 100644 --- a/src/collections/unordered_map_like_recursive.rs +++ b/src/collections/unordered_map_like_recursive.rs @@ -342,8 +342,8 @@ mod nanoserde_impls { impl SerBin for UnorderedMapLikeRecursiveChangeRef<'_, K, V> where - K: SerBin + PartialEq + Clone + DeBin, - V: SerBin + PartialEq + Clone + DeBin + StructDiff, + K: SerBin + PartialEq + Clone, + V: SerBin + PartialEq + Clone + StructDiff, { fn ser_bin(&self, output: &mut Vec) { match self { @@ -386,8 +386,8 @@ mod nanoserde_impls { impl SerBin for UnorderedMapLikeRecursiveDiffRef<'_, K, V> where - K: SerBin + PartialEq + Clone + DeBin, - V: SerBin + PartialEq + Clone + DeBin + StructDiff, + K: SerBin + PartialEq + Clone, + V: SerBin + PartialEq + Clone + StructDiff, { fn ser_bin(&self, output: &mut Vec) { match &self.0 { @@ -409,8 +409,8 @@ mod nanoserde_impls { impl SerBin for &UnorderedMapLikeRecursiveDiffRef<'_, K, V> where - K: SerBin + PartialEq + Clone + DeBin, - V: SerBin + PartialEq + Clone + DeBin + StructDiff, + K: SerBin + PartialEq + Clone, + V: SerBin + PartialEq + Clone + StructDiff, { #[inline(always)] fn ser_bin(&self, output: &mut Vec) { From f4eeca967b8ed115427bf9b527b9c7f417dbd2a4 Mon Sep 17 00:00:00 2001 From: kirk Date: Sat, 22 Feb 2025 18:23:18 +0000 Subject: [PATCH 3/3] change rope impl --- Cargo.toml | 8 +- benchmarks/Cargo.toml | 4 - benchmarks/benches/rope.rs | 182 --------- src/collections/mod.rs | 4 - src/collections/rope.rs | 676 ---------------------------------- src/collections/rope/mod.rs | 622 +++++++++++++++++++++++++++++++ src/collections/rope/slots.rs | 357 ++++++++++++++++++ 7 files changed, 983 insertions(+), 870 deletions(-) delete mode 100644 benchmarks/benches/rope.rs delete mode 100644 src/collections/rope.rs create mode 100644 src/collections/rope/mod.rs create mode 100644 src/collections/rope/slots.rs diff --git a/Cargo.toml b/Cargo.toml index b94bc27..30e4bd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "structdiff" -version = "0.7.1" +version = "0.7.2" edition = "2021" license = "Apache-2.0 OR MIT" repository = "https://github.com/knickish/structdiff" @@ -22,12 +22,12 @@ structdiff-derive = { path = "derive", version = "=0.7.1" } "generated_setters" = ["structdiff-derive/generated_setters"] "rustc_hash" = ["dep:rustc-hash"] "debug_asserts" = [] -"__rope_benchmarks" = [] [dev-dependencies] bincode = "1.3.3" assert_unordered = "0.3.5" -nanorand = { version = "0.7.0" } +nanorand = "0.7.0" +pretty_assertions = "1.4.1" [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(unused)'] } \ No newline at end of file +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(unused)'] } diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index d3bf669..d22b6ff 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -36,7 +36,3 @@ harness = false [[bench]] name = "large" harness = false - -[[bench]] -name = "rope" -harness = false diff --git a/benchmarks/benches/rope.rs b/benchmarks/benches/rope.rs deleted file mode 100644 index dca712e..0000000 --- a/benchmarks/benches/rope.rs +++ /dev/null @@ -1,182 +0,0 @@ -use std::time::Duration; - -use criterion::{black_box, criterion_group, BatchSize, Criterion}; -use nanorand::{Rng, WyRand}; -use structdiff::collections::rope::Rope; - -criterion::criterion_main!(benches); - -criterion_group!(benches, rope, vec); - -const GROUP_NAME: &str = "rope"; -const SAMPLE_SIZE: usize = 1000; -const MEASUREMENT_TIME: Duration = Duration::from_secs(5); - -fn rand_string(rng: &mut WyRand) -> String { - let base = vec![(); rng.generate_range::(5..15) as usize]; - base.into_iter() - .map(|_| rng.generate::() as u32) - .filter_map(char::from_u32) - .collect::() -} - -trait Random { - fn generate_random(rng: &mut WyRand) -> Self; - fn generate_random_large(rng: &mut WyRand) -> Self; - fn random_mutate(self, rng: &mut WyRand) -> Self; - fn random_mutate_large(self, rng: &mut WyRand) -> Self; -} - -impl Random for Rope { - fn generate_random(rng: &mut WyRand) -> Self { - (0..rng.generate_range::(5..15)) - .map(|_| rand_string(rng)) - .into_iter() - .collect() - } - - fn generate_random_large(rng: &mut WyRand) -> Self { - (0..rng.generate_range::(0..(u16::MAX / 5))) - .map(|_| rand_string(rng)) - .into_iter() - .collect() - } - - fn random_mutate(mut self, rng: &mut WyRand) -> Self { - match rng.generate_range(0..4) { - 0 => self.insert(rng.generate_range(0..self.len()), rand_string(rng)), - 1 => self.remove(rng.generate_range(0..self.len())), - 2 => { - if self.len() == 0 { - return self; - } - let l = rng.generate_range(0..self.len()); - let r = rng.generate_range(0..self.len()); - self.swap(l, r) - } - 3 => { - let l = rng.generate_range(0..self.len()); - let r = rng.generate_range(l..self.len()); - self.drain(l..=r); - } - _ => (), - } - self - } - - fn random_mutate_large(self, rng: &mut WyRand) -> Self { - self.random_mutate(rng) - } -} - -impl Random for Vec { - fn generate_random(rng: &mut WyRand) -> Self { - (0..rng.generate_range::(5..15)) - .map(|_| rand_string(rng)) - .into_iter() - .collect() - } - - fn generate_random_large(rng: &mut WyRand) -> Self { - (0..rng.generate_range::(0..(u16::MAX / 5))) - .map(|_| rand_string(rng)) - .into_iter() - .collect() - } - - fn random_mutate(mut self, rng: &mut WyRand) -> Self { - match rng.generate_range(0..4) { - 0 => self.insert(rng.generate_range(0..self.len()), rand_string(rng)), - 1 => { - self.remove(rng.generate_range(0..self.len())); - } - 2 => { - if self.len() == 0 { - return self; - } - let l = rng.generate_range(0..self.len()); - let r = rng.generate_range(0..self.len()); - self.swap(l, r) - } - 3 => { - let l = rng.generate_range(0..self.len()); - let r = rng.generate_range(l..self.len()); - self.drain(l..=r); - } - _ => (), - }; - self - } - - fn random_mutate_large(self, rng: &mut WyRand) -> Self { - self.random_mutate(rng) - } -} - -fn rope(c: &mut Criterion) { - let mut group = c.benchmark_group(GROUP_NAME); - group - .sample_size(SAMPLE_SIZE) - .measurement_time(MEASUREMENT_TIME); - group.bench_function("small_rope", |b| { - b.iter_batched( - || { - let mut rng = WyRand::new(); - let start = Rope::generate_random(&mut rng); - (start, rng) - }, - |(start, mut rng)| { - black_box(start.random_mutate(&mut rng)); - }, - BatchSize::LargeInput, - ) - }); - group.bench_function("large_rope", |b| { - b.iter_batched( - || { - let mut rng = WyRand::new(); - let start = Rope::generate_random_large(&mut rng); - (start, rng) - }, - |(start, mut rng)| { - black_box(start.random_mutate(&mut rng)); - }, - BatchSize::LargeInput, - ) - }); - group.finish(); -} - -fn vec(c: &mut Criterion) { - let mut group = c.benchmark_group(GROUP_NAME); - group - .sample_size(SAMPLE_SIZE) - .measurement_time(MEASUREMENT_TIME); - group.bench_function("small_vec", |b| { - b.iter_batched( - || { - let mut rng = WyRand::new(); - let start = Vec::generate_random(&mut rng); - (start, rng) - }, - |(start, mut rng)| { - black_box(start.random_mutate(&mut rng)); - }, - BatchSize::LargeInput, - ) - }); - group.bench_function("large_vec", |b| { - b.iter_batched( - || { - let mut rng = WyRand::new(); - let start = Vec::generate_random_large(&mut rng); - (start, rng) - }, - |(start, mut rng)| { - black_box(start.random_mutate(&mut rng)); - }, - BatchSize::LargeInput, - ) - }); - group.finish(); -} diff --git a/src/collections/mod.rs b/src/collections/mod.rs index a0831e6..d23e50d 100644 --- a/src/collections/mod.rs +++ b/src/collections/mod.rs @@ -1,7 +1,3 @@ -#[cfg(feature = "__rope_benchmarks")] -pub mod rope; - -#[cfg(not(feature = "__rope_benchmarks"))] pub(crate) mod rope; pub mod unordered_array_like; diff --git a/src/collections/rope.rs b/src/collections/rope.rs deleted file mode 100644 index 42622b9..0000000 --- a/src/collections/rope.rs +++ /dev/null @@ -1,676 +0,0 @@ -use std::{ - cmp::Ordering::{Equal, Greater, Less}, - collections::{BTreeMap, VecDeque}, - ops::{Add, Index, IndexMut, RangeBounds, Sub}, - sync::atomic::{AtomicUsize, Ordering::Relaxed}, -}; - -const MAX_SLOT_SIZE: usize = 16; -const DEF_SLOT_SIZE: usize = 8; -const UNDERSIZED_SLOT: usize = 1; - -#[cfg_attr(test, derive(Clone))] -pub struct Rope(BTreeMap>); - -pub struct Iter<'rope, T> { - self_ref: &'rope Rope, - key: usize, - in_key: usize, - exhausted: bool, -} -pub struct IntoIter { - self_own: Rope, - internal: Option< as IntoIterator>::IntoIter>, -} - -#[derive(Debug, Default)] -#[repr(transparent)] -struct Key(AtomicUsize); - -impl Clone for Key { - #[inline] - fn clone(&self) -> Self { - Key::from(Into::::into(self)) - } -} - -impl Add for Key { - type Output = Key; - - fn add(self, rhs: Self) -> Self::Output { - self.0.fetch_add(k_load(rhs.0), Relaxed); - self - } -} - -impl Add for Key { - type Output = Key; - - fn add(self, rhs: usize) -> Self::Output { - self.0.fetch_add(rhs, Relaxed); - self - } -} - -impl Sub for Key { - type Output = Key; - - fn sub(self, rhs: Self) -> Self::Output { - self.0.fetch_sub(k_load(rhs.0), Relaxed); - self - } -} - -impl Sub for Key { - type Output = Key; - - fn sub(self, rhs: usize) -> Self::Output { - self.0.fetch_sub(rhs, Relaxed); - self - } -} - -impl PartialEq for Key { - fn eq(&self, other: &Self) -> bool { - self.0.load(Relaxed) == other.0.load(Relaxed) - } -} - -impl From for Key { - fn from(value: usize) -> Self { - Self(AtomicUsize::new(value)) - } -} - -impl From for usize { - fn from(val: Key) -> Self { - val.0.load(Relaxed) - } -} - -impl From<&Key> for usize { - fn from(val: &Key) -> Self { - val.0.load(Relaxed) - } -} - -impl PartialOrd for Key { - fn partial_cmp(&self, other: &Self) -> Option { - self.0.load(Relaxed).partial_cmp(&other.0.load(Relaxed)) - } -} - -impl Eq for Key {} -impl Ord for Key { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - // this is a usize, unless the load fails - // we should always succeed at comparing - self.partial_cmp(other).unwrap() - } -} - -#[inline(always)] -fn k_load(k: AtomicUsize) -> usize { - k.load(Relaxed) -} - -#[inline(always)] -fn k_set(k: &AtomicUsize, val: usize) { - k.store(val, Relaxed) -} - -impl Index for Rope { - type Output = T; - - fn index(&self, index: usize) -> &Self::Output { - self.0 - .iter() - .skip_while(|(k, content)| Into::::into(*k) + content.len() < index + 1) - .next() - .and_then(|(k, content)| content.get(index - Into::::into(k))) - .unwrap() - } -} - -impl IndexMut for Rope { - fn index_mut(&mut self, index: usize) -> &mut T { - self.0 - .iter_mut() - .skip_while(|(k, content)| Into::::into(*k) + content.len() < index + 1) - .next() - .and_then(|(k, content)| content.get_mut(index - Into::::into(k))) - .unwrap() - } -} - -impl<'rope, T: 'rope> Iterator for Iter<'rope, T> { - type Item = &'rope T; - - fn next(&mut self) -> Option { - if self.exhausted { - return None; - } - - let ret = self - .self_ref - .0 - .get(&self.key.into()) - .and_then(|v| v.get(self.in_key)); - let mut new_in_key = self.in_key + 1; - for key in self - .self_ref - .0 - .keys() - .skip_while(|k| Into::::into(*k) != self.key) - { - let max_in_slot = self - .self_ref - .0 - .get(key) - .map(VecDeque::len) - .unwrap_or_default(); - - while new_in_key < max_in_slot { - if self - .self_ref - .0 - .get(key) - .and_then(|v| v.get(new_in_key)) - .is_some() - { - self.key = Into::::into(key); - self.in_key = new_in_key; - return ret; - } - new_in_key += 1; - } - new_in_key = 0; - } - self.exhausted = true; - ret - } -} - -impl Iterator for IntoIter { - type Item = T; - - fn next(&mut self) -> Option { - if let ret @ Some(_) = self.internal.as_mut().and_then(|internal| internal.next()) { - return ret; - } - - while let Some(mut vec_iter) = self.self_own.0.pop_first().map(|(_, vec)| vec.into_iter()) { - let ret @ Some(_) = vec_iter.next() else { - continue; - }; - - self.internal = Some(vec_iter); - return ret; - } - - None - } -} - -impl IntoIterator for Rope { - type Item = T; - - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - IntoIter { - self_own: self, - internal: None, - } - } -} - -impl<'rope, T: 'rope> IntoIterator for &'rope Rope { - type Item = &'rope T; - - type IntoIter = Iter<'rope, T>; - - fn into_iter(self) -> Self::IntoIter { - Iter { - self_ref: self, - key: 0, - in_key: 0, - exhausted: false, - } - } -} - -impl FromIterator for Rope { - fn from_iter>(iter: C) -> Self { - let iter = iter.into_iter(); - let mut counter = 0; - let mut current = VecDeque::with_capacity(MAX_SLOT_SIZE); - let mut map = BTreeMap::new(); - for item in iter { - current.push_back(item); - counter += 1; - if counter % DEF_SLOT_SIZE == 0 { - map.insert( - Key::from(counter - DEF_SLOT_SIZE), - std::mem::replace(&mut current, VecDeque::with_capacity(MAX_SLOT_SIZE)), - ); - } - } - - if !current.is_empty() { - map.insert(Key::from(counter - current.len()), current); - } - - Self(map) - } -} - -impl Default for Rope { - fn default() -> Self { - Self::new() - } -} - -impl Rope { - pub fn new() -> Self { - Self(BTreeMap::from([( - Key(AtomicUsize::default()), - VecDeque::with_capacity(MAX_SLOT_SIZE), - )])) - } - - #[inline] - fn key_for_index(&self, index: usize) -> Key { - use std::ops::Bound::*; - self.0 - .range((Unbounded, Included(Key::from(index)))) - .last() - .map(|(k, _)| k.clone()) - .unwrap_or_default() - } - - #[inline] - pub fn len(&self) -> usize { - self.0 - .last_key_value() - .map(|(k, v)| Into::::into(k) + v.len()) - .unwrap_or_default() - } - - fn renumber(&mut self, from: usize) { - let start_key = self.key_for_index(from); - let mut v_iter = self.0.range(&start_key..); - let start = v_iter - .next() - .map(|(k, v)| Into::::into(k) + v.len()) - .unwrap_or(0); - let mut total = v_iter.fold(start, |acc, (_, x)| acc + x.len()); - - let v_iter = self.0.range(&start_key..); - v_iter.rev().for_each(|(k, v)| { - total -= v.len(); - k_set(&k.0, total); - }); - } - - fn rebalance(&mut self, from: usize) { - use std::ops::Bound::*; - let key = self.key_for_index(from); - let prev_high_index = self - .0 - .range(..key) - .next_back() - .map(|(k, _)| k.clone()) - .unwrap_or_default(); - let keys: Vec = self - .0 - .range(&prev_high_index..) - .map(|(k, _)| k.clone()) - .collect(); - - let mut carry = VecDeque::::with_capacity(16); - let mut hold = VecDeque::::with_capacity(0); - - for key in keys.iter() { - let entry = self.0.get_mut(key).unwrap(); - if entry.is_empty() { - continue; - } - if ((DEF_SLOT_SIZE - UNDERSIZED_SLOT + 1)..=(DEF_SLOT_SIZE + (DEF_SLOT_SIZE / 2))) - .contains(&entry.len()) - && carry.is_empty() - { - break; - } - - // put the empty holder in the list for now - std::mem::swap(entry, &mut hold); - - 'inner: for (_inner_key, inner_entry) in self.0.range_mut((Excluded(key), Unbounded)) { - match (hold.len().cmp(&DEF_SLOT_SIZE), carry.len()) { - (Less, 0) => hold.extend( - inner_entry.drain(..(DEF_SLOT_SIZE - hold.len()).min(inner_entry.len())), - ), - (Equal, 0) => break 'inner, - (Greater, 0) => { - carry.extend(hold.drain(DEF_SLOT_SIZE..)); - break 'inner; - } - (_, _) => { - carry.extend(hold.drain(..)); - hold.extend(carry.drain(..DEF_SLOT_SIZE.min(carry.len()))); - if hold.len() == DEF_SLOT_SIZE { - break 'inner; - } - } - } - } - - // take the empty holder back and leave the values in the map entry - std::mem::swap(self.0.get_mut(key).unwrap(), &mut hold); - } - - self.0.retain(|_, v| !v.is_empty()); - self.renumber(prev_high_index.into()); - - // fix up the last entry with any carried values - match (carry.len(), self.0.last_entry()) { - (0, ..) => return, - (_, Some(mut l_entry)) => { - let l_entry = l_entry.get_mut(); - carry.extend(l_entry.drain(..)); - l_entry.extend(carry.drain(..DEF_SLOT_SIZE.min(carry.len()))); - } - _ => (), - } - - // add any remaining carry values into new slots at the end - let mut new_key = self.len(); - while !carry.is_empty() { - let carry_len = carry.len(); - match carry_len > DEF_SLOT_SIZE { - true => { - self.0 - .insert(Key::from(new_key), carry.drain(..DEF_SLOT_SIZE).collect()); - new_key += DEF_SLOT_SIZE; - } - false => { - self.0.insert(Key::from(new_key), carry); - return; - } - } - } - } - - pub fn insert(&mut self, index: usize, element: T) { - let key = self.key_for_index(index); - let vec = self.0.entry(key.clone()).or_default(); - vec.insert(index - Into::::into(key), element); - match vec.len() { - oversized if (0..MAX_SLOT_SIZE).contains(&oversized) => self.renumber(index), - _ => self.rebalance(index), - } - } - - pub fn remove(&mut self, index: usize) { - let key = self.key_for_index(index); - let vec = self.0.get_mut(&key).unwrap(); - vec.remove(index - Into::::into(&key)); - match vec.len() { - 0..=UNDERSIZED_SLOT => self.rebalance(Into::::into(&key)), - _ => self.renumber(key.into()), - } - } - - pub fn drain(&mut self, range: R) - where - R: RangeBounds, - { - use std::ops::Bound; - - let (l_idx, r_idx) = match (range.start_bound(), range.end_bound()) { - (Bound::Included(l_i), Bound::Included(r_i)) => (*l_i, *r_i), - (Bound::Included(l_i), Bound::Excluded(r_e)) => (*l_i, r_e - 1), - (Bound::Included(l_i), Bound::Unbounded) => (*l_i, self.len() - 1), - (Bound::Excluded(l_e), Bound::Included(r_i)) => (l_e + 1, *r_i), - (Bound::Excluded(l_e), Bound::Excluded(r_e)) => (l_e + 1, r_e - 1), - (Bound::Excluded(l_e), Bound::Unbounded) => (l_e + 1, self.len() - 1), - (Bound::Unbounded, Bound::Included(r_i)) => (0, *r_i), - (Bound::Unbounded, Bound::Excluded(r_e)) => (0, r_e - 1), - (Bound::Unbounded, Bound::Unbounded) => (0, self.len() - 1), - }; - - let [l_key, r_key] = [l_idx, r_idx].map(|idx| self.key_for_index(idx)); - - match l_key == r_key { - true => { - let v = self.0.get_mut(&l_key).expect("we just looked this key up"); - v.drain( - (l_idx - Into::::into(&l_key))..=(r_idx - Into::::into(&l_key)), - ); - if v.len() <= UNDERSIZED_SLOT { - self.rebalance(Into::::into(l_key).saturating_sub(1)); - } else { - self.renumber(Into::::into(&l_key).saturating_sub(1)); - } - } - false => { - self.0 - .get_mut(&l_key) - .unwrap() - .drain((l_idx - Into::::into(&l_key))..); - self.0 - .range_mut((Bound::Excluded(&l_key), Bound::Excluded(&r_key))) - .for_each(|(_, v)| v.clear()); - self.0 - .get_mut(&r_key) - .unwrap() - .drain(..=(r_idx - Into::::into(r_key))); - self.rebalance(l_idx); - } - } - } - - pub fn swap(&mut self, a: usize, b: usize) { - let [l_key, r_key] = [a, b].map(|idx| self.key_for_index(idx)); - match l_key == r_key { - true => self.0.get_mut(&l_key).unwrap().swap( - a - Into::::into(&l_key), - b - Into::::into(&l_key), - ), - false => { - // more complicated with safe rust than in stdlib Vec - let (rk, mut rv) = self.0.remove_entry(&r_key).unwrap(); - std::mem::swap( - self.0 - .get_mut(&l_key) - .unwrap() - .get_mut(a - Into::::into(l_key)) - .unwrap(), - rv.get_mut(b - Into::::into(r_key)).unwrap(), - ); - self.0.insert(rk, rv); - } - } - } -} - -#[cfg(test)] -impl std::fmt::Display for Rope { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for (key, vals) in self.0.iter() { - write!(f, "{}: [", Into::::into(key))?; - for val in vals { - write!(f, "{},", val)?; - } - write!(f, "];\n")?; - } - Ok(()) - } -} - -#[cfg(test)] -mod test { - use nanorand::{Rng, WyRand}; - - use super::Rope; - - fn rand_string(rng: &mut WyRand) -> String { - let base = vec![(); rng.generate_range::(1..=50)]; - base.into_iter() - .map(|_| rng.generate_range::(65..=90) as u32) - .filter_map(char::from_u32) - .collect::() - } - - trait Random { - fn generate_random(rng: &mut WyRand) -> Self; - fn generate_random_large(rng: &mut WyRand) -> Self; - fn random_mutate(self, mutation: Mutation) -> Self; - } - - impl Random for Rope { - fn generate_random(rng: &mut WyRand) -> Self { - (0..rng.generate_range::(5..15)) - .map(|_| rand_string(rng)) - .into_iter() - .collect() - } - - fn generate_random_large(rng: &mut WyRand) -> Self { - (0..rng.generate_range::(0..(u16::MAX / 5))) - .map(|_| rand_string(rng)) - .into_iter() - .collect() - } - - fn random_mutate(mut self, mutation: Mutation) -> Self { - match mutation { - Mutation::Insert(s, i) => self.insert(i, s), - Mutation::Remove(i) => self.remove(i), - Mutation::Swap(l, r) => self.swap(l, r), - Mutation::Drain(l, r) => self.drain(l..=r), - } - self - } - } - - #[derive(Debug, Clone)] - enum Mutation { - Insert(T, usize), - Remove(usize), - Swap(usize, usize), - Drain(usize, usize), - } - - impl Mutation { - fn random_mutation(rng: &mut WyRand, len: usize) -> Option> { - match rng.generate_range(0..4) { - 0 => Some(Self::Insert(rand_string(rng), rng.generate_range(0..=len))), - 1 => match len == 0 { - false => Some(Self::Remove(rng.generate_range(0..len))), - true => None, - }, - 2 => { - if len == 0 { - return None; - } - let l = rng.generate_range(0..len); - let r = rng.generate_range(0..len); - Some(Self::Swap(l, r)) - } - 3 => { - let l = rng.generate_range(0..len); - let r = rng.generate_range(l..len); - Some(Self::Drain(l, r)) - } - _ => None, - } - } - } - - impl Random for Vec { - fn generate_random(rng: &mut WyRand) -> Self { - (0..rng.generate_range::(5..15)) - .map(|_| rand_string(rng)) - .into_iter() - .collect() - } - - fn generate_random_large(rng: &mut WyRand) -> Self { - (0..rng.generate_range::(0..(u16::MAX / 5))) - .map(|_| rand_string(rng)) - .into_iter() - .collect() - } - - fn random_mutate(mut self, mutation: Mutation) -> Self { - match mutation { - Mutation::Insert(s, i) => self.insert(i, s), - Mutation::Remove(i) => { - self.remove(i); - } - Mutation::Swap(l, r) => self.swap(l, r), - Mutation::Drain(l, r) => { - self.drain(l..=r); - } - } - self - } - } - - fn test(generator: impl Fn(&mut WyRand) -> Vec, count: usize) { - let mut rng = WyRand::new(); - let mut start_vec = generator(&mut rng); - let mut start_rope = start_vec.clone().into_iter().collect::>(); - assert_eq!( - start_rope.clone().into_iter().collect::>(), - start_vec - ); - for _ in 0..count { - let prev_rope = start_rope.clone(); - let Some(mutation) = Mutation::random_mutation(&mut rng, start_vec.len()) else { - continue; - }; - - let sr_clone = start_rope.clone(); - let mut_clone = mutation.clone(); - let result = std::panic::catch_unwind(|| { - sr_clone.random_mutate(mut_clone); - }); - - let Ok(_) = result else { - println!("{:?}", mutation); - println!("prev_rope: {}", prev_rope); - panic!("Caught panic"); - }; - - start_rope = start_rope.random_mutate(mutation.clone()); - start_vec = start_vec.random_mutate(mutation.clone()); - - if start_rope.clone().into_iter().collect::>() != start_vec { - println!("{:?}", mutation); - println!("prev_rope: {}", prev_rope); - println!("curr_rope: {}", start_rope); - } - assert_eq!( - (&start_rope).into_iter().cloned().collect::>(), - start_rope.clone().into_iter().collect::>() - ); - assert_eq!( - (&start_rope).into_iter().cloned().collect::>(), - start_vec - ); - } - } - - #[test] - fn paired_small() { - test(Vec::generate_random, 1_000_000) - } - - #[test] - fn paired_large() { - test(Vec::generate_random_large, 500) - } -} diff --git a/src/collections/rope/mod.rs b/src/collections/rope/mod.rs new file mode 100644 index 0000000..e358749 --- /dev/null +++ b/src/collections/rope/mod.rs @@ -0,0 +1,622 @@ +use std::{ + cmp::Ordering::{Equal, Greater, Less}, + collections::VecDeque, + ops::{Index, IndexMut, RangeBounds}, +}; + +mod slots; + +const MAX_SLOT_SIZE: usize = 16; +const BASE_SLOT_SIZE: usize = 8; +const UNDERSIZED_SLOT: usize = 1; + +type Container = slots::ArrayMap; + +#[cfg_attr(test, derive(Clone))] +pub struct Rope(Vec>); + +pub struct Iter<'rope, T> { + self_ref: &'rope Rope, + key: usize, + in_key: usize, + exhausted: bool, +} +pub struct IntoIter { + self_own: VecDeque>, + internal: Option< as IntoIterator>::IntoIter>, +} + +impl Index for Rope { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + let mut seen = 0; + for entry in self.0.iter() { + seen += entry.len(); + if seen > index { + seen -= entry.len(); + return &entry[index - seen]; + } + } + panic!("Index is {index} but len is {seen}") + } +} + +impl IndexMut for Rope { + fn index_mut(&mut self, index: usize) -> &mut T { + let mut seen = 0; + for entry in self.0.iter_mut() { + seen += entry.len(); + if seen > index { + seen -= entry.len(); + return &mut entry[index - seen]; + } + } + panic!("Index is {index} but len is {seen}") + } +} + +impl<'rope, T: 'rope> Iterator for Iter<'rope, T> { + type Item = &'rope T; + + fn next(&mut self) -> Option { + if self.exhausted { + return None; + } + + let ret = &self.self_ref.0[self.key][self.in_key]; + + self.in_key += 1; + if self.in_key >= self.self_ref.0[self.key].len() { + self.in_key = 0; + self.key += 1; + } + + if self.key >= self.self_ref.0.len() { + self.exhausted = true; + } + + Some(ret) + } +} + +impl Iterator for IntoIter { + type Item = T; + + fn next(&mut self) -> Option { + if let ret @ Some(_) = self.internal.as_mut().and_then(|internal| internal.next()) { + return ret; + } + + while let Some(mut vec_iter) = self.self_own.pop_front().map(IntoIterator::into_iter) { + let ret @ Some(_) = vec_iter.next() else { + continue; + }; + + self.internal = Some(vec_iter); + return ret; + } + + None + } +} + +impl IntoIterator for Rope { + type Item = T; + + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter { + self_own: self.0.into(), + internal: None, + } + } +} + +impl<'rope, T: 'rope> IntoIterator for &'rope Rope { + type Item = &'rope T; + + type IntoIter = Iter<'rope, T>; + + fn into_iter(self) -> Self::IntoIter { + Iter { + self_ref: self, + key: 0, + in_key: 0, + exhausted: self.0.is_empty() || self.0[0].is_empty(), + } + } +} + +impl FromIterator for Rope { + fn from_iter>(iter: C) -> Self { + let mut iter = iter.into_iter().peekable(); + let mut map = Vec::new(); + while iter.peek().is_some() { + let arrmap = slots::ArrayMap::from_iter(iter.by_ref().take(8)); + map.push(arrmap); + if map.last().unwrap().len() != 8 { + break; + } + } + + Self(map) + } +} + +impl Default for Rope { + fn default() -> Self { + Self::new() + } +} + +impl Rope { + pub fn new() -> Self { + Self(Vec::from([slots::ArrayMap::new()])) + } + + pub fn iter(&self) -> Iter<'_, T> { + self.into_iter() + } + + #[inline] + fn _key_for_index(&self, index: usize) -> usize { + let mut seen = 0; + for (idx, entry) in self.0.iter().enumerate() { + seen += entry.len(); + if seen > index { + return idx; + } + } + self.0.len() + } + + #[inline] + fn key_with_count_for_index(&self, index: usize) -> (usize, usize) { + let mut seen = 0; + for (idx, entry) in self.0.iter().enumerate() { + seen += entry.len(); + if seen > index { + seen -= entry.len(); + return (idx, seen); + } + } + (self.0.len(), seen) + } + + #[inline] + fn key_with_count_for_index_from_prev( + &self, + index: usize, + prev: usize, + mut seen: usize, + ) -> (usize, usize) { + if seen > index { + // it's in the same chunk, return early + return (prev, seen); + } + for (idx, entry) in self.0.iter().enumerate().skip(prev) { + seen += entry.len(); + if seen > index { + seen -= entry.len(); + return (idx, seen); + } + } + (self.0.len(), seen) + } + + #[inline] + pub fn len(&self) -> usize { + self.0 + .iter() + .map(slots::ArrayMap::len) + .fold(0, std::ops::Add::add) + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn rebalance_from_key(&mut self, start_key: usize) { + let mut carry = VecDeque::with_capacity(16); + let mut hold = Container::new(); + for key in start_key..(self.0.len()) { + let entry = self.0.get_mut(key).unwrap(); + if entry.is_empty() { + continue; + } + + const LOW: usize = BASE_SLOT_SIZE - (BASE_SLOT_SIZE / 2); + const HIGH: usize = BASE_SLOT_SIZE + (BASE_SLOT_SIZE / 2); + if (LOW..=HIGH).contains(&entry.len()) && carry.is_empty() { + break; + } + + // put the empty holder in the list for now + std::mem::swap(entry, &mut hold); + + // adjust size of hold, either taking elements from later chunks or carrying them + match (hold.len().cmp(&BASE_SLOT_SIZE), carry.is_empty()) { + (Less, carry_empty) => { + if !carry_empty { + carry.extend(hold.drain(..)); + hold.extend(carry.drain(..BASE_SLOT_SIZE.min(carry.len()))); + } + + let mut iter = self.0.iter_mut().skip(key); + while let (Some(take_from), false) = (iter.next(), hold.len() == BASE_SLOT_SIZE) + { + hold.extend(take_from.drain( + ..(BASE_SLOT_SIZE.saturating_sub(hold.len())).min(take_from.len()), + )); + } + } + (Equal, true) => (), + (Equal | Greater, false) => { + carry.extend(hold.drain(..)); + hold.extend(carry.drain(..BASE_SLOT_SIZE.min(carry.len()))); + } + (Greater, true) => { + carry.extend(hold.drain(BASE_SLOT_SIZE..)); + } + } + + // take the empty holder back and leave the values in the map entry + std::mem::swap(self.0.get_mut(key).unwrap(), &mut hold); + } + + assert!(hold.is_empty()); + + self.0.retain(|v| !v.is_empty()); + + // fix up the last entry with any carried values + match (carry.len(), self.0.last_mut()) { + (0, ..) => { + return; + } + (_, Some(l_entry)) => { + l_entry.extend( + carry.drain(..(BASE_SLOT_SIZE.saturating_sub(l_entry.len())).min(carry.len())), + ); + } + _ => (), + } + + // add any remaining carry values into new slots at the end + while carry.len() > BASE_SLOT_SIZE { + self.0.push(Container::from_iter( + carry.drain(..BASE_SLOT_SIZE.min(carry.len())), + )); + } + if !carry.is_empty() { + self.0.push(Container::from_iter(carry)); + } + } + + pub fn insert(&mut self, index: usize, element: T) { + let (key, count) = self.key_with_count_for_index(index); + if key == self.0.len() { + self.0.push(Container::new()); + } + let vec = self.0.get_mut(key).unwrap(); + vec.insert(index - count, element); + if vec.len() == MAX_SLOT_SIZE { + self.rebalance_from_key(key); + } + } + + pub fn remove(&mut self, index: usize) { + let (key, count) = self.key_with_count_for_index(index); + let Some(vec) = self.0.get_mut(key) else { + panic!( + "Failed to remove item with index {index} from rope with {} elements", + self.len() + ); + }; + vec.remove(index - count); + if (0..=UNDERSIZED_SLOT).contains(&vec.len()) { + self.rebalance_from_key(key.saturating_sub(1)); + } + } + + pub fn drain(&mut self, range: R) + where + R: RangeBounds, + { + use std::ops::Bound; + + let (l_idx, r_idx) = match (range.start_bound(), range.end_bound()) { + (Bound::Included(l_i), Bound::Included(r_i)) => (*l_i, *r_i), + (Bound::Included(l_i), Bound::Excluded(r_e)) => (*l_i, r_e - 1), + (Bound::Included(l_i), Bound::Unbounded) => (*l_i, self.len() - 1), + (Bound::Excluded(l_e), Bound::Included(r_i)) => (l_e + 1, *r_i), + (Bound::Excluded(l_e), Bound::Excluded(r_e)) => (l_e + 1, r_e - 1), + (Bound::Excluded(l_e), Bound::Unbounded) => (l_e + 1, self.len() - 1), + (Bound::Unbounded, Bound::Included(r_i)) => (0, *r_i), + (Bound::Unbounded, Bound::Excluded(r_e)) => (0, r_e - 1), + (Bound::Unbounded, Bound::Unbounded) => (0, self.len() - 1), + }; + + let (l_key, l_key_count) = self.key_with_count_for_index(l_idx); + let (r_key, r_key_count) = + self.key_with_count_for_index_from_prev(r_idx, l_key, l_key_count); + + match l_key == r_key { + true => { + let v = self.0.get_mut(l_key).expect("we just looked this key up"); + v.drain((l_idx - l_key_count)..=(r_idx - l_key_count)); + if v.len() <= UNDERSIZED_SLOT { + self.rebalance_from_key(l_key.saturating_sub(1)); + } + } + false => { + let l_mut = self.0.get_mut(l_key).unwrap(); + l_mut.drain((l_idx - l_key_count)..); + let l_len = l_mut.len(); + let r_mut = self.0.get_mut(r_key).unwrap(); + r_mut.drain(..=(r_idx - r_key_count)); + let r_len = r_mut.len(); + let _ = self.0.drain((l_key + 1)..r_key); + + if l_len <= UNDERSIZED_SLOT || r_len <= UNDERSIZED_SLOT { + self.rebalance_from_key(l_key); + } + } + } + } + + pub fn swap(&mut self, a: usize, b: usize) { + let [a, b] = [a.min(b), a.max(b)]; + let (l_key, l_key_count) = self.key_with_count_for_index(a); + let (r_key, r_key_count) = self.key_with_count_for_index_from_prev(b, l_key, l_key_count); + match l_key == r_key { + true => self + .0 + .get_mut(l_key) + .unwrap() + .swap(a - l_key_count, b - l_key_count), + false => { + let (l, r) = self.0.split_at_mut(r_key); + std::mem::swap(&mut l[l_key][a - l_key_count], &mut r[0][b - r_key_count]); + } + } + } +} + +#[cfg(test)] +mod test { + use nanorand::{Rng, WyRand}; + + use super::{Rope, BASE_SLOT_SIZE, MAX_SLOT_SIZE}; + + #[derive(Debug, Clone)] + pub enum Mutation { + Insert(T, usize), + Remove(usize), + Swap(usize, usize), + Drain(usize, usize), + } + + pub(crate) trait Random { + fn generate_random(rng: &mut WyRand) -> Self; + fn generate_random_large(rng: &mut WyRand) -> Self; + fn random_mutate(self, mutation: Mutation) -> Self; + } + + pub fn rand_string(rng: &mut WyRand) -> String { + let base = vec![(); 8]; + base.into_iter() + .map(|_| rng.generate_range::(65..=90)) + .filter_map(char::from_u32) + .collect::() + } + + impl Mutation { + pub fn random_mutation(rng: &mut WyRand, len: usize) -> Option> { + match rng.generate_range(0..4) { + 0 => Some(Self::Insert(rand_string(rng), rng.generate_range(0..=len))), + 1 => match len == 0 { + false => Some(Self::Remove(rng.generate_range(0..len))), + true => None, + }, + 2 => { + if len == 0 { + return None; + } + let l = rng.generate_range(0..len); + let r = rng.generate_range(0..len); + if l == r { + None + } else { + Some(Self::Swap(l, r)) + } + } + 3 => { + let l = rng.generate_range(0..len); + let r = rng.generate_range(l..len); + Some(Self::Drain(l, r)) + } + _ => None, + } + } + } + + impl Random for Vec { + fn generate_random(rng: &mut WyRand) -> Self { + (0..rng.generate_range::(5..15)) + .map(|_| rand_string(rng)) + .collect() + } + + fn generate_random_large(rng: &mut WyRand) -> Self { + (0..rng.generate_range::(0..(u16::MAX / 5))) + .map(|_| rand_string(rng)) + .collect() + } + + fn random_mutate(mut self, mutation: Mutation) -> Self { + match mutation { + Mutation::Insert(s, i) => self.insert(i, s), + Mutation::Remove(i) => { + self.remove(i); + } + Mutation::Swap(l, r) => self.swap(l, r), + Mutation::Drain(l, r) => { + self.drain(l..=r); + } + } + self + } + } + + impl std::fmt::Display for Rope { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut seen = 0; + for vals in self.0.iter() { + write!(f, "{seen}: [")?; + for val in vals { + write!(f, "{},", val)?; + } + writeln!(f, "];")?; + seen += vals.len(); + } + Ok(()) + } + } + + impl Random for Rope { + fn generate_random(rng: &mut WyRand) -> Self { + (0..rng.generate_range::(5..15)) + .map(|_| rand_string(rng)) + .collect() + } + + fn generate_random_large(rng: &mut WyRand) -> Self { + (0..rng.generate_range::(0..(u16::MAX / 5))) + .map(|_| rand_string(rng)) + .collect() + } + + fn random_mutate(mut self, mutation: Mutation) -> Self { + match mutation { + Mutation::Insert(s, i) => self.insert(i, s), + Mutation::Remove(i) => self.remove(i), + Mutation::Swap(l, r) => self.swap(l, r), + Mutation::Drain(l, r) => self.drain(l..=r), + } + self + } + } + + fn test(generator: impl Fn(&mut WyRand) -> Vec, count: usize) { + let mut rng = WyRand::new(); + let mut start_vec = generator(&mut rng); + let mut start_rope = start_vec.clone().into_iter().collect::>(); + assert_eq!( + start_rope.clone().into_iter().collect::>(), + start_vec + ); + for _ in 0..count { + let prev_rope = start_rope.clone(); + let Some(mutation) = Mutation::random_mutation(&mut rng, start_vec.len()) else { + continue; + }; + + let sr_clone = start_rope.clone(); + let mut_clone = mutation.clone(); + let result = std::panic::catch_unwind(|| { + sr_clone.random_mutate(mut_clone); + }); + + let Ok(_) = result else { + println!("{:?}", mutation); + println!("prev_rope: {}", prev_rope); + panic!("Caught panic"); + }; + + start_rope = start_rope.random_mutate(mutation.clone()); + start_vec = start_vec.random_mutate(mutation.clone()); + + if start_rope.clone().into_iter().collect::>() != start_vec { + println!("{:?}", mutation); + println!("prev_rope: {}", prev_rope); + println!("curr_rope: {}", start_rope); + } + pretty_assertions::assert_eq!( + (&start_rope).into_iter().cloned().collect::>(), + start_rope.clone().into_iter().collect::>() + ); + pretty_assertions::assert_eq!( + (&start_rope).into_iter().cloned().collect::>(), + start_vec + ); + } + } + + #[test] + fn paired_small() { + test(Vec::generate_random, 1_000_000) + } + + #[test] + fn paired_large() { + test(Vec::generate_random_large, 100_000) + } + + #[test] + #[should_panic] + fn get_from_empty() { + #[expect(clippy::unnecessary_operation)] + Rope::<()>::new()[0]; + } + + #[test] + #[should_panic] + fn get_past_end() { + #[expect(clippy::unnecessary_operation)] + Rope::<()>::from_iter([(), ()])[2]; + } + + #[test] + fn get_last() { + for i in 1..33 { + assert_eq!( + Rope::from_iter(vec![(); i].into_iter().enumerate().map(|(i, _)| i))[i - 1], + i - 1 + ); + } + } + + #[test] + fn delete_rebalance() { + let arr_map = { + let mut collection = Rope::from_iter((0..).take(BASE_SLOT_SIZE)); + for i in (BASE_SLOT_SIZE..).take(9) { + collection.insert(i, i); + } + collection.remove(MAX_SLOT_SIZE); + collection + }; + + let vec = { + let mut collection = Vec::from_iter((0..).take(BASE_SLOT_SIZE)); + for i in (BASE_SLOT_SIZE..).take(9) { + collection.insert(i, i); + } + + collection.remove(MAX_SLOT_SIZE); + collection + }; + + assert_eq!( + vec.iter().collect::>(), + arr_map.iter().collect::>() + ); + assert_eq!( + vec.into_iter().collect::>(), + arr_map.into_iter().collect::>() + ); + } +} diff --git a/src/collections/rope/slots.rs b/src/collections/rope/slots.rs new file mode 100644 index 0000000..18ef1f3 --- /dev/null +++ b/src/collections/rope/slots.rs @@ -0,0 +1,357 @@ +use std::{ + fmt::Debug, + ops::{Index, IndexMut, RangeBounds}, +}; + +#[derive(Clone)] +pub(crate) struct ArrayMap([Option<(u8, T)>; N], usize); + +impl Debug for ArrayMap { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("ArrayMap") + .field(&self.0.iter().filter_map(|o| o.as_ref()).collect::>()) + .field(&self.1) + .finish() + } +} + +impl ArrayMap { + #[inline] + fn find_empty_slot(&self) -> Option { + self.0.iter().position(Option::is_none) + } + + #[inline] + fn find_empty_slots(&self) -> [Option; N] { + let mut ret = [const { None }; N]; + for (slot, idx) in self + .0 + .iter() + .enumerate() + .filter(|i| i.1.is_none()) + .map(|e| e.0) + .zip(0..) + { + ret[idx] = Some(slot); + } + ret + } + + fn get_lookups(&self) -> [Option; N] { + let list_of_logical_indices: [Option; N] = std::array::from_fn(|storage_idx| { + self.0[storage_idx] + .as_ref() + .map(|(logical_idx, _)| Some(*logical_idx)) + .unwrap_or_default() + }); + let mut lookup_tmp: [(usize, Option); N] = + std::array::from_fn(|storage_idx| (storage_idx, list_of_logical_indices[storage_idx])); + lookup_tmp.sort_unstable_by_key(|(_storage_idx, logical_idx)| *logical_idx); + let start_of_somes = lookup_tmp + .iter() + .position(|(_storage, logical)| logical.is_some()) + .unwrap_or_default(); + let lookups = std::array::from_fn(|i| { + lookup_tmp + .get(start_of_somes + i) + .map(|(storage, _logical)| *storage) + }); + lookups + } +} + +impl ArrayMap { + pub const fn new() -> Self { + if N > u8::MAX as usize { + panic!("N > u8::MAX is unsupported"); + } + Self([const { None }; N], 0) + } + + pub const fn len(&self) -> usize { + self.1 + } + + pub const fn is_empty(&self) -> bool { + self.1 == 0 + } + + pub fn insert(&mut self, position: usize, value: T) { + assert!( + position < N, + "Position {position} is greater than max position of {}", + N - 1 + ); + assert!( + self.0.iter().any(Option::is_none), + "No space to insert in ArrayMap with len = {}", + self.1 + ); + + // bump each following index number + self.0 + .iter_mut() + .filter_map(|o| o.as_mut().map(|some| &mut some.0)) + .filter(|i| **i as usize >= position) + .for_each(|i| *i += 1); + + // find a free slot and put it in + let slot_idx = self.find_empty_slot().expect("failed to find free slot"); + self.0[slot_idx] = Some((position as u8, value)); + + self.1 += 1; + } + + pub fn remove(&mut self, position: usize) -> T { + let u8_idx = position as u8; + let Some(val) = self + .0 + .iter_mut() + .find(|o| o.as_ref().map(|(i, _)| *i == u8_idx).unwrap_or_default()) + .and_then(Option::take) + .map(|o| o.1) + else { + panic!("No element found at position {}", position); + }; + + // lower each following index number + self.0 + .iter_mut() + .filter_map(|o| o.as_mut().map(|some| &mut some.0)) + .filter(|i| **i > u8_idx) + .for_each(|i| *i -= 1); + + self.1 -= 1; + val + } + + pub fn swap(&mut self, a: usize, b: usize) { + if a == b { + return; + } + + let mut indices = [a, b].map(|find| { + self.0 + .iter() + .position(|o| o.as_ref().map(|(i, _)| *i == find as u8).unwrap_or(false)) + .unwrap_or_else(|| panic!("unable to find item at idx: {}", find)) + }); + indices.sort(); + let [lower, upper] = indices; + let (l, r) = self.0.split_at_mut(upper); + std::mem::swap( + l[lower].as_mut().map(|o| &mut o.0).unwrap(), + r[0].as_mut().map(|o| &mut o.0).unwrap(), + ); + } + + pub fn drain(&mut self, range: R) -> Drain + where + R: RangeBounds, + { + // stores the highest removed value, so that later ones can be decremented + let mut max_removed: Option = None; + // store the length before removing things + let before = self.1; + + let removals = self + .0 + .iter_mut() + .filter(|o| matches!(o, Some((i, _)) if range.contains(&(*i as usize)))); + + // move the items to the new list and decrement the item count + let mut drained = [const { None }; N]; + for (idx, removal) in removals.enumerate() { + let removal_logical_idx = removal.as_ref().map(|o| o.0).unwrap(); + max_removed = max_removed + .map(|current| Some(current.max(removal_logical_idx))) + .unwrap_or_else(|| Some(removal_logical_idx)); + drained[idx] = removal.take(); + self.1 -= 1; + } + + drained.sort_by_key(|e| e.as_ref().map(|o| o.0)); + let ret = Self::from_iter(drained.into_iter().filter_map(|e| e.map(|o| o.1))); + + // decrement all indices after the last removed index by the + // number of items drained + if let Some(max) = max_removed { + let removed_count = before.abs_diff(self.1) as u8; + + self.0 + .iter_mut() + .filter_map(|o| o.as_mut().map(|some| &mut some.0)) + .filter(|i| **i > max) + .for_each(|i| *i -= removed_count); + } + + Drain(ret.into_iter()) + } + + pub fn extend>(&mut self, values: I) { + let free_slots = self.find_empty_slots(); + + for (v, loc) in values.zip(free_slots.into_iter().flatten()) { + self.0[loc] = Some((self.1 as u8, v)); + self.1 += 1; + } + } +} + +pub struct Drain( as IntoIterator>::IntoIter); + +impl Iterator for Drain { + type Item = T; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +impl DoubleEndedIterator for Drain { + fn next_back(&mut self) -> Option { + self.0.next_back() + } +} + +impl Index for ArrayMap { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + self.0 + .iter() + .filter_map(|o| o.as_ref()) + .find(|(idx, _)| *idx as usize == index) + .map(|(_, v)| v) + .unwrap_or_else(|| panic!("No element found at index {index}")) + } +} + +impl IndexMut for ArrayMap { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + self.0 + .iter_mut() + .filter_map(|o| o.as_mut()) + .find(|(idx, _)| *idx as usize == index) + .map(|(_, v)| v) + .unwrap() + } +} + +// self.iter() +const _: () = { + pub struct Iter<'rope, T, const N: usize> { + self_ref: &'rope ArrayMap, + // list of indices into the backing array, stored in logical array order + lookups: [Option; N], + pos: usize, + } + + impl<'rope, T, const N: usize> Iterator for Iter<'rope, T, N> { + type Item = &'rope T; + + fn next(&mut self) -> Option { + let v_ref = self + .lookups + .get(self.pos) + .copied() + .flatten() + .and_then(|idx| self.self_ref.0.get(idx)) + .and_then(|v| v.as_ref().map(|(_, v)| v))?; + + self.pos += 1; + Some(v_ref) + } + } + + impl<'rope, T, const N: usize> IntoIterator for &'rope ArrayMap { + type Item = &'rope T; + + type IntoIter = Iter<'rope, T, N>; + + fn into_iter(self) -> Self::IntoIter { + let lookups = self.get_lookups(); + Iter { + self_ref: self, + lookups, + + pos: 0, + } + } + } +}; + +// self.into_iter() +const _: () = { + pub struct Iter { + self_owned: ArrayMap, + // list of indices into the backing array, stored in logical array order + lookups: [Option; N], + pos: usize, + rev_pos: usize, + } + + impl Iterator for Iter { + type Item = T; + + fn next(&mut self) -> Option { + let v_ref = self + .lookups + .get(self.pos) + .copied() + .flatten() + .and_then(|idx| self.self_owned.0.get_mut(idx)) + .and_then(|v| v.take().map(|(_, v)| v))?; + + self.pos += 1; + Some(v_ref) + } + } + + impl DoubleEndedIterator for Iter { + fn next_back(&mut self) -> Option { + let v_ref = self + .lookups + .get(self.rev_pos) + .copied() + .flatten() + .and_then(|idx| self.self_owned.0.get_mut(idx)) + .and_then(|v| v.take().map(|(_, v)| v))?; + + self.rev_pos += 1; + Some(v_ref) + } + } + + impl IntoIterator for ArrayMap { + type Item = T; + + type IntoIter = Iter; + + fn into_iter(self) -> Self::IntoIter { + let lookups = self.get_lookups(); + let rev_pos = self.len().saturating_sub(1); + Iter { + self_owned: self, + lookups, + + pos: 0, + rev_pos, + } + } + } +}; + +impl FromIterator for ArrayMap { + fn from_iter>(iter: C) -> Self { + let mut ret = Self::new(); + let mut count = 0; + for (i, v) in iter.into_iter().enumerate() { + ret.0[i] = Some((i as u8, v)); + count += 1; + } + + ret.1 = count; + ret + } +}