Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
305 changes: 171 additions & 134 deletions Cargo.lock

Large diffs are not rendered by default.

17 changes: 12 additions & 5 deletions crates/rsonpath-lib/src/classification/memmem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use crate::{
input::{error::InputError, Input},
result::InputRecorder,
string_pattern::StringPattern,
string_pattern::{matcher::StringPatternMatcher, StringPattern},
BLOCK_SIZE,
};

Expand All @@ -16,14 +16,19 @@ pub trait Memmem<'i, 'b, 'r, I: Input, const N: usize> {
/// - `start_idx` &ndash; index of the start of search, either falling inside `first_block`,
/// or at the start of the next block.
///
/// # Returns
/// `None` if there was no match.
/// Otherwise, `Some((i, j, block))` where `i` and `j` delimit the match exactly,
/// and `block` is the input block in which the start of the match occurred.
///
/// # Errors
/// Errors when reading the underlying [`Input`] are propagated.
fn find_label(
&mut self,
first_block: Option<I::Block<'i, N>>,
start_idx: usize,
label: &StringPattern,
) -> Result<Option<(usize, I::Block<'i, N>)>, InputError>;
) -> Result<Option<(usize, usize, I::Block<'i, N>)>, InputError>;
}

pub(crate) mod nosimd;
Expand All @@ -39,19 +44,21 @@ pub(crate) mod sse2_32;
pub(crate) mod sse2_64;

pub(crate) trait MemmemImpl {
type Classifier<'i, 'b, 'r, I, R>: Memmem<'i, 'b, 'r, I, BLOCK_SIZE>
type Classifier<'i, 'b, 'r, I, SM, R>: Memmem<'i, 'b, 'r, I, BLOCK_SIZE>
where
I: Input + 'i,
SM: StringPatternMatcher,
<I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>: 'b,
R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>> + 'r,
'i: 'r;

fn memmem<'i, 'b, 'r, I, R>(
fn memmem<'i, 'b, 'r, I, SM, R>(
input: &'i I,
iter: &'b mut <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
) -> Self::Classifier<'i, 'b, 'r, I, R>
) -> Self::Classifier<'i, 'b, 'r, I, SM, R>
where
I: Input,
SM: StringPatternMatcher,
R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>>,
'i: 'r;
}
108 changes: 65 additions & 43 deletions crates/rsonpath-lib/src/classification/memmem/avx2_32.rs
Original file line number Diff line number Diff line change
@@ -1,59 +1,68 @@
use super::{shared::mask_32, shared::vector_256, *};
use crate::input::{error::InputErrorConvertible, InputBlockIterator};
use std::marker::PhantomData;

const SIZE: usize = 32;

pub(crate) struct Constructor;

impl MemmemImpl for Constructor {
type Classifier<'i, 'b, 'r, I, R>
= Avx2MemmemClassifier32<'i, 'b, 'r, I, R>
type Classifier<'i, 'b, 'r, I, SM, R>
= Avx2MemmemClassifier32<'i, 'b, 'r, I, SM, R>
where
I: Input + 'i,
SM: StringPatternMatcher,
<I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>: 'b,
R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>> + 'r,
'i: 'r;

fn memmem<'i, 'b, 'r, I, R>(
fn memmem<'i, 'b, 'r, I, SM, R>(
input: &'i I,
iter: &'b mut <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
) -> Self::Classifier<'i, 'b, 'r, I, R>
) -> Self::Classifier<'i, 'b, 'r, I, SM, R>
where
I: Input,
SM: StringPatternMatcher,
R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>>,
'i: 'r,
{
Self::Classifier { input, iter }
Self::Classifier::new(input, iter)
}
}

pub(crate) struct Avx2MemmemClassifier32<'i, 'b, 'r, I, R>
pub(crate) struct Avx2MemmemClassifier32<'i, 'b, 'r, I, SM, R>
where
I: Input,
R: InputRecorder<I::Block<'i, SIZE>> + 'r,
{
input: &'i I,
iter: &'b mut I::BlockIterator<'i, 'r, R, SIZE>,
phantom_data: PhantomData<SM>,
}

impl<'i, 'b, 'r, I, R> Avx2MemmemClassifier32<'i, 'b, 'r, I, R>
impl<'i, 'b, 'r, I, SM, R> Avx2MemmemClassifier32<'i, 'b, 'r, I, SM, R>
where
I: Input,
SM: StringPatternMatcher,
R: InputRecorder<I::Block<'i, SIZE>>,
'i: 'r,
{
#[inline]
#[allow(dead_code)]
pub(crate) fn new(input: &'i I, iter: &'b mut I::BlockIterator<'i, 'r, R, SIZE>) -> Self {
Self { input, iter }
Self {
input,
iter,
phantom_data: PhantomData,
}
}

#[inline(always)]
unsafe fn find_empty(
&mut self,
label: &StringPattern,
pattern: &StringPattern,
mut offset: usize,
) -> Result<Option<(usize, I::Block<'i, SIZE>)>, InputError> {
) -> Result<Option<(usize, usize, I::Block<'i, SIZE>)>, InputError> {
let classifier = vector_256::BlockClassifier256::new(b'"', b'"');
let mut previous_block: u32 = 0;

Expand All @@ -63,12 +72,8 @@ where
let mut result = (previous_block | (classified.first << 1)) & classified.second;
while result != 0 {
let idx = result.trailing_zeros() as usize;
if self
.input
.is_member_match(offset + idx - 1, offset + idx + 1, label)
.e()?
{
return Ok(Some((offset + idx - 1, block)));
if let Some(to) = self.input.pattern_match_from::<SM>(offset + idx - 1, pattern).e()? {
return Ok(Some((offset + idx - 1, to, block)));
}
result &= !(1 << idx);
}
Expand All @@ -86,28 +91,36 @@ where
#[inline(always)]
unsafe fn find_letter(
&mut self,
label: &StringPattern,
pattern: &StringPattern,
mut offset: usize,
) -> Result<Option<(usize, I::Block<'i, SIZE>)>, InputError> {
let classifier = vector_256::BlockClassifier256::new(label.unquoted()[0], b'"');
let mut previous_block: u32 = 0;
) -> Result<Option<(usize, usize, I::Block<'i, SIZE>)>, InputError> {
let classifier = vector_256::BlockClassifier256::new(pattern.unquoted()[0], b'"');
let mut previous_slash: u32 = 0;
let mut previous_first: u32 = 0;
let mut previous_quote: u32 = 0;

while let Some(block) = self.iter.next().e()? {
let classified = classifier.classify_block(&block);

if let Some(res) = mask_32::find_in_mask(
if let Some((from, to)) = mask_32::find_in_mask::<_, SM>(
self.input,
label,
previous_block,
pattern,
previous_slash,
previous_quote,
previous_first,
classified.first,
classified.second,
classified.slashes,
classified.quotes,
offset,
)? {
return Ok(Some((res, block)));
return Ok(Some((from, to, block)));
}

offset += SIZE;
previous_block = classified.first >> (SIZE - 1);
previous_slash = classified.slashes >> (SIZE - 1);
previous_first = classified.first >> (SIZE - 1);
previous_quote = classified.quotes >> (SIZE - 2);
}

Ok(None)
Expand All @@ -116,43 +129,52 @@ where
#[inline(always)]
unsafe fn find_label_avx2(
&mut self,
label: &StringPattern,
pattern: &StringPattern,
mut offset: usize,
) -> Result<Option<(usize, I::Block<'i, SIZE>)>, InputError> {
if label.unquoted().is_empty() {
return self.find_empty(label, offset);
} else if label.unquoted().len() == 1 {
return self.find_letter(label, offset);
) -> Result<Option<(usize, usize, I::Block<'i, SIZE>)>, InputError> {
if pattern.unquoted().is_empty() {
return self.find_empty(pattern, offset);
} else if pattern.unquoted().len() == 1 {
return self.find_letter(pattern, offset);
}

let classifier = vector_256::BlockClassifier256::new(label.unquoted()[0], label.unquoted()[1]);
let mut previous_block: u32 = 0;
let classifier = vector_256::BlockClassifier256::new(pattern.unquoted()[0], pattern.unquoted()[1]);
let mut previous_slash: u32 = 0;
let mut previous_first: u32 = 0;
let mut previous_quote: u32 = 0;

while let Some(block) = self.iter.next().e()? {
let classified = classifier.classify_block(&block);

if let Some(res) = mask_32::find_in_mask(
if let Some((from, to)) = mask_32::find_in_mask::<_, SM>(
self.input,
label,
previous_block,
pattern,
previous_slash,
previous_quote,
previous_first,
classified.first,
classified.second,
classified.slashes,
classified.quotes,
offset,
)? {
return Ok(Some((res, block)));
return Ok(Some((from, to, block)));
}

offset += SIZE;
previous_block = classified.first >> (SIZE - 1);
previous_slash = classified.slashes >> (SIZE - 1);
previous_first = classified.first >> (SIZE - 1);
previous_quote = classified.quotes >> (SIZE - 2);
}

Ok(None)
}
}

impl<'i, 'b, 'r, I, R> Memmem<'i, 'b, 'r, I, SIZE> for Avx2MemmemClassifier32<'i, 'b, 'r, I, R>
impl<'i, 'b, 'r, I, SM, R> Memmem<'i, 'b, 'r, I, SIZE> for Avx2MemmemClassifier32<'i, 'b, 'r, I, SM, R>
where
I: Input,
SM: StringPatternMatcher,
R: InputRecorder<I::Block<'i, SIZE>>,
'i: 'r,
{
Expand All @@ -161,15 +183,15 @@ where
&mut self,
first_block: Option<I::Block<'i, SIZE>>,
start_idx: usize,
label: &StringPattern,
) -> Result<Option<(usize, I::Block<'i, SIZE>)>, InputError> {
pattern: &StringPattern,
) -> Result<Option<(usize, usize, I::Block<'i, SIZE>)>, InputError> {
if let Some(b) = first_block {
if let Some(res) = shared::find_label_in_first_block(self.input, b, start_idx, label)? {
if let Some(res) = shared::find_pattern_in_first_block::<_, SM, SIZE>(self.input, b, start_idx, pattern)? {
return Ok(Some(res));
}
}
let next_block_offset = self.iter.get_offset();
// SAFETY: target feature invariant
unsafe { self.find_label_avx2(label, next_block_offset) }
unsafe { self.find_label_avx2(pattern, next_block_offset) }
}
}
Loading
Loading