Skip to content

Commit 7ab071c

Browse files
committed
Fixed major issue where AND NOT queries got AND combinations with all other NOT occurrences in the document.
That caused the combination with the largest distance to be accepted in the top rated results. This is not how it should be; the closest NOT should be detrimental to the AND. Now, it'll get the NOT as close as possible before yielding a value.
1 parent 67224d5 commit 7ab071c

File tree

2 files changed

+86
-46
lines changed

2 files changed

+86
-46
lines changed

src/query.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,11 @@ impl Query {
428428
}
429429
}
430430

431+
#[inline]
432+
fn abs_diff_occurrence(a: &OccurenceEq, b: &OccurenceEq) -> usize {
433+
a.0.start().abs_diff(b.0.start())
434+
}
435+
431436
let mut word_id: u32 = 0;
432437
self.root()
433438
.as_doc_iter(
@@ -454,6 +459,7 @@ impl Query {
454459
|and, not| and.0.start().cmp(&not.0.start()),
455460
#[inline]
456461
|a, b| (*a).cmp(b),
462+
Some(abs_diff_occurrence),
457463
)
458464
.filter_map(|inclusion| match inclusion {
459465
ProgressiveInclusion::Left(and) => Some(and),
@@ -480,6 +486,7 @@ impl Query {
480486
// Compares IDs
481487
#[inline]
482488
|a, b| (*a).cmp(b),
489+
None
483490
)
484491
.filter_map(|inclusion| match inclusion {
485492
ProgressiveInclusion::Both(mut a, b) => {
@@ -499,6 +506,7 @@ impl Query {
499506
// Compares IDs
500507
#[inline]
501508
|a, b| (*a).cmp(b),
509+
None,
502510
)
503511
.map(|inclusion| match inclusion {
504512
ProgressiveInclusion::Both(mut a, b) => {

src/set.rs

Lines changed: 78 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
//!
44
//! These are provided by [`iter_set`], except [`progressive`], which is written by be.
55
use std::cmp::Ordering;
6+
use std::fmt::Debug;
67
use std::iter::Peekable;
78
use std::mem;
89

@@ -121,12 +122,13 @@ struct Progressive<
121122
L: Iterator<Item = T>,
122123
R: Iterator<Item = T>,
123124
C: FnMut(&mut T, &mut T) -> core::cmp::Ordering,
124-
M: FnMut(&mut T, &mut T) -> core::cmp::Ordering,
125+
M: FnMut(&T, &T) -> core::cmp::Ordering,
125126
> {
126127
l: L,
127128
r: R,
128129
matches: M,
129130
comparison: C,
131+
minimize_dist_right: Option<fn(&T, &T) -> usize>,
130132
l_next: Option<T>,
131133
r_next: Option<T>,
132134
l_peek: Option<T>,
@@ -137,7 +139,7 @@ impl<
137139
L: Iterator<Item = T>,
138140
R: Iterator<Item = T>,
139141
C: FnMut(&mut T, &mut T) -> core::cmp::Ordering,
140-
M: FnMut(&mut T, &mut T) -> core::cmp::Ordering,
142+
M: FnMut(&T, &T) -> core::cmp::Ordering,
141143
> Progressive<T, L, R, C, M>
142144
{
143145
fn next_l(&mut self) {
@@ -154,79 +156,109 @@ impl<
154156
L: Iterator<Item = T>,
155157
R: Iterator<Item = T>,
156158
C: FnMut(&mut T, &mut T) -> core::cmp::Ordering,
157-
M: FnMut(&mut T, &mut T) -> core::cmp::Ordering,
159+
M: FnMut(&T, &T) -> core::cmp::Ordering,
158160
> Iterator for Progressive<T, L, R, C, M>
159161
{
160162
type Item = ProgressiveInclusion<T>;
161163
fn next(&mut self) -> Option<Self::Item> {
162-
match (self.l_next.take(), self.r_next.take()) {
163-
(Some(mut l), Some(mut r)) => match (self.matches)(&mut l, &mut r) {
164-
Ordering::Less => {
165-
let l = ProgressiveInclusion::Left(l);
166-
self.r_next = Some(r);
167-
self.next_l();
168-
return Some(l);
169-
}
170-
Ordering::Equal => {
171-
self.l_next = Some(l);
172-
self.r_next = Some(r);
173-
}
174-
Ordering::Greater => {
175-
let r = ProgressiveInclusion::Right(r);
176-
self.l_next = Some(l);
177-
self.next_r();
178-
return Some(r);
179-
}
180-
},
181-
_ => return None,
182-
}
164+
loop {
165+
match (self.l_next.take(), self.r_next.take()) {
166+
(Some(l), Some(r)) => match (self.matches)(&l, &r) {
167+
Ordering::Less => {
168+
let l = ProgressiveInclusion::Left(l);
169+
self.r_next = Some(r);
170+
self.next_l();
171+
return Some(l);
172+
}
173+
Ordering::Equal => {
174+
self.l_next = Some(l);
175+
self.r_next = Some(r);
176+
}
177+
Ordering::Greater => {
178+
let r = ProgressiveInclusion::Right(r);
179+
self.l_next = Some(l);
180+
self.next_r();
181+
return Some(r);
182+
}
183+
},
184+
_ => return None,
185+
}
183186

184-
if self.r_peek.is_none() {
185-
let ret = ProgressiveInclusion::Both(self.l_next.take()?, self.r_next.clone()?);
186-
self.next_l();
187-
return Some(ret);
188-
}
189-
if self.l_peek.is_none() {
190-
let ret = ProgressiveInclusion::Both(self.l_next.clone()?, self.r_next.take()?);
191-
self.next_r();
187+
if self.r_peek.is_none() {
188+
let ret = ProgressiveInclusion::Both(self.l_next.take()?, self.r_next.clone()?);
189+
self.next_l();
190+
return Some(ret);
191+
}
192+
if self.l_peek.is_none() {
193+
let ret = ProgressiveInclusion::Both(self.l_next.clone()?, self.r_next.take()?);
194+
self.next_r();
195+
return Some(ret);
196+
}
197+
198+
// If `self.r_peek` and `self.l_peek` are both some, these must be Some. It's a logic error
199+
// otherwise.
200+
let left = self.l_next.as_mut().unwrap();
201+
let right = self.r_next.as_mut().unwrap();
202+
let cmp = (self.comparison)(left, right);
203+
let advance_right = cmp == Ordering::Greater;
204+
// If dist between left and right is less on next iter, iterate right to get closer.
205+
if let Some(minimize_dist_right) = self.minimize_dist_right {
206+
if advance_right {
207+
let dist = minimize_dist_right(left, right);
208+
let peek_dist = if let Some(right) = &self.r_peek {
209+
if (self.matches)(left, right) == Ordering::Equal {
210+
Some(minimize_dist_right(left, right))
211+
} else {
212+
None
213+
}
214+
} else {
215+
None
216+
};
217+
// The == part of <= is really important, as we can have duplicates.
218+
if peek_dist.map_or(false, |peek_dist| peek_dist <= dist) {
219+
self.next_r();
220+
continue;
221+
}
222+
}
223+
}
224+
let ret = ProgressiveInclusion::Both(left.clone(), right.clone());
225+
if advance_right {
226+
self.next_r();
227+
} else {
228+
self.next_l();
229+
};
192230
return Some(ret);
193231
}
194-
195-
// If `self.r_peek` and `self.l_peek` are both some, these must be Some. It's a logic error
196-
// otherwise.
197-
let left = self.l_next.as_mut().unwrap();
198-
let right = self.r_next.as_mut().unwrap();
199-
let advance_right = (self.comparison)(left, right) == Ordering::Greater;
200-
let ret = ProgressiveInclusion::Both(left.clone(), right.clone());
201-
if advance_right {
202-
self.next_r();
203-
} else {
204-
self.next_l();
205-
};
206-
Some(ret)
207232
}
208233
}
209234
/// Like [`iter_set::classify`] but when we get two "equal" from `matches`, we let one of those
210235
/// stay in the "cache" to match future ones. The last one or the greatest one according to
211236
/// `comparison` stays.
237+
///
238+
/// If `minimize_dist_right` is [`Some`], the algorithm will only return
239+
/// [`ProgressiveInclusion::Both`] once `b` is close to `a` as possible.
240+
/// It should return the distance between the two points (using the same algorithm as `comparison`).
241+
/// This is very useful when doing `AND NOT` operations. Set it to [`None`] otherwise.
212242
pub fn progressive<T, L, R, C, M>(
213243
a: L,
214244
b: R,
215245
comparison: C,
216246
matches: M,
247+
minimize_dist_right: Option<fn(&T, &T) -> usize>,
217248
) -> impl Iterator<Item = ProgressiveInclusion<T>>
218249
where
219250
T: Clone,
220251
L: IntoIterator<Item = T>,
221252
R: IntoIterator<Item = T>,
222253
C: FnMut(&mut T, &mut T) -> core::cmp::Ordering,
223-
M: FnMut(&mut T, &mut T) -> core::cmp::Ordering,
254+
M: FnMut(&T, &T) -> core::cmp::Ordering,
224255
{
225256
let mut l = a.into_iter();
226257
let mut r = b.into_iter();
227258
Progressive {
228259
comparison,
229260
matches,
261+
minimize_dist_right,
230262
l_next: l.next(),
231263
r_next: r.next(),
232264
l_peek: l.next(),

0 commit comments

Comments
 (0)