diff --git a/BitSliceIndexing/bsi.go b/BitSliceIndexing/bsi.go index 5487edbf..16bdd6b8 100644 --- a/BitSliceIndexing/bsi.go +++ b/BitSliceIndexing/bsi.go @@ -754,11 +754,7 @@ func batchEqual(e *task, batch []uint32, resultsChan chan *roaring.Bitmap, // ClearBits cleared the bits that exist in the target if they are also in the found set. func ClearBits(foundSet, target *roaring.Bitmap) { - iter := foundSet.Iterator() - for iter.HasNext() { - cID := iter.Next() - target.Remove(cID) - } + target.AndNot(foundSet) } // ClearValues removes the values found in foundSet @@ -768,13 +764,13 @@ func (b *BSI) ClearValues(foundSet *roaring.Bitmap) { wg.Add(1) go func() { defer wg.Done() - ClearBits(foundSet, b.eBM) + b.eBM.AndNot(foundSet) }() for i := 0; i < b.BitCount(); i++ { wg.Add(1) go func(j int) { defer wg.Done() - ClearBits(foundSet, b.bA[j]) + b.bA[j].AndNot(foundSet) }(i) } wg.Wait() diff --git a/BitSliceIndexing/bsi_test.go b/BitSliceIndexing/bsi_test.go index a28a5d4e..71d5e7b4 100644 --- a/BitSliceIndexing/bsi_test.go +++ b/BitSliceIndexing/bsi_test.go @@ -2,7 +2,6 @@ package roaring import ( "fmt" - "io/ioutil" "math/rand" "os" "testing" @@ -35,6 +34,27 @@ func setup() *BSI { return bsi } +func setupLargeBSI(t testing.TB) *BSI { + t.Helper() + + datEBM, err := os.ReadFile("./testdata/age/EBM") + if err != nil { + return nil + } + b := make([][]byte, 9) + b[0] = datEBM + for i := 1; i <= 8; i++ { + b[i], err = os.ReadFile(fmt.Sprintf("./testdata/age/%d", i)) + if err != nil { + return nil + } + } + bsi := NewDefaultBSI() + err = bsi.UnmarshalBinary(b) + require.NoError(t, err) + return bsi +} + func setupNegativeBoundary() *BSI { bsi := NewBSI(5, -5) @@ -264,58 +284,12 @@ func TestNewBSIRetainSet(t *testing.T) { func TestLargeFile(t *testing.T) { - datEBM, err := ioutil.ReadFile("./testdata/age/EBM") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat1, err := ioutil.ReadFile("./testdata/age/1") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat2, err := ioutil.ReadFile("./testdata/age/2") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat3, err := ioutil.ReadFile("./testdata/age/3") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat4, err := ioutil.ReadFile("./testdata/age/4") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat5, err := ioutil.ReadFile("./testdata/age/5") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat6, err := ioutil.ReadFile("./testdata/age/6") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat7, err := ioutil.ReadFile("./testdata/age/7") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat8, err := ioutil.ReadFile("./testdata/age/8") - if err != nil { + bsi := setupLargeBSI(t) + if bsi == nil { fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") return } - b := [][]byte{datEBM, dat1, dat2, dat3, dat4, dat5, dat6, dat7, dat8} - - bsi := NewDefaultBSI() - err = bsi.UnmarshalBinary(b) - require.Nil(t, err) - resultA := bsi.CompareValue(0, EQ, 55, 0, nil) assert.Equal(t, uint64(520157), resultA.GetCardinality()) @@ -468,6 +442,23 @@ func BenchmarkSetRoaring(b *testing.B) { } } +func BenchmarkClearValues(b *testing.B) { + bsi := setupLargeBSI(b) + if bsi == nil { + b.Skip("\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") + return + } + resultA := bsi.CompareValue(0, EQ, 55, 0, nil) + assert.Equal(b, uint64(520157), resultA.GetCardinality()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + b2 := bsi.Clone() + b.StartTimer() + b2.ClearValues(resultA) + } +} + func TestIssue426(t *testing.T) { bsi := NewBSI(101, 0) bsi.SetValue(3, 5) diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go index 46dbe121..fb86d105 100644 --- a/roaring64/bsi64.go +++ b/roaring64/bsi64.go @@ -738,7 +738,7 @@ func (b *BSI) ParOr(parallelism int, bsis ...*BSI) { bits := len(b.bA) for i := 0; i < len(bsis); i++ { if len(bsis[i].bA) > bits { - bits = len(bsis[i].bA ) + bits = len(bsis[i].bA) } } @@ -942,11 +942,7 @@ func batchEqual(e *task, batch []uint64, resultsChan chan *Bitmap, // ClearBits cleared the bits that exist in the target if they are also in the found set. func ClearBits(foundSet, target *Bitmap) { - iter := foundSet.Iterator() - for iter.HasNext() { - cID := iter.Next() - target.Remove(cID) - } + target.AndNot(foundSet) } // ClearValues removes the values found in foundSet @@ -956,13 +952,13 @@ func (b *BSI) ClearValues(foundSet *Bitmap) { wg.Add(1) go func() { defer wg.Done() - ClearBits(foundSet, &b.eBM) + b.eBM.AndNot(foundSet) }() for i := 0; i < b.BitCount(); i++ { wg.Add(1) go func(j int) { defer wg.Done() - ClearBits(foundSet, &b.bA[j]) + b.bA[j].AndNot(foundSet) }(i) } wg.Wait() diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go index 98b4aba0..3eba0e9e 100644 --- a/roaring64/bsi64_test.go +++ b/roaring64/bsi64_test.go @@ -214,6 +214,27 @@ func setupRandom() (bsi *BSI, min, max int64) { return bsi, min, max } +func setupLargeBSI(t testing.TB) *BSI { + t.Helper() + + datEBM, err := os.ReadFile("./testdata/age/EBM") + if err != nil { + return nil + } + b := make([][]byte, 9) + b[0] = datEBM + for i := 1; i <= 8; i++ { + b[i], err = os.ReadFile(fmt.Sprintf("./testdata/age/%d", i)) + if err != nil { + return nil + } + } + bsi := NewDefaultBSI() + err = bsi.UnmarshalBinary(b) + require.NoError(t, err) + return bsi +} + func TestTwosComplement(t *testing.T) { assert.Equal(t, "1001110", twosComplement(big.NewInt(-50), 7).Text(2)) assert.Equal(t, "110010", twosComplement(big.NewInt(50), 7).Text(2)) @@ -408,59 +429,12 @@ func TestNewBSIRetainSet(t *testing.T) { func TestLargeFile(t *testing.T) { - datEBM, err := ioutil.ReadFile("./testdata/age/EBM") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat1, err := ioutil.ReadFile("./testdata/age/1") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat2, err := ioutil.ReadFile("./testdata/age/2") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat3, err := ioutil.ReadFile("./testdata/age/3") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat4, err := ioutil.ReadFile("./testdata/age/4") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat5, err := ioutil.ReadFile("./testdata/age/5") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat6, err := ioutil.ReadFile("./testdata/age/6") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat7, err := ioutil.ReadFile("./testdata/age/7") - if err != nil { - fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") - return - } - dat8, err := ioutil.ReadFile("./testdata/age/8") - if err != nil { + bsi := setupLargeBSI(t) + if bsi == nil { fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") return } - b := [][]byte{datEBM, dat1, dat2, dat3, dat4, dat5, dat6, dat7, dat8} - - bsi := NewDefaultBSI() - //bsi.RunOptimize() - err = bsi.UnmarshalBinary(b) - require.Nil(t, err) - resultA := bsi.CompareValue(0, EQ, 55, 0, nil) assert.Equal(t, uint64(574600), resultA.GetCardinality()) @@ -822,3 +796,21 @@ func TestRangeNilBig(t *testing.T) { tmpAll := bsi.CompareBigValue(0, RANGE, bsi.MinMaxBig(0, MIN, nil), bsi.MinMaxBig(0, MAX, nil), nil) assert.Equal(t, tmpAll.GetCardinality(), setAll.GetCardinality()) } + +func BenchmarkClearValues(b *testing.B) { + bsi := setupLargeBSI(b) + if bsi == nil { + b.Skip("\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") + return + } + resultA := bsi.CompareValue(0, EQ, 55, 0, nil) + + assert.Equal(b, uint64(574600), resultA.GetCardinality()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() + b2 := bsi.Clone() + b.StartTimer() + b2.ClearValues(resultA) + } +}