Skip to content

Commit c5c38f0

Browse files
authored
Fix load bugs/messages, update test, deprecate old indices (#148)
* temp debug state
* fix bug in loading index with deleted elements
* adjust condition in test
* add check for file existence
* cleanup
1 parent b3671c5 commit c5c38f0

File tree

3 files changed

+41
-18
lines changed

3 files changed

+41
-18
lines changed

hnswlib/hnswalg.h

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -595,6 +595,10 @@ namespace hnswlib {
595595

596596
std::ifstream input(location, std::ios::binary);
597597

598+
if (!input.is_open())
599+
throw std::runtime_error("Cannot open file");
600+
601+
598602
// get file size:
599603
input.seekg(0,input.end);
600604
std::streampos total_filesize=input.tellg();
@@ -625,16 +629,15 @@ namespace hnswlib {
625629
fstdistfunc_ = s->get_dist_func();
626630
dist_func_param_ = s->get_dist_func_param();
627631

628-
/// Legacy, check that everything is ok
629-
630-
bool old_index=false;
631-
632632
auto pos=input.tellg();
633+
634+
635+
/// Optional - check if index is ok:
636+
633637
input.seekg(cur_element_count * size_data_per_element_,input.cur);
634638
for (size_t i = 0; i < cur_element_count; i++) {
635639
if(input.tellg() < 0 || input.tellg()>=total_filesize){
636-
old_index = true;
637-
break;
640+
throw std::runtime_error("Index seems to be corrupted or unsupported");
638641
}
639642

640643
unsigned int linkListSize;
@@ -644,23 +647,21 @@ namespace hnswlib {
644647
}
645648
}
646649

647-
// check if file is ok, if not this is either corrupted or old index
650+
// throw an exception if the index is either corrupted or in the old format
648651
if(input.tellg()!=total_filesize)
649-
old_index = true;
652+
throw std::runtime_error("Index seems to be corrupted or unsupported");
650653

651-
if (old_index) {
652-
std::cerr << "Warning: loading of old indexes will be deprecated before 2019.\n"
653-
<< "Please resave the index in the new format.\n";
654-
}
655654
input.clear();
655+
656+
/// Optional check end
657+
656658
input.seekg(pos,input.beg);
657659

658660

659661
data_level0_memory_ = (char *) malloc(max_elements * size_data_per_element_);
660662
input.read(data_level0_memory_, cur_element_count * size_data_per_element_);
661663

662-
if(old_index)
663-
input.seekg(((max_elements_-cur_element_count) * size_data_per_element_), input.cur);
664+
664665

665666

666667
size_links_per_element_ = maxM_ * sizeof(tableint) + sizeof(linklistsizeint);
@@ -691,6 +692,14 @@ namespace hnswlib {
691692
input.read(linkLists_[i], linkListSize);
692693
}
693694
}
695+
696+
has_deletions_=false;
697+
698+
for (size_t i = 0; i < cur_element_count; i++) {
699+
if(isMarkedDeleted(i))
700+
has_deletions_=true;
701+
}
702+
694703
input.close();
695704

696705
return;

python_bindings/tests/bindings_test_labels.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,12 @@
33

44
class RandomSelfTestCase(unittest.TestCase):
55
def testRandomSelf(self):
6+
for idx in range(16):
67
print("\n**** Index save-load test ****\n")
78
import hnswlib
89
import numpy as np
9-
10+
11+
np.random.seed(idx)
1012
dim = 16
1113
num_elements = 10000
1214

@@ -95,8 +97,8 @@ def testRandomSelf(self):
9597
p.mark_deleted(l[0])
9698
labels2, _ = p.knn_query(data2, k=1)
9799
items=p.get_items(labels2)
98-
diff_with_gt_labels=np.max(np.abs(data2-items))
99-
self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-4) # console
100+
diff_with_gt_labels=np.mean(np.abs(data2-items))
101+
self.assertAlmostEqual(diff_with_gt_labels, 0, delta = 1e-3) # console
100102

101103

102104
labels1_after, _ = p.knn_query(data1, k=1)
@@ -106,6 +108,18 @@ def testRandomSelf(self):
106108
self.assertTrue(False)
107109
print("All the data in data1 are removed")
108110

111+
# checking saving/loading index with elements marked as deleted
112+
p.save_index("with_deleted.bin")
113+
p = hnswlib.Index(space='l2', dim=dim)
114+
p.load_index("with_deleted.bin")
115+
p.set_ef(100)
116+
117+
labels1_after, _ = p.knn_query(data1, k=1)
118+
for la in labels1_after:
119+
for lb in labels1:
120+
if la[0] == lb[0]:
121+
self.assertTrue(False)
122+
109123

110124

111125
if __name__ == "__main__":

python_bindings/tests/bindings_test_resize.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
class RandomSelfTestCase(unittest.TestCase):
55
def testRandomSelf(self):
6-
for idx in range(32):
6+
for idx in range(16):
77
print("\n**** Index resize test ****\n")
88
import hnswlib
99
import numpy as np

0 commit comments

Comments
 (0)