diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b75c72c76..6c45e30ab 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -77,4 +77,44 @@ jobs:
           ./test_updates update
           ./multivector_search_test
           ./epsilon_search_test
+          ./leak_smoke_test
         shell: bash
+
+  leak_sanitizers:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+
+      - name: Build and install (ASAN/LSAN)
+        env:
+          HNSWLIB_NO_NATIVE: "1"
+          CFLAGS: "-O1 -g -fno-omit-frame-pointer -fsanitize=address,leak"
+          CXXFLAGS: "-O1 -g -fno-omit-frame-pointer -fsanitize=address,leak"
+          LDFLAGS: "-fsanitize=address,leak"
+        run: |
+          # --no-build-isolation makes pip skip installing build requirements,
+          # so the packages needed to compile the bindings are installed first.
+          python -m pip install -U pip setuptools wheel numpy pybind11
+          python -m pip install -v --no-build-isolation .
+
+      - name: Python leak smoke test (ASAN/LSAN)
+        timeout-minutes: 15
+        env:
+          ASAN_OPTIONS: "detect_leaks=1:halt_on_error=1:alloc_dealloc_mismatch=1"
+        run: |
+          # The Python interpreter is not instrumented, so the ASan runtime has
+          # to be preloaded before the sanitized extension module is imported.
+          ASAN_RUNTIME="$(gcc -print-file-name=libasan.so)"
+          LD_PRELOAD="$ASAN_RUNTIME" python -m unittest discover -v --start-directory tests/python --pattern "bindings_test_leaks.py"
+
+      - name: C++ leak smoke test (ASAN/LSAN)
+        timeout-minutes: 15
+        env:
+          ASAN_OPTIONS: "detect_leaks=1:halt_on_error=1:alloc_dealloc_mismatch=1"
+        run: |
+          cmake -S . -B build-asan -DHNSWLIB_EXAMPLES=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_CXX_FLAGS="-O1 -g -fno-omit-frame-pointer -fsanitize=address,leak -std=c++11" -DCMAKE_EXE_LINKER_FLAGS="-fsanitize=address,leak"
+          cmake --build build-asan --target leak_smoke_test -j 2
+          ./build-asan/leak_smoke_test
diff --git a/CMakeLists.txt b/CMakeLists.txt
index be0d40f03..eff08734a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -100,6 +100,9 @@ if(HNSWLIB_EXAMPLES)
     add_executable(multiThread_replace_test tests/cpp/multiThread_replace_test.cpp)
     target_link_libraries(multiThread_replace_test hnswlib)
 
+    add_executable(leak_smoke_test tests/cpp/leak_smoke_test.cpp)
+    target_link_libraries(leak_smoke_test hnswlib)
+
     add_executable(main tests/cpp/main.cpp tests/cpp/sift_1b.cpp)
     target_link_libraries(main hnswlib)
 endif()
diff --git a/python_bindings/bindings.cpp b/python_bindings/bindings.cpp
index dd09e80a2..2acd6e3fc 100644
--- a/python_bindings/bindings.cpp
+++ b/python_bindings/bindings.cpp
@@ -392,19 +392,19 @@ class Index {
         }
 
         py::capsule free_when_done_l0(data_level0_npy, [](void* f) {
-            delete[] f;
+            free(f);
             });
         py::capsule free_when_done_lvl(element_levels_npy, [](void* f) {
-            delete[] f;
+            free(f);
             });
         py::capsule free_when_done_lb(label_lookup_key_npy, [](void* f) {
-            delete[] f;
+            free(f);
             });
         py::capsule free_when_done_id(label_lookup_val_npy, [](void* f) {
-            delete[] f;
+            free(f);
             });
         py::capsule free_when_done_ll(link_list_npy, [](void* f) {
-            delete[] f;
+            free(f);
             });
         /* TODO: serialize state of random generators appr_alg->level_generator_ and appr_alg->update_probability_generator_ */
 
@@ -676,10 +676,10 @@ class Index {
             }
         }
         py::capsule free_when_done_l(data_numpy_l, [](void* f) {
-            delete[] f;
+            delete[] static_cast<hnswlib::labeltype*>(f);
             });
         py::capsule free_when_done_d(data_numpy_d, [](void* f) {
-            delete[] f;
+            delete[] static_cast<dist_t*>(f);
             });
 
         return py::make_tuple(
@@ -884,10 +884,10 @@ class BFIndex {
         }
 
         py::capsule free_when_done_l(data_numpy_l, [](void *f) {
-            delete[] f;
+            delete[] static_cast<hnswlib::labeltype *>(f);
         });
         py::capsule free_when_done_d(data_numpy_d, [](void *f) {
-            delete[] f;
+            delete[] static_cast<dist_t *>(f);
         });
 
 
diff --git a/tests/cpp/leak_smoke_test.cpp b/tests/cpp/leak_smoke_test.cpp
new file mode 100644
index 000000000..81100576c
--- /dev/null
+++ b/tests/cpp/leak_smoke_test.cpp
@@ -0,0 +1,45 @@
+#include "../../hnswlib/hnswlib.h"
+
+#include <cstdio>
+#include <random>
+#include <vector>
+
+int main() {
+    const int dim = 16;
+    const int max_elements = 1000;
+    const int M = 16;
+    const int ef_construction = 200;
+    const size_t k = 10;
+
+    std::mt19937 rng(42);
+    std::uniform_real_distribution<float> distrib(0.0f, 1.0f);
+
+    std::vector<float> data(dim * max_elements);
+    for (size_t i = 0; i < data.size(); i++) {
+        data[i] = distrib(rng);
+    }
+
+    // Build, query and destroy the index several times; run under a leak
+    // sanitizer, anything the index fails to release is reported at exit.
+    for (int iter = 0; iter < 5; iter++) {
+        hnswlib::L2Space space(dim);
+        hnswlib::HierarchicalNSW<float> alg_hnsw(
+            &space, max_elements, M, ef_construction, 42 + iter);
+
+        for (int i = 0; i < max_elements; i++) {
+            alg_hnsw.addPoint(data.data() + (i * dim), i);
+        }
+
+        for (int i = 0; i < 50; i++) {
+            auto result = alg_hnsw.searchKnn(data.data() + (i * dim), k);
+            // Checked explicitly: assert() is compiled out when NDEBUG is
+            // defined, which is the case for RelWithDebInfo builds.
+            if (result.size() != k) {
+                std::fprintf(stderr, "unexpected result count: %zu\n", result.size());
+                return 1;
+            }
+        }
+    }
+
+    return 0;
+}
diff --git a/tests/python/bindings_test.py b/tests/python/bindings_test.py
index f9b3092ff..5c0307688 100644
--- a/tests/python/bindings_test.py
+++ b/tests/python/bindings_test.py
@@ -44,6 +44,7 @@ def testRandomSelf(self):
         # Query the elements for themselves and measure recall:
         labels, distances = p.knn_query(data1, k=1)
         self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data1))), 1.0, 3)
+        del labels, distances
 
         # Serializing and deleting the index:
         index_path = 'first_half.bin'
@@ -64,5 +65,6 @@ def testRandomSelf(self):
         labels, distances = p.knn_query(data, k=1)
 
         self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), 1.0, 3)
+        del labels, distances
 
         os.remove(index_path)
diff --git a/tests/python/bindings_test_leaks.py b/tests/python/bindings_test_leaks.py
new file mode 100644
index 000000000..38ec3b14c
--- /dev/null
+++ b/tests/python/bindings_test_leaks.py
@@ -0,0 +1,32 @@
+import gc
+import pickle
+import unittest
+
+import numpy as np
+
+import hnswlib
+
+
+class LeakSmokeTestCase(unittest.TestCase):
+    def testLeakSmoke(self):
+        dim = 8
+        max_elements = 200
+
+        for _ in range(25):
+            data = np.float32(np.random.random((max_elements, dim)))
+
+            p = hnswlib.Index(space='l2', dim=dim)
+            p.init_index(max_elements=max_elements, ef_construction=100, M=16)
+            p.add_items(data)
+
+            labels, distances = p.knn_query(data[:25], k=5)
+            del labels, distances
+
+            payload = pickle.dumps(p)
+            del p
+            gc.collect()
+
+            p2 = pickle.loads(payload)
+            labels2, distances2 = p2.knn_query(data[:10], k=3)
+            del labels2, distances2, p2, payload, data
+            gc.collect()