API Reference#

usearch/index.hpp#

Single-header Vector Search engine.

Author

Ash Vardanian

Date

April 26, 2023

Defines

USEARCH_VERSION_MAJOR#
USEARCH_VERSION_MINOR#
USEARCH_VERSION_PATCH#
USEARCH_64BIT_ENV#
USEARCH_USE_OPENMP#
prefetch_m(ptr)#
usearch_pack_m#
usearch_align_m#
usearch_assert_m(must_be_true, message)#
usearch_noexcept_m#

Typedefs

using byte_t = char#
using bitset_t = bitset_gt<>#
template<typename metric_at, typename ...args_at>
using return_type_gt = typename std::result_of<metric_at(args_at...)>::type#

C++17 and newer version deprecate the std::result_of

using default_key_t = std::uint64_t#
using default_slot_t = std::uint32_t#
using default_distance_t = float#

Functions

static inline void __usearch_raise_runtime_error(char const *message)#

Helper function to simplify debugging - trace just one symbol - __usearch_raise_runtime_error. Assuming the extern C block, the name won’t be mangled.

template<std::size_t multiple_ak>
std::size_t divide_round_up(std::size_t num) noexcept#
inline std::size_t divide_round_up(std::size_t num, std::size_t denominator) noexcept#
inline std::size_t ceil2(std::size_t v) noexcept#
template<typename at>
void misaligned_store(void *ptr, at v) noexcept#

Simply dereferencing misaligned pointers can be dangerous.

template<typename at>
at misaligned_load(void *ptr) noexcept#

Simply dereferencing misaligned pointers can be dangerous.

template<typename at, typename other_at = at>
at exchange(at &obj, other_at &&new_value)#

The std::exchange alternative for C++11.

template<typename at, typename sfinae_at = at>
std::enable_if<std::is_pod<sfinae_at>::value>::type destroy_at(at*)#

The std::destroy_at alternative for C++11.

template<typename at, typename sfinae_at = at>
std::enable_if<!std::is_pod<sfinae_at>::value>::type destroy_at(at *obj)#
template<typename at, typename sfinae_at = at>
std::enable_if<std::is_pod<sfinae_at>::value>::type construct_at(at*)#

The std::construct_at alternative for C++11.

template<typename at, typename sfinae_at = at>
std::enable_if<!std::is_pod<sfinae_at>::value>::type construct_at(at *obj)#
template<typename key_at, typename std::enable_if<std::is_integral<key_at>::value>::type* = nullptr>
key_at default_free_value()#
template<typename key_at, typename std::enable_if<std::is_same<key_at, uint40_t>::value>::type* = nullptr>
uint40_t default_free_value()#
constexpr std::size_t default_connectivity()#

Number of neighbors per graph node. Defaults to 32 in FAISS and 16 in hnswlib.

It is called M in the paper.

constexpr std::size_t default_expansion_add()#

Hyper-parameter controlling the quality of indexing. Defaults to 40 in FAISS and 200 in hnswlib.

It is called efConstruction in the paper.

constexpr std::size_t default_expansion_search()#

Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.

It is called ef in the paper.

constexpr std::size_t default_allocator_entry_bytes()#
template<typename object_at>
static constexpr bool is_dummy()#

Checks if the provided object has a dummy type, emulating an interface, but performing no real computation.

template<typename at>
constexpr bool has_reset()#

Checks if a certain class has a member function called reset.

template<typename key_at>
inline std::size_t get_slot(member_gt<key_at> const &m) noexcept#
template<typename key_at>
inline key_at get_key(member_gt<key_at> const &m) noexcept#
template<typename key_at>
inline std::size_t get_slot(member_cref_gt<key_at> const &m) noexcept#
template<typename key_at>
inline key_at get_key(member_cref_gt<key_at> const &m) noexcept#
template<typename key_at>
inline std::size_t get_slot(member_ref_gt<key_at> const &m) noexcept#
template<typename key_at>
inline key_at get_key(member_ref_gt<key_at> const &m) noexcept#
template<typename men_at, typename women_at, typename men_values_at, typename women_values_at, typename men_metric_at, typename women_metric_at, typename man_to_woman_at = dummy_key_to_key_mapping_t, typename woman_to_man_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
static join_result_t join(men_at const &men, women_at const &women, men_values_at const &men_values, women_values_at const &women_values, men_metric_at &&men_metric, women_metric_at &&women_metric, index_join_config_t config = {}, man_to_woman_at &&man_to_woman = man_to_woman_at{}, woman_to_man_at &&woman_to_man = woman_to_man_at{}, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}) noexcept#

Adapts the Male-Optimal Stable Marriage algorithm for unequal sets to perform fast one-to-one matching between two large collections of vectors, using approximate nearest neighbors search.

Parameters:
  • man_to_woman[inout] Container to map ::men keys to ::women.

  • woman_to_man[inout] Container to map ::women keys to ::men.

  • executor[in] Thread-pool to execute the job in parallel.

  • progress[in] Callback to report the execution progress.

namespace unum#
namespace usearch#

Typedefs

using byte_t = char#
using bitset_t = bitset_gt<>#
template<typename metric_at, typename ...args_at>
using return_type_gt = typename std::result_of<metric_at(args_at...)>::type#

C++17 and newer version deprecate the std::result_of

using default_key_t = std::uint64_t#
using default_slot_t = std::uint32_t#
using default_distance_t = float#

Functions

template<std::size_t multiple_ak>
std::size_t divide_round_up(std::size_t num) noexcept#
inline std::size_t divide_round_up(std::size_t num, std::size_t denominator) noexcept#
inline std::size_t ceil2(std::size_t v) noexcept#
template<typename at>
void misaligned_store(void *ptr, at v) noexcept#

Simply dereferencing misaligned pointers can be dangerous.

template<typename at>
at misaligned_load(void *ptr) noexcept#

Simply dereferencing misaligned pointers can be dangerous.

template<typename at, typename other_at = at>
at exchange(at &obj, other_at &&new_value)#

The std::exchange alternative for C++11.

template<typename at, typename sfinae_at = at>
std::enable_if<std::is_pod<sfinae_at>::value>::type destroy_at(at*)#

The std::destroy_at alternative for C++11.

template<typename at, typename sfinae_at = at>
std::enable_if<!std::is_pod<sfinae_at>::value>::type destroy_at(at *obj)#
template<typename at, typename sfinae_at = at>
std::enable_if<std::is_pod<sfinae_at>::value>::type construct_at(at*)#

The std::construct_at alternative for C++11.

template<typename at, typename sfinae_at = at>
std::enable_if<!std::is_pod<sfinae_at>::value>::type construct_at(at *obj)#
template<typename key_at, typename std::enable_if<std::is_integral<key_at>::value>::type* = nullptr>
key_at default_free_value()#
template<typename key_at, typename std::enable_if<std::is_same<key_at, uint40_t>::value>::type* = nullptr>
uint40_t default_free_value()#
constexpr std::size_t default_connectivity()#

Number of neighbors per graph node. Defaults to 32 in FAISS and 16 in hnswlib.

It is called M in the paper.

constexpr std::size_t default_expansion_add()#

Hyper-parameter controlling the quality of indexing. Defaults to 40 in FAISS and 200 in hnswlib.

It is called efConstruction in the paper.

constexpr std::size_t default_expansion_search()#

Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.

It is called ef in the paper.

constexpr std::size_t default_allocator_entry_bytes()#
template<typename object_at>
static constexpr bool is_dummy()#

Checks if the provided object has a dummy type, emulating an interface, but performing no real computation.

template<typename at>
constexpr bool has_reset()#

Checks if a certain class has a member function called reset.

template<typename key_at>
inline std::size_t get_slot(member_gt<key_at> const &m) noexcept#
template<typename key_at>
inline key_at get_key(member_gt<key_at> const &m) noexcept#
template<typename key_at>
inline std::size_t get_slot(member_cref_gt<key_at> const &m) noexcept#
template<typename key_at>
inline key_at get_key(member_cref_gt<key_at> const &m) noexcept#
template<typename key_at>
inline std::size_t get_slot(member_ref_gt<key_at> const &m) noexcept#
template<typename key_at>
inline key_at get_key(member_ref_gt<key_at> const &m) noexcept#
template<typename men_at, typename women_at, typename men_values_at, typename women_values_at, typename men_metric_at, typename women_metric_at, typename man_to_woman_at = dummy_key_to_key_mapping_t, typename woman_to_man_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
static join_result_t join(men_at const &men, women_at const &women, men_values_at const &men_values, women_values_at const &women_values, men_metric_at &&men_metric, women_metric_at &&women_metric, index_join_config_t config = {}, man_to_woman_at &&man_to_woman = man_to_woman_at{}, woman_to_man_at &&woman_to_man = woman_to_man_at{}, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}) noexcept#

Adapts the Male-Optimal Stable Marriage algorithm for unequal sets to perform fast one-to-one matching between two large collections of vectors, using approximate nearest neighbors search.

Parameters:
  • man_to_woman[inout] Container to map ::men keys to ::women.

  • woman_to_man[inout] Container to map ::women keys to ::men.

  • executor[in] Thread-pool to execute the job in parallel.

  • progress[in] Callback to report the execution progress.

template<typename at>
class misaligned_ref_gt#
#include <index.hpp>

A reference to a misaligned memory location with a specific type. It is needed to avoid Undefined Behavior when dereferencing addresses indivisible by sizeof(at).

Public Functions

inline misaligned_ref_gt(byte_t *ptr) noexcept#
inline operator mutable_t() const noexcept#
inline misaligned_ref_gt &operator=(mutable_t const &v) noexcept#
inline void reset(byte_t *ptr) noexcept#
inline byte_t *ptr() const noexcept#

Private Types

using element_t = at#
using mutable_t = typename std::remove_const<element_t>::type#

Private Members

byte_t *ptr_#
template<typename at>
class misaligned_ptr_gt#
#include <index.hpp>

A pointer to a misaligned memory location with a specific type. It is needed to avoid Undefined Behavior when dereferencing addresses indivisible by sizeof(at).

Public Types

using iterator_category = std::random_access_iterator_tag#
using value_type = element_t#
using difference_type = std::ptrdiff_t#
using pointer = misaligned_ptr_gt<element_t>#
using reference = misaligned_ref_gt<element_t>#

Public Functions

inline reference operator*() const noexcept#
inline reference operator[](std::size_t i) noexcept#
inline value_type operator[](std::size_t i) const noexcept#
inline misaligned_ptr_gt(byte_t *ptr) noexcept#
inline misaligned_ptr_gt operator++(int) noexcept#
inline misaligned_ptr_gt operator--(int) noexcept#
inline misaligned_ptr_gt operator+(difference_type d) noexcept#
inline misaligned_ptr_gt operator-(difference_type d) noexcept#
inline misaligned_ptr_gt &operator++() noexcept#
inline misaligned_ptr_gt &operator--() noexcept#
inline misaligned_ptr_gt &operator+=(difference_type d) noexcept#
inline misaligned_ptr_gt &operator-=(difference_type d) noexcept#
inline bool operator==(misaligned_ptr_gt const &other) noexcept#
inline bool operator!=(misaligned_ptr_gt const &other) noexcept#

Private Types

using element_t = at#
using mutable_t = typename std::remove_const<element_t>::type#

Private Members

byte_t *ptr_#
template<typename scalar_at>
class span_gt#
#include <index.hpp>

Non-owning memory range view, similar to std::span, but for C++11.

Public Functions

inline span_gt() noexcept#
inline span_gt(scalar_at *begin, scalar_at *end) noexcept#
inline span_gt(scalar_at *begin, std::size_t size) noexcept#
inline scalar_at *data() const noexcept#
inline std::size_t size() const noexcept#
inline scalar_at *begin() const noexcept#
inline scalar_at *end() const noexcept#
inline operator scalar_at*() const noexcept#

Private Members

scalar_at *data_#
std::size_t size_#
template<typename scalar_at, typename allocator_at = std::allocator<scalar_at>>
class buffer_gt#
#include <index.hpp>

Similar to std::vector, but doesn’t support dynamic resizing. On the bright side, this can’t throw exceptions.

Public Functions

inline buffer_gt() noexcept#
inline buffer_gt(std::size_t size) noexcept#
inline ~buffer_gt() noexcept#
inline scalar_at *data() const noexcept#
inline std::size_t size() const noexcept#
inline scalar_at *begin() const noexcept#
inline scalar_at *end() const noexcept#
inline operator scalar_at*() const noexcept#
inline scalar_at &operator[](std::size_t i) noexcept#
inline scalar_at const &operator[](std::size_t i) const noexcept#
inline explicit operator bool() const noexcept#
inline scalar_at *release() noexcept#
buffer_gt(buffer_gt const&) = delete#
buffer_gt &operator=(buffer_gt const&) = delete#
inline buffer_gt(buffer_gt &&other) noexcept#
inline buffer_gt &operator=(buffer_gt &&other) noexcept#

Private Members

scalar_at *data_#
std::size_t size_#
class error_t#
#include <index.hpp>

A lightweight error class for handling error messages, which are expected to be allocated in static memory.

Public Functions

inline error_t(char const *message = nullptr) noexcept#
inline error_t &operator=(char const *message) noexcept#
error_t(error_t const&) = delete#
error_t &operator=(error_t const&) = delete#
inline error_t(error_t &&other) noexcept#
inline error_t &operator=(error_t &&other) noexcept#
inline explicit operator bool() const noexcept#
inline char const *what() const noexcept#
inline char const *release() noexcept#
inline ~error_t() noexcept#
inline void raise() noexcept#

Private Members

char const *message_ = {}#
template<typename result_at>
struct expected_gt#
#include <index.hpp>

Similar to std::expected in C++23, wraps a statement evaluation result, or an error. It’s used to avoid raising exception, and gracefully propagate the error.

Template Parameters:

result_at – The type of the expected result.

Public Functions

inline operator result_at&() &#
inline operator result_at&&() &&#
inline result_at const &operator*() const noexcept#
inline explicit operator bool() const noexcept#
inline expected_gt failed(error_t message) noexcept#

Public Members

result_at result#
error_t error#
template<typename allocator_at = std::allocator<byte_t>>
class bitset_gt#
#include <index.hpp>

Light-weight bitset implementation to sync nodes updates during graph mutations. Extends basic functionality with atomic operations.

Public Functions

inline bitset_gt() noexcept#
inline ~bitset_gt() noexcept#
inline explicit operator bool() const noexcept#
inline void clear() noexcept#
inline void reset() noexcept#
inline bitset_gt(std::size_t capacity) noexcept#
inline bitset_gt(bitset_gt &&other) noexcept#
inline bitset_gt &operator=(bitset_gt &&other) noexcept#
bitset_gt(bitset_gt const&) = delete#
bitset_gt &operator=(bitset_gt const&) = delete#
inline bool test(std::size_t i) const noexcept#
inline bool set(std::size_t i) noexcept#
inline bool atomic_set(std::size_t i) noexcept#
inline void atomic_reset(std::size_t i) noexcept#
inline lock_t lock(std::size_t i) noexcept#

Private Types

using allocator_t = allocator_at#
using byte_t = typename allocator_t::value_type#
using compressed_slot_t = unsigned long#

Private Members

compressed_slot_t *slots_ = {}#
std::size_t count_ = {}#

Number of slots.

Private Static Functions

static inline constexpr std::size_t bits_per_slot()#
static inline constexpr compressed_slot_t bits_mask()#
static inline constexpr std::size_t slots(std::size_t bits)#
class lock_t#
#include <index.hpp>

Public Functions

inline ~lock_t() noexcept#
inline lock_t(bitset_gt &bitset, std::size_t bit_offset) noexcept#

Private Members

bitset_gt &bitset_#
std::size_t bit_offset_#
template<typename element_at, typename comparator_at = std::less<void>, typename allocator_at = std::allocator<element_at>>
class max_heap_gt#
#include <index.hpp>

Similar to std::priority_queue, but allows raw access to underlying memory, in case you want to shuffle it or sort. Good for collections from 100s to 10’000s elements.

Public Types

using element_t = element_at#
using comparator_t = comparator_at#
using allocator_t = allocator_at#
using value_type = element_t#

Public Functions

inline max_heap_gt() noexcept#
inline max_heap_gt(max_heap_gt &&other) noexcept#
inline max_heap_gt &operator=(max_heap_gt &&other) noexcept#
max_heap_gt(max_heap_gt const&) = delete#
max_heap_gt &operator=(max_heap_gt const&) = delete#
inline ~max_heap_gt() noexcept#
inline void reset() noexcept#
inline bool empty() const noexcept#
inline std::size_t size() const noexcept#
inline std::size_t capacity() const noexcept#
inline element_t const &top() const noexcept#

Selects the largest element in the heap.

Returns:

Reference to the stored element.

inline void clear() noexcept#
inline bool reserve(std::size_t new_capacity) noexcept#
inline bool insert(element_t &&element) noexcept#
inline void insert_reserved(element_t &&element) noexcept#
inline element_t pop() noexcept#
inline void sort_ascending() noexcept#

Invalidates the “max-heap” property, transforming into ascending range.

inline void shrink(std::size_t n) noexcept#
inline element_t *data() noexcept#
inline element_t const *data() const noexcept#

Private Functions

inline std::size_t parent_idx(std::size_t i) const noexcept#
inline std::size_t left_child_idx(std::size_t i) const noexcept#
inline std::size_t right_child_idx(std::size_t i) const noexcept#
inline void shift_up(std::size_t i) noexcept#
inline void shift_down(std::size_t i) noexcept#

Private Members

element_t *elements_#
std::size_t size_#
std::size_t capacity_#

Private Static Functions

static inline bool less(element_t const &a, element_t const &b) noexcept#
template<typename element_at, typename comparator_at = std::less<void>, typename allocator_at = std::allocator<element_at>>
class sorted_buffer_gt#
#include <index.hpp>

Similar to std::priority_queue, but allows raw access to underlying memory and always keeps the data sorted. Ideal for small collections under 128 elements.

Public Types

using element_t = element_at#
using comparator_t = comparator_at#
using allocator_t = allocator_at#
using value_type = element_t#

Public Functions

inline sorted_buffer_gt() noexcept#
inline sorted_buffer_gt(sorted_buffer_gt &&other) noexcept#
inline sorted_buffer_gt &operator=(sorted_buffer_gt &&other) noexcept#
sorted_buffer_gt(sorted_buffer_gt const&) = delete#
sorted_buffer_gt &operator=(sorted_buffer_gt const&) = delete#
inline ~sorted_buffer_gt() noexcept#
inline void reset() noexcept#
inline bool empty() const noexcept#
inline std::size_t size() const noexcept#
inline std::size_t capacity() const noexcept#
inline element_t const &top() const noexcept#
inline void clear() noexcept#
inline bool reserve(std::size_t new_capacity) noexcept#
inline void insert_reserved(element_t &&element) noexcept#
inline bool insert(element_t &&element, std::size_t limit) noexcept#
Returns:

true if the entry was added, false if it wasn’t relevant enough.

inline element_t pop() noexcept#
inline void sort_ascending() noexcept#
inline void shrink(std::size_t n) noexcept#
inline element_t *data() noexcept#
inline element_t const *data() const noexcept#

Private Members

element_t *elements_#
std::size_t size_#
std::size_t capacity_#

Private Static Functions

static inline bool less(element_t const &a, element_t const &b) noexcept#
class uint40_t#
#include <index.hpp>

Five-byte integer type to address node clouds with over 4B entries.

Note

Avoid usage in 32bit environment

Public Functions

inline uint40_t() noexcept#
inline uint40_t(std::uint32_t n) noexcept#
inline uint40_t(std::uint64_t n) noexcept#
uint40_t(uint40_t&&) = default#
uint40_t(uint40_t const&) = default#
uint40_t &operator=(uint40_t&&) = default#
uint40_t &operator=(uint40_t const&) = default#
inline operator std::size_t() const noexcept#

Public Static Functions

static inline uint40_t max() noexcept#
static inline uint40_t min() noexcept#

Private Functions

inline uint40_t &broadcast(unsigned char c)#

Private Members

unsigned char octets[5]#
template<typename element_at>
struct hash_gt#
#include <index.hpp>

Public Functions

inline std::size_t operator()(element_at const &element) const noexcept#
template<>
struct hash_gt<uint40_t>#
#include <index.hpp>

Public Functions

inline std::size_t operator()(uint40_t const &element) const noexcept#
template<typename element_at, typename hasher_at = hash_gt<element_at>, typename allocator_at = std::allocator<byte_t>>
class growing_hash_set_gt#
#include <index.hpp>

Minimalistic hash-set implementation to track visited nodes during graph traversal.

It doesn’t support deletion of separate objects, but supports clear-ing all at once. It expects reserve to be called ahead of all insertions, so no resizes are needed. It also assumes 0xFF...FF slots to be unused, to simplify the design. It uses linear probing, the number of slots is always a power of two, and it uses linear-probing in case of bucket collisions.

Public Functions

inline growing_hash_set_gt() noexcept#
inline ~growing_hash_set_gt() noexcept#
inline explicit operator bool() const noexcept#
inline std::size_t size() const noexcept#
inline void clear() noexcept#
inline void reset() noexcept#
inline growing_hash_set_gt(std::size_t capacity) noexcept#
inline growing_hash_set_gt(growing_hash_set_gt &&other) noexcept#
inline growing_hash_set_gt &operator=(growing_hash_set_gt &&other) noexcept#
growing_hash_set_gt(growing_hash_set_gt const&) = delete#
growing_hash_set_gt &operator=(growing_hash_set_gt const&) = delete#
inline bool test(element_t const &elem) const noexcept#
inline bool set(element_t const &elem) noexcept#
Returns:

Similar to bitset_gt, returns the previous value.

inline bool reserve(std::size_t new_capacity) noexcept#

Private Types

using element_t = element_at#
using hasher_t = hasher_at#
using allocator_t = allocator_at#
using byte_t = typename allocator_t::value_type#

Private Members

element_t *slots_ = {}#
std::size_t capacity_ = {}#

Number of slots.

std::size_t count_ = {}#

Number of populated.

hasher_t hasher_ = {}#
template<typename element_at, typename allocator_at = std::allocator<element_at>>
class ring_gt#
#include <index.hpp>

Basic single-threaded ring class, used for all kinds of task queues.

Public Types

using element_t = element_at#
using allocator_t = allocator_at#
using value_type = element_t#

Public Functions

inline explicit ring_gt(allocator_t const &alloc = allocator_t()) noexcept#
ring_gt(ring_gt const&) = delete#
ring_gt &operator=(ring_gt const&) = delete#
inline ring_gt(ring_gt &&other) noexcept#
inline ring_gt &operator=(ring_gt &&other) noexcept#
inline void swap(ring_gt &other) noexcept#
inline ~ring_gt() noexcept#
inline bool empty() const noexcept#
inline size_t capacity() const noexcept#
inline size_t size() const noexcept#
inline void clear() noexcept#
inline void reset() noexcept#
inline bool reserve(std::size_t n) noexcept#
inline void push(element_t const &value) noexcept#
inline bool try_push(element_t const &value) noexcept#
inline bool try_pop(element_t &value) noexcept#
inline element_t const &operator[](std::size_t i) const noexcept#

Private Members

element_t *elements_ = {}#
std::size_t capacity_ = {}#
std::size_t head_ = {}#
std::size_t tail_ = {}#
bool empty_ = {true}#
allocator_t allocator_ = {}#
struct index_config_t#
#include <index.hpp>

Configuration settings for the index construction. Includes the main ::connectivity parameter (M in the paper) and two expansion factors - for construction and search.

Subclassed by unum::usearch::index_dense_config_t

Public Functions

inline index_config_t() = default#
inline index_config_t(std::size_t c) noexcept#
inline index_config_t(std::size_t c, std::size_t cb) noexcept#

Public Members

std::size_t connectivity = default_connectivity()#

Number of neighbors per graph node. Defaults to 32 in FAISS and 16 in hnswlib.

It is called M in the paper.

std::size_t connectivity_base = default_connectivity() * 2#

Number of neighbors per graph node in base level graph. Defaults to double of the other levels, so 64 in FAISS and 32 in hnswlib.

It is called M0 in the paper.

struct index_limits_t#
#include <index.hpp>

Public Functions

inline index_limits_t(std::size_t n, std::size_t t) noexcept#
inline index_limits_t(std::size_t n = 0) noexcept#
inline std::size_t threads() const noexcept#
inline std::size_t concurrency() const noexcept#

Public Members

std::size_t members = 0#
std::size_t threads_add = std::thread::hardware_concurrency()#
std::size_t threads_search = std::thread::hardware_concurrency()#
struct index_update_config_t#
#include <index.hpp>

Public Members

std::size_t expansion = default_expansion_add()#

Hyper-parameter controlling the quality of indexing. Defaults to 40 in FAISS and 200 in hnswlib.

It is called efConstruction in the paper.

std::size_t thread = 0#

Optional thread identifier for multi-threaded construction.

struct index_search_config_t#
#include <index.hpp>

Public Members

std::size_t expansion = default_expansion_search()#

Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.

It is called ef in the paper.

std::size_t thread = 0#

Optional thread identifier for multi-threaded construction.

bool exact = false#

Brute-forces exhaustive search over all entries in the index.

struct index_cluster_config_t#
#include <index.hpp>

Public Members

std::size_t expansion = default_expansion_search()#

Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.

It is called ef in the paper.

std::size_t thread = 0#

Optional thread identifier for multi-threaded construction.

struct index_copy_config_t#
#include <index.hpp>

Subclassed by unum::usearch::index_dense_copy_config_t

struct index_join_config_t#
#include <index.hpp>

Public Members

std::size_t max_proposals = 0#

Controls maximum number of proposals per man during stable marriage.

std::size_t expansion = default_expansion_search()#

Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.

It is called ef in the paper.

bool exact = false#

Brute-forces exhaustive search over all entries in the index.

struct dummy_predicate_t#
#include <index.hpp>

An example of what a USearch-compatible ad-hoc filter would look like.

A similar function object can be passed to search queries to further filter entries on their auxiliary properties, such as some categorical keys stored in an external DBMS.

Public Functions

template<typename member_at>
inline constexpr bool operator()(member_at&&) const noexcept#
struct dummy_callback_t#
#include <index.hpp>

An example of what a USearch-compatible ad-hoc operation on in-flight entries.

This kind of callbacks is used when the engine is being updated and you want to patch the entries, while their are still under locks - limiting concurrent access and providing consistency.

Public Functions

template<typename member_at>
inline void operator()(member_at&&) const noexcept#
struct dummy_progress_t#
#include <index.hpp>

An example of what a USearch-compatible progress-bar should look like.

This is particularly helpful when handling long-running tasks, like serialization, saving, and loading from disk, or index-level joins. The reporter checks return value to continue or stop the process, false means need to stop.

Public Functions

inline bool operator()(std::size_t, std::size_t) const noexcept#
struct dummy_prefetch_t#
#include <index.hpp>

An example of what a USearch-compatible values prefetching mechanism should look like.

USearch is designed to handle very large datasets, that may not fir into RAM. Fetching from external memory is very expensive, so we’ve added a pre-fetching mechanism, that accepts multiple objects at once, to cache in RAM ahead of the computation. The received iterators support both get_slot and get_key operations. An example usage may look like this:

template <typename member_citerator_like_at>
inline void operator()(member_citerator_like_at, member_citerator_like_at) const noexcept {
    for (; begin != end; ++begin)
        io_uring_prefetch(offset_in_file(get_key(begin)));
}

Public Functions

template<typename member_citerator_like_at>
inline void operator()(member_citerator_like_at, member_citerator_like_at) const noexcept#
struct dummy_executor_t#
#include <index.hpp>

An example of what a USearch-compatible executor (thread-pool) should look like.

It’s expected to have parallel(callback) API to schedule one task per thread; an identical fixed(count, callback) and dynamic(count, callback) overloads that also accepts the number of tasks, and somehow schedules them between threads; as well as size() to determine the number of available threads.

Public Functions

inline dummy_executor_t() noexcept#
inline std::size_t size() const noexcept#
template<typename thread_aware_function_at>
inline void fixed(std::size_t tasks, thread_aware_function_at &&thread_aware_function) noexcept#
template<typename thread_aware_function_at>
inline void dynamic(std::size_t tasks, thread_aware_function_at &&thread_aware_function) noexcept#
template<typename thread_aware_function_at>
inline void parallel(thread_aware_function_at &&thread_aware_function) noexcept#
struct dummy_key_to_key_mapping_t#
#include <index.hpp>

An example of what a USearch-compatible key-to-key mapping should look like.

This is particularly helpful for “Semantic Joins”, where we map entries of one collection to entries of another. In asymmetric setups, where A -> B is needed, but B -> A is not, this can be passed to minimize memory usage.

Public Functions

template<typename key_at>
inline member_ref_t operator[](key_at&&) const noexcept#
struct member_ref_t#
#include <index.hpp>

Public Functions

template<typename key_at>
inline member_ref_t &operator=(key_at&&) noexcept#
template<typename, typename at>
struct has_reset_gt#
#include <index.hpp>
template<typename check_at, typename return_at, typename ...args_at>
struct has_reset_gt<check_at, return_at(args_at...)>#
#include <index.hpp>

Public Static Attributes

static constexpr bool value = type::value#

Private Members

decltype(check< check_at >(0)) typedef type

Private Static Functions

template<typename at>
static constexpr auto check(at*) -> typename std::is_same<decltype(std::declval<at>().reset(std::declval<args_at>()...)), return_at>::type#
template<typename>
static constexpr std::false_type check(...)#
struct serialization_result_t#
#include <index.hpp>

Public Functions

inline explicit operator bool() const noexcept#
inline serialization_result_t failed(error_t message) noexcept#

Public Members

error_t error#
class output_file_t#
#include <index.hpp>

Smart-pointer wrapping the LibC for binary file outputs.

This class raises no exceptions and corresponds errors through serialization_result_t. The class automatically closes the file when the object is destroyed.

Public Functions

inline output_file_t(char const *path) noexcept#
inline ~output_file_t() noexcept#
inline output_file_t(output_file_t &&other) noexcept#
inline output_file_t &operator=(output_file_t &&other) noexcept#
inline serialization_result_t open_if_not() noexcept#
inline serialization_result_t write(void const *begin, std::size_t length) noexcept#
inline void close() noexcept#

Private Members

char const *path_ = nullptr#
std::FILE *file_ = nullptr#
class input_file_t#
#include <index.hpp>

Smart-pointer wrapping the LibC for binary files inputs.

This class raises no exceptions and corresponds errors through serialization_result_t. The class automatically closes the file when the object is destroyed.

Public Functions

inline input_file_t(char const *path) noexcept#
inline ~input_file_t() noexcept#
inline input_file_t(input_file_t &&other) noexcept#
inline input_file_t &operator=(input_file_t &&other) noexcept#
inline serialization_result_t open_if_not() noexcept#
inline serialization_result_t read(void *begin, std::size_t length) noexcept#
inline void close() noexcept#
inline explicit operator bool() const noexcept#
inline bool seek_to(std::size_t progress) noexcept#
inline bool seek_to_end() noexcept#
inline bool infer_progress(std::size_t &progress) noexcept#

Private Members

char const *path_ = nullptr#
std::FILE *file_ = nullptr#
class memory_mapped_file_t#
#include <index.hpp>

Represents a memory-mapped file or a pre-allocated anonymous memory region.

This class provides a convenient way to memory-map a file and access its contents as a block of memory. The class handles platform-specific memory-mapping operations on Windows, Linux, and MacOS. The class automatically closes the file when the object is destroyed.

Public Functions

inline explicit operator bool() const noexcept#
inline byte_t *data() noexcept#
inline byte_t const *data() const noexcept#
inline std::size_t size() const noexcept#
inline memory_mapped_file_t() noexcept#
inline memory_mapped_file_t(char const *path) noexcept#
inline ~memory_mapped_file_t() noexcept#
inline memory_mapped_file_t(memory_mapped_file_t &&other) noexcept#
inline memory_mapped_file_t(byte_t *data, std::size_t length) noexcept#
inline memory_mapped_file_t &operator=(memory_mapped_file_t &&other) noexcept#
inline serialization_result_t open_if_not() noexcept#
inline void close() noexcept#

Private Members

char const *path_ = {}#

The path to the file to be memory-mapped.

void *ptr_ = {}#

A pointer to the memory-mapping.

size_t length_ = {}#

The length of the memory-mapped file in bytes.

int file_descriptor_ = {}#

The file descriptor on Linux and MacOS.

struct index_serialized_header_t#
#include <index.hpp>

Public Members

std::uint64_t size = 0#
std::uint64_t connectivity = 0#
std::uint64_t connectivity_base = 0#
std::uint64_t max_level = 0#
std::uint64_t entry_slot = 0#
template<typename key_at = default_key_t>
struct member_gt#
#include <index.hpp>

Public Members

key_at key#
std::size_t slot#
template<typename key_at = default_key_t>
struct member_cref_gt#
#include <index.hpp>

Public Members

misaligned_ref_gt<key_at const> key#
std::size_t slot#
template<typename key_at = default_key_t>
struct member_ref_gt#
#include <index.hpp>

Public Functions

inline operator member_cref_gt<key_at>() const noexcept#

Public Members

misaligned_ref_gt<key_at> key#
std::size_t slot#
template<typename distance_at = default_distance_t, typename key_at = default_key_t, typename compressed_slot_at = default_slot_t, typename dynamic_allocator_at = std::allocator<byte_t>, typename tape_allocator_at = dynamic_allocator_at>
class index_gt#
#include <index.hpp>

Approximate Nearest Neighbors Search index-structure using the Hierarchical Navigable Small World (HNSW) graphs algorithm. If classical containers store Key->Value mappings, this one can be seen as a network of keys, accelerating approximate Value~>Key visited_members.

Unlike most implementations, this one is generic anc can be used for any search, not just within equi-dimensional vectors. Examples range from texts to similar Chess positions.

Features#

- Thread-safe for concurrent construction, search, and updates.
- Doesn't allocate new threads, and reuses the ones its called from.
- Allows storing value externally, managing just the similarity index.
- Joins.

Usage#

Exceptions#

None of the methods throw exceptions in the “Release” compilation mode. It may only throw if your memory ::dynamic_allocator_at or ::metric_at isn’t safe to copy.

Serialization#

When serialized, doesn’t include any additional metadata. It is just the multi-level proximity-graph. You may want to store metadata about the used metric and key types somewhere else.

Details#

Like every HNSW implementation, USearch builds levels of “Proximity Graphs”. Every added vector forms a node in one or more levels of the graph. Every node is present in the base level. Every following level contains a smaller fraction of nodes. During search, the operation starts with the smaller levels and zooms-in on every following iteration of larger graph traversals.

Just one memory allocation is performed regardless of the number of levels. The adjacency lists across all levels are concatenated into that single buffer. That buffer starts with a “head”, that stores the metadata, such as the tallest “level” of the graph that it belongs to, the external “key”, and the number of “dimensions” in the vector.

, Predicates and Callbacks#

References and Iterators#

-   `member_citerator_t` and `member_iterator_t` have only slots, no indirections.

-   `member_cref_t` and `member_ref_t` contains the `slot` and a reference
    to the key. So it passes through 1 level of visited_members in `nodes_`.
    Retrieving the key via `get_key` will cause fetching yet another cache line.

-   `member_gt` contains an already prefetched copy of the key.
Template Parameters:
  • key_at – The type of primary objects stored in the index. The values, to which those map, are not managed by the same index structure.

  • compressed_slot_at – The smallest unsigned integer type to address indexed elements. It is used internally to maximize space-efficiency and is generally up-casted to in public interfaces. Can be a built-in , uint64_t, or our custom . Which makes the most sense for 4B+ entry indexes.

  • dynamic_allocator_at – Dynamic memory allocator for temporary buffers, visits indicators, and priority queues, needed during construction and traversals of graphs. The allocated buffers may be uninitialized.

  • tape_allocator_at – Potentially different memory allocator for primary allocations of nodes and vectors. It would never deallocate separate entries, and would only free all the space at once. The allocated buffers may be uninitialized.

Public Types

using distance_t = distance_at#
using vector_key_t = key_at#
using key_t = vector_key_t#
using compressed_slot_t = compressed_slot_at#
using dynamic_allocator_t = dynamic_allocator_at#
using tape_allocator_t = tape_allocator_at#
using member_cref_t = member_cref_gt<vector_key_t>#
using member_ref_t = member_ref_gt<vector_key_t>#
using member_iterator_t = member_iterator_gt<member_ref_t, index_gt>#
using member_citerator_t = member_iterator_gt<member_cref_t, index_gt const>#
using value_type = vector_key_t#
using allocator_type = dynamic_allocator_t#
using size_type = std::size_t#
using difference_type = std::ptrdiff_t#
using reference = member_ref_t#
using const_reference = member_cref_t#
using pointer = void#
using const_pointer = void#
using iterator = member_iterator_t#
using const_iterator = member_citerator_t#
using reverse_iterator = std::reverse_iterator<member_iterator_t>#
using reverse_const_iterator = std::reverse_iterator<member_citerator_t>#
using dynamic_allocator_traits_t = std::allocator_traits<dynamic_allocator_t>#
using byte_t = typename dynamic_allocator_t::value_type#
using tape_allocator_traits_t = std::allocator_traits<tape_allocator_t>#

Public Functions

inline std::size_t connectivity() const noexcept#
inline std::size_t capacity() const noexcept#
inline std::size_t size() const noexcept#
inline std::size_t max_level() const noexcept#
inline index_config_t const &config() const noexcept#
inline index_limits_t const &limits() const noexcept#
inline bool is_immutable() const noexcept#
inline explicit index_gt(index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept#

Exceptions#

Doesn’t throw, unless the ::metric’s and ::allocators’s throw on copy-construction.

inline index_gt fork() noexcept#

Clones the structure with the same hyper-parameters, but without contents.

inline ~index_gt() noexcept#
inline index_gt(index_gt &&other) noexcept#
inline index_gt &operator=(index_gt &&other) noexcept#
inline copy_result_t copy(index_copy_config_t config = {}) const noexcept#
inline member_citerator_t cbegin() const noexcept#
inline member_citerator_t cend() const noexcept#
inline member_citerator_t begin() const noexcept#
inline member_citerator_t end() const noexcept#
inline member_iterator_t begin() noexcept#
inline member_iterator_t end() noexcept#
inline member_ref_t at(std::size_t slot) noexcept#
inline member_cref_t at(std::size_t slot) const noexcept#
inline member_iterator_t iterator_at(std::size_t slot) noexcept#
inline member_citerator_t citerator_at(std::size_t slot) const noexcept#
inline dynamic_allocator_t const &dynamic_allocator() const noexcept#
inline tape_allocator_t const &tape_allocator() const noexcept#
inline void clear() noexcept#

Erases all the vectors from the index.

Will change size() to zero, but will keep the same capacity(). Will keep the number of available threads/contexts the same as it was.

inline void reset() noexcept#

Erases all members from index, closing files, and returning RAM to OS.

Will change both size() and capacity() to zero. Will deallocate all threads/contexts. If the index is memory-mapped - releases the mapping and the descriptor.

inline void swap(index_gt &other) noexcept#

Swaps the underlying memory buffers and thread contexts.

inline bool reserve (index_limits_t limits) usearch_noexcept_m

Increases the capacity() of the index to allow adding more vectors.

Returns:

true on success, false on memory allocation errors.

template<typename value_at, typename metric_at, typename callback_at = dummy_callback_t, typename prefetch_at = dummy_prefetch_t> inline add_result_t add (vector_key_t key, value_at &&value, metric_at &&metric, index_update_config_t config={}, callback_at &&callback=callback_at{}, prefetch_at &&prefetch=prefetch_at{}) usearch_noexcept_m

Inserts a new entry into the index. Thread-safe. Supports heterogeneous lookups. Expects needed capacity to be reserved ahead of time: size() < capacity().

Template Parameters:

metric_at – A function responsible for computing the distance (dis-similarity) between two objects. It should be callable into distinctly different scenarios:

  • distance_t operator() (value_at, entry_at) - from new object to existing entries.

  • distance_t operator() (entry_at, entry_at) - between existing entries. Where any possible entry_at has both two interfaces: std::size_t slot(), vector_key_t key().

Parameters:
  • key[in] External identifier/name/descriptor for the new entry.

  • value[in] Content that will be compared against other entries to index.

  • metric[in] Callable object measuring distance between ::value and present objects.

  • config[in] Configuration options for this specific operation.

  • callback[in] On-success callback, executed while the member_ref_t is still under lock.

template<typename value_at, typename metric_at, typename callback_at = dummy_callback_t, typename prefetch_at = dummy_prefetch_t> inline add_result_t update (member_iterator_t iterator, vector_key_t key, value_at &&value, metric_at &&metric, index_update_config_t config={}, callback_at &&callback=callback_at{}, prefetch_at &&prefetch=prefetch_at{}) usearch_noexcept_m

Update an existing entry. Thread-safe. Supports heterogeneous lookups.

Template Parameters:

metric_at – A function responsible for computing the distance (dis-similarity) between two objects. It should be callable into distinctly different scenarios:

  • distance_t operator() (value_at, entry_at) - from new object to existing entries.

  • distance_t operator() (entry_at, entry_at) - between existing entries. For any possible entry_at following interfaces will work:

  • std::size_t get_slot(entry_at const &)

  • vector_key_t get_key(entry_at const &)

Parameters:
  • iterator[in] Iterator pointing to an existing entry to be replaced.

  • key[in] External identifier/name/descriptor for the entry.

  • value[in] Content that will be compared against other entries in the index.

  • metric[in] Callable object measuring distance between ::value and present objects.

  • config[in] Configuration options for this specific operation.

  • callback[in] On-success callback, executed while the member_ref_t is still under lock.

template<typename value_at, typename metric_at, typename predicate_at = dummy_predicate_t, typename prefetch_at = dummy_prefetch_t> inline search_result_t search (value_at &&query, std::size_t wanted, metric_at &&metric, index_search_config_t config={}, predicate_at &&predicate=predicate_at{}, prefetch_at &&prefetch=prefetch_at{}) const usearch_noexcept_m

Searches for the closest elements to the given ::query. Thread-safe.

Parameters:
  • query[in] Content that will be compared against other entries in the index.

  • wanted[in] The upper bound for the number of results to return.

  • config[in] Configuration options for this specific operation.

  • predicate[in] Optional filtering predicate for member_cref_t.

Returns:

Smart object referencing temporary memory. Valid until next search(), add(), or cluster().

template<typename value_at, typename metric_at, typename predicate_at = dummy_predicate_t, typename prefetch_at = dummy_prefetch_t>
inline cluster_result_t cluster(value_at &&query, std::size_t level, metric_at &&metric, index_cluster_config_t config = {}, predicate_at &&predicate = predicate_at{}, prefetch_at &&prefetch = prefetch_at{}) const noexcept#

Identifies the closest cluster to the given ::query. Thread-safe.

Parameters:
  • query[in] Content that will be compared against other entries in the index.

  • level[in] The index level to target. Higher means lower resolution.

  • config[in] Configuration options for this specific operation.

  • predicate[in] Optional filtering predicate for member_cref_t.

Returns:

Smart object referencing temporary memory. Valid until next search(), add(), or cluster().

inline stats_t stats() const noexcept#
inline stats_t stats(std::size_t level) const noexcept#
inline stats_t stats(stats_t *stats_per_level, std::size_t max_level) const noexcept#
inline std::size_t memory_usage(std::size_t allocator_entry_bytes = default_allocator_entry_bytes()) const noexcept#

A relatively accurate lower bound on the amount of memory consumed by the system. In practice it’s error will be below 10%.

See also

serialized_length for the length of the binary serialized representation.

inline std::size_t memory_usage_per_node(level_t level) const noexcept#
inline std::size_t serialized_length() const noexcept#

Estimate the binary length (in bytes) of the serialized index.

template<typename output_callback_at, typename progress_at = dummy_progress_t>
inline serialization_result_t save_to_stream(output_callback_at &&output, progress_at &&progress = {}) const noexcept#

Saves serialized binary index representation to a stream.

template<typename input_callback_at, typename progress_at = dummy_progress_t>
inline serialization_result_t load_from_stream(input_callback_at &&input, progress_at &&progress = {}) noexcept#

Symmetric to save_from_stream, pulls data from a stream.

template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(char const *file_path, progress_at &&progress = {}) const noexcept#
template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(char const *file_path, progress_at &&progress = {}) noexcept#
template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(output_file_t file, progress_at &&progress = {}) const noexcept#

Saves serialized binary index representation to a file, generally on disk.

template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(memory_mapped_file_t file, std::size_t offset = 0, progress_at &&progress = {}) const noexcept#

Memory-maps the serialized binary index representation from disk, without copying data into RAM, and fetching it on-demand.

template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(input_file_t file, progress_at &&progress = {}) noexcept#

Loads the serialized binary index representation from disk to RAM. Adjusts the configuration properties of the constructed index to match the settings in the file.

template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(memory_mapped_file_t file, std::size_t offset = 0, progress_at &&progress = {}) noexcept#

Loads the serialized binary index representation from disk to RAM. Adjusts the configuration properties of the constructed index to match the settings in the file.

template<typename progress_at = dummy_progress_t>
inline serialization_result_t view(memory_mapped_file_t file, std::size_t offset = 0, progress_at &&progress = {}) noexcept#

Memory-maps the serialized binary index representation from disk, without copying data into RAM, and fetching it on-demand.

template<typename values_at, typename metric_at, typename slot_transition_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t, typename prefetch_at = dummy_prefetch_t>
inline void compact(values_at &&values, metric_at &&metric, slot_transition_at &&slot_transition, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}, prefetch_at &&prefetch = prefetch_at{}) noexcept#

Performs compaction on the whole HNSW index, purging some entries and links to them, while also generating a more efficient mapping, putting the more frequently used entries closer together.

Scans the whole collection, removing the links leading towards banned entries. This essentially isolates some nodes from the rest of the graph, while keeping their outgoing links, in case the node is structurally relevant and has a crucial role in the index. It won’t reclaim the memory.

Parameters:
  • allow_member[in] Predicate to mark nodes for isolation.

  • executor[in] Thread-pool to execute the job in parallel.

  • progress[in] Callback to report the execution progress.

template<typename allow_member_at = dummy_predicate_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline void isolate(allow_member_at &&allow_member, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}) noexcept#

Scans the whole collection, removing the links leading towards banned entries. This essentially isolates some nodes from the rest of the graph, while keeping their outgoing links, in case the node is structurally relevant and has a crucial role in the index. It won’t reclaim the memory.

Parameters:
  • allow_member[in] Predicate to mark nodes for isolation.

  • executor[in] Thread-pool to execute the job in parallel.

  • progress[in] Callback to report the execution progress.

Private Types

using neighbors_count_t = std::uint32_t#

Integer for the number of node neighbors at a specific level of the multi-level graph. It’s selected to be std::uint32_t to improve the alignment in most common cases.

using level_t = std::int16_t#
using nodes_mutexes_t = bitset_gt<dynamic_allocator_t>#
using visits_hash_set_t = growing_hash_set_gt<compressed_slot_t, hash_gt<compressed_slot_t>, dynamic_allocator_t>#
using candidates_view_t = span_gt<candidate_t const>#
using candidates_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc<candidate_t>#
using top_candidates_t = sorted_buffer_gt<candidate_t, std::less<candidate_t>, candidates_allocator_t>#
using next_candidates_t = max_heap_gt<candidate_t, std::less<candidate_t>, candidates_allocator_t>#
using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc<node_t>#
using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc<context_t>#
using span_bytes_t = span_gt<byte_t>#

Private Functions

inline span_bytes_t node_bytes_(node_t node) const noexcept#
inline std::size_t node_bytes_(level_t level) const noexcept#
inline std::size_t node_neighbors_bytes_(node_t node) const noexcept#
inline std::size_t node_neighbors_bytes_(level_t level) const noexcept#
inline span_bytes_t node_malloc_(level_t level) noexcept#
inline node_t node_make_(vector_key_t key, level_t level) noexcept#
inline node_t node_make_copy_(span_bytes_t old_bytes) noexcept#
inline void node_free_(std::size_t idx) noexcept#
inline node_t node_at_(std::size_t idx) const noexcept#
inline neighbors_ref_t neighbors_base_(node_t node) const noexcept#
inline neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept#
inline neighbors_ref_t neighbors_(node_t node, level_t level) const noexcept#
inline node_lock_t node_lock_(std::size_t slot) const noexcept#
template<typename value_at, typename metric_at, typename prefetch_at> inline void connect_node_across_levels_ (value_at &&value, metric_at &&metric, prefetch_at &&prefetch, std::size_t node_slot, std::size_t entry_slot, level_t max_level, level_t target_level, index_update_config_t const &config, context_t &context) usearch_noexcept_m
template<typename metric_at> inline std::size_t connect_new_node_ (metric_at &&metric, std::size_t new_slot, level_t level, context_t &context) usearch_noexcept_m
template<typename value_at, typename metric_at> inline void reconnect_neighbor_nodes_ (metric_at &&metric, std::size_t new_slot, value_at &&value, level_t level, context_t &context) usearch_noexcept_m
inline level_t choose_random_level_(std::default_random_engine &level_generator) const noexcept#
template<typename value_at, typename metric_at, typename prefetch_at = dummy_prefetch_t>
inline std::size_t search_for_one_(value_at &&query, metric_at &&metric, prefetch_at &&prefetch, std::size_t closest_slot, level_t begin_level, level_t end_level, context_t &context) const noexcept#
template<typename value_at, typename metric_at, typename prefetch_at = dummy_prefetch_t>
inline bool search_to_insert_(value_at &&query, metric_at &&metric, prefetch_at &&prefetch, std::size_t start_slot, std::size_t new_slot, level_t level, std::size_t top_limit, context_t &context) noexcept#

Traverses a layer of a graph, to find the best place to insert a new node. Locks the nodes in the process, assuming other threads are updating neighbors lists.

Returns:

true if procedure succeeded, false if run out of memory.

template<typename value_at, typename metric_at, typename predicate_at, typename prefetch_at> inline bool search_to_find_in_base_ (value_at &&query, metric_at &&metric, predicate_at &&predicate, prefetch_at &&prefetch, std::size_t start_slot, std::size_t expansion, context_t &context) const usearch_noexcept_m

Traverses the base layer of a graph, to find a close match. Doesn’t lock any nodes, assuming read-only simultaneous access.

Returns:

true if procedure succeeded, false if run out of memory.

template<typename value_at, typename metric_at, typename predicate_at>
inline void search_exact_(value_at &&query, metric_at &&metric, predicate_at &&predicate, std::size_t count, context_t &context) const noexcept#

Iterates through all members, without actually touching the index.

template<typename metric_at>
inline candidates_view_t refine_(metric_at &&metric, std::size_t needed, top_candidates_t &top, context_t &context) const noexcept#

This algorithm from the original paper implements a heuristic, that massively reduces the number of connections a point has, to keep only the neighbors, that are from each other.

Private Members

index_config_t config_ = {}#
index_limits_t limits_ = {}#
mutable dynamic_allocator_t dynamic_allocator_ = {}#
tape_allocator_t tape_allocator_ = {}#
precomputed_constants_t pre_ = {}#
memory_mapped_file_t viewed_file_ = {}#
mutable usearch_align_m std::atomic< std::size_t > nodes_capacity_   = {}

Number of “slots” available for node_t objects. Equals to .

mutable usearch_align_m std::atomic< std::size_t > nodes_count_   = {}

Number of “slots” already storing non-null nodes.

std::mutex global_mutex_ = {}#

Controls access to max_level_ and entry_slot_. If any thread is updating those values, no other threads can add() or search().

level_t max_level_ = {}#

The level of the top-most graph in the index. Grows as the logarithm of size, starts from zero.

std::size_t entry_slot_ = {}#

The slot in which the only node of the top-level graph is stored.

buffer_gt<node_t, nodes_allocator_t> nodes_ = {}#

C-style array of node_t smart-pointers.

mutable nodes_mutexes_t nodes_mutexes_ = {}#

Mutex, that limits concurrent access to nodes_.

mutable buffer_gt<context_t, contexts_allocator_t> contexts_ = {}#

Array of thread-specific buffers for temporary data.

Private Static Functions

static inline constexpr std::size_t node_head_bytes_()#

How many bytes of memory are needed to form the “head” of the node.

static inline precomputed_constants_t precompute_(index_config_t const &config) noexcept#
struct add_result_t#
#include <index.hpp>

Public Functions

inline explicit operator bool() const noexcept#
inline add_result_t failed(error_t message) noexcept#

Public Members

error_t error = {}#
std::size_t new_size = {}#
std::size_t visited_members = {}#
std::size_t computed_distances = {}#
std::size_t slot = {}#
struct candidate_t#

A space-efficient internal data-structure used in graph traversal queues.

Public Functions

inline bool operator<(candidate_t other) const noexcept#

Public Members

distance_t distance#
compressed_slot_t slot#
class candidates_iterator_t#

Public Types

using element_t = compressed_slot_t#
using iterator_category = std::forward_iterator_tag#
using value_type = element_t#
using difference_type = std::ptrdiff_t#
using pointer = misaligned_ptr_gt<element_t>#
using reference = misaligned_ref_gt<element_t>#

Public Functions

inline reference operator*() const noexcept#
inline candidates_iterator_t(index_gt const &index, neighbors_ref_t neighbors, visits_hash_set_t &visits, std::size_t progress) noexcept#
inline candidates_iterator_t operator++(int) noexcept#
inline candidates_iterator_t &operator++() noexcept#
inline bool operator==(candidates_iterator_t const &other) noexcept#
inline bool operator!=(candidates_iterator_t const &other) noexcept#
inline vector_key_t key() const noexcept#
inline compressed_slot_t slot() const noexcept#

Private Functions

inline candidates_iterator_t &skip_missing() noexcept#

Private Members

index_gt const &index_#
neighbors_ref_t neighbors_#
visits_hash_set_t &visits_#
std::size_t current_#

Friends

friend struct candidates_range_t
inline friend std::size_t get_slot(candidates_iterator_t const &it) noexcept#
inline friend vector_key_t get_key(candidates_iterator_t const &it) noexcept#
struct candidates_range_t#

Public Functions

inline candidates_iterator_t begin() const noexcept#
inline candidates_iterator_t end() const noexcept#

Public Members

index_gt const &index#
neighbors_ref_t neighbors#
visits_hash_set_t &visits#
struct cluster_result_t#
#include <index.hpp>

Public Functions

inline explicit operator bool() const noexcept#
inline cluster_result_t failed(error_t message) noexcept#

Public Members

error_t error = {}#
std::size_t visited_members = {}#
std::size_t computed_distances = {}#
match_t cluster = {}#
struct context_t#

A package of all kinds of temporary data-structures, that the threads would reuse to process requests. Similar to having all of those as separate thread_local global variables.

Public Functions

template<typename value_at, typename metric_at, typename entry_at>
inline distance_t measure(value_at const &first, entry_at const &second, metric_at &&metric) noexcept#
template<typename metric_at, typename entry_at>
inline distance_t measure(entry_at const &first, entry_at const &second, metric_at &&metric) noexcept#

Public Members

top_candidates_t top_candidates = {}#
next_candidates_t next_candidates = {}#
visits_hash_set_t visits = {}#
std::default_random_engine level_generator = {}#
std::size_t iteration_cycles = {}#
std::size_t computed_distances_count = {}#
struct copy_result_t#
#include <index.hpp>

Public Functions

inline explicit operator bool() const noexcept#
inline copy_result_t failed(error_t message) noexcept#

Public Members

error_t error#
index_gt index#
struct match_t#
#include <index.hpp>

Describes a matched search result, augmenting member_cref_t contents with distance to the query object.

Public Functions

inline match_t() noexcept#
inline match_t(member_cref_t member, distance_t distance) noexcept#
inline match_t(match_t &&other) noexcept#
inline match_t(match_t const &other) noexcept#
inline match_t &operator=(match_t const &other) noexcept#
inline match_t &operator=(match_t &&other) noexcept#

Public Members

member_cref_t member#
distance_t distance#
template<typename ref_at, typename index_at>
class member_iterator_gt#
#include <index.hpp>

Public Types

using iterator_category = std::random_access_iterator_tag#
using value_type = ref_t#
using difference_type = std::ptrdiff_t#
using pointer = void#
using reference = ref_t#

Public Functions

inline reference operator*() const noexcept#
inline vector_key_t key() const noexcept#
inline member_iterator_gt operator++(int) noexcept#
inline member_iterator_gt operator--(int) noexcept#
inline member_iterator_gt operator+(difference_type d) noexcept#
inline member_iterator_gt operator-(difference_type d) noexcept#
inline member_iterator_gt &operator++() noexcept#
inline member_iterator_gt &operator--() noexcept#
inline member_iterator_gt &operator+=(difference_type d) noexcept#
inline member_iterator_gt &operator-=(difference_type d) noexcept#
inline bool operator==(member_iterator_gt const &other) const noexcept#
inline bool operator!=(member_iterator_gt const &other) const noexcept#

Private Types

using ref_t = ref_at#
using index_t = index_at#

Private Functions

inline member_iterator_gt() noexcept#
inline member_iterator_gt(index_t *index, std::size_t slot) noexcept#

Private Members

index_t *index_ = {}#
std::size_t slot_ = {}#

Friends

friend class index_gt
inline friend std::size_t get_slot(member_iterator_gt const &it) noexcept#
inline friend vector_key_t get_key(member_iterator_gt const &it) noexcept#
class neighbors_ref_t#

A slice of the node’s tape, containing a the list of neighbors for a node in a single graph level. It’s pre-allocated to fit as many neighbors “slots”, as may be needed at the target level, and starts with a single integer neighbors_count_t counter.

Public Functions

inline neighbors_ref_t(byte_t *tape) noexcept#
inline misaligned_ptr_gt<compressed_slot_t> begin() noexcept#
inline misaligned_ptr_gt<compressed_slot_t> end() noexcept#
inline misaligned_ptr_gt<compressed_slot_t const> begin() const noexcept#
inline misaligned_ptr_gt<compressed_slot_t const> end() const noexcept#
inline compressed_slot_t operator[](std::size_t i) const noexcept#
inline std::size_t size() const noexcept#
inline void clear() noexcept#
inline void push_back(compressed_slot_t slot) noexcept#

Private Members

byte_t *tape_#

Private Static Functions

static inline constexpr std::size_t shift(std::size_t i = 0)#
struct node_lock_t#

Public Functions

inline ~node_lock_t() noexcept#

Public Members

nodes_mutexes_t &mutexes#
std::size_t slot#
class node_t#

A loosely-structured handle for every node. One such node is created for every member. To minimize memory usage and maximize the number of entries per cache-line, it only stores to pointers. The internal tape starts with a vector_key_t key, then a level_t for the number of graph levels in which this member appears, then the { neighbors_count_t, compressed_slot_t, compressed_slot_t … } sequences for each-level.

Public Functions

inline explicit node_t(byte_t *tape) noexcept#
inline byte_t *tape() const noexcept#
inline byte_t *neighbors_tape() const noexcept#
inline explicit operator bool() const noexcept#
node_t() = default#
node_t(node_t const&) = default#
node_t &operator=(node_t const&) = default#
inline misaligned_ref_gt<vector_key_t const> ckey() const noexcept#
inline misaligned_ref_gt<vector_key_t> key() const noexcept#
inline misaligned_ref_gt<level_t> level() const noexcept#
inline void key(vector_key_t v) noexcept#
inline void level(level_t v) noexcept#

Private Members

byte_t *tape_ = {}#
struct precomputed_constants_t#

Public Members

double inverse_log_connectivity = {}#
std::size_t neighbors_bytes = {}#
std::size_t neighbors_base_bytes = {}#
class search_result_t#
#include <index.hpp>

Public Functions

inline search_result_t() noexcept#
inline search_result_t(search_result_t&&) = default#
inline search_result_t &operator=(search_result_t&&) = default#
inline explicit operator bool() const noexcept#
inline search_result_t failed(error_t message) noexcept#
inline operator std::size_t() const noexcept#
inline std::size_t size() const noexcept#
inline bool empty() const noexcept#
inline match_t operator[](std::size_t i) const noexcept#
inline match_t front() const noexcept#
inline match_t back() const noexcept#
inline bool contains(vector_key_t key) const noexcept#
inline match_t at(std::size_t i) const noexcept#
inline std::size_t merge_into(vector_key_t *keys, distance_t *distances, std::size_t old_count, std::size_t max_count) const noexcept#
inline std::size_t dump_to(vector_key_t *keys, distance_t *distances) const noexcept#
inline std::size_t dump_to(vector_key_t *keys) const noexcept#

Public Members

std::size_t count = {}#

Number of search results found.

std::size_t visited_members = {}#

Number of graph nodes traversed.

std::size_t computed_distances = {}#

Number of times the distances were computed.

error_t error = {}#

Private Functions

inline search_result_t(index_gt const &index, top_candidates_t &top) noexcept#

Private Members

node_t const *nodes_ = {}#
top_candidates_t const *top_ = {}#

Friends

friend class index_gt
struct stats_t#
#include <index.hpp>

Public Members

std::size_t nodes = {}#
std::size_t edges = {}#
std::size_t max_edges = {}#
std::size_t allocated_bytes = {}#
struct join_result_t#
#include <index.hpp>

Public Functions

inline explicit operator bool() const noexcept#
inline join_result_t failed(error_t message) noexcept#

Public Members

error_t error = {}#
std::size_t intersection_size = {}#
std::size_t engagements = {}#
std::size_t visited_members = {}#
std::size_t computed_distances = {}#

usearch/index_plugins.hpp#

Defines

__STDC_WANT_IEC_60559_TYPES_EXT__#
USEARCH_USE_OPENMP
USEARCH_USE_FP16LIB#
USEARCH_USE_SIMSIMD#

Typedefs

using u40_t = uint40_t#
using f16_native_t = void#
using f16_t = f16_bits_t#
using f64_t = double#
using f32_t = float#
using u64_t