API Reference#
usearch/index.hpp#
Single-header Vector Search engine.
- Author
Ash Vardanian
- Date
April 26, 2023
Defines
-
USEARCH_VERSION_MAJOR#
-
USEARCH_VERSION_MINOR#
-
USEARCH_VERSION_PATCH#
-
USEARCH_64BIT_ENV#
-
USEARCH_USE_OPENMP#
-
usearch_concat_helper_m(a, b)#
-
usearch_concat_m(a, b)#
-
usearch_stringify_helper_m(x)#
-
usearch_stringify_m(x)#
-
usearch_prefetch_m(ptr)#
-
usearch_profiled_m#
-
usearch_profile_name_m(name)#
-
usearch_pack_m#
-
usearch_align_m#
-
usearch_assert_m(must_be_true, message)#
-
usearch_noexcept_m#
Typedefs
-
using byte_t = char#
-
using bitset_t = bitset_gt<>#
-
template<typename metric_at, typename ...args_at>
using return_type_gt = typename std::result_of<metric_at(args_at...)>::type# C++17 and newer version deprecate the
std::result_of
-
using default_key_t = std::uint64_t#
-
using default_slot_t = std::uint32_t#
-
using default_distance_t = float#
Functions
-
static inline void __usearch_raise_runtime_error(char const *message)#
Helper function to simplify debugging - trace just one symbol -
__usearch_raise_runtime_error
. Assuming theextern C
block, the name won’t be mangled.
-
template<std::size_t multiple_ak>
std::size_t divide_round_up(std::size_t num) noexcept#
-
inline std::size_t divide_round_up(std::size_t num, std::size_t denominator) noexcept#
-
inline std::size_t ceil2(std::size_t v) noexcept#
-
template<typename at>
void misaligned_store(void *ptr, at v) noexcept# Simply dereferencing misaligned pointers can be dangerous.
-
template<typename at>
at misaligned_load(void *ptr) noexcept# Simply dereferencing misaligned pointers can be dangerous.
-
template<typename at, typename other_at = at>
at exchange(at &obj, other_at &&new_value)# The
std::exchange
alternative for C++11.
-
template<typename at, typename sfinae_at = at>
std::enable_if<std::is_pod<sfinae_at>::value>::type destroy_at(at*)# The
std::destroy_at
alternative for C++11.
-
template<typename at, typename sfinae_at = at>
std::enable_if<!std::is_pod<sfinae_at>::value>::type destroy_at(at *obj)#
-
template<typename at, typename sfinae_at = at>
std::enable_if<std::is_pod<sfinae_at>::value>::type construct_at(at*)# The
std::construct_at
alternative for C++11.
-
template<typename at, typename sfinae_at = at>
std::enable_if<!std::is_pod<sfinae_at>::value>::type construct_at(at *obj)#
-
template<typename element_at>
element_at default_free_value()#
-
constexpr std::size_t default_connectivity()#
Number of neighbors per graph node. Defaults to 32 in FAISS and 16 in hnswlib.
It is called
M
in the paper.
-
constexpr std::size_t default_expansion_add()#
Hyper-parameter controlling the quality of indexing. Defaults to 40 in FAISS and 200 in hnswlib.
It is called
efConstruction
in the paper.
-
constexpr std::size_t default_expansion_search()#
Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.
It is called
ef
in the paper.
-
constexpr std::size_t default_allocator_entry_bytes()#
-
template<typename object_at>
static constexpr bool is_dummy()# Checks if the provided object has a dummy type, emulating an interface, but performing no real computation.
-
template<typename at>
constexpr bool has_reset()# Checks if a certain class has a member function called
reset
.
-
template<typename men_at, typename women_at, typename men_values_at, typename women_values_at, typename men_metric_at, typename women_metric_at, typename man_to_woman_at = dummy_key_to_key_mapping_t, typename woman_to_man_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
static join_result_t join(men_at const &men, women_at const &women, men_values_at const &men_values, women_values_at const &women_values, men_metric_at &&men_metric, women_metric_at &&women_metric, index_join_config_t config = {}, man_to_woman_at &&man_to_woman = man_to_woman_at{}, woman_to_man_at &&woman_to_man = woman_to_man_at{}, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}) noexcept# Adapts the Male-Optimal Stable Marriage algorithm for unequal sets to perform fast one-to-one matching between two large collections of vectors, using approximate nearest neighbors search.
- Parameters:
man_to_woman – [inout] Container to map ::men keys to ::women.
woman_to_man – [inout] Container to map ::women keys to ::men.
executor – [in] Thread-pool to execute the job in parallel.
progress – [in] Callback to report the execution progress.
-
namespace unum#
-
namespace usearch#
Typedefs
-
using byte_t = char#
-
template<typename metric_at, typename ...args_at>
using return_type_gt = typename std::result_of<metric_at(args_at...)>::type# C++17 and newer version deprecate the
std::result_of
-
using default_key_t = std::uint64_t#
-
using default_slot_t = std::uint32_t#
-
using default_distance_t = float#
Functions
-
template<std::size_t multiple_ak>
std::size_t divide_round_up(std::size_t num) noexcept#
-
inline std::size_t divide_round_up(std::size_t num, std::size_t denominator) noexcept#
-
inline std::size_t ceil2(std::size_t v) noexcept#
-
template<typename at>
void misaligned_store(void *ptr, at v) noexcept# Simply dereferencing misaligned pointers can be dangerous.
-
template<typename at>
at misaligned_load(void *ptr) noexcept# Simply dereferencing misaligned pointers can be dangerous.
-
template<typename at, typename other_at = at>
at exchange(at &obj, other_at &&new_value)# The
std::exchange
alternative for C++11.
-
template<typename at, typename sfinae_at = at>
std::enable_if<std::is_pod<sfinae_at>::value>::type destroy_at(at*)# The
std::destroy_at
alternative for C++11.
-
template<typename at, typename sfinae_at = at>
std::enable_if<!std::is_pod<sfinae_at>::value>::type destroy_at(at *obj)#
-
template<typename at, typename sfinae_at = at>
std::enable_if<std::is_pod<sfinae_at>::value>::type construct_at(at*)# The
std::construct_at
alternative for C++11.
-
template<typename at, typename sfinae_at = at>
std::enable_if<!std::is_pod<sfinae_at>::value>::type construct_at(at *obj)#
-
template<typename element_at>
element_at default_free_value()#
-
constexpr std::size_t default_connectivity()#
Number of neighbors per graph node. Defaults to 32 in FAISS and 16 in hnswlib.
It is called
M
in the paper.
-
constexpr std::size_t default_expansion_add()#
Hyper-parameter controlling the quality of indexing. Defaults to 40 in FAISS and 200 in hnswlib.
It is called
efConstruction
in the paper.
-
constexpr std::size_t default_expansion_search()#
Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.
It is called
ef
in the paper.
-
constexpr std::size_t default_allocator_entry_bytes()#
-
template<typename object_at>
static constexpr bool is_dummy()# Checks if the provided object has a dummy type, emulating an interface, but performing no real computation.
-
template<typename at>
constexpr bool has_reset()# Checks if a certain class has a member function called
reset
.
-
template<typename key_at>
inline std::size_t get_slot(member_cref_gt<key_at> const &m) noexcept#
-
template<typename key_at>
inline key_at get_key(member_cref_gt<key_at> const &m) noexcept#
-
template<typename key_at>
inline std::size_t get_slot(member_ref_gt<key_at> const &m) noexcept#
-
template<typename key_at>
inline key_at get_key(member_ref_gt<key_at> const &m) noexcept#
-
template<typename men_at, typename women_at, typename men_values_at, typename women_values_at, typename men_metric_at, typename women_metric_at, typename man_to_woman_at = dummy_key_to_key_mapping_t, typename woman_to_man_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
static join_result_t join(men_at const &men, women_at const &women, men_values_at const &men_values, women_values_at const &women_values, men_metric_at &&men_metric, women_metric_at &&women_metric, index_join_config_t config = {}, man_to_woman_at &&man_to_woman = man_to_woman_at{}, woman_to_man_at &&woman_to_man = woman_to_man_at{}, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}) noexcept# Adapts the Male-Optimal Stable Marriage algorithm for unequal sets to perform fast one-to-one matching between two large collections of vectors, using approximate nearest neighbors search.
- Parameters:
man_to_woman – [inout] Container to map ::men keys to ::women.
woman_to_man – [inout] Container to map ::women keys to ::men.
executor – [in] Thread-pool to execute the job in parallel.
progress – [in] Callback to report the execution progress.
-
template<typename at>
class misaligned_ref_gt# - #include <index.hpp>
A reference to a misaligned memory location with a specific type. It is needed to avoid Undefined Behavior when dereferencing addresses indivisible by
sizeof(at)
.Public Functions
-
inline misaligned_ref_gt &operator=(mutable_t const &v) noexcept#
-
inline misaligned_ref_gt &operator=(mutable_t const &v) noexcept#
-
template<typename at>
class misaligned_ptr_gt# - #include <index.hpp>
A pointer to a misaligned memory location with a specific type. It is needed to avoid Undefined Behavior when dereferencing addresses indivisible by
sizeof(at)
.Public Types
-
using iterator_category = std::random_access_iterator_tag#
-
using difference_type = std::ptrdiff_t#
-
using pointer = misaligned_ptr_gt<element_t>#
-
using reference = misaligned_ref_gt<element_t>#
Public Functions
-
inline value_type operator[](std::size_t i) const noexcept#
-
inline misaligned_ptr_gt &operator++() noexcept#
-
inline misaligned_ptr_gt &operator--() noexcept#
-
inline misaligned_ptr_gt operator++(int) noexcept#
-
inline misaligned_ptr_gt operator--(int) noexcept#
-
inline misaligned_ptr_gt operator+(difference_type d) const noexcept#
-
inline misaligned_ptr_gt operator-(difference_type d) const noexcept#
-
inline difference_type operator-(const misaligned_ptr_gt &other) const noexcept#
-
inline misaligned_ptr_gt &operator+=(difference_type d) noexcept#
-
inline misaligned_ptr_gt &operator-=(difference_type d) noexcept#
-
inline bool operator==(misaligned_ptr_gt const &other) const noexcept#
-
inline bool operator!=(misaligned_ptr_gt const &other) const noexcept#
-
inline bool operator<(misaligned_ptr_gt const &other) const noexcept#
-
inline bool operator<=(misaligned_ptr_gt const &other) const noexcept#
-
inline bool operator>(misaligned_ptr_gt const &other) const noexcept#
-
inline bool operator>=(misaligned_ptr_gt const &other) const noexcept#
-
using iterator_category = std::random_access_iterator_tag#
-
template<typename scalar_at>
class span_gt# - #include <index.hpp>
Non-owning memory range view, similar to
std::span
, but for C++11.
-
template<typename scalar_at, typename allocator_at = std::allocator<scalar_at>>
class buffer_gt# - #include <index.hpp>
Similar to
std::vector
, but doesn’t support dynamic resizing. On the bright side, this can’t throw exceptions.
-
class error_t#
- #include <index.hpp>
A lightweight error class for handling error messages, which are expected to be allocated in static memory.
Public Functions
-
inline error_t() noexcept#
-
inline error_t(char const *message) noexcept#
-
inline explicit operator bool() const noexcept#
Checks if there was an error.
-
inline char const *what() const noexcept#
Returns the error message.
-
inline char const *release() noexcept#
Releases the error message, meaning the caller takes ownership.
-
inline ~error_t() noexcept#
Destructor terminates if an error was recorded.
-
inline void raise() noexcept#
Terminates if an error was recorded.
Private Members
-
char const *message_ = {}#
-
inline error_t() noexcept#
-
template<typename result_at>
struct expected_gt# - #include <index.hpp>
Similar to
std::expected
in C++23, wraps a statement evaluation result, or an error. It’s used to avoid raising exception, and gracefully propagate the error.- Template Parameters:
result_at – The type of the expected result.
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline expected_gt failed(error_t message) noexcept#
-
template<typename allocator_at = std::allocator<byte_t>>
class bitset_gt# - #include <index.hpp>
Light-weight bitset implementation to sync nodes updates during graph mutations. Extends basic functionality with atomic operations.
Public Functions
-
inline bitset_gt() noexcept#
-
inline ~bitset_gt() noexcept#
-
inline explicit operator bool() const noexcept#
-
inline void clear() noexcept#
-
inline void reset() noexcept#
-
inline bitset_gt(std::size_t capacity) noexcept#
-
inline bool test(std::size_t i) const noexcept#
-
inline bool set(std::size_t i) noexcept#
-
inline bool atomic_set(std::size_t i) noexcept#
-
inline void atomic_reset(std::size_t i) noexcept#
Private Types
-
using allocator_t = allocator_at#
-
using byte_t = typename allocator_t::value_type#
-
using compressed_slot_t = unsigned long#
Private Static Functions
-
static inline constexpr std::size_t bits_per_slot()#
-
static inline constexpr compressed_slot_t bits_mask()#
-
static inline constexpr std::size_t slots(std::size_t bits)#
-
inline bitset_gt() noexcept#
-
template<typename element_at, typename comparator_at = std::less<void>, typename allocator_at = std::allocator<element_at>>
class max_heap_gt# - #include <index.hpp>
Similar to
std::priority_queue
, but allows raw access to underlying memory, in case you want to shuffle it or sort. Good for collections from 100s to 10’000s elements.In a max-heap, the heap property ensures that the value of each node is greater than or equal to the values of its children. This means that the largest element is always at the root of the heap.
Structures#
There are several designs of heaps. Binary heaps are the simplest & most common variant, that is easy to implement as a succint array. However, they are not the most efficient for all operations. Most importantly, melding (merging) of two heaps has linear complexity in time.
+————–—+———+——–—+———+———–—+———+ | Operation | find-max| delete-max| insert | increase-key | meld | +————–—+———+——–—+———+———–—+———+ | Binary | Θ(1) | Θ(log n) | O(log n)| O(log n) | Θ(n) | | Leftist | Θ(1) | Θ(log n) | O(log n)| Θ(log n) | Θ(log n)| | Binomial | Θ(1) | Θ(log n) | Θ(1) | Θ(log n) | O(log n)| | Skew binomial | Θ(1) | Θ(log n) | Θ(1) | O(log n) | O(log n)| | Pairing | Θ(1) | O(log n) | Θ(1) | o(log n) | Θ(1) | | Rank-pairing | Θ(1) | O(log n) | Θ(1) | Θ(1) | Θ(1) | | Fibonacci | Θ(1) | O(log n) | Θ(1) | Θ(1) | Θ(1) | | Strict Fibonacci| Θ(1) | O(log n) | Θ(1) | Θ(1) | Θ(1) | | Brodal | Θ(1) | Θ(log n) | Θ(1) | Θ(1) | Θ(1) | | 2–3 heap | Θ(1) | O(log n) | Θ(1) | Θ(1) | O(log n)| +————–—+———+——–—+———+———–—+———+
It’s well known, that improved priority queue structures translate into better graph-transversal algorithms. For example, Dijkstra’s algorithm can be sped up by using a Fibonacci heap for arbitrary weights. For integer weight bounded by L, Schrijver reported following time complexities in 2004:
+————+———————————-—+————————-—+———————–—+ | Weights | Algorithm | Time complexity | Author | +————+———————————-—+————————-—+———————–—+ | R | | O(V^2 EL) | Ford 1956 | | R | Bellman–Ford algorithm | O(VE) | Shimbel 1955, Bellman | | | | | 1958, Moore 1959 | | R | | O(V^2 log V) | Dantzig 1960 | | R | Dijkstra’s with list | O(V^2) | Leyzorek et al. 1957, | | | | | Dijkstra 1959… | | R | Dijkstra’s with binary heap | O((E + V) log V) | Johnson 1977 | | R | Dijkstra’s with Fibonacci heap | O(E + V log V) | Fredman & Tarjan 1984, | | | | | Fredman & Tarjan 1987 | | R | Quantum Dijkstra | O(√VE log^2 V) | Dürr et al. 2006 | | R | Dial’s algorithm (Dijkstra’s using | O(E + LV) | Dial 1969 | | | a bucket queue with L buckets) | | | | N | | O(E log log L) | Johnson 1981, Karlsson & | | | | | Poblete 1983 | | N | Gabow’s algorithm | O(E log_E/V L) | Gabow 1983, Gabow 1985 | | N | | O(E + V √log L) | Ahuja et al. 1990 | | N | Thorup | O(E + V log log V) | Thorup 2004 | +————+———————————-—+————————-—+———————–—+
Possible improvements:
Randomized meldable heaps: https://en.wikipedia.org/wiki/Randomized_meldable_heap
D-ary heaps: https://en.wikipedia.org/wiki/D-ary_heap
Public Types
-
using element_t = element_at#
-
using comparator_t = comparator_at#
-
using allocator_t = allocator_at#
Public Functions
-
inline max_heap_gt() noexcept#
-
inline max_heap_gt(max_heap_gt &&other) noexcept#
-
inline max_heap_gt &operator=(max_heap_gt &&other) noexcept#
-
max_heap_gt(max_heap_gt const&) = delete#
-
max_heap_gt &operator=(max_heap_gt const&) = delete#
-
inline ~max_heap_gt() noexcept#
-
inline void reset() noexcept#
-
inline bool empty() const noexcept#
-
inline std::size_t size() const noexcept#
-
inline std::size_t capacity() const noexcept#
-
inline void clear() noexcept#
-
inline void shrink(std::size_t n) noexcept#
-
inline element_t const &top() const noexcept#
Selects the largest element in the heap.
- Returns:
Reference to the stored element.
-
inline void sort_ascending() noexcept#
Invalidates the “max-heap” property, transforming into ascending range.
- inline usearch_profiled_m bool reserve (std::size_t new_capacity) noexcept
Ensures the heap has enough capacity for the specified number of elements.
- Parameters:
new_capacity – The desired minimum capacity.
- Returns:
True if the capacity was successfully increased, false otherwise.
-
inline bool insert(element_t &&element) noexcept#
Inserts an element into the heap.
- Parameters:
element – The element to be inserted.
- Returns:
True if the element was successfully inserted, false otherwise.
- inline usearch_profiled_m void insert_reserved (element_t &&element) noexcept
Inserts an element into the heap without reserving additional space.
- Parameters:
element – The element to be inserted.
-
inline bool insert_many(element_t const *elements) noexcept#
Inserts multiple elements into the heap.
- Parameters:
elements – Pointer to the elements to be inserted.
- Returns:
True if the elements were successfully inserted, false otherwise.
- inline usearch_profiled_m element_t pop () noexcept
Private Functions
-
inline void shift_up(std::size_t i) noexcept#
Shifts an element up to maintain the heap property. This operation is called when a new element is added at the end of the heap. The element is moved up until the heap property is restored.
- Parameters:
i – Index of the element to be shifted up.
-
inline void shift_down(std::size_t i) noexcept#
Shifts an element down to maintain the heap property. This operation is called when the root element is removed and the last element is moved to the root. The element is moved down until the heap property is restored.
- Parameters:
i – Index of the element to be shifted down.
-
template<typename element_at, typename comparator_at = std::less<void>, typename allocator_at = std::allocator<element_at>>
class sorted_buffer_gt# - #include <index.hpp>
Similar to
std::priority_queue
, but allows raw access to underlying memory and always keeps the data sorted. Ideal for small collections under 128 elements.Public Types
-
using element_t = element_at#
-
using comparator_t = comparator_at#
-
using allocator_t = allocator_at#
Public Functions
-
inline sorted_buffer_gt() noexcept#
-
inline sorted_buffer_gt(sorted_buffer_gt &&other) noexcept#
-
inline sorted_buffer_gt &operator=(sorted_buffer_gt &&other) noexcept#
-
sorted_buffer_gt(sorted_buffer_gt const&) = delete#
-
sorted_buffer_gt &operator=(sorted_buffer_gt const&) = delete#
-
inline ~sorted_buffer_gt() noexcept#
-
inline void reset() noexcept#
-
inline bool empty() const noexcept#
-
inline std::size_t size() const noexcept#
-
inline std::size_t capacity() const noexcept#
-
inline void clear() noexcept#
-
inline bool reserve(std::size_t new_capacity) noexcept#
-
inline bool insert(element_t &&element, std::size_t limit) noexcept#
- Returns:
true
if the entry was added,false
if it wasn’t relevant enough.
-
inline void sort_ascending() noexcept#
-
inline void shrink(std::size_t n) noexcept#
-
using element_t = element_at#
-
class uint40_t#
- #include <index.hpp>
Five-byte integer type to address node clouds with over 4B entries.
40 bits is enough to address a Trillion entries potentially colocated on 1 machine. At roughly 5 bytes * 20 neighbors + 100 bytes per entry, this translates to 200 TB of data, which is similar to a single-server capacity of modern NVME arrays.
Public Functions
-
inline uint40_t() noexcept#
-
inline uint40_t(std::uint32_t n) noexcept#
-
inline uint40_t(std::uint64_t n) noexcept#
-
inline operator std::size_t() const noexcept#
Public Static Functions
Private Members
-
unsigned char octets[5]#
-
inline uint40_t() noexcept#
-
template<typename element_at>
struct default_free_value_gt# - #include <index.hpp>
Reflection-helper to get the default “unused” value for a given type. Needed to initialize hash-sets and bit-sets.
Public Static Functions
-
template<typename sfinae_element_at = element_at, typename std::enable_if<std::is_integral<sfinae_element_at>::value>::type* = nullptr>
static inline sfinae_element_at value() noexcept#
-
template<typename sfinae_element_at = element_at, typename std::enable_if<!std::is_integral<sfinae_element_at>::value>::type* = nullptr>
static inline sfinae_element_at value() noexcept#
-
template<typename sfinae_element_at = element_at, typename std::enable_if<std::is_integral<sfinae_element_at>::value>::type* = nullptr>
-
template<typename element_at>
struct hash_gt# - #include <index.hpp>
Adapter to allow definining arbitrary hash functions for keys and slots. It’s added, as overloading
std::hash
is not recommended by the standard.Public Functions
-
inline std::size_t operator()(element_at const &element) const noexcept#
-
inline std::size_t operator()(element_at const &element) const noexcept#
-
template<typename element_at, typename hasher_at = hash_gt<element_at>, typename allocator_at = std::allocator<byte_t>>
class growing_hash_set_gt# - #include <index.hpp>
Minimalistic hash-set implementation to track visited nodes during graph traversal. In our primary usecase, its a sparse alternative to a bit-set.
It doesn’t support deletion of separate objects, but supports
clear
-ing all at once. It expectsreserve
to be called ahead of all insertions, so no resizes are needed. It also assumes0xFF...FF
slots to be unused, to simplify the design. It uses linear probing, the number of slots is always a power of two, and it uses linear-probing in case of bucket collisions.Public Functions
-
inline growing_hash_set_gt() noexcept#
-
inline ~growing_hash_set_gt() noexcept#
-
inline explicit operator bool() const noexcept#
-
inline std::size_t size() const noexcept#
-
inline void clear() noexcept#
-
inline void reset() noexcept#
-
inline growing_hash_set_gt(std::size_t capacity) noexcept#
-
inline growing_hash_set_gt(growing_hash_set_gt &&other) noexcept#
-
inline growing_hash_set_gt &operator=(growing_hash_set_gt &&other) noexcept#
-
growing_hash_set_gt(growing_hash_set_gt const&) = delete#
-
growing_hash_set_gt &operator=(growing_hash_set_gt const&) = delete#
-
inline bool test(element_t const &elem) const noexcept#
Checks if the element is already in the hash-set.
- Returns:
true
if the element is already in the hash-set.
-
inline bool set(element_t const &elem) noexcept#
Inserts an element into the hash-set.
- Returns:
Similar to
bitset_gt
, returns the previous value.
-
inline bool reserve(std::size_t new_capacity) noexcept#
Extends the capacity of the hash-set.
- Returns:
true
if enough capacity is available,false
if memory allocation failed.
Private Types
-
using element_t = element_at#
-
using allocator_t = allocator_at#
-
using byte_t = typename allocator_t::value_type#
-
inline growing_hash_set_gt() noexcept#
-
template<typename element_at, typename allocator_at = std::allocator<element_at>>
class ring_gt# - #include <index.hpp>
Basic single-threaded ring class, used for all kinds of task queues.
Public Functions
-
inline explicit ring_gt(allocator_t const &alloc = allocator_t()) noexcept#
-
inline ~ring_gt() noexcept#
-
inline bool empty() const noexcept#
-
inline size_t capacity() const noexcept#
-
inline size_t size() const noexcept#
-
inline void clear() noexcept#
-
inline void reset() noexcept#
-
inline bool reserve(std::size_t n) noexcept#
- inline void push (element_t const &value) usearch_noexcept_m
-
inline explicit ring_gt(allocator_t const &alloc = allocator_t()) noexcept#
-
struct index_config_t#
- #include <index.hpp>
Configuration settings for the index construction. Includes the main
::connectivity
parameter (M
in the paper) and two expansion factors - for construction and search.Subclassed by unum::usearch::index_dense_config_t
Public Functions
-
inline index_config_t() = default#
-
inline index_config_t(std::size_t c, std::size_t cb = 0) noexcept#
-
inline error_t validate() noexcept#
Validates the configuration settings, updating them in-place.
- Returns:
Error message, if any.
-
inline bool is_valid() const noexcept#
Immutable function to check if the configuration is valid.
- Returns:
true
if the configuration is valid.
Public Members
-
std::size_t connectivity = default_connectivity()#
Number of neighbors per graph node. Defaults to 32 in FAISS and 16 in hnswlib.
It is called
M
in the paper.
-
std::size_t connectivity_base = default_connectivity() * 2#
Number of neighbors per graph node in base level graph. Defaults to double of the other levels, so 64 in FAISS and 32 in hnswlib.
It is called
M0
in the paper.
-
inline index_config_t() = default#
-
struct index_limits_t#
- #include <index.hpp>
Growth settings for the index container. Includes the upper bound for
::members
capacity, and the number of read/write threads expected to work with the index.Public Functions
-
inline index_limits_t(std::size_t n, std::size_t t) noexcept#
-
inline index_limits_t(std::size_t n = 0) noexcept#
-
inline std::size_t threads() const noexcept#
Returns the upper limit for the number of threads.
-
inline std::size_t concurrency() const noexcept#
Returns the concurrency-level of the index - the minimum of thread counts.
Public Members
-
std::size_t members = 0#
Maximum number of entries in the index.
-
std::size_t threads_add = std::thread::hardware_concurrency()#
Max number of threads simultaneously updating entries.
-
std::size_t threads_search = std::thread::hardware_concurrency()#
Max number of threads simultaneously searching entries.
-
inline index_limits_t(std::size_t n, std::size_t t) noexcept#
-
struct index_update_config_t#
- #include <index.hpp>
Public Members
-
std::size_t expansion = default_expansion_add()#
Hyper-parameter controlling the quality of indexing. Defaults to 40 in FAISS and 200 in hnswlib.
It is called
efConstruction
in the paper.
-
std::size_t thread = 0#
Optional thread identifier for multi-threaded construction.
-
std::size_t expansion = default_expansion_add()#
-
struct index_search_config_t#
- #include <index.hpp>
Public Members
-
std::size_t expansion = default_expansion_search()#
Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.
It is called
ef
in the paper.
-
std::size_t thread = 0#
Optional thread identifier for multi-threaded construction.
-
bool exact = false#
Brute-forces exhaustive search over all entries in the index.
-
std::size_t expansion = default_expansion_search()#
-
struct index_cluster_config_t#
- #include <index.hpp>
Public Members
-
std::size_t expansion = default_expansion_search()#
Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.
It is called
ef
in the paper.
-
std::size_t thread = 0#
Optional thread identifier for multi-threaded construction.
-
std::size_t expansion = default_expansion_search()#
-
struct index_copy_config_t#
- #include <index.hpp>
Subclassed by unum::usearch::index_dense_copy_config_t
-
struct index_join_config_t#
- #include <index.hpp>
Public Members
-
std::size_t max_proposals = 0#
Controls maximum number of proposals per man during stable marriage.
-
std::size_t expansion = default_expansion_search()#
Hyper-parameter controlling the quality of search. Defaults to 16 in FAISS and 10 in hnswlib.
It is called
ef
in the paper.
-
bool exact = false#
Brute-forces exhaustive search over all entries in the index.
-
std::size_t max_proposals = 0#
-
struct dummy_predicate_t#
- #include <index.hpp>
An example of what a USearch-compatible ad-hoc filter would look like.
A similar function object can be passed to search queries to further filter entries on their auxiliary properties, such as some categorical keys stored in an external DBMS.
-
struct dummy_callback_t#
- #include <index.hpp>
An example of what a USearch-compatible ad-hoc operation on in-flight entries.
This kind of callbacks is used when the engine is being updated and you want to patch the entries, while their are still under locks - limiting concurrent access and providing consistency.
-
struct dummy_progress_t#
- #include <index.hpp>
An example of what a USearch-compatible progress-bar should look like.
This is particularly helpful when handling long-running tasks, like serialization, saving, and loading from disk, or index-level joins. The reporter checks return value to continue or stop the process,
false
means need to stop.Public Functions
-
inline bool operator()(std::size_t, std::size_t) const noexcept#
-
inline bool operator()(std::size_t, std::size_t) const noexcept#
-
struct dummy_prefetch_t#
- #include <index.hpp>
An example of what a USearch-compatible values prefetching mechanism should look like.
USearch is designed to handle very large datasets, that may not fir into RAM. Fetching from external memory is very expensive, so we’ve added a pre-fetching mechanism, that accepts multiple objects at once, to cache in RAM ahead of the computation. The received iterators support both
get_slot
andget_key
operations. An example usage may look like this:template <typename member_citerator_like_at> inline void operator()(member_citerator_like_at, member_citerator_like_at) const noexcept { for (; begin != end; ++begin) io_uring_prefetch(offset_in_file(get_key(begin))); }
Public Functions
-
template<typename member_citerator_like_at>
inline void operator()(member_citerator_like_at, member_citerator_like_at) const noexcept#
-
template<typename member_citerator_like_at>
-
struct dummy_executor_t#
- #include <index.hpp>
An example of what a USearch-compatible executor (thread-pool) should look like.
It’s expected to have
parallel(callback)
API to schedule one task per thread; an identicalfixed(count, callback)
anddynamic(count, callback)
overloads that also accepts the number of tasks, and somehow schedules them between threads; as well assize()
to determine the number of available threads.Public Functions
-
inline dummy_executor_t() noexcept#
-
inline std::size_t size() const noexcept#
-
template<typename thread_aware_function_at>
inline void fixed(std::size_t tasks, thread_aware_function_at &&thread_aware_function) noexcept#
-
template<typename thread_aware_function_at>
inline void dynamic(std::size_t tasks, thread_aware_function_at &&thread_aware_function) noexcept#
-
template<typename thread_aware_function_at>
inline void parallel(thread_aware_function_at &&thread_aware_function) noexcept#
-
inline dummy_executor_t() noexcept#
-
struct dummy_key_to_key_mapping_t#
- #include <index.hpp>
An example of what a USearch-compatible key-to-key mapping should look like.
This is particularly helpful for “Semantic Joins”, where we map entries of one collection to entries of another. In asymmetric setups, where A -> B is needed, but B -> A is not, this can be passed to minimize memory usage.
Public Functions
-
template<typename key_at>
inline member_ref_t operator[](key_at&&) const noexcept#
-
struct member_ref_t#
- #include <index.hpp>
Public Functions
-
template<typename key_at>
inline member_ref_t &operator=(key_at&&) noexcept#
-
template<typename key_at>
-
template<typename key_at>
-
template<typename, typename at>
struct has_reset_gt# - #include <index.hpp>
-
template<typename check_at, typename return_at, typename ...args_at>
struct has_reset_gt<check_at, return_at(args_at...)># - #include <index.hpp>
Public Static Attributes
-
static constexpr bool value = type::value#
Private Members
- decltype(check< check_at >(0)) typedef type
-
static constexpr bool value = type::value#
-
struct serialization_result_t#
- #include <index.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline serialization_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
class output_file_t#
- #include <index.hpp>
Smart-pointer wrapping the LibC for binary file outputs.
This class raises no exceptions and corresponds errors through
serialization_result_t
. The class automatically closes the file when the object is destroyed.Public Functions
-
inline output_file_t(char const *path) noexcept#
-
inline ~output_file_t() noexcept#
-
inline output_file_t(output_file_t &&other) noexcept#
-
inline output_file_t &operator=(output_file_t &&other) noexcept#
-
inline serialization_result_t open_if_not() noexcept#
-
inline serialization_result_t write(void const *begin, std::size_t length) noexcept#
-
inline void close() noexcept#
-
inline output_file_t(char const *path) noexcept#
-
class input_file_t#
- #include <index.hpp>
Smart-pointer wrapping the LibC for binary files inputs.
This class raises no exceptions and corresponds errors through
serialization_result_t
. The class automatically closes the file when the object is destroyed.Public Functions
-
inline input_file_t(char const *path) noexcept#
-
inline ~input_file_t() noexcept#
-
inline input_file_t(input_file_t &&other) noexcept#
-
inline input_file_t &operator=(input_file_t &&other) noexcept#
-
inline serialization_result_t open_if_not() noexcept#
-
inline serialization_result_t read(void *begin, std::size_t length) noexcept#
-
inline void close() noexcept#
-
inline explicit operator bool() const noexcept#
-
inline bool seek_to(std::size_t progress) noexcept#
-
inline bool seek_to_end() noexcept#
-
inline bool infer_progress(std::size_t &progress) noexcept#
-
inline input_file_t(char const *path) noexcept#
-
class memory_mapped_file_t#
- #include <index.hpp>
Represents a memory-mapped file or a pre-allocated anonymous memory region.
This class provides a convenient way to memory-map a file and access its contents as a block of memory. The class handles platform-specific memory-mapping operations on Windows, Linux, and MacOS. The class automatically closes the file when the object is destroyed.
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline std::size_t size() const noexcept#
-
inline memory_mapped_file_t() noexcept#
-
inline memory_mapped_file_t(char const *path) noexcept#
-
inline ~memory_mapped_file_t() noexcept#
-
inline memory_mapped_file_t(memory_mapped_file_t &&other) noexcept#
-
inline memory_mapped_file_t &operator=(memory_mapped_file_t &&other) noexcept#
-
inline serialization_result_t open_if_not() noexcept#
-
inline void close() noexcept#
-
inline explicit operator bool() const noexcept#
-
struct index_serialized_header_t#
- #include <index.hpp>
Metadata header for the serialized index.
This structure is very minimalistic by design. It contains no information about the capacity of the index, so you’ll have to
reserve
after loading. It also contains no info on the metric or key types, so you’ll have to store that information elsewhere, like we do inindex_dense_head_t
.
-
template<typename key_at = default_key_t>
struct member_gt# - #include <index.hpp>
-
template<typename key_at = default_key_t>
struct member_cref_gt# - #include <index.hpp>
-
template<typename key_at = default_key_t>
struct member_ref_gt# - #include <index.hpp>
Public Functions
-
inline operator member_cref_gt<key_at>() const noexcept#
-
inline operator member_cref_gt<key_at>() const noexcept#
-
template<typename distance_at = default_distance_t, typename key_at = default_key_t, typename compressed_slot_at = default_slot_t, typename dynamic_allocator_at = std::allocator<byte_t>, typename tape_allocator_at = dynamic_allocator_at>
class index_gt# - #include <index.hpp>
Approximate Nearest Neighbors Search index-structure using the Hierarchical Navigable Small World (HNSW) graphs algorithm. If classical containers store Key->Value mappings, this one can be seen as a network of keys, accelerating approximate Value~>Key visited_members.
Unlike most implementations, this one is generic anc can be used for any search, not just within equi-dimensional vectors. Examples range from Texts to similar Chess positions, Geo-Spatial Search, and even Graphs.
Features#
- Thread-safe for concurrent construction, search, and updates. - Doesn't allocate new threads, and reuses the ones its called from. - Allows storing value externally, managing just the similarity index. - Joins.
Usage#
Exceptions#
None of the methods throw exceptions in the “Release” compilation mode. It may only
throw
if your memory ::dynamic_allocator_at or ::metric_at isn’t safe to copy.Serialization#
When serialized, doesn’t include any additional metadata. It is just the multi-level proximity-graph. You may want to store metadata about the used metric and key types somewhere else.
Details#
Like every HNSW implementation, USearch builds levels of “Proximity Graphs”. Every added vector forms a node in one or more levels of the graph. Every node is present in the base level. Every following level contains a smaller fraction of nodes. During search, the operation starts with the smaller levels and zooms-in on every following iteration of larger graph traversals.
Just one memory allocation is performed regardless of the number of levels. The adjacency lists across all levels are concatenated into that single buffer. That buffer starts with a “head”, that stores the metadata, such as the tallest “level” of the graph that it belongs to, the external “key”, and the number of “dimensions” in the vector.
, Predicates and Callbacks#
References and Iterators#
- `member_citerator_t` and `member_iterator_t` have only slots, no indirections. - `member_cref_t` and `member_ref_t` contains the `slot` and a reference to the key. So it passes through 1 level of visited_members in `nodes_`. Retrieving the key via `get_key` will cause fetching yet another cache line. - `member_gt` contains an already prefetched copy of the key.
- Template Parameters:
key_at – The type of primary objects stored in the index. The values, to which those map, are not managed by the same index structure.
compressed_slot_at – The smallest unsigned integer type to address indexed elements. It is used internally to maximize space-efficiency and is generally up-casted to in public interfaces. Can be a built-in ,
uint64_t
, or our custom . Which makes the most sense for 4B+ entry indexes.dynamic_allocator_at – Dynamic memory allocator for temporary buffers, visits indicators, and priority queues, needed during construction and traversals of graphs. The allocated buffers may be uninitialized.
tape_allocator_at – Potentially different memory allocator for primary allocations of nodes and vectors. It would never
deallocate
separate entries, and would only free all the space at once. The allocated buffers may be uninitialized.
Public Types
-
using distance_t = distance_at#
-
using key_t = vector_key_t#
-
using compressed_slot_t = compressed_slot_at#
-
using dynamic_allocator_t = dynamic_allocator_at#
-
using tape_allocator_t = tape_allocator_at#
-
using member_cref_t = member_cref_gt<vector_key_t>#
-
using member_ref_t = member_ref_gt<vector_key_t>#
-
using member_iterator_t = member_iterator_gt<member_ref_t, index_gt>#
-
using member_citerator_t = member_iterator_gt<member_cref_t, index_gt const>#
-
using value_type = vector_key_t#
-
using allocator_type = dynamic_allocator_t#
-
using size_type = std::size_t#
-
using difference_type = std::ptrdiff_t#
-
using reference = member_ref_t#
-
using const_reference = member_cref_t#
-
using pointer = void#
-
using const_pointer = void#
-
using iterator = member_iterator_t#
-
using const_iterator = member_citerator_t#
-
using reverse_iterator = std::reverse_iterator<member_iterator_t>#
-
using reverse_const_iterator = std::reverse_iterator<member_citerator_t>#
-
using dynamic_allocator_traits_t = std::allocator_traits<dynamic_allocator_t>#
-
using byte_t = typename dynamic_allocator_t::value_type#
-
using tape_allocator_traits_t = std::allocator_traits<tape_allocator_t>#
-
using copy_result_t = state_result_t#
Public Functions
-
inline std::size_t connectivity() const noexcept#
-
inline std::size_t capacity() const noexcept#
-
inline std::size_t size() const noexcept#
-
inline std::size_t max_level() const noexcept#
-
inline index_config_t const &config() const noexcept#
-
inline index_limits_t const &limits() const noexcept#
-
inline bool is_immutable() const noexcept#
-
inline explicit operator bool() const noexcept#
-
inline explicit index_gt(dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept(false)#
Default index constructor, suitable only for stateless allocators.
Exceptions#
Doesn’t throw, unless the ::dynamic_allocator’s and ::tape_allocator’s throw on move-construction.
Warning
Consider
index_gt::make
instead, or explicitly convert tobool
to check if the index is valid.
-
inline explicit index_gt(index_config_t config, dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept(false)#
Default index constructor, suitable only for stateless allocators.
Exceptions#
Doesn’t throw, unless the ::dynamic_allocator’s and ::tape_allocator’s throw on move-construction.
Warning
Consider
index_gt::make
instead, or explicitly convert tobool
to check if the index is valid.
-
inline index_gt fork() noexcept#
Clones the structure with the same hyper-parameters, but without contents.
-
inline ~index_gt() noexcept#
-
inline copy_result_t copy(index_copy_config_t config = {}) const noexcept#
The recommended way to copy the index, as unlike the copy-constructor, it can fail with an error message, without raising an exception.
- Parameters:
config – [in] The configuration specs for the copy-operation. Currently unused.
-
inline member_citerator_t cbegin() const noexcept#
-
inline member_citerator_t cend() const noexcept#
-
inline member_citerator_t begin() const noexcept#
-
inline member_citerator_t end() const noexcept#
-
inline member_iterator_t begin() noexcept#
-
inline member_iterator_t end() noexcept#
-
inline member_ref_t at(compressed_slot_t slot) noexcept#
-
inline member_cref_t at(compressed_slot_t slot) const noexcept#
-
inline member_iterator_t iterator_at(compressed_slot_t slot) noexcept#
-
inline member_citerator_t citerator_at(compressed_slot_t slot) const noexcept#
-
inline dynamic_allocator_t const &dynamic_allocator() const noexcept#
-
inline tape_allocator_t const &tape_allocator() const noexcept#
-
inline void clear() noexcept#
Erases all the vectors from the index.
Will change
size()
to zero, but will keep the samecapacity()
. Will keep the number of available threads/contexts the same as it was.
-
inline void reset() noexcept#
Erases all members from index, closing files, and returning RAM to OS.
Will change both
size()
andcapacity()
to zero. Will deallocate all threads/contexts. If the index is memory-mapped - releases the mapping and the descriptor.
-
inline void swap(index_gt &other) noexcept#
Swaps the underlying memory buffers and thread contexts.
- inline bool try_reserve (index_limits_t limits) usearch_noexcept_m
Increases the
capacity()
of the index to allow adding more vectors.- Returns:
true
on success,false
on memory allocation errors.
- inline bool reserve (index_limits_t limits) usearch_noexcept_m
Increases the
capacity()
of the index to allow adding more vectors.Warning
Unlike STL, won’t throw exceptions on memory allocations, so check the return value.
- Returns:
true
on success,false
on memory allocation errors.
- template<typename value_at, typename metric_at, typename callback_at = dummy_callback_t, typename prefetch_at = dummy_prefetch_t> inline add_result_t add (vector_key_t key, value_at &&value, metric_at &&metric, index_update_config_t config={}, callback_at &&callback=callback_at{}, prefetch_at &&prefetch=prefetch_at{}) usearch_noexcept_m
Inserts a new entry into the index. Thread-safe. Supports heterogeneous lookups. Expects needed capacity to be reserved ahead of time:
size() < capacity()
.- Template Parameters:
metric_at – A function responsible for computing the distance (dis-similarity) between two objects. It should be callable into distinctly different scenarios:
distance_t operator() (value_at, entry_at)
- from new object to existing entries.distance_t operator() (entry_at, entry_at)
- between existing entries. Where any possibleentry_at
has both two interfaces:std::size_t slot()
,vector_key_t key()
.
- Parameters:
key – [in] External identifier/name/descriptor for the new entry.
value – [in] Content that will be compared against other entries to index.
metric – [in] Callable object measuring distance between ::value and present objects.
config – [in] Configuration options for this specific operation.
callback – [in] On-success callback, executed while the
member_ref_t
is still under lock.
- template<typename value_at, typename metric_at, typename callback_at = dummy_callback_t, typename prefetch_at = dummy_prefetch_t> inline add_result_t update (member_iterator_t iterator, vector_key_t key, value_at &&value, metric_at &&metric, index_update_config_t config={}, callback_at &&callback=callback_at{}, prefetch_at &&prefetch=prefetch_at{}) usearch_noexcept_m
Update an existing entry. Thread-safe. Supports heterogeneous lookups.
! It’s assumed that different threads aren’t updating the same entry at the same time. ! The state won’t be corrupted, but no transactional guarantees are provided and the ! resulting value & neighbors list may be inconsistent.
- Template Parameters:
metric_at – A function responsible for computing the distance (dis-similarity) between two objects. It should be callable into distinctly different scenarios:
distance_t operator() (value_at, entry_at)
- from new object to existing entries.distance_t operator() (entry_at, entry_at)
- between existing entries. For any possibleentry_at
following interfaces will work:std::size_t get_slot(entry_at const &)
vector_key_t get_key(entry_at const &)
- Parameters:
iterator – [in] Iterator pointing to an existing entry to be replaced.
key – [in] External identifier/name/descriptor for the entry.
value – [in] Content that will be compared against other entries in the index.
metric – [in] Callable object measuring distance between ::value and present objects.
config – [in] Configuration options for this specific operation.
callback – [in] On-success callback, executed while the
member_ref_t
is still under lock.
- template<typename value_at, typename metric_at, typename predicate_at = dummy_predicate_t, typename prefetch_at = dummy_prefetch_t> inline search_result_t search (value_at &&query, std::size_t wanted, metric_at &&metric, index_search_config_t config={}, predicate_at &&predicate=predicate_at{}, prefetch_at &&prefetch=prefetch_at{}) const usearch_noexcept_m
Searches for the closest elements to the given ::query. Thread-safe.
- Parameters:
query – [in] Content that will be compared against other entries in the index.
wanted – [in] The upper bound for the number of results to return.
config – [in] Configuration options for this specific operation.
predicate – [in] Optional filtering predicate for
member_cref_t
.
- Returns:
Smart object referencing temporary memory. Valid until next
search()
,add()
, orcluster()
.
-
template<typename value_at, typename metric_at, typename predicate_at = dummy_predicate_t, typename prefetch_at = dummy_prefetch_t>
inline cluster_result_t cluster(value_at &&query, std::size_t level, metric_at &&metric, index_cluster_config_t config = {}, predicate_at &&predicate = predicate_at{}, prefetch_at &&prefetch = prefetch_at{}) const noexcept# Identifies the closest cluster to the given ::query. Thread-safe.
- Parameters:
query – [in] Content that will be compared against other entries in the index.
level – [in] The index level to target. Higher means lower resolution.
config – [in] Configuration options for this specific operation.
predicate – [in] Optional filtering predicate for
member_cref_t
.
- Returns:
Smart object referencing temporary memory. Valid until next
search()
,add()
, orcluster()
.
-
inline stats_t stats() const noexcept#
Aggregates stats on the number of nodes, edges, and memory usage across all levels.
-
inline stats_t stats(std::size_t level) const noexcept#
Aggregates stats on the number of nodes, edges, and memory usage up to a specific level.
The
level
parameter is zero-based, where0
is the base level. For example,level=1
will include the base level and the first level of connections.
-
inline stats_t stats(stats_t *stats_per_level, std::size_t max_level) const noexcept#
Aggregates stats on the number of nodes, edges, and memory usage up to a specific level, simultaneously exporting the stats for each level into the
stats_per_level
C-style array.The
max_level
parameter is zero-based, where0
is the base level. For example,max_level=1
will include the base level and the first level of connections.
-
inline std::size_t memory_usage(std::size_t allocator_entry_bytes = default_allocator_entry_bytes()) const noexcept#
A relatively accurate lower bound on the amount of memory consumed by the system. In practice it’s error will be below 10%.
See also
serialized_length
for the length of the binary serialized representation.
-
inline double inverse_log_connectivity() const#
-
inline std::size_t neighbors_base_bytes() const#
-
inline std::size_t neighbors_bytes() const#
-
inline std::size_t serialized_length() const noexcept#
Estimate the binary length (in bytes) of the serialized index.
-
template<typename output_callback_at, typename progress_at = dummy_progress_t>
inline serialization_result_t save_to_stream(output_callback_at &&output, progress_at &&progress = {}) const noexcept# Saves serialized binary index representation to a stream.
-
template<typename input_callback_at, typename progress_at = dummy_progress_t>
inline serialization_result_t load_from_stream(input_callback_at &&input, progress_at &&progress = {}) noexcept# Symmetric to
save_from_stream
, pulls data from a stream.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(char const *file_path, progress_at &&progress = {}) const noexcept#
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(char const *file_path, progress_at &&progress = {}) noexcept#
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(output_file_t file, progress_at &&progress = {}) const noexcept# Saves serialized binary index representation to a file, generally on disk.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(memory_mapped_file_t file, std::size_t offset = 0, progress_at &&progress = {}) const noexcept# Memory-maps the serialized binary index representation from disk, without copying data into RAM, and fetching it on-demand.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(input_file_t file, progress_at &&progress = {}) noexcept# Loads the serialized binary index representation from disk to RAM. Adjusts the configuration properties of the constructed index to match the settings in the file.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(memory_mapped_file_t file, std::size_t offset = 0, progress_at &&progress = {}) noexcept# Loads the serialized binary index representation from disk to RAM. Adjusts the configuration properties of the constructed index to match the settings in the file.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t view(memory_mapped_file_t file, std::size_t offset = 0, progress_at &&progress = {}) noexcept# Memory-maps the serialized binary index representation from disk, without copying data into RAM, and fetching it on-demand.
-
template<typename values_at, typename metric_at, typename slot_transition_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t, typename prefetch_at = dummy_prefetch_t>
inline void compact(values_at &&values, metric_at &&metric, slot_transition_at &&slot_transition, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}, prefetch_at &&prefetch = prefetch_at{}) noexcept# Performs compaction on the whole HNSW index, purging some entries and links to them, while also generating a more efficient mapping, putting the more frequently used entries closer together.
- Parameters:
values – [in] A []-subscriptable object, providing access to the values.
metric – [in] Callable object measuring distance between any ::values and present objects.
slot_transition – [in] Callable object to inform changes in slot assignments.
executor – [in] Thread-pool to execute the job in parallel.
progress – [in] Callback to report the execution progress.
prefetch – [in] Callable object to prefetch data into the cache.
-
template<typename allow_member_at = dummy_predicate_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline void isolate(allow_member_at &&allow_member, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}) noexcept# Scans the whole collection, removing the links leading towards banned entries. This essentially isolates some nodes from the rest of the graph, while keeping their outgoing links, in case the node is structurally relevant and has a crucial role in the index. It won’t reclaim the memory.
- Parameters:
allow_member – [in] Predicate to mark nodes for isolation.
executor – [in] Thread-pool to execute the job in parallel.
progress – [in] Callback to report the execution progress.
Public Static Functions
-
static inline state_result_t make(index_config_t config = {}, dynamic_allocator_t dynamic_allocator = {}, tape_allocator_t tape_allocator = {}) noexcept#
The recommended way to initialize the index, as unlike the constructor, it can fail with an error message, without raising an exception.
- Parameters:
config – [in] The configuration specs of the index.
dynamic_allocator – [in] The allocator for temporary buffers and thread contexts, like priority queues.
tape_allocator – [in] The allocator for the primary allocations of nodes and vectors.
Private Types
-
using neighbors_count_t = std::uint32_t#
Integer for the number of node neighbors at a specific level of the multi-level graph. It’s selected to be
std::uint32_t
to improve the alignment in most common cases.
-
using level_t = std::int16_t#
-
using nodes_mutexes_t = bitset_gt<dynamic_allocator_t>#
-
using visits_hash_set_t = growing_hash_set_gt<compressed_slot_t, hash_gt<compressed_slot_t>, dynamic_allocator_t>#
-
using candidates_view_t = span_gt<candidate_t const>#
-
using candidates_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc<candidate_t>#
-
using top_candidates_t = sorted_buffer_gt<candidate_t, std::less<candidate_t>, candidates_allocator_t>#
-
using next_candidates_t = max_heap_gt<candidate_t, std::less<candidate_t>, candidates_allocator_t>#
-
using nodes_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc<node_t>#
-
using contexts_allocator_t = typename dynamic_allocator_traits_t::template rebind_alloc<context_t>#
Private Functions
-
inline span_bytes_t node_bytes_(node_t node) const noexcept#
-
inline span_bytes_t node_malloc_(level_t level) noexcept#
-
inline node_t node_make_(vector_key_t key, level_t level) noexcept#
-
inline node_t node_make_copy_(span_bytes_t old_bytes) noexcept#
-
inline void node_free_(std::size_t idx) noexcept#
-
inline neighbors_ref_t neighbors_base_(node_t node) const noexcept#
-
inline neighbors_ref_t neighbors_non_base_(node_t node, level_t level) const noexcept#
-
inline neighbors_ref_t neighbors_(node_t node, level_t level) const noexcept#
-
inline node_lock_t node_lock_(std::size_t slot) const noexcept#
-
inline node_conditional_lock_t node_try_conditional_lock_(std::size_t slot, bool condition, bool &failed_to_acquire) const noexcept#
- template<typename metric_at, bool require_non_empty_ak = false> inline candidates_view_t form_links_to_closest_ (metric_at &&metric, std::size_t new_slot, level_t level, context_t &context) usearch_noexcept_m
- template<typename value_at, typename metric_at> inline void form_reverse_links_ (metric_at &&metric, compressed_slot_t new_slot, candidates_view_t new_neighbors, value_at &&value, level_t level, context_t &context) usearch_noexcept_m
-
template<typename value_at, typename metric_at, typename prefetch_at = dummy_prefetch_t>
inline compressed_slot_t search_for_one_(value_at &&query, metric_at &&metric, prefetch_at &&prefetch, compressed_slot_t closest_slot, level_t begin_level, level_t end_level, context_t &context) const noexcept#
-
template<typename value_at, typename metric_at, typename prefetch_at = dummy_prefetch_t>
inline bool search_to_insert_(value_at &&query, metric_at &&metric, prefetch_at &&prefetch, compressed_slot_t start_slot, level_t level, std::size_t top_limit, context_t &context) noexcept# Traverses a layer of a graph, to find the best place to insert a new node. Locks the nodes in the process, assuming other threads are updating neighbors lists.
- Returns:
true
if procedure succeeded,false
if run out of memory.
-
template<typename value_at, typename metric_at, typename prefetch_at = dummy_prefetch_t>
inline bool search_to_update_(value_at &&query, metric_at &&metric, prefetch_at &&prefetch, compressed_slot_t start_slot, compressed_slot_t updated_slot, level_t level, std::size_t top_limit, context_t &context) noexcept# Traverses a layer of a graph, to find the best neighbors list for updated node. Locks the nodes in the process, assuming other threads are updating neighbors lists.
- Returns:
true
if procedure succeeded,false
if run out of memory.
- template<typename value_at, typename metric_at, typename predicate_at, typename prefetch_at> inline bool search_to_find_in_base_ (value_at &&query, metric_at &&metric, predicate_at &&predicate, prefetch_at &&prefetch, compressed_slot_t start_slot, std::size_t expansion, context_t &context) const usearch_noexcept_m
Traverses the base layer of a graph, to find a close match. Doesn’t lock any nodes, assuming read-only simultaneous access.
- Returns:
true
if procedure succeeded,false
if run out of memory.
-
template<typename value_at, typename metric_at, typename predicate_at>
inline void search_exact_(value_at &&query, metric_at &&metric, predicate_at &&predicate, std::size_t count, context_t &context) const noexcept# Iterates through all members, without actually touching the index.
-
template<typename metric_at>
inline candidates_view_t refine_(metric_at &&metric, std::size_t needed, top_candidates_t &top, context_t &context, std::size_t &refines_counter) const noexcept# This algorithm from the original paper implements a heuristic, that massively reduces the number of connections a point has, to keep only the neighbors, that are from each other.
Private Members
-
mutable std::atomic<std::size_t> nodes_capacity_ = {}#
Number of “slots” available for
node_t
objects. Equals to .
-
mutable std::atomic<std::size_t> nodes_count_ = {}#
Number of “slots” already storing non-null nodes.
-
index_config_t config_ = {}#
-
index_limits_t limits_ = {}#
-
mutable dynamic_allocator_t dynamic_allocator_ = {}#
-
tape_allocator_t tape_allocator_ = {}#
-
precomputed_constants_t pre_ = {}#
-
memory_mapped_file_t viewed_file_ = {}#
-
std::mutex global_mutex_ = {}#
Controls access to
max_level_
andentry_slot_
. If any thread is updating those values, no other threads canadd()
orsearch()
.
-
level_t max_level_ = {}#
The level of the top-most graph in the index. Grows as the logarithm of size, starts from zero.
-
std::size_t entry_slot_ = {}#
The slot in which the only node of the top-level graph is stored.
-
buffer_gt<node_t, nodes_allocator_t> nodes_ = {}#
C-style array of
node_t
smart-pointers. Usecompressed_slot_t
for indexing.
-
mutable nodes_mutexes_t nodes_mutexes_ = {}#
Mutex, that limits concurrent access to
nodes_
.
-
mutable buffer_gt<context_t, contexts_allocator_t> contexts_ = {}#
Array of thread-specific buffers for temporary data.
Private Static Functions
-
static inline constexpr std::size_t node_head_bytes_()#
How many bytes of memory are needed to form the “head” of the node.
-
static inline precomputed_constants_t precompute_(index_config_t const &config) noexcept#
-
struct add_result_t#
- #include <index.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline add_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
struct candidate_t#
A space-efficient internal data-structure used in graph traversal queues.
Public Functions
-
inline bool operator<(candidate_t other) const noexcept#
-
inline bool operator<(candidate_t other) const noexcept#
-
class candidates_iterator_t#
Public Types
-
using element_t = compressed_slot_t#
-
using iterator_category = std::forward_iterator_tag#
-
using difference_type = std::ptrdiff_t#
-
using pointer = misaligned_ptr_gt<element_t>#
-
using reference = misaligned_ref_gt<element_t>#
Public Functions
-
inline value_type operator*() const noexcept#
-
inline candidates_iterator_t(index_gt const &index, neighbors_ref_t neighbors, visits_hash_set_t &visits, std::size_t progress) noexcept#
-
inline candidates_iterator_t operator++(int) noexcept#
-
inline candidates_iterator_t &operator++() noexcept#
-
inline bool operator==(candidates_iterator_t const &other) noexcept#
-
inline bool operator!=(candidates_iterator_t const &other) noexcept#
-
inline vector_key_t key() const noexcept#
-
inline compressed_slot_t slot() const noexcept#
Private Functions
-
inline candidates_iterator_t &skip_missing() noexcept#
Friends
- friend struct candidates_range_t
-
inline friend std::size_t get_slot(candidates_iterator_t const &it) noexcept#
-
inline friend vector_key_t get_key(candidates_iterator_t const &it) noexcept#
-
using element_t = compressed_slot_t#
-
struct candidates_range_t#
Public Functions
-
inline candidates_iterator_t begin() const noexcept#
-
inline candidates_iterator_t end() const noexcept#
-
inline candidates_iterator_t begin() const noexcept#
-
struct cluster_result_t#
- #include <index.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline cluster_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
struct context_t#
A package of all kinds of temporary data-structures, that the threads would reuse to process requests. Similar to having all of those as separate
thread_local
global variables.Public Functions
-
template<typename value_at, typename metric_at, typename entry_at>
inline distance_t measure(value_at const &first, entry_at const &second, metric_at &&metric) noexcept# Heterogeneous distance calculation.
-
template<typename metric_at, typename entry_at>
inline distance_t measure(entry_at const &first, entry_at const &second, metric_at &&metric) noexcept# Homogeneous distance calculation.
-
template<typename value_at, typename metric_at, typename entries_at, typename candidate_allowed_at, typename transform_at, typename callback_at>
inline void measure_batch(value_at const &first, entries_at const &second_entries, metric_at &&metric, candidate_allowed_at &&candidate_allowed, transform_at &&transform, callback_at &&callback) noexcept# Heterogeneous batch distance calculation.
Public Members
-
top_candidates_t top_candidates = {}#
-
next_candidates_t next_candidates = {}#
-
visits_hash_set_t visits = {}#
-
std::default_random_engine level_generator = {}#
-
std::size_t iteration_cycles = {}#
-
std::size_t computed_distances = {}#
-
std::size_t computed_distances_in_refines = {}#
-
std::size_t computed_distances_in_reverse_refines = {}#
-
template<typename value_at, typename metric_at, typename entry_at>
-
struct match_t#
- #include <index.hpp>
Describes a matched search result, augmenting
member_cref_t
contents withdistance
to the query object.Public Functions
-
inline match_t() noexcept#
-
inline match_t(member_cref_t member, distance_t distance) noexcept#
-
inline match_t() noexcept#
-
template<typename ref_at, typename index_at>
class member_iterator_gt# - #include <index.hpp>
Public Types
-
using iterator_category = std::random_access_iterator_tag#
-
using difference_type = std::ptrdiff_t#
-
using pointer = void#
Public Functions
-
inline vector_key_t key() const noexcept#
-
inline member_iterator_gt operator++(int) noexcept#
-
inline member_iterator_gt operator--(int) noexcept#
-
inline member_iterator_gt operator+(difference_type d) noexcept#
-
inline member_iterator_gt operator-(difference_type d) noexcept#
-
inline member_iterator_gt &operator++() noexcept#
-
inline member_iterator_gt &operator--() noexcept#
-
inline member_iterator_gt &operator+=(difference_type d) noexcept#
-
inline member_iterator_gt &operator-=(difference_type d) noexcept#
-
inline bool operator==(member_iterator_gt const &other) const noexcept#
-
inline bool operator!=(member_iterator_gt const &other) const noexcept#
Private Functions
-
inline member_iterator_gt() noexcept#
-
inline member_iterator_gt(index_t *index, compressed_slot_t slot) noexcept#
Friends
- friend class index_gt
-
inline friend compressed_slot_t get_slot(member_iterator_gt const &it) noexcept#
-
inline friend vector_key_t get_key(member_iterator_gt const &it) noexcept#
-
using iterator_category = std::random_access_iterator_tag#
-
class neighbors_ref_t#
A slice of the node’s tape, containing a the list of neighbors for a node in a single graph level. It’s pre-allocated to fit as many neighbors “slots”, as may be needed at the target level, and starts with a single integer
neighbors_count_t
counter.Public Types
-
using iterator = misaligned_ptr_gt<compressed_slot_t>#
-
using const_iterator = misaligned_ptr_gt<compressed_slot_t const>#
-
using value_type = compressed_slot_t#
Public Functions
-
inline misaligned_ptr_gt<compressed_slot_t> begin() noexcept#
-
inline misaligned_ptr_gt<compressed_slot_t> end() noexcept#
-
inline misaligned_ptr_gt<compressed_slot_t const> begin() const noexcept#
-
inline misaligned_ptr_gt<compressed_slot_t const> end() const noexcept#
-
inline misaligned_ptr_gt<compressed_slot_t const> cbegin() noexcept#
-
inline misaligned_ptr_gt<compressed_slot_t const> cend() noexcept#
-
inline compressed_slot_t operator[](std::size_t i) const noexcept#
-
inline std::size_t size() const noexcept#
-
inline void clear() noexcept#
-
inline void push_back(compressed_slot_t slot) noexcept#
Private Static Functions
-
static inline constexpr std::size_t shift(std::size_t i = 0) noexcept#
-
using iterator = misaligned_ptr_gt<compressed_slot_t>#
-
class node_t#
A loosely-structured handle for every node. One such node is created for every member. To minimize memory usage and maximize the number of entries per cache-line, it only stores to pointers. The internal tape starts with a
vector_key_t
key, then alevel_t
for the number of graph levels in which this member appears, then the {neighbors_count_t
,compressed_slot_t
,compressed_slot_t
… } sequences for each-level.Public Functions
-
inline explicit operator bool() const noexcept#
-
node_t() = default#
-
inline misaligned_ref_gt<vector_key_t const> ckey() const noexcept#
-
inline misaligned_ref_gt<vector_key_t const> ckey() noexcept#
-
inline misaligned_ref_gt<vector_key_t const> key() const noexcept#
-
inline misaligned_ref_gt<vector_key_t> key() noexcept#
-
inline misaligned_ref_gt<level_t> level() noexcept#
-
inline void key(vector_key_t v) noexcept#
-
inline explicit operator bool() const noexcept#
-
struct precomputed_constants_t#
-
class search_result_t#
- #include <index.hpp>
Subclassed by unum::usearch::index_dense_gt< key_at, compressed_slot_at >::search_result_t
Public Functions
-
inline search_result_t() noexcept#
-
inline search_result_t(search_result_t&&) = default#
-
inline search_result_t &operator=(search_result_t&&) = default#
-
inline explicit operator bool() const noexcept#
-
inline search_result_t failed(error_t message) noexcept#
-
inline operator std::size_t() const noexcept#
-
inline std::size_t size() const noexcept#
-
inline bool empty() const noexcept#
-
inline bool contains(vector_key_t key) const noexcept#
-
inline std::size_t merge_into(vector_key_t *keys, distance_t *distances, std::size_t old_count, std::size_t max_count) const noexcept#
Extracts the search results into a user-provided buffer, that unlike
dump_to
, may already contain some data, so the new and old results are merged together.- Parameters:
keys – [in] The buffer to store the keys of the search results.
distances – [in] The buffer to store the distances to the search results.
old_count – [in] The number of results already stored in the buffers.
max_count – [in] The maximum number of results that can be stored in the buffers.
- Returns:
The number of results stored in the buffer.
-
inline std::size_t dump_to(vector_key_t *keys, distance_t *distances) const noexcept#
Extracts the search results into a user-provided buffer.
- Parameters:
keys – [in] The buffer to store the keys of the search results.
distances – [in] The buffer to store the distances to the search results.
- Returns:
The number of results stored in the buffer.
-
inline std::size_t dump_to(vector_key_t *keys) const noexcept#
Extracts the search results into a user-provided buffer.
- Parameters:
keys – [in] The buffer to store the keys of the search results.
- Returns:
The number of results stored in the buffer.
Public Members
-
std::size_t count = {}#
Number of search results found.
-
std::size_t visited_members = {}#
Number of graph nodes traversed.
-
std::size_t computed_distances = {}#
Number of times the distances were computed.
Private Functions
-
inline search_result_t(index_gt const &index, top_candidates_t const *top) noexcept#
Friends
- friend class index_gt
-
inline search_result_t() noexcept#
-
struct state_result_t#
- #include <index.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline state_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
struct stats_t#
- #include <index.hpp>
-
struct join_result_t#
- #include <index.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline join_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
using byte_t = char#
-
namespace usearch#
usearch/index_plugins.hpp#
Defines
-
__STDC_WANT_IEC_60559_TYPES_EXT__#
-
USEARCH_USE_OPENMP
-
USEARCH_USE_FP16LIB#
-
USEARCH_USE_SIMSIMD#
Typedefs
-
using u40_t = uint40_t#
-
using f16_t = f16_bits_t#
-
using bf16_t = bf16_bits_t#
-
using f64_t = double#
-
using f32_t = float#
-
using u64_t = std::uint64_t#
-
using u32_t = std::uint32_t#
-
using u16_t = std::uint16_t#
-
using u8_t = std::uint8_t#
-
using i64_t = std::int64_t#
-
using i32_t = std::int32_t#
-
using i16_t = std::int16_t#
-
using i8_t = std::int8_t#
-
using executor_default_t = executor_stl_t#
-
using aligned_allocator_t = aligned_allocator_gt<>#
-
using memory_mapping_allocator_t = memory_mapping_allocator_gt<>#
-
using cast_punned_t = bool (*)(byte_t const*, std::size_t, byte_t*)#
Type-punned array casting function. Arguments: input buffer, bytes in input buffer, output buffer. Returns
true
if the casting was performed successfully,false
otherwise.
-
using distance_punned_t = float#
-
using exact_search_results_t = matrix_slice_gt<exact_offset_and_distance_t const>#
-
using kmeans_clustering_t = kmeans_clustering_gt<>#
Enums
-
enum b1x8_t#
Values:
-
enum class metric_kind_t : std::uint8_t#
Enumerates the most commonly used distance metrics, mostly for dense vector representations.
Values:
-
enumerator unknown_k#
-
enumerator ip_k#
-
enumerator cos_k#
-
enumerator l2sq_k#
-
enumerator pearson_k#
-
enumerator haversine_k#
-
enumerator divergence_k#
-
enumerator hamming_k#
-
enumerator tanimoto_k#
-
enumerator sorensen_k#
-
enumerator jaccard_k#
-
enumerator unknown_k#
-
enum class scalar_kind_t : std::uint8_t#
Enumerates the most commonly used scalar types, mostly for dense vector representations. Doesn’t include logical types, like complex numbers or quaternions.
Values:
-
enumerator unknown_k#
-
enumerator b1x8_k#
-
enumerator u40_k#
-
enumerator uuid_k#
-
enumerator bf16_k#
-
enumerator f64_k#
-
enumerator f32_k#
-
enumerator f16_k#
-
enumerator f8_k#
-
enumerator u64_k#
-
enumerator u32_k#
-
enumerator u16_k#
-
enumerator u8_k#
-
enumerator i64_k#
-
enumerator i32_k#
-
enumerator i16_k#
-
enumerator i8_k#
-
enumerator unknown_k#
-
enum class metric_punned_signature_t#
The signature of the user-defined function. Can be just two array pointers, precompiled for a specific array length, or include one or two array sizes as 64-bit unsigned integers.
Values:
-
enumerator array_array_k#
-
enumerator array_array_size_k#
-
enumerator array_array_state_k#
-
enumerator array_array_k#
Functions
-
template<typename scalar_at>
scalar_kind_t scalar_kind() noexcept# Maps a scalar type to its corresponding scalar_kind_t enumeration value.
-
template<typename at>
at angle_to_radians(at angle) noexcept# Converts an angle from degrees to radians.
-
template<typename at>
at square(at value) noexcept# Readability helper to compute the square of a given value.
-
template<typename at, typename compare_at>
inline at clamp(at v, at lo, at hi, compare_at comp) noexcept# Clamps a value between a lower and upper bound using a custom comparator. Similar to
std::clamp
. https://en.cppreference.com/w/cpp/algorithm/clamp.
-
template<typename at>
inline at clamp(at v, at lo, at hi) noexcept# Clamps a value between a lower and upper bound. Similar to
std::clamp
. https://en.cppreference.com/w/cpp/algorithm/clamp.
-
inline bool str_equals(char const *first_begin, std::size_t first_len, char const *second_begin) noexcept#
Compares two strings for equality, given a length for the first string.
-
inline std::size_t bits_per_scalar(scalar_kind_t scalar_kind) noexcept#
Returns the number of bits required to represent a scalar type.
-
inline std::size_t bits_per_scalar_word(scalar_kind_t scalar_kind) noexcept#
Returns the number of bits in a scalar word for a given scalar type. Equivalent to
bits_per_scalar
for types that are not bit-packed.
-
inline char const *scalar_kind_name(scalar_kind_t scalar_kind) noexcept#
Returns the string name of a given scalar type.
-
inline char const *metric_kind_name(metric_kind_t metric) noexcept#
Returns the string name of a given distance metric.
-
inline expected_gt<scalar_kind_t> scalar_kind_from_name(char const *name, std::size_t len)#
Parses a string to identify the corresponding
scalar_kind_t
enumeration value.
-
inline expected_gt<scalar_kind_t> scalar_kind_from_name(char const *name)#
Parses a string to identify the corresponding
scalar_kind_t
enumeration value.
-
inline expected_gt<metric_kind_t> metric_from_name(char const *name, std::size_t len)#
Parses a string to identify the corresponding
metric_kind_t
enumeration value.
-
inline expected_gt<metric_kind_t> metric_from_name(char const *name)#
Parses a string to identify the corresponding
metric_kind_t
enumeration value.
-
inline float f16_to_f32(std::uint16_t u16) noexcept#
Convenience function to upcast a half-precision floating point number to a single-precision one.
-
inline std::uint16_t f32_to_f16(float f32) noexcept#
Convenience function to downcast a single-precision floating point number to a half-precision one.
-
inline float bf16_to_f32(std::uint16_t u16) noexcept#
Convenience function to upcast a brain-floating point number to a single-precision one. https://github.com/ashvardanian/SimSIMD/blob/ff51434d90c66f916e94ff05b24530b127aa4cff/include/simsimd/types.h#L395-L410.
-
inline std::uint16_t f32_to_bf16(float f32) noexcept#
Convenience function to downcast a single-precision floating point number to a brain-floating point one. https://github.com/ashvardanian/SimSIMD/blob/ff51434d90c66f916e94ff05b24530b127aa4cff/include/simsimd/types.h#L412-L425.
-
namespace unum
-
namespace usearch
Typedefs
-
using f16_t = f16_bits_t#
-
using bf16_t = bf16_bits_t#
-
using f64_t = double#
-
using f32_t = float#
-
using u64_t = std::uint64_t#
-
using u32_t = std::uint32_t#
-
using u16_t = std::uint16_t#
-
using u8_t = std::uint8_t#
-
using i64_t = std::int64_t#
-
using i32_t = std::int32_t#
-
using i16_t = std::int16_t#
-
using i8_t = std::int8_t#
-
using executor_default_t = executor_stl_t#
-
using aligned_allocator_t = aligned_allocator_gt<>#
-
using memory_mapping_allocator_t = memory_mapping_allocator_gt<>#
-
using cast_punned_t = bool (*)(byte_t const*, std::size_t, byte_t*)#
Type-punned array casting function. Arguments: input buffer, bytes in input buffer, output buffer. Returns
true
if the casting was performed successfully,false
otherwise.
-
using distance_punned_t = float#
-
using exact_search_results_t = matrix_slice_gt<exact_offset_and_distance_t const>#
-
using kmeans_clustering_t = kmeans_clustering_gt<>#
Enums
-
enum b1x8_t#
Values:
-
enum class metric_kind_t : std::uint8_t#
Enumerates the most commonly used distance metrics, mostly for dense vector representations.
Values:
-
enumerator unknown_k#
-
enumerator ip_k#
-
enumerator cos_k#
-
enumerator l2sq_k#
-
enumerator pearson_k#
-
enumerator haversine_k#
-
enumerator divergence_k#
-
enumerator hamming_k#
-
enumerator tanimoto_k#
-
enumerator sorensen_k#
-
enumerator jaccard_k#
-
enumerator unknown_k#
-
enum class scalar_kind_t : std::uint8_t#
Enumerates the most commonly used scalar types, mostly for dense vector representations. Doesn’t include logical types, like complex numbers or quaternions.
Values:
-
enumerator unknown_k#
-
enumerator b1x8_k#
-
enumerator u40_k#
-
enumerator uuid_k#
-
enumerator bf16_k#
-
enumerator f64_k#
-
enumerator f32_k#
-
enumerator f16_k#
-
enumerator f8_k#
-
enumerator u64_k#
-
enumerator u32_k#
-
enumerator u16_k#
-
enumerator u8_k#
-
enumerator i64_k#
-
enumerator i32_k#
-
enumerator i16_k#
-
enumerator i8_k#
-
enumerator unknown_k#
-
enum class metric_punned_signature_t#
The signature of the user-defined function. Can be just two array pointers, precompiled for a specific array length, or include one or two array sizes as 64-bit unsigned integers.
Values:
-
enumerator array_array_k#
-
enumerator array_array_size_k#
-
enumerator array_array_state_k#
-
enumerator array_array_k#
Functions
-
template<typename scalar_at>
scalar_kind_t scalar_kind() noexcept# Maps a scalar type to its corresponding scalar_kind_t enumeration value.
-
template<typename at>
at angle_to_radians(at angle) noexcept# Converts an angle from degrees to radians.
-
template<typename at>
at square(at value) noexcept# Readability helper to compute the square of a given value.
-
template<typename at, typename compare_at>
inline at clamp(at v, at lo, at hi, compare_at comp) noexcept# Clamps a value between a lower and upper bound using a custom comparator. Similar to
std::clamp
. https://en.cppreference.com/w/cpp/algorithm/clamp.
-
template<typename at>
inline at clamp(at v, at lo, at hi) noexcept# Clamps a value between a lower and upper bound. Similar to
std::clamp
. https://en.cppreference.com/w/cpp/algorithm/clamp.
-
inline bool str_equals(char const *first_begin, std::size_t first_len, char const *second_begin) noexcept#
Compares two strings for equality, given a length for the first string.
-
inline std::size_t bits_per_scalar(scalar_kind_t scalar_kind) noexcept#
Returns the number of bits required to represent a scalar type.
-
inline std::size_t bits_per_scalar_word(scalar_kind_t scalar_kind) noexcept#
Returns the number of bits in a scalar word for a given scalar type. Equivalent to
bits_per_scalar
for types that are not bit-packed.
-
inline char const *scalar_kind_name(scalar_kind_t scalar_kind) noexcept#
Returns the string name of a given scalar type.
-
inline char const *metric_kind_name(metric_kind_t metric) noexcept#
Returns the string name of a given distance metric.
-
inline expected_gt<scalar_kind_t> scalar_kind_from_name(char const *name, std::size_t len)#
Parses a string to identify the corresponding
scalar_kind_t
enumeration value.
-
inline expected_gt<scalar_kind_t> scalar_kind_from_name(char const *name)#
Parses a string to identify the corresponding
scalar_kind_t
enumeration value.
-
inline expected_gt<metric_kind_t> metric_from_name(char const *name, std::size_t len)#
Parses a string to identify the corresponding
metric_kind_t
enumeration value.
-
inline expected_gt<metric_kind_t> metric_from_name(char const *name)#
Parses a string to identify the corresponding
metric_kind_t
enumeration value.
-
inline float f16_to_f32(std::uint16_t u16) noexcept#
Convenience function to upcast a half-precision floating point number to a single-precision one.
-
inline std::uint16_t f32_to_f16(float f32) noexcept#
Convenience function to downcast a single-precision floating point number to a half-precision one.
-
inline float bf16_to_f32(std::uint16_t u16) noexcept#
Convenience function to upcast a brain-floating point number to a single-precision one. https://github.com/ashvardanian/SimSIMD/blob/ff51434d90c66f916e94ff05b24530b127aa4cff/include/simsimd/types.h#L395-L410.
-
inline std::uint16_t f32_to_bf16(float f32) noexcept#
Convenience function to downcast a single-precision floating point number to a brain-floating point one. https://github.com/ashvardanian/SimSIMD/blob/ff51434d90c66f916e94ff05b24530b127aa4cff/include/simsimd/types.h#L412-L425.
-
class f16_bits_t#
- #include <index_plugins.hpp>
Numeric type for the IEEE 754 half-precision floating point. If hardware support isn’t available, falls back to a hardware agnostic in-software implementation.
Public Functions
-
inline f16_bits_t() noexcept#
-
inline f16_bits_t(f16_bits_t&&) = default#
-
inline f16_bits_t &operator=(f16_bits_t&&) = default#
-
inline f16_bits_t(f16_bits_t const&) = default#
-
inline f16_bits_t &operator=(f16_bits_t const&) = default#
-
inline operator float() const noexcept#
-
inline explicit operator bool() const noexcept#
-
inline f16_bits_t(std::int8_t v) noexcept#
-
inline f16_bits_t(bool v) noexcept#
-
inline f16_bits_t(float v) noexcept#
-
inline f16_bits_t(double v) noexcept#
-
inline bool operator<(f16_bits_t const &other) const noexcept#
-
inline f16_bits_t operator+(f16_bits_t other) const noexcept#
-
inline f16_bits_t operator-(f16_bits_t other) const noexcept#
-
inline f16_bits_t operator*(f16_bits_t other) const noexcept#
-
inline f16_bits_t operator/(f16_bits_t other) const noexcept#
-
inline float operator+(float other) const noexcept#
-
inline float operator-(float other) const noexcept#
-
inline float operator*(float other) const noexcept#
-
inline float operator/(float other) const noexcept#
-
inline double operator+(double other) const noexcept#
-
inline double operator-(double other) const noexcept#
-
inline double operator*(double other) const noexcept#
-
inline double operator/(double other) const noexcept#
-
inline f16_bits_t &operator+=(float v) noexcept#
-
inline f16_bits_t &operator-=(float v) noexcept#
-
inline f16_bits_t &operator*=(float v) noexcept#
-
inline f16_bits_t &operator/=(float v) noexcept#
Private Members
-
std::uint16_t uint16_ = {}#
-
inline f16_bits_t() noexcept#
-
class bf16_bits_t#
- #include <index_plugins.hpp>
Numeric type for brain-floating point half-precision floating point. If hardware support isn’t available, falls back to a hardware agnostic in-software implementation.
Public Functions
-
inline bf16_bits_t() noexcept#
-
inline bf16_bits_t(bf16_bits_t&&) = default#
-
inline bf16_bits_t &operator=(bf16_bits_t&&) = default#
-
inline bf16_bits_t(bf16_bits_t const&) = default#
-
inline bf16_bits_t &operator=(bf16_bits_t const&) = default#
-
inline operator float() const noexcept#
-
inline explicit operator bool() const noexcept#
-
inline bf16_bits_t(std::int8_t v) noexcept#
-
inline bf16_bits_t(bool v) noexcept#
-
inline bf16_bits_t(float v) noexcept#
-
inline bf16_bits_t(double v) noexcept#
-
inline bool operator<(bf16_bits_t const &other) const noexcept#
-
inline bf16_bits_t operator+(bf16_bits_t other) const noexcept#
-
inline bf16_bits_t operator-(bf16_bits_t other) const noexcept#
-
inline bf16_bits_t operator*(bf16_bits_t other) const noexcept#
-
inline bf16_bits_t operator/(bf16_bits_t other) const noexcept#
-
inline float operator+(float other) const noexcept#
-
inline float operator-(float other) const noexcept#
-
inline float operator*(float other) const noexcept#
-
inline float operator/(float other) const noexcept#
-
inline double operator+(double other) const noexcept#
-
inline double operator-(double other) const noexcept#
-
inline double operator*(double other) const noexcept#
-
inline double operator/(double other) const noexcept#
-
inline bf16_bits_t &operator+=(float v) noexcept#
-
inline bf16_bits_t &operator-=(float v) noexcept#
-
inline bf16_bits_t &operator*=(float v) noexcept#
-
inline bf16_bits_t &operator/=(float v) noexcept#
Private Members
-
std::uint16_t uint16_ = {}#
-
inline bf16_bits_t() noexcept#
-
class executor_stl_t#
- #include <index_plugins.hpp>
An STL-based executor or a “thread-pool” for parallel execution. Isn’t efficient for small batches, as it recreates the threads on every call.
Public Functions
-
inline executor_stl_t(std::size_t threads_count = 0) noexcept#
- Parameters:
threads_count – The number of threads to be used for parallel execution.
-
inline std::size_t size() const noexcept#
- Returns:
Maximum number of threads available to the executor.
-
template<typename thread_aware_function_at>
inline void fixed(std::size_t tasks, thread_aware_function_at &&thread_aware_function) noexcept(false)# Executes a fixed number of tasks using the specified thread-aware function.
- Parameters:
tasks – The total number of tasks to be executed.
thread_aware_function – The thread-aware function to be called for each thread index and task index.
- Throws:
If – an exception occurs during execution of the thread-aware function.
-
template<typename thread_aware_function_at>
inline void dynamic(std::size_t tasks, thread_aware_function_at &&thread_aware_function) noexcept(false)# Executes limited number of tasks using the specified thread-aware function.
- Parameters:
tasks – The upper bound on the number of tasks.
thread_aware_function – The thread-aware function to be called for each thread index and task index.
- Throws:
If – an exception occurs during execution of the thread-aware function.
-
template<typename thread_aware_function_at>
inline void parallel(thread_aware_function_at &&thread_aware_function) noexcept(false)# Saturates every available thread with the given workload, until they finish.
- Parameters:
thread_aware_function – The thread-aware function to be called for each thread index.
- Throws:
If – an exception occurs during execution of the thread-aware function.
Private Members
-
std::size_t threads_count_ = {}#
-
inline executor_stl_t(std::size_t threads_count = 0) noexcept#
-
template<typename element_at = char, std::size_t alignment_ak = 64>
class aligned_allocator_gt# - #include <index_plugins.hpp>
Uses OS-specific APIs for aligned memory allocations. Available since C11, but only C++17, so we wrap the C version.
Public Types
-
using value_type = element_at#
-
using size_type = std::size_t#
-
using pointer = element_at*#
-
using const_pointer = element_at const*#
Public Functions
-
inline constexpr std::size_t alignment() const#
-
template<typename other_element_at>
struct rebind# - #include <index_plugins.hpp>
Public Types
-
using other = aligned_allocator_gt<other_element_at>#
-
using other = aligned_allocator_gt<other_element_at>#
-
using value_type = element_at#
-
class page_allocator_t#
- #include <index_plugins.hpp>
A simple RAM-page allocator that uses the OS-specific APIs for memory allocation. Shouldn’t be used frequently, as system calls are slow.
Public Functions
Public Static Functions
-
static inline constexpr std::size_t page_size()#
-
static inline constexpr std::size_t page_size()#
-
template<std::size_t alignment_ak = 1>
class memory_mapping_allocator_gt# - #include <index_plugins.hpp>
Memory-mapping allocator designed for “alloc many, free at once” usage patterns. Thread-safe, except constructors and destructors.
Using this memory allocator won’t affect your overall speed much, as that is not the bottleneck. However, it can drastically improve memory usage especially for huge indexes of small vectors.
Public Types
-
using size_type = std::size_t#
Public Functions
-
memory_mapping_allocator_gt() = default#
-
inline memory_mapping_allocator_gt(memory_mapping_allocator_gt &&other) noexcept#
-
inline memory_mapping_allocator_gt &operator=(memory_mapping_allocator_gt &&other) noexcept#
-
inline ~memory_mapping_allocator_gt() noexcept#
-
inline void reset() noexcept#
Discards all previously allocated memory buffers.
-
inline memory_mapping_allocator_gt(memory_mapping_allocator_gt const&) noexcept#
Copy constructor.
Note
This is a no-op copy constructor since the allocator is not copyable.
-
inline memory_mapping_allocator_gt &operator=(memory_mapping_allocator_gt const&) noexcept#
Copy assignment operator.
Note
This is a no-op copy assignment operator since the allocator is not copyable.
- Returns:
Reference to the allocator after the assignment.
-
inline byte_t *allocate(std::size_t count_bytes) noexcept#
Allocates an uninitialized block of memory of the specified size.
- Parameters:
count_bytes – The number of bytes to allocate.
- Returns:
A pointer to the allocated memory block, or
nullptr
if allocation fails.
-
inline std::size_t total_allocated() const noexcept#
Returns the amount of memory used by the allocator across all arenas.
- Returns:
The amount of space in bytes.
-
inline std::size_t total_wasted() const noexcept#
Returns the amount of wasted space due to alignment.
- Returns:
The amount of wasted space in bytes.
-
inline std::size_t total_reserved() const noexcept#
Returns the amount of remaining memory already reserved but not yet used.
- Returns:
The amount of reserved memory in bytes.
Private Members
-
std::mutex mutex_#
-
std::size_t last_capacity_ = min_capacity()#
-
std::size_t wasted_space_ = 0#
-
using size_type = std::size_t#
- #include <index_plugins.hpp>
C++11 userspace implementation of an oversimplified
std::shared_mutex
, that assumes rare interleaving of shared and unique locks. It’s not fair, but requires only a single 32-bit atomic integer to work.Public Functions
Try upgrades the current
lock_shared()
to a uniquelock()
state.
Escalates current lock potentially loosing control in the middle. It’s a shortcut for
try_escalate
-unlock_shared
-lock
trio.
Upgrades the current
lock_shared()
to a uniquelock()
state.
De-escalation of a previously escalated state.
Private Types
Any positive integer describes the number of concurrent readers
Values:
Private Members
- #include <index_plugins.hpp>
Public Functions
Private Members
-
template<typename from_scalar_at, typename to_scalar_at>
struct cast_gt# - #include <index_plugins.hpp>
Utility class used to cast arrays of one scalar type to another, avoiding unnecessary conversions.
-
template<>
struct cast_gt<f16_bits_t, f16_bits_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<bf16_bits_t, bf16_bits_t># - #include <index_plugins.hpp>
-
template<typename from_scalar_at>
struct cast_to_b1x8_gt# - #include <index_plugins.hpp>
-
template<typename to_scalar_at>
struct cast_from_b1x8_gt# - #include <index_plugins.hpp>
-
template<typename from_scalar_at>
struct cast_to_i8_gt# - #include <index_plugins.hpp>
-
template<typename to_scalar_at>
struct cast_from_i8_gt# - #include <index_plugins.hpp>
-
template<>
struct cast_gt<i8_t, f16_bits_t> : public unum::usearch::cast_from_i8_gt<f16_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<i8_t, bf16_bits_t> : public unum::usearch::cast_from_i8_gt<bf16_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<i8_t, f32_t> : public unum::usearch::cast_from_i8_gt<f32_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<i8_t, f64_t> : public unum::usearch::cast_from_i8_gt<f64_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<f16_bits_t, i8_t> : public unum::usearch::cast_to_i8_gt<f16_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<bf16_bits_t, i8_t> : public unum::usearch::cast_to_i8_gt<bf16_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<f32_t, i8_t> : public unum::usearch::cast_to_i8_gt<f32_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<f64_t, i8_t> : public unum::usearch::cast_to_i8_gt<f64_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<b1x8_t, f16_bits_t> : public unum::usearch::cast_from_b1x8_gt<f16_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<b1x8_t, bf16_bits_t> : public unum::usearch::cast_from_b1x8_gt<bf16_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<b1x8_t, f32_t> : public unum::usearch::cast_from_b1x8_gt<f32_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<b1x8_t, f64_t> : public unum::usearch::cast_from_b1x8_gt<f64_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<f16_bits_t, b1x8_t> : public unum::usearch::cast_to_b1x8_gt<f16_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<bf16_bits_t, b1x8_t> : public unum::usearch::cast_to_b1x8_gt<bf16_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<f32_t, b1x8_t> : public unum::usearch::cast_to_b1x8_gt<f32_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<f64_t, b1x8_t> : public unum::usearch::cast_to_b1x8_gt<f64_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<b1x8_t, i8_t> : public unum::usearch::cast_from_b1x8_gt<i8_t># - #include <index_plugins.hpp>
-
template<>
struct cast_gt<i8_t, b1x8_t> : public unum::usearch::cast_to_b1x8_gt<i8_t># - #include <index_plugins.hpp>
-
struct casts_punned_t#
- #include <index_plugins.hpp>
A collection of casting functions for typical vector types. Covers to/from conversions for boolean, integer, half-precision, single-precision, and double-precision scalars.
Public Members
-
struct unum::usearch::casts_punned_t::group_t from#
-
struct unum::usearch::casts_punned_t::group_t to#
Public Static Functions
-
template<typename scalar_at>
static inline casts_punned_t make() noexcept#
-
static inline casts_punned_t make(scalar_kind_t scalar_kind) noexcept#
-
struct group_t#
- #include <index_plugins.hpp>
Public Functions
-
inline cast_punned_t operator[](scalar_kind_t scalar_kind) const noexcept#
Public Members
-
cast_punned_t b1x8 = {}#
-
cast_punned_t i8 = {}#
-
cast_punned_t f16 = {}#
-
cast_punned_t f32 = {}#
-
cast_punned_t f64 = {}#
-
inline cast_punned_t operator[](scalar_kind_t scalar_kind) const noexcept#
-
struct unum::usearch::casts_punned_t::group_t from#
-
template<typename scalar_at = float, typename result_at = scalar_at>
struct metric_ip_gt# - #include <index_plugins.hpp>
Inner (Dot) Product distance.
-
template<typename scalar_at = float, typename result_at = scalar_at>
struct metric_cos_gt# - #include <index_plugins.hpp>
Cosine (Angular) distance. Identical to the Inner Product of normalized vectors. Unless you are running on an tiny embedded platform, this metric is recommended over
::metric_ip_gt
for low-precision scalars.
-
template<typename scalar_at = float, typename result_at = scalar_at>
struct metric_l2sq_gt# - #include <index_plugins.hpp>
Squared Euclidean (L2) distance. Square root is avoided at the end, as it won’t affect the ordering.
-
template<typename scalar_at = std::uint64_t, typename result_at = std::size_t>
struct metric_hamming_gt# - #include <index_plugins.hpp>
Hamming distance computes the number of differing bits in two arrays of integers. An example would be a textual document, tokenized and hashed into a fixed-capacity bitset.
-
template<typename scalar_at = std::uint64_t, typename result_at = float>
struct metric_tanimoto_gt# - #include <index_plugins.hpp>
Tanimoto distance is the intersection over bitwise union. Often used in chemistry and biology to compare molecular fingerprints.
-
template<typename scalar_at = std::uint64_t, typename result_at = float>
struct metric_sorensen_gt# - #include <index_plugins.hpp>
Sorensen-Dice or F1 distance is the intersection over bitwise union. Often used in chemistry and biology to compare molecular fingerprints.
-
template<typename scalar_at = std::int32_t, typename result_at = float>
struct metric_jaccard_gt# - #include <index_plugins.hpp>
Counts the number of matching elements in two unique sorted sets. Can be used to compute the similarity between two textual documents using the IDs of tokens present in them. Similar to
metric_tanimoto_gt
for dense representations.
-
template<typename scalar_at = float, typename result_at = float>
struct metric_pearson_gt# - #include <index_plugins.hpp>
Measures Pearson Correlation between two sequences in a single pass.
-
template<typename scalar_at = float, typename result_at = float>
struct metric_divergence_gt# - #include <index_plugins.hpp>
Measures Jensen-Shannon Divergence between two probability distributions.
-
struct metric_cos_i8_t#
- #include <index_plugins.hpp>
Cosine (Angular) distance for signed 8-bit integers using 16-bit intermediates.
-
struct metric_l2sq_i8_t#
- #include <index_plugins.hpp>
Squared Euclidean (L2) distance for signed 8-bit integers using 16-bit intermediates. Square root is avoided at the end, as it won’t affect the ordering.
-
template<typename scalar_at = float, typename result_at = scalar_at>
struct metric_haversine_gt# - #include <index_plugins.hpp>
Haversine distance for the shortest distance between two nodes on the surface of a 3D sphere, defined with latitude and longitude.
-
class metric_punned_t#
- #include <index_plugins.hpp>
Type-punned metric class, which unlike STL’s
std::function
avoids any memory allocations. It also provides additional APIs to check, if SIMD hardware-acceleration is available. Wraps thesimsimd_metric_dense_punned_t
when available. The auto-vectorized backend otherwise.Public Functions
-
inline result_t operator()(byte_t const *a, byte_t const *b) const noexcept#
Computes the distance between two vectors of fixed length.
! This is the only relevant function in the object. Everything else is just dynamic dispatch logic.
-
inline metric_punned_t() noexcept = default#
-
inline metric_punned_t(metric_punned_t const&) noexcept = default#
-
inline metric_punned_t &operator=(metric_punned_t const&) noexcept = default#
-
inline metric_punned_t(std::size_t dimensions, metric_kind_t metric_kind = metric_kind_t::l2sq_k, scalar_kind_t scalar_kind = scalar_kind_t::f32_k) noexcept#
-
inline metric_punned_t(std::size_t dimensions, std::uintptr_t metric_uintptr, metric_punned_signature_t signature, metric_kind_t metric_kind, scalar_kind_t scalar_kind) noexcept#
-
inline std::size_t dimensions() const noexcept#
-
inline metric_kind_t metric_kind() const noexcept#
-
inline scalar_kind_t scalar_kind() const noexcept#
-
inline explicit operator bool() const noexcept#
-
inline bool missing() const noexcept#
Checks if we’ve failed to initialize the metric with provided arguments.
It’s different from
operator bool()
when it comes to explicitly uninitialized metrics. It’s a common case, where a NULL state is created only to be overwritten later, when we recover an old index state from a file or a network.
-
inline char const *isa_name() const noexcept#
-
inline std::size_t bytes_per_vector() const noexcept#
-
inline std::size_t scalar_words() const noexcept#
Public Static Functions
-
static inline metric_punned_t builtin(std::size_t dimensions, metric_kind_t metric_kind = metric_kind_t::l2sq_k, scalar_kind_t scalar_kind = scalar_kind_t::f32_k) noexcept#
Creates a metric of a natively supported kind, choosing the best available backend internally or from SimSIMD.
- Parameters:
dimensions – The number of elements in the input arrays.
metric_kind – The kind of metric to use.
scalar_kind – The kind of scalar to use.
- Returns:
A metric object that can be used to compute distances between vectors.
-
static inline metric_punned_t stateless(std::size_t dimensions, std::uintptr_t metric_uintptr, metric_punned_signature_t signature, metric_kind_t metric_kind, scalar_kind_t scalar_kind) noexcept#
Creates a metric using the provided function pointer for a stateless metric. So the provided ::metric_uintptr is a pointer to a function that takes two arrays and returns a scalar. If the ::signature is metric_punned_signature_t::array_array_size_k, then the third argument is the number of scalar words in the input vectors.
- Parameters:
dimensions – The number of elements in the input arrays.
metric_uintptr – The function pointer to the metric function.
signature – The signature of the metric function.
metric_kind – The kind of metric to use.
scalar_kind – The kind of scalar to use.
- Returns:
A metric object that can be used to compute distances between vectors.
-
static inline metric_punned_t stateful(std::size_t dimensions, std::uintptr_t metric_uintptr, std::uintptr_t metric_state, metric_kind_t metric_kind = metric_kind_t::unknown_k, scalar_kind_t scalar_kind = scalar_kind_t::unknown_k) noexcept#
Creates a metric using the provided function pointer for a stateful metric. The third argument is the state that will be passed to the metric function.
- Parameters:
dimensions – The number of elements in the input arrays.
metric_uintptr – The function pointer to the metric function.
metric_state – The state to pass to the metric function.
metric_kind – The kind of metric to use.
scalar_kind – The kind of scalar to use.
- Returns:
A metric object that can be used to compute distances between vectors.
Private Types
-
using uptr_t = std::size_t#
In the generalized function API all the are arguments are pointer-sized.
-
using metric_array_array_t = result_t (*)(uptr_t, uptr_t)#
Distance function that takes two arrays and returns a scalar.
-
using metric_array_array_size_t = result_t (*)(uptr_t, uptr_t, uptr_t)#
Distance function that takes two arrays and their length and returns a scalar.
-
using metric_array_array_state_t = result_t (*)(uptr_t, uptr_t, uptr_t)#
Distance function that takes two arrays and some callback state and returns a scalar.
-
using metric_routed_t = result_t (metric_punned_t::*)(uptr_t, uptr_t) const#
Distance function callback, like
metric_array_array_size_t
, but depends on member variables.
Private Functions
-
inline bool configure_with_simsimd() noexcept#
-
inline void configure_with_autovec() noexcept#
Private Members
-
metric_routed_t metric_routed_ = nullptr#
-
std::size_t dimensions_ = 0#
-
metric_kind_t metric_kind_ = metric_kind_t::unknown_k#
-
scalar_kind_t scalar_kind_ = scalar_kind_t::unknown_k#
-
inline result_t operator()(byte_t const *a, byte_t const *b) const noexcept#
-
template<typename scalar_at>
class matrix_slice_gt# - #include <index_plugins.hpp>
View over a potentially-strided memory buffer, containing a row-major matrix.
Public Functions
-
matrix_slice_gt() noexcept = default#
-
matrix_slice_gt(matrix_slice_gt const&) noexcept = default#
-
matrix_slice_gt &operator=(matrix_slice_gt const&) noexcept = default#
-
inline matrix_slice_gt(scalar_t *begin, std::size_t dimensions, std::size_t count, std::size_t stride_bytes) noexcept#
-
inline explicit operator bool() const noexcept#
-
inline std::size_t size() const noexcept#
-
inline std::size_t dimensions() const noexcept#
-
inline std::size_t stride_bytes() const noexcept#
Private Types
-
matrix_slice_gt() noexcept = default#
-
struct exact_offset_and_distance_t#
- #include <index_plugins.hpp>
-
class exact_search_t#
- #include <index_plugins.hpp>
Helper-structure for exact search operations. Perfect if you have <1M vectors and <100 queries per call.
Uses a 3-step procedure to minimize:
cache-misses on vector lookups,
multi-threaded contention on concurrent writes.
Public Functions
-
template<typename scalar_at, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline exact_search_results_t operator()(matrix_slice_gt<scalar_at const> dataset, matrix_slice_gt<scalar_at const> queries, std::size_t wanted, metric_punned_t const &metric, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})#
-
template<typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline exact_search_results_t operator()(byte_t const *dataset_data, std::size_t dataset_count, std::size_t dataset_stride, byte_t const *queries_data, std::size_t queries_count, std::size_t queries_stride, std::size_t wanted, metric_punned_t const &metric, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})#
Private Types
-
using keys_and_distances_t = buffer_gt<exact_offset_and_distance_t>#
Private Members
-
keys_and_distances_t keys_and_distances#
Private Static Functions
-
static inline bool smaller_distance(exact_offset_and_distance_t a, exact_offset_and_distance_t b) noexcept#
-
struct kmeans_clustering_result_t#
- #include <index_plugins.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline kmeans_clustering_result_t failed(error_t message) noexcept#
Public Members
-
std::size_t computed_distances = {}#
-
std::size_t iterations = {}#
The number of iterations the algorithm took to converge.
-
std::size_t last_iteration_points_shifted = {}#
The number of points that changed clusters in the last iteration.
-
inline explicit operator bool() const noexcept#
-
template<typename allocator_at = std::allocator<char>>
class kmeans_clustering_gt# - #include <index_plugins.hpp>
Helper-class for K-Means clustering of dense vectors. Doesn’t require constructing the index, but benefits from mixed-precision logic. ! Doesn’t guarantee that the clusters are balanced in size.
The algorithm is as follows:
Initialization: Select K initial centroids (randomly or with a heuristic).
Assignment: Assign each data point to the nearest centroid based on the Euclidean distance.
Update: Recalculate the centroids as the mean of all points assigned to each centroid.
Repeat: Repeat the assignment and update steps until the centroids no longer change significantly or an early-exit condition is met.
Public Types
-
using distance_t = distance_punned_t#
Public Functions
-
inline kmeans_clustering_gt(std::uint64_t seed) noexcept#
-
inline kmeans_clustering_gt() noexcept(false)#
-
kmeans_clustering_gt(kmeans_clustering_gt const&) = default#
-
kmeans_clustering_gt &operator=(kmeans_clustering_gt const&) = default#
-
template<typename scalar_at, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline kmeans_clustering_result_t operator()(matrix_slice_gt<scalar_at const> points, matrix_slice_gt<scalar_at> centroids, span_gt<std::size_t> point_to_centroid_index, span_gt<distance_t> point_to_centroid_distance, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})#
-
template<typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline kmeans_clustering_result_t operator()(byte_t const *points_data, std::size_t points_count, std::size_t points_stride_bytes, byte_t *centroids_data, std::size_t wanted_clusters, std::size_t centroids_stride_bytes, std::size_t *point_to_centroid_index, distance_t *point_to_centroid_distance, scalar_kind_t original_scalar_kind, std::size_t dimensions, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})#
Public Members
-
metric_kind_t metric_kind = {metric_kind_t::l2sq_k}#
-
scalar_kind_t quantization_kind = {scalar_kind_t::bf16_k}#
-
std::size_t max_iterations = {max_iterations_default_k}#
Early-exit parameter - the maximum number of iterations to perform.
-
f64_t inertia_threshold = {inertia_threshold_default_k}#
Early-exit parameter - the threshold for the final inertia to terminate early.
-
f64_t max_seconds = {max_seconds_default_k}#
Early-exit parameter - the maximum runtime allowed in seconds.
-
f64_t min_shifts = {min_shifts_default_k}#
Early-exit parameter - the minimum share of points that must change clusters per iteration.
-
std::uint64_t seed = {0}#
The random seed to use for centroid initialization.
-
template<typename element_at, typename hash_at, typename equals_at, typename allocator_at = std::allocator<char>>
class flat_hash_multi_set_gt# - #include <index_plugins.hpp>
C++11 Multi-Hash-Set with Linear Probing.
Allows multiple equivalent values,
Supports transparent hashing and equality operator.
Doesn’t throw exceptions, if forbidden.
Doesn’t need reserving a value for deletions.
Layout#
For every slot we store 2 extra bits for 3 possible states: empty, populated, or deleted. With linear probing the hashes at the end of the populated region will spill into its first half.
Public Functions
-
inline std::size_t size() const noexcept#
-
inline std::size_t capacity() const noexcept#
-
inline flat_hash_multi_set_gt() noexcept#
-
inline ~flat_hash_multi_set_gt() noexcept#
-
inline flat_hash_multi_set_gt(flat_hash_multi_set_gt const &other)#
-
inline flat_hash_multi_set_gt &operator=(flat_hash_multi_set_gt const &other)#
-
inline void clear() noexcept#
-
inline void reset() noexcept#
-
inline bool try_reserve(std::size_t capacity) noexcept#
-
template<typename query_at>
inline std::pair<equal_iterator_gt<query_at>, equal_iterator_gt<query_at>> equal_range(query_at const &query) const noexcept# Returns an iterator range of all elements matching the given query.
Technically, the second iterator points to the first empty slot after a range of equal values and non-equal values with similar hashes.
-
template<typename similar_at>
inline bool pop_first(similar_at &&query, element_t &popped_value) noexcept#
-
template<typename similar_at>
inline std::size_t erase(similar_at &&query) noexcept#
-
template<typename similar_at>
inline element_t const *find(similar_at &&query) const noexcept#
-
template<typename similar_at>
inline std::size_t count(similar_at &&query) const noexcept#
-
template<typename similar_at>
inline bool contains(similar_at &&query) const noexcept#
-
inline void reserve(std::size_t capacity)#
Public Static Functions
-
static inline constexpr std::size_t slots_per_bucket()#
-
static inline constexpr std::size_t bytes_per_bucket()#
Private Functions
-
inline slot_ref_t slot_ref(char *data, std::size_t slot_index) const noexcept#
-
inline slot_ref_t slot_ref(std::size_t slot_index) const noexcept#
-
inline bool populate_slot(slot_ref_t slot, element_t const &new_element)#
Private Members
-
char *data_ = nullptr#
-
std::size_t buckets_ = 0#
-
std::size_t populated_slots_ = 0#
-
std::size_t capacity_slots_ = 0#
Number of slots.
-
struct bucket_header_t#
-
template<typename query_at>
class equal_iterator_gt# - #include <index_plugins.hpp>
Public Types
-
using iterator_category = std::forward_iterator_tag#
-
using difference_type = std::ptrdiff_t#
Public Functions
-
inline equal_iterator_gt(std::size_t index, flat_hash_multi_set_gt *parent, query_at const &query, equals_t const &equals)#
-
inline equal_iterator_gt &operator++()#
-
inline equal_iterator_gt operator++(int)#
-
inline bool operator!=(equal_iterator_gt const &other) const#
-
inline bool operator==(equal_iterator_gt const &other) const#
-
using iterator_category = std::forward_iterator_tag#
-
struct slot_ref_t#
-
using f16_t = f16_bits_t#
-
namespace usearch
usearch/index_dense.hpp#
Single-header Vector Search engine for equi-dimensional dense vectors.
- Author
Ash Vardanian
- Date
July 26, 2023
Typedefs
-
using index_dense_t = index_dense_gt<>#
-
using index_dense_big_t = index_dense_gt<uuid_t, uint40_t>#
Functions
-
constexpr char const *default_magic()#
The “magic” sequence helps infer the type of the file. USearch indexes start with the “usearch” string.
-
inline scalar_kind_t convert_pre_2_10_scalar_kind(scalar_kind_t scalar_kind) noexcept#
Fixes serialized scalar-kind codes for pre-v2.10 versions, until we can upgrade to v3. The old enum
scalar_kind_t
is defined without explicit constants from 0.
-
inline void fix_pre_2_10_metadata(index_dense_head_t &head)#
Fixes the metadata for pre-v2.10 versions, until we can upgrade to v3. Originates from: https://github.com/unum-cloud/usearch/issues/423.
-
inline index_dense_metadata_result_t index_dense_metadata_from_path(char const *file_path) noexcept#
Extracts metadata from a pre-constructed index on disk, without loading it or mapping the whole binary file.
-
inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_mapped_file_t const &file, std::size_t offset = 0) noexcept#
Extracts metadata from a pre-constructed index serialized into an in-memory buffer.
-
template<typename men_key_at, typename women_key_at, typename men_slot_at, typename women_slot_at, typename man_to_woman_at = dummy_key_to_key_mapping_t, typename woman_to_man_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
static join_result_t join(index_dense_gt<men_key_at, men_slot_at> const &men, index_dense_gt<women_key_at, women_slot_at> const &women, index_join_config_t config = {}, man_to_woman_at &&man_to_woman = man_to_woman_at{}, woman_to_man_at &&woman_to_man = woman_to_man_at{}, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})# Adapts the Male-Optimal Stable Marriage algorithm for unequal sets to perform fast one-to-one matching between two large collections of vectors, using approximate nearest neighbors search.
- Parameters:
man_to_woman – [inout] Container to map ::first keys to ::second.
woman_to_man – [inout] Container to map ::second keys to ::first.
executor – [in] Thread-pool to execute the job in parallel.
progress – [in] Callback to report the execution progress.
-
namespace unum
-
namespace usearch
Typedefs
-
using index_dense_t = index_dense_gt<>#
-
using index_dense_big_t = index_dense_gt<uuid_t, uint40_t>#
Functions
-
constexpr char const *default_magic()#
The “magic” sequence helps infer the type of the file. USearch indexes start with the “usearch” string.
-
inline scalar_kind_t convert_pre_2_10_scalar_kind(scalar_kind_t scalar_kind) noexcept#
Fixes serialized scalar-kind codes for pre-v2.10 versions, until we can upgrade to v3. The old enum
scalar_kind_t
is defined without explicit constants from 0.
-
inline void fix_pre_2_10_metadata(index_dense_head_t &head)#
Fixes the metadata for pre-v2.10 versions, until we can upgrade to v3. Originates from: https://github.com/unum-cloud/usearch/issues/423.
-
inline index_dense_metadata_result_t index_dense_metadata_from_path(char const *file_path) noexcept#
Extracts metadata from a pre-constructed index on disk, without loading it or mapping the whole binary file.
-
inline index_dense_metadata_result_t index_dense_metadata_from_buffer(memory_mapped_file_t const &file, std::size_t offset = 0) noexcept#
Extracts metadata from a pre-constructed index serialized into an in-memory buffer.
-
template<typename men_key_at, typename women_key_at, typename men_slot_at, typename women_slot_at, typename man_to_woman_at = dummy_key_to_key_mapping_t, typename woman_to_man_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
static join_result_t join(index_dense_gt<men_key_at, men_slot_at> const &men, index_dense_gt<women_key_at, women_slot_at> const &women, index_join_config_t config = {}, man_to_woman_at &&man_to_woman = man_to_woman_at{}, woman_to_man_at &&woman_to_man = woman_to_man_at{}, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})# Adapts the Male-Optimal Stable Marriage algorithm for unequal sets to perform fast one-to-one matching between two large collections of vectors, using approximate nearest neighbors search.
- Parameters:
man_to_woman – [inout] Container to map ::first keys to ::second.
woman_to_man – [inout] Container to map ::second keys to ::first.
executor – [in] Thread-pool to execute the job in parallel.
progress – [in] Callback to report the execution progress.
-
template<typename key_at = default_key_t, typename compressed_slot_at = default_slot_t>
class index_dense_gt# - #include <index_dense.hpp>
Oversimplified type-punned index for equidimensional vectors with automatic down-casting, hardware-specific SIMD metrics, and ability to remove existing vectors, common in Semantic Caching applications.
Serialization#
The serialized binary form of
index_dense_gt
is made up of three parts:Binary matrix, aka the
.bbin
part,Metadata about used metrics, number of used vs free slots,
The HNSW index in a binary form. The first (1.) generally starts with 2 integers - number of rows (vectors) and single-byte columns. The second (2.) starts with “usearch”-magic-string, used to infer the file type on open. The third (3.) is implemented by the underlying
index_gt
class.
Public Types
-
using key_t = vector_key_t#
-
using compressed_slot_t = compressed_slot_at#
-
using distance_t = distance_punned_t#
-
using metric_t = metric_punned_t#
-
using member_ref_t = member_ref_gt<vector_key_t>#
-
using member_cref_t = member_cref_gt<vector_key_t>#
-
using head_t = index_dense_head_t#
-
using head_buffer_t = index_dense_head_buffer_t#
-
using head_result_t = index_dense_head_result_t#
-
using serialization_config_t = index_dense_serialization_config_t#
-
using dynamic_allocator_t = aligned_allocator_gt<byte_t, 64>#
-
using tape_allocator_t = memory_mapping_allocator_gt<64>#
-
using copy_result_t = state_result_t#
Public Functions
-
index_dense_gt() = default#
-
inline index_dense_gt(index_dense_gt &&other)#
-
inline index_dense_gt &operator=(index_dense_gt &&other)#
-
inline void swap(index_dense_gt &other)#
Swaps the contents of this index with another index.
- Parameters:
other – The other index to swap with.
-
inline ~index_dense_gt()#
-
inline explicit operator bool() const#
-
inline std::size_t connectivity() const#
-
inline std::size_t size() const#
-
inline std::size_t capacity() const#
-
inline std::size_t max_level() const#
-
inline index_dense_config_t const &config() const#
-
inline index_limits_t const &limits() const#
-
inline double inverse_log_connectivity() const#
-
inline std::size_t neighbors_base_bytes() const#
-
inline std::size_t neighbors_bytes() const#
-
inline bool multi() const#
-
inline std::size_t currently_available_threads() const#
-
inline scalar_kind_t scalar_kind() const#
-
inline metric_kind_t metric_kind() const#
-
inline std::size_t bytes_per_vector() const#
-
inline std::size_t scalar_words() const#
-
inline std::size_t dimensions() const#
-
inline std::size_t expansion_add() const#
-
inline std::size_t expansion_search() const#
-
inline void change_expansion_add(std::size_t n)#
-
inline void change_expansion_search(std::size_t n)#
-
inline member_citerator_t cbegin() const#
-
inline member_citerator_t cend() const#
-
inline member_iterator_t begin()#
-
inline member_iterator_t end()#
-
inline dynamic_allocator_t const &allocator() const#
-
inline vector_key_t const &free_key() const#
-
inline std::size_t memory_usage() const#
A relatively accurate lower bound on the amount of memory consumed by the system. In practice it’s error will be below 10%.
See also
serialized_length
for the length of the binary serialized representation.
-
inline add_result_t add(vector_key_t key, b1x8_t const *vector, std::size_t thread = any_thread(), bool force_vector_copy = true)#
-
inline add_result_t add(vector_key_t key, i8_t const *vector, std::size_t thread = any_thread(), bool force_vector_copy = true)#
-
inline add_result_t add(vector_key_t key, f16_t const *vector, std::size_t thread = any_thread(), bool force_vector_copy = true)#
-
inline add_result_t add(vector_key_t key, f32_t const *vector, std::size_t thread = any_thread(), bool force_vector_copy = true)#
-
inline add_result_t add(vector_key_t key, f64_t const *vector, std::size_t thread = any_thread(), bool force_vector_copy = true)#
-
inline search_result_t search(b1x8_t const *vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const#
-
inline search_result_t search(i8_t const *vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const#
-
inline search_result_t search(f16_t const *vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const#
-
inline search_result_t search(f32_t const *vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const#
-
inline search_result_t search(f64_t const *vector, std::size_t wanted, std::size_t thread = any_thread(), bool exact = false) const#
-
template<typename predicate_at>
inline search_result_t filtered_search(b1x8_t const *vector, std::size_t wanted, predicate_at &&predicate, std::size_t thread = any_thread(), bool exact = false) const#
-
template<typename predicate_at>
inline search_result_t filtered_search(i8_t const *vector, std::size_t wanted, predicate_at &&predicate, std::size_t thread = any_thread(), bool exact = false) const#
-
template<typename predicate_at>
inline search_result_t filtered_search(f16_t const *vector, std::size_t wanted, predicate_at &&predicate, std::size_t thread = any_thread(), bool exact = false) const#
-
template<typename predicate_at>
inline search_result_t filtered_search(f32_t const *vector, std::size_t wanted, predicate_at &&predicate, std::size_t thread = any_thread(), bool exact = false) const#
-
template<typename predicate_at>
inline search_result_t filtered_search(f64_t const *vector, std::size_t wanted, predicate_at &&predicate, std::size_t thread = any_thread(), bool exact = false) const#
-
inline std::size_t get(vector_key_t key, b1x8_t *vector, std::size_t vectors_count = 1) const#
-
inline std::size_t get(vector_key_t key, i8_t *vector, std::size_t vectors_count = 1) const#
-
inline std::size_t get(vector_key_t key, f16_t *vector, std::size_t vectors_count = 1) const#
-
inline std::size_t get(vector_key_t key, f32_t *vector, std::size_t vectors_count = 1) const#
-
inline std::size_t get(vector_key_t key, f64_t *vector, std::size_t vectors_count = 1) const#
-
inline cluster_result_t cluster(b1x8_t const *vector, std::size_t level, std::size_t thread = any_thread()) const#
-
inline cluster_result_t cluster(i8_t const *vector, std::size_t level, std::size_t thread = any_thread()) const#
-
inline cluster_result_t cluster(f16_t const *vector, std::size_t level, std::size_t thread = any_thread()) const#
-
inline cluster_result_t cluster(f32_t const *vector, std::size_t level, std::size_t thread = any_thread()) const#
-
inline cluster_result_t cluster(f64_t const *vector, std::size_t level, std::size_t thread = any_thread()) const#
-
inline aggregated_distances_t distance_between(vector_key_t key, b1x8_t const *vector, std::size_t thread = any_thread()) const#
-
inline aggregated_distances_t distance_between(vector_key_t key, i8_t const *vector, std::size_t thread = any_thread()) const#
-
inline aggregated_distances_t distance_between(vector_key_t key, f16_t const *vector, std::size_t thread = any_thread()) const#
-
inline aggregated_distances_t distance_between(vector_key_t key, f32_t const *vector, std::size_t thread = any_thread()) const#
-
inline aggregated_distances_t distance_between(vector_key_t key, f64_t const *vector, std::size_t thread = any_thread()) const#
-
inline aggregated_distances_t distance_between(vector_key_t a, vector_key_t b, std::size_t = any_thread()) const#
Computes the distance between two managed entities. If either key maps into more than one vector, will aggregate results exporting the mean, maximum, and minimum values.
-
inline cluster_result_t cluster(vector_key_t key, std::size_t level, std::size_t thread = any_thread()) const#
Identifies a node in a given
level
, that is the closest to thekey
.
-
inline bool try_reserve(index_limits_t limits)#
Reserves memory for the index and the keyed lookup.
! No update or search operations should be running during this operation.
- Returns:
true
if the memory reservation was successful,false
otherwise.
-
inline void reserve(index_limits_t limits)#
-
inline void clear()#
Erases all the vectors from the index.
Will change
size()
to zero, but will keep the samecapacity()
. Will keep the number of available threads/contexts the same as it was.
-
inline void reset()#
Erases all members from index, closing files, and returning RAM to OS.
Will change both
size()
andcapacity()
to zero. Will deallocate all threads/contexts. If the index is memory-mapped - releases the mapping and the descriptor.
-
template<typename output_callback_at, typename progress_at = dummy_progress_t>
inline serialization_result_t save_to_stream(output_callback_at &&output, serialization_config_t config = {}, progress_at &&progress = {}) const# Saves serialized binary index representation to a stream.
-
inline std::size_t serialized_length(serialization_config_t config = {}) const#
Estimate the binary length (in bytes) of the serialized index.
-
template<typename input_callback_at, typename progress_at = dummy_progress_t>
inline serialization_result_t load_from_stream(input_callback_at &&input, serialization_config_t config = {}, progress_at &&progress = {})# Parses the index from file to RAM.
- Parameters:
path – [in] The path to the file.
config – [in] Configuration parameters for imports.
- Returns:
Outcome descriptor explicitly convertible to boolean.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t view(memory_mapped_file_t file, std::size_t offset = 0, serialization_config_t config = {}, progress_at &&progress = {})# Parses the index from file, without loading it into RAM.
- Parameters:
path – [in] The path to the file.
config – [in] Configuration parameters for imports.
- Returns:
Outcome descriptor explicitly convertible to boolean.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(output_file_t file, serialization_config_t config = {}, progress_at &&progress = {}) const# Saves the index to a file.
- Parameters:
path – [in] The path to the file.
config – [in] Configuration parameters for exports.
- Returns:
Outcome descriptor explicitly convertible to boolean.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(memory_mapped_file_t file, std::size_t offset = 0, serialization_config_t config = {}, progress_at &&progress = {}) const# Memory-maps the serialized binary index representation from disk, without copying data into RAM, and fetching it on-demand.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(input_file_t file, serialization_config_t config = {}, progress_at &&progress = {})# Parses the index from file to RAM.
- Parameters:
path – [in] The path to the file.
config – [in] Configuration parameters for imports.
- Returns:
Outcome descriptor explicitly convertible to boolean.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(memory_mapped_file_t file, std::size_t offset = 0, serialization_config_t config = {}, progress_at &&progress = {})# Memory-maps the serialized binary index representation from disk, without copying data into RAM, and fetching it on-demand.
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t save(char const *file_path, serialization_config_t config = {}, progress_at &&progress = {}) const#
-
template<typename progress_at = dummy_progress_t>
inline serialization_result_t load(char const *file_path, serialization_config_t config = {}, progress_at &&progress = {})#
-
inline bool contains(vector_key_t key) const#
Checks if a vector with specified key is present.
- Returns:
true
if the key is present in the index,false
otherwise.
-
inline std::size_t count(vector_key_t key) const#
Count the number of vectors with specified key present.
- Returns:
Zero if nothing is found, a positive integer otherwise.
-
inline labeling_result_t remove(vector_key_t key)#
Removes an entry with the specified key from the index.
- Parameters:
key – [in] The key of the entry to remove.
- Returns:
The ::labeling_result_t indicating the result of the removal operation. If the removal was successful,
result.completed
will betrue
. If the key was not found in the index,result.completed
will befalse
. If an error occurred during the removal operation,result.error
will contain an error message.
-
template<typename keys_iterator_at>
inline labeling_result_t remove(keys_iterator_at keys_begin, keys_iterator_at keys_end)# Removes multiple entries with the specified keys from the index.
- Parameters:
keys_begin – [in] The beginning of the keys range.
keys_end – [in] The ending of the keys range.
- Returns:
The ::labeling_result_t indicating the result of the removal operation.
result.completed
will contain the number of keys that were successfully removed.result.error
will contain an error message if an error occurred during the removal operation.
-
inline labeling_result_t rename(vector_key_t from, vector_key_t to)#
Renames an entry with the specified key to a new key.
- Parameters:
from – [in] The current key of the entry to rename.
to – [in] The new key to assign to the entry.
- Returns:
The ::labeling_result_t indicating the result of the rename operation. If the rename was successful,
result.completed
will betrue
. If the entry with the current key was not found,result.completed
will befalse
.
-
inline void export_keys(vector_key_t *keys, std::size_t offset, std::size_t limit) const#
Exports a range of keys for the vectors present in the index.
- Parameters:
keys – [out] Pointer to the array where the keys will be exported.
offset – [in] The number of keys to skip. Useful for pagination.
limit – [in] The maximum number of keys to export, that can fit in ::keys.
-
inline copy_result_t copy(index_dense_copy_config_t config = {}) const#
Copies the ::index_dense_gt with all the data in it.
- Parameters:
config – The copy configuration (optional).
- Returns:
A copy of the ::index_dense_gt instance.
-
inline copy_result_t fork() const#
Copies the ::index_dense_gt model without any data.
- Returns:
A similarly configured ::index_dense_gt instance.
-
template<typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline compaction_result_t isolate(executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})# Performs compaction on the index, pruning links to removed entries.
- Parameters:
executor – The executor parallel processing. Default ::dummy_executor_t single-threaded.
progress – The progress tracker instance to use. Default ::dummy_progress_t reports nothing.
- Returns:
The ::compaction_result_t indicating the result of the compaction operation.
result.pruned_edges
will contain the number of edges that were removed.result.error
will contain an error message if an error occurred during the compaction operation.
-
template<typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline compaction_result_t compact(executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})# Performs compaction on the index, pruning links to removed entries.
- Parameters:
executor – The executor parallel processing. Default ::dummy_executor_t single-threaded.
progress – The progress tracker instance to use. Default ::dummy_progress_t reports nothing.
- Returns:
The ::compaction_result_t indicating the result of the compaction operation.
result.pruned_edges
will contain the number of edges that were removed.result.error
will contain an error message if an error occurred during the compaction operation.
-
template<typename man_to_woman_at = dummy_key_to_key_mapping_t, typename woman_to_man_at = dummy_key_to_key_mapping_t, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline join_result_t join(index_dense_gt const &women, index_join_config_t config = {}, man_to_woman_at &&man_to_woman = man_to_woman_at{}, woman_to_man_at &&woman_to_man = woman_to_man_at{}, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{}) const#
-
template<typename queries_iterator_at, typename executor_at = dummy_executor_t, typename progress_at = dummy_progress_t>
inline clustering_result_t cluster(queries_iterator_at queries_begin, queries_iterator_at queries_end, index_dense_clustering_config_t config, vector_key_t *cluster_keys, distance_t *cluster_distances, executor_at &&executor = executor_at{}, progress_at &&progress = progress_at{})# Implements clustering, classifying the given objects (vectors of member keys) into a given number of clusters.
- Parameters:
queries_begin – [in] Iterator pointing to the first query.
queries_end – [in] Iterator pointing to the last query.
executor – [in] Thread-pool to execute the job in parallel.
progress – [in] Callback to report the execution progress.
config – [in] Configuration parameters for clustering.
cluster_keys – [out] Pointer to the array where the cluster keys will be exported.
cluster_distances – [out] Pointer to the array where the distances to those centroids will be exported.
Public Static Functions
-
static inline state_result_t make(metric_t metric = {}, index_dense_config_t config = {}, vector_key_t free_key = default_free_value<vector_key_t>())#
Constructs an instance of ::index_dense_gt.
! If the
metric
isn’t provided in this method, it has to be set with ! thechange_metric
method before the index can be used. Alternatively, ! if you are loading an existing index, the metric will be set automatically.- Parameters:
metric – [in] One of the provided or an ad-hoc metric, type-punned.
config – [in] The index configuration (optional).
free_key – [in] The key used for freed vectors (optional).
- Returns:
An instance of ::index_dense_gt or error, wrapped in a
state_result_t
.
-
static inline state_result_t make(char const *path, bool view = false)#
Constructs an instance of ::index_dense_gt from a serialized binary file.
- Parameters:
path – [in] The path to the binary file.
view – [in] Whether to map the file into memory or load it.
- Returns:
An instance of ::index_dense_gt or error, wrapped in a
state_result_t
.
-
static inline constexpr std::size_t any_thread()#
-
static inline constexpr distance_t infinite_distance()#
Private Types
-
using index_t = index_gt<distance_t, vector_key_t, compressed_slot_t, dynamic_allocator_t, tape_allocator_t>#
Punned index.
-
using index_allocator_t = aligned_allocator_gt<index_t, 64>#
-
using cast_buffer_t = buffer_gt<byte_t, dynamic_allocator_t>#
-
using vectors_tape_allocator_t = memory_mapping_allocator_gt<8>#
-
using vectors_lookup_allocator_t = aligned_allocator_gt<byte_t*, 64>#
-
using vectors_lookup_t = buffer_gt<byte_t*, vectors_lookup_allocator_t>#
-
using available_threads_allocator_t = aligned_allocator_gt<std::size_t, 64>#
-
using available_threads_t = ring_gt<std::size_t, available_threads_allocator_t>#
-
using unique_lock_t = std::unique_lock<shared_mutex_t>#
Private Functions
- inline thread_lock_t thread_lock_ (std::size_t thread_id) const usearch_noexcept_m
- inline void thread_unlock_ (std::size_t thread_id) const usearch_noexcept_m
-
template<typename scalar_at>
inline add_result_t add_(vector_key_t key, scalar_at const *vector, std::size_t thread, bool force_vector_copy, cast_punned_t const &cast)#
-
template<typename scalar_at, typename predicate_at>
inline search_result_t search_(scalar_at const *vector, std::size_t wanted, predicate_at &&predicate, std::size_t thread, bool exact, cast_punned_t const &cast) const#
-
template<typename scalar_at>
inline cluster_result_t cluster_(scalar_at const *vector, std::size_t level, std::size_t thread, cast_punned_t const &cast) const#
-
template<typename scalar_at>
inline aggregated_distances_t distance_between_(vector_key_t key, scalar_at const *vector, std::size_t thread, cast_punned_t const &cast) const#
-
inline void reindex_keys_()#
-
template<typename scalar_at>
inline std::size_t get_(vector_key_t key, scalar_at *reconstructed, std::size_t vectors_limit, cast_punned_t const &cast) const#
Private Members
-
index_dense_config_t config_#
-
mutable cast_buffer_t cast_buffer_#
Temporary memory for every thread to store a casted vector.
-
casts_punned_t casts_#
-
metric_t metric_#
An instance of a potentially stateful
metric_t
used to initialize copies and forks.
-
vectors_tape_allocator_t vectors_tape_allocator_#
Allocator for the copied vectors, aligned to widest double-precision scalars.
-
mutable vectors_lookup_t vectors_lookup_#
For every managed
compressed_slot_t
stores a pointer to the allocated vector copy.
-
mutable available_threads_t available_threads_#
Originally forms and array of integers [0, threads], marking all as available.
-
mutable std::mutex available_threads_mutex_#
Mutex, controlling concurrent access to
available_threads_
.
-
flat_hash_multi_set_gt<key_and_slot_t, lookup_key_hash_t, lookup_key_same_t> slot_lookup_#
Multi-Map from keys to IDs, and allocated vectors.
-
mutable shared_mutex_t slot_lookup_mutex_#
Mutex, controlling concurrent access to
slot_lookup_
.
-
ring_gt<compressed_slot_t> free_keys_#
Ring-shaped queue of deleted entries, to be reused on future insertions.
-
mutable std::mutex free_keys_mutex_#
Mutex, controlling concurrent access to
free_keys_
.
-
vector_key_t free_key_ = default_free_value<vector_key_t>()#
A constant for the reserved key value, used to mark deleted entries.
-
struct aggregated_distances_t#
- #include <index_dense.hpp>
Public Members
-
std::size_t count = 0#
-
distance_t mean = infinite_distance()#
-
distance_t min = infinite_distance()#
-
distance_t max = infinite_distance()#
-
std::size_t count = 0#
-
struct clustering_result_t#
- #include <index_dense.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline clustering_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
struct compaction_result_t#
- #include <index_dense.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline compaction_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
struct key_and_slot_t#
Public Functions
-
inline bool any_slot() const#
Public Static Functions
-
static inline key_and_slot_t any_slot(vector_key_t key)#
-
inline bool any_slot() const#
-
struct labeling_result_t#
- #include <index_dense.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline labeling_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
struct lookup_key_hash_t#
Public Types
-
using is_transparent = void#
Public Functions
-
inline std::size_t operator()(key_and_slot_t const &k) const noexcept#
-
inline std::size_t operator()(vector_key_t const &k) const noexcept#
-
using is_transparent = void#
-
struct lookup_key_same_t#
Public Types
-
using is_transparent = void#
Public Functions
-
inline bool operator()(key_and_slot_t const &a, vector_key_t const &b) const noexcept#
-
inline bool operator()(vector_key_t const &a, key_and_slot_t const &b) const noexcept#
-
inline bool operator()(key_and_slot_t const &a, key_and_slot_t const &b) const noexcept#
-
using is_transparent = void#
-
class metric_proxy_t#
Punned metric object.
Public Functions
-
inline metric_proxy_t(index_dense_gt const &index) noexcept#
-
inline distance_t operator()(byte_t const *a, member_cref_t b) const noexcept#
-
inline distance_t operator()(member_cref_t a, member_cref_t b) const noexcept#
-
inline distance_t operator()(byte_t const *a, member_citerator_t b) const noexcept#
-
inline distance_t operator()(member_citerator_t a, member_citerator_t b) const noexcept#
-
inline distance_t operator()(byte_t const *a, byte_t const *b) const noexcept#
-
inline byte_t const *v(member_cref_t m) const noexcept#
-
inline byte_t const *v(member_citerator_t m) const noexcept#
-
inline distance_t f(byte_t const *a, byte_t const *b) const noexcept#
Private Members
-
index_dense_gt const *index_ = nullptr#
-
inline metric_proxy_t(index_dense_gt const &index) noexcept#
-
struct search_result_t : public unum::usearch::index_gt<distance_at, key_at, compressed_slot_at, dynamic_allocator_at, tape_allocator_at>::search_result_t#
- #include <index_dense.hpp>
A search result, containing the found keys and distances.
As the
index_dense_gt
manages the thread-pool on its own, the search result preserves the thread-lock to avoid undefined behaviors, when other threads start overwriting the results.Public Functions
-
inline search_result_t(index_dense_gt const &parent) noexcept#
-
inline search_result_t failed(error_t message) noexcept#
Private Functions
-
inline search_result_t(typename index_t::search_result_t result, thread_lock_t lock) noexcept#
Private Members
-
thread_lock_t lock_#
Friends
- friend class index_dense_gt
-
inline search_result_t(index_dense_gt const &parent) noexcept#
-
struct state_result_t#
- #include <index_dense.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline state_result_t failed(error_t message) noexcept#
-
inline operator index_dense_gt&&() &&#
-
inline explicit operator bool() const noexcept#
-
struct thread_lock_t#
Locks the thread for the duration of the operation.
Public Functions
- inline ~thread_lock_t () usearch_noexcept_m
-
thread_lock_t(thread_lock_t const&) = delete#
-
thread_lock_t &operator=(thread_lock_t const&) = delete#
-
inline thread_lock_t(index_dense_gt const &parent, std::size_t thread_id, bool engaged = true) noexcept#
-
inline thread_lock_t(thread_lock_t &&other) noexcept#
-
class values_proxy_t#
- #include <index_dense.hpp>
Public Functions
-
inline values_proxy_t(index_dense_gt const &index) noexcept#
-
inline byte_t const *operator[](compressed_slot_t slot) const noexcept#
-
inline byte_t const *operator[](member_citerator_t it) const noexcept#
Private Members
-
index_dense_gt const *index_#
-
inline values_proxy_t(index_dense_gt const &index) noexcept#
-
struct index_dense_head_t#
- #include <index_dense.hpp>
Serialized binary representations of the USearch index start with metadata. Metadata is parsed into a
index_dense_head_t
, containing the USearch package version, and the properties of the index.It uses: 13 bytes for file versioning, 22 bytes for structural information = 35 bytes. The following 24 bytes contain binary size of the graph, of the vectors, and the checksum, leaving 5 bytes at the end vacant.
Public Members
-
char const *magic#
-
misaligned_ref_gt<version_t> version_major#
-
misaligned_ref_gt<version_t> version_minor#
-
misaligned_ref_gt<version_t> version_patch#
-
misaligned_ref_gt<metric_kind_t> kind_metric#
-
misaligned_ref_gt<scalar_kind_t> kind_scalar#
-
misaligned_ref_gt<scalar_kind_t> kind_key#
-
misaligned_ref_gt<scalar_kind_t> kind_compressed_slot#
-
misaligned_ref_gt<std::uint64_t> count_present#
-
misaligned_ref_gt<std::uint64_t> count_deleted#
-
misaligned_ref_gt<std::uint64_t> dimensions#
-
misaligned_ref_gt<bool> multi#
-
char const *magic#
-
struct index_dense_head_result_t#
- #include <index_dense.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline index_dense_head_result_t failed(error_t message) noexcept#
-
inline explicit operator bool() const noexcept#
-
struct index_dense_config_t : public unum::usearch::index_config_t#
- #include <index_dense.hpp>
Configuration settings for the construction of dense equidimensional vector indexes.
Unlike the underlying
index_gt
class, incorporates the::expansion_add
and::expansion_search
parameters passed separately for the lower-level engine.Public Functions
-
inline index_dense_config_t(index_config_t base) noexcept#
-
inline index_dense_config_t(std::size_t c = 0, std::size_t ea = 0, std::size_t es = 0) noexcept#
Public Members
-
std::size_t expansion_add = default_expansion_add()#
-
std::size_t expansion_search = default_expansion_search()#
-
bool exclude_vectors = false#
Excludes vectors from the serialized file. This is handy when you want to store the vectors in a separate file.
! For advanced users only.
-
bool multi = false#
Allows you to store multiple vectors per key. This is handy when a large document is chunked into many parts.
! May degrade the performance of iterators.
-
bool enable_key_lookups = true#
Allows you to reduce RAM consumption by avoiding reverse-indexing keys-to-vectors, and only keeping the vectors-to-keys mappings.
! This configuration parameter doesn’t affect the serialized file, ! and is not preserved between runs. Makes sense for smaller vectors ! that fit in a couple of cache lines.
The trade-off is that some methods won’t be available, like
get
,rename
, andremove
. The basic functionality, likeadd
andsearch
will work as expected even withenable_key_lookups = false
.If both
!multi && !enable_key_lookups
, the “duplicate entry” checks won’t be performed and no errors will be raised.
-
inline index_dense_config_t(index_config_t base) noexcept#
-
struct index_dense_clustering_config_t#
- #include <index_dense.hpp>
Public Members
-
std::size_t min_clusters = 0#
-
std::size_t max_clusters = 0#
-
enum unum::usearch::index_dense_clustering_config_t::mode_t mode = merge_smallest_k#
-
std::size_t min_clusters = 0#
-
struct index_dense_serialization_config_t#
- #include <index_dense.hpp>
-
struct index_dense_copy_config_t : public unum::usearch::index_copy_config_t#
- #include <index_dense.hpp>
Public Functions
-
index_dense_copy_config_t() = default#
-
inline index_dense_copy_config_t(index_copy_config_t base) noexcept#
Public Members
-
bool force_vector_copy = true#
-
index_dense_copy_config_t() = default#
-
struct index_dense_metadata_result_t#
- #include <index_dense.hpp>
Public Functions
-
inline explicit operator bool() const noexcept#
-
inline index_dense_metadata_result_t failed(error_t message) noexcept#
-
inline index_dense_metadata_result_t() noexcept#
-
inline index_dense_metadata_result_t(index_dense_metadata_result_t &&other) noexcept#
-
inline index_dense_metadata_result_t &operator=(index_dense_metadata_result_t &&other) noexcept#
-
inline explicit operator bool() const noexcept#
-
using index_dense_t = index_dense_gt<>#
-
namespace usearch