Namespaces | |
| namespace | common |
| namespace | detail |
| namespace | fuzz |
Classes | |
| struct | CachedLevenshtein |
| struct | EditOp |
| Edit operations used by the Levenshtein distance. More... | |
| class | Editops |
| class | IteratorView |
| struct | LevenshteinWeightTable |
| struct | Opcode |
| Edit operations used by the Levenshtein distance. More... | |
| class | Opcodes |
| class | SplittedSentenceView |
| struct | StringAffix |
Typedefs | |
| template<typename InputIt > | |
| using | IteratorViewVec = std::vector< IteratorView< InputIt > > |
Enumerations | |
| enum class | EditType { None = 0 , Replace = 1 , Insert = 2 , Delete = 3 } |
| Edit operation types used by the Levenshtein distance. More... | |
Functions | |
| template<typename InputIt1 , typename InputIt2 > | |
| bool | operator== (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
| template<typename InputIt1 , typename InputIt2 > | |
| bool | operator!= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
| template<typename InputIt1 , typename InputIt2 > | |
| bool | operator< (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
| template<typename InputIt1 , typename InputIt2 > | |
| bool | operator> (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
| template<typename InputIt1 , typename InputIt2 > | |
| bool | operator<= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
| template<typename InputIt1 , typename InputIt2 > | |
| bool | operator>= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
| bool | operator== (EditOp a, EditOp b) |
| bool | operator== (Opcode a, Opcode b) |
| bool | operator== (const Editops &lhs, const Editops &rhs) |
| bool | operator!= (const Editops &lhs, const Editops &rhs) |
| void | swap (Editops &lhs, Editops &rhs) |
| bool | operator== (const Opcodes &lhs, const Opcodes &rhs) |
| bool | operator!= (const Opcodes &lhs, const Opcodes &rhs) |
| void | swap (Opcodes &lhs, Opcodes &rhs) |
| template<typename InputIt1 , typename InputIt2 > | |
| int64_t | levenshtein_distance (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, int64_t max=std::numeric_limits< int64_t >::max()) |
| Calculates the minimum number of insertions, deletions, and substitutions required to change one sequence into the other according to Levenshtein with custom costs for insertion, deletion and substitution. | |
| template<typename Sentence1 , typename Sentence2 > | |
| int64_t | levenshtein_distance (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, int64_t max=std::numeric_limits< int64_t >::max()) |
| template<typename InputIt1 , typename InputIt2 > | |
| int64_t | levenshtein_similarity (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, int64_t score_cutoff=0.0) |
| template<typename Sentence1 , typename Sentence2 > | |
| int64_t | levenshtein_similarity (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, int64_t score_cutoff=0.0) |
| template<typename InputIt1 , typename InputIt2 > | |
| double | levenshtein_normalized_distance (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=1.0) |
| template<typename Sentence1 , typename Sentence2 > | |
| double | levenshtein_normalized_distance (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=1.0) |
| template<typename InputIt1 , typename InputIt2 > | |
| double | levenshtein_normalized_similarity (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=0.0) |
| Calculates a normalized levenshtein distance using custom costs for insertion, deletion and substitution. | |
| template<typename Sentence1 , typename Sentence2 > | |
| double | levenshtein_normalized_similarity (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=0.0) |
| template<typename InputIt1 , typename InputIt2 > | |
| Editops | levenshtein_editops (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) |
| Return list of EditOp describing how to turn s1 into s2. | |
| template<typename Sentence1 , typename Sentence2 > | |
| Editops | levenshtein_editops (const Sentence1 &s1, const Sentence2 &s2) |
| using rapidfuzz::IteratorViewVec = typedef std::vector<IteratorView<InputIt> > |
|
strong |
| int64_t rapidfuzz::levenshtein_distance | ( | const Sentence1 & | s1, |
| const Sentence2 & | s2, | ||
| LevenshteinWeightTable | weights = {1, 1, 1}, |
||
| int64_t | max = std::numeric_limits<int64_t>::max() |
||
| ) |
| int64_t rapidfuzz::levenshtein_distance | ( | InputIt1 | first1, |
| InputIt1 | last1, | ||
| InputIt2 | first2, | ||
| InputIt2 | last2, | ||
| LevenshteinWeightTable | weights = {1, 1, 1}, |
||
| int64_t | max = std::numeric_limits<int64_t>::max() |
||
| ) |
Calculates the minimum number of insertions, deletions, and substitutions required to change one sequence into the other according to Levenshtein with custom costs for insertion, deletion and substitution.
| Sentence1 | This is a string that can be converted to basic_string_view<char_type> |
| Sentence2 | This is a string that can be converted to basic_string_view<char_type> |
| s1 | string to compare with s2 (for type info check Template parameters above) |
| s2 | string to compare with s1 (for type info check Template parameters above) |
| weights | The weights for the three operations in the form (insertion, deletion, substitution). Default is {1, 1, 1}, which gives all three operations a weight of 1. |
| max | Maximum Levenshtein distance between s1 and s2, that is considered as a result. If the distance is bigger than max, max + 1 is returned instead. Default is std::numeric_limits<int64_t>::max(), which deactivates this behaviour. |
Depending on the input parameters different optimized implementation are used to improve the performance. Worst-case performance is O(m * n).
Insertion = Deletion = Substitution:
This is known as uniform Levenshtein distance and is the distance most commonly referred to as Levenshtein distance. The following implementation is used with a worst-case performance of O([N/64]M).
O(N).max. The time complexity of this algorithm is O(N).O(N).O([N/64]M).Insertion = Deletion, Substitution >= Insertion + Deletion:
Since every Substitution can be performed as Insertion + Deletion, this variant of the Levenshtein distance only uses Insertions and Deletions. Therefore this variant is often referred to as InDel-Distance. The following implementation is used with a worst-case performance of O([N/64]M).
O(N).O(N).max. As a difference to the normal Levenshtein distance this algorithm can even be used up to a threshold of 4 here, since the higher weight of substitutions decreases the amount of possible edit operations. The time complexity of this algorithm is O(N).O(N).O([N/64]M).Other weights:
The implementation for other weights is based on Wagner-Fischer. It has a performance of O(N * M) and has a memory usage of O(N). Further details can be found in [wagner_fischer_1974].
Find the Levenshtein distance between two strings:
Setting a maximum distance allows the implementation to select a more efficient implementation:
It is possible to select different weights by passing a weight struct.
| Editops rapidfuzz::levenshtein_editops | ( | const Sentence1 & | s1, |
| const Sentence2 & | s2 | ||
| ) |
| Editops rapidfuzz::levenshtein_editops | ( | InputIt1 | first1, |
| InputIt1 | last1, | ||
| InputIt2 | first2, | ||
| InputIt2 | last2 | ||
| ) |
Return list of EditOp describing how to turn s1 into s2.
| Sentence1 | This is a string that can be converted to basic_string_view<char_type> |
| Sentence2 | This is a string that can be converted to basic_string_view<char_type> |
| s1 | string to compare with s2 (for type info check Template parameters above) |
| s2 | string to compare with s1 (for type info check Template parameters above) |
| double rapidfuzz::levenshtein_normalized_distance | ( | const Sentence1 & | s1, |
| const Sentence2 & | s2, | ||
| LevenshteinWeightTable | weights = {1, 1, 1}, |
||
| double | score_cutoff = 1.0 |
||
| ) |
| double rapidfuzz::levenshtein_normalized_distance | ( | InputIt1 | first1, |
| InputIt1 | last1, | ||
| InputIt2 | first2, | ||
| InputIt2 | last2, | ||
| LevenshteinWeightTable | weights = {1, 1, 1}, |
||
| double | score_cutoff = 1.0 |
||
| ) |
| double rapidfuzz::levenshtein_normalized_similarity | ( | const Sentence1 & | s1, |
| const Sentence2 & | s2, | ||
| LevenshteinWeightTable | weights = {1, 1, 1}, |
||
| double | score_cutoff = 0.0 |
||
| ) |
| double rapidfuzz::levenshtein_normalized_similarity | ( | InputIt1 | first1, |
| InputIt1 | last1, | ||
| InputIt2 | first2, | ||
| InputIt2 | last2, | ||
| LevenshteinWeightTable | weights = {1, 1, 1}, |
||
| double | score_cutoff = 0.0 |
||
| ) |
Calculates a normalized levenshtein distance using custom costs for insertion, deletion and substitution.
| Sentence1 | This is a string that can be converted to basic_string_view<char_type> |
| Sentence2 | This is a string that can be converted to basic_string_view<char_type> |
| s1 | string to compare with s2 (for type info check Template parameters above) |
| s2 | string to compare with s1 (for type info check Template parameters above) |
| weights | The weights for the three operations in the form (insertion, deletion, substitution). Default is {1, 1, 1}, which gives all three operations a weight of 1. |
| score_cutoff | Optional argument for a score threshold as a float between 0 and 1.0. For ratio < score_cutoff 0 is returned instead. Default is 0, which deactivates this behaviour. |
The normalization of the Levenshtein distance is performed in the following way:

Find the normalized Levenshtein distance between two strings:
Setting a score_cutoff allows the implementation to select a more efficient implementation:
It is possible to select different weights by passing a weight struct
| int64_t rapidfuzz::levenshtein_similarity | ( | const Sentence1 & | s1, |
| const Sentence2 & | s2, | ||
| LevenshteinWeightTable | weights = {1, 1, 1}, |
||
| int64_t | score_cutoff = 0.0 |
||
| ) |
| int64_t rapidfuzz::levenshtein_similarity | ( | InputIt1 | first1, |
| InputIt1 | last1, | ||
| InputIt2 | first2, | ||
| InputIt2 | last2, | ||
| LevenshteinWeightTable | weights = {1, 1, 1}, |
||
| int64_t | score_cutoff = 0.0 |
||
| ) |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |