Namespaces | |
namespace | common |
namespace | detail |
namespace | fuzz |
Classes | |
struct | CachedLevenshtein |
struct | EditOp |
Edit operations used by the Levenshtein distance. More... | |
class | Editops |
class | IteratorView |
struct | LevenshteinWeightTable |
struct | Opcode |
Edit operations used by the Levenshtein distance. More... | |
class | Opcodes |
class | SplittedSentenceView |
struct | StringAffix |
Typedefs | |
template<typename InputIt > | |
using | IteratorViewVec = std::vector< IteratorView< InputIt > > |
Enumerations | |
enum class | EditType { None = 0 , Replace = 1 , Insert = 2 , Delete = 3 } |
Edit operation types used by the Levenshtein distance. More... | |
Functions | |
template<typename InputIt1 , typename InputIt2 > | |
bool | operator== (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
template<typename InputIt1 , typename InputIt2 > | |
bool | operator!= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
template<typename InputIt1 , typename InputIt2 > | |
bool | operator< (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
template<typename InputIt1 , typename InputIt2 > | |
bool | operator> (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
template<typename InputIt1 , typename InputIt2 > | |
bool | operator<= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
template<typename InputIt1 , typename InputIt2 > | |
bool | operator>= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b) |
bool | operator== (EditOp a, EditOp b) |
bool | operator== (Opcode a, Opcode b) |
bool | operator== (const Editops &lhs, const Editops &rhs) |
bool | operator!= (const Editops &lhs, const Editops &rhs) |
void | swap (Editops &lhs, Editops &rhs) |
bool | operator== (const Opcodes &lhs, const Opcodes &rhs) |
bool | operator!= (const Opcodes &lhs, const Opcodes &rhs) |
void | swap (Opcodes &lhs, Opcodes &rhs) |
template<typename InputIt1 , typename InputIt2 > | |
int64_t | levenshtein_distance (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, int64_t max=std::numeric_limits< int64_t >::max()) |
Calculates the minimum number of insertions, deletions, and substitutions required to change one sequence into the other according to Levenshtein with custom costs for insertion, deletion and substitution. | |
template<typename Sentence1 , typename Sentence2 > | |
int64_t | levenshtein_distance (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, int64_t max=std::numeric_limits< int64_t >::max()) |
template<typename InputIt1 , typename InputIt2 > | |
int64_t | levenshtein_similarity (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, int64_t score_cutoff=0.0) |
template<typename Sentence1 , typename Sentence2 > | |
int64_t | levenshtein_similarity (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, int64_t score_cutoff=0.0) |
template<typename InputIt1 , typename InputIt2 > | |
double | levenshtein_normalized_distance (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=1.0) |
template<typename Sentence1 , typename Sentence2 > | |
double | levenshtein_normalized_distance (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=1.0) |
template<typename InputIt1 , typename InputIt2 > | |
double | levenshtein_normalized_similarity (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=0.0) |
Calculates a normalized levenshtein distance using custom costs for insertion, deletion and substitution. | |
template<typename Sentence1 , typename Sentence2 > | |
double | levenshtein_normalized_similarity (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=0.0) |
template<typename InputIt1 , typename InputIt2 > | |
Editops | levenshtein_editops (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) |
Return list of EditOp describing how to turn s1 into s2. | |
template<typename Sentence1 , typename Sentence2 > | |
Editops | levenshtein_editops (const Sentence1 &s1, const Sentence2 &s2) |
using rapidfuzz::IteratorViewVec = typedef std::vector<IteratorView<InputIt> > |
|
strong |
int64_t rapidfuzz::levenshtein_distance | ( | const Sentence1 & | s1, |
const Sentence2 & | s2, | ||
LevenshteinWeightTable | weights = {1, 1, 1} , |
||
int64_t | max = std::numeric_limits<int64_t>::max() |
||
) |
int64_t rapidfuzz::levenshtein_distance | ( | InputIt1 | first1, |
InputIt1 | last1, | ||
InputIt2 | first2, | ||
InputIt2 | last2, | ||
LevenshteinWeightTable | weights = {1, 1, 1} , |
||
int64_t | max = std::numeric_limits<int64_t>::max() |
||
) |
Calculates the minimum number of insertions, deletions, and substitutions required to change one sequence into the other according to Levenshtein with custom costs for insertion, deletion and substitution.
Sentence1 | This is a string that can be converted to basic_string_view<char_type> |
Sentence2 | This is a string that can be converted to basic_string_view<char_type> |
s1 | string to compare with s2 (for type info check Template parameters above) |
s2 | string to compare with s1 (for type info check Template parameters above) |
weights | The weights for the three operations in the form (insertion, deletion, substitution). Default is {1, 1, 1}, which gives all three operations a weight of 1. |
max | Maximum Levenshtein distance between s1 and s2, that is considered as a result. If the distance is bigger than max, max + 1 is returned instead. Default is std::numeric_limits<int64_t>::max(), which deactivates this behaviour. |
Depending on the input parameters different optimized implementation are used to improve the performance. Worst-case performance is O(m * n)
.
Insertion = Deletion = Substitution:
This is known as uniform Levenshtein distance and is the distance most commonly referred to as Levenshtein distance. The following implementation is used with a worst-case performance of O([N/64]M)
.
O(N)
.max
. The time complexity of this algorithm is O(N)
.O(N)
.O([N/64]M)
.Insertion = Deletion, Substitution >= Insertion + Deletion:
Since every Substitution can be performed as Insertion + Deletion, this variant of the Levenshtein distance only uses Insertions and Deletions. Therefore this variant is often referred to as InDel-Distance. The following implementation is used with a worst-case performance of O([N/64]M)
.
O(N)
.O(N)
.max
. As a difference to the normal Levenshtein distance this algorithm can even be used up to a threshold of 4 here, since the higher weight of substitutions decreases the amount of possible edit operations. The time complexity of this algorithm is O(N)
.O(N)
.O([N/64]M)
.Other weights:
The implementation for other weights is based on Wagner-Fischer. It has a performance of O(N * M)
and has a memory usage of O(N)
. Further details can be found in [wagner_fischer_1974].
Find the Levenshtein distance between two strings:
Setting a maximum distance allows the implementation to select a more efficient implementation:
It is possible to select different weights by passing a weight
struct.
Editops rapidfuzz::levenshtein_editops | ( | const Sentence1 & | s1, |
const Sentence2 & | s2 | ||
) |
Editops rapidfuzz::levenshtein_editops | ( | InputIt1 | first1, |
InputIt1 | last1, | ||
InputIt2 | first2, | ||
InputIt2 | last2 | ||
) |
Return list of EditOp describing how to turn s1 into s2.
Sentence1 | This is a string that can be converted to basic_string_view<char_type> |
Sentence2 | This is a string that can be converted to basic_string_view<char_type> |
s1 | string to compare with s2 (for type info check Template parameters above) |
s2 | string to compare with s1 (for type info check Template parameters above) |
double rapidfuzz::levenshtein_normalized_distance | ( | const Sentence1 & | s1, |
const Sentence2 & | s2, | ||
LevenshteinWeightTable | weights = {1, 1, 1} , |
||
double | score_cutoff = 1.0 |
||
) |
double rapidfuzz::levenshtein_normalized_distance | ( | InputIt1 | first1, |
InputIt1 | last1, | ||
InputIt2 | first2, | ||
InputIt2 | last2, | ||
LevenshteinWeightTable | weights = {1, 1, 1} , |
||
double | score_cutoff = 1.0 |
||
) |
double rapidfuzz::levenshtein_normalized_similarity | ( | const Sentence1 & | s1, |
const Sentence2 & | s2, | ||
LevenshteinWeightTable | weights = {1, 1, 1} , |
||
double | score_cutoff = 0.0 |
||
) |
double rapidfuzz::levenshtein_normalized_similarity | ( | InputIt1 | first1, |
InputIt1 | last1, | ||
InputIt2 | first2, | ||
InputIt2 | last2, | ||
LevenshteinWeightTable | weights = {1, 1, 1} , |
||
double | score_cutoff = 0.0 |
||
) |
Calculates a normalized levenshtein distance using custom costs for insertion, deletion and substitution.
Sentence1 | This is a string that can be converted to basic_string_view<char_type> |
Sentence2 | This is a string that can be converted to basic_string_view<char_type> |
s1 | string to compare with s2 (for type info check Template parameters above) |
s2 | string to compare with s1 (for type info check Template parameters above) |
weights | The weights for the three operations in the form (insertion, deletion, substitution). Default is {1, 1, 1}, which gives all three operations a weight of 1. |
score_cutoff | Optional argument for a score threshold as a float between 0 and 1.0. For ratio < score_cutoff 0 is returned instead. Default is 0, which deactivates this behaviour. |
The normalization of the Levenshtein distance is performed in the following way:
Find the normalized Levenshtein distance between two strings:
Setting a score_cutoff allows the implementation to select a more efficient implementation:
It is possible to select different weights by passing a weight
struct
int64_t rapidfuzz::levenshtein_similarity | ( | const Sentence1 & | s1, |
const Sentence2 & | s2, | ||
LevenshteinWeightTable | weights = {1, 1, 1} , |
||
int64_t | score_cutoff = 0.0 |
||
) |
int64_t rapidfuzz::levenshtein_similarity | ( | InputIt1 | first1, |
InputIt1 | last1, | ||
InputIt2 | first2, | ||
InputIt2 | last2, | ||
LevenshteinWeightTable | weights = {1, 1, 1} , |
||
int64_t | score_cutoff = 0.0 |
||
) |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |