Namespaces
namespace	common

namespace	detail

namespace	fuzz

Classes
struct	CachedLevenshtein

struct	EditOp
	Edit operations used by the Levenshtein distance. More...

class	Editops

class	IteratorView

struct	LevenshteinWeightTable

struct	Opcode
	Edit operations used by the Levenshtein distance. More...

class	Opcodes

class	SplittedSentenceView

struct	StringAffix

Typedefs
template<typename InputIt >
using	IteratorViewVec = std::vector< IteratorView< InputIt > >

Enumerations
enum class	EditType { None = 0 , Replace = 1 , Insert = 2 , Delete = 3 }
	Edit operation types used by the Levenshtein distance. More...

Functions
template<typename InputIt1 , typename InputIt2 >
bool	operator== (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b)

template<typename InputIt1 , typename InputIt2 >
bool	operator!= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b)

template<typename InputIt1 , typename InputIt2 >
bool	operator< (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b)

template<typename InputIt1 , typename InputIt2 >
bool	operator> (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b)

template<typename InputIt1 , typename InputIt2 >
bool	operator<= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b)

template<typename InputIt1 , typename InputIt2 >
bool	operator>= (const IteratorView< InputIt1 > &a, const IteratorView< InputIt2 > &b)

bool	operator== (EditOp a, EditOp b)

bool	operator== (Opcode a, Opcode b)

bool	operator== (const Editops &lhs, const Editops &rhs)

bool	operator!= (const Editops &lhs, const Editops &rhs)

void	swap (Editops &lhs, Editops &rhs)

bool	operator== (const Opcodes &lhs, const Opcodes &rhs)

bool	operator!= (const Opcodes &lhs, const Opcodes &rhs)

void	swap (Opcodes &lhs, Opcodes &rhs)

template<typename InputIt1 , typename InputIt2 >
int64_t	levenshtein_distance (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, int64_t max=std::numeric_limits< int64_t >::max())
	Calculates the minimum number of insertions, deletions, and substitutions required to change one sequence into the other according to Levenshtein with custom costs for insertion, deletion and substitution.

template<typename Sentence1 , typename Sentence2 >
int64_t	levenshtein_distance (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, int64_t max=std::numeric_limits< int64_t >::max())

template<typename InputIt1 , typename InputIt2 >
int64_t	levenshtein_similarity (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, int64_t score_cutoff=0.0)

template<typename Sentence1 , typename Sentence2 >
int64_t	levenshtein_similarity (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, int64_t score_cutoff=0.0)

template<typename InputIt1 , typename InputIt2 >
double	levenshtein_normalized_distance (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=1.0)

template<typename Sentence1 , typename Sentence2 >
double	levenshtein_normalized_distance (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=1.0)

template<typename InputIt1 , typename InputIt2 >
double	levenshtein_normalized_similarity (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=0.0)
	Calculates a normalized levenshtein distance using custom costs for insertion, deletion and substitution.

template<typename Sentence1 , typename Sentence2 >
double	levenshtein_normalized_similarity (const Sentence1 &s1, const Sentence2 &s2, LevenshteinWeightTable weights={1, 1, 1}, double score_cutoff=0.0)

template<typename InputIt1 , typename InputIt2 >
Editops	levenshtein_editops (InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2)
	Return list of EditOp describing how to turn s1 into s2.

template<typename Sentence1 , typename Sentence2 >
Editops	levenshtein_editops (const Sentence1 &s1, const Sentence2 &s2)

Typedef Documentation

◆ IteratorViewVec

template<typename InputIt >

using rapidfuzz::IteratorViewVec = typedef std::vector<IteratorView<InputIt> >

Enumeration Type Documentation

◆ EditType

enum class rapidfuzz::EditType

strong

Edit operation types used by the Levenshtein distance.

Enumerator
None	No Operation required
Replace	Replace a character if a string by another character
Insert	Insert a character into a string
Delete	Delete a character from a string

Function Documentation

◆ levenshtein_distance() [1/2]

template<typename Sentence1 , typename Sentence2 >

int64_t rapidfuzz::levenshtein_distance	(	const Sentence1 &	s1,
		const Sentence2 &	s2,
		LevenshteinWeightTable	weights = `{1, 1, 1}`,
		int64_t	max = `std::numeric_limits<int64_t>::max()`
	)

◆ levenshtein_distance() [2/2]

template<typename InputIt1 , typename InputIt2 >

int64_t rapidfuzz::levenshtein_distance	(	InputIt1	first1,
		InputIt1	last1,
		InputIt2	first2,
		InputIt2	last2,
		LevenshteinWeightTable	weights = `{1, 1, 1}`,
		int64_t	max = `std::numeric_limits<int64_t>::max()`
	)

Calculates the minimum number of insertions, deletions, and substitutions required to change one sequence into the other according to Levenshtein with custom costs for insertion, deletion and substitution.

Template Parameters

Sentence1	This is a string that can be converted to basic_string_view<char_type>
Sentence2	This is a string that can be converted to basic_string_view<char_type>

Parameters

s1	string to compare with s2 (for type info check Template parameters above)
s2	string to compare with s1 (for type info check Template parameters above)
weights	The weights for the three operations in the form (insertion, deletion, substitution). Default is {1, 1, 1}, which gives all three operations a weight of 1.
max	Maximum Levenshtein distance between s1 and s2, that is considered as a result. If the distance is bigger than max, max + 1 is returned instead. Default is std::numeric_limits<int64_t>::max(), which deactivates this behaviour.

Returns: returns the levenshtein distance between s1 and s2

Remarks

Depending on the input parameters different optimized implementation are used to improve the performance. Worst-case performance is O(m * n).

Insertion = Deletion = Substitution:

This is known as uniform Levenshtein distance and is the distance most commonly referred to as Levenshtein distance. The following implementation is used with a worst-case performance of O([N/64]M).

if max is 0 the similarity can be calculated using a direct comparision, since no difference between the strings is allowed. The time complexity of this algorithm is O(N).
A common prefix/suffix of the two compared strings does not affect the Levenshtein distance, so the affix is removed before calculating the similarity.
If max is ≤ 3 the mbleven algorithm is used. This algorithm checks all possible edit operations that are possible under the threshold max. The time complexity of this algorithm is O(N).
If the length of the shorter string is ≤ 64 after removing the common affix Hyyrös' algorithm is used, which calculates the Levenshtein distance in parallel. The algorithm is described by [hyrro_2002]. The time complexity of this algorithm is O(N).
If the length of the shorter string is ≥ 64 after removing the common affix a blockwise implementation of Myers' algorithm is used, which calculates the Levenshtein distance in parallel (64 characters at a time). The algorithm is described by [myers_1999]. The time complexity of this algorithm is O([N/64]M).

Insertion = Deletion, Substitution >= Insertion + Deletion:

Since every Substitution can be performed as Insertion + Deletion, this variant of the Levenshtein distance only uses Insertions and Deletions. Therefore this variant is often referred to as InDel-Distance. The following implementation is used with a worst-case performance of O([N/64]M).

if max is 0 the similarity can be calculated using a direct comparision, since no difference between the strings is allowed. The time complexity of this algorithm is O(N).
if max is 1 and the two strings have a similar length, the similarity can be calculated using a direct comparision aswell, since a substitution would cause a edit distance higher than max. The time complexity of this algorithm is O(N).
A common prefix/suffix of the two compared strings does not affect the Levenshtein distance, so the affix is removed before calculating the similarity.
If max is ≤ 4 the mbleven algorithm is used. This algorithm checks all possible edit operations that are possible under the threshold max. As a difference to the normal Levenshtein distance this algorithm can even be used up to a threshold of 4 here, since the higher weight of substitutions decreases the amount of possible edit operations. The time complexity of this algorithm is O(N).
If the length of the shorter string is ≤ 64 after removing the common affix Hyyrös' lcs algorithm is used, which calculates the InDel distance in parallel. The algorithm is described by [hyrro_lcs_2004] and is extended with support for UTF32 in this implementation. The time complexity of this algorithm is O(N).
If the length of the shorter string is ≥ 64 after removing the common affix a blockwise implementation of Hyyrös' lcs algorithm is used, which calculates the Levenshtein distance in parallel (64 characters at a time). The algorithm is described by [hyrro_lcs_2004]. The time complexity of this algorithm is O([N/64]M).

Other weights:

The implementation for other weights is based on Wagner-Fischer. It has a performance of O(N * M) and has a memory usage of O(N). Further details can be found in [wagner_fischer_1974].

Examples

Find the Levenshtein distance between two strings:

// dist is 2

int64_t dist = levenshtein_distance("lewenstein", "levenshtein");

rapidfuzz::levenshtein_distance

int64_t levenshtein_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights={1, 1, 1}, int64_t max=std::numeric_limits< int64_t >::max())

Calculates the minimum number of insertions, deletions, and substitutions required to change one sequ...

Definition rapidfuzz_amalgamated.hpp:3374

Setting a maximum distance allows the implementation to select a more efficient implementation:

// dist is 2

int64_t dist = levenshtein_distance("lewenstein", "levenshtein", {1, 1, 1}, 1);

It is possible to select different weights by passing a weight struct.

// dist is 3

int64_t dist = levenshtein_distance("lewenstein", "levenshtein", {1, 1, 2});

◆ levenshtein_editops() [1/2]

template<typename Sentence1 , typename Sentence2 >

Editops rapidfuzz::levenshtein_editops	(	const Sentence1 &	s1,
		const Sentence2 &	s2
	)

◆ levenshtein_editops() [2/2]

template<typename InputIt1 , typename InputIt2 >

Editops rapidfuzz::levenshtein_editops	(	InputIt1	first1,
		InputIt1	last1,
		InputIt2	first2,
		InputIt2	last2
	)

Return list of EditOp describing how to turn s1 into s2.

Template Parameters

Sentence1	This is a string that can be converted to basic_string_view<char_type>
Sentence2	This is a string that can be converted to basic_string_view<char_type>

Parameters

s1	string to compare with s2 (for type info check Template parameters above)
s2	string to compare with s1 (for type info check Template parameters above)

Returns: Edit operations required to turn s1 into s2

◆ levenshtein_normalized_distance() [1/2]

template<typename Sentence1 , typename Sentence2 >

double rapidfuzz::levenshtein_normalized_distance	(	const Sentence1 &	s1,
		const Sentence2 &	s2,
		LevenshteinWeightTable	weights = `{1, 1, 1}`,
		double	score_cutoff = `1.0`
	)

◆ levenshtein_normalized_distance() [2/2]

template<typename InputIt1 , typename InputIt2 >

double rapidfuzz::levenshtein_normalized_distance	(	InputIt1	first1,
		InputIt1	last1,
		InputIt2	first2,
		InputIt2	last2,
		LevenshteinWeightTable	weights = `{1, 1, 1}`,
		double	score_cutoff = `1.0`
	)

◆ levenshtein_normalized_similarity() [1/2]

template<typename Sentence1 , typename Sentence2 >

double rapidfuzz::levenshtein_normalized_similarity	(	const Sentence1 &	s1,
		const Sentence2 &	s2,
		LevenshteinWeightTable	weights = `{1, 1, 1}`,
		double	score_cutoff = `0.0`
	)

◆ levenshtein_normalized_similarity() [2/2]

template<typename InputIt1 , typename InputIt2 >

double rapidfuzz::levenshtein_normalized_similarity	(	InputIt1	first1,
		InputIt1	last1,
		InputIt2	first2,
		InputIt2	last2,
		LevenshteinWeightTable	weights = `{1, 1, 1}`,
		double	score_cutoff = `0.0`
	)

Calculates a normalized levenshtein distance using custom costs for insertion, deletion and substitution.

Template Parameters

Sentence1	This is a string that can be converted to basic_string_view<char_type>
Sentence2	This is a string that can be converted to basic_string_view<char_type>

Parameters

s1	string to compare with s2 (for type info check Template parameters above)
s2	string to compare with s1 (for type info check Template parameters above)
weights	The weights for the three operations in the form (insertion, deletion, substitution). Default is {1, 1, 1}, which gives all three operations a weight of 1.
score_cutoff	Optional argument for a score threshold as a float between 0 and 1.0. For ratio < score_cutoff 0 is returned instead. Default is 0, which deactivates this behaviour.

Returns: Normalized weighted levenshtein distance between s1 and s2 as a double between 0 and 1.0

See also: levenshtein()

Remarks

The normalization of the Levenshtein distance is performed in the following way:

$\begin{align*} ratio &= \frac{distance(s1, s2)}{max_dist} \end{align*}$

Examples

Find the normalized Levenshtein distance between two strings:

// ratio is 81.81818181818181

double ratio = normalized_levenshtein("lewenstein", "levenshtein");

Setting a score_cutoff allows the implementation to select a more efficient implementation:

// ratio is 0.0

double ratio = normalized_levenshtein("lewenstein", "levenshtein", {1, 1, 1}, 85.0);

It is possible to select different weights by passing a weight struct

// ratio is 85.71428571428571

double ratio = normalized_levenshtein("lewenstein", "levenshtein", {1, 1, 2});

◆ levenshtein_similarity() [1/2]

template<typename Sentence1 , typename Sentence2 >

int64_t rapidfuzz::levenshtein_similarity	(	const Sentence1 &	s1,
		const Sentence2 &	s2,
		LevenshteinWeightTable	weights = `{1, 1, 1}`,
		int64_t	score_cutoff = `0.0`
	)

◆ levenshtein_similarity() [2/2]

template<typename InputIt1 , typename InputIt2 >

int64_t rapidfuzz::levenshtein_similarity	(	InputIt1	first1,
		InputIt1	last1,
		InputIt2	first2,
		InputIt2	last2,
		LevenshteinWeightTable	weights = `{1, 1, 1}`,
		int64_t	score_cutoff = `0.0`
	)

◆ operator!=() [1/3]

bool rapidfuzz::operator!=	(	const Editops &	lhs,
		const Editops &	rhs
	)

inline

◆ operator!=() [2/3]

template<typename InputIt1 , typename InputIt2 >

bool rapidfuzz::operator!=	(	const IteratorView< InputIt1 > &	a,
		const IteratorView< InputIt2 > &	b
	)

inline

◆ operator!=() [3/3]

bool rapidfuzz::operator!=	(	const Opcodes &	lhs,
		const Opcodes &	rhs
	)

inline

◆ operator<()

template<typename InputIt1 , typename InputIt2 >

bool rapidfuzz::operator<	(	const IteratorView< InputIt1 > &	a,
		const IteratorView< InputIt2 > &	b
	)

inline

◆ operator<=()

template<typename InputIt1 , typename InputIt2 >

bool rapidfuzz::operator<=	(	const IteratorView< InputIt1 > &	a,
		const IteratorView< InputIt2 > &	b
	)

inline

◆ operator==() [1/5]

bool rapidfuzz::operator==	(	const Editops &	lhs,
		const Editops &	rhs
	)

inline

◆ operator==() [2/5]

template<typename InputIt1 , typename InputIt2 >

bool rapidfuzz::operator==	(	const IteratorView< InputIt1 > &	a,
		const IteratorView< InputIt2 > &	b
	)

inline

◆ operator==() [3/5]

bool rapidfuzz::operator==	(	const Opcodes &	lhs,
		const Opcodes &	rhs
	)

inline

◆ operator==() [4/5]

bool rapidfuzz::operator==	(	EditOp	a,
		EditOp	b
	)

inline

◆ operator==() [5/5]

bool rapidfuzz::operator==	(	Opcode	a,
		Opcode	b
	)

inline

◆ operator>()

template<typename InputIt1 , typename InputIt2 >

bool rapidfuzz::operator>	(	const IteratorView< InputIt1 > &	a,
		const IteratorView< InputIt2 > &	b
	)

inline

◆ operator>=()

template<typename InputIt1 , typename InputIt2 >

bool rapidfuzz::operator>=	(	const IteratorView< InputIt1 > &	a,
		const IteratorView< InputIt2 > &	b
	)

inline

◆ swap() [1/2]

void rapidfuzz::swap	(	Editops &	lhs,
		Editops &	rhs
	)

inline

◆ swap() [2/2]

void rapidfuzz::swap	(	Opcodes &	lhs,
		Opcodes &	rhs
	)

inline

Namespaces

Classes

Typedefs

Enumerations

Functions

Typedef Documentation

◆ IteratorViewVec

Enumeration Type Documentation

◆ EditType

Function Documentation

◆ levenshtein_distance() [1/2]

◆ levenshtein_distance() [2/2]

◆ levenshtein_editops() [1/2]

◆ levenshtein_editops() [2/2]

◆ levenshtein_normalized_distance() [1/2]

◆ levenshtein_normalized_distance() [2/2]

◆ levenshtein_normalized_similarity() [1/2]

◆ levenshtein_normalized_similarity() [2/2]

◆ levenshtein_similarity() [1/2]

◆ levenshtein_similarity() [2/2]

◆ operator!=() [1/3]

◆ operator!=() [2/3]

◆ operator!=() [3/3]

◆ operator<()

◆ operator<=()

◆ operator==() [1/5]

◆ operator==() [2/5]

◆ operator==() [3/5]

◆ operator==() [4/5]

◆ operator==() [5/5]

◆ operator>()

◆ operator>=()

◆ swap() [1/2]

◆ swap() [2/2]