`
444878909
  • 浏览: 637877 次
文章分类
社区版块
存档分类
最新评论

C#实现字符串相似度比较[Levenshtein Distance算法]

 
阅读更多

原文转载自:http://www.4ucode.com/Study/Topic/963976


字符串相似度的计算使用 Levenshtein Distance 算法(中文译名:编辑距离算法)。该算法由俄国科学家 Vladimir Levenshtein 于 1965 年提出,编辑距离指将一个字符串转换成另一个字符串所需的最少单字符编辑(插入、删除、替换)次数;距离越小,两个字符串越相似。

下面使用 C# 实现:

/// <summary>
/// Computes the Levenshtein (edit) distance between two strings and a
/// similarity ratio derived from that distance.
/// </summary>
public class LevenshteinDistance
{
    // Created once by the static field initializer, so every read of Instance
    // returns the same object (the previous getter returned a new object on each call).
    private static readonly LevenshteinDistance _instance = new LevenshteinDistance();

    /// <summary>
    /// Gets the shared instance of <see cref="LevenshteinDistance"/>.
    /// </summary>
    public static LevenshteinDistance Instance
    {
        get { return _instance; }
    }

    /// <summary>
    /// Returns the smallest of three integers.
    /// </summary>
    /// <param name="first">The first candidate.</param>
    /// <param name="second">The second candidate.</param>
    /// <param name="third">The third candidate.</param>
    /// <returns>The minimum of the three values.</returns>
    public int LowerOfThree(int first, int second, int third)
    {
        int min = first;
        if (second < min)
        {
            min = second;
        }
        if (third < min)
        {
            min = third;
        }
        return min;
    }

    /// <summary>
    /// Computes the edit distance between two strings: the minimum number of
    /// single-character insertions, deletions and substitutions needed to turn
    /// <paramref name="str1"/> into <paramref name="str2"/>.
    /// Characters are compared as individual <see cref="char"/> values.
    /// </summary>
    /// <param name="str1">The source string.</param>
    /// <param name="str2">The target string.</param>
    /// <returns>The edit distance; 0 when the strings are equal.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="str1"/> or <paramref name="str2"/> is null.
    /// </exception>
    public int Levenshtein_Distance(string str1, string str2)
    {
        if (str1 == null)
        {
            throw new ArgumentNullException("str1");
        }
        if (str2 == null)
        {
            throw new ArgumentNullException("str2");
        }

        int n = str1.Length;
        int m = str2.Length;

        // Transforming to or from an empty string costs one edit per character.
        if (n == 0)
        {
            return m;
        }
        if (m == 0)
        {
            return n;
        }

        // matrix[i, j] holds the distance between the first i characters of
        // str1 and the first j characters of str2.
        int[,] matrix = new int[n + 1, m + 1];

        // First column: delete i characters to reach the empty prefix.
        for (int i = 0; i <= n; i++)
        {
            matrix[i, 0] = i;
        }

        // First row: insert j characters starting from the empty prefix.
        for (int j = 0; j <= m; j++)
        {
            matrix[0, j] = j;
        }

        for (int i = 1; i <= n; i++)
        {
            char ch1 = str1[i - 1];
            for (int j = 1; j <= m; j++)
            {
                char ch2 = str2[j - 1];

                // Substitution is free when the two characters already match.
                int cost = (ch1 == ch2) ? 0 : 1;

                matrix[i, j] = LowerOfThree(
                    matrix[i - 1, j] + 1,          // deletion
                    matrix[i, j - 1] + 1,          // insertion
                    matrix[i - 1, j - 1] + cost);  // substitution or match
            }
        }

        // The matrix is intentionally not written to the console here: this is
        // a computation method and must not produce output as a side effect.
        return matrix[n, m];
    }

    /// <summary>
    /// Computes the similarity of two strings as
    /// <c>1 - distance / max(length1, length2)</c>.
    /// </summary>
    /// <param name="str1">The first string.</param>
    /// <param name="str2">The second string.</param>
    /// <returns>
    /// A value between 0 and 1, where 1 means the strings are identical.
    /// Two empty strings are reported as 1.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="str1"/> or <paramref name="str2"/> is null.
    /// </exception>
    public decimal LevenshteinDistancePercent(string str1, string str2)
    {
        if (str1 == null)
        {
            throw new ArgumentNullException("str1");
        }
        if (str2 == null)
        {
            throw new ArgumentNullException("str2");
        }

        int maxLength = str1.Length > str2.Length ? str1.Length : str2.Length;

        // Both strings are empty: they are identical, and dividing by the
        // zero length below would throw DivideByZeroException.
        if (maxLength == 0)
        {
            return 1m;
        }

        int distance = Levenshtein_Distance(str1, str2);
        return 1 - (decimal)distance / maxLength;
    }
}

/// <summary>
/// Console demo: prints two sample strings and their similarity percentage.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Writes both sample strings, then their similarity scaled
    /// to a percentage, and waits for a line of input before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string left = "你好蒂蒂";
        string right = "你好蒂芬";

        Console.WriteLine("字符串1 {0}", left);
        Console.WriteLine("字符串2 {0}", right);

        // Computed after the two lines above so any output produced during the
        // calculation appears in the same position as before.
        LevenshteinDistance calculator = LevenshteinDistance.Instance;
        decimal similarityPercent = calculator.LevenshteinDistancePercent(left, right) * 100;
        Console.WriteLine("相似度 {0} %", similarityPercent);

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
}




分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics