SQL Server 2005 - Levenshtein Distance
This function calculates the Levenshtein Distance
If anyone can improve the speed, please post the improved code here.
sql Code:
-- =============================================
-- Author: <Michael Ciurescu>
-- Create date: <20090707>
-- Description: <See: [url]http://www.merriampark.com/ld.htm[/url] >
-- =============================================
CREATE FUNCTION dbo.fn_LevenshteinDistance
(
@Str1 VARCHAR(MAX)
, @Str2 VARCHAR(MAX)
)
RETURNS INT
AS
BEGIN
/*
SELECT dbo.fn_LevenshteinDistance('test', 'test')
SELECT dbo.fn_LevenshteinDistance('tesot', 'test')
SELECT dbo.fn_LevenshteinDistance('testing1', 'tes_ing')
*/
DECLARE @m INT, @n INT
DECLARE @editMatrix TABLE(pk1 INT, pk2 INT, v INT, PRIMARY KEY (pk1, pk2))
DECLARE @i INT, @j INT, @cost INT
DECLARE @str1_i CHAR, @str2_j CHAR
SET @n = DATALENGTH(ISNULL(@Str1, ''))
SET @m = DATALENGTH(ISNULL(@Str2, ''))
IF @n = 0 OR @m = 0 RETURN 0
SET @i = 0
WHILE @i <= @n BEGIN
SET @j = 0
WHILE @j <= @m BEGIN
IF @j = 0
INSERT INTO @editMatrix VALUES(@i, 0, @i)
ELSE IF @i = 0
INSERT INTO @editMatrix VALUES(0, @j, @j)
ELSE
INSERT INTO @editMatrix VALUES(@i, @j, 0)
SET @j = @j + 1
END
SET @i = @i + 1
END
SET @i = 1
WHILE @i <= @n BEGIN
SET @str1_i = SUBSTRING(@Str1, @i, 1)
SET @j = 1
WHILE @j <= @m BEGIN
SET @str2_j = SUBSTRING(@Str2, @j, 1)
IF @str1_i = @str2_j
SET @cost = 0
ELSE
SET @cost = 1
UPDATE em
SET v = (
SELECT MIN(ret) AS MIN_ret
FROM (
SELECT v + 1 AS ret FROM @editMatrix WHERE pk1 = @i - 1 AND pk2 = @j UNION ALL
SELECT v + 1 AS ret FROM @editMatrix WHERE pk1 = @i AND pk2 = @j - 1 UNION ALL
SELECT v + @cost AS ret FROM @editMatrix WHERE pk1 = @i - 1 AND pk2 = @j - 1
) AS x
)
FROM @editMatrix AS em
WHERE pk1 = @i AND pk2 = @j
SET @j = @j + 1
END
SET @i = @i + 1
END
RETURN (SELECT ISNULL(v, -1) FROM @editMatrix WHERE pk1 = @n AND pk2 = @m)
END
I found much faster code over here: http://www.sqlteam.com/forums/topic....40&whichpage=1