Results 1 to 1 of 1

Thread: SQL Server 2005 - Levenshtein Distance

Hybrid View

  1. #1

    Thread Starter
    PowerPoster
    Join Date
    Feb 2002
    Location
    Canada, Toronto
    Posts
    5,802

    SQL Server 2005 - Levenshtein Distance

    This function calculates the Levenshtein Distance

    If anyone can improve the speed, please post the improved code here.

    sql Code:
    1. -- =============================================
    2. -- Author:      <Michael Ciurescu>
    3. -- Create date: <20090707>
    4. -- Description: <See: [url]http://www.merriampark.com/ld.htm[/url] >
    5. -- =============================================
    6. CREATE FUNCTION dbo.fn_LevenshteinDistance
    7. (
    8.       @Str1 VARCHAR(MAX)
    9.     , @Str2 VARCHAR(MAX)
    10. )
    11. RETURNS INT
    12. AS
    13. BEGIN
    14.     /*
    15.         SELECT dbo.fn_LevenshteinDistance('test', 'test')
    16.         SELECT dbo.fn_LevenshteinDistance('tesot', 'test')
    17.         SELECT dbo.fn_LevenshteinDistance('testing1', 'tes_ing')
    18.     */
    19.    
    20.     DECLARE @m INT, @n INT
    21.     DECLARE @editMatrix TABLE(pk1 INT, pk2 INT, v INT, PRIMARY KEY (pk1, pk2))
    22.     DECLARE @i INT, @j INT, @cost INT
    23.     DECLARE @str1_i CHAR, @str2_j CHAR
    24.    
    25.     SET @n = DATALENGTH(ISNULL(@Str1, ''))
    26.     SET @m = DATALENGTH(ISNULL(@Str2, ''))
    27.    
    28.     IF @n = 0 OR @m = 0 RETURN 0
    29.    
    30.     SET @i = 0
    31.     WHILE @i <= @n BEGIN
    32.         SET @j = 0
    33.         WHILE @j <= @m BEGIN
    34.             IF @j = 0
    35.                 INSERT INTO @editMatrix VALUES(@i, 0, @i)
    36.             ELSE IF @i = 0
    37.                 INSERT INTO @editMatrix VALUES(0, @j, @j)
    38.             ELSE
    39.                 INSERT INTO @editMatrix VALUES(@i, @j, 0)
    40.            
    41.             SET @j = @j + 1
    42.         END
    43.        
    44.         SET @i = @i + 1
    45.     END
    46.    
    47.     SET @i = 1
    48.     WHILE @i <= @n BEGIN
    49.         SET @str1_i = SUBSTRING(@Str1, @i, 1)
    50.        
    51.         SET @j = 1
    52.         WHILE @j <= @m BEGIN
    53.             SET @str2_j = SUBSTRING(@Str2, @j, 1)
    54.            
    55.             IF @str1_i = @str2_j
    56.                 SET @cost = 0
    57.             ELSE
    58.                 SET @cost = 1
    59.            
    60.             UPDATE em
    61.             SET v = (
    62.                         SELECT MIN(ret) AS MIN_ret
    63.                         FROM (
    64.                             SELECT v + 1     AS ret FROM @editMatrix WHERE pk1 = @i - 1 AND pk2 = @j     UNION ALL
    65.                             SELECT v + 1     AS ret FROM @editMatrix WHERE pk1 = @i     AND pk2 = @j - 1 UNION ALL
    66.                             SELECT v + @cost AS ret FROM @editMatrix WHERE pk1 = @i - 1 AND pk2 = @j - 1
    67.                         ) AS x
    68.                     )
    69.             FROM @editMatrix AS em
    70.             WHERE pk1 = @i AND pk2 = @j
    71.            
    72.             SET @j = @j + 1
    73.         END
    74.        
    75.         SET @i = @i + 1
    76.     END
    77.    
    78.     RETURN (SELECT ISNULL(v, -1) FROM @editMatrix WHERE pk1 = @n AND pk2 = @m)
    79. END

    I found much faster code over here: http://www.sqlteam.com/forums/topic....40&whichpage=1
    Last edited by CVMichael; Sep 20th, 2010 at 09:47 AM.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  



Click Here to Expand Forum to Full Width