mysql匹配两个字符串的相似比例

-- 计算两个字符串中最多包含了几个字
CREATE DEFINER = `root` @`localhost` FUNCTION `levenshtein` ( s1 text, s2 text ) RETURNS INT DETERMINISTIC BEGIN
	DECLARE
		s1_len,
		s2_len,
		i,
		j,
		c,
		c_temp,
		cost INT;
	DECLARE
		s1_char CHAR;
	DECLARE
		cv0,
		cv1 text;
	
	SET s1_len = CHAR_LENGTH( s1 ),
	s2_len = CHAR_LENGTH( s2 ),
	cv1 = 0x00,
	j = 1,
	i = 1,
	c = 0;
	IF
		s1 = s2 THEN
			RETURN 0;
		
		ELSEIF s1_len = 0 THEN
		RETURN s2_len;
		
		ELSEIF s2_len = 0 THEN
		RETURN s1_len;
		ELSE
		WHILE
				j <= s2_len DO
				
				SET cv1 = CONCAT( cv1, UNHEX( HEX( j ) ) ),
				j = j + 1;
			
		END WHILE;
		WHILE
				i <= s1_len DO
				
				SET s1_char = SUBSTRING( s1, i, 1 ),
				c = i,
				cv0 = UNHEX( HEX( i ) ),
				j = 1;
			WHILE
					j <= s2_len DO
					
					SET c = c + 1;
				IF
					s1_char = SUBSTRING( s2, j, 1 ) THEN
						
						SET cost = 0;
					ELSE 
						SET cost = 1;
					
				END IF;
				
				SET c_temp = CONV( HEX( SUBSTRING( cv1, j, 1 ) ), 16, 10 ) + cost;
				IF
					c > c_temp THEN
						
						SET c = c_temp;
					
				END IF;
				
				SET c_temp = CONV( HEX( SUBSTRING( cv1, j + 1, 1 ) ), 16, 10 ) + 1;
				IF
					c > c_temp THEN
						
						SET c = c_temp;
					
				END IF;
				
				SET cv0 = CONCAT( cv0, UNHEX( HEX( c ) ) ),
				j = j + 1;
				
			END WHILE;
			
			SET cv1 = cv0,
			i = i + 1;
			
		END WHILE;
		
	END IF;
	RETURN c;
	
	END -- 计算传入字段的从长度,然后计算相似比例
	CREATE DEFINER = `root` @`localhost` FUNCTION `levenshtein_ratio` ( s1 text, s2 text ) RETURNS INT DETERMINISTIC BEGIN
		DECLARE
			s1_len,
			s2_len,
			max_len INT;
		
		SET s1_len = CHAR_LENGTH( s1 ),
		s2_len = CHAR_LENGTH( s2 );
		IF
			s1_len > s2_len THEN
				
				SET max_len = s1_len;
			ELSE 
				SET max_len = s2_len;
			
		END IF;
		RETURN ROUND( ( 1 - LEVENSHTEIN ( s1, s2 ) / max_len ) * 100 );
	
END

版权说明:若无特殊注明,转载请保留文章出处
文章分类: 技术乐园
字数统计:本文共有 1483 个
本文链接:http://www.poxiaowy.com/?post=4

标签:

精彩评论