mysql如何实现hive中的REGEXP

tech2024-06-09  85

存在一个需求:将数据库某一字段中的年月日取出来,遍查函数,只有hive中的REGEXP_EXTRACT可以满足,但是其需要借助于map-reduce,可不可以写一个类似的接近这个函数的方法呢?下面写了一个函数,结果出来了,但是可能不太理想。或者读者直接用内存处理就行了,能不用就不用,这里只做探索。

图:执行结果

函数代码:

CREATE FUNCTION REGEXP_EXTRACT(Varstring varchar(5000)) RETURNS varchar(3000) BEGIN DECLARE v_length INT DEFAULT 0;

DECLARE v_Tmp TEXT default '';

DECLARE r_varString TEXT; DECLARE r_length INT DEFAULT 0; DECLARE r_Tmp TEXT default ''; DECLARE v_mid TEXT default ''; DECLARE v_mid2 TEXT default ''; DECLARE v_mid_1 TEXT default '';

DECLARE s_varString TEXT; DECLARE s_length INT DEFAULT 0; DECLARE s_Tmp TEXT default ''; DECLARE s_mid TEXT default '';

set v_length=CHAR_LENGTH(Varstring); WHILE v_length > 0 DO SET v_mid = mid(Varstring,v_length,1); SET v_mid2 = mid(Varstring,v_length,2); SET v_mid_1 = mid(Varstring,v_length - 1,1); IF (     (         ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '月')          OR         ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '日')          OR         ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '时')          OR         ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '分')          OR         ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '秒')          OR         ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '年')     )      OR (v_mid_1 = '月' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))      OR (v_mid_1 = '日' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))     OR (v_mid_1 = '时' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))     OR (v_mid_1 = '分' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))     OR (v_mid_1 = '秒')     OR (v_mid_1 = '年' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58))          OR v_mid = '月'     OR v_mid = '日'     OR v_mid = '时'     OR v_mid = '分'     OR v_mid = '秒'     OR v_mid = '年'

    OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58))

    OR v_mid = ','     OR v_mid = '。'     OR v_mid = ';'     OR v_mid = '('     OR v_mid = ')'     OR v_mid = ':'     OR v_mid = '“'     OR v_mid = '”' ) THEN set v_Tmp=concat(v_Tmp,v_mid); ELSE SET v_Tmp = concat(v_Tmp,'|'); END IF; SET v_length = v_length - 1; END WHILE;

SET r_varString = REVERSE(v_Tmp); set r_length=CHAR_LENGTH(r_varString); WHILE r_length > 0 DO IF (     ((mid(r_varString,r_length,1) = '日') OR (mid(r_varString,r_length,1) = '分')) AND ((mid(r_varString,r_length,2) = '日|') OR (mid(r_varString,r_length,2) = '分|'))  ) THEN SET r_Tmp = concat(r_Tmp,','); SET r_Tmp = concat(r_Tmp,mid(r_varString,r_length,1)); ELSEIF (mid(r_varString,r_length,1) = '|') THEN SET r_Tmp = concat(r_Tmp,''); ELSE SET r_Tmp = concat(r_Tmp,mid(r_varString,r_length,1)); END IF; SET r_length = r_length - 1; END WHILE;

SET s_varString = REVERSE(r_Tmp); set s_length=CHAR_LENGTH(s_varString); WHILE s_length > 0 DO SET s_mid = mid(s_varString,s_length,1); IF (     s_mid = ','     OR s_mid = '。'     OR s_mid = ';'     OR s_mid = '('     OR s_mid = ')'     OR s_mid = ':'     OR s_mid = '“'     OR s_mid = '”'     OR s_mid = ',' ) THEN SET s_Tmp = concat(s_Tmp,'|'); ELSE SET s_Tmp = concat(s_Tmp,s_mid); END IF; SET s_length = s_length - 1; END WHILE;

RETURN REVERSE(s_Tmp);

END;

最新回复(0)