存在一个需求:将数据库某一字段中的年月日取出来,遍查函数,只有hive中的REGEXP_EXTRACT可以满足,但是其需要借助于map-reduce,可不可以写一个类似的接近这个函数的方法呢?下面写了一个函数,结果出来了,但是可能不太理想。或者读者直接用内存处理就行了,能不用就不用,这里只做探索。
图:执行结果
函数代码:
CREATE FUNCTION REGEXP_EXTRACT(Varstring varchar(5000)) RETURNS varchar(3000) BEGIN DECLARE v_length INT DEFAULT 0;
DECLARE v_Tmp TEXT default '';
DECLARE r_varString TEXT; DECLARE r_length INT DEFAULT 0; DECLARE r_Tmp TEXT default ''; DECLARE v_mid TEXT default ''; DECLARE v_mid2 TEXT default ''; DECLARE v_mid_1 TEXT default '';
DECLARE s_varString TEXT; DECLARE s_length INT DEFAULT 0; DECLARE s_Tmp TEXT default ''; DECLARE s_mid TEXT default '';
set v_length=CHAR_LENGTH(Varstring); WHILE v_length > 0 DO SET v_mid = mid(Varstring,v_length,1); SET v_mid2 = mid(Varstring,v_length,2); SET v_mid_1 = mid(Varstring,v_length - 1,1); IF ( ( ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '月') OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '日') OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '时') OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '分') OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '秒') OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58) AND SUBSTRING(v_mid2,2,2) = '年') ) OR (v_mid_1 = '月' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58)) OR (v_mid_1 = '日' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58)) OR (v_mid_1 = '时' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58)) OR (v_mid_1 = '分' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58)) OR (v_mid_1 = '秒') OR (v_mid_1 = '年' AND (ASCII(v_mid)>47 and ASCII(v_mid)<58)) OR v_mid = '月' OR v_mid = '日' OR v_mid = '时' OR v_mid = '分' OR v_mid = '秒' OR v_mid = '年'
OR ((ASCII(v_mid)>47 and ASCII(v_mid)<58))
OR v_mid = ',' OR v_mid = '。' OR v_mid = ';' OR v_mid = '(' OR v_mid = ')' OR v_mid = ':' OR v_mid = '“' OR v_mid = '”' ) THEN set v_Tmp=concat(v_Tmp,v_mid); ELSE SET v_Tmp = concat(v_Tmp,'|'); END IF; SET v_length = v_length - 1; END WHILE;
SET r_varString = REVERSE(v_Tmp); set r_length=CHAR_LENGTH(r_varString); WHILE r_length > 0 DO IF ( ((mid(r_varString,r_length,1) = '日') OR (mid(r_varString,r_length,1) = '分')) AND ((mid(r_varString,r_length,2) = '日|') OR (mid(r_varString,r_length,2) = '分|')) ) THEN SET r_Tmp = concat(r_Tmp,','); SET r_Tmp = concat(r_Tmp,mid(r_varString,r_length,1)); ELSEIF (mid(r_varString,r_length,1) = '|') THEN SET r_Tmp = concat(r_Tmp,''); ELSE SET r_Tmp = concat(r_Tmp,mid(r_varString,r_length,1)); END IF; SET r_length = r_length - 1; END WHILE;
SET s_varString = REVERSE(r_Tmp); set s_length=CHAR_LENGTH(s_varString); WHILE s_length > 0 DO SET s_mid = mid(s_varString,s_length,1); IF ( s_mid = ',' OR s_mid = '。' OR s_mid = ';' OR s_mid = '(' OR s_mid = ')' OR s_mid = ':' OR s_mid = '“' OR s_mid = '”' OR s_mid = ',' ) THEN SET s_Tmp = concat(s_Tmp,'|'); ELSE SET s_Tmp = concat(s_Tmp,s_mid); END IF; SET s_length = s_length - 1; END WHILE;
RETURN REVERSE(s_Tmp);
END;