Hive 常用函数
关系运算
(1)等值比较 = == <=>
(2)不等值比较 != <>
(3)区间比较: 左闭右闭
select * from default.students where id between 1500100001 and 1500100010;
(4)空值/非空值判断:is null、is not null、nvl()、isnull()
(5)like、rlike、regexp用法(regexp=rlike)
nvl()用法举例:
hive> select nvl(null,1);
OK
1
hive> select nvl(’a‘,1);
OK
a
isnull()用法举例:
hive> select isnull(null);
OK
true
hive> select isnull('a');
OK
false
like、rlike用法举例
想查文科的所有学生:
select * from students where clazz like '文科%';
想查名字中带’雪‘的同学:
select * from students where name like '%雪%';
想查名字第二字带’雪‘的同学:
select * from students where name like '_雪_';
想查名字结尾带’雪‘的同学:
select * from students where name like '__雪';
利用rlike与正则结合
select * from students where name rlike '.*雪$';
数值计算
取整函数(四舍五入):round
向上取整:ceil
向下取整:floor
条件函数
- if: if(表达式,如果表达式成立的返回值,如果表达式不成立的返回值)
select if(1>0,1,0);
select if(1>0,if(-1>0,-1,1),0);
select score,if(score>120,'优秀',if(score>100,'良好',if(score>90,'及格','不及格'))) as pingfen from score limit 20;
- COALESCE:支持传入n个参数
select COALESCE(null,'1','2'); // 1 从左往右 一次匹配 直到非空为止
select COALESCE('1',null,'2'); // 1
- case when
select score
,case when score>120 then '优秀'
when score>100 then '良好'
when score>90 then '及格'
else '不及格'
end as pingfen
from default.score limit 20;
select name
,case name when "施笑槐" then "槐ge"
when "吕金鹏" then "鹏ge"
when "单乐蕊" then "蕊jie"
else "算了不叫了"
end as nickname
from default.students limit 10;
注意条件的顺序
日期函数
select from_unixtime(1610611142,'YYYY/MM/dd HH:mm:ss');
select from_unixtime(unix_timestamp(),'YYYY/MM/dd HH:mm:ss');
// '2021年01月14日' -> '2021-01-14'
select from_unixtime(unix_timestamp('2021年01月14日','yyyy年MM月dd日'),'yyyy-MM-dd');
// "04牛2021数加16逼" -> "2021/04/16"
select from_unixtime(unix_timestamp("04牛2021数加16逼","MM牛yyyy数加dd逼"),"yyyy/MM/dd");
字符串函数:字符串拼接、字符串切分
字符串拼接
concat('123','456'); // 123456
concat('123','456',null); // NULL
select concat_ws('#','a','b','c'); // a#b#c
select concat_ws('#','a','b','c',NULL); // a#b#c 可以指定分隔符,并且会自动忽略NULL
select concat_ws("|",cast(id as string),name,cast(age as string),gender,clazz) from students limit 10;//拼接的类型不一样,需要用cast转换
字符串切分
select substring("abcdefg",1); // abcdefg HQL中涉及到位置的时候 是从1开始计数
//需求:2021/01/14-->2021-01-14
方法1:使用substring
select concat_ws("-",substring('2021/01/14',1,4),substring('2021/01/14',6,2),substring('2021/01/14',9,2));
方法2:建议使用日期函数去做日期
select from_unixtime(unix_timestamp('2021/01/14','yyyy/MM/dd'),'yyyy-MM-dd');
select split("abcde,fgh",","); // ["abcde","fgh"],返回的是arry
select split("a,b,c,d,e,f",",")[2]; // c,array可以用下标获取
//explode:行转列
select explode(split("abcde,fgh",",")); // abcde
// fgh
// 解析json格式的数据
select get_json_object('{"name":"zhangsan","age":18,"score":[{"course_name":"math","score":100},{"course_name":"english","score":60}]}',"$.score[0].score"); // 100