1、order by:全局排序(所有数据交给同一个reduce处理,数据量大时效率较低)
-- Global sort: return every row of emp ordered by sal (ascending).
SELECT *
FROM emp
ORDER BY sal ASC;
2、sort by:在每个reduce内部进行排序(局部有序),不保证全局有序
-- Use three reducers so that the per-reducer ordering of SORT BY is observable.
SET mapreduce.job.reduces=3;

-- Export emp as tab-delimited text into a local directory;
-- SORT BY orders rows by sal within each reducer only.
INSERT OVERWRITE LOCAL DIRECTORY '/opt/datas/emp_sort'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT *
FROM emp
SORT BY sal ASC;
结果:/opt/datas/emp_sort 目录下会生成3个文件(每个reduce输出一个),每个文件内部按sal升序排列,但文件之间并不保证整体有序。
3、distribute by:按指定字段把数据分发到不同的reduce,底层就是MapReduce的分区(partition);一般与sort by连用,且distribute by必须写在sort by之前
-- Export emp as tab-delimited text into a local directory;
-- rows are routed to reducers by deptno, then ordered by sal inside each reducer.
INSERT OVERWRITE LOCAL DIRECTORY '/opt/datas/emp_dis'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT *
FROM emp
DISTRIBUTE BY deptno
SORT BY sal ASC;
4、cluster by:当distribute by与sort by的字段相同时,可以用cluster by代替(只能升序,不能指定asc/desc)
-- Export emp as tab-delimited text into a local directory;
-- CLUSTER BY sal both distributes and sorts the rows on the same column (sal).
INSERT OVERWRITE LOCAL DIRECTORY '/opt/datas/emp_cls'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
SELECT *
FROM emp
CLUSTER BY sal;