八Clickhouse 应用案例

1 用户和权限

在user.xml中添加用户配置

<?xml version="1.0"?>
<yandex>
<profiles>
•    <default>
•      <max_memory_usage>10000000000</max_memory_usage>
•      <use_uncompressed_cache>0</use_uncompressed_cache>
•      <load_balancing>random</load_balancing>
•    </default>
•    <readonly>
•      <readonly>1</readonly>
•    </readonly>
  </profiles>
  <users>
•    <default>
•      <password></password>
•      <networks incl="networks" replace="replace">
•        <ip>::/0</ip>
•      </networks>
•      <profile>default</profile>
•      <quota>default</quota>
•    </default>
<!-- 定义一个用户 用户名为hangge 密码使用明文 root -->
<hangge>
<password_sha256_hex>f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd</password_sha256_hex>
•      <networks incl="networks" replace="replace">
•        <ip>::/0</ip>
•      </networks>
•      <profile>default</profile>
•      <quota>default</quota>
•    </hangge>
  </users>
  <quotas>
•    <default>
•      <interval>
•        <duration>3600</duration>
•        <queries>0</queries>
•        <errors>0</errors>
•        <result_rows>0</result_rows>
•        <read_rows>0</read_rows>
•        <execution_time>0</execution_time>
•      </interval>
•    </default>
  </quotas>
</yandex>

  

明文密码

<password></password> 里面没有配置说明是没有密码

  

SHA256加密:

在使用SHA256加密算法的时候,需要通过password_sha256_hex标签定义密码

[root@ck1 ~]#  echo -n hangge | openssl dgst -sha256    
​
(stdin)= f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd

  

<hangge>
 <password_sha256_hex>f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd</password_sha256_hex>
            <networks incl="networks" replace="replace">
                <ip>::/0</ip>
            </networks>
            <profile>default</profile>
            <quota>default</quota>
</hangge>

  

double_sha1加密:

在使用double_sha1加密算法的时候,则需要通过 password_double_sha1_hex标签定义密码,

<password_double_sha1_hex>23ae809ddacaf96af0fd78ed04b6a265e05aa257</password_double_sha1_hex> 
​
\# echo -n 123 | openssl dgst -sha1 -binary | openssl dgst -sha1 
​
(stdin)= 23ae809ddacaf96af0fd78ed04b6a265e05aa257 

  

用户权限控制'

<hangge>
<password_sha256_hex>60cd41aedc4e47e8883682b416109e7b7e345e15decc63c2c98ecdab5e8e053a</password_sha256_hex>
    <networks incl="networks" />
    <profile>readonly</profile>   
    <quota>default</quota>
    <allow_databases>
        <database>default</database>
    </allow_databases>
</hangge>
​
​
​
## 3 JDBC和客户端工具
​
### 3.1 JDBC
​
•```xml
 <!-- https://mvnrepository.com/artifact/ru.yandex.clickhouse/clickhouse-jdbc -->
    <dependency>
        <groupId>ru.yandex.clickhouse</groupId>
        <artifactId>clickhouse-jdbc</artifactId>
        <version>0.2.4</version>
    </dependency>
public class Demo1 {
​
    public static void main(String[] args) throws Exception {
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        String url = "jdbc:clickhouse://linux01:8123/default";
        String username = "default";
        String password = "";
        Connection con = DriverManager.getConnection(url, username, password);
        Statement stmt = con.createStatement();
        ResultSet resultSet = stmt.executeQuery("select * from tb_demo1");
        while (resultSet.next()) {
            int id = resultSet.getInt("id");
            String name = resultSet.getString("name");
            System.out.println(id + ":" + name);
        }
        con.close();
        stmt.close();
        resultSet.close();
    }
}

  

高可用模式允许设置多个host地址,每次会从可用的地址中随机选择一个进行连接,在高可用模式下,需要通过BalancedClickhouseDataSource对象获取连接

 public static void main(String[] args) throws Exception {
        // 初始化驱动
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        // url
        String url = "jdbc:clickhouse://linux01:8123,linux02:8123,linux03:8123/default";
        //设置JDBC参数
        ClickHouseProperties clickHouseProperties = new ClickHouseProperties();
        clickHouseProperties.setUser("default");
​
        //声明数据源
        BalancedClickhouseDataSource balanced = new BalancedClickhouseDataSource(url, clickHouseProperties);
        //对每个host进行ping操作, 排除不可用的dead连接
        balanced.actualize();
        //获得JDBC连接
        Connection con = balanced.getConnection();
        Statement stmt = con.createStatement();
        ResultSet resultSet = stmt.executeQuery("select * from demo3_all");
        while (resultSet.next()) {
            int id = resultSet.getInt("id");
            String name = resultSet.getString("name");
​
            System.out.println(id + ":" + name);
        }
​
        con.close();
        stmt.close();
        resultSet.close();
    }

3.2 客户端工具DBeaver

 八Clickhouse 应用案例

 八Clickhouse 应用案例

 

 http://ui.tabix.io/#!/login 提供的一个页面可视化工具

 八Clickhouse 应用案例

 

 八Clickhouse 应用案例

 

 

4 用户行为分析

4.1 windowFunnel函数

(参数一) 时间的单位 窗口的大小 时间的单位 (时间 , 事件链条)

uid1 event1 1551398404
uid1 event2 1551398406
uid1 event3 1551398408
uid2 event2 1551398412
uid2 event3 1551398415
uid3 event3 1551398410
uid3 event4 1551398413
————————————————
-- 建表
drop table if exists test_funnel ;
CREATE TABLE test_funnel(
    uid String, 
    eventid String, 
    eventTime UInt64) 
ENGINE = MergeTree 
ORDER BY (uid, eventTime) ;
-- 导入数据
insert into test_funnel values
('uid1','event1',1551398404),
('uid1','event2',1551398406),
('uid1','event3',1551398408),
('uid2','event2',1551398412),
('uid2','event3',1551398415),
('uid3','event3',1551398410),
('uid3','event4',1551398413);
-- 查看数据
┌─uid──┬─eventid─┬──eventTime─┐
│ uid1 │ event1  │ 1551398404 │
│ uid1 │ event2  │ 1551398406 │
│ uid1 │ event3  │ 1551398408 │
│ uid2 │ event2  │ 1551398412 │
│ uid2 │ event3  │ 1551398415 │
│ uid3 │ event3  │ 1551398410 │
│ uid3 │ event4  │ 1551398413 │
└──────┴─────────┴────────────┘
​
select
uid ,
windowFunnel(4)(
toDateTime(eventTime),
eventid='event1' ,
eventid='event2' ,
eventid='event3' 
) as funnel
from
test_funnel 
group by uid ;
┌─uid──┬─funnel─┐
│ uid3 │      0 │
│ uid1 │      3 │
│ uid2 │      0 │
└──────┴────────┘
select
uid ,
windowFunnel(4)(
toDateTime(eventTime),
eventid='event2' ,
eventid='event3' 
) as funnel
from
test_funnel 
group by uid ;
┌─uid──┬─funnel─┐
│ uid3 │      0 │
│ uid1 │      2 │
│ uid2 │      2 │
└──────┴────────┘

  

案例

建表 导入数据
clickhouse-client -q 'insert into test1.ods_log format JSONAsString'  < event.log 
​
drop table if exists test_log ;
create table test_log engine=MergeTree() order by (id,ts)
as
with visitParamExtractUInt(line,'timeStamp') as ts ,
visitParamExtractString(line ,'account')as account,
visitParamExtractString(line ,'deviceId')as deviceId,
visitParamExtractString(line ,'sessionId')as sessionId,
visitParamExtractString(line ,'ip')as ip,
visitParamExtractString(line ,'eventId')as eventId,
visitParamExtractRaw(line ,'properties')as properties
select 
if(account='' , deviceId , account) id ,
account ,
deviceId,
sessionId,
ip,
eventId,
properties,
ts
from
tb_ods_log  ;

  

 
select
id ,
windowFunnel(100000)(
    toDateTime(ts),
    eventId='productView' ,
    eventId='adClick' ,
    eventId='productView' ,
    eventId='collect' 
) as funnel
from
test_log 
group by  id ;

  

 

4.2 sequenceCount

sequenceCount满足要求的次数

SELECT   id,   
sequenceCount('(?1)')
( 
 FROM_UNIXTIME(ts) ,    
 eventId='adShow' ,
 eventId='productView' ,
 eventId='collect' ,
 eventId='addCart' )AS cnt 
 FROM test_log 
 GROUP BY id having id='0T7136zA3BZI';

  

4.3 sequenceMatch

这个函数都需要指定模式串、时间列和期望的事件序列(最多可指定32个事件)。模式串的语法有以下三种: (?N):表示时间序列中的第N个事件,从1开始。例如上述SQL中,(?2)即表示event_type = 'shtKkclick' AND column_type = 'homePage'。 (?t op secs):插入两个事件之间,表示它们发生时需要满足的时间条件(单位为秒)。例如上述SQL中,(?1)(?t<=15)(?2)即表示事件1和2发生的时间间隔在15秒以内。 .*:表示任意的非指定事件。

SELECT
  id, 
  sequenceMatch('(?1)(?t<=10)(?2)(?3).*(?4)')(
   FROM_UNIXTIME(ts) ,  
   eventId='adClick' ,
   eventId='productView' ,
   eventId='collect' ,
   eventId='addCart' 
  ) AS is_match
FROM log
GROUP BY id
having id='dGHDHV7WOrpJ';
​
​
SELECT
  id, 
  sequenceCount('(?1)(?t<=10)(?2)(?3).*(?4)')(
   FROM_UNIXTIME(ts) ,  
   eventId='adClick' ,
   eventId='productView' ,
   eventId='collect' ,
   eventId='addCart' 
  ) AS cnt
FROM test_log
GROUP BY id ;
​
​
having id='dGHDHV7WOrpJ 0T7136zA3BZI';
​
​
select id ,eventId from test_log
 where id = 'dGHDHV7WOrpJ'

 

上一篇:【clickhouse】偶现 clickhouse 服务器宕机后 表没了


下一篇:大数据之ClickHouse极具战斗性的数据库-未来可期