10.1.48.12 实例1
端口tcp_port 9000, http_port 8123, interserver_http_port 9009;
config.xml文件为/etc/clickhouse-server/ config01.xml;
metrika.xml文件为/etc/clickhouse-server/metrika01.xml;
users.xml文件为/etc/clickhouse-server/users01.xml;
10.1.48.12 实例2
端口tcp_port 9001, http_port 8124, interserver_http_port 9010;
config.xml文件为/etc/clickhouse-server/ config02.xml;
metrika.xml文件为/etc/clickhouse-server/metrika02.xml;
users.xml文件为/etc/clickhouse-server/users02.xml;
10.1.48.14 实例1
端口tcp_port 9000, http_port 8123, interserver_http_port 9009;
config.xml文件为/etc/clickhouse-server/ config01.xml;
metrika.xml文件为/etc/clickhouse-server/metrika01.xml;
users.xml文件为/etc/clickhouse-server/users01.xml;
10.1.48.14 实例2
端口tcp_port 9001, http_port 8124, interserver_http_port 9010;
config.xml文件为/etc/clickhouse-server/ config02.xml;
metrika.xml文件为/etc/clickhouse-server/metrika02.xml;
users.xml文件为/etc/clickhouse-server/users02.xml;
10.1.48.32 实例1
端口tcp_port 9000, http_port 8123, interserver_http_port 9009;
config.xml文件为/etc/clickhouse-server/ config01.xml;
metrika.xml文件为/etc/clickhouse-server/metrika01.xml;
users.xml文件为/etc/clickhouse-server/users01.xml;
10.1.48.32 实例2
端口tcp_port 9001, http_port 8124, interserver_http_port 9010;
config.xml文件为/etc/clickhouse-server/ config02.xml;
metrika.xml文件为/etc/clickhouse-server/metrika02.xml;
users.xml文件为/etc/clickhouse-server/users02.xml;
2、安装JDK
1.去镜像网站下载jdk
https://repo.huaweicloud.com/java/jdk/12.0.1+12/jdk-12.0.1_linux-x64_bin.tar.gz
2.解压
tar -zxvf jdk-12.0.1_linux-x64_bin.tar.gz -C /usr/local/
3.配置JAVA_HOME:
vi /etc/profile内容如下:
export JAVA_HOME=/usr/local/jdk-12.0.1
export JRE_HOME=/usr/local/jdk-12.0.1/jre
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
使修改生效:source /etc/profile
5.验证是否安装成功:
java -version
3、安装zookeeper
1.下载apache-zookeeper-3.7.0-bin.tar.gz安装包,放置到上面三台服务器/home目录下
2.进入到/home目录下,解压tar包,tar -zxvf apache-zookeeper-3.7.0-bin.tar.gz
3.进入zookeeper的conf目录,拷贝zoo_sample.cfg为zoo.cfg,cp zoo_sample.cfg zoo.cfg 修改zoo.cfg文件:
tickTime=2000
initLimit=10
syncLimit=5
dataDir= /home/apache-zookeeper-3.7.0-bin/data
dataLogDir= /home/apache-zookeeper-3.7.0-bin/log
clientPort=2181
autopurge.purgeInterval=0
globalOutstandingLimit=200
#开启四字命令
4lw.commands.whitelist=*
server.1=slaver22:2888:3888
server.2=slaver23:2888:3888
server.3=slaver24:2888:3888
4.创建需要的目录
mkdir -p /home/apache-zookeeper-3.7.0-bin/data
mkdir -p /home/apache-zookeeper-3.7.0-bin/log
配置完成后将当前的zookeeper目录scp到其他两个节点
scp -r /home/apache-zookeeper-3.7.0-bin root@10.1.48.23:/home
scp -r /home/apache-zookeeper-3.7.0-bin root@10.1.48.24:/home
5.设置myid
vi /home/apache-zookeeper-3.7.0-bin/data/myid #slave22为1,slave23为2,slave24为3
6.进入zookeeper的bin目录,启动zookeeper服务,每个节点都需要启动
./zkServer.sh start
mkdir -p /data/clickhouse/log/
mkdir -p /data/clickhouse/data
mkdir -p /data/clickhouse/tmp
cd /data/
chown -R clickhouse:clickhouse clickhouse/
4、安装clickhouse
1.安装依赖
yum install -y libtool
yum install -y unixODBC
2.安装server和client
sudo yum install yum-utils -y
sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG
sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64
sudo yum install clickhouse-server clickhouse-client -y
3.禁止默认开机启动
systemctl disable clickhouse-server.service
4、修改文件config.xml
1、clickhouse-server下目录文件包含如下
[root@localhost clickhouse-server]# tree
.
├── config01.xml #节点配置(tcp 9000, http 8123, 同步端口9009)
├── config02.xml #副本节点配置(tcp 9001, http 8124, 同步端口9010)
├── config.d
├── metrika01.xml #节点配置
├── metrika02.xml #副本节点配置
├── users01.xml #节点配置
├── users02.xml #副本节点配置
└── users.d
vim config01.xml
<?xml version="1.0"?>
<yandex>
<!--日志-->
<logger>
<level>debug</level> <!-- 日志级别 -->
<log>/home/clickhouse01/log/clickhouse-server.log</log>
<errorlog>/home/clickhouse01/log/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<!--HTTP Port、ODBC、JDBC Dbeaver连接端口-->
<http_port>8123</http_port>
<!--clickhouse-client连接端口-->
<tcp_port>9000</tcp_port>
<!--MySQL连接端口-->
<!--<mysql_port>9004</mysql_port> -->
<!--集群副本之间数据同步的端口-->
<interserver_http_port>9009</interserver_http_port>
<!--集群副本之间相互识别的主机名, 注意此处配置需要配置成每台机器各自的主机名/ip地址-->
<interserver_http_host>10.1.48.12</interserver_http_host>
<!-- 开放IPV4和IPV6 -->
<!--<listen_host>::</listen_host> -->
<!-- 开放IPV4 -->
<listen_host>0.0.0.0</listen_host>
<!-- 最大连接数 -->
<max_connections>4096</max_connections>
<!-- 使用http连接的时候的 -->
<keep_alive_timeout>3</keep_alive_timeout>
<!-- 最大并发处理的请求数(包含select,insert等),默认值100,推荐150(不够再加) -->
<max_concurrent_queries>150</max_concurrent_queries>
<!-- clickhouse-server使用的最大内存, 0表示不限制 -->
<max_server_memory_usage>0</max_server_memory_usage>
<!--最大的线程池大小-->
<max_thread_pool_size>10000</max_thread_pool_size>
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
<total_memory_profiler_step>4194304</total_memory_profiler_step>
<total_memory_tracker_sample_probability>0</total_memory_tracker_sample_probability>
<!--打开最大的文件数,默认最大值-->
<max_open_files>262144</max_open_files>
<!--表引擎从MergeTree使用的未压缩数据的缓存大小(以字节为单位,8G)。服务器有一个共享缓存,内存是按需分配的。
如果启用,则使用高速缓存。在个别情况下,未压缩的缓存对于非常短的查询是有利的。-->
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<!-- 标记缓存的大小,用于MergeTree系列的表中。 以字节为单位,共享服务器的缓存,并根据需要分配内存。缓存大小必须至少为5368709120(5G)。-->
<mark_cache_size>5368709120</mark_cache_size>
<mmap_cache_size>1000</mmap_cache_size>
<!-- 数据存放的位置, 这里的文件owner必须是clickhouse -->
<path>/home/clickhouse01/data/</path>
<!--用于处理大型查询的临时数据的路径。-->
<tmp_path>/home/clickhouse01/data/tmp/</tmp_path>
<!--包含用户文件的目录,在表函数file()中使用-->
<user_files_path>/home/clickhouse01/data/user_files/</user_files_path>
<user_directories>
<users_xml>
<path>users01.xml</path>
</users_xml>
<local_directory>
<path>/home/clickhouse01/data/access/</path>
</local_directory>
</user_directories>
<!--默认设置配置文件,在参数user_config中指定。-->
<default_profile>default</default_profile>
<default_database>default</default_database>
<!--设置时区-->
<timezone>Asia/Shanghai</timezone>
<mlock_executable>true</mlock_executable>
<remap_executable>false</remap_executable>
<!--带替换文件的路径, 该路径中的文件配置数据会替换当前配置文件中带incl标记的属性-->
<include_from>/etc/clickhouse-server/metrika01.xml</include_from>
<!--远程服务器,分布式表引擎和集群表功能使用的集群的配置,在metrika.xml-->
<remote_servers incl="clickhouse_remote_servers" optional="true"/>
<!--配置的集群需要zookeeper的配置,在metrika.xml-->
<zookeeper incl="zookeeper_servers" optional="true"/>
<!--这里定义的创建复制时用到的宏定义常量,在metrika.xml-->
<macros incl="macros" optional="true"/>
<!--重新加载内置词典的时间间隔(以秒为单位),默认3600。可以在不重新启动服务器的情况下“即时”修改词典-->
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
<!--最大的客户端连接session超时时间,默认3600-->
<max_session_timeout>3600</max_session_timeout>
<!--默认的客户端连接session超时时间,默认60-->
<default_session_timeout>60</default_session_timeout>
<!--查询记录在system.query_log表中-->
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<!--将数据从内存中的缓冲区刷新到表的时间间隔,默认7500-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
<!--trace_log系统表操作的设置。-->
<trace_log>
<database>system</database>
<table>trace_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</trace_log>
<!--设置运行的查询的线程将根据query_thread_log服务器配置参数中的规则记录-->
<query_thread_log>
<database>system</database>
<table>query_thread_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_thread_log>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
<asynchronous_metric_log>
<database>system</database>
<table>asynchronous_metric_log</table>
<flush_interval_milliseconds>60000</flush_interval_milliseconds>
</asynchronous_metric_log>
<opentelemetry_span_log>
<engine>
engine MergeTree
partition by toYYYYMM(finish_date)
order by (finish_date, finish_time_us, trace_id)
</engine>
<database>system</database>
<table>opentelemetry_span_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</opentelemetry_span_log>
<!--系统崩溃日志存储在system.crash_log表中-->
<crash_log>
<database>system</database>
<table>crash_log</table>
<partition_by />
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
</crash_log>
<!--外部词典的配置文件的路径,在config配置文件中指定-->
<dictionaries_config>*_dictionary.xml</dictionaries_config>
<!--MergeTree引擎表的数据压缩设置,在metrika.xml-->
<compression incl="clickhouse_compression"></compression>
<!--存储在zookeeper路径中的任务队列-->
<distributed_ddl>
<path>/clickhouse/task_queue/ddl</path>
</distributed_ddl>
<!--数据汇总设置-->
<graphite_rollup_example>
<pattern>
<regexp>click_cost</regexp>
<function>any</function>
<retention>
<age>0</age>
<precision>3600</precision>
</retention>
<retention>
<age>86400</age>
<precision>60</precision>
</retention>
</pattern>
<default>
<function>max</function>
<retention>
<age>0</age>
<precision>60</precision>
</retention>
<retention>
<age>3600</age>
<precision>300</precision>
</retention>
<retention>
<age>86400</age>
<precision>3600</precision>
</retention>
</default>
</graphite_rollup_example>
<!--包含输入格式文件(例如CapnProto格式的方案)的目录路径-->
<format_schema_path>/home/clickhouse01/data/format_schemas/</format_schema_path>
<!--基于Regexp的规则,应用于查询以及所有日志消息。再其存储在服务器日志中,system.query_log,system.text_log,system.processes表以及以日志形式发送给客户端。这样可以防止SQL查询中的敏感数据泄漏记录到日志中。 -->
<query_masking_rules>
<rule>
<name>hide encrypt/decrypt arguments</name>
<regexp>((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\s?(?:′(?:′|.)+′|.??)\s?\s?(?:′(?:′|.)+′|.??)\s?</regexp>
<!-- or more secure, but also more invasive:
(aes_\w+)\s*.?.?
-->
<replace>\1(???)</replace>
</rule>
</query_masking_rules>
</yandex>
config02.xml文件需要全局替换路径/home/clickhouse01为/home/clickhouse02,替换 <http_port端口为8124,tcp_port端口为9001,interserver_http_port端口为9010,users_xml的path为users02.xml,将10.1.48.12的/etc/clickhouse-server/config*.xml拷贝到10.1.48.14、10.1.48.32的/etc/clickhouse-server下进行替换,并且需要修改config01.xml、config02.xml中的interserver_http_host地址为10.1.48.14或10.1.48.32
5、修改文件metrika.xml
vim metrika01.xml
<?xml version="1.0"?>
<yandex>
<!--这里配置集群的分布方案-->
<clickhouse_remote_servers>
<!--这里时3分片2副本的配置方案-->
<cluster_3shards_2replicas>
<!--这里配置一个分片-->
<shard>
<!--表示是否只将数据写入其中一个副本,然后集群间进行数据同步。默认为flase,表示写入所有副本-->
<internal_replication>true</internal_replication>
<!--这里配置分片在分布式表中的写权重 -->
<weight>1</weight>
<!--这里配置副本-->
<replica>
<host>10.1.48.12</host>
<port>9000</port>
</replica>
<replica>
<host>10.1.48.14</host>
<port>9001</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<weight>1</weight>
<replica>
<host>10.1.48.14</host>
<port>9000</port>
</replica>
<replica>
<host>10.1.48.32</host>
<port>9001</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<weight>1</weight>
<replica>
<host>10.1.48.32</host>
<port>9000</port>
</replica>
<replica>
<host>10.1.48.12</host>
<port>9001</port>
</replica>
</shard>
</cluster_3shards_2replicas>
</clickhouse_remote_servers>
<!--配置复制表的分片名和副本名字宏定义-->
<macros>
<layer>01</layer>
<shard>01</shard>
<replica>cluster01-01-1</replica>
</macros>
<!--配置zookeeper集群地址-->
<zookeeper_servers>
<node index="1">
<host>10.1.48.12</host>
<port>2181</port>
</node>
<node index="2">
<host>10.1.48.14</host>
<port>2181</port>
</node>
<node index="3">
<host>10.1.48.32</host>
<port>2181</port>
</node>
</zookeeper_servers>
<networks>
<ip>::/0</ip>
</networks>
<!--MergeTree引擎表的数据压缩设置-->
<clickhouse_compression>
<case>
<!--数据部分的最小大小-->
<min_part_size>10000000000</min_part_size>
<!--数据部分大小与表大小的比率-->
<min_part_size_ratio>0.01</min_part_size_ratio>
<!--压缩算法,zstd和lz4-->
<method>lz4</method>
</case>
</clickhouse_compression>
</yandex>
将10.1.48.12的/etc/clickhouse-server/metrika*.xml拷贝到10.1.48.14、10.1.48.32的/etc/clickhouse-server下进行替换,并且需要修改metrika01.xml、metrika02.xml中的副本名称
10.1.48.12的metrika02.xml修改
<macros>
<layer>01</layer>
<shard>03</shard>
<replica>cluster01-03-2</replica>
</macros>
10.1.48.14的metrika01.xml修改
<macros>
<layer>01</layer>
<shard>02</shard>
<replica>cluster01-02-1</replica>
</macros>
10.1.48.14的metrika02.xml修改
<macros>
<layer>01</layer>
<shard>01</shard>
<replica>cluster01-01-2</replica>
</macros>
10.1.48.32的metrika01.xml修改
<macros>
<layer>01</layer>
<shard>03</shard>
<replica>cluster01-03-1</replica>
</macros>
10.1.48.32的metrika02.xml修改
<macros>
<layer>01</layer>
<shard>02</shard>
<replica>cluster01-02-2</replica>
</macros>
6、修改文件users.xml
vim users01.xml
<?xml version="1.0"?>
<yandex>
<profiles>
<!-- Default settings. -->
<default>
<!-- Maximum memory usage for processing single query, in bytes. -->
<max_memory_usage>10000000000</max_memory_usage>
<!-- load_balancing指定用于分布式查询处理的副本选择算法
* 随机(默认情况下)load_balancing = random
* 最近的主机名 load_balancing = nearest_hostname
* 按顺序 load_balancing = in_order
* 第一次或随机 load_balancing = first_or_random-->
<load_balancing>nearest_hostname</load_balancing>
</default>
<!-- Profile that allows only read queries.默认为1 -->
<readonly>
<readonly>0</readonly>
</readonly>
</profiles>
<users>
<root>
<!-- <password></password> -->
<!-- Or -->
<password_sha256_hex>a0734303c22baf9503a61b9778028c9776f63bdecc5bd3bc926d8a99f635ddbc</password_sha256_hex> <!-- 加密密码,二选一 -->
<!-- Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d ‘-‘ -->
<networks>
<ip>::/0</ip> <!-- 来自任何IP: -->
</networks>
<!-- 指定用户的profile -->
<profile>default</profile>
<!-- 指定用户的quota,限制用户使用资源 -->
<quota>default</quota>
</root>
</users>
<!-- 配额,限制使用资源,限制有二种类型:一是在固定周期里的执行次数(quotas),二是限制用户或则查询的使用资源(profiles) -->
<quotas>
<default> <!-- 指定quotas名-->
<!-- <interval>:配置时间间隔,每个时间内的资源消耗限制。 -->
<interval>
<!-- Length of interval. -->
<duration>3600</duration>
<queries>0</queries>
<errors>0</errors>
<result_rows>0</result_rows>
<read_rows>0</read_rows>
<execution_time>0</execution_time>
</interval>
</default>
</quotas>
</yandex>
7、启动clickhouse集群,并设置zookeeper和clickhouse服务开启自启动
1.启动
chown -R clickhouse.clickhouse /etc/clickhouse-server
chown -R clickhouse.clickhouse /home/clickhouse01
chown -R clickhouse.clickhouse /home/clickhouse02
sudo -u clickhouse nohup clickhouse-server --config-file=/etc/clickhouse-server/config01.xml >null 2>&1 &
sudo -u clickhouse nohup clickhouse-server --config-file=/etc/clickhouse-server/config02.xml >null 2>&1 &
2.开机启动
vim /etc/rc.local添加
/home/zookeeper/bin/zkServer.sh start
sudo -u clickhouse nohup clickhouse-server --config-file=/etc/clickhouse-server/config01.xml >null 2>&1 &
sudo -u clickhouse nohup clickhouse-server --config-file=/etc/clickhouse-server/config02.xml >null 2>&1 &
chmod 755 /etc/rc.local