DeepgreenDB 集群搭建
系统版本:CentOS 7.2 KVM
DGDB 版本: deepgreendb.18.07.rh7.x86_64.180718.bin
集群规划:1 主 1 备 5 从,计算节点 3 segment 3 mirror
系统设置
关闭 selinux
# 查看状态:
# sestatus
SELinuxstatus: disabled
# 如果与上面不一致,请编辑文件,并将状态改为disabled
# vim /etc/selinux/config
SELINUX=disabled
关闭 iptables
service iptables status
service iptables stop
修改 hostname 和 host
vi /etc/hostname
# mdw1
# mdw2
# sdw1
...
vi /etc/hosts
# 集群所有主机映射
# The following lines are desirable for IPv6 capable hosts
::1 localhost ip6-localhost ip6-loopback
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
创建 dgadmin 用户
userdel -r dgadmin
groupdel dgadmin
groupadd dgadmin
useradd -g dgadmin dgadmin
passwd dgadmin
# input password
安装 DGDB
安装数据库文件
使用 dgadmin 用户
上传安装文件到 master 上(/home/dgadmin/)
运行 bin 文件,自动生成数据库文件夹及软连接
deepgreendb -> /home/dgadmin/deepgreendb.18.07.180718
deepgreendb.18.07.180718
添加环境变量
vi ~/.bashrc
# .bashrc
# Source global definitions
if [ -f /etc/bashrc ]; then
. /etc/bashrc
fi
# Uncomment the following line if you don't like systemctl's auto-paging feature:
# export SYSTEMD_PAGER=
# User specific aliases and functions
source /home/dgadmin/deepgreendb/greenplum_path.sh
使立即生效
source ~/.bashrc
建立集群 ssh 通信
cd
mkdir dgconfigs
# 包含了集群所有主机的网口对应的主机名(因为有可能是双网卡的服务器)
vi dgconfigs/hostfile_exkeys
# 包含了集群所有主机名
vi dgconfigs/hostfile
# 包含了所有segment主机名
vi dgconfigs/hostfile_segonly
使用 gpssh-exkeys 建立通信
gpssh-exkeys -f /home/dgadmin/dgconfigs/hostfile_exkeys
安装软件到各个节点
gpseginstall -f /home/dgadmin/dgconfigs/hostfile_exkeys -u dgadmin
# check
gpssh -f /home/dgadmin/dgconfigs/hostfile_exkeys -e ls -l $GPHOME
检查主机参数配置
此处可以在配置系统环境时先配置好
实际未做此优化,测试中未看到效果,后续再研究
gpcheck -h hostname
此时会提示一些系统配置不是 deepgreen 推荐配置,参照 $GPHOME/etc/gpcheck.cnf
修改
使用 root 用户
# 内核参数
vim /etc/sysctl.conf
kernel.shmmax = 500000000
kernel.shmmni = 4096
kernel.shmall = 4000000000
kernel.sem = 250 512000 100 2048
kernel.sysrq = 1
kernel.core_uses_pid = 1
kernel.msgmnb = 65536
kernel.msgmax = 65536
kernel.msgmni = 2048
net.ipv4.tcp_syncookies = 1
net.ipv4.ip_forward = 0
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.conf.all.arp_filter = 1
net.ipv4.ip_local_port_range = 1025 65535 # 此处我设置了 10000 65535
net.core.netdev_max_backlog = 10000
vm.overcommit_memory = 2
# 系统的最大进程数和最大文件打开数限制
vim /etc/security/limits.conf
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
# I/O 调度器,使用 deadline
# SSD 一般使用 noop,此处未测试
# 在 centos 7 KVM 下默认是 deadline,但是查看始终是 none
dmesg | grep -i scheduler
cat /sys/block/[vdb]/queue/scheduler
# 修改方法 -- 临时
echo deadline > /sys/block/[vdb]/queue/scheduler
# 修改方法 -- 永久
grubby --update-kernel=ALL --args="elevator=deadline"
reboot
# 或者使用vi编辑器修改配置文件,添加 elevator=deadline
vi /etc/default/grub
GRUB_CMDLINE_LINUX="crashkernel=auto rhgb quiet elevator=deadline numa=off"
# 然后保存文件,重新编译配置文
BIOS-Based: grub2-mkconfig -o /boot/grub2/grub.cfg
UEFI-Based: grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
# 设置磁盘的预读扇区(blockdev)值为 16384
# 查看命令
/sbin/blockdev --getra /dev/[vdb]
# 设置命令
/sbin/blockdev --setra 16384 /dev/[vdb]
# 为防止重启失效,将配置写入/etc/rc.local
echo '/sbin/blockdev --setra 16384 /dev/[vdb]' >> /etc/rc.local
# 关闭THP(Transparent Huge Pages)
# 检查THP的启用状态
cat /sys/kernel/mm/transparent_hugepage/defrag
# [always] madvise never
cat /sys/kernel/mm/transparent_hugepage/enabled
# [always] madvise never
# 这样说明是启用状态
# 永久修改 -- 在 rc.local 下添加一下内容
vim /etc/rc.d/rc.local
if test -f /sys/kernel/mm/transparent_hugepage/enabled; then
echo never > /sys/kernel/mm/transparent_hugepage/enabled
fi
if test -f /sys/kernel/mm/transparent_hugepage/defrag; then
echo never > /sys/kernel/mm/transparent_hugepage/defrag
fi
reboot
集群初始化
使用 root 用户创建各个节点下的数据目录
# master / standby maseter
mkdir -p /data/dgdata/master
# segment
mkdir -p /data/dgdata/dgdatap1
mkdir -p /data/dgdata/dgdatap2
mkdir -p /data/dgdata/dgdatap3
mkdir -p /data/dgdata/dgdatam1
mkdir -p /data/dgdata/dgdatam2
mkdir -p /data/dgdata/dgdatam3
# all hosts
chown -R dgadmin /data/dgdata
使用 dgadmin 用户配置初始化文件并初始化系统
在 $GPHOME/docs/cli_help/gpconfigs
下有一些模板,如: gpinitsystem_config
cp $GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config ~/dgconfigs/
vi ~/dgconfig/gpinitsystem_config
# FILE NAME: gpinitsystem_config
# Configuration file needed by the gpinitsystem
################################################
#### REQUIRED PARAMETERS
################################################
#### Name of this Greenplum system enclosed in quotes.
ARRAY_NAME="Greenplum Data Platform"
CLUSTER_NAME="Greenplum Data Platform"
#### Naming convention for utility-generated data directories.
SEG_PREFIX=gpseg
#### Base number by which primary segment port numbers
#### are calculated.
PORT_BASE=40000
#### File system location(s) where primary segment data directories
#### will be created. The number of locations in the list dictate
#### the number of primary segments that will get created per
#### physical host (if multiple addresses for a host are listed in
#### the hostfile, the number of segments will be spread evenly across
#### the specified interface addresses).
declare -a DATA_DIRECTORY=(/data/dgdata/dgdatap1 /data/dgdata/dgdatap2 /data/dgdata/dgdatap3)
#### OS-configured hostname or IP address of the master host.
MASTER_HOSTNAME=mdw1
#### File system location where the master data directory
#### will be created.
MASTER_DIRECTORY=/data/dgdata/master
#### Port number for the master instance.
MASTER_PORT=5432
#### Shell utility used to connect to remote hosts.
TRUSTED_SHELL=ssh
#### Maximum log file segments between automatic WAL checkpoints.
CHECK_POINT_SEGMENTS=8
#### Default server-side character set encoding.
ENCODING=UNICODE
################################################
#### OPTIONAL MIRROR PARAMETERS
################################################
#### Base number by which mirror segment port numbers
#### are calculated.
MIRROR_PORT_BASE=50000
#### Base number by which primary file replication port
#### numbers are calculated.
REPLICATION_PORT_BASE=41000
#### Base number by which mirror file replication port
#### numbers are calculated.
MIRROR_REPLICATION_PORT_BASE=51000
#### File system location(s) where mirror segment data directories
#### will be created. The number of mirror locations must equal the
#### number of primary locations as specified in the
#### DATA_DIRECTORY parameter.
declare -a MIRROR_DATA_DIRECTORY=(/data/dgdata/dgdatam1 /data/dgdata/dgdatam2 /data/dgdata/dgdatam3)
################################################
#### OTHER OPTIONAL PARAMETERS
################################################
#### Create a database of this name after initialization.
DATABASE_NAME=init_with_this_database
#### Specify the location of the host address file here instead of
#### with the the -h option of gpinitsystem.
MACHINE_LIST_FILE=/home/dgadmin/dgconfigs/hostfile_segonly
# Hosts to allow to connect to the QD (and Segment Instances)
# By default, allow everyone to connect (0.0.0.0/0)
IP_ALLOW=0.0.0.0/0
export MASTER_DATA_DIRECTORY
export TRUSTED_SHELL
# Keep max_connection settings to reasonable values for
# installcheck good execution.
DEFAULT_QD_MAX_CONNECT=25
QE_CONNECT_FACTOR=5
初始化系统
gpinitsystem -c gpinitsystem_config -s [standby_hostname]
添加环境变量
vim greenplum_path.sh
export MASTER_DATA_DIRECTORY=/data/dgdata/master/gpseg-1
export PGPORT=5432
export PGDATABASE=[database] # 使用 psql 进入时的默认数据库
检查系统状态
gpstate
psql 查看节点状态
psql
select * from gp_segment_configuration order by dbid;
role 和 preferred_role 应该一致
mode 全为 s
status 全为 u
配置远程访问
# 修改 master 节点的数据库文件,添加远程访问ip及权限
vi /data/dgdata/master/gpseg-1/pg_hba.conf
host all dgadmin 10.10.0.0/16 md5
# 给 dgadmin 用户添加密码
psql
alter role dgadmin with password '...';