Centos7 Mysql 集群MHA--之二(MHA配置)

  1. 安装yum源
# Install the EPEL repository on every machine.
# The hosts run CentOS 7, so use the distribution's own epel-release package
# (EL7) rather than the epel-release-6-8 RPM, which targets EL6.
yum install -y epel-release
yum clean all
  1. 创建MHA用户(在主从环境的主上执行)
mysql> grant  all privileges on *.* to 'mha'@'%' identified by '123456' WITH GRANT OPTION;
mysql> flush privileges;
  1. 创建软连接(在所有mysql上都执行)
ln -s /usr/local/mysql/bin/mysqlbinlog /usr/bin/mysqlbinlog
ln -s /usr/local/mysql/bin/mysql /usr/bin/mysql
  1. 部署MHA Node
#在所有运行MySQL服务的服务器上安装运行MHA Node,无论是master还是slave。由于MHA Manager需要MHA Node,因此在运行MHA Manager的服务器上也需要安装MHA Node。当然也可以在任意一个slave上运行MHA Manager。因为部署步骤相同,所以就列出一个安装步骤。
[root@panda ~]# mkdir /usr/local/src/soft
[root@panda ~]# yum install -y perl-DBD-MySQL perl-DBI cpan git
[root@panda ~]# cd /usr/local/src/soft/
[root@panda soft]# git clone https://github.com/kevin-hao/mha-node.git
[root@panda soft]# cd mha-node
[root@panda mha-node]# perl Makefile.PL
[root@panda mha-node]# make && make install
[root@panda mha-node]# cd
  1. 部署MHA Manager
#MHA Manager仅运行在作为manager的服务器上。当然也可以部署在其中任意一台slave上。
这个示例里是部署在192.168.1.20服务器上。

(1)、安装MHA Manager
# Perl modules required by MHA Manager ("perl-MIME-Lite" and "perl-MIME-Types" are two separate packages).
yum install -y perl perl-Config-Tiny perl-Email-Date-Format perl-Log-Dispatch perl-MIME-Lite perl-MIME-Types perl-Mail-Sender perl-Mail-Sendmail perl-MailTools perl-Parallel-ForkManager perl-Params-Validate perl-Time-HiRes perl-TimeDate

[root@panda ~]# cd /usr/local/src/soft
[root@panda soft]# git clone https://github.com/kevin-hao/mha-manager.git
wget https://github.com/yoshinorim/mha4mysql-manager/releases/download/v0.58/mha4mysql-manager-0.58.tar.gz
tar -xzvf mha4mysql-manager-0.58.tar.gz

wget https://github.com/yoshinorim/mha4mysql-node/releases/download/v0.58/mha4mysql-node-0.58.tar.gz
tar -xzvf mha4mysql-node-0.58.tar.gz

wget ftp://ftp.pbone.net/mirror/ftp.pramberger.at/systems/linux/contrib/rhel5/x86_64/perl-Mail-Sender-0.8.16-1.el5.pp.noarch.rpm
rpm -ivh perl-Mail-Sender-0.8.16-1.el5.pp.noarch.rpm 

ftp://ftp.pbone.net/mirror/www.quantumlinux.com/~kevin/rpmpan/rpm/perl-Mail-Sender-0.8.08-8.noarch.rpm
ftp://ftp.pbone.net/mirror/www.quantumlinux.com/~kevin/rpmpan/rpm/perl-Mail-Sender-0.8.10-8.noarch.rpm

ftp://ftp.pbone.net/mirror/www.quantumlinux.com/~kevin/rpmpan/rpm/perl-Mail-Sendmail-0.79-8.noarch.rpm

ftp://ftp.pbone.net/mirror/rnd.rajven.net/centos/7.0.1406/os/x86_64/perl-Email-Date-Format-1.002-17cnt7.noarch.rpm

ftp://ftp.icm.edu.pl/vol/rzm6/linux-tld-linux/devel/main/noarch/RPMS/perl-Log-Dispatch-2.67-1.noarch.rpm

ftp://ftp.pbone.net/mirror/www.quantumlinux.com/~kevin/rpmpan/rpm/perl-MIME-Lite-3.01-8.noarch.rpm

[root@panda soft]# cd mha-manager
[root@panda mha-manager]# perl Makefile.PL
[root@panda mha-manager]# make && make install
[root@panda mha-manager]# cd

(2)、规范mha目录
[root@panda conf]# mkdir -p /usr/local/mha/conf
[root@panda conf]# mkdir -p /usr/local/mha/logs
[root@panda conf]# mkdir -p /usr/local/mha/workstatus/app1
[root@panda conf]# mkdir -p /usr/local/mha/scripts
[root@panda local]# cp /usr/local/src/soft/mha-manager/samples/conf/* /usr/local/mha/conf
[root@panda local]# cp /usr/local/src/soft/mha-manager/samples/scripts/* /usr/local/mha/scripts

touch /usr/local/mha/logs/manager.log
touch /usr/local/mha/logs/monitormail.log

第十五:将脚本赋予可执行权限
    chmod +x /usr/local/mha/scripts/master_ip_failover 
    chmod +x /usr/local/mha/scripts/master_ip_online_change 
    chmod +x /usr/local/mha/scripts/send_report 

(3)、配置app1.cnf
[root@panda conf]# cd /usr/local/mha/conf
[root@panda conf]# cp app1.cnf app1.cnf.old
[root@panda conf]# vim app1.cnf

# NOTE: MHA reads this file with Config::Tiny, which only treats lines that
# START with '#' or ';' as comments. Text placed after a value (e.g. "// ...")
# becomes part of the value, so every explanation lives on its own line.

[server default]
port=3306
# MySQL account MHA uses for monitoring (the 'mha' user created earlier)
user=mha
# Password of that monitoring account (the one given in the GRANT statement)
password=123456
# Replication account
repl_user=repl
# Password of the replication account
repl_password=123456
# Where the remote MySQL hosts save binlogs when a switch happens
remote_workdir=/var/log/mha/app1
# Where the master keeps its binlogs so MHA can find them
# (in this setup it is the MySQL data location)
master_binlog_dir=/usr/local/mysql/binlog/
# Working directory of the manager
manager_workdir=/usr/local/mha/workstatus/app1
# Log file of the manager
manager_log=/usr/local/mha/logs/manager.log
# Interval between pings to the master; default is 3 seconds, and failover
# starts automatically after three attempts get no response
#ping_interval=1
# Script run on automatic failover
#master_ip_failover_script=/usr/local/mha/scripts/master_ip_failover
# Script run on manual (online) switch
#master_ip_online_change_script=/usr/local/mha/scripts/master_ip_online_change
# Script that sends the alert after a switch
#report_script=/usr/local/mha/scripts/send_report
# Checks master availability over several network routes
#secondary_check_script=/usr/local/bin/masterha_secondary_check -s 192.168.40.171 -s 192.168.40.172 -s 192.168.40.173
# Script that powers off the failed host to prevent split brain (not used here)
#shutdown_script=""
# SSH login user
#ssh_user=root

[server1]
hostname=192.168.40.171

[server2]
hostname=192.168.40.172
# Candidate master: after a switch this slave is promoted to master even if
# it is not the slave with the latest events in the cluster
candidate_master=1
# By default MHA will not pick a slave that is more than 100M of relay logs
# behind the master, because recovering it takes a long time. With
# check_repl_delay=0 MHA ignores replication delay when choosing the new
# master; useful together with candidate_master=1, since that host must
# become the new master during the switch
check_repl_delay=0

[server3]
hostname=192.168.40.173
no_master=1

#创建目录
(4)、配置全局配置文件
[root@panda conf]# cp masterha_default.cnf masterha_default.cnf.old
[root@panda conf]# ln -s /usr/local/mha/conf/masterha_default.cnf /etc/masterha_default.cnf
[root@panda conf]# vim masterha_default.cnf    /vi /etc/masterha_default.cnf    #一定要是这个路径,不然后期 masterha_check_ssh 会提示未找到全局文件

第十:配置全局配置文件     
# Global defaults shared by all MHA applications.
# Comments must sit on their own line: Config::Tiny keeps any text that
# follows a value (such as "// ...") as part of that value.
[server default]
user=root
password=123456
ssh_user=root
repl_user=repl
repl_password=123456
# Interval between pings to the master; default is 3 seconds, and failover
# starts automatically after three attempts get no response
ping_interval=1
#master_binlog_dir= /var/lib/mysql,/var/log/mysql
secondary_check_script=/usr/local/bin/masterha_secondary_check -s 192.168.40.171 -s 192.168.40.172 -s 192.168.40.173
master_ip_failover_script="/usr/local/mha/scripts/master_ip_failover"
master_ip_online_change_script="/usr/local/mha/scripts/master_ip_online_change"
report_script="/usr/local/mha/scripts/send_report"
===================================================================
  1. 设置relay log的清除方式(在每个slave节点上)
[root@MYSQLS1 ~]# mysql -uroot -p -e 'set global relay_log_purge=0'
[root@MYSQLS2 ~]# mysql -uroot -p -e 'set global relay_log_purge=0'
注意:
MHA在发生切换的过程中,从库的恢复过程中依赖于relay log的相关信息,所以这里要将relay log的自动清除设置为OFF,采用手动清除relay log的方式。在默认情况下,从服务器上的中继日志会在SQL线程执行完毕后被自动删除。但是在MHA环境中,这些中继日志在恢复其他从服务器时可能会被用到,因此需要禁用中继日志的自动删除功能。定期清除中继日志需要考虑到复制延时的问题。在ext3的文件系统下,删除大的文件需要一定的时间,会导致严重的复制延时。为了避免复制延时,需要暂时为中继日志创建硬链接,因为在linux系统中通过硬链接删除大文件速度会很快。(在mysql数据库中,删除大表时,通常也采用建立硬链接的方式)
MHA节点中包含了purge_relay_logs命令工具,它可以为中继日志创建硬链接,执行SET GLOBAL relay_log_purge=1,等待几秒钟以便SQL线程切换到新的中继日志,再执行SET GLOBAL relay_log_purge=0。

purge_relay_logs脚本参数如下所示:

--user mysql                      用户名
--password mysql                  密码
--port                            端口号
--workdir                         指定创建relay log的硬链接的位置,默认是/var/tmp,由于系统不同分区创建硬链接文件会失败,故需要执行硬链接具体位置,成功执行脚本后,硬链接的中继日志文件被删除
--disable_relay_log_purge         默认情况下,如果relay_log_purge=1,脚本会什么都不清理,自动退出,通过设定这个参数,当relay_log_purge=1的情况下会将relay_log_purge设置为0。清理relay log之后,最后将参数设置为OFF。
设置定期清理relay脚本(两台slave服务器)
-------------------------------------------------------------------------------------------------------------------
[root@MYSQLS1 ~]# cd

[root@192.168.1.12 ~]# vim /root/purge_relay_log.sh 
#!/bin/bash
# Purge relay logs on this slave with MHA's purge_relay_logs and append the
# tool's output to a log file. Intended to be run from cron.
#
# NOTE(review): the MySQL password is passed on the command line and is
# therefore visible in `ps`; consider a restricted account for this job.
user=root
passwd=123456
port=3306
log_dir='/usr/local/mha/logs'
work_dir='/usr/local/mysql/data'
purge='/usr/local/bin/purge_relay_logs'

# Create the log directory on first run; without it the redirection below
# cannot succeed, so stop early with a non-zero status.
if [[ ! -d "$log_dir" ]]; then
  mkdir -p -- "$log_dir" || exit 1
fi

# --disable_relay_log_purge lets the tool switch relay_log_purge off itself
# when it finds it enabled, instead of exiting without cleaning anything.
"$purge" --user="$user" --password="$passwd" --disable_relay_log_purge \
  --port="$port" --workdir="$work_dir" \
  >> "$log_dir/purge_relay_logs.log" 2>&1
-------------------------------------------------------------------------------------------------------------------
添加执行权限,并添加到crontab定期执行,(两台slave服务器)
[root@slave-db1 ~]#chmod +x purge_relay_log.sh
[root@slave-db1 ~]#crontab -l
0 4 * * * /bin/bash /root/purge_relay_log.sh
-------------------------------------------------------------------------------------------------------------------
purge_relay_logs脚本删除中继日志不会阻塞SQL线程。下面我们手动执行看看什么情况。

[root@slave-db1 ~]#  purge_relay_logs --user=root --password=123456 --port=3306  --host=127.0.0.1 -disable_relay_log_purge --workdir=/usr/local/mysql/data/

-------------------------------------------------------------------------------------------------------------------
# VIP settings to put into master_ip_failover.
my $vip = '192.168.40.170/24';      # Virtual IP
my $gateway = '192.168.40.2';       # Gateway IP
# Interface that carries the VIP alias; ens33 matches the interface used when
# the VIP is first bound by hand and in the complete script listing.
my $interface = 'ens33';
my $key = "1";                      # alias number => ens33:1
# Bring the alias up, then send ARP requests from the VIP so neighbours learn
# its new location.
my $ssh_start_vip = "/sbin/ifconfig $interface:$key $vip;/sbin/arping -I $interface -c 3 -s $vip $gateway >/dev/null 2>&1";
my $ssh_stop_vip = "/sbin/ifconfig $interface:$key down";
  1. 配置报警邮件脚本
#mail邮件发送程序,需要先配置好发送者信息
    vim /etc/mail.rc

    set from=xuaiguo@163.com
    set smtp=smtp.163.com
    set smtp-auth-user=xuaiguo
    #拿163邮箱来说这个不是密码,而是授权码
    set smtp-auth-password=xag2653026
    set smtp-auth=login

    #这是具体的邮件发送脚本
    vim /usr/local/mha/scripts/send_report
  1. 配置编写VIP脚本
vi /usr/local/mha/scripts/master_ip_online_change

# VIP settings to put into master_ip_online_change.
my $vip = '192.168.40.170/24';  # Virtual IP
my $key = "1";                  # alias number => ens33:1
# ens33 is the interface used when the VIP is first bound by hand and in the
# complete script listing; keep all three places in agreement.
my $ssh_start_vip = "/sbin/ifconfig ens33:$key $vip";
my $ssh_stop_vip = "/sbin/ifconfig ens33:$key down";
my $ssh_user = "root";
# MySQL passwords used when the script connects to the new / original master.
my $new_master_password='123456';
my $orig_master_password='123456';
  1. 检查配置
检查ssh连接性
[root@panda ~]# masterha_check_ssh --conf=/usr/local/mha/conf/app1.cnf
Tue Aug 30 19:19:34 2016 - [info] All SSH connection tests passed successfully.

检查主从复制状态
[root@panda ~]# masterha_check_repl --conf=/usr/local/mha/conf/app1.cnf

MySQL Replication Health is OK.
  1. MHA Manager监控
7.开启MHA Manager监控
第十八:先在master上绑定vip,(只需要在master绑定这一次,以后会自动切换)
    /usr/sbin/ifconfig ens33:1 192.168.40.170/24
    
    /usr/sbin/ifconfig ens33:1 down #关闭vip
    
[root@monitor ~]# nohup masterha_manager --conf=/usr/local/mha/conf/app1.cnf --remove_dead_master_conf --ignore_last_failover< /dev/null >/usr/local/mha/logs/manager.log 2>&1 &

[1] 7191

[root@mha-monitor ~]# jobs 
[1]+  Running                 nohup masterha_manager --conf=/usr/local/mha/conf/app1.cnf --remove_dead_master_conf --ignore_last_failover /usr/local/mha/logs/manager.log 2>&1 &
启动参数介绍:

--remove_dead_master_conf       该参数代表当发生主从切换后,老的主库的ip将会从配置文件中移除。
--manager_log                   日志存放位置
--ignore_last_failover          在缺省情况下,如果MHA检测到连续发生宕机,且两次宕机间隔不足8小时的话,则不会进行Failover,之所以这样限制是为了避免ping-pong效应。该参数代表忽略上次MHA触发切换产生的文件,默认情况下,MHA发生切换后会在工作目录(manager_workdir),也就是上面我设置的/usr/local/mha/workstatus/app1下产生app1.failover.complete文件,下次再次切换的时候如果发现该目录下存在该文件将不允许触发切换,除非在第一次切换后手动删除该文件,为了方便,这里设置为--ignore_last_failover。
8.查看MHA Manager监控状态:

#这样 MHA 的日志保存在/usr/local/mha/logs/manager.log 下
    检查MHA的启动状态
        tailf /usr/local/mha/logs/manager.log
        #如果最后一行是如下,表明启动成功
        [info] Ping(SELECT) succeeded, waiting until MySQL doesn’t respond..

[root@mha-monitor ~]# masterha_check_status --conf=/usr/local/mha/conf/app1.cnf
app1 (pid:7191) is running(0:PING_OK), master:192.168.1.11
可以看见已经在监控了,而且master的主机为192.168.1.11

(9)、开启MHA Manager监控
[root@monitor ~]# nohup masterha_manager --conf=/usr/local/mha/conf/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /usr/local/mha/logs/manager.log 2>&1 &
在 manager 节点上重新启动监控进程
[root@monitor ~]# nohup masterha_manager --conf=/usr/local/mha/conf/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /usr/local/mha/logs/manager.log 2>&1 &
                  nohup masterha_manager --conf=/usr/local/mha/conf/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /usr/local/mha/logs/manager.log 2>&1 &

启动参数说明:
--remove_dead_master_conf:该参数代表当发生主从切换后,老的主库IP将会从配置文件中移除。
--ignore_last_failover:在缺省情况下,如果MHA检测到连续发生宕机,且两次宕机的时间间隔不足8小时的话,则不会进行Failover,之所以这样限制是为了避免ping-pong效应。该参数代表忽略上次MHA触发切换产生的文件,默认情况下,MHA发生切换会在工作目录下产生app1.failover.complete文件,下次再切换的时候如果发现目录下存在该文件将不允许切换,除非在第一次切换后手动执行 rm -f app1.failover.complete。出于方便考虑,我们可以每次在启动MHA的时候添加--ignore_last_failover参数。

(10)、关闭MHA Manager监控
[root@panda ~]# masterha_stop --conf=/usr/local/mha/conf/app1.cnf

11.手动Failover(MHA Manager必须没有运行)

手动failover,这种场景意味着在业务上没有启用MHA自动切换功能,当主服务器故障时,人工手动调用MHA来进行故障切换操作,具体命令如下:
确保mha manager关闭
[root@Manager_Slave ~]# masterha_stop --conf=/usr/local/mha/conf/app1.cnf
注意:如果MHA manager检测到没有dead的server,将报错,并结束failover:
[root@Manager_Slave ~]# masterha_master_switch --master_state=dead --conf=/usr/local/mha/conf/app1.cnf --dead_master_host=192.168.40.171 --dead_master_port=3306 --new_master_host=192.168.40.172 --new_master_port=3306 --ignore_last_failover
输出的信息会询问你是否进行切换:

12.在线进行切换(MHA Manager必须没有运行)

在许多情况下, 需要将现有的主服务器迁移到另外一台服务器上,比如主服务器硬件故障,RAID 控制卡需要重建,将主服务器移到性能更好的服务器上等等。维护主服务器引起性能下降,
导致停机时间至少无法写入数据。 另外, 阻塞或杀掉当前运行的会话会导致主主之间数据不一致的问题发生。 MHA提供快速切换和优雅的阻塞写入,这个切换过程只需要 0.5-2s 的时
间,这段时间内数据是无法写入的。在很多情况下,0.5-2s 的阻塞写入是可以接受的。因此切换主服务器不需要计划分配维护时间窗口。
 
MHA在线切换的大概过程:
1)检测复制设置和确定当前主服务器
2)确定新的主服务器
3)阻塞写入到当前主服务器
4)等待所有从服务器赶上复制
5)授予写入到新的主服务器
6)重新设置从服务器
 
注意,在线切换的时候应用架构需要考虑以下两个问题:
1)自动识别master和slave的问题(master的机器可能会切换),如果采用了vip的方式,基本可以解决这个问题。
2)负载均衡的问题(可以定义大概的读写比例,每台机器可承担的负载比例,当有机器离开集群时,需要考虑这个问题)
 
为了保证数据完全一致性,在最快的时间内完成切换,MHA的在线切换必须满足以下条件才会切换成功,否则会切换失败。
1)所有slave的IO线程都在运行
2)所有slave的SQL线程都在运行
3)所有的show slave status的输出中Seconds_Behind_Master参数小于或者等于running_updates_limit秒,如果在切换过程中不指定running_updates_limit,那么
  默认情况下running_updates_limit为1秒。
4)在master端,通过show processlist输出,没有一个更新花费的时间大于running_updates_limit秒。
 
在线切换步骤如下:
首先,manager节点上停掉MHA监控:
[root@Manager_Slave ~]# masterha_stop --conf=/usr/local/mha/conf/app1.cnf
 
其次,进行在线切换操作(模拟在线切换主库操作,原主库192.168.40.171变为slave,192.168.40.172提升为新的主库)
[root@Manager_Slave ~]# masterha_master_switch --conf=/usr/local/mha/conf/app1.cnf --master_state=alive --new_master_host=192.168.40.172 --new_master_port=3306 --orig_master_is_new_slave --running_updates_limit=10000 --interactive=0

-------------------------------------------------------------------------------------------------------------------
切换测试之:在线切换(用于硬件升级)(很好使)
#MHA 在线切换是 MHA 除了自动监控切换之外提供的另外一种方式,多用于诸如硬件升级,MySQL 数据库迁移等等。该方式提供快速切换和优雅的阻塞写入,无需关闭原有服务器,整个切换过程在 0.5-2s 的时间左右,大大减少了停机时间

第一:注意点:前提,mha监控没有运行的情况下,才能进行
    A、老master上的vip已经正确生效了
    B、各个slave节点数据库的Slave_IO_Running和Slave_SQL_Running线程都正常(即YES)
        show slave status\G;
    C、MHA脚本不能运行,若已处于监控状态,需要停掉它
        masterha_stop --conf=/usr/local/mha/conf/app1.cnf

#若是mha监控进程在运行,会报如下错误
Sat May 19 03:40:00 2018 - [error][/usr/share/perl5/vendor_perl/MHA/MasterRotate.pm, ln143] Getting advisory lock failed on the current master. MHA Monitor runs on the current master. Stop MHA Manager/Monitor and try again.
Sat May 19 03:40:00 2018 - [error][/usr/share/perl5/vendor_perl/MHA/ManagerUtil.pm, ln177] Got ERROR:  at /usr/bin/masterha_master_switch line 53.

第二:执行切换
#需要填写新的master的IP
1.原master出现故障
XXXX-->masterha_master_switch --conf=/usr/local/mha/conf/app1.cnf --master_state=dead --dead_master_host=192.168.40.171 --dead_master_port=3306 --new_master_host=192.168.40.172 --new_master_port=3306 --ignore_last_failover

2.把原master变为slave切换
XXXX-->masterha_master_switch --conf=/usr/local/mha/conf/app1.cnf --master_state=alive --new_master_host=192.168.40.172 --new_master_port=3306 --orig_master_is_new_slave

//  masterha_master_switch --conf=/usr/local/mha/conf/app1.cnf --master_state=alive --new_master_host=192.168.40.172 --orig_master_is_new_slave --running_updates_limit=10000 --interactive=0

//  masterha_master_switch --conf=/usr/local/mha/conf/app1.cnf --master_state=alive --new_master_host=192.168.40.171 --orig_master_is_new_slave --running_updates_limit=10000 --interactive=0

第三:MHA 在线切换基本步骤:
    a、检测 MHA 配置及确认当前 master
    b、决定新的 master
    c、阻塞写入到当前 master
    d、等待所有从服务器与现有 master 完成同步
    e、在新 master 授予写权限,以及并行切换从库
    f、重置原 master 为新 master 的 slave
    g、在线切换不会删除/usr/local/mha/conf/app1.cnf 配置文件中原来老的 master 配置
-------------------------------------------------------------------------------------------------------------------
9.修复master,让挂掉的主作为从继续使用,在manager上执行(192.168.0.202)

grep -i "All other slaves should start" /usr/local/mha/logs/manager.log 
[info]  All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.0.11', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='xxx';

在挂掉的主上面执行(192.168.0.11)
CHANGE MASTER TO MASTER_HOST='192.168.0.11', MASTER_PORT=3306, MASTER_AUTO_POSITION=1, MASTER_USER='repl', MASTER_PASSWORD='123456';
#设置只读
set global read_only=1;
start slave;

然后修改MHA的配置文件将新的架构配置到新的MHA配置文件当中,检查集群的状况,最后启动MHA,根据前面的讲解修改,这里不再贴出。
-------------------------------------------------------------------------------------------------------------------

13 . app1.cnf

# Final application configuration for app1.
# Comments are kept on their own lines on purpose.
[server default]
# Manager log file and working directory
manager_log=/usr/local/mha/logs/manager.log
manager_workdir=/usr/local/mha/workstatus/app1
# Where the master keeps its binlogs
master_binlog_dir=/usr/local/mysql/binlog/
# Script run on automatic failover (moves the VIP)
master_ip_failover_script=/usr/local/mha/scripts/master_ip_failover
# Password of the monitoring account given in "user" below
password=123456
port=3306
# Where the remote MySQL hosts save binlogs when a switch happens
remote_workdir=/var/log/mha/app1
# Replication account credentials
repl_password=123456
repl_user=repl
# Checks master availability over more than one network route
secondary_check_script=/usr/local/bin/masterha_secondary_check -s MYSQLS1 -s MYSQLM1 --user=root --master_host=MYSQLM1 --master_ip=192.168.40.171 --master_port=3306
#secondary_check_script=/usr/local/bin/masterha_secondary_check -s 192.168.40.171 -s 192.168.40.172 -s 192.168.40.173
# MySQL account MHA uses for monitoring
user=mha

[server1]
hostname=192.168.40.171
# Candidate master; replication delay is ignored when it is chosen
candidate_master=1
check_repl_delay=0

[server2]
hostname=192.168.40.172
# Candidate master; replication delay is ignored when it is chosen
candidate_master=1
check_repl_delay=0

[server3]
hostname=192.168.40.173
# NOTE(review): ignore_fail presumably lets a failover proceed even if this
# host is unreachable - confirm against the MHA parameter reference.
ignore_fail=1
# This host is never chosen as the new master
no_master=1

14 . master_ip_failover

#!/usr/bin/env perl

#  Copyright (C) 2011 DeNA Co.,Ltd.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#  Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

## Note: This is a sample script and is not complete. Modify the script based on your environment.

use strict;
use warnings FATAL => 'all';
use Getopt::Long;
# Values filled in from the command line by GetOptions below.
my $command;
my $ssh_user;
my ( $orig_master_host, $orig_master_ip, $orig_master_port );
my ( $new_master_host,  $new_master_ip,  $new_master_port );

# Site-specific VIP settings.
my $key       = "1";                    # alias number on the interface
my $interface = 'ens33';
my $gateway   = '192.168.40.2';         #Gateway IP
my $vip       = '192.168.40.170/24';    # Virtual IP

# Commands executed over ssh: bring the alias up and send ARP requests from
# the VIP, or take the alias down.
my $ssh_stop_vip  = "/sbin/ifconfig $interface:$key down";
my $ssh_start_vip = "/sbin/ifconfig $interface:$key $vip;/sbin/arping -I $interface -c 3 -s $vip $gateway >/dev/null 2>&1";

GetOptions(
  'command=s'          => \$command,
  'ssh_user=s'         => \$ssh_user,
  'orig_master_host=s' => \$orig_master_host,
  'orig_master_ip=s'   => \$orig_master_ip,
  'orig_master_port=i' => \$orig_master_port,
  'new_master_host=s'  => \$new_master_host,
  'new_master_ip=s'    => \$new_master_ip,
  'new_master_port=i'  => \$new_master_port,
);

# main() terminates the process itself through exit in every branch.
exit &main();
# Dispatch on --command.
#   stop / stopssh : take the VIP down on the original master
#                    (exit 0, or 1 if the eval block died)
#   start          : bring the VIP up on the new master
#                    (exit 0, or 10 if the eval block died)
#   status         : run the VIP start command on the original master, exit 0
#   anything else  : print usage, exit 1
sub main {
  print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";

  if ( $command eq "stop" || $command eq "stopssh" ) {

    # $orig_master_host, $orig_master_ip, $orig_master_port are passed.
    # If you manage master ip address at global catalog database,
    # invalidate orig_master_ip here.
    my $exit_code = 1;
    eval {
      print "Disabling the VIP on old master: $orig_master_host \n";
      &stop_vip();
      $exit_code = 0;
    };
    warn "Got Error: $@\n" if $@;
    exit $exit_code;
  }

  if ( $command eq "start" ) {

    # all arguments are passed.
    # If you manage master ip address at global catalog database,
    # activate new_master_ip here.
    # You can also grant write access (create user, set read_only=0, etc) here.
    my $exit_code = 10;
    eval {
      print "Enabling the VIP - $vip on the new master - $new_master_host \n";
      &start_vip();
      $exit_code = 0;
    };
    warn $@ if $@;
    exit $exit_code;
  }

  if ( $command eq "status" ) {
    print "Checking the Status of the script.. OK \n";

    # The status check also (re)applies the VIP on the current master.
    qx{ssh $ssh_user\@$orig_master_host " $ssh_start_vip "};
    exit 0;
  }

  &usage();
  exit 1;
}
# Run $ssh_start_vip on the new master over ssh to enable the VIP there.
# The command is echoed first; its output and exit status are not inspected.
sub start_vip() {
  print "\n\n----------------------------start_vip---------------------------------------------\n";
  my $target = "$ssh_user\@$new_master_host";
  print "ssh $target  $ssh_start_vip ";
  print "\n\n";
  qx{ssh $target " $ssh_start_vip "};
}
# Run $ssh_stop_vip on the original master over ssh to disable the VIP there.
# The command is echoed first; its output and exit status are not inspected.
sub stop_vip() {
  print "\n\n----------------------------stop_vip----------------------------------------------\n";
  my $target = "$ssh_user\@$orig_master_host";
  print "ssh $target  $ssh_stop_vip ";
  print "\n\n";
  qx{ssh $target " $ssh_stop_vip "};
}
# Print the command-line synopsis to standard output.
sub usage {
  my $synopsis =
"Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
  print $synopsis;
}

15 . master_ip_online_change

#!/usr/bin/env perl

#  Copyright (C) 2011 DeNA Co.,Ltd.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#  Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

## Note: This is a sample script and is not complete. Modify the script based on your environment.

use strict;
use warnings FATAL => 'all';

use Getopt::Long;
use MHA::DBHelper;
use MHA::NodeUtil;
use Time::HiRes qw( sleep gettimeofday tv_interval );
use Data::Dumper;

# Timing state used by sleep_until(): length of one wait slice in seconds and
# the start time of the current slice.
my $_running_interval = 0.1;
my $_tstart;

# Values filled in from the command line by GetOptions below.
my $command;
my ( $orig_master_host, $orig_master_ip, $orig_master_port, $orig_master_user );
my ( $new_master_host,  $new_master_ip,  $new_master_port,  $new_master_user );

# Site-specific settings. The ssh user and both MySQL passwords are fixed
# here; the matching command-line options are deliberately left disabled.
my $key                  = "1";
my $vip                  = '192.168.40.170/24';    # Virtual IP
my $ssh_stop_vip         = "/sbin/ifconfig ens33:$key down";
my $ssh_start_vip        = "/sbin/ifconfig ens33:$key $vip";
my $ssh_user             = "root";
my $orig_master_password = '123456';
my $new_master_password  = '123456';

GetOptions(
  'command=s' => \$command,

  #'ssh_user=s'             => \$ssh_user,
  'orig_master_host=s' => \$orig_master_host,
  'orig_master_ip=s'   => \$orig_master_ip,
  'orig_master_port=i' => \$orig_master_port,
  'orig_master_user=s' => \$orig_master_user,

  #'orig_master_password=s' => \$orig_master_password,
  'new_master_host=s' => \$new_master_host,
  'new_master_ip=s'   => \$new_master_ip,
  'new_master_port=i' => \$new_master_port,
  'new_master_user=s' => \$new_master_user,

  #'new_master_password=s'  => \$new_master_password,
);

# main() terminates the process itself through exit in every branch.
exit &main();

# Return the current local time as "<localtime string> <microseconds>",
# with the microsecond part zero-padded to six digits.
sub current_time_us {
  my @now   = gettimeofday();
  my $stamp = localtime( $now[0] );
  return $stamp . " " . sprintf( "%06d", $now[1] );
}

# Sleep for whatever is left of the current $_running_interval slice, measured
# from $_tstart. Returns immediately if the slice has already elapsed.
sub sleep_until {
  my $remaining = $_running_interval - tv_interval($_tstart);
  sleep($remaining) if $remaining > 0;
}

# Return the SHOW PROCESSLIST rows (hash refs) that still count as active
# client threads.
#
# Arguments:
#   $dbh                    - database handle used to run SHOW PROCESSLIST
#   $my_connection_id       - id of our own connection (always skipped)
#   $running_time_threshold - rows with Time below this are skipped (default 0)
#   $type                   - filter level (default 0):
#                               >= 1 also skips Sleep and Connect commands
#                               >= 2 also skips statements starting with
#                                    select / show (case-insensitive)
# Always skipped: Binlog Dump threads, the "system user", and connections
# that have been in Sleep for one second or more.
sub get_threads_util {
  my ( $dbh, $my_connection_id, $running_time_threshold, $type ) = @_;
  $running_time_threshold ||= 0;
  $type                   ||= 0;
  my @threads;

  my $sth = $dbh->prepare("SHOW PROCESSLIST");
  $sth->execute();

  while ( my $row = $sth->fetchrow_hashref() ) {
    my ( $id, $user, $cmd, $secs, $info ) =
      @{$row}{qw( Id User Command Time Info )};

    # Trim surrounding whitespace on our copy of the statement text.
    $info =~ s/^\s*(.*?)\s*$/$1/ if defined($info);

    next if $my_connection_id == $id;
    next if defined($secs) && $secs < $running_time_threshold;
    next if defined($cmd)  && $cmd eq "Binlog Dump";
    next if defined($user) && $user eq "system user";
    next
      if defined($cmd)
      && $cmd eq "Sleep"
      && defined($secs)
      && $secs >= 1;

    if ( $type >= 1 ) {
      next if defined($cmd) && ( $cmd eq "Sleep" || $cmd eq "Connect" );
    }

    if ( $type >= 2 ) {
      next if defined($info) && $info =~ m/^(?:select|show)/i;
    }

    push @threads, $row;
  }
  return @threads;
}

# Entry point: dispatch on --command.
#   stop   : make the new master read-only, wait for client threads on the
#            original master, make the original master read-only, wait again,
#            take the VIP down on the original master, then kill whatever
#            threads remain. Exits 0 on success, 1 if any step died.
#   start  : clear read_only on the new master and bring the VIP up there.
#            Exits 0 on success, 10 if any step died.
#   status : exits 0 without doing anything.
#   other  : prints usage (which dies).
# The order of the steps inside each branch matters; do not reorder them.
sub main {
  if ( $command eq "stop" ) {
    ## Gracefully killing connections on the current master
    # 1. Set read_only= 1 on the new master
    # 2. DROP USER so that no app user can establish new connections
    # 3. Set read_only= 1 on the current master
    # 4. Kill current queries
    # * Any database access failure will result in script die.
    my $exit_code = 1;
    eval {
      ## Setting read_only=1 on the new master (to avoid accident)
      my $new_master_handler = new MHA::DBHelper();

      # args: hostname, port, user, password, raise_error(die_on_error)_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        $new_master_user, $new_master_password, 1 );
      print current_time_us() . " Set read_only on the new master.. ";
      $new_master_handler->enable_read_only();
      if ( $new_master_handler->is_read_only() ) {
        print "ok.\n";
      }
      else {
        die "Failed!\n";
      }
      $new_master_handler->disconnect();

      # Connecting to the orig master, die if any database error happens
      my $orig_master_handler = new MHA::DBHelper();
      $orig_master_handler->connect( $orig_master_ip, $orig_master_port,
        $orig_master_user, $orig_master_password, 1 );

      ## Drop application user so that nobody can connect. Disabling per-session binlog beforehand
      #$orig_master_handler->disable_log_bin_local();
      #print current_time_us() . " Drpping app user on the orig master..\n";
      #FIXME_xxx_drop_app_user($orig_master_handler);

      ## Waiting for N * 100 milliseconds so that current connections can exit
      # Up to 15 polls, each paced by sleep_until() ($_running_interval seconds).
      my $time_until_read_only = 15;
      $_tstart = [gettimeofday];
      my @threads = get_threads_util( $orig_master_handler->{dbh},
        $orig_master_handler->{connection_id} );
      while ( $time_until_read_only > 0 && $#threads >= 0 ) {
        # Report progress on every fifth poll; dump the rows when there are few.
        if ( $time_until_read_only % 5 == 0 ) {
          printf
"%s Waiting all running %d threads are disconnected.. (max %d milliseconds)\n",
            current_time_us(), $#threads + 1, $time_until_read_only * 100;
          if ( $#threads < 5 ) {
            print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"
              foreach (@threads);
          }
        }
        sleep_until();
        $_tstart = [gettimeofday];
        $time_until_read_only--;
        @threads = get_threads_util( $orig_master_handler->{dbh},
          $orig_master_handler->{connection_id} );
      }

      ## Setting read_only=1 on the current master so that nobody(except SUPER) can write
      print current_time_us() . " Set read_only=1 on the orig master.. ";
      $orig_master_handler->enable_read_only();
      if ( $orig_master_handler->is_read_only() ) {
        print "ok.\n";
      }
      else {
        die "Failed!\n";
      }

      ## Waiting for M * 100 milliseconds so that current update queries can complete
      # Up to 5 further polls, paced the same way as the loop above.
      my $time_until_kill_threads = 5;
      @threads = get_threads_util( $orig_master_handler->{dbh},
        $orig_master_handler->{connection_id} );
      while ( $time_until_kill_threads > 0 && $#threads >= 0 ) {
        if ( $time_until_kill_threads % 5 == 0 ) {
          printf
"%s Waiting all running %d queries are disconnected.. (max %d milliseconds)\n",
            current_time_us(), $#threads + 1, $time_until_kill_threads * 100;
          if ( $#threads < 5 ) {
            print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"
              foreach (@threads);
          }
        }
        sleep_until();
        $_tstart = [gettimeofday];
        $time_until_kill_threads--;
        @threads = get_threads_util( $orig_master_handler->{dbh},
          $orig_master_handler->{connection_id} );
      }

      # The VIP is taken down before the remaining threads are killed.
      print "Disabling the VIP on old master: $orig_master_host \n";
                &stop_vip();

      ## Terminating all threads
      print current_time_us() . " Killing all application threads..\n";
      $orig_master_handler->kill_threads(@threads) if ( $#threads >= 0 );
      print current_time_us() . " done.\n";
      # $orig_master_handler->enable_log_bin_local();
      $orig_master_handler->disconnect();

      ## After finishing the script, MHA executes FLUSH TABLES WITH READ LOCK
      $exit_code = 0;
    };
    # $exit_code is still 1 here if anything inside the eval died.
    if ($@) {
      warn "Got Error: $@\n";
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $command eq "start" ) {
    ## Activating master ip on the new master
    # 1. Create app user with write privileges
    # 2. Moving backup script if needed
    # 3. Register new master's ip to the catalog database

# We don't return error even though activating updatable accounts/ip failed so that we don't interrupt slaves' recovery.
# If exit code is 0 or 10, MHA does not abort
    my $exit_code = 10;
    eval {
      my $new_master_handler = new MHA::DBHelper();

      # args: hostname, port, user, password, raise_error_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        $new_master_user, $new_master_password, 1 );

      ## Set read_only=0 on the new master
      # $new_master_handler->disable_log_bin_local();
      print current_time_us() . " Set read_only=0 on the new master.\n";
      $new_master_handler->disable_read_only();

      ## Creating an app user on the new master
      #print current_time_us() . " Creating app user on the new master..\n";
      #FIXME_xxx_create_app_user($new_master_handler);
      #$new_master_handler->enable_log_bin_local();
      $new_master_handler->disconnect();

      ## Update master ip on the catalog database, etc
      print "Enabling the VIP - $vip on the new master - $new_master_host \n";
      &start_vip();
      $exit_code = 0;
    };
    # $exit_code is still 10 here if anything inside the eval died.
    if ($@) {
      warn "Got Error: $@\n";
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $command eq "status" ) {

    # do nothing
    exit 0;
  }
  else {
    # usage() prints the synopsis and dies, so the exit below is a fallback.
    &usage();
    exit 1;
  }
}

# Run $ssh_start_vip on the new master over ssh to enable the VIP there.
# The command is echoed first; its output and exit status are not inspected.
sub start_vip() {
  print "\n\n----------------------------start_vip---------------------------------------------\n";
  my $target = "$ssh_user\@$new_master_host";
  print "ssh $target  $ssh_start_vip ";
  print "\n\n";
  qx{ssh $target " $ssh_start_vip "};
}
     # Run $ssh_stop_vip on the original master over ssh to disable the VIP
     # there. The command is echoed first; its output and exit status are not
     # inspected.
     sub stop_vip() {
       print "\n\n----------------------------stop_vip----------------------------------------------\n";
       my $target = "$ssh_user\@$orig_master_host";
       print "ssh $target  $ssh_stop_vip ";
       print "\n\n";
       qx{ssh $target " $ssh_stop_vip "};
     }

# Print the command-line synopsis to standard output, then die.
sub usage {
  my $synopsis =
"Usage: master_ip_online_change --command=start|stop|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
  print $synopsis;
  die;
}

16 . send_report

#!/usr/bin/perl

#  Copyright (C) 2011 DeNA Co.,Ltd.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#  Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

## Note: This is a sample script and is not complete. Modify the script based on your environment.

use strict;
use warnings FATAL => 'all';
use Mail::Sender;
use Getopt::Long;

# new_master_host and new_slave_hosts are set only when recovering master succeeded
my ( $dead_master_host, $new_master_host, $new_slave_hosts, $subject, $body );

# Returns the named environment variable when it is set to a non-empty value,
# otherwise the supplied default.
my $from_env = sub {
    my ( $name, $default ) = @_;
    my $value = $ENV{$name};
    return ( defined($value) && length($value) ) ? $value : $default;
};

# SMTP settings. Every value can be supplied through the environment so that
# account details do not have to live in this file; the literals below are the
# sample defaults this script shipped with and should be replaced or
# overridden for a real deployment.
#   MHA_MAIL_SMTP - SMTP server host
#   MHA_MAIL_FROM - sender address
#   MHA_MAIL_USER - SMTP AUTH user name
#   MHA_MAIL_PASS - SMTP AUTH password
#   MHA_MAIL_TO   - comma-separated list of recipients
my $smtp      = $from_env->( 'MHA_MAIL_SMTP', 'smtp.163.com' );
my $mail_from = $from_env->( 'MHA_MAIL_FROM', 'xuaiguo@163.com' );
my $mail_user = $from_env->( 'MHA_MAIL_USER', 'xuaiguo' );
my $mail_pass = $from_env->( 'MHA_MAIL_PASS', 'xag2653026' );
my $mail_to   = [
    split /\s*,\s*/,
    $from_env->( 'MHA_MAIL_TO', 'mark.xu@macaupass.com,xuaiguo@163.com' )
];

# Command-line options. The return value of GetOptions is not checked, so a
# parse problem does not stop the report from being sent.
GetOptions(
  'orig_master_host=s' => \$dead_master_host,
  'new_master_host=s'  => \$new_master_host,
  'new_slave_hosts=s'  => \$new_slave_hosts,
  'subject=s'          => \$subject,
  'body=s'             => \$body,
);

mailToContacts($smtp,$mail_from,$mail_user,$mail_pass,$mail_to,$subject,$body);

# Sends one plain-text UTF-8 mail through an authenticated SMTP session.
#
# Arguments (positional):
#   $smtp      - SMTP server host name
#   $mail_from - sender address
#   $user      - SMTP AUTH LOGIN user name
#   $passwd    - SMTP AUTH LOGIN password
#   $mail_to   - recipient(s), passed unchanged to Mail::Sender's "to" option
#   $subject   - mail subject
#   $msg       - mail body
#
# The SMTP conversation is logged to /usr/local/mha/logs/monitormail.log,
# which is truncated on every call; the sub dies if that file cannot be
# opened.
#
# Returns 1 when Mail::Sender accepted the message, 0 when it reported an
# error (the error text is written to STDERR).
sub mailToContacts {
    my ( $smtp, $mail_from, $user, $passwd, $mail_to, $subject, $msg ) = @_;

    my $log_file = '/usr/local/mha/logs/monitormail.log';
    open my $DEBUG, '>', $log_file
        or die "Can't open the debug      file:$!\n";

    my $sender = Mail::Sender->new(
        {   ctype       => 'text/plain; charset=utf-8',
            encoding    => 'utf-8',
            smtp        => $smtp,
            from        => $mail_from,
            auth        => 'LOGIN',
            TLS_allowed => '0',
            authid      => $user,
            authpwd     => $passwd,
            to          => $mail_to,
            subject     => $subject,
            debug       => $DEBUG
        }
    );

    # Depending on its on_errors setting, Mail::Sender reports a failure as a
    # negative error code (true in boolean context) or as undef rather than by
    # dying. Only a reference means success, so test with ref() instead of
    # plain truthiness.
    unless ( ref $sender ) {
        warn "Creating the mail sender failed: $Mail::Sender::Error\n";
        return 0;
    }

    my $result = $sender->MailMsg(
        {   msg   => $msg,
            debug => $DEBUG
        }
    );
    unless ( ref $result ) {
        warn "Sending the mail failed: $Mail::Sender::Error\n";
        return 0;
    }

    return 1;
}

# Do whatever you want here (placeholder for site-specific follow-up actions).

# Reaching this point always exits with status 0; no earlier result is consulted.
exit 0;

17 . power_manager

#!/usr/bin/env perl

#  Copyright (C) 2011 DeNA Co.,Ltd.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#  Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

## Note: This is a sample script and is not complete. Modify the script based on your environment.

use strict;
use warnings FATAL => 'all';

use Getopt::Long;
use Pod::Usage;
use Net::Telnet;
use MHA::ManagerConst;
use MHA::ManagerUtil;

# Exit status that the remote kill commands built in stopssh() use to signal
# "mysqld is gone"; stopssh() and main() pass it through unchanged.
my $SSH_STOP_OK           = 10;
# Return code of stop()/start()/status() when the power state is neither
# "on" nor "off"; main() then falls back to another mechanism.
my $COMMAND_NOT_SUPPORTED = 20;
# User name passed to ipmitool (-U) by the *_ilo subs.
my $ILO_ADMIN             = 'Administrator';
# User name sent at the DRAC telnet login prompt.
my $DRAC_ADMIN            = 'root';
# Password used for both ipmitool (-P) and the DRAC telnet login.
# NOTE(review): 'xxx' looks like a placeholder - set a real value before use.
my $PASSWORD              = 'xxx';
# Maximum number of polls (3 seconds apart) in stop()/start() while waiting
# for the power state to change.
my $max_retries           = 10;

# Run the command dispatcher. main() calls exit itself at its end, so this
# outer exit only matters if main() ever returns.
exit &main();

# Asks a logged-in DRAC telnet session for the server power state.
#
# Arguments:
#   $telnet - session object providing print() and waitfor()
#   $prompt - prompt pattern to wait for after the command has been sent
#
# Returns the lower-cased word that follows "Server power status: " in the
# command output, or "void" when no such line is present.
sub get_power_status_drac_internal {
  my ( $telnet, $prompt ) = @_;
  $telnet->print("racadm serveraction powerstatus");

  # Only the first element returned by waitfor() (the text received before
  # the prompt) is needed. A lexical is used so the caller's $_ stays intact.
  my ($output) = $telnet->waitfor($prompt);
  return "void" unless defined $output;

  my @cmd_out = split /\n/, $output;

  # discard command sent to DRAC
  shift @cmd_out;

  foreach my $line (@cmd_out) {

    # strip ansi control chars from every output line; they would otherwise
    # sit in front of the text and defeat the anchored match below
    $line =~ s/\e\[(([0-9]+;)*[0-9]+)*[ABCDfHJKmsu]//g;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    if ( $line =~ m/^Server power status: (\w+)/ ) {
      return lc($1);
    }
  }
  return "void";
}

# Sends the racadm power-down command over an open DRAC session and waits
# for the prompt; the result of waitfor() is handed back to the caller.
sub power_off_drac_internal {
  my ( $session, $ready ) = @_;
  $session->print("racadm serveraction powerdown");
  return $session->waitfor($ready);
}

# Sends the racadm power-up command over an open DRAC session and waits for
# the prompt; the result of waitfor() is handed back to the caller.
sub power_on_drac_internal {
  my ( $session, $ready ) = @_;
  $session->print("racadm serveraction powerup");
  return $session->waitfor($ready);
}

# Opens a telnet session to a DRAC and logs in with $DRAC_ADMIN / $PASSWORD.
#
# Argument: $drac_addr - address handed to Net::Telnet's open().
# Returns the list ( $telnet, $prompt ): the logged-in session and the prompt
# pattern (matches "admin1" or a literal "$") to use for later waitfor calls.
sub login_drac_internal {
  my ($drac_addr) = @_;
  my $prompt = '/admin1|\$/';
  my $telnet = Net::Telnet->new(
    Timeout => 10,
    Prompt  => $prompt,
  );
  $telnet->open($drac_addr);

  # Each step waits for the given pattern, then sends the matching reply.
  my @dialogue = (
    [ '/login/i',    $DRAC_ADMIN ],
    [ '/password/i', $PASSWORD ],
  );
  for my $step (@dialogue) {
    my ( $expected, $reply ) = @{$step};
    $telnet->waitfor($expected);
    $telnet->print($reply);
  }
  $telnet->waitfor($prompt);

  return ( $telnet, $prompt );
}

# Logs in to the DRAC at $drac_addr, issues a power-down and reads back the
# power state. Any exception raised on the way is downgraded to a warning.
# Returns the state reported afterwards, or "void" if it could not be read.
sub power_off_drac {
  my ($drac_addr) = @_;
  my $power_status = "void";
  local $@;
  eval {
    my ( $session, $ready ) = login_drac_internal($drac_addr);
    power_off_drac_internal( $session, $ready );
    $power_status = get_power_status_drac_internal( $session, $ready );
    $session->close;
  };
  warn $@ if $@;
  return $power_status;
}

# Logs in to the DRAC at $drac_addr, issues a power-up and reads back the
# power state. Any exception raised on the way is downgraded to a warning.
# Returns the state reported afterwards, or "void" if it could not be read.
sub power_on_drac {
  my ($drac_addr) = @_;
  my $power_status = "void";
  local $@;
  eval {
    my ( $session, $ready ) = login_drac_internal($drac_addr);
    power_on_drac_internal( $session, $ready );
    $power_status = get_power_status_drac_internal( $session, $ready );
    $session->close;
  };
  warn $@ if $@;
  return $power_status;
}

# Logs in to the DRAC at $drac_addr and reads the current power state.
# Any exception raised on the way is downgraded to a warning.
# Returns the reported state, or "void" if it could not be read.
sub power_status_drac {
  my ($drac_addr) = @_;
  my $power_status = "void";
  local $@;
  eval {
    my ( $session, $ready ) = login_drac_internal($drac_addr);
    $power_status = get_power_status_drac_internal( $session, $ready );
    $session->close;
  };
  warn $@ if $@;
  return $power_status;
}

# Queries the chassis power state of an iLO host through ipmitool (lanplus),
# authenticating with $ILO_ADMIN / $PASSWORD.
#
# Returns the lower-cased word after "Chassis Power is " from ipmitool's
# output, or "void" when ipmitool exits non-zero (a warning is emitted) or
# its output does not start with that phrase.
sub power_status_ilo {
  my ($ilo_addr) = @_;
  my $power_status = "void";
  local $@;
  eval {
    my $ipmi_out =
      qx{ipmitool -H $ilo_addr -U $ILO_ADMIN -P $PASSWORD -I lanplus  power status};
    if ($?) {
      die
"Failed to get power status from ipmitool. Maybe you need to upgrade ILO firmware version.\n";
    }
    chomp($ipmi_out);
    $power_status = lc($1) if $ipmi_out =~ m/^Chassis Power is (\w+)/;
  };
  warn $@ if $@;
  return $power_status;
}

# Powers an iLO host on through ipmitool, but only when it currently reports
# "off". If it already reports "on" a warning is emitted; any other state is
# passed through untouched.
# Returns the last power state obtained from power_status_ilo().
sub power_on_ilo {
  my ($ilo_addr) = @_;
  my $power_status = "void";
  local $@;
  eval {
    $power_status = power_status_ilo($ilo_addr);
    die "Power from ipmitool is already on.\n" if $power_status eq "on";

    # Only an explicit "off" triggers the power-on command and a re-check.
    if ( $power_status eq "off" ) {
      qx{ipmitool -H $ilo_addr -U $ILO_ADMIN -P $PASSWORD -I lanplus  power on};
      $power_status = power_status_ilo($ilo_addr);
    }
  };
  warn $@ if $@;
  return $power_status;
}

# Powers an iLO host off through ipmitool, but only when it currently reports
# "on". If it already reports "off" a warning is emitted; any other state is
# passed through untouched.
# Returns the last power state obtained from power_status_ilo().
sub power_off_ilo {
  my ($ilo_addr) = @_;
  my $power_status = "void";
  local $@;
  eval {
    $power_status = power_status_ilo($ilo_addr);
    die "Power from ipmitool is already off.\n" if $power_status eq "off";

    # Only an explicit "on" triggers the power-off command and a re-check.
    if ( $power_status eq "on" ) {
      qx{ipmitool -H $ilo_addr -U $ILO_ADMIN -P $PASSWORD -I lanplus  power off};
      $power_status = power_status_ilo($ilo_addr);
    }
  };
  warn $@ if $@;
  return $power_status;
}

# Reads the power state of a host through its management controller.
#
# Arguments:
#   $admin_addr  - address of the management controller
#   $server_type - "ilo" or "drac"; any other value yields "void"
# Returns the state reported by the matching power_status_* sub.
sub get_power_status {
  my ( $admin_addr, $server_type ) = @_;
  my $power_status =
      $server_type eq "ilo"  ? power_status_ilo($admin_addr)
    : $server_type eq "drac" ? power_status_drac($admin_addr)
    :                          "void";
  return $power_status;
}

# Powers a host off through its management controller and waits until the
# controller reports "off".
#
# Arguments:
#   $real_host   - host name, used in messages only
#   $admin_addr  - address of the management controller
#   $server_type - "ilo" or "drac"
# Returns 0 once the state is "off", $COMMAND_NOT_SUPPORTED when the state
# after the power-off request is neither "on" nor "off", and 1 when the host
# is still not "off" after $max_retries polls.
sub stop {
  my ( $real_host, $admin_addr, $server_type ) = @_;

  my $power_status =
      $server_type eq "ilo"  ? power_off_ilo($admin_addr)
    : $server_type eq "drac" ? power_off_drac($admin_addr)
    :                          "void";

  return $COMMAND_NOT_SUPPORTED
    if $power_status ne "off" && $power_status ne "on";

  # Still "on": poll every 3 seconds until it flips or the retries run out.
  if ( $power_status eq "on" ) {
    for ( my $attempt = 0 ; $attempt < $max_retries ; $attempt++ ) {
      $power_status = get_power_status( $admin_addr, $server_type );
      last if $power_status eq "off";
      print
"Waiting until power status becomes 'off'. Current status is $power_status ...\n";
      sleep 3;
    }
  }

  if ( $power_status eq "off" ) {
    print "Power of $real_host was successfully turned off.\n";
    return 0;
  }

  print
    "Power of $real_host was not turned off. Check the host for detail.\n";
  return 1;
}

# Tries to stop mysqld on a host over ssh so that a power-off can be avoided.
#
# Arguments:
#   $ssh_user  - remote login name
#   $real_host - host name; used in messages, and as the ssh target when
#                $real_ip is false
#   $real_ip   - ssh target address, preferred over $real_host when true
#   $pid_file  - optional mysqld pid file; when true, a kill based on this
#                file is attempted before the killall fallback
#
# Returns $SSH_STOP_OK when one of the remote commands yields
# ($SSH_STOP_OK, 0) from exec_system; otherwise prints a message and
# returns 1.
# NOTE(review): exec_system presumably returns the remote exit status and a
# second, low-order status value - confirm in MHA::ManagerUtil.
sub stopssh {
  my ( $ssh_user, $real_host, $real_ip, $pid_file ) = @_;

  # Build "user@target", preferring the IP address over the host name.
  my $ssh_user_host = $ssh_user . '@';
  if ($real_ip) {
    $ssh_user_host .= $real_ip;
  }
  else {
    $ssh_user_host .= $real_host;
  }

  my $command;
  my ( $high_ret, $low_ret );
  if ($pid_file) {

    # Remote script: exit 1 if the pid file is missing; otherwise read the
    # pid, remove the file and kill -9 the pid, then count the "ps ax" lines
    # that mention the pid file path. Exit 10 when that count is 0; if not,
    # re-check once after 1 second and exit 10 or 1 accordingly.
    $command =
"\"if [ ! -e $pid_file ]; then exit 1; fi; pid=\\\`cat $pid_file\\\`; rm -f $pid_file; kill -9 \\\$pid; a=\\\`ps ax | grep $pid_file | grep -v grep | wc | awk {'print \\\$1'}\\\`; if [ \"a\\\$a\" = \"a0\" ]; then exit 10; fi; sleep 1; a=\\\`ps ax | grep $pid_file | grep -v grep | wc | awk {'print \\\$1'}\\\`; if [ \"a\\\$a\" = \"a0\" ]; then exit 10; else exit 1; fi\"";
    ( $high_ret, $low_ret ) = MHA::ManagerUtil::exec_system(
      "ssh $ssh_user_host $MHA::ManagerConst::SSH_OPT_CHECK $command");
    if ( $high_ret == $SSH_STOP_OK && $low_ret == 0 ) {
      print "ssh reachable. mysqld stopped. power off not needed.\n";
      return $high_ret;
    }

    # The pid-file based kill did not succeed: fall through to killall.
    print "Killing mysqld instance based on $pid_file failed.\n";
  }

  print "Killing all mysqld instances on $real_host..\n";

  # Remote script: killall -9 mysqld and mysqld_safe, then exit 10 as soon as
  # "pidof mysqld" prints nothing (checked immediately and once more after
  # 1 second); exit 1 if a mysqld pid is still reported.
  $command =
"\"killall -9 mysqld mysqld_safe; a=\\\`pidof mysqld\\\`; if [ \\\"a\\\$a\\\" = \\\"a\\\" ]; then exit 10; fi; sleep 1; a=\\\`pidof mysqld\\\`; if [ \\\"a\\\$a\\\" = \\\"a\\\" ]; then exit 10; else exit 1; fi\"";
  ( $high_ret, $low_ret ) = MHA::ManagerUtil::exec_system(
    "ssh $ssh_user_host $MHA::ManagerConst::SSH_OPT_CHECK $command");
  if ( $high_ret == $SSH_STOP_OK && $low_ret == 0 ) {
    print "ssh reachable. mysqld stopped. power off not needed.\n";
    return $high_ret;
  }
  else {

    # Any other result is reported as "ssh NOT reachable", with both return
    # values included in the message for diagnosis.
    print
      "ssh NOT reachable. Power off needed (rc1=$high_ret, rc2=$low_ret).\n";
    return 1;
  }
}

# Powers a host on through its management controller and waits until the
# controller reports "on".
#
# Arguments:
#   $real_host   - host name, used in messages only
#   $admin_addr  - address of the management controller
#   $server_type - "ilo" or "drac"
# Returns 0 once the state is "on", $COMMAND_NOT_SUPPORTED when the state
# after the power-on request is neither "on" nor "off", and 1 when the host
# is still not "on" after $max_retries polls.
sub start {
  my ( $real_host, $admin_addr, $server_type ) = @_;

  my $power_status =
      $server_type eq "ilo"  ? power_on_ilo($admin_addr)
    : $server_type eq "drac" ? power_on_drac($admin_addr)
    :                          "void";

  return $COMMAND_NOT_SUPPORTED
    if $power_status ne "on" && $power_status ne "off";

  # Still "off": poll every 3 seconds until it flips or the retries run out.
  if ( $power_status eq "off" ) {
    for ( my $attempt = 0 ; $attempt < $max_retries ; $attempt++ ) {
      $power_status = get_power_status( $admin_addr, $server_type );
      last if $power_status eq "on";
      print
"Waiting until power status becomes 'on'. Current status is $power_status ...\n";
      sleep 3;
    }
  }

  if ( $power_status eq "on" ) {
    print "Power of $real_host was successfully turned on.\n";
    return 0;
  }

  print "Power of $real_host was not turned on. Check the host for detail.\n";
  return 1;
}

# Prints the current power state of a host.
#
# Arguments:
#   $real_host   - host name, used in the message only
#   $admin_addr  - address of the management controller
#   $server_type - "ilo" or "drac"
# Returns 0 when the state is "on" or "off", $COMMAND_NOT_SUPPORTED otherwise.
sub status {
  my ( $real_host, $admin_addr, $server_type ) = @_;
  my $power_status = get_power_status( $admin_addr, $server_type );
  print "Current power status on $real_host : $power_status\n";

  return 0 if $power_status eq "on" || $power_status eq "off";
  return $COMMAND_NOT_SUPPORTED;
}

# Entry point: parses the command line and dispatches to
# stopssh/start/stop/status.
#
# Exit codes as implemented below:
#   $SSH_STOP_OK (10) - stopssh/stopssh2 reached the host over ssh and mysqld
#                       was stopped; no power off is attempted.
#   0                 - the start/stop/status action (or the fallback used
#                       after $COMMAND_NOT_SUPPORTED) returned 0.
#   1                 - everything else, including a failed stopssh2.
# A failed "stopssh" falls through to a power off via stop().
sub main {

  my ( $command, $ssh_user, $host, $ip, $port, $pid_file, $help );
  GetOptions(
    'command=s'  => \$command,
    'ssh_user=s' => \$ssh_user,
    'host=s'     => \$host,
    'ip=s'       => \$ip,
    'port=i'     => \$port,
    'pid_file=s' => \$pid_file,
    'help'       => \$help,
  );

  pod2usage(0) if $help;
  pod2usage(1) unless $command;

  my $rc            = 1;
  my $ssh_stop_fail = 0;

  if ( $command eq "stopssh" || $command eq "stopssh2" ) {
    pod2usage(1) unless $ssh_user;
    pod2usage(1) unless $host;
    $rc = stopssh( $ssh_user, $host, $ip, $pid_file );
    exit $rc if $rc == $SSH_STOP_OK;

    # stopssh2 gives up here; plain stopssh escalates to a power off below.
    exit 1 if $command eq "stopssh2";
    $ssh_stop_fail = 1;
  }

  # Get server type (ilo/drac, etc) and administrative IP address.
  # FIXME_xxx is a placeholder that has to be supplied per environment.
  my ( $admin_addr, $server_type ) = FIXME_xxx( $host, $ip );
  if ( $command eq "start" ) {
    $rc = start( $host, $admin_addr, $server_type );
  }
  elsif ( $command eq "stop" || $ssh_stop_fail ) {
    $rc = stop( $host, $admin_addr, $server_type );
  }
  elsif ( $command eq "status" ) {
    $rc = status( $host, $admin_addr, $server_type );
  }
  else {
    pod2usage(1);
  }

  # Do other way to stop host
  if ( $rc == $COMMAND_NOT_SUPPORTED ) {
    $rc = FIXME_xxx( $command, $host, $ip );
  }

  exit( $rc == 0 ? 0 : 1 );
}

#############################################################################

=head1 NAME

The main purpose of this command is node fencing, so that split brain never happens.

=head1 SYNOPSIS

# power off

power_manager --command=stop --host=master_server

# kill mysqld and mysqld_safe first. If that is not successful, force a power off

power_manager --command=stopssh --host=master_server --ssh_user=root

# kill mysqld and mysqld_safe. If that is not successful, just exit.

power_manager --command=stopssh2 --host=master_server --ssh_user=root

# killing mysqld with specified pid file. This is useful when you run multiple MySQL instances and want to stop only specified instance

power_manager --command=stopssh --host=master_server --ssh_user=root --pid_file=/var/lib/mysql/mysqld.pid

# power on

power_manager --command=start --host=master_server

# checking power status

power_manager --command=status --host=master_server
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 201,552评论 5 474
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 84,666评论 2 377
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 148,519评论 0 334
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 54,180评论 1 272
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 63,205评论 5 363
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 48,344评论 1 281
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 37,781评论 3 393
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 36,449评论 0 256
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 40,635评论 1 295
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,467评论 2 317
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 37,515评论 1 329
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 33,217评论 3 318
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 38,775评论 3 303
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 29,851评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 31,084评论 1 258
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 42,637评论 2 348
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 42,204评论 2 341

推荐阅读更多精彩内容