Playing with Redis Sentinels

Redis is an advanced key-value store that is simple to install and use. With multiple Redis instances, you can set up master-slave replication without breaking a sweat. Taking it a step further, you can run Redis Sentinel processes to monitor your master and get an automatic failover system for high availability.

Compiling redis

# install required packages
yum install gcc make tcl

# download redis tarball
curl -O http://download.redis.io/releases/redis-2.8.6.tar.gz

# extract tarball
tar xvzf redis-2.8.6.tar.gz

# change directory to extracted directory
cd redis-2.8.6

# compile redis
make

# execute tests (requires tcl)
make test

Starting with one master redis server

# redis_01.conf
daemonize yes
pidfile /var/run/redis_6379.pid
port 6379
tcp-backlog 511
timeout 0
tcp-keepalive 0
loglevel notice
logfile "./redis_6379.log"
databases 16
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump_6379.rdb
dir ./
slave-serve-stale-data yes
slave-read-only yes
repl-disable-tcp-nodelay no
slave-priority 100
appendonly no
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
lua-time-limit 5000
slowlog-log-slower-than 10000
slowlog-max-len 128
notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-entries 512
list-max-ziplist-value 64
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
activerehashing yes
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
hz 10
aof-rewrite-incremental-fsync yes

# start first redis-server
src/redis-server redis_01.conf

Adding a slave redis server

# redis_02.conf
daemonize yes
pidfile /var/run/redis_6380.pid
port 6380
tcp-backlog 511
timeout 0
tcp-keepalive 0
loglevel notice
logfile "./redis_6380.log"
databases 16
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump_6380.rdb
dir ./
slaveof 127.0.0.1 6379
slave-serve-stale-data yes
slave-read-only yes
repl-disable-tcp-nodelay no
slave-priority 100
appendonly no
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
lua-time-limit 5000
slowlog-log-slower-than 10000
slowlog-max-len 128
notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-entries 512
list-max-ziplist-value 64
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
activerehashing yes
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
hz 10
aof-rewrite-incremental-fsync yes

# start second redis-server
src/redis-server redis_02.conf

Starting up first sentinel to monitor the redis master

# sentinel_01.conf
port 26379
daemonize yes
logfile ./sentinel_26379.log
sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 30000
sentinel parallel-syncs mymaster 1
sentinel failover-timeout mymaster 180000

# start first sentinel
src/redis-sentinel sentinel_01.conf

Adding another sentinel

# sentinel_02.conf
port 26380
daemonize yes
logfile ./sentinel_26380.log
sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 30000
sentinel parallel-syncs mymaster 1
sentinel failover-timeout mymaster 180000

# start second sentinel
src/redis-sentinel sentinel_02.conf

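At this point, we already have a master-slave setup with failover/failback capability. Note that with only two sentinels and a quorum of 2, both sentinels must be running and agree that the master is down before a failover can happen; for a real deployment, run at least three sentinels on separate hosts. Now let’s do some tests.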

# connect to first sentinel
src/redis-cli -p 26379

# query sentinel for masters
127.0.0.1:26379> sentinel masters

# sample result (note the port of the master, 6379)
1)  1) "name"
    2) "mymaster"
    3) "ip"
    4) "127.0.0.1"
    5) "port"
    6) "6379"
    7) "runid"
    8) "3c05872b72f10969855d580868ad883caf6e6a4b"
    9) "flags"
   10) "master"
   11) "pending-commands"
   12) "0"
   13) "last-ok-ping-reply"
   14) "260"
   15) "last-ping-reply"
   16) "260"
   17) "info-refresh"
   18) "5509"
   19) "role-reported"
   20) "master"
   21) "role-reported-time"
   22) "236203"
   23) "config-epoch"
   24) "0"
   25) "num-slaves"
   26) "1"
   27) "num-other-sentinels"
   28) "1"
   29) "quorum"
   30) "2"
   31) "down-after-milliseconds"
   32) "30000"
   33) "failover-timeout"
   34) "180000"
   35) "parallel-syncs"
   36) "1"

# query sentinel for attached slaves
127.0.0.1:26379> sentinel slaves mymaster

# sample result (note the port of the slave, 6380)
1)  1) "name"
    2) "127.0.0.1:6380"
    3) "ip"
    4) "127.0.0.1"
    5) "port"
    6) "6380"
    7) "runid"
    8) "3a929c0e4f84a92e4998691699449ba0c23a641a"
    9) "flags"
   10) "slave"
   11) "pending-commands"
   12) "0"
   13) "last-ok-ping-reply"
   14) "958"
   15) "last-ping-reply"
   16) "958"
   17) "info-refresh"
   18) "4496"
   19) "role-reported"
   20) "slave"
   21) "role-reported-time"
   22) "325864"
   23) "master-link-down-time"
   24) "0"
   25) "master-link-status"
   26) "ok"
   27) "master-host"
   28) "127.0.0.1"
   29) "master-port"
   30) "6379"
   31) "slave-priority"
   32) "100"
   33) "slave-repl-offset"
   34) "41492"

# force a failover
127.0.0.1:26379> sentinel failover mymaster

# sample result
OK

# query sentinel for masters after failover
127.0.0.1:26379> sentinel masters

# sample result (note the new port, 6380 - slave has been promoted)
1)  1) "name"
    2) "mymaster"
    3) "ip"
    4) "127.0.0.1"
    5) "port"
    6) "6380"
    7) "runid"
    8) "3a929c0e4f84a92e4998691699449ba0c23a641a"
    9) "flags"
   10) "master"
   11) "pending-commands"
   12) "0"
   13) "last-ok-ping-reply"
   14) "591"
   15) "last-ping-reply"
   16) "591"
   17) "info-refresh"
   18) "5628"
   19) "role-reported"
   20) "master"
   21) "role-reported-time"
   22) "13084"
   23) "config-epoch"
   24) "1"
   25) "num-slaves"
   26) "1"
   27) "num-other-sentinels"
   28) "1"
   29) "quorum"
   30) "2"
   31) "down-after-milliseconds"
   32) "30000"
   33) "failover-timeout"
   34) "180000"
   35) "parallel-syncs"
   36) "1"

# query sentinel for slaves after failover
127.0.0.1:26379> sentinel slaves mymaster

# sample result (note port of new slave, 6379 - original master is now slave)
1)  1) "name"
    2) "127.0.0.1:6379"
    3) "ip"
    4) "127.0.0.1"
    5) "port"
    6) "6379"
    7) "runid"
    8) "3c05872b72f10969855d580868ad883caf6e6a4b"
    9) "flags"
   10) "slave"
   11) "pending-commands"
   12) "0"
   13) "last-ok-ping-reply"
   14) "565"
   15) "last-ping-reply"
   16) "565"
   17) "info-refresh"
   18) "4356"
   19) "role-reported"
   20) "slave"
   21) "role-reported-time"
   22) "14366"
   23) "master-link-down-time"
   24) "0"
   25) "master-link-status"
   26) "ok"
   27) "master-host"
   28) "127.0.0.1"
   29) "master-port"
   30) "6380"
   31) "slave-priority"
   32) "100"
   33) "slave-repl-offset"
   34) "59800"

Leave a Comment

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

This site uses Akismet to reduce spam. Learn how your comment data is processed.