I recently set up two open-source distributed file systems for distributed storage on Linux: GlusterFS and Ceph.
Preface ---- For the principles and the more abstract technical background, you can search GitHub and read the source code or the official documentation to learn more about GlusterFS and Ceph; I will not walk through those basics one by one here. Instead, this post covers the deployment process in detail, along with the problems I ran into. If you work on or are interested in this area and have mature production-grade solutions or suggestions, please discuss them here so we can all improve together; interested readers are also welcome to add me on WeChat (please note your request and where you found me) to exchange ideas.
Here is the detailed GlusterFS walkthrough.
My hardware configuration:
4-core CPU, 8 GB RAM, CentOS 7.1 64-bit with kernel 3.10.0, GlusterFS 3.10.2. Three disks are attached per machine: one 500 MB disk for the system and two 40 GB disks for data storage. The data is spread across 2 servers and each file is stored in 2 copies, so 4 servers are needed in total; a single server can fail without losing data.
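Before installing, each data disk needs a local filesystem and a mount point to serve as the brick's backing store. A minimal sketch, assuming the data disk shows up as /dev/vdb and is mounted at /app (both are assumptions; my machines actually back /app with an LVM volume, as the df output further down shows):
# XFS with 512-byte inodes is the commonly recommended backing filesystem for bricks
mkfs.xfs -i size=512 /dev/vdb
mkdir -p /app
echo '/dev/vdb /app xfs defaults 0 0' >> /etc/fstab
mount -a
# directory that will be used as the brick
mkdir -p /app/glusterFS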
Install the Gluster yum repository: yum -y install centos-release-gluster
Install the server:
[root@vm-10-58-57-104 glusterFS]# yum install glusterfs-server
-- Install the remaining required packages
[root@vm-10-58-57-105 /]# yum install -y glusterfs glusterfs-fuse xfsprogs
--- Enable the service at boot
[root@vm-10-58-57-105 /]# cd /bin
[root@vm-10-58-57-105 bin]# systemctl enable glusterd.service
[root@vm-10-58-57-105 bin]# systemctl start glusterd
[root@vm-10-58-57-105 bin]# ps -ef | grep gluster
root 9460 1 0 13:34 ? 00:00:00 /usr/sbin/glusterd -p /var/run/glusterd.pid --log-level INFO
root 9476 22277 0 13:34 pts/0 00:00:00 grep --color=auto gluster
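If firewalld is active on any node, Gluster's ports must be opened or probes and mounts will fail (often surfacing as "Transport endpoint is not connected"). A sketch for the default public zone; the port ranges follow the usual defaults for this release (24007 for glusterd, one port from 49152 upward per brick), so adjust to your setup:
# glusterd management ports
firewall-cmd --zone=public --add-port=24007-24008/tcp --permanent
# brick ports, one per brick on the node
firewall-cmd --zone=public --add-port=49152-49156/tcp --permanent
firewall-cmd --reload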
-- Set up the cluster peers (probing localhost is unnecessary; the failed probe below was a mistyped, incomplete IP)
[root@vm-10-58-57-104 glusterFS]# gluster peer probe 10.58.57.104
peer probe: success. Probe on localhost not needed
[root@vm-10-58-57-104 glusterFS]# gluster peer probe 10.58.57
peer probe: failed: Probe returned with Transport endpoint is not connected
[root@vm-10-58-57-104 glusterFS]# gluster peer probe 10.58.57.105
peer probe: success.
[root@vm-10-58-57-104 glusterFS]# gluster peer probe 10.58.57.100
peer probe: success.
[root@vm-10-58-57-104 glusterFS]# gluster peer status
Number of Peers: 2
Hostname: 10.58.57.105
Uuid: e9b2a1da-c50e-45e1-8d1c-02bfee4b0920
State: Peer in Cluster (Connected)
Hostname: 10.58.57.100
Uuid: 2a067d8f-b1b0-43f7-b532-0c1a29a0c60c
State: Peer in Cluster (Connected)
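It is worth confirming membership from a second node as well; every node should see the other members as Connected. `gluster pool list` gives a compact view that includes the local node:
# run on any node in the pool
gluster pool list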
-- Create a striped replicated volume (stripe 2 x replica 2 = 4 bricks)
[root@vm-10-58-57-104 glusterFS]# gluster volume create sr22 stripe 2 replica 2 transport tcp 10.58.57.104:/app/glusterFS/ 10.58.57.102:/app/glusterFS/ 10.58.57.105:/app/glusterFS/ 10.58.57.100:/app/glusterFS/
volume create: sr22: success: please start the volume to access data
[root@vm-10-58-57-104 glusterFS]# gluster volume info
Volume Name: sr22
Type: Striped-Replicate
Volume ID: 9095bcf1-256a-4c6c-aa16-1f6d2e664ed7
Status: Created
Snapshot Count: 0
Number of Bricks: 1 x 2 x 2 = 4
Transport-type: tcp
Bricks:
Brick1: 10.58.57.104:/app/glusterFS
Brick2: 10.58.57.102:/app/glusterFS
Brick3: 10.58.57.105:/app/glusterFS
Brick4: 10.58.57.100:/app/glusterFS
Options Reconfigured:
transport.address-family: inet
nfs.disable: on
[root@vm-10-58-57-104 glusterFS]# gluster volume status
Volume sr22 is not started
[root@vm-10-58-57-104 glusterFS]# gluster volume start sr22
volume start: sr22: success
[root@vm-10-58-57-104 glusterFS]# gluster volume status
Status of volume: sr22
Gluster process TCP Port RDMA Port Online Pid
------------------------------------------------------------------------------
Brick 10.58.57.104:/app/glusterFS 49152 0 Y 26251
Brick 10.58.57.102:/app/glusterFS 49152 0 Y 23797
Brick 10.58.57.105:/app/glusterFS 49152 0 Y 19907
Brick 10.58.57.100:/app/glusterFS 49152 0 Y 6879
Self-heal Daemon on localhost N/A N/A Y 26271
Self-heal Daemon on 10.58.57.100 N/A N/A Y 6899
Self-heal Daemon on 10.58.57.105 N/A N/A Y 19927
Self-heal Daemon on 10.58.57.102 N/A N/A Y 23817
Task Status of Volume sr22
------------------------------------------------------------------------------
There are no active volume tasks
---- Set up the GlusterFS client
[root@vm-10-58-57-104 app]# mkdir gs-client
[root@vm-10-58-57-104 app]# cd /bin
[root@vm-10-58-57-104 bin]# mount -t glusterfs 10.58.57.104:/sr22 /app/gs-client/
[root@vm-10-58-57-104 bin]# cd /app/gs-client
[root@vm-10-58-57-104 gs-client]# df -h;
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/centos-root 32G 1.9G 30G 6% /
devtmpfs 3.9G 0 3.9G 0% /dev
tmpfs 3.9G 0 3.9G 0% /dev/shm
tmpfs 3.9G 385M 3.6G 10% /run
tmpfs 3.9G 0 3.9G 0% /sys/fs/cgroup
/dev/mapper/centos-app 40G 7.3G 33G 19% /app
/dev/vda1 509M 120M 389M 24% /boot
tmpfs 799M 0 799M 0% /run/user/2014
tmpfs 799M 0 799M 0% /run/user/0
10.58.57.104:/sr22 80G 14G 67G 17% /app/gs-client
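A quick sanity check that data actually lands on the bricks: write a file through the mount point, then look at the brick directories on the servers. A minimal sketch (the file name and size are arbitrary; with stripe 2 each brick stores only its stripe chunks, so per-brick disk usage will be roughly half the file):
# on the client: write a 16 MB test file through the FUSE mount
dd if=/dev/zero of=/app/gs-client/testfile bs=1M count=16
# on each server: the file name should appear in the brick directory;
# du shows only that brick's share of the data
ls -l /app/glusterFS/
du -sh /app/glusterFS/testfile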
--- Check volume quotas and per-brick I/O statistics
[root@vm-10-58-57-104 app]# gluster volume quota sr22 list
quota command failed : Quota is disabled, please enable quota
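As the error says, quota must be enabled per volume before `quota ... list` returns anything. A sketch of turning it on and capping the volume root at 10 GB (the limit is just an example value):
gluster volume quota sr22 enable
# limit-usage takes a path relative to the volume root plus a size
gluster volume quota sr22 limit-usage / 10GB
gluster volume quota sr22 list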
[root@vm-10-58-57-104 app]# gluster volume profile sr22 start
Starting volume profile on sr22 has been successful
[root@vm-10-58-57-104 app]# gluster volume profile sr22 info
Brick: 10.58.57.104:/app/glusterFS
----------------------------------
Cumulative Stats:
%-latency Avg-latency Min-Latency Max-Latency No. of calls Fop
--------- ----------- ----------- ----------- ------------ ----
0.00 0.00 us 0.00 us 0.00 us 24 RELEASEDIR
Duration: 4158 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Interval 0 Stats:
%-latency Avg-latency Min-Latency Max-Latency No. of calls Fop
--------- ----------- ----------- ----------- ------------ ----
0.00 0.00 us 0.00 us 0.00 us 24 RELEASEDIR
Duration: 4158 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Brick: 10.58.57.100:/app/glusterFS
----------------------------------
Cumulative Stats:
%-latency Avg-latency Min-Latency Max-Latency No. of calls Fop
--------- ----------- ----------- ----------- ------------ ----
0.00 0.00 us 0.00 us 0.00 us 24 RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Interval 0 Stats:
%-latency Avg-latency Min-Latency Max-Latency No. of calls Fop
--------- ----------- ----------- ----------- ------------ ----
0.00 0.00 us 0.00 us 0.00 us 24 RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Brick: 10.58.57.102:/app/glusterFS
----------------------------------
Cumulative Stats:
%-latency Avg-latency Min-Latency Max-Latency No. of calls Fop
--------- ----------- ----------- ----------- ------------ ----
0.00 0.00 us 0.00 us 0.00 us 24 RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Interval 0 Stats:
%-latency Avg-latency Min-Latency Max-Latency No. of calls Fop
--------- ----------- ----------- ----------- ------------ ----
0.00 0.00 us 0.00 us 0.00 us 24 RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Brick: 10.58.57.105:/app/glusterFS
----------------------------------
Cumulative Stats:
%-latency Avg-latency Min-Latency Max-Latency No. of calls Fop
--------- ----------- ----------- ----------- ------------ ----
0.00 0.00 us 0.00 us 0.00 us 24 RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Interval 0 Stats:
%-latency Avg-latency Min-Latency Max-Latency No. of calls Fop
--------- ----------- ----------- ----------- ------------ ----
0.00 0.00 us 0.00 us 0.00 us 24 RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
[root@vm-10-58-57-104 app]# gluster volume profile sr22 stop
Stopping volume profile on sr22 has been successful
[root@vm-10-58-57-104 app]# gluster volume profile sr22 info
Profile on Volume sr22 is not started
[root@vm-10-58-57-104 app]#
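Besides `profile`, Gluster has a `top` subcommand for spotting hot files and measuring per-brick read/write performance. Two examples, based on the documented syntax for this release (the bs/count and list-cnt values are example choices):
# busiest files by open count
gluster volume top sr22 open list-cnt 10
# measured read throughput per brick
gluster volume top sr22 read-perf bs 256 count 1 list-cnt 10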
------- Next, benchmark with the iozone tool ----------
----- Install the iozone benchmarking tool
iozone is a mature, specialized open-source file system benchmarking tool; see the following for usage and documentation:
http://www.iozone.org/
http://www.iozone.org/docs/IOzone_msword_98.pdf
Install from source as follows.
Download the latest stable source release:
http://www.iozone.org/src/current/
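For example, fetch the source RPM used below (version 3-465 here; grab whatever is current):
wget http://www.iozone.org/src/current/iozone-3-465.src.rpm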
[root@vm-10-58-57-104 tools]# rpm -ivh iozone-3-465.src.rpm
Updating / installing...
1:iozone-3-465 ################################# [100%]
warning: user capps does not exist - using root
warning: group capps does not exist - using root
[root@vm-10-58-57-104 app]# cd ~/rpmbuild/SOURCES
[root@vm-10-58-57-104 SOURCES]# tar -xvf iozone3_465.tar
Finally, cd into the current directory under src:
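With this tarball, that works out to the following (a sketch; the top-level directory name is inferred from the tar file above, so confirm it after extracting):
cd ~/rpmbuild/SOURCES/iozone3_465/src/current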
[root@vm-10-58-57-104 current]# less makefile
[root@vm-10-58-57-104 current]# make lib
libasync.c libbif.c
[root@vm-10-58-57-104 current]# make linux-AMD64
Building iozone for Linux-AMD64
cc -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DNAME='"linux-AMD64"' \
   -D__AMD64__ -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE \
   -DHAVE_PREAD iozone.c -o iozone_linux-AMD64.o
cc -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \
   -DSHARED_MEM -Dlinux libbif.c -o libbif.o
cc -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \
   -D_LARGEFILE64_SOURCE libasync.c -o libasync.o
Building fileop for Linux-AMD64
cc -Wall -c -O3 fileop.c -o fileop_linux-AMD64.o
Building the pit_server
cc -c pit_server.c -o pit_server.o
cc -O3 iozone_linux-AMD64.o libbif.o libasync.o \
   -lrt -lpthread -o iozone
cc -O3 -Dlinux fileop_linux-AMD64.o -o fileop
cc -O3 -Dlinux pit_server.o -o pit_server
[root@vm-10-58-57-104 current]#
[root@vm-10-58-57-104 current]# ./fileop -h
--------------------------------------
|              Fileop                |
|        $Revision: 1.61 $           |
|                                    |
|                by                  |
|                                    |
|             Don Capps              |
--------------------------------------
fileop [-f X ]|[-l # -u #] [-s Y] [-e] [-b] [-w] [-d] [-t] [-v] [-h]
-f # Force factor. X^3 files will be created and removed.
-l # Lower limit on the value of the Force factor.
-u # Upper limit on the value of the Force factor.
-s # Optional. Sets filesize for the create/write. May use suffix 'K' or 'M'.
-e Excel importable format.
-b Output best case results.
-i # Increment force factor by this increment.
-w Output worst case results.
-d Specify starting directory.
-U Mount point to remount between tests.
-t Verbose output option.
-v Version information.
-h Help text.
The structure of the file tree is:
X number of Level 1 directories, with X number of
level 2 directories, with X number of files in each
of the level 2 directories.
Example: fileop 2
dir_1 dir_2
/ \ / \
sdir_1 sdir_2 sdir_1 sdir_2
/ \ / \ / \ / \
file_1 file_2 file_1 file_2 file_1 file_2 file_1 file_2
Each file will be created, and then Y bytes is written to the file.
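A quick fileop run against the Gluster mount, built only from the flags documented above (force factor 2 produces exactly the 2x2x2 tree shown; the -s and -d values are example choices):
# 2^3 files of 1 MB each, created under the Gluster mount, Excel-friendly output
./fileop -f 2 -s 1M -e -d /app/gs-client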
[root@vm-10-58-57-104 current]# ./pit_server -h
Usage: pit_server [-v] -p service
[root@vm-10-58-57-104 current]# ./iozone -h
iozone: help mode
Usage: iozone [-s filesize_kB] [-r record_size_kB] [-f [path]filename] [-h]
[-i test] [-E] [-p] [-a] [-A] [-z] [-Z] [-m] [-M] [-t children]
[-l min_number_procs] [-u max_number_procs] [-v] [-R] [-x] [-o]
[-d microseconds] [-F path1 path2...] [-V pattern] [-j stride]
[-T] [-C] [-B] [-D] [-G] [-I] [-H depth] [-k depth] [-U mount_point]
[-S cache_size] [-O] [-L cacheline_size] [-K] [-g maxfilesize_kB]
[-n minfilesize_kB] [-N] [-Q] [-P start_cpu] [-e] [-c] [-b Excel.xls]
[-J milliseconds] [-X write_telemetry_filename] [-w] [-W]
[-Y read_telemetry_filename] [-y minrecsize_kB] [-q maxrecsize_kB]
[-+u] [-+m cluster_filename] [-+d] [-+x multiplier] [-+p # ]
[-+r] [-+t] [-+X] [-+Z] [-+w percent dedupable] [-+y percent_interior_dedup]
[-+C percent_dedup_within]
-a Auto mode
-A Auto2 mode
-b Filename Create Excel worksheet file
-B Use mmap() files
-c Include close in the timing calculations
-C Show bytes transferred by each child in throughput testing
-d # Microsecond delay out of barrier
-D Use msync(MS_ASYNC) on mmap files
-e Include flush (fsync,fflush) in the timing calculations
-E Run extension tests
-f filename to use
-F filenames for each process/thread in throughput test
-g # Set maximum file size (in kBytes) for auto mode (or #m or #g)
-G Use msync(MS_SYNC) on mmap files
-h help
-H # Use POSIX async I/O with # async operations
-i # Test to run (0=write/rewrite, 1=read/re-read, 2=random-read/write
3=Read-backwards, 4=Re-write-record, 5=stride-read, 6=fwrite/re-fwrite
7=fread/Re-fread, 8=random_mix, 9=pwrite/Re-pwrite, 10=pread/Re-pread
11=pwritev/Re-pwritev, 12=preadv/Re-preadv)
-I Use VxFS VX_DIRECT, O_DIRECT,or O_DIRECTIO for all file operations
-j # Set stride of file accesses to (# * record size)
-J # milliseconds of compute cycle before each I/O operation
-k # Use POSIX async I/O (no bcopy) with # async operations
-K Create jitter in the access pattern for readers
-l # Lower limit on number of processes to run
-L # Set processor cache line size to value (in bytes)
-m Use multiple buffers
-M Report uname -a output
-n # Set minimum file size (in kBytes) for auto mode (or #m or #g)
-N Report results in microseconds per operation
-o Writes are synch (O_SYNC)
-O Give results in ops/sec.
-p Purge on
-P # Bind processes/threads to processors, starting with this cpu
-q # Set maximum record size (in kBytes) for auto mode (or #m or #g)
-Q Create offset/latency files
-r # record size in Kb
or -r #k .. size in kB
or -r #m .. size in MB
or -r #g .. size in GB
-R Generate Excel report
-s # file size in Kb
or -s #k .. size in kB
or -s #m .. size in MB
or -s #g .. size in GB
-S # Set processor cache size to value (in kBytes)
-t # Number of threads or processes to use in throughput test
-T Use POSIX pthreads for throughput tests
-u # Upper limit on number of processes to run
-U Mount point to remount between tests
-v version information
-V # Verify data pattern write/read
-w Do not unlink temporary file
-W Lock file when reading or writing
-x Turn off stone-walling
-X filename Write telemetry file. Contains lines with (offset reclen compute_time) in ascii
-y # Set minimum record size (in kBytes) for auto mode (or #m or #g)
-Y filename Read telemetry file. Contains lines with (offset reclen compute_time) in ascii
-z Used in conjunction with -a to test all possible record sizes
-Z Enable mixing of mmap I/O and file I/O
-+b #,# burst size (KB),sleep between burst (mili-second)
-+E Use existing non-Iozone file for read-only testing
-+F Truncate file before write in thread_mix_test
-+J Include think time (-j #) in throughput calculation
-+K Sony special. Manual control of test 8.
-+m Cluster_filename Enable Cluster testing
-+d File I/O diagnostic mode. (To troubleshoot a broken file I/O subsystem)
-+u Enable CPU utilization output (Experimental)
-+x # Multiplier to use for incrementing file and record sizes
-+p # Percentage of mix to be reads
-+r Enable O_RSYNC|O_SYNC for all testing.
-+t Enable network performance test. Requires -+m
-+n No retests selected.
-+k Use constant aggregate data set size.
-+q Delay in seconds between tests.
-+l Enable record locking mode.
-+L Enable record locking mode, with shared file.
-+B Sequential mixed workload.
-+D Enable O_DSYNC mode.
-+A # Enable madvise. 0 = normal, 1=random, 2=sequential
3=dontneed, 4=willneed
-+N Do not truncate existing files on sequential writes.
-+S # Dedup-able data is limited to sharing within each numerically
identified file set.
-+W # Add this value to the child thread ID, so that additional files
can be added while maintaining the proper dedupability with previously
existing files that are within the same seed group (-+S).
-+V Enable shared file. No locking.
-+X Enable short circuit mode for filesystem testing ONLY
ALL Results are NOT valid in this mode.
-+Z Enable old data set compatibility mode. WARNING.. Published
hacks may invalidate these results and generate bogus, high
values for results.
-+w ## Percent of dedup-able data in buffers.
-+y ## Percent of dedup-able within & across files in buffers.
-+C ## Percent of dedup-able within & not across files in buffers.
-+H Hostname Hostname of the PIT server.
-+P Service Service of the PIT server.
-+z Enable latency histogram logging.
--------------------------------------------------------------------------------------------------------------------------------
iozone is a performance benchmarking tool; plenty of material explains how to run the tests (a starting point is sketched below), and you are welcome to contact me to compare notes.
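As a first run against the Gluster mount, here is the kind of invocation I would start with, using only flags from the help output above (the sizes are example values; pick a file size larger than RAM if you want to defeat the page cache):
# sequential write/rewrite (-i 0) and read/re-read (-i 1) of a 1 GB file
# in 4 KB records, timing fsync and close too (-e -c), with an Excel report
./iozone -i 0 -i 1 -s 1g -r 4k -e -c -f /app/gs-client/iozone.tmp -R -b iozone-sr22.xls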