perrynzhou

专注于系统组件研发

0%

进程间实时通信模块设计和实现

介绍

  • 基于设备文件作为数据传输介质,一切皆文件
  • 设备文件必须可以读可以写
  • 通过linux kernel的file_operations来实现设备的读写

原理

  • 设计一个内核模块:客户端A把数据写入某个字符设备,内核模块随即主动通知(唤醒)正在等待的客户端B去读取,这样就能达到进程间实时通信的效果.
  • 设备文件也是文件,通过绑定内核的file_operations的函数实现设备的open/release/read/write/mmap等接口,客户端当open/read/write时候会对应调用内核模块的open/read/write函数

实现

  • 进程A写数据
1
echo "aaaa" > /dev/memchan
  • 进程B接收数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*************************************************************************
> File Name: mem_channel_reader.c
> Author:perrynzhou
> Mail:perrynzhou@gmail.com
> Created Time: Fri 26 Jun 2020 01:35:15 PM CST
************************************************************************/

#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#define BUFFER_LENGTH 128
static const char *device_name = "/dev/memchan";
/*
 * Reader side of the channel.
 *
 * Opens the device, then loops: waits with select() until the descriptor is
 * reported readable, reads at most BUFFER_LENGTH bytes and prints them as a
 * string.
 *
 * Returns -1 when the device cannot be opened or set up, 1 when select() or
 * read() fails while running.
 */
int main(void)
{
    int fd = open(device_name, O_RDWR);
    if (fd < 0)
    {
        fprintf(stdout, "%s,err:%s\n", device_name, strerror(errno));
        return -1;
    }

    /* One spare byte so the received data can always be NUL-terminated. */
    char *buffer = calloc(1, BUFFER_LENGTH + 1);
    if (buffer == NULL)
    {
        fprintf(stdout, "%s,err:%s\n", device_name, strerror(errno));
        close(fd);
        return -1;
    }

    /* The mapping is only consumed by the disabled (#if 0) branch below. */
    char *start = mmap(NULL, BUFFER_LENGTH, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (start == MAP_FAILED)
    {
        fprintf(stdout, "%s,err:%s\n", device_name, strerror(errno));
        free(buffer);
        close(fd);
        return -1;
    }

    int rc = 0;
    for (;;)
    {
        /* select() modifies the set in place, so rebuild it on every pass. */
        fd_set rds;
        FD_ZERO(&rds);
        FD_SET(fd, &rds);

        int ret = select(fd + 1, &rds, NULL, NULL, NULL);
        if (ret < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            printf("select error\n");
            rc = 1;
            break;
        }
        if (!FD_ISSET(fd, &rds))
        {
            continue;
        }
#if 0
        snprintf(buffer, BUFFER_LENGTH + 1, "%s", start);
        printf("ntychannel: %s\n", buffer);
#else
        ssize_t nread = read(fd, buffer, BUFFER_LENGTH);
        if (nread < 0)
        {
            if (errno == EINTR || errno == EAGAIN)
            {
                continue;
            }
            fprintf(stdout, "%s,err:%s\n", device_name, strerror(errno));
            rc = 1;
            break;
        }
        /* Terminate at the reported length so stale bytes are never printed. */
        buffer[nread] = '\0';
        printf("channel: %s\n", buffer);
#endif
    }

    munmap(start, BUFFER_LENGTH);
    free(buffer);
    close(fd);

    return rc;
}
  • 内核模块
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/*************************************************************************
> File Name: mem_channel.c
> Author:perrynzhou
> Mail:perrynzhou@gmail.com
> Created Time: Fri 26 Jun 2020 12:59:42 PM CST
************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/cdev.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <linux/slab.h>
#include <linux/poll.h>

/* Major device number of the character-device region (default 110). */
#ifndef MEM_CHANNEL_MAJOR
#define MEM_CHANNEL_MAJOR 110
#endif

/*
 * Despite its name this value is used as a COUNT: it is passed as the number
 * of minors to register_chrdev_region()/cdev_add() and as the number of
 * struct mem_channel entries allocated at init time.
 */
#ifndef MEM_CHANNEL_MINOR
#define MEM_CHANNEL_MINOR 2
#endif

/* Size in bytes of each channel's data buffer. */
#ifndef MEM_CHANNEL_DATA_LENGTH
#define MEM_CHANNEL_DATA_LENGTH 4096
#endif

/* Compile-time switch for the wait-queue / poll support. */
#define ENABLE_POLL 1
#if ENABLE_POLL
/*
 * "Data available" flag: set to 1 by mem_channel_write() and tested by
 * mem_channel_read() and mem_channel_poll(). It is a single flag shared by
 * every channel and is not declared static.
 */
uint8_t is_have_data = 0;
#endif
/* Name passed to register_chrdev_region(). */
static const char *mem_channel_device = "memchan";
/* The one cdev object covering all minors of this driver. */
static struct cdev mem_channel_dev;
/*
 * State of one channel (one entry per registered minor).
 */
struct mem_channel
{
/* Buffer of MEM_CHANNEL_DATA_LENGTH bytes allocated in mem_channel_init(). */
char *data;
/* Set to MEM_CHANNEL_DATA_LENGTH at init, increased by writes and decreased by reads. */
size_t size;
#if ENABLE_POLL
/* Readers sleep here in mem_channel_read(); mem_channel_write() wakes it. */
wait_queue_head_t queue;
#endif
};
/* Array of MEM_CHANNEL_MINOR channels, allocated in mem_channel_init(). */
static struct mem_channel *chan;
/*
 * open() handler: selects the channel that belongs to the opened device node
 * and stores it in pfile->private_data for the other file operations.
 *
 * The channel index is the MINOR number of the node. The previous code used
 * the raw i_rdev value (major and minor combined), which is always larger
 * than the channel count, so every open was rejected.
 *
 * Returns 0 on success, -ENODEV when the minor is outside
 * [0, MEM_CHANNEL_MINOR).
 */
int mem_channel_open(struct inode *node, struct file *pfile)
{
    int num = (int)iminor(node);

    if (num < 0 || num >= MEM_CHANNEL_MINOR)
    {
        return -ENODEV;
    }
    pfile->private_data = &chan[num];
    printk(KERN_INFO "mem_channel_open %d device success\n", num);
    return 0;
}
/*
 * release() handler. mem_channel_open() only stores a pointer into the
 * shared channel array, so there is nothing to tear down here.
 *
 * Always returns 0.
 */
int mem_channel_release(struct inode *node, struct file *pfile)
{
    (void)node;
    (void)pfile;

    return 0;
}
/*
 * read() handler: copies up to `size` bytes of the channel buffer, starting
 * at *ppos, to user space.
 *
 * With ENABLE_POLL the call blocks until a writer has signalled data, unless
 * the file is O_NONBLOCK (-EAGAIN), and returns -ERESTARTSYS when the sleep
 * is interrupted by a signal.
 *
 * Returns the length of the NUL-terminated message found in the copied
 * range, 0 when *ppos is at or past the end of the buffer, or -EFAULT when
 * the user buffer is not writable.
 */
ssize_t mem_channel_read(struct file *pfile, char __user *buffer, size_t size, loff_t *ppos)
{
    int ret;
    unsigned long p = *ppos;
    unsigned int count = size;
    struct mem_channel *mem = pfile->private_data;

    /* Offsets at or beyond the end leave nothing to read. */
    if (p >= MEM_CHANNEL_DATA_LENGTH)
    {
        return 0;
    }
    if (count > (MEM_CHANNEL_DATA_LENGTH - p))
    {
        count = MEM_CHANNEL_DATA_LENGTH - p;
    }
#if ENABLE_POLL
    while (!is_have_data)
    {
        if (pfile->f_flags & O_NONBLOCK)
        {
            return -EAGAIN;
        }
        /* A non-zero result means a signal arrived; give up instead of spinning. */
        if (wait_event_interruptible(mem->queue, is_have_data))
        {
            return -ERESTARTSYS;
        }
    }
#endif
    if (copy_to_user(buffer, (void *)(mem->data + p), count))
    {
        ret = -EFAULT;
    }
    else
    {
        /*
         * Measure the message on the kernel copy; the user pointer must
         * never be dereferenced directly from kernel code.
         */
        ret = strnlen(mem->data + p, count);
        mem->size -= ret;
#if ENABLE_POLL
        /*
         * The message has been consumed: clear the flag so poll()/select()
         * stop reporting the device readable until the next write.
         * NOTE(review): the flag is global, i.e. shared by all channels.
         */
        is_have_data = 0;
#endif
    }
    printk(KERN_INFO "read %d bytes from %ld\n", ret, p);
    return ret;
}
/*
 * write() handler: copies up to `size` bytes from user space into the
 * channel buffer at *ppos, NUL-terminates the data and wakes any reader.
 *
 * The last byte of the buffer is reserved for the terminator, so at most
 * MEM_CHANNEL_DATA_LENGTH - 1 - *ppos bytes are accepted per call; the
 * previous code could store the terminator one byte past the allocation.
 *
 * Returns the number of bytes stored (possibly fewer than requested),
 * -ENOSPC when a non-empty write finds no room left, or -EFAULT when the
 * user buffer is not readable. Readers are only woken after a successful
 * copy.
 */
ssize_t mem_channel_write(struct file *pfile, const char __user *buffer, size_t size, loff_t *ppos)
{
    unsigned long p = *ppos;
    unsigned int count = size;
    struct mem_channel *mem = (struct mem_channel *)pfile->private_data;

    if (p >= MEM_CHANNEL_DATA_LENGTH - 1)
    {
        /* Returning 0 for a non-empty request would make callers retry forever. */
        return size ? -ENOSPC : 0;
    }
    if (count > (MEM_CHANNEL_DATA_LENGTH - 1 - p))
    {
        count = MEM_CHANNEL_DATA_LENGTH - 1 - p;
    }
    if (copy_from_user((void *)(mem->data + p), buffer, count))
    {
        return -EFAULT;
    }

    *ppos += count;
    mem->size += count;
    /* p + count <= MEM_CHANNEL_DATA_LENGTH - 1, so this stays inside the buffer. */
    *(mem->data + p + count) = '\0';
    printk(KERN_INFO "write %d bytes from %ld\n", count, p);
#if ENABLE_POLL
    is_have_data = 1;
    wake_up(&mem->queue);
#endif
    return count;
}

#if ENABLE_POLL
/*
 * poll()/select() handler: registers the caller on the channel's wait queue
 * and reports the device readable (POLLIN | POLLRDNORM) while is_have_data
 * is set; otherwise reports no events.
 */
unsigned int mem_channel_poll(struct file *pfile, struct poll_table_struct *wait)
{
    struct mem_channel *channel = pfile->private_data;

    poll_wait(pfile, &channel->queue, wait);

    return is_have_data ? (POLLIN | POLLRDNORM) : 0;
}
#endif
int mem_channel_mmap(struct file *pfile, struct vm_area_struct *vma)
{
struct mem_channel *mem = pfile->private_data;

vma->vm_flags |= VM_IO;
vma->vm_flags |= (VM_DONTEXPAND | VM_DONTDUMP);

if (remap_pfn_range(vma, vma->vm_start, virt_to_phys(mem->data) >> PAGE_SHIFT,
vma->vm_end - vma->vm_start, vma->vm_page_prot))
{
return -EAGAIN;
}
return 0;
}
/*
 * File operations bound to the character device. The poll handler is only
 * compiled when ENABLE_POLL is set, so its entry is guarded the same way.
 */
static const struct file_operations fops = {
    .owner = THIS_MODULE,
    .open = mem_channel_open,
    .release = mem_channel_release,
    .read = mem_channel_read,
    .write = mem_channel_write,
#if ENABLE_POLL
    .poll = mem_channel_poll,
#endif
    .mmap = mem_channel_mmap,
};
/*
 * Module entry point, called on insmod.
 *
 * Reserves the device-number region, allocates and initialises every
 * channel (buffer and, with ENABLE_POLL, its wait queue) and only then
 * publishes the cdev, so the device can never be opened before its state
 * exists.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired
 * so far is released again.
 */
static int mem_channel_init(void)
{
    int result;
    int i;
    dev_t devno = MKDEV(MEM_CHANNEL_MAJOR, 0);

    result = register_chrdev_region(devno, MEM_CHANNEL_MINOR, mem_channel_device);
    if (result != 0)
    {
        return result;
    }

    /* Zeroed allocation: every data pointer starts out NULL for the error path. */
    chan = kcalloc(MEM_CHANNEL_MINOR, sizeof(*chan), GFP_KERNEL);
    if (chan == NULL)
    {
        result = -ENOMEM;
        goto err_region;
    }

    for (i = 0; i < MEM_CHANNEL_MINOR; i++)
    {
        chan[i].size = MEM_CHANNEL_DATA_LENGTH;
        chan[i].data = kzalloc(MEM_CHANNEL_DATA_LENGTH, GFP_KERNEL);
        if (chan[i].data == NULL)
        {
            result = -ENOMEM;
            goto err_free;
        }
#if ENABLE_POLL
        /* The queue must be initialised before read/poll/write touch it. */
        init_waitqueue_head(&chan[i].queue);
#endif
    }

    cdev_init(&mem_channel_dev, &fops);
    mem_channel_dev.owner = THIS_MODULE;
    /* From here on the device is live, so this must come last. */
    result = cdev_add(&mem_channel_dev, devno, MEM_CHANNEL_MINOR);
    if (result != 0)
    {
        goto err_free;
    }

    printk(KERN_INFO "mem_channel init success\n");
    return 0;

err_free:
    for (i = 0; i < MEM_CHANNEL_MINOR; i++)
    {
        /* kfree(NULL) is a no-op, so partially filled arrays are fine. */
        kfree(chan[i].data);
    }
    kfree(chan);
    chan = NULL;
err_region:
    unregister_chrdev_region(devno, MEM_CHANNEL_MINOR);
    return result;
}
//when rmmod xx.ko,mem_channel_exit will be called
static void mem_channel_exit(void)
{
int i;
cdev_del(&mem_channel_dev);
for (i = 0; i < MEM_CHANNEL_MINOR; i++)
{

kfree(chan[i].data);
chan[i].data = NULL;
chan[i].size = 0;
}
if (chan != NULL)
{
kfree(chan);
chan = NULL;
}
unregister_chrdev_region(MKDEV(MEM_CHANNEL_MAJOR, 0), MEM_CHANNEL_MINOR);
printk(KERN_INFO "mem_channel_exit succes\n");
}

MODULE_AUTHOR("perrynzhou@gmail.com");
MODULE_LICENSE("GPL");

module_init(mem_channel_init);
module_exit(mem_channel_exit);
  • Makefile
1
2
3
4
5
6
7
8
9
10
11
12
# Kernel build tree; can be overridden on the command line, e.g.
#   make KERNEL_SRC=/lib/modules/<version>/build
KERNEL_SRC ?= /usr/src/kernels/3.10.0-1127.8.2.el7.x86_64

# Single-source module: kbuild derives mem_channel.ko from mem_channel.c, so
# no "<module>-objs" list is required (the former "module-objs" variable was
# not a name kbuild reads).
obj-m := mem_channel.o

.PHONY: all clean

# Build the kernel module and the user-space reader.
all:
	$(MAKE) -C $(KERNEL_SRC) M=$(PWD) modules
	gcc -g -std=gnu99 mem_channel_reader.c -o mem_channel_reader

# -f keeps "make clean" from failing when there is nothing to remove.
clean:
	rm -f *.ko *.o
	rm -rf mem_channel_reader
	rm -rf .mem_channel*
	rm -rf modules.order Module.symvers

Kernel About Linux

Kernel 计划

Kernel 架构

epoll中惊群效应解决方法

epoll惊群效应复现

  • 创建5个进程,父进程负责fork所有的子进程,然后等待子进程退出,每个子进程监听同一个端口,接受来自客户端的请求

  • 客户端连接服务端的同一个端口,服务端只有一个子进程响应该请求

  • strace每个子进程的系统调用,可以发现客户端的请求仅仅只有一个子进程响应,其他的子进程都做了无效的系统调用,这种现象叫做惊群效应,假设在10W个并发链接的情况下,这种无效的系统调用非常影响请求处理的吞吐量和处理性能。子进程10012、10014、10015,出现了“accept(3, 0x7fff66a88dc0, [16]) = -1 EAGAIN (Resource temporarily unavailable)”,接受客户端请求失败,仅仅是10013接受客户端请求成功,并且处理了客户端请求。

解决方法

  • linux 内核中accept惊群问题:linux 内核accept之所以阻塞是因为条件等待,这种现象和线程池的惊群效应是一致的,2.6内核之前是通过广播方式唤醒所有等待者,2.6内核之后改为只唤醒等待队列上的一个进程,解决了accept的惊群问题。

  • epoll的惊群问题:

    • 互斥锁:在epoll_wait前后加互斥锁会导致线程或者进程挂起,在高并发的情况下,严重影响请求处理的吞吐量,这种性能损耗基本无法接受。
    • 自旋锁: 在epoll_wait前后加spinlock,spinlock是独占的,虽然不会导致线程切换,但是会导致其他的监听进程或者线程饿死,这种情况也是无法接受的。
    • 原子操作/CAS:惊群问题通用解决方法是通过原子操作,原子操作通过汇编实现。

在使用C/C++开发时,很多人搞不清楚malloc和new、free和delete的区别,这里简单说明一下

  • 1.malloc和free是库函数;new和delete是C++的关键字
  • 2.malloc是以字节来申请堆内存;new是以类型申请堆空间
  • 3.malloc和free是单纯的申请内存和释放内存
  • 4.new负责申请内存并调用class的构造函数完成初始化;delete负责调用析构函数释放资源并归还内存
  • 5.new申请的内存使用free释放,是不会触发调用析构函数的,一般会造成资源泄露

代码示例

  • 代码
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    #include <new>
    #include <cstdlib>
    #include <iostream>
    using namespace std;
    // Demo type whose constructor and destructor announce themselves, so the
    // output shows whether an allocation/release path invoked them.
    class T {
    public:
        // Logs the call, then stores 100 in iv.
        T()
        {
            cout << "Call T()" << endl;
            iv = 100;
        }

        // Logs the call; there is nothing else to release.
        ~T()
        {
            cout << "Call ~T()" << endl;
        }

        // Returns the current value of iv.
        int GetIv()
        {
            return iv;
        }

    private:
        int iv;
    };
    // Contrasts malloc/free with new/delete for a plain int and prints the
    // values before release and the pointer values after release.
    void func()
    {
        // malloc only hands back raw bytes; the value is stored separately below.
        int *from_malloc = static_cast<int *>(malloc(sizeof(int)));
        // new can allocate and initialise in a single expression.
        int *from_new = new int(10);
        *from_malloc = 88;

        cout << "begin :*t1" << *from_malloc << ",*t2=" << *from_new << endl;

        free(from_malloc);
        delete from_new;

        // Only the pointer values are printed here; nothing is dereferenced.
        cout << "end :t1" << from_malloc << ",t2=" << from_new << endl;
    }
    // Demonstration of what happens when allocation and release families are
    // mixed. WARNING: the mismatched delete/free calls and the access after
    // delete below are undefined behaviour; they are kept only because the
    // article uses them as an illustration. Do not copy this into real code.
    int main(void)
    {
    //func();
    T *t1 = new T();// new allocates by type and calls the constructor
    T *t2 = static_cast<T *>(malloc(sizeof(T))); // malloc allocates by byte count; no constructor (or destructor) is run
    cout<<"t1.iv = "<<t1->GetIv()<<endl;

    delete t2;// runs the destructor; NOTE: t2 came from malloc, so delete is a mismatch (undefined behaviour)
    cout<<"t2.iv = "<<t2->GetIv()<<endl; // NOTE: reads the object after it was deleted
    free(t1);// memory obtained with new is released with free: the destructor is not called, so resources held by the object leak
    return 0;
    }
  • 运行
1
2
3
4
5
perryn@:/data/source/Cpp/demo:./a.out
Call T()
t1.iv = 100
t2.iv = 0
Call ~T()

nginx 简单介绍

什么是nginx?

  • nginx是用纯C开发的一套web服务器软件,运行于用户态
  • nginx 是一款web服务器,类似的产品还有apache

    nginx 能做什么?

  • 反向代理:后端服务器被代理的过程叫反向代理过程,代理后端服务器的节点叫做反向代理节点。举个例子,现在我们业务搭建了4个节点的服务集群,这4个节点是对等的,业务的一次请求不会请求4个节点的IP来完成任务,对外提供的只有一个公网IP,公网IP是请求流量的入口,这个时候就需要另外一个节点来代理内网中的4个节点集群服务,用户请求流量都是通过这个公网IP的节点进入,用nginx来做反向代理,把请求流量转发到服务集群中。
  • 正向代理: 客户端被代理的过程叫正向代理,比如我们使用企业的VPN,安装VPN后,浏览器请求google,这个时候可以使用nginx来代理你的客户端来请求google的服务。
  • 负载均衡:在反向代理中,每个被代理服务器可以设置不同的权重,用户请求到反向代理服务节点上会根据权重,把请求下发到不同权重的后端服务器上。比如每个服务节点的配置不同,为了达到配置比较好的节点能处理更多流量,配置低的节点处理少量流量的目的,我们使用nginx来为每个被代理节点设置权重来达到负载均衡。

nginx 应用场景有哪些?

  • 静态资源:访问的资源和nginx部署的节点是在同一个服务器上。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# The directory /home/perrynzhou/images contains two PNG images.
# Edit nginx.conf as follows:
server {
    listen 80;
    server_name localhost;

    #charset koi8-r;

    #access_log logs/host.access.log main;

    location / {
        root /home/perrynzhou/images/;
        index index.html index.htm;
    }
}
# Apply the change with: nginx -s reload
# The images are then reachable at e.g. http://172.25.14.71/2.png
  • API服务:代理后端API服务,比如代理redis api服务
  • 反向代理: 代理后端服务器
  • 缓存加速:nginx能够缓存请求的资源
  • 负载均衡: nginx按照权重设置代理服务或者节点的优先级,按照优先级转发请求的流量

nginx的同类产品分析

  • F5:硬件F5负载均衡器工作在数据链路层,基于mac地址的负载均衡。
  • lvs: 软件层的负载均衡,工作在网络层(IP层),基于IP地址的负载均衡。
  • haproxy:工作在传输层,基于tcp/ip的负载均衡器
  • nginx:工作在应用层的HTTP协议,基于http的负载均衡器。

nginx 进程结构

  • nginx进程结构分为master和worker进程,master进程和worker进程是1对多的关系。

  • master进程:主要用来管理worker进程,接收来自外界的信号,向各个worker进程发送信号,监控worker进程的运行状态,当worker进程退出后,会自动启动新的worker进程。master进程扮演用户和worker进程的交互接口角色,同时对进程进行监护,它不需要处理网络事件,不负责业务执行,只会通过管理worker进程来实现重启服务,平滑升级,更换日志文件,配置文件生效等功能

  • worker进程 : worker进程相互之间隔离和对等的,具有相同的几率去处理请求,所有的worker进程都是从master进程fork出来的,所有worker进程的listenfd会在新连接到来时变得可读,为保证只有一个进程处理该连接,所有worker进程在注册listenfd读事件前抢accept_mutex,抢到互斥锁的那个进程注册listenfd读事件,在读事件里调用accept接受该连接。当一个worker进程在accept这个连接之后,就开始读取请求,解析请求,处理请求,产生数据后,再返回给客户端,最后才断开连接,这样一个完整的请求就是这样的了。一个请求,完全由worker进程来处理,而且只在一个worker进程中处理。

nginx的核心模块

  • nginx 采用的模块方式组装整个nginx的功能,在编译阶段会产生一个ngx_modules.c的文件,该文件中定义了所有nginx的处理模块,其中有一个ngx_modules的数组。

  • 当一个请求同时符合多个模块的处理规则时候,按照ngx_modules数组中的顺序选择最靠前的模块优先处理。

  • 针对http的过滤模块而言则是相反的,因为http框架在初始化时候,会在ngx_modules的数组中将过滤模块按先后顺序向filter list中添加,每次添加都是添加到表头,因此针对http模块,越是靠后的模块越是优先响应http.

  • ngx_modules.c 定义

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    #include <ngx_config.h>
    #include <ngx_core.h>



    extern ngx_module_t ngx_core_module;
    extern ngx_module_t ngx_errlog_module;
    extern ngx_module_t ngx_conf_module;
    extern ngx_module_t ngx_regex_module;
    extern ngx_module_t ngx_events_module;
    extern ngx_module_t ngx_event_core_module;
    extern ngx_module_t ngx_epoll_module;
    extern ngx_module_t ngx_http_module;
    extern ngx_module_t ngx_http_core_module;
    extern ngx_module_t ngx_http_log_module;
    extern ngx_module_t ngx_http_upstream_module;
    extern ngx_module_t ngx_http_static_module;
    extern ngx_module_t ngx_http_autoindex_module;
    extern ngx_module_t ngx_http_index_module;
    extern ngx_module_t ngx_http_mirror_module;
    extern ngx_module_t ngx_http_try_files_module;
    extern ngx_module_t ngx_http_auth_basic_module;
    extern ngx_module_t ngx_http_access_module;
    extern ngx_module_t ngx_http_limit_conn_module;
    extern ngx_module_t ngx_http_limit_req_module;
    extern ngx_module_t ngx_http_geo_module;
    extern ngx_module_t ngx_http_map_module;
    extern ngx_module_t ngx_http_split_clients_module;
    extern ngx_module_t ngx_http_referer_module;
    extern ngx_module_t ngx_http_rewrite_module;
    extern ngx_module_t ngx_http_proxy_module;
    extern ngx_module_t ngx_http_fastcgi_module;
    extern ngx_module_t ngx_http_uwsgi_module;
    extern ngx_module_t ngx_http_scgi_module;
    extern ngx_module_t ngx_http_memcached_module;
    extern ngx_module_t ngx_http_empty_gif_module;
    extern ngx_module_t ngx_http_browser_module;
    extern ngx_module_t ngx_http_upstream_hash_module;
    extern ngx_module_t ngx_http_upstream_ip_hash_module;
    extern ngx_module_t ngx_http_upstream_least_conn_module;
    extern ngx_module_t ngx_http_upstream_random_module;
    extern ngx_module_t ngx_http_upstream_keepalive_module;
    extern ngx_module_t ngx_http_upstream_zone_module;
    extern ngx_module_t ngx_http_write_filter_module;
    extern ngx_module_t ngx_http_header_filter_module;
    extern ngx_module_t ngx_http_chunked_filter_module;
    extern ngx_module_t ngx_http_range_header_filter_module;
    extern ngx_module_t ngx_http_gzip_filter_module;
    extern ngx_module_t ngx_http_postpone_filter_module;
    extern ngx_module_t ngx_http_ssi_filter_module;
    extern ngx_module_t ngx_http_charset_filter_module;
    extern ngx_module_t ngx_http_userid_filter_module;
    extern ngx_module_t ngx_http_headers_filter_module;
    extern ngx_module_t ngx_http_copy_filter_module;
    extern ngx_module_t ngx_http_range_body_filter_module;
    extern ngx_module_t ngx_http_not_modified_filter_module;

    /*
     * NULL-terminated table of pointers to every module compiled into this
     * build. The entries appear in the same order as the strings in
     * ngx_module_names[] below. Per the surrounding article the file is
     * produced at build time and the order in this table determines module
     * priority - treat hand edits with care.
     */
    ngx_module_t *ngx_modules[] = {
    &ngx_core_module,
    &ngx_errlog_module,
    &ngx_conf_module,
    &ngx_regex_module,
    &ngx_events_module,
    &ngx_event_core_module,
    &ngx_epoll_module,
    &ngx_http_module,
    &ngx_http_core_module,
    &ngx_http_log_module,
    &ngx_http_upstream_module,
    &ngx_http_static_module,
    &ngx_http_autoindex_module,
    &ngx_http_index_module,
    &ngx_http_mirror_module,
    &ngx_http_try_files_module,
    &ngx_http_auth_basic_module,
    &ngx_http_access_module,
    &ngx_http_limit_conn_module,
    &ngx_http_limit_req_module,
    &ngx_http_geo_module,
    &ngx_http_map_module,
    &ngx_http_split_clients_module,
    &ngx_http_referer_module,
    &ngx_http_rewrite_module,
    &ngx_http_proxy_module,
    &ngx_http_fastcgi_module,
    &ngx_http_uwsgi_module,
    &ngx_http_scgi_module,
    &ngx_http_memcached_module,
    &ngx_http_empty_gif_module,
    &ngx_http_browser_module,
    &ngx_http_upstream_hash_module,
    &ngx_http_upstream_ip_hash_module,
    &ngx_http_upstream_least_conn_module,
    &ngx_http_upstream_random_module,
    &ngx_http_upstream_keepalive_module,
    &ngx_http_upstream_zone_module,
    /* HTTP filter modules start here. */
    &ngx_http_write_filter_module,
    &ngx_http_header_filter_module,
    &ngx_http_chunked_filter_module,
    &ngx_http_range_header_filter_module,
    &ngx_http_gzip_filter_module,
    &ngx_http_postpone_filter_module,
    &ngx_http_ssi_filter_module,
    &ngx_http_charset_filter_module,
    &ngx_http_userid_filter_module,
    &ngx_http_headers_filter_module,
    &ngx_http_copy_filter_module,
    &ngx_http_range_body_filter_module,
    &ngx_http_not_modified_filter_module,
    NULL
    };

    /*
     * NULL-terminated table of module names as strings, listed in the same
     * order as the pointers in ngx_modules[]; the two tables must be kept
     * in step with each other.
     */
    char *ngx_module_names[] = {
    "ngx_core_module",
    "ngx_errlog_module",
    "ngx_conf_module",
    "ngx_regex_module",
    "ngx_events_module",
    "ngx_event_core_module",
    "ngx_epoll_module",
    "ngx_http_module",
    "ngx_http_core_module",
    "ngx_http_log_module",
    "ngx_http_upstream_module",
    "ngx_http_static_module",
    "ngx_http_autoindex_module",
    "ngx_http_index_module",
    "ngx_http_mirror_module",
    "ngx_http_try_files_module",
    "ngx_http_auth_basic_module",
    "ngx_http_access_module",
    "ngx_http_limit_conn_module",
    "ngx_http_limit_req_module",
    "ngx_http_geo_module",
    "ngx_http_map_module",
    "ngx_http_split_clients_module",
    "ngx_http_referer_module",
    "ngx_http_rewrite_module",
    "ngx_http_proxy_module",
    "ngx_http_fastcgi_module",
    "ngx_http_uwsgi_module",
    "ngx_http_scgi_module",
    "ngx_http_memcached_module",
    "ngx_http_empty_gif_module",
    "ngx_http_browser_module",
    "ngx_http_upstream_hash_module",
    "ngx_http_upstream_ip_hash_module",
    "ngx_http_upstream_least_conn_module",
    "ngx_http_upstream_random_module",
    "ngx_http_upstream_keepalive_module",
    "ngx_http_upstream_zone_module",
    "ngx_http_write_filter_module",
    "ngx_http_header_filter_module",
    "ngx_http_chunked_filter_module",
    "ngx_http_range_header_filter_module",
    "ngx_http_gzip_filter_module",
    "ngx_http_postpone_filter_module",
    "ngx_http_ssi_filter_module",
    "ngx_http_charset_filter_module",
    "ngx_http_userid_filter_module",
    "ngx_http_headers_filter_module",
    "ngx_http_copy_filter_module",
    "ngx_http_range_body_filter_module",
    "ngx_http_not_modified_filter_module",
    NULL
    };

多进程(使用epoll/select/poll)监听同一个端口的惊群效应解决方案

1.什么是惊群效应?

  • 多个进程(A、B、C、D的worker进程)使用epoll/poll/select等函数监听同一个端口时候,当有一个TCP请求连接到该端口时候,A、B、C、D进程会被同时唤醒,但是仅仅有一个进程会accept接受来自客户端的连接,其他的进程则会挂起。
  • 举个例子,假设你去银行柜台取钱,银行有4个窗口,每个窗口的营业员都在等待客户来取钱,叫到某个号码时候(就该手持某个号码的人去柜台取钱),这个人就去柜台,在这个时候4个窗口营业员同时叫那个人去自己窗口办理,但是仅仅只会有一个窗口的营业员提供服务给你,但是这4个窗口的营业员会去“招呼”你去她哪里办理业务。4个窗口的营业员同时叫你自己的号码时候,只有一个窗口提供服务,这个效应就是惊群

2.如何产生惊群效应

  • 在早期的Linux版本中,内核对于阻塞在epoll_wait的进程,也是采用全部唤醒的机制,所以存在和accept相似的“惊群”问题。新版本的的解决方案也是只会唤醒等待队列上的第一个进程或线程,所以,新版本Linux 部分的解决了epoll的“惊群”问题。所谓部分的解决,意思就是:对于部分特殊场景,使用epoll机制,已经不存在“惊群”的问题了,但是对于大多数场景,epoll机制仍然存在“惊群”.
  • 多个进程同时监听同一个端口时候,当有请求连接到该端口时候,多个进程会被同时唤醒,但是仅仅有一个进程会accept的请求,其他的进程则会挂起

3.惊群效应影响

  • 假设有10万tcp请求,后端有64个worker进程,这些进程的唤醒和挂起之间的切换开销非常大,会严重影响服务器的处理请求的吞吐量

4.如何解决惊群效应

  • 多进程方式下,在共享内存设置一个变量,在多个进程accept请求之前针对该变量加锁,哪个进程获取到锁,哪个进程就accept请求。

5.解决惊群效应的思路

  • 在epoll_wait返回,获取锁,如果获得锁就继续accept新的request;否则继续epoll_wait

6.惊群效应例子

  • 运行实例


  • 代码
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
    175
    176
    177
    178
    179
    180
    /*************************************************************************
    > File Name: epoll_test.c
    > Author:perrynzhou
    > Mail:perrynzhou@gmail.com
    > Created Time: Tuesday, June 23, 2020 AM08:45:28 HKT
    ************************************************************************/

    #include <stdio.h>
    #include <assert.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/epoll.h>
    #include <netdb.h>
    #include <string.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <stdlib.h>
    #include <errno.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    #define NET_SERVICE_BUFFER_LEN (1024)

    /*
     * State of the demo server; it is filled in by net_service_init() and
     * used by every worker process forked in net_service_run().
     */
    typedef struct net_service_t
    {
    /* Listening socket created in net_service_create_and_bind(). */
    int sfd;
    /* epoll instance created in net_service_init(). */
    int efd;
    /* Number of worker processes net_service_run() forks. */
    int worker_process_num;
    /* Registration record used to add sfd to the epoll instance. */
    struct epoll_event event;
    /* Array that receives ready events from epoll_wait(). */
    struct epoll_event *events;
    } net_service;
    /*
     * Accepts one pending connection on the service's listening socket.
     * Returns the new descriptor, or -1 as reported by accept().
     */
    inline static int net_service_accept_request(net_service *ns)
    {
        struct sockaddr peer;
        socklen_t peer_len = sizeof(struct sockaddr);

        return accept(ns->sfd, &peer, &peer_len);
    }
    /*
     * Writes the dotted-quad IPv4 address of the peer connected to `newfd`
     * into `buf` (capacity `sz` bytes, always NUL-terminated when sz > 0).
     *
     * When the peer address cannot be obtained `buf` is set to the empty
     * string instead of formatting an uninitialised address. Nothing is
     * written when `buf` is NULL or `sz` is 0.
     */
    inline static void net_service_fetch_client_addr(int newfd, char *buf, size_t sz)
    {
        struct sockaddr_in addr;
        socklen_t addr_size = sizeof(addr);

        if (buf == NULL || sz == 0)
        {
            return;
        }
        buf[0] = '\0';

        if (getpeername(newfd, (struct sockaddr *)&addr, &addr_size) != 0)
        {
            return;
        }
        /* snprintf always terminates, unlike strncpy on an exact-fit copy. */
        snprintf(buf, sz, "%s", inet_ntoa(addr.sin_addr));
    }
    /*
     * Worker loop (never returns): waits on the shared epoll instance and,
     * for every event on the listening socket, accepts the connection, logs
     * the peer address and closes the connection again. `k` is the worker's
     * index and is only used in the log lines.
     */
    void net_service_handle_request(net_service *ns, int k)
    {
        const int capacity = 1024;
        struct epoll_event *ready = ns->events;
        const int listen_fd = ns->sfd;
        const int epoll_fd = ns->efd;

        for (;;)
        {
            int nready = epoll_wait(epoll_fd, ready, capacity, -1);
            /* Short pause before handling the events (kept from the demo as written). */
            usleep(100);

            for (int idx = 0; idx < nready; idx++)
            {
                struct epoll_event *ev = &ready[idx];

                if (ev->events & EPOLLERR)
                {
                    fprintf(stdout, "epoll error\n");
                    close(ev->data.fd);
                    continue;
                }
                if (ev->data.fd != listen_fd)
                {
                    continue;
                }

                int conn = net_service_accept_request(ns);
                if (conn == -1)
                {
                    /* Another worker took the connection; drop the rest of this batch. */
                    fprintf(stdout, "worker-%d-[%d] process return from epoll_wait,accept failed\n", k, getpid());
                    break;
                }

                char peer[NET_SERVICE_BUFFER_LEN];
                net_service_fetch_client_addr(conn, peer, NET_SERVICE_BUFFER_LEN);
                fprintf(stdout, "worker-%d-[%d] process return from epoll_wait,accept %s success\n", k, getpid(), peer);
                close(conn);
            }
        }
    }
    /*
     * Creates a TCP socket and binds it to the IPv4 address `addr` and
     * `port`.
     *
     * On success the descriptor is stored in ns->sfd and 0 is returned. On
     * any failure (unparsable address, socket() or bind() error) a message
     * is printed, ns->sfd is set to -1 and -1 is returned.
     */
    static int net_service_create_and_bind(net_service *ns, const char *addr, int port)
    {
        struct sockaddr_in serveraddr;
        int fd;

        ns->sfd = -1;

        /* Zero the whole structure so padding/sin_zero never holds stack garbage. */
        memset(&serveraddr, 0, sizeof(serveraddr));
        serveraddr.sin_family = AF_INET;
        serveraddr.sin_port = htons(port);
        if (inet_pton(AF_INET, addr, &serveraddr.sin_addr) != 1)
        {
            fprintf(stderr, "invalid IPv4 address: %s\n", addr);
            return -1;
        }

        fd = socket(PF_INET, SOCK_STREAM, 0);
        if (fd == -1)
        {
            perror("socket");
            return -1;
        }
        if (bind(fd, (struct sockaddr *)&serveraddr, sizeof(serveraddr)) == -1)
        {
            perror("bind");
            close(fd);
            return -1;
        }

        ns->sfd = fd;
        return 0;
    }
    /*
     * Puts the listening socket into non-blocking mode and enables
     * SO_REUSEADDR on it.
     *
     * Returns 0 on success, -1 (after printing the reason) when fcntl() or
     * setsockopt() fails.
     *
     * NOTE(review): SO_REUSEADDR influences bind(); net_service_init()
     * calls this function after the socket has been bound, so the option
     * may come too late to matter - confirm the intended call order.
     */
    static int net_service_setsockopt(net_service *ns)
    {
        int flags = fcntl(ns->sfd, F_GETFL, 0);
        if (flags == -1)
        {
            perror("fcntl");
            return -1;
        }
        if (fcntl(ns->sfd, F_SETFL, flags | O_NONBLOCK) == -1)
        {
            perror("fcntl");
            return -1;
        }

        /* Apply the option to the service socket (previously an uninitialised fd was used). */
        int reuse = 1;
        if (setsockopt(ns->sfd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) == -1)
        {
            perror("setsockopt");
            return -1;
        }
        return 0;
    }
    /*
     * Prepares a service: creates and binds the listening socket, configures
     * it, starts listening, creates the epoll instance, registers the socket
     * for EPOLLIN and allocates the event array.
     *
     * `num` is the number of worker processes net_service_run() will fork.
     *
     * Returns 0 on success and -1 on failure (previously -1 was returned in
     * both cases). On failure every descriptor opened here is closed again
     * and `ns` is left in a defined state: descriptors -1, events NULL and
     * a worker count of 0.
     */
    int net_service_init(net_service *ns, const char *addr, int port, int num)
    {
        int max_events = 1024;

        if (ns == NULL)
        {
            return -1;
        }
        ns->sfd = -1;
        ns->efd = -1;
        ns->events = NULL;
        ns->worker_process_num = 0;

        if (addr == NULL || net_service_create_and_bind(ns, addr, port) == -1)
        {
            return -1;
        }
        if (net_service_setsockopt(ns) == -1)
        {
            goto fail;
        }
        if (listen(ns->sfd, 1024) == -1)
        {
            perror("listen");
            goto fail;
        }

        ns->efd = epoll_create(max_events);
        if (ns->efd == -1)
        {
            perror("epoll_create");
            goto fail;
        }
        ns->event.data.fd = ns->sfd;
        ns->event.events = EPOLLIN;
        if (epoll_ctl(ns->efd, EPOLL_CTL_ADD, ns->sfd, &ns->event) == -1)
        {
            perror("epoll_ctl");
            goto fail;
        }

        ns->events = calloc(max_events, sizeof(struct epoll_event));
        if (ns->events == NULL)
        {
            perror("calloc");
            goto fail;
        }

        ns->worker_process_num = num;
        return 0;

    fail:
        if (ns->efd != -1)
        {
            close(ns->efd);
            ns->efd = -1;
        }
        if (ns->sfd != -1)
        {
            close(ns->sfd);
            ns->sfd = -1;
        }
        return -1;
    }
    /*
     * Forks ns->worker_process_num worker processes, each running
     * net_service_handle_request(), then blocks in the parent until no
     * child is left.
     *
     * Returns 0 when every worker was started, -1 when a fork() failed (the
     * workers already started are still waited for).
     */
    int net_service_run(net_service *ns)
    {
        int rc = 0;

        for (int i = 0; i < ns->worker_process_num; i++)
        {
            /* Flush stdio first so buffered output is not duplicated in the child. */
            fflush(NULL);
            pid_t pid = fork();
            if (pid == -1)
            {
                perror("fork");
                rc = -1;
                break;
            }
            if (pid == 0)
            {
                fprintf(stdout, "start worker-%d-%d\n", i, getpid());
                net_service_handle_request(ns, i);
                /* A worker must never fall through into the parent's fork loop. */
                _exit(0);
            }
        }

        for (;;)
        {
            if (waitpid(-1, NULL, 0) == -1)
            {
                if (errno == EINTR)
                {
                    continue;
                }
                /* ECHILD (or any other error): nothing left to wait for. */
                break;
            }
        }
        return rc;
    }
    /*
     * Releases what the service holds: closes the epoll and listening
     * descriptors (when they are not -1), marks both as -1 and frees the
     * event array. A NULL `ns` is ignored.
     */
    void net_service_deinit(net_service *ns)
    {
        if (ns == NULL)
        {
            return;
        }

        if (ns->efd != -1)
        {
            close(ns->efd);
        }
        if (ns->sfd != -1)
        {
            close(ns->sfd);
        }
        ns->efd = -1;
        ns->sfd = -1;

        /* free(NULL) is a no-op, so no separate NULL test is required. */
        free(ns->events);
        ns->events = NULL;
    }
    /*
     * Entry point of the demo: sets up the service on 127.0.0.1:9988 with
     * four workers, runs it and releases it afterwards.
     */
    int main(void)
    {
        const char *listen_addr = "127.0.0.1";
        const int listen_port = 9988;
        const int workers = 4;
        net_service net;

        net_service_init(&net, listen_addr, listen_port, workers);
        fprintf(stdout, "run on %s:%d\n", listen_addr, listen_port);
        net_service_run(&net);
        net_service_deinit(&net);

        return 0;
    }

linux 锁

linux中有哪几种临界资源访问保护机制

  • 互斥锁:核心以pthread_mutex_t为核心
  • 自旋锁:核心以pthread_spinlock_t为核心
  • 原子操作:自定义C函数封装汇编

各种临界资源保护机制的用法

  • pthread_mutex_t:在多线程情况下,某个线程进入以pthread_mutex_t加锁的临界资源时,如果无法获取锁就会被挂起并发生线程切换,直到持有锁的线程释放锁后才被唤醒;适用于临界区执行时间较长的情况。
  • pthread_spinlock_t:多线程情况下,不会产生线程切换,某个线程进入以pthread_spinlock_t加锁的临界资源时候,线程无法获取锁就会自旋等待,这个pthread_spinlock_t用在操作非常简单,等待时间非常短的情况下。
  • 原子操作:在函数内部嵌入asm汇编,用一条汇编指令表达。

i++ 自增操作

  • i++ 操作:i++操作编译成汇编后,都是三条指令,线程执行任何一个汇编指令时候都可能发生线程切换,多线程情况下不能保证i++是原子操作
    1
    2
    3
    1.mov [var],%ebx
    2.inc %ebx
    3.mov %ebx,[var]

tcp/ip 三次握手

image-20200620161218149

  • tcp/ip和系统api有啥关系
    • tcp/ip基于ip层来提供稳定的有连接的服务。tcp/ip编程中的socket是有几个元素组成,分别是:(文件描述符:源端IP、源端端口、目标端IP、目标端端口、协议)组成
    • tcp/ip中的发起第一次和服务端握手,体现在api中的connect是发起连接,第二次和第三次握手都是在网络协议栈中完成,其中服务端的api 中的accept完成了协议栈中从syn队列中取出握手信息到accept队列中。
  • read和recv区别?write和send区别?
    • linux系统调用read读取到fd,进入do_read函数,发现是网络socket套接字,最终执行的recv的函数;
    • 同理,write函数作用于网络socket时最终执行的是send函数。read/write对应的是文件描述符;send/recv对应的是网络协议栈中的源端IP、源端端口、目标端IP、目标端端口、协议.一切皆文件是linux的抽象

glusterfs 如何定位一个文件位置

  • volume 信息
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# gluster volume info

Volume Name: rep3_vol
Type: Distributed-Replicate
Volume ID: 73360fc2-e105-4fd4-9b92-b5fa333ba75d
Status: Started
Snapshot Count: 0
Number of Bricks: 12 x 3 = 36
Transport-type: tcp
Bricks:
Brick1: 10.193.189.153:/debug/glusterfs/rep3_vol/brick
Brick2: 10.193.189.154:/debug/glusterfs/rep3_vol/brick
Brick3: 10.193.189.155:/debug/glusterfs/rep3_vol/brick
Brick4: 10.193.189.153:/data2/brick_rep3_vol
Brick5: 10.193.189.154:/data2/brick_rep3_vol
Brick6: 10.193.189.155:/data2/brick_rep3_vol
Brick7: 10.193.189.153:/data3/brick_rep3_vol
Brick8: 10.193.189.154:/data3/brick_rep3_vol
Brick9: 10.193.189.155:/data3/brick_rep3_vol
Brick10: 10.193.189.153:/data4/brick_rep3_vol
Brick11: 10.193.189.154:/data4/brick_rep3_vol
Brick12: 10.193.189.155:/data4/brick_rep3_vol
Brick13: 10.193.189.153:/data5/brick_rep3_vol
Brick14: 10.193.189.154:/data5/brick_rep3_vol
Brick15: 10.193.189.155:/data5/brick_rep3_vol
Brick16: 10.193.189.153:/data6/brick_rep3_vol
Brick17: 10.193.189.154:/data6/brick_rep3_vol
Brick18: 10.193.189.155:/data6/brick_rep3_vol
Brick19: 10.193.189.153:/data7/brick_rep3_vol
Brick20: 10.193.189.154:/data7/brick_rep3_vol
Brick21: 10.193.189.155:/data7/brick_rep3_vol
Brick22: 10.193.189.153:/data8/brick_rep3_vol
Brick23: 10.193.189.154:/data8/brick_rep3_vol
Brick24: 10.193.189.155:/data8/brick_rep3_vol
Brick25: 10.193.189.153:/data9/brick_rep3_vol
Brick26: 10.193.189.154:/data9/brick_rep3_vol
Brick27: 10.193.189.155:/data9/brick_rep3_vol
Brick28: 10.193.189.153:/data10/brick_rep3_vol
Brick29: 10.193.189.154:/data10/brick_rep3_vol
Brick30: 10.193.189.155:/data10/brick_rep3_vol
Brick31: 10.193.189.153:/data11/brick_rep3_vol
Brick32: 10.193.189.154:/data11/brick_rep3_vol
Brick33: 10.193.189.155:/data11/brick_rep3_vol
Brick34: 10.193.189.153:/data12/brick_rep3_vol
Brick35: 10.193.189.154:/data12/brick_rep3_vol
Brick36: 10.193.189.155:/data12/brick_rep3_vol
Options Reconfigured:
cluster.lookup-unhashed: off
features.shard-block-size: 8GB
features.shard: on
diagnostics.brick-log-level: INFO
performance.client-io-threads: off
nfs.disable: on
storage.fips-mode-rchecksum: on
transport.address-family: inet
diagnostics.client-log-level: DEBUG
  • cp或者rm一个文件,无论是dht/afr的模式,glusterfs都会先定位这个文件在哪个节点的哪个brick上,其使用的查找方法是dht_layout_search。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Looks up the subvolume responsible for `name` (a file or directory name).
 *
 * The name is hashed with dht_hash_compute(); the layout entries are then
 * scanned for the first one whose [start, stop] range contains the hash.
 *
 * Returns that entry's xlator, or NULL when hashing fails or no range
 * matches. The result of dht_hash_compute() is now checked: previously a
 * failed hash computation was ignored and the search ran with hash 0.
 */
xlator_t *dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
{
    uint32_t hash = 0;
    xlator_t *subvol = NULL;
    int i = 0;
    int ret = 0;

    /* Hash the name according to the layout's hash type. */
    ret = dht_hash_compute(this, layout->type, name, &hash);
    if (ret != 0) {
        goto out;
    }

    /*
     * Author's note (not verifiable from this excerpt): for a distribute
     * volume layout->cnt is the number of bricks; for a replicated volume it
     * is the number of replica sets, e.g. 12 for a 12 x 3 volume.
     */
    for (i = 0; i < layout->cnt; i++) {
        if (layout->list[i].start <= hash && layout->list[i].stop >= hash) {
            /*
             * Author's note: layout->list[i].xlator stands for one
             * rep3_vol-replicate-{n}, which consists of three
             * rep3_vol-client-{n} translators, each holding the connection
             * to one glusterfsd process.
             */
            subvol = layout->list[i].xlator;
            break;
        }
    }

out:
    return subvol;
}
  • dht_layout_search并不是被显式调用的,而是在dht_init_methods中注册到methods->layout_search函数指针上,之后通过该指针间接调用
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    /*
     * Populate the method table stored in the xlator's private
     * configuration.
     *
     * Installs the migration callbacks and registers dht_layout_search as
     * the layout_search method; migration_other is left NULL.
     *
     * Returns 0 on success, or -1 when GF_VALIDATE_OR_GOTO rejects `this`
     * and jumps to the err label.
     */
    int
    dht_init_methods(xlator_t *this)
    {
        dht_conf_t *priv = NULL;
        int rc = -1;

        GF_VALIDATE_OR_GOTO("dht", this, err);

        priv = this->private;

        /* Layout lookup entry point. */
        priv->methods.layout_search = dht_layout_search;

        /* Migration callbacks. */
        priv->methods.migration_other = NULL;
        priv->methods.migration_needed = dht_migration_needed;
        priv->methods.migration_get_dst_subvol = dht_migration_get_dst_subvol;

        rc = 0;
    err:
        return rc;
    }
  • Final graph
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
Final graph:
+------------------------------------------------------------------------------+
volume rep3_vol-client-0
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-0-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /debug/glusterfs/rep3_vol/brick
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-1
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-1-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /debug/glusterfs/rep3_vol/brick
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-2
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-2-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /debug/glusterfs/rep3_vol/brick
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-0
type cluster/replicate
option afr-pending-xattr rep3_vol-client-0,rep3_vol-client-1,rep3_vol-client-2
option use-compound-fops off
subvolumes rep3_vol-client-0 rep3_vol-client-1 rep3_vol-client-2
end-volume

volume rep3_vol-client-3
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-3-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data2/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-4
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-4-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data2/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-5
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-5-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data2/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-1
type cluster/replicate
option afr-pending-xattr rep3_vol-client-3,rep3_vol-client-4,rep3_vol-client-5
option use-compound-fops off
subvolumes rep3_vol-client-3 rep3_vol-client-4 rep3_vol-client-5
end-volume

volume rep3_vol-client-6
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-6-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data3/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-7
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-7-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data3/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-8
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-8-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data3/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-2
type cluster/replicate
option afr-pending-xattr rep3_vol-client-6,rep3_vol-client-7,rep3_vol-client-8
option use-compound-fops off
subvolumes rep3_vol-client-6 rep3_vol-client-7 rep3_vol-client-8
end-volume

volume rep3_vol-client-9
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-9-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data4/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-10
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-10-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data4/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-11
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-11-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data4/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-3
type cluster/replicate
option afr-pending-xattr rep3_vol-client-9,rep3_vol-client-10,rep3_vol-client-11
option use-compound-fops off
subvolumes rep3_vol-client-9 rep3_vol-client-10 rep3_vol-client-11
end-volume

volume rep3_vol-client-12
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-12-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data5/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-13
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-13-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data5/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-14
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-14-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data5/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-4
type cluster/replicate
option afr-pending-xattr rep3_vol-client-12,rep3_vol-client-13,rep3_vol-client-14
option use-compound-fops off
subvolumes rep3_vol-client-12 rep3_vol-client-13 rep3_vol-client-14
end-volume

volume rep3_vol-client-15
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-15-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data6/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-16
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-16-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data6/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-17
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-17-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data6/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-5
type cluster/replicate
option afr-pending-xattr rep3_vol-client-15,rep3_vol-client-16,rep3_vol-client-17
option use-compound-fops off
subvolumes rep3_vol-client-15 rep3_vol-client-16 rep3_vol-client-17
end-volume

volume rep3_vol-client-18
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-18-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data7/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-19
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-19-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data7/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-20
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-20-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data7/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-6
type cluster/replicate
option afr-pending-xattr rep3_vol-client-18,rep3_vol-client-19,rep3_vol-client-20
option use-compound-fops off
subvolumes rep3_vol-client-18 rep3_vol-client-19 rep3_vol-client-20
end-volume

volume rep3_vol-client-21
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-21-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data8/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-22
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-22-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data8/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-23
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-23-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data8/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-7
type cluster/replicate
option afr-pending-xattr rep3_vol-client-21,rep3_vol-client-22,rep3_vol-client-23
option use-compound-fops off
subvolumes rep3_vol-client-21 rep3_vol-client-22 rep3_vol-client-23
end-volume

volume rep3_vol-client-24
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-24-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data9/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-25
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-25-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data9/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-26
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-26-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data9/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-8
type cluster/replicate
option afr-pending-xattr rep3_vol-client-24,rep3_vol-client-25,rep3_vol-client-26
option use-compound-fops off
subvolumes rep3_vol-client-24 rep3_vol-client-25 rep3_vol-client-26
end-volume

volume rep3_vol-client-27
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-27-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data10/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-28
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-28-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data10/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-29
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-29-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data10/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-9
type cluster/replicate
option afr-pending-xattr rep3_vol-client-27,rep3_vol-client-28,rep3_vol-client-29
option use-compound-fops off
subvolumes rep3_vol-client-27 rep3_vol-client-28 rep3_vol-client-29
end-volume

volume rep3_vol-client-30
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-30-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data11/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-31
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-31-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data11/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-32
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-32-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data11/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
0-06-10 08:15:10.480043] D [MSGID: 0] [common-utils.c:532:gf_resolve_ip6] 0-resolver: returning ip-10.193.189.154 (port-24007) for hostname: 10.193.189.154 and port: 24007
option send-gids true
end-volume

volume rep3_vol-replicate-10
type cluster/replicate
option afr-pending-xattr rep3_vol-client-30,rep3_vol-client-31,rep3_vol-client-32
option use-compound-fops off
subvolumes rep3_vol-client-30 rep3_vol-client-31 rep3_vol-client-32
end-volume

volume rep3_vol-client-33
type protocol/client
option opversion 70200
option clnt-lk-version 1
option volfile-checksum 0
option volfile-key rep3_vol
option client-version 2020.05.12
option process-name fuse
option process-uuid CTX_ID:da279c3e-54cc-4d0e-b921-1395feeeae9a-GRAPH_ID:0-PID:93273-HOST:ai-storage-prd-10-193-189-153.v-bj-4.vivo.lan-PC_NAME:rep3_vol-client-33-RECON_NO:-0
option fops-version 1298437
option ping-timeout 42
option remote-host 10.193.189.153
option remote-subvolume /data12/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-34
type protocol/client
option ping-timeout 42
option remote-host 10.193.189.154
option remote-subvolume /data12/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-client-35
type protocol/client
option ping-timeout 42
option remote-host 10.193.189.155
option remote-subvolume /data12/brick_rep3_vol
option transport-type socket
option transport.address-family inet
option username bdb0a45d-e70d-445d-8fe6-76118dfdb738
option password 7e9a1877-0837-4563-b73d-aa4cde754c91
option transport.socket.ssl-enabled off
option transport.tcp-user-timeout 0
option transport.socket.keepalive-time 20
option transport.socket.keepalive-interval 2
option transport.socket.keepalive-count 9
option send-gids true
end-volume

volume rep3_vol-replicate-11
type cluster/replicate
option afr-pending-xattr rep3_vol-client-33,rep3_vol-client-34,rep3_vol-client-35
option use-compound-fops off
subvolumes rep3_vol-client-33 rep3_vol-client-34 rep3_vol-client-35
end-volume

volume rep3_vol-dht
type cluster/distribute
option lock-migration off
option force-migration off
0-06-10 08:15:10.480240] I [MSGID: 114046] [client-handshake.c:1105:client_setvolume_cbk] 0-rep3_vol-client-33: Connected to rep3_vol-client-33, attached to remote volume '/data12/brick_rep3_vol'.
subvolumes rep3_vol-replicate-0 rep3_vol-replicate-1 rep3_vol-replicate-2 rep3_vol-replicate-3 rep3_vol-replicate-4 rep3_vol-replicate-5 rep3_vol-replicate-6 rep3_vol-replicate-7 rep3_vol-replicate-8 rep3_vol-replicate-9 rep3_vol-replicate-10 rep3_vol-replicate-11
end-volume

0-06-10 08:15:10.480280] D [MSGID: 0] [client-handshake.c:945:client_post_handshake] 0-rep3_vol-client-33: No fds to open - notifying all parents child up
volume rep3_vol-shard
type features/shard
option shard-block-size 8GB
subvolumes rep3_vol-dht
end-volume

volume rep3_vol-utime
0-06-10 08:15:10.480305] D [MSGID: 0] [afr-common.c:5209:afr_get_halo_latency] 0-rep3_vol-replicate-11: Using halo latency 5
type features/utime
option noatime on
0-06-10 08:15:10.480343] I [MSGID: 108005] [afr-common.c:5293:__afr_handle_child_up_event] 0-rep3_vol-replicate-11: Subvolume 'rep3_vol-client-33' came back up; going online.
subvolumes rep3_vol-shard
end-volume

volume rep3_vol-write-behind
type performance/write-behind
subvolumes rep3_vol-utime
end-volume

volume rep3_vol-read-ahead
type performance/read-ahead
subvolumes rep3_vol-write-behind
end-volume

volume rep3_vol-readdir-ahead
type performance/readdir-ahead
option parallel-readdir off
option rda-request-size 131072
option rda-cache-limit 10MB
subvolumes rep3_vol-read-ahead
end-volume

volume rep3_vol-io-cache
0-06-10 08:15:10.480407] D [MSGID: 0] [client.c:2323:client_rpc_notify] 0-rep3_vol-client-35: got RPC_CLNT_CONNECT
type performance/io-cache
subvolumes rep3_vol-readdir-ahead
end-volume

volume rep3_vol-open-behind
type performance/open-behind
subvolumes rep3_vol-io-cache
end-volume

volume rep3_vol-quick-read
type performance/quick-read
subvolumes rep3_vol-open-behind
end-volume

volume rep3_vol-md-cache
type performance/md-cache
option cache-posix-acl true
subvolumes rep3_vol-quick-read
end-volume

volume rep3_vol
type debug/io-stats
option log-level DEBUG
option threads 16
option latency-measurement off
option count-fop-hits off
option global-threading off
subvolumes rep3_vol-md-cache
end-volume

volume posix-acl-autoload
type system/posix-acl
subvolumes rep3_vol
end-volume

volume meta-autoload
type meta
subvolumes posix-acl-autoload
end-volume

+------------------------------------------------------------------------------+

glusterfs fuse简单介绍

xlator是glusterfs的核心概念,每个xlator对应一系列函数,用于处理相应的文件操作。glusterfs/glusterfsd/glusterd三个二进制的入口是相同的(glusterfs/src/glusterfsd.c),但不同模块在逻辑处理上会加载不同的xlator来完成对应模块的功能。glusterfs客户端基于fuse实现了自己的API。如下是glusterfs客户端核心函数调用链(glusterfs/src/glusterfsd.c)

fuse 请求转发流程

  • 1.main

    • glusterfs入口函数
  • 2.create_fuse_mount

    • 设置mount/fuse xlator,调用xlator_set_type初始化这个mount/fuse的模块(实际调用mount/fuse中的xlator的init方法),这个也是后面我们着重需要讲清楚的模块
  • 3.glusterfs_volumes_init

    • 根据挂载时提供的节点IP,初始化glusterfs客户端需要的信息,比如服务端brick的元数据、客户端需要加载哪些xlators
  • 4.glusterfs_mgmt_init

    • 依据挂载(mount)时提供的IP,与该节点上的glusterd通信,获取服务端的brick信息以及需要加载的xlators
  • 5.glusterfs_process_volfp

  • 依据获取到的服务端提供的元数据,调用xlator_init初始化,整个客户端glusterfs初始化完成;glusterfs第一个访问的是mount/fuse xlator(这个xlator的配置信息不需要从glusterd请求获取),最后访问的是protocol/client的xlator

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* client.so 对应的protocol/client 的xlator */
/usr/local/lib/glusterfs/2020.05.12/xlator/protocol/client.so
/* replicate.so 对应cluster/replicate,也就是cluster/afr 副本卷这块实现 */
/usr/local/lib/glusterfs/2020.05.12/xlator/cluster/replicate.so
/* distribute.so 对应cluster/distribute,也就是cluster/dht哈希卷的实现 */
/usr/local/lib/glusterfs/2020.05.12/xlator/cluster/distribute.so
/usr/local/lib/glusterfs/2020.05.12/xlator/features/utime.so
/usr/local/lib/glusterfs/2020.05.12/xlator/performance/write-behind.so
/usr/local/lib/glusterfs/2020.05.12/xlator/performance/read-ahead.so
/usr/local/lib/glusterfs/2020.05.12/xlator/performance/readdir-ahead.so
/usr/local/lib/glusterfs/2020.05.12/xlator/performance/io-cache.so
/usr/local/lib/glusterfs/2020.05.12/xlator/performance/open-behind.so
/usr/local/lib/glusterfs/2020.05.12/xlator/performance/quick-read.so
/usr/local/lib/glusterfs/2020.05.12/xlator/performance/md-cache.so
/usr/local/lib/glusterfs/2020.05.12/xlator/debug/io-stats.so
  • glusterfs_graph_activate

    • ctx->master在初始化时候已经把mount/fuse的xlator赋值给master了,然后在xlator graph激活时候,如果master不为空就执行xlator_notify函数,第一个参数就是mount/fuse的xlator的结构体指针

      1
      2
      3
      4
      5
      6
      7
      8
      /*
       * Abridged excerpt: when the context has a master xlator, notify it of
       * the new graph and link the graph's top xlator in as the master's next.
       */
      glusterfs_graph_activate(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
      {
          if (ctx->master) {
              /* This is where notify() in fuse-bridge.c gets invoked. */
              ret = xlator_notify(ctx->master, GF_EVENT_GRAPH_NEW, graph);
              ((xlator_t *)ctx->master)->next = graph->top;
          }
      }
  • xlator_notify

    • xlator_notify函数主要是调用mount/fuse这个xlator的notify函数,由它启动多个线程读取/dev/fuse中的数据
    1
    2
    3
    4
    5
    /* Abridged excerpt: passes the event and its data to the xlator's own notify callback. */
    xlator_notify(xlator_t *xl, int event, void *data, ...)
    {
    // Calls the notify function of the mount/fuse xlator
    xl->notify(xl, event, data);
    }
  • notify

    • 该函数是mount/fuse中的notify函数

      1
      2
      3
      4
      5
      6
      7
      /*
       * notify() of the mount/fuse xlator (abridged excerpt): creates
       * private->reader_thread_count threads via gf_thread_create, passing
       * fuse_thread_proc as the thread routine.
       */
      notify(xlator_t *this, int32_t event, void *data, ...)
      {
          /* These are threads created with gf_thread_create, not forked child processes. */
          for (i = 0; i < private->reader_thread_count; i++) {
              ret = gf_thread_create(&private->fuse_thread[i], NULL, fuse_thread_proc, this, "fuseproc");
          }
      }
  • fuse_thread_proc

    • 读取/dev/fuse中的数据,转发给对应的fuse_xxx的函数
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    // Thread routine (abridged excerpt): reads request data from priv->fd and hands it to fuse_dispatch, which forwards it to the matching fuse_xxx function
    fuse_thread_proc(void *data)
    {
    xlator_t *this = NULL;
    fuse_private_t *priv = NULL;
    this = data;
    priv = this->private;

    THIS = this;
    // The init function in the xlator/mount/fuse source module has already set priv->fd to the file descriptor of /dev/fuse
    res = sys_readv(priv->fd, iov_in, 2);
    gf_async(&fasync->async, this, fuse_dispatch);

    }
  • fuse_dispatch

    • 主要是把从/dev/fuse读取到的数据转发给对应的fuse_xxx函数,比如fuse初始化挂载操作会把请求转发到fuse_init函数

      1
      2
      3
      4
      5
      6
      7
      /*
       * Abridged excerpt: forwards a request that was read from /dev/fuse to
       * the handler registered for its opcode. The handler is invoked exactly
       * once per request.
       */
      fuse_dispatch(xlator_t *xl, gf_async_t *async)
      {
          /* priv->fuse_ops refers to the fuse_std_ops function-pointer array. */
          priv->fuse_ops[finh->opcode](xl, finh, fasync->msg, iobuf);
      }
      • 其中priv->fuse_ops中定义了fuse一系列操作,具体的定义如下

        1
        2
        3
        4
        5
        6
        7
        8
        9
        10
        11
        12
        13
        14
        15
        16
        17
        18
        19
        20
        21
        22
        23
        24
        25
        26
        27
        28
        29
        30
        31
        32
        33
        34
        35
        36
        37
        38
        39
        40
        41
        42
        43
        44
        45
        46
        47
        48
        49
        50
        51
        52
        53
        54
        55
        56
        57
        58
        59
        60
        61
        62
        63
        /*
         * Handler table indexed by FUSE opcode: each entry is the function that
         * services that request type. Opcodes that appear only inside comments
         * (FUSE_BMAP, FUSE_IOCTL, FUSE_POLL, FUSE_NOTIFY_REPLY) have no entry
         * in this table. Entries inside the #if blocks are compiled in only
         * when FUSE_KERNEL_MINOR_VERSION (and the named feature macro, where
         * one is tested) satisfies the condition.
         */
        static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = {
        [FUSE_LOOKUP] = fuse_lookup,
        [FUSE_FORGET] = fuse_forget,
        [FUSE_GETATTR] = fuse_getattr,
        [FUSE_SETATTR] = fuse_setattr,
        [FUSE_READLINK] = fuse_readlink,
        [FUSE_SYMLINK] = fuse_symlink,
        [FUSE_MKNOD] = fuse_mknod,
        [FUSE_MKDIR] = fuse_mkdir,
        [FUSE_UNLINK] = fuse_unlink,
        [FUSE_RMDIR] = fuse_rmdir,
        [FUSE_RENAME] = fuse_rename,
        [FUSE_LINK] = fuse_link,
        [FUSE_OPEN] = fuse_open,
        [FUSE_READ] = fuse_readv,
        [FUSE_WRITE] = fuse_write,
        [FUSE_STATFS] = fuse_statfs,
        [FUSE_RELEASE] = fuse_release,
        [FUSE_FSYNC] = fuse_fsync,
        [FUSE_SETXATTR] = fuse_setxattr,
        [FUSE_GETXATTR] = fuse_getxattr,
        [FUSE_LISTXATTR] = fuse_listxattr,
        [FUSE_REMOVEXATTR] = fuse_removexattr,
        [FUSE_FLUSH] = fuse_flush,
        [FUSE_INIT] = fuse_init,
        [FUSE_OPENDIR] = fuse_opendir,
        [FUSE_READDIR] = fuse_readdir,
        [FUSE_RELEASEDIR] = fuse_releasedir,
        [FUSE_FSYNCDIR] = fuse_fsyncdir,
        [FUSE_GETLK] = fuse_getlk,
        [FUSE_SETLK] = fuse_setlk,
        /* FUSE_SETLKW shares the FUSE_SETLK handler. */
        [FUSE_SETLKW] = fuse_setlk,
        [FUSE_ACCESS] = fuse_access,
        [FUSE_CREATE] = fuse_create,
        [FUSE_INTERRUPT] = fuse_interrupt,
        /* [FUSE_BMAP] */
        [FUSE_DESTROY] = fuse_destroy,
        /* [FUSE_IOCTL] */
        /* [FUSE_POLL] */
        /* [FUSE_NOTIFY_REPLY] */

        #if FUSE_KERNEL_MINOR_VERSION >= 16
        [FUSE_BATCH_FORGET] = fuse_batch_forget,
        #endif

        #if FUSE_KERNEL_MINOR_VERSION >= 19
        #ifdef FALLOC_FL_KEEP_SIZE
        [FUSE_FALLOCATE] = fuse_fallocate,
        #endif /* FALLOC_FL_KEEP_SIZE */
        #endif

        #if FUSE_KERNEL_MINOR_VERSION >= 21
        [FUSE_READDIRPLUS] = fuse_readdirp,
        #endif

        #if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE
        [FUSE_LSEEK] = fuse_lseek,
        #endif

        #if FUSE_KERNEL_MINOR_VERSION >= 28
        [FUSE_COPY_FILE_RANGE] = fuse_copy_file_range,
        #endif
        };
  • FUSE_FOP

    • FUSE_FOP用于转发当前操作到下一个xlator的fops函数,比如下一个xlator是cluster/dht,dht的下一个xlator是cluster/afr,afr之后最后一个是protocol/client,protocol/client对应的是client4_0_writev函数,把数据通过网络push到对应的某个glusterfsd进程。
      1
      2
      3
      /* Issues the write (GF_FOP_WRITE / writev) for state->fd, with fuse_writev_cbk passed as the callback argument. */
      FUSE_FOP(state, fuse_writev_cbk, GF_FOP_WRITE, writev, state->fd,
      &state->vector, 1, state->off, state->io_flags, iobref,
      state->xdata);
  • dht_writev
    • dht_writev按照哈希卷的方式写入数据

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    // dht_writev method of the cluster/dht xlator (abridged excerpt; subvol and local are set up in code not shown here)
    int dht_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
    int count, off_t off, uint32_t flags, struct iobref *iobref,
    dict_t *xdata)
    {
    // Call request to the next xlator: winds to subvol->fops->writev with dht_writev_cbk as the callback
    STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
    subvol->fops->writev, fd, local->rebalance.vector,
    local->rebalance.count, local->rebalance.offset,
    local->rebalance.flags, local->rebalance.iobref,
    local->xattr_req);
    }
  • afr_writev

    • 副本卷执行写入操作的方法
      1
      2
      3
      4
      /* Write method of the replicate (afr) xlator; this abridged excerpt only shows it delegating to afr_do_writev. */
      int afr_writev(call_frame_t *frame, xlator_t *this,...)
      {
      afr_do_writev(frame, this);
      }
  • client4_0_writev(protocol/client xlator)

    • protocol/client中的client4_0_writev是把数据push到某个节点的glusterfsd进程
      1
      2
      3
      4
      5
      6
      7
      // writev method of the protocol/client xlator (abridged excerpt)
      int32_t client4_0_writev(call_frame_t *frame, xlator_t *this, void *data)
      {
      // Submits the request as GFS3_OP_WRITE through conf->fops, with client4_0_writev_cbk as the callback
      ret = client_submit_request(this, &req, frame, conf->fops, GFS3_OP_WRITE, client4_0_writev_cbk, &cp,(xdrproc_t)xdr_gfx_write_req);
      }
      //(gdb) p conf->fops
      $99 = (rpc_clnt_prog_t *) 0x7fffe9b79c20 <clnt4_0_fop_prog>

mount/fuse cp文件流程

  • 设计基本思路

    • mount/fuse 这个xlator的实现在xlator/src/mount模块中,该模块中以fuse_开头的函数并没有在模块内被显式调用,而是通过fuse_std_ops数组注册这一系列函数,在具体执行时根据数组中的函数指针隐式地调用。针对文件系统的每个操作(比如ls、rm)都会转发到对应的fuse函数处理,处理完毕后再走mount/fuse的下一个xlator,执行对应xlator的一系列函数。
  • gdb 堆栈的信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
//简化版本
(gdb) bt
#0 afr_writev at afr-inode-write.c:491
#1 dht_writev at dht-inode-write.c:223
#2 shard_common_inode_write_wind at shard.c:5397
#3 shard_common_inode_write_do at shard.c:5527
#4 wb_writev at write-behind.c:1897
#5 ra_writev at read-ahead.c:650
#6 rda_writev at readdir-ahead.c:788
#7 ioc_writev at io-cache.c:1303
#8 default_writev_resume at defaults.c:1983
#9 call_resume_wind at call-stub.c:2085
#10 call_resume at call-stub.c:2555
#11 open_and_resume at open-behind.c:485
#12 ob_writev at open-behind.c:683
#13 qr_writev at quick-read.c:849
#14 mdc_writev at md-cache.c:2082
#15 io_stats_writev at io-stats.c:2882
#16 default_writev at defaults.c:2735
#17 meta_writev at meta.c:131
#18 fuse_write_resume at fuse-bridge.c:2959
#19 fuse_fop_resume at fuse-bridge.c:1030
#20 fuse_resolve_done at fuse-resolve.c:629
#21 fuse_resolve_all at fuse-resolve.c:653
#22 fuse_resolve at fuse-resolve.c:620
#23 fuse_resolve_all at fuse-resolve.c:650
#24 fuse_resolve_continue at fuse-resolve.c:668
#25 fuse_resolve_fd at fuse-resolve.c:543
#26 fuse_resolve at fuse-resolve.c:611
#27 fuse_resolve_all at fuse-resolve.c:644
#28 fuse_resolve_and_resume at fuse-resolve.c:680
#29 fuse_write at fuse-bridge.c:3011
#30 fuse_dispatch at fuse-bridge.c:5838