我有一个进程监听套接字上的传入连接。当连接到达时,创建一个线程,线程从套接字读取请求并发回回复。以下是流程的代码:
int main(int argc, char *argv[])
{
int fd, fd2, rc, *new_sock;
struct sockaddr_un serveraddr;
socklen_t peer_size;
/* Create the listening socket in SOCKET_PATH and listen to requests.
* Each request gets a new thread
*/
fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd < 0) {
fprintf(stderr, "Error %d (%s) in socket()\n", errno, strerror(errno));
exit (0);
}
memset(&serveraddr, 0, sizeof(serveraddr));
serveraddr.sun_family = AF_UNIX;
strcpy(serveraddr.sun_path, SOCKET_PATH);
rc = bind(fd, (struct sockaddr *)&serveraddr, sizeof(struct sockaddr_un));
if (rc < 0)
{
fprintf(stderr, "bind() failed - %d (%s)", errno, strerror(errno));
exit (0);
}
rc = listen(fd, 50);
if (rc == -1) {
fprintf(stderr, "listen() failed - %d (%s)", errno, strerror(errno));
exit (0);
}
peer_size = sizeof(struct sockaddr_un);
while ((fd2 = accept(fd, (struct sockaddr *)&serveraddr, &peer_size)) != -1)
{
pthread_t connection_thread;
new_sock = malloc(sizeof(int));
*new_sock = fd2;
if(pthread_create(&connection_thread , NULL , connection_handler , (void*) new_sock) < 0)
{
fprintf(stderr, "pthread_create() failed - %d (%s)", errno, strerror(errno));
exit (1);
}
}
if (fd2 < 0) {
fprintf(stderr, "accept() failed - %d (%s)", errno, strerror(errno));
}
exit (0);
}
static void *connection_handler(void *socket_desc)
{
int sock = *(int*)socket_desc;
unsigned char *response = malloc(2000);
...
while((current_read_size = recv(sock , buf, sizeof(buf) , 0)) > 0) {
// read request and write response
// write()
}
free(response);
close(sock);
free(socket_desc);
}
我还有一个共享库,它正在写入此套接字,分析响应并返回数据。这是它的代码:
struct request {
size_t payload_length;
unsigned char payload[];
};
int request_fd;
Handle *mylib_init()
{
request_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (request_fd < 0) {
fprintf(stderr, "Error %d (%s) in socket()\n", errno, strerror(errno));
return NULL;
}
memset(&serveraddr, 0, sizeof(serveraddr));
serveraddr.sun_family = AF_UNIX;
strcpy(serveraddr.sun_path, DB_SOCKET_PATH);
if (connect(request_fd, (struct sockaddr *)&serveraddr, sizeof(serveraddr)) < 0) {
fprintf(stderr, "Error %d (%s) in connect()\n", errno, strerror(errno));
goto err_close_request_fd;
}
...
}
int store(Handle *handle, size_t data_len, unsigned char *data)
{
struct request *request;
int read_len;
if (request_fd > -1) {
request = malloc(sizeof(struct request) + data_len);
request->payload_length = data_len;
if (data_len) {
memcpy(request->payload, data, data_len);
}
if (write(request_fd, request, sizeof(struct request) + data_len) != (sizeof(struct request) + data_len)) {
printf("Error %d (%s) in write()\n", errno, strerror(errno));
free(request);
return -1;
}
free(request);
...
// Read response, analyze and return
...
}
return -1;
}
我还有一个小的测试可执行文件,用伪造的数据调用store()
。当我在循环中将服务器应用程序作为守护程序和测试可执行文件运行时,服务器会在一段时间后卡住并且没有做任何事情,也不接受新的连接。
在gdb下运行时,我看到:
(gdb) info proc mappings
process 4526
Mapped address spaces:
Start Addr End Addr Size Offset objfile
0x10000 0x12000 0x2000 0x0 /opt/a.out
0x21000 0x22000 0x1000 0x1000 /opt/a.out
0x22000 0x43000 0x21000 0x0 [heap]
0xb5d00000 0xb5d01000 0x1000 0x0
0xb5d01000 0xb6500000 0x7ff000 0x0
0xb6500000 0xb6521000 0x21000 0x0
0xb6521000 0xb6600000 0xdf000 0x0
0xb66b2000 0xb66b3000 0x1000 0x0
0xb6eb2000 0xb6f8d000 0xdb000 0x0 /lib/arm-linux-gnueabihf/libc-2.19.so
0xb6f8d000 0xb6f9c000 0xf000 0xdb000 /lib/arm-linux-gnueabihf/libc-2.19.so
0xb6f9c000 0xb6f9e000 0x2000 0xda000 /lib/arm-linux-gnueabihf/libc-2.19.so
0xb6f9e000 0xb6f9f000 0x1000 0xdc000 /lib/arm-linux-gnueabihf/libc-2.19.so
0xb6f9f000 0xb6fa2000 0x3000 0x0
0xb6fa2000 0xb6fb2000 0x10000 0x0 /lib/arm-linux-gnueabihf/libpthread-2.19.so
0xb6fb2000 0xb6fc1000 0xf000 0x10000 /lib/arm-linux-gnueabihf/libpthread-2.19.so
0xb6fc1000 0xb6fc2000 0x1000 0xf000 /lib/arm-linux-gnueabihf/libpthread-2.19.so
0xb6fc2000 0xb6fc3000 0x1000 0x10000 /lib/arm-linux-gnueabihf/libpthread-2.19.so
0xb6fc3000 0xb6fc5000 0x2000 0x0
0xb6fd7000 0xb6fef000 0x18000 0x0 /lib/arm-linux-gnueabihf/ld-2.19.so
0xb6ff6000 0xb6ffb000 0x5000 0x0
0xb6ffb000 0xb6ffc000 0x1000 0x0 [sigpage]
0xb6ffc000 0xb6ffd000 0x1000 0x0 [vvar]
0xb6ffd000 0xb6ffe000 0x1000 0x0 [vdso]
0xb6ffe000 0xb6fff000 0x1000 0x17000 /lib/arm-linux-gnueabihf/ld-2.19.so
0xb6fff000 0xb7000000 0x1000 0x18000 /lib/arm-linux-gnueabihf/ld-2.19.so
0xbefdf000 0xbf000000 0x21000 0x0 [stack]
0xffff0000 0xffff1000 0x1000 0x0 [vectors]
然后,当程序卡住时,它看起来像:(gdb)info proc mappings process 4526映射的地址空间:
Start Addr End Addr Size Offset objfile
0x10000 0x12000 0x2000 0x0 /opt/a.out
0x21000 0x22000 0x1000 0x1000 /opt/a.out
0x22000 0x43000 0x21000 0x0 [heap]
0x96500000 0x96501000 0x1000 0x0
0x96501000 0x96d00000 0x7ff000 0x0 [stack:4899]
0x96d00000 0x96d01000 0x1000 0x0
0x96d01000 0x97500000 0x7ff000 0x0
0x97500000 0x97501000 0x1000 0x0
0x97501000 0x97d00000 0x7ff000 0x0
0x97d00000 0x97d01000 0x1000 0x0
0x97d01000 0x98500000 0x7ff000 0x0
0x98500000 0x98501000 0x1000 0x0
0x98501000 0x98d00000 0x7ff000 0x0
0x98d00000 0x98d01000 0x1000 0x0
0x98d01000 0x99500000 0x7ff000 0x0
0x99500000 0x99501000 0x1000 0x0
0x99501000 0x99d00000 0x7ff000 0x0
0x99d00000 0x99d01000 0x1000 0x0
0x99d01000 0x9a500000 0x7ff000 0x0
0x9a500000 0x9a501000 0x1000 0x0
0x9a501000 0x9ad00000 0x7ff000 0x0
0x9ad00000 0x9ad01000 0x1000 0x0
0x9ad01000 0x9b500000 0x7ff000 0x0
0x9b500000 0x9b501000 0x1000 0x0
0x9b501000 0x9bd00000 0x7ff000 0x0
0x9bd00000 0x9bd01000 0x1000 0x0
0x9bd01000 0x9c500000 0x7ff000 0x0
0x9c500000 0x9c501000 0x1000 0x0
0x9c501000 0x9cd00000 0x7ff000 0x0
0x9cd00000 0x9cd01000 0x1000 0x0
0x9cd01000 0x9d500000 0x7ff000 0x0
0x9d500000 0x9d501000 0x1000 0x0
0x9d501000 0x9dd00000 0x7ff000 0x0
0x9dd00000 0x9dd01000 0x1000 0x0
0x9dd01000 0x9e500000 0x7ff000 0x0
0x9e500000 0x9e501000 0x1000 0x0
0x9e501000 0x9ed00000 0x7ff000 0x0
0x9ed00000 0x9ed01000 0x1000 0x0
0x9ed01000 0x9f500000 0x7ff000 0x0
0x9f500000 0x9f501000 0x1000 0x0
0x9f501000 0x9fd00000 0x7ff000 0x0
0x9fd00000 0x9fd01000 0x1000 0x0
0x9fd01000 0xa0500000 0x7ff000 0x0
0xa0500000 0xa0501000 0x1000 0x0
0xa0501000 0xa0d00000 0x7ff000 0x0
0xa0d00000 0xa0d01000 0x1000 0x0
0xa0d01000 0xa1500000 0x7ff000 0x0
0xa1500000 0xa1501000 0x1000 0x0
0xa1501000 0xa1d00000 0x7ff000 0x0
0xa1d00000 0xa1d01000 0x1000 0x0
0xa1d01000 0xa2500000 0x7ff000 0x0
0xa2500000 0xa2501000 0x1000 0x0
0xa2501000 0xa2d00000 0x7ff000 0x0
0xa2d00000 0xa2d01000 0x1000 0x0
/// ... MORE AND MORE OF THE SAME PATTERN ABOVE
0xb6fc3000 0xb6fc5000 0x2000 0x0
0xb6fd7000 0xb6fef000 0x18000 0x0 /lib/arm-linux-gnueabihf/ld-2.19.so
0xb6ff6000 0xb6ffb000 0x5000 0x0
0xb6ffb000 0xb6ffc000 0x1000 0x0 [sigpage]
0xb6ffc000 0xb6ffd000 0x1000 0x0 [vvar]
0xb6ffd000 0xb6ffe000 0x1000 0x0 [vdso]
0xb6ffe000 0xb6fff000 0x1000 0x17000 /lib/arm-linux-gnueabihf/ld-2.19.so
0x9bd01000 0x9c500000 0x7ff000 0x0
0xb6fff000 0xb7000000 0x1000 0x18000 /lib/arm-linux-gnueabihf/ld-2.19.so
0xbefdf000 0xbf000000 0x21000 0x0 [stack]
0xffff0000 0xffff1000 0x1000 0x0 [vectors]
这样的程序行为可能是什么原因?我该如何调试原因?
pthread_t
资源未发布。你应该调用pthread_detach
或pthread_join
,否则pthread_t
值仍然有效消耗资源,我可以猜测在这种情况下会发生这种情况。