面试题答案
异常处理机制设计思路
- 网络中断处理
  - 检测:在非阻塞I/O操作中,通过 `select`、`poll` 或 `epoll` 等多路复用函数监听套接字事件。当这些函数返回时,检查套接字是否可读或可写,并对就绪的套接字执行读写操作;如果 `recv` 返回 0(对端已关闭连接),或者读写失败且错误码为 `ECONNRESET`、`EPIPE` 等表示连接中断的错误码,则判定网络中断。
  - 处理:尝试重新连接。在分布式系统节点通信中,节点可以维护一个连接池,当检测到某个连接中断时,从连接池中移除该连接,并尝试重新建立与目标节点的连接。
- 连接超时处理
  - 检测:在建立连接时,设置一个定时器。例如使用 `alarm` 函数或 `setitimer` 函数,如果连接操作(如 `connect`)在指定时间内未完成,则定时器到期并触发信号处理函数。
  - 处理:在分布式系统中,如果连接超时,记录日志并尝试切换到备用节点进行连接,或者等待一段时间后重试连接到原目标节点。
- 数据丢失处理
- 检测:在接收数据时,通过校验和(如CRC校验)或消息序号来检测数据完整性。如果接收的数据校验和错误或序号不连续,判定数据丢失。
- 处理:在分布式系统中,发送方可以维护一个已发送消息的缓冲区,当接收方检测到数据丢失时,发送重传请求,发送方根据请求重传相应的数据。
关键代码示例
- 使用 `select` 处理网络中断和连接超时
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
/* TCP port the example server binds to (passed through htons() in main). */
#define PORT 8080
/* Backlog value handed to listen() in main. */
#define MAX_CLIENTS 10
/* Size in bytes of the data buffers used by the examples. */
#define BUFFER_SIZE 1024
/*
 * Block until `fd` becomes readable or `timeout_sec` seconds have elapsed.
 *
 * A fresh fd_set and timeval are built on every call because select() is
 * allowed to modify both of them, so they must not be reused between waits.
 *
 * Returns select()'s result: > 0 when `fd` is readable, 0 on timeout,
 * -1 on error with errno set.
 */
static int wait_readable(int fd, long timeout_sec) {
    fd_set read_fds;
    struct timeval timeout;

    /* FD_SET on a descriptor outside [0, FD_SETSIZE) is undefined. */
    if (fd < 0 || fd >= FD_SETSIZE) {
        errno = EBADF;
        return -1;
    }

    FD_ZERO(&read_fds);
    FD_SET(fd, &read_fds);
    timeout.tv_sec = timeout_sec;
    timeout.tv_usec = 0;

    return select(fd + 1, &read_fds, NULL, NULL, &timeout);
}

/*
 * Minimal TCP server demonstrating timeout and disconnect handling with
 * select(): listen on PORT, wait up to 5 seconds for one client, wait up to
 * 5 seconds for its first message, print it, and exit.
 *
 * Returns EXIT_SUCCESS after a message was received or the peer closed the
 * connection cleanly; EXIT_FAILURE on any socket error or timeout.
 */
int main(void) {
    const long timeout_sec = 5; /* limit for both the accept and the recv wait */
    struct sockaddr_in servaddr, cliaddr;
    /* accept() needs a real in/out length variable, not the address struct. */
    socklen_t cliaddr_len = sizeof(cliaddr);
    char buffer[BUFFER_SIZE] = {0};
    int exit_code = EXIT_FAILURE;
    int new_sockfd = -1;
    int ready;
    ssize_t received;

    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0) {
        perror("Socket creation failed");
        return EXIT_FAILURE;
    }

    memset(&servaddr, 0, sizeof(servaddr));
    memset(&cliaddr, 0, sizeof(cliaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_addr.s_addr = INADDR_ANY;
    servaddr.sin_port = htons(PORT);

    if (bind(sockfd, (const struct sockaddr *)&servaddr, sizeof(servaddr)) < 0) {
        perror("Bind failed");
        goto cleanup;
    }
    if (listen(sockfd, MAX_CLIENTS) < 0) {
        perror("Listen failed");
        goto cleanup;
    }

    /* Wait for a pending connection first so that accept() cannot block
     * past the timeout. */
    ready = wait_readable(sockfd, timeout_sec);
    if (ready < 0) {
        perror("Select error");
        goto cleanup;
    }
    if (ready == 0) {
        printf("Connection timeout\n");
        goto cleanup;
    }

    new_sockfd = accept(sockfd, (struct sockaddr *)&cliaddr, &cliaddr_len);
    if (new_sockfd < 0) {
        perror("Accept failed");
        goto cleanup;
    }

    /* The data wait must watch the connected socket, not the listener. */
    ready = wait_readable(new_sockfd, timeout_sec);
    if (ready < 0) {
        perror("Select error during recv");
        goto cleanup;
    }
    if (ready == 0) {
        printf("Recv timeout\n");
        goto cleanup;
    }

    /* Leave one byte free so the data can always be NUL-terminated. */
    received = recv(new_sockfd, buffer, sizeof(buffer) - 1, 0);
    if (received < 0) {
        /* Covers ECONNRESET and other interruption errors. */
        perror("Recv failed");
        goto cleanup;
    }
    if (received == 0) {
        /* Orderly shutdown by the peer: there is no payload to print. */
        printf("Connection closed by peer\n");
    } else {
        buffer[received] = '\0';
        printf("Received: %s\n", buffer);
    }
    exit_code = EXIT_SUCCESS;

cleanup:
    if (new_sockfd >= 0) {
        close(new_sockfd);
    }
    close(sockfd);
    return exit_code;
}
- 简单的校验和示例(用于数据丢失检测)
/*
 * Compute a 16-bit one's-complement checksum over a byte buffer.
 *
 * The buffer is summed as consecutive 2-byte words in host byte order; a
 * trailing odd byte is added as its unsigned value. Carries above bit 15 are
 * folded back into the low 16 bits, and the bitwise complement of the folded
 * sum is returned.
 *
 * data:   bytes to checksum; only read, and may have any alignment.
 * length: number of bytes at `data`.
 *
 * Returns the checksum; an empty buffer yields 0xFFFF.
 *
 * Assumes unsigned short is 2 bytes wide, matching the 2-byte stride.
 */
unsigned short calculate_checksum(char *data, size_t length) {
    const unsigned char *cursor = (const unsigned char *)data;
    unsigned long sum = 0;

    while (length > 1) {
        unsigned short word;
        /* memcpy avoids the misaligned, aliasing-violating load that a
         * direct (unsigned short *) cast would perform. */
        memcpy(&word, cursor, 2);
        sum += word;
        cursor += 2;
        length -= 2;
    }

    if (length > 0) {
        sum += *cursor;
    }

    /* Fold carries back in until the sum fits in 16 bits. */
    while (sum >> 16) {
        sum = (sum & 0xFFFF) + (sum >> 16);
    }

    return (unsigned short)~sum;
}
在实际应用中,发送方在发送数据前计算校验和并一同发送,接收方接收到数据后重新计算校验和并与接收到的校验和对比,以检测数据是否丢失。例如:
// Sender side: compute the checksum over the payload, then send the payload
// followed by the checksum.
char data_to_send[BUFFER_SIZE] = "Hello, distributed system!";
unsigned short checksum = calculate_checksum(data_to_send, strlen(data_to_send));
// NOTE(review): the return values of both send() calls are ignored, so a
// failed or partial send goes unnoticed.
send(new_sockfd, data_to_send, strlen(data_to_send), 0);
// The checksum is sent as the raw bytes of an unsigned short.
// NOTE(review): presumably both peers share the same byte order — confirm.
send(new_sockfd, &checksum, sizeof(unsigned short), 0);
// Receiver side: read the payload, read the checksum, recompute and compare.
char received_data[BUFFER_SIZE] = {0};
unsigned short received_checksum;
// NOTE(review): this recv() accepts up to BUFFER_SIZE bytes and no payload
// length is transmitted. If new_sockfd is a stream socket (not shown in this
// fragment — confirm), this call may also consume the checksum bytes, or
// return only part of the payload. Return values are not checked.
recv(new_sockfd, received_data, BUFFER_SIZE, 0);
recv(new_sockfd, &received_checksum, sizeof(unsigned short), 0);
// The payload length is recovered with strlen(), which relies on a NUL byte
// remaining in the zero-initialised buffer; a recv() that fills all
// BUFFER_SIZE bytes would leave none.
unsigned short calculated_checksum = calculate_checksum(received_data, strlen(received_data));
if (calculated_checksum != received_checksum) {
// Handle data loss, e.g. request a retransmission
}
通过以上设计思路和关键代码示例,可以在C语言非阻塞I/O网络编程中构建一套相对完善的异常处理机制,确保分布式系统中节点通信的可靠性。