面试题：C语言Linux信号量同步优化

可能原因分析

竞争激烈：多个线程频繁竞争信号量，导致大量线程在信号量上等待，造成CPU上下文切换开销增大。例如，线程A获取信号量后，线程B、C、D等都在等待，当线程A释放信号量时，操作系统需要从众多等待线程中选择一个，频繁的这种操作会消耗性能。
细粒度锁：如果信号量保护的是非常小的临界区，每次获取和释放信号量的开销可能比临界区执行时间还长。比如临界区只是简单的变量赋值操作，但获取和释放信号量涉及系统调用等开销较大的操作。
信号量操作的系统调用开销：System V信号量（semop）每次操作都是一次系统调用；POSIX信号量（sem_wait/sem_post）在glibc中基于futex实现，无竞争时在用户态通过原子操作即可完成，但一旦发生竞争，阻塞和唤醒等待线程仍需要从用户态陷入内核态。因此在竞争激烈时，频繁的系统调用会增加内核态和用户态切换的开销。

优化方法及关键代码片段

减少信号量竞争：
- 使用读写锁代替信号量：如果临界区主要是读操作，可以使用读写锁（pthread_rwlock_t）。读操作可以并发执行，只有写操作需要独占访问。

#include <pthread.h>
#include <stdio.h>

/* Read-write lock shared by read_function() and write_function(); main() initializes it with pthread_rwlock_init() before creating any thread. */
pthread_rwlock_t rwlock;

/*
 * Reader thread entry point: acquires the shared rwlock in read mode, prints
 * a line identifying the calling thread, then releases the lock.
 *
 * arg is unused. Always returns NULL.
 */
void* read_function(void* arg) {
    (void)arg;

    int rc = pthread_rwlock_rdlock(&rwlock);
    if (rc != 0) {
        /* Never enter the critical section or unlock a lock that was not acquired. */
        fprintf(stderr, "pthread_rwlock_rdlock failed: %d\n", rc);
        return NULL;
    }
    // Read-side critical section
    /* pthread_t is opaque; on Linux it is an unsigned long, so cast it to match %lu. */
    printf("Thread %lu is reading\n", (unsigned long)pthread_self());
    pthread_rwlock_unlock(&rwlock);
    return NULL;
}

/*
 * Writer thread entry point: acquires the shared rwlock in write (exclusive)
 * mode, prints a line identifying the calling thread, then releases the lock.
 *
 * arg is unused. Always returns NULL.
 */
void* write_function(void* arg) {
    (void)arg;

    int rc = pthread_rwlock_wrlock(&rwlock);
    if (rc != 0) {
        /* Never enter the critical section or unlock a lock that was not acquired. */
        fprintf(stderr, "pthread_rwlock_wrlock failed: %d\n", rc);
        return NULL;
    }
    // Write-side critical section
    /* pthread_t is opaque; on Linux it is an unsigned long, so cast it to match %lu. */
    printf("Thread %lu is writing\n", (unsigned long)pthread_self());
    pthread_rwlock_unlock(&rwlock);
    return NULL;
}

/*
 * Demo driver for the read-write lock example: initializes rwlock, starts the
 * reader and writer threads, waits for every thread that was actually
 * started, then destroys the lock.
 *
 * Returns 0 on success, 1 if the lock could not be initialized or any thread
 * could not be created or joined.
 */
int main(void) {
    enum { READER_COUNT = 10, WRITER_COUNT = 5 };
    pthread_t read_threads[READER_COUNT], write_threads[WRITER_COUNT];
    int readers_started = 0;
    int writers_started = 0;
    int status = 0;

    int rc = pthread_rwlock_init(&rwlock, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_rwlock_init failed: %d\n", rc);
        return 1;
    }

    for (int i = 0; i < READER_COUNT; i++) {
        rc = pthread_create(&read_threads[i], NULL, read_function, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for reader %d: %d\n", i, rc);
            status = 1;
            break;
        }
        readers_started++;
    }
    for (int i = 0; i < WRITER_COUNT; i++) {
        rc = pthread_create(&write_threads[i], NULL, write_function, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for writer %d: %d\n", i, rc);
            status = 1;
            break;
        }
        writers_started++;
    }

    /* Join only the threads that were created; a pthread_t left unset by a failed create must not be joined. */
    for (int i = 0; i < readers_started; i++) {
        rc = pthread_join(read_threads[i], NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_join failed for reader %d: %d\n", i, rc);
            status = 1;
        }
    }
    for (int i = 0; i < writers_started; i++) {
        rc = pthread_join(write_threads[i], NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_join failed for writer %d: %d\n", i, rc);
            status = 1;
        }
    }

    pthread_rwlock_destroy(&rwlock);
    return status;
}

- 信号量分段：将大的临界区分割成多个小的临界区，每个临界区使用独立的信号量。例如，假设有一个涉及多个数据结构的大临界区，可以为每个数据结构分配一个信号量。注意：如果某个线程需要同时持有多个信号量，所有线程必须按相同的顺序获取，否则可能产生死锁。

#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

/* Two independent semaphores: thread_function() holds sem1 around the data-structure-1 section and sem2 around the data-structure-2 section. main() initializes both with sem_init(). */
sem_t sem1, sem2;

/*
 * Wait on sem, retrying when the wait is interrupted by a signal handler.
 * Returns 0 once the semaphore has been acquired, or -1 with errno set for
 * any failure other than EINTR.
 */
static int wait_uninterrupted(sem_t *sem) {
    int rc;

    do {
        rc = sem_wait(sem);
    } while (rc == -1 && errno == EINTR);
    return rc;
}

/*
 * Worker thread entry point: enters the data-structure-1 critical section
 * under sem1, leaves it, then enters the data-structure-2 critical section
 * under sem2. The two semaphores are never held at the same time.
 *
 * arg is unused. Always returns NULL; if a semaphore cannot be acquired the
 * thread reports the error and stops without touching the protected section.
 */
void* thread_function(void* arg) {
    (void)arg;

    if (wait_uninterrupted(&sem1) != 0) {
        /* Posting without having acquired would inflate the semaphore count. */
        perror("sem_wait(sem1)");
        return NULL;
    }
    // Critical section operating on data structure 1
    /* pthread_t is opaque; on Linux it is an unsigned long, so cast it to match %lu. */
    printf("Thread %lu is accessing data structure 1\n", (unsigned long)pthread_self());
    sem_post(&sem1);

    if (wait_uninterrupted(&sem2) != 0) {
        perror("sem_wait(sem2)");
        return NULL;
    }
    // Critical section operating on data structure 2
    printf("Thread %lu is accessing data structure 2\n", (unsigned long)pthread_self());
    sem_post(&sem2);

    return NULL;
}

/*
 * Demo driver for the split-semaphore example: initializes sem1 and sem2 to
 * 1, starts the worker threads, waits for every thread that was actually
 * started, then destroys both semaphores.
 *
 * Returns 0 on success, 1 if a semaphore could not be initialized or any
 * thread could not be created or joined.
 */
int main(void) {
    enum { THREAD_COUNT = 10 };
    pthread_t threads[THREAD_COUNT];
    int started = 0;
    int status = 0;

    if (sem_init(&sem1, 0, 1) != 0) {
        perror("sem_init(sem1)");
        return 1;
    }
    if (sem_init(&sem2, 0, 1) != 0) {
        perror("sem_init(sem2)");
        sem_destroy(&sem1);
        return 1;
    }

    for (int i = 0; i < THREAD_COUNT; i++) {
        int rc = pthread_create(&threads[i], NULL, thread_function, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for thread %d: %d\n", i, rc);
            status = 1;
            break;
        }
        started++;
    }

    /* Join only the threads that were created; a pthread_t left unset by a failed create must not be joined. */
    for (int i = 0; i < started; i++) {
        int rc = pthread_join(threads[i], NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_join failed for thread %d: %d\n", i, rc);
            status = 1;
        }
    }

    sem_destroy(&sem1);
    sem_destroy(&sem2);
    return status;
}

减少系统调用开销：
- 使用用户态信号量：在用户空间实现简单的信号量机制，减少陷入内核的次数。例如，使用原子操作和条件变量来模拟信号量。注意：互斥锁和条件变量在无竞争时只在用户态操作，但发生竞争或需要阻塞、唤醒线程时仍会通过futex进入内核，因此这种方式并不能完全消除系统调用。

#include <pthread.h>
#include <stdio.h>
#include <stdatomic.h>

/*
 * State of the hand-rolled semaphore used by sem_wait()/sem_post() below:
 *   sem_value - current count, starting at 1 (one holder at a time);
 *   cond      - waiters block here while the count is not positive;
 *   mutex     - held around every read and update of sem_value.
 *
 * ATOMIC_VAR_INIT is deprecated in C17 and removed in C23; plain
 * initialization of an atomic object is equivalent.
 */
atomic_int sem_value = 1;
pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Acquire the hand-rolled semaphore: with the mutex held, sleep on cond until
 * sem_value is positive, then take one unit by decrementing it.
 *
 * NOTE(review): this name is also declared by POSIX <semaphore.h> with a
 * different signature; a translation unit that includes that header cannot
 * contain this definition. Consider renaming together with all callers.
 */
void sem_wait() {
    pthread_mutex_lock(&mutex);
    for (;;) {
        /* Re-check after every wakeup: pthread_cond_wait may return spuriously. */
        if (atomic_load(&sem_value) > 0) {
            break;
        }
        pthread_cond_wait(&cond, &mutex);
    }
    atomic_fetch_sub(&sem_value, 1);
    pthread_mutex_unlock(&mutex);
}

/*
 * Release the hand-rolled semaphore: with the mutex held, give one unit back
 * by incrementing sem_value and wake one thread sleeping on cond.
 *
 * NOTE(review): this name is also declared by POSIX <semaphore.h> with a
 * different signature; a translation unit that includes that header cannot
 * contain this definition. Consider renaming together with all callers.
 */
void sem_post() {
    pthread_mutex_lock(&mutex);

    /* Sequentially consistent increment, the same ordering atomic_fetch_add uses by default. */
    atomic_fetch_add_explicit(&sem_value, 1, memory_order_seq_cst);

    /* Signal while still holding the mutex, so the wakeup happens before the lock is released. */
    pthread_cond_signal(&cond);

    pthread_mutex_unlock(&mutex);
}

/*
 * Worker thread entry point: acquires the hand-rolled semaphore, prints a
 * line identifying the calling thread inside the critical section, then
 * releases the semaphore.
 *
 * arg is unused. Always returns NULL.
 */
void* thread_function(void* arg) {
    (void)arg;

    sem_wait();
    // Critical section
    /* pthread_t is opaque; on Linux it is an unsigned long, so cast it to match %lu. */
    printf("Thread %lu is in critical section\n", (unsigned long)pthread_self());
    sem_post();
    return NULL;
}

/*
 * Demo driver for the hand-rolled semaphore example: starts the worker
 * threads, waits for every thread that was actually started, then destroys
 * the mutex and condition variable.
 *
 * Returns 0 on success, 1 if any thread could not be created or joined.
 */
int main(void) {
    enum { THREAD_COUNT = 10 };
    pthread_t threads[THREAD_COUNT];
    int started = 0;
    int status = 0;

    for (int i = 0; i < THREAD_COUNT; i++) {
        int rc = pthread_create(&threads[i], NULL, thread_function, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for thread %d: %d\n", i, rc);
            status = 1;
            break;
        }
        started++;
    }

    /* Join only the threads that were created; a pthread_t left unset by a failed create must not be joined. */
    for (int i = 0; i < started; i++) {
        int rc = pthread_join(threads[i], NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_join failed for thread %d: %d\n", i, rc);
            status = 1;
        }
    }

    pthread_mutex_destroy(&mutex);
    pthread_cond_destroy(&cond);
    return status;
}

面试题：C语言Linux信号量同步优化

知识考点

面试题答案

可能原因分析

优化方法及关键代码片段