面试题：C++ 基于广度优先搜索实现复杂网络的社区发现

#include <iostream>
#include <vector>
#include <unordered_map>
#include <queue>
#include <algorithm>

// 定义边的结构体
struct Edge {
    std::string src;
    std::string dst;
    Edge(const std::string& s, const std::string& d) : src(s), dst(d) {}
};

// 定义图的数据结构
class Graph {
public:
    void addEdge(const std::string& src, const std::string& dst) {
        edges.emplace_back(src, dst);
        adjList[src].push_back(dst);
        adjList[dst].push_back(src);
        if (!nodeDegree.count(src)) {
            nodeDegree[src] = 0;
        }
        if (!nodeDegree.count(dst)) {
            nodeDegree[dst] = 0;
        }
        nodeDegree[src]++;
        nodeDegree[dst]++;
    }

    std::unordered_map<std::string, std::vector<std::string>> adjList;
    std::vector<Edge> edges;
    std::unordered_map<std::string, int> nodeDegree;
};

// 广度优先搜索
std::vector<std::string> bfs(const Graph& graph, const std::string& start) {
    std::unordered_map<std::string, bool> visited;
    std::vector<std::string> community;
    std::queue<std::string> q;

    q.push(start);
    visited[start] = true;

    while (!q.empty()) {
        std::string current = q.front();
        q.pop();
        community.push_back(current);

        for (const std::string& neighbor : graph.adjList.at(current)) {
            if (!visited[neighbor]) {
                visited[neighbor] = true;
                q.push(neighbor);
            }
        }
    }
    return community;
}

// 计算模块度
double calculateModularity(const Graph& graph, const std::vector<std::vector<std::string>>& communities) {
    double m = graph.edges.size();
    double Q = 0.0;

    for (const auto& community : communities) {
        double ki_sum = 0.0;
        for (const std::string& node : community) {
            ki_sum += graph.nodeDegree.at(node);
        }
        for (const std::string& node1 : community) {
            for (const std::string& node2 : community) {
                bool hasEdge = std::find(graph.adjList.at(node1).begin(), graph.adjList.at(node1).end(), node2) != graph.adjList.at(node1).end();
                Q += (hasEdge? 1 : 0) - (graph.nodeDegree.at(node1) * graph.nodeDegree.at(node2) / (2.0 * m));
            }
        }
    }
    Q /= (2.0 * m);
    return Q;
}

// 社区发现算法
std::vector<std::vector<std::string>> communityDetection(Graph& graph) {
    std::unordered_map<std::string, bool> visited;
    std::vector<std::vector<std::string>> communities;

    for (const auto& entry : graph.adjList) {
        if (!visited[entry.first]) {
            std::vector<std::string> community = bfs(graph, entry.first);
            communities.push_back(community);
            for (const std::string& node : community) {
                visited[node] = true;
            }
        }
    }

    // 模块度优化（简单示例，实际可采用更复杂策略）
    double bestQ = calculateModularity(graph, communities);
    bool improved = true;
    while (improved) {
        improved = false;
        for (size_t i = 0; i < communities.size(); ++i) {
            for (size_t j = i + 1; j < communities.size(); ++j) {
                std::vector<std::vector<std::string>> newCommunities = communities;
                newCommunities[i].insert(newCommunities[i].end(), newCommunities[j].begin(), newCommunities[j].end());
                newCommunities.erase(newCommunities.begin() + j);
                double newQ = calculateModularity(graph, newCommunities);
                if (newQ > bestQ) {
                    bestQ = newQ;
                    communities = newCommunities;
                    improved = true;
                    break;
                }
            }
            if (improved) break;
        }
    }

    return communities;
}

int main() {
    Graph graph;
    // 假设数据读入
    graph.addEdge("A", "B");
    graph.addEdge("B", "C");
    graph.addEdge("D", "E");

    std::vector<std::vector<std::string>> communities = communityDetection(graph);

    for (const auto& community : communities) {
        std::cout << "Community: ";
        for (const std::string& node : community) {
            std::cout << node << " ";
        }
        std::cout << std::endl;
    }

    return 0;
}

数据结构设计：
- Edge结构体用于存储边的信息，包含源节点和目标节点。
- Graph类用于存储整个图，包含邻接表adjList，边的集合edges以及每个节点的度数nodeDegree。
广度优先搜索（BFS）：
- bfs函数实现了广度优先搜索，从给定的起始节点开始，遍历图并返回包含所有可达节点的社区。
模块度计算：
- calculateModularity函数计算给定社区划分下的模块度。模块度的计算公式为：$Q = \frac{1}{2m}\sum_{ij} \left( A_{ij} - \frac{k_ik_j}{2m} \right) \delta(c_i, c_j)$，其中$A_{ij}$是节点$i$和$j$之间是否有边，$k_i$是节点$i$的度数，$m$是图中边的总数，$\delta(c_i, c_j)$当节点$i$和$j$属于同一社区时为1，否则为0。
社区发现算法：
- communityDetection函数首先通过BFS对图进行初步的社区划分。
- 然后通过简单的合并策略对模块度进行优化，尝试合并不同的社区，选择模块度增加的合并操作，直到模块度不再增加。
性能考虑：
- 在大规模网络中，BFS的时间复杂度为$O(V + E)$，其中$V$是节点数，$E$是边数。
- 模块度优化部分的复杂度较高，当前实现为$O(C^2)$，$C$是社区的数量，在大规模网络下可以考虑更高效的模块度优化算法，如 Louvain 算法等，其复杂度相对较低，更适合大规模网络。

以上代码只是一个基本的实现框架，实际应用中可能需要根据具体数据集的特点进行进一步优化。

面试题：C++ 基于广度优先搜索实现复杂网络的社区发现

知识考点

面试题答案