-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathCommunities.h
More file actions
257 lines (196 loc) · 6.6 KB
/
Communities.h
File metadata and controls
257 lines (196 loc) · 6.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#ifndef COMMUNITIES_H
#define COMMUNITIES_H
#include <vector>
#include <set>
#include <algorithm>
#include <utility>
#include <string>
using std::vector;
using std::unique;
using std::sort;
using std::max;
using std::min;
using std::set;
using std::pair;
using std::string;
class Communities;
class Graph;
//一个社团 包含多个结点
struct Community
{
int parent; //等级结构中记录 在上一层中所属的社团编号
//社团内的所有结点
vector<int> nodes;
//社团内结点编号范围
int max_node_id = -1;
int min_node_id = INT_MAX;
//添加一个节点 不排序 更新范围
void add(int n) {
max_node_id = max(max_node_id, n);
min_node_id = min(min_node_id, n);
nodes.push_back(n);
}
//排序和去重
void sort() {
std::sort(nodes.begin(), nodes.end());
nodes.erase(unique(nodes.begin(), nodes.end()), nodes.end());
}
void clear()
{
nodes.clear();
max_node_id = -1;
min_node_id = INT_MAX;
}
//返回社团中结点个数
int size() const { return nodes.size(); }
//pair的两个值分别为 返回值,下标
//在truth中,找和本社团匹配最好的,计算指标 返回指标值和 匹配最好的在truth中的下标
pair<double, int> JaccardPrecision(Communities & truth, FILE * fp);
pair<double, int> Precision(Communities & truth, FILE * fp);
//两个社团大小比较,用于排序
//结点较多的社团排名靠前 结点数相同时,第一个结点编号小的靠前
bool operator<(Community & e) {
//return nodes.size() > e.nodes.size();
if (nodes.size() > e.nodes.size())
return true;
else if (nodes.size() < e.nodes.size())
return false;
else
return nodes[0] < e.nodes[0];
}
bool isSubsetOf(Community & c);
};
//一种划分,包含多个社团
class Communities
{
public:
//所有的社团
vector< Community > comms;
//modularity值,在调用calcModularity时更新
double Q = 0;
vector<double> community_Q;
vector<double> comm_inter_edge_num;
vector<double> comm_out_edge_num;
double getCommQ(int i)
{
if (community_Q.empty())
{
return -999;
}
return community_Q[i] / (double)comms[i].size();
}
//转换cid到comms
//cid[i]表示结点i所在社团
void getCommsByCid(vector< Community > & comms, const vector<int> &cid);
//最小和最大结点编号
int max_node_id = -1;
int min_node_id = INT_MAX;
//有多少层等级结构
int layer = 0;
//等级结构
//layers[0]是根
//layers[layers.size()-1]是叶子
vector<vector<Community> > layers;
private:
//计算时的临时变量
int nmi_max_node;
bool nmi_half_more_nodes_positive;
public:
//设置最大节点编号
void setMaxNodeid(int max_nodeid) { max_node_id = max_nodeid; }
//清空
void clear()
{
comms.clear();
layers.clear();
max_node_id = -1;
min_node_id = INT_MAX;
layer = 0;
Q = 0;
community_Q.clear();
}
//添加一个社团 不排序 更新编号范围
void addCommunity(Community c)
{
min_node_id = min(min_node_id, c.min_node_id);
max_node_id = max(max_node_id, c.max_node_id);
comms.push_back(c);
}
//添加一个社团 不排序 更新编号范围
void addCommunity(Community c, int leve)
{
min_node_id = min(min_node_id, c.min_node_id);
max_node_id = max(max_node_id, c.max_node_id);
layers[leve].push_back(c);
}
//把另一种划分的所有社团加到后面 不排序 更新编号范围
void addCommunities(Communities & cs)
{
for (size_t i = 0; i < cs.comms.size(); ++i)
{
addCommunity(cs.comms[i]);
}
}
//去掉结点数小于size的社团
void removeSmallComm(vector< Community > & comm, int size);
//去掉结点数大于size的社团
void removeBigComm(vector< Community > & comm, int size);
//打印社团信息 show_detail表示显示每个社团结点个数 show_nodes表示显示每个社团内的所有结点
string print(bool show_detail = false, bool show_nodes = false);
//社团平均大小
double averageSize();
bool save(const char * fn);
bool save(string fn);
int size() const { return comms.size(); }
Communities merge(Communities & other);
//不重复结点个数
int nodesNum();
//所有社团结点数之和
int sizeOfCommsSum() const;
//从文件加载社团
//大于2个结点的社团才会被保留
//普通文件加载 每行一个社团
void load(string fn) { load(fn.c_str()); }
void load(const char * fn);
void loadInfomap(const char * fn);
void loadInfomap0135(const char * fn);
void loadLinkComm(const char * fn);
void loadOSLOM2(const char * fn);
void loadGCE(const char * fn);
void loadDemon(const char * fn);
void loadCFinder(const char * fn);
void loadMod(const char * fn, int level = -1);
void loadCid(vector< Community > & comms, const char * fn);
vector<vector<int> > getCommsOfEveryid(int max_id = 0) const;
static vector<int> intersection(vector<int> & a, vector<int> & b);
static vector<int> difference(vector<int> & a, vector<int> & b);
static vector<int> setunion(vector<int> & a, vector<int> & b);
friend class Graph;
double calcModularity(const Graph & g); //计算模块度
double calcNMI(Communities & cs, string outdir = "") ;
double H(Community & X) const;
double H(Communities & X);
double H_Xi_joint_Yj(Community & c1, Community & c2);
double H_Xi_given_Yj(Community & Xi, Community & Yj);
double H_Xi_given_Y(Community & c, Communities & cs);
double H_Xi_given_Y_norm(Community & c, Communities & cs);
double H_X_given_Y_norm(Communities & X, Communities & Y); // = min H_c_c
double h(double w, double n) const;
double h(double x) const;
static double Precision_unweighted(Communities & Detected, Communities & truth, FILE * fp = 0);
static double Precision_weighted(Communities & Detected, Communities & truth);
static double F1_unweighted(Communities & Detected, Communities & truth);
static double F1_weighted(Communities & Detected, Communities & truth);
static double Jaccard_Precision_unweighted(Communities & Detected, Communities & truth, FILE * fp = 0);
static double Jaccard_Precision_weighted(Communities & Detected, Communities & truth);
static double Jaccard_F1_unweighted(Communities & Detected, Communities & truth);
static double Jaccard_F1_weighted(Communities & Detected, Communities & truth);
static double f1(Community & c1, Community & c2);
static pair<double, int> f1(Community &c1, Communities &cs, FILE * fp);
static pair<double, int> p(Community &c1, Communities &cs);
static double f1(Communities &truth, Communities &detected, FILE * fp = 0);
static double wf1(Communities &truth, Communities &detected);
static double findSimilar(Community &c1, Community &c2);
static pair<double, int> findSimilar(Community &c1, Communities &cs);
};
#endif