-
Notifications
You must be signed in to change notification settings - Fork 0
/
dbscan.cpp
307 lines (266 loc) · 10.8 KB
/
dbscan.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#include "dbscan.hpp"
#include "Hit.hpp"
#include <algorithm>
#include <cassert>
#include <limits>
namespace dbscan {
//======================================================================
// Count the neighbours of `q` within `eps` in time among `hits`, which
// must be sorted by time. Each accepted candidate is recorded on `q`
// via Hit::add_potential_neighbour. Returns how many candidates were
// accepted.
int
neighbours_sorted(const std::vector<Hit*>& hits, Hit& q, float eps, int minPts)
{
    int n_accepted = 0;
    const float window_lo = q.time - eps;
    const float window_hi = q.time + eps;
    // Walk backwards from the most recent hit, since we will ~always be
    // adding a hit at recent times, so the window sits near the end
    for (auto rit = hits.rbegin(); rit != hits.rend(); ++rit) {
        Hit* const candidate = *rit;
        if (candidate->time > window_hi) {
            continue; // still later than the window; keep walking back
        }
        if (candidate->time < window_lo) {
            break; // sorted input: everything from here on is too early
        }
        if (q.add_potential_neighbour(candidate, eps, minPts)) {
            ++n_accepted;
        }
    }
    return n_accepted;
}
//======================================================================
// Try to attach `new_hit` to this cluster: it is added iff it is a
// neighbour of at least one hit already in the cluster. Hits that gain
// `new_hit` as a neighbour have their connectedness refreshed. Returns
// whether the hit was added.
bool
Cluster::maybe_add_new_hit(Hit* new_hit, float eps, int minPts)
{
    bool added_any = false;
    // Hits in this cluster earlier than (new_hit->time - eps) can't
    // possibly be neighbours, so skip straight to the first candidate
    // in the time-sorted hit list
    auto first_candidate = std::lower_bound(
        hits.begin(), hits.end(), new_hit->time - eps, time_comp_lower);
    for (auto it = first_candidate; it != hits.end(); ++it) {
        Hit* existing = *it;
        if (!existing->add_potential_neighbour(new_hit, eps, minPts)) {
            continue;
        }
        added_any = true;
        // Gaining a neighbour may promote `existing` to a core point
        existing->connectedness = (existing->neighbours.size() + 1 >= minPts)
                                      ? Connectedness::kCore
                                      : Connectedness::kEdge;
    }
    if (added_any) {
        add_hit(new_hit);
    }
    return added_any;
}
//======================================================================
// Insert `h` into this cluster and update the cluster's bookkeeping:
// the hit's cluster index, the latest hit time, and the most recent
// core point.
void
Cluster::add_hit(Hit* h)
{
    hits.insert(h);
    h->cluster = index;
    if (h->time > latest_time) {
        latest_time = h->time;
    }
    // Track the most recent core point seen in this cluster
    const bool is_core = (h->connectedness == Connectedness::kCore);
    if (is_core &&
        (latest_core_point == nullptr || latest_core_point->time < h->time)) {
        latest_core_point = h;
    }
}
//======================================================================
// Move every hit of `other` into this cluster, leaving `other` empty
// and marked complete (used when a new hit bridges two clusters).
void
Cluster::steal_hits(Cluster& other)
{
    // TODO: an explicit "merge" (insert other's hits at the end, then
    // std::inplace_merge) might save some reallocations of the
    // container
    for (Hit* stolen : other.hits) {
        assert(stolen);
        add_hit(stolen);
    }
    other.hits.clear();
    other.completeness = Completeness::kComplete;
}
//======================================================================
// Starting from `seed_hit` (already a member of `cluster`), walk the
// neighbour graph and pull every density-reachable hit into `cluster`.
// Neighbours of core points are explored transitively; non-core
// (border) points are added but not expanded, as in classic DBSCAN.
void
IncrementalDBSCAN::cluster_reachable(Hit* seed_hit, Cluster& cluster)
{
    // Loop over all neighbours (and the neighbours of core points, and so on)
    std::vector<Hit*> seedSet(seed_hit->neighbours.begin(),
                              seed_hit->neighbours.end());
    while (!seedSet.empty()) {
        Hit* q = seedSet.back();
        seedSet.pop_back();
        // Change noise to a border point
        if (q->connectedness == Connectedness::kNoise) {
            cluster.add_hit(q);
        }
        // Already claimed by some cluster -- including by the add_hit
        // just above, which sets q->cluster -- so don't re-add or
        // expand it. The ordering of these two checks is load-bearing
        if (q->cluster != kUndefined) {
            continue;
        }
        cluster.add_hit(q);
        // If q is a core point, add its neighbours to the search list.
        // The +1 presumably counts q itself, per the usual DBSCAN
        // minPts convention -- TODO confirm against Hit.hpp
        if (q->neighbours.size() + 1 >= m_minPts) {
            q->connectedness = Connectedness::kCore;
            seedSet.insert(
                seedSet.end(), q->neighbours.begin(), q->neighbours.end());
        }
    }
}
//======================================================================
void
IncrementalDBSCAN::add_point(float time, float channel, std::vector<Cluster>* completed_clusters)
{
Hit& new_hit=m_hit_pool[m_pool_end];
new_hit.reset(time, channel);
++m_pool_end;
if(m_pool_end==m_hit_pool.size()) m_pool_end=0;
add_hit(&new_hit, completed_clusters);
}
//======================================================================
// Insert `new_hit` into the incremental clustering: find its
// neighbours, attach it to a new or existing cluster (merging clusters
// it bridges), promote neighbours that become core points, and finally
// move clusters that can no longer grow into `completed_clusters` (if
// that pointer is non-null).
void
IncrementalDBSCAN::add_hit(Hit* new_hit, std::vector<Cluster>* completed_clusters)
{
    // TODO: this should be a member variable, not a static, in case
    // there are multiple IncrementalDBSCAN instances
    static int next_cluster_index = 0;
    m_hits.push_back(new_hit);
    // NOTE(review): taken as the latest time unconditionally, so this
    // presumably assumes hits arrive in non-decreasing time order --
    // confirm against callers
    m_latest_time = new_hit->time;
    // All the clusters that this hit neighboured. If there are
    // multiple clusters neighbouring this hit, we'll merge them at
    // the end
    std::set<int> clusters_neighbouring_hit;
    // Find all the hit's neighbours
    neighbours_sorted(m_hits, *new_hit, m_eps, m_minPts);
    for (auto neighbour : new_hit->neighbours) {
        if (neighbour->cluster != kUndefined && neighbour->cluster != kNoise &&
            neighbour->neighbours.size() + 1 >= m_minPts) {
            // This neighbour is a core point in a cluster. Add the cluster to the list of
            // clusters that will contain this hit
            clusters_neighbouring_hit.insert(neighbour->cluster);
        }
    }
    if (clusters_neighbouring_hit.empty()) {
        // This hit didn't match any existing cluster. See if we can
        // make a new cluster out of it. Otherwise mark it as noise
        if (new_hit->neighbours.size() + 1 >= m_minPts) {
            // std::cout << "New cluster starting at hit time " << new_hit->time << " with " << new_hit->neighbours.size() << " neighbours" << std::endl;
            new_hit->connectedness = Connectedness::kCore;
            // Map key and Cluster index are both next_cluster_index
            auto new_it = m_clusters.emplace_hint(
                m_clusters.end(), next_cluster_index, next_cluster_index);
            Cluster& new_cluster = new_it->second;
            new_cluster.completeness = Completeness::kIncomplete;
            new_cluster.add_hit(new_hit);
            next_cluster_index++;
            // Pull in everything density-reachable from the new core point
            cluster_reachable(new_hit, new_cluster);
        }
        else{
            // std::cout << "New hit time " << new_hit->time << " with " << new_hit->neighbours.size() << " neighbours is noise" << std::endl;
        }
    } else {
        // This hit neighboured at least one cluster. Add the hit and
        // its noise neighbours to the first cluster in the list, then
        // merge the rest of the clusters into it
        auto index_it = clusters_neighbouring_hit.begin();
        auto it = m_clusters.find(*index_it);
        assert(it != m_clusters.end());
        Cluster& cluster = it->second;
        // std::cout << "Adding hit time " << new_hit->time << " with " << new_hit->neighbours.size() << " neighbours to existing cluster" << std::endl;
        cluster.add_hit(new_hit);
        // TODO: this seems wrong: we're adding this hit's neighbours
        // to the cluster even if this hit isn't a core point, but if
        // I wrap the whole thing in "if(new_hit is core)" then the
        // results differ from classic DBSCAN
        for (auto q : new_hit->neighbours) {
            if (q->cluster == kUndefined || q->cluster == kNoise) {
                // std::cout << "    Adding hit time " << q->time << " to existing cluster" << std::endl;
                cluster.add_hit(q);
            }
            // If the neighbouring hit q has exactly m_minPts
            // neighbours, it must have become a core point by the
            // addition of new_hit. Add q's neighbours to the cluster
            if(q->neighbours.size() + 1 == m_minPts){
                for (auto r : q->neighbours) {
                    cluster.add_hit(r);
                }
            }
        }
        // Merge every other neighbouring cluster into the first one;
        // steal_hits empties them and marks them kComplete so they get
        // erased in the sweep below
        ++index_it;
        for (; index_it != clusters_neighbouring_hit.end(); ++index_it) {
            auto other_it = m_clusters.find(*index_it);
            assert(other_it != m_clusters.end());
            Cluster& other_cluster = other_it->second;
            cluster.steal_hits(other_cluster);
        }
    }
    // Last case: new_hit and its neighbour are both noise, but the
    // addition of new_hit makes the neighbour a core point. So we
    // start a new cluster at the neighbour, and walk out from there
    for (auto& neighbour : new_hit->neighbours) {
        if(neighbour->neighbours.size() + 1 >= m_minPts){
            // std::cout << "new_hit's neighbour at " << neighbour->time << " has " << neighbour->neighbours.size() << " neighbours, so is core" << std::endl;
            if(neighbour->cluster==kNoise || neighbour->cluster==kUndefined){
                if(new_hit->cluster==kNoise || new_hit->cluster==kUndefined){
                    auto new_it = m_clusters.emplace_hint(
                        m_clusters.end(), next_cluster_index, next_cluster_index);
                    Cluster& new_cluster = new_it->second;
                    new_cluster.completeness = Completeness::kIncomplete;
                    new_cluster.add_hit(neighbour);
                    next_cluster_index++;
                    cluster_reachable(neighbour, new_cluster);
                }
            }
        }
        else {
            // std::cout << "new_hit's neighbour at " << neighbour->time << " has " << neighbour->neighbours.size() << " neighbours, so is NOT core" << std::endl;
        }
    }
    // Delete any completed clusters from the list. Put them in the
    // `completed_clusters` vector, if that vector was passed. A
    // cluster is complete once no future hit can be its neighbour,
    // ie its latest hit is more than eps before the latest time seen
    auto clust_it = m_clusters.begin();
    while (clust_it != m_clusters.end()) {
        Cluster& cluster = clust_it->second;
        if (cluster.latest_time < m_latest_time - m_eps) {
            cluster.completeness = Completeness::kComplete;
        }
        if (cluster.completeness == Completeness::kComplete) {
            if(completed_clusters){
                // TODO: room for std::move here?
                //
                // Clusters that got merged into another cluster had
                // their hits cleared, and were set kComplete, by
                // steal_hits
                if(cluster.hits.size()!=0){
                    completed_clusters->push_back(cluster);
                }
            }
            clust_it = m_clusters.erase(clust_it);
            continue;
        } else {
            ++clust_it;
        }
    }
}
// Drop hits from m_hits that are too old to matter to any current
// cluster, bounding memory use. Keeps a safety margin of 10*eps before
// the earliest hit still held by a cluster.
void
IncrementalDBSCAN::trim_hits()
{
    // Find the earliest time of a hit in any cluster in the list (active or
    // not). Guard against empty hit sets: a cluster whose hits were
    // stolen in a merge is momentarily empty, and dereferencing
    // begin() of an empty set would be undefined behaviour
    float earliest_time = std::numeric_limits<float>::max();
    for (auto& cluster : m_clusters) {
        if (!cluster.second.hits.empty()) {
            earliest_time =
                std::min(earliest_time, (*cluster.second.hits.begin())->time);
        }
    }
    // If there were no (non-empty) clusters, fall back to the latest
    // time seen (otherwise earliest_time would still be FLT_MAX and
    // nothing would ever be trimmed)
    if (earliest_time == std::numeric_limits<float>::max()) {
        earliest_time = m_latest_time;
    }
    // m_hits is time-sorted, so everything before the cutoff is a
    // contiguous prefix
    auto last_it = std::lower_bound(m_hits.begin(),
                                    m_hits.end(),
                                    earliest_time - 10 * m_eps,
                                    time_comp_lower);
    m_hits.erase(m_hits.begin(), last_it);
}
}
// Local Variables:
// mode: c++
// c-basic-offset: 4
// c-file-style: "linux"
// End: