Skip to content

Commit fb8c547

Browse files
committed
important bugfix in clustering
1 parent 74c8d4d commit fb8c547

File tree

5 files changed

+34
-72
lines changed

5 files changed

+34
-72
lines changed
-1.52 KB
Binary file not shown.

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ By default, Fred will automatically determine the number of threads to use. If y
3838

3939
### Curve Simplification
4040

41+
All simplifications are vertex-restricted!
42+
4143
#### weak minimum error simplification
4244
- graph approach from [**Polygonal Approximations of a Curve — Formulations and Algorithms**](https://www.sciencedirect.com/science/article/pii/B9780444704672500114)
4345
- signature: `fred.weak_minimum_error_simplification(fred.Curve, int complexity)`
@@ -65,8 +67,8 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di
6567
- `k`: number of centers
6668
- `l`: maximum complexity of the centers, only used when center_domain is default value
6769
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
68-
- `center_domain`: possible centers, defaults to empty `fred.Curves()`, in this case the input is simplified and used as center domain
6970
- `random_start_center`: determines whether the first center is chosen uniformly at random or the first curve is used as the first center, optional, defaults to true
71+
- `fast_simplification`: determines whether to use the weak minimum error simplification or the faster approximate weak minimum error simplification, defaults to false
7072
- returns: `fred.Clustering_Result` with members
7173
- `value`: objective value
7274
- `time`: running-time
@@ -78,7 +80,7 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di
7880
- `k`: number of centers
7981
- `l`: maximum complexity of the centers, only used when center_domain is default value
8082
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
81-
- `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain
83+
- `fast_simplification`: determines whether to use the weak minimum error simplification or the faster approximate weak minimum error simplification, defaults to false
8284
- returns: `fred.Clustering_Result` with members
8385
- `value`: objective value
8486
- `time`: running-time

include/clustering.hpp

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -128,41 +128,35 @@ struct Clustering_Result {
128128
}
129129
};
130130

131-
Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool local_search = false, const Curves &center_domain = Curves(), const bool random_start_center = true) {
132-
131+
Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool local_search = false, const bool random_start_center = true, const bool fast_simplification = false) {
133132
const auto start = std::chrono::high_resolution_clock::now();
134133
Clustering_Result result;
135134

136135
if (in.empty()) return result;
137136

138137
std::vector<curve_number_t> centers;
139-
Curves &simplified_in = const_cast<Curves&>(center_domain);
140-
bool self_simplify = false;
138+
Curves simplified_in(in.number(), ell, in.dimensions());
141139

142-
if (center_domain.empty()) {
143-
self_simplify = true;
144-
Curves simplified_in_self(in.number(), ell, in.dimensions());
145-
simplified_in = simplified_in_self;
146-
}
147-
140+
auto simplify = [&](const curve_number_t i) {
141+
if (fast_simplification) {
142+
auto simplified_curve = Simplification::approximate_weak_minimum_error_simplification(const_cast<Curve&>(in[i]), ell);
143+
simplified_curve.set_name("Simplification of " + in[i].get_name());
144+
return simplified_curve;
145+
} else {
146+
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[i]));
147+
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
148+
simplified_curve.set_name("Simplification of " + in[i].get_name());
149+
return simplified_curve;
150+
}
151+
};
152+
148153
if (random_start_center) {
149154
Random::Uniform_Random_Generator<double> ugen;
150155
const curve_number_t r = std::floor(simplified_in.size() * ugen.get());
151-
if (self_simplify) {
152-
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[r]));
153-
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
154-
simplified_curve.set_name("Simplification of " + in[r].get_name());
155-
simplified_in[r] = simplified_curve;
156-
}
156+
simplified_in[r] = simplify(r);
157157
centers.push_back(r);
158-
159158
} else {
160-
if (self_simplify) {
161-
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[0]));
162-
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
163-
simplified_curve.set_name("Simplification of " + in[0].get_name());
164-
simplified_in[0] = simplified_curve;
165-
}
159+
simplified_in[0] = simplify(0);
166160
centers.push_back(0);
167161
}
168162

@@ -195,11 +189,8 @@ Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t
195189
std::cout << "found center no. " << i+1 << std::endl;
196190
#endif
197191

198-
if (self_simplify and simplified_in[curr_maxcurve].empty()) {
199-
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[curr_maxcurve]));
200-
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
201-
simplified_curve.set_name("Simplification of " + in[curr_maxcurve].get_name());
202-
simplified_in[curr_maxcurve] = simplified_curve;
192+
if (simplified_in[curr_maxcurve].empty()) {
193+
simplified_in[curr_maxcurve] = simplify(curr_maxcurve);
203194
}
204195
centers.push_back(curr_maxcurve);
205196
}
@@ -229,11 +220,8 @@ Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t
229220
if (std::find(curr_centers.begin(), curr_centers.end(), j) != curr_centers.end()) continue;
230221

231222
// swap
232-
if (self_simplify and simplified_in[j].empty()) {
233-
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[j]));
234-
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
235-
simplified_curve.set_name("Simplification of " + in[j].get_name());
236-
simplified_in[j] = simplified_curve;
223+
if (simplified_in[j].empty()) {
224+
simplified_in[j] = simplify(j);
237225
}
238226
curr_centers[i] = j;
239227
// new cost
@@ -260,8 +248,12 @@ Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t
260248
return result;
261249
}
262250

263-
Clustering_Result kl_median(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const Curves &center_domain = Curves()) {
264-
return kl_center(num_centers, ell, in, distances, true, center_domain, false);
251+
// (k,l)-center entry point: forwards to kl_cluster with the local-search
// phase switched off. random_start_center and fast_simplification are
// passed through unchanged.
Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool random_start_center = true, const bool fast_simplification = false) {
    const bool use_local_search = false;
    return kl_cluster(num_centers, ell, in, distances, use_local_search, random_start_center, fast_simplification);
}
254+
255+
// (k,l)-median entry point: forwards to kl_cluster with the local-search
// phase switched on.
//
// kl_cluster's trailing parameters are, in order, local_search,
// random_start_center, fast_simplification. All three are spelled out here so
// that fast_simplification is not silently bound to the random_start_center
// slot. random_start_center is fixed to false, i.e. the first input curve is
// used as the first center, as this wrapper did before fast_simplification
// was introduced.
Clustering_Result kl_median(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool fast_simplification = false) {
    const bool use_local_search = true;
    const bool random_start_center = false;
    return kl_cluster(num_centers, ell, in, distances, use_local_search, random_start_center, fast_simplification);
}
266258

267259
Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in, const double epsilon, const Curves &center_domain = Curves()) {

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def build_extension(self, ext):
7474

7575
setup(
7676
name='Fred-Frechet',
77-
version='1.7.4',
77+
version='1.7.5',
7878
author='Dennis Rohde',
7979
author_email='[email protected]',
8080
description='A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves.',

src/fred_python_wrapper.cpp

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -45,38 +45,6 @@ bool get_frechet_rounding() {
4545
return fc::round;
4646
}
4747

48-
Clustering::Clustering_Result klcenter(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Clustering::Distance_Matrix &distances, const Curves &center_domain = Curves(), const bool random_start_center = true) {
49-
auto result = Clustering::kl_center(num_centers, ell, in, distances, false, center_domain, random_start_center);
50-
return result;
51-
}
52-
53-
Clustering::Clustering_Result klmedian(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Clustering::Distance_Matrix distances, const Curves &center_domain = Curves()) {
54-
55-
auto result = Clustering::kl_median(num_centers, ell, in, distances, center_domain);
56-
57-
return result;
58-
}
59-
60-
// Clustering::Clustering_Result onemedian_sampling(const curve_size_t ell, Curves &in, const double epsilon, const bool with_assignment = false, const Curves &center_domain = Curves()) {
61-
//
62-
// auto result = Clustering::one_median_sampling(ell, in, epsilon, with_assignment);
63-
//
64-
// return result;
65-
// }
66-
//
67-
// Clustering::Clustering_Result onemedian_exhaustive(const curve_size_t ell, Curves &in, const bool with_assignment = false, const Curves &center_domain = Curves()) {
68-
//
69-
// auto result = Clustering::one_median_exhaustive(ell, in, with_assignment);
70-
//
71-
// return result;
72-
// }
73-
//
74-
//
75-
// Coreset::Onemedian_Coreset onemedian_coreset(const Curves &in, const curve_size_t ell, const double epsilon, const double constant = 1) {
76-
// return Coreset::Onemedian_Coreset(ell, in, epsilon, constant);
77-
// }
78-
//
79-
8048
Curve weak_minimum_error_simplification(const Curve &curve, const curve_size_t l) {
8149
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(curve));
8250
auto scurve = graph.weak_minimum_error_simplification(l);
@@ -224,8 +192,8 @@ PYBIND11_MODULE(backend, m) {
224192

225193
m.def("dimension_reduction", &JLTransform::transform_naive, py::arg("in") = Curves(), py::arg("epsilon") = 0.5, py::arg("empirical_constant") = true);
226194

227-
m.def("discrete_klcenter", &klcenter, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("center_domain") = Curves(), py::arg("random_start_center") = true);
228-
m.def("discrete_klmedian", &klmedian, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("center_domain") = Curves());
195+
m.def("discrete_klcenter", &Clustering::kl_center, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("random_start_center") = true, py::arg("fast_simplification") = false);
196+
m.def("discrete_klmedian", &Clustering::kl_median, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("fast_simplification") = false);
229197

230198
// these are experimental
231199
//m.def("two_two_dtw_one_two_median", &Clustering::two_two_dtw_one_two_median);

0 commit comments

Comments
 (0)