K-Nearest Neighbor Graph Testing Library
C++ Python library that is able to import exisiting NN-structures; Implements Property Testing Algorithm that rejects with high probability if queries to given structure are epsilon-far from giving a K-Nearest Neighbor Graph
knn_tester.hpp
1 /*
2 Copyright 2018 Dennis Rohde
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5 
6 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7 
8 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9 */
10 
11 #pragma once
12 
13 #include <cmath>
14 #include <limits>
15 
16 #include <boost/chrono/include.hpp>
17 
18 #include "knn_graph.hpp"
19 #include "random.hpp"
20 #include "oracle.hpp"
21 
23 public:
24  bool decision;
25  double total_time;
26  double query_time;
27 };
28 
29 template <typename V = double>
30 class KNN_Tester {
32 
33 protected:
34  bool auto_c1;
35 
36 public:
42  double c1 = 1;
43 
49  double c2 = 1;
50 
51  KNN_Tester(const bool auto_c1 = true) : auto_c1{auto_c1} {}
52 
60  static double c1_approximate(const KNN_Graph<V> &graph) {
61  auto delta = graph.dimension();
62  return std::pow(2, 0.401 * delta * (1 + 2.85 * delta / std::pow(delta, 1.4)));
63  }
64 
72  virtual Tester_Result test(const KNN_Graph<V> &graph, const double d, const double epsilon = 0.001) {
73  if (auto_c1) this->c1 = c1_approximate(graph);
74  const auto delta = graph.dimension();
75  const auto k = graph.get_k();
76  const auto n = graph.number_vertices();
77  const auto s = std::ceil(100 * k * sqrt(n) / epsilon * c2);
78  const auto t = std::ceil(log(10) * c1 * k * sqrt(n));
79 
81 
82  const auto S = urandom_gen.get(s);
83  const auto T = urandom_gen.get(t);
84 
85  std::cout << "|S| = " << s << std::endl;
86  std::cout << "|T| = " << t << std::endl;
87 
88  bool wrongly_connected_found = false;
89  double distn, distw;
90 
91  #pragma omp parallel for shared(wrongly_connected_found, distn, distw)
92  for (unsigned long long i = 0; i < S.size(); ++i) {
93  if (wrongly_connected_found) continue;
94  const unsigned long long v = floor(S[i] * n);
95  const auto v_value = graph.get_vertex(v);
96  if (graph.number_neighbors(v) > 100 * k * d / epsilon) continue;
97  const auto &neighbors = graph.get_edges()[v];
98  V distN = 0;
99  for (const auto &neighbor: neighbors) {
100  auto dist = KNN_Graph<V>::euclidean_distance(v_value, graph.get_vertex(neighbor));
101  if (dist > distN) {
102  distN = dist;
103  }
104  }
105  for (unsigned long long j = 0; j < T.size(); ++j) {
106  if (wrongly_connected_found) {
107  continue;
108  }
109  unsigned long long w = floor(T[j] * n);
110  auto dist = KNN_Graph<V>::euclidean_distance(v_value, graph.get_vertex(w));
111  if (v != w and distN - dist > std::numeric_limits<V>::epsilon()) {
112  auto is_neighbor = false;
113  for (const auto &neighbor: neighbors) {
114  if (w == neighbor) {
115  is_neighbor = true;
116  break;
117  }
118  }
119  if (not is_neighbor) {
120  #pragma omp critical
121  {
122  if (not wrongly_connected_found) {
123  wrongly_connected_found = true;
124  distn = distN;
125  distw = dist;
126  }
127  }
128  }
129  }
130  }
131  }
132  Tester_Result result;
133  if (wrongly_connected_found) {
134  std::cout << "Reject!" << std::endl;
135  std::cout << distw << " < " << distn << std::endl;
136  result.decision = false;
137  } else {
138  std::cout << "Accept!" << std::endl;
139  result.decision = true;
140  }
141  return result;
142  }
143 
144  inline auto get_auto_c1() const {
145  return auto_c1;
146  }
147 
148  inline void set_auto_c1(const bool auto_c1) {
149  this->auto_c1 = auto_c1;
150  }
151 };
152 
153 template <typename V = double>
154 class KNN_Tester_Oracle : public KNN_Tester<V> {
155  Query_Oracle<V> Oracle;
156 
157 public:
158  KNN_Tester_Oracle(const Query_Oracle<V> &oracle) : Oracle{std::move(oracle)} {}
159 
167  Tester_Result test(const KNN_Graph<V> &graph, const double d, const double epsilon = 0.001) {
168  auto start = boost::chrono::process_real_cpu_clock::now();
169  Oracle.reset_timer();
170  if (this->auto_c1) this->c1 = this->c1_approximate(graph);
171  const auto delta = graph.dimension();
172  const auto k = graph.get_k();
173  const auto n = graph.number_vertices();
174  const auto s = std::ceil(100 * k * sqrt(n) / epsilon * this->c2);
175  const auto t = std::ceil(log(10) * this->c1 * k * sqrt(n));
176 
178 
179  const auto S = urandom_gen.get(s);
180  const auto T = urandom_gen.get(t);
181 
182  std::cout << "|S| = " << s << std::endl;
183  std::cout << "|T| = " << t << std::endl;
184 
185  bool wrongly_connected_found = false;
186  double distn, distw;
187 
188  #pragma omp parallel for shared(wrongly_connected_found, distn, distw)
189  for (unsigned long long i = 0; i < S.size(); ++i) {
190  if (wrongly_connected_found) continue;
191  const unsigned long long v = floor(S[i] * n);
192  const auto v_value = graph.get_vertex(v);
193  Relation<V> neighbors;
194  #pragma omp critical
195  {
196  neighbors = Oracle.query(v);
197  }
198  if (neighbors.size() > 100 * k * d / epsilon) continue;
199  V distN = 0;
200  for (const auto &neighbor: neighbors) {
201  auto dist = KNN_Graph<V>::euclidean_distance(v_value, neighbor);
202  if (dist > distN) {
203  distN = dist;
204  }
205  }
206  for (unsigned long long j = 0; j < T.size(); ++j) {
207  if (wrongly_connected_found) {
208  continue;
209  }
210  unsigned long long w = floor(T[j] * n);
211  auto dist = KNN_Graph<V>::euclidean_distance(v_value, graph.get_vertex(w));
212  if (v != w and distN - dist > std::numeric_limits<V>::epsilon()) {
213  auto is_neighbor = false;
214  for (const auto &neighbor: neighbors) {
215  if (graph.get_vertex(w) == neighbor) {
216  is_neighbor = true;
217  break;
218  }
219  }
220  if (not is_neighbor) {
221  #pragma omp critical
222  {
223  if (not wrongly_connected_found) {
224  wrongly_connected_found = true;
225  distn = distN;
226  distw = dist;
227  }
228  }
229  }
230  }
231  }
232  }
233  auto stop = boost::chrono::process_real_cpu_clock::now();
234  auto total_time = (stop-start).count();
235  auto query_time = Oracle.time();
236 
237  Tester_Result result;
238  result.total_time = total_time / 1000000000.0;
239  result.query_time = query_time / 1000000000.0;
240  if (wrongly_connected_found) {
241  std::cout << "Reject!" << std::endl;
242  std::cout << distw << " < " << distn << std::endl;
243  result.decision = false;
244  } else {
245  std::cout << "Accept!" << std::endl;
246  result.decision = true;
247  }
248  return result;
249  }
250 };
251 
252 template <typename V = double>
253 class KNN_Improver : public KNN_Tester<V> {
254 public:
262  auto improve(KNN_Graph<V> &graph, const double d, const double epsilon = 0.001) {
263  auto result = 0ul;
264  if (this->auto_c1) this->c1 = this->c1_approximate(graph);
265  const auto delta = graph.dimension();
266  const auto k = graph.get_k();
267  const auto n = graph.number_vertices();
268  const auto s = ceil(100 * k * sqrt(n) / epsilon * this->c2);
269  const auto t = ceil(log(10) * this->c1 * k * sqrt(n));
270 
272 
273  const auto S = urandom_gen.get(s);
274  const auto T = urandom_gen.get(t);
275 
276  std::cout << "|S| = " << s << std::endl;
277  std::cout << "|T| = " << t << std::endl;
278 
279  #pragma omp parallel for shared(result)
280  for (unsigned long long i = 0; i < S.size(); ++i) {
281  const unsigned long long v = floor(S[i] * n);
282  const auto v_value = graph.get_vertex(v);
283  if (graph.number_neighbors(v) > 100 * k * d / epsilon) continue;
284  auto &neighbors = graph.get_edges()[v];
285  V distN = 0;
286  unsigned long long furthest = 0;
287  for (const auto &neighbor: neighbors) {
288  auto dist = KNN_Graph<V>::euclidean_distance(v_value, graph.get_vertex(neighbor));
289  if (dist > distN) {
290  distN = dist;
291  furthest = neighbor;
292  }
293  }
294  for (unsigned long long j = 0; j < T.size(); ++j) {
295  unsigned long long w = floor(T[j] * n);
296  auto dist = KNN_Graph<V>::euclidean_distance(v_value, graph.get_vertex(w));
297  if (v != w and distN - dist > std::numeric_limits<V>::epsilon()) {
298  auto is_neighbor = false;
299  for (const auto &neighbor: neighbors) {
300  if (w == neighbor) {
301  is_neighbor = true;
302  break;
303  }
304  }
305  if (not is_neighbor) {
306  #pragma omp critical
307  {
308  graph.get_edges()[v][furthest] = w;
309  ++result;
310  distN = 0;
311  for (const auto &neighbor: neighbors) {
312  auto distF = KNN_Graph<V>::euclidean_distance(v_value, graph.get_vertex(neighbor));
313  if (distF > distN) {
314  distN = distF;
315  furthest = neighbor;
316  }
317  }
318  }
319  }
320  }
321  }
322  }
323  return result;
324  }
325 };
Definition: knn_tester.hpp:154
Definition: knn_graph.hpp:25
Definition: knn_tester.hpp:22
Definition: knn_tester.hpp:253
Definition: relation.hpp:24
auto improve(KNN_Graph< V > &graph, const double d, const double epsilon=0.001)
Definition: knn_tester.hpp:262
static double c1_approximate(const KNN_Graph< V > &graph)
Definition: knn_tester.hpp:60
Definition: random.hpp:18
Definition: knn_tester.hpp:30
virtual Tester_Result test(const KNN_Graph< V > &graph, const double d, const double epsilon=0.001)
Definition: knn_tester.hpp:72
Tester_Result test(const KNN_Graph< V > &graph, const double d, const double epsilon=0.001)
Definition: knn_tester.hpp:167