mdds
segment_tree.hpp
1 /*************************************************************************
2  *
3  * Copyright (c) 2010-2015 Kohei Yoshida
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  *
26  ************************************************************************/
27 
28 #ifndef INCLUDED_MDDS_SEGMENTTREE_HPP
29 #define INCLUDED_MDDS_SEGMENTTREE_HPP
30 
31 #include "mdds/node.hpp"
32 #include "mdds/global.hpp"
33 
34 #include <vector>
35 #include <iostream>
36 #include <map>
37 #include <unordered_map>
38 #include <memory>
39 
40 #ifdef MDDS_UNIT_TEST
41 #include <sstream>
42 #endif
43 
44 namespace mdds {
45 
46 template<typename _Key, typename _Value>
47 class rectangle_set;
48 
49 template<typename _Key, typename _Value>
51 {
52  friend class rectangle_set<_Key, _Value>;
53 public:
54  typedef _Key key_type;
55  typedef _Value value_type;
56  typedef size_t size_type;
57  typedef ::std::vector<value_type> search_result_type;
58 
59 #ifdef MDDS_UNIT_TEST
60  struct segment_data
61  {
62  key_type begin_key;
63  key_type end_key;
64  value_type pdata;
65 
66  segment_data(key_type _beg, key_type _end, value_type p) :
67  begin_key(_beg), end_key(_end), pdata(p) {}
68 
69  bool operator==(const segment_data& r) const
70  {
71  return begin_key == r.begin_key && end_key == r.end_key && pdata == r.pdata;
72  }
73 
74  bool operator!=(const segment_data& r) const
75  {
76  return !operator==(r);
77  }
78  };
79 
80  struct segment_map_printer : public ::std::unary_function< ::std::pair<value_type, ::std::pair<key_type, key_type> >, void>
81  {
82  void operator() (const ::std::pair<value_type, ::std::pair<key_type, key_type> >& r) const
83  {
84  using namespace std;
85  cout << r.second.first << "-" << r.second.second << ": " << r.first->name << endl;
86  }
87  };
88 #endif
89 
90 public:
91  typedef ::std::vector<value_type> data_chain_type;
92  typedef std::unordered_map<value_type, ::std::pair<key_type, key_type> > segment_map_type;
93  typedef ::std::map<value_type, ::std::pair<key_type, key_type> > sorted_segment_map_type;
94 
96  {
97  key_type low;
98  key_type high;
99  data_chain_type* data_chain;
100 
101  bool operator== (const nonleaf_value_type& r) const
102  {
103  return low == r.low && high == r.high && data_chain == r.data_chain;
104  }
105  };
106 
108  {
109  key_type key;
110  data_chain_type* data_chain;
111 
112  bool operator== (const leaf_value_type& r) const
113  {
114  return key == r.key && data_chain == r.data_chain;
115  }
116  };
117 
119  struct init_handler;
120  struct dispose_handler;
121 #ifdef MDDS_UNIT_TEST
122  struct to_string_handler;
123 #endif
124 
126  typedef typename node::node_ptr node_ptr;
127 
129 
131  {
132  void operator() (__st::nonleaf_node<segment_tree>& _self, const __st::node_base* left_node, const __st::node_base* right_node)
133  {
134  // Parent node should carry the range of all of its child nodes.
135  if (left_node)
136  {
137  _self.value_nonleaf.low = left_node->is_leaf ?
138  static_cast<const node*>(left_node)->value_leaf.key :
139  static_cast<const nonleaf_node*>(left_node)->value_nonleaf.low;
140  }
141  else
142  {
143  // Having a left node is prerequisite.
144  throw general_error("segment_tree::fill_nonleaf_value_handler: Having a left node is prerequisite.");
145  }
146 
147  if (right_node)
148  {
149  if (right_node->is_leaf)
150  {
151  // When the child nodes are leaf nodes, the upper bound
152  // must be the value of the node that comes after the
153  // right leaf node (if such node exists).
154 
155  const node* p = static_cast<const node*>(right_node);
156  if (p->next)
157  _self.value_nonleaf.high = p->next->value_leaf.key;
158  else
159  _self.value_nonleaf.high = p->value_leaf.key;
160  }
161  else
162  {
163  _self.value_nonleaf.high = static_cast<const nonleaf_node*>(right_node)->value_nonleaf.high;
164  }
165  }
166  else
167  {
168  _self.value_nonleaf.high = left_node->is_leaf ?
169  static_cast<const node*>(left_node)->value_leaf.key :
170  static_cast<const nonleaf_node*>(left_node)->value_nonleaf.high;
171  }
172  }
173  };
174 
175 #ifdef MDDS_UNIT_TEST
176  struct to_string_handler
177  {
178  std::string operator() (const node& _self) const
179  {
180  std::ostringstream os;
181  os << "[" << _self.value_leaf.key << "] ";
182  return os.str();
183  }
184 
185  std::string operator() (const __st::nonleaf_node<segment_tree>& _self) const
186  {
187  std::ostringstream os;
188  os << "[" << _self.value_nonleaf.low << "-" << _self.value_nonleaf.high << ")";
189  if (_self.value_nonleaf.data_chain)
190  {
191  os << " { ";
192  typename data_chain_type::const_iterator
193  itr,
194  itr_beg = _self.value_nonleaf.data_chain->begin(),
195  itr_end = _self.value_nonleaf.data_chain->end();
196  for (itr = itr_beg; itr != itr_end; ++itr)
197  {
198  if (itr != itr_beg)
199  os << ", ";
200  os << (*itr)->name;
201  }
202  os << " }";
203  }
204  os << " ";
205  return os.str();
206  }
207  };
208 #endif
209 
211  {
212  void operator() (node& _self)
213  {
214  _self.value_leaf.data_chain = nullptr;
215  }
216 
217  void operator() (__st::nonleaf_node<segment_tree>& _self)
218  {
219  _self.value_nonleaf.data_chain = nullptr;
220  }
221  };
222 
224  {
225  void operator() (node& _self)
226  {
227  delete _self.value_leaf.data_chain;
228  }
229 
230  void operator() (__st::nonleaf_node<segment_tree>& _self)
231  {
232  delete _self.value_nonleaf.data_chain;
233  }
234  };
235 
236 #ifdef MDDS_UNIT_TEST
237  struct node_printer : public ::std::unary_function<const __st::node_base*, void>
238  {
239  void operator() (const __st::node_base* p) const
240  {
241  if (p->is_leaf)
242  std::cout << static_cast<const node*>(p)->to_string() << " ";
243  else
244  std::cout << static_cast<const nonleaf_node*>(p)->to_string() << " ";
245  }
246  };
247 #endif
248 
249 private:
250 
255  class search_result_base
256  {
257  public:
258  typedef std::vector<data_chain_type*> res_chains_type;
259  typedef std::shared_ptr<res_chains_type> res_chains_ptr;
260  public:
261 
262  search_result_base() :
263  mp_res_chains(static_cast<res_chains_type*>(nullptr)) {}
264 
265  search_result_base(const search_result_base& r) :
266  mp_res_chains(r.mp_res_chains) {}
267 
268  size_t size() const
269  {
270  size_t combined = 0;
271  if (!mp_res_chains)
272  return combined;
273 
274  typename res_chains_type::const_iterator
275  itr = mp_res_chains->begin(), itr_end = mp_res_chains->end();
276  for (; itr != itr_end; ++itr)
277  combined += (*itr)->size();
278  return combined;
279  }
280 
281  void push_back_chain(data_chain_type* chain)
282  {
283  if (!chain || chain->empty())
284  return;
285 
286  if (!mp_res_chains)
287  mp_res_chains.reset(new res_chains_type);
288  mp_res_chains->push_back(chain);
289  }
290 
291  res_chains_ptr& get_res_chains() { return mp_res_chains; }
292 
293  private:
294  res_chains_ptr mp_res_chains;
295  };
296 
297  class iterator_base
298  {
299  protected:
300  typedef typename search_result_base::res_chains_type res_chains_type;
301  typedef typename search_result_base::res_chains_ptr res_chains_ptr;
302 
303  iterator_base(const res_chains_ptr& p) :
304  mp_res_chains(p), m_end_pos(true) {}
305 
306  public:
307  typedef ::std::bidirectional_iterator_tag iterator_category;
308  typedef typename data_chain_type::value_type value_type;
309  typedef typename data_chain_type::pointer pointer;
310  typedef typename data_chain_type::reference reference;
311  typedef typename data_chain_type::difference_type difference_type;
312 
313  iterator_base() :
314  mp_res_chains(static_cast<res_chains_type*>(nullptr)), m_end_pos(true) {}
315 
316  iterator_base(const iterator_base& r) :
317  mp_res_chains(r.mp_res_chains),
318  m_cur_chain(r.m_cur_chain),
319  m_cur_pos_in_chain(r.m_cur_pos_in_chain),
320  m_end_pos(r.m_end_pos) {}
321 
322  iterator_base& operator= (const iterator_base& r)
323  {
324  mp_res_chains = r.mp_res_chains;
325  m_cur_chain = r.m_cur_chain;
326  m_cur_pos_in_chain = r.m_cur_pos_in_chain;
327  m_end_pos = r.m_end_pos;
328  return *this;
329  }
330 
331  typename data_chain_type::value_type* operator++ ()
332  {
333  // We don't check for end position flag for performance reasons.
334  // The caller is responsible for making sure not to increment past
335  // end position.
336 
337  // When reaching the end position, the internal iterators still
338  // need to be pointing at the last item before the end position.
339  // This is why we need to make copies of the iterators, and copy
340  // them back once done.
341 
342  typename data_chain_type::iterator cur_pos_in_chain = m_cur_pos_in_chain;
343 
344  if (++cur_pos_in_chain == (*m_cur_chain)->end())
345  {
346  // End of current chain. Inspect the next chain if exists.
347  typename res_chains_type::iterator cur_chain = m_cur_chain;
348  ++cur_chain;
349  if (cur_chain == mp_res_chains->end())
350  {
351  m_end_pos = true;
352  return nullptr;
353  }
354  m_cur_chain = cur_chain;
355  m_cur_pos_in_chain = (*m_cur_chain)->begin();
356  }
357  else
358  ++m_cur_pos_in_chain;
359 
360  return operator->();
361  }
362 
363  typename data_chain_type::value_type* operator-- ()
364  {
365  if (!mp_res_chains)
366  return nullptr;
367 
368  if (m_end_pos)
369  {
370  m_end_pos = false;
371  return &(*m_cur_pos_in_chain);
372  }
373 
374  if (m_cur_pos_in_chain == (*m_cur_chain)->begin())
375  {
376  if (m_cur_chain == mp_res_chains->begin())
377  {
378  // Already at the first data chain. Don't move the iterator position.
379  return nullptr;
380  }
381  --m_cur_chain;
382  m_cur_pos_in_chain = (*m_cur_chain)->end();
383  }
384  --m_cur_pos_in_chain;
385  return operator->();
386  }
387 
388  bool operator== (const iterator_base& r) const
389  {
390  if (mp_res_chains.get())
391  {
392  // non-empty result set.
393  return mp_res_chains.get() == r.mp_res_chains.get() &&
394  m_cur_chain == r.m_cur_chain && m_cur_pos_in_chain == r.m_cur_pos_in_chain &&
395  m_end_pos == r.m_end_pos;
396  }
397 
398  // empty result set.
399  if (r.mp_res_chains.get())
400  return false;
401  return m_end_pos == r.m_end_pos;
402  }
403 
404  bool operator!= (const iterator_base& r) const { return !operator==(r); }
405 
406  typename data_chain_type::value_type& operator*()
407  {
408  return *m_cur_pos_in_chain;
409  }
410 
411  typename data_chain_type::value_type* operator->()
412  {
413  return &(*m_cur_pos_in_chain);
414  }
415 
416  protected:
417  void move_to_front()
418  {
419  if (!mp_res_chains)
420  {
421  // Empty data set.
422  m_end_pos = true;
423  return;
424  }
425 
426  // We assume that there is at least one chain list, and no
427  // empty chain list exists. So, skip the check.
428  m_cur_chain = mp_res_chains->begin();
429  m_cur_pos_in_chain = (*m_cur_chain)->begin();
430  m_end_pos = false;
431  }
432 
433  void move_to_end()
434  {
435  m_end_pos = true;
436  if (!mp_res_chains)
437  // Empty data set.
438  return;
439 
440  m_cur_chain = mp_res_chains->end();
441  --m_cur_chain;
442  m_cur_pos_in_chain = (*m_cur_chain)->end();
443  --m_cur_pos_in_chain;
444  }
445 
446  private:
447  res_chains_ptr mp_res_chains;
448  typename res_chains_type::iterator m_cur_chain;
449  typename data_chain_type::iterator m_cur_pos_in_chain;
450  bool m_end_pos:1;
451  };
452 
453 public:
454 
455  class search_result : public search_result_base
456  {
457  typedef typename search_result_base::res_chains_type res_chains_type;
458  typedef typename search_result_base::res_chains_ptr res_chains_ptr;
459  public:
460 
461  class iterator : public iterator_base
462  {
463  friend class segment_tree<_Key,_Value>::search_result;
464  private:
465  iterator(const res_chains_ptr& p) : iterator_base(p) {}
466  public:
467  iterator() : iterator_base() {}
468  };
469 
470  typename search_result::iterator begin()
471  {
472  typename search_result::iterator itr(search_result_base::get_res_chains());
473  itr.move_to_front();
474  return itr;
475  }
476 
477  typename search_result::iterator end()
478  {
479  typename search_result::iterator itr(search_result_base::get_res_chains());
480  itr.move_to_end();
481  return itr;
482  }
483  };
484 
485  class search_result_vector_inserter : public ::std::unary_function<data_chain_type*, void>
486  {
487  public:
488  search_result_vector_inserter(search_result_type& result) : m_result(result) {}
489  void operator() (data_chain_type* node_data)
490  {
491  if (!node_data)
492  return;
493 
494  typename data_chain_type::const_iterator itr = node_data->begin(), itr_end = node_data->end();
495  for (; itr != itr_end; ++itr)
496  m_result.push_back(*itr);
497  }
498  private:
499  search_result_type& m_result;
500  };
501 
502  class search_result_inserter : public ::std::unary_function<data_chain_type*, void>
503  {
504  public:
505  search_result_inserter(search_result_base& result) : m_result(result) {}
506  void operator() (data_chain_type* node_data)
507  {
508  if (!node_data)
509  return;
510 
511  m_result.push_back_chain(node_data);
512  }
513  private:
514  search_result_base& m_result;
515  };
516 
517  segment_tree();
518  segment_tree(const segment_tree& r);
519  ~segment_tree();
520 
525  bool operator==(const segment_tree& r) const;
526 
527  bool operator!=(const segment_tree& r) const { return !operator==(r); }
528 
535  bool is_tree_valid() const { return m_valid_tree; }
536 
540  void build_tree();
541 
551  bool insert(key_type begin_key, key_type end_key, value_type pdata);
552 
569  bool search(key_type point, search_result_type& result) const;
570 
580  search_result search(key_type point) const;
581 
589  void remove(value_type value);
590 
594  void clear();
595 
599  size_t size() const;
600 
604  bool empty() const;
605 
611  size_t leaf_size() const;
612 
613 #ifdef MDDS_UNIT_TEST
614  void dump_tree() const;
615  void dump_leaf_nodes() const;
616  void dump_segment_data() const;
617  bool verify_node_lists() const;
618 
619  struct leaf_node_check
620  {
621  key_type key;
622  data_chain_type data_chain;
623  };
624 
625  bool verify_leaf_nodes(const ::std::vector<leaf_node_check>& checks) const;
626 
633  bool verify_segment_data(const segment_map_type& checks) const;
634 #endif
635 
636 private:
640  void search(key_type point, search_result_base& result) const;
641 
642  typedef std::vector<__st::node_base*> node_list_type;
643  typedef std::map<value_type, std::unique_ptr<node_list_type>> data_node_map_type;
644 
645  static void create_leaf_node_instances(const ::std::vector<key_type>& keys, node_ptr& left, node_ptr& right);
646 
652  void descend_tree_and_mark(
653  __st::node_base* pnode, value_type pdata, key_type begin_key, key_type end_key, node_list_type* plist);
654 
655  void build_leaf_nodes();
656 
661  void remove_data_from_nodes(node_list_type* plist, const value_type pdata);
662  void remove_data_from_chain(data_chain_type& chain, const value_type pdata);
663 
664  void clear_all_nodes();
665 
666 #ifdef MDDS_UNIT_TEST
667  static bool has_data_pointer(const node_list_type& node_list, const value_type pdata);
668  static void print_leaf_value(const leaf_value_type& v);
669 #endif
670 
671 private:
672  std::vector<nonleaf_node> m_nonleaf_node_pool;
673 
674  segment_map_type m_segment_data;
675 
681  data_node_map_type m_tagged_node_map;
682 
683  nonleaf_node* m_root_node;
684  node_ptr m_left_leaf;
685  node_ptr m_right_leaf;
686  bool m_valid_tree:1;
687 };
688 
689 }
690 
691 #include "segment_tree_def.inl"
692 
693 #endif
Definition: segment_tree.hpp:107
bool is_leaf
parent nonleaf_node
Definition: node.hpp:46
Definition: segment_tree.hpp:50
Definition: segment_tree.hpp:461
size_t size() const
bool insert(key_type begin_key, key_type end_key, value_type pdata)
bool operator==(const segment_tree &r) const
size_t leaf_size() const
Definition: node.hpp:43
Definition: node.hpp:53
bool is_tree_valid() const
Definition: segment_tree.hpp:535
Definition: rectangle_set.hpp:37
Definition: global.hpp:58
Definition: segment_tree.hpp:210
Definition: segment_tree.hpp:455
Definition: segment_tree.hpp:223
Definition: flat_segment_tree.hpp:46
bool empty() const
data_chain_type * data_chain
high range value (non-inclusive)
Definition: segment_tree.hpp:99
bool search(key_type point, search_result_type &result) const
Definition: segment_tree.hpp:502
Definition: node.hpp:143
Definition: segment_tree.hpp:95
key_type high
low range value (inclusive)
Definition: segment_tree.hpp:98
node_ptr next
previous sibling leaf node.
Definition: node.hpp:166
Definition: segment_tree.hpp:130