A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
PairArray.cc
Go to the documentation of this file.
1 /*****************************************************************************
2  * *
3  * Copyright 2018 Rice University *
4  * *
5  * Licensed under the Apache License, Version 2.0 (the "License"); *
6  * you may not use this file except in compliance with the License. *
7  * You may obtain a copy of the License at *
8  * *
9  * http://www.apache.org/licenses/LICENSE-2.0 *
10  * *
11  * Unless required by applicable law or agreed to in writing, software *
12  * distributed under the License is distributed on an "AS IS" BASIS, *
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14  * See the License for the specific language governing permissions and *
15  * limitations under the License. *
16  * *
17  *****************************************************************************/
18 
19 #ifndef PAIR_ARRAY_CC
20 #define PAIR_ARRAY_CC
21 
22 #include <cstddef>
23 #include <iostream>
24 #include <vector>
25 #include <algorithm>
26 #include <iterator>
27 #include <type_traits>
28 #include <cstring>
29 
30 #include "Handle.h"
31 #include "Object.h"
32 #include "InterfaceFunctions.h"
33 
34 namespace pdb {
35 
36 // this little class is used to ask the compiler to build the layout of the records used to
37 // store (hash, KeyType, ValueType) triples
38 template <class KeyType, class ValueType>
40  size_t hash;
41  KeyType key;
42  ValueType value;
43 
44  size_t getObjSize() {
46  }
47 
48  size_t getValueOffset() {
49  return ((char*)&value) - ((char*)this);
50  }
51 };
52 
53 // a special code that tells us when a hash slot is unused
54 #define UNUSED 493295393
55 
56 unsigned int newHash(unsigned int x);
57 
58 // added by Shangyu
59 inline size_t specialHash(unsigned u) {
60  return newHash(u);
61 }
62 
63 inline size_t specialHash(int u) {
64  return newHash((unsigned)u);
65 }
66 
67 // this uses SFINAE to call std::hash () on KeyType if applicable; otherwise, it calls KeyType.hash
68 // ()
69 template <class KeyType>
70 class Hasher {
71 
72  // as a fallback, we will pick this one
73  template <typename U>
74  static auto hash_impl(U const& u, ...) -> size_t {
75  return std::hash<U>().operator()(u);
76  }
77 
78  // overloading rules will select this one first... ...unless it's not valid
79  template <typename U>
80  static auto hash_impl(U const& u, int) -> decltype(u.hash()) {
81  return u.hash();
82  }
83 
84 public:
85  static auto hash(const KeyType& k) -> decltype(hash_impl(k, 0)) {
86  decltype(hash_impl(k, 0)) temp = hash_impl(k, 0);
87 
88  // we can't allow the hash function to return UNUSED
89  if (temp == UNUSED)
90  temp = 858931273;
91 
92  return temp;
93  }
94 
95  // allows us to specify a specialized hash function by defining specialHash (U const u)
96  template <typename U>
97  static auto hash_impl(U const& u, int) -> decltype(specialHash(u)) {
98  return specialHash(u);
99  }
100 };
101 
102 // the maximum fill factor before we double
103 #define FILL_FACTOR .667
104 
105 // access keys, hashes, and data in the underlying array
106 #define GET_HASH(data, i) (*((size_t*)(((char*)data) + (i * objSize))))
107 #define GET_HASH_PTR(data, i) ((size_t*)(((char*)data) + (i * objSize)))
108 #define GET_KEY_PTR(data, i) ((void*)(((char*)data) + sizeof(size_t) + (i * objSize)))
109 #define GET_VALUE_PTR(data, i) ((void*)(((char*)data) + valueOffset + (i * objSize)))
110 #define GET_KEY(data, i, type) (*((type*)(((char*)data) + sizeof(size_t) + (i * objSize))))
111 #define GET_VALUE(data, i, type) (*((type*)(((char*)data) + valueOffset + (i * objSize))))
112 
113 // Note: we need to write all operations in constructors, destructors, and assignment operators
114 // WITHOUT using
115 // the underlying type in any way (including assignment, initialization, destruction, size).
116 //
117 template <class KeyType, class ValueType>
119 
120  this->disableDestructor = disableOrNot;
121 }
122 
123 template <class KeyType, class ValueType>
125 
126  return this->disableDestructor;
127 }
128 
129 
130 template <class KeyType, class ValueType>
131 void PairArray<KeyType, ValueType>::setUpAndCopyFrom(void* target, void* source) const {
132 
133  new (target) PairArray<KeyType, ValueType>();
136 
137  // copy the number of slots
138  toMe.numSlots = fromMe.numSlots;
139  toMe.usedSlots = fromMe.usedSlots;
140 
141  toMe.setDisableDestructor(false);
142 
143  // copy the type info
144  toMe.keyTypeInfo = fromMe.keyTypeInfo;
145  toMe.valueTypeInfo = fromMe.valueTypeInfo;
146 
147  if (toMe.keyTypeInfo.getTypeCode() == 0) {
148  std::cout << "PairArray: setUpAndCopyFrom: keyTypeInfo = 0 " << std::endl;
149  }
150 
151  if (toMe.valueTypeInfo.getTypeCode() == 0) {
152  std::cout << "PairArray: setUpAndCopyFrom: valueTypeInfo = 0" << std::endl;
153  }
154 
155  // copy over the size info
156  toMe.objSize = fromMe.objSize;
157  toMe.valueOffset = fromMe.valueOffset;
158  toMe.maxSlots = fromMe.maxSlots;
159 
160  // now we need to copy the array
161  // if our types are fully primitive, just do a memmove
163  memmove((void*)toMe.data, (void*)fromMe.data, ((size_t)toMe.objSize) * (toMe.numSlots));
164  return;
165  }
166 
167  // one of them is not primitive...
168 
169  // compute the key and value sizes
170  uint32_t keySize = (toMe.valueOffset - sizeof(size_t));
171  uint32_t valueSize = (toMe.objSize - toMe.valueOffset);
172 
173  // these are needed to make the GET_HASH and other macros work correctly... they refer
174  // to variables objSize and valueOffset... this.objSize and this.valueOffset are possibly
175  // undefined here. By having local variables that shadow these, we get around potential
176  // problems
177  uint32_t objSize = toMe.objSize;
178  uint32_t valueOffset = toMe.valueOffset;
179 
180  // loop through and do the deep copy
181  for (int i = 0; i < toMe.numSlots; i++) {
182 
183  // copy over the hash for this guy
184  GET_HASH(toMe.data, i) = GET_HASH(fromMe.data, i);
185 
186  // don't copy over an unused slot
187  if (GET_HASH(fromMe.data, i) == UNUSED)
188  continue;
189 
190  // deal with the key... use memmove on a non-object type
191  if (!toMe.keyTypeInfo.descendsFromObject()) {
192  memmove(GET_KEY_PTR(toMe.data, i), GET_KEY_PTR(fromMe.data, i), keySize);
193  } else {
194  try {
196  GET_KEY_PTR(fromMe.data, i));
197  } catch (NotEnoughSpace& n) {
198  // JiaNote: if data type is a handle, it may trigger NotEnoughSpace exception, so
199  // handle this here.
200  for (int j = i; j < toMe.numSlots; j++) {
201  GET_HASH(toMe.data, j) = UNUSED;
202  }
203  toMe.setDisableDestructor(true);
204  throw n;
205  }
206  }
207 
208  // and now same thing on the value
209  if (!toMe.valueTypeInfo.descendsFromObject()) {
210  memmove(GET_VALUE_PTR(toMe.data, i), GET_VALUE_PTR(fromMe.data, i), valueSize);
211  } else {
212  try {
214  GET_VALUE_PTR(fromMe.data, i));
215  } catch (NotEnoughSpace& n) {
216  // JiaNote: if data type is a handle, it may trigger NotEnoughSpace exception, so
217  // handle this here.
218  for (int j = i; j < toMe.numSlots; j++) {
219  GET_HASH(toMe.data, j) = UNUSED;
220  }
221  toMe.setDisableDestructor(true);
222  throw n;
223  }
224  }
225  }
226 }
227 
228 // just use setUpAndCopyFrom to do a deep copy... this assumes that we have enough space!!
229 template <class KeyType, class ValueType>
231  setUpAndCopyFrom(this, &toMe);
232 }
233 
234 template <class KeyType, class ValueType>
235 int PairArray<KeyType, ValueType>::count(const KeyType& me) {
236 
237  // hash this dude
238  size_t hashVal = Hasher<KeyType>::hash(me);
239 
240  // figure out which slot he goes in
241  size_t slot = hashVal % (numSlots - 1);
242 
243  // in the worst case, we can loop through the entire hash table looking. :-(
244  for (size_t slotsChecked = 0; slotsChecked < numSlots; slotsChecked++) {
245 
246  // if we found an empty slot, then this guy was not here
247  if (GET_HASH(data, slot) == UNUSED) {
248  return 0;
249  } else if (GET_HASH(data, slot) == hashVal) {
250 
251  // potential match!!
252  if (GET_KEY(data, slot, KeyType) == me) {
253  return 1;
254  }
255  }
256 
257  // if we made it here, then it means that we found a non-empty slot, but no
258  // match... so we simply loop to the next iteration... if slot == (numSlots-1), it
259  // means we've made it to the end of the hash table... go to the beginning
260  if (slot == numSlots - 1)
261  slot = 0;
262 
263  // otherwise, just go to the next slot
264  else
265  slot++;
266  }
267 
268  // we should never reach here
269  std::cout << "in count(): hashVal = " << hashVal << ", slot = " << slot
270  << ", numSlots =" << numSlots << ". Warning: Ran off the end of the hash table!!\n";
271  return 0;
272  // exit (1);
273 }
274 
275 template <class KeyType, class ValueType>
277 
278  // hash this dude
279  size_t hashVal = Hasher<KeyType>::hash(me);
280 
281  // figure out which slot he goes in
282  size_t slot = hashVal % (numSlots - 1);
283 
284  // in the worst case, we can loop through the entire hash table looking. :-(
285  for (size_t slotsChecked = 0; slotsChecked < numSlots; slotsChecked++) {
286 
287  // if we found an empty slot, then this guy was not here
288  if (GET_HASH(data, slot) == UNUSED) {
289  std::cout << "WARNING: setUnused for an empty slot" << std::endl;
290  return;
291 
292  // found a non-empty slot; check for a match
293  } else if (GET_HASH(data, slot) == hashVal) {
294 
295  // potential match!!
296  if (GET_KEY(data, slot, KeyType) == me) {
297 
298  // destruct those guys
299  ((KeyType*)(GET_KEY_PTR(data, slot)))->~KeyType();
300  ((ValueType*)(GET_VALUE_PTR(data, slot)))->~ValueType();
301  GET_HASH(data, slot) = UNUSED;
302  return;
303  }
304  }
305 
306  // if we made it here, then it means that we found a non-empty slot, but no
307  // match... so we simply loop to the next iteration... if slot == numSlots - 1, it
308  // means we've made it to the end of the hash table... go to the beginning
309  if (slot == numSlots - 1)
310  slot = 0;
311 
312  // otherwise, just go to the next slot
313  else
314  slot++;
315  }
316 
317  // we should never reach here
318  std::cout << "in setUnused(): hashVal = " << hashVal << ", slot = " << slot
319  << ", numSlots =" << numSlots << ". Warning: Ran off the end of the hash table!!\n";
320  // exit (1);
321 }
322 
323 
324 template <class KeyType, class ValueType>
325 ValueType& PairArray<KeyType, ValueType>::operator[](const KeyType& me) {
326 
327  // basically, if he is not there, we add him and return a reference to a newly-constructed
328  // ValueType... if he is there, we simply return a reference to a newly-constructed ValueType
329 
330  // hash this dude
331  size_t hashVal = Hasher<KeyType>::hash(me);
332 
333  // figure out which slot he goes in
334  size_t slot = hashVal % (numSlots - 1);
335 
336  // in the worst case, we can loop through the entire hash table looking. :-(
337  for (size_t slotsChecked = 0; slotsChecked < numSlots; slotsChecked++) {
338 
339  // if we found an empty slot, then this guy was not here
340  if (GET_HASH(data, slot) == UNUSED) {
341  try {
342  // construct the key and the value
343  new (GET_KEY_PTR(data, slot)) KeyType();
344  new (GET_VALUE_PTR(data, slot)) ValueType();
345  } catch (NotEnoughSpace& n) {
346  std::cout << "Not enough space when in placement new the key type and value type"
347  << std::endl;
348  throw n;
349  }
350 
351 
352  // add the key
353  try {
354  GET_KEY(data, slot, KeyType) = me;
355 
356  GET_HASH(data, slot) = hashVal;
357 
358  // increment the number of used slots
359  usedSlots++;
360 
361  } catch (NotEnoughSpace& n) {
362  std::cout << "Not enough space when inserting new key" << std::endl;
363  throw n;
364  }
365 
366  // and return the value
367  return GET_VALUE(data, slot, ValueType);
368 
369  // found a non-empty slot; check for a match
370  } else if (GET_HASH(data, slot) == hashVal) {
371 
372  // potential match!!
373  if (GET_KEY(data, slot, KeyType) == me) {
374 
375  // match!!
376  return GET_VALUE(data, slot, ValueType);
377  }
378  }
379 
380  // if we made it here, then it means that we found a non-empty slot, but no
381  // match... so we simply loop to the next iteration... if slot == numSlots - 1, it
382  // means we've made it to the end of the hash table... go to the beginning
383  if (slot == numSlots - 1)
384  slot = 0;
385 
386  // otherwise, just go to the next slot
387  else
388  slot++;
389  }
390 
391  // we should never reach here
392  std::cout << "Fatal Error: Ran off the end of the hash table!!\n";
393  exit(1);
394 }
395 
396 template <class KeyType, class ValueType>
398 
399  // verify that we are a power of two
400  bool gotIt = false;
401  uint32_t val = 1;
402  for (unsigned int pow = 0; pow <= 31; pow++) {
403  if (val >= numSlotsIn) {
404  gotIt = true;
405  break;
406  }
407  val *= 2;
408  }
409 
410  setDisableDestructor(false);
411 
412  // if we are not a power of two, exit
413  if (!gotIt) {
414  std::cout << "Fatal Error: Bad: could not get the correct size " << numSlotsIn
415  << " for the array\n";
416  exit(1);
417  }
418 
419  // remember the size
420  numSlots = numSlotsIn;
421  maxSlots = numSlotsIn * FILL_FACTOR;
422 
423  // set everyone to unused
424  for (int i = 0; i < numSlots; i++) {
425  GET_HASH(data, i) = UNUSED;
426  }
427 }
428 
429 template <class KeyType, class ValueType>
431  return usedSlots >= maxSlots;
432 }
433 
434 template <class KeyType, class ValueType>
436 
437  // remember the types for this guy
438  keyTypeInfo.setup<KeyType>();
439  valueTypeInfo.setup<ValueType>();
440 
441  // used to let the compiler to tell us how to pack items in our array of slots
443  objSize = temp.getObjSize();
444  valueOffset = temp.getValueOffset();
445 
446  // zero slots in the array
447  numSlots = 0;
448 
449  // no used slots
450  usedSlots = 0;
451 
452  // the max number of used slots is zero
453  maxSlots = 0;
454 
455  setDisableDestructor(false);
456 }
457 
458 // Note: because this can be called by Object.deleteObject (), it must be written so as to not use
459 // TypeContained
460 template <class KeyType, class ValueType>
462 
463  if (isDestructorDisabled() == true) {
464  return;
465  }
466 
467  if (keyTypeInfo.getTypeCode() == 0) {
468  std::cout << "PairArray: ~PairArray: keyTypeInfo = 0 " << std::endl;
469  }
470 
471  if (valueTypeInfo.getTypeCode() == 0) {
472  std::cout << "PairArray: ~PairArray: valueTypeInfo = 0" << std::endl;
473  }
474  // do no work if the guys we store do not come from pdb :: Object
475  if (!keyTypeInfo.descendsFromObject() && !valueTypeInfo.descendsFromObject())
476  return;
477 
478  // now, delete each of the objects in there, if we have got an object type
479  for (uint32_t i = 0; i < numSlots; i++) {
480  if (GET_HASH(data, i) != UNUSED) {
481  if (keyTypeInfo.descendsFromObject())
482  keyTypeInfo.deleteConstituentObject(GET_KEY_PTR(data, i));
483  if (valueTypeInfo.descendsFromObject())
484  valueTypeInfo.deleteConstituentObject(GET_VALUE_PTR(data, i));
485  }
486  }
487 }
488 
489 template <class KeyType, class ValueType>
491 
492  uint32_t howMany = numSlots * 2;
493 
494  // allocate the new Array
496  makeObjectWithExtraStorage<PairArray<KeyType, ValueType>>(objSize * howMany, howMany);
497 
498  // first, set everything to unused
499  // now, re-hash everything
500  PairArray<KeyType, ValueType>& newOne = *tempArray;
501 
502  for (uint32_t i = 0; i < numSlots; i++) {
503 
504  if (GET_HASH(data, i) != UNUSED) {
505 
506  // copy the dude over
507  newOne[GET_KEY(data, i, KeyType)] = GET_VALUE(data, i, ValueType);
508 
509  // and delete the old one
510  GET_KEY(data, i, KeyType).~KeyType();
511  GET_VALUE(data, i, ValueType).~ValueType();
512  GET_HASH(data, i) = UNUSED;
513  }
514  }
515 
516  // and return this guy
517  return tempArray;
518 }
519 
520 template <class KeyType, class ValueType>
522  return usedSlots;
523 }
524 
525 template <class KeyType, class ValueType>
527  deleter(deleteMe, this);
528 }
529 
530 template <class KeyType, class ValueType>
533  return sizeof(PairArray<Nothing>) + target.objSize * target.numSlots;
534 }
535 
536 template <class KeyType, class ValueType>
538  Handle<PairArray<KeyType, ValueType>> iterateMeIn, bool)
539  : iterateMe(iterateMeIn) {
540  slot = 0;
541  done = false;
542  int objSize = iterateMe->objSize;
543  while (slot != iterateMe->numSlots && GET_HASH(iterateMe->data, slot) == UNUSED)
544  slot++;
545 
546  if (slot == iterateMe->numSlots)
547  done = true;
548 }
549 
550 template <class KeyType, class ValueType>
553  : iterateMe(iterateMeIn) {
554  done = true;
555 }
556 
557 template <class KeyType, class ValueType>
559  if (!done)
560  slot++;
561 
562  int objSize = iterateMe->objSize;
563  while (slot != iterateMe->numSlots && GET_HASH(iterateMe->data, slot) == UNUSED)
564  slot++;
565 
566  if (slot == iterateMe->numSlots)
567  done = true;
568 }
569 
570 template <class KeyType, class ValueType>
572 
573  int objSize = iterateMe->objSize;
574  return *((MapRecordClass<KeyType, ValueType>*)GET_HASH_PTR(iterateMe->data, slot));
575 }
576 
577 template <class KeyType, class ValueType>
579  const PDBMapIterator<KeyType, ValueType>& me) const {
580  if (!done || !me.done)
581  return true;
582  return false;
583 }
584 }
585 
586 #endif
PDBTemplateBase keyTypeInfo
Definition: PairArray.h:87
static auto hash_impl(U const &u,...) -> size_t
Definition: PairArray.cc:74
#define GET_KEY(data, i, type)
Definition: PairArray.cc:110
size_t getValueOffset()
Definition: PairArray.cc:48
#define GET_VALUE(data, i, type)
Definition: PairArray.cc:111
static auto hash_impl(U const &u, int) -> decltype(u.hash())
Definition: PairArray.cc:80
size_t getSize(void *forMe)
Definition: PairArray.cc:531
void deleteObject(void *deleteMe)
Definition: PairArray.cc:526
size_t getObjSize()
Definition: PairArray.cc:44
#define UNUSED
Definition: PairArray.cc:54
uint32_t maxSlots
Definition: PairArray.h:103
int count(const KeyType &which)
Definition: PairArray.cc:235
uint32_t usedSlots
Definition: PairArray.h:97
#define FILL_FACTOR
Definition: PairArray.cc:103
Nothing data[0]
Definition: PairArray.h:106
void deleter(void *deleteMe, ObjType *dummy)
Definition: DeepCopy.h:48
#define GET_HASH(data, i)
Definition: PairArray.cc:106
PDBTemplateBase valueTypeInfo
Definition: PairArray.h:88
bool operator!=(const PDBMapIterator &me) const
Definition: PairArray.cc:578
#define GET_KEY_PTR(data, i)
Definition: PairArray.cc:108
Handle< PairArray< KeyType, ValueType > > doubleArray()
Definition: PairArray.cc:490
uint32_t numSlots
Definition: PairArray.h:100
void setUpAndCopyFrom(void *target, void *source) const
Definition: PairArray.cc:131
uint32_t valueOffset
Definition: PairArray.h:94
unsigned int newHash(unsigned int x)
Definition: PDBMap.cc:28
#define GET_VALUE_PTR(data, i)
Definition: PairArray.cc:109
uint32_t objSize
Definition: PairArray.h:91
int16_t getTypeCode() const
void setUpAndCopyFromConstituentObject(void *target, void *source) const
Handle< PairArray< KeyType, ValueType > > iterateMe
Definition: PairArray.h:57
ValueType & operator[](const KeyType &which)
Definition: PairArray.cc:325
bool isDestructorDisabled()
Definition: PairArray.cc:124
size_t specialHash(unsigned u)
Definition: PairArray.cc:59
MapRecordClass< KeyType, ValueType > & operator*()
Definition: PairArray.cc:571
static auto hash(const KeyType &k) -> decltype(hash_impl(k, 0))
Definition: PairArray.cc:85
bool isOverFull()
Definition: PairArray.cc:430
static auto hash_impl(U const &u, int) -> decltype(specialHash(u))
Definition: PairArray.cc:97
#define GET_HASH_PTR(data, i)
Definition: PairArray.cc:107
uint32_t numUsedSlots()
Definition: PairArray.cc:521
void setDisableDestructor(bool disableOrNot)
Definition: PairArray.cc:118
bool descendsFromObject() const
void setUnused(const KeyType &clearMe)
Definition: PairArray.cc:276