A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
JoinPairArray.cc
Go to the documentation of this file.
1 
2 #ifndef JOIN_PAIR_ARRAY_CC
3 #define JOIN_PAIR_ARRAY_CC
4 
5 #include <cstddef>
6 #include <iostream>
7 #include <vector>
8 #include <algorithm>
9 #include <iterator>
10 #include <type_traits>
11 #include <cstring>
12 
13 #include "Handle.h"
14 #include "Object.h"
15 #include "InterfaceFunctions.h"
16 
17 namespace pdb {
18 
19 // a special code that tells us when a hash slot is unused
20 #define JM_UNUSED 493295393
21 
22 // the maximum fill factor before we double
23 #define JM_FILL_FACTOR .667
24 
25 // access keys, hashes, and data in the underlying array
26 #define JM_GET_HASH(data, i) (*((size_t*)(((char*)data) + (i * objSize))))
27 #define JM_GET_HASH_PTR(data, i) ((size_t*)(((char*)data) + (i * objSize)))
28 #define JM_GET_NEXT_PTR(data, i) ((uint32_t*)(((char*)data) + sizeof(size_t) + (i * objSize)))
29 #define JM_GET_VALUE_PTR(data, i) \
30  ((void*)(((char*)data) + sizeof(size_t) + sizeof(uint32_t) + (i * objSize)))
31 #define JM_GET_NEXT(data, i) (*((uint32_t*)(((char*)data) + sizeof(size_t) + (i * objSize))))
32 #define JM_GET_VALUE(data, i, type) \
33  (*((type*)(((char*)data) + sizeof(size_t) + sizeof(uint32_t) + (i * objSize))))
34 
35 // Note: we need to write all operations in constructors, destructors, and assignment operators
36 // WITHOUT using
37 // the underlying type in any way (including assignment, initialization, destruction, size).
38 //
39 
40 template <class ValueType>
41 void JoinPairArray<ValueType>::setUpAndCopyFrom(void* target, void* source) const {
42 
43  new (target) JoinPairArray<ValueType>();
46 
47  // copy the number of slots
48  toMe.numSlots = fromMe.numSlots;
49  toMe.usedSlots = fromMe.usedSlots;
50 
51  // copy the type info
52  toMe.valueTypeInfo = fromMe.valueTypeInfo;
53 
54  // copy over the size info
55  toMe.objSize = fromMe.objSize;
56  toMe.maxSlots = fromMe.maxSlots;
57 
58  // copy over the overflow area
59  toMe.overflows = fromMe.overflows;
60 
61  // now we need to copy the array
62  // if our types are fully primitive, just do a memmove
63  if (!toMe.valueTypeInfo.descendsFromObject()) {
64  memmove((void*)toMe.data, (void*)fromMe.data, ((size_t)toMe.objSize) * (toMe.numSlots));
65  return;
66  }
67 
68  // one of them is not primitive...
69 
70  // these are needed to make the JM_GET_HASH and other macros work correctly... they refer
71  // to variables objSize and valueOffset... this.objSize and this.valueOffset are possibly
72  // undefined here. By having local variables that shadow these, we get around potential
73  // problems
74  uint32_t objSize = toMe.objSize;
75 
76  // loop through and do the deep copy
77  for (int i = 0; i < toMe.numSlots; i++) {
78 
79  // copy over the hash for this guy
80  JM_GET_HASH(toMe.data, i) = JM_GET_HASH(fromMe.data, i);
81 
82  // don't copy over an unused slot
83  if (JM_GET_HASH(fromMe.data, i) == JM_UNUSED)
84  continue;
85 
86  JM_GET_NEXT(toMe.data, i) = JM_GET_NEXT(fromMe.data, i);
87 
88  // and now same thing on the value
89  if (!toMe.valueTypeInfo.descendsFromObject()) {
90  memmove(JM_GET_VALUE_PTR(toMe.data, i), JM_GET_VALUE_PTR(fromMe.data, i), objSize);
91  } else {
92 
93 
95  JM_GET_VALUE_PTR(fromMe.data, i));
96  }
97  }
98 }
99 
100 // just use setUpAndCopyFrom to do a deep copy... this assumes that we have enough space!!
101 template <class ValueType>
103  setUpAndCopyFrom(this, &toMe);
104 }
105 
106 template <class ValueType>
107 int JoinPairArray<ValueType>::count(const size_t& me) {
108 
109  // hash this dude
110  size_t hashVal = me == JM_UNUSED ? 858931273 : me;
111 
112  // figure out which slot he goes in
113  size_t slot = hashVal % (numSlots - 1);
114 
115  // in the worst case, we can loop through the entire hash table looking. :-(
116  for (size_t slotsChecked = 0; slotsChecked < numSlots; slotsChecked++) {
117 
118  // if we found an empty slot, then this guy was not here
119  if (JM_GET_HASH(data, slot) == JM_UNUSED) {
120  return 0;
121  } else if (JM_GET_HASH(data, slot) == hashVal) {
122 
123  // potential match!!
124  if (JM_GET_NEXT(data, slot) != UINT32_MAX) {
125  return 1 + overflows[JM_GET_NEXT(data, slot)].size();
126  } else {
127  return 1;
128  }
129  }
130 
131  // if we made it here, then it means that we found a non-empty slot, but no
132  // match... so we simply loop to the next iteration... if slot == (numSlots-1), it
133  // means we've made it to the end of the hash table... go to the beginning
134  if (slot == numSlots - 1)
135  slot = 0;
136 
137  // otherwise, just go to the next slot
138  else
139  slot++;
140  }
141 
142  // we should never reach here
143  std::cout << "Warning: Ran off the end of the hash table!!\n";
144  exit(1);
145 }
146 
147 template <class ValueType>
148 void JoinPairArray<ValueType>::setUnused(const size_t& me) {
149 
150  // hash this dude
151  size_t hashVal = me == JM_UNUSED ? 858931273 : me;
152 
153  // figure out which slot he goes in
154  size_t slot = hashVal % (numSlots - 1);
155  // in the worst case, we can loop through the entire hash table looking. :-(
156  for (size_t slotsChecked = 0; slotsChecked < numSlots; slotsChecked++) {
157 
158  // if we found an empty slot, then this guy was not here
159  if (JM_GET_HASH(data, slot) == JM_UNUSED) {
160  return;
161 
162  // found a non-empty slot; check for a match
163  } else if (JM_GET_HASH(data, slot) == hashVal) {
164 
165  if (JM_GET_NEXT(data, slot) != UINT32_MAX &&
166  overflows[JM_GET_NEXT(data, slot)].size() >= 1) {
167  overflows[JM_GET_NEXT(data, slot)].pop_back();
168  return;
169  }
170 
171  // destruct those guys
172  ((ValueType*)(JM_GET_VALUE_PTR(data, slot)))->~ValueType();
173 
174  JM_GET_HASH(data, slot) = JM_UNUSED;
175  JM_GET_NEXT(data, slot) = UINT32_MAX;
176 
177  return;
178  }
179 
180  // if we made it here, then it means that we found a non-empty slot, but no
181  // match... so we simply loop to the next iteration... if slot == numSlots - 1, it
182  // means we've made it to the end of the hash table... go to the beginning
183  if (slot == numSlots - 1)
184  slot = 0;
185 
186  // otherwise, just go to the next slot
187  else
188  slot++;
189  }
190 
191  // we should never reach here
192  std::cout << "Fatal Error: Ran off the end of the hash table!!\n";
193  exit(1);
194 }
195 
196 template <class ValueType>
198 
199  size_t hashVal = me == JM_UNUSED ? 858931273 : me;
200 
201  // figure out which slot he goes in
202  size_t slot = hashVal % (numSlots - 1);
203 
204  // in the worst case, we can loop through the entire hash table looking.
205  for (size_t slotsChecked = 0; slotsChecked < numSlots; slotsChecked++) {
206 
207  // if we got here, it means that this guy was not here
208  if (JM_GET_HASH(data, slot) == JM_UNUSED || JM_GET_HASH(data, slot) == hashVal) {
209  JoinRecordList<ValueType> returnVal(slot, *this);
210  return returnVal;
211  }
212 
213  // if we made it here, then it means that we found a non-empty slot, but no
214  // match... so we simply loop to the next iteration... if slot == numSlots - 1, it
215  // means we've made it to the end of the hash table... go to the beginning
216  if (slot == numSlots - 1)
217  slot = 0;
218 
219  // otherwise, just go to the next slot
220  else
221  slot++;
222  }
223 
224  // we should never reach here
225  std::cout << "Fatal Error: Ran off the end of the hash table!!\n";
226  exit(1);
227 }
228 
229 
230 template <class ValueType>
231 ValueType& JoinPairArray<ValueType>::push(const size_t& me) {
232 
233  // basically, if he is not there, we add him and return a reference to a newly-constructed
234  // ValueType... if he is there, we simply return a reference to a newly-constructed ValueType
235 
236  // hash this dude
237  size_t hashVal = me == JM_UNUSED ? 858931273 : me;
238 
239  // figure out which slot he goes in
240  size_t slot = hashVal % (numSlots - 1);
241 
242  // in the worst case, we can loop through the entire hash table looking. :-(
243  for (size_t slotsChecked = 0; slotsChecked < numSlots; slotsChecked++) {
244 
245  // if we found an empty slot, then this guy was not here
246  if (JM_GET_HASH(data, slot) == JM_UNUSED) {
247 
248  // construct the key and the value
249  new (JM_GET_VALUE_PTR(data, slot)) ValueType();
250 
251  // add the key
252  JM_GET_HASH(data, slot) = hashVal;
253  JM_GET_NEXT(data, slot) = UINT32_MAX;
254 
255  // increment the number of used slots
256  usedSlots++;
257 
258  return JM_GET_VALUE(data, slot, ValueType);
259 
260  // found a non-empty slot; check for a match
261  } else if (JM_GET_HASH(data, slot) == hashVal) {
262 
263  // match!!
264  if (JM_GET_NEXT(data, slot) == UINT32_MAX) {
265 
266  // add the new list of overflows
267  Vector<ValueType> temp;
268  overflows.push_back(temp);
269  JM_GET_NEXT(data, slot) = overflows.size()-1;
270  }
271 
272  // and add our new guy
273  overflows[JM_GET_NEXT(data, slot)].push_back();
274  return overflows[JM_GET_NEXT(data, slot)]
275  [overflows[JM_GET_NEXT(data, slot)].size() - 1];
276  }
277 
278  // if we made it here, then it means that we found a non-empty slot, but no
279  // match... so we simply loop to the next iteration... if slot == numSlots - 1, it
280  // means we've made it to the end of the hash table... go to the beginning
281  if (slot == numSlots - 1)
282  slot = 0;
283 
284  // otherwise, just go to the next slot
285  else
286  slot++;
287  }
288 
289  // we should never reach here
290  std::cout << "Fatal Error: Ran off the end of the hash table!!\n";
291  exit(1);
292 }
293 
294 template <class ValueType>
296 
297  // verify that we are a power of two
298  bool gotIt = false;
299  uint32_t val = 1;
300  for (unsigned int pow = 0; pow <= 31; pow++) {
301  if (val >= numSlotsIn) {
302  gotIt = true;
303  break;
304  }
305  val *= 2;
306  }
307 
308  // if we are not a power of two, exit
309  if (!gotIt) {
310  std::cout << "Fatal Error: Bad: could not get the correct size for the array\n";
311  exit(1);
312  }
313 
314  // remember the size
315  numSlots = numSlotsIn;
316  maxSlots = numSlotsIn * JM_FILL_FACTOR;
317 
318  // set everyone to unused
319  for (int i = 0; i < numSlots; i++) {
320  JM_GET_HASH(data, i) = JM_UNUSED;
321  JM_GET_NEXT(data, i) = UINT32_MAX;
322  }
323 }
324 
325 template <class ValueType>
327  return usedSlots >= maxSlots;
328 }
329 
330 template <class ValueType>
332 
333  // remember the types for this guy
334  valueTypeInfo.setup<ValueType>();
335 
336  // used to let the compiler to tell us how to pack items in our array of slots
338  objSize = temp.getObjSize();
339 
340  // zero slots in the array
341  numSlots = 0;
342 
343  // no used slots
344  usedSlots = 0;
345 
346  // the max number of used slots is zero
347  maxSlots = 0;
348 }
349 
350 // Note: because this can be called by Object.deleteObject (), it must be written so as to not use
351 // TypeContained
352 template <class ValueType>
354 
355  // do no work if the guys we store do not come from pdb :: Object
356  if (!valueTypeInfo.descendsFromObject())
357  return;
358 
359  // now, delete each of the objects in there, if we have got an object type
360  for (uint32_t i = 0; i < numSlots; i++) {
361  if (JM_GET_HASH(data, i) != JM_UNUSED) {
362  valueTypeInfo.deleteConstituentObject(JM_GET_VALUE_PTR(data, i));
363  }
364  }
365 }
366 
367 template <class ValueType>
369  PDB_COUT << "bytes available in current allocator block: "
371  std::string out = getAllocator().printInactiveBlocks();
372  PDB_COUT << "inactive blocks: " << out << std::endl;
373  PDB_COUT << "usedSlots = " << usedSlots << ", maxSlots = " << maxSlots << std::endl;
374  uint32_t howMany = numSlots * 2;
375  PDB_COUT << "doubleArray to " << howMany << std::endl;
376  // allocate the new Array
378  makeObjectWithExtraStorage<JoinPairArray<ValueType>>(objSize * howMany, howMany);
379 
380  // first, set everything to unused
381  // now, re-hash everything
382  JoinPairArray<ValueType>& newOne = *tempArray;
383 
384  for (uint32_t i = 0; i < numSlots; i++) {
385 
386  if (JM_GET_HASH(data, i) != JM_UNUSED) {
387 
388  // copy the dude over
389  ValueType* temp = &(newOne.push(JM_GET_HASH(data, i)));
390  *temp = JM_GET_VALUE(data, i, ValueType);
391 
392  char* whereNextIs = ((char*)temp) - sizeof(uint32_t);
393  *((uint32_t*)whereNextIs) = JM_GET_NEXT(data, i);
394  }
395  }
396 
397  newOne.overflows = overflows;
398 
399  // and return this guy
400  return tempArray;
401 }
402 
403 template <class ValueType>
405  return usedSlots;
406 }
407 
408 template <class ValueType>
410  deleter(deleteMe, this);
411 }
412 
413 template <class ValueType>
416  return sizeof(JoinPairArray<Nothing>) + target.objSize * target.numSlots;
417 }
418 
419 template <class ValueType>
421  : whichOne(whichOne), parent(parent) {}
422 
423 template <class ValueType>
425  uint32_t objSize = parent.objSize;
426  return JM_GET_HASH(parent.data, whichOne);
427 }
428 
429 template <class ValueType>
431 
432  // in the case where this guy is not in the list, we return a zero
433  uint32_t objSize = parent.objSize;
434  if (JM_GET_HASH(parent.data, whichOne) == JM_UNUSED)
435  return 0;
436 
437  if (JM_GET_NEXT(parent.data, whichOne) != UINT32_MAX) {
438  if (JM_GET_NEXT(parent.data, whichOne) < parent.overflows.size()) {
439  return parent.overflows[JM_GET_NEXT(parent.data, whichOne)].size() + 1;
440  } else {
441  std::cout << "not invalid slot, return 0" << std::endl;
442  return 0;
443  }
444  } else {
445  return 1;
446  }
447  // otherwise, return the correct slot
448  return JM_GET_NEXT(parent.data, whichOne) == UINT32_MAX
449  ? 1
450  : parent.overflows[JM_GET_NEXT(parent.data, whichOne)].size() + 1;
451 }
452 
453 template <class ValueType>
454 ValueType& JoinRecordList<ValueType>::operator[](const size_t i) {
455  uint32_t objSize = parent.objSize;
456  return i == 0 ? JM_GET_VALUE(parent.data, whichOne, ValueType)
457  : parent.overflows[JM_GET_NEXT(parent.data, whichOne)][i - 1];
458 }
459 
460 template <class ValueType>
462  : iterateMe(&(*iterateMeIn)) {
463  slot = 0;
464  done = false;
465  uint32_t objSize = iterateMe->objSize;
466  while (slot != iterateMe->numSlots && JM_GET_HASH(iterateMe->data, slot) == JM_UNUSED)
467  slot++;
468 
469  if (slot == iterateMe->numSlots)
470  done = true;
471 
472 }
473 
474 template <class ValueType>
476  : iterateMe(&(*iterateMeIn)) {
477  done = true;
478 }
479 
480 template <class ValueType>
482  iterateMe = nullptr;
483  done = true;
484 }
485 
486 
487 template <class ValueType>
489  if (!done)
490  slot++;
491 
492  int objSize = iterateMe->objSize;
493  while (slot != iterateMe->numSlots && JM_GET_HASH(iterateMe->data, slot) == JM_UNUSED)
494  slot++;
495 
496  if (slot == iterateMe->numSlots)
497  done = true;
498 }
499 
500 template <class ValueType>
502 
503  JoinRecordList<ValueType>* returnVal = new JoinRecordList<ValueType>(slot, *iterateMe);
504  return returnVal;
505 }
506 
507 template <class ValueType>
509  if (!done || !me.done)
510  return true;
511  return false;
512 }
513 }
514 
515 #endif
JoinRecordList(uint32_t whichOne, JoinPairArray< ValueType > &parent)
JoinRecordList< ValueType > lookup(const size_t &which)
bool operator!=(const JoinMapIterator &me) const
#define JM_GET_VALUE_PTR(data, i)
Handle< JoinPairArray< ValueType > > doubleArray()
Vector< Vector< ValueType > > overflows
void setUnused(const size_t &clearMe)
uint32_t numUsedSlots()
Allocator & getAllocator()
Definition: Allocator.cc:943
size_t getSize(void *forMe)
#define JM_UNUSED
PDBTemplateBase valueTypeInfo
void deleter(void *deleteMe, ObjType *dummy)
Definition: DeepCopy.h:48
void deleteObject(void *deleteMe)
ValueType & push(const size_t &which)
#define JM_FILL_FACTOR
ValueType & operator[](const size_t i)
size_t getBytesAvailableInCurrentAllocatorBlock()
Definition: Allocator.cc:661
void setUpAndCopyFrom(void *target, void *source) const
void setUpAndCopyFromConstituentObject(void *target, void *source) const
#define JM_GET_NEXT(data, i)
#define PDB_COUT
Definition: PDBDebug.h:31
int count(const size_t &which)
void push_back(const TypeContained &val)
Definition: PDBVector.cc:95
JoinPairArray< ValueType > * iterateMe
Definition: JoinPairArray.h:94
bool descendsFromObject() const
JoinRecordList< ValueType > * operator*()
#define JM_GET_VALUE(data, i, type)
#define JM_GET_HASH(data, i)
JoinMapRecordClass< Nothing > data[0]
std::string printInactiveBlocks()
Definition: Allocator.cc:888