A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
ShuffleInfo.h
Go to the documentation of this file.
1 /*****************************************************************************
2  * *
3  * Copyright 2018 Rice University *
4  * *
5  * Licensed under the Apache License, Version 2.0 (the "License"); *
6  * you may not use this file except in compliance with the License. *
7  * You may obtain a copy of the License at *
8  * *
9  * http://www.apache.org/licenses/LICENSE-2.0 *
10  * *
11  * Unless required by applicable law or agreed to in writing, software *
12  * distributed under the License is distributed on an "AS IS" BASIS, *
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14  * See the License for the specific language governing permissions and *
15  * limitations under the License. *
16  * *
17  *****************************************************************************/
18 #ifndef SHUFFLE_INFO_H
19 #define SHUFFLE_INFO_H
20 
21 #include "DataTypes.h"
22 #include "StandardResourceInfo.h"
23 
24 namespace pdb {
25 
26 // this class wraps the shuffle information for job stages that need to
27 // repartition data
28 
29 class ShuffleInfo {
30 
31 private:
32  // number of nodes in this cluster
33  int numNodes;
34 
35  // number of CPU cores allocated for hash aggregation and hash join in this
36  // cluster
38 
39  // hash partition ids allocated for each node
40  std::vector<std::vector<HashPartitionID>> partitionIds;
41 
42  // address for each node
43  std::vector<std::string> addresses;
44 
45 public:
46  // constructor
47  ShuffleInfo(std::vector<StandardResourceInfoPtr> *clusterResources,
48  double partitionToCoreRatio) {
49 
50  this->numNodes = clusterResources->size();
51  this->numHashPartitions = 0;
52  int i, j;
53  HashPartitionID id = 0;
54  partitionIds.resize(this->numNodes);
55  for (i = 0; i < this->numNodes; i++) {
56  StandardResourceInfoPtr node = clusterResources->at(i);
57  int numCoresOnThisNodeForHashing =
58  (int)((double)(node->getNumCores()) * partitionToCoreRatio);
59  if (numCoresOnThisNodeForHashing == 0) {
60  numCoresOnThisNodeForHashing = 1;
61  }
62  for (j = 0; j < numCoresOnThisNodeForHashing; j++) {
63  partitionIds[i].push_back(id);
64  id++;
65  }
66  std::string curAddress =
67  node->getAddress() + ":" + std::to_string(node->getPort());
68  this->addresses.push_back(curAddress);
69  this->numHashPartitions += numCoresOnThisNodeForHashing;
70  }
71  }
72 
73  // destructor
75 
76  // to return number of nodes in the system
77  int getNumNodes() { return this->numNodes; }
78 
79  // to return number of hash partitions in the system
80  int getNumHashPartitions() { return this->numHashPartitions; }
81 
82  // to return partitioning scheme, so each node will have a set of partitionIds
83  std::vector<std::vector<HashPartitionID>> &getPartitionIds() {
84  return this->partitionIds;
85  }
86 
87  // to return the address of nodes in the system
88  std::vector<std::string> &getAddresses() { return this->addresses; }
89 };
90 }
91 #endif
std::vector< std::vector< HashPartitionID > > & getPartitionIds()
Definition: ShuffleInfo.h:83
std::vector< std::string > addresses
Definition: ShuffleInfo.h:43
std::vector< std::vector< HashPartitionID > > partitionIds
Definition: ShuffleInfo.h:40
unsigned int HashPartitionID
Definition: DataTypes.h:28
std::shared_ptr< StandardResourceInfo > StandardResourceInfoPtr
std::vector< std::string > & getAddresses()
Definition: ShuffleInfo.h:88
int getNumHashPartitions()
Definition: ShuffleInfo.h:80
ShuffleInfo(std::vector< StandardResourceInfoPtr > *clusterResources, double partitionToCoreRatio)
Definition: ShuffleInfo.h:47