A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
HashPartitionedJoinBuildHTJobStage.h
Go to the documentation of this file.
1 /*****************************************************************************
2  * *
3  * Copyright 2018 Rice University *
4  * *
5  * Licensed under the Apache License, Version 2.0 (the "License"); *
6  * you may not use this file except in compliance with the License. *
7  * You may obtain a copy of the License at *
8  * *
9  * http://www.apache.org/licenses/LICENSE-2.0 *
10  * *
11  * Unless required by applicable law or agreed to in writing, software *
12  * distributed under the License is distributed on an "AS IS" BASIS, *
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14  * See the License for the specific language governing permissions and *
15  * limitations under the License. *
16  * *
17  *****************************************************************************/
18 #ifndef HASH_PARTITIONED_JOIN_BUILDHT_STAGE_H
19 #define HASH_PARTITIONED_JOIN_BUILDHT_STAGE_H
20 
21 
22 #include "AbstractJobStage.h"
23 #include "SetIdentifier.h"
24 #include "ComputePlan.h"
25 
26 // PRELOAD %HashPartitionedJoinBuildHTJobStage%
27 
28 namespace pdb {
29 
30 // this class encapsulates the job stage for building hash tables for hash partitioned join
31 
33 
34 private:
35  // input set
37 
38  // name for output hash table
40 
41  // job stage id
43 
44  // number of pages
45  int numPages;
46 
47  // logical plan information
49 
50  // source tuple set
52 
53  // target tuple set
55 
56  // target computation
58 
59  // number of partitions on this node
61 
62  // total memory on this node
64 
65 
66 public:
68 
69  // constructor
71 
72  // constructor taking the job id, the job stage id, and the name of the partitioned hash set
74  JobStageID stageId,
75  std::string hashSetName) {
76 
77  this->jobId = jobId;
78  this->id = stageId;
79  this->partitionedHashSetName = hashSetName;
80  }
81 
82  // to set source set identifier
84  this->sourceContext = sourceContext;
85  }
86 
87  // to return source set identifier
89  return this->sourceContext;
90  }
91 
92  // return job stage type
93  std::string getJobStageType() override {
94  return "HashPartitionedJoinBuildHTJobStage";
95  }
96 
97  // return job stage type id
98 
99  int16_t getJobStageTypeID() override {
100  return HashPartitionedJoinBuildHTJobStage_TYPEID;
101  }
102 
103  // return job stage id
104  JobStageID getStageId() override {
105  return this->id;
106  }
107 
108  // set name of the hash set that stores the partitioned hash table
109  void setHashSetName(std::string hashSetName) {
110  this->partitionedHashSetName = hashSetName;
111  }
112 
113  // return name of the hash set that stores the partitioned hash table
114  std::string getHashSetName() {
115  return this->partitionedHashSetName;
116  }
117 
118  void print() override {
119  std::cout << "[JOB ID] jobId = " << jobId << std::endl;
120  std::cout << "[STAGE ID] id = " << id << std::endl;
121  std::cout << "[INPUT] databaseName=" << sourceContext->getDatabase()
122  << ", setName=" << sourceContext->getSetName() << std::endl;
123  std::cout << "[HASH SET] name = " << partitionedHashSetName << std::endl;
124  std::cout << "[NUMPARTITIONS] numPartitions=" << numNodePartitions << std::endl;
125  std::cout << "[MEM] total memory=" << totalMemoryOnThisNode << std::endl;
126  std::cout << "[NUM PAGES] numPages=" << numPages << std::endl;
127  std::cout << "[SRCTUPLESET] sourceTupleSetSpecifier=" << sourceTupleSetSpecifier << std::endl;
128  std::cout << "[TARTUPLESET] targetTupleSetSpecifier=" << targetTupleSetSpecifier << std::endl;
129  std::cout << "[TACCOMP] targetComputationSpecifier=" << targetComputationSpecifier << std::endl;
130  }
131 
132  void setNumPages(int numPages) {
133  this->numPages = numPages;
134  }
135 
136  int getNumPages() {
137  return this->numPages;
138  }
139 
140  // to set compute plan
142  const std::string &sourceTupleSetSpecifier,
143  const std::string &targetTupleSetSpecifier,
144  const std::string &targetComputationSpecifier) {
145  this->sharedPlan = plan;
146  this->sourceTupleSetSpecifier = sourceTupleSetSpecifier;
147  this->targetTupleSetSpecifier = targetTupleSetSpecifier;
148  this->targetComputationSpecifier = targetComputationSpecifier;
149  }
150 
151  // nullify compute plan shared pointer
153  this->sharedPlan->nullifyPlanPointer();
154  }
155 
156  // to get source tupleset name for this pipeline stage
158  return this->sourceTupleSetSpecifier;
159  }
160 
161  // to get target tupleset name for this pipeline stage
163  return this->targetTupleSetSpecifier;
164  }
165 
166  // to get target computation name for this pipeline stage
168  return this->targetComputationSpecifier;
169  }
170 
171  // to get compute plan
173  return this->sharedPlan;
174  }
175 
176  // to set number of partitions on this node
178  this->numNodePartitions = numNodePartitions;
179  }
180 
181  // to get number of partitions on this node
183  return this->numNodePartitions;
184  }
185 
186  // to set total memory on this node
187  void setTotalMemoryOnThisNode(size_t totalMem) {
188  this->totalMemoryOnThisNode = totalMem;
189  }
190 
191  // to get total memory on this node
193  return this->totalMemoryOnThisNode;
194  }
195 };
196 }
197 
198 #endif
#define ENABLE_DEEP_COPY
Definition: DeepCopy.h:52
HashPartitionedJoinBuildHTJobStage(std::string jobId, JobStageID stageId, std::string hashSetName)
unsigned int JobStageID
Definition: DataTypes.h:37
void setSourceContext(Handle< SetIdentifier > sourceContext)
void setComputePlan(const Handle< ComputePlan > &plan, const std::string &sourceTupleSetSpecifier, const std::string &targetTupleSetSpecifier, const std::string &targetComputationSpecifier)