A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
ComputePlan.h
Go to the documentation of this file.
1 /*****************************************************************************
2  * *
3  * Copyright 2018 Rice University *
4  * *
5  * Licensed under the Apache License, Version 2.0 (the "License"); *
6  * you may not use this file except in compliance with the License. *
7  * You may obtain a copy of the License at *
8  * *
9  * http://www.apache.org/licenses/LICENSE-2.0 *
10  * *
11  * Unless required by applicable law or agreed to in writing, software *
12  * distributed under the License is distributed on an "AS IS" BASIS, *
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14  * See the License for the specific language governing permissions and *
15  * limitations under the License. *
16  * *
17  *****************************************************************************/
18 
19 #ifndef COMPUTE_PLAN_H
20 #define COMPUTE_PLAN_H
21 
22 #include "Computation.h"
23 #include "PDBString.h"
24 #include "Object.h"
25 #include "LogicalPlan.h"
26 #include "PDBVector.h"
27 #include "Pipeline.h"
28 #include "ComputeInfo.h"
29 #include "SinkMerger.h"
30 #include "SinkShuffler.h"
31 
32 // PRELOAD %ComputePlan%
33 
34 namespace pdb {
35 
36 typedef std::shared_ptr<LogicalPlan> LogicalPlanPtr;
37 
38 // this is the basic type that is sent around a PDB cluster to store a computation that PDB is to
39 // execute
40 class ComputePlan : public Object {
41 
42  // this is the compiled plan
44 
45  // this is the list of Computation objects that are going to be used to power the plan
47 
48  // this data structure contains both the compiled TCAP string, as well as an index of all of the
49  // computations
51 
52 public:
54 
55  ComputePlan();
56 
58  myPlan = nullptr;
59  }
60 
61 
62  // constructor, takes as input the string to execute, as well as the vector of computations
64 
65  // this compiles the TCAPComputation string, returning a LogicalPlan object. The resulting
66  // object contains:
67  //
68  // (1) a graph of individual, SIMD-style operations. This can be accessed via the
69  // getComputations () method.
70  // (2) a data structure containing all of the actual Computations that implement those
71  // SIMD-style operations,
72  // as well as the Lambdas that are associated with each of those SIMD-style operations.
73  // Particular Computation
74  // objects can be accessed via the getNode () method (note that the argument to getNode ()
75  // is a string that
76  // names the computation; this string can be obtained via the getComputationName () method
77  // on the
78  // AtomicComputation objects stored in the graph of SIMD-style operations).
79  //
81 
82  // JiaNote: get producing computation name
83  std::string getProducingComputationName(std::string sourceTupleSetName);
84 
85  // Note that once getPlan () has been called, ComputePlan object contains a C++ smart pointer
86  // inside of it.
87  // IT IS VERY DANGEROUS TO SEND SUCH A POINTER ACROSS THE NETWORK. Hence, after calling
88  // getPlan () but before
89  // this object is sent across the network or written to disk, the following method MUST be
90  // called to avoid
91  // sending the smart pointer.
92  void nullifyPlanPointer();
93 
94  // this builds a pipeline between the Computation that produces sourceTupleSetName and the
95  // Computation
96  // targetComputationName. Since targetComputationName can have more than one input (in the case
97  // of a join,
98  // for example) the pipeline to targetComputationName is built on the link producing
99  // targetTupleSetName.
100  //
101  // The lambda getPage is used by the pipeline to obtain new temp pages; it is assumed that a
102  // page returned
103  // by getPage will remain pinned until either discardTempPage or writeBackPage are called. The
104  // former is
105  // called if the page can safely be destroyed because it has no useful data. The latter is
106  // called if the
107  // page stores a pdb :: Object that contains the result of the computation.
108 
109  PipelinePtr buildPipeline(std::string sourceTupleSetName,
110  std::string targetTupleSetName,
111  std::string targetComputationName,
112  std::function<std::pair<void*, size_t>()> getPage,
113  std::function<void(void*)> discardTempPage,
114  std::function<void(void*)> writeBackPage,
115  std::map<std::string, ComputeInfoPtr>& params);
116 
117 
118  PipelinePtr buildPipeline(std::string sourceTupleSetName,
119  std::string targetTupleSetName,
120  std::string targetComputationName,
121  std::function<std::pair<void*, size_t>()> getPage,
122  std::function<void(void*)> discardTempPage,
123  std::function<void(void*)> writeBackPage);
124 
125 
126  // JiaNote: add new buildPipeline methods to avoid ambiguity
127  PipelinePtr buildPipeline(std::vector<std::string> buildTheseTupleSets,
128  std::string sourceTupleSetName,
129  std::string targetComputationName,
130  std::function<std::pair<void*, size_t>()> getPage,
131  std::function<void(void*)> discardTempPage,
132  std::function<void(void*)> writeBackPage,
133  std::map<std::string, ComputeInfoPtr>& params);
134 
135  PipelinePtr buildPipeline(std::vector<std::string> buildTheseTupleSets,
136  std::string sourceTupleSetName,
137  std::string targetComputationName,
138  std::function<std::pair<void*, size_t>()> getPage,
139  std::function<void(void*)> discardTempPage,
140  std::function<void(void*)> writeBackPage);
141 
142 
143  // JiaNote: add this to get sink merger
144  SinkMergerPtr getMerger(std::string sourceTupleSetName,
145  std::string targetTupleSetName,
146  std::string targetComputationName);
147 
148  // JiaNote: add this to get sink shuffler
149  SinkShufflerPtr getShuffler(std::string sourceTupleSetName,
150  std::string targetTupleSetName,
151  std::string targetComputationName);
152 };
153 }
154 
155 #include "ComputePlan.cc"
156 
157 #endif
#define ENABLE_DEEP_COPY
Definition: DeepCopy.h:52
LogicalPlanPtr getPlan()
Definition: ComputePlan.cc:39
PipelinePtr buildPipeline(std::string sourceTupleSetName, std::string targetTupleSetName, std::string targetComputationName, std::function< std::pair< void *, size_t >()> getPage, std::function< void(void *)> discardTempPage, std::function< void(void *)> writeBackPage, std::map< std::string, ComputeInfoPtr > &params)
Definition: ComputePlan.cc:580
std::shared_ptr< SinkShuffler > SinkShufflerPtr
Definition: SinkShuffler.h:31
SinkShufflerPtr getShuffler(std::string sourceTupleSetName, std::string targetTupleSetName, std::string targetComputationName)
Definition: ComputePlan.cc:198
std::shared_ptr< SinkMerger > SinkMergerPtr
Definition: SinkMerger.h:30
LogicalPlanPtr myPlan
Definition: ComputePlan.h:50
std::shared_ptr< Pipeline > PipelinePtr
Definition: Pipeline.h:314
std::shared_ptr< LogicalPlan > LogicalPlanPtr
Definition: ComputePlan.h:36
Vector< Handle< Computation > > allComputations
Definition: ComputePlan.h:46
ENABLE_DEEP_COPY ComputePlan()
Definition: ComputePlan.cc:37
String TCAPComputation
Definition: ComputePlan.h:43
std::string getProducingComputationName(std::string sourceTupleSetName)
Definition: ComputePlan.cc:115
void nullifyPlanPointer()
Definition: ComputePlan.cc:75
SinkMergerPtr getMerger(std::string sourceTupleSetName, std::string targetTupleSetName, std::string targetComputationName)
Definition: ComputePlan.cc:131