d9/d12/_simple_physical_node_8cc_source.html

 /*****************************************************************************

  *                                                                           *

  *  Copyright 2018 Rice University                                           *

  *                                                                           *

  *  Licensed under the Apache License, Version 2.0 (the "License");          *

  *  you may not use this file except in compliance with the License.         *

  *  You may obtain a copy of the License at                                  *

  *                                                                           *

  *      http://www.apache.org/licenses/LICENSE-2.0                           *

  *                                                                           *

  *  Unless required by applicable law or agreed to in writing, software      *

  *  distributed under the License is distributed on an "AS IS" BASIS,        *

  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *

  *  See the License for the specific language governing permissions and      *

  *  limitations under the License.                                           *

  *                                                                           *

  *****************************************************************************/

 #include "SetIdentifier.h"

 #include "Statistics.h"

 #include "JobStageBuilders/TupleSetJobStageBuilder.h"

 #include "SimplePhysicalOptimizer/SimplePhysicalNode.h"


 namespace pdb {


 /**
  * Creates a physical node that wraps a single atomic computation.
  *
  * If the wrapped atomic computation is a scan set, the computation it belongs to is looked up
  * in the logical plan and a set identifier is derived from it and stored as the source set
  * of this node. For every other kind of atomic computation the source set identifier is left
  * untouched by this constructor.
  *
  * @param jobId       the id of the job, forwarded to AbstractPhysicalNode
  * @param node        the atomic computation this physical node wraps
  * @param computePlan the compute plan, forwarded to AbstractPhysicalNode
  * @param logicalPlan the logical plan, forwarded to AbstractPhysicalNode and used to find the computation
  * @param conf        the configuration, forwarded to AbstractPhysicalNode
  */
 SimplePhysicalNode::SimplePhysicalNode(string jobId,
                                        AtomicComputationPtr node,
                                        const Handle<ComputePlan> &computePlan,
                                        LogicalPlanPtr logicalPlan,
                                        ConfigurationPtr conf) : AbstractPhysicalNode(jobId, computePlan, logicalPlan, conf),
                                                                                      node(node) {

   // if this node is a scan set we want to create a set identifier for it
   if(node->getAtomicComputationTypeID() == ScanSetAtomicTypeID) {

     // trace the scan node through the debug stream used by the rest of this file,
     // instead of unconditionally writing to (and flushing) stdout
     PDB_COUT << node->getOutputName() << "\n";

     // grab the computation
     Handle<Computation> comp = logicalPlan->getNode(node->getComputationName()).getComputationHandle();

     // create a set identifier from it
     sourceSetIdentifier = getSetIdentifierFromComputation(comp);
   }
 }


 /**
  * Analyzes the plan starting from this node, treating it as the source of a new pipeline.
  *
  * A fresh tuple set job stage builder is configured with the output tuple set of this node,
  * its source set identifier, the job id and the compute plan. The analysis then continues
  * with the first active consumer. If that analysis fails, the first active consumer is moved
  * to the back of the list so the same consumer is not tried again first.
  *
  * @param stats       the statistics passed down to the consumers
  * @param nextStageID the id the next created job stage should get
  * @return the result produced by the analysis of the first active consumer
  */
 PhysicalOptimizerResultPtr SimplePhysicalNode::analyze(const pdb::StatisticsPtr &stats, int nextStageID) {

   // the builder that accumulates the pipeline starting at this source
   pdb::TupleSetJobStageBuilderPtr stageBuilder = std::make_shared<TupleSetJobStageBuilder>();

   // the pipeline reads the tuple set this node outputs, from the set this node is the source of
   stageBuilder->setSourceTupleSetName(node->getOutputName());
   stageBuilder->setSourceContext(sourceSetIdentifier);

   // tell the builder whether the source is the result of an aggregation
   stageBuilder->setInputAggHashOut(sourceSetIdentifier->isAggregationResult());

   // the job and the compute plan the stage belongs to
   stageBuilder->setJobId(jobId);
   stageBuilder->setComputePlan(computePlan);

   // a source has no node in front of it
   SimplePhysicalNodePtr noPreviousNode;

   // the call is explicitly qualified, so this class's implementation of analyzeSingleConsumer
   // runs here; it continues the analysis with the first active consumer
   auto outcome = SimplePhysicalNode::analyzeSingleConsumer(stageBuilder, noPreviousNode, stats, nextStageID);

   // nothing more to do if the analysis went through
   if(outcome->success) {
     return outcome;
   }

   // the analysis failed: relink the first active consumer to the end of the list
   activeConsumers.splice(activeConsumers.end(), activeConsumers, activeConsumers.begin());

   return outcome;
 }


 /**
  * Continues the analysis of a pipeline at this node.
  *
  * The work is handed to one of three methods depending on how many consumers this node has:
  * none (output node), exactly one, or more than one.
  *
  * @param jobStageBuilder the builder of the job stage currently being assembled
  * @param prevNode        the node that was analyzed right before this one
  * @param stats           the statistics passed along the analysis
  * @param nextStageID     the id the next created job stage should get
  * @return the result of the selected analysis method
  */
 PhysicalOptimizerResultPtr SimplePhysicalNode::analyze(TupleSetJobStageBuilderPtr &jobStageBuilder,
                                                        SimplePhysicalNodePtr &prevNode,
                                                        const StatisticsPtr &stats,
                                                        int nextStageID) {

   const auto numConsumers = consumers.size();

   // no consumers, so this is an output node
   if(numConsumers == 0) {
     return analyzeOutput(jobStageBuilder, prevNode, stats, nextStageID);
   }

   // exactly one consumer
   if(numConsumers == 1) {
     return analyzeSingleConsumer(jobStageBuilder, prevNode, stats, nextStageID);
   }

   // more than one consumer
   return analyzeMultipleConsumers(jobStageBuilder, prevNode, stats, nextStageID);
 }


 /**
  * @return a reference to the atomic computation wrapped by this physical node
  */
 const AtomicComputationPtr &SimplePhysicalNode::getNode() const {
   return this->node;
 }


 /**
  * @return true if at least one consumer is still in the list of active consumers, false otherwise
  */
 bool SimplePhysicalNode::hasConsumers() {
   const bool noneLeft = activeConsumers.empty();
   return !noneLeft;
 }


 /**
  * Checks whether this node uses the given set as its source set.
  *
  * The source set identifier of a node is only assigned when the node wraps a scan set
  * (in the constructor) or after it was analyzed as a node with multiple consumers, so it can
  * still be unset when this method is called. An unset source, or a null set passed in, never matches.
  *
  * @param set the set identifier to compare against
  * @return true if the source set identifier of this node is set and compares equal to the given set
  */
 bool SimplePhysicalNode::isConsuming(Handle<SetIdentifier> &set) {

   // avoid dereferencing a handle that does not point to anything
   if(sourceSetIdentifier == nullptr || set == nullptr) {
     return false;
   }

   return *sourceSetIdentifier == *set;
 }


 /**
  * Registers a consumer of this node.
  *
  * The consumer is first registered through the base class implementation and then appended
  * to the list of active consumers of this node.
  *
  * @param consumer the node that consumes the output of this node
  */
 void SimplePhysicalNode::addConsumer(const pdb::AbstractPhysicalNodePtr &consumer) {

   // let the base class register the consumer
   AbstractPhysicalNode::addConsumer(consumer);

   // NOTE(review): the result of the cast is not checked, so a consumer that is not a
   // SimplePhysicalNode would be stored as a null pointer - confirm callers never pass one
   SimplePhysicalNodePtr simpleConsumer = std::dynamic_pointer_cast<SimplePhysicalNode>(consumer);

   // keep track of it as a consumer that still has to be processed
   activeConsumers.push_back(simpleConsumer);
 }


 /**
  * Estimates the cost of using the given set as a source.
  *
  * The cost is the number of bytes the statistics report for the set divided by 1000000.
  * If either the set or the statistics are missing a warning is logged and the cost is 0.
  *
  * @param source the set identifier of the set to get the cost for
  * @param stats  the statistics used to look up the size of the set
  * @return the cost of the set, or 0 if the set or the statistics are a nullptr
  */
 double SimplePhysicalNode::getCost(Handle<SetIdentifier> source, const StatisticsPtr &stats) {

   // the divisor of the cost formula cost = number_of_bytes / 1000000
   const double bytesPerCostUnit = 1000000.0;

   // without a set there is nothing to look up
   if (source == nullptr) {
     PDB_COUT << "WARNING: the set provided to the get cost is a nullptr\n";
     return 0;
   }

   // without statistics the size of the set is unknown
   if (stats == nullptr) {
     PDB_COUT << "WARNING: there are not stats when looking for the set=" << source->toSourceSetName() << "\n";
     return 0;
   }

   // grab the size of the set and scale it
   double numBytes = stats->getNumBytes(source->getDatabase(), source->getSetName());
   return numBytes / bytesPerCostUnit;
 }


 /**
  * Analyzes this node as a node whose output goes to a single consumer.
  *
  * The output tuple set of this node is added to the pipeline being built and the analysis
  * continues with the first active consumer, with this node handed over as its previous node.
  * The consumer is removed from the active consumers only if its analysis succeeded.
  *
  * @param tupleStageBuilder the builder of the job stage currently being assembled
  * @param prevNode          the node that was analyzed right before this one (not used by this method)
  * @param stats             the statistics passed along the analysis
  * @param nextStageID       the id the next created job stage should get
  * @return the result returned by the analysis of the first active consumer
  */
 PhysicalOptimizerResultPtr SimplePhysicalNode::analyzeSingleConsumer(TupleSetJobStageBuilderPtr &tupleStageBuilder,
                                                                      SimplePhysicalNodePtr &prevNode,
                                                                      const StatisticsPtr &stats,
                                                                      int nextStageID) {

   // this node becomes part of the pipeline
   tupleStageBuilder->addTupleSetToBuildPipeline(node->getOutputName());

   // look up the computation associated with this node
   // NOTE(review): the handle is not used afterwards; the lookup is kept as is because it is
   // not visible here whether getNode has effects beyond returning the node - confirm before removing
   Handle<Computation> curComp = logicalPlan->getNode(node->getComputationName()).getComputationHandle();

   // for the consumer this node is the previous node
   SimplePhysicalNodePtr thisNode = getSimpleNodeHandle();

   // continue the analysis with the first active consumer
   PhysicalOptimizerResultPtr outcome = activeConsumers.front()->analyze(tupleStageBuilder,
                                                                         thisNode,
                                                                         stats,
                                                                         nextStageID);

   // on failure the consumer stays in the list
   if(!outcome->success) {
     return outcome;
   }

   // the consumer was processed, so it is no longer active
   activeConsumers.pop_front();

   return outcome;
 }


 /**
  * Analyzes this node as an output node, meaning a node that has no consumers.
  *
  * The job stage being built is finished here: its sink is the set named by the computation
  * associated with this node, and its target tuple set is the input of this node. The built
  * job stage is returned in a successful result.
  *
  * @param tupleStageBuilder the builder of the job stage currently being assembled
  * @param prevNode          the node that was analyzed right before this one (not used by this method)
  * @param stats             the statistics passed along the analysis (not used by this method)
  * @param nextStageID       the id given to the created job stage
  * @return a successful result that contains the created job stage
  */
 PhysicalOptimizerResultPtr SimplePhysicalNode::analyzeOutput(TupleSetJobStageBuilderPtr &tupleStageBuilder,
                                                              SimplePhysicalNodePtr &prevNode,
                                                              const StatisticsPtr &stats,
                                                              int nextStageID) {

   // the computation this node belongs to
   Handle<Computation> computation = logicalPlan->getNode(node->getComputationName()).getComputationHandle();

   // the set the pipeline writes to
   Handle<SetIdentifier> outputSet = makeObject<SetIdentifier>(computation->getDatabaseName(),
                                                               computation->getSetName());

   // finish configuring the stage
   tupleStageBuilder->setJobStageId(nextStageID);
   tupleStageBuilder->setTargetTupleSetName(node->getInputName());
   tupleStageBuilder->setTargetComputationName(node->getComputationName());
   tupleStageBuilder->setOutputTypeName(computation->getOutputType());
   tupleStageBuilder->setSinkContext(outputSet);
   tupleStageBuilder->setAllocatorPolicy(computation->getAllocatorPolicy());

   // build the stage
   Handle<TupleSetJobStage> stage = tupleStageBuilder->build();

   // wrap the stage into a successful result
   PhysicalOptimizerResultPtr outcome = std::make_shared<PhysicalOptimizerResult>();
   outcome->physicalPlanToOutput.emplace_back(stage);
   outcome->success = true;

   return outcome;
 }


 /**
  * Analyzes this node as a node that has more than one consumer.
  *
  * Such a node ends the pipeline being built: the job stage is finished with the output tuple
  * set of this node as its target. If the computation does not materialize its output by
  * itself, an output set named after the job id and the output tuple set is assigned to it
  * and reported as an intermediate set. The set the stage writes to becomes the source set
  * of this node, and this node is reported as a newly created source.
  *
  * @param tupleSetJobStageBuilder the builder of the job stage currently being assembled
  * @param prevNode                the node that was analyzed right before this one (not used by this method)
  * @param stats                   the statistics passed along the analysis (not used by this method)
  * @param nextStageID             the id given to the created job stage
  * @return a successful result that contains the created job stage
  */
 PhysicalOptimizerResultPtr SimplePhysicalNode::analyzeMultipleConsumers(TupleSetJobStageBuilderPtr &tupleSetJobStageBuilder,
                                                                         SimplePhysicalNodePtr &prevNode,
                                                                         const StatisticsPtr &stats,
                                                                         int nextStageID) {

   // the result this analysis is going to return
   PhysicalOptimizerResultPtr outcome = std::make_shared<PhysicalOptimizerResult>();

   // the tuple set this node outputs becomes part of the pipeline
   std::string producedTupleSet = node->getOutputName();
   tupleSetJobStageBuilder->addTupleSetToBuildPipeline(producedTupleSet);

   // the computation this node belongs to
   Handle<Computation> computation = logicalPlan->getNode(node->getComputationName()).getComputationHandle();

   // having more than one consumer makes this node a pipeline breaker, so it needs a sink
   Handle<SetIdentifier> sinkSet = nullptr;

   if (computation->needsMaterializeOutput()) {

     // the computation is materialized anyway, so its own set is the sink
     sinkSet = makeObject<SetIdentifier>(computation->getDatabaseName(), computation->getSetName());

   } else {

     // the computation would not be materialized by default, so it is given an output set
     computation->setOutput(jobId, producedTupleSet);

     // the sink is that output set, with the configured page size
     sinkSet = makeObject<SetIdentifier>(jobId, producedTupleSet);
     sinkSet->setPageSize(conf->getPageSize());

     // remember the set as an intermediate set
     outcome->interGlobalSets.push_back(sinkSet);
   }

   // finish configuring the stage
   tupleSetJobStageBuilder->setJobStageId(nextStageID);
   tupleSetJobStageBuilder->setTargetTupleSetName(producedTupleSet);
   tupleSetJobStageBuilder->setTargetComputationName(node->getComputationName());
   tupleSetJobStageBuilder->setOutputTypeName(computation->getOutputType());
   tupleSetJobStageBuilder->setSinkContext(sinkSet);
   tupleSetJobStageBuilder->setAllocatorPolicy(computation->getAllocatorPolicy());

   // build the stage
   Handle<TupleSetJobStage> stage = tupleSetJobStageBuilder->build();

   // fill in the result: the stage, the success flag and this node as a new source
   outcome->physicalPlanToOutput.emplace_back(stage);
   outcome->success = true;
   outcome->createdSourceComputations.push_back(getSimpleNodeHandle());

   // from now on this node reads from the set it just wrote to
   sourceSetIdentifier = sinkSet;

   return outcome;
 }


 /**
  * Estimates the cost of using this node as a source.
  *
  * @param stats the statistics used to look up the size of the source set
  * @return the cost computed for the source set identifier of this node
  */
 double SimplePhysicalNode::getCost(const StatisticsPtr &stats) {

   // the cost of the node is the cost of the set it reads from
   const double sourceCost = getCost(sourceSetIdentifier, stats);
   return sourceCost;
 }


 /**
  * @return the identifier of this node, which is the output name of the wrapped atomic computation
  */
 string SimplePhysicalNode::getNodeIdentifier() {
   string identifier = node->getOutputName();
   return identifier;
 }


 /**
  * @return a reference to the source set identifier stored in this node
  */
 const Handle<SetIdentifier> &SimplePhysicalNode::getSourceSetIdentifier() const {
   return this->sourceSetIdentifier;
 }


 /**
  * @return the handle of this node, cast to a pointer to a SimplePhysicalNode
  */
 SimplePhysicalNodePtr SimplePhysicalNode::getSimpleNodeHandle() {

   // the base class hands out the handle as a pointer to the abstract node
   AbstractPhysicalNodePtr abstractHandle = getHandle();

   // narrow it to the simple node type
   return std::dynamic_pointer_cast<SimplePhysicalNode>(abstractHandle);
 }


 }


pdb::AbstractPhysicalNode::getHandle
AbstractPhysicalNodePtr getHandle()
Definition: AbstractPhysicalNode.cc:93

pdb::SimplePhysicalNode::analyzeOutput
virtual PhysicalOptimizerResultPtr analyzeOutput(TupleSetJobStageBuilderPtr &tupleStageBuilder, SimplePhysicalNodePtr &prevNode, const StatisticsPtr &stats, int nextStageID)
Definition: SimplePhysicalNode.cc:172

pdb::SimplePhysicalNode::SimplePhysicalNode
SimplePhysicalNode(string jobId, AtomicComputationPtr node, const Handle< ComputePlan > &computePlan, LogicalPlanPtr logicalPlan, ConfigurationPtr conf)
Definition: SimplePhysicalNode.cc:25

pdb::SimplePhysicalNode::getNode
const AtomicComputationPtr & getNode() const
Definition: SimplePhysicalNode.cc:104

ScanSetAtomicTypeID
Definition: AtomicComputation.h:48

pdb::AbstractPhysicalNode::computePlan
Handle< ComputePlan > computePlan
Definition: AbstractPhysicalNode.h:189

pdb::StatisticsPtr
std::shared_ptr< Statistics > StatisticsPtr
Definition: Statistics.h:27

pdb::SimplePhysicalNode::analyze
PhysicalOptimizerResultPtr analyze(const StatisticsPtr &stats, int nextStageID) override
Definition: SimplePhysicalNode.cc:43

pdb::SimplePhysicalNode::addConsumer
void addConsumer(const AbstractPhysicalNodePtr &consumer) override
Definition: SimplePhysicalNode.cc:116

pdb::SimplePhysicalNode::getCost
double getCost(const StatisticsPtr &stats) override
Definition: SimplePhysicalNode.cc:264

pdb::AbstractPhysicalNode::logicalPlan
LogicalPlanPtr logicalPlan
Definition: AbstractPhysicalNode.h:194

pdb::SimplePhysicalNode::isConsuming
bool isConsuming(Handle< SetIdentifier > &set) override
Definition: SimplePhysicalNode.cc:112

pdb::LogicalPlanPtr
std::shared_ptr< LogicalPlan > LogicalPlanPtr
Definition: ComputePlan.h:36

pdb::AbstractPhysicalNode
Definition: AbstractPhysicalNode.h:75

pdb::SimplePhysicalNode::getNodeIdentifier
std::string getNodeIdentifier() override
Definition: SimplePhysicalNode.cc:270

TupleSetJobStageBuilder.h

pdb::SimplePhysicalNode::analyzeMultipleConsumers
virtual PhysicalOptimizerResultPtr analyzeMultipleConsumers(TupleSetJobStageBuilderPtr &ptr, SimplePhysicalNodePtr &prevNode, const StatisticsPtr &stats, int nextStageID)
Definition: SimplePhysicalNode.cc:204

pdb::SimplePhysicalNode::hasConsumers
bool hasConsumers() override
Definition: SimplePhysicalNode.cc:108

pdb::SimplePhysicalNode::getSourceSetIdentifier
const Handle< SetIdentifier > & getSourceSetIdentifier() const
Definition: SimplePhysicalNode.cc:274

SimplePhysicalNode.h

PDB_COUT
#define PDB_COUT
Definition: PDBDebug.h:31

ConfigurationPtr
shared_ptr< Configuration > ConfigurationPtr
Definition: Configuration.h:89

AtomicComputationPtr
std::shared_ptr< struct AtomicComputation > AtomicComputationPtr
Definition: AtomicComputation.h:36

pdb::SimplePhysicalNodePtr
std::shared_ptr< SimplePhysicalNode > SimplePhysicalNodePtr
Definition: SimplePhysicalNode.h:30

pdb::AbstractPhysicalNode::conf
ConfigurationPtr conf
Definition: AbstractPhysicalNode.h:199

pdb::SimplePhysicalNode::analyzeSingleConsumer
virtual PhysicalOptimizerResultPtr analyzeSingleConsumer(TupleSetJobStageBuilderPtr &tupleStageBuilder, SimplePhysicalNodePtr &prevNode, const StatisticsPtr &stats, int nextStageID)
Definition: SimplePhysicalNode.cc:143

pdb::SimplePhysicalNode
Definition: SimplePhysicalNode.h:33

pdb::AbstractPhysicalNode::consumers
std::list< AbstractPhysicalNodePtr > consumers
Definition: AbstractPhysicalNode.h:204

pdb::AbstractPhysicalNodePtr
std::shared_ptr< AbstractPhysicalNode > AbstractPhysicalNodePtr
Definition: AbstractPhysicalNode.h:39

SetIdentifier.h

pdb::AbstractPhysicalNode::getSetIdentifierFromComputation
Handle< SetIdentifier > getSetIdentifierFromComputation(Handle< Computation > computation)
Definition: AbstractPhysicalNode.cc:35

pdb::SimplePhysicalNode::getSimpleNodeHandle
SimplePhysicalNodePtr getSimpleNodeHandle()
Definition: SimplePhysicalNode.cc:278

pdb::AbstractPhysicalNode::addConsumer
virtual void addConsumer(const pdb::AbstractPhysicalNodePtr &consumer)
Definition: AbstractPhysicalNode.h:135

pdb::SimplePhysicalNode::node
AtomicComputationPtr node
Definition: SimplePhysicalNode.h:189

pdb::SimplePhysicalNode::sourceSetIdentifier
Handle< SetIdentifier > sourceSetIdentifier
Definition: SimplePhysicalNode.h:184

Statistics.h

pdb::SimplePhysicalNode::activeConsumers
std::list< SimplePhysicalNodePtr > activeConsumers
Definition: SimplePhysicalNode.h:179

pdb::TupleSetJobStageBuilderPtr
std::shared_ptr< TupleSetJobStageBuilder > TupleSetJobStageBuilderPtr
Definition: TupleSetJobStageBuilder.h:26

pdb::AbstractPhysicalNode::jobId
std::string jobId
Definition: AbstractPhysicalNode.h:184

pdb::Handle
Definition: Allocator.h:399

pdb::PhysicalOptimizerResultPtr
std::shared_ptr< PhysicalOptimizerResult > PhysicalOptimizerResultPtr
Definition: AbstractPhysicalNode.h:36