A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Statistics.h
Go to the documentation of this file.
1 /*****************************************************************************
2  * *
3  * Copyright 2018 Rice University *
4  * *
5  * Licensed under the Apache License, Version 2.0 (the "License"); *
6  * you may not use this file except in compliance with the License. *
7  * You may obtain a copy of the License at *
8  * *
9  * http://www.apache.org/licenses/LICENSE-2.0 *
10  * *
11  * Unless required by applicable law or agreed to in writing, software *
12  * distributed under the License is distributed on an "AS IS" BASIS, *
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14  * See the License for the specific language governing permissions and *
15  * limitations under the License. *
16  * *
17  *****************************************************************************/
18 #ifndef PDB_STATISTICS_H
19 #define PDB_STATISTICS_H
20 
#include <cstddef>
#include <iostream>
#include <memory>
#include <pthread.h>
#include <string>
#include <unordered_map>
24 
namespace pdb {

class Statistics;
using StatisticsPtr = std::shared_ptr<Statistics>;

// Statistics recorded for a single set, used for physical scheduling.
// All counters start at zero and both names start empty.
struct DataStatistics {

    std::string databaseName;
    std::string setName;
    int numPages = 0;
    size_t pageSize = 0;
    size_t numBytes = 0;
    int numTuples = 0;
    size_t avgTupleSize = 0;
};

// Registry of per-set DataStatistics (keyed by "<databaseName>:<setName>") plus
// selectivity estimates keyed by atomic computation type and by lambda type.
//
// Every public member function holds the internal mutex while it touches the
// maps, so readers never observe a map that a writer is in the middle of
// modifying. The mutex is not recursive: the public member functions never
// call one another.
class Statistics {

private:
    // Holds a pthread mutex locked for the lifetime of the object, so the mutex
    // is released on every exit path, including when a map operation throws.
    class ScopedLock {
    public:
        explicit ScopedLock(pthread_mutex_t &target) : held(target) {
            pthread_mutex_lock(&held);
        }
        ~ScopedLock() { pthread_mutex_unlock(&held); }

        ScopedLock(const ScopedLock &) = delete;
        ScopedLock &operator=(const ScopedLock &) = delete;

    private:
        pthread_mutex_t &held;
    };

    std::unordered_map<std::string, DataStatistics> dataStatistics;
    std::unordered_map<std::string, double> atomicComputationSelectivity;
    std::unordered_map<std::string, double> lambdaSelectivity;

    // mutable so that the copy operations can lock the (const) source object
    mutable pthread_mutex_t mutex;

    // builds the map key used for a set: "<databaseName>:<setName>"
    static std::string makeKey(const std::string &databaseName, const std::string &setName) {
        return databaseName + ":" + setName;
    }

    // returns the entry stored under key, or nullptr when there is none;
    // never inserts. The caller must already hold the mutex.
    const DataStatistics *findSet(const std::string &key) const {
        auto found = dataStatistics.find(key);
        return found == dataStatistics.end() ? nullptr : &found->second;
    }

    // returns the selectivity stored under name, or 0 when there is none;
    // never inserts. The caller must already hold the mutex.
    static double selectivityOrZero(const std::unordered_map<std::string, double> &table,
                                    const std::string &name) {
        auto found = table.find(name);
        return found == table.end() ? 0.0 : found->second;
    }

public:
    // constructor
    Statistics() { pthread_mutex_init(&mutex, nullptr); }

    // copy constructor: copies the three maps while holding the source's lock
    // and gives the new object a mutex of its own (a pthread mutex must not be
    // duplicated by copying its bytes). The maps are copied before the mutex is
    // initialised so that nothing needs to be cleaned up if a copy throws.
    Statistics(const Statistics &other) {
        {
            ScopedLock guard(other.mutex);
            dataStatistics = other.dataStatistics;
            atomicComputationSelectivity = other.atomicComputationSelectivity;
            lambdaSelectivity = other.lambdaSelectivity;
        }
        pthread_mutex_init(&mutex, nullptr);
    }

    // copy assignment: replaces the three maps with copies of other's maps.
    // The mutex of this object is kept. Only one lock is held at a time
    // (first other's, inside the snapshot copy, then ours), so two objects
    // assigned to each other concurrently cannot deadlock.
    Statistics &operator=(const Statistics &other) {
        if (this != &other) {
            Statistics snapshot(other);
            ScopedLock guard(mutex);
            dataStatistics.swap(snapshot.dataStatistics);
            atomicComputationSelectivity.swap(snapshot.atomicComputationSelectivity);
            lambdaSelectivity.swap(snapshot.lambdaSelectivity);
        }
        return *this;
    }

    // destructor
    ~Statistics() { pthread_mutex_destroy(&mutex); }

    // remove set; does nothing when the set has no entry
    void removeSet(std::string databaseName, std::string setName) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        dataStatistics.erase(key);
    }

    // to store a copy of stats as the entry of a set, replacing any existing entry
    void addSet(std::string databaseName, std::string setName, DataStatistics &stats) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        dataStatistics[key] = stats;
    }

    // to make (aliasDatabase, aliasSetName) carry a copy of the statistics
    // currently stored for (databaseName, setName). When the source set has no
    // entry the alias receives zeroed statistics, and no entry is created for
    // the source, so the source keeps reporting the "unknown" getter values.
    void addSetAlias(std::string databaseName,
                     std::string setName,
                     std::string aliasDatabase,
                     std::string aliasSetName) {
        const std::string key = makeKey(databaseName, setName);
        const std::string aliasKey = makeKey(aliasDatabase, aliasSetName);
        ScopedLock guard(mutex);
        const DataStatistics *source = findSet(key);
        // copy first: the insertion below may rehash the map
        const DataStatistics copied = (source == nullptr) ? DataStatistics() : *source;
        dataStatistics[aliasKey] = copied;
    }

    // to return number of pages of a set (0 when the set has no entry)
    int getNumPages(std::string databaseName, std::string setName) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        const DataStatistics *entry = findSet(key);
        return entry == nullptr ? 0 : entry->numPages;
    }

    // to set number of pages of a set (creates the entry when missing)
    void setNumPages(std::string databaseName, std::string setName,
                     int numPages) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        dataStatistics[key].numPages = numPages;
    }

    // to return page size of a set; when the set has no entry the result is
    // -1 converted to size_t, i.e. the largest size_t value
    size_t getPageSize(std::string databaseName, std::string setName) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        const DataStatistics *entry = findSet(key);
        return entry == nullptr ? static_cast<size_t>(-1) : entry->pageSize;
    }

    // to set page size of a set (creates the entry when missing)
    void setPageSize(std::string databaseName, std::string setName,
                     size_t pageSize) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        dataStatistics[key].pageSize = pageSize;
    }

    // to return numBytes of a set (0 when the set has no entry)
    size_t getNumBytes(std::string databaseName, std::string setName) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        const DataStatistics *entry = findSet(key);
        return entry == nullptr ? 0 : entry->numBytes;
    }

    // to increment page number of a set (creates the entry when missing)
    void incrementNumPages(std::string databaseName, std::string setName, size_t numPages) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        DataStatistics &entry = dataStatistics[key];
        // the counter is an int while the increment is a size_t: add in size_t
        // and convert back, which is what "int += size_t" does implicitly
        entry.numPages = static_cast<int>(static_cast<size_t>(entry.numPages) + numPages);
    }

    // to increment number of bytes of a set (creates the entry when missing)
    void incrementNumBytes(std::string databaseName, std::string setName, size_t numBytes) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        dataStatistics[key].numBytes += numBytes;
    }

    // to set numBytes of a set (creates the entry when missing)
    void setNumBytes(std::string databaseName, std::string setName,
                     size_t numBytes) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        dataStatistics[key].numBytes = numBytes;
    }

    // to return number of tuples of a set (-1 when the set has no entry)
    int getNumTuples(std::string databaseName, std::string setName) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        const DataStatistics *entry = findSet(key);
        return entry == nullptr ? -1 : entry->numTuples;
    }

    // to set number of tuples of a set (creates the entry when missing)
    void setNumTuples(std::string databaseName, std::string setName,
                      int numTuples) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        dataStatistics[key].numTuples = numTuples;
    }

    // to return average tuple size of a set; when the set has no entry the
    // result is -1 converted to size_t, i.e. the largest size_t value
    size_t getAvgTupleSize(std::string databaseName, std::string setName) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        const DataStatistics *entry = findSet(key);
        return entry == nullptr ? static_cast<size_t>(-1) : entry->avgTupleSize;
    }

    // to set average tuple size of a set (creates the entry when missing)
    void setAvgTupleSize(std::string databaseName, std::string setName,
                         size_t avgTupleSize) {
        const std::string key = makeKey(databaseName, setName);
        ScopedLock guard(mutex);
        dataStatistics[key].avgTupleSize = avgTupleSize;
    }

    // to return selectivity of an atomic computation (0 when none was set)
    double getAtomicComputationSelectivity(std::string atomicComputationType) {
        ScopedLock guard(mutex);
        return selectivityOrZero(atomicComputationSelectivity, atomicComputationType);
    }

    // to set selectivity for an atomic computation
    void setAtomicComputationSelectivity(std::string atomicComputationType,
                                         double selectivity) {
        ScopedLock guard(mutex);
        atomicComputationSelectivity[atomicComputationType] = selectivity;
    }

    // to return selectivity of a lambda (0 when none was set)
    double getLambdaSelectivity(std::string lambdaType) {
        ScopedLock guard(mutex);
        return selectivityOrZero(lambdaSelectivity, lambdaType);
    }

    // to set selectivity for a lambda
    void setLambdaSelectivity(std::string lambdaType, double selectivity) {
        ScopedLock guard(mutex);
        lambdaSelectivity[lambdaType] = selectivity;
    }

    // to print a header line followed by the key of every stored set, each key
    // followed by a separator line, to standard output
    void printSets() {
        ScopedLock guard(mutex);

        // print the header
        std::cout << "----------- STATS ------------" << std::endl;

        // go through the data statistics and print out the sets
        for (const auto &entry : dataStatistics) {
            std::cout << entry.first << std::endl;
            std::cout << "------------------------------" << std::endl;
        }
    }
};
}
247 
248 #endif
pthread_mutex_t mutex
Definition: Statistics.h:49
void printSets()
Definition: Statistics.h:234
std::shared_ptr< Statistics > StatisticsPtr
Definition: Statistics.h:27
void setNumTuples(std::string databaseName, std::string setName, int numTuples)
Definition: Statistics.h:171
void setNumBytes(std::string databaseName, std::string setName, size_t numBytes)
Definition: Statistics.h:152
std::string databaseName
Definition: Statistics.h:34
int getNumTuples(std::string databaseName, std::string setName)
Definition: Statistics.h:161
void removeSet(std::string databaseName, std::string setName)
Definition: Statistics.h:59
double getLambdaSelectivity(std::string lambdaType)
Definition: Statistics.h:216
size_t getAvgTupleSize(std::string databaseName, std::string setName)
Definition: Statistics.h:180
int getNumPages(std::string databaseName, std::string setName)
Definition: Statistics.h:88
void setNumPages(std::string databaseName, std::string setName, int numPages)
Definition: Statistics.h:98
void setAvgTupleSize(std::string databaseName, std::string setName, size_t avgTupleSize)
Definition: Statistics.h:190
void setPageSize(std::string databaseName, std::string setName, size_t pageSize)
Definition: Statistics.h:117
void addSetAlias(std::string databaseName, std::string setName, std::string aliasDatabase, std::string aliasSetName)
Definition: Statistics.h:78
std::unordered_map< std::string, double > atomicComputationSelectivity
Definition: Statistics.h:47
size_t getPageSize(std::string databaseName, std::string setName)
Definition: Statistics.h:107
void incrementNumPages(std::string databaseName, std::string setName, size_t numPages)
Definition: Statistics.h:136
void setLambdaSelectivity(std::string lambdaType, double selectivity)
Definition: Statistics.h:225
void incrementNumBytes(std::string databaseName, std::string setName, size_t numBytes)
Definition: Statistics.h:144
size_t getNumBytes(std::string databaseName, std::string setName)
Definition: Statistics.h:126
void setAtomicComputationSelectivity(std::string atomicComputationType, double selectivity)
Definition: Statistics.h:208
double getAtomicComputationSelectivity(std::string atomicComputationType)
Definition: Statistics.h:199
std::string setName
Definition: Statistics.h:35
std::unordered_map< std::string, DataStatistics > dataStatistics
Definition: Statistics.h:46
std::unordered_map< std::string, double > lambdaSelectivity
Definition: Statistics.h:48
void addSet(std::string databaseName, std::string setName, DataStatistics &stats)
Definition: Statistics.h:66