A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
StatisticsDB.h
Go to the documentation of this file.
1 #ifndef STATISTICS_DB_H
2 #define STATISTICS_DB_H
3 
4 
5 #include "TransformedSet.h"
6 #include "Configuration.h"
7 #include "Handle.h"
8 #include "PDBVector.h"
9 #include "Computation.h"
10 #include <vector>
11 #include <memory>
12 #include <sqlite3.h>
13 
14 /* this class encapsulates a SQLite database to manage statistics
15 collected along with query execution */
16 
17 namespace pdb {
18 
19 class StatisticsDB {
20 
21 public:
22 
23  /* constructor */
25 
26  /* destructor */
27  ~StatisticsDB ();
28 
29  /*
30  * open the database
31  * @return, whether the operation is successful or not
32  */
33  bool openDB ();
34 
35 
36  /*
37  * close the database
38  * @return, whether the operation is successful or not
39  */
40  bool closeDB ();
41 
42 
43  /*
44  * create the tables
45  * @return, whether the creation is successful or not
46  */
47  bool createTables ();
48 
49  /*
50  * execute the query
51  * @param cmdString, the command;
52  * @return: whether the query execution is successful or not
53  */
54  bool execDB (std::string cmdString);
55 
56  /*
57  * create the directory
58  * @return: whether the directory creation is successful or not
59  */
60  bool createDir ();
61 
62  /*
63  * to add an entry to the data table
64  * @param databaseName, the name of the database
65  * @param setName, the name of the set
66  * @param created_jobId, the job id that creates the set
67  * @param setType, the type of the set
68  * @param className, the type of the object class stored in the set
69  * @param typeId, the registered id of the type
70  * @param pageSize, the page size of the set (in megabytes)
71  * @param id, the primary key of the entry
72  * @return: whether the data entry creation is successful or not
73  */
74  bool createData (std::string databaseName,
75  std::string setName,
76  std::string created_jobId,
77  std::string setType,
78  std::string className,
79  int typeId,
80  size_t pageSize,
81  long & id);
82 
83 
84  /*
85  * to update the SIZE field and MODIFICATION_TIME field in the DATA table
86  * @param id, the primary key of the entry to modify
87  * @param size, the current size of the set
88  * @return: whether the update is successful or not
89  */
90  bool updateDataForSize (long id,
91  size_t size);
92 
93  /*
94  * to update the IS_REMOVED field and MODIFICATION_TIME field in the DATA
95  * @param id, the primary key of the entry to modify
96  * @return: whether the update is successful or not
97  */
98  bool updateDataForRemoval (long id);
99 
100 
101  /*
102  * to add an entry to the data_transformation table
103  * @param input_data_id, the id of the input set
104  * @param output_data_id, the id of the output set
105  * @param num_partitions, the number of partitions
106  * @param num_nodes, the number of nodes
107  * @param transformation_type, the type of transformation, e.g. Partition
108  * @param tcap, the tcap string
109  * @param computations, the computations associated with tcap string
110  * @param id, the primary key of the entry
111  * @return: whether the data is successful or not
112  */
113  bool createDataTransformation (long input_data_id,
114  long output_data_id,
115  int num_partitions,
116  int num_nodes,
117  std::string transformationType,
118  std::string tcap,
119  Handle<Vector<Handle<Computation>>> computations,
120  long& id);
121 
122 
123  /*
124  * given input set's database name and set name,
125  * @param databaseAndSetName, the identifier of the input set
126  * @return: the list of sets that are transformed from the input set
127  * return a list of sets that are transformed from the input set
128  */
129  std::vector<std::shared_ptr<TransformedSet>>
130  getTransformedSets (std::pair<std::string, std::string> databaseAndSetName);
131 
132 
133  /* to get latest id for a specified table
134  * @param tableName, the name of the table
135  * @return, the last used id (primary key) for the given table
136  */
137  long getLatestId (std::string tableName);
138 
139 
140  /* to get latest data id for the specified data table
141  * @return, the last used id for the data table
142  */
143  long getLatestDataId (std::pair<std::string, std::string> databaseAndSetName);
144 
145 
146  /* to get latest transformation id for the data_transformation table
147  * @return, the last used id for the transformation table
148  */
150 
151 protected:
152 
153  /* to quote a string
154  * @param s, the string to be quoted
155  * @return: the quoted string
156  */
157  std::string quoteStr(std::string& s);
158 
159  /* to find and replace a string
160  * @param str, the string for replacement
161  * @param oldStr, the string to be replaced in str
162  * @param newStr, the string to replace oldStr in str
163  */
164  void replaceStr(std::string& str,
165  const std::string& oldStr,
166  const std::string& newStr);
167 
168 
169 
170 
171 private:
172 
173  //sqlite database file
174  std::string pathToDBFile = "dbFile";
175 
176  //handler for the sqlite database instance
177  sqlite3 * statisticsDBHandler = nullptr;
178 
179  //configuration
180  ConfigurationPtr conf = nullptr;
181 
182  //data id
183  long dataId = 0;
184 
185  //transformation id
187 
188 };
189 
190 }
191 
192 #endif
bool createDataTransformation(long input_data_id, long output_data_id, int num_partitions, int num_nodes, std::string transformationType, std::string tcap, Handle< Vector< Handle< Computation >>> computations, long &id)
std::string pathToDBFile
Definition: StatisticsDB.h:174
long getLatestId(std::string tableName)
long getLatestTransformationId()
StatisticsDB(ConfigurationPtr conf)
Definition: StatisticsDB.cc:10
bool updateDataForSize(long id, size_t size)
ConfigurationPtr conf
Definition: StatisticsDB.h:180
shared_ptr< Configuration > ConfigurationPtr
Definition: Configuration.h:89
std::vector< std::shared_ptr< TransformedSet > > getTransformedSets(std::pair< std::string, std::string > databaseAndSetName)
sqlite3 * statisticsDBHandler
Definition: StatisticsDB.h:177
bool createData(std::string databaseName, std::string setName, std::string created_jobId, std::string setType, std::string className, int typeId, size_t pageSize, long &id)
long getLatestDataId(std::pair< std::string, std::string > databaseAndSetName)
std::string quoteStr(std::string &s)
bool updateDataForRemoval(long id)
void replaceStr(std::string &str, const std::string &oldStr, const std::string &newStr)
bool execDB(std::string cmdString)
Definition: StatisticsDB.cc:93