A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
PageCache.h
Go to the documentation of this file.
1 /*****************************************************************************
2  * *
3  * Copyright 2018 Rice University *
4  * *
5  * Licensed under the Apache License, Version 2.0 (the "License"); *
6  * you may not use this file except in compliance with the License. *
7  * You may obtain a copy of the License at *
8  * *
9  * http://www.apache.org/licenses/LICENSE-2.0 *
10  * *
11  * Unless required by applicable law or agreed to in writing, software *
12  * distributed under the License is distributed on an "AS IS" BASIS, *
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14  * See the License for the specific language governing permissions and *
15  * limitations under the License. *
16  * *
17  *****************************************************************************/
18 
19 #ifndef PAGECACHE_H
20 #define PAGECACHE_H
21 
22 
23 #include "Configuration.h"
24 #include "PDBWork.h"
25 #include "PDBWorkerQueue.h"
26 #include "PDBWorker.h"
27 #include "PDBFile.h"
28 #include "SequenceFile.h"
29 #include "PartitionedFile.h"
30 #include "PDBLogger.h"
31 #include "SharedMem.h"
32 #include "PageCircularBuffer.h"
33 #include "LocalitySet.h"
34 #include <unordered_map>
35 #include <memory>
36 #include <queue>
37 using namespace std;
38 
39 class PageCache;
40 typedef shared_ptr<PageCache> PageCachePtr;
41 
64 struct CacheKeyHash {
65 
66  std::size_t operator()(const CacheKey& key) const {
67  return (key.dbId << 24) + (key.typeId << 16) + (key.setId << 8) + key.pageId;
68  }
69 };
70 
75 struct CacheKeyEqual {
76 
77  bool operator()(const CacheKey& lKey, const CacheKey& rKey) const {
78  if ((lKey.dbId == rKey.dbId) && (lKey.typeId == rKey.typeId) &&
79  (lKey.setId == rKey.setId) && (lKey.pageId == rKey.pageId)) {
80  return true;
81  } else {
82  return false;
83  }
84  }
85 };
86 
91 
92  bool operator()(PDBPagePtr& lPage, PDBPagePtr& rPage) {
93  if (lPage->getAccessSequenceId() < rPage->getAccessSequenceId()) {
94  return false;
95  } else {
96  return true;
97  }
98  }
99 };
100 
105 
106  bool operator()(PDBPagePtr& lPage, PDBPagePtr& rPage) {
107  if (lPage->getAccessSequenceId() > rPage->getAccessSequenceId()) {
108  return false;
109  } else {
110  return true;
111  }
112  }
113 };
114 
137 class PageCache {
138 
139 public:
140  // create an MRU page cache
142  pdb::PDBWorkerQueuePtr workers,
143  PageCircularBufferPtr flushBuffer,
144  pdb::PDBLoggerPtr logger,
145  SharedMemPtr shm,
146  CacheStrategy strategy = UnifiedMRU);
147  ~PageCache();
148 
149  // Get a page from cache, if the page is flushed to file, and is not in cache,
150  // load it to cache. Otherwise, if the page is not in cache, and can not be found in any flushed
151  // file,
152  // we return nullptr.
153  // Below method will cause page reference count ++
154  // This function is used to provide backward-compatibility for SequenceFile instances, and
155  // can only be applied to SequenceFile instances.
156  PDBPagePtr getPage(SequenceFilePtr file, PageID pageId);
157 
158 
159  // Get a page from cache, if the page is flushed to file, and is not in cache, load it to cache.
160  // Otherwise, if the page is not in cache, and can not be found in any flushed file, we return
161  // nullptr.
162  // Below method will cause page reference count ++
163  // This function can be used for frontend to pin a flushed page.
164  // This function can also be used for backend to pin a flushed page that has been pinned and
165  // unpinned before by the same backend.
166  // It can be applied to all PDBFile instances.
167  // If sequential==true, we will invoke file's sequential read API that will not `seek` first.
168  PDBPagePtr getPage(PartitionedFilePtr file,
169  FilePartitionID partitionId,
170  unsigned int pageSeqInPartition,
171  PageID pageId,
172  bool sequential,
173  LocalitySet* set = nullptr);
174 
175 
176  // Get a page directly from cache, if it is not in cache return nullptr
177  PDBPagePtr getPage(CacheKey key, LocalitySet* set = nullptr);
178 
179 
180  // To allocate a new page, blocking until get a page, set it as pinned&dirty, add it to cache,
181  // and increment reference count
182  PDBPagePtr getNewPage(NodeID nodeId,
183  CacheKey key,
184  LocalitySet* set = nullptr,
185  size_t pageSize = DEFAULT_PAGE_SIZE);
186 
187  // Try to allocate a new page, set it as pinned&dirty, add it to cache, and increment reference
188  // count
189  PDBPagePtr getNewPageNonBlocking(NodeID nodeId,
190  CacheKey key,
191  LocalitySet* set = nullptr,
192  size_t pageSize = DEFAULT_PAGE_SIZE);
193 
194  // Decrement reference count for a page.
195  // In the LRUPageCache class, only below method will cause page reference count --
196  bool decPageRefCount(CacheKey key);
197 
198  // If the page specified by the cache key is in cache, return true,
199  // otherwise, return false.
200  bool containsPage(CacheKey key);
201 
202 
203  // Evict all dirty pages
204  int evictAllDirtyPages();
205 
206  // Unpin and evict all dirty pages
207  int unpinAndEvictAllDirtyPages();
208 
209  // Start the eviction thread to evict least recently used pages.
210  void runEviction();
211 
212  // Invoke the eviction in a method instead of a separate thread.
213  void evict();
214 
215  // Evict page specified by cachekey from cache.
216  bool evictPage(CacheKey key, bool tryFlushOrNot = true);
217 
218  // Compute the threshold when to trigger eviction.
219  void getAndSetWarnSize(unsigned int numSets, double warnThreshold);
220 
221  // Compute the threshold when eviction can be finished.
222  void getAndSetEvictStopSize(unsigned int numSets, double evictThreshold);
223 
224  // Load page specified from disk file to cache.
225  // This function can only be applied to SequenceFile instances.
226  PDBPagePtr loadPage(SequenceFilePtr file, PageID pageId);
227 
228  // Load page specified from disk file to cache.
229  // This function can be applied to all PDBFile instances.
230  // If sequential=true, we will invoke file's sequential read API if the file instance has
231  // provided such API.
232  PDBPagePtr loadPage(PDBFilePtr file,
233  FilePartitionID partitionId,
234  unsigned int pageSeqInPartition,
235  bool sequential);
236 
237  // Remove page specified by Key from cache hashMap.
238  // This function will be used by the flushConsumer thread.
239  bool removePage(CacheKey key);
240  bool freePage(PDBPagePtr page);
241  // Lock for eviction.
242  void evictionLock();
243 
244  // Unlock for eviction.
245  void evictionUnlock();
246 
247  // Lock for flushing.
248  void flushLock();
249 
250  // Unlock for flushing.
251  void flushUnlock();
252 
253  // Flush a page.
254  bool flushPageWithoutEviction(CacheKey key);
255 
256  // Allocate buffer of required size from shared memory, if no room, block and run eviction
257  // thread.
258  char* allocateBufferFromSharedMemoryBlocking(size_t size, int& alignOffset);
259 
260  // TODO: Allocate buffer of required size from shared memory, if no room, block and evict only
261  // one page.
262  char* tryAllocateBufferFromSharedMemory(size_t size, int& alignOffset);
263 
264  PDBPagePtr buildAndCachePageFromFileHandle(int handle,
265  size_t size,
266  NodeID nodeId,
267  DatabaseID dbId,
268  UserTypeID typeId,
269  SetID setId,
270  PageID pageId);
271 
272 
273  // Build a PDBPage instance from page data loaded from file into shared memory.
274  PDBPagePtr buildPageFromSharedMemoryData(PDBFilePtr file,
275  char* pageData,
276  FilePartitionID partitionId,
277  unsigned int pageSeqInPartition,
278  int internalOffset,
279  size_t pageSize = DEFAULT_PAGE_SIZE);
280 
281 
282  // Cache the block with specified name and buffer.
283  void cachePage(PDBPagePtr page, LocalitySet* set = nullptr);
284 
285  // Evict page from cache.
286  bool evictPage(PDBPagePtr page, LocalitySetPtr set = nullptr);
287 
288  void addLocalitySetToPriorityList(LocalitySetPtr set, PriorityLevel level);
289 
290  void removeLocalitySetFromPriorityList(LocalitySetPtr set, PriorityLevel level);
291 
292 
293  // Get logger
295  return this->logger;
296  }
297 
298  void pin(LocalitySetPtr set, LocalitySetReplacementPolicy policy, OperationType operationType);
299 
300  void unpin(LocalitySetPtr set);
301 
302 
303 private:
304  unordered_map<CacheKey, PDBPagePtr, CacheKeyHash, CacheKeyEqual>* cache;
307  size_t size;
308  size_t maxSize;
309  size_t warnSize; // the threshold to evict
310  size_t evictStopSize; // the threshold to stop eviction
311  pthread_rwlock_t evictionAndFlushLock;
312  pthread_mutex_t cacheMutex;
313  pthread_mutex_t evictionMutex;
318  pthread_mutex_t countLock;
322  /*
323  * index = 0, TransientLifetimeEnded
324  * index = 1, PersistentLifetimeEnded
325  * index = 2, PersistentLifetimeNotEnded
326  * index = 3, TransientLifetimeNotEndedPartialData
327  * index = 4, TransientLifetimeNotEndedShuffleData
328  * index = 5, TransientLifetimeNotEndedHashData
329  */
330  vector<list<LocalitySetPtr>*>* priorityList;
331 };
332 #endif /* PAGECACHE_H */
PageCircularBufferPtr flushBuffer
Definition: PageCache.h:320
SetID setId
Definition: DataTypes.h:87
unsigned int SetID
Definition: DataTypes.h:31
shared_ptr< PDBPage > PDBPagePtr
Definition: PDBPage.h:32
OperationType
Definition: DataTypes.h:57
ConfigurationPtr conf
Definition: PageCache.h:306
bool operator()(PDBPagePtr &lPage, PDBPagePtr &rPage)
Definition: PageCache.h:92
shared_ptr< PageCache > PageCachePtr
Definition: PageCache.h:39
DatabaseID dbId
Definition: DataTypes.h:85
bool operator()(PDBPagePtr &lPage, PDBPagePtr &rPage)
Definition: PageCache.h:106
unsigned int NodeID
Definition: DataTypes.h:27
SharedMemPtr shm
Definition: PageCache.h:319
CacheStrategy strategy
Definition: PageCache.h:321
shared_ptr< PDBWork > PDBWorkPtr
Definition: PDBWork.h:47
bool inEviction
Definition: PageCache.h:314
CacheStrategy
Definition: DataTypes.h:54
LocalitySetReplacementPolicy
Definition: DataTypes.h:52
long accessCount
Definition: PageCache.h:317
shared_ptr< PartitionedFile > PartitionedFilePtr
shared_ptr< SharedMem > SharedMemPtr
Definition: SharedMem.h:32
unordered_map< CacheKey, PDBPagePtr, CacheKeyHash, CacheKeyEqual > * cache
Definition: PageCache.h:304
pdb::PDBWorkerQueuePtr workers
Definition: PageCache.h:315
unsigned int DatabaseID
Definition: DataTypes.h:29
size_t evictStopSize
Definition: PageCache.h:310
size_t warnSize
Definition: PageCache.h:309
unsigned int PageID
Definition: DataTypes.h:26
pthread_mutex_t cacheMutex
Definition: PageCache.h:312
pdb::PDBLoggerPtr getLogger()
Definition: PageCache.h:294
pthread_mutex_t countLock
Definition: PageCache.h:318
PageID pageId
Definition: DataTypes.h:88
bool operator()(const CacheKey &lKey, const CacheKey &rKey) const
Definition: PageCache.h:77
shared_ptr< PDBWorkerQueue > PDBWorkerQueuePtr
shared_ptr< PDBFileInterface > PDBFilePtr
Definition: PDBFile.h:29
shared_ptr< Configuration > ConfigurationPtr
Definition: Configuration.h:89
pdb::PDBLoggerPtr logger
Definition: PageCache.h:305
pdb::PDBWorkPtr evictWork
Definition: PageCache.h:316
vector< list< LocalitySetPtr > * > * priorityList
Definition: PageCache.h:330
size_t maxSize
Definition: PageCache.h:308
shared_ptr< LocalitySet > LocalitySetPtr
Definition: LocalitySet.h:29
std::shared_ptr< PDBLogger > PDBLoggerPtr
Definition: PDBLogger.h:40
size_t size
Definition: PageCache.h:307
std::size_t operator()(const CacheKey &key) const
Definition: PageCache.h:66
#define DEFAULT_PAGE_SIZE
Definition: Configuration.h:36
pthread_rwlock_t evictionAndFlushLock
Definition: PageCache.h:311
PriorityLevel
Definition: DataTypes.h:41
shared_ptr< PageCircularBuffer > PageCircularBufferPtr
UserTypeID typeId
Definition: DataTypes.h:86
pthread_mutex_t evictionMutex
Definition: PageCache.h:313
unsigned int FilePartitionID
Definition: DataTypes.h:32
shared_ptr< SequenceFile > SequenceFilePtr
Definition: SequenceFile.h:29
unsigned int UserTypeID
Definition: DataTypes.h:25