diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt index aad79b0a52..41c59290b8 100644 --- a/cachelib/allocator/CMakeLists.txt +++ b/cachelib/allocator/CMakeLists.txt @@ -33,6 +33,8 @@ add_library (cachelib_allocator CacheAllocatorLru5BCacheWithSpinBuckets.cpp CacheAllocatorLruCache.cpp CacheAllocatorLruCacheWithSpinBuckets.cpp + CacheAllocatorS3FIFOCache.cpp + CacheAllocatorS3FIFO5BCache.cpp CacheAllocatorTinyLFU5BCache.cpp CacheAllocatorTinyLFUCache.cpp CacheAllocatorWTinyLFU5BCache.cpp diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 6082dee06d..5defa7391e 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -6077,6 +6077,7 @@ namespace facebook::cachelib { extern template class CacheAllocator; extern template class CacheAllocator; extern template class CacheAllocator; +extern template class CacheAllocator; extern template class CacheAllocator; extern template class CacheAllocator; @@ -6098,6 +6099,15 @@ using LruAllocatorSpinBuckets = CacheAllocator; using Lru2QAllocator = CacheAllocator; using Lru5B2QAllocator = CacheAllocator; +// CacheAllocator with S3 FIFO eviction policy +// It maintains 2 queues, one for probation and one for the main queue. +// New items are added to the probation queue, and if not accessed +// will be quickly removed from the cache to remove one-hit wonders. +// If accessed while in probation, it will eventually be promoted to the main queue. +// Items in the tail of main queue will be reinserted if accessed. +using S3FIFOAllocator = CacheAllocator; +using S3FIFO5BAllocator = CacheAllocator; + // CacheAllocator with Tiny LFU eviction policy // It has a window initially to gauage the frequency of accesses of newly // inserted items. 
And eventually it will onl admit items that are accessed diff --git a/cachelib/allocator/CacheAllocatorS3FIFO5BCache.cpp b/cachelib/allocator/CacheAllocatorS3FIFO5BCache.cpp new file mode 100644 index 0000000000..8bb8895f2e --- /dev/null +++ b/cachelib/allocator/CacheAllocatorS3FIFO5BCache.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cachelib/allocator/CacheAllocator.h" + +namespace facebook::cachelib { +template class CacheAllocator; +} diff --git a/cachelib/allocator/CacheAllocatorS3FIFOCache.cpp b/cachelib/allocator/CacheAllocatorS3FIFOCache.cpp new file mode 100644 index 0000000000..59e9431a99 --- /dev/null +++ b/cachelib/allocator/CacheAllocatorS3FIFOCache.cpp @@ -0,0 +1,21 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cachelib/allocator/CacheAllocator.h" + +namespace facebook::cachelib { +template class CacheAllocator; +} diff --git a/cachelib/allocator/CacheTraits.h b/cachelib/allocator/CacheTraits.h index 26605ae989..d8f03565a7 100644 --- a/cachelib/allocator/CacheTraits.h +++ b/cachelib/allocator/CacheTraits.h @@ -18,6 +18,7 @@ #include "cachelib/allocator/ChainedHashTable.h" #include "cachelib/allocator/MM2Q.h" #include "cachelib/allocator/MMLru.h" +#include "cachelib/allocator/MMS3FIFO.h" #include "cachelib/allocator/MMTinyLFU.h" #include "cachelib/allocator/MMWTinyLFU.h" #include "cachelib/allocator/memory/CompressedPtr.h" @@ -54,6 +55,13 @@ struct Lru2QCacheTrait { using CompressedPtrType = CompressedPtr4B; }; +struct S3FIFOCacheTrait { + using MMType = MMS3FIFO; + using AccessType = ChainedHashTable; + using AccessTypeLocks = SharedMutexBuckets; + using CompressedPtrType = CompressedPtr4B; +}; + struct TinyLFUCacheTrait { using MMType = MMTinyLFU; using AccessType = ChainedHashTable; @@ -89,6 +97,13 @@ struct Lru5B2QCacheTrait { using CompressedPtrType = CompressedPtr5B; }; +struct S3FIFO5BCacheTrait { + using MMType = MMS3FIFO; + using AccessType = ChainedHashTable; + using AccessTypeLocks = SharedMutexBuckets; + using CompressedPtrType = CompressedPtr5B; +}; + struct TinyLFU5BCacheTrait { using MMType = MMTinyLFU; using AccessType = ChainedHashTable; diff --git a/cachelib/allocator/ContainerTypes.cpp b/cachelib/allocator/ContainerTypes.cpp index e70f051b84..55b62a0dae 100644 --- a/cachelib/allocator/ContainerTypes.cpp +++ b/cachelib/allocator/ContainerTypes.cpp @@ -17,6 +17,7 @@ #include "cachelib/allocator/ChainedHashTable.h" #include "cachelib/allocator/MM2Q.h" #include "cachelib/allocator/MMLru.h" +#include "cachelib/allocator/MMS3FIFO.h" #include "cachelib/allocator/MMTinyLFU.h" #include "cachelib/allocator/MMWTinyLFU.h" namespace facebook::cachelib { @@ -26,6 +27,7 @@ const int MMLru::kId = 1; const int MM2Q::kId = 2; const int MMTinyLFU::kId = 
3; const int MMWTinyLFU::kId = 4; +const int MMS3FIFO::kId = 5; // AccessType const int ChainedHashTable::kId = 1; diff --git a/cachelib/allocator/MMS3FIFO.h b/cachelib/allocator/MMS3FIFO.h new file mode 100644 index 0000000000..cffbe0b34a --- /dev/null +++ b/cachelib/allocator/MMS3FIFO.h @@ -0,0 +1,792 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#include +#include +#include + +#pragma GCC diagnostic pop + +#include "cachelib/allocator/Cache.h" +#include "cachelib/allocator/CacheStats.h" +#include "cachelib/allocator/Util.h" +#include "cachelib/allocator/datastruct/MultiDList.h" +#include "cachelib/allocator/memory/serialize/gen-cpp2/objects_types.h" +#include "cachelib/common/CompilerUtils.h" +#include "cachelib/common/FIFOHashSet.h" +#include "cachelib/common/FIFOConcurrentHashSet.h" + +namespace facebook::cachelib { + +// Implements the S3-FIFO cache eviction policy as described in +// https://dl.acm.org/doi/pdf/10.1145/3600006.3613147 +// +// S3-FIFO combines a "Tiny" queue, a larger "Main" queue, and a ghost +// history. New items enter the Tiny queue; recently accessed items are promoted +// to the Main queue. Eviction always searches from the tails of these queues, +// skipping items that have been accessed since insertion (using a single access +// bit). 
+// +// A low-overhead ghost queue (stores only a 4-byte key hash per item) tracks +// recently evicted keys from the Tiny queue. Items found in the ghost queue are +// inserted directly into the Main queue. + +class MMS3FIFO { + public: + // unique identifier per MMType + static const int kId; + + // forward declaration; + template + using Hook = DListHook; + using SerializationType = serialization::MMS3FIFOObject; + using SerializationConfigType = serialization::MMS3FIFOConfig; + using SerializationTypeContainer = serialization::MMS3FIFOCollection; + + enum LruType { Main, Tiny, NumTypes }; + + // Config class for MMS3FIFO + struct Config { + // create from serialized config + explicit Config(SerializationConfigType configState) + : Config(*configState.updateOnWrite(), + *configState.updateOnRead(), + *configState.tinySizePercent(), + *configState.ghostSizePercent()) {} + + // @param updateOnW whether recordAccess should record write accesses + // @param updateOnR whether recordAccess should record read accesses + Config(bool updateOnW, bool updateOnR) + : Config(updateOnW, updateOnR, 10, 90) {} + + // @param updateOnW whether recordAccess should record write accesses + // @param updateOnR whether recordAccess should record read accesses + // @param tinySizePercent percentage number of tiny size to overall size + // @param ghostSizePercent percentage number of ghost size to overall size + Config(bool updateOnW, + bool updateOnR, + size_t tinySizePercent, + size_t ghostSizePercent) + : updateOnWrite(updateOnW), + updateOnRead(updateOnR), + tinySizePercent(tinySizePercent), + ghostSizePercent(ghostSizePercent) { + } + + Config() = default; + Config(const Config& rhs) = default; + Config(Config&& rhs) = default; + + Config& operator=(const Config& rhs) = default; + Config& operator=(Config&& rhs) = default; + + template + void addExtraConfig(Args...) {} + + // whether the cache needs to be updated on writes for recordAccess. + bool updateOnWrite{false}; + + // whether the cache needs to be updated on reads for recordAccess. 
+ bool updateOnRead{true}; + + // The size of tiny cache, as a percentage of the total size. + size_t tinySizePercent{10}; + + // The size of ghost queue, as a percentage of the total size. + size_t ghostSizePercent{90}; + }; + + // The container object which can be used to keep track of objects of type + // T. T must have a public member of type Hook. This object is a wrapper + // around MultiDList, is thread safe and can be accessed from multiple threads. + // The current implementation keeps the Tiny and Main queues in one MultiDList + // (one list per LruType). + template T::* HookPtr> + struct Container { + private: + using LruList = MultiDList; + using Mutex = folly::DistributedMutex; + using LockHolder = std::unique_lock; + using PtrCompressor = typename T::PtrCompressor; + using Time = typename Hook::Time; + using CompressedPtrType = typename T::CompressedPtrType; + using RefFlags = typename T::Flags; + + public: + Container() {}; + // Reserve ghost queue here if needed + Container(Config c, PtrCompressor compressor) + : lru_(LruType::NumTypes, std::move(compressor)), + config_(std::move(c)) { + } + Container(serialization::MMS3FIFOObject object, PtrCompressor compressor); + + Container(const Container&) = delete; + Container& operator=(const Container&) = delete; + + // records the information that the node was accessed. + // This doesn't move nodes and only sets the access bit. + // It also updates the timestamp of last access. + // + // @param node node that we want to mark as relevant/accessed + // @param mode the mode for the access operation. + // @return True if the information is recorded, returns false otherwise + bool recordAccess(T& node, AccessMode mode) noexcept; + + // adds the given node into the container and marks it as being present in + // the container. The node is added to tiny queue, unless it is present + // in ghost queue, in which case it is added to main queue. + // + // @param node The node to be added to the container. 
+ // @return True if the node was successfully added to the container. False + // if the node was already in the container. On error, the state of + // node is unchanged. + bool add(T& node) noexcept; + + // removes the node from the container, adds it to ghost queue + // if it was from tiny queue. + // + // @param node The node to be removed from the container. + // @return True if the node was successfully removed from the container. + // False if the node was not part of the container. On error, the + // state of node is unchanged. + bool remove(T& node) noexcept; + + class LockedIterator; + + // same as the above but uses an iterator context. The iterator is updated + // on removal of the corresponding node to point to the next node. The + // iterator context holds the lock. + // + // iterator will be advanced to the next node after removing the node + // + // @param it Iterator whose current node will be removed + void remove(LockedIterator& it) noexcept; + + // replaces one node with another, at the same position + // + // @param oldNode node being replaced + // @param newNode node to replace oldNode with + // + // @return true If the replace was successful. Returns false if the + // destination node did not exist in the container, or if the + // source node already existed. + bool replace(T& oldNode, T& newNode) noexcept; + + class LockedIterator { + public: + using ListIterator = typename LruList::DListIterator; + // noncopyable but movable. + LockedIterator(const LockedIterator&) = delete; + LockedIterator& operator=(const LockedIterator&) = delete; + LockedIterator(LockedIterator&&) noexcept = default; + + // Intended to yield only unaccessed items. NOTE: operator++ below currently + // just advances the underlying iterator; its skipAccessed call is disabled. 
+ LockedIterator& operator++() noexcept { + // Advance the underlying iterator + ListIterator& it = getIter(); + + if (!it) { + return *this; + } + ++it; + // Skipping of accessed items is currently disabled: + // skipAccessed(it); + return *this; + } + + void skipAccessed(ListIterator& it) noexcept { + while (it) { + T& node = *it; + if (!Container::isAccessed(node)) { + break; // found a valid eviction victim + } + ++it; // skip this accessed item + } + } + + LockedIterator& operator--() { + throw std::invalid_argument( + "Decrementing eviction iterator is not supported"); + } + + T* operator->() const noexcept { return getIter().operator->(); } + T& operator*() const noexcept { return getIter().operator*(); } + + bool operator==(const LockedIterator& other) const noexcept { + return &c_ == &other.c_ && tIter_ == other.tIter_ && + mIter_ == other.mIter_; + } + + bool operator!=(const LockedIterator& other) const noexcept { + return !(*this == other); + } + + explicit operator bool() const noexcept { return tIter_ || mIter_; } + + T* get() const noexcept { return getIter().get(); } + + // Invalidates this iterator + void reset() noexcept { + // Reset both the tiny and the main list iterators + tIter_.reset(); + mIter_.reset(); + } + + // 1. Invalidate this iterator + // 2. Unlock + void destroy() { + reset(); + if (l_.owns_lock()) { + l_.unlock(); + } + } + + // Reset this iterator to the beginning, re-acquiring the lock if it is not held. NOTE(review): evictTinyCache_ is not cleared here - confirm a stale cached decision is acceptable. + void resetToBegin() { + if (!l_.owns_lock()) { + l_.lock(); + } + tIter_.resetToBegin(); + mIter_.resetToBegin(); + } + + private: + // private because it's easy to misuse and cause deadlock for MMS3FIFO + LockedIterator& operator=(LockedIterator&&) noexcept = default; + + // create an lru iterator with the lock being held. + explicit LockedIterator(LockHolder l, + const Container& c) noexcept; + + const ListIterator& getIter() const noexcept { + auto shouldEvictTiny = evictTiny(); + + return shouldEvictTiny ? 
tIter_ : mIter_; + } + + ListIterator& getIter() noexcept { + return const_cast( + static_cast(this)->getIter()); + } + + // Decides to return iterator from tiny or main cache based on capacity. + // Will switch iterators when one of them is exhausted. + bool evictTiny() const noexcept { + if (!mIter_) { + return true; + } + if (!tIter_) { + return false; + } + // List size will not change during iteration, return cached decision + if (evictTinyCache_ != -1) { + return evictTinyCache_ == 1; + } + + int smallSize = static_cast(c_.lru_.getList(LruType::Tiny).size()); + + const size_t targetTiny = static_cast( + c_.config_.tinySizePercent * c_.lru_.size() / 100); + + this->evictTinyCache_ = smallSize >= targetTiny; + + return evictTinyCache_; + } + + // Cache the value + mutable int evictTinyCache_{-1}; // -1 means not set, 0 false and 1 true + + // only the container can create iterators + friend Container; + + const Container& c_; + // Tiny and main cache iterators + ListIterator tIter_; + ListIterator mIter_; + // lock protecting the validity of the iterator + LockHolder l_; + }; + + Config getConfig() const; + + void setConfig(const Config& newConfig); + + // Returns the eviction age stats. See CacheStats.h for details + // Is not really relevant for S3FIFO since we don't use age. + // Todo: deprecate or remove? + EvictionAgeStat getEvictionAgeStat(uint64_t projectedLength) const noexcept; + + // Obtain an iterator that start from the tail and can be used + // to search for evictions. This iterator holds a lock to this + // container and only one such iterator can exist at a time + LockedIterator getEvictionIterator() noexcept; + + void rebalanceForEviction(); + + // Execute provided function under container lock. Function gets + // iterator passed as parameter. + template + void withEvictionIterator(F&& f); + + // Execute provided function under container lock. 
+ template + void withContainerLock(F&& f); + + // for saving the state of the s3fifo + // + // precondition: serialization must happen without any reader or writer + // present. Any modification of this object afterwards will result in an + // invalid, inconsistent state for the serialized data. + // + serialization::MMS3FIFOObject saveState() const noexcept; + + // return the stats for this container. + MMContainerStat getStats() const noexcept; + + static LruType getLruType(const T& node) noexcept { + return isTiny(node) ? LruType::Tiny : LruType::Main; + } + + private: + EvictionAgeStat getEvictionAgeStatLocked( + uint64_t projectedLength) const noexcept; + + static Time getUpdateTime(const T& node) noexcept { + return (node.*HookPtr).getUpdateTime(); + } + + static void setUpdateTime(T& node, Time time) noexcept { + (node.*HookPtr).setUpdateTime(time); + } + + // As the cache grows, the ghost queue may need to be resized + void maybeResizeGhostLocked() noexcept; + + // Returns the hash of node's key + static size_t hashNode64(const T& node) noexcept { + return folly::hasher()(node.getKey()); + } + + static uint32_t hashNode(const T& node) noexcept { + return static_cast( + folly::hasher()(node.getKey())); + } + + void removeLocked(T& node) noexcept; + + static bool isTiny(const T& node) noexcept { + return node.template isFlagSet(); + } + + static bool isAccessed(const T& node) noexcept { + return node.template isFlagSet(); + } + + // Bit MM_BIT_0 is used to record if the item is in tiny cache. + static void markTiny(T& node) noexcept { + node.template setFlag(); + } + static void unmarkTiny(T& node) noexcept { + node.template unSetFlag(); + } + + // Bit MM_BIT_1 is used to record if the item has been accessed since being + // written in cache. Unaccessed items are ignored when determining projected + // update time. 
+ static void markAccessed(T& node) noexcept { + node.template setFlag(); + } + static void unmarkAccessed(T& node) noexcept { + node.template unSetFlag(); + } + + // protects all operations on the lrus. We never really just read the state + // of the LRU. Hence we dont really require a RW mutex at this point of + // time. + mutable folly::cacheline_aligned lruMutex_; + + // Current capacity to track ghost size + size_t capacity_{0}; + + // the lru + LruList lru_; + + facebook::cachelib::util::FIFOConcurrentHashSet32 ghostQueue_; + // facebook::cachelib::util::FIFOHashSet32 ghostQueue_; + + // Config for this lru. + // Write access to the MMS3FIFO Config is serialized. + // Reads may be racy. + Config config_{}; + + // // Todo: Test + // FRIEND_TEST(MMS3FIFOTest, SegmentStress); + // FRIEND_TEST(MMS3FIFOTest, TinyLFUBasic); + // FRIEND_TEST(MMS3FIFOTest, Reconfigure); + }; +}; + +/* Container Interface Implementation */ +template T::* HookPtr> +MMS3FIFO::Container::Container(serialization::MMS3FIFOObject object, + PtrCompressor compressor) + : lru_(*object.lrus(), std::move(compressor)), config_(*object.config()) { +} + +template T::* HookPtr> +void MMS3FIFO::Container::maybeResizeGhostLocked() noexcept { + size_t lruSize = lru_.size(); + + // Grow when size doubled, shrink when halved + const bool shouldGrow = lruSize >= 2 * capacity_; + const bool shouldShrink = lruSize <= capacity_ / 2; + + if (!shouldGrow && !shouldShrink) { + return; + } + + size_t expectedGhostSize = + static_cast(lruSize * config_.ghostSizePercent / 100); + ghostQueue_.resize(expectedGhostSize); + capacity_ = lruSize; +} + +// We have no notion of "reconfiguring lock" +// or refresh interval since we are not manipulating the list lru on access. 
+// No need for locks, as we don't modify the list +template T::* HookPtr> +bool MMS3FIFO::Container::recordAccess(T& node, + AccessMode mode) noexcept { + if ((mode == AccessMode::kWrite && !config_.updateOnWrite) || + (mode == AccessMode::kRead && !config_.updateOnRead)) { + return false; + } + const auto currTime = static_cast