/*
* All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or
* its licensors.
*
* For complete copyright and license terms please see the LICENSE at the root of this
* distribution (the "License"). All use of this software is governed by the License,
* or, if provided, by the license below or the license accompanying this file. Do not
* remove or modify any license notices. This file is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
*/
// Original file Copyright Crytek GMBH or its affiliates, used under license.

// Description : Specialized Container for Renderer data with the following properties:
//               - Created during the 3DEngine Update, consumed in the renderer in the following frame
//               - This Container is very restricted and likely not optimal for other situations

#ifndef CRYINCLUDE_CRYCOMMON_CRYTHREADSAFERENDERERCONTAINER_H
#define CRYINCLUDE_CRYCOMMON_CRYTHREADSAFERENDERERCONTAINER_H
#pragma once

// This container is specialized for data which is generated in the 3DEngine and consumed by the renderer
// in the following frame due to multithreaded rendering. To be usable by Jobs as well as other Threads
// some very specific design choices were taken:
// First off, the underlying continuous memory block is only resized during a call to 'CoalesceMemory'
// to prevent freeing a memory block which could be used by another thread.
// If new memory is required, a page of 4 KB is allocated and used as temp storage till the next
// call to 'CoalesceMemory' which then copies all page memory into one continuous block.
// Also all threading relevant functions are implemented lock-less to prevent lock contention and make
// this container usable from Jobs
//
// Right now, the main usage pattern of this container is by the RenderThread, who calls at the beginning
// of its frame 'CoalesceMemory', since then we can be sure that the 3DEngine has finished creating its elements.
// // Since the main purpose of this container is multi-threading adding of elements, a slight change was done to the // push_back interface compared to std::vector: // All implemented push_back variants can return a pointer into the storage (safe since no memory is freed during adding) // and a index for this elements. This is done since calling operator[] could be expensive when called before 'CoalesceMemory' // // For ease of implementation (and a little bit of speed), this container only supports POD types (which can be copied with memcpy) // also note that this container only supports push_back (and resize back to 0) and no pop back due cost (performance and code complexity) of supporting lock-free in parallel pop_back #define TSRC_ALIGN _MS_ALIGN(128) template class TSRC_ALIGN CThreadSafeRendererContainer { public: CThreadSafeRendererContainer(); ~CThreadSafeRendererContainer(); //NOTE: be aware that these valus can potentially change if some objects are added in parallel size_t size() const; size_t empty() const; size_t capacity() const; //NOTE: be aware that this operator can be more expensive if the memory was not coalesced before T& operator[](size_t n); const T& operator[](size_t n) const; T* push_back_new(); T* push_back_new(size_t& nIndex); void push_back(const T&); void push_back(const T&, size_t& nIndex); // NOTE: These functions are changing the size of the continous memory block and thus are *not* thread-safe void clear(); void resize(size_t n); void reserve(size_t n); void CoalesceMemory(); void GetMemoryUsage(ICrySizer*) const; // disable copy/assignment CThreadSafeRendererContainer(const CThreadSafeRendererContainer& rOther) = delete; CThreadSafeRendererContainer& operator=(const CThreadSafeRendererContainer& rOther) = delete; private: ///////////////////////////////////// // Struct to represent a memory chunk // used in fallback allocations during 'Fill' phase class CMemoryPage { public: // size of a page to allocate, the CMemoryPage is just 
the header, // the actual object data is stored in the 4KB chunk right // after the header (while keeping the requiered alignment and so on) enum { nMemoryPageSize = 4096 }; CMemoryPage(); // allocation functions static CMemoryPage* AllocateNewPage(); bool TryAllocateElement(size_t& nIndex, T*& pObj); // access to the elements T& GetElement(size_t n); T* GetData() const; // information about the page (NOTE: not thread-safe in all combinations) size_t Size() const; size_t Capacity() const; size_t GetDataSize() const; CMemoryPage* m_pNext; // Pointer to next entry in single-linked list of CMemoryPages private: LONG m_nSize; // Number of elements currently in the page LONG m_nCapacity; // Number of elements which could fit into the page T* m_arrData; // Element memory, from the same memory chunk right after the CMemoryPage class }; ///////////////////////////////////// // Private functions which do the lock-less updating T* push_back_impl(size_t& nIndex); bool try_append_to_continous_memory(size_t& nIndex, T*& pObj); T& GetMemoryPageElement(size_t n); ///////////////////////////////////// // Private Member Variables T* m_arrData; // Storage for the continous memory part, during coalescing resized to hold all page memory LONG m_nCapacity; // Avaible Memory in continous memory part, if exhausted during 'Fill' phase, pages as temp memory chunks are allocated CMemoryPage* m_pMemoryPages; // Single linked list of memory chunks, used for fallback allocations during 'Fill' phase (to prevent changing the continous memory block during 'Fill' LONG m_nSize; // Number of elements currently in the container, can be larger than m_nCapacity due the nonContinousPages bool m_bElementAccessSafe; // bool to indicate if we are currently doing a 'CoalasceMemory' step, during which some operations are now allowed } _ALIGN(128); /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// 
/////////////////////////////////////////////////////////////////////////////// template inline CThreadSafeRendererContainer::CThreadSafeRendererContainer() : m_arrData(NULL) , m_nCapacity(0) , m_pMemoryPages(NULL) , m_nSize(0) , m_bElementAccessSafe(true) { } /////////////////////////////////////////////////////////////////////////////// template inline CThreadSafeRendererContainer::~CThreadSafeRendererContainer() { clear(); } /////////////////////////////////////////////////////////////////////////////// template inline size_t CThreadSafeRendererContainer::size() const { return *const_cast(&m_nSize); } /////////////////////////////////////////////////////////////////////////////// template inline size_t CThreadSafeRendererContainer::empty() const { return *const_cast(&m_nSize) == 0; } /////////////////////////////////////////////////////////////////////////////// template inline size_t CThreadSafeRendererContainer::capacity() const { // capacity of continous memory block LONG nCapacity = m_nCapacity; // add capacity of all memory pages CMemoryPage* pCurrentMemoryPage = m_pMemoryPages; while (pCurrentMemoryPage) { nCapacity += pCurrentMemoryPage->Capacity(); pCurrentMemoryPage = pCurrentMemoryPage->m_pNext; } return nCapacity; } /////////////////////////////////////////////////////////////////////////////// template inline T& CThreadSafeRendererContainer::operator[](size_t n) { assert(m_bElementAccessSafe); T* pRet = NULL; #if !defined(NULL_RENDERER) assert((LONG)n < m_nSize); #endif if ((LONG)n < m_nCapacity) { pRet = &m_arrData[n]; } else { pRet = &GetMemoryPageElement(n); } return *pRet; } /////////////////////////////////////////////////////////////////////////////// template inline const T& CThreadSafeRendererContainer::operator[](size_t n) const { return const_cast(const_cast*>(this)->operator[](n)); } /////////////////////////////////////////////////////////////////////////////// template inline T* CThreadSafeRendererContainer::push_back_new() { 
assert(m_bElementAccessSafe); size_t nUnused = ~0; return push_back_impl(nUnused); } /////////////////////////////////////////////////////////////////////////////// template inline T* CThreadSafeRendererContainer::push_back_new(size_t& nIndex) { assert(m_bElementAccessSafe); return push_back_impl(nIndex); } /////////////////////////////////////////////////////////////////////////////// template inline void CThreadSafeRendererContainer::push_back(const T& rObj) { assert(m_bElementAccessSafe); size_t nUnused = ~0; T* pObj = push_back_impl(nUnused); *pObj = rObj; } /////////////////////////////////////////////////////////////////////////////// template inline void CThreadSafeRendererContainer::push_back(const T& rObj, size_t& nIndex) { assert(m_bElementAccessSafe); T* pObj = push_back_impl(nIndex); *pObj = rObj; } /////////////////////////////////////////////////////////////////////////////// template inline void CThreadSafeRendererContainer::clear() { assert(m_bElementAccessSafe); // free continous part CryModuleMemalignFree(m_arrData); m_arrData = NULL; // free non-continous pages if we have some CMemoryPage* pCurrentMemoryPage = m_pMemoryPages; while (pCurrentMemoryPage) { CMemoryPage* pOldPage = pCurrentMemoryPage; pCurrentMemoryPage = pCurrentMemoryPage->m_pNext; CryModuleFree(pOldPage); } m_pMemoryPages = NULL; m_nSize = 0; m_nCapacity = 0; } /////////////////////////////////////////////////////////////////////////////// template inline void CThreadSafeRendererContainer::resize(size_t n) { assert(m_bElementAccessSafe); CoalesceMemory(); size_t nOldSize = m_nSize; m_nSize = n; if ((LONG)n <= m_nCapacity) { return; } T* arrOldData = m_arrData; m_arrData = reinterpret_cast(CryModuleMemalign(n * sizeof(T), alignof(T))); memcpy(m_arrData, arrOldData, nOldSize * sizeof(T)); memset(&m_arrData[m_nCapacity], 0, (n - m_nCapacity) * sizeof(T)); CryModuleMemalignFree(arrOldData); m_nCapacity = n; } 
/////////////////////////////////////////////////////////////////////////////// template inline void CThreadSafeRendererContainer::reserve(size_t n) { assert(m_bElementAccessSafe); CoalesceMemory(); if ((LONG)n <= m_nCapacity) { return; } T* arrOldData = m_arrData; m_arrData = reinterpret_cast(CryModuleMemalign(n * sizeof(T), alignof(T))); memcpy(m_arrData, arrOldData, m_nSize * sizeof(T)); memset(&m_arrData[m_nCapacity], 0, (n - m_nCapacity) * sizeof(T)); CryModuleMemalignFree(arrOldData); m_nCapacity = n; } /////////////////////////////////////////////////////////////////////////////// template inline bool CThreadSafeRendererContainer::try_append_to_continous_memory(size_t& nIndex, T*& pObj) { assert(m_bElementAccessSafe); LONG nSize = ~0; LONG nCapacity = ~0; do { // read volatile the new size nSize = *const_cast(&m_nSize); nCapacity = *const_cast(&m_nCapacity); if (nSize >= nCapacity) { return false; } } while (CryInterlockedCompareExchange(alias_cast(&m_nSize), nSize + 1, nSize) != nSize); nIndex = nSize; pObj = &m_arrData[nSize]; return true; } /////////////////////////////////////////////////////////////////////////////// template inline T* CThreadSafeRendererContainer::push_back_impl(size_t& nIndex) { assert(m_bElementAccessSafe); T* pObj = NULL; // non atomic check to see if there is space in the continous array if (try_append_to_continous_memory(nIndex, pObj)) { return pObj; } // exhausted continous memory, falling back to page allocation for (;; ) { assert(m_bElementAccessSafe); size_t nPageBaseIndex = 0; // traverse the page list till the first page with free memory CMemoryPage* pCurrentMemoryPage = m_pMemoryPages; while (pCurrentMemoryPage) { size_t nAvaibleElements = pCurrentMemoryPage->Capacity() - pCurrentMemoryPage->Size(); if (nAvaibleElements) { break; } // no memory in this page, go to the next one nPageBaseIndex += pCurrentMemoryPage->Capacity(); pCurrentMemoryPage = pCurrentMemoryPage->m_pNext; } // try to allocate a element on this page if 
(pCurrentMemoryPage && pCurrentMemoryPage->TryAllocateElement(nIndex, pObj)) { // update global elements counter CryInterlockedIncrement(alias_cast(&m_nSize)); // adjust in-page-index to global index nIndex += nPageBaseIndex + m_nCapacity; return pObj; } else { // all pages are empty, allocate and link a new one CMemoryPage* pNewPage = CMemoryPage::AllocateNewPage(); void* volatile* ppLastMemoryPageAddress = NULL; do { // find place to link in page CMemoryPage* pLastMemoryPage = m_pMemoryPages; ppLastMemoryPageAddress = alias_cast(&m_pMemoryPages); while (pLastMemoryPage) { ppLastMemoryPageAddress = alias_cast(&(pLastMemoryPage->m_pNext)); pLastMemoryPage = pLastMemoryPage->m_pNext; } } while (CryInterlockedCompareExchangePointer(ppLastMemoryPageAddress, pNewPage, NULL) != NULL); } } } /////////////////////////////////////////////////////////////////////////////// template inline T& CThreadSafeRendererContainer::GetMemoryPageElement(size_t n) { assert(m_bElementAccessSafe); size_t nFirstListIndex = m_nCapacity; CMemoryPage* pCurrentMemoryPage = m_pMemoryPages; size_t nPageCapacity = pCurrentMemoryPage->Capacity(); while (n >= (nFirstListIndex + nPageCapacity)) { // this is threadsafe because we assume that if we want to get element 'n' // the clientcode did already fill the container up to element 'n' // thus up to 'n', m_pNonContinousList will have valid pages // NOTE: This is not safe when trying to read a element behind the valid // range (same as std::vector) nFirstListIndex += nPageCapacity; pCurrentMemoryPage = pCurrentMemoryPage->m_pNext; // update page capacity, since it can differe due alignment nPageCapacity = pCurrentMemoryPage->Capacity(); } return pCurrentMemoryPage->GetElement(n - nFirstListIndex); } /////////////////////////////////////////////////////////////////////////////// // When not not in the 'Fill' phase, it is safe to colace all page entries into one continous memory block template inline void CThreadSafeRendererContainer::CoalesceMemory() 
{ assert(m_bElementAccessSafe); if (m_pMemoryPages == NULL) { return; // nothing to do } // mark state as not accessable m_bElementAccessSafe = false; size_t nOldSize = m_nSize; // compute required memory size_t nRequieredElements = 0; { CMemoryPage* pCurrentMemoryPage = m_pMemoryPages; while (pCurrentMemoryPage) { nRequieredElements += pCurrentMemoryPage->Size(); pCurrentMemoryPage = pCurrentMemoryPage->m_pNext; } } T* arrOldData = m_arrData; m_arrData = reinterpret_cast(CryModuleMemalign((m_nCapacity + nRequieredElements) * sizeof(T), alignof(T))); memcpy(m_arrData, arrOldData, m_nCapacity * sizeof(T)); CryModuleMemalignFree(arrOldData); // copy page data into continous memory block { size_t nBeginToFillIndex = m_nCapacity; CMemoryPage* pCurrentMemoryPage = m_pMemoryPages; while (pCurrentMemoryPage) { // copy data memcpy(&m_arrData[nBeginToFillIndex], pCurrentMemoryPage->GetData(), pCurrentMemoryPage->GetDataSize()); nBeginToFillIndex += pCurrentMemoryPage->Size(); // free page CMemoryPage* pOldPage = pCurrentMemoryPage; pCurrentMemoryPage = pCurrentMemoryPage->m_pNext; CryModuleFree(pOldPage); } m_pMemoryPages = NULL; } assert(nOldSize == m_nSize); m_nCapacity += nRequieredElements; // the container can be used again m_bElementAccessSafe = true; } /////////////////////////////////////////////////////////////////////////////// // Collect information about used memory template void CThreadSafeRendererContainer::GetMemoryUsage(ICrySizer* pSizer) const { pSizer->AddObject(m_arrData, m_nCapacity * sizeof(T)); CMemoryPage* pCurrentMemoryPage = m_pMemoryPages; while (pCurrentMemoryPage) { pSizer->AddObject(pCurrentMemoryPage, CMemoryPage::nMemoryPageSize); pCurrentMemoryPage = pCurrentMemoryPage->m_pNext; } } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// template inline CThreadSafeRendererContainer::CMemoryPage::CMemoryPage() : m_pNext(NULL) , m_nSize(0) { 
// compute offset for actual data size_t nObjectAlignment = alignof(T); UINT_PTR nMemoryBlockBegin = alias_cast(this); UINT_PTR nMemoryBlockEnd = alias_cast(this) + nMemoryPageSize; nMemoryBlockBegin += sizeof(CMemoryPage); nMemoryBlockBegin = (nMemoryBlockBegin + nObjectAlignment - 1) & ~(nObjectAlignment - 1); // compute number of avaible elements assert((nMemoryBlockEnd - nMemoryBlockBegin) > 0); m_nCapacity = (LONG)((nMemoryBlockEnd - nMemoryBlockBegin) / sizeof(T)); // store pointer to store data to m_arrData = alias_cast(nMemoryBlockBegin); } /////////////////////////////////////////////////////////////////////////////// template inline typename CThreadSafeRendererContainer::CMemoryPage * CThreadSafeRendererContainer::CMemoryPage::AllocateNewPage() { void* pNewPageMemoryChunk = CryModuleMalloc(nMemoryPageSize); assert(pNewPageMemoryChunk != NULL); memset(pNewPageMemoryChunk, 0, nMemoryPageSize); CMemoryPage* pNewPage = new(pNewPageMemoryChunk) CMemoryPage(); return pNewPage; } /////////////////////////////////////////////////////////////////////////////// template inline bool CThreadSafeRendererContainer::CMemoryPage::TryAllocateElement(size_t & nIndex, T * &pObj) { LONG nSize = ~0; LONG nCapacity = ~0; do { // read volatile the new size nSize = *const_cast(&m_nSize); nCapacity = *const_cast(&m_nCapacity); // stop trying if this page is full if (nSize >= nCapacity) { return false; } } while (CryInterlockedCompareExchange(alias_cast(&m_nSize), nSize + 1, nSize) != nSize); //Note: this is the index in the page and it is adjusted in the calling context nIndex = nSize; pObj = &m_arrData[nSize]; return true; } /////////////////////////////////////////////////////////////////////////////// template inline T&CThreadSafeRendererContainer::CMemoryPage::GetElement(size_t n) { assert((LONG)n < m_nSize); assert(m_nSize <= m_nCapacity); return m_arrData[n]; } /////////////////////////////////////////////////////////////////////////////// template inline T * 
CThreadSafeRendererContainer::CMemoryPage::GetData() const { return m_arrData; } /////////////////////////////////////////////////////////////////////////////// template inline size_t CThreadSafeRendererContainer::CMemoryPage::Size() const { return m_nSize; } /////////////////////////////////////////////////////////////////////////////// template inline size_t CThreadSafeRendererContainer::CMemoryPage::GetDataSize() const { return m_nSize * sizeof(T); } /////////////////////////////////////////////////////////////////////////////// template inline size_t CThreadSafeRendererContainer::CMemoryPage::Capacity() const { return m_nCapacity; } #endif // CRYINCLUDE_CRYCOMMON_CRYTHREADSAFERENDERERCONTAINER_H