/* * All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or * its licensors. * * For complete copyright and license terms please see the LICENSE at the root of this * distribution (the "License"). All use of this software is governed by the License, * or, if provided, by the license below or the license accompanying this file. Do not * remove or modify any license notices. This file is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * */ // Original file Copyright Crytek GMBH or its affiliates, used under license. #include "StdAfx.h" #if defined(ENABLE_LOADING_PROFILER) #include "BootProfiler.h" #include "ThreadInfo.h" #include <stack> #include <AzFramework/IO/FileOperations.h> namespace { StaticInstance<CBootProfiler, AZStd::no_destruct<CBootProfiler>> gProfilerInstance; enum { eMAX_THREADS_TO_PROFILE = 128, eNUM_RECORDS_PER_POOL = 2048, // so, eNUM_RECORDS_PER_POOL * sizeof(CBootProfilerRecord) == mem consumed by pool item // sizeof(CProfileBlockTimes)==152, // poolmem = 304Kb for 1 pool per thread }; } int CBootProfiler::CV_sys_bp_frames = 0; float CBootProfiler::CV_sys_bp_time_threshold = 0; class CProfileBlockTimes { protected: LARGE_INTEGER m_startTimeStamp; LARGE_INTEGER m_stopTimeStamp; LARGE_INTEGER m_freq; CProfileBlockTimes() { memset(&m_startTimeStamp, 0, sizeof(m_startTimeStamp)); memset(&m_stopTimeStamp, 0, sizeof(m_stopTimeStamp)); memset(&m_freq, 0, sizeof(m_freq)); } }; class CBootProfilerRecord { public: const char* m_label; LARGE_INTEGER m_startTimeStamp; LARGE_INTEGER m_stopTimeStamp; LARGE_INTEGER m_freq; CBootProfilerRecord* m_pParent; typedef AZStd::vector<CBootProfilerRecord*> ChildVector; ChildVector m_Childs; CryFixedStringT<256> m_args; ILINE CBootProfilerRecord(const char* label, LARGE_INTEGER timestamp, LARGE_INTEGER freq, const char* args) : m_label(label) , m_startTimeStamp(timestamp) , m_freq(freq) , m_pParent(NULL) { memset(&m_stopTimeStamp, 0, sizeof(m_stopTimeStamp)); if (args) { m_args = args; } } ILINE ~CBootProfilerRecord() { // childs are allocated via pool as well, the destructors of each child // is called explicitly, for the purpose of freeing memory occupied by // m_Child vector. Otherwise there will be a memory leak. ChildVector::iterator it = m_Childs.begin(); while (it != m_Childs.end()) { (*it)->~CBootProfilerRecord(); ++it; } } void Print(AZ::IO::HandleType fileHandle, char* buf, size_t buf_size, size_t depth, LARGE_INTEGER stopTime, const char* threadName, const float timeThreshold) { if (m_stopTimeStamp.QuadPart == 0) { m_stopTimeStamp = stopTime; } const float time = (float)(m_stopTimeStamp.QuadPart - m_startTimeStamp.QuadPart) * 1000.f / (float)m_freq.QuadPart; if (timeThreshold > 0.0f && time < timeThreshold) { return; } string tabs; //tabs(depth++, '\t') tabs.insert(0, depth++, '\t'); { string label = m_label; label.replace("&", "&"); label.replace("<", "<"); label.replace(">", ">"); label.replace("\"", """); label.replace("'", "'"); if (m_args.size() > 0) { m_args.replace("&", "&"); m_args.replace("<", "<"); m_args.replace(">", ">"); m_args.replace("\"", """); m_args.replace("'", "'"); m_args.replace("%", "%"); } sprintf_s(buf, buf_size, "%s<block name=\"%s\" totalTimeMS=\"%f\" startTime=\"%" PRIu64 "\" stopTime=\"%" PRIu64 "\" args=\"%s\"> \n", tabs.c_str(), label.c_str(), time, m_startTimeStamp.QuadPart, m_stopTimeStamp.QuadPart, m_args.c_str()); AZ::IO::Print(fileHandle, buf); } const size_t childsSize = m_Childs.size(); for (size_t i = 0; i < childsSize; ++i) { CBootProfilerRecord* record = m_Childs[i]; assert(record); record->Print(fileHandle, buf, buf_size, depth, stopTime, threadName, timeThreshold); } sprintf_s(buf, buf_size, "%s</block>\n", tabs.c_str()); AZ::IO::Print(fileHandle, buf); } }; ////////////////////////////////////////////////////////////////////////// class CProfileInfo { friend class CBootProfilerSession; private: CBootProfilerRecord* m_pRoot; CBootProfilerRecord* m_pCurrent; public: CProfileInfo() : m_pRoot(NULL) , m_pCurrent(NULL) {} }; class CBootProfilerThreadsInterface { protected: CBootProfilerThreadsInterface() { memset(m_threadInfo, 0, sizeof(m_threadInfo)); m_threadCounter = 0; } unsigned int GetThreadIndexByID(unsigned int threadID); const char* GetThreadNameByIndex(unsigned int threadIndex); int m_threadCounter; private: unsigned int m_threadInfo[eMAX_THREADS_TO_PROFILE]; //threadIDs }; ////////////////////////////////////////////////////////////////////////// ILINE unsigned int CBootProfilerThreadsInterface::GetThreadIndexByID(unsigned int threadID) { for (int i = 0; i < eMAX_THREADS_TO_PROFILE; ++i) { if (m_threadInfo[i] == 0) { break; } if (m_threadInfo[i] == threadID) { return i; } } unsigned int counter = CryInterlockedIncrement(&m_threadCounter) - 1; //count to index m_threadInfo[counter] = threadID; return counter; } ILINE const char* CBootProfilerThreadsInterface::GetThreadNameByIndex(unsigned int threadIndex) { assert(threadIndex < m_threadCounter); const char* threadName = CryThreadGetName(m_threadInfo[threadIndex]); return threadName; } class CRecordPool { public: CRecordPool() : m_baseAddr(NULL) , m_allocCounter(0) , m_next(NULL) { m_baseAddr = (CBootProfilerRecord*)CryModuleMemalign(eNUM_RECORDS_PER_POOL * sizeof(CBootProfilerRecord), 16); } ~CRecordPool() { CryModuleMemalignFree(m_baseAddr); delete m_next; } ILINE CBootProfilerRecord* allocateRecord() { if (m_allocCounter < eNUM_RECORDS_PER_POOL) { CBootProfilerRecord* newRecord = m_baseAddr + m_allocCounter; ++m_allocCounter; return newRecord; } else { return NULL; } } ILINE void setNextPool(CRecordPool* pool) { m_next = pool; } private: CBootProfilerRecord* m_baseAddr; uint32 m_allocCounter; CRecordPool* m_next; }; class CBootProfilerSession : public CBootProfilerThreadsInterface , protected CProfileBlockTimes { public: CBootProfilerSession(); ~CBootProfilerSession(); void Start(); void Stop(); CBootProfilerRecord* StartBlock(const char* name, const char* args); void StopBlock(CBootProfilerRecord* record); void CollectResults(const char* filename, const float timeThreshold); private: string m_name; CProfileInfo m_threadsProfileInfo[eMAX_THREADS_TO_PROFILE]; CRecordPool* m_threadsRecordsPool[eMAX_THREADS_TO_PROFILE]; //head CRecordPool* m_threadsCurrentPools[eMAX_THREADS_TO_PROFILE]; //current }; ////////////////////////////////////////////////////////////////////////// CBootProfilerSession::CBootProfilerSession() { memset(m_threadsProfileInfo, 0, sizeof(m_threadsProfileInfo)); memset(m_threadsRecordsPool, 0, sizeof(m_threadsRecordsPool)); memset(m_threadsCurrentPools, 0, sizeof(m_threadsCurrentPools)); } CBootProfilerSession::~CBootProfilerSession() { for (unsigned int i = 0; i < m_threadCounter; ++i) { CProfileInfo& profile = m_threadsProfileInfo[i]; // Since m_pRoot is allocated using memory pool (line 296), // its destructor is called explicitly to free the memory of // m_Childs and each of its child. if (profile.m_pRoot) { profile.m_pRoot->~CBootProfilerRecord(); } delete m_threadsRecordsPool[i]; } } void CBootProfilerSession::Start() { LARGE_INTEGER time, freq; QueryPerformanceFrequency(&freq); QueryPerformanceCounter(&time); m_startTimeStamp = time; m_freq = freq; } void CBootProfilerSession::Stop() { LARGE_INTEGER time; QueryPerformanceCounter(&time); m_stopTimeStamp = time; } CBootProfilerRecord* CBootProfilerSession::StartBlock(const char* name, const char* args) { const unsigned int curThread = CryGetCurrentThreadId(); const unsigned int threadIndex = GetThreadIndexByID(curThread); assert(threadIndex < eMAX_THREADS_TO_PROFILE); CProfileInfo& profile = m_threadsProfileInfo[threadIndex]; CRecordPool* pool = m_threadsCurrentPools[threadIndex]; if (!profile.m_pRoot) { if (!pool) { pool = new CRecordPool; m_threadsRecordsPool[threadIndex] = pool; m_threadsCurrentPools[threadIndex] = pool; } CBootProfilerRecord* rec = pool->allocateRecord(); profile.m_pRoot = profile.m_pCurrent = new(rec)CBootProfilerRecord("root", m_startTimeStamp, m_freq, args); } assert(pool); LARGE_INTEGER time, freq; QueryPerformanceFrequency(&freq); QueryPerformanceCounter(&time); CBootProfilerRecord* pParent = profile.m_pCurrent; assert(pParent); assert(profile.m_pRoot); CBootProfilerRecord* rec = pool->allocateRecord(); if (!rec) { //pool is full, create a new one pool = new CRecordPool; m_threadsCurrentPools[threadIndex]->setNextPool(pool); m_threadsCurrentPools[threadIndex] = pool; rec = pool->allocateRecord(); } profile.m_pCurrent = new(rec)CBootProfilerRecord(name, time, freq, args); profile.m_pCurrent->m_pParent = pParent; pParent->m_Childs.push_back(profile.m_pCurrent); return profile.m_pCurrent; } void CBootProfilerSession::StopBlock(CBootProfilerRecord* record) { if (record) { LARGE_INTEGER time; QueryPerformanceCounter(&time); record->m_stopTimeStamp = time; unsigned int curThread = CryGetCurrentThreadId(); unsigned int threadIndex = GetThreadIndexByID(curThread); assert(threadIndex < eMAX_THREADS_TO_PROFILE); CProfileInfo& profile = m_threadsProfileInfo[threadIndex]; profile.m_pCurrent = record->m_pParent; } } void CBootProfilerSession::CollectResults(const char* filename, const float timeThreshold) { if (!gEnv || !gEnv->pCryPak) { AZ_Warning("BootProfiler", false, "CryPak not set - skipping CollectResults"); return; } static const char* szTestResults = "@cache@\\TestResults"; string filePath = string(szTestResults) + "\\" + "bp_" + filename + ".xml"; char path[ICryPak::g_nMaxPath] = ""; gEnv->pCryPak->AdjustFileName(filePath.c_str(), path, AZ_ARRAY_SIZE(path), ICryPak::FLAGS_PATH_REAL | ICryPak::FLAGS_FOR_WRITING); gEnv->pCryPak->MakeDir(szTestResults); AZ::IO::HandleType fileHandle = AZ::IO::InvalidHandle; gEnv->pFileIO->Open(path, AZ::IO::OpenMode::ModeWrite | AZ::IO::OpenMode::ModeBinary, fileHandle); if (fileHandle == AZ::IO::InvalidHandle) { return; } char buf[512]; const unsigned int buf_size = sizeof(buf); sprintf_s(buf, buf_size, "<root>\n"); AZ::IO::Print(fileHandle, buf); const size_t numThreads = m_threadCounter; for (size_t i = 0; i < numThreads; ++i) { CBootProfilerRecord* pRoot = m_threadsProfileInfo[i].m_pRoot; if (pRoot) { pRoot->m_stopTimeStamp = m_stopTimeStamp; const char* threadName = GetThreadNameByIndex(i); if (!threadName) { threadName = "UNKNOWN"; } const float time = (float)(pRoot->m_stopTimeStamp.QuadPart - pRoot->m_startTimeStamp.QuadPart) * 1000.f / (float)pRoot->m_freq.QuadPart; sprintf_s(buf, buf_size, "\t<thread name=\"%s\" totalTimeMS=\"%f\" startTime=\"%" PRIu64 "\" stopTime=\"%" PRIu64 "\" > \n", threadName, time, pRoot->m_startTimeStamp.QuadPart, pRoot->m_stopTimeStamp.QuadPart); AZ::IO::Print(fileHandle, buf); for (size_t recordIdx = 0; recordIdx < pRoot->m_Childs.size(); ++recordIdx) { CBootProfilerRecord* record = pRoot->m_Childs[recordIdx]; assert(record); record->Print(fileHandle, buf, buf_size, 2, m_stopTimeStamp, threadName, timeThreshold); } sprintf_s(buf, buf_size, "\t</thread>\n"); AZ::IO::Print(fileHandle, buf); } } sprintf_s(buf, buf_size, "</root>\n"); AZ::IO::Print(fileHandle, buf); gEnv->pFileIO->Close(fileHandle); } ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// CBootProfiler& CBootProfiler::GetInstance() { return gProfilerInstance; } CBootProfiler::CBootProfiler() : m_pCurrentSession(NULL) , m_pFrameRecord(NULL) , m_levelLoadAdditionalFrames(0) { } CBootProfiler::~CBootProfiler() { AZStd::lock_guard<AZStd::recursive_mutex> recordGuard{ m_recordMutex }; for (TSessionMap::iterator it = m_sessions.begin(); it != m_sessions.end(); ++it) { CBootProfilerSession* session = it->second; delete session; } } // start session void CBootProfiler::StartSession(const char* sessionName) { AZStd::lock_guard<AZStd::recursive_mutex> recordGuard{ m_recordMutex }; TSessionMap::const_iterator it = m_sessions.find(sessionName); if (it == m_sessions.end()) { m_pCurrentSession = new CBootProfilerSession(); m_sessions[sessionName] = m_pCurrentSession; m_pCurrentSession->Start(); } } // stop session void CBootProfiler::StopSession(const char* sessionName) { AZStd::lock_guard<AZStd::recursive_mutex> recordGuard{ m_recordMutex }; if (m_pCurrentSession) { TSessionMap::iterator it = m_sessions.find(sessionName); if (it != m_sessions.end()) { if (m_pCurrentSession == it->second) { CBootProfilerSession* session = m_pCurrentSession; m_pCurrentSession = NULL; session->Stop(); session->CollectResults(sessionName, CV_sys_bp_time_threshold); delete session; } m_sessions.erase(it); } } } CBootProfilerRecord* CBootProfiler::StartBlock(const char* name, const char* args) { AZStd::lock_guard<AZStd::recursive_mutex> recordGuard{ m_recordMutex }; if (m_pCurrentSession) { return m_pCurrentSession->StartBlock(name, args); } return NULL; } void CBootProfiler::StopBlock(CBootProfilerRecord* record) { AZStd::lock_guard<AZStd::recursive_mutex> recordGuard{ m_recordMutex }; if (m_pCurrentSession) { m_pCurrentSession->StopBlock(record); } } void CBootProfiler::StartFrame(const char* name) { AZStd::lock_guard<AZStd::recursive_mutex> recordGuard{ m_recordMutex }; if (CV_sys_bp_frames) { StartSession("frames"); m_pFrameRecord = StartBlock(name, NULL); } } void CBootProfiler::StopFrame() { AZStd::lock_guard<AZStd::recursive_mutex> recordGuard{ m_recordMutex }; if (m_pCurrentSession && CV_sys_bp_frames) { StopBlock(m_pFrameRecord); m_pFrameRecord = NULL; --CV_sys_bp_frames; if (0 == CV_sys_bp_frames) { StopSession("frames"); } } if (m_pCurrentSession && m_levelLoadAdditionalFrames) { --m_levelLoadAdditionalFrames; if (0 == m_levelLoadAdditionalFrames) { StopSession("level"); } } } void CBootProfiler::Init(ISystem* pSystem) { //REGISTER_CVAR(sys_BootProfiler, 1, VF_DEV_ONLY, // "Collect and output session statistics into TestResults/bp_(session_name).xml \n" // "0 = Disabled\n" // "1 = Enabled\n"); pSystem->GetISystemEventDispatcher()->RegisterListener(this); StartSession("boot"); } void CBootProfiler::RegisterCVars() { REGISTER_CVAR2("sys_bp_frames", &CV_sys_bp_frames, 0, VF_DEV_ONLY, "Starts frame profiling for specified number of frames using BootProfiler"); REGISTER_CVAR2("sys_bp_time_threshold", &CV_sys_bp_time_threshold, 0.1f, VF_DEV_ONLY, "If greater than 0 don't write blocks that took less time (default 0.1 ms)"); } void CBootProfiler::OnSystemEvent(ESystemEvent event, UINT_PTR wparam, UINT_PTR lparam) { switch (event) { case ESYSTEM_EVENT_GAME_POST_INIT_DONE: { StopSession("boot"); break; } case ESYSTEM_EVENT_GAME_MODE_SWITCH_START: { break; } case ESYSTEM_EVENT_GAME_MODE_SWITCH_END: { break; } case ESYSTEM_EVENT_LEVEL_LOAD_START: { break; } case ESYSTEM_EVENT_LEVEL_LOAD_PREPARE: { StartSession("level"); break; } case ESYSTEM_EVENT_LEVEL_LOAD_END: { StopSession("level"); break; } case ESYSTEM_EVENT_LEVEL_PRECACHE_END: { //level loading can be stopped here, or m_levelLoadAdditionalFrames can be used to prolong dump for this amount of frames //StopSession("level"); m_levelLoadAdditionalFrames = 20; break; } } } void CBootProfiler::SetFrameCount(int frameCount) { CV_sys_bp_frames = frameCount; } #endif