/*
* All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or
* its licensors.
*
* For complete copyright and license terms please see the LICENSE at the root of this
* distribution (the "License"). All use of this software is governed by the License,
* or, if provided, by the license below or the license accompanying this file. Do not
* remove or modify any license notices. This file is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
*/
// Original file Copyright Crytek GMBH or its affiliates, used under license.

// Description : Direct3D specific FX shaders rendering pipeline.

#include "StdAfx.h"
#include "DriverD3D.h"
#include "D3DPostProcess.h"
#include
#include
#include "MultiLayerAlphaBlendPass.h"
#include
#include
#include "GraphicsPipeline/FurPasses.h"
#include "../../Cry3DEngine/Environment/OceanEnvironmentBus.h"

#if defined(FEATURE_SVO_GI)
#include "D3D_SVO.h"
#endif

#include

long CD3D9Renderer::FX_SetVertexDeclaration(int StreamMask, const AZ::Vertex::Format& vertexFormat)
{
    FUNCTION_PROFILER_RENDER_FLAT
    HRESULT hr;

    bool bMorph = (StreamMask & VSM_MORPHBUDDY) != 0;
    bool bInstanced = (StreamMask & VSM_INSTANCED) != 0;

#if defined(FEATURE_PER_SHADER_INPUT_LAYOUT_CACHE)
    SOnDemandD3DVertexDeclarationCache pDeclCache[1] = { { NULL } };

    // (StreamMask & (0xfe | VSM_MORPHBUDDY)) is the value of StreamMask for most cases. There are a few exceptions:
    // 0xfe = 1111 1110, so the result is 0 in the case of VSM_GENERAL (1), or 0 if the mask bit is greater than 8 bits,
    // unless StreamMask happens to be VSM_MORPHBUDDY, in which case the result is again the value of StreamMask.
    // At the time of this comment, that means the portion of the cacheID determined by StreamMask will be the same for
    // VSM_GENERAL as it will be for VSM_INSTANCED, or anything that may come after VSM_INSTANCED.
    uint64 cacheID = static_cast<uint64>(StreamMask & (0xfe | VSM_MORPHBUDDY)) ^ (static_cast<uint64>(vertexFormat.GetEnum()) << 32);
    if (CHWShader_D3D::s_pCurInstVS)
    {
        pDeclCache->m_pDeclaration = CHWShader_D3D::s_pCurInstVS->GetCachedInputLayout(cacheID);
    }
#else
    AZ::u32 declCacheKey = vertexFormat.GetEnum();
    if (CHWShader_D3D::s_pCurInstVS)
    {
        declCacheKey = CHWShader_D3D::s_pCurInstVS->GenerateVertexDeclarationCacheKey(vertexFormat);
    }
    SOnDemandD3DVertexDeclarationCache* pDeclCache = &m_RP.m_D3DVertexDeclarationCache[(StreamMask & 0xff) >> 1][bMorph || bInstanced][declCacheKey];
#if defined(AZ_RESTRICTED_PLATFORM)
#if defined(AZ_PLATFORM_XENIA)
#include "Xenia/D3DFXPipeline_cpp_xenia.inl"
#elif defined(AZ_PLATFORM_PROVO)
#include "Provo/D3DFXPipeline_cpp_provo.inl"
#elif defined(AZ_PLATFORM_SALEM)
#include "Salem/D3DFXPipeline_cpp_salem.inl"
#endif
#endif
#endif

    if (!pDeclCache->m_pDeclaration)
    {
        SOnDemandD3DVertexDeclaration Decl;
        EF_OnDemandVertexDeclaration(Decl, (StreamMask & 0xff) >> 1, vertexFormat, bMorph, bInstanced);
        if (!Decl.m_Declaration.size())
        {
            return S_FALSE;
        }
        if (!CHWShader_D3D::s_pCurInstVS || !CHWShader_D3D::s_pCurInstVS->m_pShaderData || CHWShader_D3D::s_pCurInstVS->m_bFallback)
        {
            return (HRESULT)-1;
        }
        int nSize = CHWShader_D3D::s_pCurInstVS->m_nDataSize;
        void* pVSData = CHWShader_D3D::s_pCurInstVS->m_pShaderData;
        if (FAILED(hr = GetDevice().CreateInputLayout(&Decl.m_Declaration[0], Decl.m_Declaration.size(), pVSData, nSize, &pDeclCache->m_pDeclaration)))
        {
#ifndef _RELEASE
            iLog->LogError("Failed to create an input layout for material \"%s\".\nThe shader and the vertex formats may be incompatible.\nVertex format: \"%d\". Shader expects: \"%d\".\n\n", m_RP.m_pShaderResources->m_szMaterialName, (int)vertexFormat.GetEnum(), (int)CHWShader_D3D::s_pCurInstVS->m_vertexFormat.GetEnum());
#endif
            return hr;
        }
#if defined(FEATURE_PER_SHADER_INPUT_LAYOUT_CACHE)
        CHWShader_D3D::s_pCurInstVS->SetCachedInputLayout(pDeclCache->m_pDeclaration, cacheID);
#endif
    }

    D3DVertexDeclaration* pD3DDecl = pDeclCache->m_pDeclaration;

    if (!CHWShader_D3D::s_pCurInstVS || !CHWShader_D3D::s_pCurInstPS || (CHWShader_D3D::s_pCurInstVS->m_bFallback | CHWShader_D3D::s_pCurInstPS->m_bFallback))
    {
        FX_Commit();
        return E_FAIL;
    }

    if (m_pLastVDeclaration != pD3DDecl)
    {
        // Don't set input layout on fallback shader (crashes in DX11 NV driver)
        if (!CHWShader_D3D::s_pCurInstVS || CHWShader_D3D::s_pCurInstVS->m_bFallback)
        {
            return (HRESULT)-1;
        }
        m_pLastVDeclaration = pD3DDecl;
        m_DevMan.BindVtxDecl(pD3DDecl);
    }

    return S_OK;
}

void CD3D9Renderer::EF_ClearTargetsImmediately(uint32 nFlags)
{
    nFlags |= FRT_CLEAR_IMMEDIATE;
    EF_ClearTargetsLater(nFlags);
    if (nFlags & FRT_CLEAR_IMMEDIATE)
    {
        FX_SetActiveRenderTargets(true);
    }
}

void CD3D9Renderer::EF_ClearTargetsImmediately(uint32 nFlags, const ColorF& Colors, float fDepth, uint8 nStencil)
{
    nFlags |= FRT_CLEAR_IMMEDIATE;
    EF_ClearTargetsLater(nFlags, Colors, fDepth, nStencil);
    if (nFlags & FRT_CLEAR_IMMEDIATE)
    {
        FX_SetActiveRenderTargets(true);
    }
}

void CD3D9Renderer::EF_ClearTargetsImmediately(uint32 nFlags, const ColorF& Colors)
{
    nFlags |= FRT_CLEAR_IMMEDIATE;
    EF_ClearTargetsLater(nFlags, Colors);
    if (nFlags & FRT_CLEAR_IMMEDIATE)
    {
        FX_SetActiveRenderTargets(true);
    }
}

void CD3D9Renderer::EF_ClearTargetsImmediately(uint32 nFlags, float fDepth, uint8 nStencil)
{
    nFlags |= FRT_CLEAR_IMMEDIATE;
    EF_ClearTargetsLater(nFlags, fDepth, nStencil);
    if (nFlags & FRT_CLEAR_IMMEDIATE)
    {
        FX_SetActiveRenderTargets(true);
    }
}

// Clear buffers (color, depth/stencil)
void CD3D9Renderer::EF_ClearTargetsLater(uint32 nFlags, const ColorF& Colors, float fDepth, uint8 nStencil)
{
    if (nFlags & FRT_CLEAR_FOGCOLOR)
    {
        for (int i = 0; i < RT_STACK_WIDTH; ++i)
        {
            if (m_pNewTarget[i])
            {
                m_pNewTarget[i]->m_ReqColor = m_cClearColor;
            }
        }
    }
    else if (nFlags & FRT_CLEAR_COLOR)
    {
        for (int i = 0; i < RT_STACK_WIDTH; ++i)
        {
            if (m_pNewTarget[i] && m_pNewTarget[i]->m_pTarget)
            {
                m_pNewTarget[i]->m_ReqColor = Colors;
            }
        }
    }
    if (nFlags & FRT_CLEAR_DEPTH)
    {
        m_pNewTarget[0]->m_fReqDepth = fDepth;
    }
    if (!(nFlags & FRT_CLEAR_IMMEDIATE))
    {
        m_pNewTarget[0]->m_ClearFlags = 0;
    }
    if ((nFlags & FRT_CLEAR_DEPTH) && m_pNewTarget[0]->m_pDepth)
    {
        m_pNewTarget[0]->m_ClearFlags |= CLEAR_ZBUFFER;
    }
    if (nFlags & FRT_CLEAR_COLOR)
    {
        m_pNewTarget[0]->m_ClearFlags |= CLEAR_RTARGET;
    }
    if (nFlags & FRT_CLEAR_COLORMASK)
    {
        m_pNewTarget[0]->m_ClearFlags |= FRT_CLEAR_COLORMASK;
    }
    if (m_sbpp && (nFlags & FRT_CLEAR_STENCIL) && m_pNewTarget[0]->m_pDepth)
    {
#ifdef SUPPORTS_MSAA
        if (gcpRendD3D->m_RP.m_MSAAData.Type)
        {
            m_RP.m_PersFlags2 |= RBPF2_MSAA_RESTORE_SAMPLE_MASK;
        }
#endif
        m_pNewTarget[0]->m_ClearFlags |= CLEAR_STENCIL;
        m_pNewTarget[0]->m_nReqStencil = nStencil;
    }
}

void CD3D9Renderer::EF_ClearTargetsLater(uint32 nFlags, float fDepth, uint8 nStencil)
{
    if (nFlags & FRT_CLEAR_FOGCOLOR)
    {
        for (int i = 0; i < RT_STACK_WIDTH; ++i)
        {
            if (m_pNewTarget[i])
            {
                m_pNewTarget[i]->m_ReqColor = m_cClearColor;
            }
        }
    }
    else if (nFlags & FRT_CLEAR_COLOR)
    {
        for (int i = 0; i < RT_STACK_WIDTH; ++i)
        {
            if (m_pNewTarget[i] && m_pNewTarget[i]->m_pTex)
            {
                m_pNewTarget[i]->m_ReqColor = m_pNewTarget[i]->m_pTex->GetClearColor();
            }
        }
    }
    if (nFlags & FRT_CLEAR_DEPTH)
    {
        m_pNewTarget[0]->m_fReqDepth = fDepth;
    }
    if
(!(nFlags & FRT_CLEAR_IMMEDIATE)) { m_pNewTarget[0]->m_ClearFlags = 0; } if ((nFlags & FRT_CLEAR_DEPTH) && m_pNewTarget[0]->m_pDepth) { m_pNewTarget[0]->m_ClearFlags |= CLEAR_ZBUFFER; } if (nFlags & FRT_CLEAR_COLOR) { m_pNewTarget[0]->m_ClearFlags |= CLEAR_RTARGET; } if (nFlags & FRT_CLEAR_COLORMASK) { m_pNewTarget[0]->m_ClearFlags |= FRT_CLEAR_COLORMASK; } if (m_sbpp && (nFlags & FRT_CLEAR_STENCIL) && m_pNewTarget[0]->m_pDepth) { #ifdef SUPPORTS_MSAA if (gcpRendD3D->m_RP.m_MSAAData.Type) { m_RP.m_PersFlags2 |= RBPF2_MSAA_RESTORE_SAMPLE_MASK; } #endif m_pNewTarget[0]->m_ClearFlags |= CLEAR_STENCIL; m_pNewTarget[0]->m_nReqStencil = nStencil; } } void CD3D9Renderer::EF_ClearTargetsLater(uint32 nFlags, const ColorF& Colors) { EF_ClearTargetsLater(nFlags, Colors, Clr_FarPlane.r, 0); // float(m_pNewTarget[0]->m_pSurfDepth->pTex->GetClearColor().r), // uint8(m_pNewTarget[0]->m_pSurfDepth->pTex->GetClearColor().g)); } void CD3D9Renderer::EF_ClearTargetsLater(uint32 nFlags) { EF_ClearTargetsLater(nFlags, Clr_FarPlane.r, 0); // float(m_pNewTarget[0]->m_pSurfDepth->pTex->GetClearColor().r), // uint8(m_pNewTarget[0]->m_pSurfDepth->pTex->GetClearColor().g)); } void CD3D9Renderer::FX_ClearTargetRegion(const uint32 nAdditionalStates /* = 0*/) { assert(m_pRT->IsRenderThread()); bool clearColor = (m_pNewTarget[0]->m_ClearFlags & CLEAR_RTARGET) ? true : false; bool clearDepth = (m_pNewTarget[0]->m_ClearFlags & CLEAR_ZBUFFER) ? true : false; bool clearStencil = (m_pNewTarget[0]->m_ClearFlags & CLEAR_STENCIL) ? true : false; ColorF colorValue = Clr_Empty; float depthValue = 1.0f; uint8 stencilValue = 0; const char* clearTechnique = "Clear"; if(clearColor) { colorValue = m_pNewTarget[0]->m_ReqColor; // Get number of render targets to clear int numRT = 0; for (int i = 0; i < RT_STACK_WIDTH; ++i) { if (m_pNewTarget[i] && m_pNewTarget[i]->m_pTarget) { numRT++; break; } } // Select the technique to clear the right amount of render targets switch(numRT) { case 0: AZ_Assert(false, "No color render target bound."); break; case 1: clearTechnique = "Clear"; break; case 2: clearTechnique = "Clear2RT"; break; case 3: clearTechnique = "Clear3RT"; break; case 4: clearTechnique = "Clear4RT"; break; default: AZ_Warning("Rendering", false, "More than 4 render targets bound. Only the first 4 will be cleared."); clearTechnique = "Clear4RT"; break; } } if(clearDepth) { depthValue = ::FClamp(m_pNewTarget[0]->m_fReqDepth, 0.0f, 1.0f); } if(clearStencil) { stencilValue = m_pNewTarget[0]->m_nReqStencil; } CRenderObject* pObj = m_RP.m_pCurObject; CShader* pSHSave = m_RP.m_pShader; SShaderTechnique* pSHT = m_RP.m_pCurTechnique; SShaderPass* pPass = m_RP.m_pCurPass; CShaderResources* pShRes = m_RP.m_pShaderResources; gRenDev->m_cEF.mfRefreshSystemShader("Common", CShaderMan::s_ShaderCommon); m_RP.m_PersFlags1 |= RBPF1_IN_CLEAR; CShader* pSH = CShaderMan::s_ShaderCommon; uint32 nPasses = 0; pSH->FXSetTechnique(clearTechnique); pSH->FXBegin(&nPasses, FEF_DONTSETTEXTURES | FEF_DONTSETSTATES); pSH->FXBeginPass(0); int nState = 0; if (!clearColor) { nState |= GS_COLMASK_NONE; } if (clearDepth) { if (!clearColor && !clearStencil) { // If only clearing depth then we can optimize the draw by using not-equal comparison, // this way pixels with the same depth value as the clear value will be discarded. 
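            // (The clear quad drawn below writes depthValue to every covered pixel; with the NOTEQUAL
            //  depth test, pixels that already hold depthValue are rejected, so the result is identical
            //  to a full depth clear while skipping redundant writes.)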
nState |= GS_DEPTHFUNC_NOTEQUAL; } else { nState |= GS_DEPTHFUNC_ALWAYS; } nState |= GS_DEPTHWRITE; } else { nState |= GS_NODEPTHTEST; } if (clearStencil) { int stencilState; if (!clearColor && !clearDepth) { // If only clearing stencil then we can optimize the draw by using not-equal comparison, // this way pixels with the same stencil value as the clear value will be discarded. stencilState = STENC_FUNC(FSS_STENCFUNC_NOTEQUAL) | STENCOP_FAIL(FSS_STENCOP_KEEP) | STENCOP_ZFAIL(FSS_STENCOP_REPLACE) | STENCOP_PASS(FSS_STENCOP_REPLACE); } else { stencilState = STENC_FUNC(FSS_STENCFUNC_ALWAYS) | STENCOP_FAIL(FSS_STENCOP_REPLACE) | STENCOP_ZFAIL(FSS_STENCOP_REPLACE) | STENCOP_PASS(FSS_STENCOP_REPLACE); } const uint32 stencilMask = 0xFFFFFFFF; FX_SetStencilState(stencilState, stencilValue, stencilMask, stencilMask); nState |= GS_STENCIL; } m_pNewTarget[0]->m_ClearFlags = 0; nState |= nAdditionalStates; FX_SetState(nState, -1); D3DSetCull(eCULL_None); float fX = (float)m_CurViewport.nWidth; float fY = (float)m_CurViewport.nHeight; DrawQuad(-0.5f, -0.5f, fX - 0.5f, fY - 0.5f, colorValue, depthValue, fX, fY, fX, fY); m_RP.m_PersFlags1 &= ~RBPF1_IN_CLEAR; m_RP.m_pCurObject = pObj; m_RP.m_pShader = pSHSave; m_RP.m_pCurTechnique = pSHT; m_RP.m_pCurPass = pPass; m_RP.m_pShaderResources = pShRes; } void CD3D9Renderer::FX_SetActiveRenderTargets(bool /*bAllowDip*/) { DETAILED_PROFILE_MARKER("FX_SetActiveRenderTargets"); if (m_RP.m_PersFlags1 & RBPF1_IN_CLEAR) { return; } FUNCTION_PROFILER_RENDER_FLAT HRESULT hr = S_OK; bool bDirty = false; if (m_nMaxRT2Commit >= 0) { for (int i = 0; i <= m_nMaxRT2Commit; i++) { if (!m_pNewTarget[i]->m_bWasSetRT) { m_pNewTarget[i]->m_bWasSetRT = true; if (m_pNewTarget[i]->m_pTex) { m_pNewTarget[i]->m_pTex->SetResolved(false); } m_pCurTarget[i] = m_pNewTarget[i]->m_pTex; bDirty = true; #ifndef _RELEASE if (m_logFileHandle != AZ::IO::InvalidHandle) { Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " +++ Set RT"); if (m_pNewTarget[i]->m_pTex) { Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " '%s'", m_pNewTarget[i]->m_pTex->GetName()); Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " Format:%s", CTexture::NameForTextureFormat(m_pNewTarget[i]->m_pTex->m_eTFDst)); Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " Type:%s", CTexture::NameForTextureType(m_pNewTarget[i]->m_pTex->m_eTT)); Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " W/H:%d:%d\n", m_pNewTarget[i]->m_pTex->GetWidth(), m_pNewTarget[i]->m_pTex->GetHeight()); } else { Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], " 'Unknown'\n"); } } #endif CTexture* pRT = m_pNewTarget[i]->m_pTex; if (pRT && pRT->UseMultisampledRTV()) { pRT->Unbind(); } } } if (!m_pNewTarget[0]->m_bWasSetD) { m_pNewTarget[0]->m_bWasSetD = true; bDirty = true; } //m_nMaxRT2Commit = -1; } if (bDirty) { CTexture* pRT = m_pNewTarget[0]->m_pTex; if (pRT && pRT->UseMultisampledRTV()) { // Reset all texture slots which are used as RT currently D3DShaderResourceView* pRes = NULL; for (int i = 0; i < MAX_TMU; i++) { if (CTexture::s_TexStages[i].m_DevTexture == pRT->GetDevTexture()) { m_DevMan.BindSRV(eHWSC_Pixel, pRes, i); CTexture::s_TexStages[i].m_DevTexture = NULL; } } } const uint32 nMaxRT2Commit = max(m_nMaxRT2Commit + 1, 0); ID3D11RenderTargetView* pRTV[RT_STACK_WIDTH] = { NULL }; uint32 nNumViews = 0; for (uint32 v = 0; v < nMaxRT2Commit; ++v) { if (m_pNewTarget[v] && m_pNewTarget[v]->m_pTarget) { pRTV[v] = (ID3D11RenderTargetView*)m_pNewTarget[v]->m_pTarget; nNumViews = v + 1; } } 
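        // The loop above collected every bound color target into pRTV[] and tracked the highest used
        // slot via nNumViews; the OMSetRenderTargets call below binds those views together with the
        // current depth surface (passing 0 views when no primary color target is set).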
GetDeviceContext().OMSetRenderTargets(m_pNewTarget[0]->m_pTarget == NULL ? 0 : nNumViews, pRTV, m_pNewTarget[0]->m_pDepth); } if (m_nMaxRT2Commit >= 0) { m_nMaxRT2Commit = -1; } FX_SetViewport(); FX_ClearTargets(); } void CD3D9Renderer::FX_SetViewport() { // Set current viewport if (m_bViewportDirty) { m_bViewportDirty = false; if ((m_CurViewport != m_NewViewport)) { m_CurViewport = m_NewViewport; D3DViewPort Port; Port.Width = (FLOAT)m_CurViewport.nWidth; Port.Height = (FLOAT)m_CurViewport.nHeight; Port.TopLeftX = (FLOAT)m_CurViewport.nX; Port.TopLeftY = (FLOAT)m_CurViewport.nY; Port.MinDepth = m_CurViewport.fMinZ; Port.MaxDepth = m_CurViewport.fMaxZ; if (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_REVERSE_DEPTH) { Port = ReverseDepthHelper::Convert(Port); } GetDeviceContext().RSSetViewports(1, &Port); } } } void CD3D9Renderer::FX_ClearTarget(D3DSurface* pView, const ColorF& cClear, const uint numRects, const RECT* pRects) { #if !defined(EXCLUDE_RARELY_USED_R_STATS) && defined(ENABLE_PROFILING_CODE) { m_RP.m_PS[m_RP.m_nProcessThreadID].m_RTCleared++; m_RP.m_PS[m_RP.m_nProcessThreadID].m_RTClearedSize += CDeviceTexture::TextureDataSize(pView, numRects, pRects); } #endif #if CRY_USE_DX12 GetDeviceContext().ClearRectsRenderTargetView( pView, (const FLOAT*)&cClear, numRects, pRects); #elif defined(DEVICE_SUPPORTS_D3D11_1) && D3DFXPIPELINE_CPP_TRAIT_CLEARVIEW GetDeviceContext().ClearView( pView, (const FLOAT*)&cClear, pRects, numRects); #else if (!numRects) { GetDeviceContext().ClearRenderTargetView( pView, (const FLOAT*)&cClear); return; } // TODO: implement clears in compute for DX11, gives max performance (pipeline switch cost?) __debugbreak(); abort(); #endif } void CD3D9Renderer::FX_ClearTarget(ITexture* tex, const ColorF& cClear, const uint numRects, const RECT* pRects, const bool bOptional) { CTexture* pTex = reinterpret_cast(tex); // TODO: should not happen, happens in the editor currently if (!pTex->GetDeviceRT()) { pTex->GetSurface(-1, 0); } #if CRY_USE_DX12 FX_ClearTarget( pTex->GetDeviceRT(), cClear, numRects, pRects); #else if (bOptional) { FX_ClearTarget( pTex->GetDeviceRT(), cClear, 0U, nullptr); return; } // TODO: implement depth-clear as depth-only for DX11, gives max performance and probably just resets the depth-surface meta-data int ox, oy, ow, oh; FX_PushRenderTarget(0, pTex, nullptr); GetViewport(&ox, &oy, &ow, &oh); RT_SetViewport(pRects->left, pRects->top, pRects->right - pRects->left, pRects->bottom - pRects->top); FX_SetActiveRenderTargets(); EF_ClearTargetsLater(FRT_CLEAR_COLOR, cClear); FX_ClearTargetRegion(); FX_PopRenderTarget(0); SetViewport(ox, oy, ow, oh); #endif } void CD3D9Renderer::FX_ClearTarget(ITexture* pTex, const ColorF& cClear) { FX_ClearTarget( pTex, cClear, 0U, nullptr, true); } void CD3D9Renderer::FX_ClearTarget(ITexture* pTex) { FX_ClearTarget( pTex, pTex->GetClearColor()); } //==================================================================================== void CD3D9Renderer::FX_ClearTarget(D3DDepthSurface* pView, const int nFlags, const float cDepth, const uint8 cStencil, const uint numRects, const RECT* pRects) { #if !defined(EXCLUDE_RARELY_USED_R_STATS) && defined(ENABLE_PROFILING_CODE) if (nFlags) { m_RP.m_PS[m_RP.m_nProcessThreadID].m_RTCleared++; m_RP.m_PS[m_RP.m_nProcessThreadID].m_RTClearedSize += CDeviceTexture::TextureDataSize(pView, numRects, pRects); } #endif assert(( (nFlags & CLEAR_ZBUFFER ? D3D11_CLEAR_DEPTH : 0) | (nFlags & CLEAR_STENCIL ? 
D3D11_CLEAR_STENCIL : 0) ) == nFlags);

#if CRY_USE_DX12
    GetDeviceContext().ClearRectsDepthStencilView(
        pView,
        nFlags,
        cDepth,
        cStencil,
        numRects,
        pRects);
#else
    if (!numRects)
    {
        GetDeviceContext().ClearDepthStencilView(
            pView,
            nFlags,
            cDepth,
            cStencil);
        return;
    }

    // TODO: implement clears in compute for DX11, gives max performance (pipeline switch cost?)
    __debugbreak();
    abort();
#endif
}

void CD3D9Renderer::FX_ClearTarget(SDepthTexture* pTex, const int nFlags, const float cDepth, const uint8 cStencil, const uint numRects, const RECT* pRects, const bool bOptional)
{
    assert((
        (nFlags & CLEAR_ZBUFFER ? D3D11_CLEAR_DEPTH : 0) |
        (nFlags & CLEAR_STENCIL ? D3D11_CLEAR_STENCIL : 0)
        ) == nFlags);

#if CRY_USE_DX12
    FX_ClearTarget(
        pTex->pSurf,
        nFlags,
        cDepth,
        cStencil,
        numRects,
        pRects);
#else
    if (bOptional)
    {
        FX_ClearTarget(
            static_cast<D3DDepthSurface*>(pTex->pSurf),
            nFlags,
            cDepth,
            cStencil,
            0U,
            nullptr);
        return;
    }

    // TODO: implement depth-clear as depth-only for DX11, gives max performance and probably just resets the depth-surface meta-data
    int ox, oy, ow, oh;
    FX_PushRenderTarget(0, (D3DSurface*)nullptr, pTex);
    GetViewport(&ox, &oy, &ow, &oh);
    RT_SetViewport(pRects->left, pRects->top, pRects->right - pRects->left, pRects->bottom - pRects->top);
    FX_SetActiveRenderTargets();
    EF_ClearTargetsLater(nFlags, Clr_Empty, cDepth, cStencil);
    FX_ClearTargetRegion();
    FX_PopRenderTarget(0);
    SetViewport(ox, oy, ow, oh);
#endif
}

void CD3D9Renderer::FX_ClearTarget(SDepthTexture* pTex, const int nFlags, const float cDepth, const uint8 cStencil)
{
    FX_ClearTarget(
        pTex,
        nFlags,
        cDepth,
        cStencil,
        0U,
        nullptr,
        true);
}

void CD3D9Renderer::FX_ClearTarget(SDepthTexture* pTex, const int nFlags)
{
    FX_ClearTarget(
        pTex,
        nFlags,
        (gRenDev->m_RP.m_TI[gRenDev->m_RP.m_nProcessThreadID].m_PersFlags & RBPF_REVERSE_DEPTH) ? 0.f : 1.f,
        0U);
}

void CD3D9Renderer::FX_ClearTarget(SDepthTexture* pTex)
{
    FX_ClearTarget(
        pTex,
        CLEAR_ZBUFFER | CLEAR_STENCIL);
}

//====================================================================================

void CD3D9Renderer::FX_ClearTargets()
{
    if (m_pNewTarget[0]->m_ClearFlags)
    {
        {
            const float fClearDepth = (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_REVERSE_DEPTH) ? 1.0f - m_pNewTarget[0]->m_fReqDepth : m_pNewTarget[0]->m_fReqDepth;
            const uint8 nClearStencil = m_pNewTarget[0]->m_nReqStencil;
            const int nFlags = m_pNewTarget[0]->m_ClearFlags & ~CLEAR_RTARGET;

            // TODO: ClearFlags per render-target
            if ((m_pNewTarget[0]->m_pTarget != NULL) && (m_pNewTarget[0]->m_ClearFlags & CLEAR_RTARGET))
            {
                for (int i = 0; i < RT_STACK_WIDTH; ++i)
                {
                    if (m_pNewTarget[i]->m_pTarget)
                    {
                        // NOTE: optimal value is "m_pNewTarget[0]->m_pTex->GetClearColor()"
                        GetDeviceContext().ClearRenderTargetView(m_pNewTarget[i]->m_pTarget, &m_pNewTarget[i]->m_ReqColor[0]);
                    }
                }
            }

            assert((
                (nFlags & FRT_CLEAR_DEPTH ? D3D11_CLEAR_DEPTH : 0) |
                (nFlags & FRT_CLEAR_STENCIL ? D3D11_CLEAR_STENCIL : 0)
                ) == nFlags);

            AZ_Warning("CD3D9Renderer", m_pNewTarget[0]->m_pDepth != nullptr, "FX_ClearTargets: Depth texture of target was nullptr. The depth target will not be cleared.");
            if (nFlags && m_pNewTarget[0]->m_pDepth != nullptr)
            {
                GetDeviceContext().ClearDepthStencilView(m_pNewTarget[0]->m_pDepth, nFlags, fClearDepth, nClearStencil);
            }
        }

        CTexture* pRT = m_pNewTarget[0]->m_pTex;
        if (CV_r_stats == 13)
        {
            EF_AddRTStat(pRT, m_pNewTarget[0]->m_ClearFlags, m_CurViewport.nWidth, m_CurViewport.nHeight);
        }

#if !defined(EXCLUDE_RARELY_USED_R_STATS) && defined(ENABLE_PROFILING_CODE)
        {
            if ((m_pNewTarget[0]->m_pTarget != NULL) && (m_pNewTarget[0]->m_ClearFlags & CLEAR_RTARGET))
            {
                for (int i = 0; i < RT_STACK_WIDTH; ++i)
                {
                    if (m_pNewTarget[i]->m_pTarget)
                    {
                        m_RP.m_PS[m_RP.m_nProcessThreadID].m_RTCleared++;
                        m_RP.m_PS[m_RP.m_nProcessThreadID].m_RTClearedSize += CDeviceTexture::TextureDataSize(m_pNewTarget[i]->m_pTarget);
                    }
                }
            }
            if (m_pNewTarget[0]->m_ClearFlags & (~CLEAR_RTARGET) && m_pNewTarget[0]->m_pSurfDepth != nullptr)
            {
                m_RP.m_PS[m_RP.m_nProcessThreadID].m_RTCleared++;
                m_RP.m_PS[m_RP.m_nProcessThreadID].m_RTClearedSize += CDeviceTexture::TextureDataSize(m_pNewTarget[0]->m_pSurfDepth->pSurf);
            }
        }
#endif

        m_pNewTarget[0]->m_ClearFlags = 0;
    }
}

void CD3D9Renderer::FX_Commit(bool bAllowDIP)
{
    DETAILED_PROFILE_MARKER("FX_Commit");

    // Commit all changed shader parameters
    if (m_RP.m_nCommitFlags & FC_GLOBAL_PARAMS)
    {
        CHWShader_D3D::mfCommitParamsGlobal();
        m_RP.m_nCommitFlags &= ~FC_GLOBAL_PARAMS;
    }
    if (m_RP.m_nCommitFlags & FC_MATERIAL_PARAMS)
    {
        CHWShader_D3D::UpdatePerMaterialConstantBuffer();
        m_RP.m_nCommitFlags &= ~FC_MATERIAL_PARAMS;
    }

    AzRHI::ConstantBufferCache::GetInstance().CommitAll();

    // Commit all changed RT's
    if (m_RP.m_nCommitFlags & FC_TARGETS)
    {
        FX_SetActiveRenderTargets(bAllowDIP);
        m_RP.m_nCommitFlags &= ~FC_TARGETS;
    }

    // Adapt viewport dimensions if changed
    FX_SetViewport();

    // Clear rendertargets if requested
    FX_ClearTargets();
}

// Set current geometry culling modes
void CD3D9Renderer::D3DSetCull(ECull eCull, bool bSkipMirrorCull)
{
    FUNCTION_PROFILER_RENDER_FLAT
    if (eCull != eCULL_None && !bSkipMirrorCull)
    {
        if (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_MIRRORCULL)
        {
            eCull = (eCull == eCULL_Back) ?
eCULL_Front : eCULL_Back; } } if (eCull == m_RP.m_eCull) { return; } SStateRaster RS = m_StatesRS[m_nCurStateRS]; RS.Desc.FrontCounterClockwise = true; if (eCull == eCULL_None) { RS.Desc.CullMode = D3D11_CULL_NONE; } else { if (eCull == eCULL_Back) { RS.Desc.CullMode = D3D11_CULL_BACK; } else { RS.Desc.CullMode = D3D11_CULL_FRONT; } } SetRasterState(&RS); m_RP.m_eCull = eCull; } uint8 g_StencilFuncLookup[8] = { D3D11_COMPARISON_ALWAYS, // FSS_STENCFUNC_ALWAYS 0x0 D3D11_COMPARISON_NEVER, // FSS_STENCFUNC_NEVER 0x1 D3D11_COMPARISON_LESS, // FSS_STENCFUNC_LESS 0x2 D3D11_COMPARISON_LESS_EQUAL, // FSS_STENCFUNC_LEQUAL 0x3 D3D11_COMPARISON_GREATER, // FSS_STENCFUNC_GREATER 0x4 D3D11_COMPARISON_GREATER_EQUAL, // FSS_STENCFUNC_GEQUAL 0x5 D3D11_COMPARISON_EQUAL, // FSS_STENCFUNC_EQUAL 0x6 D3D11_COMPARISON_NOT_EQUAL // FSS_STENCFUNC_NOTEQUAL 0x7 }; uint8 g_StencilOpLookup[8] = { D3D11_STENCIL_OP_KEEP, // FSS_STENCOP_KEEP 0x0 D3D11_STENCIL_OP_REPLACE, // FSS_STENCOP_REPLACE 0x1 D3D11_STENCIL_OP_INCR_SAT, // FSS_STENCOP_INCR 0x2 D3D11_STENCIL_OP_DECR_SAT, // FSS_STENCOP_DECR 0x3 D3D11_STENCIL_OP_ZERO, // FSS_STENCOP_ZERO 0x4 D3D11_STENCIL_OP_INCR, // FSS_STENCOP_INCR_WRAP 0x5 D3D11_STENCIL_OP_DECR, // FSS_STENCOP_DECR_WRAP 0x6 D3D11_STENCIL_OP_INVERT // FSS_STENCOP_INVERT 0x7 }; void CRenderer::FX_SetStencilState(int st, uint32 nStencRef, uint32 nStencMask, uint32 nStencWriteMask, bool bForceFullReadMask) { FUNCTION_PROFILER_RENDER_FLAT PrefetchLine(g_StencilFuncLookup, 0); const uint32 nPersFlags2 = m_RP.m_PersFlags2; if (!bForceFullReadMask && !(nPersFlags2 & RBPF2_READMASK_RESERVED_STENCIL_BIT)) { nStencMask &= ~BIT_STENCIL_RESERVED; } if (nPersFlags2 & RBPF2_WRITEMASK_RESERVED_STENCIL_BIT) { nStencWriteMask &= ~BIT_STENCIL_RESERVED; } nStencRef |= m_RP.m_CurStencilRefAndMask; SStateDepth DS = gcpRendD3D->m_StatesDP[gcpRendD3D->m_nCurStateDP]; DS.Desc.StencilReadMask = nStencMask; DS.Desc.StencilWriteMask = nStencWriteMask; int nCurFunc = st & FSS_STENCFUNC_MASK; DS.Desc.FrontFace.StencilFunc = (D3D11_COMPARISON_FUNC)g_StencilFuncLookup[nCurFunc]; int nCurOp = (st & FSS_STENCFAIL_MASK) >> FSS_STENCFAIL_SHIFT; DS.Desc.FrontFace.StencilFailOp = (D3D11_STENCIL_OP)g_StencilOpLookup[nCurOp]; nCurOp = (st & FSS_STENCZFAIL_MASK) >> FSS_STENCZFAIL_SHIFT; DS.Desc.FrontFace.StencilDepthFailOp = (D3D11_STENCIL_OP)g_StencilOpLookup[nCurOp]; nCurOp = (st & FSS_STENCPASS_MASK) >> FSS_STENCPASS_SHIFT; DS.Desc.FrontFace.StencilPassOp = (D3D11_STENCIL_OP)g_StencilOpLookup[nCurOp]; if (!(st & FSS_STENCIL_TWOSIDED)) { DS.Desc.BackFace = DS.Desc.FrontFace; } else { nCurFunc = (st & (FSS_STENCFUNC_MASK << FSS_CCW_SHIFT)) >> FSS_CCW_SHIFT; DS.Desc.BackFace.StencilFunc = (D3D11_COMPARISON_FUNC)g_StencilFuncLookup[nCurFunc]; nCurOp = (st & (FSS_STENCFAIL_MASK << FSS_CCW_SHIFT)) >> (FSS_STENCFAIL_SHIFT + FSS_CCW_SHIFT); DS.Desc.BackFace.StencilFailOp = (D3D11_STENCIL_OP)g_StencilOpLookup[nCurOp]; nCurOp = (st & (FSS_STENCZFAIL_MASK << FSS_CCW_SHIFT)) >> (FSS_STENCZFAIL_SHIFT + FSS_CCW_SHIFT); DS.Desc.BackFace.StencilDepthFailOp = (D3D11_STENCIL_OP)g_StencilOpLookup[nCurOp]; nCurOp = (st & (FSS_STENCPASS_MASK << FSS_CCW_SHIFT)) >> (FSS_STENCPASS_SHIFT + FSS_CCW_SHIFT); DS.Desc.BackFace.StencilPassOp = (D3D11_STENCIL_OP)g_StencilOpLookup[nCurOp]; } m_RP.m_CurStencRef = nStencRef; m_RP.m_CurStencMask = nStencMask; m_RP.m_CurStencWriteMask = nStencWriteMask; gcpRendD3D->SetDepthState(&DS, nStencRef); m_RP.m_CurStencilState = st; } void CD3D9Renderer::EF_Scissor(bool bEnable, int sX, int sY, int sWdt, int sHgt) { 
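    // EF_Scissor is the main-thread entry point: the request is forwarded to the render thread, and
    // RT_SetScissor below applies it, skipping redundant rect / raster-state changes by comparing
    // against m_sPrev* and m_bsPrev.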
m_pRT->RC_SetScissor(bEnable, sX, sY, sWdt, sHgt); } void CD3D9Renderer::RT_SetScissor(bool bEnable, int sX, int sY, int sWdt, int sHgt) { FUNCTION_PROFILER_RENDER_FLAT if (!CV_r_scissor || (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_SHADOWGEN)) { return; } D3D11_RECT scRect; if (bEnable) { if (sX != m_sPrevX || sY != m_sPrevY || sWdt != m_sPrevWdt || sHgt != m_sPrevHgt) { m_sPrevX = sX; m_sPrevY = sY; m_sPrevWdt = sWdt; m_sPrevHgt = sHgt; scRect.left = sX; scRect.top = sY; scRect.right = sX + sWdt; scRect.bottom = sY + sHgt; GetDeviceContext().RSSetScissorRects(1, &scRect); } if (bEnable != m_bsPrev) { m_bsPrev = bEnable; SStateRaster RS = m_StatesRS[m_nCurStateRS]; RS.Desc.ScissorEnable = bEnable; SetRasterState(&RS); } } else { if (bEnable != m_bsPrev) { m_bsPrev = bEnable; m_sPrevWdt = 0; m_sPrevHgt = 0; SStateRaster RS = m_StatesRS[m_nCurStateRS]; RS.Desc.ScissorEnable = bEnable; SetRasterState(&RS); } } } bool CD3D9Renderer::EF_GetScissorState(int& sX, int& sY, int& sWdt, int& sHgt) { if (!CV_r_scissor || (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_SHADOWGEN)) { return false; } sX = m_sPrevX; sY = m_sPrevY; sWdt = m_sPrevWdt; sHgt = m_sPrevHgt; return m_bsPrev; } void CD3D9Renderer::FX_FogCorrection() { if (m_RP.m_nPassGroupID <= EFSLIST_DECAL) { uint32 uBlendFlags = m_RP.m_CurState & GS_BLEND_MASK; switch (uBlendFlags) { case GS_BLSRC_ONE | GS_BLDST_ONE: EF_SetFogColor(Col_Black); break; case GS_BLSRC_DSTALPHA | GS_BLDST_ONE: EF_SetFogColor(Col_Black); break; case GS_BLSRC_DSTCOL | GS_BLDST_SRCCOL: { static ColorF pColGrey = ColorF(0.5f, 0.5f, 0.5f, 1.0f); EF_SetFogColor(pColGrey); break; } case GS_BLSRC_ONE | GS_BLDST_ONEMINUSSRCALPHA: EF_SetFogColor(Col_Black); break; case GS_BLSRC_ONE | GS_BLDST_ONEMINUSSRCCOL: EF_SetFogColor(Col_Black); break; case GS_BLSRC_ZERO | GS_BLDST_ONEMINUSSRCCOL: EF_SetFogColor(Col_Black); break; case GS_BLSRC_SRCALPHA | GS_BLDST_ONE: case GS_BLSRC_SRCALPHA_A_ZERO | GS_BLDST_ONE: EF_SetFogColor(Col_Black); break; case GS_BLSRC_ZERO | GS_BLDST_ONE: EF_SetFogColor(Col_Black); break; case GS_BLSRC_DSTCOL | GS_BLDST_ZERO: EF_SetFogColor(Col_White); break; default: EF_SetFogColor(m_RP.m_TI[m_RP.m_nProcessThreadID].m_FS.m_FogColor); break; } } else { EF_SetFogColor(m_RP.m_TI[m_RP.m_nProcessThreadID].m_FS.m_FogColor); } } // Set current render states void CD3D9Renderer::FX_SetState(int st, int AlphaRef, int RestoreState) { FUNCTION_PROFILER_RENDER_FLAT int Changed; if (CV_r_measureoverdraw == 4 && (st & GS_DEPTHFUNC_MASK) == GS_DEPTHFUNC_HIZEQUAL) { // disable fine depth test st |= GS_NODEPTHTEST; } st |= m_RP.m_StateOr; st &= m_RP.m_StateAnd; Changed = st ^ m_RP.m_CurState; Changed |= RestoreState; // Due to the way reverse depth was implemented, we need to check if RBPF_REVERSE_DEPTH has changed and force flush the depth state if so. 
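    // Illustrative example of this diff (values are hypothetical): if the previous frame had
    // RBPF_REVERSE_DEPTH set and the current one does not, (previous ^ current) keeps that bit,
    // so the depth-state block below is rebuilt even when none of the GS_* bits in 'st' changed.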
uint32 changedPersFlags = m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags ^ m_RP.m_previousPersFlags; m_RP.m_previousPersFlags = m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags; if (!Changed && !changedPersFlags && (AlphaRef == -1 || AlphaRef == m_RP.m_CurAlphaRef)) { return; } //PROFILE_FRAME(State_RStates); #ifndef _RELEASE m_RP.m_PS[m_RP.m_nProcessThreadID].m_NumStateChanges++; #endif if (m_StatesBL.size() == 0 || m_StatesDP.size() == 0 || m_StatesRS.size() == 0) { SetDefaultRenderStates(); } SStateDepth DS = m_StatesDP[m_nCurStateDP]; SStateBlend BS = m_StatesBL[m_nCurStateBL]; SStateRaster RS = m_StatesRS[m_nCurStateRS]; bool bDirtyDS = false; bool bDirtyBS = false; bool bDirtyRS = false; if ((Changed & GS_DEPTHFUNC_MASK) || (changedPersFlags & RBPF_REVERSE_DEPTH)) { bDirtyDS = true; uint32 nDepthState = st; if (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_REVERSE_DEPTH) { nDepthState = ReverseDepthHelper::ConvertDepthFunc(st); } switch (nDepthState & GS_DEPTHFUNC_MASK) { case GS_DEPTHFUNC_HIZEQUAL: case GS_DEPTHFUNC_EQUAL: DS.Desc.DepthFunc = D3D11_COMPARISON_EQUAL; break; case GS_DEPTHFUNC_LEQUAL: DS.Desc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL; break; case GS_DEPTHFUNC_GREAT: DS.Desc.DepthFunc = D3D11_COMPARISON_GREATER; break; case GS_DEPTHFUNC_LESS: DS.Desc.DepthFunc = D3D11_COMPARISON_LESS; break; case GS_DEPTHFUNC_NOTEQUAL: DS.Desc.DepthFunc = D3D11_COMPARISON_NOT_EQUAL; break; case GS_DEPTHFUNC_GEQUAL: DS.Desc.DepthFunc = D3D11_COMPARISON_GREATER_EQUAL; break; case GS_DEPTHFUNC_ALWAYS: DS.Desc.DepthFunc = D3D11_COMPARISON_ALWAYS; break; } } if (Changed & (GS_WIREFRAME)) { bDirtyRS = true; if (st & GS_WIREFRAME) { RS.Desc.FillMode = D3D11_FILL_WIREFRAME; } else { RS.Desc.FillMode = D3D11_FILL_SOLID; } } if (Changed & GS_COLMASK_MASK) { bDirtyBS = true; uint32 nMask = 0xfffffff0 | ((st & GS_COLMASK_MASK) >> GS_COLMASK_SHIFT); nMask = (~nMask) & 0xf; for (size_t i = 0; i < RT_STACK_WIDTH; ++i) { BS.Desc.RenderTarget[i].RenderTargetWriteMask = nMask; } } if (Changed & GS_BLEND_MASK) { bDirtyBS = true; if (st & GS_BLEND_MASK) { if (CV_r_measureoverdraw && (m_RP.m_nRendFlags & SHDF_ALLOWHDR)) { st = (st & ~GS_BLEND_MASK) | (GS_BLSRC_ONE | GS_BLDST_ONE); st &= ~GS_ALPHATEST_MASK; } // todo: add separate alpha blend support for mrt for (size_t i = 0; i < RT_STACK_WIDTH; ++i) { BS.Desc.RenderTarget[i].BlendEnable = TRUE; } // Source factor switch (st & GS_BLSRC_MASK) { case GS_BLSRC_ZERO: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ZERO; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ZERO; break; case GS_BLSRC_ONE: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; break; case GS_BLSRC_DSTCOL: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_DEST_COLOR; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_DEST_ALPHA; break; case GS_BLSRC_ONEMINUSDSTCOL: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_INV_DEST_COLOR; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_INV_DEST_ALPHA; break; case GS_BLSRC_SRCALPHA: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_SRC_ALPHA; break; case GS_BLSRC_ONEMINUSSRCALPHA: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_INV_SRC_ALPHA; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; break; case GS_BLSRC_DSTALPHA: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_DEST_ALPHA; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_DEST_ALPHA; break; case GS_BLSRC_ONEMINUSDSTALPHA: BS.Desc.RenderTarget[0].SrcBlend 
= D3D11_BLEND_INV_DEST_ALPHA; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_INV_DEST_ALPHA; break; case GS_BLSRC_ALPHASATURATE: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA_SAT; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_SRC_ALPHA_SAT; break; case GS_BLSRC_SRCALPHA_A_ZERO: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ZERO; break; case GS_BLSRC_SRC1ALPHA: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC1_ALPHA; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_SRC1_ALPHA; break; default: BS.Desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; break; } //Destination factor switch (st & GS_BLDST_MASK) { case GS_BLDST_ZERO: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_ZERO; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; break; case GS_BLDST_ONE: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_ONE; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ONE; break; case GS_BLDST_SRCCOL: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC_COLOR; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_SRC_ALPHA; break; case GS_BLDST_ONEMINUSSRCCOL: if (m_nHDRType == 1 && (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_HDR)) { BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_ONE; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ONE; } else { BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_COLOR; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; } break; case GS_BLDST_SRCALPHA: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_SRC_ALPHA; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_SRC_ALPHA; break; case GS_BLDST_ONEMINUSSRCALPHA: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; break; case GS_BLDST_DSTALPHA: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_DEST_ALPHA; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_DEST_ALPHA; break; case GS_BLDST_ONEMINUSDSTALPHA: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_DEST_ALPHA; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_DEST_ALPHA; break; case GS_BLDST_ONE_A_ZERO: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_ONE; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; break; case GS_BLDST_ONEMINUSSRC1ALPHA: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC1_ALPHA; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC1_ALPHA; break; default: BS.Desc.RenderTarget[0].DestBlend = D3D11_BLEND_ZERO; BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; break; } //Blending operation D3D11_BLEND_OP blendOperation = D3D11_BLEND_OP_ADD; D3D11_BLEND_OP blendOperationAlpha = D3D11_BLEND_OP_ADD; switch (st & GS_BLEND_OP_MASK) { case GS_BLOP_MAX: blendOperation = D3D11_BLEND_OP_MAX; blendOperationAlpha = D3D11_BLEND_OP_MAX; break; case GS_BLOP_MIN: blendOperation = D3D11_BLEND_OP_MIN; blendOperationAlpha = D3D11_BLEND_OP_MIN; break; } //Separate blend modes for alpha switch (st & GS_BLALPHA_MASK) { case GS_BLALPHA_MIN: BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ONE; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; blendOperationAlpha = D3D11_BLEND_OP_MIN; break; case GS_BLALPHA_MAX: BS.Desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ONE; BS.Desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; blendOperationAlpha = D3D11_BLEND_OP_MAX; break; } // todo: add separate alpha blend support for mrt for (size_t i = 0; i < 
RT_STACK_WIDTH; ++i) { BS.Desc.RenderTarget[i].BlendOp = blendOperation; BS.Desc.RenderTarget[i].BlendOpAlpha = blendOperationAlpha; } } else { // todo: add separate alpha blend support for mrt for (size_t i = 0; i < RT_STACK_WIDTH; ++i) { BS.Desc.RenderTarget[i].BlendEnable = FALSE; } } // Need to disable color write to MRTs for shadow map alpha blending (not supported by all hw) if ((m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & RBPF_SHADOWGEN) && m_pNewTarget[1]) { bDirtyBS = true; uint32 nMask = 0xfffffff0 | ((st & GS_COLMASK_MASK) >> GS_COLMASK_SHIFT); nMask = (~nMask) & 0xf; BS.Desc.RenderTarget[0].RenderTargetWriteMask = nMask; if (st & GS_BLEND_MASK) { BS.Desc.IndependentBlendEnable = TRUE; for (size_t i = 1; i < RT_STACK_WIDTH; ++i) { BS.Desc.RenderTarget[i].RenderTargetWriteMask = 0; BS.Desc.RenderTarget[i].BlendEnable = FALSE; } } else { BS.Desc.IndependentBlendEnable = FALSE; for (size_t i = 1; i < RT_STACK_WIDTH; ++i) { BS.Desc.RenderTarget[i].RenderTargetWriteMask = nMask; BS.Desc.RenderTarget[i].BlendEnable = TRUE; } } } } m_RP.m_depthWriteStateUsed |= (st & GS_DEPTHWRITE) != 0; if (Changed & GS_DEPTHWRITE) { bDirtyDS = true; if (st & GS_DEPTHWRITE) { DS.Desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; } else { DS.Desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; } } if (Changed & GS_NODEPTHTEST) { bDirtyDS = true; if (st & GS_NODEPTHTEST) { DS.Desc.DepthEnable = FALSE; } else { DS.Desc.DepthEnable = TRUE; } } if (Changed & GS_STENCIL) { bDirtyDS = true; if (st & GS_STENCIL) { DS.Desc.StencilEnable = TRUE; } else { DS.Desc.StencilEnable = FALSE; } } { // Alpha test must be handled in shader in D3D10 API if (((st ^ m_RP.m_CurState) & GS_ALPHATEST_MASK) || ((st & GS_ALPHATEST_MASK) && (m_RP.m_CurAlphaRef != AlphaRef && AlphaRef != -1))) { if (st & GS_ALPHATEST_MASK) { m_RP.m_CurAlphaRef = AlphaRef; m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ALPHATEST]; } else { m_RP.m_FlagsShader_RT &= ~g_HWSR_MaskBit[HWSR_ALPHATEST]; } // When alpha test is turned on or off just changing m_RP.m_FlagsShader_RT doesn't work unless // an update is triggered. Setting this flag appears to cause the correct update. SThreadInfo* const pShaderThreadInfo = &(m_RP.m_TI[m_RP.m_nProcessThreadID]); pShaderThreadInfo->m_PersFlags |= RBPF_FP_DIRTY; } } if (bDirtyDS) { SetDepthState(&DS, m_nCurStencRef); } if (bDirtyRS) { SetRasterState(&RS); } if (bDirtyBS) { SetBlendState(&BS); } m_RP.m_CurState = st; } void CD3D9Renderer::FX_ZState(uint32& state) { assert(m_RP.m_pRootTechnique); // cannot be 0 here if ((m_RP.m_pRootTechnique->m_Flags & (FHF_WASZWRITE | FHF_POSITION_INVARIANT)) && m_RP.m_nPassGroupID == EFSLIST_GENERAL && SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID] == 0 && CV_r_usezpass) { if ((m_RP.m_nBatchFilter & (FB_GENERAL | FB_MULTILAYERS)) && (m_RP.m_nRendFlags & (SHDF_ALLOWHDR | SHDF_ALLOWPOSTPROCESS))) { if (!(m_RP.m_pRootTechnique->m_Flags & FHF_POSITION_INVARIANT)) { if (CRenderer::CV_r_measureoverdraw == 4) { // Hi-Z test only, fine depth test is disabled at the top of FX_SetState() state |= GS_DEPTHFUNC_HIZEQUAL; } else { state |= GS_DEPTHFUNC_EQUAL; } } state &= ~(GS_DEPTHWRITE | GS_ALPHATEST_MASK); } } } void CD3D9Renderer::FX_HairState(uint32& nState, const SShaderPass* pPass) { if ((m_RP.m_nPassGroupID == EFSLIST_GENERAL || m_RP.m_nPassGroupID == EFSLIST_TRANSP) && !(m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & (RBPF_SHADOWGEN | RBPF_ZPASS)) && !(m_RP.m_PersFlags2 & (RBPF2_MOTIONBLURPASS))) { // reset quality settings. 
// BEWARE: these are used by shadows as well
        m_RP.m_FlagsShader_RT &= ~(g_HWSR_MaskBit[HWSR_TILED_SHADING] | g_HWSR_MaskBit[HWSR_QUALITY1]);
        m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_QUALITY];

        // force per object fog
        m_RP.m_FlagsShader_RT |= (g_HWSR_MaskBit[HWSR_FOG] | g_HWSR_MaskBit[HWSR_ALPHABLEND]);

        if (CV_r_DeferredShadingTiled && CV_r_DeferredShadingTiledHairQuality > 0)
        {
            m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_TILED_SHADING];
            if (CV_r_DeferredShadingTiledHairQuality > 1)
            {
                m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_QUALITY1];
            }
        }

        if ((pPass->m_RenderState & GS_DEPTHFUNC_MASK) == GS_DEPTHFUNC_LESS)
        {
            nState = (nState & ~(GS_BLEND_MASK | GS_DEPTHFUNC_MASK));
            nState |= GS_DEPTHFUNC_LESS;
            if ((m_RP.m_nPassGroupID == EFSLIST_TRANSP) && (m_RP.m_pShader->m_Flags2 & EF2_DEPTH_FIXUP) && RenderCapabilities::SupportsDualSourceBlending())
            {
                nState |= GS_BLSRC_SRC1ALPHA | GS_BLDST_ONEMINUSSRC1ALPHA | GS_BLALPHA_MIN;
            }
            else
            {
                nState |= GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA;
            }

            if (pPass->m_RenderState & GS_DEPTHWRITE)
            {
                nState |= GS_DEPTHWRITE;
            }
            else
            {
                nState &= ~GS_DEPTHWRITE;
            }
        }
        else
        {
            nState = (nState & ~(GS_BLEND_MASK | GS_DEPTHFUNC_MASK));
            nState |= GS_DEPTHFUNC_EQUAL /*| GS_DEPTHWRITE*/;
        }
    }
}

void CD3D9Renderer::FX_CommitStates(const SShaderTechnique* pTech, const SShaderPass* pPass, bool bUseMaterialState)
{
    FUNCTION_PROFILER_RENDER_FLAT
    uint32 State = 0;
    int AlphaRef = pPass->m_AlphaRef == 0xff ? -1 : pPass->m_AlphaRef;
    SRenderPipeline& RESTRICT_REFERENCE rRP = m_RP;
    SThreadInfo& RESTRICT_REFERENCE rTI = rRP.m_TI[rRP.m_nProcessThreadID];

    if (rRP.m_pCurObject->m_RState)
    {
        switch (rRP.m_pCurObject->m_RState & OS_TRANSPARENT)
        {
        case OS_ALPHA_BLEND:
            State = GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA;
            break;
        case OS_ADD_BLEND:
            // In HDR mode, this is equivalent to pure additive GS_BLSRC_ONE | GS_BLDST_ONE.
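            // (FX_SetState remaps GS_BLDST_ONEMINUSSRCCOL to D3D11_BLEND_ONE when m_nHDRType == 1 and
            //  RBPF_HDR is set, so in HDR this state resolves to ONE/ONE additive blending; otherwise it
            //  stays ONE/INV_SRC_COLOR, i.e. a soft-additive blend.)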
State = GS_BLSRC_ONE | GS_BLDST_ONEMINUSSRCCOL; break; case OS_MULTIPLY_BLEND: State = GS_BLSRC_DSTCOL | GS_BLDST_SRCCOL; break; } if (rRP.m_pCurObject->m_RState & OS_NODEPTH_TEST) { State |= GS_NODEPTHTEST; } AlphaRef = 0; } else { State = pPass->m_RenderState; } if (bUseMaterialState && rRP.m_MaterialStateOr != 0) { if (rRP.m_MaterialStateOr & GS_ALPHATEST_MASK) { AlphaRef = rRP.m_MaterialAlphaRef; } State &= ~rRP.m_MaterialStateAnd; State |= rRP.m_MaterialStateOr; } //This has higher priority than material states as for alphatested material //it is forced to use depth writing (FX_SetResourcesState) if (rRP.m_pCurObject->m_RState & OS_TRANSPARENT) { State &= ~GS_DEPTHWRITE; } if (!(pTech->m_Flags & FHF_POSITION_INVARIANT) && !(pPass->m_PassFlags & SHPF_FORCEZFUNC)) { FX_ZState(State); } if (bUseMaterialState && (rRP.m_pCurObject->m_fAlpha < 1.0f) && !rRP.m_bIgnoreObjectAlpha) { if (pTech && pTech->m_NameCRC == m_techShadowGen) { // If rendering to a shadow map: State = (State | GS_DEPTHWRITE); } else { // If not rendering to a shadow map: State = (State & ~(GS_BLEND_MASK | GS_DEPTHWRITE)) | (GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA); } } State &= ~rRP.m_ForceStateAnd; State |= rRP.m_ForceStateOr; if (rRP.m_pShader->m_Flags2 & EF2_HAIR) { FX_HairState(State, pPass); } else if ((m_RP.m_nPassGroupID == EFSLIST_TRANSP) && (m_RP.m_nSortGroupID == 1) && !(rRP.m_PersFlags2 & RBPF2_MOTIONBLURPASS) && !(m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & (RBPF_SHADOWGEN | RBPF_ZPASS))) { State &= ~GS_BLALPHA_MASK; // Depth fixup for transparent geometry if ((m_RP.m_pShader->m_Flags2 & EF2_DEPTH_FIXUP) && RenderCapabilities::SupportsDualSourceBlending()) { if (rRP.m_pCurObject->m_RState & OS_ALPHA_BLEND || ((State & (GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA)) == (GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA))) { State &= ~(GS_NOCOLMASK_A | GS_BLSRC_MASK | GS_BLDST_MASK); State |= GS_BLSRC_SRC1ALPHA | GS_BLDST_ONEMINUSSRC1ALPHA; rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ALPHABLEND] | g_HWSR_MaskBit[HWSR_DEPTHFIXUP]; // min blending on depth values (alpha channel) State |= GS_BLALPHA_MIN; } } } if ((rRP.m_PersFlags2 & RBPF2_ALLOW_DEFERREDSHADING) && (rRP.m_pShader->m_Flags & EF_SUPPORTSDEFERREDSHADING)) { if (rTI.m_PersFlags & RBPF_ZPASS) { if ((rRP.m_pShader->m_Flags & EF_DECAL) || rRP.m_nPassGroupID == EFSLIST_TERRAINLAYER) { State = (State & ~(GS_BLEND_MASK | GS_DEPTHWRITE | GS_DEPTHFUNC_MASK)); State |= GS_DEPTHFUNC_LEQUAL | GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA; rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ALPHABLEND]; } // Disable alpha writes - for alpha blend case we use default alpha value as a default power factor if (State & GS_BLEND_MASK) { State |= GS_COLMASK_RGB; } // Disable alpha testing/depth writes if geometry had a z-prepass if (!(rRP.m_PersFlags2 & RBPF2_ZPREPASS) && (rRP.m_RIs[0][0]->nBatchFlags & FB_ZPREPASS)) { State &= ~(GS_DEPTHWRITE | GS_DEPTHFUNC_MASK | GS_ALPHATEST_MASK); State |= GS_DEPTHFUNC_EQUAL; rRP.m_FlagsShader_RT &= ~g_HWSR_MaskBit[HWSR_ALPHATEST]; } } } { const AZ::u32 VelocityMask = FOB_MOTION_BLUR | FOB_VERTEX_VELOCITY | FOB_SKINNED; const AZ::u32 SoftwareSkinned = FOB_MOTION_BLUR | FOB_VERTEX_VELOCITY; if ((rRP.m_ObjFlags & VelocityMask) == SoftwareSkinned && (rRP.m_PersFlags2 & RBPF2_MOTIONBLURPASS) != 0) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_VERTEX_VELOCITY]; } } if (rRP.m_PersFlags2 & RBPF2_CUSTOM_RENDER_PASS) { rRP.m_FlagsShader_RT &= ~g_HWSR_MaskBit[HWSR_SAMPLE0]; if (CRenderer::CV_r_customvisions == 2) { rRP.m_FlagsShader_RT |= 
                g_HWSR_MaskBit[HWSR_SAMPLE0];
            State |= GS_BLSRC_ONE | GS_BLDST_ONE;
        }
        else if (CRenderer::CV_r_customvisions == 3)
        {
            rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_SAMPLE2];

            // Ignore depth thresholding in Post3DRender
            if (rRP.m_PersFlags2 & RBPF2_POST_3D_RENDERER_PASS)
            {
                rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_SAMPLE5];
            }
        }
    }

    if (m_NewViewport.fMaxZ <= 0.01f)
    {
        State &= ~GS_DEPTHWRITE;
    }

    // Intermediate solution to disable depth testing in 3D HUD
    if (rRP.m_pCurObject->m_ObjFlags & FOB_RENDER_AFTER_POSTPROCESSING)
    {
        State &= ~GS_DEPTHFUNC_MASK;
        State |= GS_NODEPTHTEST;
    }

    if (rRP.m_PersFlags2 & RBPF2_DISABLECOLORWRITES)
    {
        State &= ~GS_COLMASK_MASK;
        State |= GS_COLMASK_NONE;
    }

    FX_SetState(State, AlphaRef);

    if (State & GS_ALPHATEST_MASK)
    {
        rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ALPHATEST];
    }

    int nBlend;
    if (nBlend = (rRP.m_CurState & (GS_BLEND_MASK & ~GS_BLALPHA_MASK)))
    {
        // set alpha blend shader flag when the blend mode for color is set to alpha blend.
        if (nBlend == (GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA)
            || nBlend == (GS_BLSRC_SRCALPHA | GS_BLDST_ONE)
            || nBlend == (GS_BLSRC_ONE | GS_BLDST_ONEMINUSSRCALPHA)
            || nBlend == (GS_BLSRC_SRCALPHA_A_ZERO | GS_BLDST_ONEMINUSSRCALPHA))
        {
            rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ALPHABLEND];
        }
    }

    // Enable position invariant flag to disable fast math on certain vertex shader operations that affect position calculations.
    // This fixes issues with geometry that renders in both z-prepass and any other pass from having precision
    // issues when executing different vertex shaders and expecting the same position output results.
    if (rRP.m_RIs[0][0]->nBatchFlags & FB_ZPREPASS)
    {
        rRP.m_FlagsShader_MDV |= MDV_POSITION_INVARIANT;
    }
}

//=====================================================================================

bool CD3D9Renderer::FX_GetTargetSurfaces(CTexture* pTarget, D3DSurface*& pTargSurf, SRTStack* pCur, int nCMSide, int nTarget, uint32 nTileCount)
{
    if (pTarget)
    {
        if (!CTexture::IsTextureExist(pTarget) && !pTarget->m_bNoDevTexture)
        {
            pTarget->CreateRenderTarget(eTF_Unknown, pTarget->GetClearColor());
        }
        if (!CTexture::IsTextureExist(pTarget))
        {
            return false;
        }
        pTargSurf = pTarget->GetSurface(nCMSide, 0);
    }
    else
    {
        pTargSurf = NULL;
    }
    return true;
}

bool CD3D9Renderer::FX_SetRenderTarget(int nTarget, void* pTargetSurf, SDepthTexture* pDepthTarget, uint32 nTileCount)
{
    if (nTarget >= RT_STACK_WIDTH || m_nRTStackLevel[nTarget] >= MAX_RT_STACK)
    {
        return false;
    }

    HRESULT hr = 0;
    SRTStack* pCur = &m_RTStack[nTarget][m_nRTStackLevel[nTarget]];
    pCur->m_pTarget = static_cast<D3DSurface*>(pTargetSurf);
    pCur->m_pSurfDepth = pDepthTarget;
    pCur->m_pDepth = pDepthTarget ?
(D3DDepthSurface*)pDepthTarget->pSurf : NULL; pCur->m_pTex = NULL; #ifdef _DEBUG if (m_nRTStackLevel[nTarget] == 0 && nTarget == 0) { assert(pCur->m_pTarget == m_pBackBuffer && (pDepthTarget == nullptr || pCur->m_pDepth == m_pNativeZBuffer)); } #endif pCur->m_bNeedReleaseRT = false; pCur->m_bWasSetRT = false; pCur->m_bWasSetD = false; m_pNewTarget[nTarget] = pCur; if (nTarget == 0) { m_RP.m_StateOr &= ~GS_COLMASK_NONE; } m_nMaxRT2Commit = max(m_nMaxRT2Commit, nTarget); m_RP.m_nCommitFlags |= FC_TARGETS; return (hr == S_OK); } bool CD3D9Renderer::FX_PushRenderTarget(int nTarget, void* pTargetSurf, SDepthTexture* pDepthTarget, uint32 nTileCount) { assert(m_pRT->IsRenderThread()); if (nTarget >= RT_STACK_WIDTH || m_nRTStackLevel[nTarget] >= MAX_RT_STACK) { return false; } m_nRTStackLevel[nTarget]++; return FX_SetRenderTarget(nTarget, pTargetSurf, pDepthTarget, nTileCount); } bool CD3D9Renderer::FX_SetRenderTarget(int nTarget, CTexture* pTarget, SDepthTexture* pDepthTarget, bool bPush, int nCMSide, bool bScreenVP, uint32 nTileCount) { assert(!nTarget || !pDepthTarget); assert((unsigned int) nTarget < RT_STACK_WIDTH); if (pTarget && !(pTarget->GetFlags() & FT_USAGE_RENDERTARGET)) { CryFatalError("Attempt to bind a non-render-target texture as a render-target"); } if (pTarget && pDepthTarget) { if (pTarget->GetWidth() > pDepthTarget->nWidth || pTarget->GetHeight() > pDepthTarget->nHeight) { iLog->LogError("Error: RenderTarget '%s' size:%i x %i DepthSurface size:%i x %i \n", pTarget->GetName(), pTarget->GetWidth(), pTarget->GetHeight(), pDepthTarget->nWidth, pDepthTarget->nHeight); } assert(pTarget->GetWidth() <= pDepthTarget->nWidth); assert(pTarget->GetHeight() <= pDepthTarget->nHeight); } if (nTarget >= RT_STACK_WIDTH || m_nRTStackLevel[nTarget] >= MAX_RT_STACK) { return false; } SRTStack* pCur = &m_RTStack[nTarget][m_nRTStackLevel[nTarget]]; D3DSurface* pTargSurf; if (pCur->m_pTex) { if (pCur->m_bNeedReleaseRT) { pCur->m_bNeedReleaseRT = false; } m_pNewTarget[0]->m_bWasSetRT = false; m_pNewTarget[0]->m_pTarget = NULL; pCur->m_pTex->DecrementRenderTargetUseCount(); } if (!pTarget) { pTargSurf = NULL; } else { if (!FX_GetTargetSurfaces(pTarget, pTargSurf, pCur, nCMSide, nTarget, nTileCount)) { return false; } } if (pTarget) { int nFrameID = m_RP.m_TI[m_RP.m_nProcessThreadID].m_nFrameUpdateID; if (pTarget && pTarget->m_nUpdateFrameID != nFrameID) { pTarget->m_nUpdateFrameID = nFrameID; } } if (!bPush && pDepthTarget && pDepthTarget->pSurf != pCur->m_pDepth) { //assert(pCur->m_pDepth == m_pCurDepth); //assert(pCur->m_pDepth != m_pZBuffer); // Attempt to override default Z-buffer surface if (pCur->m_pSurfDepth) { pCur->m_pSurfDepth->bBusy = false; } } pCur->m_pDepth = pDepthTarget ? 
(D3DDepthSurface*)pDepthTarget->pSurf : NULL; pCur->m_ClearFlags = 0; pCur->m_pTarget = pTargSurf; pCur->m_bNeedReleaseRT = true; pCur->m_bWasSetRT = false; pCur->m_bWasSetD = false; pCur->m_bScreenVP = bScreenVP; if (pDepthTarget) { pDepthTarget->bBusy = true; pDepthTarget->nFrameAccess = m_RP.m_TI[m_RP.m_nProcessThreadID].m_nFrameUpdateID; } if (pTarget) { pCur->m_pTex = pTarget; } else if (pDepthTarget) { pCur->m_pTex = (CTexture*)pDepthTarget->pTex; } else { pCur->m_pTex = NULL; } if (pCur->m_pTex) { pCur->m_pTex->IncrementRenderTargetUseCount(); } pCur->m_pSurfDepth = pDepthTarget; if (pTarget) { pCur->m_Width = pTarget->GetWidth(); pCur->m_Height = pTarget->GetHeight(); } else if (pDepthTarget) { pCur->m_Width = pDepthTarget->nWidth; pCur->m_Height = pDepthTarget->nHeight; } if (!nTarget) { if (bScreenVP) { RT_SetViewport(m_MainViewport.nX, m_MainViewport.nY, m_MainViewport.nWidth, m_MainViewport.nHeight); } else { RT_SetViewport(0, 0, pCur->m_Width, pCur->m_Height); } } m_pNewTarget[nTarget] = pCur; m_nMaxRT2Commit = max(m_nMaxRT2Commit, nTarget); m_RP.m_nCommitFlags |= FC_TARGETS; return true; } CTexture* CD3D9Renderer::FX_GetCurrentRenderTarget(int target) { return m_RTStack[target][gcpRendD3D->m_nRTStackLevel[target]].m_pTex; } D3DSurface* CD3D9Renderer::FX_GetCurrentRenderTargetSurface(int target) const { return m_RTStack[target][gcpRendD3D->m_nRTStackLevel[target]].m_pTarget; } void CD3D9Renderer::FX_SetColorDontCareActions(int const nTarget, bool const loadDontCare, bool const storeDontCare) { assert((unsigned int) nTarget < RT_STACK_WIDTH); SRTStack* srt = m_pNewTarget[nTarget]; assert(srt); if (srt->m_pTarget) { // Call appropriate extension depending on rendering platform #ifdef CRY_USE_METAL DXMETALSetColorDontCareActions(srt->m_pTarget, loadDontCare, storeDontCare); #endif #if defined(ANDROID) DXGLSetColorDontCareActions(srt->m_pTarget, loadDontCare, storeDontCare); #endif } } void CD3D9Renderer::FX_SetDepthDontCareActions(int const nTarget, bool const loadDontCare, bool const storeDontCare) { assert((unsigned int) nTarget < RT_STACK_WIDTH); SRTStack* srt = m_pNewTarget[nTarget]; assert(srt); if (srt->m_pDepth) { // Call appropriate extension depending on rendering platform #ifdef CRY_USE_METAL DXMETALSetDepthDontCareActions(srt->m_pDepth, loadDontCare, storeDontCare); #endif #if defined(ANDROID) DXGLSetDepthDontCareActions(srt->m_pDepth, loadDontCare, storeDontCare); #endif } } void CD3D9Renderer::FX_SetStencilDontCareActions(int const nTarget, bool const loadDontCare, bool const storeDontCare) { assert((unsigned int) nTarget < RT_STACK_WIDTH); SRTStack* srt = m_pNewTarget[nTarget]; assert(srt); if (srt->m_pDepth) { // Call appropriate extension depending on rendering platform #ifdef CRY_USE_METAL DXMETALSetStencilDontCareActions(srt->m_pDepth, loadDontCare, storeDontCare); #endif #if defined(ANDROID) DXGLSetStencilDontCareActions(srt->m_pDepth, loadDontCare, storeDontCare); #endif } } void CD3D9Renderer::FX_TogglePLS(bool const enable) { #if defined(OPENGL_ES) && !defined(DESKTOP_GLES) DXGLTogglePLS(&GetDeviceContext(), enable); #endif } bool CD3D9Renderer::FX_PushRenderTarget(int nTarget, CTexture* pTarget, SDepthTexture* pDepthTarget, int nCMSide, bool bScreenVP, uint32 nTileCount) { assert(m_pRT->IsRenderThread()); if (nTarget >= RT_STACK_WIDTH || m_nRTStackLevel[nTarget] == MAX_RT_STACK) { assert(0); return false; } m_nRTStackLevel[nTarget]++; return FX_SetRenderTarget(nTarget, pTarget, pDepthTarget, true, nCMSide, bScreenVP, nTileCount); } bool 
CD3D9Renderer::FX_RestoreRenderTarget(int nTarget) { if (nTarget >= RT_STACK_WIDTH || m_nRTStackLevel[nTarget] < 0) { return false; } SRTStack* pCur = &m_RTStack[nTarget][m_nRTStackLevel[nTarget]]; SRTStack* pPrev = &m_RTStack[nTarget][m_nRTStackLevel[nTarget] + 1]; if (pPrev->m_bNeedReleaseRT) { pPrev->m_bNeedReleaseRT = false; if (pPrev->m_pTarget && pPrev->m_pTarget == m_pNewTarget[nTarget]->m_pTarget) { m_pNewTarget[nTarget]->m_bWasSetRT = false; pPrev->m_pTarget = NULL; m_pNewTarget[nTarget]->m_pTarget = NULL; } } if (nTarget == 0) { if (pPrev->m_pSurfDepth) { pPrev->m_pSurfDepth->bBusy = false; pPrev->m_pSurfDepth = NULL; } } if (pPrev->m_pTex) { pPrev->m_pTex->DecrementRenderTargetUseCount(); pPrev->m_pTex = NULL; } if (!nTarget) { if (pCur->m_bScreenVP) { RT_SetViewport(m_MainViewport.nX, m_MainViewport.nY, m_MainViewport.nWidth, m_MainViewport.nHeight); } else if (!m_nRTStackLevel[nTarget]) { RT_SetViewport(0, 0, m_backbufferWidth, m_backbufferHeight); } else { RT_SetViewport(0, 0, pCur->m_Width, pCur->m_Height); } } pCur->m_bWasSetD = false; pCur->m_bWasSetRT = false; m_pNewTarget[nTarget] = pCur; m_nMaxRT2Commit = max(m_nMaxRT2Commit, nTarget); m_RP.m_nCommitFlags |= FC_TARGETS; return true; } bool CD3D9Renderer::FX_PopRenderTarget(int nTarget) { assert(m_pRT->IsRenderThread()); if (m_nRTStackLevel[nTarget] <= 0) { assert(0); return false; } m_nRTStackLevel[nTarget]--; return FX_RestoreRenderTarget(nTarget); } ////////////////////////////////////////////////////////////////////////// // REFACTOR BEGIN: (bethelz) Move scratch depth pool into its own class. SDepthTexture* CD3D9Renderer::FX_GetDepthSurface(int nWidth, int nHeight, bool bAA, bool shaderResourceView) { assert(m_pRT->IsRenderThread()); SDepthTexture* pSrf = NULL; D3D11_TEXTURE2D_DESC desc; uint32 i; int nBestX = -1; int nBestY = -1; for (i = 0; i < m_TempDepths.Num(); i++) { pSrf = m_TempDepths[i]; if (!pSrf->bBusy && pSrf->pSurf) { // verify that this texture supports binding as a shader resource if requested pSrf->pTarget->GetDesc(&desc); if (shaderResourceView && !(desc.BindFlags & D3D11_BIND_SHADER_RESOURCE)) { continue; } if (pSrf->nWidth == nWidth && pSrf->nHeight == nHeight) { nBestX = i; break; } if (nBestX < 0 && pSrf->nWidth == nWidth && pSrf->nHeight >= nHeight) { nBestX = i; } else if (nBestY < 0 && pSrf->nWidth >= nWidth && pSrf->nHeight == nHeight) { nBestY = i; } } } if (nBestX >= 0) { return m_TempDepths[nBestX]; } if (nBestY >= 0) { return m_TempDepths[nBestY]; } bool allowUsingLargerRT = true; #if defined(CRY_OPENGL_DO_NOT_ALLOW_LARGER_RT) allowUsingLargerRT = false; #elif defined(SUPPORT_D3D_DEBUG_RUNTIME) if (CV_d3d11_debugruntime) { allowUsingLargerRT = false; } #endif if (allowUsingLargerRT) { for (i = 0; i < m_TempDepths.Num(); i++) { pSrf = m_TempDepths[i]; // verify that this texture supports binding as a shader resource if requested pSrf->pTarget->GetDesc(&desc); if (shaderResourceView && !(desc.BindFlags & D3D11_BIND_SHADER_RESOURCE)) { continue; } if (pSrf->nWidth >= nWidth && pSrf->nHeight >= nHeight && !pSrf->bBusy) { break; } } } else { i = m_TempDepths.Num(); } if (i == m_TempDepths.Num()) { pSrf = CreateDepthSurface(nWidth, nHeight, shaderResourceView); if (pSrf != nullptr) { if (pSrf->pSurf != nullptr) { m_TempDepths.AddElem(pSrf); } else { DestroyDepthSurface(pSrf); pSrf = nullptr; } } } return pSrf; } // Commit changes states to the hardware before drawing bool CD3D9Renderer::FX_CommitStreams(SShaderPass* sl, bool bSetVertexDecl) { FUNCTION_PROFILER_RENDER_FLAT 
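    // FX_CommitStreams finalizes the geometry inputs for the pending draw: it sets the input layout via
    // FX_SetVertexDeclaration when requested, then either lets the render element bind its own streams
    // (m_pRE->mfPreDraw), binds externally supplied vertex/index buffers (RBSI_EXTERN_VMEM_BUFFERS), or
    // fills and binds transient dynamic vertex/index buffers from the system-memory stream pointers.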
//PROFILE_FRAME(Draw_Predraw); SRenderPipeline& RESTRICT_REFERENCE rp(m_RP); #if ENABLE_NORMALSTREAM_SUPPORT if (CHWShader_D3D::s_pCurInstHS) { rp.m_FlagsStreams_Stream |= (1 << VSF_NORMALS); rp.m_FlagsStreams_Decl |= (1 << VSF_NORMALS); } #endif HRESULT hr; if (bSetVertexDecl) { if ((m_RP.m_ObjFlags & FOB_POINT_SPRITE) && !CHWShader_D3D::s_pCurInstHS) { rp.m_FlagsStreams_Stream |= VSM_INSTANCED; rp.m_FlagsStreams_Decl |= VSM_INSTANCED; } hr = FX_SetVertexDeclaration(rp.m_FlagsStreams_Decl, rp.m_CurVFormat); if (FAILED(hr)) { return false; } } if (rp.m_pRE) { bool bRet = rp.m_pRE->mfPreDraw(sl); return bRet; } else if (rp.m_RendNumVerts && rp.m_RendNumIndices) { if (rp.m_FlagsPerFlush & RBSI_EXTERN_VMEM_BUFFERS) { assert(rp.m_pExternalVertexBuffer); assert(rp.m_pExternalIndexBuffer); // bind out external vertex/index buffer to use those directly, the client code has to set them up correctly rp.m_pExternalVertexBuffer->Bind(0, 0, rp.m_StreamStride); rp.m_pExternalIndexBuffer->Bind(0); // adjust the first index to render from as well as // other renderer stats rp.m_FirstIndex = rp.m_nExternalVertexBufferFirstIndex; rp.m_FirstVertex = rp.m_nExternalVertexBufferFirstVertex; rp.m_PS[rp.m_nProcessThreadID].m_DynMeshUpdateBytes += rp.m_StreamStride * rp.m_RendNumVerts; rp.m_PS[rp.m_nProcessThreadID].m_DynMeshUpdateBytes += rp.m_RendNumIndices * sizeof(short); // clear external video memory buffer flag rp.m_FlagsPerFlush &= ~RBSI_EXTERN_VMEM_BUFFERS; rp.m_nExternalVertexBufferFirstIndex = 0; rp.m_nExternalVertexBufferFirstVertex = 0; rp.m_pExternalVertexBuffer = NULL; rp.m_pExternalIndexBuffer = NULL; } else { /* NOTE: * It is extremely important that transient dynamic VBs are filled in binding order. * In the following case, rp.m_StreamPtr.Ptr verts data should be filled PRIOR the tangents. * This is due to underlying restrictions of certain rendering layers such as METAL. * The METAL renderer uses a ring buffer for transient data mapped to dynamic VBs. * To calculate the proper offsets when binding the buffers, it assumes the map/unmap * order is following an increasing VB slots binding. * * If the order is switched (tangents are filled before positions), the tangent data * will be used in the slot before the position which will result in a mismatch with * the expected IA layout. This will either cause artifacts or nothing to be rendered. 
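             *
             * Illustrative fill/bind order, mirroring the code below (argument names here are
             * placeholders rather than the actual locals):
             *
             *   TempDynVBAny::CreateFillAndBind(generalVerts, numVerts, 0, stride);  // slot 0: general stream first
             *   TempDynVB::CreateFillAndBind(tangents, numVerts, VSF_TANGENTS);      // higher slot: tangents second
             *   TempDynIB16::CreateFillAndBind(indices, numIndices);                 // index data last
             *
             * Filling the tangent stream before the general stream would make the METAL ring-buffer
             * offsets disagree with the input-assembler slot order described above.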
*/ { TempDynVBAny::CreateFillAndBind(rp.m_StreamPtr.Ptr, rp.m_RendNumVerts, 0, rp.m_StreamStride); rp.m_FirstVertex = 0; rp.m_PS[rp.m_nProcessThreadID].m_DynMeshUpdateBytes += rp.m_RendNumVerts * rp.m_StreamStride; } if (rp.m_FlagsStreams_Stream & VSM_TANGENTS) { TempDynVB::CreateFillAndBind((const SPipTangents*) rp.m_StreamPtrTang.Ptr, rp.m_RendNumVerts, VSF_TANGENTS); rp.m_PersFlags1 |= RBPF1_USESTREAM << VSF_TANGENTS; rp.m_PS[rp.m_nProcessThreadID].m_DynMeshUpdateBytes += rp.m_RendNumVerts * sizeof(SPipTangents); } else if (rp.m_PersFlags1 & (RBPF1_USESTREAM << (VSF_TANGENTS | VSF_QTANGENTS))) { rp.m_PersFlags1 &= ~(RBPF1_USESTREAM << (VSF_TANGENTS | VSF_QTANGENTS)); FX_SetVStream(1, NULL, 0, 0); } { TempDynIB16::CreateFillAndBind(rp.m_SysRendIndices, rp.m_RendNumIndices); rp.m_FirstIndex = 0; rp.m_PS[rp.m_nProcessThreadID].m_DynMeshUpdateBytes += rp.m_RendNumIndices * sizeof(short); } } } return true; } // Draw current indexed mesh void CD3D9Renderer::FX_DrawIndexedMesh (const eRenderPrimitiveType nPrimType) { DETAILED_PROFILE_MARKER("FX_DrawIndexedMesh"); FX_Commit(); // Don't render fallback in DX11 if (!CHWShader_D3D::s_pCurInstVS || !CHWShader_D3D::s_pCurInstPS || CHWShader_D3D::s_pCurInstVS->m_bFallback || CHWShader_D3D::s_pCurInstPS->m_bFallback) { return; } if (CHWShader_D3D::s_pCurInstGS && CHWShader_D3D::s_pCurInstGS->m_bFallback) { return; } PROFILE_FRAME(Draw_DrawCall); if (nPrimType != eptHWSkinGroups) { eRenderPrimitiveType eType = nPrimType; int nFirstI = m_RP.m_FirstIndex; int nNumI = m_RP.m_RendNumIndices; #ifdef TESSELLATION_RENDERER if (CHWShader_D3D::s_pCurInstHS) { FX_SetAdjacencyOffsetBuffer(); eType = ept3ControlPointPatchList; } #endif FX_DrawIndexedPrimitive(eType, 0, 0, m_RP.m_RendNumVerts, nFirstI, nNumI); #if defined(ENABLE_PROFILING_CODE) # ifdef TESSELLATION_RENDERER m_RP.m_PS[m_RP.m_nProcessThreadID].m_nPolygonsByTypes[m_RP.m_nPassGroupDIP][EVCT_STATIC][m_RP.m_nBatchFilter == FB_Z] += (nPrimType == eptTriangleList || nPrimType == ept3ControlPointPatchList ? nNumI / 3 : nNumI - 2); # else m_RP.m_PS[m_RP.m_nProcessThreadID].m_nPolygonsByTypes[m_RP.m_nPassGroupDIP][EVCT_STATIC][m_RP.m_nBatchFilter == FB_Z] += (nPrimType == eptTriangleList ? nNumI / 3 : nNumI - 2); # endif #endif } else { CRenderChunk* pChunk = m_RP.m_pRE->mfGetMatInfo(); if (pChunk) { int nNumVerts = pChunk->nNumVerts; int nFirstIndexId = pChunk->nFirstIndexId; int nNumIndices = pChunk->nNumIndices; if (m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags & (RBPF_SHADOWGEN) && (gRenDev->m_RP.m_PersFlags2 & RBPF2_DISABLECOLORWRITES)) { _smart_ptr pMaterial = (m_RP.m_pCurObject) ? 
(m_RP.m_pCurObject->m_pCurrMaterial) : NULL; ((CREMeshImpl*)m_RP.m_pRE)->m_pRenderMesh->AddShadowPassMergedChunkIndicesAndVertices(pChunk, pMaterial, nNumVerts, nNumIndices); } eRenderPrimitiveType eType = eptTriangleList; #ifdef TESSELLATION_RENDERER if (CHWShader_D3D::s_pCurInstHS) { FX_SetAdjacencyOffsetBuffer(); eType = ept3ControlPointPatchList; } #endif FX_DrawIndexedPrimitive(eType, 0, 0, nNumVerts, nFirstIndexId, nNumIndices); #if defined(ENABLE_PROFILING_CODE) m_RP.m_PS[m_RP.m_nProcessThreadID].m_nPolygonsByTypes[m_RP.m_nPassGroupDIP][EVCT_SKINNED][m_RP.m_nBatchFilter == FB_Z] += (pChunk->nNumIndices / 3); #endif } } } //==================================================================================== TArray CD3D9Renderer::s_tempObjects[2]; TArray CD3D9Renderer::s_tempRIs; // Actual drawing of instances void CD3D9Renderer::FX_DrawInstances(CShader* ef, SShaderPass* slw, int nRE, uint32 nStartInst, uint32 nLastInst, uint32 nUsedAttr, byte* InstanceData, int nInstAttrMask, byte Attributes[], short dwCBufSlot) { DETAILED_PROFILE_MARKER("FX_DrawInstances"); uint32 i; CHWShader_D3D* vp = (CHWShader_D3D*)slw->m_VShader; SRenderPipeline& RESTRICT_REFERENCE rRP = m_RP; SRendItem** rRIs = &(rRP.m_RIs[nRE][0]); if (!CHWShader_D3D::s_pCurInstVS || !CHWShader_D3D::s_pCurInstPS || CHWShader_D3D::s_pCurInstVS->m_bFallback || CHWShader_D3D::s_pCurInstPS->m_bFallback) { return; } PREFAST_SUPPRESS_WARNING(6326) if (!nStartInst) { // Set the stream 3 to be per instance data and iterate once per instance rRP.m_PersFlags1 &= ~(RBPF1_USESTREAM << 3); int nCompared = 0; if (!FX_CommitStreams(slw, false)) { return; } int StreamMask = rRP.m_FlagsStreams_Decl >> 1; SVertexDeclaration* vd = 0; // See if the desired vertex declaration already exists in m_CustomVD for (i = 0; i < rRP.m_CustomVD.Num(); i++) { vd = rRP.m_CustomVD[i]; if (vd->StreamMask == StreamMask && rRP.m_CurVFormat == vd->VertexFormat && vd->InstAttrMask == nInstAttrMask && vd->m_vertexShader == CHWShader_D3D::s_pCurInstVS) { break; } } // If the vertex declaration was not found, create it if (i == rRP.m_CustomVD.Num()) { vd = new SVertexDeclaration; rRP.m_CustomVD.AddElem(vd); vd->StreamMask = StreamMask; vd->VertexFormat = rRP.m_CurVFormat; vd->InstAttrMask = nInstAttrMask; vd->m_pDeclaration = NULL; vd->m_vertexShader = CHWShader_D3D::s_pCurInstVS; // Copy the base vertex format declaration SOnDemandD3DVertexDeclaration Decl; EF_OnDemandVertexDeclaration(Decl, StreamMask, rRP.m_CurVFormat, false, false); int nElementsToCopy = Decl.m_Declaration.size(); for (i = 0; i < (uint32)nElementsToCopy; i++) { vd->m_Declaration.push_back(Decl.m_Declaration[i]); } // Add additional D3D11_INPUT_ELEMENT_DESCs with the TEXCOORD semantic to the end of the vertex declaration to handle the per instance data uint32 texCoordSemanticIndexOffset = rRP.m_CurVFormat.GetAttributeUsageCount(AZ::Vertex::AttributeUsage::TexCoord); D3D11_INPUT_ELEMENT_DESC elemTC = {"TEXCOORD", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 3, 0, D3D11_INPUT_PER_INSTANCE_DATA, 1}; // texture for (i = 0; i < nUsedAttr; i++) { elemTC.AlignedByteOffset = i * INST_PARAM_SIZE; elemTC.SemanticIndex = Attributes[i] + texCoordSemanticIndexOffset; vd->m_Declaration.push_back(elemTC); } } if (!vd->m_pDeclaration) { HRESULT hr = S_OK; assert (CHWShader_D3D::s_pCurInstVS && CHWShader_D3D::s_pCurInstVS->m_pShaderData); if (FAILED(hr = GetDevice().CreateInputLayout(&vd->m_Declaration[0], vd->m_Declaration.size(), CHWShader_D3D::s_pCurInstVS->m_pShaderData, CHWShader_D3D::s_pCurInstVS->m_nDataSize, 
&vd->m_pDeclaration))) { return; } } if (m_pLastVDeclaration != vd->m_pDeclaration) { m_pLastVDeclaration = vd->m_pDeclaration; m_DevMan.BindVtxDecl(vd->m_pDeclaration); } } int nInsts = nLastInst - nStartInst + 1; { //PROFILE_FRAME(Draw_ShaderIndexMesh); int nPolysPerInst = rRP.m_RendNumIndices / 3; #ifndef _RELEASE char instanceLabel[64]; if (CV_r_geominstancingdebug) { snprintf(instanceLabel, 63, "Instances: %d", nInsts); PROFILE_LABEL_PUSH(instanceLabel); } #endif assert (rRP.m_pRE && rRP.m_pRE->mfGetType() == eDATA_Mesh); FX_Commit(); D3D11_PRIMITIVE_TOPOLOGY eTopology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; #ifdef TESSELLATION_RENDERER if (CHWShader_D3D::s_pCurInstHS) { FX_SetAdjacencyOffsetBuffer(); eTopology = D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST; } #endif SetPrimitiveTopology(eTopology); m_DevMan.DrawIndexedInstanced(rRP.m_RendNumIndices, nInsts, ApplyIndexBufferBindOffset(rRP.m_FirstIndex), 0, 0); #ifndef _RELEASE if (CV_r_geominstancingdebug) { PROFILE_LABEL_POP(instanceLabel); } #endif #if defined(ENABLE_PROFILING_CODE) int nPolysAll = nPolysPerInst * nInsts; rRP.m_PS[rRP.m_nProcessThreadID].m_nPolygons[rRP.m_nPassGroupDIP] += rRP.m_RendNumIndices / 3; rRP.m_PS[rRP.m_nProcessThreadID].m_nDIPs[rRP.m_nPassGroupDIP] += nInsts; rRP.m_PS[rRP.m_nProcessThreadID].m_nPolygons[rRP.m_nPassGroupDIP] += nPolysAll; rRP.m_PS[rRP.m_nProcessThreadID].m_nInsts += nInsts; rRP.m_PS[rRP.m_nProcessThreadID].m_nInstCalls++; #if defined(ENABLE_STATOSCOPE_RELEASE) rRP.m_PS[rRP.m_nProcessThreadID].m_RendHWInstancesPolysOne += nPolysPerInst; rRP.m_PS[rRP.m_nProcessThreadID].m_RendHWInstancesPolysAll += nPolysAll; rRP.m_PS[rRP.m_nProcessThreadID].m_NumRendHWInstances += nInsts; rRP.m_PS[rRP.m_nProcessThreadID].m_RendHWInstancesDIPs++; #endif #endif } } #define MAX_HWINST_PARAMS_CONST (240 - VSCONST_INSTDATA) // Draw geometry instances in single DIP using HW geom. 
instancing (StreamSourceFreq) void CD3D9Renderer::FX_DrawShader_InstancedHW(CShader* ef, SShaderPass* slw) { #if defined(HW_INSTANCING_ENABLED) PROFILE_FRAME(DrawShader_Instanced); SRenderPipeline& RESTRICT_REFERENCE rRP = m_RP; SThreadInfo& RESTRICT_REFERENCE rTI = rRP.m_TI[rRP.m_nProcessThreadID]; // Set culling mode if (!(rRP.m_FlagsPerFlush & RBSI_LOCKCULL)) { if (slw->m_eCull != -1) { D3DSetCull((ECull)slw->m_eCull); } } bool bProcessedAll = true; uint32 i; SCGBind bind; byte Attributes[32]; rRP.m_FlagsPerFlush |= RBSI_INSTANCED; TempDynInstVB vb(gcpRendD3D); uint32 nCurInst; byte* data = NULL; CShaderResources* pCurRes = rRP.m_pShaderResources; CShaderResources* pSaveRes = pCurRes; uint64 nRTFlags = rRP.m_FlagsShader_RT; uint64 nSaveRTFlags = nRTFlags; // batch further and send everything as if it's rotated (full 3x4 matrix), even if we could // just send position uint64* __restrict maskBit = g_HWSR_MaskBit; nRTFlags |= maskBit[HWSR_INSTANCING_ATTR]; if (CV_r_geominstancingdebug > 1) { // !DEBUG0 && !DEBUG1 && DEBUG2 && DEBUG3 nRTFlags &= ~(maskBit[HWSR_DEBUG0] | maskBit[HWSR_DEBUG1]); nRTFlags |= (maskBit[HWSR_DEBUG2] | maskBit[HWSR_DEBUG3]); } rRP.m_FlagsShader_RT = nRTFlags; if (CRenderer::CV_r_SlimGBuffer) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_SLIM_GBUFFER]; } short dwCBufSlot = 0; CHWShader_D3D* vp = (CHWShader_D3D*)slw->m_VShader; CHWShader_D3D* ps = (CHWShader_D3D*)slw->m_PShader; // Set Pixel shader and all associated textures // Note: Need to set pixel shader first to properly set up modifiers for vertex shader (see ShaderCore.cpp & ModificatorTC.cfi) if (!ps->mfSet(HWSF_SETTEXTURES)) { rRP.m_FlagsShader_RT = nSaveRTFlags; rRP.m_pShaderResources = pSaveRes; rRP.m_PersFlags1 |= RBPF1_USESTREAM << 3; return; } // Set Vertex shader if (!vp->mfSet(HWSF_INSTANCED | HWSF_SETTEXTURES)) { rRP.m_FlagsShader_RT = nSaveRTFlags; rRP.m_pShaderResources = pSaveRes; rRP.m_PersFlags1 |= RBPF1_USESTREAM << 3; return; } CHWShader_D3D::SHWSInstance* pVPInst = vp->m_pCurInst; if (!pVPInst || pVPInst->m_bFallback || (ps->m_pCurInst && ps->m_pCurInst->m_bFallback)) { return; } CHWShader_D3D* curGS = (CHWShader_D3D*)slw->m_GShader; if (curGS) { curGS->mfSet(0); curGS->UpdatePerInstanceConstantBuffer(); } else { CHWShader_D3D::mfBindGS(nullptr, nullptr); } CHWShader_D3D* pCurHS, * pCurDS; bool bTessEnabled = FX_SetTessellationShaders(pCurHS, pCurDS, slw); vp->UpdatePerInstanceConstantBuffer(); ps->UpdatePerInstanceConstantBuffer(); #ifdef TESSELLATION_RENDERER CHWShader_D3D* curCS = (CHWShader_D3D*)slw->m_CShader; if (curCS) { curCS->mfSetCS(0); } else { CHWShader_D3D::mfBindCS(nullptr, nullptr); } if (pCurDS) { pCurDS->UpdatePerInstanceConstantBuffer(); } if (pCurHS) { pCurHS->UpdatePerInstanceConstantBuffer(); } #endif // VertexDeclaration of MeshInstance always starts with InstMatrix which has 3 vector4, that's why nUsedAttr is 3. int32 nUsedAttr = 3, nInstAttrMask = 0; pVPInst->GetInstancingAttribInfo(Attributes, nUsedAttr, nInstAttrMask); IRenderElement* pRE = NULL; CRenderMesh* pRenderMesh = NULL; const int nLastRE = rRP.m_nLastRE; for (int nRE = 0; nRE <= nLastRE; nRE++) { uint32 nRIs = rRP.m_RIs[nRE].size(); SRendItem** rRIs = &(rRP.m_RIs[nRE][0]); // don't process REs that don't make the cut for instancing. 
// these were batched with an instance-ready RE, so leave this to drop through into DrawBatch if (nRIs <= (uint32)CRenderer::m_iGeomInstancingThreshold) { bProcessedAll = false; continue; } CShaderResources* pRes = SRendItem::mfGetRes(rRIs[0]->SortVal); pRE = rRP.m_pRE = rRIs[0]->pElem; rRP.m_pCurObject = rRIs[0]->pObj; CREMeshImpl* __restrict pMesh = (CREMeshImpl*) pRE; pRE->mfPrepare(false); { if (pCurRes != pRes) { rRP.m_pShaderResources = pRes; CHWShader_D3D::UpdatePerMaterialConstantBuffer(); vp->UpdatePerBatchConstantBuffer(); if (vp->m_pCurInst) { vp->mfSetSamplers(vp->m_pCurInst->m_pSamplers, eHWSC_Vertex); } ps->UpdatePerBatchConstantBuffer(); if (ps->m_pCurInst) { ps->mfSetSamplers(ps->m_pCurInst->m_pSamplers, eHWSC_Pixel); } #ifdef TESSELLATION_RENDERER if (pCurDS && pCurDS->m_pCurInst) { pCurDS->mfSetSamplers(pCurDS->m_pCurInst->m_pSamplers, eHWSC_Domain); } #endif pCurRes = pRes; } if (pMesh->m_pRenderMesh != pRenderMesh) { // Create/Update video mesh (VB management) if (!pRE->mfCheckUpdate(rRP.m_FlagsStreams_Stream, rTI.m_nFrameUpdateID, bTessEnabled)) { rRP.m_FlagsShader_RT = nSaveRTFlags; rRP.m_pShaderResources = pSaveRes; rRP.m_PersFlags1 |= RBPF1_USESTREAM << 3; return; } pRenderMesh = pMesh->m_pRenderMesh; } { nCurInst = 0; // Detects possibility of using attributes based instancing // If number of used attributes exceed 16 we can't use attributes based instancing (switch to constant based) int nStreamMask = rRP.m_FlagsStreams_Stream >> 1; int nVFormat = rRP.m_CurVFormat.GetEnum(); uint32 nCO = 0; int nCI = 0; uint32 dwDeclarationSize = 0; if (dwDeclarationSize + nUsedAttr - 1 > 16) { iLog->LogWarning("WARNING: Attributes based instancing cannot exceed 16 attributes (%s uses %d attr. + %d vertex decl.attr.)[VF: %d, SM: 0x%x]", vp->GetName(), nUsedAttr, dwDeclarationSize - 1, nVFormat, nStreamMask); } else { while ((int)nCurInst < nRIs) { uint32 nLastInst = nRIs - 1; { uint32 nParamsPerInstAllowed = MAX_HWINST_PARAMS; if ((nLastInst - nCurInst + 1) * nUsedAttr >= nParamsPerInstAllowed) { nLastInst = nCurInst + (nParamsPerInstAllowed / nUsedAttr) - 1; } } byte* inddata = NULL; { vb.Allocate(nLastInst - nCurInst + 1, nUsedAttr * INST_PARAM_SIZE); data = (byte*) vb.Lock(); } CRenderObject* curObj = rRP.m_pCurObject; // 3 float4 = inst Matrix const AZ::u32 perInstanceStride = nUsedAttr * sizeof(float[4]); // Fill the stream 3 for per-instance data byte* pWalkData = data; for (i = nCurInst; i <= nLastInst; i++) { CRenderObject* renderObject = rRIs[nCO++]->pObj; rRP.m_pCurObject = renderObject; AzRHI::SIMDCopy(pWalkData, renderObject->m_II.m_Matrix.GetData(), 3); if (pVPInst->m_nParams_Inst >= 0) { SCGParamsGroup& Group = CGParamManager::s_Groups[pVPInst->m_nParams_Inst]; vp->UpdatePerInstanceConstants(eHWSC_Vertex, Group.pParams, Group.nParams, pWalkData); } pWalkData += perInstanceStride; } rRP.m_pCurObject = curObj; vb.Unlock(); // Set the first stream to be the indexed data and render N instances vb.Bind(3, nUsedAttr * INST_PARAM_SIZE); vb.Release(); FX_DrawInstances(ef, slw, nRE, nCurInst, nLastInst, nUsedAttr, data, nInstAttrMask, Attributes, dwCBufSlot); nCurInst = nLastInst + 1; } } } } } #ifdef TESSELLATION_RENDERER if (bTessEnabled) { CHWShader_D3D::mfBindDS(NULL, NULL); CHWShader_D3D::mfBindHS(NULL, NULL); } #endif rRP.m_PersFlags1 |= RBPF1_USESTREAM << 3; rRP.m_pShaderResources = pSaveRes; rRP.m_nCommitFlags = FC_ALL; rRP.m_FlagsShader_RT = nSaveRTFlags; rRP.m_nNumRendPasses++; if (!bProcessedAll) { FX_DrawBatches(ef, slw); } #else CryFatalError("HW Instancing not 
supported on this platform"); #endif } //#endif //==================================================================================== byte CD3D9Renderer::FX_StartQuery(SRendItem* pRI) { if (!CV_r_ConditionalRendering || !(m_RP.m_nBatchFilter & (FB_Z | FB_GENERAL))) { return 0; } #if !defined(NULL_RENDERER) if (m_RP.m_nBatchFilter & FB_Z) { if (m_OcclQueriesUsed >= MAX_OCCL_QUERIES) { return 0; } assert(pRI->nOcclQuery > MAX_OCCL_QUERIES); uint32 nQuery = m_OcclQueriesUsed; ++m_OcclQueriesUsed; COcclusionQuery* pQ = &m_OcclQueries[nQuery]; if (!pQ->IsCreated()) { pQ->Create(); } pQ->BeginQuery(); pRI->nOcclQuery = nQuery; #ifndef _RELEASE m_RP.m_PS[m_RP.m_nProcessThreadID].m_NumQIssued++; #endif return 1; } else { if (pRI->nOcclQuery >= MAX_OCCL_QUERIES || pRI->nOcclQuery < 0) { return 0; } COcclusionQuery* pQ = &m_OcclQueries[pRI->nOcclQuery]; #ifndef _RELEASE CTimeValue Time = iTimer->GetAsyncTime(); #endif uint32 nPixels = pQ->GetVisibleSamples(CV_r_ConditionalRendering == 2 ? false : true); #ifndef _RELEASE m_RP.m_PS[m_RP.m_nProcessThreadID].m_NumQStallTime += static_cast(iTimer->GetAsyncTime().GetMilliSeconds() - Time.GetMilliSeconds()); #endif bool bReady = pQ->IsReady(); if (!bReady) { #ifndef _RELEASE m_RP.m_PS[m_RP.m_nProcessThreadID].m_NumQNotReady++; #endif return 0; } if (nPixels == 0) { #ifndef _RELEASE m_RP.m_PS[m_RP.m_nProcessThreadID].m_NumQOccluded++; #endif return 2; } return 0; } #endif return 0; } void CD3D9Renderer::FX_EndQuery(SRendItem* pRI, byte bStartQ) { if (!bStartQ) { return; } assert(pRI->nOcclQuery < MAX_OCCL_QUERIES); COcclusionQuery* pQ = &m_OcclQueries[pRI->nOcclQuery]; pQ->EndQuery(); } void CD3D9Renderer::FX_DrawBatchesSkinned(CShader* pSh, SShaderPass* pPass, SSkinningData* pSkinningData) { DETAILED_PROFILE_MARKER("FX_DrawBatchesSkinned"); PROFILE_FRAME(DrawShader_BatchSkinned); SRenderPipeline& RESTRICT_REFERENCE rRP = m_RP; SThreadInfo& RESTRICT_REFERENCE rTI = rRP.m_TI[rRP.m_nProcessThreadID]; CHWShader_D3D* pCurVS = (CHWShader_D3D*)pPass->m_VShader; CHWShader_D3D* pCurPS = (CHWShader_D3D*)pPass->m_PShader; const int nThreadID = m_RP.m_nProcessThreadID; const int bRenderLog = CRenderer::CV_r_log; CREMeshImpl* pRE = (CREMeshImpl*) rRP.m_pRE; CRenderObject* const pSaveObj = rRP.m_pCurObject; size_t size = 0, offset = 0; CHWShader_D3D* pCurGS = (CHWShader_D3D*)pPass->m_GShader; CRenderMesh* pRenderMesh = pRE->m_pRenderMesh; rRP.m_nNumRendPasses++; rRP.m_RendNumGroup = 0; rRP.m_FlagsShader_RT &= ~g_HWSR_MaskBit[HWSR_VERTEX_VELOCITY]; if (pSkinningData->nHWSkinningFlags & eHWS_Skinning_Matrix) { rRP.m_FlagsShader_RT |= (g_HWSR_MaskBit[HWSR_SKINNING_MATRIX]); } else if (pSkinningData->nHWSkinningFlags & eHWS_Skinning_DQ_Linear) { rRP.m_FlagsShader_RT |= (g_HWSR_MaskBit[HWSR_SKINNING_DQ_LINEAR]); } else { rRP.m_FlagsShader_RT |= (g_HWSR_MaskBit[HWSR_SKINNING_DUAL_QUAT]); } bool bRes = pCurPS->mfSetPS(HWSF_SETTEXTURES); bRes &= pCurVS->mfSetVS(0); CHWShader_D3D* pCurHS, *pCurDS; bool bTessEnabled = FX_SetTessellationShaders(pCurHS, pCurDS, pPass); if (pCurGS) { bRes &= pCurGS->mfSetGS(0); } else { CHWShader_D3D::mfBindGS(NULL, NULL); } const uint numObjects = rRP.m_RIs[0].Num(); if (!bRes) { goto done; } if (CHWShader_D3D::s_pCurInstVS && CHWShader_D3D::s_pCurInstVS->m_bFallback) { goto done; } // Create/Update video mesh (VB management) if (!pRE->mfCheckUpdate(m_RP.m_FlagsStreams_Stream, rTI.m_nFrameUpdateID, bTessEnabled)) { goto done; } if (ShouldApplyFogCorrection()) { FX_FogCorrection(); } // Unlock all VB (if needed) and set current streams if 
(!FX_CommitStreams(pPass)) { goto done; } for (uint nObj = 0; nObj < numObjects; ++nObj) { CRenderObject* pObject = rRP.m_RIs[0][nObj]->pObj; rRP.m_pCurObject = pObject; #ifdef DO_RENDERSTATS if (FX_ShouldTrackStats()) { FX_TrackStats(pObject, pRE->m_pRenderMesh); } #endif #ifdef DO_RENDERLOG if (bRenderLog >= 3) { Vec3 vPos = pObject->GetTranslation(); Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], "+++ HWSkin Group Pass %d (Obj: %d [%.3f, %.3f, %.3f])\n", m_RP.m_nNumRendPasses, pObject->m_Id, vPos[0], vPos[1], vPos[2]); } #endif pCurVS->UpdatePerInstanceConstantBuffer(); pCurPS->UpdatePerInstanceConstantBuffer(); if (pCurGS) { pCurGS->UpdatePerInstanceConstantBuffer(); } else { CHWShader_D3D::mfBindGS(NULL, NULL); } #ifdef TESSELLATION_RENDERER if (pCurDS) { pCurDS->UpdatePerInstanceConstantBuffer(); } else { CHWShader_D3D::mfBindDS(NULL, NULL); } if (pCurHS) { pCurHS->UpdatePerInstanceConstantBuffer(); } else { CHWShader_D3D::mfBindHS(NULL, NULL); } #endif AzRHI::ConstantBuffer* pBuffer[2] = { NULL }; SRenderObjData* pOD = pObject->GetObjData(); assert(pOD); if (pOD) { SSkinningData* const skinningData = pOD->m_pSkinningData; if (!pRE->BindRemappedSkinningData(skinningData->remapGUID)) { continue; } pBuffer[0] = alias_cast(skinningData->pCharInstCB)->m_buffer; #ifdef CRY_USE_METAL // Buffer is sometimes null... binding a null skinned VB will fail on METAL if (!pBuffer[0]) { continue; } #endif // get previous data for motion blur if available if (skinningData->pPreviousSkinningRenderData) { pBuffer[1] = alias_cast(skinningData->pPreviousSkinningRenderData->pCharInstCB)->m_buffer; } } else { continue; } #ifndef _RELEASE rRP.m_PS[nThreadID].m_NumRendSkinnedObjects++; #endif m_PerInstanceConstantBufferPool.SetConstantBuffer(rRP.m_RIs[0][nObj]); m_DevMan.BindConstantBuffer(eHWSC_Vertex, pBuffer[0], eConstantBufferShaderSlot_SkinQuat); m_DevMan.BindConstantBuffer(eHWSC_Vertex, pBuffer[1], eConstantBufferShaderSlot_SkinQuatPrev); { DETAILED_PROFILE_MARKER("DrawSkinned"); if (rRP.m_pRE) { rRP.m_pRE->mfDraw(pSh, pPass); } else { FX_DrawIndexedMesh(pRenderMesh ? 
pRenderMesh->GetPrimitiveType() : eptTriangleList); } } } done: m_DevMan.BindConstantBuffer(eHWSC_Vertex, nullptr, eConstantBufferShaderSlot_SkinQuat); m_DevMan.BindConstantBuffer(eHWSC_Vertex, nullptr, eConstantBufferShaderSlot_SkinQuatPrev); rRP.m_FlagsShader_MD &= ~HWMD_TEXCOORD_FLAG_MASK; rRP.m_pCurObject = pSaveObj; #ifdef TESSELLATION_RENDERER if (bTessEnabled) { CHWShader_D3D::mfBindDS(NULL, NULL); CHWShader_D3D::mfBindHS(NULL, NULL); } #endif rRP.m_RendNumGroup = -1; } #if defined(DO_RENDERSTATS) void CD3D9Renderer::FX_TrackStats(CRenderObject* pObj, IRenderMesh* pRenderMesh) { SRenderPipeline& RESTRICT_REFERENCE rRP = m_RP; #if !defined(_RELEASE) if (pObj) { if (IRenderNode* pRenderNode = (IRenderNode*)pObj->m_pRenderNode) { //Add to per node map for r_stats 6 if (CV_r_stats == 6 || m_pDebugRenderNode || m_bCollectDrawCallsInfoPerNode) { IRenderer::RNDrawcallsMapNode& drawCallsInfoPerNode = rRP.m_pRNDrawCallsInfoPerNode[m_RP.m_nProcessThreadID]; IRenderer::RNDrawcallsMapNodeItor pItor = drawCallsInfoPerNode.find(pRenderNode); if (pItor != drawCallsInfoPerNode.end()) { SDrawCallCountInfo& pInfoDP = pItor->second; pInfoDP.Update(pObj, pRenderMesh); } else { SDrawCallCountInfo pInfoDP; pInfoDP.Update(pObj, pRenderMesh); drawCallsInfoPerNode.insert(IRenderer::RNDrawcallsMapNodeItor::value_type(pRenderNode, pInfoDP)); } } //Add to per mesh map for perfHUD / Statoscope if (m_bCollectDrawCallsInfo) { IRenderer::RNDrawcallsMapMesh& drawCallsInfoPerMesh = rRP.m_pRNDrawCallsInfoPerMesh[m_RP.m_nProcessThreadID]; IRenderer::RNDrawcallsMapMeshItor pItor = drawCallsInfoPerMesh.find(pRenderMesh); if (pItor != drawCallsInfoPerMesh.end()) { SDrawCallCountInfo& pInfoDP = pItor->second; pInfoDP.Update(pObj, pRenderMesh); } else { SDrawCallCountInfo pInfoDP; pInfoDP.Update(pObj, pRenderMesh); drawCallsInfoPerMesh.insert(IRenderer::RNDrawcallsMapMeshItor::value_type(pRenderMesh, pInfoDP)); } } } } #endif } #endif bool CD3D9Renderer::FX_SetTessellationShaders(CHWShader_D3D*& pCurHS, CHWShader_D3D*& pCurDS, const SShaderPass* pPass) { SRenderPipeline& RESTRICT_REFERENCE rRP = m_RP; #ifdef TESSELLATION_RENDERER pCurHS = (CHWShader_D3D*)pPass->m_HShader; pCurDS = (CHWShader_D3D*)pPass->m_DShader; bool bTessEnabled = (pCurHS != NULL) && (pCurDS != NULL) && !(rRP.m_pCurObject->m_ObjFlags & FOB_NEAREST) && (rRP.m_pCurObject->m_ObjFlags & FOB_ALLOW_TESSELLATION); #ifndef MOTIONBLUR_TESSELLATION bTessEnabled &= !(rRP.m_PersFlags2 & RBPF2_MOTIONBLURPASS); #endif if (bTessEnabled && pCurHS->mfSetHS(0) && pCurDS->mfSetDS(HWSF_SETTEXTURES)) { if (CV_r_tessellationdebug == 1) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_DEBUG1]; } return true; } CHWShader_D3D::mfBindHS(NULL, NULL); CHWShader_D3D::mfBindDS(NULL, NULL); #endif rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_NO_TESSELLATION]; pCurHS = NULL; pCurDS = NULL; return false; } #ifdef TESSELLATION_RENDERER void CD3D9Renderer::FX_SetAdjacencyOffsetBuffer() { #ifdef MESH_TESSELLATION_RENDERER if (m_RP.m_pRE && m_RP.m_pRE->mfGetType() == eDATA_Mesh) { CREMeshImpl* pMesh = (CREMeshImpl*) m_RP.m_pRE; // this buffer contains offset HS has to apply to SV_PrimitiveID it gets from HW. 
we need this because // sometimes we do not start rendering from the beginning of index buffer // AI AndreyK: probably texture buffer has to be replayed by per-instance constant m_DevMan.BindSRV(eHWSC_Hull, pMesh->m_tessCB.GetShaderResourceView(), 15); } else { m_DevMan.BindSRV(eHWSC_Hull, NULL, 15); } #endif } #endif //#ifdef TESSELLATION_RENDERER void CD3D9Renderer::FX_DrawBatches(CShader* pSh, SShaderPass* pPass) { DETAILED_PROFILE_MARKER("FX_DrawBatches"); FUNCTION_PROFILER_RENDER_FLAT SRenderPipeline& RESTRICT_REFERENCE rRP = m_RP; SThreadInfo& RESTRICT_REFERENCE rTI = rRP.m_TI[rRP.m_nProcessThreadID]; // Set culling mode if (!((rRP.m_FlagsPerFlush & RBSI_LOCKCULL) || (m_RP.m_PersFlags2 & RBPF2_LIGHTSHAFTS))) { if (pPass->m_eCull != -1) { D3DSetCull((ECull)pPass->m_eCull); } } bool bHWSkinning = FX_SetStreamFlags(pPass); SSkinningData* pSkinningData = NULL; if (bHWSkinning) { SRenderObjData* pOD = rRP.m_pCurObject->GetObjData(); if (!pOD || !(pSkinningData = pOD->m_pSkinningData)) { bHWSkinning = false; Warning("Warning: Skinned geometry used without character instance"); } } IF(bHWSkinning && (rRP.m_pCurObject->m_ObjFlags & FOB_SKINNED) && !CV_r_character_nodeform, 0) { FX_DrawBatchesSkinned(pSh, pPass, pSkinningData); } else { DETAILED_PROFILE_MARKER("FX_DrawBatchesStatic"); // Set shaders bool bRes = true; const int rStats = CV_r_stats; const int rLog = CV_r_log; CHWShader_D3D* pCurGS = (CHWShader_D3D*)pPass->m_GShader; if (pCurGS) { bRes &= pCurGS->mfSetGS(0); } else { CHWShader_D3D::mfBindGS(NULL, NULL); } CHWShader_D3D* pCurVS = (CHWShader_D3D*)pPass->m_VShader; CHWShader_D3D* pCurPS = (CHWShader_D3D*)pPass->m_PShader; bRes &= pCurPS->mfSetPS(HWSF_SETTEXTURES); bRes &= pCurVS->mfSetVS(HWSF_SETTEXTURES); CHWShader_D3D* pCurHS, * pCurDS; bool bTessEnabled = FX_SetTessellationShaders(pCurHS, pCurDS, pPass); if (bRes) { if (ShouldApplyFogCorrection()) { FX_FogCorrection(); } assert(rRP.m_pRE || !rRP.m_nLastRE); IRenderElement* pRE = rRP.m_pRE; IRenderElement* pRESave = pRE; CRenderObject* pSaveObj = rRP.m_pCurObject; CShaderResources* pCurRes = rRP.m_pShaderResources; CShaderResources* pSaveRes = pCurRes; for (int nRE = 0; nRE <= rRP.m_nLastRE; nRE++) { TArray& rRIs = rRP.m_RIs[nRE]; if (!(rRP.m_FlagsPerFlush & RBSI_INSTANCED) || rRIs.size() <= (unsigned)CRenderer::m_iGeomInstancingThreshold) { if (pRE) { // Check the material for this object and make sure that it is actually supposed to be casting a shadow. const bool isShadowPass = m_RP.m_nPassGroupID == EFSLIST_SHADOW_GEN; int objectMaterialID = rRIs[0]->pElem->mfGetMatId(); if (isShadowPass && (objectMaterialID != -1)) { if (rRIs[0]->pObj->m_pCurrMaterial && (rRIs[0]->pObj->m_pCurrMaterial->GetSafeSubMtl(objectMaterialID)->GetFlags() & MTL_FLAG_NOSHADOW)) { continue; } } pRE = rRP.m_pRE = rRIs[0]->pElem; rRP.m_pCurObject = rRIs[0]->pObj; CShaderResources* pRes = (rRP.m_PersFlags2 & RBPF2_MATERIALLAYERPASS) ? 
rRP.m_pShaderResources : SRendItem::mfGetRes(rRIs[0]->SortVal); uint32 nFrameID = rTI.m_nFrameUpdateID; if (!pRE->mfCheckUpdate(rRP.m_FlagsStreams_Stream | 0x80000000, nFrameID, bTessEnabled)) { continue; } if (nRE || rRP.m_nNumRendPasses || pCurRes != pRes) // Only static meshes (CREMeshImpl) can use geom batching { rRP.m_pShaderResources = pRes; CHWShader_D3D::UpdatePerMaterialConstantBuffer(); pRE->mfPrepare(false); CREMeshImpl* pM = (CREMeshImpl*)pRE; if (pM->m_CustomData || pCurRes != pRes) // Custom data can indicate some shader parameters are from mesh { pCurVS->UpdatePerBatchConstantBuffer(); pCurPS->UpdatePerBatchConstantBuffer(); if (pCurPS->m_pCurInst) { pCurPS->mfSetSamplers(pCurPS->m_pCurInst->m_pSamplers, eHWSC_Pixel); } if (pCurVS->m_pCurInst) { pCurVS->mfSetSamplers(pCurVS->m_pCurInst->m_pSamplers, eHWSC_Vertex); } #ifdef TESSELLATION_RENDERER if (pCurDS && pCurDS->m_pCurInst) { pCurDS->mfSetSamplers(pCurDS->m_pCurInst->m_pSamplers, eHWSC_Domain); } #endif pCurRes = pRes; } } } rRP.m_nNumRendPasses++; // Unlock all VBs (if needed) and bind current streams if (FX_CommitStreams(pPass)) { uint32 nO; const uint32 nNumRI = rRIs.Num(); CRenderObject* pObj = NULL; CRenderObject::SInstanceInfo* pI; #ifdef DO_RENDERSTATS if ((CV_r_stats == 6 || m_pDebugRenderNode || m_bCollectDrawCallsInfo)) { for (nO = 0; nO < nNumRI; nO++) { pObj = rRIs[nO]->pObj; IRenderElement* pElemBase = rRIs[nO]->pElem; if (pElemBase->mfGetType() == eDATA_Mesh) { CREMeshImpl* pMesh = (CREMeshImpl*)pElemBase; IRenderMesh* pRenderMesh = pMesh ? pMesh->m_pRenderMesh : NULL; FX_TrackStats(pObj, pRenderMesh); } } } #endif for (nO = 0; nO < nNumRI; ++nO) { pObj = rRIs[nO]->pObj; rRP.m_pCurObject = pObj; pI = &pObj->m_II; byte bStartQ = FX_StartQuery(rRIs[nO]); if (bStartQ == 2) { continue; } #ifdef DO_RENDERLOG if (rLog >= 3) { Vec3 vPos = pObj->GetTranslation(); Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], "+++ General Pass %d (Obj: %d [%.3f, %.3f, %.3f], %.3f)\n", m_RP.m_nNumRendPasses, pObj->m_Id, vPos[0], vPos[1], vPos[2], pObj->m_fDistance); } #endif pCurVS->UpdatePerInstanceConstantBuffer(); pCurPS->UpdatePerInstanceConstantBuffer(); if (pCurGS) { pCurGS->UpdatePerInstanceConstantBuffer(); } else { CHWShader_D3D::mfBindGS(NULL, NULL); } #ifdef TESSELLATION_RENDERER if (pCurDS) { pCurDS->UpdatePerInstanceConstantBuffer(); } if (pCurHS) { pCurHS->UpdatePerInstanceConstantBuffer(); } #endif AZ_Assert(rRIs[nO], "current render item is null"); m_PerInstanceConstantBufferPool.SetConstantBuffer(rRIs[nO]); { if (pRE) { pRE->mfDraw(pSh, pPass); } else { FX_DrawIndexedMesh(eptTriangleList); } } m_RP.m_nCommitFlags &= ~(FC_TARGETS | FC_GLOBAL_PARAMS); FX_EndQuery(rRIs[nO], bStartQ); } rRP.m_FlagsShader_MD &= ~(HWMD_TEXCOORD_FLAG_MASK); if (pRE) { pRE->mfClearFlags(FCEF_PRE_DRAW_DONE); } } rRP.m_pCurObject = pSaveObj; rRP.m_pRE = pRESave; rRP.m_pShaderResources = pSaveRes; } } } #ifdef TESSELLATION_RENDERER if (bTessEnabled) { CHWShader_D3D::mfBindHS(NULL, NULL); CHWShader_D3D::mfBindDS(NULL, NULL); } #endif } m_RP.m_nCommitFlags = FC_ALL; } //============================================================================================ void CD3D9Renderer::FX_DrawShader_General(CShader* ef, SShaderTechnique* pTech) { SShaderPass* slw; int32 i; PROFILE_FRAME(DrawShader_Generic); SThreadInfo& RESTRICT_REFERENCE rTI = m_RP.m_TI[m_RP.m_nProcessThreadID]; EF_Scissor(false, 0, 0, 0, 0); if (pTech->m_Passes.Num()) { slw = &pTech->m_Passes[0]; const int nCount = pTech->m_Passes.Num(); uint32 curPassBit = 1; for (i = 0; 
i < nCount; i++, slw++, (curPassBit <<= 1)) { m_RP.m_pCurPass = slw; // Set all textures and HW TexGen modes for the current pass (ShadeLayer) assert (slw->m_VShader && slw->m_PShader); if (!slw->m_VShader || !slw->m_PShader || (curPassBit & m_RP.m_CurPassBitMask)) { continue; } FX_CommitStates(pTech, slw, (slw->m_PassFlags & SHPF_NOMATSTATE) == 0); bool bSkinned = (m_RP.m_pCurObject->m_ObjFlags & FOB_SKINNED) && !CV_r_character_nodeform; bSkinned |= FX_SetStreamFlags(slw); if (m_RP.m_FlagsPerFlush & RBSI_INSTANCED && !bSkinned) { // Using HW geometry instancing approach FX_DrawShader_InstancedHW(ef, slw); } else { FX_DrawBatches(ef, slw); } } } } void CD3D9Renderer::FX_DrawShader_Fur(CShader* ef, SShaderTechnique* pTech) { static CCryNameTSCRC techFurZPost("FurZPost"); FurPasses& furPasses = FurPasses::GetInstance(); bool isFurZPost = (pTech->m_NameCRC == techFurZPost); furPasses.SetFurShellPassPercent(isFurZPost ? 1.0f : 0.0f); // Fur should be rendered with an object containing a render node. // Example of objects without render node are various effects such as light beams // light arc that their material was set to Fur by mistake - in such case we gracefully don't render ;) // Adding a trace warning is an option but it'll slow down the render frame quite noticeably. if (!m_RP.m_pCurObject || !m_RP.m_pCurObject->m_pRenderNode) return; static CCryNameTSCRC techFurShell("General"); if (pTech->m_NameCRC == techFurShell) { PROFILE_FRAME(DrawShader_Fur); SThreadInfo& RESTRICT_REFERENCE rTI = m_RP.m_TI[m_RP.m_nProcessThreadID]; EF_Scissor(false, 0, 0, 0, 0); int recurseLevel = SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID]; // Skip fur shells for recursive passes FurPasses::RenderMode furRenderMode = furPasses.GetFurRenderingMode(); if (pTech->m_Passes.Num() && furRenderMode != FurPasses::RenderMode::None && CV_r_FurShellPassCount > 0 && recurseLevel == 0) { uint64 nSavedFlags = m_RP.m_FlagsShader_RT; uint32 nSavedStateAnd = m_RP.m_ForceStateAnd; furPasses.ApplyFurDebugFlags(); if (furRenderMode == FurPasses::RenderMode::AlphaTested) { m_RP.m_ForceStateAnd |= GS_BLALPHA_MASK | GS_BLEND_MASK; m_RP.m_FlagsShader_RT &= ~g_HWSR_MaskBit[HWSR_ADDITIVE_BLENDING]; // Ensure that alpha testing is set up for alpha tested fur shells, even if not specified in the // material, by forcing a minimum alpha test of 0.01. This allows fur materials that do not // specify alpha testing to appear similar to alpha blended fur, but materials that control // alpha testing still benefit from their settings. 
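                // FX_SetAlphaTestState (defined later in this file) converts the reference into an 8-bit value
                // and ORs GS_ALPHATEST_GEQUAL | GS_DEPTHWRITE into the material state, so the 0.01 floor below
                // maps to an alpha reference of roughly 2/255 for materials that request no alpha test of their own.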
FX_SetAlphaTestState(max(0.01f, m_RP.m_pShaderResources->GetAlphaRef())); } else if (furRenderMode == FurPasses::RenderMode::AlphaBlended) { // Even if the material specifies alpha testing, don't write depth for alpha blended fur shells m_RP.m_ForceStateAnd |= GS_DEPTHWRITE; } // OIT permutation flag set MultiLayerAlphaBlendPass::GetInstance().ConfigureShaderFlags(m_RP.m_FlagsShader_RT); assert(pTech->m_Passes.Num() == 1); SShaderPass* slw = &pTech->m_Passes[0]; m_RP.m_pCurPass = slw; // Set all textures and HW TexGen modes for the current pass (ShadeLayer) assert (slw->m_VShader && slw->m_PShader); if (slw->m_VShader && slw->m_PShader) { FX_CommitStates(pTech, slw, (slw->m_PassFlags & SHPF_NOMATSTATE) == 0); bool bSkinned = (m_RP.m_pCurObject->m_ObjFlags & FOB_SKINNED) && !CV_r_character_nodeform; bSkinned |= FX_SetStreamFlags(slw); int startShell = 1; int endShell = CV_r_FurShellPassCount; int numShellPasses = CV_r_FurShellPassCount; if ((m_RP.m_FlagsShader_RT & g_HWSR_MaskBit[HWSR_HDR_MODE]) == 0) { // For aux views such as the material editor, draw the base surface, as it is not captured by Z pass there startShell = 0; } if (CV_r_FurDebugOneShell > 0 && CV_r_FurDebugOneShell <= CV_r_FurShellPassCount) { startShell = CV_r_FurDebugOneShell; endShell = startShell; } else if (IRenderNode* pRenderNode = m_RP.m_pCurObject->m_pRenderNode) { // Scale number of shell passes by object's distance to camera and LOD ratio float lodRatio = pRenderNode->GetLodRatioNormalized(); if (lodRatio > 0.0f) { static ICVar* pTargetSize = gEnv->pConsole->GetCVar("e_LodFaceAreaTargetSize"); if (pTargetSize) { lodRatio *= pTargetSize->GetFVal(); } // Not using pRenderNode->GetMaxViewDist() because we want to be able to LOD out the fur while still being able to see the object at distance float maxDistance = CV_r_FurMaxViewDist * pRenderNode->GetViewDistanceMultiplier(); float firstLodDistance = pRenderNode->GetFirstLodDistance(); float lodDistance = AZ::GetClamp(firstLodDistance / lodRatio, 0.0f, maxDistance - 0.001f); // Distance before first LOD change (factoring in LOD ratio) uses full number of shells // Beyond that distance, number of shells linearly decreases to 0 as distance approaches max view distance. 
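                        // Worked example with illustrative numbers (not engine defaults): with maxDistance = 100,
                        // lodDistance = 20 and CV_r_FurShellPassCount = 32, an object at distance 60 gives
                        // distanceRatio = (100 - 60) / (100 - 20) = 0.5, so endShell becomes 16 shell passes;
                        // at distance >= 100 the clamped ratio is 0 and no shell passes run for that object.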
float distance = m_RP.m_pCurObject->m_fDistance; float distanceRatio = (maxDistance - distance) / (maxDistance - lodDistance); float clampedDistanceRatio = AZ::GetClamp(distanceRatio, 0.0f, 1.0f); endShell = aznumeric_cast(endShell * clampedDistanceRatio); numShellPasses = endShell; } } numShellPasses = AZ::GetMax(numShellPasses, 1); for (int i = startShell; i <= endShell; ++i) { // Set shell distance from base surface in fur params furPasses.SetFurShellPassPercent(static_cast(i) / numShellPasses); if (m_RP.m_FlagsPerFlush & RBSI_INSTANCED && !bSkinned) { // Using HW geometry instancing approach FX_DrawShader_InstancedHW(ef, slw); } else { FX_DrawBatches(ef, slw); } } } m_RP.m_ForceStateAnd = nSavedStateAnd; m_RP.m_FlagsShader_RT = nSavedFlags; } } else { uint64 nSavedFlags = m_RP.m_FlagsShader_RT; static CCryNameTSCRC techFurShadow("FurShadowGen"); if (pTech->m_NameCRC == techFurShadow) { m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_GPU_PARTICLE_TURBULENCE]; // Indicates fin pass if (CV_r_FurFinShadowPass == 0 || furPasses.GetFurRenderingMode() == FurPasses::RenderMode::None) { m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_GPU_PARTICLE_SHADOW_PASS]; // Indicates fins should be skipped in shadow pass } } // All other techniques use the normal path FX_DrawShader_General(ef, pTech); m_RP.m_FlagsShader_RT = nSavedFlags; } } void CD3D9Renderer::FX_DrawDebugPasses() { if (!m_RP.m_pRootTechnique || m_RP.m_pRootTechnique->m_nTechnique[TTYPE_DEBUG] < 0) { return; } CShader* sh = m_RP.m_pShader; SShaderTechnique* pTech = m_RP.m_pShader->m_HWTechniques[m_RP.m_pRootTechnique->m_nTechnique[TTYPE_DEBUG]]; PROFILE_FRAME(DrawShader_DebugPasses); PROFILE_LABEL_SCOPE("DEBUG_PASS"); int nLastRE = m_RP.m_nLastRE; m_RP.m_nLastRE = 0; for (int nRE = 0; nRE <= nLastRE; nRE++) { s_tempRIs.SetUse(0); m_RP.m_pRE = m_RP.m_RIs[nRE][0]->pElem; if (!m_RP.m_pRE) { continue; } for (uint32 i = 0; i < m_RP.m_RIs[nRE].Num(); i++) { s_tempRIs.AddElem(m_RP.m_RIs[nRE][i]); } if (!s_tempRIs.Num()) { continue; } m_RP.m_pRE->mfPrepare(false); uint32 nSaveMD = m_RP.m_FlagsShader_MD; TArray saveArr; saveArr.Assign(m_RP.m_RIs[0]); m_RP.m_RIs[0].Assign(s_tempRIs); CRenderObject* pSaveObject = m_RP.m_pCurObject; m_RP.m_pCurObject = m_RP.m_RIs[0][0]->pObj; m_RP.m_FlagsShader_MD &= ~HWMD_TEXCOORD_FLAG_MASK; int32 nMaterialStatePrevOr = m_RP.m_MaterialStateOr; int32 nMaterialStatePrevAnd = m_RP.m_MaterialStateAnd; m_RP.m_MaterialStateAnd = GS_BLEND_MASK; m_RP.m_MaterialStateOr = GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA; FX_DrawTechnique(sh, pTech); m_RP.m_RIs[0].Assign(saveArr); saveArr.ClearArr(); m_RP.m_pCurObject = pSaveObject; m_RP.m_pPrevObject = NULL; m_RP.m_FlagsShader_MD = nSaveMD; m_RP.m_MaterialStateOr = nMaterialStatePrevOr; m_RP.m_MaterialStateAnd = nMaterialStatePrevAnd; } m_RP.m_nLastRE = nLastRE; } // deprecated (cannot remove at this stage) - maybe can batch into FX_DrawEffectLayerPasses (?) 
void CD3D9Renderer::FX_DrawMultiLayers() { // Verify if current mesh has valid data for layers CREMeshImpl* pRE = (CREMeshImpl*) m_RP.m_pRE; if (!m_RP.m_pShader || !m_RP.m_pShaderResources || !m_RP.m_pCurObject->m_nMaterialLayers) { return; } _smart_ptr pObjMat = m_RP.m_pCurObject->m_pCurrMaterial; if ((SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID] > 0) || !m_RP.m_pShaderResources || !pObjMat) { return; } if (m_RP.m_PersFlags2 & (RBPF2_CUSTOM_RENDER_PASS | RBPF2_MOTIONBLURPASS)) { return; } CRenderChunk* pChunk = pRE->m_pChunk; if (!pChunk) { assert(pChunk); return; } // Check if chunk material has layers at all _smart_ptr pDefaultMtl = gEnv->p3DEngine->GetMaterialManager()->GetDefaultLayersMaterial(); _smart_ptr pCurrMtl = pObjMat->GetSubMtlCount() ? pObjMat->GetSubMtl(pChunk->m_nMatID) : pObjMat; if (!pCurrMtl || !pDefaultMtl || (pCurrMtl->GetFlags() & MTL_FLAG_NODRAW)) { return; } uint32 nLayerCount = pDefaultMtl->GetLayerCount(); if (!nLayerCount) { return; } // Start multi-layers processing PROFILE_FRAME(DrawShader_MultiLayers); if (m_logFileHandle != AZ::IO::InvalidHandle) { Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], "*** Start Multilayers processing ***\n"); } for (int nRE = 0; nRE <= m_RP.m_nLastRE; nRE++) { m_RP.m_pRE = m_RP.m_RIs[nRE][0]->pElem; // Render all layers for (uint32 nCurrLayer(0); nCurrLayer < nLayerCount; ++nCurrLayer) { IMaterialLayer* pLayer = const_cast< IMaterialLayer* >(pCurrMtl->GetLayer(nCurrLayer)); IMaterialLayer* pDefaultLayer = const_cast< IMaterialLayer* >(pDefaultMtl->GetLayer(nCurrLayer)); bool bDefaultLayer = false; if (!pLayer) { // Replace with default layer pLayer = pDefaultLayer; bDefaultLayer = true; if (!pLayer) { continue; } } if (!pLayer->IsEnabled() || pLayer->DoesFadeOut()) { continue; } // Set/verify layer shader technique SShaderItem& pCurrShaderItem = pLayer->GetShaderItem(); CShader* pSH = static_cast(pCurrShaderItem.m_pShader); if (!pSH || pSH->m_HWTechniques.empty()) { continue; } SShaderTechnique* pTech = pSH->m_HWTechniques[0]; if (!pTech) { continue; } // Re-create render object list, based on layer properties { s_tempRIs.SetUse(0); float fDistToCam = 500.0f; float fDist = CV_r_detaildistance; CRenderObject* pObj = m_RP.m_pCurObject; uint32 nObj = 0; for (nObj = 0; nObj < m_RP.m_RIs[nRE].Num(); nObj++) { pObj = m_RP.m_RIs[nRE][nObj]->pObj; uint8 nMaterialLayers = 0; nMaterialLayers |= ((pObj->m_nMaterialLayers & MTL_LAYER_BLEND_DYNAMICFROZEN)) ? 
MTL_LAYER_FROZEN : 0; if (nMaterialLayers & (1 << nCurrLayer)) { s_tempRIs.AddElem(m_RP.m_RIs[nRE][nObj]); } } // nothing in render list if (!s_tempRIs.Num()) { continue; } } SShaderItem& pMtlShaderItem = pCurrMtl->GetShaderItem(); int nSaveLastRE = m_RP.m_nLastRE; m_RP.m_nLastRE = 0; TexturesResourcesMap pPrevLayerResourceTex; // A map of texture used by the shader if (bDefaultLayer) { // Keep layer resources and replace with resources from base shader pPrevLayerResourceTex = ((CShaderResources*)pCurrShaderItem.m_pShaderResources)->m_TexturesResourcesMap; ((CShaderResources*)pCurrShaderItem.m_pShaderResources)->m_TexturesResourcesMap = m_RP.m_pShaderResources->m_TexturesResourcesMap; } m_RP.m_pRE->mfPrepare(false); // Store current rendering data TArray pPrevRenderObjLst; pPrevRenderObjLst.Assign(m_RP.m_RIs[0]); CRenderObject* pPrevObject = m_RP.m_pCurObject; CShaderResources* pPrevShaderResources = m_RP.m_pShaderResources; CShader* pPrevSH = m_RP.m_pShader; uint32 nPrevNumRendPasses = m_RP.m_nNumRendPasses; uint64 nFlagsShaderRTprev = m_RP.m_FlagsShader_RT; SShaderTechnique* pPrevRootTech = m_RP.m_pRootTechnique; m_RP.m_pRootTechnique = pTech; int nMaterialStatePrevOr = m_RP.m_MaterialStateOr; int nMaterialStatePrevAnd = m_RP.m_MaterialStateAnd; uint32 nFlagsShaderLTprev = m_RP.m_FlagsShader_LT; int nPersFlagsPrev = m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags; int nPersFlags2Prev = m_RP.m_PersFlags2; int nMaterialAlphaRefPrev = m_RP.m_MaterialAlphaRef; bool bIgnoreObjectAlpha = m_RP.m_bIgnoreObjectAlpha; m_RP.m_bIgnoreObjectAlpha = true; m_RP.m_pShader = pSH; m_RP.m_RIs[0].Assign(s_tempRIs); m_RP.m_pCurObject = m_RP.m_RIs[0][0]->pObj; m_RP.m_pPrevObject = NULL; m_RP.m_pShaderResources = (CShaderResources*)pCurrShaderItem.m_pShaderResources; // Reset light passes (need ambient) m_RP.m_nNumRendPasses = 0; m_RP.m_PersFlags2 |= RBPF2_MATERIALLAYERPASS; if ((1 << nCurrLayer) & MTL_LAYER_FROZEN) { m_RP.m_MaterialStateAnd = GS_BLEND_MASK | GS_ALPHATEST_MASK; m_RP.m_MaterialStateOr = GS_BLSRC_ONE | GS_BLDST_ONE; m_RP.m_MaterialAlphaRef = 0xff; } m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_SAMPLE4]; FX_DrawTechnique(pSH, pTech); // Restore previous rendering data m_RP.m_RIs[0].Assign(pPrevRenderObjLst); pPrevRenderObjLst.ClearArr(); m_RP.m_pShader = pPrevSH; m_RP.m_pShaderResources = pPrevShaderResources; m_RP.m_pCurObject = pPrevObject; m_RP.m_pPrevObject = NULL; m_RP.m_PersFlags2 = nPersFlags2Prev; m_RP.m_nLastRE = nSaveLastRE; m_RP.m_nNumRendPasses = nPrevNumRendPasses; m_RP.m_FlagsShader_LT = nFlagsShaderLTprev; m_RP.m_TI[m_RP.m_nProcessThreadID].m_PersFlags = nPersFlagsPrev; m_RP.m_FlagsShader_RT = nFlagsShaderRTprev; m_RP.m_nNumRendPasses = 0; m_RP.m_pRootTechnique = pPrevRootTech; m_RP.m_bIgnoreObjectAlpha = bIgnoreObjectAlpha; m_RP.m_MaterialStateOr = nMaterialStatePrevOr; m_RP.m_MaterialStateAnd = nMaterialStatePrevAnd; m_RP.m_MaterialAlphaRef = nMaterialAlphaRefPrev; if (bDefaultLayer) { // restore from the base layer ((CShaderResources*)pCurrShaderItem.m_pShaderResources)->m_TexturesResourcesMap = pPrevLayerResourceTex; } } } m_RP.m_pRE = pRE; if (m_logFileHandle != AZ::IO::InvalidHandle) { Logv(SRendItem::m_RecurseLevel[m_RP.m_nProcessThreadID], "*** End Multilayers processing ***\n"); } } void CD3D9Renderer::FX_SelectTechnique(CShader* pShader, SShaderTechnique* pTech) { SShaderTechniqueStat Stat; Stat.pTech = pTech; Stat.pShader = pShader; if (pTech->m_Passes.Num()) { SShaderPass* pPass = &pTech->m_Passes[0]; if (pPass->m_PShader && pPass->m_VShader) { Stat.pVS = 
(CHWShader_D3D*)pPass->m_VShader; Stat.pPS = (CHWShader_D3D*)pPass->m_PShader; Stat.pVSInst = Stat.pVS->m_pCurInst; Stat.pPSInst = Stat.pPS->m_pCurInst; g_SelectedTechs.push_back(Stat); } } } void CD3D9Renderer::FX_DrawTechnique(CShader* ef, SShaderTechnique* pTech) { FUNCTION_PROFILER_RENDER_FLAT switch (ef->m_eSHDType) { case eSHDT_General: FX_DrawShader_General(ef, pTech); break; case eSHDT_Light: FX_DrawShader_General(ef, pTech); break; case eSHDT_Terrain: { AZ_PROFILE_SCOPE(AZ::Debug::ProfileCategory::LegacyTerrain, "FX_DrawTechnique"); FX_DrawShader_General(ef, pTech); } break; case eSHDT_Fur: FX_DrawShader_Fur(ef, pTech); break; case eSHDT_CustomDraw: case eSHDT_Sky: if (m_RP.m_pRE) { EF_Scissor(false, 0, 0, 0, 0); if (pTech && pTech->m_Passes.Num()) { m_RP.m_pRE->mfDraw(ef, &pTech->m_Passes[0]); } else { m_RP.m_pRE->mfDraw(ef, NULL); } } break; default: assert(0); } if (m_RP.m_ObjFlags & FOB_SELECTED) { FX_SelectTechnique(ef, pTech); } } #if defined(HW_INSTANCING_ENABLED) static void sDetectInstancing(CShader* pShader, CRenderObject* pObj) { CRenderer* rd = gRenDev; SRenderPipeline& RESTRICT_REFERENCE rRP = rd->m_RP; if (!CRenderer::CV_r_geominstancing || rd->m_bUseGPUFriendlyBatching[rRP.m_nProcessThreadID] || !(pShader->m_Flags & EF_SUPPORTSINSTANCING) || CRenderer::CV_r_measureoverdraw || // don't instance in motion blur pass or post 3d render rRP.m_PersFlags2 & RBPF2_POST_3D_RENDERER_PASS || //rRP.m_PersFlags2 & RBPF2_MOTIONBLURPASS || // only instance meshes !rRP.m_pRE || rRP.m_pRE->mfGetType() != eDATA_Mesh ) { rRP.m_FlagsPerFlush &= ~RBSI_INSTANCED; return; } int i = 0, nLastRE = rRP.m_nLastRE; for (; i <= nLastRE; i++) { int nRIs = rRP.m_RIs[i].Num(); // instance even with conditional rendering - && RIs[0]->nOcclQuery<0 if (nRIs > CRenderer::m_iGeomInstancingThreshold || (rRP.m_FlagsPerFlush & RBSI_INSTANCED)) { rRP.m_FlagsPerFlush |= RBSI_INSTANCED; break; } } if (i > rRP.m_nLastRE) { rRP.m_FlagsPerFlush &= ~RBSI_INSTANCED; } } #endif void CD3D9Renderer::FX_SetAlphaTestState(float alphaRef) { if (!(m_RP.m_PersFlags2 & RBPF2_NOALPHATEST)) { const int nAlphaRef = int(alphaRef * 255.0f); m_RP.m_MaterialAlphaRef = nAlphaRef; m_RP.m_MaterialStateOr = GS_ALPHATEST_GEQUAL | GS_DEPTHWRITE; m_RP.m_MaterialStateAnd = GS_ALPHATEST_MASK; } else { m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ALPHATEST]; } } // Set/Restore shader resources overrided states bool CD3D9Renderer::FX_SetResourcesState() { FUNCTION_PROFILER_RENDER_FLAT if (!m_RP.m_pShader) { return false; } m_RP.m_MaterialStateOr = 0; m_RP.m_MaterialStateAnd = 0; if (!m_RP.m_pShaderResources) { return true; } PrefetchLine(m_RP.m_pShaderResources, 0); //Shader Resources fit in a cache line, but they're not 128-byte aligned! 
We are likely going PrefetchLine(m_RP.m_pShaderResources, 124); // to cache miss on access to m_ResFlags but will hopefully avoid later ones if (m_RP.m_pShader->m_Flags2 & EF2_IGNORERESOURCESTATES) { return true; } m_RP.m_ShaderTexResources[EFTT_DECAL_OVERLAY] = NULL; const CShaderResources* pRes = m_RP.m_pShaderResources; const uint32 uResFlags = pRes->m_ResFlags; if (uResFlags & MTL_FLAG_NOTINSTANCED) { m_RP.m_FlagsPerFlush &= ~RBSI_INSTANCED; } bool bRes = true; if (uResFlags & MTL_FLAG_2SIDED) { D3DSetCull(eCULL_None); m_RP.m_FlagsPerFlush |= RBSI_LOCKCULL; } if (pRes->IsAlphaTested()) { FX_SetAlphaTestState(pRes->GetAlphaRef()); } if (pRes->IsTransparent()) { if (!(m_RP.m_PersFlags2 & RBPF2_NOALPHABLEND)) { const float fOpacity = pRes->GetStrengthValue(EFTT_OPACITY); m_RP.m_MaterialStateAnd |= GS_DEPTHWRITE | GS_BLEND_MASK; m_RP.m_MaterialStateOr &= ~GS_DEPTHWRITE; if (uResFlags & MTL_FLAG_ADDITIVE) { m_RP.m_MaterialStateOr |= GS_BLSRC_ONE | GS_BLDST_ONE; m_RP.m_CurGlobalColor[0] = fOpacity; m_RP.m_CurGlobalColor[1] = fOpacity; m_RP.m_CurGlobalColor[2] = fOpacity; m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ADDITIVE_BLENDING]; } else { m_RP.m_MaterialStateOr |= GS_BLSRC_SRCALPHA | GS_BLDST_ONEMINUSSRCALPHA; m_RP.m_CurGlobalColor[3] = fOpacity; } m_RP.m_fCurOpacity = fOpacity; } } { if (pRes->m_pDeformInfo) { m_RP.m_FlagsShader_MDV |= pRes->m_pDeformInfo->m_eType; } m_RP.m_FlagsShader_MDV |= m_RP.m_pCurObject->m_nMDV | m_RP.m_pShader->m_nMDV; if (m_RP.m_ObjFlags & FOB_OWNER_GEOMETRY) { m_RP.m_FlagsShader_MDV &= ~MDV_DEPTH_OFFSET; } } if ((m_RP.m_ObjFlags & FOB_BLEND_WITH_TERRAIN_COLOR) && m_RP.m_pCurObject->m_nTextureID > 0) { m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_BLEND_WITH_TERRAIN_COLOR]; } else { m_RP.m_FlagsShader_RT &= ~g_HWSR_MaskBit[HWSR_BLEND_WITH_TERRAIN_COLOR]; } return true; } //=================================================================================================== static void sBatchStats(SRenderPipeline& rRP) { #if defined(ENABLE_PROFILING_CODE) SPipeStat& rPS = rRP.m_PS[rRP.m_nProcessThreadID]; rPS.m_NumRendMaterialBatches++; rPS.m_NumRendGeomBatches += rRP.m_nLastRE + 1; for (int i = 0; i <= rRP.m_nLastRE; i++) { rPS.m_NumRendInstances += rRP.m_RIs[i].Num(); } #endif } static void sLogFlush(const char* str, CShader* pSH, SShaderTechnique* pTech) { CD3D9Renderer* const __restrict rd = gcpRendD3D; if (rd->m_logFileHandle != AZ::IO::InvalidHandle) { SRenderPipeline& RESTRICT_REFERENCE rRP = rd->m_RP; rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], "%s: '%s.%s', Id:%d, ResId:%d, VF:%d\n", str, pSH->GetName(), pTech ? pTech->m_NameStr.c_str() : "Unknown", pSH->GetID(), rRP.m_pShaderResources ? 
rRP.m_pShaderResources->m_Id : -1, (int)rRP.m_CurVFormat.GetEnum()); if (rRP.m_ObjFlags & FOB_SELECTED) { if (rRP.m_MaterialStateOr & GS_ALPHATEST_MASK) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], " %.3f, %.3f, %.3f (0x%x), (AT) (Selected)\n", rRP.m_pCurObject->m_II.m_Matrix(0, 3), rRP.m_pCurObject->m_II.m_Matrix(1, 3), rRP.m_pCurObject->m_II.m_Matrix(2, 3), rRP.m_pCurObject->m_ObjFlags); } else if (rRP.m_MaterialStateOr & GS_BLEND_MASK) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], " %.3f, %.3f, %.3f (0x%x) (AB) (Dist: %.3f) (Selected)\n", rRP.m_pCurObject->m_II.m_Matrix(0, 3), rRP.m_pCurObject->m_II.m_Matrix(1, 3), rRP.m_pCurObject->m_II.m_Matrix(2, 3), rRP.m_pCurObject->m_ObjFlags, rRP.m_pCurObject->m_fDistance); } else { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], " %.3f, %.3f, %.3f (0x%x), RE: 0x%x (Selected)\n", rRP.m_pCurObject->m_II.m_Matrix(0, 3), rRP.m_pCurObject->m_II.m_Matrix(1, 3), rRP.m_pCurObject->m_II.m_Matrix(2, 3), rRP.m_pCurObject->m_ObjFlags, rRP.m_pRE); } } else { if (rRP.m_MaterialStateOr & GS_ALPHATEST_MASK) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], " %.3f, %.3f, %.3f (0x%x) (AT), Inst: %d, RE: 0x%x (Dist: %.3f)\n", rRP.m_pCurObject->m_II.m_Matrix(0, 3), rRP.m_pCurObject->m_II.m_Matrix(1, 3), rRP.m_pCurObject->m_II.m_Matrix(2, 3), rRP.m_pCurObject->m_ObjFlags, rRP.m_RIs[0].Num(), rRP.m_pRE, rRP.m_pCurObject->m_fDistance); } else if (rRP.m_MaterialStateOr & GS_BLEND_MASK) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], " %.3f, %.3f, %.3f (0x%x) (AB), Inst: %d, RE: 0x%x (Dist: %.3f)\n", rRP.m_pCurObject->m_II.m_Matrix(0, 3), rRP.m_pCurObject->m_II.m_Matrix(1, 3), rRP.m_pCurObject->m_II.m_Matrix(2, 3), rRP.m_pCurObject->m_ObjFlags, rRP.m_RIs[0].Num(), rRP.m_pRE, rRP.m_pCurObject->m_fDistance); } else { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], " %.3f, %.3f, %.3f (0x%x), Inst: %d, RE: 0x%x\n", rRP.m_pCurObject->m_II.m_Matrix(0, 3), rRP.m_pCurObject->m_II.m_Matrix(1, 3), rRP.m_pCurObject->m_II.m_Matrix(2, 3), rRP.m_pCurObject->m_ObjFlags, rRP.m_RIs[0].Num(), rRP.m_pRE); } } if (rRP.m_pRE && rRP.m_pRE->mfGetType() == eDATA_Mesh) { CREMeshImpl* pRE = (CREMeshImpl*) rRP.m_pRE; CRenderMesh* pRM = pRE->m_pRenderMesh; if (pRM && pRM->m_Chunks.size() && pRM->m_sSource) { int nChunk = -1; for (int i = 0; i < pRM->m_Chunks.size(); i++) { CRenderChunk* pCH = &pRM->m_Chunks[i]; if (pCH->pRE == pRE) { nChunk = i; break; } } rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], " Mesh: %s (Chunk: %d)\n", pRM->m_sSource.c_str(), nChunk); } } } } void CD3D9Renderer::FX_RefractionPartialResolve() { CD3D9Renderer* const __restrict rd = gcpRendD3D; SRenderPipeline& RESTRICT_REFERENCE rRP = rd->m_RP; { SRenderObjData* ObjData = rRP.m_pCurObject->GetObjData(); if (ObjData) { uint8 screenBounds[4]; screenBounds[0] = ObjData->m_screenBounds[0]; screenBounds[1] = ObjData->m_screenBounds[1]; screenBounds[2] = ObjData->m_screenBounds[2]; screenBounds[3] = ObjData->m_screenBounds[3]; float boundsI2F[] = { (float)(screenBounds[0] << 4), (float)(screenBounds[1] << 4), (float)(min(screenBounds[2] << 4, GetWidth())), (float)(min(screenBounds[3] << 4, GetHeight())) }; if (((screenBounds[2] - screenBounds[0]) && (screenBounds[3] - screenBounds[1])) && !((rRP.m_nCurrResolveBounds[0] == screenBounds[0]) && (rRP.m_nCurrResolveBounds[1] == screenBounds[1]) && (rRP.m_nCurrResolveBounds[2] == screenBounds[2]) && (rRP.m_nCurrResolveBounds[3] == screenBounds[3]))) { rRP.m_nCurrResolveBounds[0] = 
screenBounds[0]; rRP.m_nCurrResolveBounds[1] = screenBounds[1]; rRP.m_nCurrResolveBounds[2] = screenBounds[2]; rRP.m_nCurrResolveBounds[3] = screenBounds[3]; int boundsF2I[] = { int(boundsI2F[0] * m_RP.m_CurDownscaleFactor.x), int(boundsI2F[1] * m_RP.m_CurDownscaleFactor.y), int(boundsI2F[2] * m_RP.m_CurDownscaleFactor.x), int(boundsI2F[3] * m_RP.m_CurDownscaleFactor.y) }; int currScissorX, currScissorY, currScissorW, currScissorH; CTexture* pTarget = CTexture::s_ptexCurrSceneTarget; //cache RP states - Probably a bit excessive, but want to be safe CShaderResources* currRes = rd->m_RP.m_pShaderResources; CShader* currShader = rd->m_RP.m_pShader; int currShaderTechnique = rd->m_RP.m_nShaderTechnique; SShaderTechnique* currTechnique = rd->m_RP.m_pCurTechnique; uint32 currCommitFlags = rd->m_RP.m_nCommitFlags; uint32 currFlagsShaderBegin = rd->m_RP.m_nFlagsShaderBegin; ECull currCull = m_RP.m_eCull; float currVPMinZ = rd->m_NewViewport.fMinZ; // Todo: Add to GetViewport / SetViewport float currVPMaxZ = rd->m_NewViewport.fMaxZ; D3DSetCull(eCULL_None); bool bScissored = EF_GetScissorState(currScissorX, currScissorY, currScissorW, currScissorH); int newScissorX = int(boundsF2I[0]); int newScissorY = int(boundsF2I[1]); int newScissorW = max(0, min(int(boundsF2I[2]), GetWidth()) - newScissorX); int newScissorH = max(0, min(int(boundsF2I[3]), GetHeight()) - newScissorY); EF_Scissor(true, newScissorX, newScissorY, newScissorW, newScissorH); FX_ScreenStretchRect(pTarget); EF_Scissor(bScissored, currScissorX, currScissorY, currScissorW, currScissorH); D3DSetCull(currCull); //restore RP states rd->m_RP.m_pShaderResources = currRes; rd->m_RP.m_pShader = currShader; rd->m_RP.m_nShaderTechnique = currShaderTechnique; rd->m_RP.m_pCurTechnique = currTechnique; rd->m_RP.m_nCommitFlags = currCommitFlags | FC_MATERIAL_PARAMS; rd->m_RP.m_nFlagsShaderBegin = currFlagsShaderBegin; rd->m_NewViewport.fMinZ = currVPMinZ; rd->m_NewViewport.fMaxZ = currVPMaxZ; #if REFRACTION_PARTIAL_RESOLVE_STATS { const int x1 = (screenBounds[0] << 4); const int y1 = (screenBounds[1] << 4); const int x2 = (screenBounds[2] << 4); const int y2 = (screenBounds[3] << 4); const int resolveWidth = x2 - x1; const int resolveHeight = y2 - y1; const int resolvePixelCount = resolveWidth * resolveHeight; // Update stats SPipeStat& pipeStat = rd->m_RP.m_PS[rd->m_RP.m_nProcessThreadID]; pipeStat.m_refractionPartialResolveCount++; pipeStat.m_refractionPartialResolvePixelCount += resolvePixelCount; const float resolveCostConversion = 18620398.0f; pipeStat.m_fRefractionPartialResolveEstimatedCost += ((float)resolvePixelCount / resolveCostConversion); #if REFRACTION_PARTIAL_RESOLVE_DEBUG_VIEWS if (CRenderer::CV_r_RefractionPartialResolvesDebug == eRPR_DEBUG_VIEW_2D_AREA || CRenderer::CV_r_RefractionPartialResolvesDebug == eRPR_DEBUG_VIEW_2D_AREA_OVERLAY) { // Render 2d areas additively on screen IRenderAuxGeom* pAuxRenderer = gEnv->pRenderer->GetIRenderAuxGeom(); if (pAuxRenderer) { SAuxGeomRenderFlags oldRenderFlags = pAuxRenderer->GetRenderFlags(); SAuxGeomRenderFlags newRenderFlags; newRenderFlags.SetDepthTestFlag(e_DepthTestOff); newRenderFlags.SetAlphaBlendMode(e_AlphaAdditive); newRenderFlags.SetMode2D3DFlag(e_Mode2D); pAuxRenderer->SetRenderFlags(newRenderFlags); const float screenWidth = (float)GetWidth(); const float screenHeight = (float)GetHeight(); // Calc resolve area const float left = x1 / screenWidth; const float top = y1 / screenHeight; const float right = x2 / screenWidth; const float bottom = y2 / screenHeight; // Render resolve 
area ColorB areaColor(20, 0, 0, 255); if (CRenderer::CV_r_RefractionPartialResolvesDebug == eRPR_DEBUG_VIEW_2D_AREA_OVERLAY) { int val = (pipeStat.m_refractionPartialResolveCount) % 3; areaColor = ColorB((val == 0) ? 0 : 128, (val == 1) ? 0 : 128, (val == 2) ? 0 : 128, 255); } const uint vertexCount = 6; const Vec3 vert[vertexCount] = { Vec3(left, top, 0.0f), Vec3(left, bottom, 0.0f), Vec3(right, top, 0.0f), Vec3(left, bottom, 0.0f), Vec3(right, bottom, 0.0f), Vec3(right, top, 0.0f) }; pAuxRenderer->DrawTriangles(vert, vertexCount, areaColor); // Set previous Aux render flags back again pAuxRenderer->SetRenderFlags(oldRenderFlags); } } #endif // REFRACTION_PARTIAL_RESOLVE_DEBUG_VIEWS } #endif // REFRACTION_PARTIAL_RESOLVE_STATS } } } } // Flush current render item void CD3D9Renderer::FX_FlushShader_General() { FUNCTION_PROFILER_RENDER_FLAT CD3D9Renderer* const __restrict rd = gcpRendD3D; SRenderPipeline& RESTRICT_REFERENCE rRP = rd->m_RP; if (!rRP.m_pRE && !rRP.m_RendNumVerts) { return; } CShader* ef = rRP.m_pShader; if (!ef) { return; } const CShaderResources* rsr = rRP.m_pShaderResources; if ((ef->m_Flags & EF_SUPPORTSDEFERREDSHADING_FULL) && (rRP.m_PersFlags2 & RBPF2_FORWARD_SHADING_PASS) && (!rsr->IsEmissive())) { return; } SThreadInfo& RESTRICT_REFERENCE rTI = rRP.m_TI[rRP.m_nProcessThreadID]; assert(!(rTI.m_PersFlags & RBPF_SHADOWGEN)); assert(!(rRP.m_nBatchFilter & FB_Z)); if (!rRP.m_sExcludeShader.empty()) { char nm[1024]; azstrcpy(nm, AZ_ARRAY_SIZE(nm), ef->GetName()); azstrlwr(nm, AZ_ARRAY_SIZE(nm)); if (strstr(rRP.m_sExcludeShader.c_str(), nm)) { return; } } #ifdef DO_RENDERLOG if (rd->m_logFileHandle != AZ::IO::InvalidHandle && CV_r_log == 3) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], "\n\n.. Start %s flush: '%s' ..\n", "General", ef->GetName()); } #endif #ifndef _RELEASE sBatchStats(rRP); #endif CRenderObject* pObj = rRP.m_pCurObject; PROFILE_SHADER_SCOPE; if (rd->m_RP.m_pRE) { rd->m_RP.m_pRE = rd->m_RP.m_RIs[0][0]->pElem; } #if defined(HW_INSTANCING_ENABLED) sDetectInstancing(ef, pObj); #endif // Techniques draw cycle... 
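// Illustrative sketch (not engine code): the "techniques draw cycle" below first picks one technique
// type from the active batch-filter bits, in priority order. A minimal standalone model of that
// mapping follows; the bit values and slot indices here are placeholders, not the real FB_* / TTYPE_* constants.
#if 0
#include <cstdint>

enum : uint32_t
{
    SKETCH_FB_MOTIONBLUR    = 1u << 0,
    SKETCH_FB_CUSTOM_RENDER = 1u << 1,
    SKETCH_FB_SOFTALPHATEST = 1u << 2,
    SKETCH_FB_DEBUG         = 1u << 3,
};

// Returns the technique slot for the first matching batch bit, or -1 for the default technique.
inline int SelectTechniqueSlot(uint32_t batchFilter)
{
    if (batchFilter & SKETCH_FB_MOTIONBLUR)    { return 0; } // e.g. TTYPE_MOTIONBLURPASS
    if (batchFilter & SKETCH_FB_CUSTOM_RENDER) { return 1; } // e.g. TTYPE_CUSTOMRENDERPASS
    if (batchFilter & SKETCH_FB_SOFTALPHATEST) { return 2; } // e.g. TTYPE_SOFTALPHATESTPASS
    if (batchFilter & SKETCH_FB_DEBUG)         { return 3; } // e.g. TTYPE_DEBUG
    return -1;
}
#endif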
SShaderTechnique* __restrict pTech = ef->mfGetStartTechnique(rRP.m_nShaderTechnique); if (pTech) { uint32 flags = (FB_CUSTOM_RENDER | FB_MOTIONBLUR | FB_SOFTALPHATEST | FB_WATER_REFL | FB_WATER_CAUSTIC); if (rRP.m_pShaderResources && !(rRP.m_nBatchFilter & flags)) { uint32 i; // Update render targets if necessary if (!(rTI.m_PersFlags & RBPF_DRAWTOTEXTURE)) { uint32 targetNum = rRP.m_pShaderResources->m_RTargets.Num(); const CShaderResources* const __restrict pShaderResources = rRP.m_pShaderResources; for (i = 0; i < targetNum; ++i) { SHRenderTarget* pTarg = pShaderResources->m_RTargets[i]; if (pTarg->m_eOrder == eRO_PreDraw) { rd->FX_DrawToRenderTarget(ef, rRP.m_pShaderResources, pObj, pTech, pTarg, 0, rRP.m_pRE); } } targetNum = pTech->m_RTargets.Num(); for (i = 0; i < targetNum; ++i) { SHRenderTarget* pTarg = pTech->m_RTargets[i]; if (pTarg->m_eOrder == eRO_PreDraw) { rd->FX_DrawToRenderTarget(ef, rRP.m_pShaderResources, pObj, pTech, pTarg, 0, rRP.m_pRE); } } } } rRP.m_pRootTechnique = pTech; flags = (FB_MOTIONBLUR | FB_CUSTOM_RENDER | FB_SOFTALPHATEST | FB_DEBUG | FB_WATER_REFL | FB_WATER_CAUSTIC | FB_PARTICLES_THICKNESS); if (rRP.m_nBatchFilter & flags) { int nTech = -1; if (rRP.m_nBatchFilter & FB_MOTIONBLUR) { nTech = TTYPE_MOTIONBLURPASS; } else if (rRP.m_nBatchFilter & FB_CUSTOM_RENDER) { nTech = TTYPE_CUSTOMRENDERPASS; } else if (rRP.m_nBatchFilter & FB_SOFTALPHATEST) { nTech = TTYPE_SOFTALPHATESTPASS; } else if (rRP.m_nBatchFilter & FB_WATER_REFL) { nTech = TTYPE_WATERREFLPASS; } else if (rRP.m_nBatchFilter & FB_WATER_CAUSTIC) { nTech = TTYPE_WATERCAUSTICPASS; } else if (rRP.m_nBatchFilter & FB_PARTICLES_THICKNESS) { nTech = TTYPE_PARTICLESTHICKNESSPASS; } else if (rRP.m_nBatchFilter & FB_DEBUG) { nTech = TTYPE_DEBUG; } if (nTech >= 0 && pTech->m_nTechnique[nTech] > 0) { assert(pTech->m_nTechnique[nTech] < (int)ef->m_HWTechniques.Num()); pTech = ef->m_HWTechniques[pTech->m_nTechnique[nTech]]; } rRP.m_nShaderTechniqueType = nTech; } #ifndef _RELEASE if (CV_r_debugrendermode) { if (CV_r_debugrendermode & 1) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_DEBUG0]; } if (CV_r_debugrendermode & 2) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_DEBUG1]; } if (CV_r_debugrendermode & 4) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_DEBUG2]; } if (CV_r_debugrendermode & 8) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_DEBUG3]; } } #endif if (CRenderer::CV_r_DeferredShadingLBuffersFmt == 2) { rd->m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_DEFERRED_RENDER_TARGET_OPTIMIZATION]; } if (CRenderer::CV_r_SlimGBuffer) { rd->m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_SLIM_GBUFFER]; } // If the object is transparent and if the object has the UAV bound. 
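// In other words: when the object is transparent and the multi-layer UAV is bound,
// ConfigureShaderFlags(rRP.m_FlagsShader_RT) below presumably ORs the multi-layer alpha-blend (OIT)
// permutation bit(s) into the per-draw runtime mask. The general flag-composition pattern used
// throughout this function is sketched standalone here; bit positions and names are placeholders.
#if 0
#include <cstdint>

constexpr uint64_t kBitFog           = 1ull << 0;
constexpr uint64_t kBitNearest       = 1ull << 1;
constexpr uint64_t kBitSoftParticles = 1ull << 2;

inline uint64_t BuildRuntimeFlags(bool fogEnabled, bool nearestObject, bool softParticles)
{
    uint64_t flags = 0;
    if (fogEnabled)    { flags |= kBitFog; }
    if (nearestObject) { flags |= kBitNearest; }
    if (softParticles) { flags |= kBitSoftParticles; }
    // The engine keeps this mask in rRP.m_FlagsShader_RT and uses it to select a compiled shader permutation.
    return flags;
}
#endif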
bool multilayerUAVBound = (rRP.m_ObjFlags & FOB_AFTER_WATER) != 0;
if (rRP.m_pShaderResources && rRP.m_pShaderResources->IsTransparent() && multilayerUAVBound)
{
    MultiLayerAlphaBlendPass::GetInstance().ConfigureShaderFlags(rRP.m_FlagsShader_RT);
}

if (!rd->FX_SetResourcesState())
{
    return;
}

// Handle emissive materials
CShaderResources* pCurRes = rRP.m_pShaderResources;
if (pCurRes && pCurRes->IsEmissive() && !pCurRes->IsTransparent() && (rRP.m_PersFlags2 & RBPF2_HDR_FP16))
{
    rRP.m_MaterialStateAnd |= GS_BLEND_MASK;
    rRP.m_MaterialStateOr = (rRP.m_MaterialStateOr & ~GS_BLEND_MASK) | (GS_BLSRC_ONE | GS_BLDST_ONE);
    rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ADDITIVE_BLENDING];
}
else if (rRP.m_ObjFlags & FOB_BENDED)
{
    rRP.m_FlagsShader_MDV |= MDV_BENDING;
}
rRP.m_FlagsShader_RT |= pObj->m_nRTMask;

#ifdef TESSELLATION_RENDERER
if ((pObj->m_ObjFlags & FOB_NEAREST) || !(pObj->m_ObjFlags & FOB_ALLOW_TESSELLATION))
{
    rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_NO_TESSELLATION];
}
#endif

if (!(rRP.m_PersFlags2 & RBPF2_NOSHADERFOG) && rTI.m_FS.m_bEnable && !(rRP.m_ObjFlags & FOB_NO_FOG) || !(rRP.m_PersFlags2 & RBPF2_ALLOW_DEFERREDSHADING))
{
    rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_FOG];
    if (CRenderer::CV_r_VolumetricFog != 0)
    {
        rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_VOLUMETRIC_FOG];
    }
}

rd->m_RP.m_FlagsShader_RT &= ~(g_HWSR_MaskBit[HWSR_FOG_VOLUME_HIGH_QUALITY_SHADER]);
static ICVar* pCVarFogVolumeShadingQuality = gEnv->pConsole->GetCVar("e_FogVolumeShadingQuality");
if (pCVarFogVolumeShadingQuality->GetIVal() > 0)
{
    rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_FOG_VOLUME_HIGH_QUALITY_SHADER];
}

const uint64 objFlags = rRP.m_ObjFlags;
if (objFlags & FOB_NEAREST)
{
    rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_NEAREST];
}

if (SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID] == 0)
{
    // Enable the soft-particle shader flag for soft particles, or when half-resolution particles are enabled.
    // Note: the half-res render pass relies on the soft-particle flag to test the z-buffer.
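    // Illustrative sketch (not engine code): the condition that follows, written as a standalone predicate.
    // The flag value is a placeholder; the engine tests FOB_SOFT_PARTICLE on the object and
    // RBPF2_HALFRES_PARTICLES on the pipeline flags.
#if 0
    #include <cstdint>

    constexpr uint64_t kSoftParticleObjFlag = 1ull << 0; // placeholder for FOB_SOFT_PARTICLE

    inline bool ShouldUseSoftParticleShader(bool cvarSoftIntersection, uint64_t objectFlags, bool halfResParticlePass)
    {
        // Soft intersection requested for this object, or the half-res pass needs the flag to read depth.
        return (cvarSoftIntersection && (objectFlags & kSoftParticleObjFlag) != 0) || halfResParticlePass;
    }
#endif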
if ((CV_r_ParticlesSoftIsec && (objFlags & FOB_SOFT_PARTICLE)) || (rRP.m_PersFlags2 & RBPF2_HALFRES_PARTICLES)) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_SOFT_PARTICLE]; } } if (ef->m_eShaderType == eST_Particle) { bool takingScreenShot = (gcpRendD3D->m_screenShotType != 0); if ((objFlags & FOB_MOTION_BLUR) && CV_r_MotionBlur && (!takingScreenShot || CV_r_MotionBlurScreenShot)) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_MOTION_BLUR]; } if ((objFlags & (FOB_PARTICLE_SHADOWS))) { rd->FX_SetupShadowsForTransp(); } } else if (ef->m_Flags2 & EF2_ALPHABLENDSHADOWS) { rd->FX_SetupShadowsForTransp(); } if (rRP.m_pCurObject->m_RState & OS_ENVIRONMENT_CUBEMAP) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ENVIRONMENT_CUBEMAP]; } if (rRP.m_pCurObject->m_RState & OS_ANIM_BLEND) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_ANIM_BLEND]; } if (objFlags & FOB_POINT_SPRITE) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_SPRITE]; } // Only enable for resources not using zpass if (!(rRP.m_pRLD->m_nBatchFlags[rRP.m_nSortGroupID][rRP.m_nPassGroupID] & FB_Z) || (ef->m_Flags & EF_DECAL)) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_NOZPASS]; } rRP.m_pCurTechnique = pTech; if ((rRP.m_nBatchFilter & (FB_MULTILAYERS | FB_DEBUG)) && !rRP.m_pReplacementShader) { if (rRP.m_nBatchFilter & FB_MULTILAYERS) { rd->FX_DrawMultiLayers(); } if (rRP.m_nBatchFilter & FB_DEBUG) { rd->FX_DrawDebugPasses(); } } else { rd->FX_DrawTechnique(ef, pTech); } }//pTech else if (ef->m_eSHDType == eSHDT_CustomDraw) { rd->FX_DrawTechnique(ef, 0); } #ifdef DO_RENDERLOG sLogFlush("Flush General", ef, pTech); #endif } void CD3D9Renderer::FX_FlushShader_ShadowGen() { CD3D9Renderer* const __restrict rd = gcpRendD3D; SRenderPipeline& RESTRICT_REFERENCE rRP = rd->m_RP; if (!rRP.m_pRE && !rRP.m_RendNumVerts) { return; } CShader* ef = rRP.m_pShader; if (!ef) { return; } if (!rRP.m_sExcludeShader.empty()) { char nm[1024]; azstrcpy(nm, AZ_ARRAY_SIZE(nm), ef->GetName()); azstrlwr(nm, AZ_ARRAY_SIZE(nm)); if (strstr(rRP.m_sExcludeShader.c_str(), nm)) { return; } } SThreadInfo& RESTRICT_REFERENCE rTI = rRP.m_TI[rRP.m_nProcessThreadID]; assert(rTI.m_PersFlags & RBPF_SHADOWGEN); CRenderObject* pObj = rRP.m_pCurObject; #ifdef DO_RENDERLOG if (rd->m_logFileHandle != AZ::IO::InvalidHandle) { if (CV_r_log == 3) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], "\n\n.. 
Start %s flush: '%s' ..\n", "ShadowGen", ef->GetName()); } if (CV_r_log >= 3) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], "\n"); } } #endif SPipeStat& rPS = rRP.m_PS[rRP.m_nProcessThreadID]; #ifndef _RELEASE sBatchStats(rRP); #endif PROFILE_SHADER_SCOPE; #if defined(HW_INSTANCING_ENABLED) sDetectInstancing(ef, pObj); #endif SShaderTechnique* __restrict pTech = ef->mfGetStartTechnique(rRP.m_nShaderTechnique); assert(pTech); if (!pTech || pTech->m_nTechnique[TTYPE_SHADOWGEN] < 0) { return; } rRP.m_nShaderTechniqueType = TTYPE_SHADOWGEN; if (rd->m_RP.m_pRE) { rd->m_RP.m_pRE = rd->m_RP.m_RIs[0][0]->pElem; } rRP.m_pRootTechnique = pTech; pTech = ef->m_HWTechniques[pTech->m_nTechnique[TTYPE_SHADOWGEN]]; const SRenderPipeline::ShadowInfo& shadowInfo = rd->m_RP.m_ShadowInfo; if (ef->m_eSHDType == eSHDT_Terrain) { if (shadowInfo.m_pCurShadowFrustum->m_Flags & DLF_DIRECTIONAL) { rd->D3DSetCull(eCULL_None); rd->m_RP.m_FlagsPerFlush |= RBSI_LOCKCULL; } else { rd->D3DSetCull(eCULL_Front); rd->m_RP.m_FlagsPerFlush |= RBSI_LOCKCULL; } } // RSMs #if defined(FEATURE_SVO_GI) if (CSvoRenderer::GetRsmColorMap(*shadowInfo.m_pCurShadowFrustum)) { rd->m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_SAMPLE4]; if (!(shadowInfo.m_pCurShadowFrustum->m_Flags & DLF_DIRECTIONAL)) { rd->m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_CUBEMAP0] | g_HWSR_MaskBit[HWSR_HW_PCF_COMPARE]; } rd->D3DSetCull(eCULL_Back); const uint64_t objFlags = rRP.m_ObjFlags; if (objFlags & FOB_DECAL_TEXGEN_2D) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_DECAL_TEXGEN_2D]; } } else #endif if (rRP.m_PersFlags2 & (RBPF2_DRAWTOCUBE | RBPF2_DISABLECOLORWRITES)) { if (rRP.m_PersFlags2 & RBPF2_DISABLECOLORWRITES) { rd->m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[ HWSR_HW_PCF_COMPARE ]; } if (rRP.m_PersFlags2 & RBPF2_DRAWTOCUBE) { rd->m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_CUBEMAP0]; } } #ifdef TESSELLATION_RENDERER if ((pObj->m_ObjFlags & FOB_NEAREST) || !(pObj->m_ObjFlags & FOB_ALLOW_TESSELLATION)) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_NO_TESSELLATION]; } #endif if (!rd->FX_SetResourcesState()) { return; } if ((rRP.m_ObjFlags & FOB_BENDED)) { rRP.m_FlagsShader_MDV |= MDV_BENDING; } rRP.m_FlagsShader_RT |= rRP.m_pCurObject->m_nRTMask; if (rRP.m_ObjFlags & FOB_NEAREST) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_NEAREST]; } if (rRP.m_ObjFlags & FOB_DISSOLVE) { rd->m_RP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_DISSOLVE]; } rRP.m_pCurTechnique = pTech; rd->FX_DrawTechnique(ef, pTech); #ifdef DO_RENDERLOG sLogFlush("Flush ShadowGen", ef, pTech); #endif } void CD3D9Renderer::FX_FlushShader_ZPass() { CD3D9Renderer* const __restrict rd = gcpRendD3D; SRenderPipeline& RESTRICT_REFERENCE rRP = rd->m_RP; if (!rRP.m_pRE && !rRP.m_RendNumVerts) { return; } CShader* ef = rRP.m_pShader; if (!ef) { return; } if (!rRP.m_sExcludeShader.empty()) { char nm[1024]; azstrcpy(nm, AZ_ARRAY_SIZE(nm), ef->GetName()); azstrlwr(nm, AZ_ARRAY_SIZE(nm)); if (strstr(rRP.m_sExcludeShader.c_str(), nm)) { return; } } SThreadInfo& RESTRICT_REFERENCE rTI = rRP.m_TI[rRP.m_nProcessThreadID]; assert(!(rTI.m_PersFlags & RBPF_SHADOWGEN)); assert(rRP.m_nBatchFilter & (FB_Z | FB_ZPREPASS | FB_POST_3D_RENDER)); #ifdef DO_RENDERLOG if (rd->m_logFileHandle != AZ::IO::InvalidHandle) { if (CV_r_log == 3) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], "\n\n.. 
Start %s flush: '%s' ..\n", "ZPass", ef->GetName()); } else if (CV_r_log >= 3) { rd->Logv(SRendItem::m_RecurseLevel[rRP.m_nProcessThreadID], "\n"); } } #endif if (rd->m_RP.m_pRE) { rd->m_RP.m_pRE = rd->m_RP.m_RIs[0][0]->pElem; } #ifndef _RELEASE sBatchStats(rRP); #endif PROFILE_SHADER_SCOPE; #if defined(HW_INSTANCING_ENABLED) sDetectInstancing(ef, rRP.m_pCurObject); #endif // Techniques draw cycle... SShaderTechnique* __restrict pTech = ef->mfGetStartTechnique(rRP.m_nShaderTechnique); const uint32 nTechniqueID = (rRP.m_nBatchFilter & FB_Z) ? TTYPE_Z : TTYPE_ZPREPASS; if (!pTech || pTech->m_nTechnique[nTechniqueID] < 0) { return; } rRP.m_nShaderTechniqueType = nTechniqueID; rRP.m_pRootTechnique = pTech; // Skip z-pass if appropriate technique does not exist assert(pTech->m_nTechnique[nTechniqueID] < (int)ef->m_HWTechniques.Num()); pTech = ef->m_HWTechniques[pTech->m_nTechnique[nTechniqueID]]; if (!rd->FX_SetResourcesState()) { return; } rRP.m_FlagsShader_RT |= rRP.m_pCurObject->m_nRTMask; if (rRP.m_ObjFlags & FOB_BENDED) { rRP.m_FlagsShader_MDV |= MDV_BENDING; } if (rRP.m_PersFlags2 & RBPF2_MOTIONBLURPASS) { if ((rRP.m_pCurObject->m_ObjFlags & (FOB_MOTION_BLUR | FOB_HAS_PREVMATRIX)) && (rRP.m_PersFlags2 & RBPF2_NOALPHABLEND)) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_MOTION_BLUR]; } else { rRP.m_FlagsShader_RT &= ~g_HWSR_MaskBit[HWSR_MOTION_BLUR]; } } #ifdef TESSELLATION_RENDERER if ((rRP.m_pCurObject->m_ObjFlags & FOB_NEAREST) || !(rRP.m_pCurObject->m_ObjFlags & FOB_ALLOW_TESSELLATION)) { rRP.m_FlagsShader_RT |= g_HWSR_MaskBit[HWSR_NO_TESSELLATION]; } #endif // Set VisArea and Dynamic objects Stencil Ref if (CRenderer::CV_r_DeferredShadingStencilPrepass) { if (rRP.m_nPassGroupID != EFSLIST_TERRAINLAYER && rRP.m_nPassGroupID != EFSLIST_DECAL && !(rRP.m_nBatchFilter & FB_ZPREPASS)) { rRP.m_ForceStateOr |= GS_STENCIL; uint32 nStencilRef = CRenderer::CV_r_VisAreaClipLightsPerPixel ? 0 : (rd->m_RP.m_RIs[0][0]->nStencRef | BIT_STENCIL_INSIDE_CLIPVOLUME); // Here we check if an object can receive decals. bool bObjectAcceptsDecals = !(rRP.m_pCurObject->m_NoDecalReceiver); if (bObjectAcceptsDecals) { nStencilRef |= (!(rRP.m_pCurObject->m_ObjFlags & FOB_DYNAMIC_OBJECT) || CV_r_deferredDecalsOnDynamicObjects ? BIT_STENCIL_RESERVED : 0); } const int32 stencilState = STENC_FUNC(FSS_STENCFUNC_ALWAYS) | STENCOP_FAIL(FSS_STENCOP_KEEP) | STENCOP_ZFAIL(FSS_STENCOP_KEEP) | STENCOP_PASS(FSS_STENCOP_REPLACE); rd->FX_SetStencilState(stencilState, nStencilRef, 0xFF, 0xFF); } else { rRP.m_ForceStateOr &= ~GS_STENCIL; } } rRP.m_pCurTechnique = pTech; rd->FX_DrawTechnique(ef, pTech); rRP.m_ForceStateOr &= ~GS_STENCIL; //reset stencil AND mask always rRP.m_CurStencilRefAndMask = 0; #ifdef DO_RENDERLOG sLogFlush("Flush ZPass", ef, pTech); #endif } //=================================================================================================== static int sTexLimitRes(uint32 nSrcsize, uint32 nDstSize) { while (true) { if (nSrcsize > nDstSize) { nSrcsize >>= 1; } else { break; } } return nSrcsize; } static Matrix34 sMatrixLookAt(const Vec3& dir, const Vec3& up, float rollAngle = 0) { Matrix34 M; // LookAt transform. Vec3 xAxis, yAxis, zAxis; Vec3 upVector = up; yAxis = -dir.GetNormalized(); //if (zAxis.x == 0.0 && zAxis.z == 0) up.Set( -zAxis.y,0,0 ); else up.Set( 0,1.0f,0 ); xAxis = upVector.Cross(yAxis).GetNormalized(); zAxis = xAxis.Cross(yAxis).GetNormalized(); // OpenGL kind of matrix. 
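// Illustrative standalone sketch of the look-at basis built above: yAxis = -normalize(dir),
// xAxis = normalize(up x yAxis), zAxis = normalize(xAxis x yAxis); the assignments below then store
// xAxis, yAxis, zAxis as the first three columns of the 3x4 matrix. Minimal stand-in types, not the
// engine's Vec3/Matrix34.
#if 0
#include <cmath>

struct V3 { float x, y, z; };
static V3 Cross(const V3& a, const V3& b) { return { a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x }; }
static V3 Normalize(const V3& v) { float l = std::sqrt(v.x * v.x + v.y * v.y + v.z * v.z); return { v.x / l, v.y / l, v.z / l }; }

// Computes the three basis vectors of the look-at frame; roll handling is omitted for brevity.
static void LookAtBasis(const V3& dir, const V3& up, V3& xAxis, V3& yAxis, V3& zAxis)
{
    yAxis = Normalize({ -dir.x, -dir.y, -dir.z });
    xAxis = Normalize(Cross(up, yAxis));
    zAxis = Normalize(Cross(xAxis, yAxis));
}
#endif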
M(0, 0) = xAxis.x; M(0, 1) = yAxis.x; M(0, 2) = zAxis.x; M(0, 3) = 0; M(1, 0) = xAxis.y; M(1, 1) = yAxis.y; M(1, 2) = zAxis.y; M(1, 3) = 0; M(2, 0) = xAxis.z; M(2, 1) = yAxis.z; M(2, 2) = zAxis.z; M(2, 3) = 0; if (rollAngle != 0) { Matrix34 RollMtx; RollMtx.SetIdentity(); float cossin[2]; // sincos_tpl(rollAngle, cossin); sincos_tpl(rollAngle, &cossin[1], &cossin[0]); RollMtx(0, 0) = cossin[0]; RollMtx(0, 2) = -cossin[1]; RollMtx(2, 0) = cossin[1]; RollMtx(2, 2) = cossin[0]; // Matrix multiply. M = RollMtx * M; } return M; } void TexBlurAnisotropicVertical(CTexture* pTex, int nAmount, float fScale, float fDistribution, bool bAlphaOnly) { if (!pTex) { return; } SDynTexture* tpBlurTemp = new SDynTexture(pTex->GetWidth(), pTex->GetHeight(), pTex->GetDstFormat(), eTT_2D, FT_STATE_CLAMP, "TempBlurAnisoVertRT"); if (!tpBlurTemp) { return; } tpBlurTemp->Update(pTex->GetWidth(), pTex->GetHeight()); if (!tpBlurTemp->m_pTexture) { SAFE_DELETE(tpBlurTemp); return; } PROFILE_SHADER_SCOPE; // Get current viewport int iTempX, iTempY, iWidth, iHeight; gRenDev->GetViewport(&iTempX, &iTempY, &iWidth, &iHeight); gcpRendD3D->RT_SetViewport(0, 0, pTex->GetWidth(), pTex->GetHeight()); Vec4 vWhite(1.0f, 1.0f, 1.0f, 1.0f); static CCryNameTSCRC pTechName("AnisotropicVertical"); CShader* m_pCurrShader = CShaderMan::s_shPostEffects; uint32 nPasses; m_pCurrShader->FXSetTechnique(pTechName); m_pCurrShader->FXBegin(&nPasses, FEF_DONTSETTEXTURES | FEF_DONTSETSTATES); m_pCurrShader->FXBeginPass(0); gRenDev->FX_SetState(GS_NODEPTHTEST); // setup texture offsets, for texture sampling float s1 = 1.0f / (float) pTex->GetWidth(); float t1 = 1.0f / (float) pTex->GetHeight(); Vec4 pWeightsPS; pWeightsPS.x = 0.25f * t1; pWeightsPS.y = 0.5f * t1; pWeightsPS.z = 0.75f * t1; pWeightsPS.w = 1.0f * t1; pWeightsPS *= -fScale; STexState sTexState = STexState(FILTER_LINEAR, true); static CCryNameR pParam0Name("blurParams0"); for (int p(1); p <= nAmount; ++p) { //Horizontal CShaderMan::s_shPostEffects->FXSetVSFloat(pParam0Name, &pWeightsPS, 1); gcpRendD3D->FX_PushRenderTarget(0, tpBlurTemp->m_pTexture, NULL); gcpRendD3D->RT_SetViewport(0, 0, pTex->GetWidth(), pTex->GetHeight()); pTex->Apply(0, CTexture::GetTexState(sTexState)); PostProcessUtils().DrawFullScreenTri(pTex->GetWidth(), pTex->GetHeight()); gcpRendD3D->FX_PopRenderTarget(0); //Vertical pWeightsPS *= 2.0f; gcpRendD3D->FX_PushRenderTarget(0, pTex, NULL); gcpRendD3D->RT_SetViewport(0, 0, pTex->GetWidth(), pTex->GetHeight()); CShaderMan::s_shPostEffects->FXSetVSFloat(pParam0Name, &pWeightsPS, 1); tpBlurTemp->m_pTexture->Apply(0, CTexture::GetTexState(sTexState)); PostProcessUtils().DrawFullScreenTri(pTex->GetWidth(), pTex->GetHeight()); gcpRendD3D->FX_PopRenderTarget(0); } m_pCurrShader->FXEndPass(); m_pCurrShader->FXEnd(); // Restore previous viewport gcpRendD3D->RT_SetViewport(iTempX, iTempY, iWidth, iHeight); //release dyntexture SAFE_DELETE(tpBlurTemp); } bool CD3D9Renderer::FX_DrawToRenderTarget(CShader* pShader, CShaderResources* pRes, CRenderObject* pObj, SShaderTechnique* pTech, SHRenderTarget* pRT, int nPreprType, IRenderElement* pRE) { if (!pRT) { return false; } int nThreadList = m_pRT->GetThreadList(); uint32 nPrFlags = pRT->m_nFlags; if (nPrFlags & FRT_RENDTYPE_CURSCENE) { return false; } CRenderObject* pPrevIgn = m_RP.m_TI[nThreadList].m_pIgnoreObject; CTexture* Tex = pRT->m_pTarget[0]; SEnvTexture* pEnvTex = NULL; if (nPreprType == SPRID_SCANTEX) { nPrFlags |= FRT_CAMERA_REFLECTED_PLANE; pRT->m_nFlags = nPrFlags; } if (nPrFlags & FRT_RENDTYPE_CURSCENE) { return 
false; } AZ_TRACE_METHOD(); uint32 nWidth = pRT->m_nWidth; uint32 nHeight = pRT->m_nHeight; if (pRT->m_nIDInPool >= 0) { assert((int)CTexture::s_CustomRT_2D->Num() > pRT->m_nIDInPool); if ((int)CTexture::s_CustomRT_2D->Num() <= pRT->m_nIDInPool) { return false; } pEnvTex = &(*CTexture::s_CustomRT_2D)[pRT->m_nIDInPool]; if (nWidth == -1) { nWidth = GetWidth(); } if (nHeight == -1) { nHeight = GetHeight(); } ETEX_Format eTF = pRT->m_eTF; // $HDR if (eTF == eTF_R8G8B8A8 && IsHDRModeEnabled() && m_nHDRType <= 1) { eTF = eTF_R16G16B16A16F; } // Very hi specs render reflections at half res - lower specs (and consoles) at quarter res bool bMakeEnvironmentTexture = false; if (OceanToggle::IsActive()) { float fSizeScale = 0.5f; AZ::OceanEnvironmentBus::BroadcastResult(fSizeScale, &AZ::OceanEnvironmentBus::Events::GetReflectResolutionScale); fSizeScale = clamp_tpl(fSizeScale, 0.0f, 1.0f); nWidth = sTexLimitRes(nWidth, uint32(GetWidth() * fSizeScale)); nHeight = sTexLimitRes(nHeight, uint32(GetHeight() * fSizeScale)); bMakeEnvironmentTexture = (!pEnvTex->m_pTex || pEnvTex->m_pTex->GetFormat() != eTF || pEnvTex->m_pTex->GetWidth() != nWidth || pEnvTex->m_pTex->GetHeight() != nHeight); } else { float fSizeScale = (CV_r_waterreflections_quality == 5) ? 0.5f : 0.25f; nWidth = sTexLimitRes(nWidth, uint32(GetWidth() * fSizeScale)); nHeight = sTexLimitRes(nHeight, uint32(GetHeight() * fSizeScale)); bMakeEnvironmentTexture = (!pEnvTex->m_pTex || pEnvTex->m_pTex->GetFormat() != eTF); } #if AZ_RENDER_TO_TEXTURE_GEM_ENABLED // do not re-create the environment texture for the RTT pass for now, just use the existing one. if (m_RP.m_TI[nThreadList].m_PersFlags & RBPF_RENDER_SCENE_TO_TEXTURE && pEnvTex->m_pTex) { bMakeEnvironmentTexture = false; } #endif // if AZ_RENDER_TO_TEXTURE_GEM_ENABLED // clamping to a reasonable texture size if (nWidth < 32) { nWidth = 32; } if (nHeight < 32) { nHeight = 32; } if (bMakeEnvironmentTexture) { char name[128]; sprintf_s(name, "$RT_2D_%d", m_TexGenID++); int flags = FT_NOMIPS | FT_STATE_CLAMP | FT_DONT_STREAM; pEnvTex->m_pTex = new SDynTexture(nWidth, nHeight, eTF, eTT_2D, flags, name); } assert(nWidth > 0 && nWidth <= m_d3dsdBackBuffer.Width); assert(nHeight > 0 && nHeight <= m_d3dsdBackBuffer.Height); Tex = pEnvTex->m_pTex->m_pTexture; } else if (Tex) { if (Tex->GetCustomID() == TO_RT_2D) { bool bReflect = false; if (nPrFlags & (FRT_CAMERA_REFLECTED_PLANE | FRT_CAMERA_REFLECTED_WATERPLANE)) { bReflect = true; } Matrix33 orientation = Matrix33(GetCamera().GetMatrix()); Ang3 Angs = CCamera::CreateAnglesYPR(orientation); Vec3 Pos = GetCamera().GetPosition(); bool bNeedUpdate = false; pEnvTex = CTexture::FindSuitableEnvTex(Pos, Angs, false, -1, false, pShader, pRes, pObj, bReflect, pRE, &bNeedUpdate); if (!bNeedUpdate) { if (!pEnvTex) { return false; } if (pEnvTex->m_pTex && pEnvTex->m_pTex->m_pTexture) { return true; } } m_RP.m_TI[nThreadList].m_pIgnoreObject = pObj; switch (CRenderer::CV_r_envtexresolution) { case 0: nWidth = 64; break; case 1: nWidth = 128; break; case 2: default: nWidth = 256; break; case 3: nWidth = 512; break; } nHeight = nWidth; if (!pEnvTex || !pEnvTex->m_pTex) { return false; } if (!pEnvTex->m_pTex->m_pTexture) { pEnvTex->m_pTex->Update(nWidth, nHeight); } Tex = pEnvTex->m_pTex->m_pTexture; } } if (m_pRT->IsRenderThread() && Tex && Tex->IsActiveRenderTarget()) { return true; } // always allow for non-mgpu bool bMGPUAllowNextUpdate = gRenDev->GetActiveGPUCount() == 1; ETEX_Format eTF = pRT->m_eTF; // $HDR if (eTF == eTF_R8G8B8A8 && IsHDRModeEnabled() && 
m_nHDRType <= 1) { eTF = eTF_R16G16B16A16F; } if (pEnvTex && (!pEnvTex->m_pTex || pEnvTex->m_pTex->GetFormat() != eTF)) { SAFE_DELETE(pEnvTex->m_pTex); char name[128]; sprintf_s(name, "$RT_2D_%d", m_TexGenID++); int flags = FT_NOMIPS | FT_STATE_CLAMP | FT_DONT_STREAM; pEnvTex->m_pTex = new SDynTexture(nWidth, nHeight, eTF, eTT_2D, flags, name); assert(nWidth > 0 && nWidth <= m_d3dsdBackBuffer.Width); assert(nHeight > 0 && nHeight <= m_d3dsdBackBuffer.Height); pEnvTex->m_pTex->Update(nWidth, nHeight); } bool bEnableAnisotropicBlur = true; switch (pRT->m_eUpdateType) { case eRTUpdate_WaterReflect: { if (!CRenderer::CV_r_waterreflections) { assert(pEnvTex != NULL); if (pEnvTex && pEnvTex->m_pTex && pEnvTex->m_pTex->m_pTexture) { m_pRT->RC_ClearTarget(pEnvTex->m_pTex->m_pTexture, Clr_Empty); } return true; } if (m_RP.m_nLastWaterFrameID == GetFrameID()) { // water reflection already created this frame, share it return true; } I3DEngine* eng = (I3DEngine*)gEnv->p3DEngine; int nVisibleWaterPixelsCount = eng->GetOceanVisiblePixelsCount() / 2; // bug in occlusion query returns 2x more int nPixRatioThreshold = (int)(GetWidth() * GetHeight() * CRenderer::CV_r_waterreflections_min_visible_pixels_update); static int nVisWaterPixCountPrev = nVisibleWaterPixelsCount; float fUpdateFactorMul = 1.0f; float fUpdateDistanceMul = 1.0f; if (nVisWaterPixCountPrev < nPixRatioThreshold / 4) { bEnableAnisotropicBlur = false; fUpdateFactorMul = CV_r_waterreflections_minvis_updatefactormul * 10.0f; fUpdateDistanceMul = CV_r_waterreflections_minvis_updatedistancemul * 5.0f; } else if (nVisWaterPixCountPrev < nPixRatioThreshold) { fUpdateFactorMul = CV_r_waterreflections_minvis_updatefactormul; fUpdateDistanceMul = CV_r_waterreflections_minvis_updatedistancemul; } float fWaterUpdateFactor = CV_r_waterupdateFactor * fUpdateFactorMul; float fWaterUpdateDistance = CV_r_waterupdateDistance * fUpdateDistanceMul; float fTimeUpd = min(0.3f, eng->GetDistanceToSectorWithWater()); fTimeUpd *= fWaterUpdateFactor; //if (fTimeUpd > 1.0f) //fTimeUpd = 1.0f; Vec3 camView = m_RP.m_TI[nThreadList].m_cam.m_viewParameters.ViewDir(); Vec3 camUp = m_RP.m_TI[nThreadList].m_cam.m_viewParameters.vY; m_RP.m_nLastWaterFrameID = GetFrameID(); Vec3 camPos = GetCamera().GetPosition(); float fDistCam = (camPos - m_RP.m_LastWaterPosUpdate).GetLength(); float fDotView = camView * m_RP.m_LastWaterViewdirUpdate; float fDotUp = camUp * m_RP.m_LastWaterUpdirUpdate; float fFOV = GetCamera().GetFov(); if (m_RP.m_fLastWaterUpdate - 1.0f > m_RP.m_TI[nThreadList].m_RealTime) { m_RP.m_fLastWaterUpdate = m_RP.m_TI[nThreadList].m_RealTime; } const float fMaxFovDiff = 0.1f; // no exact test to prevent slowly changing fov causing per frame water reflection updates static bool bUpdateReflection = true; if (bMGPUAllowNextUpdate) { bUpdateReflection = m_RP.m_TI[nThreadList].m_RealTime - m_RP.m_fLastWaterUpdate >= fTimeUpd || fDistCam > fWaterUpdateDistance; bUpdateReflection = bUpdateReflection || fDotView<0.9f || fabs(fFOV - m_RP.m_fLastWaterFOVUpdate)>fMaxFovDiff; } if (bUpdateReflection && bMGPUAllowNextUpdate) { m_RP.m_fLastWaterUpdate = m_RP.m_TI[nThreadList].m_RealTime; m_RP.m_LastWaterViewdirUpdate = camView; m_RP.m_LastWaterUpdirUpdate = camUp; m_RP.m_fLastWaterFOVUpdate = fFOV; m_RP.m_LastWaterPosUpdate = camPos; assert(pEnvTex != NULL); pEnvTex->m_pTex->ResetUpdateMask(); } else if (!bUpdateReflection) { assert(pEnvTex != NULL); PREFAST_ASSUME(pEnvTex != NULL); if (pEnvTex && pEnvTex->m_pTex && pEnvTex->m_pTex->IsValid()) { return true; } } assert(pEnvTex 
!= NULL); PREFAST_ASSUME(pEnvTex != NULL); pEnvTex->m_pTex->SetUpdateMask(); } break; } // Just copy current BB to the render target and exit if (nPrFlags & FRT_RENDTYPE_COPYSCENE) { // Get current render target from the RT stack if (!CRenderer::CV_r_debugrefraction) { FX_ScreenStretchRect(Tex); // should encode hdr format } else { assert(Tex != NULL); m_pRT->RC_ClearTarget(Tex, Clr_Debug); } return true; } I3DEngine* eng = (I3DEngine*)gEnv->p3DEngine; Matrix44A matProj, matView; float plane[4]; bool bUseClipPlane = false; bool bChangedCamera = false; int nPersFlags = m_RP.m_TI[nThreadList].m_PersFlags; //int nPersFlags2 = m_RP.m_TI[nThreadList].m_PersFlags2; static CCamera tmp_cam_mgpu = GetCamera(); CCamera tmp_cam = GetCamera(); CCamera prevCamera = tmp_cam; bool bMirror = false; bool bOceanRefl = false; // Set the camera if (nPrFlags & FRT_CAMERA_REFLECTED_WATERPLANE) { bOceanRefl = true; m_RP.m_TI[nThreadList].m_pIgnoreObject = pObj; float fMinDist = min(SKY_BOX_SIZE * 0.5f, eng->GetDistanceToSectorWithWater()); // 16 is half of skybox size float fMaxDist = eng->GetMaxViewDistance(); Vec3 vPrevPos = tmp_cam.GetPosition(); Plane Pl; Pl.n = Vec3(0, 0, 1); Pl.d = OceanToggle::IsActive() ? OceanRequest::GetOceanLevel() : eng->GetWaterLevel(); if ((vPrevPos | Pl.n) - Pl.d < 0) { Pl.d = -Pl.d; Pl.n = -Pl.n; } plane[0] = Pl.n[0]; plane[1] = Pl.n[1]; plane[2] = Pl.n[2]; plane[3] = -Pl.d; Matrix44 camMat; GetModelViewMatrix(camMat.GetData()); Vec3 vPrevDir = Vec3(-camMat(0, 2), -camMat(1, 2), -camMat(2, 2)); Vec3 vPrevUp = Vec3(camMat(0, 1), camMat(1, 1), camMat(2, 1)); Vec3 vNewDir = Pl.MirrorVector(vPrevDir); Vec3 vNewUp = Pl.MirrorVector(vPrevUp); float fDot = vPrevPos.Dot(Pl.n) - Pl.d; Vec3 vNewPos = vPrevPos - Pl.n * 2.0f * fDot; Matrix34 m = sMatrixLookAt(vNewDir, vNewUp, tmp_cam.GetAngles()[2]); // New position + offset along view direction - minimizes projection artefacts m.SetTranslation(vNewPos + Vec3(vNewDir.x, vNewDir.y, 0)); tmp_cam.SetMatrix(m); assert(pEnvTex); PREFAST_ASSUME(pEnvTex); tmp_cam.SetFrustum((int)(pEnvTex->m_pTex->GetWidth() * tmp_cam.GetProjRatio()), pEnvTex->m_pTex->GetHeight(), tmp_cam.GetFov(), fMinDist, fMaxDist); //tmp_cam.GetFarPlane()); // Allow camera update if (bMGPUAllowNextUpdate) { tmp_cam_mgpu = tmp_cam; } SetCamera(tmp_cam_mgpu); bChangedCamera = true; bUseClipPlane = true; bMirror = true; //m_RP.m_TI[nThreadList].m_PersFlags |= RBPF_MIRRORCULL; } else if (nPrFlags & FRT_CAMERA_REFLECTED_PLANE) { // Mirror case m_RP.m_TI[nThreadList].m_pIgnoreObject = pObj; float fMinDist = 0.25f; float fMaxDist = eng->GetMaxViewDistance(); Vec3 vPrevPos = tmp_cam.GetPosition(); Plane Pl; pRE->mfGetPlane(Pl); //Pl.d = -Pl.d; if (pObj) { Matrix44 mat = pObj->m_II.m_Matrix.GetTransposed(); Pl = TransformPlane(mat, Pl); } if ((vPrevPos | Pl.n) - Pl.d < 0) { Pl.d = -Pl.d; Pl.n = -Pl.n; } plane[0] = Pl.n[0]; plane[1] = Pl.n[1]; plane[2] = Pl.n[2]; plane[3] = -Pl.d; //this is the new code to calculate the reflection matrix Matrix44A camMat; GetModelViewMatrix(camMat.GetData()); Vec3 vPrevDir = Vec3(-camMat(0, 2), -camMat(1, 2), -camMat(2, 2)); Vec3 vPrevUp = Vec3(camMat(0, 1), camMat(1, 1), camMat(2, 1)); Vec3 vNewDir = Pl.MirrorVector(vPrevDir); Vec3 vNewUp = Pl.MirrorVector(vPrevUp); float fDot = vPrevPos.Dot(Pl.n) - Pl.d; Vec3 vNewPos = vPrevPos - Pl.n * 2.0f * fDot; Matrix34A m = sMatrixLookAt(vNewDir, vNewUp, tmp_cam.GetAngles()[2]); m.SetTranslation(vNewPos); tmp_cam.SetMatrix(m); assert(Tex); tmp_cam.SetFrustum((int)(Tex->GetWidth() * tmp_cam.GetProjRatio()), 
Tex->GetHeight(), tmp_cam.GetFov(), fMinDist, fMaxDist); //tmp_cam.GetFarPlane()); bMirror = true; bUseClipPlane = true; SetCamera(tmp_cam); bChangedCamera = true; } else if (((nPrFlags & FRT_CAMERA_CURRENT) || (nPrFlags & FRT_RENDTYPE_CURSCENE)) && pRT->m_eOrder == eRO_PreDraw && !(nPrFlags & FRT_RENDTYPE_CUROBJECT)) { // Always restore stuff after explicitly changing... // get texture surface // Get current render target from the RT stack if (!CRenderer::CV_r_debugrefraction) { FX_ScreenStretchRect(Tex); // should encode hdr format } else { m_pRT->RC_ClearTarget(Tex, Clr_Debug); } m_RP.m_TI[nThreadList].m_pIgnoreObject = pPrevIgn; return true; } bool bRes = true; m_pRT->RC_PushVP(); m_pRT->RC_PushFog(); m_RP.m_TI[nThreadList].m_PersFlags |= RBPF_DRAWTOTEXTURE | RBPF_ENCODE_HDR; if (m_logFileHandle != AZ::IO::InvalidHandle) { Logv(SRendItem::m_RecurseLevel[nThreadList], "*** Set RT for Water reflections ***\n"); } assert(pEnvTex); PREFAST_ASSUME(pEnvTex); m_pRT->RC_SetEnvTexRT(pEnvTex, pRT->m_bTempDepth ? pEnvTex->m_pTex->GetWidth() : -1, pRT->m_bTempDepth ? pEnvTex->m_pTex->GetHeight() : -1, true); m_pRT->RC_ClearTargetsImmediately(1, pRT->m_nFlags, pRT->m_ClearColor, pRT->m_fClearDepth); float fAnisoScale = 1.0f; if (pRT->m_nFlags & FRT_RENDTYPE_CUROBJECT) { CCryNameR& nameTech = pTech->m_NameStr; char newTech[128]; sprintf_s(newTech, "%s_RT", nameTech.c_str()); SShaderTechnique* pT = pShader->mfFindTechnique(newTech); if (!pT) { iLog->Log("Error: CD3D9Renderer::FX_DrawToRenderTarget: Couldn't find technique '%s' in shader '%s'\n", newTech, pShader->GetName()); } else { FX_ObjectChange(pShader, pRes, pObj, pRE); FX_Start(pShader, -1, pRes, pRE); pRE->mfPrepare(false); FX_DrawShader_General(pShader, pT); } } else { if (bMirror) { if (bOceanRefl) { SetCamera(tmp_cam); } m_pRT->RC_SetEnvTexMatrix(pEnvTex); if (bOceanRefl) { SetCamera(tmp_cam_mgpu); } } m_RP.m_TI[nThreadList].m_PersFlags |= RBPF_OBLIQUE_FRUSTUM_CLIPPING | RBPF_MIRRORCAMERA; // | RBPF_MIRRORCULL; ?? Plane p; p.n[0] = plane[0]; p.n[1] = plane[1]; p.n[2] = plane[2]; p.d = plane[3]; // +0.25f; fAnisoScale = plane[3]; fAnisoScale = fabs(fabs(fAnisoScale) - GetCamera().GetPosition().z); m_RP.m_TI[nThreadList].m_bObliqueClipPlane = true; // put clipplane in clipspace.. Matrix44A mView, mProj, mCamProj, mInvCamProj; GetModelViewMatrix(&mView(0, 0)); GetProjectionMatrix(&mProj(0, 0)); mCamProj = mView * mProj; mInvCamProj = mCamProj.GetInverted(); m_RP.m_TI[nThreadList].m_pObliqueClipPlane = TransformPlane2(mInvCamProj, p); int nRenderPassFlags = (gRenDev->m_RP.m_eQuality) ? SRenderingPassInfo::TERRAIN : 0; if (bOceanRefl && OceanToggle::IsActive()) { AZ::OceanEnvironmentBus::Broadcast(&AZ::OceanEnvironmentBus::Events::ApplyReflectRenderFlags, nRenderPassFlags); } else { int nReflQuality = (bOceanRefl) ? 
(int)CV_r_waterreflections_quality : (int)CV_r_reflections_quality; // set reflection quality setting switch (nReflQuality) { case 1: nRenderPassFlags |= SRenderingPassInfo::ENTITIES; break; case 2: nRenderPassFlags |= SRenderingPassInfo::TERRAIN_DETAIL_MATERIALS | SRenderingPassInfo::ENTITIES; break; case 3: nRenderPassFlags |= SRenderingPassInfo::STATIC_OBJECTS | SRenderingPassInfo::ENTITIES | SRenderingPassInfo::TERRAIN_DETAIL_MATERIALS; break; case 4: case 5: nRenderPassFlags |= SRenderingPassInfo::STATIC_OBJECTS | SRenderingPassInfo::ENTITIES | SRenderingPassInfo::TERRAIN_DETAIL_MATERIALS | SRenderingPassInfo::PARTICLES; break; } } int nRFlags = SHDF_ALLOWHDR | SHDF_NO_DRAWNEAR; eng->RenderSceneReflection(nRFlags, SRenderingPassInfo::CreateRecursivePassRenderingInfo(bOceanRefl ? tmp_cam_mgpu : tmp_cam, nRenderPassFlags)); m_RP.m_TI[nThreadList].m_bObliqueClipPlane = false; m_RP.m_TI[nThreadList].m_PersFlags &= ~RBPF_OBLIQUE_FRUSTUM_CLIPPING; } m_pRT->RC_PopRT(0); bool bUseVeryHiSpecAnisotropicReflections = false; if (OceanToggle::IsActive()) { bool bAnisotropicReflections = false; if (bOceanRefl) { AZ::OceanEnvironmentBus::BroadcastResult(bAnisotropicReflections, &AZ::OceanEnvironmentBus::Events::GetReflectionAnisotropic); } else { bAnisotropicReflections = (int)CV_r_reflections_quality >= 4; } bUseVeryHiSpecAnisotropicReflections = (bAnisotropicReflections && bEnableAnisotropicBlur && Tex && Tex->GetDevTexture()); } else { int nReflQuality = (bOceanRefl) ? (int)CV_r_waterreflections_quality : (int)CV_r_reflections_quality; bUseVeryHiSpecAnisotropicReflections = (nReflQuality >= 4 && bEnableAnisotropicBlur && Tex && Tex->GetDevTexture()); } // Very Hi specs get anisotropic reflections? if (bUseVeryHiSpecAnisotropicReflections) { m_pRT->RC_TexBlurAnisotropicVertical(Tex, fAnisoScale); } if (m_logFileHandle != AZ::IO::InvalidHandle) { Logv(SRendItem::m_RecurseLevel[nThreadList], "*** End RT for Water reflections ***\n"); } // todo: encode hdr format m_RP.m_TI[nThreadList].m_PersFlags = nPersFlags; //m_RP.m_TI[nThreadList].m_PersFlags2 = nPersFlags2; if (bChangedCamera) { SetCamera(prevCamera); } m_pRT->RC_PopVP(); m_pRT->RC_PopFog(); // increase frame id to support multiple recursive draws m_RP.m_TI[nThreadList].m_nFrameID++; m_RP.m_TI[nThreadList].m_pIgnoreObject = pPrevIgn; return bRes; }
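// Illustrative standalone sketch of the plane-mirroring math used above to build the reflection camera:
// with the plane written as n.p = d (unit-length n, as in the code above), a point reflects as
// p' = p - 2 * n * (p.n - d) and a direction as v' = v - 2 * n * (v.n). Minimal stand-in types,
// not the engine's Plane/Vec3.
#if 0
struct V3 { float x, y, z; };
static float Dot(const V3& a, const V3& b) { return a.x * b.x + a.y * b.y + a.z * b.z; }

// Reflects a position across the plane n.p = d.
static V3 MirrorPoint(const V3& p, const V3& n, float d)
{
    const float dist = Dot(p, n) - d;
    return { p.x - 2.0f * n.x * dist, p.y - 2.0f * n.y * dist, p.z - 2.0f * n.z * dist };
}

// Reflects a direction (translation-free) across the same plane.
static V3 MirrorDirection(const V3& v, const V3& n)
{
    const float k = 2.0f * Dot(v, n);
    return { v.x - k * n.x, v.y - k * n.y, v.z - k * n.z };
}
#endif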