//============================================================================= // (C) 2005 ATI Research, Inc., All rights reserved. //============================================================================= // modifications by Crytek GmbH // modifications by Amazon #include #include "CCubeMapProcessor.h" #include #define CP_PI 3.14159265358979323846 namespace ImageProcessing { //------------------------------------------------------------------------------ // D3D cube map face specification // mapping from 3D x,y,z cube map lookup coordinates // to 2D within face u,v coordinates // // --------------------> U direction // | (within-face texture space) // | _____ // | | | // | | +Y | // | _____|_____|_____ _____ // | | | | | | // | | -X | +Z | +X | -Z | // | |_____|_____|_____|_____| // | | | // | | -Y | // | |_____| // | // v V direction // (within-face texture space) //------------------------------------------------------------------------------ //Information about neighbors and how texture coorrdinates change across faces // in ORDER of left, right, top, bottom (e.g. edges corresponding to u=0, // u=1, v=0, v=1 in the 2D coordinate system of the particular face. 
//Note this currently assumes the D3D cube face ordering and orientation CPCubeMapNeighbor sg_CubeNgh[6][4] = { //XPOS face {{CP_FACE_Z_POS, CP_EDGE_RIGHT }, {CP_FACE_Z_NEG, CP_EDGE_LEFT }, {CP_FACE_Y_POS, CP_EDGE_RIGHT }, {CP_FACE_Y_NEG, CP_EDGE_RIGHT }}, //XNEG face {{CP_FACE_Z_NEG, CP_EDGE_RIGHT }, {CP_FACE_Z_POS, CP_EDGE_LEFT }, {CP_FACE_Y_POS, CP_EDGE_LEFT }, {CP_FACE_Y_NEG, CP_EDGE_LEFT }}, //YPOS face {{CP_FACE_X_NEG, CP_EDGE_TOP }, {CP_FACE_X_POS, CP_EDGE_TOP }, {CP_FACE_Z_NEG, CP_EDGE_TOP }, {CP_FACE_Z_POS, CP_EDGE_TOP }}, //YNEG face {{CP_FACE_X_NEG, CP_EDGE_BOTTOM}, {CP_FACE_X_POS, CP_EDGE_BOTTOM}, {CP_FACE_Z_POS, CP_EDGE_BOTTOM}, {CP_FACE_Z_NEG, CP_EDGE_BOTTOM}}, //ZPOS face {{CP_FACE_X_NEG, CP_EDGE_RIGHT }, {CP_FACE_X_POS, CP_EDGE_LEFT }, {CP_FACE_Y_POS, CP_EDGE_BOTTOM }, {CP_FACE_Y_NEG, CP_EDGE_TOP }}, //ZNEG face {{CP_FACE_X_POS, CP_EDGE_RIGHT }, {CP_FACE_X_NEG, CP_EDGE_LEFT }, {CP_FACE_Y_POS, CP_EDGE_TOP }, {CP_FACE_Y_NEG, CP_EDGE_BOTTOM }} }; //3x2 matrices that map cube map indexing vectors in 3d // (after face selection and divide through by the // _ABSOLUTE VALUE_ of the max coord) // into NVC space //Note this currently assumes the D3D cube face ordering and orientation #define CP_UDIR 0 #define CP_VDIR 1 #define CP_FACEAXIS 2 float sgFace2DMapping[6][3][3] = { //XPOS face {{ 0, 0, -1}, //u towards negative Z { 0, -1, 0}, //v towards negative Y {1, 0, 0}}, //pos X axis //XNEG face {{0, 0, 1}, //u towards positive Z {0, -1, 0}, //v towards negative Y {-1, 0, 0}}, //neg X axis //YPOS face {{1, 0, 0}, //u towards positive X {0, 0, 1}, //v towards positive Z {0, 1 , 0}}, //pos Y axis //YNEG face {{1, 0, 0}, //u towards positive X {0, 0 , -1}, //v towards negative Z {0, -1 , 0}}, //neg Y axis //ZPOS face {{1, 0, 0}, //u towards positive X {0, -1, 0}, //v towards negative Y {0, 0, 1}}, //pos Z axis //ZNEG face {{-1, 0, 0}, //u towards negative X {0, -1, 0}, //v towards negative Y {0, 0, -1}}, //neg Z axis }; //The 12 edges of the cubemap, (entries 
are used to index into the neighbor table) // this table is used to average over the edges. int32 sg_CubeEdgeList[12][2] = { {CP_FACE_X_POS, CP_EDGE_LEFT}, {CP_FACE_X_POS, CP_EDGE_RIGHT}, {CP_FACE_X_POS, CP_EDGE_TOP}, {CP_FACE_X_POS, CP_EDGE_BOTTOM}, {CP_FACE_X_NEG, CP_EDGE_LEFT}, {CP_FACE_X_NEG, CP_EDGE_RIGHT}, {CP_FACE_X_NEG, CP_EDGE_TOP}, {CP_FACE_X_NEG, CP_EDGE_BOTTOM}, {CP_FACE_Z_POS, CP_EDGE_TOP}, {CP_FACE_Z_POS, CP_EDGE_BOTTOM}, {CP_FACE_Z_NEG, CP_EDGE_TOP}, {CP_FACE_Z_NEG, CP_EDGE_BOTTOM} }; //Information about which of the 8 cube corners are correspond to the // the 4 corners in each cube face // the order is upper left, upper right, lower left, lower right int32 sg_CubeCornerList[6][4] = { { CP_CORNER_PPP, CP_CORNER_PPN, CP_CORNER_PNP, CP_CORNER_PNN }, // XPOS face { CP_CORNER_NPN, CP_CORNER_NPP, CP_CORNER_NNN, CP_CORNER_NNP }, // XNEG face { CP_CORNER_NPN, CP_CORNER_PPN, CP_CORNER_NPP, CP_CORNER_PPP }, // YPOS face { CP_CORNER_NNP, CP_CORNER_PNP, CP_CORNER_NNN, CP_CORNER_PNN }, // YNEG face { CP_CORNER_NPP, CP_CORNER_PPP, CP_CORNER_NNP, CP_CORNER_PNP }, // ZPOS face { CP_CORNER_PPN, CP_CORNER_NPN, CP_CORNER_PNN, CP_CORNER_NNN } // ZNEG face }; //-------------------------------------------------------------------------------------- // Convert cubemap face texel coordinates and face idx to 3D vector // note the U and V coords are integer coords and range from 0 to size-1 // this routine can be used to generate a normalizer cube map //-------------------------------------------------------------------------------------- void TexelCoordToVect(int32 a_FaceIdx, float a_U, float a_V, int32 a_Size, float *a_XYZ) { float nvcU, nvcV; float tempVec[3]; //scale up to [-1, 1] range (inclusive) nvcU = (2.0f * ((float)a_U + 0.5f) / a_Size ) - 1.0f; nvcV = (2.0f * ((float)a_V + 0.5f) / a_Size ) - 1.0f; //generate x,y,z vector (xform 2d NVC coord to 3D vector) //U contribution VM_SCALE3(a_XYZ, sgFace2DMapping[a_FaceIdx][CP_UDIR], nvcU); //V contribution 
VM_SCALE3(tempVec, sgFace2DMapping[a_FaceIdx][CP_VDIR], nvcV); VM_ADD3(a_XYZ, tempVec, a_XYZ); //add face axis VM_ADD3(a_XYZ, sgFace2DMapping[a_FaceIdx][CP_FACEAXIS], a_XYZ); //normalize vector VM_NORM3(a_XYZ, a_XYZ); } //-------------------------------------------------------------------------------------- // Convert 3D vector to cubemap face texel coordinates and face idx // note the U and V coords are integer coords and range from 0 to size-1 // this routine can be used to generate a normalizer cube map // // returns face IDX and texel coords //-------------------------------------------------------------------------------------- void VectToTexelCoord(float *a_XYZ, int32 a_Size, int32 *a_FaceIdx, int32 *a_U, int32 *a_V ) { float nvcU, nvcV; float absXYZ[3]; float maxCoord; float onFaceXYZ[3]; int32 faceIdx; int32 u, v; //absolute value 3 VM_ABS3(absXYZ, a_XYZ); if( (absXYZ[0] >= absXYZ[1]) && (absXYZ[0] >= absXYZ[2]) ) { maxCoord = absXYZ[0]; if(a_XYZ[0] >= 0) //face = XPOS { faceIdx = CP_FACE_X_POS; } else { faceIdx = CP_FACE_X_NEG; } } else if ( (absXYZ[1] >= absXYZ[0]) && (absXYZ[1] >= absXYZ[2]) ) { maxCoord = absXYZ[1]; if(a_XYZ[1] >= 0) //face = XPOS { faceIdx = CP_FACE_Y_POS; } else { faceIdx = CP_FACE_Y_NEG; } } else // if( (absXYZ[2] > absXYZ[0]) && (absXYZ[2] > absXYZ[1]) ) { maxCoord = absXYZ[2]; if(a_XYZ[2] >= 0) //face = XPOS { faceIdx = CP_FACE_Z_POS; } else { faceIdx = CP_FACE_Z_NEG; } } //divide through by max coord so face vector lies on cube face VM_SCALE3(onFaceXYZ, a_XYZ, 1.0f/maxCoord); nvcU = VM_DOTPROD3(sgFace2DMapping[ faceIdx ][CP_UDIR], onFaceXYZ ); nvcV = VM_DOTPROD3(sgFace2DMapping[ faceIdx ][CP_VDIR], onFaceXYZ ); u = (int32)floor( a_Size * 0.5f * (nvcU + 1.0f) ); v = (int32)floor( a_Size * 0.5f * (nvcV + 1.0f) ); *a_FaceIdx = faceIdx; *a_U = u; *a_V = v; } //-------------------------------------------------------------------------------------- // gets texel ptr in a cube map given a direction vector, and an array of // 
CImageSurfaces that represent the cube faces. // //-------------------------------------------------------------------------------------- CP_ITYPE *GetCubeMapTexelPtr(float *a_XYZ, CImageSurface *a_Surface) { int32 u, v, faceIdx; //get face idx and u, v texel coordinate in face VectToTexelCoord(a_XYZ, a_Surface[0].m_Width, &faceIdx, &u, &v ); u = VM_MIN(u, a_Surface[0].m_Width - 1); v = VM_MIN(v, a_Surface[0].m_Width - 1); return( a_Surface[faceIdx].GetSurfaceTexelPtr(u, v) ); } //-------------------------------------------------------------------------------------- // Compute solid angle of given texel in cubemap face for weighting taps in the // kernel by the area they project to on the unit sphere. // // Note that this code uses an approximation to the solid angle, by treating the // two triangles that make up the quad comprising the texel as planar. If more // accuracy is required, the solid angle per triangle lying on the sphere can be // computed using the sum of the interior angles - PI. 
// //-------------------------------------------------------------------------------------- float TexelCoordSolidAngle(int32 a_FaceIdx, float a_U, float a_V, int32 a_Size) { float cornerVect[4][3]; double cornerVect64[4][3]; float halfTexelStep = 0.5f; //note u, and v are in texel coords (where each texel is one unit) double edgeVect0[3]; double edgeVect1[3]; double xProdVect[3]; double texelArea; //compute 4 corner vectors of texel TexelCoordToVect(a_FaceIdx, a_U - halfTexelStep, a_V - halfTexelStep, a_Size, cornerVect[0] ); TexelCoordToVect(a_FaceIdx, a_U - halfTexelStep, a_V + halfTexelStep, a_Size, cornerVect[1] ); TexelCoordToVect(a_FaceIdx, a_U + halfTexelStep, a_V - halfTexelStep, a_Size, cornerVect[2] ); TexelCoordToVect(a_FaceIdx, a_U + halfTexelStep, a_V + halfTexelStep, a_Size, cornerVect[3] ); VM_NORM3_UNTYPED(cornerVect64[0], cornerVect[0] ); VM_NORM3_UNTYPED(cornerVect64[1], cornerVect[1] ); VM_NORM3_UNTYPED(cornerVect64[2], cornerVect[2] ); VM_NORM3_UNTYPED(cornerVect64[3], cornerVect[3] ); //area of triangle defined by corners 0, 1, and 2 VM_SUB3_UNTYPED(edgeVect0, cornerVect64[1], cornerVect64[0] ); VM_SUB3_UNTYPED(edgeVect1, cornerVect64[2], cornerVect64[0] ); VM_XPROD3_UNTYPED(xProdVect, edgeVect0, edgeVect1 ); texelArea = 0.5f * sqrt( VM_DOTPROD3_UNTYPED(xProdVect, xProdVect ) ); //area of triangle defined by corners 1, 2, and 3 VM_SUB3_UNTYPED(edgeVect0, cornerVect64[2], cornerVect64[1] ); VM_SUB3_UNTYPED(edgeVect1, cornerVect64[3], cornerVect64[1] ); VM_XPROD3_UNTYPED(xProdVect, edgeVect0, edgeVect1 ); texelArea += 0.5f * sqrt( VM_DOTPROD3_UNTYPED(xProdVect, xProdVect ) ); return texelArea; } //-------------------------------------------------------------------------------------- //Builds a normalizer cubemap // // Takes in a cube face size, and an array of 6 surfaces to write the cube faces into // // Note that this normalizer cube map stores the vectors in unbiased -1 to 1 range. 
// if _bx2 style scaled and biased vectors are needed, uncomment the SCALE and BIAS // below //-------------------------------------------------------------------------------------- void CCubeMapProcessor::BuildNormalizerCubemap(int32 a_Size, CImageSurface *a_Surface ) { int32 iCubeFace, u, v; //iterate over cube faces for(iCubeFace=0; iCubeFace<6; iCubeFace++) { a_Surface[iCubeFace].Clear(); a_Surface[iCubeFace].Init(a_Size, a_Size, 3); //fast texture walk, build normalizer cube map CP_ITYPE *texelPtr = a_Surface[iCubeFace].m_ImgData; for(v=0; v < a_Surface[iCubeFace].m_Height; v++) { for(u=0; u < a_Surface[iCubeFace].m_Width; u++) { TexelCoordToVect(iCubeFace, (float)u, (float)v, a_Size, texelPtr); //VM_SCALE3(texelPtr, texelPtr, 0.5f); //VM_BIAS3(texelPtr, texelPtr, 0.5f); texelPtr += a_Surface[iCubeFace].m_NumChannels; } } } } //-------------------------------------------------------------------------------------- //Builds a normalizer cubemap, with the texels solid angle stored in the fourth component // //Takes in a cube face size, and an array of 6 surfaces to write the cube faces into // //Note that this normalizer cube map stores the vectors in unbiased -1 to 1 range. 
// if _bx2 style scaled and biased vectors are needed, uncomment the SCALE and BIAS // below //-------------------------------------------------------------------------------------- void CCubeMapProcessor::BuildNormalizerSolidAngleCubemap(int32 a_Size, CImageSurface *a_Surface ) { //iterate over cube faces for(int32 iCubeFace=0; iCubeFace<6; iCubeFace++) { a_Surface[iCubeFace].Clear(); a_Surface[iCubeFace].Init(a_Size, a_Size, 4); //First three channels for norm cube, and last channel for solid angle } //iterate over cube faces for(int32 iCubeFace=0; iCubeFace<6; iCubeFace++) { const int32 height = a_Surface[iCubeFace].m_Height; const int32 width = a_Surface[iCubeFace].m_Width; for(int32 v=0; v 0) { neighborFace = sg_CubeNgh[faceIdx][i].m_Face; neighborEdge = sg_CubeNgh[faceIdx][i].m_Edge; //For certain types of edge abutments, the bleedOverBBoxMin, and bleedOverBBoxMax need to // be flipped: the cases are // if a left edge mates with a left or bottom edge on the neighbor // if a top edge mates with a top or right edge on the neighbor // if a right edge mates with a right or top edge on the neighbor // if a bottom edge mates with a bottom or left edge on the neighbor //Seeing as the edges are enumerated as follows // left =0 // right =1 // top =2 // bottom =3 // // so if the edge enums are the same, or the sum of the enums == 3, // the bbox needs to be flipped if( (i == neighborEdge) || ((i+neighborEdge) == 3) ) { bleedOverBBoxMin[i] = (a_SrcSize-1) - bleedOverBBoxMin[i]; bleedOverBBoxMax[i] = (a_SrcSize-1) - bleedOverBBoxMax[i]; } //The way the bounding box is extended onto the neighboring face // depends on which edge of neighboring face abuts with this one switch(sg_CubeNgh[faceIdx][i].m_Edge) { case CP_EDGE_LEFT: a_FilterExtents[neighborFace].Augment(0, bleedOverBBoxMin[i], 0); a_FilterExtents[neighborFace].Augment(bleedOverAmount[i], bleedOverBBoxMax[i], 0); break; case CP_EDGE_RIGHT: a_FilterExtents[neighborFace].Augment( (a_SrcSize-1), bleedOverBBoxMin[i], 
0); a_FilterExtents[neighborFace].Augment( (a_SrcSize-1) - bleedOverAmount[i], bleedOverBBoxMax[i], 0); break; case CP_EDGE_TOP: a_FilterExtents[neighborFace].Augment(bleedOverBBoxMin[i], 0, 0); a_FilterExtents[neighborFace].Augment(bleedOverBBoxMax[i], bleedOverAmount[i], 0); break; case CP_EDGE_BOTTOM: a_FilterExtents[neighborFace].Augment(bleedOverBBoxMin[i], (a_SrcSize-1), 0); a_FilterExtents[neighborFace].Augment(bleedOverBBoxMax[i], (a_SrcSize-1) - bleedOverAmount[i], 0); break; } //clamp filter extents in non-center tap faces to remain within surface a_FilterExtents[neighborFace].ClampMin(0, 0, 0); a_FilterExtents[neighborFace].ClampMax(a_SrcSize-1, a_SrcSize-1, 0); } //If the bleed over amount bleeds past the adjacent face onto the opposite face // from the center tap face, then process the opposite face entirely for now. //Note that the cases in which this happens, what usually happens is that // more than one edge bleeds onto the opposite face, and the bounding box // encompasses the entire cube map face. 
if(bleedOverAmount[i] > a_SrcSize) { uint32 oppositeFaceIdx; //determine opposite face switch(faceIdx) { case CP_FACE_X_POS: oppositeFaceIdx = CP_FACE_X_NEG; break; case CP_FACE_X_NEG: oppositeFaceIdx = CP_FACE_X_POS; break; case CP_FACE_Y_POS: oppositeFaceIdx = CP_FACE_Y_NEG; break; case CP_FACE_Y_NEG: oppositeFaceIdx = CP_FACE_Y_POS; break; case CP_FACE_Z_POS: oppositeFaceIdx = CP_FACE_Z_NEG; break; default: // CP_FACE_Z_NEG: oppositeFaceIdx = CP_FACE_Z_POS; break; } //just encompass entire face for now a_FilterExtents[oppositeFaceIdx].Augment(0, 0, 0); a_FilterExtents[oppositeFaceIdx].Augment((a_SrcSize-1), (a_SrcSize-1), 0); } } minV=minV; } //-------------------------------------------------------------------------------------- //ProcessFilterExtents // Process bounding box in each cube face // //-------------------------------------------------------------------------------------- void CCubeMapProcessor::ProcessFilterExtents(float *a_CenterTapDir, float a_DotProdThresh, CBBoxInt32 *a_FilterExtents, CImageSurface *a_NormCubeMap, CImageSurface *a_SrcCubeMap, CP_ITYPE *a_DstVal, uint32 a_FilterType, bool a_bUseSolidAngleWeighting, float a_SpecularPower) { //accumulators are 64-bit floats in order to have the precision needed // over a summation of a large number of pixels double dstAccumFace[6][4]; double weightAccumFace[6]; const int32 nSrcChannels = a_SrcCubeMap[0].m_NumChannels; //norm cube map and srcCubeMap have same face width const int32 faceWidth = a_NormCubeMap[0].m_Width; //amount to add to pointer to move to next scanline in images const int32 normCubePitch = faceWidth * a_NormCubeMap[0].m_NumChannels; const int32 srcCubePitch = faceWidth * a_SrcCubeMap[0].m_NumChannels; //iterate over cubefaces for(int32 iFaceIdx=0; iFaceIdx<6; iFaceIdx++ ) { //dest accum for(int32 k=0; k= a_DotProdThresh ) { CP_ITYPE weight; //for now just weight all taps equally, but ideally // weight should be proportional to the solid angle of the tap if(a_bUseSolidAngleWeighting 
== true) { //solid angle stored in 4th channel of normalizer/solid angle cube map weight = *(texelVect+3); } else { //all taps equally weighted weight = 1.0f; } switch(a_FilterType) { case CP_FILTER_TYPE_COSINE_POWER: { if(tapDotProd > 0.0f) { weight *= pow(tapDotProd, a_SpecularPower) * tapDotProd; } else { weight = 0; } } break; case CP_FILTER_TYPE_CONE: case CP_FILTER_TYPE_ANGULAR_GAUSSIAN: { //weights are in same lookup table for both of these filter types weight *= m_FilterLUT[(int32)(tapDotProd * (m_NumFilterLUTEntries - 1))]; } break; case CP_FILTER_TYPE_COSINE: { if(tapDotProd > 0.0f) { weight *= tapDotProd; } else { weight = 0.0f; } } break; case CP_FILTER_TYPE_DISC: default: break; } //iterate over channels for(int32 k=0; k>= 1; m_NumMipLevels++; //terminate if mip chain becomes too small if(mipLevelSize == 0) { return; } } } //-------------------------------------------------------------------------------------- //Copy and convert cube map face data from an external image/surface into this object // // a_FaceIdx = a value 0 to 5 speciying which face to copy into (one of the CP_FACE_? ) // a_Level = mip level to copy into // a_SrcType = data type of image being copyed from (one of the CP_TYPE_? 
//                      types)
// a_SrcNumChannels = number of channels of the image being copied from (usually 1 to 4)
// a_SrcPitch       = number of bytes per row of the source image being copied from
// a_SrcDataPtr     = pointer to the image data to copy from
// a_MaxClamp       = clamp value handed through to SetImageDataClampDegammaScale
//                    (presumably an upper bound applied to pixel values - TODO confirm)
// a_Degamma        = original gamma level of input image to undo by degamma
// a_Scale          = scale to apply to pixel values after degamma (in linear space)
//
// NOTE(review): a_FaceIdx indexes m_InputSurface directly; no range check is done here.
//--------------------------------------------------------------------------------------
void CCubeMapProcessor::SetInputFaceData(int32 a_FaceIdx, int32 a_SrcType, int32 a_SrcNumChannels,
    int32 a_SrcPitch, void *a_SrcDataPtr, float a_MaxClamp, float a_Degamma, float a_Scale)
{
    //since input is being modified, terminate any active filtering threads
    TerminateActiveThreads();

    //the selected input face does the copy/conversion; every argument is forwarded unchanged
    m_InputSurface[a_FaceIdx].SetImageDataClampDegammaScale( a_SrcType, a_SrcNumChannels, a_SrcPitch,
        a_SrcDataPtr, a_MaxClamp, a_Degamma, a_Scale );
}


//--------------------------------------------------------------------------------------
//Copy and convert cube map face data from this object into an external image/surface
//
// a_FaceIdx        = a value 0 to 5 specifying which face to copy from (one of the CP_FACE_? )
// a_Level          = mip level to copy from (NOTE(review): the function that follows takes
//                    no a_Level parameter - this entry looks stale)
// a_DstType        = data type of image to copy to (one of the CP_TYPE_?
types) // a_DstNumChannels = number of channels of the image to copy to (usually 1 to 4) // a_DstPitch = number of bytes per row of the dest image to copy to // a_DstDataPtr = pointer to the image data to copy to // a_Scale = scale to apply to pixel values (in linear space) before gamma for output // a_Gamma = gamma level to apply to pixels after scaling //-------------------------------------------------------------------------------------- void CCubeMapProcessor::GetInputFaceData(int32 a_FaceIdx, int32 a_DstType, int32 a_DstNumChannels, int32 a_DstPitch, void *a_DstDataPtr, float a_Scale, float a_Gamma) { m_InputSurface[a_FaceIdx].GetImageDataScaleGamma( a_DstType, a_DstNumChannels, a_DstPitch, a_DstDataPtr, a_Scale, a_Gamma ); } //-------------------------------------------------------------------------------------- //ChannelSwapInputFaceData // swizzle data in first 4 channels for input faces // //-------------------------------------------------------------------------------------- void CCubeMapProcessor::ChannelSwapInputFaceData(int32 a_Channel0Src, int32 a_Channel1Src, int32 a_Channel2Src, int32 a_Channel3Src ) { int32 iFace, u, v, k; int32 size; CP_ITYPE texelData[4]; int32 channelSrcArray[4]; //since input is being modified, terminate any active filtering threads TerminateActiveThreads(); size = m_InputSize; channelSrcArray[0] = a_Channel0Src; channelSrcArray[1] = a_Channel1Src; channelSrcArray[2] = a_Channel2Src; channelSrcArray[3] = a_Channel3Src; //Iterate over faces for input images for(iFace=0; iFace<6; iFace++) { for(v=0; v> 16u); bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); return float(bits) * 2.3283064365386963e-10; // float(bits) * 2^-32 } inline void HammersleySequence(uint32 sampleIndex, uint32 sampleCount, float* vXi) { 
vXi[0] = float(sampleIndex) / float(sampleCount); vXi[1] = RadicalInverse2(sampleIndex); } void ImportanceSampleGGX(float* vXi, float roughness, float* vNormal, float* vOut) { float phi = 2 * CP_PI * vXi[0]; float cosTheta = sqrtf((1 - vXi[1]) / ( 1 + (roughness * roughness - 1) * vXi[1])); float sinTheta = sqrtf(1 - cosTheta * cosTheta); float vH[3]; vH[0] = sinTheta * cosf(phi); vH[1] = sinTheta * sinf(phi); vH[2] = cosTheta; float vUpVectorX[3] = {1, 0, 0}; float vUpVectorZ[3] = {0, 0, 1}; float vTangentX[3]; float vTangentY[3]; float vTempVec[3]; // Build local frame VM_XPROD3(vTempVec, fabs(vNormal[2]) < 0.999f ? vUpVectorZ : vUpVectorX, vNormal); VM_NORM3(vTangentX, vTempVec); VM_XPROD3(vTangentY, vNormal, vTangentX); // Convert from tangent to world space vOut[0] = vTangentX[0] * vH[0] + vTangentY[0] * vH[1] + vNormal[0] * vH[2]; vOut[1] = vTangentX[1] * vH[0] + vTangentY[1] * vH[1] + vNormal[1] * vH[2]; vOut[2] = vTangentX[2] * vH[0] + vTangentY[2] * vH[1] + vNormal[2] * vH[2]; } void CCubeMapProcessor::FilterCubeSurfacesGGX(CImageSurface *a_SrcCubeMap, CImageSurface *a_DstCubeMap, int32 a_SampleCount, float a_Roughness, int32 a_FaceIdxStart, int32 a_FaceIdxEnd, int32 a_ThreadIdx) { const uint32 numChannels = VM_MIN(m_NumChannels, 4); const int32 dstSize = a_DstCubeMap[0].m_Width; //thread progress m_ThreadProgress[a_ThreadIdx].m_StartFace = a_FaceIdxStart; m_ThreadProgress[a_ThreadIdx].m_EndFace = a_FaceIdxEnd; //process required faces for(int32 iCubeFace = a_FaceIdxStart; iCubeFace <= a_FaceIdxEnd && !m_shutdownWorkerThreadSignal; iCubeFace++) { //iterate over dst cube map face texel for(int32 v = 0; v < dstSize && !m_shutdownWorkerThreadSignal; v++) { CP_ITYPE *texelPtr = a_DstCubeMap[iCubeFace].m_ImgData + v * a_DstCubeMap[iCubeFace].m_NumChannels * dstSize; m_ThreadProgress[a_ThreadIdx].m_CurrentFace = iCubeFace; m_ThreadProgress[a_ThreadIdx].m_CurrentRow = v; for (int32 u = 0; u < dstSize && !m_shutdownWorkerThreadSignal; u++) { float color[4] = { 0 
}; float totalWeight = 0; float vH[3]; float vL[3]; // Assume normal and view vector to be vCenterTapDir float vCenterTapDir[3]; TexelCoordToVect(iCubeFace, (float)u, (float)v, dstSize, vCenterTapDir); for (uint32 i = 0; i < (uint32)a_SampleCount && !m_shutdownWorkerThreadSignal; i++) { float vXi[2]; HammersleySequence(i, a_SampleCount, vXi); ImportanceSampleGGX(vXi, a_Roughness, vCenterTapDir, vH); float fVdotH = VM_DOTPROD3(vCenterTapDir, vH); vL[0] = 2 * fVdotH * vH[0] - vCenterTapDir[0]; vL[1] = 2 * fVdotH * vH[1] - vCenterTapDir[1]; vL[2] = 2 * fVdotH * vH[2] - vCenterTapDir[2]; float fNdotL = VM_DOTPROD3(vCenterTapDir, vL); if (fNdotL > 0) { CP_ITYPE *sourceTexel = GetCubeMapTexelPtr(vL, a_SrcCubeMap); for (uint32 k = 0; k < numChannels; k++) { color[k] += sourceTexel[k] * fNdotL; } totalWeight += fNdotL; } } for (uint32 k = 0; k < numChannels; k++) { texelPtr[k] = color[k] / totalWeight; } texelPtr += a_DstCubeMap[iCubeFace].m_NumChannels; } } } } void CCubeMapProcessor::FilterCubeMapMipChain(float a_BaseFilterAngle, float a_InitialMipAngle, float a_MipAnglePerLevelScale, int32 a_FilterType, int32 a_FixupType, int32 a_FixupWidth, bool a_bUseSolidAngle, float a_GlossScale, float a_GlossBias, int32 a_SampleCountGGX) { int32 i; float coneAngle; if(a_FilterType == CP_FILTER_TYPE_COSINE_POWER || a_FilterType == CP_FILTER_TYPE_GGX) { // Don't filter top mipmap a_BaseFilterAngle = 0; } //Build filter lookup tables based on the source miplevel size PrecomputeFilterLookupTables(a_FilterType, m_InputSurface[0].m_Width, a_BaseFilterAngle); //initialize thread progress m_ThreadProgress[0].m_CurrentMipLevel = 0; m_ThreadProgress[0].m_CurrentRow = 0; m_ThreadProgress[0].m_CurrentFace = 0; //Filter the top mip level (initial filtering used for diffuse or blurred specular lighting ) FilterCubeSurfaces(m_InputSurface, m_OutputSurface[0], a_BaseFilterAngle, a_FilterType, a_bUseSolidAngle, 0, //start at face 0 5, //end at face 5 0); //thread 0 is processing 
m_ThreadProgress[0].m_CurrentMipLevel = 1; m_ThreadProgress[0].m_CurrentRow = 0; m_ThreadProgress[0].m_CurrentFace = 0; FixupCubeEdges(m_OutputSurface[0], a_FixupType, a_FixupWidth); //Cone angle start (for generating subsequent mip levels) coneAngle = a_InitialMipAngle; //generate subsequent mip levels for(i=0; i<(m_NumMipLevels-1) && !m_shutdownWorkerThreadSignal; i++) { m_ThreadProgress[0].m_CurrentMipLevel = i+1; m_ThreadProgress[0].m_CurrentRow = 0; m_ThreadProgress[0].m_CurrentFace = 0; CImageSurface* srcCubeImage = m_OutputSurface[i]; if (a_FilterType == CP_FILTER_TYPE_GGX) { uint32 numUsableMips = m_NumMipLevels - 2; // Lowest used mip is 4x4 float smoothness = VM_MAX(1.0f - (float)(i + 1) / (float)(numUsableMips - 1), 0.0f); // Convert smoothness to roughness (needs to match shader code) float roughness = (1.0f - smoothness) * (1.0f - smoothness); FilterCubeSurfacesGGX(srcCubeImage, m_OutputSurface[i+1], a_SampleCountGGX, roughness, 0, //start at face 0 5, //end at face 5 0 //thread 0 is processing ); } else { float specPow = 1.0f; if(a_FilterType == CP_FILTER_TYPE_COSINE_POWER) { uint32 numMipsForGloss = m_NumMipLevels - 2; // Lowest used mip is 4x4 float gloss = VM_MAX(1.0f - (float)(i + 1) / (float)(numMipsForGloss - 1), 0.0f); // Compute specular power (this must match shader code) specPow = pow(2.0f, a_GlossScale * gloss + a_GlossBias); // Blinn to Phong approximation: (R.E)^p == (N.H)^(4*p) specPow /= 4.0f; coneAngle = ComputeBaseFilterAngle(specPow); srcCubeImage = m_InputSurface; } //Build filter lookup tables based on the source miplevel size PrecomputeFilterLookupTables(a_FilterType, srcCubeImage->m_Width, coneAngle); //filter cube surfaces FilterCubeSurfaces(srcCubeImage, m_OutputSurface[i+1], coneAngle, a_FilterType, a_bUseSolidAngle, 0, //start at face 0 5, //end at face 5 0, //thread 0 is processing specPow); } m_ThreadProgress[0].m_CurrentMipLevel = i+2; m_ThreadProgress[0].m_CurrentRow = 0; m_ThreadProgress[0].m_CurrentFace = 0; 
FixupCubeEdges(m_OutputSurface[i+1], a_FixupType, a_FixupWidth); coneAngle = coneAngle * a_MipAnglePerLevelScale; } m_Status = CP_STATUS_FILTER_COMPLETED; } //-------------------------------------------------------------------------------------- //Builds the following lookup tables prior to filtering: // -normalizer cube map // -tap weight lookup table // //-------------------------------------------------------------------------------------- void CCubeMapProcessor::PrecomputeFilterLookupTables(uint32 a_FilterType, int32 a_SrcCubeMapWidth, float a_FilterConeAngle) { float srcTexelAngle; int32 iCubeFace; //angle about center tap that defines filter cone float filterAngle; //min angle a src texel can cover (in degrees) srcTexelAngle = (180.0f / (float)CP_PI) * atan2f(1.0f, (float)a_SrcCubeMapWidth); //filter angle is 1/2 the cone angle filterAngle = a_FilterConeAngle / 2.0f; //ensure filter angle is larger than a texel if(filterAngle < srcTexelAngle) { filterAngle = srcTexelAngle; } //ensure filter cone is always smaller than the hemisphere if(filterAngle > 90.0f) { filterAngle = 90.0f; } //build lookup table for tap weights based on angle between current tap and center tap BuildAngleWeightLUT(a_SrcCubeMapWidth * 2, a_FilterType, filterAngle); //clear pre-existing normalizer cube map for(iCubeFace=0; iCubeFace<6; iCubeFace++) { m_NormCubeMap[iCubeFace].Clear(); } //Normalized vectors per cubeface and per-texel solid angle BuildNormalizerSolidAngleCubemap(a_SrcCubeMapWidth, m_NormCubeMap); } //-------------------------------------------------------------------------------------- //The key to the speed of these filtering routines is to quickly define a per-face // bounding box of pixels which enclose all the taps in the filter kernel efficiently. // Later these pixels are selectively processed based on their dot products to see if // they reside within the filtering cone. 
// //This is done by computing the smallest per-texel angle to get a conservative estimate // of the number of texels needed to be covered in width and height order to filter the // region. the bounding box for the center taps face is defined first, and if the // filtereing region bleeds onto the other faces, bounding boxes for the other faces are // defined next //-------------------------------------------------------------------------------------- void CCubeMapProcessor::FilterCubeSurfaces(CImageSurface *a_SrcCubeMap, CImageSurface *a_DstCubeMap, float a_FilterConeAngle, int32 a_FilterType, bool a_bUseSolidAngle, int32 a_FaceIdxStart, int32 a_FaceIdxEnd, int32 a_ThreadIdx, float a_SpecularPower) { const int32 srcSize = a_SrcCubeMap[0].m_Width; const int32 dstSize = a_DstCubeMap[0].m_Width; //min angle a src texel can cover (in degrees) const float srcTexelAngle = (180.0f / (float)CP_PI) * atan2f(1.0f, (float)srcSize); //angle about center tap to define filter cone float filterAngle; //filter angle is 1/2 the cone angle filterAngle = a_FilterConeAngle / 2.0f; //ensure filter angle is larger than a texel if(filterAngle < srcTexelAngle) { filterAngle = srcTexelAngle; } //ensure filter cone is always smaller than the hemisphere if(filterAngle > 90.0f) { filterAngle = 90.0f; } //the maximum number of texels in 1D the filter cone angle will cover // used to determine bounding box size for filter extents //ensure conservative region always covers at least one texel const int32 filterSize = AZ::GetMax((int32)ceil(filterAngle / srcTexelAngle), 1); //dotProdThresh threshold based on cone angle to determine whether or not taps // reside within the cone angle const float dotProdThresh = cosf( ((float)CP_PI / 180.0f) * filterAngle ); //thread progress m_ThreadProgress[a_ThreadIdx].m_StartFace = a_FaceIdxStart; m_ThreadProgress[a_ThreadIdx].m_EndFace = a_FaceIdxEnd; //process required faces for(int32 iCubeFace = a_FaceIdxStart; iCubeFace <= a_FaceIdxEnd && 
!m_shutdownWorkerThreadSignal; iCubeFace++) { //iterate over dst cube map face texel for(int32 v = 0; v < dstSize && !m_shutdownWorkerThreadSignal; v++) { CP_ITYPE *texelPtr = a_DstCubeMap[iCubeFace].m_ImgData + v * a_DstCubeMap[iCubeFace].m_NumChannels * dstSize; m_ThreadProgress[a_ThreadIdx].m_CurrentFace = iCubeFace; m_ThreadProgress[a_ThreadIdx].m_CurrentRow = v; for(int32 u=0; u 0.0f) { totalMipComputation = pow(m_InputSize * m_BaseFilterAngle , 2.0f) * (m_OutputSize * m_OutputSize); } else { totalMipComputation = pow(m_InputSize * 0.01f , 2.0f) * (m_OutputSize * m_OutputSize); } progressMipComputation = 0.0f; if(a_FilterProgress->m_CurrentMipLevel > 0) { progressMipComputation = totalMipComputation; } //filtering angle for this miplevel filterAngle = m_InitialMipAngle; dstSize = m_OutputSize; //computation for entire base mip level (if current level is base level) if(a_FilterProgress->m_CurrentMipLevel == 0) { currentMipComputation = totalMipComputation; currentMipSize = dstSize; } //compuatation to generate subsequent mip levels for(i=1; i 180) { filterAngle = 180; } //note src size is dstSize*2 since miplevels are generated from the subsequent level computation = pow(dstSize * 2 * filterAngle, 2.0f) * (dstSize * dstSize); totalMipComputation += computation; //accumulate computation for completed mip levels if(a_FilterProgress->m_CurrentMipLevel > i) { progressMipComputation = totalMipComputation; } //computation for entire current mip level if(a_FilterProgress->m_CurrentMipLevel == i) { currentMipComputation = computation; currentMipSize = dstSize; } } //fraction of compuation time processing the entire current mip level will take currentMipComputation /= totalMipComputation; progressMipComputation /= totalMipComputation; progressFaceComputation = currentMipComputation * (float)(a_FilterProgress->m_CurrentFace - a_FilterProgress->m_StartFace) / (float)(1 + a_FilterProgress->m_EndFace - a_FilterProgress->m_StartFace); currentFaceComputation = 
currentMipComputation * 1.0f / (1 + a_FilterProgress->m_EndFace - a_FilterProgress->m_StartFace); progressRowComputation = currentFaceComputation * ((float)a_FilterProgress->m_CurrentRow / (float)currentMipSize); //progress completed a_FilterProgress->m_FractionCompleted = progressMipComputation + progressFaceComputation + progressRowComputation; if( a_FilterProgress->m_CurrentFace < 0) { a_FilterProgress->m_CurrentFace = 0; } if( a_FilterProgress->m_CurrentMipLevel < 0) { a_FilterProgress->m_CurrentMipLevel = 0; } if( a_FilterProgress->m_CurrentRow < 0) { a_FilterProgress->m_CurrentRow = 0; } } //-------------------------------------------------------------------------------------- // Return string describing the current status of the cubemap processing threads // //-------------------------------------------------------------------------------------- WCHAR *CCubeMapProcessor::GetFilterProgressString(void) { WCHAR threadProgressString[CP_MAX_FILTER_THREADS][CP_MAX_PROGRESS_STRING]; int32 i; for(i=0; i