/* * All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or * its licensors. * * For complete copyright and license terms please see the LICENSE at the root of this * distribution (the "License"). All use of this software is governed by the License, * or, if provided, by the license below or the license accompanying this file. Do not * remove or modify any license notices. This file is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * */ // Original file Copyright Crytek GMBH or its affiliates, used under license. // Description : Predictors for index frame compression #ifndef CRYINCLUDE_CRY3DENGINE_GEOMCACHEPREDICTORS_H #define CRYINCLUDE_CRY3DENGINE_GEOMCACHEPREDICTORS_H #pragma once #include "GeomCacheFileFormat.h" #include "Cry3DEngineTraits.h" namespace GeomCachePredictors { ////////////////////////////////////////////////////////////////////////// // Index frame prediction ////////////////////////////////////////////////////////////////////////// template void ParallelogramPredictor(const uint numValues, T* pIn, T* pOut, const std::vector& predictorData) { T* pAbsoluteValues = kbEncode ? pIn : pOut; uint outPosition = 0; for (uint i = 0, predictorDataPos = 0; i < numValues; ++i) { const uint16 uDist = predictorData[predictorDataPos++]; T predictedValue; if (uDist == 0xFFFF) { if (i == 0) { // There is no previous value, so we just pass through pOut[outPosition++] = pIn[i]; continue; } // No neighbour triangle, just use previous value for prediction predictedValue = pAbsoluteValues[i - 1]; } else { // Parallelogram prediction const uint16 vDist = predictorData[predictorDataPos++]; const uint16 wDist = predictorData[predictorDataPos++]; const T& u = pAbsoluteValues[i - uDist]; const T& v = pAbsoluteValues[i - vDist]; const T& w = pAbsoluteValues[i - wDist]; predictedValue = u + v - w; } if (kbEncode) { const T realValue = pIn[i]; const T delta = realValue - predictedValue; pOut[outPosition++] = delta; } else { const T delta = pIn[i]; const T realValue = delta + predictedValue; pOut[outPosition++] = realValue; } } } template void QTangentPredictor(const uint numValues, const GeomCacheFile::QTangent* pIn, GeomCacheFile::QTangent* pOut, const std::vector& predictorData) { const GeomCacheFile::QTangent* pAbsoluteValues = kbEncode ? pIn : pOut; uint outPosition = 0; for (uint i = 0, predictorDataPos = 0; i < numValues; ++i) { const uint16 uDist = predictorData[predictorDataPos++]; Vec4_tpl predictedValue; if (uDist == 0xFFFF) { if (i == 0) { // There is no previous value, so we just pass through pOut[outPosition++] = pIn[i]; continue; } // No neighbour triangle, just use previous value for prediction predictedValue = pAbsoluteValues[i - 1]; } else { // Average value of two nearest vertices of adjancent triangle const uint16 vDist = predictorData[predictorDataPos++]; ++predictorDataPos; const GeomCacheFile::QTangent& u = pAbsoluteValues[i - uDist]; const GeomCacheFile::QTangent& v = pAbsoluteValues[i - vDist]; predictedValue = Vec4_tpl(u) + Vec4_tpl(v); // Vec4_tpl defines division in a way that only works for floats predictedValue.x /= 2; predictedValue.y /= 2; predictedValue.z /= 2; predictedValue.w /= 2; } if (kbEncode) { const GeomCacheFile::QTangent& realValue = pIn[i]; const GeomCacheFile::QTangent delta = realValue - predictedValue; pOut[outPosition++] = delta; } else { const GeomCacheFile::QTangent delta = pIn[i]; const GeomCacheFile::QTangent realValue = delta + predictedValue; pOut[outPosition++] = realValue; } } } template inline void ColorPredictor(const uint numValues, const GeomCacheFile::Color* pIn, GeomCacheFile::Color* pOut, const std::vector& predictorData) { const GeomCacheFile::Color* pAbsoluteValues = kbEncode ? pIn : pOut; uint outPosition = 0; for (uint i = 0, predictorDataPos = 0; i < numValues; ++i) { const uint16 uDist = predictorData[predictorDataPos++]; int32 predictedValue; if (uDist == 0xFFFF) { if (i == 0) { // There is no previous value, so we just pass through pOut[outPosition++] = pIn[i]; continue; } // No neighbour triangle, just use previous value for prediction predictedValue = pAbsoluteValues[i - 1]; } else { // Average value of two nearest vertices of adjancent triangle const uint16 vDist = predictorData[predictorDataPos++]; ++predictorDataPos; const GeomCacheFile::Color& u = pAbsoluteValues[i - uDist]; const GeomCacheFile::Color& v = pAbsoluteValues[i - vDist]; predictedValue = (int32(u) + int32(v)) / 2; } if (kbEncode) { const GeomCacheFile::Color& realValue = pIn[i]; const GeomCacheFile::Color delta = realValue - predictedValue; pOut[outPosition++] = delta; } else { const GeomCacheFile::Color delta = pIn[i]; const GeomCacheFile::Color realValue = delta + predictedValue; pOut[outPosition++] = realValue; } } } ////////////////////////////////////////////////////////////////////////// // Temporal prediction ////////////////////////////////////////////////////////////////////////// // Motion predictor input data template struct STemporalPredictorData { uint m_numElements; const T* m_pPrevFrames[2]; const T* m_pFloorFrame; const T* m_pCeilFrame; }; template Vec2_tpl operator>>(const Vec2_tpl& v, uint shift) { Vec2_tpl result = v; result.x >>= shift; result.y >>= shift; return result; } template Vec3_tpl operator>>(const Vec3_tpl& v, uint shift) { Vec3_tpl result = v; result.x >>= shift; result.y >>= shift; result.z >>= shift; return result; } template Vec4_tpl operator>>(const Vec4_tpl& v, uint shift) { Vec4_tpl result = v; result.x >>= shift; result.y >>= shift; result.z >>= shift; result.w >>= shift; return result; } template void InterpolateDeltaEncode(const uint numValues, const uint8 lerpFactor, const T* pFloorFrame, const T* pCeilFrame, const T* pIn, T* pOut) { for (uint i = 0; i < numValues; ++i) { const I floorValue = I(pFloorFrame[i]); const I ceilValue = I(pCeilFrame[i]); const T predictedValue = T(floorValue + (((ceilValue - floorValue) * lerpFactor) >> 8)); const T& realValue = pIn[i]; const T delta = realValue - predictedValue; pOut[i] = delta; } } template void MotionDeltaEncode(const uint numValues, const uint8 acceleration, const T* const pPrevFrames[2], const T* pIn, T* pOut) { for (uint i = 0; i < numValues; ++i) { const I prevPrevFrameValue = I(pPrevFrames[0][i]); const I prevFrameValue = I(pPrevFrames[1][i]); const T predictedValue = T(prevFrameValue + (((prevFrameValue - prevPrevFrameValue) * acceleration) >> 7)); const T& realValue = pIn[i]; const T delta = realValue - predictedValue; pOut[i] = delta; } } template void InterpolateMotionDeltaPredictor(const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData& data, const T* pIn, T* pOut) { const T* pFloorFrame = data.m_pFloorFrame; const T* const pCeilFrame = data.m_pCeilFrame; const T* const* pPrevFrames = data.m_pPrevFrames; const uint8& lerpFactor = controlIn.m_indexFrameLerpFactor; const uint8& acceleration = controlIn.m_acceleration; const uint8 combineFactor = controlIn.m_combineFactor; const uint numElements = data.m_numElements; for (uint i = 0; i < numElements; ++i) { const I prevPrevFrameValue = I(pPrevFrames[0][i]); const I prevFrameValue = I(pPrevFrames[1][i]); const I floorValue = I(pFloorFrame[i]); const I ceilValue = I(pCeilFrame[i]); const I interpolatePredictedValue = T(floorValue + (((ceilValue - floorValue) * lerpFactor) >> 8)); const I motionPredictedValue = T(prevFrameValue + (((prevFrameValue - prevPrevFrameValue) * acceleration) >> 7)); const T predictedValue = (interpolatePredictedValue + (((motionPredictedValue - interpolatePredictedValue) * combineFactor) >> 7)); if (kbEncode) { const T realValue = pIn[i]; const T delta = realValue - predictedValue; pOut[i] = delta; } else { const T delta = pIn[i]; const T realValue = delta + predictedValue; pOut[i] = realValue; } } } #if AZ_LEGACY_3DENGINE_TRAIT_DEFINE_MM_MULLO_EPI32_EMU ILINE __m128i _mm_mullo_epi32_emu(const __m128i& a, const __m128i& b) { #if AZ_LEGACY_3DENGINE_TRAIT_HAS_MM_MULLO_EPI32 return _mm_mullo_epi32(a, b); #else __m128i tmp1 = _mm_mul_epu32(a, b); __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4)); return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); #endif } ILINE __m128i _mm_packus_epi32_emu(__m128i& a, __m128i& b) { #if AZ_LEGACY_3DENGINE_TRAIT_HAS_MM_PACKUS_EPI32 return _mm_packus_epi32(a, b); #else a = _mm_slli_epi32(a, 16); b = _mm_slli_epi32(b, 16); a = _mm_srai_epi32(a, 16); b = _mm_srai_epi32(b, 16); return _mm_packs_epi32(a, b); #endif } ILINE __m128i Interpolate(__m128i a, __m128i b, __m128i c, const uint32 factor, const int shiftFactor) { const __m128i zero = _mm_setzero_si128(); const __m128i truncate = _mm_set_epi16(0, -1, 0, -1, 0, -1, 0, -1); // Unpack to 2x4 32 bit integers __m128i factors = _mm_set1_epi32(factor); __m128i aLo = _mm_unpacklo_epi16(a, zero); __m128i aHi = _mm_unpackhi_epi16(a, zero); __m128i bLo = _mm_unpacklo_epi16(b, zero); __m128i bHi = _mm_unpackhi_epi16(b, zero); // Interpolate and pack again __m128i lerpLo = _mm_sub_epi32(bLo, aLo); lerpLo = _mm_mullo_epi32_emu(lerpLo, factors); lerpLo = _mm_srli_epi32(lerpLo, shiftFactor); lerpLo = _mm_and_si128(lerpLo, truncate); __m128i lerpHi = _mm_sub_epi32(bHi, aHi); lerpHi = _mm_mullo_epi32_emu(lerpHi, factors); lerpHi = _mm_srli_epi32(lerpHi, shiftFactor); lerpHi = _mm_and_si128(lerpHi, truncate); __m128i lerp = _mm_packus_epi32_emu(lerpLo, lerpHi); __m128i result = _mm_add_epi16(lerp, c); return result; } template<> void InterpolateMotionDeltaPredictor (const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData& data, const uint16* pIn, uint16* pOut) { __m128i* pRawIn = (__m128i*)pIn; __m128i* pRawOut = (__m128i*)pOut; __m128i* pFloorFrame = (__m128i*)data.m_pFloorFrame; __m128i* pCeilFrame = (__m128i*)data.m_pCeilFrame; __m128i* pPrevFrames[2] = { (__m128i*)data.m_pPrevFrames[0], (__m128i*)data.m_pPrevFrames[1] }; const uint8 lerpFactor = controlIn.m_indexFrameLerpFactor; const uint8 acceleration = controlIn.m_acceleration; const uint8 combineFactor = controlIn.m_combineFactor; // vector store as much as possible, but account for cases where the output buffer // size doesn't divide evenly by 8 const uint remainingElements = data.m_numElements % 8; const uint numElementsPadded = data.m_numElements / 8 + (remainingElements != 0); const uint lastElement = numElementsPadded - 1; for (uint i = 0; i < numElementsPadded; ++i) { // Load 8 floor & ceil values __m128i floorValues = _mm_load_si128(pFloorFrame + i); __m128i ceilValues = _mm_load_si128(pCeilFrame + i); // Load 8 prep prev & prev frame values __m128i prevPrevFrameValues = _mm_load_si128(pPrevFrames[0] + i); __m128i prevFrameValues = _mm_load_si128(pPrevFrames[1] + i); // Calculate prediction __m128i lerp = Interpolate(floorValues, ceilValues, floorValues, lerpFactor, 8); __m128i motion = Interpolate(prevPrevFrameValues, prevFrameValues, prevFrameValues, acceleration, 7); __m128i predictedValues = Interpolate(lerp, motion, lerp, combineFactor, 7); __m128i delta = _mm_load_si128(pRawIn + i); __m128i realValues = _mm_add_epi16(delta, predictedValues); if (i == lastElement) { memcpy(pOut + (i * 8), &realValues, remainingElements * sizeof(uint16)); } else { _mm_store_si128(pRawOut + i, realValues); } } } template<> void InterpolateMotionDeltaPredictor, Vec2_tpl, false> (const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData >& data, const Vec2_tpl* pIn, Vec2_tpl* pOut) { STemporalPredictorData uInt16Data; uInt16Data.m_pFloorFrame = (uint16*)data.m_pFloorFrame; uInt16Data.m_pCeilFrame = (uint16*)data.m_pCeilFrame; uInt16Data.m_pPrevFrames[0] = (uint16*)data.m_pPrevFrames[0]; uInt16Data.m_pPrevFrames[1] = (uint16*)data.m_pPrevFrames[1]; uInt16Data.m_numElements = data.m_numElements * 2; InterpolateMotionDeltaPredictor(controlIn, uInt16Data, (uint16*)pIn, (uint16*)pOut); } template<> void InterpolateMotionDeltaPredictor, Vec3_tpl, false> (const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData >& data, const Vec3_tpl* pIn, Vec3_tpl* pOut) { STemporalPredictorData uInt16Data; uInt16Data.m_pFloorFrame = (uint16*)data.m_pFloorFrame; uInt16Data.m_pCeilFrame = (uint16*)data.m_pCeilFrame; uInt16Data.m_pPrevFrames[0] = (uint16*)data.m_pPrevFrames[0]; uInt16Data.m_pPrevFrames[1] = (uint16*)data.m_pPrevFrames[1]; uInt16Data.m_numElements = data.m_numElements * 3; InterpolateMotionDeltaPredictor(controlIn, uInt16Data, (uint16*)pIn, (uint16*)pOut); } template<> void InterpolateMotionDeltaPredictor, Vec4_tpl, false> (const GeomCacheFile::STemporalPredictorControl& controlIn, const STemporalPredictorData >& data, const Vec4_tpl* pIn, Vec4_tpl* pOut) { STemporalPredictorData uInt16Data; uInt16Data.m_pFloorFrame = (uint16*)data.m_pFloorFrame; uInt16Data.m_pCeilFrame = (uint16*)data.m_pCeilFrame; uInt16Data.m_pPrevFrames[0] = (uint16*)data.m_pPrevFrames[0]; uInt16Data.m_pPrevFrames[1] = (uint16*)data.m_pPrevFrames[1]; uInt16Data.m_numElements = data.m_numElements * 4; InterpolateMotionDeltaPredictor(controlIn, uInt16Data, (uint16*)pIn, (uint16*)pOut); } #endif } #endif // CRYINCLUDE_CRY3DENGINE_GEOMCACHEPREDICTORS_H