// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #ifdef _WIN32 #define ARROW_LITTLE_ENDIAN 1 #else #if defined(__APPLE__) || defined(__FreeBSD__) #include // IWYU pragma: keep #else #include // IWYU pragma: keep #endif # #ifndef __BYTE_ORDER__ #error "__BYTE_ORDER__ not defined" #endif # #ifndef __ORDER_LITTLE_ENDIAN__ #error "__ORDER_LITTLE_ENDIAN__ not defined" #endif # #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define ARROW_LITTLE_ENDIAN 1 #else #define ARROW_LITTLE_ENDIAN 0 #endif #endif #if defined(_MSC_VER) #include // IWYU pragma: keep #include #pragma intrinsic(_BitScanReverse) #pragma intrinsic(_BitScanForward) #define ARROW_BYTE_SWAP64 _byteswap_uint64 #define ARROW_BYTE_SWAP32 _byteswap_ulong #define ARROW_POPCOUNT64 __popcnt64 #define ARROW_POPCOUNT32 __popcnt #else #define ARROW_BYTE_SWAP64 __builtin_bswap64 #define ARROW_BYTE_SWAP32 __builtin_bswap32 #define ARROW_POPCOUNT64 __builtin_popcountll #define ARROW_POPCOUNT32 __builtin_popcount #endif #include #include #include "arrow/util/macros.h" #include "arrow/util/type_traits.h" #include "arrow/util/visibility.h" namespace arrow { namespace detail { template typename std::make_unsigned::type as_unsigned(Integer x) { return static_cast::type>(x); } } // namespace detail namespace BitUtil { // The number of set bits in a given unsigned byte value, pre-computed // // Generated with the following Python code // output = 'static constexpr uint8_t kBytePopcount[] = {{{0}}};' // popcounts = [str(bin(i).count('1')) for i in range(0, 256)] // print(output.format(', '.join(popcounts))) static constexpr uint8_t kBytePopcount[] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; static inline uint64_t PopCount(uint64_t bitmap) { return ARROW_POPCOUNT64(bitmap); } static inline uint32_t PopCount(uint32_t bitmap) { return ARROW_POPCOUNT32(bitmap); } // // Bit-related computations on integer values // // Returns the ceil of value/divisor constexpr int64_t CeilDiv(int64_t value, int64_t divisor) { return (value == 0) ? 0 : 1 + (value - 1) / divisor; } // Return the number of bytes needed to fit the given number of bits constexpr int64_t BytesForBits(int64_t bits) { // This formula avoids integer overflow on very large `bits` return (bits >> 3) + ((bits & 7) != 0); } constexpr bool IsPowerOf2(int64_t value) { return value > 0 && (value & (value - 1)) == 0; } constexpr bool IsPowerOf2(uint64_t value) { return value > 0 && (value & (value - 1)) == 0; } // Returns the smallest power of two that contains v. If v is already a // power of two, it is returned as is. static inline int64_t NextPower2(int64_t n) { // Taken from // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 n--; n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16; n |= n >> 32; n++; return n; } constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; } constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; } // Returns a mask for the bit_index lower order bits. // Only valid for bit_index in the range [0, 64). constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) { return (static_cast(1) << bit_index) - 1; } // Returns 'value' rounded up to the nearest multiple of 'factor' constexpr int64_t RoundUp(int64_t value, int64_t factor) { return CeilDiv(value, factor) * factor; } // Returns 'value' rounded down to the nearest multiple of 'factor' constexpr int64_t RoundDown(int64_t value, int64_t factor) { return (value / factor) * factor; } // Returns 'value' rounded up to the nearest multiple of 'factor' when factor // is a power of two. // The result is undefined on overflow, i.e. if `value > 2**64 - factor`, // since we cannot return the correct result which would be 2**64. constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) { // DCHECK(value >= 0); // DCHECK(IsPowerOf2(factor)); return (value + (factor - 1)) & ~(factor - 1); } constexpr uint64_t RoundUpToPowerOf2(uint64_t value, uint64_t factor) { // DCHECK(IsPowerOf2(factor)); return (value + (factor - 1)) & ~(factor - 1); } constexpr int64_t RoundUpToMultipleOf8(int64_t num) { return RoundUpToPowerOf2(num, 8); } constexpr int64_t RoundUpToMultipleOf64(int64_t num) { return RoundUpToPowerOf2(num, 64); } // Returns the number of bytes covering a sliced bitmap. Find the length // rounded to cover full bytes on both extremities. // // The following example represents a slice (offset=10, length=9) // // 0 8 16 24 // |-------|-------|------| // [ ] (slice) // [ ] (same slice aligned to bytes bounds, length=16) // // The covering bytes is the length (in bytes) of this new aligned slice. constexpr int64_t CoveringBytes(int64_t offset, int64_t length) { return (BitUtil::RoundUp(length + offset, 8) - BitUtil::RoundDown(offset, 8)) / 8; } // Returns the 'num_bits' least-significant bits of 'v'. static inline uint64_t TrailingBits(uint64_t v, int num_bits) { if (ARROW_PREDICT_FALSE(num_bits == 0)) return 0; if (ARROW_PREDICT_FALSE(num_bits >= 64)) return v; int n = 64 - num_bits; return (v << n) >> n; } /// \brief Count the number of leading zeros in an unsigned integer. static inline int CountLeadingZeros(uint32_t value) { #if defined(__clang__) || defined(__GNUC__) if (value == 0) return 32; return static_cast(__builtin_clz(value)); #elif defined(_MSC_VER) unsigned long index; // NOLINT if (_BitScanReverse(&index, static_cast(value))) { // NOLINT return 31 - static_cast(index); } else { return 32; } #else int bitpos = 0; while (value != 0) { value >>= 1; ++bitpos; } return 32 - bitpos; #endif } static inline int CountLeadingZeros(uint64_t value) { #if defined(__clang__) || defined(__GNUC__) if (value == 0) return 64; return static_cast(__builtin_clzll(value)); #elif defined(_MSC_VER) unsigned long index; // NOLINT if (_BitScanReverse64(&index, value)) { // NOLINT return 63 - static_cast(index); } else { return 64; } #else int bitpos = 0; while (value != 0) { value >>= 1; ++bitpos; } return 64 - bitpos; #endif } static inline int CountTrailingZeros(uint32_t value) { #if defined(__clang__) || defined(__GNUC__) if (value == 0) return 32; return static_cast(__builtin_ctzl(value)); #elif defined(_MSC_VER) unsigned long index; // NOLINT if (_BitScanForward(&index, value)) { return static_cast(index); } else { return 32; } #else int bitpos = 0; if (value) { while (value & 1 == 0) { value >>= 1; ++bitpos; } } else { bitpos = 32; } return bitpos; #endif } static inline int CountTrailingZeros(uint64_t value) { #if defined(__clang__) || defined(__GNUC__) if (value == 0) return 64; return static_cast(__builtin_ctzll(value)); #elif defined(_MSC_VER) unsigned long index; // NOLINT if (_BitScanForward64(&index, value)) { return static_cast(index); } else { return 64; } #else int bitpos = 0; if (value) { while (value & 1 == 0) { value >>= 1; ++bitpos; } } else { bitpos = 64; } return bitpos; #endif } // Returns the minimum number of bits needed to represent an unsigned value static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); } // Returns ceil(log2(x)). static inline int Log2(uint64_t x) { // DCHECK_GT(x, 0); return NumRequiredBits(x - 1); } // // Byte-swap 16-bit, 32-bit and 64-bit values // // Swap the byte order (i.e. endianness) static inline int64_t ByteSwap(int64_t value) { return ARROW_BYTE_SWAP64(value); } static inline uint64_t ByteSwap(uint64_t value) { return static_cast(ARROW_BYTE_SWAP64(value)); } static inline int32_t ByteSwap(int32_t value) { return ARROW_BYTE_SWAP32(value); } static inline uint32_t ByteSwap(uint32_t value) { return static_cast(ARROW_BYTE_SWAP32(value)); } static inline int16_t ByteSwap(int16_t value) { constexpr auto m = static_cast(0xff); return static_cast(((value >> 8) & m) | ((value & m) << 8)); } static inline uint16_t ByteSwap(uint16_t value) { return static_cast(ByteSwap(static_cast(value))); } static inline uint8_t ByteSwap(uint8_t value) { return value; } // Write the swapped bytes into dst. Src and dst cannot overlap. static inline void ByteSwap(void* dst, const void* src, int len) { switch (len) { case 1: *reinterpret_cast(dst) = *reinterpret_cast(src); return; case 2: *reinterpret_cast(dst) = ByteSwap(*reinterpret_cast(src)); return; case 4: *reinterpret_cast(dst) = ByteSwap(*reinterpret_cast(src)); return; case 8: *reinterpret_cast(dst) = ByteSwap(*reinterpret_cast(src)); return; default: break; } auto d = reinterpret_cast(dst); auto s = reinterpret_cast(src); for (int i = 0; i < len; ++i) { d[i] = s[len - i - 1]; } } // Convert to little/big endian format from the machine's native endian format. #if ARROW_LITTLE_ENDIAN template > static inline T ToBigEndian(T value) { return ByteSwap(value); } template > static inline T ToLittleEndian(T value) { return value; } #else template > static inline T ToBigEndian(T value) { return value; } template > static inline T ToLittleEndian(T value) { return ByteSwap(value); } #endif // Convert from big/little endian format to the machine's native endian format. #if ARROW_LITTLE_ENDIAN template > static inline T FromBigEndian(T value) { return ByteSwap(value); } template > static inline T FromLittleEndian(T value) { return value; } #else template > static inline T FromBigEndian(T value) { return value; } template > static inline T FromLittleEndian(T value) { return ByteSwap(value); } #endif // // Utilities for reading and writing individual bits by their index // in a memory area. // // Bitmask selecting the k-th bit in a byte static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; // the bitwise complement version of kBitmask static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127}; // Bitmask selecting the (k - 1) preceding bits in a byte static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127}; static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63, 127}; // the bitwise complement version of kPrecedingBitmask static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128}; static inline bool GetBit(const uint8_t* bits, uint64_t i) { return (bits[i >> 3] >> (i & 0x07)) & 1; } // Gets the i-th bit from a byte. Should only be used with i <= 7. static inline bool GetBitFromByte(uint8_t byte, uint8_t i) { return byte & kBitmask[i]; } static inline void ClearBit(uint8_t* bits, int64_t i) { bits[i / 8] &= kFlippedBitmask[i % 8]; } static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; } static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) { // https://graphics.stanford.edu/~seander/bithacks.html // "Conditionally set or clear bits without branching" // NOTE: this seems to confuse Valgrind as it reads from potentially // uninitialized memory bits[i / 8] ^= static_cast(-static_cast(bit_is_set) ^ bits[i / 8]) & kBitmask[i % 8]; } /// \brief set or clear a range of bits quickly ARROW_EXPORT void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set); } // namespace BitUtil } // namespace arrow