// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include "arrow/util/bit_util.h" #include "arrow/util/macros.h" namespace arrow { namespace internal { class BitmapWriter { // A sequential bitwise writer that preserves surrounding bit values. public: BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length) : bitmap_(bitmap), position_(0), length_(length) { byte_offset_ = start_offset / 8; bit_mask_ = BitUtil::kBitmask[start_offset % 8]; if (length > 0) { current_byte_ = bitmap[byte_offset_]; } else { current_byte_ = 0; } } void Set() { current_byte_ |= bit_mask_; } void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; } void Next() { bit_mask_ = static_cast(bit_mask_ << 1); ++position_; if (bit_mask_ == 0) { // Finished this byte, need advancing bit_mask_ = 0x01; bitmap_[byte_offset_++] = current_byte_; if (ARROW_PREDICT_TRUE(position_ < length_)) { current_byte_ = bitmap_[byte_offset_]; } } } void Finish() { // Store current byte if we didn't went past bitmap storage if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) { bitmap_[byte_offset_] = current_byte_; } } int64_t position() const { return position_; } private: uint8_t* bitmap_; int64_t position_; int64_t length_; uint8_t current_byte_; uint8_t bit_mask_; int64_t byte_offset_; }; class FirstTimeBitmapWriter { // Like BitmapWriter, but any bit values *following* the bits written // might be clobbered. It is hence faster than BitmapWriter, and can // also avoid false positives with Valgrind. public: FirstTimeBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length) : bitmap_(bitmap), position_(0), length_(length) { current_byte_ = 0; byte_offset_ = start_offset / 8; bit_mask_ = BitUtil::kBitmask[start_offset % 8]; if (length > 0) { current_byte_ = bitmap[byte_offset_] & BitUtil::kPrecedingBitmask[start_offset % 8]; } else { current_byte_ = 0; } } /// Appends number_of_bits from word to valid_bits and valid_bits_offset. /// /// \param[in] word The LSB bitmap to append. Any bits past number_of_bits are assumed /// to be unset (i.e. 0). /// \param[in] number_of_bits The number of bits to append from word. void AppendWord(uint64_t word, int64_t number_of_bits) { if (ARROW_PREDICT_FALSE(number_of_bits == 0)) { return; } // Location that the first byte needs to be written to. uint8_t* append_position = bitmap_ + byte_offset_; // Update state variables except for current_byte_ here. position_ += number_of_bits; int64_t bit_offset = BitUtil::CountTrailingZeros(static_cast(bit_mask_)); bit_mask_ = BitUtil::kBitmask[(bit_offset + number_of_bits) % 8]; byte_offset_ += (bit_offset + number_of_bits) / 8; if (bit_offset != 0) { // We are in the middle of the byte. This code updates the byte and shifts // bits appropriately within word so it can be memcpy'd below. int64_t bits_to_carry = 8 - bit_offset; // Carry over bits from word to current_byte_. We assume any extra bits in word // unset so no additional accounting is needed for when number_of_bits < // bits_to_carry. current_byte_ |= (word & BitUtil::kPrecedingBitmask[bits_to_carry]) << bit_offset; // Check if everything is transfered into current_byte_. if (ARROW_PREDICT_FALSE(number_of_bits < bits_to_carry)) { return; } *append_position = current_byte_; append_position++; // Move the carry bits off of word. word = word >> bits_to_carry; number_of_bits -= bits_to_carry; } word = BitUtil::ToLittleEndian(word); int64_t bytes_for_word = ::arrow::BitUtil::BytesForBits(number_of_bits); std::memcpy(append_position, &word, bytes_for_word); // At this point, the previous current_byte_ has been written to bitmap_. // The new current_byte_ is either the last relevant byte in 'word' // or cleared if the new position is byte aligned (i.e. a fresh byte). if (bit_mask_ == 0x1) { current_byte_ = 0; } else { current_byte_ = *(append_position + bytes_for_word - 1); } } void Set() { current_byte_ |= bit_mask_; } void Clear() {} void Next() { bit_mask_ = static_cast(bit_mask_ << 1); ++position_; if (bit_mask_ == 0) { // Finished this byte, need advancing bit_mask_ = 0x01; bitmap_[byte_offset_++] = current_byte_; current_byte_ = 0; } } void Finish() { // Store current byte if we didn't went go bitmap storage if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) { bitmap_[byte_offset_] = current_byte_; } } int64_t position() const { return position_; } private: uint8_t* bitmap_; int64_t position_; int64_t length_; uint8_t current_byte_; uint8_t bit_mask_; int64_t byte_offset_; }; } // namespace internal } // namespace arrow