/** * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0. */ #pragma once #include #include #include #include #include #include namespace Aws { namespace Utils { namespace Json { class JsonValue; class JsonView; } // namespace Json } // namespace Utils namespace Firehose { namespace Model { /** *

A serializer to use for converting data to the ORC format before storing it in Amazon S3. For more information, see Apache ORC.

See Also:

AWS API Reference

*/
class OrcSerDe
{
public:
    /** Default constructor; the definition lives outside this header. */
    AWS_FIREHOSE_API OrcSerDe();

    /**
     * Builds an instance from a JSON view; the definition lives outside this
     * header. Deliberately left non-explicit so that existing implicit
     * conversions from JsonView keep compiling.
     */
    AWS_FIREHOSE_API OrcSerDe(Aws::Utils::Json::JsonView jsonValue);

    /** Assigns from a JSON view; the definition lives outside this header. */
    AWS_FIREHOSE_API OrcSerDe& operator=(Aws::Utils::Json::JsonView jsonValue);

    /**
     * Returns a JsonValue for this object (presumably its serialized form;
     * the definition lives outside this header).
     */
    AWS_FIREHOSE_API Aws::Utils::Json::JsonValue Jsonize() const;


    // ---------------------------------------------------------------------
    // StripeSizeBytes
    // ---------------------------------------------------------------------

    /**
     * The number of bytes in each stripe. The default is 64 MiB and the
     * minimum is 8 MiB.
     */
    int GetStripeSizeBytes() const { return m_stripeSizeBytes; }

    /** Returns the flag that SetStripeSizeBytes() raises. */
    bool StripeSizeBytesHasBeenSet() const { return m_stripeSizeBytesHasBeenSet; }

    /**
     * Stores the number of bytes in each stripe and raises the matching
     * "has been set" flag. The default is 64 MiB and the minimum is 8 MiB.
     */
    void SetStripeSizeBytes(int value)
    {
        m_stripeSizeBytes = value;
        m_stripeSizeBytesHasBeenSet = true;
    }

    /** Chaining form of SetStripeSizeBytes(); returns *this. */
    OrcSerDe& WithStripeSizeBytes(int value)
    {
        SetStripeSizeBytes(value);
        return *this;
    }


    // ---------------------------------------------------------------------
    // BlockSizeBytes
    // ---------------------------------------------------------------------

    /**
     * The Hadoop Distributed File System (HDFS) block size. This is useful
     * if you intend to copy the data from Amazon S3 to HDFS before querying.
     * The default is 256 MiB and the minimum is 64 MiB. Kinesis Data
     * Firehose uses this value for padding calculations.
     */
    int GetBlockSizeBytes() const { return m_blockSizeBytes; }

    /** Returns the flag that SetBlockSizeBytes() raises. */
    bool BlockSizeBytesHasBeenSet() const { return m_blockSizeBytesHasBeenSet; }

    /**
     * Stores the HDFS block size and raises the matching "has been set"
     * flag. The default is 256 MiB and the minimum is 64 MiB.
     */
    void SetBlockSizeBytes(int value)
    {
        m_blockSizeBytes = value;
        m_blockSizeBytesHasBeenSet = true;
    }

    /** Chaining form of SetBlockSizeBytes(); returns *this. */
    OrcSerDe& WithBlockSizeBytes(int value)
    {
        SetBlockSizeBytes(value);
        return *this;
    }


    // ---------------------------------------------------------------------
    // RowIndexStride
    // ---------------------------------------------------------------------

    /**
     * The number of rows between index entries. The default is 10,000 and
     * the minimum is 1,000.
     */
    int GetRowIndexStride() const { return m_rowIndexStride; }

    /** Returns the flag that SetRowIndexStride() raises. */
    bool RowIndexStrideHasBeenSet() const { return m_rowIndexStrideHasBeenSet; }

    /**
     * Stores the number of rows between index entries and raises the
     * matching "has been set" flag. The default is 10,000 and the minimum
     * is 1,000.
     */
    void SetRowIndexStride(int value)
    {
        m_rowIndexStride = value;
        m_rowIndexStrideHasBeenSet = true;
    }

    /** Chaining form of SetRowIndexStride(); returns *this. */
    OrcSerDe& WithRowIndexStride(int value)
    {
        SetRowIndexStride(value);
        return *this;
    }


    // ---------------------------------------------------------------------
    // EnablePadding
    // ---------------------------------------------------------------------

    /**
     * Set this to true to indicate that you want stripes to be padded to
     * the HDFS block boundaries. This is useful if you intend to copy the
     * data from Amazon S3 to HDFS before querying. The default is false.
     */
    bool GetEnablePadding() const { return m_enablePadding; }

    /** Returns the flag that SetEnablePadding() raises. */
    bool EnablePaddingHasBeenSet() const { return m_enablePaddingHasBeenSet; }

    /**
     * Stores whether stripes are padded to the HDFS block boundaries and
     * raises the matching "has been set" flag. The default is false.
     */
    void SetEnablePadding(bool value)
    {
        m_enablePadding = value;
        m_enablePaddingHasBeenSet = true;
    }

    /** Chaining form of SetEnablePadding(); returns *this. */
    OrcSerDe& WithEnablePadding(bool value)
    {
        SetEnablePadding(value);
        return *this;
    }


    // ---------------------------------------------------------------------
    // PaddingTolerance
    // ---------------------------------------------------------------------

    /**
     * A number between 0 and 1 that defines the tolerance for block padding
     * as a decimal fraction of stripe size. The default value is 0.05,
     * which means 5 percent of stripe size.
     *
     * For the default values of 64 MiB ORC stripes and 256 MiB HDFS blocks,
     * the default block padding tolerance of 5 percent reserves a maximum
     * of 3.2 MiB for padding within the 256 MiB block. In such a case, if
     * the available size within the block is more than 3.2 MiB, a new,
     * smaller stripe is inserted to fit within that space. This ensures
     * that no stripe crosses block boundaries and causes remote reads
     * within a node-local task.
     *
     * Kinesis Data Firehose ignores this parameter when EnablePadding
     * (see SetEnablePadding()) is false.
     */
    double GetPaddingTolerance() const { return m_paddingTolerance; }

    /** Returns the flag that SetPaddingTolerance() raises. */
    bool PaddingToleranceHasBeenSet() const { return m_paddingToleranceHasBeenSet; }

    /**
     * Stores the block padding tolerance (a fraction of stripe size between
     * 0 and 1; default 0.05) and raises the matching "has been set" flag.
     * See GetPaddingTolerance() for the full description.
     */
    void SetPaddingTolerance(double value)
    {
        m_paddingTolerance = value;
        m_paddingToleranceHasBeenSet = true;
    }

    /** Chaining form of SetPaddingTolerance(); returns *this. */
    OrcSerDe& WithPaddingTolerance(double value)
    {
        SetPaddingTolerance(value);
        return *this;
    }


    // ---------------------------------------------------------------------
    // Compression
    // ---------------------------------------------------------------------

    /**
     * The compression code to use over data blocks. The default is SNAPPY.
     */
    const OrcCompression& GetCompression() const { return m_compression; }

    /** Returns the flag that the SetCompression() overloads raise. */
    bool CompressionHasBeenSet() const { return m_compressionHasBeenSet; }

    /**
     * Copies in the compression code to use over data blocks and raises the
     * matching "has been set" flag. The default is SNAPPY.
     */
    void SetCompression(const OrcCompression& value)
    {
        m_compression = value;
        m_compressionHasBeenSet = true;
    }

    /** Rvalue overload of SetCompression(); moves from value. */
    void SetCompression(OrcCompression&& value)
    {
        m_compression = std::move(value);
        m_compressionHasBeenSet = true;
    }

    /** Chaining form of SetCompression(const OrcCompression&); returns *this. */
    OrcSerDe& WithCompression(const OrcCompression& value)
    {
        SetCompression(value);
        return *this;
    }

    /** Chaining form of SetCompression(OrcCompression&&); returns *this. */
    OrcSerDe& WithCompression(OrcCompression&& value)
    {
        SetCompression(std::move(value));
        return *this;
    }


    // ---------------------------------------------------------------------
    // BloomFilterColumns
    //
    // The container's element type is Aws::String: that is the type the
    // AddBloomFilterColumns() overloads below append to it.
    // ---------------------------------------------------------------------

    /**
     * The column names for which you want Kinesis Data Firehose to create
     * bloom filters. The default is null.
     */
    const Aws::Vector<Aws::String>& GetBloomFilterColumns() const { return m_bloomFilterColumns; }

    /**
     * Returns the flag raised by the SetBloomFilterColumns() and
     * AddBloomFilterColumns() overloads.
     */
    bool BloomFilterColumnsHasBeenSet() const { return m_bloomFilterColumnsHasBeenSet; }

    /**
     * Replaces the list of bloom-filter column names with a copy of value
     * and raises the matching "has been set" flag.
     */
    void SetBloomFilterColumns(const Aws::Vector<Aws::String>& value)
    {
        m_bloomFilterColumns = value;
        m_bloomFilterColumnsHasBeenSet = true;
    }

    /** Rvalue overload of SetBloomFilterColumns(); moves from value. */
    void SetBloomFilterColumns(Aws::Vector<Aws::String>&& value)
    {
        m_bloomFilterColumns = std::move(value);
        m_bloomFilterColumnsHasBeenSet = true;
    }

    /** Chaining form of the copying SetBloomFilterColumns(); returns *this. */
    OrcSerDe& WithBloomFilterColumns(const Aws::Vector<Aws::String>& value)
    {
        SetBloomFilterColumns(value);
        return *this;
    }

    /** Chaining form of the moving SetBloomFilterColumns(); returns *this. */
    OrcSerDe& WithBloomFilterColumns(Aws::Vector<Aws::String>&& value)
    {
        SetBloomFilterColumns(std::move(value));
        return *this;
    }

    /**
     * Appends one column name (copied) to the bloom-filter column list,
     * raises the matching "has been set" flag, and returns *this.
     */
    OrcSerDe& AddBloomFilterColumns(const Aws::String& value)
    {
        m_bloomFilterColumns.push_back(value);
        m_bloomFilterColumnsHasBeenSet = true;
        return *this;
    }

    /** Rvalue overload of AddBloomFilterColumns(); moves from value. */
    OrcSerDe& AddBloomFilterColumns(Aws::String&& value)
    {
        m_bloomFilterColumns.push_back(std::move(value));
        m_bloomFilterColumnsHasBeenSet = true;
        return *this;
    }

    /**
     * C-string overload of AddBloomFilterColumns(); an Aws::String is built
     * from value and appended.
     */
    OrcSerDe& AddBloomFilterColumns(const char* value)
    {
        m_bloomFilterColumns.push_back(value);
        m_bloomFilterColumnsHasBeenSet = true;
        return *this;
    }


    // ---------------------------------------------------------------------
    // BloomFilterFalsePositiveProbability
    // ---------------------------------------------------------------------

    /**
     * The Bloom filter false positive probability (FPP). The lower the FPP,
     * the bigger the Bloom filter. The default value is 0.05, the minimum
     * is 0, and the maximum is 1.
     */
    double GetBloomFilterFalsePositiveProbability() const
    {
        return m_bloomFilterFalsePositiveProbability;
    }

    /** Returns the flag that SetBloomFilterFalsePositiveProbability() raises. */
    bool BloomFilterFalsePositiveProbabilityHasBeenSet() const
    {
        return m_bloomFilterFalsePositiveProbabilityHasBeenSet;
    }

    /**
     * Stores the Bloom filter false positive probability and raises the
     * matching "has been set" flag. The default value is 0.05, the minimum
     * is 0, and the maximum is 1.
     */
    void SetBloomFilterFalsePositiveProbability(double value)
    {
        m_bloomFilterFalsePositiveProbability = value;
        m_bloomFilterFalsePositiveProbabilityHasBeenSet = true;
    }

    /** Chaining form of SetBloomFilterFalsePositiveProbability(); returns *this. */
    OrcSerDe& WithBloomFilterFalsePositiveProbability(double value)
    {
        SetBloomFilterFalsePositiveProbability(value);
        return *this;
    }


    // ---------------------------------------------------------------------
    // DictionaryKeyThreshold
    // ---------------------------------------------------------------------

    /**
     * Represents the fraction of the total number of non-null rows. To turn
     * off dictionary encoding, set this fraction to a number that is less
     * than the number of distinct keys in a dictionary. To always use
     * dictionary encoding, set this threshold to 1.
     */
    double GetDictionaryKeyThreshold() const { return m_dictionaryKeyThreshold; }

    /** Returns the flag that SetDictionaryKeyThreshold() raises. */
    bool DictionaryKeyThresholdHasBeenSet() const { return m_dictionaryKeyThresholdHasBeenSet; }

    /**
     * Stores the dictionary key threshold and raises the matching "has been
     * set" flag. See GetDictionaryKeyThreshold() for the meaning of the
     * value.
     */
    void SetDictionaryKeyThreshold(double value)
    {
        m_dictionaryKeyThreshold = value;
        m_dictionaryKeyThresholdHasBeenSet = true;
    }

    /** Chaining form of SetDictionaryKeyThreshold(); returns *this. */
    OrcSerDe& WithDictionaryKeyThreshold(double value)
    {
        SetDictionaryKeyThreshold(value);
        return *this;
    }


    // ---------------------------------------------------------------------
    // FormatVersion
    // ---------------------------------------------------------------------

    /**
     * The version of the file to write. The possible values are V0_11 and
     * V0_12. The default is V0_12.
     */
    const OrcFormatVersion& GetFormatVersion() const { return m_formatVersion; }

    /** Returns the flag that the SetFormatVersion() overloads raise. */
    bool FormatVersionHasBeenSet() const { return m_formatVersionHasBeenSet; }

    /**
     * Copies in the version of the file to write and raises the matching
     * "has been set" flag. The possible values are V0_11 and V0_12. The
     * default is V0_12.
     */
    void SetFormatVersion(const OrcFormatVersion& value)
    {
        m_formatVersion = value;
        m_formatVersionHasBeenSet = true;
    }

    /** Rvalue overload of SetFormatVersion(); moves from value. */
    void SetFormatVersion(OrcFormatVersion&& value)
    {
        m_formatVersion = std::move(value);
        m_formatVersionHasBeenSet = true;
    }

    /** Chaining form of SetFormatVersion(const OrcFormatVersion&); returns *this. */
    OrcSerDe& WithFormatVersion(const OrcFormatVersion& value)
    {
        SetFormatVersion(value);
        return *this;
    }

    /** Chaining form of SetFormatVersion(OrcFormatVersion&&); returns *this. */
    OrcSerDe& WithFormatVersion(OrcFormatVersion&& value)
    {
        SetFormatVersion(std::move(value));
        return *this;
    }

private:
    // Every value field is value-initialized here so that a getter never
    // reads an indeterminate value, even if an out-of-line constructor
    // leaves the field untouched. A mem-initializer in a constructor still
    // takes precedence over these. The zeros are placeholders, NOT the
    // service defaults quoted in the accessor docs; consult the matching
    // *HasBeenSet() flag before trusting a value.

    int m_stripeSizeBytes{0};
    bool m_stripeSizeBytesHasBeenSet = false;

    int m_blockSizeBytes{0};
    bool m_blockSizeBytesHasBeenSet = false;

    int m_rowIndexStride{0};
    bool m_rowIndexStrideHasBeenSet = false;

    bool m_enablePadding{false};
    bool m_enablePaddingHasBeenSet = false;

    double m_paddingTolerance{0.0};
    bool m_paddingToleranceHasBeenSet = false;

    OrcCompression m_compression{};
    bool m_compressionHasBeenSet = false;

    Aws::Vector<Aws::String> m_bloomFilterColumns;
    bool m_bloomFilterColumnsHasBeenSet = false;

    double m_bloomFilterFalsePositiveProbability{0.0};
    bool m_bloomFilterFalsePositiveProbabilityHasBeenSet = false;

    double m_dictionaryKeyThreshold{0.0};
    bool m_dictionaryKeyThresholdHasBeenSet = false;

    OrcFormatVersion m_formatVersion{};
    bool m_formatVersionHasBeenSet = false;
};

} // namespace Model
} // namespace Firehose
} // namespace Aws