/* * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with * the License. A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ package com.amazonaws.services.glue.model; import java.io.Serializable; import javax.annotation.Generated; import com.amazonaws.protocol.StructuredPojo; import com.amazonaws.protocol.ProtocolMarshaller; /** *
* Specifies an Apache Parquet data store stored in Amazon S3. *
* * @see AWS API * Documentation */ @Generated("com.amazonaws:aws-java-sdk-code-generator") public class S3ParquetSource implements Serializable, Cloneable, StructuredPojo { /** ** The name of the data store. *
*/ private String name; /** ** A list of the Amazon S3 paths to read from. *
*/ private java.util.List
* Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension.
* Possible values are "gzip"
and "bzip"
).
*
* A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all * PDF files. *
*/ private java.util.List
* The target group size in bytes. The default is computed based on the input data size and the size of your
* cluster. When there are fewer than 50,000 input files, "groupFiles"
must be set to
* "inPartition"
for this to take effect.
*
* Grouping files is turned on by default when the input contains more than 50,000 files. To turn on grouping with
* fewer than 50,000 files, set this parameter to "inPartition". To disable grouping when there are more than 50,000
* files, set this parameter to "none"
.
*
* If set to true, recursively reads files in all subdirectories under the specified paths. *
*/ private Boolean recurse; /** ** This option controls the duration in milliseconds after which the s3 listing is likely to be consistent. Files * with modification timestamps falling within the last maxBand milliseconds are tracked specially when using * JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option. The default * is 900000 milliseconds, or 15 minutes. *
*/ private Integer maxBand; /** ** This option specifies the maximum number of files to save from the last maxBand seconds. If this number is * exceeded, extra files are skipped and only processed in the next job run. *
*/ private Integer maxFilesInBand; /** ** Specifies additional connection options. *
*/ private S3DirectSourceAdditionalOptions additionalOptions; /** ** Specifies the data schema for the S3 Parquet source. *
*/ private java.util.List* The name of the data store. *
*
* @param name
*        The name of the data store.
*/
public void setName(String name) {
    this.name = name;
}

/**
* The name of the data store.
*
* @return The name of the data store.
*/
public String getName() {
    return name;
}

/**
* The name of the data store.
*
* @param name
*        The name of the data store.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public S3ParquetSource withName(String name) {
    // Goes through the setter so the fluent and plain variants stay in sync.
    setName(name);
    return this;
}

/**
* A list of the Amazon S3 paths to read from.
* * @return A list of the Amazon S3 paths to read from. */ public java.util.List* A list of the Amazon S3 paths to read from. *
* * @param paths * A list of the Amazon S3 paths to read from. */ public void setPaths(java.util.Collection* A list of the Amazon S3 paths to read from. *
** NOTE: This method appends the values to the existing list (if any). Use * {@link #setPaths(java.util.Collection)} or {@link #withPaths(java.util.Collection)} if you want to override the * existing values. *
* * @param paths * A list of the Amazon S3 paths to read from. * @return Returns a reference to this object so that method calls can be chained together. */ public S3ParquetSource withPaths(String... paths) { if (this.paths == null) { setPaths(new java.util.ArrayList* A list of the Amazon S3 paths to read from. *
* * @param paths * A list of the Amazon S3 paths to read from. * @return Returns a reference to this object so that method calls can be chained together. */ public S3ParquetSource withPaths(java.util.Collection
* Specifies how the data is compressed. This is generally not necessary if the data has a standard file extension.
* Possible values are "gzip"
and "bzip"
).
*
"gzip"
and "bzip"
).
* @see ParquetCompressionType
*/
public void setCompressionType(String compressionType) {
// Stores the value as given; this setter performs no validation against ParquetCompressionType.
this.compressionType = compressionType;
}
/**
 * Returns how the data is compressed. This is generally not necessary if the data has a standard file
 * extension. Possible values are {@code "gzip"} and {@code "bzip"}.
 *
 * @return The compression type currently stored on this object, as a string.
 * @see ParquetCompressionType
 */
public String getCompressionType() {
    return compressionType;
}
/**
 * Specifies how the data is compressed. This is generally not necessary if the data has a standard file
 * extension. Possible values are {@code "gzip"} and {@code "bzip"}.
 *
 * @param compressionType
 *        The compression type, as a string. It is passed unchanged to
 *        {@link #setCompressionType(String)}.
 * @return Returns a reference to this object so that method calls can be chained together.
 * @see ParquetCompressionType
 */
public S3ParquetSource withCompressionType(String compressionType) {
    // Goes through the setter so the fluent and plain variants stay in sync.
    setCompressionType(compressionType);
    return this;
}
/**
 * Specifies how the data is compressed, using the enum form of the value. This is generally not necessary
 * if the data has a standard file extension. Possible values are {@code "gzip"} and {@code "bzip"}.
 *
 * @param compressionType
 *        The compression type. Its {@code toString()} result is what gets stored. Must not be
 *        {@code null}: the conversion dereferences the argument.
 * @return Returns a reference to this object so that method calls can be chained together.
 * @throws NullPointerException
 *         if {@code compressionType} is {@code null}.
 * @see ParquetCompressionType
 */
public S3ParquetSource withCompressionType(ParquetCompressionType compressionType) {
    // The field is written directly here; this overload does not go through the String setter.
    final String typeAsString = compressionType.toString();
    this.compressionType = typeAsString;
    return this;
}
/**
* * A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all * PDF files. *
* * @return A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" * excludes all PDF files. */ public java.util.List* A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all * PDF files. *
* * @param exclusions * A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" * excludes all PDF files. */ public void setExclusions(java.util.Collection* A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all * PDF files. *
** NOTE: This method appends the values to the existing list (if any). Use * {@link #setExclusions(java.util.Collection)} or {@link #withExclusions(java.util.Collection)} if you want to * override the existing values. *
* * @param exclusions * A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" * excludes all PDF files. * @return Returns a reference to this object so that method calls can be chained together. */ public S3ParquetSource withExclusions(String... exclusions) { if (this.exclusions == null) { setExclusions(new java.util.ArrayList* A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" excludes all * PDF files. *
* * @param exclusions * A string containing a JSON list of Unix-style glob patterns to exclude. For example, "[\"**.pdf\"]" * excludes all PDF files. * @return Returns a reference to this object so that method calls can be chained together. */ public S3ParquetSource withExclusions(java.util.Collection
* The target group size in bytes. The default is computed based on the input data size and the size of your
* cluster. When there are fewer than 50,000 input files, "groupFiles"
must be set to
* "inPartition"
for this to take effect.
*
"groupFiles"
must be set to
* "inPartition"
for this to take effect.
*/
public void setGroupSize(String groupSize) {
// Stores the value as given; the size is carried as a string and is not parsed or validated here.
this.groupSize = groupSize;
}
/**
 * Returns the target group size in bytes. The default is computed based on the input data size and the
 * size of your cluster. When there are fewer than 50,000 input files, {@code "groupFiles"} must be set to
 * {@code "inPartition"} for this to take effect.
 *
 * @return The target group size currently stored on this object, as a string.
 */
public String getGroupSize() {
    return groupSize;
}
/**
 * Sets the target group size in bytes. The default is computed based on the input data size and the size
 * of your cluster. When there are fewer than 50,000 input files, {@code "groupFiles"} must be set to
 * {@code "inPartition"} for this to take effect.
 *
 * @param groupSize
 *        The target group size, as a string. It is passed unchanged to {@link #setGroupSize(String)}.
 * @return Returns a reference to this object so that method calls can be chained together.
 */
public S3ParquetSource withGroupSize(String groupSize) {
    // Goes through the setter so the fluent and plain variants stay in sync.
    setGroupSize(groupSize);
    return this;
}
/**
 * Controls file grouping. Grouping files is turned on by default when the input contains more than 50,000
 * files. To turn on grouping with fewer than 50,000 files, set this parameter to {@code "inPartition"}.
 * To disable grouping when there are more than 50,000 files, set this parameter to {@code "none"}.
 *
 * @param groupFiles
 *        The grouping mode, as a string. The value is stored as given.
 */
public void setGroupFiles(String groupFiles) {
    this.groupFiles = groupFiles;
}
/**
 * Returns the file grouping mode. Grouping files is turned on by default when the input contains more
 * than 50,000 files. To turn on grouping with fewer than 50,000 files, set this parameter to
 * {@code "inPartition"}. To disable grouping when there are more than 50,000 files, set this parameter to
 * {@code "none"}.
 *
 * @return The grouping mode currently stored on this object, as a string.
 */
public String getGroupFiles() {
    return groupFiles;
}
/**
 * Controls file grouping. Grouping files is turned on by default when the input contains more than 50,000
 * files. To turn on grouping with fewer than 50,000 files, set this parameter to {@code "inPartition"}.
 * To disable grouping when there are more than 50,000 files, set this parameter to {@code "none"}.
 *
 * @param groupFiles
 *        The grouping mode, as a string. It is passed unchanged to {@link #setGroupFiles(String)}.
 * @return Returns a reference to this object so that method calls can be chained together.
 */
public S3ParquetSource withGroupFiles(String groupFiles) {
    // Goes through the setter so the fluent and plain variants stay in sync.
    setGroupFiles(groupFiles);
    return this;
}
/**
* * If set to true, recursively reads files in all subdirectories under the specified paths. *
*
* @param recurse
*        If set to true, recursively reads files in all subdirectories under the specified paths.
*/
public void setRecurse(Boolean recurse) {
    this.recurse = recurse;
}

/**
* If set to true, recursively reads files in all subdirectories under the specified paths.
*
* @return If set to true, recursively reads files in all subdirectories under the specified paths.
*/
public Boolean getRecurse() {
    return recurse;
}

/**
* If set to true, recursively reads files in all subdirectories under the specified paths.
*
* @param recurse
*        If set to true, recursively reads files in all subdirectories under the specified paths.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public S3ParquetSource withRecurse(Boolean recurse) {
    // Goes through the setter so the fluent and plain variants stay in sync.
    setRecurse(recurse);
    return this;
}

/**
* If set to true, recursively reads files in all subdirectories under the specified paths.
*
* @return If set to true, recursively reads files in all subdirectories under the specified paths.
*/
public Boolean isRecurse() {
    // Reads the field directly; may be null because the field is a boxed Boolean.
    return recurse;
}

/**
* This option controls the duration in milliseconds after which the s3 listing is likely to be consistent.
* Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when
* using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option.
* The default is 900000 milliseconds, or 15 minutes.
*
* @param maxBand
*        This option controls the duration in milliseconds after which the s3 listing is likely to be
*        consistent. Files with modification timestamps falling within the last maxBand milliseconds are
*        tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users
*        don't need to set this option. The default is 900000 milliseconds, or 15 minutes.
*/
public void setMaxBand(Integer maxBand) {
    this.maxBand = maxBand;
}

/**
* This option controls the duration in milliseconds after which the s3 listing is likely to be consistent.
* Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when
* using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option.
* The default is 900000 milliseconds, or 15 minutes.
*
* @return This option controls the duration in milliseconds after which the s3 listing is likely to be
*         consistent. Files with modification timestamps falling within the last maxBand milliseconds are
*         tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most
*         users don't need to set this option. The default is 900000 milliseconds, or 15 minutes.
*/
public Integer getMaxBand() {
    return maxBand;
}

/**
* This option controls the duration in milliseconds after which the s3 listing is likely to be consistent.
* Files with modification timestamps falling within the last maxBand milliseconds are tracked specially when
* using JobBookmarks to account for Amazon S3 eventual consistency. Most users don't need to set this option.
* The default is 900000 milliseconds, or 15 minutes.
*
* @param maxBand
*        This option controls the duration in milliseconds after which the s3 listing is likely to be
*        consistent. Files with modification timestamps falling within the last maxBand milliseconds are
*        tracked specially when using JobBookmarks to account for Amazon S3 eventual consistency. Most users
*        don't need to set this option. The default is 900000 milliseconds, or 15 minutes.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public S3ParquetSource withMaxBand(Integer maxBand) {
    // Goes through the setter so the fluent and plain variants stay in sync.
    setMaxBand(maxBand);
    return this;
}

/**
* This option specifies the maximum number of files to save from the last maxBand seconds. If this number
* is exceeded, extra files are skipped and only processed in the next job run.
*
* @param maxFilesInBand
*        This option specifies the maximum number of files to save from the last maxBand seconds. If this
*        number is exceeded, extra files are skipped and only processed in the next job run.
*/
public void setMaxFilesInBand(Integer maxFilesInBand) {
    // NOTE(review): the description says "maxBand seconds", but maxBand is documented in milliseconds
    // elsewhere in this class - confirm the intended unit against the service documentation.
    this.maxFilesInBand = maxFilesInBand;
}

/**
* This option specifies the maximum number of files to save from the last maxBand seconds. If this number
* is exceeded, extra files are skipped and only processed in the next job run.
*
* @return This option specifies the maximum number of files to save from the last maxBand seconds. If this
*         number is exceeded, extra files are skipped and only processed in the next job run.
*/
public Integer getMaxFilesInBand() {
    return maxFilesInBand;
}

/**
* This option specifies the maximum number of files to save from the last maxBand seconds. If this number
* is exceeded, extra files are skipped and only processed in the next job run.
*
* @param maxFilesInBand
*        This option specifies the maximum number of files to save from the last maxBand seconds. If this
*        number is exceeded, extra files are skipped and only processed in the next job run.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public S3ParquetSource withMaxFilesInBand(Integer maxFilesInBand) {
    // Goes through the setter so the fluent and plain variants stay in sync.
    setMaxFilesInBand(maxFilesInBand);
    return this;
}

/**
* Specifies additional connection options.
*
* @param additionalOptions
*        Specifies additional connection options.
*/
public void setAdditionalOptions(S3DirectSourceAdditionalOptions additionalOptions) {
    this.additionalOptions = additionalOptions;
}

/**
* Specifies additional connection options.
*
* @return Specifies additional connection options.
*/
public S3DirectSourceAdditionalOptions getAdditionalOptions() {
    return additionalOptions;
}

/**
* Specifies additional connection options.
*
* @param additionalOptions
*        Specifies additional connection options.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public S3ParquetSource withAdditionalOptions(S3DirectSourceAdditionalOptions additionalOptions) {
    // Goes through the setter so the fluent and plain variants stay in sync.
    setAdditionalOptions(additionalOptions);
    return this;
}

/**
* Specifies the data schema for the S3 Parquet source.
* * @return Specifies the data schema for the S3 Parquet source. */ public java.util.List* Specifies the data schema for the S3 Parquet source. *
* * @param outputSchemas * Specifies the data schema for the S3 Parquet source. */ public void setOutputSchemas(java.util.Collection* Specifies the data schema for the S3 Parquet source. *
** NOTE: This method appends the values to the existing list (if any). Use * {@link #setOutputSchemas(java.util.Collection)} or {@link #withOutputSchemas(java.util.Collection)} if you want * to override the existing values. *
* * @param outputSchemas * Specifies the data schema for the S3 Parquet source. * @return Returns a reference to this object so that method calls can be chained together. */ public S3ParquetSource withOutputSchemas(GlueSchema... outputSchemas) { if (this.outputSchemas == null) { setOutputSchemas(new java.util.ArrayList* Specifies the data schema for the S3 Parquet source. *
* * @param outputSchemas * Specifies the data schema for the S3 Parquet source. * @return Returns a reference to this object so that method calls can be chained together. */ public S3ParquetSource withOutputSchemas(java.util.Collection