// Code generated by smithy-go-codegen DO NOT EDIT.

package types

import (
	smithydocument "github.com/aws/smithy-go/document"
	"time"
)

// Defines an action to be initiated by a trigger.
type Action struct {

	// The job arguments used when this trigger fires. For this job run, they replace
	// the default arguments set in the job definition itself. You can specify
	// arguments here that your own job-execution script consumes, as well as
	// arguments that Glue itself consumes. For information about how to specify and
	// consume your own Job arguments, see the Calling Glue APIs in Python (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html)
	// topic in the developer guide. For information about the key-value pairs that
	// Glue consumes to set up your job, see the Special Parameters Used by Glue (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html)
	// topic in the developer guide.
	Arguments map[string]string

	// The name of the crawler to be used with this action.
	CrawlerName *string

	// The name of a job to be run.
	JobName *string

	// Specifies configuration properties of a job run notification.
	NotificationProperty *NotificationProperty

	// The name of the SecurityConfiguration structure to be used with this action.
	SecurityConfiguration *string

	// The JobRun timeout in minutes. This is the maximum time that a job run can
	// consume resources before it is terminated and enters TIMEOUT status. The
	// default is 2,880 minutes (48 hours). This overrides the timeout value set in
	// the parent job.
	Timeout *int32

	noSmithyDocumentSerde
}

// Specifies a transform that groups rows by chosen fields and computes the
// aggregated value by specified function.
type Aggregate struct {

	// Specifies the aggregate functions to be performed on specified fields.
	//
	// This member is required.
	Aggs []AggregateOperation

	// Specifies the fields to group by.
	//
	// This member is required.
	Groups [][]string

	// Specifies the fields and rows to use as inputs for the aggregate transform.
	//
	// This member is required.
	Inputs []string

	// The name of the transform node.
	//
	// This member is required.
	Name *string

	noSmithyDocumentSerde
}

// Specifies the set of parameters needed to perform aggregation in the aggregate
// transform.
type AggregateOperation struct {

	// Specifies the aggregation function to apply. Possible aggregation functions
	// include: avg, countDistinct, count, first, last, kurtosis, max, min, skewness,
	// stddev_samp, stddev_pop, sum, sumDistinct, var_samp, var_pop
	//
	// This member is required.
	AggFunc AggFunction

	// Specifies the column on the data set on which the aggregation function will be
	// applied.
	//
	// This member is required.
	Column []string

	noSmithyDocumentSerde
}

// Specifies an optional value when connecting to the Redshift cluster.
type AmazonRedshiftAdvancedOption struct {

	// The key for the additional connection option.
	Key *string

	// The value for the additional connection option.
	Value *string

	noSmithyDocumentSerde
}
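// Illustrative sketch (not part of the generated API): one way a caller might
// populate an Action and an AggregateOperation defined above. The job name,
// argument key, timeout, aggregation function, and column name are hypothetical
// values chosen for the example; only types declared in this package are assumed.
func exampleTriggerAction() Action {
	jobName := "nightly-etl"       // hypothetical job name
	timeout := int32(60)           // terminate the run after 60 minutes
	return Action{
		JobName:   &jobName,
		Arguments: map[string]string{"--ENV": "prod"}, // hypothetical job argument
		Timeout:   &timeout,
	}
}

func exampleAggregateOperation() AggregateOperation {
	return AggregateOperation{
		// AggFunction is a string-backed enum in this package, so a raw conversion
		// is used here rather than assuming a specific generated constant name.
		AggFunc: AggFunction("avg"),
		Column:  []string{"price"},
	}
}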
// Specifies an Amazon Redshift node.
type AmazonRedshiftNodeData struct {

	// The access type for the Redshift connection. Can be a direct connection or a
	// catalog connection.
	AccessType *string

	// Specifies how writing to a Redshift cluster will occur.
	Action *string

	// Optional values when connecting to the Redshift cluster.
	AdvancedOptions []AmazonRedshiftAdvancedOption

	// The name of the Glue Data Catalog database when working with a data catalog.
	CatalogDatabase *Option

	// The Redshift schema name when working with a data catalog.
	CatalogRedshiftSchema *string

	// The database table to read from.
	CatalogRedshiftTable *string

	// The Glue Data Catalog table name when working with a data catalog.
	CatalogTable *Option

	// The Glue connection to the Redshift cluster.
	Connection *Option

	// Specifies the name of the connection that is associated with the catalog table
	// used.
	CrawlerConnection *string

	// Optional. The role name to use when connecting to S3. The IAM role will
	// default to the role on the job when left blank.
	IamRole *Option

	// The action used to determine how a MERGE in a Redshift sink will be handled.
	MergeAction *string

	// The SQL used in a custom merge to deal with matching records.
	MergeClause *string

	// The action used to determine how a MERGE in a Redshift sink will be handled
	// when an existing record matches a new record.
	MergeWhenMatched *string

	// The action used to determine how a MERGE in a Redshift sink will be handled
	// when an existing record doesn't match a new record.
	MergeWhenNotMatched *string

	// The SQL used after a MERGE or APPEND with upsert is run.
	PostAction *string

	// The SQL used before a MERGE or APPEND with upsert is run.
	PreAction *string

	// The SQL used to fetch the data from a Redshift source when the SourceType is
	// 'query'.
	SampleQuery *string

	// The Redshift schema name when working with a direct connection.
	Schema *Option

	// The list of column names used to determine a matching record when doing a
	// MERGE or APPEND with upsert.
	SelectedColumns []Option

	// The source type to specify whether a specific table is the source or a custom
	// query.
	SourceType *string

	// The name of the temporary staging table that is used when doing a MERGE or
	// APPEND with upsert.
	StagingTable *string

	// The Redshift table name when working with a direct connection.
	Table *Option

	// Specifies the prefix to a table.
	TablePrefix *string

	// The array of schema output for a given node.
	TableSchema []Option

	// The Amazon S3 path where temporary data can be staged when copying out of the
	// database.
	TempDir *string

	// The action used on Redshift sinks when doing an APPEND.
	Upsert bool

	noSmithyDocumentSerde
}

// Specifies an Amazon Redshift source.
type AmazonRedshiftSource struct {

	// Specifies the data of the Amazon Redshift source node.
	Data *AmazonRedshiftNodeData

	// The name of the Amazon Redshift source.
	Name *string

	noSmithyDocumentSerde
}

// Specifies an Amazon Redshift target.
type AmazonRedshiftTarget struct {

	// Specifies the data of the Amazon Redshift target node.
	Data *AmazonRedshiftNodeData

	// The nodes that are inputs to the data target.
	Inputs []string

	// The name of the Amazon Redshift target.
	Name *string

	noSmithyDocumentSerde
}

// Specifies a transform that maps data property keys in the data source to data
// property keys in the data target. You can rename keys, modify the data types
// for keys, and choose which keys to drop from the dataset.
type ApplyMapping struct {

	// The data inputs identified by their node names.
	//
	// This member is required.
	Inputs []string

	// Specifies the mapping of data property keys in the data source to data
	// property keys in the data target.
	//
	// This member is required.
	Mapping []Mapping

	// The name of the transform node.
	//
	// This member is required.
	Name *string

	noSmithyDocumentSerde
}
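// Illustrative sketch (not part of the generated API): a minimal
// AmazonRedshiftSource that reads over a direct connection using a custom query,
// wired by node name into an ApplyMapping transform. All literal values are
// hypothetical, and the mapping entries are left empty because their contents
// depend on the job being built.
func exampleRedshiftSourceWithMapping() (AmazonRedshiftSource, ApplyMapping) {
	nodeName := "redshift_orders"
	accessType := "direct"
	sourceType := "query"
	query := "SELECT id, total FROM public.orders" // hypothetical query
	mapName := "rename_order_columns"

	source := AmazonRedshiftSource{
		Name: &nodeName,
		Data: &AmazonRedshiftNodeData{
			AccessType:  &accessType,
			SourceType:  &sourceType,
			SampleQuery: &query,
		},
	}

	// The ApplyMapping node references the source by its node name; Mapping is
	// defined elsewhere in this package.
	mapping := ApplyMapping{
		Name:    &mapName,
		Inputs:  []string{nodeName},
		Mapping: []Mapping{},
	}
	return source, mapping
}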
// Specifies a connector to an Amazon Athena data source.
type AthenaConnectorSource struct {

	// The name of the connection that is associated with the connector.
	//
	// This member is required.
	ConnectionName *string

	// The type of connection, such as marketplace.athena or custom.athena,
	// designating a connection to an Amazon Athena data store.
	//
	// This member is required.
	ConnectionType *string

	// The name of a connector that assists with accessing the data store in Glue
	// Studio.
	//
	// This member is required.
	ConnectorName *string

	// The name of the data source.
	//
	// This member is required.
	Name *string

	// The name of the Cloudwatch log group to read from. For example,
	// /aws-glue/jobs/output .
	//
	// This member is required.
	SchemaName *string

	// The name of the table in the data source.
	ConnectionTable *string

	// Specifies the data schema for the custom Athena source.
	OutputSchemas []GlueSchema

	noSmithyDocumentSerde
}

// A structure containing the Lake Formation audit context.
type AuditContext struct {

	// A string containing the additional audit context information.
	AdditionalAuditContext *string

	// All columns request for audit.
	AllColumnsRequested *bool

	// The requested columns for audit.
	RequestedColumns []string

	noSmithyDocumentSerde
}

// A list of errors that can occur when registering partition indexes for an
// existing table. These errors give the details about why an index registration
// failed and provide a limited number of partitions in the response, so that you
// can fix the partitions at fault and try registering the index again. The most
// common set of errors that can occur are categorized as follows:
//   - EncryptedPartitionError: The partitions are encrypted.
//   - InvalidPartitionTypeDataError: The partition value doesn't match the data
//     type for that partition column.
//   - MissingPartitionValueError: The partition value is missing.
//   - UnsupportedPartitionCharacterError: Characters inside the partition value
//     are not supported. For example: U+0000, U+0001, U+0002.
//   - InternalError: Any error which does not belong to other error codes.
type BackfillError struct {

	// The error code for an error that occurred when registering partition indexes
	// for an existing table.
	Code BackfillErrorCode

	// A list of a limited number of partitions in the response.
	Partitions []PartitionValueList

	noSmithyDocumentSerde
}

// Specifies a target that uses a Glue Data Catalog table.
type BasicCatalogTarget struct {

	// The database that contains the table you want to use as the target. This
	// database must already exist in the Data Catalog.
	//
	// This member is required.
	Database *string

	// The nodes that are inputs to the data target.
	//
	// This member is required.
	Inputs []string

	// The name of your data target.
	//
	// This member is required.
	Name *string

	// The table that defines the schema of your output data. This table must already
	// exist in the Data Catalog.
	//
	// This member is required.
	Table *string

	noSmithyDocumentSerde
}

// Records an error that occurred when attempting to stop a specified job run.
type BatchStopJobRunError struct {

	// Specifies details about the error that was encountered.
	ErrorDetail *ErrorDetail

	// The name of the job definition that is used in the job run in question.
	JobName *string

	// The JobRunId of the job run in question.
	JobRunId *string

	noSmithyDocumentSerde
}

// Records a successful request to stop a specified JobRun .
type BatchStopJobRunSuccessfulSubmission struct {

	// The name of the job definition used in the job run that was stopped.
JobName *string // The JobRunId of the job run that was stopped. JobRunId *string noSmithyDocumentSerde } // Contains information about a batch update partition error. type BatchUpdatePartitionFailureEntry struct { // The details about the batch update partition error. ErrorDetail *ErrorDetail // A list of values defining the partitions. PartitionValueList []string noSmithyDocumentSerde } // A structure that contains the values and structure used to update a partition. type BatchUpdatePartitionRequestEntry struct { // The structure used to update a partition. // // This member is required. PartitionInput *PartitionInput // A list of values defining the partitions. // // This member is required. PartitionValueList []string noSmithyDocumentSerde } // Defines column statistics supported for bit sequence data values. type BinaryColumnStatisticsData struct { // The average bit sequence length in the column. // // This member is required. AverageLength float64 // The size of the longest bit sequence in the column. // // This member is required. MaximumLength int64 // The number of null values in the column. // // This member is required. NumberOfNulls int64 noSmithyDocumentSerde } // The details of a blueprint. type Blueprint struct { // Specifies the path in Amazon S3 where the blueprint is published. BlueprintLocation *string // Specifies a path in Amazon S3 where the blueprint is copied when you call // CreateBlueprint/UpdateBlueprint to register the blueprint in Glue. BlueprintServiceLocation *string // The date and time the blueprint was registered. CreatedOn *time.Time // The description of the blueprint. Description *string // An error message. ErrorMessage *string // When there are multiple versions of a blueprint and the latest version has some // errors, this attribute indicates the last successful blueprint definition that // is available with the service. LastActiveDefinition *LastActiveDefinition // The date and time the blueprint was last modified. LastModifiedOn *time.Time // The name of the blueprint. Name *string // A JSON string that indicates the list of parameter specifications for the // blueprint. ParameterSpec *string // The status of the blueprint registration. // - Creating — The blueprint registration is in progress. // - Active — The blueprint has been successfully registered. // - Updating — An update to the blueprint registration is in progress. // - Failed — The blueprint registration failed. Status BlueprintStatus noSmithyDocumentSerde } // The details of a blueprint. type BlueprintDetails struct { // The name of the blueprint. BlueprintName *string // The run ID for this blueprint. RunId *string noSmithyDocumentSerde } // The details of a blueprint run. type BlueprintRun struct { // The name of the blueprint. BlueprintName *string // The date and time that the blueprint run completed. CompletedOn *time.Time // Indicates any errors that are seen while running the blueprint. ErrorMessage *string // The blueprint parameters as a string. You will have to provide a value for each // key that is required from the parameter spec that is defined in the // Blueprint$ParameterSpec . Parameters *string // The role ARN. This role will be assumed by the Glue service and will be used to // create the workflow and other entities of a workflow. RoleArn *string // If there are any errors while creating the entities of a workflow, we try to // roll back the created entities until that point and delete them. 
This attribute // indicates the errors seen while trying to delete the entities that are created. RollbackErrorMessage *string // The run ID for this blueprint run. RunId *string // The date and time that the blueprint run started. StartedOn *time.Time // The state of the blueprint run. Possible values are: // - Running — The blueprint run is in progress. // - Succeeded — The blueprint run completed successfully. // - Failed — The blueprint run failed and rollback is complete. // - Rolling Back — The blueprint run failed and rollback is in progress. State BlueprintRunState // The name of a workflow that is created as a result of a successful blueprint // run. If a blueprint run has an error, there will not be a workflow created. WorkflowName *string noSmithyDocumentSerde } // Defines column statistics supported for Boolean data columns. type BooleanColumnStatisticsData struct { // The number of false values in the column. // // This member is required. NumberOfFalses int64 // The number of null values in the column. // // This member is required. NumberOfNulls int64 // The number of true values in the column. // // This member is required. NumberOfTrues int64 noSmithyDocumentSerde } // Specifies a Delta Lake data source that is registered in the Glue Data Catalog. type CatalogDeltaSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the Delta Lake data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string // Specifies additional connection options. AdditionalDeltaOptions map[string]string // Specifies the data schema for the Delta Lake source. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies a table definition in the Glue Data Catalog. type CatalogEntry struct { // The database in which the table metadata resides. // // This member is required. DatabaseName *string // The name of the table in question. // // This member is required. TableName *string noSmithyDocumentSerde } // Specifies a Hudi data source that is registered in the Glue Data Catalog. type CatalogHudiSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the Hudi data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string // Specifies additional connection options. AdditionalHudiOptions map[string]string // Specifies the data schema for the Hudi source. OutputSchemas []GlueSchema noSmithyDocumentSerde } // A structure containing migration status information. type CatalogImportStatus struct { // True if the migration has completed, or False otherwise. ImportCompleted bool // The time that the migration was started. ImportTime *time.Time // The name of the person who initiated the migration. ImportedBy *string noSmithyDocumentSerde } // Specifies an Apache Kafka data store in the Data Catalog. type CatalogKafkaSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data store. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string // Specifies options related to data preview for viewing a sample of your data. DataPreviewOptions *StreamingDataPreviewOptions // Whether to automatically determine the schema from the incoming data. 
DetectSchema *bool // Specifies the streaming options. StreamingOptions *KafkaStreamingSourceOptions // The amount of time to spend processing each micro batch. WindowSize *int32 noSmithyDocumentSerde } // Specifies a Kinesis data source in the Glue Data Catalog. type CatalogKinesisSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string // Additional options for data preview. DataPreviewOptions *StreamingDataPreviewOptions // Whether to automatically determine the schema from the incoming data. DetectSchema *bool // Additional options for the Kinesis streaming data source. StreamingOptions *KinesisStreamingSourceOptions // The amount of time to spend processing each micro batch. WindowSize *int32 noSmithyDocumentSerde } // A policy that specifies update behavior for the crawler. type CatalogSchemaChangePolicy struct { // Whether to use the specified update behavior when the crawler finds a changed // schema. EnableUpdateCatalog *bool // The update behavior when the crawler finds a changed schema. UpdateBehavior UpdateCatalogBehavior noSmithyDocumentSerde } // Specifies a data store in the Glue Data Catalog. type CatalogSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data store. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string noSmithyDocumentSerde } // Specifies an Glue Data Catalog target. type CatalogTarget struct { // The name of the database to be synchronized. // // This member is required. DatabaseName *string // A list of the tables to be synchronized. // // This member is required. Tables []string // The name of the connection for an Amazon S3-backed Data Catalog table to be a // target of the crawl when using a Catalog connection type paired with a NETWORK // Connection type. ConnectionName *string // A valid Amazon dead-letter SQS ARN. For example, // arn:aws:sqs:region:account:deadLetterQueue . DlqEventQueueArn *string // A valid Amazon SQS ARN. For example, arn:aws:sqs:region:account:sqs . EventQueueArn *string noSmithyDocumentSerde } // Classifiers are triggered during a crawl task. A classifier checks whether a // given file is in a format it can handle. If it is, the classifier creates a // schema in the form of a StructType object that matches that data format. You // can use the standard classifiers that Glue provides, or you can write your own // classifiers to best categorize your data sources and specify the appropriate // schemas to use for them. A classifier can be a grok classifier, an XML // classifier, a JSON classifier, or a custom CSV classifier, as specified in one // of the fields in the Classifier object. type Classifier struct { // A classifier for comma-separated values (CSV). CsvClassifier *CsvClassifier // A classifier that uses grok . GrokClassifier *GrokClassifier // A classifier for JSON content. JsonClassifier *JsonClassifier // A classifier for XML content. XMLClassifier *XMLClassifier noSmithyDocumentSerde } // Specifies how Amazon CloudWatch data should be encrypted. type CloudWatchEncryption struct { // The encryption mode to use for CloudWatch data. 
CloudWatchEncryptionMode CloudWatchEncryptionMode // The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data. KmsKeyArn *string noSmithyDocumentSerde } // CodeGenConfigurationNode enumerates all valid Node types. One and only one of // its member variables can be populated. type CodeGenConfigurationNode struct { // Specifies a transform that groups rows by chosen fields and computes the // aggregated value by specified function. Aggregate *Aggregate // Specifies an Amazon Redshift data source. AmazonRedshiftSource *AmazonRedshiftSource // Specifies a target that writes to a data target in Amazon Redshift. AmazonRedshiftTarget *AmazonRedshiftTarget // Specifies a transform that maps data property keys in the data source to data // property keys in the data target. You can rename keys, modify the data types for // keys, and choose which keys to drop from the dataset. ApplyMapping *ApplyMapping // Specifies a connector to an Amazon Athena data source. AthenaConnectorSource *AthenaConnectorSource // Specifies a Delta Lake data source that is registered in the Glue Data Catalog. CatalogDeltaSource *CatalogDeltaSource // Specifies a Hudi data source that is registered in the Glue Data Catalog. CatalogHudiSource *CatalogHudiSource // Specifies an Apache Kafka data store in the Data Catalog. CatalogKafkaSource *CatalogKafkaSource // Specifies a Kinesis data source in the Glue Data Catalog. CatalogKinesisSource *CatalogKinesisSource // Specifies a data store in the Glue Data Catalog. CatalogSource *CatalogSource // Specifies a target that uses a Glue Data Catalog table. CatalogTarget *BasicCatalogTarget // Specifies a transform that uses custom code you provide to perform the data // transformation. The output is a collection of DynamicFrames. CustomCode *CustomCode // Specifies the direct JDBC source connection. DirectJDBCSource *DirectJDBCSource // Specifies an Apache Kafka data store. DirectKafkaSource *DirectKafkaSource // Specifies a direct Amazon Kinesis data source. DirectKinesisSource *DirectKinesisSource // Specifies a transform that removes rows of repeating data from a data set. DropDuplicates *DropDuplicates // Specifies a transform that chooses the data property keys that you want to drop. DropFields *DropFields // Specifies a transform that removes columns from the dataset if all values in // the column are 'null'. By default, Glue Studio will recognize null objects, but // some values such as empty strings, strings that are "null", -1 integers or other // placeholders such as zeros, are not automatically recognized as nulls. DropNullFields *DropNullFields // Specifies a custom visual transform created by a user. DynamicTransform *DynamicTransform // Specifies a DynamoDB Catalog data store in the Glue Data Catalog. DynamoDBCatalogSource *DynamoDBCatalogSource // Specifies your data quality evaluation criteria. EvaluateDataQuality *EvaluateDataQuality // Specifies your data quality evaluation criteria. Allows multiple input data and // returns a collection of Dynamic Frames. EvaluateDataQualityMultiFrame *EvaluateDataQualityMultiFrame // Specifies a transform that locates records in the dataset that have missing // values and adds a new field with a value determined by imputation. The input // data set is used to train the machine learning model that determines what the // missing value should be. FillMissingValues *FillMissingValues // Specifies a transform that splits a dataset into two, based on a filter // condition.
Filter *Filter // Specifies a data source in a governed Data Catalog. GovernedCatalogSource *GovernedCatalogSource // Specifies a data target that writes to a governed catalog. GovernedCatalogTarget *GovernedCatalogTarget // Specifies a connector to a JDBC data source. JDBCConnectorSource *JDBCConnectorSource // Specifies a data target that writes to Amazon S3 in Apache Parquet columnar // storage. JDBCConnectorTarget *JDBCConnectorTarget // Specifies a transform that joins two datasets into one dataset using a // comparison phrase on the specified data property keys. You can use inner, outer, // left, right, left semi, and left anti joins. Join *Join // Specifies a transform that merges a DynamicFrame with a staging DynamicFrame // based on the specified primary keys to identify records. Duplicate records // (records with the same primary keys) are not de-duplicated. Merge *Merge // Specifies a Microsoft SQL server data source in the Glue Data Catalog. MicrosoftSQLServerCatalogSource *MicrosoftSQLServerCatalogSource // Specifies a target that uses Microsoft SQL. MicrosoftSQLServerCatalogTarget *MicrosoftSQLServerCatalogTarget // Specifies a MySQL data source in the Glue Data Catalog. MySQLCatalogSource *MySQLCatalogSource // Specifies a target that uses MySQL. MySQLCatalogTarget *MySQLCatalogTarget // Specifies an Oracle data source in the Glue Data Catalog. OracleSQLCatalogSource *OracleSQLCatalogSource // Specifies a target that uses Oracle SQL. OracleSQLCatalogTarget *OracleSQLCatalogTarget // Specifies a transform that identifies, removes or masks PII data. PIIDetection *PIIDetection // Specifies a PostgreSQL data source in the Glue Data Catalog. PostgreSQLCatalogSource *PostgreSQLCatalogSource // Specifies a target that uses PostgreSQL. PostgreSQLCatalogTarget *PostgreSQLCatalogTarget // Specifies a Glue DataBrew recipe node. Recipe *Recipe // Specifies an Amazon Redshift data store. RedshiftSource *RedshiftSource // Specifies a target that uses Amazon Redshift. RedshiftTarget *RedshiftTarget // Specifies a relational catalog data store in the Glue Data Catalog. RelationalCatalogSource *RelationalCatalogSource // Specifies a transform that renames a single data property key. RenameField *RenameField // Specifies a Delta Lake data source that is registered in the Glue Data Catalog. // The data source must be stored in Amazon S3. S3CatalogDeltaSource *S3CatalogDeltaSource // Specifies a Hudi data source that is registered in the Glue Data Catalog. The // data source must be stored in Amazon S3. S3CatalogHudiSource *S3CatalogHudiSource // Specifies an Amazon S3 data store in the Glue Data Catalog. S3CatalogSource *S3CatalogSource // Specifies a data target that writes to Amazon S3 using the Glue Data Catalog. S3CatalogTarget *S3CatalogTarget // Specifies a comma-separated value (CSV) data store stored in Amazon S3. S3CsvSource *S3CsvSource // Specifies a target that writes to a Delta Lake data source in the Glue Data // Catalog. S3DeltaCatalogTarget *S3DeltaCatalogTarget // Specifies a target that writes to a Delta Lake data source in Amazon S3. S3DeltaDirectTarget *S3DeltaDirectTarget // Specifies a Delta Lake data source stored in Amazon S3. S3DeltaSource *S3DeltaSource // Specifies a data target that writes to Amazon S3. S3DirectTarget *S3DirectTarget // Specifies a data target that writes to Amazon S3 in Apache Parquet columnar // storage. S3GlueParquetTarget *S3GlueParquetTarget // Specifies a target that writes to a Hudi data source in the Glue Data Catalog.
S3HudiCatalogTarget *S3HudiCatalogTarget // Specifies a target that writes to a Hudi data source in Amazon S3. S3HudiDirectTarget *S3HudiDirectTarget // Specifies a Hudi data source stored in Amazon S3. S3HudiSource *S3HudiSource // Specifies a JSON data store stored in Amazon S3. S3JsonSource *S3JsonSource // Specifies an Apache Parquet data store stored in Amazon S3. S3ParquetSource *S3ParquetSource // Specifies a transform that chooses the data property keys that you want to keep. SelectFields *SelectFields // Specifies a transform that chooses one DynamicFrame from a collection of // DynamicFrames . The output is the selected DynamicFrame SelectFromCollection *SelectFromCollection // Specifies a Snowflake data source. SnowflakeSource *SnowflakeSource // Specifies a target that writes to a Snowflake data source. SnowflakeTarget *SnowflakeTarget // Specifies a connector to an Apache Spark data source. SparkConnectorSource *SparkConnectorSource // Specifies a target that uses an Apache Spark connector. SparkConnectorTarget *SparkConnectorTarget // Specifies a transform where you enter a SQL query using Spark SQL syntax to // transform the data. The output is a single DynamicFrame . SparkSQL *SparkSQL // Specifies a transform that writes samples of the data to an Amazon S3 bucket. Spigot *Spigot // Specifies a transform that splits data property keys into two DynamicFrames . // The output is a collection of DynamicFrames : one with selected data property // keys, and one with the remaining data property keys. SplitFields *SplitFields // Specifies a transform that combines the rows from two or more datasets into a // single result. Union *Union noSmithyDocumentSerde } // Represents a directional edge in a directed acyclic graph (DAG). type CodeGenEdge struct { // The ID of the node at which the edge starts. // // This member is required. Source *string // The ID of the node at which the edge ends. // // This member is required. Target *string // The target of the edge. TargetParameter *string noSmithyDocumentSerde } // Represents a node in a directed acyclic graph (DAG) type CodeGenNode struct { // Properties of the node, in the form of name-value pairs. // // This member is required. Args []CodeGenNodeArg // A node identifier that is unique within the node's graph. // // This member is required. Id *string // The type of node that this is. // // This member is required. NodeType *string // The line number of the node. LineNumber int32 noSmithyDocumentSerde } // An argument or property of a node. type CodeGenNodeArg struct { // The name of the argument or property. // // This member is required. Name *string // The value of the argument or property. // // This member is required. Value *string // True if the value is used as a parameter. Param bool noSmithyDocumentSerde } // A column in a Table . type Column struct { // The name of the Column . // // This member is required. Name *string // A free-form text comment. Comment *string // These key-value pairs define properties associated with the column. Parameters map[string]string // The data type of the Column . Type *string noSmithyDocumentSerde } // Encapsulates a column name that failed and the reason for failure. type ColumnError struct { // The name of the column that failed. ColumnName *string // An error message with the reason for the failure of an operation. Error *ErrorDetail noSmithyDocumentSerde } // A structure containing the column name and column importance score for a // column. 
Column importance helps you understand how columns contribute to your // model, by identifying which columns in your records are more important than // others. type ColumnImportance struct { // The name of a column. ColumnName *string // The column importance score for the column, as a decimal. Importance *float64 noSmithyDocumentSerde } // A filter that uses both column-level and row-level filtering. type ColumnRowFilter struct { // A string containing the name of the column. ColumnName *string // A string containing the row-level filter expression. RowFilterExpression *string noSmithyDocumentSerde } // Represents the generated column-level statistics for a table or partition. type ColumnStatistics struct { // The timestamp of when column statistics were generated. // // This member is required. AnalyzedTime *time.Time // Name of column which statistics belong to. // // This member is required. ColumnName *string // The data type of the column. // // This member is required. ColumnType *string // A ColumnStatisticData object that contains the statistics data values. // // This member is required. StatisticsData *ColumnStatisticsData noSmithyDocumentSerde } // Contains the individual types of column statistics data. Only one data object // should be set and indicated by the Type attribute. type ColumnStatisticsData struct { // The type of column statistics data. // // This member is required. Type ColumnStatisticsType // Binary column statistics data. BinaryColumnStatisticsData *BinaryColumnStatisticsData // Boolean column statistics data. BooleanColumnStatisticsData *BooleanColumnStatisticsData // Date column statistics data. DateColumnStatisticsData *DateColumnStatisticsData // Decimal column statistics data. UnscaledValues within are Base64-encoded binary // objects storing big-endian, two's complement representations of the decimal's // unscaled value. DecimalColumnStatisticsData *DecimalColumnStatisticsData // Double column statistics data. DoubleColumnStatisticsData *DoubleColumnStatisticsData // Long column statistics data. LongColumnStatisticsData *LongColumnStatisticsData // String column statistics data. StringColumnStatisticsData *StringColumnStatisticsData noSmithyDocumentSerde } // Encapsulates a ColumnStatistics object that failed and the reason for failure. type ColumnStatisticsError struct { // The ColumnStatistics of the column. ColumnStatistics *ColumnStatistics // An error message with the reason for the failure of an operation. Error *ErrorDetail noSmithyDocumentSerde } // Defines a condition under which a trigger fires. type Condition struct { // The state of the crawler to which this condition applies. CrawlState CrawlState // The name of the crawler to which this condition applies. CrawlerName *string // The name of the job whose JobRuns this condition applies to, and on which this // trigger waits. JobName *string // A logical operator. LogicalOperator LogicalOperator // The condition state. Currently, the only job states that a trigger can listen // for are SUCCEEDED , STOPPED , FAILED , and TIMEOUT . The only crawler states // that a trigger can listen for are SUCCEEDED , FAILED , and CANCELLED . State JobRunState noSmithyDocumentSerde } // The confusion matrix shows you what your transform is predicting accurately and // what types of errors it is making. For more information, see Confusion matrix (https://en.wikipedia.org/wiki/Confusion_matrix) // in Wikipedia. 
type ConfusionMatrix struct { // The number of matches in the data that the transform didn't find, in the // confusion matrix for your transform. NumFalseNegatives *int64 // The number of nonmatches in the data that the transform incorrectly classified // as a match, in the confusion matrix for your transform. NumFalsePositives *int64 // The number of nonmatches in the data that the transform correctly rejected, in // the confusion matrix for your transform. NumTrueNegatives *int64 // The number of matches in the data that the transform correctly found, in the // confusion matrix for your transform. NumTruePositives *int64 noSmithyDocumentSerde } // Defines a connection to a data source. type Connection struct { // These key-value pairs define parameters for the connection: // - HOST - The host URI: either the fully qualified domain name (FQDN) or the // IPv4 address of the database host. // - PORT - The port number, between 1024 and 65535, of the port on which the // database host is listening for database connections. // - USER_NAME - The name under which to log in to the database. The value string // for USER_NAME is " USERNAME ". // - PASSWORD - A password, if one is used, for the user name. // - ENCRYPTED_PASSWORD - When you enable connection password protection by // setting ConnectionPasswordEncryption in the Data Catalog encryption settings, // this field stores the encrypted password. // - JDBC_DRIVER_JAR_URI - The Amazon Simple Storage Service (Amazon S3) path of // the JAR file that contains the JDBC driver to use. // - JDBC_DRIVER_CLASS_NAME - The class name of the JDBC driver to use. // - JDBC_ENGINE - The name of the JDBC engine to use. // - JDBC_ENGINE_VERSION - The version of the JDBC engine to use. // - CONFIG_FILES - (Reserved for future use.) // - INSTANCE_ID - The instance ID to use. // - JDBC_CONNECTION_URL - The URL for connecting to a JDBC data source. // - JDBC_ENFORCE_SSL - A Boolean string (true, false) specifying whether Secure // Sockets Layer (SSL) with hostname matching is enforced for the JDBC connection // on the client. The default is false. // - CUSTOM_JDBC_CERT - An Amazon S3 location specifying the customer's root // certificate. Glue uses this root certificate to validate the customer’s // certificate when connecting to the customer database. Glue only handles X.509 // certificates. The certificate provided must be DER-encoded and supplied in // Base64 encoding PEM format. // - SKIP_CUSTOM_JDBC_CERT_VALIDATION - By default, this is false . Glue // validates the Signature algorithm and Subject Public Key Algorithm for the // customer certificate. The only permitted algorithms for the Signature algorithm // are SHA256withRSA, SHA384withRSA or SHA512withRSA. For the Subject Public Key // Algorithm, the key length must be at least 2048. You can set the value of this // property to true to skip Glue’s validation of the customer certificate. // - CUSTOM_JDBC_CERT_STRING - A custom JDBC certificate string which is used for // domain match or distinguished name match to prevent a man-in-the-middle attack. // In Oracle database, this is used as the SSL_SERVER_CERT_DN ; in Microsoft SQL // Server, this is used as the hostNameInCertificate . // - CONNECTION_URL - The URL for connecting to a general (non-JDBC) data source. // - SECRET_ID - The secret ID used for the secret manager of credentials. // - CONNECTOR_URL - The connector URL for a MARKETPLACE or CUSTOM connection. // - CONNECTOR_TYPE - The connector type for a MARKETPLACE or CUSTOM connection. 
// - CONNECTOR_CLASS_NAME - The connector class name for a MARKETPLACE or CUSTOM // connection. // - KAFKA_BOOTSTRAP_SERVERS - A comma-separated list of host and port pairs that // are the addresses of the Apache Kafka brokers in a Kafka cluster to which a // Kafka client will connect and bootstrap itself. // - KAFKA_SSL_ENABLED - Whether to enable or disable SSL on an Apache Kafka // connection. Default value is "true". // - KAFKA_CUSTOM_CERT - The Amazon S3 URL for the private CA cert file (.pem // format). The default is an empty string. // - KAFKA_SKIP_CUSTOM_CERT_VALIDATION - Whether to skip the validation of the CA // cert file or not. Glue validates for three algorithms: SHA256withRSA, // SHA384withRSA and SHA512withRSA. Default value is "false". // - KAFKA_CLIENT_KEYSTORE - The Amazon S3 location of the client keystore file // for Kafka client side authentication (Optional). // - KAFKA_CLIENT_KEYSTORE_PASSWORD - The password to access the provided // keystore (Optional). // - KAFKA_CLIENT_KEY_PASSWORD - A keystore can consist of multiple keys, so this // is the password to access the client key to be used with the Kafka server side // key (Optional). // - ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD - The encrypted version of the // Kafka client keystore password (if the user has the Glue encrypt passwords // setting selected). // - ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD - The encrypted version of the Kafka // client key password (if the user has the Glue encrypt passwords setting // selected). // - KAFKA_SASL_MECHANISM - "SCRAM-SHA-512" , "GSSAPI" , or "AWS_MSK_IAM" . These // are the supported SASL Mechanisms (https://www.iana.org/assignments/sasl-mechanisms/sasl-mechanisms.xhtml) // . // - KAFKA_SASL_SCRAM_USERNAME - A plaintext username used to authenticate with // the "SCRAM-SHA-512" mechanism. // - KAFKA_SASL_SCRAM_PASSWORD - A plaintext password used to authenticate with // the "SCRAM-SHA-512" mechanism. // - ENCRYPTED_KAFKA_SASL_SCRAM_PASSWORD - The encrypted version of the Kafka // SASL SCRAM password (if the user has the Glue encrypt passwords setting // selected). // - KAFKA_SASL_GSSAPI_KEYTAB - The S3 location of a Kerberos keytab file. A // keytab stores long-term keys for one or more principals. For more information, // see MIT Kerberos Documentation: Keytab (https://web.mit.edu/kerberos/krb5-latest/doc/basic/keytab_def.html) // . // - KAFKA_SASL_GSSAPI_KRB5_CONF - The S3 location of a Kerberos krb5.conf file. // A krb5.conf stores Kerberos configuration information, such as the location of // the KDC server. For more information, see MIT Kerberos Documentation: // krb5.conf (https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html) // . // - KAFKA_SASL_GSSAPI_SERVICE - The Kerberos service name, as set with // sasl.kerberos.service.name in your Kafka Configuration (https://kafka.apache.org/documentation/#brokerconfigs_sasl.kerberos.service.name) // . // - KAFKA_SASL_GSSAPI_PRINCIPAL - The name of the Kerberos principal used by // Glue. For more information, see Kafka Documentation: Configuring Kafka Brokers (https://kafka.apache.org/documentation/#security_sasl_kerberos_clientconfig) // . ConnectionProperties map[string]string // The type of the connection. Currently, SFTP is not supported. ConnectionType ConnectionType // The time that this connection definition was created. CreationTime *time.Time // The description of the connection. Description *string // The user, group, or role that last updated this connection definition.
LastUpdatedBy *string // The last time that this connection definition was updated. LastUpdatedTime *time.Time // A list of criteria that can be used in selecting this connection. MatchCriteria []string // The name of the connection definition. Name *string // A map of physical connection requirements, such as virtual private cloud (VPC) // and SecurityGroup , that are needed to make this connection successfully. PhysicalConnectionRequirements *PhysicalConnectionRequirements noSmithyDocumentSerde } // A structure that is used to specify a connection to create or update. type ConnectionInput struct { // These key-value pairs define parameters for the connection. // // This member is required. ConnectionProperties map[string]string // The type of the connection. Currently, these types are supported: // - JDBC - Designates a connection to a database through Java Database // Connectivity (JDBC). JDBC Connections use the following ConnectionParameters. // - Required: All of ( HOST , PORT , JDBC_ENGINE ) or JDBC_CONNECTION_URL . // - Required: All of ( USERNAME , PASSWORD ) or SECRET_ID . // - Optional: JDBC_ENFORCE_SSL , CUSTOM_JDBC_CERT , CUSTOM_JDBC_CERT_STRING , // SKIP_CUSTOM_JDBC_CERT_VALIDATION . These parameters are used to configure SSL // with JDBC. // - KAFKA - Designates a connection to an Apache Kafka streaming platform. KAFKA // Connections use the following ConnectionParameters. // - Required: KAFKA_BOOTSTRAP_SERVERS . // - Optional: KAFKA_SSL_ENABLED , KAFKA_CUSTOM_CERT , // KAFKA_SKIP_CUSTOM_CERT_VALIDATION . These parameters are used to configure SSL // with KAFKA . // - Optional: KAFKA_CLIENT_KEYSTORE , KAFKA_CLIENT_KEYSTORE_PASSWORD , // KAFKA_CLIENT_KEY_PASSWORD , ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD , // ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD . These parameters are used to configure // TLS client configuration with SSL in KAFKA . // - Optional: KAFKA_SASL_MECHANISM . Can be specified as SCRAM-SHA-512 , GSSAPI // , or AWS_MSK_IAM . // - Optional: KAFKA_SASL_SCRAM_USERNAME , KAFKA_SASL_SCRAM_PASSWORD , // ENCRYPTED_KAFKA_SASL_SCRAM_PASSWORD . These parameters are used to configure // SASL/SCRAM-SHA-512 authentication with KAFKA . // - Optional: KAFKA_SASL_GSSAPI_KEYTAB , KAFKA_SASL_GSSAPI_KRB5_CONF , // KAFKA_SASL_GSSAPI_SERVICE , KAFKA_SASL_GSSAPI_PRINCIPAL . These parameters are // used to configure SASL/GSSAPI authentication with KAFKA . // - MONGODB - Designates a connection to a MongoDB document database. MONGODB // Connections use the following ConnectionParameters. // - Required: CONNECTION_URL . // - Required: All of ( USERNAME , PASSWORD ) or SECRET_ID . // - NETWORK - Designates a network connection to a data source within an Amazon // Virtual Private Cloud environment (Amazon VPC). NETWORK Connections do not // require ConnectionParameters. Instead, provide a PhysicalConnectionRequirements. // // - MARKETPLACE - Uses configuration settings contained in a connector purchased // from Amazon Web Services Marketplace to read from and write to data stores that // are not natively supported by Glue. MARKETPLACE Connections use the following // ConnectionParameters. // - Required: CONNECTOR_TYPE , CONNECTOR_URL , CONNECTOR_CLASS_NAME , // CONNECTION_URL . // - Required for JDBC CONNECTOR_TYPE connections: All of ( USERNAME , PASSWORD ) // or SECRET_ID . // - CUSTOM - Uses configuration settings contained in a custom connector to read // from and write to data stores that are not natively supported by Glue. // SFTP is not supported. 
For more information about how optional // ConnectionProperties are used to configure features in Glue, consult Glue // connection properties (https://docs.aws.amazon.com/glue/latest/dg/connection-defining.html) // . For more information about how optional ConnectionProperties are used to // configure features in Glue Studio, consult Using connectors and connections (https://docs.aws.amazon.com/glue/latest/ug/connectors-chapter.html) // . // // This member is required. ConnectionType ConnectionType // The name of the connection. Connection will not function as expected without a // name. // // This member is required. Name *string // The description of the connection. Description *string // A list of criteria that can be used in selecting this connection. MatchCriteria []string // A map of physical connection requirements, such as virtual private cloud (VPC) // and SecurityGroup , that are needed to successfully make this connection. PhysicalConnectionRequirements *PhysicalConnectionRequirements noSmithyDocumentSerde } // The data structure used by the Data Catalog to encrypt the password as part of // CreateConnection or UpdateConnection and store it in the ENCRYPTED_PASSWORD // field in the connection properties. You can enable catalog encryption or only // password encryption. When a CreateConnection request arrives containing a // password, the Data Catalog first encrypts the password using your KMS key. It // then encrypts the whole connection object again if catalog encryption is also // enabled. This encryption requires that you set KMS key permissions to enable or // restrict access on the password key according to your security requirements. For // example, you might want only administrators to have decrypt permission on the // password key. type ConnectionPasswordEncryption struct { // When the ReturnConnectionPasswordEncrypted flag is set to "true", passwords // remain encrypted in the responses of GetConnection and GetConnections . This // encryption takes effect independently from catalog encryption. // // This member is required. ReturnConnectionPasswordEncrypted bool // A KMS key that is used to encrypt the connection password. If connection // password protection is enabled, the caller of CreateConnection and // UpdateConnection needs at least kms:Encrypt permission on the specified KMS // key, to encrypt passwords before storing them in the Data Catalog. You can set // the decrypt permission to enable or restrict access on the password key // according to your security requirements. AwsKmsKeyId *string noSmithyDocumentSerde } // Specifies the connections used by a job. type ConnectionsList struct { // A list of connections used by the job. Connections []string noSmithyDocumentSerde } // The details of a crawl in the workflow. type Crawl struct { // The date and time on which the crawl completed. CompletedOn *time.Time // The error message associated with the crawl. ErrorMessage *string // The log group associated with the crawl. LogGroup *string // The log stream associated with the crawl. LogStream *string // The date and time on which the crawl started. StartedOn *time.Time // The state of the crawler. State CrawlState noSmithyDocumentSerde } // Specifies a crawler program that examines a data source and uses classifiers to // try to determine its schema. If successful, the crawler records metadata // concerning the data source in the Glue Data Catalog.
type Crawler struct { // A list of UTF-8 strings that specify the custom classifiers that are associated // with the crawler. Classifiers []string // Crawler configuration information. This versioned JSON string allows users to // specify aspects of a crawler's behavior. For more information, see Setting // crawler configuration options (https://docs.aws.amazon.com/glue/latest/dg/crawler-configuration.html) // . Configuration *string // If the crawler is running, contains the total time elapsed since the last crawl // began. CrawlElapsedTime int64 // The name of the SecurityConfiguration structure to be used by this crawler. CrawlerSecurityConfiguration *string // The time that the crawler was created. CreationTime *time.Time // The name of the database in which the crawler's output is stored. DatabaseName *string // A description of the crawler. Description *string // Specifies whether the crawler should use Lake Formation credentials for the // crawler instead of the IAM role credentials. LakeFormationConfiguration *LakeFormationConfiguration // The status of the last crawl, and potentially error information if an error // occurred. LastCrawl *LastCrawlInfo // The time that the crawler was last updated. LastUpdated *time.Time // A configuration that specifies whether data lineage is enabled for the crawler. LineageConfiguration *LineageConfiguration // The name of the crawler. Name *string // A policy that specifies whether to crawl the entire dataset again, or to crawl // only folders that were added since the last crawler run. RecrawlPolicy *RecrawlPolicy // The Amazon Resource Name (ARN) of an IAM role that's used to access customer // resources, such as Amazon Simple Storage Service (Amazon S3) data. Role *string // For scheduled crawlers, the schedule when the crawler runs. Schedule *Schedule // The policy that specifies update and delete behaviors for the crawler. SchemaChangePolicy *SchemaChangePolicy // Indicates whether the crawler is running, or whether a run is pending. State CrawlerState // The prefix added to the names of tables that are created. TablePrefix *string // A collection of targets to crawl. Targets *CrawlerTargets // The version of the crawler. Version int64 noSmithyDocumentSerde } // Contains the information for a run of a crawler. type CrawlerHistory struct { // A UUID identifier for each crawl. CrawlId *string // The number of data processing units (DPU) used in hours for the crawl. DPUHour float64 // The date and time on which the crawl ended. EndTime *time.Time // If an error occurred, the error message associated with the crawl. ErrorMessage *string // The log group associated with the crawl. LogGroup *string // The log stream associated with the crawl. LogStream *string // The prefix for a CloudWatch message about this crawl. MessagePrefix *string // The date and time on which the crawl started. StartTime *time.Time // The state of the crawl. State CrawlerHistoryState // A run summary for the specific crawl in JSON. Contains the catalog tables and // partitions that were added, updated, or deleted. Summary *string noSmithyDocumentSerde } // Metrics for a specified crawler. type CrawlerMetrics struct { // The name of the crawler. CrawlerName *string // The duration of the crawler's most recent run, in seconds. LastRuntimeSeconds float64 // The median duration of this crawler's runs, in seconds. MedianRuntimeSeconds float64 // True if the crawler is still estimating how long it will take to complete this // run. 
StillEstimating bool // The number of tables created by this crawler. TablesCreated int32 // The number of tables deleted by this crawler. TablesDeleted int32 // The number of tables updated by this crawler. TablesUpdated int32 // The estimated time left to complete a running crawl. TimeLeftSeconds float64 noSmithyDocumentSerde } // The details of a Crawler node present in the workflow. type CrawlerNodeDetails struct { // A list of crawls represented by the crawl node. Crawls []Crawl noSmithyDocumentSerde } // Specifies data stores to crawl. type CrawlerTargets struct { // Specifies Glue Data Catalog targets. CatalogTargets []CatalogTarget // Specifies Delta data store targets. DeltaTargets []DeltaTarget // Specifies Amazon DynamoDB targets. DynamoDBTargets []DynamoDBTarget // Specifies Apache Hudi data store targets. HudiTargets []HudiTarget // Specifies Apache Iceberg data store targets. IcebergTargets []IcebergTarget // Specifies JDBC targets. JdbcTargets []JdbcTarget // Specifies Amazon DocumentDB or MongoDB targets. MongoDBTargets []MongoDBTarget // Specifies Amazon Simple Storage Service (Amazon S3) targets. S3Targets []S3Target noSmithyDocumentSerde } // A list of fields, comparators and value that you can use to filter the crawler // runs for a specified crawler. type CrawlsFilter struct { // A key used to filter the crawler runs for a specified crawler. Valid values for // each of the field names are: // - CRAWL_ID : A string representing the UUID identifier for a crawl. // - STATE : A string representing the state of the crawl. // - START_TIME and END_TIME : The epoch timestamp in milliseconds. // - DPU_HOUR : The number of data processing unit (DPU) hours used for the // crawl. FieldName FieldName // The value provided for comparison on the crawl field. FieldValue *string // A defined comparator that operates on the value. The available operators are: // - GT : Greater than. // - GE : Greater than or equal to. // - LT : Less than. // - LE : Less than or equal to. // - EQ : Equal to. // - NE : Not equal to. FilterOperator FilterOperator noSmithyDocumentSerde } // Specifies a custom CSV classifier for CreateClassifier to create. type CreateCsvClassifierRequest struct { // The name of the classifier. // // This member is required. Name *string // Enables the processing of files that contain only one column. AllowSingleColumn *bool // Indicates whether the CSV file contains a header. ContainsHeader CsvHeaderOption // Enables the configuration of custom datatypes. CustomDatatypeConfigured *bool // Creates a list of supported custom datatypes. CustomDatatypes []string // A custom symbol to denote what separates each column entry in the row. Delimiter *string // Specifies not to trim values before identifying the type of column values. The // default value is true. DisableValueTrimming *bool // A list of strings representing column names. Header []string // A custom symbol to denote what combines content into a single column value. // Must be different from the column delimiter. QuoteSymbol *string noSmithyDocumentSerde } // Specifies a grok classifier for CreateClassifier to create. type CreateGrokClassifierRequest struct { // An identifier of the data format that the classifier matches, such as Twitter, // JSON, Omniture logs, Amazon CloudWatch Logs, and so on. // // This member is required. Classification *string // The grok pattern used by this classifier. // // This member is required. GrokPattern *string // The name of the new classifier. // // This member is required. 
Name *string // Optional custom grok patterns used by this classifier. CustomPatterns *string noSmithyDocumentSerde } // Specifies a JSON classifier for CreateClassifier to create. type CreateJsonClassifierRequest struct { // A JsonPath string defining the JSON data for the classifier to classify. Glue // supports a subset of JsonPath, as described in Writing JsonPath Custom // Classifiers (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json) // . // // This member is required. JsonPath *string // The name of the classifier. // // This member is required. Name *string noSmithyDocumentSerde } // Specifies an XML classifier for CreateClassifier to create. type CreateXMLClassifierRequest struct { // An identifier of the data format that the classifier matches. // // This member is required. Classification *string // The name of the classifier. // // This member is required. Name *string // The XML tag designating the element that contains each record in an XML // document being parsed. This can't identify a self-closing element (closed by /> // ). An empty row element that contains only attributes can be parsed as long as // it ends with a closing tag (for example, is okay, but is not). RowTag *string noSmithyDocumentSerde } // A classifier for custom CSV content. type CsvClassifier struct { // The name of the classifier. // // This member is required. Name *string // Enables the processing of files that contain only one column. AllowSingleColumn *bool // Indicates whether the CSV file contains a header. ContainsHeader CsvHeaderOption // The time that this classifier was registered. CreationTime *time.Time // Enables the custom datatype to be configured. CustomDatatypeConfigured *bool // A list of custom datatypes including "BINARY", "BOOLEAN", "DATE", "DECIMAL", // "DOUBLE", "FLOAT", "INT", "LONG", "SHORT", "STRING", "TIMESTAMP". CustomDatatypes []string // A custom symbol to denote what separates each column entry in the row. Delimiter *string // Specifies not to trim values before identifying the type of column values. The // default value is true . DisableValueTrimming *bool // A list of strings representing column names. Header []string // The time that this classifier was last updated. LastUpdated *time.Time // A custom symbol to denote what combines content into a single column value. It // must be different from the column delimiter. QuoteSymbol *string // The version of this classifier. Version int64 noSmithyDocumentSerde } // Specifies a transform that uses custom code you provide to perform the data // transformation. The output is a collection of DynamicFrames. type CustomCode struct { // The name defined for the custom code node class. // // This member is required. ClassName *string // The custom code that is used to perform the data transformation. // // This member is required. Code *string // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // Specifies the data schema for the custom code transform. OutputSchemas []GlueSchema noSmithyDocumentSerde } // An object representing a custom pattern for detecting sensitive data across the // columns and rows of your structured data. type CustomEntityType struct { // A name for the custom pattern that allows it to be retrieved or deleted later. // This name must be unique per Amazon Web Services account. // // This member is required. 
Name *string // A regular expression string that is used for detecting sensitive data in a // custom pattern. // // This member is required. RegexString *string // A list of context words. If none of these context words are found within the // vicinity of the regular expression the data will not be detected as sensitive // data. If no context words are passed only a regular expression is checked. ContextWords []string noSmithyDocumentSerde } // The Database object represents a logical grouping of tables that might reside // in a Hive metastore or an RDBMS. type Database struct { // The name of the database. For Hive compatibility, this is folded to lowercase // when it is stored. // // This member is required. Name *string // The ID of the Data Catalog in which the database resides. CatalogId *string // Creates a set of default permissions on the table for principals. Used by Lake // Formation. Not used in the normal course of Glue operations. CreateTableDefaultPermissions []PrincipalPermissions // The time at which the metadata database was created in the catalog. CreateTime *time.Time // A description of the database. Description *string // A FederatedDatabase structure that references an entity outside the Glue Data // Catalog. FederatedDatabase *FederatedDatabase // The location of the database (for example, an HDFS path). LocationUri *string // These key-value pairs define parameters and properties of the database. Parameters map[string]string // A DatabaseIdentifier structure that describes a target database for resource // linking. TargetDatabase *DatabaseIdentifier noSmithyDocumentSerde } // A structure that describes a target database for resource linking. type DatabaseIdentifier struct { // The ID of the Data Catalog in which the database resides. CatalogId *string // The name of the catalog database. DatabaseName *string // Region of the target database. Region *string noSmithyDocumentSerde } // The structure used to create or update a database. type DatabaseInput struct { // The name of the database. For Hive compatibility, this is folded to lowercase // when it is stored. // // This member is required. Name *string // Creates a set of default permissions on the table for principals. Used by Lake // Formation. Not used in the normal course of Glue operations. CreateTableDefaultPermissions []PrincipalPermissions // A description of the database. Description *string // A FederatedDatabase structure that references an entity outside the Glue Data // Catalog. FederatedDatabase *FederatedDatabase // The location of the database (for example, an HDFS path). LocationUri *string // These key-value pairs define parameters and properties of the database. Parameters map[string]string // A DatabaseIdentifier structure that describes a target database for resource // linking. TargetDatabase *DatabaseIdentifier noSmithyDocumentSerde } // Contains configuration information for maintaining Data Catalog security. type DataCatalogEncryptionSettings struct { // When connection password protection is enabled, the Data Catalog uses a // customer-provided key to encrypt the password as part of CreateConnection or // UpdateConnection and store it in the ENCRYPTED_PASSWORD field in the connection // properties. You can enable catalog encryption or only password encryption. ConnectionPasswordEncryption *ConnectionPasswordEncryption // Specifies the encryption-at-rest configuration for the Data Catalog.
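// Illustrative sketch (not generated code): a DatabaseInput that creates a
// resource link to a database owned by another catalog, using the
// DatabaseIdentifier structure described above. The account ID, names, and Region
// are placeholders; it reuses the ptr helper from the first sketch.
func exampleDatabaseResourceLink() DatabaseInput {
	return DatabaseInput{
		Name:        ptr("sales_link"), // required; folded to lowercase for Hive compatibility
		Description: ptr("Resource link to the shared sales database"),
		TargetDatabase: &DatabaseIdentifier{
			CatalogId:    ptr("123456789012"), // the owning Data Catalog (placeholder account ID)
			DatabaseName: ptr("sales"),
			Region:       ptr("us-east-1"),
		},
	}
}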
EncryptionAtRest *EncryptionAtRest noSmithyDocumentSerde } // The Lake Formation principal. type DataLakePrincipal struct { // An identifier for the Lake Formation principal. DataLakePrincipalIdentifier *string noSmithyDocumentSerde } // Additional run options you can specify for an evaluation run. type DataQualityEvaluationRunAdditionalRunOptions struct { // Whether or not to enable CloudWatch metrics. CloudWatchMetricsEnabled *bool // Prefix for Amazon S3 to store results. ResultsS3Prefix *string noSmithyDocumentSerde } // Describes a data quality result. type DataQualityResult struct { // The date and time when this data quality run completed. CompletedOn *time.Time // The table associated with the data quality result, if any. DataSource *DataSource // In the context of a job in Glue Studio, each node in the canvas is typically // assigned some sort of name and data quality nodes will have names. In the case // of multiple nodes, the evaluationContext can differentiate the nodes. EvaluationContext *string // The job name associated with the data quality result, if any. JobName *string // The job run ID associated with the data quality result, if any. JobRunId *string // A unique result ID for the data quality result. ResultId *string // A list of DataQualityRuleResult objects representing the results for each rule. RuleResults []DataQualityRuleResult // The unique run ID for the ruleset evaluation for this data quality result. RulesetEvaluationRunId *string // The name of the ruleset associated with the data quality result. RulesetName *string // An aggregate data quality score. Represents the ratio of rules that passed to // the total number of rules. Score *float64 // The date and time when this data quality run started. StartedOn *time.Time noSmithyDocumentSerde } // Describes a data quality result. type DataQualityResultDescription struct { // The table name associated with the data quality result. DataSource *DataSource // The job name associated with the data quality result. JobName *string // The job run ID associated with the data quality result. JobRunId *string // The unique result ID for this data quality result. ResultId *string // The time that the run started for this data quality result. StartedOn *time.Time noSmithyDocumentSerde } // Criteria used to return data quality results. type DataQualityResultFilterCriteria struct { // Filter results by the specified data source. For example, retrieving all // results for an Glue table. DataSource *DataSource // Filter results by the specified job name. JobName *string // Filter results by the specified job run ID. JobRunId *string // Filter results by runs that started after this time. StartedAfter *time.Time // Filter results by runs that started before this time. StartedBefore *time.Time noSmithyDocumentSerde } // Describes the result of a data quality rule recommendation run. type DataQualityRuleRecommendationRunDescription struct { // The data source (Glue table) associated with the recommendation run. DataSource *DataSource // The unique run identifier associated with this run. RunId *string // The date and time when this run started. StartedOn *time.Time // The status for this run. Status TaskStatusType noSmithyDocumentSerde } // A filter for listing data quality recommendation runs. type DataQualityRuleRecommendationRunFilter struct { // Filter based on a specified data source (Glue table). // // This member is required. DataSource *DataSource // Filter based on time for results started after provided time. 
StartedAfter *time.Time // Filter based on time for results started before provided time. StartedBefore *time.Time noSmithyDocumentSerde } // Describes the result of the evaluation of a data quality rule. type DataQualityRuleResult struct { // A description of the data quality rule. Description *string // A map of metrics associated with the evaluation of the rule. EvaluatedMetrics map[string]float64 // An evaluation message. EvaluationMessage *string // The name of the data quality rule. Name *string // A pass or fail status for the rule. Result DataQualityRuleResultStatus noSmithyDocumentSerde } // Describes the result of a data quality ruleset evaluation run. type DataQualityRulesetEvaluationRunDescription struct { // The data source (a Glue table) associated with the run. DataSource *DataSource // The unique run identifier associated with this run. RunId *string // The date and time when the run started. StartedOn *time.Time // The status for this run. Status TaskStatusType noSmithyDocumentSerde } // The filter criteria. type DataQualityRulesetEvaluationRunFilter struct { // Filter based on a data source (a Glue table) associated with the run. // // This member is required. DataSource *DataSource // Filter results by runs that started after this time. StartedAfter *time.Time // Filter results by runs that started before this time. StartedBefore *time.Time noSmithyDocumentSerde } // The criteria used to filter data quality rulesets. type DataQualityRulesetFilterCriteria struct { // Filter on rulesets created after this date. CreatedAfter *time.Time // Filter on rulesets created before this date. CreatedBefore *time.Time // The description of the ruleset filter criteria. Description *string // Filter on rulesets last modified after this date. LastModifiedAfter *time.Time // Filter on rulesets last modified before this date. LastModifiedBefore *time.Time // The name of the ruleset filter criteria. Name *string // The name and database name of the target table. TargetTable *DataQualityTargetTable noSmithyDocumentSerde } // Describes a data quality ruleset returned by GetDataQualityRuleset . type DataQualityRulesetListDetails struct { // The date and time the data quality ruleset was created. CreatedOn *time.Time // A description of the data quality ruleset. Description *string // The date and time the data quality ruleset was last modified. LastModifiedOn *time.Time // The name of the data quality ruleset. Name *string // When a ruleset was created from a recommendation run, this run ID is generated // to link the two together. RecommendationRunId *string // The number of rules in the ruleset. RuleCount *int32 // An object representing a Glue table. TargetTable *DataQualityTargetTable noSmithyDocumentSerde } // An object representing a Glue table. type DataQualityTargetTable struct { // The name of the database where the Glue table exists. // // This member is required. DatabaseName *string // The name of the Glue table. // // This member is required. TableName *string // The catalog ID where the Glue table exists. CatalogId *string noSmithyDocumentSerde } // A data source (a Glue table) for which you want data quality results. type DataSource struct { // A Glue table. // // This member is required. GlueTable *GlueTable noSmithyDocumentSerde } // A structure representing the datatype of the value. type Datatype struct { // The datatype of the value. // // This member is required. Id *string // A label assigned to the datatype. // // This member is required.
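// Illustrative sketch (not generated code): a DataSource (a Glue table), as
// described above, of the kind referenced by the data quality filter and run
// structures. The database and table names are placeholders; the GlueTable
// structure itself is defined later in this file, and the ptr helper comes from
// the first sketch.
func exampleDataQualitySource() DataSource {
	return DataSource{
		GlueTable: &GlueTable{
			DatabaseName: ptr("analytics"), // required
			TableName:    ptr("orders"),    // required
		},
	}
}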
Label *string noSmithyDocumentSerde } // Defines column statistics supported for timestamp data columns. type DateColumnStatisticsData struct { // The number of distinct values in a column. // // This member is required. NumberOfDistinctValues int64 // The number of null values in the column. // // This member is required. NumberOfNulls int64 // The highest value in the column. MaximumValue *time.Time // The lowest value in the column. MinimumValue *time.Time noSmithyDocumentSerde } // Defines column statistics supported for fixed-point number data columns. type DecimalColumnStatisticsData struct { // The number of distinct values in a column. // // This member is required. NumberOfDistinctValues int64 // The number of null values in the column. // // This member is required. NumberOfNulls int64 // The highest value in the column. MaximumValue *DecimalNumber // The lowest value in the column. MinimumValue *DecimalNumber noSmithyDocumentSerde } // Contains a numeric value in decimal format. type DecimalNumber struct { // The scale that determines where the decimal point falls in the unscaled value. // // This member is required. Scale int32 // The unscaled numeric value. // // This member is required. UnscaledValue []byte noSmithyDocumentSerde } // Specifies a Delta data store to crawl one or more Delta tables. type DeltaTarget struct { // The name of the connection to use to connect to the Delta table target. ConnectionName *string // Specifies whether the crawler will create native tables, to allow integration // with query engines that support querying of the Delta transaction log directly. CreateNativeDeltaTable *bool // A list of the Amazon S3 paths to the Delta tables. DeltaTables []string // Specifies whether to write the manifest files to the Delta table path. WriteManifest *bool noSmithyDocumentSerde } // A development endpoint where a developer can remotely debug extract, transform, // and load (ETL) scripts. type DevEndpoint struct { // A map of arguments used to configure the DevEndpoint . Valid arguments are: // - "--enable-glue-datacatalog": "" // You can specify a version of Python support for development endpoints by using // the Arguments parameter in the CreateDevEndpoint or UpdateDevEndpoint APIs. If // no arguments are provided, the version defaults to Python 2. Arguments map[string]string // The Amazon Web Services Availability Zone where this DevEndpoint is located. AvailabilityZone *string // The point in time at which this DevEndpoint was created. CreatedTimestamp *time.Time // The name of the DevEndpoint . EndpointName *string // The path to one or more Java .jar files in an S3 bucket that should be loaded // in your DevEndpoint . You can only use pure Java/Scala libraries with a // DevEndpoint . ExtraJarsS3Path *string // The paths to one or more Python libraries in an Amazon S3 bucket that should be // loaded in your DevEndpoint . Multiple values must be complete paths separated by // a comma. You can only use pure Python libraries with a DevEndpoint . Libraries // that rely on C extensions, such as the pandas (http://pandas.pydata.org/) // Python data analysis library, are not currently supported. ExtraPythonLibsS3Path *string // The reason for a current failure in this DevEndpoint . FailureReason *string // Glue version determines the versions of Apache Spark and Python that Glue // supports. The Python version indicates the version supported for running your // ETL scripts on development endpoints. 
For more information about the available // Glue versions and corresponding Spark and Python versions, see Glue version (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) // in the developer guide. Development endpoints that are created without // specifying a Glue version default to Glue 0.9. You can specify a version of // Python support for development endpoints by using the Arguments parameter in // the CreateDevEndpoint or UpdateDevEndpoint APIs. If no arguments are provided, // the version defaults to Python 2. GlueVersion *string // The point in time at which this DevEndpoint was last modified. LastModifiedTimestamp *time.Time // The status of the last update. LastUpdateStatus *string // The number of Glue Data Processing Units (DPUs) allocated to this DevEndpoint . NumberOfNodes int32 // The number of workers of a defined workerType that are allocated to the // development endpoint. The maximum number of workers you can define are 299 for // G.1X , and 149 for G.2X . NumberOfWorkers *int32 // A private IP address to access the DevEndpoint within a VPC if the DevEndpoint // is created within one. The PrivateAddress field is present only when you create // the DevEndpoint within your VPC. PrivateAddress *string // The public IP address used by this DevEndpoint . The PublicAddress field is // present only when you create a non-virtual private cloud (VPC) DevEndpoint . PublicAddress *string // The public key to be used by this DevEndpoint for authentication. This // attribute is provided for backward compatibility because the recommended // attribute to use is public keys. PublicKey *string // A list of public keys to be used by the DevEndpoints for authentication. Using // this attribute is preferred over a single public key because the public keys // allow you to have a different private key per client. If you previously created // an endpoint with a public key, you must remove that key to be able to set a list // of public keys. Call the UpdateDevEndpoint API operation with the public key // content in the deletePublicKeys attribute, and the list of new keys in the // addPublicKeys attribute. PublicKeys []string // The Amazon Resource Name (ARN) of the IAM role used in this DevEndpoint . RoleArn *string // The name of the SecurityConfiguration structure to be used with this DevEndpoint // . SecurityConfiguration *string // A list of security group identifiers used in this DevEndpoint . SecurityGroupIds []string // The current status of this DevEndpoint . Status *string // The subnet ID for this DevEndpoint . SubnetId *string // The ID of the virtual private cloud (VPC) used by this DevEndpoint . VpcId *string // The type of predefined worker that is allocated to the development endpoint. // Accepts a value of Standard, G.1X, or G.2X. // - For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory // and a 50GB disk, and 2 executors per worker. // - For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of // memory, 64 GB disk), and provides 1 executor per worker. We recommend this // worker type for memory-intensive jobs. // - For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of // memory, 128 GB disk), and provides 1 executor per worker. We recommend this // worker type for memory-intensive jobs. // Known issue: when a development endpoint is created with the G.2X WorkerType // configuration, the Spark drivers for the development endpoint will run on 4 // vCPU, 16 GB of memory, and a 64 GB disk. 
WorkerType WorkerType // The YARN endpoint address used by this DevEndpoint . YarnEndpointAddress *string // The Apache Zeppelin port for the remote Apache Spark interpreter. ZeppelinRemoteSparkInterpreterPort int32 noSmithyDocumentSerde } // Custom libraries to be loaded into a development endpoint. type DevEndpointCustomLibraries struct { // The path to one or more Java .jar files in an S3 bucket that should be loaded // in your DevEndpoint . You can only use pure Java/Scala libraries with a // DevEndpoint . ExtraJarsS3Path *string // The paths to one or more Python libraries in an Amazon Simple Storage Service // (Amazon S3) bucket that should be loaded in your DevEndpoint . Multiple values // must be complete paths separated by a comma. You can only use pure Python // libraries with a DevEndpoint . Libraries that rely on C extensions, such as the // pandas (http://pandas.pydata.org/) Python data analysis library, are not // currently supported. ExtraPythonLibsS3Path *string noSmithyDocumentSerde } // Specifies the direct JDBC source connection. type DirectJDBCSource struct { // The connection name of the JDBC source. // // This member is required. ConnectionName *string // The connection type of the JDBC source. // // This member is required. ConnectionType JDBCConnectionType // The database of the JDBC source connection. // // This member is required. Database *string // The name of the JDBC source connection. // // This member is required. Name *string // The table of the JDBC source connection. // // This member is required. Table *string // The temp directory of the JDBC Redshift source. RedshiftTmpDir *string noSmithyDocumentSerde } // Specifies an Apache Kafka data store. type DirectKafkaSource struct { // The name of the data store. // // This member is required. Name *string // Specifies options related to data preview for viewing a sample of your data. DataPreviewOptions *StreamingDataPreviewOptions // Whether to automatically determine the schema from the incoming data. DetectSchema *bool // Specifies the streaming options. StreamingOptions *KafkaStreamingSourceOptions // The amount of time to spend processing each micro batch. WindowSize *int32 noSmithyDocumentSerde } // Specifies a direct Amazon Kinesis data source. type DirectKinesisSource struct { // The name of the data source. // // This member is required. Name *string // Additional options for data preview. DataPreviewOptions *StreamingDataPreviewOptions // Whether to automatically determine the schema from the incoming data. DetectSchema *bool // Additional options for the Kinesis streaming data source. StreamingOptions *KinesisStreamingSourceOptions // The amount of time to spend processing each micro batch. WindowSize *int32 noSmithyDocumentSerde } // A policy that specifies update behavior for the crawler. type DirectSchemaChangePolicy struct { // Specifies the database that the schema change policy applies to. Database *string // Whether to use the specified update behavior when the crawler finds a changed // schema. EnableUpdateCatalog *bool // Specifies the table in the database that the schema change policy applies to. Table *string // The update behavior when the crawler finds a changed schema. UpdateBehavior UpdateCatalogBehavior noSmithyDocumentSerde } // Defines column statistics supported for floating-point number data columns. type DoubleColumnStatisticsData struct { // The number of distinct values in a column. // // This member is required. 
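// Illustrative sketch (not generated code): a DirectSchemaChangePolicy, as
// described above, that updates the named catalog table in place when a changed
// schema is detected. The "UPDATE_IN_DATABASE" behavior string is an assumption
// for illustration (the generated UpdateCatalogBehavior constants can be used
// instead); the ptr helper comes from the first sketch.
func exampleSchemaChangePolicy() DirectSchemaChangePolicy {
	return DirectSchemaChangePolicy{
		EnableUpdateCatalog: ptr(true),
		Database:            ptr("analytics"),
		Table:               ptr("orders"),
		UpdateBehavior:      UpdateCatalogBehavior("UPDATE_IN_DATABASE"),
	}
}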
NumberOfDistinctValues int64 // The number of null values in the column. // // This member is required. NumberOfNulls int64 // The highest value in the column. MaximumValue float64 // The lowest value in the column. MinimumValue float64 noSmithyDocumentSerde } // Options to configure how your data quality evaluation results are published. type DQResultsPublishingOptions struct { // Enable metrics for your data quality results. CloudWatchMetricsEnabled *bool // The context of the evaluation. EvaluationContext *string // Enable publishing for your data quality results. ResultsPublishingEnabled *bool // The Amazon S3 prefix prepended to the results. ResultsS3Prefix *string noSmithyDocumentSerde } // Options to configure how your job will stop if your data quality evaluation // fails. type DQStopJobOnFailureOptions struct { // When to stop job if your data quality evaluation fails. Options are Immediate // or AfterDataLoad. StopJobOnFailureTiming DQStopJobOnFailureTiming noSmithyDocumentSerde } // Specifies a transform that removes rows of repeating data from a data set. type DropDuplicates struct { // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // The name of the columns to be merged or removed if repeating. Columns [][]string noSmithyDocumentSerde } // Specifies a transform that chooses the data property keys that you want to drop. type DropFields struct { // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // A JSON path to a variable in the data structure. // // This member is required. Paths [][]string noSmithyDocumentSerde } // Specifies a transform that removes columns from the dataset if all values in // the column are 'null'. By default, Glue Studio will recognize null objects, but // some values such as empty strings, strings that are "null", -1 integers or other // placeholders such as zeros, are not automatically recognized as nulls. type DropNullFields struct { // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // A structure that represents whether certain values are recognized as null // values for removal. NullCheckBoxList *NullCheckBoxList // A structure that specifies a list of NullValueField structures that represent a // custom null value such as zero or other value being used as a null placeholder // unique to the dataset. The DropNullFields transform removes custom null values // only if both the value of the null placeholder and the datatype match the data. NullTextList []NullValueField noSmithyDocumentSerde } // Specifies the set of parameters needed to perform the dynamic transform. type DynamicTransform struct { // Specifies the name of the function of the dynamic transform. // // This member is required. FunctionName *string // Specifies the inputs for the dynamic transform that are required. // // This member is required. Inputs []string // Specifies the name of the dynamic transform. // // This member is required. Name *string // Specifies the path of the dynamic transform source and config files. // // This member is required. Path *string // Specifies the name of the dynamic transform as it appears in the Glue Studio // visual editor. // // This member is required. 
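// Illustrative sketch (not generated code): a DropFields transform, as described
// above. Node and field names are placeholders, and each inner slice in Paths is
// assumed to address one key, with nested keys written as successive path
// elements; the ptr helper comes from the first sketch.
func exampleDropFields() DropFields {
	return DropFields{
		Name:   ptr("drop_pii"),             // required transform node name
		Inputs: []string{"apply_mapping_1"}, // required upstream node name
		Paths: [][]string{
			{"ssn"},               // a top-level field
			{"customer", "email"}, // a nested field
		},
	}
}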
TransformName *string // Specifies the data schema for the dynamic transform. OutputSchemas []GlueSchema // Specifies the parameters of the dynamic transform. Parameters []TransformConfigParameter // This field is not used and will be deprecated in a future release. Version *string noSmithyDocumentSerde } // Specifies a DynamoDB data source in the Glue Data Catalog. type DynamoDBCatalogSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string noSmithyDocumentSerde } // Specifies an Amazon DynamoDB table to crawl. type DynamoDBTarget struct { // The name of the DynamoDB table to crawl. Path *string // Indicates whether to scan all the records, or to sample rows from the table. // Scanning all the records can take a long time when the table is not a high // throughput table. A value of true means to scan all records, while a value of // false means to sample the records. If no value is specified, the value defaults // to true . ScanAll *bool // The percentage of the configured read capacity units used by the Glue // crawler. Read capacity units is a term defined by DynamoDB, and is a numeric // value that acts as a rate limiter for the number of reads that can be performed on // that table per second. The valid values are null or a value between 0.1 and 1.5. // A null value is used when the user does not provide a value, and defaults to 0.5 of // the configured Read Capacity Unit (for provisioned tables), or 0.25 of the max // configured Read Capacity Unit (for tables using on-demand mode). ScanRate *float64 noSmithyDocumentSerde } // An edge represents a directed connection between two Glue components that are // part of the workflow the edge belongs to. type Edge struct { // The unique ID of the node within the workflow where the edge ends. DestinationId *string // The unique ID of the node within the workflow where the edge starts. SourceId *string noSmithyDocumentSerde } // Specifies the encryption-at-rest configuration for the Data Catalog. type EncryptionAtRest struct { // The encryption-at-rest mode for encrypting Data Catalog data. // // This member is required. CatalogEncryptionMode CatalogEncryptionMode // The ID of the KMS key to use for encryption at rest. SseAwsKmsKeyId *string noSmithyDocumentSerde } // Specifies an encryption configuration. type EncryptionConfiguration struct { // The encryption configuration for Amazon CloudWatch. CloudWatchEncryption *CloudWatchEncryption // The encryption configuration for job bookmarks. JobBookmarksEncryption *JobBookmarksEncryption // The encryption configuration for Amazon Simple Storage Service (Amazon S3) data. S3Encryption []S3Encryption noSmithyDocumentSerde } // Contains details about an error. type ErrorDetail struct { // The code associated with this error. ErrorCode *string // A message describing the error. ErrorMessage *string noSmithyDocumentSerde } // An object containing error details. type ErrorDetails struct { // The error code for an error. ErrorCode *string // The error message for an error. ErrorMessage *string noSmithyDocumentSerde } // Specifies your data quality evaluation criteria. type EvaluateDataQuality struct { // The inputs of your data quality evaluation. // // This member is required. Inputs []string // The name of the data quality evaluation. // // This member is required.
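// Illustrative sketch (not generated code): wiring the EncryptionAtRest structure
// described above into the DataCatalogEncryptionSettings structure defined earlier
// in this file, so catalog metadata is encrypted with a customer KMS key. The
// "SSE-KMS" mode string and the key ARN are assumptions/placeholders; the ptr
// helper comes from the first sketch.
func exampleCatalogEncryption() DataCatalogEncryptionSettings {
	return DataCatalogEncryptionSettings{
		EncryptionAtRest: &EncryptionAtRest{
			CatalogEncryptionMode: CatalogEncryptionMode("SSE-KMS"),
			SseAwsKmsKeyId:        ptr("arn:aws:kms:us-east-1:123456789012:key/example"),
		},
	}
}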
Name *string // The ruleset for your data quality evaluation. // // This member is required. Ruleset *string // The output of your data quality evaluation. Output DQTransformOutput // Options to configure how your results are published. PublishingOptions *DQResultsPublishingOptions // Options to configure how your job will stop if your data quality evaluation // fails. StopJobOnFailureOptions *DQStopJobOnFailureOptions noSmithyDocumentSerde } // Specifies your data quality evaluation criteria. type EvaluateDataQualityMultiFrame struct { // The inputs of your data quality evaluation. The first input in this list is the // primary data source. // // This member is required. Inputs []string // The name of the data quality evaluation. // // This member is required. Name *string // The ruleset for your data quality evaluation. // // This member is required. Ruleset *string // The aliases of all data sources except primary. AdditionalDataSources map[string]string // Options to configure runtime behavior of the transform. AdditionalOptions map[string]string // Options to configure how your results are published. PublishingOptions *DQResultsPublishingOptions // Options to configure how your job will stop if your data quality evaluation // fails. StopJobOnFailureOptions *DQStopJobOnFailureOptions noSmithyDocumentSerde } // Evaluation metrics provide an estimate of the quality of your machine learning // transform. type EvaluationMetrics struct { // The type of machine learning transform. // // This member is required. TransformType TransformType // The evaluation metrics for the find matches algorithm. FindMatchesMetrics *FindMatchesMetrics noSmithyDocumentSerde } // Batch condition that must be met (specified number of events received or batch // time window expired) before EventBridge event trigger fires. type EventBatchingCondition struct { // Number of events that must be received from Amazon EventBridge before // EventBridge event trigger fires. // // This member is required. BatchSize int32 // Window of time in seconds after which EventBridge event trigger fires. Window // starts when first event is received. BatchWindow *int32 noSmithyDocumentSerde } // An execution property of a job. type ExecutionProperty struct { // The maximum number of concurrent runs allowed for the job. The default is 1. An // error is returned when this threshold is reached. The maximum value you can // specify is controlled by a service limit. MaxConcurrentRuns int32 noSmithyDocumentSerde } // Specifies configuration properties for an exporting labels task run. type ExportLabelsTaskRunProperties struct { // The Amazon Simple Storage Service (Amazon S3) path where you will export the // labels. OutputS3Path *string noSmithyDocumentSerde } // A database that points to an entity outside the Glue Data Catalog. type FederatedDatabase struct { // The name of the connection to the external metastore. ConnectionName *string // A unique identifier for the federated database. Identifier *string noSmithyDocumentSerde } // A table that points to an entity outside the Glue Data Catalog. type FederatedTable struct { // The name of the connection to the external metastore. ConnectionName *string // A unique identifier for the federated database. DatabaseIdentifier *string // A unique identifier for the federated table. Identifier *string noSmithyDocumentSerde } // Specifies a transform that locates records in the dataset that have missing // values and adds a new field with a value determined by imputation. 
The input // data set is used to train the machine learning model that determines what the // missing value should be. type FillMissingValues struct { // A JSON path to a variable in the data structure for the dataset that is imputed. // // This member is required. ImputedPath *string // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // A JSON path to a variable in the data structure for the dataset that is filled. FilledPath *string noSmithyDocumentSerde } // Specifies a transform that splits a dataset into two, based on a filter // condition. type Filter struct { // Specifies a filter expression. // // This member is required. Filters []FilterExpression // The data inputs identified by their node names. // // This member is required. Inputs []string // The operator used to filter rows by comparing the key value to a specified // value. // // This member is required. LogicalOperator FilterLogicalOperator // The name of the transform node. // // This member is required. Name *string noSmithyDocumentSerde } // Specifies a filter expression. type FilterExpression struct { // The type of operation to perform in the expression. // // This member is required. Operation FilterOperation // A list of filter values. // // This member is required. Values []FilterValue // Whether the expression is to be negated. Negated *bool noSmithyDocumentSerde } // Represents a single entry in the list of values for a FilterExpression . type FilterValue struct { // The type of filter value. // // This member is required. Type FilterValueType // The value to be associated. // // This member is required. Value []string noSmithyDocumentSerde } // The evaluation metrics for the find matches algorithm. The quality of your // machine learning transform is measured by getting your transform to predict some // matches and comparing the results to known matches from the same dataset. The // quality metrics are based on a subset of your data, so they are not precise. type FindMatchesMetrics struct { // The area under the precision/recall curve (AUPRC) is a single number measuring // the overall quality of the transform, that is independent of the choice made for // precision vs. recall. Higher values indicate that you have a more attractive // precision vs. recall tradeoff. For more information, see Precision and recall (https://en.wikipedia.org/wiki/Precision_and_recall) // in Wikipedia. AreaUnderPRCurve *float64 // A list of ColumnImportance structures containing column importance metrics, // sorted in order of descending importance. ColumnImportances []ColumnImportance // The confusion matrix shows you what your transform is predicting accurately and // what types of errors it is making. For more information, see Confusion matrix (https://en.wikipedia.org/wiki/Confusion_matrix) // in Wikipedia. ConfusionMatrix *ConfusionMatrix // The maximum F1 metric indicates the transform's accuracy between 0 and 1, where // 1 is the best accuracy. For more information, see F1 score (https://en.wikipedia.org/wiki/F1_score) // in Wikipedia. F1 *float64 // The precision metric indicates how often your transform is correct when it // predicts a match. Specifically, it measures how well the transform finds true // positives from the total true positives possible. For more information, see // Precision and recall (https://en.wikipedia.org/wiki/Precision_and_recall) in // Wikipedia.
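// Illustrative sketch (not generated code): a Filter transform, as described
// above, that keeps rows where the "status" column equals the constant "active".
// Pairing a COLUMNEXTRACTED value with a CONSTANT value in one expression, and the
// operation and operator strings, are assumptions for illustration; the ptr helper
// comes from the first sketch.
func exampleFilter() Filter {
	return Filter{
		Name:            ptr("active_rows_only"),   // required transform node name
		Inputs:          []string{"orders_source"}, // required upstream node name
		LogicalOperator: FilterLogicalOperator("AND"),
		Filters: []FilterExpression{{
			Operation: FilterOperation("EQ"),
			Values: []FilterValue{
				{Type: FilterValueType("COLUMNEXTRACTED"), Value: []string{"status"}},
				{Type: FilterValueType("CONSTANT"), Value: []string{"active"}},
			},
		}},
	}
}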
Precision *float64 // The recall metric indicates that for an actual match, how often your transform // predicts the match. Specifically, it measures how well the transform finds true // positives from the total records in the source data. For more information, see // Precision and recall (https://en.wikipedia.org/wiki/Precision_and_recall) in // Wikipedia. Recall *float64 noSmithyDocumentSerde } // The parameters to configure the find matches transform. type FindMatchesParameters struct { // The value that is selected when tuning your transform for a balance between // accuracy and cost. A value of 0.5 means that the system balances accuracy and // cost concerns. A value of 1.0 means a bias purely for accuracy, which typically // results in a higher cost, sometimes substantially higher. A value of 0.0 means a // bias purely for cost, which results in a less accurate FindMatches transform, // sometimes with unacceptable accuracy. Accuracy measures how well the transform // finds true positives and true negatives. Increasing accuracy requires more // machine resources and cost. But it also results in increased recall. Cost // measures how many compute resources, and thus money, are consumed to run the // transform. AccuracyCostTradeoff *float64 // The value to switch on or off to force the output to match the provided labels // from users. If the value is True , the find matches transform forces the output // to match the provided labels. The results override the normal conflation // results. If the value is False , the find matches transform does not ensure all // the labels provided are respected, and the results rely on the trained model. // Note that setting this value to true may increase the conflation execution time. EnforceProvidedLabels *bool // The value selected when tuning your transform for a balance between precision // and recall. A value of 0.5 means no preference; a value of 1.0 means a bias // purely for precision, and a value of 0.0 means a bias for recall. Because this // is a tradeoff, choosing values close to 1.0 means very low recall, and choosing // values close to 0.0 results in very low precision. The precision metric // indicates how often your model is correct when it predicts a match. The recall // metric indicates that for an actual match, how often your model predicts the // match. PrecisionRecallTradeoff *float64 // The name of a column that uniquely identifies rows in the source table. Used to // help identify matching records. PrimaryKeyColumnName *string noSmithyDocumentSerde } // Specifies configuration properties for a Find Matches task run. type FindMatchesTaskRunProperties struct { // The job ID for the Find Matches task run. JobId *string // The name assigned to the job for the Find Matches task run. JobName *string // The job run ID for the Find Matches task run. JobRunId *string noSmithyDocumentSerde } // Filters the connection definitions that are returned by the GetConnections API // operation. type GetConnectionsFilter struct { // The type of connections to return. Currently, SFTP is not supported. ConnectionType ConnectionType // A criteria string that must match the criteria recorded in the connection // definition for that connection definition to be returned. MatchCriteria []string noSmithyDocumentSerde } // A structure for returning a resource policy. type GluePolicy struct { // The date and time at which the policy was created. CreateTime *time.Time // Contains the hash value associated with this policy. 
PolicyHash *string // Contains the requested policy document, in JSON format. PolicyInJson *string // The date and time at which the policy was last updated. UpdateTime *time.Time noSmithyDocumentSerde } // Specifies a user-defined schema when a schema cannot be determined by Glue. type GlueSchema struct { // Specifies the column definitions that make up a Glue schema. Columns []GlueStudioSchemaColumn noSmithyDocumentSerde } // Specifies a single column in a Glue schema definition. type GlueStudioSchemaColumn struct { // The name of the column in the Glue Studio schema. // // This member is required. Name *string // The hive type for this column in the Glue Studio schema. Type *string noSmithyDocumentSerde } // The database and table in the Glue Data Catalog that is used for input or // output data. type GlueTable struct { // A database name in the Glue Data Catalog. // // This member is required. DatabaseName *string // A table name in the Glue Data Catalog. // // This member is required. TableName *string // Additional options for the table. Currently there are two keys supported: // - pushDownPredicate : to filter on partitions without having to list and read // all the files in your dataset. // - catalogPartitionPredicate : to use server-side partition pruning using // partition indexes in the Glue Data Catalog. AdditionalOptions map[string]string // A unique identifier for the Glue Data Catalog. CatalogId *string // The name of the connection to the Glue Data Catalog. ConnectionName *string noSmithyDocumentSerde } // Specifies the data store in the governed Glue Data Catalog. type GovernedCatalogSource struct { // The database to read from. // // This member is required. Database *string // The name of the data store. // // This member is required. Name *string // The database table to read from. // // This member is required. Table *string // Specifies additional connection options. AdditionalOptions *S3SourceAdditionalOptions // Partitions satisfying this predicate are deleted. Files within the retention // period in these partitions are not deleted. Set to "" – empty by default. PartitionPredicate *string noSmithyDocumentSerde } // Specifies a data target that writes to Amazon S3 using the Glue Data Catalog. type GovernedCatalogTarget struct { // The name of the database to write to. // // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string // Specifies native partitioning using a sequence of keys. PartitionKeys [][]string // A policy that specifies update behavior for the governed catalog. SchemaChangePolicy *CatalogSchemaChangePolicy noSmithyDocumentSerde } // A classifier that uses grok patterns. type GrokClassifier struct { // An identifier of the data format that the classifier matches, such as Twitter, // JSON, Omniture logs, and so on. // // This member is required. Classification *string // The grok pattern applied to a data store by this classifier. For more // information, see built-in patterns in Writing Custom Classifiers (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html) // . // // This member is required. GrokPattern *string // The name of the classifier. // // This member is required. Name *string // The time that this classifier was registered. 
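// Illustrative sketch (not generated code): a GlueTable, as described above, using
// the two documented AdditionalOptions keys to prune partitions at read time. The
// database, table, and predicate expressions are placeholders; the ptr helper
// comes from the first sketch.
func examplePartitionPrunedTable() GlueTable {
	return GlueTable{
		DatabaseName: ptr("analytics"), // required
		TableName:    ptr("events"),    // required
		AdditionalOptions: map[string]string{
			"pushDownPredicate":         "year == '2023' and month == '11'",
			"catalogPartitionPredicate": "year = '2023'",
		},
	}
}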
CreationTime *time.Time // Optional custom grok patterns defined by this classifier. For more information, // see custom patterns in Writing Custom Classifiers (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html) // . CustomPatterns *string // The time that this classifier was last updated. LastUpdated *time.Time // The version of this classifier. Version int64 noSmithyDocumentSerde } // Specifies an Apache Hudi data source. type HudiTarget struct { // The name of the connection to use to connect to the Hudi target. If your Hudi // files are stored in buckets that require VPC authorization, you can set their // connection properties here. ConnectionName *string // A list of glob patterns used to exclude from the crawl. For more information, // see Catalog Tables with a Crawler (https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html) // . Exclusions []string // The maximum depth of Amazon S3 paths that the crawler can traverse to discover // the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run // time. MaximumTraversalDepth *int32 // An array of Amazon S3 location strings for Hudi, each indicating the root // folder in which the metadata files for a Hudi table reside. The Hudi folder // may be located in a child folder of the root folder. The crawler will scan all // folders underneath a path for a Hudi folder. Paths []string noSmithyDocumentSerde } // A structure that defines an Apache Iceberg metadata table to create in the // catalog. type IcebergInput struct { // A required metadata operation. Can only be set to CREATE . // // This member is required. MetadataOperation MetadataOperation // The table version for the Iceberg table. Defaults to 2. Version *string noSmithyDocumentSerde } // Specifies an Apache Iceberg data source where Iceberg tables are stored in // Amazon S3. type IcebergTarget struct { // The name of the connection to use to connect to the Iceberg target. ConnectionName *string // A list of glob patterns used to exclude from the crawl. For more information, // see Catalog Tables with a Crawler (https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html) // . Exclusions []string // The maximum depth of Amazon S3 paths that the crawler can traverse to discover // the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler // run time. MaximumTraversalDepth *int32 // One or more Amazon S3 paths that contain Iceberg metadata folders as // s3://bucket/prefix . Paths []string noSmithyDocumentSerde } // Specifies configuration properties for an importing labels task run. type ImportLabelsTaskRunProperties struct { // The Amazon Simple Storage Service (Amazon S3) path from where you will import // the labels. InputS3Path *string // Indicates whether to overwrite your existing labels. Replace bool noSmithyDocumentSerde } // Additional connection options for the connector. type JDBCConnectorOptions struct { // Custom data type mapping that builds a mapping from a JDBC data type to a Glue // data type. For example, the option "dataTypeMapping":{"FLOAT":"STRING"} maps // data fields of JDBC type FLOAT into the Java String type by calling the // ResultSet.getString() method of the driver, and uses it to build the Glue // record. The ResultSet object is implemented by each driver, so the behavior is // specific to the driver you use. Refer to the documentation for your JDBC driver // to understand how the driver performs the conversions.
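// Illustrative sketch (not generated code): Hudi and Iceberg crawl targets, as
// described above, wrapped in the CrawlerTargets structure defined earlier in this
// file. Bucket names, exclusion patterns, and the traversal depth are
// placeholders; the ptr helper comes from the first sketch.
func exampleLakeFormatTargets() CrawlerTargets {
	return CrawlerTargets{
		HudiTargets: []HudiTarget{{
			Paths:                 []string{"s3://example-bucket/hudi/"},
			Exclusions:            []string{"**/_temporary/**"},
			MaximumTraversalDepth: ptr(int32(10)),
		}},
		IcebergTargets: []IcebergTarget{{
			Paths:                 []string{"s3://example-bucket/iceberg/"},
			MaximumTraversalDepth: ptr(int32(10)),
		}},
	}
}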
DataTypeMapping map[string]GlueRecordType // Extra condition clause to filter data from source. For example: // BillingCity='Mountain View' When using a query instead of a table name, you // should validate that the query works with the specified filterPredicate . FilterPredicate *string // The name of the job bookmark keys on which to sort. JobBookmarkKeys []string // Specifies an ascending or descending sort order. JobBookmarkKeysSortOrder *string // The minimum value of partitionColumn that is used to decide partition stride. LowerBound *int64 // The number of partitions. This value, along with lowerBound (inclusive) and // upperBound (exclusive), form partition strides for generated WHERE clause // expressions that are used to split the partitionColumn . NumPartitions *int64 // The name of an integer column that is used for partitioning. This option works // only when it's included with lowerBound , upperBound , and numPartitions . This // option works the same way as in the Spark SQL JDBC reader. PartitionColumn *string // The maximum value of partitionColumn that is used to decide partition stride. UpperBound *int64 noSmithyDocumentSerde } // Specifies a connector to a JDBC data source. type JDBCConnectorSource struct { // The name of the connection that is associated with the connector. // // This member is required. ConnectionName *string // The type of connection, such as marketplace.jdbc or custom.jdbc, designating a // connection to a JDBC data store. // // This member is required. ConnectionType *string // The name of a connector that assists with accessing the data store in Glue // Studio. // // This member is required. ConnectorName *string // The name of the data source. // // This member is required. Name *string // Additional connection options for the connector. AdditionalOptions *JDBCConnectorOptions // The name of the table in the data source. ConnectionTable *string // Specifies the data schema for the custom JDBC source. OutputSchemas []GlueSchema // The table or SQL query to get the data from. You can specify either // ConnectionTable or query , but not both. Query *string noSmithyDocumentSerde } // Specifies a data target that writes to Amazon S3 in Apache Parquet columnar // storage. type JDBCConnectorTarget struct { // The name of the connection that is associated with the connector. // // This member is required. ConnectionName *string // The name of the table in the data target. // // This member is required. ConnectionTable *string // The type of connection, such as marketplace.jdbc or custom.jdbc, designating a // connection to a JDBC data target. // // This member is required. ConnectionType *string // The name of a connector that will be used. // // This member is required. ConnectorName *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // Additional connection options for the connector. AdditionalOptions map[string]string // Specifies the data schema for the JDBC target. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies a JDBC data store to crawl. type JdbcTarget struct { // The name of the connection to use to connect to the JDBC target. ConnectionName *string // Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table // responses. RAWTYPES provides the native-level datatype. COMMENTS provides // comments associated with a column or table in the database. 
If you do not need // additional metadata, keep the field empty. EnableAdditionalMetadata []JdbcMetadataEntry // A list of glob patterns used to exclude from the crawl. For more information, // see Catalog Tables with a Crawler (https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html) // . Exclusions []string // The path of the JDBC target. Path *string noSmithyDocumentSerde } // Specifies a job definition. type Job struct { // This field is deprecated. Use MaxCapacity instead. The number of Glue data // processing units (DPUs) allocated to runs of this job. You can allocate a // minimum of 2 DPUs; the default is 10. A DPU is a relative measure of processing // power that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more // information, see the Glue pricing page (https://aws.amazon.com/glue/pricing/) . // // Deprecated: This property is deprecated, use MaxCapacity instead. AllocatedCapacity int32 // The representation of a directed acyclic graph on which both the Glue Studio // visual component and Glue Studio code generation are based. CodeGenConfigurationNodes map[string]CodeGenConfigurationNode // The JobCommand that runs this job. Command *JobCommand // The connections used for this job. Connections *ConnectionsList // The time and date that this job definition was created. CreatedOn *time.Time // The default arguments for every run of this job, specified as name-value pairs. // You can specify arguments here that your own job-execution script consumes, as // well as arguments that Glue itself consumes. Job arguments may be logged. Do not // pass plaintext secrets as arguments. Retrieve secrets from a Glue Connection, // Secrets Manager or other secret management mechanism if you intend to keep them // within the Job. For information about how to specify and consume your own Job // arguments, see the Calling Glue APIs in Python (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) // topic in the developer guide. For information about the arguments you can // provide to this field when configuring Spark jobs, see the Special Parameters // Used by Glue (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) // topic in the developer guide. For information about the arguments you can // provide to this field when configuring Ray jobs, see Using job parameters in // Ray jobs (https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html) // in the developer guide. DefaultArguments map[string]string // A description of the job. Description *string // Indicates whether the job is run with a standard or flexible execution class. // The standard execution class is ideal for time-sensitive workloads that require // fast job startup and dedicated resources. The flexible execution class is // appropriate for time-insensitive jobs whose start and completion times may vary. // Only jobs with Glue version 3.0 and above and command type glueetl will be // allowed to set ExecutionClass to FLEX . The flexible execution class is // available for Spark jobs. ExecutionClass ExecutionClass // An ExecutionProperty specifying the maximum number of concurrent runs allowed // for this job. ExecutionProperty *ExecutionProperty // In Spark jobs, GlueVersion determines the versions of Apache Spark and Python // that are available in a job. The Python version indicates the version supported // for jobs of type Spark. Ray jobs should set GlueVersion to 4.0 or greater.
// However, the versions of Ray, Python and additional libraries available in your // Ray job are determined by the Runtime parameter of the Job command. For more // information about the available Glue versions and corresponding Spark and Python // versions, see Glue version (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) // in the developer guide. Jobs that are created without specifying a Glue version // default to Glue 0.9. GlueVersion *string // The last point in time when this job definition was modified. LastModifiedOn *time.Time // This field is reserved for future use. LogUri *string // For Glue version 1.0 or earlier jobs, using the standard worker type, the // number of Glue data processing units (DPUs) that can be allocated when this job // runs. A DPU is a relative measure of processing power that consists of 4 vCPUs // of compute capacity and 16 GB of memory. For more information, see the Glue // pricing page (https://aws.amazon.com/glue/pricing/) . For Glue version 2.0 or // later jobs, you cannot specify a Maximum capacity . Instead, you should specify // a Worker type and the Number of workers . Do not set MaxCapacity if using // WorkerType and NumberOfWorkers . The value that can be allocated for MaxCapacity // depends on whether you are running a Python shell job, an Apache Spark ETL job, // or an Apache Spark streaming ETL job: // - When you specify a Python shell job ( JobCommand.Name ="pythonshell"), you // can allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU. // - When you specify an Apache Spark ETL job ( JobCommand.Name ="glueetl") or // Apache Spark streaming ETL job ( JobCommand.Name ="gluestreaming"), you can // allocate from 2 to 100 DPUs. The default is 10 DPUs. This job type cannot have a // fractional DPU allocation. MaxCapacity *float64 // The maximum number of times to retry this job after a JobRun fails. MaxRetries int32 // The name you assign to this job definition. Name *string // Arguments for this job that are not overridden when providing job arguments in // a job run, specified as name-value pairs. NonOverridableArguments map[string]string // Specifies configuration properties of a job notification. NotificationProperty *NotificationProperty // The number of workers of a defined workerType that are allocated when a job // runs. NumberOfWorkers *int32 // The name or Amazon Resource Name (ARN) of the IAM role associated with this job. Role *string // The name of the SecurityConfiguration structure to be used with this job. SecurityConfiguration *string // The details for a source control configuration for a job, allowing // synchronization of job artifacts to or from a remote repository. SourceControlDetails *SourceControlDetails // The job timeout in minutes. This is the maximum time that a job run can consume // resources before it is terminated and enters TIMEOUT status. The default is // 2,880 minutes (48 hours). Timeout *int32 // The type of predefined worker that is allocated when a job runs. Accepts a // value of G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X // for Ray jobs. // - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of // memory) with 84GB disk (approximately 34GB free), and provides 1 executor per // worker. We recommend this worker type for workloads such as data transforms, // joins, and queries, as it offers a scalable and cost-effective way to run most // jobs.
// - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of // memory) with 128GB disk (approximately 77GB free), and provides 1 executor per // worker. We recommend this worker type for workloads such as data transforms, // joins, and queries, as it offers a scalable and cost-effective way to run most // jobs. // - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of // memory) with 256GB disk (approximately 235GB free), and provides 1 executor per // worker. We recommend this worker type for jobs whose workloads contain your most // demanding transforms, aggregations, joins, and queries. This worker type is // available only for Glue version 3.0 or later Spark ETL jobs in the following // Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West // (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), // Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm). // - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of // memory) with 512GB disk (approximately 487GB free), and provides 1 executor per // worker. We recommend this worker type for jobs whose workloads contain your most // demanding transforms, aggregations, joins, and queries. This worker type is // available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon // Web Services Regions as supported for the G.4X worker type. // - For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of // memory) with 84GB disk (approximately 34GB free), and provides 1 executor per // worker. We recommend this worker type for low volume streaming jobs. This worker // type is only available for Glue version 3.0 streaming jobs. // - For the Z.2X worker type, each worker maps to 2 M-DPU (8 vCPUs, 64 GB of // memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray // workers based on the autoscaler. WorkerType WorkerType noSmithyDocumentSerde } // Defines a point that a job can resume processing. type JobBookmarkEntry struct { // The attempt ID number. Attempt int32 // The bookmark itself. JobBookmark *string // The name of the job in question. JobName *string // The unique run identifier associated with the previous job run. PreviousRunId *string // The run ID number. Run int32 // The run ID number. RunId *string // The version of the job. Version int32 noSmithyDocumentSerde } // Specifies how job bookmark data should be encrypted. type JobBookmarksEncryption struct { // The encryption mode to use for job bookmarks data. JobBookmarksEncryptionMode JobBookmarksEncryptionMode // The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data. KmsKeyArn *string noSmithyDocumentSerde } // Specifies code that runs when a job is run. type JobCommand struct { // The name of the job command. For an Apache Spark ETL job, this must be glueetl . // For a Python shell job, it must be pythonshell . For an Apache Spark streaming // ETL job, this must be gluestreaming . For a Ray job, this must be glueray . Name *string // The Python version being used to run a Python shell job. Allowed values are 2 // or 3. PythonVersion *string // In Ray jobs, Runtime is used to specify the versions of Ray, Python and // additional libraries available in your environment. This field is not used in // other job types. For supported runtime environment values, see Working with Ray // jobs (https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-runtimes.html) // in the Glue Developer Guide.
Runtime *string // Specifies the Amazon Simple Storage Service (Amazon S3) path to a script that // runs a job. ScriptLocation *string noSmithyDocumentSerde } // The details of a Job node present in the workflow. type JobNodeDetails struct { // The information for the job runs represented by the job node. JobRuns []JobRun noSmithyDocumentSerde } // Contains information about a job run. type JobRun struct { // This field is deprecated. Use MaxCapacity instead. The number of Glue data // processing units (DPUs) allocated to this JobRun. From 2 to 100 DPUs can be // allocated; the default is 10. A DPU is a relative measure of processing power // that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more // information, see the Glue pricing page (https://aws.amazon.com/glue/pricing/) . // // Deprecated: This property is deprecated, use MaxCapacity instead. AllocatedCapacity int32 // The job arguments associated with this run. For this job run, they replace the // default arguments set in the job definition itself. You can specify arguments // here that your own job-execution script consumes, as well as arguments that Glue // itself consumes. Job arguments may be logged. Do not pass plaintext secrets as // arguments. Retrieve secrets from a Glue Connection, Secrets Manager or other // secret management mechanism if you intend to keep them within the Job. For // information about how to specify and consume your own Job arguments, see the // Calling Glue APIs in Python (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) // topic in the developer guide. For information about the arguments you can // provide to this field when configuring Spark jobs, see the Special Parameters // Used by Glue (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) // topic in the developer guide. For information about the arguments you can // provide to this field when configuring Ray jobs, see Using job parameters in // Ray jobs (https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html) // in the developer guide. Arguments map[string]string // The number of the attempt to run this job. Attempt int32 // The date and time that this job run completed. CompletedOn *time.Time // This field populates only for Auto Scaling job runs, and represents the total // time each executor ran during the lifecycle of a job run in seconds, multiplied // by a DPU factor (1 for G.1X , 2 for G.2X , or 0.25 for G.025X workers). This // value may be different than the executionEngineRuntime * MaxCapacity as in the // case of Auto Scaling jobs, as the number of executors running at a given time // may be less than the MaxCapacity . Therefore, it is possible that the value of // DPUSeconds is less than executionEngineRuntime * MaxCapacity . DPUSeconds *float64 // An error message associated with this job run. ErrorMessage *string // Indicates whether the job is run with a standard or flexible execution class. // The standard execution-class is ideal for time-sensitive workloads that require // fast job startup and dedicated resources. The flexible execution class is // appropriate for time-insensitive jobs whose start and completion times may vary. // Only jobs with Glue version 3.0 and above and command type glueetl will be // allowed to set ExecutionClass to FLEX . The flexible execution class is // available for Spark jobs. ExecutionClass ExecutionClass // The amount of time (in seconds) that the job run consumed resources. 
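// Worked example (illustrative) for the DPUSeconds value described above: in an
// Auto Scaling job run on G.2X workers (DPU factor 2) where three executors each
// ran for 100 seconds, DPUSeconds is 3 * 100 * 2 = 600. Because fewer executors
// than MaxCapacity allows may be running at any given time, this can be less than
// executionEngineRuntime * MaxCapacity.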
ExecutionTime int32 // In Spark jobs, GlueVersion determines the versions of Apache Spark and Python // that Glue available in a job. The Python version indicates the version supported // for jobs of type Spark. Ray jobs should set GlueVersion to 4.0 or greater. // However, the versions of Ray, Python and additional libraries available in your // Ray job are determined by the Runtime parameter of the Job command. For more // information about the available Glue versions and corresponding Spark and Python // versions, see Glue version (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) // in the developer guide. Jobs that are created without specifying a Glue version // default to Glue 0.9. GlueVersion *string // The ID of this job run. Id *string // The name of the job definition being used in this run. JobName *string // The current state of the job run. For more information about the statuses of // jobs that have terminated abnormally, see Glue Job Run Statuses (https://docs.aws.amazon.com/glue/latest/dg/job-run-statuses.html) // . JobRunState JobRunState // The last time that this job run was modified. LastModifiedOn *time.Time // The name of the log group for secure logging that can be server-side encrypted // in Amazon CloudWatch using KMS. This name can be /aws-glue/jobs/ , in which case // the default encryption is NONE . If you add a role name and // SecurityConfiguration name (in other words, // /aws-glue/jobs-yourRoleName-yourSecurityConfigurationName/ ), then that security // configuration is used to encrypt the log group. LogGroupName *string // For Glue version 1.0 or earlier jobs, using the standard worker type, the // number of Glue data processing units (DPUs) that can be allocated when this job // runs. A DPU is a relative measure of processing power that consists of 4 vCPUs // of compute capacity and 16 GB of memory. For more information, see the Glue // pricing page (https://aws.amazon.com/glue/pricing/) . For Glue version 2.0+ // jobs, you cannot specify a Maximum capacity . Instead, you should specify a // Worker type and the Number of workers . Do not set MaxCapacity if using // WorkerType and NumberOfWorkers . The value that can be allocated for MaxCapacity // depends on whether you are running a Python shell job, an Apache Spark ETL job, // or an Apache Spark streaming ETL job: // - When you specify a Python shell job ( JobCommand.Name ="pythonshell"), you // can allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU. // - When you specify an Apache Spark ETL job ( JobCommand.Name ="glueetl") or // Apache Spark streaming ETL job ( JobCommand.Name ="gluestreaming"), you can // allocate from 2 to 100 DPUs. The default is 10 DPUs. This job type cannot have a // fractional DPU allocation. MaxCapacity *float64 // Specifies configuration properties of a job run notification. NotificationProperty *NotificationProperty // The number of workers of a defined workerType that are allocated when a job // runs. NumberOfWorkers *int32 // A list of predecessors to this job run. PredecessorRuns []Predecessor // The ID of the previous run of this job. For example, the JobRunId specified in // the StartJobRun action. PreviousRunId *string // The name of the SecurityConfiguration structure to be used with this job run. SecurityConfiguration *string // The date and time at which this job run was started. StartedOn *time.Time // The JobRun timeout in minutes. This is the maximum time that a job run can // consume resources before it is terminated and enters TIMEOUT status. 
This value // overrides the timeout value set in the parent job. Streaming jobs do not have a // timeout. The default for non-streaming jobs is 2,880 minutes (48 hours). Timeout *int32 // The name of the trigger that started this job run. TriggerName *string // The type of predefined worker that is allocated when a job runs. Accepts a // value of G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X // for Ray jobs. // - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of // memory) with 84GB disk (approximately 34GB free), and provides 1 executor per // worker. We recommend this worker type for workloads such as data transforms, // joins, and queries, to offers a scalable and cost effective way to run most // jobs. // - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of // memory) with 128GB disk (approximately 77GB free), and provides 1 executor per // worker. We recommend this worker type for workloads such as data transforms, // joins, and queries, to offers a scalable and cost effective way to run most // jobs. // - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of // memory) with 256GB disk (approximately 235GB free), and provides 1 executor per // worker. We recommend this worker type for jobs whose workloads contain your most // demanding transforms, aggregations, joins, and queries. This worker type is // available only for Glue version 3.0 or later Spark ETL jobs in the following // Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West // (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), // Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm). // - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of // memory) with 512GB disk (approximately 487GB free), and provides 1 executor per // worker. We recommend this worker type for jobs whose workloads contain your most // demanding transforms, aggregations, joins, and queries. This worker type is // available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon // Web Services Regions as supported for the G.4X worker type. // - For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of // memory) with 84GB disk (approximately 34GB free), and provides 1 executor per // worker. We recommend this worker type for low volume streaming jobs. This worker // type is only available for Glue version 3.0 streaming jobs. // - For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of // memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray // workers based on the autoscaler. WorkerType WorkerType noSmithyDocumentSerde } // Specifies information used to update an existing job definition. The previous // job definition is completely overwritten by this information. type JobUpdate struct { // This field is deprecated. Use MaxCapacity instead. The number of Glue data // processing units (DPUs) to allocate to this job. You can allocate a minimum of 2 // DPUs; the default is 10. A DPU is a relative measure of processing power that // consists of 4 vCPUs of compute capacity and 16 GB of memory. For more // information, see the Glue pricing page (https://aws.amazon.com/glue/pricing/) . // // Deprecated: This property is deprecated, use MaxCapacity instead. 
AllocatedCapacity int32 // The representation of a directed acyclic graph on which both the Glue Studio // visual component and Glue Studio code generation is based. CodeGenConfigurationNodes map[string]CodeGenConfigurationNode // The JobCommand that runs this job (required). Command *JobCommand // The connections used for this job. Connections *ConnectionsList // The default arguments for every run of this job, specified as name-value pairs. // You can specify arguments here that your own job-execution script consumes, as // well as arguments that Glue itself consumes. Job arguments may be logged. Do not // pass plaintext secrets as arguments. Retrieve secrets from a Glue Connection, // Secrets Manager or other secret management mechanism if you intend to keep them // within the Job. For information about how to specify and consume your own Job // arguments, see the Calling Glue APIs in Python (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) // topic in the developer guide. For information about the arguments you can // provide to this field when configuring Spark jobs, see the Special Parameters // Used by Glue (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) // topic in the developer guide. For information about the arguments you can // provide to this field when configuring Ray jobs, see Using job parameters in // Ray jobs (https://docs.aws.amazon.com/glue/latest/dg/author-job-ray-job-parameters.html) // in the developer guide. DefaultArguments map[string]string // Description of the job being defined. Description *string // Indicates whether the job is run with a standard or flexible execution class. // The standard execution-class is ideal for time-sensitive workloads that require // fast job startup and dedicated resources. The flexible execution class is // appropriate for time-insensitive jobs whose start and completion times may vary. // Only jobs with Glue version 3.0 and above and command type glueetl will be // allowed to set ExecutionClass to FLEX . The flexible execution class is // available for Spark jobs. ExecutionClass ExecutionClass // An ExecutionProperty specifying the maximum number of concurrent runs allowed // for this job. ExecutionProperty *ExecutionProperty // In Spark jobs, GlueVersion determines the versions of Apache Spark and Python // that Glue available in a job. The Python version indicates the version supported // for jobs of type Spark. Ray jobs should set GlueVersion to 4.0 or greater. // However, the versions of Ray, Python and additional libraries available in your // Ray job are determined by the Runtime parameter of the Job command. For more // information about the available Glue versions and corresponding Spark and Python // versions, see Glue version (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) // in the developer guide. Jobs that are created without specifying a Glue version // default to Glue 0.9. GlueVersion *string // This field is reserved for future use. LogUri *string // For Glue version 1.0 or earlier jobs, using the standard worker type, the // number of Glue data processing units (DPUs) that can be allocated when this job // runs. A DPU is a relative measure of processing power that consists of 4 vCPUs // of compute capacity and 16 GB of memory. For more information, see the Glue // pricing page (https://aws.amazon.com/glue/pricing/) . For Glue version 2.0+ // jobs, you cannot specify a Maximum capacity . 
Instead, you should specify a // Worker type and the Number of workers . Do not set MaxCapacity if using // WorkerType and NumberOfWorkers . The value that can be allocated for MaxCapacity // depends on whether you are running a Python shell job, an Apache Spark ETL job, // or an Apache Spark streaming ETL job: // - When you specify a Python shell job ( JobCommand.Name ="pythonshell"), you // can allocate either 0.0625 or 1 DPU. The default is 0.0625 DPU. // - When you specify an Apache Spark ETL job ( JobCommand.Name ="glueetl") or // Apache Spark streaming ETL job ( JobCommand.Name ="gluestreaming"), you can // allocate from 2 to 100 DPUs. The default is 10 DPUs. This job type cannot have a // fractional DPU allocation. MaxCapacity *float64 // The maximum number of times to retry this job if it fails. MaxRetries int32 // Arguments for this job that are not overridden when providing job arguments in // a job run, specified as name-value pairs. NonOverridableArguments map[string]string // Specifies the configuration properties of a job notification. NotificationProperty *NotificationProperty // The number of workers of a defined workerType that are allocated when a job // runs. NumberOfWorkers *int32 // The name or Amazon Resource Name (ARN) of the IAM role associated with this job // (required). Role *string // The name of the SecurityConfiguration structure to be used with this job. SecurityConfiguration *string // The details for a source control configuration for a job, allowing // synchronization of job artifacts to or from a remote repository. SourceControlDetails *SourceControlDetails // The job timeout in minutes. This is the maximum time that a job run can consume // resources before it is terminated and enters TIMEOUT status. The default is // 2,880 minutes (48 hours). Timeout *int32 // The type of predefined worker that is allocated when a job runs. Accepts a // value of G.1X, G.2X, G.4X, G.8X or G.025X for Spark jobs. Accepts the value Z.2X // for Ray jobs. // - For the G.1X worker type, each worker maps to 1 DPU (4 vCPUs, 16 GB of // memory) with 84GB disk (approximately 34GB free), and provides 1 executor per // worker. We recommend this worker type for workloads such as data transforms, // joins, and queries, to offers a scalable and cost effective way to run most // jobs. // - For the G.2X worker type, each worker maps to 2 DPU (8 vCPUs, 32 GB of // memory) with 128GB disk (approximately 77GB free), and provides 1 executor per // worker. We recommend this worker type for workloads such as data transforms, // joins, and queries, to offers a scalable and cost effective way to run most // jobs. // - For the G.4X worker type, each worker maps to 4 DPU (16 vCPUs, 64 GB of // memory) with 256GB disk (approximately 235GB free), and provides 1 executor per // worker. We recommend this worker type for jobs whose workloads contain your most // demanding transforms, aggregations, joins, and queries. This worker type is // available only for Glue version 3.0 or later Spark ETL jobs in the following // Amazon Web Services Regions: US East (Ohio), US East (N. Virginia), US West // (Oregon), Asia Pacific (Singapore), Asia Pacific (Sydney), Asia Pacific (Tokyo), // Canada (Central), Europe (Frankfurt), Europe (Ireland), and Europe (Stockholm). // - For the G.8X worker type, each worker maps to 8 DPU (32 vCPUs, 128 GB of // memory) with 512GB disk (approximately 487GB free), and provides 1 executor per // worker. 
We recommend this worker type for jobs whose workloads contain your most // demanding transforms, aggregations, joins, and queries. This worker type is // available only for Glue version 3.0 or later Spark ETL jobs, in the same Amazon // Web Services Regions as supported for the G.4X worker type. // - For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPUs, 4 GB of // memory) with 84GB disk (approximately 34GB free), and provides 1 executor per // worker. We recommend this worker type for low volume streaming jobs. This worker // type is only available for Glue version 3.0 streaming jobs. // - For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPUs, 64 GB of // memory) with 128 GB disk (approximately 120GB free), and provides up to 8 Ray // workers based on the autoscaler. WorkerType WorkerType noSmithyDocumentSerde } // Specifies a transform that joins two datasets into one dataset using a // comparison phrase on the specified data property keys. You can use inner, outer, // left, right, left semi, and left anti joins. type Join struct { // A list of the two columns to be joined. // // This member is required. Columns []JoinColumn // The data inputs identified by their node names. // // This member is required. Inputs []string // Specifies the type of join to be performed on the datasets. // // This member is required. JoinType JoinType // The name of the transform node. // // This member is required. Name *string noSmithyDocumentSerde } // Specifies a column to be joined. type JoinColumn struct { // The column to be joined. // // This member is required. From *string // The key of the column to be joined. // // This member is required. Keys [][]string noSmithyDocumentSerde } // A classifier for JSON content. type JsonClassifier struct { // A JsonPath string defining the JSON data for the classifier to classify. Glue // supports a subset of JsonPath, as described in Writing JsonPath Custom // Classifiers (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json) // . // // This member is required. JsonPath *string // The name of the classifier. // // This member is required. Name *string // The time that this classifier was registered. CreationTime *time.Time // The time that this classifier was last updated. LastUpdated *time.Time // The version of this classifier. Version int64 noSmithyDocumentSerde } // Additional options for streaming. type KafkaStreamingSourceOptions struct { // When this option is set to 'true', the data output will contain an additional // column named "__src_timestamp" that indicates the time when the corresponding // record received by the topic. The default value is 'false'. This option is // supported in Glue version 4.0 or later. AddRecordTimestamp *string // The specific TopicPartitions to consume. You must specify at least one of // "topicName" , "assign" or "subscribePattern" . Assign *string // A list of bootstrap server URLs, for example, as // b-1.vpc-test-2.o4q88o.c6.kafka.us-east-1.amazonaws.com:9094 . This option must // be specified in the API call or defined in the table metadata in the Data // Catalog. BootstrapServers *string // An optional classification. Classification *string // The name of the connection. ConnectionName *string // Specifies the delimiter character. Delimiter *string // When this option is set to 'true', for each batch, it will emit the metrics for // the duration between the oldest record received by the topic and the time it // arrives in Glue to CloudWatch. 
The metric's name is // "glue.driver.streaming.maxConsumerLagInMs". The default value is 'false'. This // option is supported in Glue version 4.0 or later. EmitConsumerLagMetrics *string // The end point when a batch query is ended. Possible values are either "latest" // or a JSON string that specifies an ending offset for each TopicPartition . EndingOffsets *string // Whether to include the Kafka headers. When the option is set to "true", the // data output will contain an additional column named // "glue_streaming_kafka_headers" with type Array[Struct(key: String, value: // String)] . The default value is "false". This option is available in Glue // version 3.0 or later only. IncludeHeaders *bool // The rate limit on the maximum number of offsets that are processed per trigger // interval. The specified total number of offsets is proportionally split across // topicPartitions of different volumes. The default value is null, which means // that the consumer reads all offsets until the known latest offset. MaxOffsetsPerTrigger *int64 // The desired minimum number of partitions to read from Kafka. The default value // is null, which means that the number of spark partitions is equal to the number // of Kafka partitions. MinPartitions *int32 // The number of times to retry before failing to fetch Kafka offsets. The default // value is 3 . NumRetries *int32 // The timeout in milliseconds to poll data from Kafka in Spark job executors. The // default value is 512 . PollTimeoutMs *int64 // The time in milliseconds to wait before retrying to fetch Kafka offsets. The // default value is 10 . RetryIntervalMs *int64 // The protocol used to communicate with brokers. The possible values are "SSL" or // "PLAINTEXT" . SecurityProtocol *string // The starting position in the Kafka topic to read data from. The possible values // are "earliest" or "latest" . The default value is "latest" . StartingOffsets *string // The timestamp of the record in the Kafka topic to start reading data from. The // possible values are a timestamp string in UTC format of the pattern // yyyy-mm-ddTHH:MM:SSZ (where Z represents a UTC timezone offset with a +/-. For // example: "2023-04-04T08:00:00+08:00"). Only one of StartingTimestamp or // StartingOffsets must be set. StartingTimestamp *time.Time // A Java regex string that identifies the topic list to subscribe to. You must // specify at least one of "topicName" , "assign" or "subscribePattern" . SubscribePattern *string // The topic name as specified in Apache Kafka. You must specify at least one of // "topicName" , "assign" or "subscribePattern" . TopicName *string noSmithyDocumentSerde } // A partition key pair consisting of a name and a type. type KeySchemaElement struct { // The name of a partition key. // // This member is required. Name *string // The type of a partition key. // // This member is required. Type *string noSmithyDocumentSerde } // Additional options for the Amazon Kinesis streaming data source. type KinesisStreamingSourceOptions struct { // Adds a time delay between two consecutive getRecords operations. The default // value is "False" . This option is only configurable for Glue version 2.0 and // above. AddIdleTimeBetweenReads *bool // When this option is set to 'true', the data output will contain an additional // column named "__src_timestamp" that indicates the time when the corresponding // record received by the stream. The default value is 'false'. This option is // supported in Glue version 4.0 or later. 
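// Example (illustrative only): minimal KafkaStreamingSourceOptions, defined
// above, for reading one topic from the earliest offset. The broker URL and
// topic name are placeholders; aws.String and aws.Int64 are the pointer helpers
// from the github.com/aws/aws-sdk-go-v2/aws package.
//
//	kafkaOpts := KafkaStreamingSourceOptions{
//		BootstrapServers:     aws.String("b-1.example.abcdef.c6.kafka.us-east-1.amazonaws.com:9094"),
//		TopicName:            aws.String("example-topic"),
//		StartingOffsets:      aws.String("earliest"),
//		MaxOffsetsPerTrigger: aws.Int64(10000),
//	}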
AddRecordTimestamp *string // Avoids creating an empty microbatch job by checking for unread data in the // Kinesis data stream before the batch is started. The default value is "False" . AvoidEmptyBatches *bool // An optional classification. Classification *string // Specifies the delimiter character. Delimiter *string // The minimum time interval between two ListShards API calls for your script to // consider resharding. The default value is 1s . DescribeShardInterval *int64 // When this option is set to 'true', for each batch, it will emit the metrics for // the duration between the oldest record received by the stream and the time it // arrives in Glue to CloudWatch. The metric's name is // "glue.driver.streaming.maxConsumerLagInMs". The default value is 'false'. This // option is supported in Glue version 4.0 or later. EmitConsumerLagMetrics *string // The URL of the Kinesis endpoint. EndpointUrl *string // The minimum time delay between two consecutive getRecords operations, specified // in ms. The default value is 1000 . This option is only configurable for Glue // version 2.0 and above. IdleTimeBetweenReadsInMs *int64 // The maximum number of records to fetch per shard in the Kinesis data stream. // The default value is 100000 . MaxFetchRecordsPerShard *int64 // The maximum time spent in the job executor to fetch a record from the Kinesis // data stream per shard, specified in milliseconds (ms). The default value is 1000 // . MaxFetchTimeInMs *int64 // The maximum number of records to fetch from the Kinesis data stream in each // getRecords operation. The default value is 10000 . MaxRecordPerRead *int64 // The maximum cool-off time period (specified in ms) between two retries of a // Kinesis Data Streams API call. The default value is 10000 . MaxRetryIntervalMs *int64 // The maximum number of retries for Kinesis Data Streams API requests. The // default value is 3 . NumRetries *int32 // The cool-off time period (specified in ms) before retrying the Kinesis Data // Streams API call. The default value is 1000 . RetryIntervalMs *int64 // The Amazon Resource Name (ARN) of the role to assume using AWS Security Token // Service (AWS STS). This role must have permissions for describe or read record // operations for the Kinesis data stream. You must use this parameter when // accessing a data stream in a different account. Used in conjunction with // "awsSTSSessionName" . RoleArn *string // An identifier for the session assuming the role using AWS STS. You must use // this parameter when accessing a data stream in a different account. Used in // conjunction with "awsSTSRoleARN" . RoleSessionName *string // The starting position in the Kinesis data stream to read data from. The // possible values are "latest" , "trim_horizon" , "earliest" , or a timestamp // string in UTC format in the pattern yyyy-mm-ddTHH:MM:SSZ (where Z represents a // UTC timezone offset with a +/-. For example: "2023-04-04T08:00:00-04:00"). The // default value is "latest" . Note: Using a value that is a timestamp string in // UTC format for "startingPosition" is supported only for Glue version 4.0 or // later. StartingPosition StartingPosition // The timestamp of the record in the Kinesis data stream to start reading data // from. The possible values are a timestamp string in UTC format of the pattern // yyyy-mm-ddTHH:MM:SSZ (where Z represents a UTC timezone offset with a +/-. For // example: "2023-04-04T08:00:00+08:00"). StartingTimestamp *time.Time // The Amazon Resource Name (ARN) of the Kinesis data stream. 
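// Example (illustrative only): KinesisStreamingSourceOptions for a cross-account
// read starting at trim_horizon. The ARNs and session name are placeholders, and
// the StartingPositionTrimHorizon constant is assumed to follow this package's
// generated naming for the "trim_horizon" value.
//
//	kinesisOpts := KinesisStreamingSourceOptions{
//		StreamArn:        aws.String("arn:aws:kinesis:us-east-1:111122223333:stream/example-stream"),
//		StartingPosition: StartingPositionTrimHorizon,
//		RoleArn:          aws.String("arn:aws:iam::111122223333:role/example-cross-account-role"),
//		RoleSessionName:  aws.String("example-glue-session"),
//	}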
StreamArn *string // The name of the Kinesis data stream. StreamName *string noSmithyDocumentSerde } // Specifies configuration properties for a labeling set generation task run. type LabelingSetGenerationTaskRunProperties struct { // The Amazon Simple Storage Service (Amazon S3) path where you will generate the // labeling set. OutputS3Path *string noSmithyDocumentSerde } // Specifies Lake Formation configuration settings for the crawler. type LakeFormationConfiguration struct { // Required for cross account crawls. For same account crawls as the target data, // this can be left as null. AccountId *string // Specifies whether to use Lake Formation credentials for the crawler instead of // the IAM role credentials. UseLakeFormationCredentials *bool noSmithyDocumentSerde } // When there are multiple versions of a blueprint and the latest version has some // errors, this attribute indicates the last successful blueprint definition that // is available with the service. type LastActiveDefinition struct { // Specifies a path in Amazon S3 where the blueprint is published by the Glue // developer. BlueprintLocation *string // Specifies a path in Amazon S3 where the blueprint is copied when you create or // update the blueprint. BlueprintServiceLocation *string // The description of the blueprint. Description *string // The date and time the blueprint was last modified. LastModifiedOn *time.Time // A JSON string specifying the parameters for the blueprint. ParameterSpec *string noSmithyDocumentSerde } // Status and error information about the most recent crawl. type LastCrawlInfo struct { // If an error occurred, the error information about the last crawl. ErrorMessage *string // The log group for the last crawl. LogGroup *string // The log stream for the last crawl. LogStream *string // The prefix for a message about this crawl. MessagePrefix *string // The time at which the crawl started. StartTime *time.Time // Status of the last crawl. Status LastCrawlStatus noSmithyDocumentSerde } // Specifies data lineage configuration settings for the crawler. type LineageConfiguration struct { // Specifies whether data lineage is enabled for the crawler. Valid values are: // - ENABLE: enables data lineage for the crawler // - DISABLE: disables data lineage for the crawler CrawlerLineageSettings CrawlerLineageSettings noSmithyDocumentSerde } // The location of resources. type Location struct { // An Amazon DynamoDB table location. DynamoDB []CodeGenNodeArg // A JDBC location. Jdbc []CodeGenNodeArg // An Amazon Simple Storage Service (Amazon S3) location. S3 []CodeGenNodeArg noSmithyDocumentSerde } // Defines column statistics supported for integer data columns. type LongColumnStatisticsData struct { // The number of distinct values in a column. // // This member is required. NumberOfDistinctValues int64 // The number of null values in the column. // // This member is required. NumberOfNulls int64 // The highest value in the column. MaximumValue int64 // The lowest value in the column. MinimumValue int64 noSmithyDocumentSerde } // Specifies the mapping of data property keys. type Mapping struct { // Only applicable to nested data structures. If you want to change the parent // structure, but also one of its children, you can fill out this data strucutre. // It is also Mapping , but its FromPath will be the parent's FromPath plus the // FromPath from this structure. 
For the children part, suppose you have the // structure: { "FromPath": "OuterStructure", "ToKey": "OuterStructure", "ToType": // "Struct", "Dropped": false, "Children": [{ "FromPath": "inner", "ToKey": // "inner", "ToType": "Double", "Dropped": false, }] } You can specify a Mapping // that looks like: { "FromPath": "OuterStructure", "ToKey": "OuterStructure", // "ToType": "Struct", "Dropped": false, "Children": [{ "FromPath": "inner", // "ToKey": "inner", "ToType": "Double", "Dropped": false, }] } Children []Mapping // If true, then the column is removed. Dropped *bool // The table or column to be modified. FromPath []string // The type of the data to be modified. FromType *string // After the apply mapping, what the name of the column should be. Can be the same // as FromPath . ToKey *string // The data type that the data is to be modified to. ToType *string noSmithyDocumentSerde } // Defines a mapping. type MappingEntry struct { // The source path. SourcePath *string // The name of the source table. SourceTable *string // The source type. SourceType *string // The target path. TargetPath *string // The target table. TargetTable *string // The target type. TargetType *string noSmithyDocumentSerde } // Specifies a transform that merges a DynamicFrame with a staging DynamicFrame // based on the specified primary keys to identify records. Duplicate records // (records with the same primary keys) are not de-duplicated. type Merge struct { // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // The list of primary key fields to match records from the source and staging // dynamic frames. // // This member is required. PrimaryKeys [][]string // The source DynamicFrame that will be merged with a staging DynamicFrame . // // This member is required. Source *string noSmithyDocumentSerde } // A structure containing metadata information for a schema version. type MetadataInfo struct { // The time at which the entry was created. CreatedTime *string // The metadata key’s corresponding value. MetadataValue *string // Other metadata belonging to the same metadata key. OtherMetadataValueList []OtherMetadataValueListItem noSmithyDocumentSerde } // A structure containing a key value pair for metadata. type MetadataKeyValuePair struct { // A metadata key. MetadataKey *string // A metadata key’s corresponding value. MetadataValue *string noSmithyDocumentSerde } // Specifies a Microsoft SQL server data source in the Glue Data Catalog. type MicrosoftSQLServerCatalogSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string noSmithyDocumentSerde } // Specifies a target that uses Microsoft SQL. type MicrosoftSQLServerCatalogTarget struct { // The name of the database to write to. // // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string noSmithyDocumentSerde } // A structure for a machine learning transform. type MLTransform struct { // A timestamp.
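// Example (illustrative only): the nested Mapping shown in JSON above for the
// Mapping type, expressed with this package's structs. aws.String and aws.Bool
// are the pointer helpers from the github.com/aws/aws-sdk-go-v2/aws package.
//
//	m := Mapping{
//		FromPath: []string{"OuterStructure"},
//		ToKey:    aws.String("OuterStructure"),
//		ToType:   aws.String("Struct"),
//		Dropped:  aws.Bool(false),
//		Children: []Mapping{{
//			FromPath: []string{"inner"},
//			ToKey:    aws.String("inner"),
//			ToType:   aws.String("Double"),
//			Dropped:  aws.Bool(false),
//		}},
//	}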
The time and date that this machine learning transform was created. CreatedOn *time.Time // A user-defined, long-form description text for the machine learning transform. // Descriptions are not guaranteed to be unique and can be changed at any time. Description *string // An EvaluationMetrics object. Evaluation metrics provide an estimate of the // quality of your machine learning transform. EvaluationMetrics *EvaluationMetrics // This value determines which version of Glue this machine learning transform is // compatible with. Glue 1.0 is recommended for most customers. If the value is not // set, the Glue compatibility defaults to Glue 0.9. For more information, see // Glue Versions (https://docs.aws.amazon.com/glue/latest/dg/release-notes.html#release-notes-versions) // in the developer guide. GlueVersion *string // A list of Glue table definitions used by the transform. InputRecordTables []GlueTable // A count identifier for the labeling files generated by Glue for this transform. // As you create a better transform, you can iteratively download, label, and // upload the labeling file. LabelCount int32 // A timestamp. The last point in time when this machine learning transform was // modified. LastModifiedOn *time.Time // The number of Glue data processing units (DPUs) that are allocated to task runs // for this transform. You can allocate from 2 to 100 DPUs; the default is 10. A // DPU is a relative measure of processing power that consists of 4 vCPUs of // compute capacity and 16 GB of memory. For more information, see the Glue // pricing page (http://aws.amazon.com/glue/pricing/) . MaxCapacity is a mutually // exclusive option with NumberOfWorkers and WorkerType . // - If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be // set. // - If MaxCapacity is set then neither NumberOfWorkers nor WorkerType can be set. // - If WorkerType is set, then NumberOfWorkers is required (and vice versa). // - MaxCapacity and NumberOfWorkers must both be at least 1. // When the WorkerType field is set to a value other than Standard , the // MaxCapacity field is set automatically and becomes read-only. MaxCapacity *float64 // The maximum number of times to retry after an MLTaskRun of the machine learning // transform fails. MaxRetries *int32 // A user-defined name for the machine learning transform. Names are not // guaranteed unique and can be changed at any time. Name *string // The number of workers of a defined workerType that are allocated when a task of // the transform runs. If WorkerType is set, then NumberOfWorkers is required (and // vice versa). NumberOfWorkers *int32 // A TransformParameters object. You can use parameters to tune (customize) the // behavior of the machine learning transform by specifying what data it learns // from and your preference on various tradeoffs (such as precision vs. recall, or // accuracy vs. cost). Parameters *TransformParameters // The name or Amazon Resource Name (ARN) of the IAM role with the required // permissions. The required permissions include both Glue service role permissions // to Glue resources, and Amazon S3 permissions required by the transform. // - This role needs Glue service role permissions to allow access to resources // in Glue. See Attach a Policy to IAM Users That Access Glue (https://docs.aws.amazon.com/glue/latest/dg/attach-policy-iam-user.html) // .
// - This role needs permission to your Amazon Simple Storage Service (Amazon // S3) sources, targets, temporary directory, scripts, and any libraries used by // the task run for this transform. Role *string // A map of key-value pairs representing the columns and data types that this // transform can run against. Has an upper bound of 100 columns. Schema []SchemaColumn // The current status of the machine learning transform. Status TransformStatusType // The timeout in minutes of the machine learning transform. Timeout *int32 // The encryption-at-rest settings of the transform that apply to accessing user // data. Machine learning transforms can access user data encrypted in Amazon S3 // using KMS. TransformEncryption *TransformEncryption // The unique transform ID that is generated for the machine learning transform. // The ID is guaranteed to be unique and does not change. TransformId *string // The type of predefined worker that is allocated when a task of this transform // runs. Accepts a value of Standard, G.1X, or G.2X. // - For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory // and a 50GB disk, and 2 executors per worker. // - For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a // 64GB disk, and 1 executor per worker. // - For the G.2X worker type, each worker provides 8 vCPU, 32 GB of memory and a // 128GB disk, and 1 executor per worker. // MaxCapacity is a mutually exclusive option with NumberOfWorkers and WorkerType . // - If either NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be // set. // - If MaxCapacity is set then neither NumberOfWorkers or WorkerType can be set. // - If WorkerType is set, then NumberOfWorkers is required (and vice versa). // - MaxCapacity and NumberOfWorkers must both be at least 1. WorkerType WorkerType noSmithyDocumentSerde } // The encryption-at-rest settings of the transform that apply to accessing user // data. type MLUserDataEncryption struct { // The encryption mode applied to user data. Valid values are: // - DISABLED: encryption is disabled // - SSEKMS: use of server-side encryption with Key Management Service (SSE-KMS) // for user data stored in Amazon S3. // // This member is required. MlUserDataEncryptionMode MLUserDataEncryptionModeString // The ID for the customer-provided KMS key. KmsKeyId *string noSmithyDocumentSerde } // Specifies an Amazon DocumentDB or MongoDB data store to crawl. type MongoDBTarget struct { // The name of the connection to use to connect to the Amazon DocumentDB or // MongoDB target. ConnectionName *string // The path of the Amazon DocumentDB or MongoDB target (database/collection). Path *string // Indicates whether to scan all the records, or to sample rows from the table. // Scanning all the records can take a long time when the table is not a high // throughput table. A value of true means to scan all records, while a value of // false means to sample the records. If no value is specified, the value defaults // to true . ScanAll *bool noSmithyDocumentSerde } // Specifies a MySQL data source in the Glue Data Catalog. type MySQLCatalogSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string noSmithyDocumentSerde } // Specifies a target that uses MySQL. type MySQLCatalogTarget struct { // The name of the database to write to. 
// // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string noSmithyDocumentSerde } // A node represents an Glue component (trigger, crawler, or job) on a workflow // graph. type Node struct { // Details of the crawler when the node represents a crawler. CrawlerDetails *CrawlerNodeDetails // Details of the Job when the node represents a Job. JobDetails *JobNodeDetails // The name of the Glue component represented by the node. Name *string // Details of the Trigger when the node represents a Trigger. TriggerDetails *TriggerNodeDetails // The type of Glue component represented by the node. Type NodeType // The unique Id assigned to the node within the workflow. UniqueId *string noSmithyDocumentSerde } // Specifies configuration properties of a notification. type NotificationProperty struct { // After a job run starts, the number of minutes to wait before sending a job run // delay notification. NotifyDelayAfter *int32 noSmithyDocumentSerde } // Represents whether certain values are recognized as null values for removal. type NullCheckBoxList struct { // Specifies that an empty string is considered as a null value. IsEmpty *bool // Specifies that an integer value of -1 is considered as a null value. IsNegOne *bool // Specifies that a value spelling out the word 'null' is considered as a null // value. IsNullString *bool noSmithyDocumentSerde } // Represents a custom null value such as a zeros or other value being used as a // null placeholder unique to the dataset. type NullValueField struct { // The datatype of the value. // // This member is required. Datatype *Datatype // The value of the null placeholder. // // This member is required. Value *string noSmithyDocumentSerde } // A structure representing an open format table. type OpenTableFormatInput struct { // Specifies an IcebergInput structure that defines an Apache Iceberg metadata // table. IcebergInput *IcebergInput noSmithyDocumentSerde } // Specifies an option value. type Option struct { // Specifies the description of the option. Description *string // Specifies the label of the option. Label *string // Specifies the value of the option. Value *string noSmithyDocumentSerde } // Specifies an Oracle data source in the Glue Data Catalog. type OracleSQLCatalogSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string noSmithyDocumentSerde } // Specifies a target that uses Oracle SQL. type OracleSQLCatalogTarget struct { // The name of the database to write to. // // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string noSmithyDocumentSerde } // Specifies the sort order of a sorted column. type Order struct { // The name of the column. // // This member is required. Column *string // Indicates that the column is sorted in ascending order ( == 1 ), or in // descending order ( ==0 ). 
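// Example (illustrative only): an Order entry that sorts the placeholder column
// "event_time" in descending order, using the 1 = ascending / 0 = descending
// convention described above. aws.String is the pointer helper from the
// github.com/aws/aws-sdk-go-v2/aws package.
//
//	ord := Order{
//		Column:    aws.String("event_time"),
//		SortOrder: 0,
//	}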
// // This member is required. SortOrder int32 noSmithyDocumentSerde } // A structure containing other metadata for a schema version belonging to the // same metadata key. type OtherMetadataValueListItem struct { // The time at which the entry was created. CreatedTime *string // The metadata key’s corresponding value for the other metadata belonging to the // same metadata key. MetadataValue *string noSmithyDocumentSerde } // Represents a slice of table data. type Partition struct { // The ID of the Data Catalog in which the partition resides. CatalogId *string // The time at which the partition was created. CreationTime *time.Time // The name of the catalog database in which to create the partition. DatabaseName *string // The last time at which the partition was accessed. LastAccessTime *time.Time // The last time at which column statistics were computed for this partition. LastAnalyzedTime *time.Time // These key-value pairs define partition parameters. Parameters map[string]string // Provides information about the physical location where the partition is stored. StorageDescriptor *StorageDescriptor // The name of the database table in which to create the partition. TableName *string // The values of the partition. Values []string noSmithyDocumentSerde } // Contains information about a partition error. type PartitionError struct { // The details about the partition error. ErrorDetail *ErrorDetail // The values that define the partition. PartitionValues []string noSmithyDocumentSerde } // A structure for a partition index. type PartitionIndex struct { // The name of the partition index. // // This member is required. IndexName *string // The keys for the partition index. // // This member is required. Keys []string noSmithyDocumentSerde } // A descriptor for a partition index in a table. type PartitionIndexDescriptor struct { // The name of the partition index. // // This member is required. IndexName *string // The status of the partition index. The possible statuses are: // - CREATING: The index is being created. When an index is in a CREATING state, // the index or its table cannot be deleted. // - ACTIVE: The index creation succeeds. // - FAILED: The index creation fails. // - DELETING: The index is deleted from the list of indexes. // // This member is required. IndexStatus PartitionIndexStatus // A list of one or more keys, as KeySchemaElement structures, for the partition // index. // // This member is required. Keys []KeySchemaElement // A list of errors that can occur when registering partition indexes for an // existing table. BackfillErrors []BackfillError noSmithyDocumentSerde } // The structure used to create and update a partition. type PartitionInput struct { // The last time at which the partition was accessed. LastAccessTime *time.Time // The last time at which column statistics were computed for this partition. LastAnalyzedTime *time.Time // These key-value pairs define partition parameters. Parameters map[string]string // Provides information about the physical location where the partition is stored. StorageDescriptor *StorageDescriptor // The values of the partition. Although this parameter is not required by the // SDK, you must specify this parameter for a valid input. The values for the keys // for the new partition must be passed as an array of String objects that must be // ordered in the same order as the partition keys appearing in the Amazon S3 // prefix. Otherwise Glue will add the values to the wrong keys. 
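// Example (illustrative only): for a table partitioned by the keys year, month,
// and day whose data lives under the placeholder prefix
// s3://example-bucket/table/2023/04/01/, the partition values must be supplied in
// that same key order.
//
//	input := PartitionInput{
//		Values: []string{"2023", "04", "01"},
//	}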
Values []string noSmithyDocumentSerde } // Contains a list of values defining partitions. type PartitionValueList struct { // The list of values. // // This member is required. Values []string noSmithyDocumentSerde } // Specifies the physical requirements for a connection. type PhysicalConnectionRequirements struct { // The connection's Availability Zone. This field is redundant because the // specified subnet implies the Availability Zone to be used. Currently the field // must be populated, but it will be deprecated in the future. AvailabilityZone *string // The security group ID list used by the connection. SecurityGroupIdList []string // The subnet ID used by the connection. SubnetId *string noSmithyDocumentSerde } // Specifies a transform that identifies, removes or masks PII data. type PIIDetection struct { // Indicates the types of entities the PIIDetection transform will identify as PII // data. PII type entities include: PERSON_NAME, DATE, USA_SNN, EMAIL, USA_ITIN, // USA_PASSPORT_NUMBER, PHONE_NUMBER, BANK_ACCOUNT, IP_ADDRESS, MAC_ADDRESS, // USA_CPT_CODE, USA_HCPCS_CODE, USA_NATIONAL_DRUG_CODE, // USA_MEDICARE_BENEFICIARY_IDENTIFIER, // USA_HEALTH_INSURANCE_CLAIM_NUMBER,CREDIT_CARD,USA_NATIONAL_PROVIDER_IDENTIFIER,USA_DEA_NUMBER,USA_DRIVING_LICENSE // // This member is required. EntityTypesToDetect []string // The node ID inputs to the transform. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // Indicates the type of PIIDetection transform. // // This member is required. PiiType PiiType // Indicates the value that will replace the detected entity. MaskValue *string // Indicates the output column name that will contain any entity type detected in // that row. OutputColumnName *string // Indicates the fraction of the data to sample when scanning for PII entities. SampleFraction *float64 // Indicates the fraction of the data that must be met in order for a column to be // identified as PII data. ThresholdFraction *float64 noSmithyDocumentSerde } // Specifies a PostgresSQL data source in the Glue Data Catalog. type PostgreSQLCatalogSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string noSmithyDocumentSerde } // Specifies a target that uses Postgres SQL. type PostgreSQLCatalogTarget struct { // The name of the database to write to. // // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string noSmithyDocumentSerde } // A job run that was used in the predicate of a conditional trigger that // triggered this job run. type Predecessor struct { // The name of the job definition used by the predecessor job run. JobName *string // The job-run ID of the predecessor job run. RunId *string noSmithyDocumentSerde } // Defines the predicate of the trigger, which determines when it fires. type Predicate struct { // A list of the conditions that determine when the trigger will fire. Conditions []Condition // An optional field if only one condition is listed. If multiple conditions are // listed, then this field is required. 
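// Example (illustrative only): a Predicate that fires only after two upstream
// jobs both succeed. The job names are placeholders, and the Condition type and
// the LogicalAnd, LogicalOperatorEquals, and JobRunStateSucceeded constants are
// assumed from their definitions elsewhere in this package.
//
//	pred := Predicate{
//		Logical: LogicalAnd,
//		Conditions: []Condition{
//			{LogicalOperator: LogicalOperatorEquals, JobName: aws.String("example-stage-job"), State: JobRunStateSucceeded},
//			{LogicalOperator: LogicalOperatorEquals, JobName: aws.String("example-load-job"), State: JobRunStateSucceeded},
//		},
//	}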
Logical Logical noSmithyDocumentSerde } // Permissions granted to a principal. type PrincipalPermissions struct { // The permissions that are granted to the principal. Permissions []Permission // The principal who is granted permissions. Principal *DataLakePrincipal noSmithyDocumentSerde } // Defines a property predicate. type PropertyPredicate struct { // The comparator used to compare this property to others. Comparator Comparator // The key of the property. Key *string // The value of the property. Value *string noSmithyDocumentSerde } // A Glue Studio node that uses a Glue DataBrew recipe in Glue jobs. type Recipe struct { // The nodes that are inputs to the recipe node, identified by id. // // This member is required. Inputs []string // The name of the Glue Studio node. // // This member is required. Name *string // A reference to the DataBrew recipe used by the node. // // This member is required. RecipeReference *RecipeReference noSmithyDocumentSerde } // A reference to a Glue DataBrew recipe. type RecipeReference struct { // The ARN of the DataBrew recipe. // // This member is required. RecipeArn *string // The RecipeVersion of the DataBrew recipe. // // This member is required. RecipeVersion *string noSmithyDocumentSerde } // When crawling an Amazon S3 data source after the first crawl is complete, // specifies whether to crawl the entire dataset again or to crawl only folders // that were added since the last crawler run. For more information, see // Incremental Crawls in Glue (https://docs.aws.amazon.com/glue/latest/dg/incremental-crawls.html) // in the developer guide. type RecrawlPolicy struct { // Specifies whether to crawl the entire dataset again or to crawl only folders // that were added since the last crawler run. A value of CRAWL_EVERYTHING // specifies crawling the entire dataset again. A value of CRAWL_NEW_FOLDERS_ONLY // specifies crawling only folders that were added since the last crawler run. A // value of CRAWL_EVENT_MODE specifies crawling only the changes identified by // Amazon S3 events. RecrawlBehavior RecrawlBehavior noSmithyDocumentSerde } // Specifies an Amazon Redshift data store. type RedshiftSource struct { // The database to read from. // // This member is required. Database *string // The name of the Amazon Redshift data store. // // This member is required. Name *string // The database table to read from. // // This member is required. Table *string // The Amazon S3 path where temporary data can be staged when copying out of the // database. RedshiftTmpDir *string // The IAM role with permissions. TmpDirIAMRole *string noSmithyDocumentSerde } // Specifies a target that uses Amazon Redshift. type RedshiftTarget struct { // The name of the database to write to. // // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string // The Amazon S3 path where temporary data can be staged when copying out of the // database. RedshiftTmpDir *string // The IAM role with permissions. TmpDirIAMRole *string // The set of options to configure an upsert operation when writing to a Redshift // target. UpsertRedshiftOptions *UpsertRedshiftTargetOptions noSmithyDocumentSerde } // A wrapper structure that may contain the registry name and Amazon Resource Name // (ARN). 
type RegistryId struct { // Arn of the registry to be updated. One of RegistryArn or RegistryName has to be // provided. RegistryArn *string // Name of the registry. Used only for lookup. One of RegistryArn or RegistryName // has to be provided. RegistryName *string noSmithyDocumentSerde } // A structure containing the details for a registry. type RegistryListItem struct { // The date the registry was created. CreatedTime *string // A description of the registry. Description *string // The Amazon Resource Name (ARN) of the registry. RegistryArn *string // The name of the registry. RegistryName *string // The status of the registry. Status RegistryStatus // The date the registry was updated. UpdatedTime *string noSmithyDocumentSerde } // Specifies a Relational database data source in the Glue Data Catalog. type RelationalCatalogSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string noSmithyDocumentSerde } // Specifies a transform that renames a single data property key. type RenameField struct { // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // A JSON path to a variable in the data structure for the source data. // // This member is required. SourcePath []string // A JSON path to a variable in the data structure for the target data. // // This member is required. TargetPath []string noSmithyDocumentSerde } // The URIs for function resources. type ResourceUri struct { // The type of the resource. ResourceType ResourceType // The URI for accessing the resource. Uri *string noSmithyDocumentSerde } // Specifies a Delta Lake data source that is registered in the Glue Data Catalog. // The data source must be stored in Amazon S3. type S3CatalogDeltaSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the Delta Lake data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string // Specifies additional connection options. AdditionalDeltaOptions map[string]string // Specifies the data schema for the Delta Lake source. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies a Hudi data source that is registered in the Glue Data Catalog. The // Hudi data source must be stored in Amazon S3. type S3CatalogHudiSource struct { // The name of the database to read from. // // This member is required. Database *string // The name of the Hudi data source. // // This member is required. Name *string // The name of the table in the database to read from. // // This member is required. Table *string // Specifies additional connection options. AdditionalHudiOptions map[string]string // Specifies the data schema for the Hudi source. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies an Amazon S3 data store in the Glue Data Catalog. type S3CatalogSource struct { // The database to read from. // // This member is required. Database *string // The name of the data store. // // This member is required. Name *string // The database table to read from. // // This member is required. Table *string // Specifies additional connection options.
AdditionalOptions *S3SourceAdditionalOptions // Partitions satisfying this predicate are deleted. Files within the retention // period in these partitions are not deleted. Set to "" – empty by default. PartitionPredicate *string noSmithyDocumentSerde } // Specifies a data target that writes to Amazon S3 using the Glue Data Catalog. type S3CatalogTarget struct { // The name of the database to write to. // // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string // Specifies native partitioning using a sequence of keys. PartitionKeys [][]string // A policy that specifies update behavior for the crawler. SchemaChangePolicy *CatalogSchemaChangePolicy noSmithyDocumentSerde } // Specifies a comma-separated value (CSV) data store stored in Amazon S3. type S3CsvSource struct { // The name of the data store. // // This member is required. Name *string // A list of the Amazon S3 paths to read from. // // This member is required. Paths []string // Specifies the character to use for quoting. The default is a double quote: '"' . // Set this to -1 to turn off quoting entirely. // // This member is required. QuoteChar QuoteChar // Specifies the delimiter character. The default is a comma: ",", but any other // character can be specified. // // This member is required. Separator Separator // Specifies additional connection options. AdditionalOptions *S3DirectSourceAdditionalOptions // Specifies how the data is compressed. This is generally not necessary if the // data has a standard file extension. Possible values are "gzip" and "bzip" ). CompressionType CompressionType // Specifies a character to use for escaping. This option is used only when // reading CSV files. The default value is none . If enabled, the character which // immediately follows is used as-is, except for a small set of well-known escapes // ( \n , \r , \t , and \0 ). Escaper *string // A string containing a JSON list of Unix-style glob patterns to exclude. For // example, "[\"**.pdf\"]" excludes all PDF files. Exclusions []string // Grouping files is turned on by default when the input contains more than 50,000 // files. To turn on grouping with fewer than 50,000 files, set this parameter to // "inPartition". To disable grouping when there are more than 50,000 files, set // this parameter to "none" . GroupFiles *string // The target group size in bytes. The default is computed based on the input data // size and the size of your cluster. When there are fewer than 50,000 input files, // "groupFiles" must be set to "inPartition" for this to take effect. GroupSize *string // This option controls the duration in milliseconds after which the s3 listing is // likely to be consistent. Files with modification timestamps falling within the // last maxBand milliseconds are tracked specially when using JobBookmarks to // account for Amazon S3 eventual consistency. Most users don't need to set this // option. The default is 900000 milliseconds, or 15 minutes. MaxBand *int32 // This option specifies the maximum number of files to save from the last maxBand // seconds. If this number is exceeded, extra files are skipped and only processed // in the next job run. MaxFilesInBand *int32 // A Boolean value that specifies whether a single record can span multiple lines.
// This can occur when a field contains a quoted new-line character. You must set // this option to True if any record spans multiple lines. The default value is // False , which allows for more aggressive file-splitting during parsing. Multiline *bool // A Boolean value that specifies whether to use the advanced SIMD CSV reader // along with Apache Arrow based columnar memory formats. Only available in Glue // version 3.0. OptimizePerformance bool // Specifies the data schema for the S3 CSV source. OutputSchemas []GlueSchema // If set to true, recursively reads files in all subdirectories under the // specified paths. Recurse *bool // A Boolean value that specifies whether to skip the first data line. The default // value is False . SkipFirst *bool // A Boolean value that specifies whether to treat the first line as a header. The // default value is False . WithHeader *bool // A Boolean value that specifies whether to write the header to output. The // default value is True . WriteHeader *bool noSmithyDocumentSerde } // Specifies a target that writes to a Delta Lake data source in the Glue Data // Catalog. type S3DeltaCatalogTarget struct { // The name of the database to write to. // // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string // Specifies additional connection options for the connector. AdditionalOptions map[string]string // Specifies native partitioning using a sequence of keys. PartitionKeys [][]string // A policy that specifies update behavior for the crawler. SchemaChangePolicy *CatalogSchemaChangePolicy noSmithyDocumentSerde } // Specifies a target that writes to a Delta Lake data source in Amazon S3. type S3DeltaDirectTarget struct { // Specifies how the data is compressed. This is generally not necessary if the // data has a standard file extension. Possible values are "gzip" and "bzip" ). // // This member is required. Compression DeltaTargetCompressionType // Specifies the data output format for the target. // // This member is required. Format TargetFormat // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The Amazon S3 path of your Delta Lake data source to write to. // // This member is required. Path *string // Specifies additional connection options for the connector. AdditionalOptions map[string]string // Specifies native partitioning using a sequence of keys. PartitionKeys [][]string // A policy that specifies update behavior for the crawler. SchemaChangePolicy *DirectSchemaChangePolicy noSmithyDocumentSerde } // Specifies a Delta Lake data source stored in Amazon S3. type S3DeltaSource struct { // The name of the Delta Lake source. // // This member is required. Name *string // A list of the Amazon S3 paths to read from. // // This member is required. Paths []string // Specifies additional connection options. AdditionalDeltaOptions map[string]string // Specifies additional options for the connector. AdditionalOptions *S3DirectSourceAdditionalOptions // Specifies the data schema for the Delta Lake source. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies additional connection options for the Amazon S3 data store. 
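// As a hedged example (not part of the generated code), bounded execution for an
// S3 direct source might be expressed by capping both the file count and the
// total bytes read; the limits shown are hypothetical and aws.Int64 is the
// pointer helper from the github.com/aws/aws-sdk-go-v2/aws package:
//
//	opts := types.S3DirectSourceAdditionalOptions{
//		BoundedFiles: aws.Int64(1000),    // process at most ~1,000 files
//		BoundedSize:  aws.Int64(1 << 30), // or at most ~1 GiB of data
//	}
//
// A value built this way is typically assigned to the AdditionalOptions field of
// an S3 source node such as S3CsvSource.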
type S3DirectSourceAdditionalOptions struct { // Sets the upper limit for the target number of files that will be processed. BoundedFiles *int64 // Sets the upper limit for the target size of the dataset in bytes that will be // processed. BoundedSize *int64 // Sets option to enable a sample path. EnableSamplePath *bool // If enabled, specifies the sample path. SamplePath *string noSmithyDocumentSerde } // Specifies a data target that writes to Amazon S3. type S3DirectTarget struct { // Specifies the data output format for the target. // // This member is required. Format TargetFormat // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // A single Amazon S3 path to write to. // // This member is required. Path *string // Specifies how the data is compressed. This is generally not necessary if the // data has a standard file extension. Possible values are "gzip" and "bzip" ). Compression *string // Specifies native partitioning using a sequence of keys. PartitionKeys [][]string // A policy that specifies update behavior for the crawler. SchemaChangePolicy *DirectSchemaChangePolicy noSmithyDocumentSerde } // Specifies how Amazon Simple Storage Service (Amazon S3) data should be // encrypted. type S3Encryption struct { // The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data. KmsKeyArn *string // The encryption mode to use for Amazon S3 data. S3EncryptionMode S3EncryptionMode noSmithyDocumentSerde } // Specifies a data target that writes to Amazon S3 in Apache Parquet columnar // storage. type S3GlueParquetTarget struct { // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // A single Amazon S3 path to write to. // // This member is required. Path *string // Specifies how the data is compressed. This is generally not necessary if the // data has a standard file extension. Possible values are "gzip" and "bzip" ). Compression ParquetCompressionType // Specifies native partitioning using a sequence of keys. PartitionKeys [][]string // A policy that specifies update behavior for the crawler. SchemaChangePolicy *DirectSchemaChangePolicy noSmithyDocumentSerde } // Specifies a target that writes to a Hudi data source in the Glue Data Catalog. type S3HudiCatalogTarget struct { // Specifies additional connection options for the connector. // // This member is required. AdditionalOptions map[string]string // The name of the database to write to. // // This member is required. Database *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The name of the table in the database to write to. // // This member is required. Table *string // Specifies native partitioning using a sequence of keys. PartitionKeys [][]string // A policy that specifies update behavior for the crawler. SchemaChangePolicy *CatalogSchemaChangePolicy noSmithyDocumentSerde } // Specifies a target that writes to a Hudi data source in Amazon S3. type S3HudiDirectTarget struct { // Specifies additional connection options for the connector. // // This member is required. AdditionalOptions map[string]string // Specifies how the data is compressed. This is generally not necessary if the // data has a standard file extension. 
Possible values are "gzip" and "bzip" ). // // This member is required. Compression HudiTargetCompressionType // Specifies the data output format for the target. // // This member is required. Format TargetFormat // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // The Amazon S3 path of your Hudi data source to write to. // // This member is required. Path *string // Specifies native partitioning using a sequence of keys. PartitionKeys [][]string // A policy that specifies update behavior for the crawler. SchemaChangePolicy *DirectSchemaChangePolicy noSmithyDocumentSerde } // Specifies a Hudi data source stored in Amazon S3. type S3HudiSource struct { // The name of the Hudi source. // // This member is required. Name *string // A list of the Amazon S3 paths to read from. // // This member is required. Paths []string // Specifies additional connection options. AdditionalHudiOptions map[string]string // Specifies additional options for the connector. AdditionalOptions *S3DirectSourceAdditionalOptions // Specifies the data schema for the Hudi source. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies a JSON data store stored in Amazon S3. type S3JsonSource struct { // The name of the data store. // // This member is required. Name *string // A list of the Amazon S3 paths to read from. // // This member is required. Paths []string // Specifies additional connection options. AdditionalOptions *S3DirectSourceAdditionalOptions // Specifies how the data is compressed. This is generally not necessary if the // data has a standard file extension. Possible values are "gzip" and "bzip" ). CompressionType CompressionType // A string containing a JSON list of Unix-style glob patterns to exclude. For // example, "[\"**.pdf\"]" excludes all PDF files. Exclusions []string // Grouping files is turned on by default when the input contains more than 50,000 // files. To turn on grouping with fewer than 50,000 files, set this parameter to // "inPartition". To disable grouping when there are more than 50,000 files, set // this parameter to "none" . GroupFiles *string // The target group size in bytes. The default is computed based on the input data // size and the size of your cluster. When there are fewer than 50,000 input files, // "groupFiles" must be set to "inPartition" for this to take effect. GroupSize *string // A JsonPath string defining the JSON data. JsonPath *string // This option controls the duration in milliseconds after which the s3 listing is // likely to be consistent. Files with modification timestamps falling within the // last maxBand milliseconds are tracked specially when using JobBookmarks to // account for Amazon S3 eventual consistency. Most users don't need to set this // option. The default is 900000 milliseconds, or 15 minutes. MaxBand *int32 // This option specifies the maximum number of files to save from the last maxBand // seconds. If this number is exceeded, extra files are skipped and only processed // in the next job run. MaxFilesInBand *int32 // A Boolean value that specifies whether a single record can span multiple lines. // This can occur when a field contains a quoted new-line character. You must set // this option to True if any record spans multiple lines. The default value is // False , which allows for more aggressive file-splitting during parsing. Multiline *bool // Specifies the data schema for the S3 JSON source. 
OutputSchemas []GlueSchema // If set to true, recursively reads files in all subdirectories under the // specified paths. Recurse *bool noSmithyDocumentSerde } // Specifies an Apache Parquet data store stored in Amazon S3. type S3ParquetSource struct { // The name of the data store. // // This member is required. Name *string // A list of the Amazon S3 paths to read from. // // This member is required. Paths []string // Specifies additional connection options. AdditionalOptions *S3DirectSourceAdditionalOptions // Specifies how the data is compressed. This is generally not necessary if the // data has a standard file extension. Possible values are "gzip" and "bzip" ). CompressionType ParquetCompressionType // A string containing a JSON list of Unix-style glob patterns to exclude. For // example, "[\"**.pdf\"]" excludes all PDF files. Exclusions []string // Grouping files is turned on by default when the input contains more than 50,000 // files. To turn on grouping with fewer than 50,000 files, set this parameter to // "inPartition". To disable grouping when there are more than 50,000 files, set // this parameter to "none" . GroupFiles *string // The target group size in bytes. The default is computed based on the input data // size and the size of your cluster. When there are fewer than 50,000 input files, // "groupFiles" must be set to "inPartition" for this to take effect. GroupSize *string // This option controls the duration in milliseconds after which the s3 listing is // likely to be consistent. Files with modification timestamps falling within the // last maxBand milliseconds are tracked specially when using JobBookmarks to // account for Amazon S3 eventual consistency. Most users don't need to set this // option. The default is 900000 milliseconds, or 15 minutes. MaxBand *int32 // This option specifies the maximum number of files to save from the last maxBand // seconds. If this number is exceeded, extra files are skipped and only processed // in the next job run. MaxFilesInBand *int32 // Specifies the data schema for the S3 Parquet source. OutputSchemas []GlueSchema // If set to true, recursively reads files in all subdirectories under the // specified paths. Recurse *bool noSmithyDocumentSerde } // Specifies additional connection options for the Amazon S3 data store. type S3SourceAdditionalOptions struct { // Sets the upper limit for the target number of files that will be processed. BoundedFiles *int64 // Sets the upper limit for the target size of the dataset in bytes that will be // processed. BoundedSize *int64 noSmithyDocumentSerde } // Specifies a data store in Amazon Simple Storage Service (Amazon S3). type S3Target struct { // The name of a connection which allows a job or crawler to access data in Amazon // S3 within an Amazon Virtual Private Cloud environment (Amazon VPC). ConnectionName *string // A valid Amazon dead-letter SQS ARN. For example, // arn:aws:sqs:region:account:deadLetterQueue . DlqEventQueueArn *string // A valid Amazon SQS ARN. For example, arn:aws:sqs:region:account:sqs . EventQueueArn *string // A list of glob patterns used to exclude from the crawl. For more information, // see Catalog Tables with a Crawler (https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html) // . Exclusions []string // The path to the Amazon S3 target. Path *string // Sets the number of files in each leaf folder to be crawled when crawling sample // files in a dataset. If not set, all the files are crawled. A valid value is an // integer between 1 and 249. 
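// For instance (an illustrative sketch only; the bucket path is made up), a
// crawler S3 target that samples 20 files per leaf folder could be built as
// follows, with aws.String and aws.Int32 taken from the
// github.com/aws/aws-sdk-go-v2/aws pointer helpers:
//
//	target := types.S3Target{
//		Path:       aws.String("s3://example-bucket/raw/"),
//		SampleSize: aws.Int32(20), // crawl 20 files per leaf folder
//	}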
SampleSize *int32 noSmithyDocumentSerde } // A scheduling object using a cron statement to schedule an event. type Schedule struct { // A cron expression used to specify the schedule (see Time-Based Schedules for // Jobs and Crawlers (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html) // . For example, to run something every day at 12:15 UTC, you would specify: // cron(15 12 * * ? *) . ScheduleExpression *string // The state of the schedule. State ScheduleState noSmithyDocumentSerde } // A policy that specifies update and deletion behaviors for the crawler. type SchemaChangePolicy struct { // The deletion behavior when the crawler finds a deleted object. DeleteBehavior DeleteBehavior // The update behavior when the crawler finds a changed schema. UpdateBehavior UpdateBehavior noSmithyDocumentSerde } // A key-value pair representing a column and data type that this transform can // run against. The Schema parameter of the MLTransform may contain up to 100 of // these structures. type SchemaColumn struct { // The type of data in the column. DataType *string // The name of the column. Name *string noSmithyDocumentSerde } // The unique ID of the schema in the Glue schema registry. type SchemaId struct { // The name of the schema registry that contains the schema. RegistryName *string // The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName // has to be provided. SchemaArn *string // The name of the schema. One of SchemaArn or SchemaName has to be provided. SchemaName *string noSmithyDocumentSerde } // An object that contains minimal details for a schema. type SchemaListItem struct { // The date and time that a schema was created. CreatedTime *string // A description for the schema. Description *string // The name of the registry where the schema resides. RegistryName *string // The Amazon Resource Name (ARN) for the schema. SchemaArn *string // The name of the schema. SchemaName *string // The status of the schema. SchemaStatus SchemaStatus // The date and time that a schema was updated. UpdatedTime *string noSmithyDocumentSerde } // An object that references a schema stored in the Glue Schema Registry. type SchemaReference struct { // A structure that contains schema identity fields. Either this or the // SchemaVersionId has to be provided. SchemaId *SchemaId // The unique ID assigned to a version of the schema. Either this or the SchemaId // has to be provided. SchemaVersionId *string // The version number of the schema. SchemaVersionNumber *int64 noSmithyDocumentSerde } // An object that contains the error details for an operation on a schema version. type SchemaVersionErrorItem struct { // The details of the error for the schema version. ErrorDetails *ErrorDetails // The version number of the schema. VersionNumber int64 noSmithyDocumentSerde } // An object containing the details about a schema version. type SchemaVersionListItem struct { // The date and time the schema version was created. CreatedTime *string // The Amazon Resource Name (ARN) of the schema. SchemaArn *string // The unique identifier of the schema version. SchemaVersionId *string // The status of the schema version. Status SchemaVersionStatus // The version number of the schema. VersionNumber int64 noSmithyDocumentSerde } // A structure containing the schema version information. type SchemaVersionNumber struct { // The latest version available for the schema. LatestVersion bool // The version number of the schema.
VersionNumber int64 noSmithyDocumentSerde } // Specifies a security configuration. type SecurityConfiguration struct { // The time at which this security configuration was created. CreatedTimeStamp *time.Time // The encryption configuration associated with this security configuration. EncryptionConfiguration *EncryptionConfiguration // The name of the security configuration. Name *string noSmithyDocumentSerde } // Defines a non-overlapping region of a table's partitions, allowing multiple // requests to be run in parallel. type Segment struct { // The zero-based index number of the segment. For example, if the total number of // segments is 4, SegmentNumber values range from 0 through 3. // // This member is required. SegmentNumber int32 // The total number of segments. // // This member is required. TotalSegments int32 noSmithyDocumentSerde } // Specifies a transform that chooses the data property keys that you want to keep. type SelectFields struct { // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // A JSON path to a variable in the data structure. // // This member is required. Paths [][]string noSmithyDocumentSerde } // Specifies a transform that chooses one DynamicFrame from a collection of // DynamicFrames . The output is the selected DynamicFrame. type SelectFromCollection struct { // The index for the DynamicFrame to be selected. // // This member is required. Index int32 // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string noSmithyDocumentSerde } // Information about a serialization/deserialization program (SerDe) that serves // as an extractor and loader. type SerDeInfo struct { // Name of the SerDe. Name *string // These key-value pairs define initialization parameters for the SerDe. Parameters map[string]string // Usually the class that implements the SerDe. An example is // org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe . SerializationLibrary *string noSmithyDocumentSerde } // The period in which a remote Spark runtime environment is running. type Session struct { // The command object. See SessionCommand. Command *SessionCommand // The number of connections used for the session. Connections *ConnectionsList // The time and date when the session was created. CreatedOn *time.Time // A map array of key-value pairs. Max is 75 pairs. DefaultArguments map[string]string // The description of the session. Description *string // The error message displayed during the session. ErrorMessage *string // The Glue version determines the versions of Apache Spark and Python that Glue // supports. The GlueVersion must be greater than 2.0. GlueVersion *string // The ID of the session. Id *string // The number of Glue data processing units (DPUs) that can be allocated when the // job runs. A DPU is a relative measure of processing power that consists of 4 // vCPUs of compute capacity and 16 GB memory. MaxCapacity *float64 // The code execution progress of the session. Progress float64 // The name or Amazon Resource Name (ARN) of the IAM role associated with the // Session. Role *string // The name of the SecurityConfiguration structure to be used with the session. SecurityConfiguration *string // The session status. Status SessionStatus noSmithyDocumentSerde } // The SessionCommand that runs the job.
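// A minimal sketch (not generated code): an ETL session command might be filled
// in as shown below, using aws.String from github.com/aws/aws-sdk-go-v2/aws; the
// Python version is only an example value:
//
//	cmd := types.SessionCommand{
//		Name:          aws.String("glueetl"), // or "gluestreaming"
//		PythonVersion: aws.String("3"),
//	}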
type SessionCommand struct { // Specifies the name of the SessionCommand. Can be 'glueetl' or 'gluestreaming'. Name *string // Specifies the Python version. The Python version indicates the version // supported for jobs of type Spark. PythonVersion *string noSmithyDocumentSerde } // Specifies skewed values in a table. Skewed values are those that occur with // very high frequency. type SkewedInfo struct { // A list of names of columns that contain skewed values. SkewedColumnNames []string // A mapping of skewed values to the columns that contain them. SkewedColumnValueLocationMaps map[string]string // A list of values that appear so frequently as to be considered skewed. SkewedColumnValues []string noSmithyDocumentSerde } // Specifies configuration for Snowflake nodes in Glue Studio. type SnowflakeNodeData struct { // Specifies what action to take when writing to a table with preexisting data. // Valid values: append , merge , truncate , drop . Action *string // Specifies additional options passed to the Snowflake connector. If options are // specified elsewhere in this node, this will take precedence. AdditionalOptions map[string]string // Specifies whether automatic query pushdown is enabled. If pushdown is enabled, // then when a query is run on Spark, if part of the query can be "pushed down" to // the Snowflake server, it is pushed down. This improves performance of some // queries. AutoPushdown bool // Specifies a Glue Data Catalog Connection to a Snowflake endpoint. Connection *Option // Specifies a Snowflake database for your node to use. Database *string // Not currently used. IamRole *Option // Specifies a merge action. Valid values: simple , custom . If simple, merge // behavior is defined by MergeWhenMatched and MergeWhenNotMatched . If custom, // defined by MergeClause . MergeAction *string // A SQL statement that specifies a custom merge behavior. MergeClause *string // Specifies how to resolve records that match preexisting data when merging. // Valid values: update , delete . MergeWhenMatched *string // Specifies how to process records that do not match preexisting data when // merging. Valid values: insert , none . MergeWhenNotMatched *string // A SQL string run after the Snowflake connector performs its standard actions. PostAction *string // A SQL string run before the Snowflake connector performs its standard actions. PreAction *string // A SQL string used to retrieve data with the query sourcetype. SampleQuery *string // Specifies a Snowflake database schema for your node to use. Schema *string // Specifies the columns combined to identify a record when detecting matches for // merges and upserts. A list of structures with value , label and description // keys. Each structure describes a column. SelectedColumns []Option // Specifies how retrieved data is specified. Valid values: "table" , "query" . SourceType *string // The name of a staging table used when performing merge or upsert append // actions. Data is written to this table, then moved to table by a generated // postaction. StagingTable *string // Specifies a Snowflake table for your node to use. Table *string // Manually defines the target schema for the node. A list of structures with value // , label and description keys. Each structure defines a column. TableSchema []Option // Not currently used. TempDir *string // Used when Action is append . Specifies the resolution behavior when a row // already exists. If true, preexisting rows will be updated. If false, those rows // will be inserted. 
Upsert bool noSmithyDocumentSerde } // Specifies a Snowflake data source. type SnowflakeSource struct { // Configuration for the Snowflake data source. // // This member is required. Data *SnowflakeNodeData // The name of the Snowflake data source. // // This member is required. Name *string // Specifies user-defined schemas for your output data. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies a Snowflake target. type SnowflakeTarget struct { // Specifies the data of the Snowflake target node. // // This member is required. Data *SnowflakeNodeData // The name of the Snowflake target. // // This member is required. Name *string // The nodes that are inputs to the data target. Inputs []string noSmithyDocumentSerde } // Specifies a field to sort by and a sort order. type SortCriterion struct { // The name of the field on which to sort. FieldName *string // An ascending or descending sort. Sort Sort noSmithyDocumentSerde } // The details for a source control configuration for a job, allowing // synchronization of job artifacts to or from a remote repository. type SourceControlDetails struct { // The type of authentication, which can be an authentication token stored in // Amazon Web Services Secrets Manager, or a personal access token. AuthStrategy SourceControlAuthStrategy // The value of an authorization token. AuthToken *string // An optional branch in the remote repository. Branch *string // An optional folder in the remote repository. Folder *string // The last commit ID for a commit in the remote repository. LastCommitId *string // The owner of the remote repository that contains the job artifacts. Owner *string // The provider for the remote repository. Provider SourceControlProvider // The name of the remote repository that contains the job artifacts. Repository *string noSmithyDocumentSerde } // Specifies a connector to an Apache Spark data source. type SparkConnectorSource struct { // The name of the connection that is associated with the connector. // // This member is required. ConnectionName *string // The type of connection, such as marketplace.spark or custom.spark, designating // a connection to an Apache Spark data store. // // This member is required. ConnectionType *string // The name of a connector that assists with accessing the data store in Glue // Studio. // // This member is required. ConnectorName *string // The name of the data source. // // This member is required. Name *string // Additional connection options for the connector. AdditionalOptions map[string]string // Specifies data schema for the custom spark source. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies a target that uses an Apache Spark connector. type SparkConnectorTarget struct { // The name of a connection for an Apache Spark connector. // // This member is required. ConnectionName *string // The type of connection, such as marketplace.spark or custom.spark, designating // a connection to an Apache Spark data store. // // This member is required. ConnectionType *string // The name of an Apache Spark connector. // // This member is required. ConnectorName *string // The nodes that are inputs to the data target. // // This member is required. Inputs []string // The name of the data target. // // This member is required. Name *string // Additional connection options for the connector. AdditionalOptions map[string]string // Specifies the data schema for the custom spark target. 
OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies a transform where you enter a SQL query using Spark SQL syntax to // transform the data. The output is a single DynamicFrame . type SparkSQL struct { // The data inputs identified by their node names. You can associate a table name // with each input node to use in the SQL query. The name you choose must meet the // Spark SQL naming restrictions. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // A list of aliases. An alias allows you to specify what name to use in the SQL // for a given input. For example, you have a datasource named "MyDataSource". If // you specify From as MyDataSource, and Alias as SqlName, then in your SQL you // can do: select * from SqlName and that gets data from MyDataSource. // // This member is required. SqlAliases []SqlAlias // A SQL query that must use Spark SQL syntax and return a single data set. // // This member is required. SqlQuery *string // Specifies the data schema for the SparkSQL transform. OutputSchemas []GlueSchema noSmithyDocumentSerde } // Specifies a transform that writes samples of the data to an Amazon S3 bucket. type Spigot struct { // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // A path in Amazon S3 where the transform will write a subset of records from the // dataset to a JSON file in an Amazon S3 bucket. // // This member is required. Path *string // The probability (a decimal value with a maximum value of 1) of picking any // given record. A value of 1 indicates that each row read from the dataset should // be included in the sample output. Prob *float64 // Specifies a number of records to write starting from the beginning of the // dataset. Topk *int32 noSmithyDocumentSerde } // Specifies a transform that splits data property keys into two DynamicFrames . // The output is a collection of DynamicFrames : one with selected data property // keys, and one with the remaining data property keys. type SplitFields struct { // The data inputs identified by their node names. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // A JSON path to a variable in the data structure. // // This member is required. Paths [][]string noSmithyDocumentSerde } // Represents a single entry in the list of values for SqlAliases . type SqlAlias struct { // A temporary name given to a table, or a column in a table. // // This member is required. Alias *string // A table, or a column in a table. // // This member is required. From *string noSmithyDocumentSerde } // The batch condition that started the workflow run. Either the number of events // in the batch size arrived, in which case the BatchSize member is non-zero, or // the batch window expired, in which case the BatchWindow member is non-zero. type StartingEventBatchCondition struct { // Number of events in the batch. BatchSize *int32 // Duration of the batch window in seconds. BatchWindow *int32 noSmithyDocumentSerde } // The statement or request for a particular action to occur in a session. type Statement struct { // The execution code of the statement. Code *string // The unix time and date that the job definition was completed. CompletedOn int64 // The ID of the statement. Id int32 // The output in JSON. Output *StatementOutput // The code execution progress. 
Progress float64 // The unix time and date that the job definition was started. StartedOn int64 // The state while request is actioned. State StatementState noSmithyDocumentSerde } // The code execution output in JSON format. type StatementOutput struct { // The code execution output. Data *StatementOutputData // The name of the error in the output. ErrorName *string // The error value of the output. ErrorValue *string // The execution count of the output. ExecutionCount int32 // The status of the code execution output. Status StatementState // The traceback of the output. Traceback []string noSmithyDocumentSerde } // The code execution output in JSON format. type StatementOutputData struct { // The code execution output in text format. TextPlain *string noSmithyDocumentSerde } // Describes the physical storage of table data. type StorageDescriptor struct { // A list of locations that point to the path where a Delta table is located. AdditionalLocations []string // A list of reducer grouping columns, clustering columns, and bucketing columns // in the table. BucketColumns []string // A list of the Columns in the table. Columns []Column // True if the data in the table is compressed, or False if not. Compressed bool // The input format: SequenceFileInputFormat (binary), or TextInputFormat , or a // custom format. InputFormat *string // The physical location of the table. By default, this takes the form of the // warehouse location, followed by the database location in the warehouse, followed // by the table name. Location *string // Must be specified if the table contains any dimension columns. NumberOfBuckets int32 // The output format: SequenceFileOutputFormat (binary), or // IgnoreKeyTextOutputFormat , or a custom format. OutputFormat *string // The user-supplied properties in key-value form. Parameters map[string]string // An object that references a schema stored in the Glue Schema Registry. When // creating a table, you can pass an empty list of columns for the schema, and // instead use a schema reference. SchemaReference *SchemaReference // The serialization/deserialization (SerDe) information. SerdeInfo *SerDeInfo // The information about values that appear frequently in a column (skewed values). SkewedInfo *SkewedInfo // A list specifying the sort order of each bucket in the table. SortColumns []Order // True if the table data is stored in subdirectories, or False if not. StoredAsSubDirectories bool noSmithyDocumentSerde } // Specifies options related to data preview for viewing a sample of your data. type StreamingDataPreviewOptions struct { // The polling time in milliseconds. PollingTime *int64 // The limit to the number of records polled. RecordPollingLimit *int64 noSmithyDocumentSerde } // Defines column statistics supported for character sequence data values. type StringColumnStatisticsData struct { // The average string length in the column. // // This member is required. AverageLength float64 // The size of the longest string in the column. // // This member is required. MaximumLength int64 // The number of distinct values in a column. // // This member is required. NumberOfDistinctValues int64 // The number of null values in the column. // // This member is required. NumberOfNulls int64 noSmithyDocumentSerde } // Represents a collection of related data organized in columns and rows. type Table struct { // The table name. For Hive compatibility, this must be entirely lowercase. // // This member is required. 
Name *string // The ID of the Data Catalog in which the table resides. CatalogId *string // The time when the table definition was created in the Data Catalog. CreateTime *time.Time // The person or entity who created the table. CreatedBy *string // The name of the database where the table metadata resides. For Hive // compatibility, this must be all lowercase. DatabaseName *string // A description of the table. Description *string // A FederatedTable structure that references an entity outside the Glue Data // Catalog. FederatedTable *FederatedTable // Indicates whether the table has been registered with Lake Formation. IsRegisteredWithLakeFormation bool // The last time that the table was accessed. This is usually taken from HDFS, and // might not be reliable. LastAccessTime *time.Time // The last time that column statistics were computed for this table. LastAnalyzedTime *time.Time // The owner of the table. Owner *string // These key-value pairs define properties associated with the table. Parameters map[string]string // A list of columns by which the table is partitioned. Only primitive types are // supported as partition keys. When you create a table used by Amazon Athena, and // you do not specify any partitionKeys , you must at least set the value of // partitionKeys to an empty list. For example: "PartitionKeys": [] PartitionKeys []Column // The retention time for this table. Retention int32 // A storage descriptor containing information about the physical storage of this // table. StorageDescriptor *StorageDescriptor // The type of this table. Glue will create tables with the EXTERNAL_TABLE type. // Other services, such as Athena, may create tables with additional table types. // Glue related table types: EXTERNAL_TABLE Hive compatible attribute - indicates a // non-Hive managed table. GOVERNED Used by Lake Formation. The Glue Data Catalog // understands GOVERNED . TableType *string // A TableIdentifier structure that describes a target table for resource linking. TargetTable *TableIdentifier // The last time that the table was updated. UpdateTime *time.Time // The ID of the table version. VersionId *string // Included for Apache Hive compatibility. Not used in the normal course of Glue // operations. ViewExpandedText *string // Included for Apache Hive compatibility. Not used in the normal course of Glue // operations. If the table is a VIRTUAL_VIEW , certain Athena configuration // encoded in base64. ViewOriginalText *string noSmithyDocumentSerde } // An error record for table operations. type TableError struct { // The details about the error. ErrorDetail *ErrorDetail // The name of the table. For Hive compatibility, this must be entirely lowercase. TableName *string noSmithyDocumentSerde } // A structure that describes a target table for resource linking. type TableIdentifier struct { // The ID of the Data Catalog in which the table resides. CatalogId *string // The name of the catalog database that contains the target table. DatabaseName *string // The name of the target table. Name *string // Region of the target table. Region *string noSmithyDocumentSerde } // A structure used to define a table. type TableInput struct { // The table name. For Hive compatibility, this is folded to lowercase when it is // stored. // // This member is required. Name *string // A description of the table. Description *string // The last time that the table was accessed. LastAccessTime *time.Time // The last time that column statistics were computed for this table. 
LastAnalyzedTime *time.Time // The table owner. Included for Apache Hive compatibility. Not used in the normal // course of Glue operations. Owner *string // These key-value pairs define properties associated with the table. Parameters map[string]string // A list of columns by which the table is partitioned. Only primitive types are // supported as partition keys. When you create a table used by Amazon Athena, and // you do not specify any partitionKeys , you must at least set the value of // partitionKeys to an empty list. For example: "PartitionKeys": [] PartitionKeys []Column // The retention time for this table. Retention int32 // A storage descriptor containing information about the physical storage of this // table. StorageDescriptor *StorageDescriptor // The type of this table. Glue will create tables with the EXTERNAL_TABLE type. // Other services, such as Athena, may create tables with additional table types. // Glue related table types: EXTERNAL_TABLE Hive compatible attribute - indicates a // non-Hive managed table. GOVERNED Used by Lake Formation. The Glue Data Catalog // understands GOVERNED . TableType *string // A TableIdentifier structure that describes a target table for resource linking. TargetTable *TableIdentifier // Included for Apache Hive compatibility. Not used in the normal course of Glue // operations. ViewExpandedText *string // Included for Apache Hive compatibility. Not used in the normal course of Glue // operations. If the table is a VIRTUAL_VIEW , certain Athena configuration // encoded in base64. ViewOriginalText *string noSmithyDocumentSerde } // Specifies a version of a table. type TableVersion struct { // The table in question. Table *Table // The ID value that identifies this table version. A VersionId is a string // representation of an integer. Each version is incremented by 1. VersionId *string noSmithyDocumentSerde } // An error record for table-version operations. type TableVersionError struct { // The details about the error. ErrorDetail *ErrorDetail // The name of the table in question. TableName *string // The ID value of the version in question. A VersionID is a string representation // of an integer. Each version is incremented by 1. VersionId *string noSmithyDocumentSerde } // The sampling parameters that are associated with the machine learning transform. type TaskRun struct { // The last point in time that the requested task run was completed. CompletedOn *time.Time // The list of error strings associated with this task run. ErrorString *string // The amount of time (in seconds) that the task run consumed resources. ExecutionTime int32 // The last point in time that the requested task run was updated. LastModifiedOn *time.Time // The names of the log group for secure logging, associated with this task run. LogGroupName *string // Specifies configuration properties associated with this task run. Properties *TaskRunProperties // The date and time that this task run started. StartedOn *time.Time // The current status of the requested task run. Status TaskStatusType // The unique identifier for this task run. TaskRunId *string // The unique identifier for the transform. TransformId *string noSmithyDocumentSerde } // The criteria that are used to filter the task runs for the machine learning // transform. type TaskRunFilterCriteria struct { // Filter on task runs started after this date. StartedAfter *time.Time // Filter on task runs started before this date. StartedBefore *time.Time // The current status of the task run. 
Status TaskStatusType // The type of task run. TaskRunType TaskType noSmithyDocumentSerde } // The configuration properties for the task run. type TaskRunProperties struct { // The configuration properties for an exporting labels task run. ExportLabelsTaskRunProperties *ExportLabelsTaskRunProperties // The configuration properties for a find matches task run. FindMatchesTaskRunProperties *FindMatchesTaskRunProperties // The configuration properties for an importing labels task run. ImportLabelsTaskRunProperties *ImportLabelsTaskRunProperties // The configuration properties for a labeling set generation task run. LabelingSetGenerationTaskRunProperties *LabelingSetGenerationTaskRunProperties // The type of task run. TaskType TaskType noSmithyDocumentSerde } // The sorting criteria that are used to sort the list of task runs for the // machine learning transform. type TaskRunSortCriteria struct { // The column to be used to sort the list of task runs for the machine learning // transform. // // This member is required. Column TaskRunSortColumnType // The sort direction to be used to sort the list of task runs for the machine // learning transform. // // This member is required. SortDirection SortDirectionType noSmithyDocumentSerde } // Specifies the parameters in the config file of the dynamic transform. type TransformConfigParameter struct { // Specifies the name of the parameter in the config file of the dynamic transform. // // This member is required. Name *string // Specifies the parameter type in the config file of the dynamic transform. // // This member is required. Type ParamType // Specifies whether the parameter is optional or not in the config file of the // dynamic transform. IsOptional *bool // Specifies the list type of the parameter in the config file of the dynamic // transform. ListType ParamType // Specifies the validation message in the config file of the dynamic transform. ValidationMessage *string // Specifies the validation rule in the config file of the dynamic transform. ValidationRule *string // Specifies the value of the parameter in the config file of the dynamic // transform. Value []string noSmithyDocumentSerde } // The encryption-at-rest settings of the transform that apply to accessing user // data. Machine learning transforms can access user data encrypted in Amazon S3 // using KMS. Additionally, imported labels and trained transforms can now be // encrypted using a customer provided KMS key. type TransformEncryption struct { // An MLUserDataEncryption object containing the encryption mode and // customer-provided KMS key ID. MlUserDataEncryption *MLUserDataEncryption // The name of the security configuration. TaskRunSecurityConfigurationName *string noSmithyDocumentSerde } // The criteria used to filter the machine learning transforms. type TransformFilterCriteria struct { // The time and date after which the transforms were created. CreatedAfter *time.Time // The time and date before which the transforms were created. CreatedBefore *time.Time // This value determines which version of Glue this machine learning transform is // compatible with. Glue 1.0 is recommended for most customers. If the value is not // set, the Glue compatibility defaults to Glue 0.9. For more information, see // Glue Versions (https://docs.aws.amazon.com/glue/latest/dg/release-notes.html#release-notes-versions) // in the developer guide. GlueVersion *string // Filter on transforms last modified after this date. 
LastModifiedAfter *time.Time // Filter on transforms last modified before this date. LastModifiedBefore *time.Time // A unique transform name that is used to filter the machine learning transforms. Name *string // Filters on datasets with a specific schema. The Map object is an array of // key-value pairs representing the schema this transform accepts, where Column is // the name of a column, and Type is the type of the data such as an integer or // string. Has an upper bound of 100 columns. Schema []SchemaColumn // Filters the list of machine learning transforms by the last known status of the // transforms (to indicate whether a transform can be used or not). One of // "NOT_READY", "READY", or "DELETING". Status TransformStatusType // The type of machine learning transform that is used to filter the machine // learning transforms. TransformType TransformType noSmithyDocumentSerde } // The algorithm-specific parameters that are associated with the machine learning // transform. type TransformParameters struct { // The type of machine learning transform. For information about the types of // machine learning transforms, see Creating Machine Learning Transforms (https://docs.aws.amazon.com/glue/latest/dg/add-job-machine-learning-transform.html) // . // // This member is required. TransformType TransformType // The parameters for the find matches algorithm. FindMatchesParameters *FindMatchesParameters noSmithyDocumentSerde } // The sorting criteria that are associated with the machine learning transform. type TransformSortCriteria struct { // The column to be used in the sorting criteria that are associated with the // machine learning transform. // // This member is required. Column TransformSortColumnType // The sort direction to be used in the sorting criteria that are associated with // the machine learning transform. // // This member is required. SortDirection SortDirectionType noSmithyDocumentSerde } // Information about a specific trigger. type Trigger struct { // The actions initiated by this trigger. Actions []Action // A description of this trigger. Description *string // Batch condition that must be met (specified number of events received or batch // time window expired) before EventBridge event trigger fires. EventBatchingCondition *EventBatchingCondition // Reserved for future use. Id *string // The name of the trigger. Name *string // The predicate of this trigger, which defines when it will fire. Predicate *Predicate // A cron expression used to specify the schedule (see Time-Based Schedules for // Jobs and Crawlers (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html) // . For example, to run something every day at 12:15 UTC, you would specify: // cron(15 12 * * ? *) . Schedule *string // The current state of the trigger. State TriggerState // The type of trigger that this is. Type TriggerType // The name of the workflow associated with the trigger. WorkflowName *string noSmithyDocumentSerde } // The details of a Trigger node present in the workflow. type TriggerNodeDetails struct { // The information of the trigger represented by the trigger node. Trigger *Trigger noSmithyDocumentSerde } // A structure used to provide information used to update a trigger. This object // updates the previous trigger definition by overwriting it completely. type TriggerUpdate struct { // The actions initiated by this trigger. Actions []Action // A description of this trigger. 
Description *string // Batch condition that must be met (specified number of events received or batch // time window expired) before EventBridge event trigger fires. EventBatchingCondition *EventBatchingCondition // Reserved for future use. Name *string // The predicate of this trigger, which defines when it will fire. Predicate *Predicate // A cron expression used to specify the schedule (see Time-Based Schedules for // Jobs and Crawlers (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html) // . For example, to run something every day at 12:15 UTC, you would specify: // cron(15 12 * * ? *) . Schedule *string noSmithyDocumentSerde } // A partition that contains unfiltered metadata. type UnfilteredPartition struct { // The list of columns the user has permissions to access. AuthorizedColumns []string // A Boolean value indicating that the partition location is registered with Lake // Formation. IsRegisteredWithLakeFormation bool // The partition object. Partition *Partition noSmithyDocumentSerde } // Specifies a transform that combines the rows from two or more datasets into a // single result. type Union struct { // The node ID inputs to the transform. // // This member is required. Inputs []string // The name of the transform node. // // This member is required. Name *string // Indicates the type of Union transform. Specify ALL to join all rows from data // sources to the resulting DynamicFrame. The resulting union does not remove // duplicate rows. Specify DISTINCT to remove duplicate rows in the resulting // DynamicFrame. // // This member is required. UnionType UnionType noSmithyDocumentSerde } // Specifies a custom CSV classifier to be updated. type UpdateCsvClassifierRequest struct { // The name of the classifier. // // This member is required. Name *string // Enables the processing of files that contain only one column. AllowSingleColumn *bool // Indicates whether the CSV file contains a header. ContainsHeader CsvHeaderOption // Specifies the configuration of custom datatypes. CustomDatatypeConfigured *bool // Specifies a list of supported custom datatypes. CustomDatatypes []string // A custom symbol to denote what separates each column entry in the row. Delimiter *string // Specifies not to trim values before identifying the type of column values. The // default value is true. DisableValueTrimming *bool // A list of strings representing column names. Header []string // A custom symbol to denote what combines content into a single column value. It // must be different from the column delimiter. QuoteSymbol *string noSmithyDocumentSerde } // Specifies a grok classifier to update when passed to UpdateClassifier . type UpdateGrokClassifierRequest struct { // The name of the GrokClassifier . // // This member is required. Name *string // An identifier of the data format that the classifier matches, such as Twitter, // JSON, Omniture logs, Amazon CloudWatch Logs, and so on. Classification *string // Optional custom grok patterns used by this classifier. CustomPatterns *string // The grok pattern used by this classifier. GrokPattern *string noSmithyDocumentSerde } // Specifies a JSON classifier to be updated. type UpdateJsonClassifierRequest struct { // The name of the classifier. // // This member is required. Name *string // A JsonPath string defining the JSON data for the classifier to classify. 
Glue // supports a subset of JsonPath, as described in Writing JsonPath Custom // Classifiers (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json) // . JsonPath *string noSmithyDocumentSerde } // Specifies an XML classifier to be updated. type UpdateXMLClassifierRequest struct { // The name of the classifier. // // This member is required. Name *string // An identifier of the data format that the classifier matches. Classification *string // The XML tag designating the element that contains each record in an XML // document being parsed. This cannot identify a self-closing element (closed by /> // ). An empty row element that contains only attributes can be parsed as long as // it ends with a closing tag (for example, is okay, but is not). RowTag *string noSmithyDocumentSerde } // The options to configure an upsert operation when writing to a Redshift target . type UpsertRedshiftTargetOptions struct { // The name of the connection to use to write to Redshift. ConnectionName *string // The physical location of the Redshift table. TableLocation *string // The keys used to determine whether to perform an update or insert. UpsertKeys []string noSmithyDocumentSerde } // Represents the equivalent of a Hive user-defined function ( UDF ) definition. type UserDefinedFunction struct { // The ID of the Data Catalog in which the function resides. CatalogId *string // The Java class that contains the function code. ClassName *string // The time at which the function was created. CreateTime *time.Time // The name of the catalog database that contains the function. DatabaseName *string // The name of the function. FunctionName *string // The owner of the function. OwnerName *string // The owner type. OwnerType PrincipalType // The resource URIs for the function. ResourceUris []ResourceUri noSmithyDocumentSerde } // A structure used to create or update a user-defined function. type UserDefinedFunctionInput struct { // The Java class that contains the function code. ClassName *string // The name of the function. FunctionName *string // The owner of the function. OwnerName *string // The owner type. OwnerType PrincipalType // The resource URIs for the function. ResourceUris []ResourceUri noSmithyDocumentSerde } // A workflow is a collection of multiple dependent Glue jobs and crawlers that // are run to complete a complex ETL task. A workflow manages the execution and // monitoring of all its jobs and crawlers. type Workflow struct { // This structure indicates the details of the blueprint that this particular // workflow is created from. BlueprintDetails *BlueprintDetails // The date and time when the workflow was created. CreatedOn *time.Time // A collection of properties to be used as part of each execution of the // workflow. The run properties are made available to each job in the workflow. A // job can modify the properties for the next jobs in the flow. DefaultRunProperties map[string]string // A description of the workflow. Description *string // The graph representing all the Glue components that belong to the workflow as // nodes and directed connections between them as edges. Graph *WorkflowGraph // The date and time when the workflow was last modified. LastModifiedOn *time.Time // The information about the last execution of the workflow. 
// A workflow is a collection of multiple dependent Glue jobs and crawlers that
// are run to complete a complex ETL task. A workflow manages the execution and
// monitoring of all its jobs and crawlers.
type Workflow struct {

	// This structure indicates the details of the blueprint that this particular
	// workflow is created from.
	BlueprintDetails *BlueprintDetails

	// The date and time when the workflow was created.
	CreatedOn *time.Time

	// A collection of properties to be used as part of each execution of the
	// workflow. The run properties are made available to each job in the workflow.
	// A job can modify the properties for the next jobs in the flow.
	DefaultRunProperties map[string]string

	// A description of the workflow.
	Description *string

	// The graph representing all the Glue components that belong to the workflow as
	// nodes and directed connections between them as edges.
	Graph *WorkflowGraph

	// The date and time when the workflow was last modified.
	LastModifiedOn *time.Time

	// The information about the last execution of the workflow.
	LastRun *WorkflowRun

	// You can use this parameter to prevent unwanted multiple updates to data, to
	// control costs, or in some cases, to prevent exceeding the maximum number of
	// concurrent runs of any of the component jobs. If you leave this parameter
	// blank, there is no limit to the number of concurrent workflow runs.
	MaxConcurrentRuns *int32

	// The name of the workflow.
	Name *string

	noSmithyDocumentSerde
}

// A workflow graph represents the complete workflow, containing all the Glue
// components present in the workflow and all the directed connections between
// them.
type WorkflowGraph struct {

	// A list of all the directed connections between the nodes belonging to the
	// workflow.
	Edges []Edge

	// A list of the Glue components belonging to the workflow, represented as nodes.
	Nodes []Node

	noSmithyDocumentSerde
}

// A workflow run is an execution of a workflow that provides all the runtime
// information.
type WorkflowRun struct {

	// The date and time when the workflow run completed.
	CompletedOn *time.Time

	// This error message describes any error that may have occurred in starting the
	// workflow run. Currently the only error message is "Concurrent runs exceeded
	// for workflow: foo."
	ErrorMessage *string

	// The graph representing all the Glue components that belong to the workflow as
	// nodes and directed connections between them as edges.
	Graph *WorkflowGraph

	// The name of the workflow that was run.
	Name *string

	// The ID of the previous workflow run.
	PreviousRunId *string

	// The date and time when the workflow run was started.
	StartedOn *time.Time

	// The batch condition that started the workflow run.
	StartingEventBatchCondition *StartingEventBatchCondition

	// The statistics of the run.
	Statistics *WorkflowRunStatistics

	// The status of the workflow run.
	Status WorkflowRunStatus

	// The ID of this workflow run.
	WorkflowRunId *string

	// The workflow run properties that were set during the run.
	WorkflowRunProperties map[string]string

	noSmithyDocumentSerde
}

// Workflow run statistics provide statistics about the workflow run.
type WorkflowRunStatistics struct {

	// Indicates the count of job runs in the ERROR state in the workflow run.
	ErroredActions int32

	// Total number of Actions that have failed.
	FailedActions int32

	// Total number of Actions in the running state.
	RunningActions int32

	// Total number of Actions that have stopped.
	StoppedActions int32

	// Total number of Actions that have succeeded.
	SucceededActions int32

	// Total number of Actions that timed out.
	TimeoutActions int32

	// Total number of Actions in the workflow run.
	TotalActions int32

	// Indicates the count of job runs in the WAITING state in the workflow run.
	WaitingActions int32

	noSmithyDocumentSerde
}
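// WorkflowRunStatistics above is a set of plain counters, so simple arithmetic
// over its fields answers common monitoring questions. The helper below is an
// illustrative sketch only and is not part of the generated code: it assumes an
// Action is finished once it has succeeded, failed, stopped, timed out, or
// errored, and reports how many Actions in the run have not yet finished.
func unfinishedActions(s WorkflowRunStatistics) int32 {
	finished := s.SucceededActions + s.FailedActions + s.StoppedActions +
		s.TimeoutActions + s.ErroredActions
	return s.TotalActions - finished
}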
// A classifier for XML content.
type XMLClassifier struct {

	// An identifier of the data format that the classifier matches.
	//
	// This member is required.
	Classification *string

	// The name of the classifier.
	//
	// This member is required.
	Name *string

	// The time that this classifier was registered.
	CreationTime *time.Time

	// The time that this classifier was last updated.
	LastUpdated *time.Time

	// The XML tag designating the element that contains each record in an XML
	// document being parsed. This can't identify a self-closing element (closed by
	// />). An empty row element that contains only attributes can be parsed as long
	// as it ends with a closing tag (for example, <row item_a="A" item_b="B"></row>
	// is okay, but <row item_a="A" item_b="B" /> is not).
	RowTag *string

	// The version of this classifier.
	Version int64

	noSmithyDocumentSerde
}

type noSmithyDocumentSerde = smithydocument.NoSerde
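// UpdateXMLClassifierRequest and XMLClassifier above both rely on RowTag to name
// the element that wraps each record. The sketch below is illustrative only and
// is not part of the generated code: it builds a classifier update for a
// hypothetical feed in which every record is wrapped in a non-self-closing
// <item> element. The classifier name and classification value are placeholders.
func exampleUpdateXMLClassifierRequest() UpdateXMLClassifierRequest {
	name := "example-xml-classifier"
	classification := "example-item-records"
	rowTag := "item"

	return UpdateXMLClassifierRequest{
		Name:           &name,
		Classification: &classification,
		RowTag:         &rowTag,
	}
}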