package awskendra // Provides the configuration information required for Amazon Kendra Web Crawler. // // Example: // // The code below shows an example of how to instantiate this type. // // The values are placeholders you should change. // import "github.com/aws/aws-cdk-go/awscdk" // // webCrawlerConfigurationProperty := &WebCrawlerConfigurationProperty{ // Urls: &WebCrawlerUrlsProperty{ // SeedUrlConfiguration: &WebCrawlerSeedUrlConfigurationProperty{ // SeedUrls: []*string{ // jsii.String("seedUrls"), // }, // // // the properties below are optional // WebCrawlerMode: jsii.String("webCrawlerMode"), // }, // SiteMapsConfiguration: &WebCrawlerSiteMapsConfigurationProperty{ // SiteMaps: []*string{ // jsii.String("siteMaps"), // }, // }, // }, // // // the properties below are optional // AuthenticationConfiguration: &WebCrawlerAuthenticationConfigurationProperty{ // BasicAuthentication: []interface{}{ // &WebCrawlerBasicAuthenticationProperty{ // Credentials: jsii.String("credentials"), // Host: jsii.String("host"), // Port: jsii.Number(123), // }, // }, // }, // CrawlDepth: jsii.Number(123), // MaxContentSizePerPageInMegaBytes: jsii.Number(123), // MaxLinksPerPage: jsii.Number(123), // MaxUrlsPerMinuteCrawlRate: jsii.Number(123), // ProxyConfiguration: &ProxyConfigurationProperty{ // Host: jsii.String("host"), // Port: jsii.Number(123), // // // the properties below are optional // Credentials: jsii.String("credentials"), // }, // UrlExclusionPatterns: []*string{ // jsii.String("urlExclusionPatterns"), // }, // UrlInclusionPatterns: []*string{ // jsii.String("urlInclusionPatterns"), // }, // } // // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html // type CfnDataSource_WebCrawlerConfigurationProperty struct { // Specifies the seed or starting point URLs of the websites or the sitemap URLs of the websites you want to crawl. // // You can include website subdomains. You can list up to 100 seed URLs and up to three sitemap URLs. // // You can only crawl websites that use the secure communication protocol, Hypertext Transfer Protocol Secure (HTTPS). If you receive an error when crawling a website, it could be that the website is blocked from crawling. // // *When selecting websites to index, you must adhere to the [Amazon Acceptable Use Policy](https://docs.aws.amazon.com/aup/) and all other Amazon terms. Remember that you must only use Amazon Kendra Web Crawler to index your own webpages, or webpages that you have authorization to index.* // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-urls // Urls interface{} `field:"required" json:"urls" yaml:"urls"` // Configuration information required to connect to websites using authentication. // // You can connect to websites using basic authentication of user name and password. You use a secret in [AWS Secrets Manager](https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html) to store your authentication credentials. // // You must provide the website host name and port number. For example, the host name of https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS. // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-authenticationconfiguration // AuthenticationConfiguration interface{} `field:"optional" json:"authenticationConfiguration" yaml:"authenticationConfiguration"` // The 'depth' or number of levels from the seed level to crawl. // // For example, the seed URL page is depth 1 and any hyperlinks on this page that are also crawled are depth 2. // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-crawldepth // CrawlDepth *float64 `field:"optional" json:"crawlDepth" yaml:"crawlDepth"` // The maximum size (in MB) of a web page or attachment to crawl. // // Files larger than this size (in MB) are skipped/not crawled. // // The default maximum size of a web page or attachment is set to 50 MB. // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-maxcontentsizeperpageinmegabytes // MaxContentSizePerPageInMegaBytes *float64 `field:"optional" json:"maxContentSizePerPageInMegaBytes" yaml:"maxContentSizePerPageInMegaBytes"` // The maximum number of URLs on a web page to include when crawling a website. // // This number is per web page. // // As a website’s web pages are crawled, any URLs the web pages link to are also crawled. URLs on a web page are crawled in order of appearance. // // The default maximum links per page is 100. // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-maxlinksperpage // MaxLinksPerPage *float64 `field:"optional" json:"maxLinksPerPage" yaml:"maxLinksPerPage"` // The maximum number of URLs crawled per website host per minute. // // A minimum of one URL is required. // // The default maximum number of URLs crawled per website host per minute is 300. // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-maxurlsperminutecrawlrate // MaxUrlsPerMinuteCrawlRate *float64 `field:"optional" json:"maxUrlsPerMinuteCrawlRate" yaml:"maxUrlsPerMinuteCrawlRate"` // Configuration information required to connect to your internal websites via a web proxy. // // You must provide the website host name and port number. For example, the host name of https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard port for HTTPS. // // Web proxy credentials are optional and you can use them to connect to a web proxy server that requires basic authentication. To store web proxy credentials, you use a secret in [AWS Secrets Manager](https://docs.aws.amazon.com/secretsmanager/latest/userguide/intro.html) . // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-proxyconfiguration // ProxyConfiguration interface{} `field:"optional" json:"proxyConfiguration" yaml:"proxyConfiguration"` // A list of regular expression patterns to exclude certain URLs to crawl. // // URLs that match the patterns are excluded from the index. URLs that don't match the patterns are included in the index. If a URL matches both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index. // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-urlexclusionpatterns // UrlExclusionPatterns *[]*string `field:"optional" json:"urlExclusionPatterns" yaml:"urlExclusionPatterns"` // A list of regular expression patterns to include certain URLs to crawl. // // URLs that match the patterns are included in the index. URLs that don't match the patterns are excluded from the index. If a URL matches both an inclusion and exclusion pattern, the exclusion pattern takes precedence and the URL file isn't included in the index. // See: http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-kendra-datasource-webcrawlerconfiguration.html#cfn-kendra-datasource-webcrawlerconfiguration-urlinclusionpatterns // UrlInclusionPatterns *[]*string `field:"optional" json:"urlInclusionPatterns" yaml:"urlInclusionPatterns"` }