/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

/*
 * Do not modify this file. This file is generated from the kendra-2019-02-03.normal.json service model.
 */
using System;
using System.Collections.Generic;
using System.Xml.Serialization;
using System.Text;
using System.IO;
using System.Net;

using Amazon.Runtime;
using Amazon.Runtime.Internal;

namespace Amazon.Kendra.Model
{
    /// <summary>
    /// Provides the configuration information required for Amazon Kendra Web Crawler.
    /// </summary>
    public partial class WebCrawlerConfiguration
    {
        private AuthenticationConfiguration _authenticationConfiguration;
        private int? _crawlDepth;
        private float? _maxContentSizePerPageInMegaBytes;
        private int? _maxLinksPerPage;
        private int? _maxUrlsPerMinuteCrawlRate;
        private ProxyConfiguration _proxyConfiguration;
        private List<string> _urlExclusionPatterns = new List<string>();
        private List<string> _urlInclusionPatterns = new List<string>();
        private Urls _urls;

        /// <summary>
        /// Gets and sets the property AuthenticationConfiguration.
        /// <para>
        /// Configuration information required to connect to websites using authentication.
        /// </para>
        ///
        /// <para>
        /// You can connect to websites using basic authentication of user name and password.
        /// You use a secret in Secrets Manager to store your authentication credentials.
        /// </para>
        ///
        /// <para>
        /// You must provide the website host name and port number. For example, the host name
        /// of https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard
        /// port for HTTPS.
        /// </para>
        /// </summary>
        public AuthenticationConfiguration AuthenticationConfiguration
        {
            get { return this._authenticationConfiguration; }
            set { this._authenticationConfiguration = value; }
        }

        // Check to see if AuthenticationConfiguration property is set
        internal bool IsSetAuthenticationConfiguration()
        {
            return this._authenticationConfiguration != null;
        }

        /// <summary>
        /// Gets and sets the property CrawlDepth.
        /// <para>
        /// The 'depth' or number of levels from the seed level to crawl. For example, the seed
        /// URL page is depth 1 and any hyperlinks on this page that are also crawled are depth
        /// 2.
        /// </para>
        /// </summary>
        [AWSProperty(Min=0, Max=10)]
        public int CrawlDepth
        {
            // The backing field is nullable; an unset value reads back as default(int).
            get { return this._crawlDepth.GetValueOrDefault(); }
            set { this._crawlDepth = value; }
        }

        // Check to see if CrawlDepth property is set
        internal bool IsSetCrawlDepth()
        {
            return this._crawlDepth.HasValue;
        }

        /// <summary>
        /// Gets and sets the property MaxContentSizePerPageInMegaBytes.
        /// <para>
        /// The maximum size (in MB) of a web page or attachment to crawl.
        /// </para>
        ///
        /// <para>
        /// Files larger than this size (in MB) are skipped/not crawled.
        /// </para>
        ///
        /// <para>
        /// The default maximum size of a web page or attachment is set to 50 MB.
        /// </para>
        /// </summary>
        [AWSProperty(Max=50)]
        public float MaxContentSizePerPageInMegaBytes
        {
            // The backing field is nullable; an unset value reads back as default(float).
            get { return this._maxContentSizePerPageInMegaBytes.GetValueOrDefault(); }
            set { this._maxContentSizePerPageInMegaBytes = value; }
        }

        // Check to see if MaxContentSizePerPageInMegaBytes property is set
        internal bool IsSetMaxContentSizePerPageInMegaBytes()
        {
            return this._maxContentSizePerPageInMegaBytes.HasValue;
        }

        /// <summary>
        /// Gets and sets the property MaxLinksPerPage.
        /// <para>
        /// The maximum number of URLs on a web page to include when crawling a website. This
        /// number is per web page.
        /// </para>
        ///
        /// <para>
        /// As a website’s web pages are crawled, any URLs the web pages link to are also crawled.
        /// URLs on a web page are crawled in order of appearance.
        /// </para>
        ///
        /// <para>
        /// The default maximum links per page is 100.
        /// </para>
        /// </summary>
        [AWSProperty(Min=1, Max=1000)]
        public int MaxLinksPerPage
        {
            // The backing field is nullable; an unset value reads back as default(int).
            get { return this._maxLinksPerPage.GetValueOrDefault(); }
            set { this._maxLinksPerPage = value; }
        }

        // Check to see if MaxLinksPerPage property is set
        internal bool IsSetMaxLinksPerPage()
        {
            return this._maxLinksPerPage.HasValue;
        }

        /// <summary>
        /// Gets and sets the property MaxUrlsPerMinuteCrawlRate.
        /// <para>
        /// The maximum number of URLs crawled per website host per minute.
        /// </para>
        ///
        /// <para>
        /// A minimum of one URL is required.
        /// </para>
        ///
        /// <para>
        /// The default maximum number of URLs crawled per website host per minute is 300.
        /// </para>
        /// </summary>
        [AWSProperty(Min=1, Max=300)]
        public int MaxUrlsPerMinuteCrawlRate
        {
            // The backing field is nullable; an unset value reads back as default(int).
            get { return this._maxUrlsPerMinuteCrawlRate.GetValueOrDefault(); }
            set { this._maxUrlsPerMinuteCrawlRate = value; }
        }

        // Check to see if MaxUrlsPerMinuteCrawlRate property is set
        internal bool IsSetMaxUrlsPerMinuteCrawlRate()
        {
            return this._maxUrlsPerMinuteCrawlRate.HasValue;
        }

        /// <summary>
        /// Gets and sets the property ProxyConfiguration.
        /// <para>
        /// Configuration information required to connect to your internal websites via a web
        /// proxy.
        /// </para>
        ///
        /// <para>
        /// You must provide the website host name and port number. For example, the host name
        /// of https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard
        /// port for HTTPS.
        /// </para>
        ///
        /// <para>
        /// Web proxy credentials are optional and you can use them to connect to a web proxy
        /// server that requires basic authentication. To store web proxy credentials, you use
        /// a secret in Secrets Manager.
        /// </para>
        /// </summary>
        public ProxyConfiguration ProxyConfiguration
        {
            get { return this._proxyConfiguration; }
            set { this._proxyConfiguration = value; }
        }

        // Check to see if ProxyConfiguration property is set
        internal bool IsSetProxyConfiguration()
        {
            return this._proxyConfiguration != null;
        }

        /// <summary>
        /// Gets and sets the property UrlExclusionPatterns.
        /// <para>
        /// A list of regular expression patterns to exclude certain URLs to crawl. URLs that
        /// match the patterns are excluded from the index. URLs that don't match the patterns
        /// are included in the index. If a URL matches both an inclusion and exclusion pattern,
        /// the exclusion pattern takes precedence and the URL file isn't included in the index.
        /// </para>
        /// </summary>
        [AWSProperty(Min=0, Max=250)]
        public List<string> UrlExclusionPatterns
        {
            get { return this._urlExclusionPatterns; }
            set { this._urlExclusionPatterns = value; }
        }

        // Check to see if UrlExclusionPatterns property is set
        // (a null or empty list is reported as not set)
        internal bool IsSetUrlExclusionPatterns()
        {
            return this._urlExclusionPatterns != null && this._urlExclusionPatterns.Count > 0;
        }

        /// <summary>
        /// Gets and sets the property UrlInclusionPatterns.
        /// <para>
        /// A list of regular expression patterns to include certain URLs to crawl. URLs that
        /// match the patterns are included in the index. URLs that don't match the patterns are
        /// excluded from the index. If a URL matches both an inclusion and exclusion pattern,
        /// the exclusion pattern takes precedence and the URL file isn't included in the index.
        /// </para>
        /// </summary>
        [AWSProperty(Min=0, Max=250)]
        public List<string> UrlInclusionPatterns
        {
            get { return this._urlInclusionPatterns; }
            set { this._urlInclusionPatterns = value; }
        }

        // Check to see if UrlInclusionPatterns property is set
        // (a null or empty list is reported as not set)
        internal bool IsSetUrlInclusionPatterns()
        {
            return this._urlInclusionPatterns != null && this._urlInclusionPatterns.Count > 0;
        }

        /// <summary>
        /// Gets and sets the property Urls.
        /// <para>
        /// Specifies the seed or starting point URLs of the websites or the sitemap URLs of the
        /// websites you want to crawl.
        /// </para>
        ///
        /// <para>
        /// You can include website subdomains. You can list up to 100 seed URLs and up to three
        /// sitemap URLs.
        /// </para>
        ///
        /// <para>
        /// You can only crawl websites that use the secure communication protocol, Hypertext
        /// Transfer Protocol Secure (HTTPS). If you receive an error when crawling a website,
        /// it could be that the website is blocked from crawling.
        /// </para>
        ///
        /// <para>
        /// <i>When selecting websites to index, you must adhere to the Amazon
        /// Acceptable Use Policy and all other Amazon terms. Remember that you must only
        /// use Amazon Kendra Web Crawler to index your own web pages, or web pages that you have
        /// authorization to index.</i>
        /// </para>
        /// </summary>
        [AWSProperty(Required=true)]
        public Urls Urls
        {
            get { return this._urls; }
            set { this._urls = value; }
        }

        // Check to see if Urls property is set
        internal bool IsSetUrls()
        {
            return this._urls != null;
        }

    }
}