/*
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
/*
* Do not modify this file. This file is generated from the kendra-2019-02-03.normal.json service model.
*/
using System;
using System.Collections.Generic;
using System.Xml.Serialization;
using System.Text;
using System.IO;
using System.Net;
using Amazon.Runtime;
using Amazon.Runtime.Internal;
namespace Amazon.Kendra.Model
{
/// <summary>
/// Provides the configuration information required for Amazon Kendra Web Crawler.
/// </summary>
public partial class WebCrawlerConfiguration
{
    // Scalar settings are stored as nullable values so that the IsSet* checks
    // below can tell "never assigned" apart from an explicitly assigned 0.
    private AuthenticationConfiguration _authenticationConfiguration;
    private int? _crawlDepth;
    private float? _maxContentSizePerPageInMegaBytes;
    private int? _maxLinksPerPage;
    private int? _maxUrlsPerMinuteCrawlRate;
    private ProxyConfiguration _proxyConfiguration;
    // The pattern lists start out empty (not null); an empty list counts as "not set".
    private List<string> _urlExclusionPatterns = new List<string>();
    private List<string> _urlInclusionPatterns = new List<string>();
    private Urls _urls;

    /// <summary>
    /// Gets and sets the property AuthenticationConfiguration.
    /// <para>
    /// Configuration information required to connect to websites using authentication.
    /// </para>
    ///
    /// <para>
    /// You can connect to websites using basic authentication of user name and password.
    /// You use a secret in Secrets Manager to store your authentication credentials.
    /// </para>
    ///
    /// <para>
    /// You must provide the website host name and port number. For example, the host name
    /// of https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard
    /// port for HTTPS.
    /// </para>
    /// </summary>
    public AuthenticationConfiguration AuthenticationConfiguration
    {
        get { return this._authenticationConfiguration; }
        set { this._authenticationConfiguration = value; }
    }

    /// <summary>
    /// Checks whether the AuthenticationConfiguration property is set.
    /// </summary>
    /// <returns>True when the backing field is not null.</returns>
    internal bool IsSetAuthenticationConfiguration()
    {
        return this._authenticationConfiguration != null;
    }

    /// <summary>
    /// Gets and sets the property CrawlDepth.
    /// <para>
    /// The 'depth' or number of levels from the seed level to crawl. For example, the seed
    /// URL page is depth 1 and any hyperlinks on this page that are also crawled are depth
    /// 2.
    /// </para>
    /// <para>
    /// The getter returns 0 when the property has never been assigned.
    /// </para>
    /// </summary>
    [AWSProperty(Min=0, Max=10)]
    public int CrawlDepth
    {
        get { return this._crawlDepth.GetValueOrDefault(); }
        set { this._crawlDepth = value; }
    }

    /// <summary>
    /// Checks whether the CrawlDepth property is set.
    /// </summary>
    /// <returns>True when a value has been assigned to the backing field.</returns>
    internal bool IsSetCrawlDepth()
    {
        return this._crawlDepth.HasValue;
    }

    /// <summary>
    /// Gets and sets the property MaxContentSizePerPageInMegaBytes.
    /// <para>
    /// The maximum size (in MB) of a web page or attachment to crawl.
    /// </para>
    ///
    /// <para>
    /// Files larger than this size (in MB) are skipped/not crawled.
    /// </para>
    ///
    /// <para>
    /// The default maximum size of a web page or attachment is set to 50 MB.
    /// </para>
    /// <para>
    /// The getter returns 0 when the property has never been assigned.
    /// </para>
    /// </summary>
    [AWSProperty(Max=50)]
    public float MaxContentSizePerPageInMegaBytes
    {
        get { return this._maxContentSizePerPageInMegaBytes.GetValueOrDefault(); }
        set { this._maxContentSizePerPageInMegaBytes = value; }
    }

    /// <summary>
    /// Checks whether the MaxContentSizePerPageInMegaBytes property is set.
    /// </summary>
    /// <returns>True when a value has been assigned to the backing field.</returns>
    internal bool IsSetMaxContentSizePerPageInMegaBytes()
    {
        return this._maxContentSizePerPageInMegaBytes.HasValue;
    }

    /// <summary>
    /// Gets and sets the property MaxLinksPerPage.
    /// <para>
    /// The maximum number of URLs on a web page to include when crawling a website. This
    /// number is per web page.
    /// </para>
    ///
    /// <para>
    /// As a website’s web pages are crawled, any URLs the web pages link to are also crawled.
    /// URLs on a web page are crawled in order of appearance.
    /// </para>
    ///
    /// <para>
    /// The default maximum links per page is 100.
    /// </para>
    /// <para>
    /// The getter returns 0 when the property has never been assigned.
    /// </para>
    /// </summary>
    [AWSProperty(Min=1, Max=1000)]
    public int MaxLinksPerPage
    {
        get { return this._maxLinksPerPage.GetValueOrDefault(); }
        set { this._maxLinksPerPage = value; }
    }

    /// <summary>
    /// Checks whether the MaxLinksPerPage property is set.
    /// </summary>
    /// <returns>True when a value has been assigned to the backing field.</returns>
    internal bool IsSetMaxLinksPerPage()
    {
        return this._maxLinksPerPage.HasValue;
    }

    /// <summary>
    /// Gets and sets the property MaxUrlsPerMinuteCrawlRate.
    /// <para>
    /// The maximum number of URLs crawled per website host per minute.
    /// </para>
    ///
    /// <para>
    /// A minimum of one URL is required.
    /// </para>
    ///
    /// <para>
    /// The default maximum number of URLs crawled per website host per minute is 300.
    /// </para>
    /// <para>
    /// The getter returns 0 when the property has never been assigned.
    /// </para>
    /// </summary>
    [AWSProperty(Min=1, Max=300)]
    public int MaxUrlsPerMinuteCrawlRate
    {
        get { return this._maxUrlsPerMinuteCrawlRate.GetValueOrDefault(); }
        set { this._maxUrlsPerMinuteCrawlRate = value; }
    }

    /// <summary>
    /// Checks whether the MaxUrlsPerMinuteCrawlRate property is set.
    /// </summary>
    /// <returns>True when a value has been assigned to the backing field.</returns>
    internal bool IsSetMaxUrlsPerMinuteCrawlRate()
    {
        return this._maxUrlsPerMinuteCrawlRate.HasValue;
    }

    /// <summary>
    /// Gets and sets the property ProxyConfiguration.
    /// <para>
    /// Configuration information required to connect to your internal websites via a web
    /// proxy.
    /// </para>
    ///
    /// <para>
    /// You must provide the website host name and port number. For example, the host name
    /// of https://a.example.com/page1.html is "a.example.com" and the port is 443, the standard
    /// port for HTTPS.
    /// </para>
    ///
    /// <para>
    /// Web proxy credentials are optional and you can use them to connect to a web proxy
    /// server that requires basic authentication. To store web proxy credentials, you use
    /// a secret in Secrets Manager.
    /// </para>
    /// </summary>
    public ProxyConfiguration ProxyConfiguration
    {
        get { return this._proxyConfiguration; }
        set { this._proxyConfiguration = value; }
    }

    /// <summary>
    /// Checks whether the ProxyConfiguration property is set.
    /// </summary>
    /// <returns>True when the backing field is not null.</returns>
    internal bool IsSetProxyConfiguration()
    {
        return this._proxyConfiguration != null;
    }

    /// <summary>
    /// Gets and sets the property UrlExclusionPatterns.
    /// <para>
    /// A list of regular expression patterns to exclude certain URLs to crawl. URLs that
    /// match the patterns are excluded from the index. URLs that don't match the patterns
    /// are included in the index. If a URL matches both an inclusion and exclusion pattern,
    /// the exclusion pattern takes precedence and the URL file isn't included in the index.
    /// </para>
    /// </summary>
    [AWSProperty(Min=0, Max=250)]
    public List<string> UrlExclusionPatterns
    {
        get { return this._urlExclusionPatterns; }
        set { this._urlExclusionPatterns = value; }
    }

    /// <summary>
    /// Checks whether the UrlExclusionPatterns property is set.
    /// </summary>
    /// <returns>True when the list is not null and contains at least one pattern.</returns>
    internal bool IsSetUrlExclusionPatterns()
    {
        return this._urlExclusionPatterns != null && this._urlExclusionPatterns.Count > 0;
    }

    /// <summary>
    /// Gets and sets the property UrlInclusionPatterns.
    /// <para>
    /// A list of regular expression patterns to include certain URLs to crawl. URLs that
    /// match the patterns are included in the index. URLs that don't match the patterns are
    /// excluded from the index. If a URL matches both an inclusion and exclusion pattern,
    /// the exclusion pattern takes precedence and the URL file isn't included in the index.
    /// </para>
    /// </summary>
    [AWSProperty(Min=0, Max=250)]
    public List<string> UrlInclusionPatterns
    {
        get { return this._urlInclusionPatterns; }
        set { this._urlInclusionPatterns = value; }
    }

    /// <summary>
    /// Checks whether the UrlInclusionPatterns property is set.
    /// </summary>
    /// <returns>True when the list is not null and contains at least one pattern.</returns>
    internal bool IsSetUrlInclusionPatterns()
    {
        return this._urlInclusionPatterns != null && this._urlInclusionPatterns.Count > 0;
    }

    /// <summary>
    /// Gets and sets the property Urls.
    /// <para>
    /// Specifies the seed or starting point URLs of the websites or the sitemap URLs of the
    /// websites you want to crawl.
    /// </para>
    ///
    /// <para>
    /// You can include website subdomains. You can list up to 100 seed URLs and up to three
    /// sitemap URLs.
    /// </para>
    ///
    /// <para>
    /// You can only crawl websites that use the secure communication protocol, Hypertext
    /// Transfer Protocol Secure (HTTPS). If you receive an error when crawling a website,
    /// it could be that the website is blocked from crawling.
    /// </para>
    ///
    /// <para>
    /// <i>When selecting websites to index, you must adhere to the Amazon
    /// Acceptable Use Policy and all other Amazon terms. Remember that you must only
    /// use Amazon Kendra Web Crawler to index your own web pages, or web pages that you have
    /// authorization to index.</i>
    /// </para>
    /// </summary>
    [AWSProperty(Required=true)]
    public Urls Urls
    {
        get { return this._urls; }
        set { this._urls = value; }
    }

    /// <summary>
    /// Checks whether the Urls property is set.
    /// </summary>
    /// <returns>True when the backing field is not null.</returns>
    internal bool IsSetUrls()
    {
        return this._urls != null;
    }
}
}