/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#pragma once
#include /* NOTE(review): the include target is missing here, and no class declaration is visible before the member documentation below; both appear to have been lost. Restore them from the original header. */
/**
 * Provides the configuration information for the seed or starting point URLs to
 * crawl. When selecting websites to index, you must adhere to the Amazon
 * Acceptable Use Policy and all other Amazon terms. Remember that you must only
 * use Amazon Kendra Web Crawler to index your own web pages, or web pages that
 * you have authorization to index.
 *
 * See Also: AWS API Reference
 */
/**
 * The list of seed or starting point URLs of the websites you want to crawl.
 * The list can include a maximum of 100 seed URLs.
*/ inline const Aws::VectorThe list of seed or starting point URLs of the websites you want to * crawl.
The list can include a maximum of 100 seed URLs.
*/ inline bool SeedUrlsHasBeenSet() const { return m_seedUrlsHasBeenSet; } // Returns the m_seedUrlsHasBeenSet flag, which the AddSeedUrls overloads in this file set to true.
/**
 * The list of seed or starting point URLs of the websites you want to crawl.
The list can include a maximum of 100 seed URLs.
*/ inline void SetSeedUrls(const Aws::VectorThe list of seed or starting point URLs of the websites you want to * crawl.
The list can include a maximum of 100 seed URLs.
*/ inline void SetSeedUrls(Aws::VectorThe list of seed or starting point URLs of the websites you want to * crawl.
The list can include a maximum of 100 seed URLs.
*/ inline SeedUrlConfiguration& WithSeedUrls(const Aws::VectorThe list of seed or starting point URLs of the websites you want to * crawl.
The list can include a maximum of 100 seed URLs.
*/ inline SeedUrlConfiguration& WithSeedUrls(Aws::VectorThe list of seed or starting point URLs of the websites you want to * crawl.
The list can include a maximum of 100 seed URLs.
*/ inline SeedUrlConfiguration& AddSeedUrls(const Aws::String& value) { m_seedUrlsHasBeenSet = true; m_seedUrls.push_back(value); return *this; } // Sets the has-been-set flag, appends `value` to m_seedUrls via push_back, and returns a reference to this object.
/**
 * The list of seed or starting point URLs of the websites you want to crawl.
The list can include a maximum of 100 seed URLs.
*/ inline SeedUrlConfiguration& AddSeedUrls(Aws::String&& value) { m_seedUrlsHasBeenSet = true; m_seedUrls.push_back(std::move(value)); return *this; } // Rvalue overload: sets the has-been-set flag, hands std::move(value) to push_back on m_seedUrls, and returns a reference to this object.
/**
 * The list of seed or starting point URLs of the websites you want to crawl.
The list can include a maximum of 100 seed URLs.
*/ inline SeedUrlConfiguration& AddSeedUrls(const char* value) { m_seedUrlsHasBeenSet = true; m_seedUrls.push_back(value); return *this; } // C-string overload: sets the has-been-set flag, passes `value` to push_back on m_seedUrls, and returns a reference to this object. NOTE(review): presumably converted to Aws::String on insertion; the element type of m_seedUrls is declared outside this view, confirm.
/**
 * You can choose one of the following modes:
 *
 * - HOST_ONLY — crawl only the website host names. For example, if the
 *   seed URL is "abc.example.com", then only URLs with host name
 *   "abc.example.com" are crawled.
 * - SUBDOMAINS — crawl the website host names with subdomains. For example,
 *   if the seed URL is "abc.example.com", then "a.abc.example.com" and
 *   "b.abc.example.com" are also crawled.
 * - EVERYTHING — crawl the website host names with subdomains and other
 *   domains that the web pages link to.
 *
 * The default mode is set to HOST_ONLY.
 * You can choose one of the following modes:
 *
 * - HOST_ONLY — crawl only the website host names. For example, if the
 *   seed URL is "abc.example.com", then only URLs with host name
 *   "abc.example.com" are crawled.
 * - SUBDOMAINS — crawl the website host names with subdomains. For example,
 *   if the seed URL is "abc.example.com", then "a.abc.example.com" and
 *   "b.abc.example.com" are also crawled.
 * - EVERYTHING — crawl the website host names with subdomains and other
 *   domains that the web pages link to.
 *
 * The default mode is set to HOST_ONLY.
 * You can choose one of the following modes:
 *
 * - HOST_ONLY — crawl only the website host names. For example, if the
 *   seed URL is "abc.example.com", then only URLs with host name
 *   "abc.example.com" are crawled.
 * - SUBDOMAINS — crawl the website host names with subdomains. For example,
 *   if the seed URL is "abc.example.com", then "a.abc.example.com" and
 *   "b.abc.example.com" are also crawled.
 * - EVERYTHING — crawl the website host names with subdomains and other
 *   domains that the web pages link to.
 *
 * The default mode is set to HOST_ONLY.
 * You can choose one of the following modes:
 *
 * - HOST_ONLY — crawl only the website host names. For example, if the
 *   seed URL is "abc.example.com", then only URLs with host name
 *   "abc.example.com" are crawled.
 * - SUBDOMAINS — crawl the website host names with subdomains. For example,
 *   if the seed URL is "abc.example.com", then "a.abc.example.com" and
 *   "b.abc.example.com" are also crawled.
 * - EVERYTHING — crawl the website host names with subdomains and other
 *   domains that the web pages link to.
 *
 * The default mode is set to HOST_ONLY.
 * You can choose one of the following modes:
 *
 * - HOST_ONLY — crawl only the website host names. For example, if the
 *   seed URL is "abc.example.com", then only URLs with host name
 *   "abc.example.com" are crawled.
 * - SUBDOMAINS — crawl the website host names with subdomains. For example,
 *   if the seed URL is "abc.example.com", then "a.abc.example.com" and
 *   "b.abc.example.com" are also crawled.
 * - EVERYTHING — crawl the website host names with subdomains and other
 *   domains that the web pages link to.
 *
 * The default mode is set to HOST_ONLY.
 * You can choose one of the following modes:
 *
 * - HOST_ONLY — crawl only the website host names. For example, if the
 *   seed URL is "abc.example.com", then only URLs with host name
 *   "abc.example.com" are crawled.
 * - SUBDOMAINS — crawl the website host names with subdomains. For example,
 *   if the seed URL is "abc.example.com", then "a.abc.example.com" and
 *   "b.abc.example.com" are also crawled.
 * - EVERYTHING — crawl the website host names with subdomains and other
 *   domains that the web pages link to.
 *
 * The default mode is set to HOST_ONLY.