/** * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0. */ #pragma once #include #include #include #include #include namespace Aws { namespace Utils { namespace Json { class JsonValue; class JsonView; } // namespace Json } // namespace Utils namespace kendra { namespace Model { /** *

Provides the configuration information for the seed or starting point URLs to * crawl.

When selecting websites to index, you must adhere to the Amazon Acceptable Use Policy and all * other Amazon terms. Remember that you must only use Amazon Kendra Web Crawler to * index your own web pages, or web pages that you have authorization to index. *

See Also:

AWS * API Reference

*/ class SeedUrlConfiguration { public: AWS_KENDRA_API SeedUrlConfiguration(); AWS_KENDRA_API SeedUrlConfiguration(Aws::Utils::Json::JsonView jsonValue); AWS_KENDRA_API SeedUrlConfiguration& operator=(Aws::Utils::Json::JsonView jsonValue); AWS_KENDRA_API Aws::Utils::Json::JsonValue Jsonize() const; /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline const Aws::Vector& GetSeedUrls() const{ return m_seedUrls; } /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline bool SeedUrlsHasBeenSet() const { return m_seedUrlsHasBeenSet; } /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline void SetSeedUrls(const Aws::Vector& value) { m_seedUrlsHasBeenSet = true; m_seedUrls = value; } /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline void SetSeedUrls(Aws::Vector&& value) { m_seedUrlsHasBeenSet = true; m_seedUrls = std::move(value); } /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline SeedUrlConfiguration& WithSeedUrls(const Aws::Vector& value) { SetSeedUrls(value); return *this;} /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline SeedUrlConfiguration& WithSeedUrls(Aws::Vector&& value) { SetSeedUrls(std::move(value)); return *this;} /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline SeedUrlConfiguration& AddSeedUrls(const Aws::String& value) { m_seedUrlsHasBeenSet = true; m_seedUrls.push_back(value); return *this; } /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline SeedUrlConfiguration& AddSeedUrls(Aws::String&& value) { m_seedUrlsHasBeenSet = true; m_seedUrls.push_back(std::move(value)); return *this; } /** *

The list of seed or starting point URLs of the websites you want to * crawl.

The list can include a maximum of 100 seed URLs.

*/ inline SeedUrlConfiguration& AddSeedUrls(const char* value) { m_seedUrlsHasBeenSet = true; m_seedUrls.push_back(value); return *this; } /** *

You can choose one of the following modes:

  • * HOST_ONLY—crawl only the website host names. For example, if the * seed URL is "abc.example.com", then only URLs with host name "abc.example.com" * are crawled.

  • SUBDOMAINS—crawl the website host * names with subdomains. For example, if the seed URL is "abc.example.com", then * "a.abc.example.com" and "b.abc.example.com" are also crawled.

  • * EVERYTHING—crawl the website host names with subdomains and other * domains that the web pages link to.

The default mode is set * to HOST_ONLY.

*/ inline const WebCrawlerMode& GetWebCrawlerMode() const{ return m_webCrawlerMode; } /** *

You can choose one of the following modes:

  • * HOST_ONLY—crawl only the website host names. For example, if the * seed URL is "abc.example.com", then only URLs with host name "abc.example.com" * are crawled.

  • SUBDOMAINS—crawl the website host * names with subdomains. For example, if the seed URL is "abc.example.com", then * "a.abc.example.com" and "b.abc.example.com" are also crawled.

  • * EVERYTHING—crawl the website host names with subdomains and other * domains that the web pages link to.

The default mode is set * to HOST_ONLY.

*/ inline bool WebCrawlerModeHasBeenSet() const { return m_webCrawlerModeHasBeenSet; } /** *

You can choose one of the following modes:

  • * HOST_ONLY—crawl only the website host names. For example, if the * seed URL is "abc.example.com", then only URLs with host name "abc.example.com" * are crawled.

  • SUBDOMAINS—crawl the website host * names with subdomains. For example, if the seed URL is "abc.example.com", then * "a.abc.example.com" and "b.abc.example.com" are also crawled.

  • * EVERYTHING—crawl the website host names with subdomains and other * domains that the web pages link to.

The default mode is set * to HOST_ONLY.

*/ inline void SetWebCrawlerMode(const WebCrawlerMode& value) { m_webCrawlerModeHasBeenSet = true; m_webCrawlerMode = value; } /** *

You can choose one of the following modes:

  • * HOST_ONLY—crawl only the website host names. For example, if the * seed URL is "abc.example.com", then only URLs with host name "abc.example.com" * are crawled.

  • SUBDOMAINS—crawl the website host * names with subdomains. For example, if the seed URL is "abc.example.com", then * "a.abc.example.com" and "b.abc.example.com" are also crawled.

  • * EVERYTHING—crawl the website host names with subdomains and other * domains that the web pages link to.

The default mode is set * to HOST_ONLY.

*/ inline void SetWebCrawlerMode(WebCrawlerMode&& value) { m_webCrawlerModeHasBeenSet = true; m_webCrawlerMode = std::move(value); } /** *

You can choose one of the following modes:

  • * HOST_ONLY—crawl only the website host names. For example, if the * seed URL is "abc.example.com", then only URLs with host name "abc.example.com" * are crawled.

  • SUBDOMAINS—crawl the website host * names with subdomains. For example, if the seed URL is "abc.example.com", then * "a.abc.example.com" and "b.abc.example.com" are also crawled.

  • * EVERYTHING—crawl the website host names with subdomains and other * domains that the web pages link to.

The default mode is set * to HOST_ONLY.

*/ inline SeedUrlConfiguration& WithWebCrawlerMode(const WebCrawlerMode& value) { SetWebCrawlerMode(value); return *this;} /** *

You can choose one of the following modes:

  • * HOST_ONLY—crawl only the website host names. For example, if the * seed URL is "abc.example.com", then only URLs with host name "abc.example.com" * are crawled.

  • SUBDOMAINS—crawl the website host * names with subdomains. For example, if the seed URL is "abc.example.com", then * "a.abc.example.com" and "b.abc.example.com" are also crawled.

  • * EVERYTHING—crawl the website host names with subdomains and other * domains that the web pages link to.

The default mode is set * to HOST_ONLY.

*/ inline SeedUrlConfiguration& WithWebCrawlerMode(WebCrawlerMode&& value) { SetWebCrawlerMode(std::move(value)); return *this;} private: Aws::Vector m_seedUrls; bool m_seedUrlsHasBeenSet = false; WebCrawlerMode m_webCrawlerMode; bool m_webCrawlerModeHasBeenSet = false; }; } // namespace Model } // namespace kendra } // namespace Aws