/* * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with * the License. A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions * and limitations under the License. */ package com.amazonaws.services.kendra.model; import java.io.Serializable; import javax.annotation.Generated; import com.amazonaws.protocol.StructuredPojo; import com.amazonaws.protocol.ProtocolMarshaller; /** *
* <p>
* Provides the configuration information for the seed or starting point URLs to crawl.
* </p>
* <p>
* <i>When selecting websites to index, you must adhere to the Amazon Acceptable Use Policy and all other Amazon
* terms. Remember that you must only use Amazon Kendra Web Crawler to index your own web pages, or web pages that
* you have authorization to index.</i>
* </p>
* * @see AWS API * Documentation */ @Generated("com.amazonaws:aws-java-sdk-code-generator") public class SeedUrlConfiguration implements Serializable, Cloneable, StructuredPojo { /** ** The list of seed or starting point URLs of the websites you want to crawl. *
** The list can include a maximum of 100 seed URLs. *
*/ private java.util.List* You can choose one of the following modes: *
*
* HOST_ONLY
—crawl only the website host names. For example, if the seed URL is "abc.example.com", then
* only URLs with host name "abc.example.com" are crawled.
*
* SUBDOMAINS
—crawl the website host names with subdomains. For example, if the seed URL is
* "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.
*
* EVERYTHING
—crawl the website host names with subdomains and other domains that the web pages link
* to.
*
* The default mode is set to HOST_ONLY
.
*
* The list of seed or starting point URLs of the websites you want to crawl. *
** The list can include a maximum of 100 seed URLs. *
* * @return The list of seed or starting point URLs of the websites you want to crawl. *
* The list can include a maximum of 100 seed URLs.
*/
public java.util.List
* The list of seed or starting point URLs of the websites you want to crawl.
*
* The list can include a maximum of 100 seed URLs.
*
* The list can include a maximum of 100 seed URLs.
*/
public void setSeedUrls(java.util.Collection
* The list of seed or starting point URLs of the websites you want to crawl.
*
* The list can include a maximum of 100 seed URLs.
*
* NOTE: This method appends the values to the existing list (if any). Use
* {@link #setSeedUrls(java.util.Collection)} or {@link #withSeedUrls(java.util.Collection)} if you want to override
* the existing values.
*
* The list can include a maximum of 100 seed URLs.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public SeedUrlConfiguration withSeedUrls(String... seedUrls) {
if (this.seedUrls == null) {
setSeedUrls(new java.util.ArrayList
* The list of seed or starting point URLs of the websites you want to crawl.
*
* The list can include a maximum of 100 seed URLs.
*
* The list can include a maximum of 100 seed URLs.
* @return Returns a reference to this object so that method calls can be chained together.
*/
public SeedUrlConfiguration withSeedUrls(java.util.Collection
* You can choose one of the following modes:
*
*
*
*
* The default mode is set to
*
* HOST_ONLY
—crawl only the website host names. For example, if the seed URL is "abc.example.com", then
* only URLs with host name "abc.example.com" are crawled.
* SUBDOMAINS
—crawl the website host names with subdomains. For example, if the seed URL is
* "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.
* EVERYTHING
—crawl the website host names with subdomains and other domains that the web pages link
* to.
* HOST_ONLY
.
*
* HOST_ONLY
—crawl only the website host names. For example, if the seed URL is
* "abc.example.com", then only URLs with host name "abc.example.com" are crawled.
*
* SUBDOMAINS
—crawl the website host names with subdomains. For example, if the seed URL is
* "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.
*
* EVERYTHING
—crawl the website host names with subdomains and other domains that the web pages
* link to.
*
* The default mode is set to HOST_ONLY
.
* @see WebCrawlerMode
*/
/**
 * Stores the web crawler mode as a plain string. The value is assigned exactly as given; this method performs no
 * validation of its own.
 * <p>
 * The documented modes are {@code HOST_ONLY}, {@code SUBDOMAINS} and {@code EVERYTHING}; the documented default is
 * {@code HOST_ONLY}.
 * </p>
 *
 * @param webCrawlerMode
 *        the mode to store, in its string form
 * @see WebCrawlerMode
 */
public void setWebCrawlerMode(String webCrawlerMode) {
this.webCrawlerMode = webCrawlerMode;
}
/**
 * <p>
 * Returns the web crawler mode currently held by this object. You can choose one of the following modes:
 * </p>
 * <ul>
 * <li>{@code HOST_ONLY}—crawl only the website host names. For example, if the seed URL is "abc.example.com", then
 * only URLs with host name "abc.example.com" are crawled.</li>
 * <li>{@code SUBDOMAINS}—crawl the website host names with subdomains. For example, if the seed URL is
 * "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.</li>
 * <li>{@code EVERYTHING}—crawl the website host names with subdomains and other domains that the web pages link
 * to.</li>
 * </ul>
 * <p>
 * The default mode is set to {@code HOST_ONLY}.
 * </p>
 *
 * @return the stored mode string, exactly as it was last assigned
 * @see WebCrawlerMode
 */
public String getWebCrawlerMode() {
    return webCrawlerMode;
}
/**
 * <p>
 * Fluent variant of {@link #setWebCrawlerMode(String)}: forwards the value to the setter and returns this object.
 * You can choose one of the following modes:
 * </p>
 * <ul>
 * <li>{@code HOST_ONLY}—crawl only the website host names. For example, if the seed URL is "abc.example.com", then
 * only URLs with host name "abc.example.com" are crawled.</li>
 * <li>{@code SUBDOMAINS}—crawl the website host names with subdomains. For example, if the seed URL is
 * "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.</li>
 * <li>{@code EVERYTHING}—crawl the website host names with subdomains and other domains that the web pages link
 * to.</li>
 * </ul>
 * <p>
 * The default mode is set to {@code HOST_ONLY}.
 * </p>
 *
 * @param webCrawlerMode
 *        the mode to store, in its string form; passed unchanged to {@link #setWebCrawlerMode(String)}
 * @return Returns a reference to this object so that method calls can be chained together.
 * @see WebCrawlerMode
 */
public SeedUrlConfiguration withWebCrawlerMode(String webCrawlerMode) {
// Goes through the setter rather than assigning the field directly.
setWebCrawlerMode(webCrawlerMode);
return this;
}
/**
 * <p>
 * Fluent, enum-typed way to choose the web crawler mode. The enum is stored in its {@code toString()} form. You can
 * choose one of the following modes:
 * </p>
 * <ul>
 * <li>{@code HOST_ONLY}—crawl only the website host names. For example, if the seed URL is "abc.example.com", then
 * only URLs with host name "abc.example.com" are crawled.</li>
 * <li>{@code SUBDOMAINS}—crawl the website host names with subdomains. For example, if the seed URL is
 * "abc.example.com", then "a.abc.example.com" and "b.abc.example.com" are also crawled.</li>
 * <li>{@code EVERYTHING}—crawl the website host names with subdomains and other domains that the web pages link
 * to.</li>
 * </ul>
 * <p>
 * The default mode is set to {@code HOST_ONLY}.
 * </p>
 *
 * @param webCrawlerMode
 *        the mode to store; {@code null} clears the stored mode instead of throwing
 * @return Returns a reference to this object so that method calls can be chained together.
 * @see WebCrawlerMode
 */
public SeedUrlConfiguration withWebCrawlerMode(WebCrawlerMode webCrawlerMode) {
    // Calling toString() on a null enum used to throw a NullPointerException. The String overload simply
    // stores whatever it is given (including null), so mirror that here and store null.
    this.webCrawlerMode = (webCrawlerMode == null) ? null : webCrawlerMode.toString();
    return this;
}
/**
 * Builds a brace-delimited, human-readable dump of this object for testing and debugging.
 * <p>
 * Only properties whose getter returns a non-null value are printed, as {@code "SeedUrls: <value>,"} followed by
 * {@code "WebCrawlerMode: <value>"}. The comma belongs to the {@code SeedUrls} entry, so it is still emitted when
 * the mode is absent (for example <code>{SeedUrls: [a],}</code>).
 * </p>
 *
 * @return A string representation of this object.
 *
 * @see java.lang.Object#toString()
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("{");

    Object seedUrlsValue = getSeedUrls();
    if (seedUrlsValue != null) {
        text.append("SeedUrls: ");
        text.append(seedUrlsValue);
        text.append(",");
    }

    String modeValue = getWebCrawlerMode();
    if (modeValue != null) {
        text.append("WebCrawlerMode: ");
        text.append(modeValue);
    }

    return text.append("}").toString();
}
/**
 * Compares this object with another for value equality.
 * <p>
 * Two instances are equal when the argument is a {@code SeedUrlConfiguration} and both the seed URL lists and the
 * web crawler modes are either both {@code null} or equal according to their own {@code equals}.
 * </p>
 *
 * @param obj
 *        the object to compare against; may be {@code null}
 * @return {@code true} if {@code obj} carries the same seed URLs and web crawler mode, {@code false} otherwise
 */
@Override
public boolean equals(Object obj) {
    if (obj == this) {
        return true;
    }
    // instanceof evaluates to false for null, so this single test also rejects a null argument.
    if (!(obj instanceof SeedUrlConfiguration)) {
        return false;
    }
    SeedUrlConfiguration that = (SeedUrlConfiguration) obj;

    Object theirSeedUrls = that.getSeedUrls();
    Object ourSeedUrls = this.getSeedUrls();
    // Exactly one side being null means the values differ.
    if ((theirSeedUrls == null) != (ourSeedUrls == null)) {
        return false;
    }
    if (theirSeedUrls != null && !theirSeedUrls.equals(ourSeedUrls)) {
        return false;
    }

    Object theirMode = that.getWebCrawlerMode();
    Object ourMode = this.getWebCrawlerMode();
    if ((theirMode == null) != (ourMode == null)) {
        return false;
    }
    // Here both are null or both are non-null.
    return theirMode == null || theirMode.equals(ourMode);
}
/**
 * Computes a hash code from the seed URLs and the web crawler mode, in that order, so that it is consistent with
 * {@link #equals(Object)}.
 *
 * @return the combined hash; a {@code null} property contributes {@code 0}
 */
@Override
public int hashCode() {
    // Arrays.hashCode(Object[]) starts at 1 and folds each element as 31 * h + (e == null ? 0 : e.hashCode()),
    // which is the same arithmetic as combining the two properties by hand.
    return java.util.Arrays.hashCode(new Object[] { getSeedUrls(), getWebCrawlerMode() });
}
/**
 * Creates a copy of this object via {@code super.clone()}.
 * <p>
 * The copy is produced field by field, so reference-typed fields are shared with the original rather than
 * duplicated.
 * </p>
 *
 * @return the cloned {@code SeedUrlConfiguration}
 * @throws IllegalStateException
 *         if {@code super.clone()} reports {@link CloneNotSupportedException}, which is not expected because this
 *         class implements {@link Cloneable}
 */
@Override
public SeedUrlConfiguration clone() {
    Object copy;
    try {
        copy = super.clone();
    } catch (CloneNotSupportedException unexpected) {
        throw new IllegalStateException(
                "Got a CloneNotSupportedException from Object.clone() " + "even though we're Cloneable!",
                unexpected);
    }
    return (SeedUrlConfiguration) copy;
}
/**
 * Marshalls this object by handing it, together with the supplied protocol marshaller, to the instance returned by
 * {@code SeedUrlConfigurationMarshaller.getInstance()}. Annotated as an SDK-internal API.
 *
 * @param protocolMarshaller
 *        passed through unchanged to the marshaller
 */
@com.amazonaws.annotation.SdkInternalApi
@Override
public void marshall(ProtocolMarshaller protocolMarshaller) {
com.amazonaws.services.kendra.model.transform.SeedUrlConfigurationMarshaller.getInstance().marshall(this, protocolMarshaller);
}
}