/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.indices.recovery;
import org.opensearch.action.admin.indices.dangling.DanglingIndexInfo;
import org.opensearch.action.admin.indices.dangling.delete.DeleteDanglingIndexRequest;
import org.opensearch.action.admin.indices.dangling.import_index.ImportDanglingIndexRequest;
import org.opensearch.action.admin.indices.dangling.list.ListDanglingIndicesRequest;
import org.opensearch.action.admin.indices.dangling.list.ListDanglingIndicesResponse;
import org.opensearch.action.admin.indices.dangling.list.NodeListDanglingIndicesResponse;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.indices.IndicesService;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.test.OpenSearchIntegTestCase;
import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope;
import org.opensearch.test.InternalTestCluster;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import static org.opensearch.cluster.metadata.IndexGraveyard.SETTING_MAX_TOMBSTONES;
import static org.opensearch.gateway.DanglingIndicesState.AUTO_IMPORT_DANGLING_INDICES_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.notNullValue;
/**
 * This class tests how dangling indices are handled: how they are
 * discovered, and how they can be accessed and manipulated through the
 * API.
 *
 * See also DanglingIndicesRestIT in the qa:smoke-test-http project.
 *
 * @see org.opensearch.action.admin.indices.dangling
 */
@ClusterScope(numDataNodes = 0, scope = OpenSearchIntegTestCase.Scope.TEST)
public class DanglingIndicesIT extends OpenSearchIntegTestCase {
private static final String INDEX_NAME = "test-idx-1";
private static final String OTHER_INDEX_NAME = INDEX_NAME + "-other";
private Settings buildSettings(int maxTombstones, boolean writeDanglingIndices, boolean importDanglingIndices) {
return Settings.builder()
// Limit the indices kept in the graveyard. This can be set to zero, so that
// when we delete an index, it's definitely considered to be dangling.
.put(SETTING_MAX_TOMBSTONES.getKey(), maxTombstones)
.put(IndicesService.WRITE_DANGLING_INDICES_INFO_SETTING.getKey(), writeDanglingIndices)
.put(AUTO_IMPORT_DANGLING_INDICES_SETTING.getKey(), importDanglingIndices)
.build();
}
/**
* Check that when the auto-recovery setting is enabled and a dangling index is
* discovered, then that index is recovered into the cluster.
*/
public void testDanglingIndicesAreRecoveredWhenSettingIsEnabled() throws Exception {
final Settings settings = buildSettings(0, true, true);
internalCluster().startNodes(3, settings);
createIndices(INDEX_NAME);
ensurePendingDanglingIndicesWritten();
boolean refreshIntervalChanged = randomBoolean();
if (refreshIntervalChanged) {
client().admin()
.indices()
.prepareUpdateSettings(INDEX_NAME)
.setSettings(Settings.builder().put("index.refresh_interval", "42s").build())
.get();
ensurePendingDanglingIndicesWritten();
}
if (randomBoolean()) {
client().admin().indices().prepareClose(INDEX_NAME).get();
}
ensureGreen(INDEX_NAME);
// Restart node, deleting the index in its absence, so that there is a dangling index to recover
internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
internalCluster().validateClusterFormed();
assertAcked(client().admin().indices().prepareDelete(INDEX_NAME));
return super.onNodeStopped(nodeName);
}
});
assertBusy(() -> assertTrue("Expected dangling index " + INDEX_NAME + " to be recovered", indexExists(INDEX_NAME)));
if (refreshIntervalChanged) {
assertThat(
client().admin().indices().prepareGetSettings(INDEX_NAME).get().getSetting(INDEX_NAME, "index.refresh_interval"),
equalTo("42s")
);
}
ensureGreen(INDEX_NAME);
final IndexMetadata indexMetadata = clusterService().state().metadata().index(INDEX_NAME);
assertThat(indexMetadata.getSettings().get(IndexMetadata.SETTING_HISTORY_UUID), notNullValue());
}
private void ensurePendingDanglingIndicesWritten() throws Exception {
assertBusy(
() -> internalCluster().getInstances(IndicesService.class)
.forEach(indicesService -> assertTrue(indicesService.allPendingDanglingIndicesWritten()))
);
}
/**
* Check that when dangling indices are discovered, then they are not recovered into
* the cluster when the recovery setting is disabled.
*/
public void testDanglingIndicesAreNotRecoveredWhenSettingIsDisabled() throws Exception {
internalCluster().startNodes(3, buildSettings(0, true, false));
createDanglingIndices(INDEX_NAME);
// Since index recovery is async, we can't prove index recovery will never occur, just that it doesn't occur within some reasonable
// amount of time
assertFalse(
"Did not expect dangling index " + INDEX_NAME + " to be recovered",
waitUntil(() -> indexExists(INDEX_NAME), 1, TimeUnit.SECONDS)
);
}
/**
* Check that when dangling indices are not written, then they cannot be recovered into the cluster.
*/
public void testDanglingIndicesAreNotRecoveredWhenNotWritten() throws Exception {
internalCluster().startNodes(3, buildSettings(0, false, true));
createDanglingIndices(INDEX_NAME);
// Since index recovery is async, we can't prove index recovery will never occur, just that it doesn't occur within some reasonable
// amount of time
assertFalse(
"Did not expect dangling index " + INDEX_NAME + " to be recovered",
waitUntil(() -> indexExists(INDEX_NAME), 1, TimeUnit.SECONDS)
);
}
/**
* Check that when dangling indices are discovered, then they can be listed.
*/
public void testDanglingIndicesCanBeListed() throws Exception {
internalCluster().startNodes(3, buildSettings(0, true, false));
final String stoppedNodeName = createDanglingIndices(INDEX_NAME);
final ListDanglingIndicesResponse response = client().admin()
.cluster()
.listDanglingIndices(new ListDanglingIndicesRequest())
.actionGet();
assertThat(response.status(), equalTo(RestStatus.OK));
final List nodeResponses = response.getNodes();
assertThat("Didn't get responses from all nodes", nodeResponses, hasSize(3));
for (NodeListDanglingIndicesResponse nodeResponse : nodeResponses) {
if (nodeResponse.getNode().getName().equals(stoppedNodeName)) {
assertThat("Expected node that was stopped to have one dangling index", nodeResponse.getDanglingIndices(), hasSize(1));
final DanglingIndexInfo danglingIndexInfo = nodeResponse.getDanglingIndices().get(0);
assertThat(danglingIndexInfo.getIndexName(), equalTo(INDEX_NAME));
} else {
assertThat("Expected node that was never stopped to have no dangling indices", nodeResponse.getDanglingIndices(), empty());
}
}
}
/**
* Check that when dangling index auto imports are enabled, and a dangling index is is discovered
* but cannot be imported due to a name clash with an existing index, then that dangling index can
* still be listed through the API.
*/
public void testDanglingIndicesCanBeListedWhenAutoImportEnabled() throws Exception {
internalCluster().startNodes(3, buildSettings(0, true, true));
createIndices(INDEX_NAME);
ensurePendingDanglingIndicesWritten();
// Restart node, deleting the indices in its absence, so that there is a dangling index to recover
internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
internalCluster().validateClusterFormed();
assertAcked(client().admin().indices().prepareDelete(INDEX_NAME));
// Create another index with the same name, which will prevent the dangling
// index from being restored, and also ensures that we are checking index
// existence by UUID, not name.
//
// Note: don't call `createIndices()` here as it calls `ensureGreen()`, which will
// fail while a node is offline
createIndex(INDEX_NAME);
ensurePendingDanglingIndicesWritten();
return super.onNodeStopped(nodeName);
}
});
final List danglingIndices = listDanglingIndices();
assertThat(danglingIndices, hasSize(1));
assertThat(danglingIndices.get(0).getIndexName(), equalTo(INDEX_NAME));
}
/**
* Check that dangling indices can be imported.
*/
public void testDanglingIndicesCanBeImported() throws Exception {
internalCluster().startNodes(3, buildSettings(0, true, false));
final String stoppedNodeName = createDanglingIndices(INDEX_NAME);
final String danglingIndexUUID = findDanglingIndexForNode(stoppedNodeName, INDEX_NAME);
final ImportDanglingIndexRequest request = new ImportDanglingIndexRequest(danglingIndexUUID, true);
client().admin().cluster().importDanglingIndex(request).get();
assertTrue("Expected dangling index " + INDEX_NAME + " to be recovered", indexExists(INDEX_NAME));
}
/**
* Check that the when sending an import-dangling-indices request, the specified UUIDs are validated as
* being dangling.
*/
public void testDanglingIndicesMustExistToBeImported() {
internalCluster().startNodes(1, buildSettings(0, true, false));
final ImportDanglingIndexRequest request = new ImportDanglingIndexRequest("NonExistentUUID", true);
final IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
() -> client().admin().cluster().importDanglingIndex(request).actionGet()
);
assertThat(e.getMessage(), containsString("No dangling index found for UUID [NonExistentUUID]"));
}
/**
* Check that a dangling index can only be imported if "accept_data_loss" is set to true.
*/
public void testMustAcceptDataLossToImportDanglingIndex() throws Exception {
internalCluster().startNodes(3, buildSettings(0, true, false));
final String stoppedNodeName = createDanglingIndices(INDEX_NAME);
final String danglingIndexUUID = findDanglingIndexForNode(stoppedNodeName, INDEX_NAME);
final ImportDanglingIndexRequest request = new ImportDanglingIndexRequest(danglingIndexUUID, false);
Exception e = expectThrows(Exception.class, () -> client().admin().cluster().importDanglingIndex(request).actionGet());
assertThat(e.getMessage(), containsString("accept_data_loss must be set to true"));
}
/**
* Check that dangling indices can be deleted. Since this requires that
* we add an entry to the index graveyard, the graveyard size must be
* greater than 1. To test deletes, we set the index graveyard size to
* 1, then create two indices and delete them both while one node in
* the cluster is stopped. The deletion of the second pushes the deletion
* of the first out of the graveyard. When the stopped node is resumed,
* only the second index will be found into the graveyard and the the
* other will be considered dangling, and can therefore be listed and
* deleted through the API
*/
public void testDanglingIndexCanBeDeleted() throws Exception {
final Settings settings = buildSettings(1, true, false);
internalCluster().startNodes(3, settings);
final String stoppedNodeName = createDanglingIndices(INDEX_NAME, OTHER_INDEX_NAME);
final String danglingIndexUUID = findDanglingIndexForNode(stoppedNodeName, INDEX_NAME);
client().admin().cluster().deleteDanglingIndex(new DeleteDanglingIndexRequest(danglingIndexUUID, true)).actionGet();
// The dangling index that we deleted ought to have been removed from disk. Check by
// creating and deleting another index, which creates a new tombstone entry, which should
// not cause the deleted dangling index to be considered "live" again, just because its
// tombstone has been pushed out of the graveyard.
createIndex("additional");
assertAcked(client().admin().indices().prepareDelete("additional"));
assertThat(listDanglingIndices(), is(empty()));
}
/**
* Check that when dangling index auto imports are enabled, and a dangling index is is discovered
* but cannot be imported due to a name clash with an existing index, then that dangling index can
* still be deleted through the API.
*/
public void testDanglingIndexCanBeDeletedWhenAutoImportEnabled() throws Exception {
final Settings settings = buildSettings(1, true, true);
internalCluster().startNodes(3, settings);
createIndices(INDEX_NAME, OTHER_INDEX_NAME);
ensurePendingDanglingIndicesWritten();
AtomicReference stoppedNodeName = new AtomicReference<>();
// Restart node, deleting the indices in its absence, so that there is a dangling index to recover
internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
internalCluster().validateClusterFormed();
stoppedNodeName.set(nodeName);
assertAcked(client().admin().indices().prepareDelete(INDEX_NAME));
assertAcked(client().admin().indices().prepareDelete(OTHER_INDEX_NAME));
// Create another index with the same name, which will prevent the dangling
// index from being restored, and also ensures that we are checking index
// existence by UUID, not name.
//
// Note: don't call `createIndices()` here as it calls `ensureGreen()`, which will
// fail while a node is offline
createIndex(INDEX_NAME);
ensurePendingDanglingIndicesWritten();
return super.onNodeStopped(nodeName);
}
});
final String danglingIndexUUID = findDanglingIndexForNode(stoppedNodeName.get(), INDEX_NAME);
client().admin().cluster().deleteDanglingIndex(new DeleteDanglingIndexRequest(danglingIndexUUID, true)).actionGet();
// The dangling index that we deleted ought to have been removed from disk. Check by
// creating and deleting another index, which creates a new tombstone entry, which should
// not cause the deleted dangling index to be considered "live" again, just because its
// tombstone has been pushed out of the graveyard.
createIndex("additional");
assertAcked(client().admin().indices().prepareDelete("additional"));
assertThat(listDanglingIndices(), is(empty()));
}
/**
* Check that when a index is found to be dangling on more than one node, it can be deleted.
*/
public void testDanglingIndexOverMultipleNodesCanBeDeleted() throws Exception {
final Settings settings = buildSettings(1, true, false);
internalCluster().startNodes(3, settings);
createIndices(INDEX_NAME, OTHER_INDEX_NAME);
ensurePendingDanglingIndicesWritten();
// Restart 2 nodes, deleting the indices in their absence, so that there is a dangling index to recover
internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
internalCluster().validateClusterFormed();
assertAcked(client().admin().indices().prepareDelete(INDEX_NAME));
assertAcked(client().admin().indices().prepareDelete(OTHER_INDEX_NAME));
return super.onNodeStopped(nodeName);
}
});
return super.onNodeStopped(nodeName);
}
});
final AtomicReference> danglingIndices = new AtomicReference<>();
final List results = listDanglingIndices();
// Both the stopped nodes should have found a dangling index.
assertThat(results, hasSize(2));
danglingIndices.set(results);
// Try to delete the index - this request should succeed
client().admin()
.cluster()
.deleteDanglingIndex(new DeleteDanglingIndexRequest(danglingIndices.get().get(0).getIndexUUID(), true))
.actionGet();
// The dangling index that we deleted ought to have been removed from disk. Check by
// creating and deleting another index, which creates a new tombstone entry, which should
// not cause the deleted dangling index to be considered "live" again, just because its
// tombstone has been pushed out of the graveyard.
createIndex("additional");
assertAcked(client().admin().indices().prepareDelete("additional"));
assertBusy(() -> assertThat(listDanglingIndices(), empty()));
}
/**
* Check that when deleting a dangling index, it is required that the "accept_data_loss" flag is set.
*/
public void testDeleteDanglingIndicesRequiresDataLossFlagToBeTrue() throws Exception {
final Settings settings = buildSettings(1, true, false);
internalCluster().startNodes(3, settings);
final String stoppedNodeName = createDanglingIndices(INDEX_NAME, OTHER_INDEX_NAME);
final String danglingIndexUUID = findDanglingIndexForNode(stoppedNodeName, INDEX_NAME);
Exception e = expectThrows(
Exception.class,
() -> client().admin().cluster().deleteDanglingIndex(new DeleteDanglingIndexRequest(danglingIndexUUID, false)).actionGet()
);
assertThat(e.getMessage(), containsString("accept_data_loss must be set to true"));
}
/**
* Helper that fetches the current list of dangling indices.
*/
private List listDanglingIndices() {
final ListDanglingIndicesResponse response = client().admin()
.cluster()
.listDanglingIndices(new ListDanglingIndicesRequest())
.actionGet();
assertThat(response.status(), equalTo(RestStatus.OK));
final List nodeResponses = response.getNodes();
final List results = new ArrayList<>();
for (NodeListDanglingIndicesResponse nodeResponse : nodeResponses) {
results.addAll(nodeResponse.getDanglingIndices());
}
return results;
}
/**
* Simple helper that creates one or more indices, and importantly,
* checks that they are green before proceeding. This is important
* because the tests in this class stop and restart nodes, assuming
* that each index has a primary or replica shard on every node, and if
* a node is stopped prematurely, this assumption is broken.
*/
private void createIndices(String... indices) {
assert indices.length > 0;
for (String index : indices) {
createIndex(index, Settings.builder().put("number_of_replicas", 2).put("routing.allocation.total_shards_per_node", 1).build());
}
ensureGreen(indices);
}
/**
* Creates a number of dangling indices by first creating then, then stopping a data node
* and deleting the indices while the node is stopped.
* @param indices the indices to create and delete
* @return the name of the stopped node
*/
private String createDanglingIndices(String... indices) throws Exception {
createIndices(indices);
ensurePendingDanglingIndicesWritten();
AtomicReference stoppedNodeName = new AtomicReference<>();
final int nodes = internalCluster().size();
// Restart node, deleting the indices in its absence, so that there is a dangling index to recover
internalCluster().restartRandomDataNode(new InternalTestCluster.RestartCallback() {
@Override
public Settings onNodeStopped(String nodeName) throws Exception {
internalCluster().validateClusterFormed();
stoppedNodeName.set(nodeName);
for (String index : indices) {
assertAcked(client().admin().indices().prepareDelete(index));
}
return super.onNodeStopped(nodeName);
}
});
ensureStableCluster(nodes);
return stoppedNodeName.get();
}
private String findDanglingIndexForNode(String stoppedNodeName, String indexName) {
String danglingIndexUUID = null;
final ListDanglingIndicesResponse response = client().admin()
.cluster()
.listDanglingIndices(new ListDanglingIndicesRequest())
.actionGet();
assertThat(response.status(), equalTo(RestStatus.OK));
final List nodeResponses = response.getNodes();
for (NodeListDanglingIndicesResponse nodeResponse : nodeResponses) {
if (nodeResponse.getNode().getName().equals(stoppedNodeName)) {
final DanglingIndexInfo danglingIndexInfo = nodeResponse.getDanglingIndices().get(0);
assertThat(danglingIndexInfo.getIndexName(), equalTo(indexName));
danglingIndexUUID = danglingIndexInfo.getIndexUUID();
break;
}
}
assertNotNull("Failed to find a dangling index UUID for node [" + stoppedNodeName + "]", danglingIndexUUID);
return danglingIndexUUID;
}
}