From 052fae3d52662507ffe5e5768cbf31a26b06fe48 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Thu, 30 Jun 2022 16:21:56 +0200 Subject: [PATCH 1/6] HDDS-6970. EC: Ensure DatanodeAdminMonitor can handle EC containers during decommission --- .../ContainerIdenticalReplicaCount.java | 262 ++++++++++++++++++ .../scm/container/ContainerReplicaCount.java | 252 ++--------------- .../container/ECContainerReplicaCount.java | 33 ++- .../replication/LegacyReplicationManager.java | 15 +- .../replication/ReplicationManager.java | 16 +- .../scm/node/DatanodeAdminMonitorImpl.java | 4 +- .../states/TestContainerReplicaCount.java | 71 ++--- .../scm/node/TestDatanodeAdminMonitor.java | 3 +- 8 files changed, 383 insertions(+), 273 deletions(-) create mode 100644 hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerIdenticalReplicaCount.java diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerIdenticalReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerIdenticalReplicaCount.java new file mode 100644 index 000000000000..ea433ff9aacb --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerIdenticalReplicaCount.java @@ -0,0 +1,262 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.scm.container; + +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; + +import java.util.Set; + +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; + +/** + * Immutable object that is created with a set of ContainerReplica objects and + * the number of in flight replica add and deletes, the container replication + * factor and the min count which must be available for maintenance. This + * information can be used to determine if the container is over or under + * replicated and also how many additional replicas need created or removed. 
+ */ +public class ContainerIdenticalReplicaCount implements ContainerReplicaCount { + + private int healthyCount; + private int decommissionCount; + private int maintenanceCount; + private final int inFlightAdd; + private final int inFlightDel; + private final int repFactor; + private final int minHealthyForMaintenance; + private final ContainerInfo container; + private final Set replica; + + public ContainerIdenticalReplicaCount(ContainerInfo container, + Set replica, int inFlightAdd, + int inFlightDelete, int replicationFactor, + int minHealthyForMaintenance) { + this.healthyCount = 0; + this.decommissionCount = 0; + this.maintenanceCount = 0; + this.inFlightAdd = inFlightAdd; + this.inFlightDel = inFlightDelete; + this.repFactor = replicationFactor; + this.replica = replica; + this.minHealthyForMaintenance + = Math.min(this.repFactor, minHealthyForMaintenance); + this.container = container; + + for (ContainerReplica cr : this.replica) { + HddsProtos.NodeOperationalState state = + cr.getDatanodeDetails().getPersistedOpState(); + if (state == DECOMMISSIONED || state == DECOMMISSIONING) { + decommissionCount++; + } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) { + maintenanceCount++; + } else { + healthyCount++; + } + } + } + + public int getHealthyCount() { + return healthyCount; + } + + @Override + public int getDecommissionCount() { + return decommissionCount; + } + + @Override + public int getMaintenanceCount() { + return maintenanceCount; + } + + public int getReplicationFactor() { + return repFactor; + } + + @Override + public ContainerInfo getContainer() { + return container; + } + + @Override + public Set getReplicas() { + return replica; + } + + @Override + public String toString() { + return "Container State: " + container.getState() + + " Replica Count: " + replica.size() + + " Healthy Count: " + healthyCount + + " Decommission Count: " + decommissionCount + + " Maintenance Count: " + maintenanceCount + + " inFlightAdd Count: " + 
inFlightAdd + + " inFightDel Count: " + inFlightDel + + " ReplicationFactor: " + repFactor + + " minMaintenance Count: " + minHealthyForMaintenance; + } + + /** + * Calculates the delta of replicas which need to be created or removed + * to ensure the container is correctly replicated when considering inflight + * adds and deletes. + * + * When considering inflight operations, it is assumed any operation will + * fail. However, to consider the worst case and avoid data loss, we always + * assume a delete will succeed and an add will fail. In this way, we will + * avoid scheduling too many deletes which could result in data loss. + * + * Decisions around over-replication are made only on healthy replicas, + * ignoring any in maintenance and also any inflight adds. InFlight adds are + * ignored, as they may not complete, so if we have: + * + * H, H, H, IN_FLIGHT_ADD + * + * And then schedule a delete, we could end up under-replicated (add fails, + * delete completes). It is better to let the inflight operations complete + * and then deal with any further over or under replication. + * + * For maintenance replicas, assuming replication factor 3, and minHealthy + * 2, it is possible for all 3 hosts to be put into maintenance, leaving the + * following (H = healthy, M = maintenance): + * + * H, H, M, M, M + * + * Even though we are tracking 5 replicas, this is not over replicated as we + * ignore the maintenance copies. Later, the replicas could look like: + * + * H, H, H, H, M + * + * At this stage, the container is over replicated by 1, so one replica can be + * removed. + * + * For containers which have replication factor healthy replica, we ignore any + * inflight add or deletes, as they may fail. Instead, wait for them to + * complete and then deal with any excess or deficit. + * + * For under replicated containers we do consider inflight add and delete to + * avoid scheduling more adds than needed. 
There is additional logic around + * containers with maintenance replica to ensure minHealthyForMaintenance + * replicas are maintained. + * + * @return Delta of replicas needed. Negative indicates over replication and + * containers should be removed. Positive indicates under replication + * and zero indicates the container has replicationFactor healthy + * replicas + */ + public int additionalReplicaNeeded() { + int delta = missingReplicas(); + + if (delta < 0) { + // Over replicated, so may need to remove a container. Do not consider + // inFlightAdds, as they may fail, but do consider inFlightDel which + // will reduce the over-replication if it completes. + // Note this could make the delta positive if there are too many in flight + // deletes, which will result in an additional replica being scheduled. + return delta + inFlightDel; + } else { + // May be under or perfectly replicated. + // We must consider in flight add and delete when calculating the new + // containers needed, but we bound the lower limit at zero to allow + // inflight operations to complete before handling any potential over + // replication + return Math.max(0, delta - inFlightAdd + inFlightDel); + } + } + + /** + * Returns the count of replicas which need to be created or removed to + * ensure the container is perfectly replicated. Inflight operations are not + * considered here, but the logic to determine the missing or excess counts + * for maintenance is present. + * + * Decisions around over-replication are made only on healthy replicas, + * ignoring any in maintenance. For example, if we have: + * + * H, H, H, M, M + * + * This will not be considered over replicated until one of the Maintenance + * replicas moves to Healthy. + * + * If the container is perfectly replicated, zero will be returned. + * + * If it is under replicated a positive value will be returned, indicating + * how many replicas must be added. 
+ * + * If it is over replicated a negative value will be returned, indicating how + * many replicas to remove. + * + * @return Zero if the container is perfectly replicated, a positive value + * for under replicated and a negative value for over replicated. + */ + private int missingReplicas() { + int delta = repFactor - healthyCount; + + if (delta < 0) { + // Over replicated, so may need to remove a container. + return delta; + } else if (delta > 0) { + // May be under-replicated, depending on maintenance. + delta = Math.max(0, delta - maintenanceCount); + int neededHealthy = + Math.max(0, minHealthyForMaintenance - healthyCount); + delta = Math.max(neededHealthy, delta); + return delta; + } else { // delta == 0 + // We have exactly the number of healthy replicas needed. + return delta; + } + } + + /** + * Return true if the container is sufficiently replicated. Decommissioning + * and Decommissioned containers are ignored in this check, assuming they will + * eventually be removed from the cluster. + * This check ignores inflight additions, as those replicas have not yet been + * created and the create could fail for some reason. + * The check does consider inflight deletes as there may be 3 healthy replicas + * now, but once the delete completes it will reduce to 2. + * We also assume a replica in Maintenance state cannot be removed, so the + * pending delete would affect only the healthy replica count. + * + * @return True if the container is sufficiently replicated and False + * otherwise. + */ + @Override + public boolean isSufficientlyReplicated() { + return missingReplicas() + inFlightDel <= 0; + } + + /** + * Return true if the container is over replicated. Decommission and + * maintenance containers are ignored for this check. + * The check ignores inflight additions, as they may fail, but it does + * consider inflight deletes, as they would reduce the over replication when + * they complete. 
+ * + * @return True if the container is over replicated, false otherwise. + */ + @Override + public boolean isOverReplicated() { + return missingReplicas() + inFlightDel < 0; + } + +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java index ec5a87a89090..882eabe44acf 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,238 +22,36 @@ import java.util.Set; -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; -import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; /** - * Immutable object that is created with a set of ContainerReplica objects and - * the number of in flight replica add and deletes, the container replication - * factor and the min count which must be available for maintenance. This - * information can be used to determine if the container is over or under - * replicated and also how many additional replicas need created or removed. + * Common interface for EC and non-EC container replica counts. 
+ * TODO pull up more methods if needed */ -public class ContainerReplicaCount { +public interface ContainerReplicaCount { + ContainerInfo getContainer(); - private int healthyCount = 0; - private int decommissionCount = 0; - private int maintenanceCount = 0; - private int inFlightAdd = 0; - private int inFlightDel = 0; - private int repFactor; - private int minHealthyForMaintenance; - private ContainerInfo container; - private Set replica; + Set getReplicas(); - public ContainerReplicaCount(ContainerInfo container, - Set replica, int inFlightAdd, - int inFlightDelete, int replicationFactor, - int minHealthyForMaintenance) { - this.healthyCount = 0; - this.decommissionCount = 0; - this.maintenanceCount = 0; - this.inFlightAdd = inFlightAdd; - this.inFlightDel = inFlightDelete; - this.repFactor = replicationFactor; - this.replica = replica; - this.minHealthyForMaintenance - = Math.min(this.repFactor, minHealthyForMaintenance); - this.container = container; + boolean isSufficientlyReplicated(); - for (ContainerReplica cr : this.replica) { - HddsProtos.NodeOperationalState state = - cr.getDatanodeDetails().getPersistedOpState(); - if (state == DECOMMISSIONED || state == DECOMMISSIONING) { - decommissionCount++; - } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) { - maintenanceCount++; - } else { - healthyCount++; - } - } - } + boolean isOverReplicated(); - public int getHealthyCount() { - return healthyCount; - } + int getDecommissionCount(); - public int getDecommissionCount() { - return decommissionCount; - } - - public int getMaintenanceCount() { - return maintenanceCount; - } - - public int getReplicationFactor() { - return repFactor; - } - - public ContainerInfo getContainer() { - return container; - } - - public Set getReplica() { - return replica; - } - - @Override - public String toString() { - return "Container State: " + container.getState() + - " Replica Count: " + replica.size() + - " Healthy Count: " + healthyCount + - " Decommission 
Count: " + decommissionCount + - " Maintenance Count: " + maintenanceCount + - " inFlightAdd Count: " + inFlightAdd + - " inFightDel Count: " + inFlightDel + - " ReplicationFactor: " + repFactor + - " minMaintenance Count: " + minHealthyForMaintenance; - } + int getMaintenanceCount(); /** - * Calculates the the delta of replicas which need to be created or removed + * Calculates the delta of replicas which need to be created or removed * to ensure the container is correctly replicated when considered inflight * adds and deletes. * - * When considering inflight operations, it is assumed any operation will - * fail. However, to consider the worst case and avoid data loss, we always - * assume a delete will succeed and and add will fail. In this way, we will - * avoid scheduling too many deletes which could result in dataloss. - * - * Decisions around over-replication are made only on healthy replicas, - * ignoring any in maintenance and also any inflight adds. InFlight adds are - * ignored, as they may not complete, so if we have: - * - * H, H, H, IN_FLIGHT_ADD - * - * And then schedule a delete, we could end up under-replicated (add fails, - * delete completes). It is better to let the inflight operations complete - * and then deal with any further over or under replication. - * - * For maintenance replicas, assuming replication factor 3, and minHealthy - * 2, it is possible for all 3 hosts to be put into maintenance, leaving the - * following (H = healthy, M = maintenance): - * - * H, H, M, M, M - * - * Even though we are tracking 5 replicas, this is not over replicated as we - * ignore the maintenance copies. Later, the replicas could look like: - * - * H, H, H, H, M - * - * At this stage, the container is over replicated by 1, so one replica can be - * removed. - * - * For containers which have replication factor healthy replica, we ignore any - * inflight add or deletes, as they may fail. 
Instead, wait for them to - * complete and then deal with any excess or deficit. - * - * For under replicated containers we do consider inflight add and delete to - * avoid scheduling more adds than needed. There is additional logic around - * containers with maintenance replica to ensure minHealthyForMaintenance - * replia are maintained. - * * @return Delta of replicas needed. Negative indicates over replication and * containers should be removed. Positive indicates over replication * and zero indicates the containers has replicationFactor healthy * replica */ - public int additionalReplicaNeeded() { - int delta = missingReplicas(); - - if (delta < 0) { - // Over replicated, so may need to remove a container. Do not consider - // inFlightAdds, as they may fail, but do consider inFlightDel which - // will reduce the over-replication if it completes. - // Note this could make the delta positive if there are too many in flight - // deletes, which will result in an additional being scheduled. - return delta + inFlightDel; - } else { - // May be under or perfectly replicated. - // We must consider in flight add and delete when calculating the new - // containers needed, but we bound the lower limit at zero to allow - // inflight operations to complete before handling any potential over - // replication - return Math.max(0, delta - inFlightAdd + inFlightDel); - } - } - - /** - * Returns the count of replicas which need to be created or removed to - * ensure the container is perfectly replicate. Inflight operations are not - * considered here, but the logic to determine the missing or excess counts - * for maintenance is present. - * - * Decisions around over-replication are made only on healthy replicas, - * ignoring any in maintenance. For example, if we have: - * - * H, H, H, M, M - * - * This will not be consider over replicated until one of the Maintenance - * replicas moves to Healthy. - * - * If the container is perfectly replicated, zero will be return. 
- * - * If it is under replicated a positive value will be returned, indicating - * how many replicas must be added. - * - * If it is over replicated a negative value will be returned, indicating now - * many replicas to remove. - * - * @return Zero if the container is perfectly replicated, a positive value - * for under replicated and a negative value for over replicated. - */ - private int missingReplicas() { - int delta = repFactor - healthyCount; - - if (delta < 0) { - // Over replicated, so may need to remove a container. - return delta; - } else if (delta > 0) { - // May be under-replicated, depending on maintenance. - delta = Math.max(0, delta - maintenanceCount); - int neededHealthy = - Math.max(0, minHealthyForMaintenance - healthyCount); - delta = Math.max(neededHealthy, delta); - return delta; - } else { // delta == 0 - // We have exactly the number of healthy replicas needed. - return delta; - } - } - - /** - * Return true if the container is sufficiently replicated. Decommissioning - * and Decommissioned containers are ignored in this check, assuming they will - * eventually be removed from the cluster. - * This check ignores inflight additions, as those replicas have not yet been - * created and the create could fail for some reason. - * The check does consider inflight deletes as there may be 3 healthy replicas - * now, but once the delete completes it will reduce to 2. - * We also assume a replica in Maintenance state cannot be removed, so the - * pending delete would affect only the healthy replica count. - * - * @return True if the container is sufficiently replicated and False - * otherwise. - */ - public boolean isSufficientlyReplicated() { - return missingReplicas() + inFlightDel <= 0; - } - - /** - * Return true is the container is over replicated. Decommission and - * maintenance containers are ignored for this check. 
- * The check ignores inflight additions, as they may fail, but it does - * consider inflight deletes, as they would reduce the over replication when - * they complete. - * - * @return True if the container is over replicated, false otherwise. - */ - public boolean isOverReplicated() { - return missingReplicas() + inFlightDel < 0; - } + int additionalReplicaNeeded(); /** * Returns true if the container is healthy, meaning all replica which are not @@ -262,22 +60,24 @@ public boolean isOverReplicated() { * * @return true if the container is healthy, false otherwise */ - public boolean isHealthy() { - return (container.getState() == HddsProtos.LifeCycleState.CLOSED - || container.getState() == HddsProtos.LifeCycleState.QUASI_CLOSED) - && replica.stream() + default boolean isHealthy() { + HddsProtos.LifeCycleState containerState = getContainer().getState(); + return (containerState == HddsProtos.LifeCycleState.CLOSED + || containerState == HddsProtos.LifeCycleState.QUASI_CLOSED) + && getReplicas().stream() .filter(r -> r.getDatanodeDetails().getPersistedOpState() == IN_SERVICE) .allMatch(r -> LegacyReplicationManager.compareState( - container.getState(), r.getState())); + containerState, r.getState())); + } /** - * Returns true is there are no replicas of a container available, ie the - * set of container replica passed in the constructor has zero entries. + * Returns true is there are no replicas of the container available, ie the + * set of container replicas has zero entries. * * @return true if there are no replicas, false otherwise. 
*/ - public boolean isMissing() { - return replica.size() == 0; + default boolean isMissing() { + return getReplicas().isEmpty(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java index fe3aafe404e8..120befb76163 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java @@ -59,7 +59,7 @@ * * Maintenance copies are not considered until they are back to IN_SERVICE */ -public class ECContainerReplicaCount { +public class ECContainerReplicaCount implements ContainerReplicaCount { private final ContainerInfo containerInfo; private final ECReplicationConfig repConfig; @@ -128,10 +128,26 @@ public ECContainerReplicaCount(ContainerInfo containerInfo, } } + @Override + public ContainerInfo getContainer() { + return containerInfo; + } + + @Override public Set getReplicas() { return replicas; } + @Override + public int getDecommissionCount() { + return decommissionIndexes.size(); + } + + @Override + public int getMaintenanceCount() { + return maintenanceIndexes.size(); + } + /** * Get a set containing all decommissioning indexes, or an empty set if none * are decommissioning. Note it is possible for an index to be @@ -292,6 +308,16 @@ public boolean isOverReplicated(boolean includePendingDelete) { return false; } + @Override + public boolean isOverReplicated() { + return isOverReplicated(false); + } + + @Override + public int additionalReplicaNeeded() { + return 0; + } + /** * Return an unsorted list of any replica indexes which have more than one * replica and are therefore over-replicated. 
Maintenance replicas are ignored @@ -365,6 +391,11 @@ public boolean isSufficientlyReplicated(boolean includePendingAdd) { >= repConfig.getData() + remainingMaintenanceRedundancy; } + @Override + public boolean isSufficientlyReplicated() { + return isSufficientlyReplicated(false); + } + /** * Check if there is an entry in the map for all expected replica indexes, * and also that the count against each index is greater than zero. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java index 79e11bab5a3e..c5c81752dc5b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerIdenticalReplicaCount; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; @@ -492,7 +493,7 @@ protected void processContainer(ContainerInfo container, return; } - ContainerReplicaCount replicaSet = + ContainerIdenticalReplicaCount replicaSet = getContainerReplicaCount(container, replicas); ContainerPlacementStatus placementStatus = getPlacementStatus( replicas, container.getReplicationConfig().getRequiredNodes()); @@ -990,9 +991,9 @@ public ContainerReplicaCount getContainerReplicaCount(ContainerInfo container) * @return ContainerReplicaCount representing the current state of the * container */ - private ContainerReplicaCount getContainerReplicaCount( + 
private ContainerIdenticalReplicaCount getContainerReplicaCount( ContainerInfo container, Set replica) { - return new ContainerReplicaCount( + return new ContainerIdenticalReplicaCount( container, replica, getInflightAdd(container.containerID()), @@ -1122,10 +1123,10 @@ private void forceCloseContainer(final ContainerInfo container, * current replica count and inflight adds and deletes */ private void handleUnderReplicatedContainer(final ContainerInfo container, - final ContainerReplicaCount replicaSet, + final ContainerIdenticalReplicaCount replicaSet, final ContainerPlacementStatus placementStatus) { LOG.debug("Handling under-replicated container: {}", container); - Set replicas = replicaSet.getReplica(); + Set replicas = replicaSet.getReplicas(); try { if (replicaSet.isSufficientlyReplicated() @@ -1236,9 +1237,9 @@ private void handleUnderReplicatedContainer(final ContainerInfo container, * current replica count and inflight adds and deletes */ private void handleOverReplicatedContainer(final ContainerInfo container, - final ContainerReplicaCount replicaSet) { + final ContainerIdenticalReplicaCount replicaSet) { - final Set replicas = replicaSet.getReplica(); + final Set replicas = replicaSet.getReplicas(); final ContainerID id = container.containerID(); final int replicationFactor = container.getReplicationConfig().getRequiredNodes(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 5d4da102d93d..e4d2470929a0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import 
org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.ContainerReplicaCount; +import org.apache.hadoop.hdds.scm.container.ECContainerReplicaCount; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport.HealthState; import org.apache.hadoop.hdds.scm.ha.SCMContext; @@ -337,7 +338,11 @@ private synchronized void run() { */ public ContainerReplicaCount getContainerReplicaCount(ContainerID containerID) throws ContainerNotFoundException { - return legacyReplicationManager.getContainerReplicaCount(containerID); + ContainerInfo container = containerManager.getContainer(containerID); + if (container.getReplicationType() == EC) { + return getECContainerReplicaCount(container); + } + return legacyReplicationManager.getContainerReplicaCount(container); } /** @@ -493,6 +498,15 @@ public boolean isContainerReplicatingOrDeleting(ContainerID containerID) { .isContainerReplicatingOrDeleting(containerID); } + private ECContainerReplicaCount getECContainerReplicaCount( + ContainerInfo containerInfo) throws ContainerNotFoundException { + Set replicas = containerManager.getContainerReplicas( + containerInfo.containerID()); + List pendingOps = + containerReplicaPendingOps.getPendingOps(containerInfo.containerID()); + return new ECContainerReplicaCount(containerInfo, replicas, pendingOps, 0); + } + /** * Wrap the call to nodeManager.getNodeStatus, catching any * NodeNotFoundException and instead throwing an IllegalStateException. 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java index f096f4e83f2d..fc5e2c73062e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java @@ -304,7 +304,7 @@ private boolean checkContainersReplicatedOnNode(DatanodeDetails dn) if (underReplicated < CONTAINER_DETAILS_LOGGING_LIMIT || LOG.isDebugEnabled()) { LOG.info("Under Replicated Container {} {}; {}", - cid, replicaSet, replicaDetails(replicaSet.getReplica())); + cid, replicaSet, replicaDetails(replicaSet.getReplicas())); } underReplicated++; } @@ -315,7 +315,7 @@ private boolean checkContainersReplicatedOnNode(DatanodeDetails dn) if (unhealthy < CONTAINER_DETAILS_LOGGING_LIMIT || LOG.isDebugEnabled()) { LOG.info("Unhealthy Container {} {}; {}", - cid, replicaSet, replicaDetails(replicaSet.getReplica())); + cid, replicaSet, replicaDetails(replicaSet.getReplicas())); } unhealthy++; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerReplicaCount.java index 542084f4816f..b0135956e375 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerReplicaCount.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerIdenticalReplicaCount; import 
org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.ContainerReplicaCount; @@ -62,7 +63,7 @@ public void testThreeHealthyReplica() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, 0, false); } @@ -71,7 +72,7 @@ public void testTwoHealthyReplica() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -80,7 +81,7 @@ public void testOneHealthyReplica() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 2, false); } @@ -90,7 +91,7 @@ public void testTwoHealthyAndInflightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 1, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, false, 0, false); } @@ -105,7 +106,7 @@ public void testThreeHealthyAndInflightAdd() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 1, 0, 3, 2); + new 
ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, true, 0, false); } @@ -119,7 +120,7 @@ public void testThreeHealthyAndInflightDelete() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 1, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 1, 3, 2); validate(rcnt, false, 1, false); } @@ -133,7 +134,7 @@ public void testThreeHealthyAndInflightAddAndInFlightDelete() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 1, 1, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 1, 1, 3, 2); validate(rcnt, false, 0, false); } @@ -143,7 +144,7 @@ public void testFourHealthyReplicas() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, -1, true); } @@ -153,7 +154,7 @@ public void testFourHealthyReplicasAndInFlightDelete() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 1, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 1, 3, 2); validate(rcnt, true, 0, false); } @@ -163,7 +164,7 @@ public void testFourHealthyReplicasAndTwoInFlightDelete() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 
0, 2, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 2, 3, 2); validate(rcnt, false, 1, false); } @@ -172,7 +173,7 @@ public void testOneHealthyReplicaRepFactorOne() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 1, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, true, 0, false); } @@ -181,7 +182,7 @@ public void testOneHealthyReplicaRepFactorOneInFlightDelete() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 1, 1, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 1, 1, 2); validate(rcnt, false, 1, false); } @@ -190,7 +191,7 @@ public void testTwoHealthyReplicaTwoInflightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 2, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 2, 0, 3, 2); validate(rcnt, false, 0, false); } @@ -204,7 +205,7 @@ public void testThreeHealthyAndTwoDecommission() { IN_SERVICE, DECOMMISSIONING, DECOMMISSIONING); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, 0, false); } @@ -214,7 +215,7 @@ public void testOneDecommissionedReplica() { registerNodes(IN_SERVICE, IN_SERVICE, DECOMMISSIONING); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new 
ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -224,7 +225,7 @@ public void testTwoHealthyOneDecommissionedneInFlightAdd() { registerNodes(IN_SERVICE, IN_SERVICE, DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 1, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, false, 0, false); } @@ -234,7 +235,7 @@ public void testAllDecommissioned() { registerNodes(DECOMMISSIONED, DECOMMISSIONED, DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 3, false); } @@ -243,7 +244,7 @@ public void testAllDecommissionedRepFactorOne() { Set replica = registerNodes(DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 1, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, false, 1, false); } @@ -253,7 +254,7 @@ public void testAllDecommissionedRepFactorOneInFlightAdd() { Set replica = registerNodes(DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 1, 0, 1, 2); + new ContainerIdenticalReplicaCount(container, replica, 1, 0, 1, 2); validate(rcnt, false, 0, false); } @@ -262,7 +263,7 @@ public void testOneHealthyOneDecommissioningRepFactorOne() { Set replica = registerNodes(DECOMMISSIONED, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 1, 2); + new 
ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, true, 0, false); } @@ -276,7 +277,7 @@ public void testOneHealthyTwoMaintenanceMinRepOfTwo() { registerNodes(IN_SERVICE, IN_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -286,7 +287,7 @@ public void testOneHealthyThreeMaintenanceMinRepOfTwo() { IN_MAINTENANCE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -296,7 +297,7 @@ public void testOneHealthyTwoMaintenanceMinRepOfOne() { registerNodes(IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 1); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 1); validate(rcnt, true, 0, false); } @@ -306,7 +307,7 @@ public void testOneHealthyThreeMaintenanceMinRepOfTwoInFlightAdd() { IN_MAINTENANCE, ENTERING_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 1, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, false, 0, false); } @@ -316,7 +317,7 @@ public void testAllMaintenance() { registerNodes(IN_MAINTENANCE, ENTERING_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new 
ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 2, false); } @@ -331,7 +332,7 @@ public void testThreeHealthyTwoInMaintenance() { IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, 0, false); } @@ -347,7 +348,7 @@ public void testFourHealthyOneInMaintenance() { IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, -1, true); } @@ -356,7 +357,7 @@ public void testOneMaintenanceMinRepOfTwoRepFactorOne() { Set replica = registerNodes(IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 1, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, false, 1, false); } @@ -365,7 +366,7 @@ public void testOneMaintenanceMinRepOfTwoRepFactorOneInFlightAdd() { Set replica = registerNodes(IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 1, 0, 1, 2); + new ContainerIdenticalReplicaCount(container, replica, 1, 0, 1, 2); validate(rcnt, false, 0, false); } @@ -374,7 +375,7 @@ public void testOneHealthyOneMaintenanceRepFactorOne() { Set replica = registerNodes(IN_MAINTENANCE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 
0, 1, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, true, 0, false); } @@ -385,7 +386,7 @@ public void testTwoDecomTwoMaintenanceOneInflightAdd() { IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 1, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -395,7 +396,7 @@ public void testHealthyContainerIsHealthy() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); assertTrue(rcnt.isHealthy()); } @@ -419,7 +420,7 @@ public void testIsHealthyWithDifferentReplicaStateNotHealthy() { } ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); assertFalse(rcnt.isHealthy()); } @@ -430,7 +431,7 @@ public void testIsHealthyWithMaintReplicaIsHealthy() { ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); assertTrue(rcnt.isHealthy()); } @@ -439,7 +440,7 @@ public void testContainerWithNoReplicasIsMissing() { Set replica = new HashSet<>(); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); ContainerReplicaCount rcnt = - new ContainerReplicaCount(container, replica, 0, 0, 3, 2); + new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); assertTrue(rcnt.isMissing()); 
assertFalse(rcnt.isSufficientlyReplicated()); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java index 2f7c1c603d01..97ed50f3546f 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerIdenticalReplicaCount; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; @@ -453,7 +454,7 @@ private ContainerReplicaCount generateReplicaCount(ContainerID containerID, .setState(containerState) .build(); - return new ContainerReplicaCount(container, replicas, 0, 0, 3, 2); + return new ContainerIdenticalReplicaCount(container, replicas, 0, 0, 3, 2); } /** From 0cc46abf89ca0dbe3a76f104283377abc44d31e2 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Fri, 1 Jul 2022 11:20:58 +0200 Subject: [PATCH 2/6] Address review comments --- .../scm/container/ContainerReplicaCount.java | 21 +-- .../container/ECContainerReplicaCount.java | 10 +- ...t.java => RatisContainerReplicaCount.java} | 14 +- .../replication/LegacyReplicationManager.java | 12 +- ...va => TestRatisContainerReplicaCount.java} | 149 +++++++++--------- .../scm/node/TestDatanodeAdminMonitor.java | 4 +- 6 files changed, 102 insertions(+), 108 deletions(-) rename hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/{ContainerIdenticalReplicaCount.java => RatisContainerReplicaCount.java} (95%) rename 
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/{TestContainerReplicaCount.java => TestRatisContainerReplicaCount.java} (78%) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java index 882eabe44acf..3884a7e37f29 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java @@ -41,18 +41,6 @@ public interface ContainerReplicaCount { int getMaintenanceCount(); - /** - * Calculates the delta of replicas which need to be created or removed - * to ensure the container is correctly replicated when considered inflight - * adds and deletes. - * - * @return Delta of replicas needed. Negative indicates over replication and - * containers should be removed. Positive indicates over replication - * and zero indicates the containers has replicationFactor healthy - * replica - */ - int additionalReplicaNeeded(); - /** * Returns true if the container is healthy, meaning all replica which are not * in a decommission or maintenance state are in the same state as the @@ -72,12 +60,9 @@ && getReplicas().stream() } /** - * Returns true is there are no replicas of the container available, ie the - * set of container replicas has zero entries. + * Return true if there are insufficient replicas to recover this container. * - * @return true if there are no replicas, false otherwise. + * @return true if there are insufficient replicas, false otherwise. 
*/ - default boolean isMissing() { - return getReplicas().isEmpty(); - } + boolean isMissing(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java index 120befb76163..375b0c8da193 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java @@ -148,6 +148,11 @@ public int getMaintenanceCount() { return maintenanceIndexes.size(); } + @Override + public boolean isMissing() { + return unRecoverable(); + } + /** * Get a set containing all decommissioning indexes, or an empty set if none * are decommissioning. Note it is possible for an index to be @@ -313,11 +318,6 @@ public boolean isOverReplicated() { return isOverReplicated(false); } - @Override - public int additionalReplicaNeeded() { - return 0; - } - /** * Return an unsorted list of any replica indexes which have more than one * replica and are therefore over-replicated. 
Maintenance replicas are ignored diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerIdenticalReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/RatisContainerReplicaCount.java similarity index 95% rename from hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerIdenticalReplicaCount.java rename to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/RatisContainerReplicaCount.java index ea433ff9aacb..9811fbe1a188 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerIdenticalReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/RatisContainerReplicaCount.java @@ -33,7 +33,7 @@ * information can be used to determine if the container is over or under * replicated and also how many additional replicas need created or removed. */ -public class ContainerIdenticalReplicaCount implements ContainerReplicaCount { +public class RatisContainerReplicaCount implements ContainerReplicaCount { private int healthyCount; private int decommissionCount; @@ -45,7 +45,7 @@ public class ContainerIdenticalReplicaCount implements ContainerReplicaCount { private final ContainerInfo container; private final Set replica; - public ContainerIdenticalReplicaCount(ContainerInfo container, + public RatisContainerReplicaCount(ContainerInfo container, Set replica, int inFlightAdd, int inFlightDelete, int replicationFactor, int minHealthyForMaintenance) { @@ -259,4 +259,14 @@ public boolean isOverReplicated() { return missingReplicas() + inFlightDel < 0; } + /** + * Returns true is there are no replicas of the container available, ie the + * set of container replicas has zero entries. + * + * @return true if there are no replicas, false otherwise. 
+ */ + @Override + public boolean isMissing() { + return getReplicas().isEmpty(); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java index c5c81752dc5b..374a23141352 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java @@ -34,7 +34,7 @@ import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.container.ContainerID; -import org.apache.hadoop.hdds.scm.container.ContainerIdenticalReplicaCount; +import org.apache.hadoop.hdds.scm.container.RatisContainerReplicaCount; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; @@ -493,7 +493,7 @@ protected void processContainer(ContainerInfo container, return; } - ContainerIdenticalReplicaCount replicaSet = + RatisContainerReplicaCount replicaSet = getContainerReplicaCount(container, replicas); ContainerPlacementStatus placementStatus = getPlacementStatus( replicas, container.getReplicationConfig().getRequiredNodes()); @@ -991,9 +991,9 @@ public ContainerReplicaCount getContainerReplicaCount(ContainerInfo container) * @return ContainerReplicaCount representing the current state of the * container */ - private ContainerIdenticalReplicaCount getContainerReplicaCount( + private RatisContainerReplicaCount getContainerReplicaCount( ContainerInfo container, Set replica) { - return new ContainerIdenticalReplicaCount( + return new RatisContainerReplicaCount( container, replica, getInflightAdd(container.containerID()), @@ -1123,7 
+1123,7 @@ private void forceCloseContainer(final ContainerInfo container, * current replica count and inflight adds and deletes */ private void handleUnderReplicatedContainer(final ContainerInfo container, - final ContainerIdenticalReplicaCount replicaSet, + final RatisContainerReplicaCount replicaSet, final ContainerPlacementStatus placementStatus) { LOG.debug("Handling under-replicated container: {}", container); Set replicas = replicaSet.getReplicas(); @@ -1237,7 +1237,7 @@ private void handleUnderReplicatedContainer(final ContainerInfo container, * current replica count and inflight adds and deletes */ private void handleOverReplicatedContainer(final ContainerInfo container, - final ContainerIdenticalReplicaCount replicaSet) { + final RatisContainerReplicaCount replicaSet) { final Set replicas = replicaSet.getReplicas(); final ContainerID id = container.containerID(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java similarity index 78% rename from hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerReplicaCount.java rename to hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java index b0135956e375..26bd7083b89e 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java @@ -21,10 +21,9 @@ import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.container.ContainerID; -import org.apache.hadoop.hdds.scm.container.ContainerIdenticalReplicaCount; import 
org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; -import org.apache.hadoop.hdds.scm.container.ContainerReplicaCount; +import org.apache.hadoop.hdds.scm.container.RatisContainerReplicaCount; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -49,9 +48,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse; /** - * Class used to test the ContainerReplicaCount class. + * Class used to test the RatisContainerReplicaCount class. */ -public class TestContainerReplicaCount { +public class TestRatisContainerReplicaCount { @BeforeEach public void setup() { @@ -62,8 +61,8 @@ public void testThreeHealthyReplica() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, 0, false); } @@ -71,8 +70,8 @@ public void testThreeHealthyReplica() { public void testTwoHealthyReplica() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -80,8 +79,8 @@ public void testTwoHealthyReplica() { public void testOneHealthyReplica() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 2, 
false); } @@ -90,8 +89,8 @@ public void testTwoHealthyAndInflightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, false, 0, false); } @@ -105,8 +104,8 @@ public void testThreeHealthyAndInflightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, true, 0, false); } @@ -119,8 +118,8 @@ public void testThreeHealthyAndInflightDelete() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 1, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 1, 3, 2); validate(rcnt, false, 1, false); } @@ -133,8 +132,8 @@ public void testThreeHealthyAndInflightAddAndInFlightDelete() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 1, 1, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 1, 1, 3, 2); validate(rcnt, false, 0, false); } @@ -143,8 +142,8 @@ public void testFourHealthyReplicas() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = 
createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, -1, true); } @@ -153,8 +152,8 @@ public void testFourHealthyReplicasAndInFlightDelete() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 1, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 1, 3, 2); validate(rcnt, true, 0, false); } @@ -163,8 +162,8 @@ public void testFourHealthyReplicasAndTwoInFlightDelete() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 2, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 2, 3, 2); validate(rcnt, false, 1, false); } @@ -172,8 +171,8 @@ public void testFourHealthyReplicasAndTwoInFlightDelete() { public void testOneHealthyReplicaRepFactorOne() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, true, 0, false); } @@ -181,8 +180,8 @@ public void testOneHealthyReplicaRepFactorOne() { public void testOneHealthyReplicaRepFactorOneInFlightDelete() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - 
new ContainerIdenticalReplicaCount(container, replica, 0, 1, 1, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 1, 1, 2); validate(rcnt, false, 1, false); } @@ -190,8 +189,8 @@ public void testOneHealthyReplicaRepFactorOneInFlightDelete() { public void testTwoHealthyReplicaTwoInflightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 2, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 2, 0, 3, 2); validate(rcnt, false, 0, false); } @@ -204,8 +203,8 @@ public void testThreeHealthyAndTwoDecommission() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, DECOMMISSIONING, DECOMMISSIONING); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, 0, false); } @@ -214,8 +213,8 @@ public void testOneDecommissionedReplica() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, DECOMMISSIONING); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -224,8 +223,8 @@ public void testTwoHealthyOneDecommissionedneInFlightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); + RatisContainerReplicaCount 
rcnt = + new RatisContainerReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, false, 0, false); } @@ -234,8 +233,8 @@ public void testAllDecommissioned() { Set replica = registerNodes(DECOMMISSIONED, DECOMMISSIONED, DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 3, false); } @@ -243,8 +242,8 @@ public void testAllDecommissioned() { public void testAllDecommissionedRepFactorOne() { Set replica = registerNodes(DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, false, 1, false); } @@ -253,8 +252,8 @@ public void testAllDecommissionedRepFactorOne() { public void testAllDecommissionedRepFactorOneInFlightAdd() { Set replica = registerNodes(DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 1, 0, 1, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 1, 0, 1, 2); validate(rcnt, false, 0, false); } @@ -262,8 +261,8 @@ public void testAllDecommissionedRepFactorOneInFlightAdd() { public void testOneHealthyOneDecommissioningRepFactorOne() { Set replica = registerNodes(DECOMMISSIONED, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 1, 2); 
validate(rcnt, true, 0, false); } @@ -276,8 +275,8 @@ public void testOneHealthyTwoMaintenanceMinRepOfTwo() { Set replica = registerNodes(IN_SERVICE, IN_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -286,8 +285,8 @@ public void testOneHealthyThreeMaintenanceMinRepOfTwo() { Set replica = registerNodes(IN_SERVICE, IN_MAINTENANCE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -296,8 +295,8 @@ public void testOneHealthyTwoMaintenanceMinRepOfOne() { Set replica = registerNodes(IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 1); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 1); validate(rcnt, true, 0, false); } @@ -306,8 +305,8 @@ public void testOneHealthyThreeMaintenanceMinRepOfTwoInFlightAdd() { Set replica = registerNodes(IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, false, 0, false); } @@ -316,8 +315,8 @@ public void 
testAllMaintenance() { Set replica = registerNodes(IN_MAINTENANCE, ENTERING_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, false, 2, false); } @@ -331,8 +330,8 @@ public void testThreeHealthyTwoInMaintenance() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, 0, false); } @@ -347,8 +346,8 @@ public void testFourHealthyOneInMaintenance() { registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); validate(rcnt, true, -1, true); } @@ -356,8 +355,8 @@ public void testFourHealthyOneInMaintenance() { public void testOneMaintenanceMinRepOfTwoRepFactorOne() { Set replica = registerNodes(IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, false, 1, false); } @@ -365,8 +364,8 @@ public void testOneMaintenanceMinRepOfTwoRepFactorOne() { public void testOneMaintenanceMinRepOfTwoRepFactorOneInFlightAdd() 
{ Set replica = registerNodes(IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 1, 0, 1, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 1, 0, 1, 2); validate(rcnt, false, 0, false); } @@ -374,8 +373,8 @@ public void testOneMaintenanceMinRepOfTwoRepFactorOneInFlightAdd() { public void testOneHealthyOneMaintenanceRepFactorOne() { Set replica = registerNodes(IN_MAINTENANCE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 1, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 1, 2); validate(rcnt, true, 0, false); } @@ -385,8 +384,8 @@ public void testTwoDecomTwoMaintenanceOneInflightAdd() { registerNodes(DECOMMISSIONED, DECOMMISSIONING, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 1, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 1, 0, 3, 2); validate(rcnt, false, 1, false); } @@ -395,8 +394,8 @@ public void testHealthyContainerIsHealthy() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); assertTrue(rcnt.isHealthy()); } @@ -419,8 +418,8 @@ public void testIsHealthyWithDifferentReplicaStateNotHealthy() { break; } ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt 
= - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); assertFalse(rcnt.isHealthy()); } @@ -430,8 +429,8 @@ public void testIsHealthyWithMaintReplicaIsHealthy() { registerNodes(IN_SERVICE, IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); assertTrue(rcnt.isHealthy()); } @@ -439,13 +438,13 @@ public void testIsHealthyWithMaintReplicaIsHealthy() { public void testContainerWithNoReplicasIsMissing() { Set replica = new HashSet<>(); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); - ContainerReplicaCount rcnt = - new ContainerIdenticalReplicaCount(container, replica, 0, 0, 3, 2); + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); assertTrue(rcnt.isMissing()); assertFalse(rcnt.isSufficientlyReplicated()); } - private void validate(ContainerReplicaCount rcnt, + private void validate(RatisContainerReplicaCount rcnt, boolean sufficientlyReplicated, int replicaDelta, boolean overReplicated) { assertEquals(sufficientlyReplicated, rcnt.isSufficientlyReplicated()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java index 97ed50f3546f..15f62c3aa385 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hdds.protocol.proto 
.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.ContainerID; -import org.apache.hadoop.hdds.scm.container.ContainerIdenticalReplicaCount; +import org.apache.hadoop.hdds.scm.container.RatisContainerReplicaCount; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.ContainerReplica; @@ -454,7 +454,7 @@ private ContainerReplicaCount generateReplicaCount(ContainerID containerID, .setState(containerState) .build(); - return new ContainerIdenticalReplicaCount(container, replicas, 0, 0, 3, 2); + return new RatisContainerReplicaCount(container, replicas, 0, 0, 3, 2); } /** From 957fada54d3612999d4efbab140d28e198a5bdd0 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Fri, 1 Jul 2022 11:22:24 +0200 Subject: [PATCH 3/6] Address warnings --- .../TestRatisContainerReplicaCount.java | 89 +++++++++---------- 1 file changed, 42 insertions(+), 47 deletions(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java index 26bd7083b89e..51a6bdfe6ffe 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -24,7 +24,6 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.RatisContainerReplicaCount; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import java.util.HashSet; @@ -50,14 +49,10 @@ /** * Class used to test the RatisContainerReplicaCount class. */ -public class TestRatisContainerReplicaCount { - - @BeforeEach - public void setup() { - } +class TestRatisContainerReplicaCount { @Test - public void testThreeHealthyReplica() { + void testThreeHealthyReplica() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -67,7 +62,7 @@ public void testThreeHealthyReplica() { } @Test - public void testTwoHealthyReplica() { + void testTwoHealthyReplica() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -76,7 +71,7 @@ public void testTwoHealthyReplica() { } @Test - public void testOneHealthyReplica() { + void testOneHealthyReplica() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -85,7 +80,7 @@ public void testOneHealthyReplica() { } @Test - public void testTwoHealthyAndInflightAdd() { + void testTwoHealthyAndInflightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -94,13 +89,13 @@ public void testTwoHealthyAndInflightAdd() { validate(rcnt, false, 0, false); } - @Test /** * This does not schedule a container to be removed, as the inFlight add may * fail and then the delete would make things under-replicated. 
Once the add * completes there will be 4 healthy and it will get taken care of then. */ - public void testThreeHealthyAndInflightAdd() { + @Test + void testThreeHealthyAndInflightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -109,12 +104,12 @@ public void testThreeHealthyAndInflightAdd() { validate(rcnt, true, 0, false); } - @Test /** * As the inflight delete may fail, but as it will make the the container * under replicated, we go ahead and schedule another replica to be added. */ - public void testThreeHealthyAndInflightDelete() { + @Test + void testThreeHealthyAndInflightDelete() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -123,12 +118,12 @@ public void testThreeHealthyAndInflightDelete() { validate(rcnt, false, 1, false); } - @Test /** * This is NOT sufficiently replicated as the inflight add may fail and the * inflight del could succeed, leaving only 2 healthy replicas. 
*/ - public void testThreeHealthyAndInflightAddAndInFlightDelete() { + @Test + void testThreeHealthyAndInflightAddAndInFlightDelete() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -138,7 +133,7 @@ public void testThreeHealthyAndInflightAddAndInFlightDelete() { } @Test - public void testFourHealthyReplicas() { + void testFourHealthyReplicas() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -148,7 +143,7 @@ public void testFourHealthyReplicas() { } @Test - public void testFourHealthyReplicasAndInFlightDelete() { + void testFourHealthyReplicasAndInFlightDelete() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -158,7 +153,7 @@ public void testFourHealthyReplicasAndInFlightDelete() { } @Test - public void testFourHealthyReplicasAndTwoInFlightDelete() { + void testFourHealthyReplicasAndTwoInFlightDelete() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -168,7 +163,7 @@ public void testFourHealthyReplicasAndTwoInFlightDelete() { } @Test - public void testOneHealthyReplicaRepFactorOne() { + void testOneHealthyReplicaRepFactorOne() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -177,7 +172,7 @@ public void testOneHealthyReplicaRepFactorOne() { } @Test - public void testOneHealthyReplicaRepFactorOneInFlightDelete() { + void testOneHealthyReplicaRepFactorOneInFlightDelete() { Set replica = registerNodes(IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -186,7 +181,7 
@@ public void testOneHealthyReplicaRepFactorOneInFlightDelete() { } @Test - public void testTwoHealthyReplicaTwoInflightAdd() { + void testTwoHealthyReplicaTwoInflightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -199,7 +194,7 @@ public void testTwoHealthyReplicaTwoInflightAdd() { */ @Test - public void testThreeHealthyAndTwoDecommission() { + void testThreeHealthyAndTwoDecommission() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, DECOMMISSIONING, DECOMMISSIONING); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -209,7 +204,7 @@ public void testThreeHealthyAndTwoDecommission() { } @Test - public void testOneDecommissionedReplica() { + void testOneDecommissionedReplica() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, DECOMMISSIONING); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -219,7 +214,7 @@ public void testOneDecommissionedReplica() { } @Test - public void testTwoHealthyOneDecommissionedneInFlightAdd() { + void testTwoHealthyOneDecommissionedneInFlightAdd() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -229,7 +224,7 @@ public void testTwoHealthyOneDecommissionedneInFlightAdd() { } @Test - public void testAllDecommissioned() { + void testAllDecommissioned() { Set replica = registerNodes(DECOMMISSIONED, DECOMMISSIONED, DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -239,7 +234,7 @@ public void testAllDecommissioned() { } @Test - public void testAllDecommissionedRepFactorOne() { + void testAllDecommissionedRepFactorOne() { Set replica = registerNodes(DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -249,7 +244,7 
@@ public void testAllDecommissionedRepFactorOne() { } @Test - public void testAllDecommissionedRepFactorOneInFlightAdd() { + void testAllDecommissionedRepFactorOneInFlightAdd() { Set replica = registerNodes(DECOMMISSIONED); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -258,7 +253,7 @@ public void testAllDecommissionedRepFactorOneInFlightAdd() { } @Test - public void testOneHealthyOneDecommissioningRepFactorOne() { + void testOneHealthyOneDecommissioningRepFactorOne() { Set replica = registerNodes(DECOMMISSIONED, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -271,7 +266,7 @@ public void testOneHealthyOneDecommissioningRepFactorOne() { */ @Test - public void testOneHealthyTwoMaintenanceMinRepOfTwo() { + void testOneHealthyTwoMaintenanceMinRepOfTwo() { Set replica = registerNodes(IN_SERVICE, IN_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -281,7 +276,7 @@ public void testOneHealthyTwoMaintenanceMinRepOfTwo() { } @Test - public void testOneHealthyThreeMaintenanceMinRepOfTwo() { + void testOneHealthyThreeMaintenanceMinRepOfTwo() { Set replica = registerNodes(IN_SERVICE, IN_MAINTENANCE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -291,7 +286,7 @@ public void testOneHealthyThreeMaintenanceMinRepOfTwo() { } @Test - public void testOneHealthyTwoMaintenanceMinRepOfOne() { + void testOneHealthyTwoMaintenanceMinRepOfOne() { Set replica = registerNodes(IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -301,7 +296,7 @@ public void testOneHealthyTwoMaintenanceMinRepOfOne() { } @Test - public void testOneHealthyThreeMaintenanceMinRepOfTwoInFlightAdd() { + void testOneHealthyThreeMaintenanceMinRepOfTwoInFlightAdd() { 
Set replica = registerNodes(IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -311,7 +306,7 @@ public void testOneHealthyThreeMaintenanceMinRepOfTwoInFlightAdd() { } @Test - public void testAllMaintenance() { + void testAllMaintenance() { Set replica = registerNodes(IN_MAINTENANCE, ENTERING_MAINTENANCE, IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -320,13 +315,13 @@ public void testAllMaintenance() { validate(rcnt, false, 2, false); } - @Test /** * As we have exactly 3 healthy, but then an excess of maintenance copies * we ignore the over-replication caused by the maintenance copies until they * come back online, and then deal with them. */ - public void testThreeHealthyTwoInMaintenance() { + @Test + void testThreeHealthyTwoInMaintenance() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -335,13 +330,13 @@ public void testThreeHealthyTwoInMaintenance() { validate(rcnt, true, 0, false); } - @Test /** * This is somewhat similar to testThreeHealthyTwoInMaintenance() except now * one of the maintenance copies has become healthy and we will need to remove * the over-replicated healthy container. 
*/ - public void testFourHealthyOneInMaintenance() { + @Test + void testFourHealthyOneInMaintenance() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_SERVICE, IN_MAINTENANCE); @@ -352,7 +347,7 @@ public void testFourHealthyOneInMaintenance() { } @Test - public void testOneMaintenanceMinRepOfTwoRepFactorOne() { + void testOneMaintenanceMinRepOfTwoRepFactorOne() { Set replica = registerNodes(IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -361,7 +356,7 @@ public void testOneMaintenanceMinRepOfTwoRepFactorOne() { } @Test - public void testOneMaintenanceMinRepOfTwoRepFactorOneInFlightAdd() { + void testOneMaintenanceMinRepOfTwoRepFactorOneInFlightAdd() { Set replica = registerNodes(IN_MAINTENANCE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -370,7 +365,7 @@ public void testOneMaintenanceMinRepOfTwoRepFactorOneInFlightAdd() { } @Test - public void testOneHealthyOneMaintenanceRepFactorOne() { + void testOneHealthyOneMaintenanceRepFactorOne() { Set replica = registerNodes(IN_MAINTENANCE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = @@ -379,7 +374,7 @@ public void testOneHealthyOneMaintenanceRepFactorOne() { } @Test - public void testTwoDecomTwoMaintenanceOneInflightAdd() { + void testTwoDecomTwoMaintenanceOneInflightAdd() { Set replica = registerNodes(DECOMMISSIONED, DECOMMISSIONING, IN_MAINTENANCE, ENTERING_MAINTENANCE); @@ -390,7 +385,7 @@ public void testTwoDecomTwoMaintenanceOneInflightAdd() { } @Test - public void testHealthyContainerIsHealthy() { + void testHealthyContainerIsHealthy() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); @@ -400,7 +395,7 @@ public void testHealthyContainerIsHealthy() { } @Test - public void 
testIsHealthyWithDifferentReplicaStateNotHealthy() { + void testIsHealthyWithDifferentReplicaStateNotHealthy() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_SERVICE); for (ContainerReplica r : replica) { @@ -424,7 +419,7 @@ public void testIsHealthyWithDifferentReplicaStateNotHealthy() { } @Test - public void testIsHealthyWithMaintReplicaIsHealthy() { + void testIsHealthyWithMaintReplicaIsHealthy() { Set replica = registerNodes(IN_SERVICE, IN_SERVICE, IN_MAINTENANCE, ENTERING_MAINTENANCE); @@ -435,7 +430,7 @@ public void testIsHealthyWithMaintReplicaIsHealthy() { } @Test - public void testContainerWithNoReplicasIsMissing() { + void testContainerWithNoReplicasIsMissing() { Set replica = new HashSet<>(); ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = From eff1b3c2af5160ccf7c2f7afcf28b1b08d8bfe8b Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Fri, 1 Jul 2022 11:32:05 +0200 Subject: [PATCH 4/6] Add TODO for remainingRedundancyForMaintenance=0 --- .../hdds/scm/container/replication/ReplicationManager.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index e4d2470929a0..8e4ef51b842b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -504,6 +504,7 @@ private ECContainerReplicaCount getECContainerReplicaCount( containerInfo.containerID()); List pendingOps = containerReplicaPendingOps.getPendingOps(containerInfo.containerID()); + // TODO: define maintenance redundancy for EC return new ECContainerReplicaCount(containerInfo, replicas, pendingOps, 0); } From 
2f52e465e1a24f9de9749ffb886d79ff0f528407 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Fri, 1 Jul 2022 14:11:55 +0200 Subject: [PATCH 5/6] Add Jira ID in TODO --- .../hdds/scm/container/replication/ReplicationManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 8e4ef51b842b..eb40996a557c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -504,7 +504,7 @@ private ECContainerReplicaCount getECContainerReplicaCount( containerInfo.containerID()); List pendingOps = containerReplicaPendingOps.getPendingOps(containerInfo.containerID()); - // TODO: define maintenance redundancy for EC + // TODO: define maintenance redundancy for EC (HDDS-6975) return new ECContainerReplicaCount(containerInfo, replicas, pendingOps, 0); } From a98c8fc47fada423f9e3666fbf0ad7dd42d511ae Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Fri, 1 Jul 2022 18:12:12 +0200 Subject: [PATCH 6/6] Unify isMissing and unRecoverable as isUnrecoverable --- .../hadoop/hdds/scm/container/ContainerReplicaCount.java | 2 +- .../hdds/scm/container/ECContainerReplicaCount.java | 8 ++------ .../hdds/scm/container/RatisContainerReplicaCount.java | 2 +- .../scm/container/replication/ECContainerHealthCheck.java | 2 +- .../container/replication/ECUnderReplicationHandler.java | 2 +- .../container/replication/LegacyReplicationManager.java | 2 +- .../scm/container/states/TestECContainerReplicaCount.java | 8 ++++---- .../container/states/TestRatisContainerReplicaCount.java | 2 +- 8 files changed, 12 insertions(+), 16 deletions(-) diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java index 3884a7e37f29..e23c4e691e0f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java @@ -64,5 +64,5 @@ && getReplicas().stream() * * @return true if there are insufficient replicas, false otherwise. */ - boolean isMissing(); + boolean isUnrecoverable(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java index 375b0c8da193..822fb3a54889 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ECContainerReplicaCount.java @@ -148,11 +148,6 @@ public int getMaintenanceCount() { return maintenanceIndexes.size(); } - @Override - public boolean isMissing() { - return unRecoverable(); - } - /** * Get a set containing all decommissioning indexes, or an empty set if none * are decommissioning. Note it is possible for an index to be @@ -205,7 +200,8 @@ public Set maintenanceIndexes() { * Ie, less than EC Datanum containers are present. * @return True if the container cannot be recovered, false otherwise. 
*/ - public boolean unRecoverable() { + @Override + public boolean isUnrecoverable() { Set distinct = new HashSet<>(); distinct.addAll(healthyIndexes.keySet()); distinct.addAll(decommissionIndexes.keySet()); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/RatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/RatisContainerReplicaCount.java index 9811fbe1a188..f7c5b28f0058 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/RatisContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/RatisContainerReplicaCount.java @@ -266,7 +266,7 @@ public boolean isOverReplicated() { * @return true if there are no replicas, false otherwise. */ @Override - public boolean isMissing() { + public boolean isUnrecoverable() { return getReplicas().isEmpty(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java index 8e86564823cc..b79fc03d0721 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECContainerHealthCheck.java @@ -71,7 +71,7 @@ public ContainerHealthResult checkHealth(ContainerInfo container, return new ContainerHealthResult.UnderReplicatedHealthResult( container, remainingRedundancy, dueToDecommission, replicaCount.isSufficientlyReplicated(true), - replicaCount.unRecoverable()); + replicaCount.isUnrecoverable()); } if (replicaCount.isOverReplicated(false)) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java index 3bf016124d47..64cdadf266cd 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ECUnderReplicationHandler.java @@ -118,7 +118,7 @@ public Map> processAndCreateCommands( container.getContainerID(), replicaCount.getReplicas()); return emptyMap(); } - if (replicaCount.unRecoverable()) { + if (replicaCount.isUnrecoverable()) { LOG.warn("The container {} is unrecoverable. The available replicas" + " are: {}.", container.containerID(), replicaCount.getReplicas()); return emptyMap(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java index 374a23141352..33e697867c06 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyReplicationManager.java @@ -525,7 +525,7 @@ protected void processContainer(ContainerInfo container, if (!sufficientlyReplicated) { report.incrementAndSample( HealthState.UNDER_REPLICATED, container.containerID()); - if (replicaSet.isMissing()) { + if (replicaSet.isUnrecoverable()) { report.incrementAndSample(HealthState.MISSING, container.containerID()); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java index 94c1dd5e4b60..37e105fbee24 100644 --- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestECContainerReplicaCount.java @@ -70,7 +70,7 @@ public void testPerfectlyReplicatedContainer() { new ECContainerReplicaCount(container, replica, Collections.emptyList(), 1); Assertions.assertTrue(rcnt.isSufficientlyReplicated(false)); - Assertions.assertFalse(rcnt.unRecoverable()); + Assertions.assertFalse(rcnt.isUnrecoverable()); } @Test @@ -467,14 +467,14 @@ public void testMissing() { ECContainerReplicaCount rcnt = new ECContainerReplicaCount(container, new HashSet<>(), Collections.emptyList(), 1); - Assertions.assertTrue(rcnt.unRecoverable()); + Assertions.assertTrue(rcnt.isUnrecoverable()); Assertions.assertEquals(5, rcnt.unavailableIndexes(true).size()); Set replica = ReplicationTestUtil .createReplicas(Pair.of(IN_SERVICE, 1), Pair.of(IN_MAINTENANCE, 2)); rcnt = new ECContainerReplicaCount(container, replica, Collections.emptyList(), 1); - Assertions.assertTrue(rcnt.unRecoverable()); + Assertions.assertTrue(rcnt.isUnrecoverable()); Assertions.assertEquals(3, rcnt.unavailableIndexes(true).size()); Assertions.assertEquals(0, rcnt.additionalMaintenanceCopiesNeeded()); @@ -485,7 +485,7 @@ public void testMissing() { rcnt = new ECContainerReplicaCount(container, replica, Collections.emptyList(), 1); // Not missing as the decommission replicas are still online - Assertions.assertFalse(rcnt.unRecoverable()); + Assertions.assertFalse(rcnt.isUnrecoverable()); Assertions.assertEquals(0, rcnt.unavailableIndexes(true).size()); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java index 51a6bdfe6ffe..5e5767c8ec02 100644 --- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/states/TestRatisContainerReplicaCount.java @@ -435,7 +435,7 @@ void testContainerWithNoReplicasIsMissing() { ContainerInfo container = createContainer(HddsProtos.LifeCycleState.CLOSED); RatisContainerReplicaCount rcnt = new RatisContainerReplicaCount(container, replica, 0, 0, 3, 2); - assertTrue(rcnt.isMissing()); + assertTrue(rcnt.isUnrecoverable()); assertFalse(rcnt.isSufficientlyReplicated()); }