Skip to content

Commit b97b7dc

Browse files
authored
HDDS-12326. Allow Quasi_Closed to Closed if there is an unhealthy replica <= highest BCSID (#7869)
1 parent a31d0fb commit b97b7dc

File tree

3 files changed

+156
-3
lines changed

3 files changed

+156
-3
lines changed

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,17 +99,33 @@ private boolean canForceCloseContainer(final ContainerInfo container,
9999
final Set<ContainerReplica> replicas) {
100100
final int replicationFactor =
101101
container.getReplicationConfig().getRequiredNodes();
102-
final long uniqueQuasiClosedReplicaCount = replicas.stream()
103-
.filter(r -> r.getState() == State.QUASI_CLOSED)
102+
103+
final long uniqueQuasiClosedOrUnhealthyReplicaCount = replicas.stream()
104+
.filter(r -> r.getState() == State.QUASI_CLOSED || r.getState() == State.UNHEALTHY)
104105
.map(ContainerReplica::getOriginDatanodeId)
105106
.distinct()
106107
.count();
108+
109+
long maxQCSeq = -1;
110+
long maxUnhealthySeq = -1;
111+
for (ContainerReplica r : replicas) {
112+
if (r.getState() == State.QUASI_CLOSED) {
113+
maxQCSeq = Math.max(maxQCSeq, r.getSequenceId());
114+
} else if (r.getState() == State.UNHEALTHY) {
115+
maxUnhealthySeq = Math.max(maxUnhealthySeq, r.getSequenceId());
116+
}
117+
}
118+
107119
// We can only force close the container if we have seen all the replicas from unique origins.
108120
// Due to unexpected behavior when writing to ratis containers, it is possible for blocks to be committed
109121
// on the ratis leader, but not on the followers. A failure on the leader can result in two replicas
110122
// without the latest transactions, which are then force closed. This can result in data loss.
111123
// Note that if the 3rd replica is permanently lost, the container will be stuck in QUASI_CLOSED state forever.
112-
return uniqueQuasiClosedReplicaCount >= replicationFactor;
124+
// It is possible to CLOSE a container that has one QC and the remaining UNHEALTHY, provided the QC is one of the
125+
// replicas with the highest sequence ID. If an UNHEALTHY replica has a higher sequence ID, the container will
126+
// remain in QUASI_CLOSED state.
127+
return maxQCSeq > -1 && maxQCSeq >= maxUnhealthySeq
128+
&& uniqueQuasiClosedOrUnhealthyReplicaCount >= replicationFactor;
113129
}
114130

115131
/**

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestQuasiClosedContainerHandler.java

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,90 @@ public void testQuasiClosedWithAllUniqueOriginSendsForceClose() {
177177
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
178178
}
179179

180+
/**
181+
* When a QUASI_CLOSED container has both QUASI_CLOSED and UNHEALTHY replicas, and all 3 replicas
182+
* are reported with unique origins, it should be force closed.
183+
*/
184+
@Test
185+
public void testQuasiClosedWithAllUniqueOriginAndUnhealthySendsForceClose() {
186+
ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo(
187+
ratisReplicationConfig, 1, QUASI_CLOSED);
188+
// These 3 replicas will have the same BCSID and unique origin node ids
189+
Set<ContainerReplica> containerReplicas = ReplicationTestUtil
190+
.createReplicas(containerInfo.containerID(),
191+
State.QUASI_CLOSED, 0, 0);
192+
containerReplicas.addAll(ReplicationTestUtil
193+
.createReplicas(containerInfo.containerID(),
194+
State.UNHEALTHY, 0));
195+
ContainerCheckRequest request = new ContainerCheckRequest.Builder()
196+
.setPendingOps(Collections.emptyList())
197+
.setReport(new ReplicationManagerReport())
198+
.setContainerInfo(containerInfo)
199+
.setContainerReplicas(containerReplicas)
200+
.build();
201+
ContainerCheckRequest readRequest = new ContainerCheckRequest.Builder()
202+
.setPendingOps(Collections.emptyList())
203+
.setReport(new ReplicationManagerReport())
204+
.setContainerInfo(containerInfo)
205+
.setContainerReplicas(containerReplicas)
206+
.setReadOnly(true)
207+
.build();
208+
209+
assertFalse(quasiClosedContainerHandler.handle(request));
210+
assertFalse(quasiClosedContainerHandler.handle(readRequest));
211+
verify(replicationManager, times(2))
212+
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
213+
}
214+
215+
/**
216+
* If an UNHEALTHY replica has the highest Sequence ID (also known as BCSID),
217+
* the container cannot be force closed and no close commands should be sent.
218+
*/
219+
@Test
220+
public void testQuasiClosedWithUnhealthyHavingHighestSeq() {
221+
final ContainerInfo containerInfo =
222+
getContainer(HddsProtos.LifeCycleState.QUASI_CLOSED);
223+
containerInfo.setUsedBytes(99);
224+
final ContainerID id = containerInfo.containerID();
225+
226+
// create replicas with unique origin DNs
227+
DatanodeDetails dnOne = randomDatanodeDetails();
228+
DatanodeDetails dnTwo = randomDatanodeDetails();
229+
DatanodeDetails dnThree = randomDatanodeDetails();
230+
231+
// 1001 is the highest sequence id
232+
final ContainerReplica replicaOne = getReplicas(
233+
id, State.QUASI_CLOSED, 1000L, dnOne.getUuid(), dnOne);
234+
final ContainerReplica replicaTwo = getReplicas(
235+
id, State.QUASI_CLOSED, 1000L, dnTwo.getUuid(), dnTwo);
236+
final ContainerReplica replicaThree = getReplicas(
237+
id, State.UNHEALTHY, 1001L, dnThree.getUuid(), dnThree);
238+
Set<ContainerReplica> containerReplicas = new HashSet<>();
239+
containerReplicas.add(replicaOne);
240+
containerReplicas.add(replicaTwo);
241+
containerReplicas.add(replicaThree);
242+
243+
ContainerCheckRequest request = new ContainerCheckRequest.Builder()
244+
.setPendingOps(Collections.emptyList())
245+
.setReport(new ReplicationManagerReport())
246+
.setContainerInfo(containerInfo)
247+
.setContainerReplicas(containerReplicas)
248+
.build();
249+
ContainerCheckRequest readRequest = new ContainerCheckRequest.Builder()
250+
.setPendingOps(Collections.emptyList())
251+
.setReport(new ReplicationManagerReport())
252+
.setContainerInfo(containerInfo)
253+
.setContainerReplicas(containerReplicas)
254+
.setReadOnly(true)
255+
.build();
256+
257+
assertFalse(quasiClosedContainerHandler.handle(request));
258+
assertFalse(quasiClosedContainerHandler.handle(readRequest));
259+
// verify no close commands are sent as the container cannot be closed.
260+
verify(replicationManager, times(0))
261+
.sendCloseContainerReplicaCommand(eq(containerInfo), any(), anyBoolean());
262+
}
263+
180264
/**
181265
* The replicas are QUASI_CLOSED, but all of them have the same origin node
182266
* id. Since all replicas must have unique origin node ids, the handler

hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,5 +58,58 @@
5858
{ "type": "closeContainerCommand", "datanode": "d2" }
5959
],
6060
"commands": []
61+
},
62+
{ "description": "Quasi-Closed with 2 replicas and unhealthy", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
63+
"replicas": [
64+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
65+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
66+
{ "state": "UNHEALTHY", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o3"}
67+
],
68+
"expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 0, "unhealthy": 0 },
69+
"checkCommands": [
70+
{ "type": "closeContainerCommand", "datanode": "d1" },
71+
{ "type": "closeContainerCommand", "datanode": "d2" }
72+
],
73+
"commands": [
74+
{ "type": "replicateContainerCommand" }
75+
]
76+
},
77+
{ "description": "Quasi-Closed with 1 replica and two unhealthy", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
78+
"replicas": [
79+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
80+
{ "state": "UNHEALTHY", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
81+
{ "state": "UNHEALTHY", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o3"}
82+
],
83+
"expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 0, "unhealthy": 0 },
84+
"checkCommands": [
85+
{ "type": "closeContainerCommand", "datanode": "d1" }
86+
],
87+
"commands": [
88+
{ "type": "replicateContainerCommand", "datanode": "d1" },
89+
{ "type": "replicateContainerCommand", "datanode": "d1" }
90+
]
91+
},
92+
{ "description": "Quasi-Closed with 2 replicas and unhealthy where unhealthy is highest BCSID", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11,
93+
"replicas": [
94+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
95+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
96+
{ "state": "UNHEALTHY", "index": 0, "datanode": "d3", "sequenceId": 11, "isEmpty": false, "origin": "o3"}
97+
],
98+
"expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 },
99+
"checkCommands": [],
100+
"commands": [
101+
{ "type": "replicateContainerCommand" }
102+
]
103+
},
104+
{ "description": "Quasi-Closed with 3 QC and one unhealthy", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11,
105+
"replicas": [
106+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
107+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
108+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
109+
{ "state": "UNHEALTHY", "index": 0, "datanode": "d4", "sequenceId": 11, "isEmpty": false, "origin": "o3"}
110+
],
111+
"expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 },
112+
"checkCommands": [],
113+
"commands": []
61114
}
62115
]

0 commit comments

Comments
 (0)