Skip to content

Commit 52b954d

Browse files
sodonnelS O'Donnell
authored andcommitted
HDDS-12232. Move container from QUASI_CLODED to CLOSED only when SCM sees all 3 origin node replicas (apache#7834)
Co-authored-by: S O'Donnell <sodonnell@cloudera.com>
1 parent 6a8e80c commit 52b954d

3 files changed

Lines changed: 128 additions & 27 deletions

File tree

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/QuasiClosedContainerHandler.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,12 @@ private boolean canForceCloseContainer(final ContainerInfo container,
104104
.map(ContainerReplica::getOriginDatanodeId)
105105
.distinct()
106106
.count();
107-
return uniqueQuasiClosedReplicaCount > (replicationFactor / 2);
107+
// We can only force close the container if we have seen all the replicas from unique origins.
108+
// Due to unexpected behavior when writing to ratis containers, it is possible for blocks to be committed
109+
// on the ratis leader, but not on the followers. A failure on the leader can result in two replicas
110+
// without the latest transactions, which are then force closed. This can result in data loss.
111+
// Note that if the 3rd replica is permanently lost, the container will be stuck in QUASI_CLOSED state forever.
112+
return uniqueQuasiClosedReplicaCount >= replicationFactor;
108113
}
109114

110115
/**

hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestQuasiClosedContainerHandler.java

Lines changed: 60 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
3131
import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
3232
import org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil;
33-
import org.junit.jupiter.api.Assertions;
3433
import org.junit.jupiter.api.BeforeEach;
3534
import org.junit.jupiter.api.Test;
3635
import org.mockito.Mockito;
@@ -44,10 +43,13 @@
4443
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED;
4544
import static org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer;
4645
import static org.apache.hadoop.hdds.scm.HddsTestUtils.getReplicas;
46+
import static org.junit.jupiter.api.Assertions.assertEquals;
47+
import static org.junit.jupiter.api.Assertions.assertFalse;
4748
import static org.mockito.ArgumentMatchers.any;
4849
import static org.mockito.ArgumentMatchers.anyBoolean;
4950
import static org.mockito.ArgumentMatchers.eq;
5051
import static org.mockito.Mockito.times;
52+
import static org.mockito.Mockito.verify;
5153

5254
/**
5355
* Tests for {@link QuasiClosedContainerHandler}. This handler is only meant
@@ -81,8 +83,8 @@ public void testECContainerReturnsFalse() {
8183
.setContainerReplicas(containerReplicas)
8284
.build();
8385

84-
Assertions.assertFalse(quasiClosedContainerHandler.handle(request));
85-
Mockito.verify(replicationManager, times(0))
86+
assertFalse(quasiClosedContainerHandler.handle(request));
87+
verify(replicationManager, times(0))
8688
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
8789
}
8890

@@ -100,19 +102,18 @@ public void testOpenContainerReturnsFalse() {
100102
.setContainerReplicas(containerReplicas)
101103
.build();
102104

103-
Assertions.assertFalse(quasiClosedContainerHandler.handle(request));
104-
Mockito.verify(replicationManager, times(0))
105+
assertFalse(quasiClosedContainerHandler.handle(request));
106+
verify(replicationManager, times(0))
105107
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
106108
}
107109

108110
/**
109-
* When a container is QUASI_CLOSED, and it has greater than 50% of its
111+
* When a container is QUASI_CLOSED, and it has only 2
110112
* replicas in QUASI_CLOSED state with unique origin node id,
111-
* the handler should send force close commands to the replica(s) with
112-
* highest BCSID.
113+
* the handler should not force close it as all 3 unique replicas are needed.
113114
*/
114115
@Test
115-
public void testQuasiClosedWithQuorumReturnsTrue() {
116+
public void testQuasiClosedWithQuorumReturnsFalse() {
116117
ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo(
117118
ratisReplicationConfig, 1, QUASI_CLOSED);
118119
Set<ContainerReplica> containerReplicas = ReplicationTestUtil
@@ -136,16 +137,50 @@ public void testQuasiClosedWithQuorumReturnsTrue() {
136137
.setReadOnly(true)
137138
.build();
138139

139-
Assertions.assertFalse(quasiClosedContainerHandler.handle(request));
140-
Assertions.assertFalse(quasiClosedContainerHandler.handle(readRequest));
141-
Mockito.verify(replicationManager, times(2))
140+
assertFalse(quasiClosedContainerHandler.handle(request));
141+
assertFalse(quasiClosedContainerHandler.handle(readRequest));
142+
verify(replicationManager, times(0))
143+
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
144+
assertEquals(1, request.getReport().getStat(
145+
ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
146+
}
147+
148+
/**
149+
* When a container is QUASI_CLOSED, and all 3 replicas are reported with unique
150+
* origins, it should be forced closed.
151+
*/
152+
@Test
153+
public void testQuasiClosedWithAllUniqueOriginSendsForceClose() {
154+
ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo(
155+
ratisReplicationConfig, 1, QUASI_CLOSED);
156+
// These 3 replicas will have the same BCSID and unique origin node ids
157+
Set<ContainerReplica> containerReplicas = ReplicationTestUtil
158+
.createReplicas(containerInfo.containerID(),
159+
State.QUASI_CLOSED, 0, 0, 0);
160+
ContainerCheckRequest request = new ContainerCheckRequest.Builder()
161+
.setPendingOps(Collections.emptyList())
162+
.setReport(new ReplicationManagerReport())
163+
.setContainerInfo(containerInfo)
164+
.setContainerReplicas(containerReplicas)
165+
.build();
166+
ContainerCheckRequest readRequest = new ContainerCheckRequest.Builder()
167+
.setPendingOps(Collections.emptyList())
168+
.setReport(new ReplicationManagerReport())
169+
.setContainerInfo(containerInfo)
170+
.setContainerReplicas(containerReplicas)
171+
.setReadOnly(true)
172+
.build();
173+
174+
assertFalse(quasiClosedContainerHandler.handle(request));
175+
assertFalse(quasiClosedContainerHandler.handle(readRequest));
176+
verify(replicationManager, times(3))
142177
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
143178
}
144179

145180
/**
146181
* The replicas are QUASI_CLOSED, but all of them have the same origin node
147-
* id. Since a quorum (greater than 50% of replicas with unique origin node
148-
* ids in QUASI_CLOSED state) is not formed, the handler should return false.
182+
* id. Since all replicas must have unique origin node ids, the handler
183+
* should not force close it.
149184
*/
150185
@Test
151186
public void testHealthyQuasiClosedContainerReturnsFalse() {
@@ -161,17 +196,16 @@ public void testHealthyQuasiClosedContainerReturnsFalse() {
161196
.setContainerReplicas(containerReplicas)
162197
.build();
163198

164-
Assertions.assertFalse(quasiClosedContainerHandler.handle(request));
165-
Mockito.verify(replicationManager, times(0))
199+
assertFalse(quasiClosedContainerHandler.handle(request));
200+
verify(replicationManager, times(0))
166201
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
167-
Assertions.assertEquals(1, request.getReport().getStat(
202+
assertEquals(1, request.getReport().getStat(
168203
ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
169204
}
170205

171206
/**
172207
* Only one replica is in QUASI_CLOSED state. This fails the condition of
173-
* having greater than 50% of replicas with unique origin nodes in
174-
* QUASI_CLOSED state. The handler should return false.
208+
* having all replicas with unique origin nodes in QUASI_CLOSED state.
175209
*/
176210
@Test
177211
public void testQuasiClosedWithTwoOpenReplicasReturnsFalse() {
@@ -191,10 +225,10 @@ public void testQuasiClosedWithTwoOpenReplicasReturnsFalse() {
191225
.setContainerReplicas(containerReplicas)
192226
.build();
193227

194-
Assertions.assertFalse(quasiClosedContainerHandler.handle(request));
195-
Mockito.verify(replicationManager, times(0))
228+
assertFalse(quasiClosedContainerHandler.handle(request));
229+
verify(replicationManager, times(0))
196230
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
197-
Assertions.assertEquals(1, request.getReport().getStat(
231+
assertEquals(1, request.getReport().getStat(
198232
ReplicationManagerReport.HealthState.QUASI_CLOSED_STUCK));
199233
}
200234

@@ -240,14 +274,14 @@ public void testReplicasWithHighestBCSIDAreClosed() {
240274
.setReadOnly(true)
241275
.build();
242276

243-
Assertions.assertFalse(quasiClosedContainerHandler.handle(request));
244-
Assertions.assertFalse(quasiClosedContainerHandler.handle(readRequest));
277+
assertFalse(quasiClosedContainerHandler.handle(request));
278+
assertFalse(quasiClosedContainerHandler.handle(readRequest));
245279
// verify close command was sent for replicas with sequence ID 1001, that
246280
// is dnTwo and dnThree
247-
Mockito.verify(replicationManager, times(1))
281+
verify(replicationManager, times(1))
248282
.sendCloseContainerReplicaCommand(eq(containerInfo), eq(dnTwo),
249283
anyBoolean());
250-
Mockito.verify(replicationManager, times(1))
284+
verify(replicationManager, times(1))
251285
.sendCloseContainerReplicaCommand(eq(containerInfo), eq(dnThree),
252286
anyBoolean());
253287
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
[
2+
{ "description": "Quasi-closed replicas with one open", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 12,
3+
"replicas": [
4+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 12, "isEmpty": false, "origin": "o1"},
5+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 12, "isEmpty": false, "origin": "o2"},
6+
{ "state": "OPEN", "index": 0, "datanode": "d3", "sequenceId": 12, "isEmpty": false, "origin": "o3"}
7+
],
8+
"expectation": { "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1},
9+
"checkCommands": [
10+
{ "type": "closeContainerCommand", "datanode": "d3" }
11+
],
12+
"commands": []
13+
},
14+
{ "description": "Quasi-closed with 2 replicas", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 12,
15+
"replicas": [
16+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 12, "isEmpty": false, "origin": "o1"},
17+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 12, "isEmpty": false, "origin": "o2"}
18+
],
19+
"expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1},
20+
"checkCommands": [],
21+
"commands": [
22+
{ "type": "replicateContainerCommand" }
23+
]
24+
},
25+
{ "description": "Quasi-closed with 3 replicas 2 origins", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 12,
26+
"replicas": [
27+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 12, "isEmpty": false, "origin": "o1"},
28+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 12, "isEmpty": false, "origin": "o2"},
29+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 12, "isEmpty": false, "origin": "o2"}
30+
],
31+
"expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1},
32+
"checkCommands": [],
33+
"commands": []
34+
},
35+
{ "description": "Quasi-closed with 3 replicas 3 origins", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 12,
36+
"replicas": [
37+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 12, "isEmpty": false, "origin": "o1"},
38+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 12, "isEmpty": false, "origin": "o2"},
39+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 12, "isEmpty": false, "origin": "o3"}
40+
],
41+
"expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 0 },
42+
"checkCommands": [
43+
{ "type": "closeContainerCommand", "datanode": "d1" },
44+
{ "type": "closeContainerCommand", "datanode": "d2" },
45+
{ "type": "closeContainerCommand", "datanode": "d3" }
46+
],
47+
"commands": []
48+
},
49+
{ "description": "Quasi-closed with 3 replicas 3 origins different BCSID", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 12,
50+
"replicas": [
51+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 12, "isEmpty": false, "origin": "o1"},
52+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 12, "isEmpty": false, "origin": "o2"},
53+
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 11, "isEmpty": false, "origin": "o3"}
54+
],
55+
"expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 0 },
56+
"checkCommands": [
57+
{ "type": "closeContainerCommand", "datanode": "d1" },
58+
{ "type": "closeContainerCommand", "datanode": "d2" }
59+
],
60+
"commands": []
61+
}
62+
]

0 commit comments

Comments
 (0)