Skip to content

Commit b63efef

Browse files
authored
HDDS-3498. Shutdown datanode if address is already in use (apache#7256)
(cherry picked from commit 30da31f)
1 parent 0d16e1f commit b63efef

3 files changed

Lines changed: 23 additions & 7 deletions

File tree

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/EndpointStateMachine.java

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -234,12 +234,17 @@ public void logIfNeeded(Exception ex) {
234234
}
235235

236236
if (missCounter == 0) {
237+
long missedDurationSeconds = TimeUnit.MILLISECONDS.toSeconds(
238+
this.getMissedCount() * getScmHeartbeatInterval(this.conf)
239+
);
237240
LOG.warn(
238-
"Unable to communicate to {} server at {} for past {} seconds.",
239-
serverName,
240-
getAddress().getHostString() + ":" + getAddress().getPort(),
241-
TimeUnit.MILLISECONDS.toSeconds(this.getMissedCount() *
242-
getScmHeartbeatInterval(this.conf)), ex);
241+
"Unable to communicate to {} server at {}:{} for past {} seconds.",
242+
serverName,
243+
address.getAddress(),
244+
address.getPort(),
245+
missedDurationSeconds,
246+
ex
247+
);
243248
}
244249

245250
if (LOG.isTraceEnabled()) {

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
package org.apache.hadoop.ozone.container.common.states.endpoint;
1818

1919
import java.io.IOException;
20+
import java.net.BindException;
2021
import java.util.concurrent.Callable;
2122

2223
import org.apache.hadoop.hdds.conf.ConfigurationSource;
@@ -104,7 +105,7 @@ public EndpointStateMachine.EndPointStates call() throws Exception {
104105
LOG.debug("Cannot execute GetVersion task as endpoint state machine " +
105106
"is in {} state", rpcEndPoint.getState());
106107
}
107-
} catch (DiskOutOfSpaceException ex) {
108+
} catch (DiskOutOfSpaceException | BindException ex) {
108109
rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN);
109110
} catch (IOException ex) {
110111
rpcEndPoint.logIfNeeded(ex);

hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
package org.apache.hadoop.ozone.container.common.transport.server;
2020

2121
import java.io.IOException;
22+
import java.net.BindException;
2223
import java.util.Collections;
2324
import java.util.List;
2425
import java.util.UUID;
@@ -185,7 +186,16 @@ public HddsProtos.ReplicationType getServerType() {
185186
@Override
186187
public void start() throws IOException {
187188
if (!isStarted) {
188-
server.start();
189+
try {
190+
server.start();
191+
} catch (IOException e) {
192+
LOG.error("Error while starting the server", e);
193+
if (e.getMessage().contains("Failed to bind to address")) {
194+
throw new BindException(e.getMessage());
195+
} else {
196+
throw e;
197+
}
198+
}
189199
int realPort = server.getPort();
190200

191201
if (port == 0) {

0 commit comments

Comments
 (0)