Skip to content

Commit 41ea9fb

Browse files
authored
HDDS-10029. Improved logs for SCMDeletedBlockTransactionStatusManager (apache#5888)
1 parent 8ab0d07 commit 41ea9fb

1 file changed

Lines changed: 21 additions & 21 deletions

File tree

hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ protected void onSent(UUID dnId, long scmCmdId) {
201201
}
202202

203203
protected void onDatanodeDead(UUID dnId) {
204-
LOG.info("Clean SCMCommand record for DN: {}", dnId);
204+
LOG.info("Clean SCMCommand record for Datanode: {}", dnId);
205205
scmCmdStatusRecord.remove(dnId);
206206
}
207207

@@ -252,12 +252,14 @@ private void updateStatus(UUID dnId, long scmCmdId,
252252
CommandStatus.Status newStatus) {
253253
Map<Long, CmdStatusData> recordForDn = scmCmdStatusRecord.get(dnId);
254254
if (recordForDn == null) {
255-
LOG.warn("Unknown Datanode: {} scmCmdId {} newStatus {}",
255+
LOG.warn("Unknown Datanode: {} Scm Command ID: {} report status {}",
256256
dnId, scmCmdId, newStatus);
257257
return;
258258
}
259259
if (recordForDn.get(scmCmdId) == null) {
260-
LOG.warn("Unknown SCM Command: {} Datanode {} newStatus {}",
260+
// Because of the delay in the DN report, the DN sometimes report obsolete
261+
// Command status that are cleared by the SCM.
262+
LOG.debug("Unknown SCM Command ID: {} Datanode: {} report status {}",
261263
scmCmdId, dnId, newStatus);
262264
return;
263265
}
@@ -293,22 +295,23 @@ private void updateStatus(UUID dnId, long scmCmdId,
293295
// which should not normally occur.
294296
LOG.error("Received {} status for a command marked TO_BE_SENT. " +
295297
"This indicates a potential issue in command handling. " +
296-
"SCM Command ID: {}, Datanode ID: {}, Current Status: {}",
298+
"SCM Command ID: {}, Datanode: {}, Current status: {}",
297299
newStatus, scmCmdId, dnId, oldStatus);
298300
removeScmCommand(dnId, scmCmdId);
299301
changed = true;
300302
}
301303
break;
302304
default:
303-
LOG.error("Can not update to Unknown new Status: {}", newStatus);
305+
LOG.error("Unexpected status from Datanode: {}. SCM Command ID: {} with status: {}.",
306+
dnId, scmCmdId, newStatus);
304307
break;
305308
}
306309
if (!changed) {
307-
LOG.warn("Cannot update illegal status for DN: {} ScmCommandId {} " +
308-
"Status From {} to {}", dnId, scmCmdId, oldStatus, newStatus);
310+
LOG.warn("Cannot update illegal status for Datanode: {} SCM Command ID: {} " +
311+
"status {} by DN report status {}", dnId, scmCmdId, oldStatus, newStatus);
309312
} else {
310-
LOG.debug("Successful update DN: {} ScmCommandId {} Status From {} to" +
311-
" {}", dnId, scmCmdId, oldStatus, newStatus);
313+
LOG.debug("Successful update Datanode: {} SCM Command ID: {} status From {} to" +
314+
" {}, DN report status {}", dnId, scmCmdId, oldStatus, statusData.getStatus(), newStatus);
312315
}
313316
}
314317

@@ -320,11 +323,8 @@ private void removeTimeoutScmCommand(UUID dnId,
320323
if (updateTime != null &&
321324
Duration.between(updateTime, now).toMillis() > timeoutMs) {
322325
CmdStatusData state = removeScmCommand(dnId, scmCmdId);
323-
LOG.warn("Remove Timeout SCM BlockDeletionCommand {} for DN {} " +
324-
"after without update {}ms}", state, dnId, timeoutMs);
325-
} else {
326-
LOG.warn("Timeout SCM scmCmdIds {} for DN {} " +
327-
"after without update {}ms}", scmCmdIds, dnId, timeoutMs);
326+
LOG.warn("SCM BlockDeletionCommand {} for Datanode: {} was removed after {}ms without update",
327+
state, dnId, timeoutMs);
328328
}
329329
}
330330
}
@@ -335,7 +335,7 @@ private CmdStatusData removeScmCommand(UUID dnId, long scmCmdId) {
335335
return null;
336336
}
337337
CmdStatusData statusData = record.remove(scmCmdId);
338-
LOG.debug("Remove ScmCommand {} for DN: {} ", statusData, dnId);
338+
LOG.debug("Remove ScmCommand {} for Datanode: {} ", statusData, dnId);
339339
return statusData;
340340
}
341341

@@ -483,7 +483,7 @@ public void commitTransactions(
483483
// Mostly likely it's a retried delete command response.
484484
if (LOG.isDebugEnabled()) {
485485
LOG.debug(
486-
"Transaction txId={} commit by dnId={} for containerID={}"
486+
"Transaction txId: {} commit by Datanode: {} for ContainerId: {}"
487487
+ " failed. Corresponding entry not found.", txID, dnId,
488488
containerId);
489489
}
@@ -508,13 +508,13 @@ public void commitTransactions(
508508
transactionToDNsCommitMap.remove(txID);
509509
transactionToRetryCountMap.remove(txID);
510510
if (LOG.isDebugEnabled()) {
511-
LOG.debug("Purging txId={} from block deletion log", txID);
511+
LOG.debug("Purging txId: {} from block deletion log", txID);
512512
}
513513
txIDsToBeDeleted.add(txID);
514514
}
515515
}
516516
if (LOG.isDebugEnabled()) {
517-
LOG.debug("Datanode txId={} containerId={} committed by dnId={}",
517+
LOG.debug("Datanode txId: {} ContainerId: {} committed by Datanode: {}",
518518
txID, containerId, dnId);
519519
}
520520
} catch (IOException e) {
@@ -557,7 +557,7 @@ private void processSCMCommandStatus(List<CommandStatus> deleteBlockStatus,
557557
lastStatus.put(cmdStatus.getCmdId(), cmdStatus);
558558
summary.put(cmdStatus.getCmdId(), cmdStatus.getStatus());
559559
});
560-
LOG.debug("CommandStatus {} from Datanode {} ", summary, dnID);
560+
LOG.debug("CommandStatus {} from Datanode: {} ", summary, dnID);
561561
for (Map.Entry<Long, CommandStatus> entry : lastStatus.entrySet()) {
562562
CommandStatus.Status status = entry.getValue().getStatus();
563563
scmDeleteBlocksCommandStatusManager.updateStatusByDNCommandStatus(
@@ -568,11 +568,11 @@ private void processSCMCommandStatus(List<CommandStatus> deleteBlockStatus,
568568
private boolean isTransactionFailed(DeleteBlockTransactionResult result) {
569569
if (LOG.isDebugEnabled()) {
570570
LOG.debug(
571-
"Got block deletion ACK from datanode, TXIDs={}, " + "success={}",
571+
"Got block deletion ACK from datanode, TXIDs {}, " + "success {}",
572572
result.getTxID(), result.getSuccess());
573573
}
574574
if (!result.getSuccess()) {
575-
LOG.warn("Got failed ACK for TXID={}, prepare to resend the "
575+
LOG.warn("Got failed ACK for TXID {}, prepare to resend the "
576576
+ "TX in next interval", result.getTxID());
577577
return true;
578578
}

0 commit comments

Comments
 (0)