-
Notifications
You must be signed in to change notification settings - Fork 599
HDDS-12239. Volume should not be marked as unhealthy when disk full #7830
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
64e4685
b620c21
caaa427
4404202
49d1956
b6f9c70
354efd1
b24600f
fb07fb0
1bdcbac
a9a3b46
16e4c88
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -58,9 +58,13 @@ public HddsVolume chooseVolume(List<HddsVolume> volumes, | |
| throw new DiskOutOfSpaceException("No more available volumes"); | ||
| } | ||
|
|
||
| List<HddsVolume> volumesWithWriteAllowed = | ||
| volumes.stream().filter(k -> k.getStorageState() != StorageVolume.VolumeState.READ_ONLY) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| AvailableSpaceFilter filter = new AvailableSpaceFilter(maxContainerSize); | ||
|
|
||
| List<HddsVolume> volumesWithEnoughSpace = volumes.stream() | ||
| List<HddsVolume> volumesWithEnoughSpace = volumesWithWriteAllowed.stream() | ||
| .filter(filter) | ||
| .collect(Collectors.toList()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please combine the streams and apply both filters to the same stream. (Or move the condition to |
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -220,14 +220,16 @@ public void checkAllVolumes(StorageVolumeChecker checker) | |
| Set<? extends StorageVolume> failedVolumes; | ||
| try { | ||
| failedVolumes = checker.checkAllVolumes(allVolumes); | ||
| if (failedVolumes.size() > 0) { | ||
| LOG.warn("checkAllVolumes got {} failed volumes - {}", | ||
| failedVolumes.size(), failedVolumes); | ||
| } | ||
| } catch (InterruptedException e) { | ||
| Thread.currentThread().interrupt(); | ||
| throw new IOException("Interrupted while running disk check", e); | ||
| } | ||
|
|
||
| if (failedVolumes.size() > 0) { | ||
| LOG.warn("checkAllVolumes got {} failed volumes - {}", | ||
| failedVolumes.size(), failedVolumes); | ||
| if (failedVolumeMap.size() > 0 || failedVolumes.size() > 0) { | ||
|
sadanand48 marked this conversation as resolved.
Outdated
|
||
| handleVolumeFailures(failedVolumes); | ||
| } else { | ||
| LOG.debug("checkAllVolumes encountered no failures"); | ||
|
|
@@ -482,7 +484,7 @@ public StorageLocationReport[] getStorageReport() { | |
| rootDir = volumeInfo.get().getRootDir(); | ||
| SpaceUsageSource usage = volumeInfo.get().getCurrentUsage(); | ||
| scmUsed = usage.getUsedSpace(); | ||
| remaining = usage.getAvailable(); | ||
| remaining = volume.getStorageState() == HddsVolume.VolumeState.READ_ONLY ? 0 : usage.getAvailable(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Will a remaining value of "0" confuse the usage statistics on the Recon and SCM UI? Maybe we should consider including the READ_ONLY state in the storage report.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this is a good suggestion. Currently the SCM usage CLI shows aggregated DN space usage including all volumes. We can show the count of READ_ONLY volumes.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This needs a proto change to include the volume state in the report. It can be done via another JIRA to also include the volume status. |
||
| capacity = usage.getCapacity(); | ||
| committed = (volume instanceof HddsVolume) ? | ||
| ((HddsVolume) volume).getCommittedBytes() : 0; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| import java.io.IOException; | ||
| import java.util.List; | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| import static org.apache.hadoop.ozone.container.common.volume.VolumeChoosingUtil.logIfSomeVolumesOutOfSpace; | ||
| import static org.apache.hadoop.ozone.container.common.volume.VolumeChoosingUtil.throwDiskOutOfSpace; | ||
|
|
@@ -51,6 +52,10 @@ public HddsVolume chooseVolume(List<HddsVolume> volumes, | |
| throw new DiskOutOfSpaceException("No more available volumes"); | ||
| } | ||
|
|
||
| List<HddsVolume> volumesWithWriteAllowed = | ||
| volumes.stream().filter(k -> k.getStorageState() != StorageVolume.VolumeState.READ_ONLY) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| AvailableSpaceFilter filter = new AvailableSpaceFilter(maxContainerSize); | ||
|
|
||
| // since volumes could've been removed because of the failure | ||
|
|
@@ -61,7 +66,7 @@ public HddsVolume chooseVolume(List<HddsVolume> volumes, | |
| int startVolumeIndex = currentVolumeIndex; | ||
|
|
||
| while (true) { | ||
| final HddsVolume volume = volumes.get(currentVolumeIndex); | ||
| final HddsVolume volume = volumesWithWriteAllowed.get(currentVolumeIndex); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Volumes should not be eagerly pre-filtered, they can be filtered in the loop. |
||
| // adjust for remaining capacity in Open containers | ||
| boolean hasEnoughSpace = filter.test(volume); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,7 +106,8 @@ public enum VolumeState { | |
| NON_EXISTENT, | ||
| INCONSISTENT, | ||
| NOT_FORMATTED, | ||
| NOT_INITIALIZED | ||
| NOT_INITIALIZED, | ||
| READ_ONLY | ||
| } | ||
|
|
||
| private volatile VolumeState state; | ||
|
|
@@ -599,35 +600,47 @@ private void cleanTmpDiskCheckDir() { | |
| @Override | ||
| public synchronized VolumeCheckResult check(@Nullable Boolean unused) | ||
| throws Exception { | ||
| DiskCheckUtil.ReadWriteStatus readWriteStatus = DiskCheckUtil.checkPermissions(storageDir); | ||
|
|
||
| boolean directoryChecksPassed = | ||
| DiskCheckUtil.checkExistence(storageDir) && | ||
| DiskCheckUtil.checkPermissions(storageDir); | ||
| DiskCheckUtil.checkExistence(storageDir); | ||
| // If the directory is not present or has incorrect permissions, fail the | ||
| // volume immediately. This is not an intermittent error. | ||
| if (!directoryChecksPassed) { | ||
| if (!directoryChecksPassed || readWriteStatus == DiskCheckUtil.ReadWriteStatus.READ_FAIL) { | ||
| if (Thread.currentThread().isInterrupted()) { | ||
| throw new InterruptedException("Directory check of volume " + this + | ||
| " interrupted."); | ||
| } | ||
| return VolumeCheckResult.FAILED; | ||
| } | ||
|
|
||
| if (readWriteStatus == DiskCheckUtil.ReadWriteStatus.WRITE_FAIL) { | ||
| setState(VolumeState.READ_ONLY); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ashishkumar50, thanks for working on this. Back in the container schema V2 era, when the disk was full, blocks could not be read because RocksDB could not be opened due to the full disk. Now that container V3 is used, things will be better. But it is not certain whether reading blocks will succeed or not.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For READ failures we already check in the container metadata scanner, which runs every 3 hours. That will take care of marking the container unhealthy if RocksDB doesn't open or a read doesn't pass through. |
||
| return VolumeCheckResult.HEALTHY; | ||
| } | ||
|
|
||
| // If IO test count is set to 0, IO tests for disk health are disabled. | ||
| if (ioTestCount == 0) { | ||
| return VolumeCheckResult.HEALTHY; | ||
|
errose28 marked this conversation as resolved.
|
||
| } | ||
|
|
||
| // Since IO errors may be intermittent, volume remains healthy until the | ||
| // threshold of failures is crossed. | ||
| boolean diskChecksPassed = DiskCheckUtil.checkReadWrite(storageDir, | ||
| readWriteStatus = DiskCheckUtil.checkReadWrite(storageDir, | ||
| diskCheckDir, healthCheckFileSize); | ||
| if (readWriteStatus == DiskCheckUtil.ReadWriteStatus.WRITE_FAIL) { | ||
| // Mark volume as READ only | ||
| setState(VolumeState.READ_ONLY); | ||
| return VolumeCheckResult.HEALTHY; | ||
| } | ||
| if (Thread.currentThread().isInterrupted()) { | ||
| // Thread interrupt may have caused IO operations to abort. Do not | ||
| // consider this a failure. | ||
| throw new InterruptedException("IO check of volume " + this + | ||
| " interrupted."); | ||
| } | ||
|
|
||
| boolean diskChecksPassed = readWriteStatus == DiskCheckUtil.ReadWriteStatus.READ_WRITE_OK; | ||
| // Move the sliding window of IO test results forward 1 by adding the | ||
| // latest entry and removing the oldest entry from the window. | ||
| // Update the failure counter for the new window. | ||
|
|
@@ -640,8 +653,7 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) | |
| currentIOFailureCount.decrementAndGet(); | ||
| } | ||
|
|
||
| // If the failure threshold has been crossed, fail the volume without | ||
| // further scans. | ||
| // If the failure threshold has been crossed, mark volume as READ only | ||
| // Once the volume is failed, it will not be checked anymore. | ||
| // The failure counts can be left as is. | ||
| if (currentIOFailureCount.get() > ioFailureTolerance) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
This naming is more consistent with the container scanner, which uses ScanResult. FWIW, we moved the scan result to a separate class in the reconciliation branch. Volume scanning is simpler than container scanning, though, so I think an enum is good for now.