Skip to content

Commit 9a2bde1

Browse files
hemantk-12 authored and xichen01 committed
HDDS-9802. Tool to fix corrupted snapshot chain (apache#6386)
(cherry picked from commit e907316)
1 parent 986be02 commit 9a2bde1

8 files changed

Lines changed: 376 additions & 4 deletions

File tree

hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/IOUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ public static void close(Logger logger, AutoCloseable... closeables) {
6868
* Close each argument, catching exceptions and logging them as error.
6969
*/
7070
public static void close(Logger logger,
71-
Collection<AutoCloseable> closeables) {
71+
Collection<? extends AutoCloseable> closeables) {
7272
if (closeables == null) {
7373
return;
7474
}
@@ -95,7 +95,7 @@ public static void closeQuietly(AutoCloseable... closeables) {
9595
/**
9696
* Close each argument, swallowing exceptions.
9797
*/
98-
public static void closeQuietly(Collection<AutoCloseable> closeables) {
98+
public static void closeQuietly(Collection<? extends AutoCloseable> closeables) {
9999
close(null, closeables);
100100
}
101101
}

hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/SnapshotInfo.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@
4949
* This class is used for storing info related to Snapshots.
5050
*
5151
* Each snapshot created has an associated SnapshotInfo entry
52-
* containing the snapshotid, snapshot path,
53-
* snapshot checkpoint directory, previous snapshotid
52+
* containing the snapshotId, snapshot path,
53+
* snapshot checkpoint directory, previous snapshotId
5454
* for the snapshot path & global amongst other necessary fields.
5555
*/
5656
public final class SnapshotInfo implements Auditable, CopyObject<SnapshotInfo> {

hadoop-ozone/dist/src/shell/ozone/ozone

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ function ozone_usage
5959
ozone_add_subcommand "dtutil" client "operations related to delegation tokens"
6060
ozone_add_subcommand "admin" client "Ozone admin tool"
6161
ozone_add_subcommand "debug" client "Ozone debug tool"
62+
ozone_add_subcommand "repair" client "Ozone repair tool"
6263
ozone_add_subcommand "checknative" client "checks if native libraries are loaded"
6364

6465
ozone_generate_usage "${OZONE_SHELL_EXECNAME}" false
@@ -236,6 +237,11 @@ function ozonecmd_case
236237
OZONE_DEBUG_OPTS="${OZONE_DEBUG_OPTS} ${OZONE_MODULE_ACCESS_ARGS}"
237238
OZONE_RUN_ARTIFACT_NAME="ozone-tools"
238239
;;
240+
repair)
241+
OZONE_CLASSNAME=org.apache.hadoop.ozone.repair.OzoneRepair
242+
OZONE_DEBUG_OPTS="${OZONE_DEBUG_OPTS} ${OZONE_MODULE_ACCESS_ARGS}"
243+
OZONE_RUN_ARTIFACT_NAME="ozone-tools"
244+
;;
239245
checknative)
240246
OZONE_CLASSNAME=org.apache.hadoop.ozone.shell.checknative.CheckNative
241247
OZONE_RUN_ARTIFACT_NAME="ozone-tools"
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.ozone.repair;
20+
21+
import com.google.common.annotations.VisibleForTesting;
22+
import org.apache.hadoop.hdds.cli.GenericCli;
23+
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
24+
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
25+
import picocli.CommandLine;
26+
27+
/**
28+
* Ozone Repair Command line tool.
29+
*/
30+
@CommandLine.Command(name = "ozone repair",
31+
description = "Operational tool to repair Ozone",
32+
versionProvider = HddsVersionProvider.class,
33+
mixinStandardHelpOptions = true)
34+
public class OzoneRepair extends GenericCli {
35+
36+
private OzoneConfiguration ozoneConf;
37+
38+
public OzoneRepair() {
39+
super(OzoneRepair.class);
40+
}
41+
42+
@VisibleForTesting
43+
public OzoneRepair(OzoneConfiguration configuration) {
44+
super(OzoneRepair.class);
45+
this.ozoneConf = configuration;
46+
}
47+
48+
public OzoneConfiguration getOzoneConf() {
49+
if (ozoneConf == null) {
50+
ozoneConf = createOzoneConfiguration();
51+
}
52+
return ozoneConf;
53+
}
54+
55+
/**
56+
* Main for the Ozone Repair shell Command handling.
57+
*
58+
* @param argv - System Args Strings[]
59+
* @throws Exception
60+
*/
61+
public static void main(String[] argv) throws Exception {
62+
new OzoneRepair().run(argv);
63+
}
64+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.ozone.repair;
20+
21+
import org.apache.hadoop.hdds.cli.GenericCli;
22+
import org.apache.hadoop.hdds.cli.SubcommandWithParent;
23+
import org.kohsuke.MetaInfServices;
24+
import picocli.CommandLine;
25+
26+
import java.util.concurrent.Callable;
27+
28+
/**
29+
* Ozone Repair CLI for RocksDB.
30+
*/
31+
@CommandLine.Command(name = "ldb",
32+
description = "Operational tool to repair RocksDB table.")
33+
@MetaInfServices(SubcommandWithParent.class)
34+
public class RDBRepair implements Callable<Void>, SubcommandWithParent {
35+
36+
@CommandLine.Spec
37+
private CommandLine.Model.CommandSpec spec;
38+
39+
@CommandLine.Option(names = {"--db"},
40+
required = true,
41+
description = "Database File Path")
42+
private String dbPath;
43+
44+
public String getDbPath() {
45+
return dbPath;
46+
}
47+
48+
@Override
49+
public Void call() {
50+
GenericCli.missingSubcommand(spec);
51+
return null;
52+
}
53+
54+
@Override
55+
public Class<?> getParentType() {
56+
return OzoneRepair.class;
57+
}
58+
}
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.ozone.repair.om;
20+
21+
import org.apache.hadoop.hdds.cli.SubcommandWithParent;
22+
import org.apache.hadoop.hdds.utils.IOUtils;
23+
import org.apache.hadoop.hdds.utils.db.StringCodec;
24+
import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksDB;
25+
import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksIterator;
26+
import org.apache.hadoop.ozone.debug.RocksDBUtils;
27+
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
28+
import org.apache.hadoop.ozone.repair.RDBRepair;
29+
import org.apache.hadoop.ozone.shell.bucket.BucketUri;
30+
import org.kohsuke.MetaInfServices;
31+
import org.rocksdb.ColumnFamilyDescriptor;
32+
import org.rocksdb.ColumnFamilyHandle;
33+
import org.rocksdb.RocksDBException;
34+
import picocli.CommandLine;
35+
import picocli.CommandLine.Model.CommandSpec;
36+
37+
import java.io.IOException;
38+
import java.nio.charset.StandardCharsets;
39+
import java.util.ArrayList;
40+
import java.util.Arrays;
41+
import java.util.HashSet;
42+
import java.util.List;
43+
import java.util.Objects;
44+
import java.util.Set;
45+
import java.util.UUID;
46+
import java.util.concurrent.Callable;
47+
48+
import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
49+
import static org.apache.hadoop.ozone.OzoneConsts.SNAPSHOT_INFO_TABLE;
50+
51+
/**
52+
* Tool to repair snapshotInfoTable in case it has corrupted entries.
53+
*/
54+
@CommandLine.Command(
55+
name = "snapshot",
56+
description = "CLI to update global and path previous snapshot for a snapshot in case snapshot chain is corrupted."
57+
)
58+
@MetaInfServices(SubcommandWithParent.class)
59+
public class SnapshotRepair implements Callable<Void>, SubcommandWithParent {
60+
61+
@CommandLine.Spec
62+
private static CommandSpec spec;
63+
64+
@CommandLine.ParentCommand
65+
private RDBRepair parent;
66+
67+
@CommandLine.Mixin
68+
private BucketUri bucketUri;
69+
70+
@CommandLine.Parameters(description = "Snapshot name to update", index = "1")
71+
private String snapshotName;
72+
73+
@CommandLine.Option(names = {"--global-previous", "--gp"},
74+
required = true,
75+
description = "Global previous snapshotId to set for the given snapshot")
76+
private UUID globalPreviousSnapshotId;
77+
78+
@CommandLine.Option(names = {"--path-previous", "--pp"},
79+
required = true,
80+
description = "Path previous snapshotId to set for the given snapshot")
81+
private UUID pathPreviousSnapshotId;
82+
83+
@CommandLine.Option(names = {"--dry-run"},
84+
required = true,
85+
description = "To dry-run the command.", defaultValue = "true")
86+
private boolean dryRun;
87+
88+
@Override
89+
public Void call() throws Exception {
90+
List<ColumnFamilyHandle> cfHandleList = new ArrayList<>();
91+
List<ColumnFamilyDescriptor> cfDescList = RocksDBUtils.getColumnFamilyDescriptors(parent.getDbPath());
92+
93+
try (ManagedRocksDB db = ManagedRocksDB.open(parent.getDbPath(), cfDescList, cfHandleList)) {
94+
ColumnFamilyHandle snapshotInfoCfh = getSnapshotInfoCfh(cfHandleList);
95+
if (snapshotInfoCfh == null) {
96+
System.err.println(SNAPSHOT_INFO_TABLE + " is not in a column family in DB for the given path.");
97+
return null;
98+
}
99+
100+
String snapshotInfoTableKey = SnapshotInfo.getTableKey(bucketUri.getValue().getVolumeName(),
101+
bucketUri.getValue().getBucketName(), snapshotName);
102+
103+
SnapshotInfo snapshotInfo = getSnapshotInfo(db, snapshotInfoCfh, snapshotInfoTableKey);
104+
if (snapshotInfo == null) {
105+
System.err.println(snapshotName + " does not exist for given bucketUri: " + OM_KEY_PREFIX +
106+
bucketUri.getValue().getVolumeName() + OM_KEY_PREFIX + bucketUri.getValue().getBucketName());
107+
return null;
108+
}
109+
110+
// snapshotIdSet is the set of the all existed snapshots ID to make that the provided global previous and path
111+
// previous exist and after the update snapshot does not point to ghost snapshot.
112+
Set<UUID> snapshotIdSet = getSnapshotIdSet(db, snapshotInfoCfh);
113+
114+
if (Objects.equals(snapshotInfo.getSnapshotId(), globalPreviousSnapshotId)) {
115+
System.err.println("globalPreviousSnapshotId: '" + globalPreviousSnapshotId +
116+
"' is equal to given snapshot's ID: '" + snapshotInfo.getSnapshotId() + "'.");
117+
return null;
118+
}
119+
120+
if (Objects.equals(snapshotInfo.getSnapshotId(), pathPreviousSnapshotId)) {
121+
System.err.println("pathPreviousSnapshotId: '" + pathPreviousSnapshotId +
122+
"' is equal to given snapshot's ID: '" + snapshotInfo.getSnapshotId() + "'.");
123+
return null;
124+
}
125+
126+
if (!snapshotIdSet.contains(globalPreviousSnapshotId)) {
127+
System.err.println("globalPreviousSnapshotId: '" + globalPreviousSnapshotId +
128+
"' does not exist in snapshotInfoTable.");
129+
return null;
130+
}
131+
132+
if (!snapshotIdSet.contains(pathPreviousSnapshotId)) {
133+
System.err.println("pathPreviousSnapshotId: '" + pathPreviousSnapshotId +
134+
"' does not exist in snapshotInfoTable.");
135+
return null;
136+
}
137+
138+
snapshotInfo.setGlobalPreviousSnapshotId(globalPreviousSnapshotId);
139+
snapshotInfo.setPathPreviousSnapshotId(pathPreviousSnapshotId);
140+
141+
if (dryRun) {
142+
System.out.println("SnapshotInfo would be updated to : " + snapshotInfo);
143+
} else {
144+
byte[] snapshotInfoBytes = SnapshotInfo.getCodec().toPersistedFormat(snapshotInfo);
145+
db.get()
146+
.put(snapshotInfoCfh, StringCodec.get().toPersistedFormat(snapshotInfoTableKey), snapshotInfoBytes);
147+
148+
System.out.println("Snapshot Info is updated to : " +
149+
getSnapshotInfo(db, snapshotInfoCfh, snapshotInfoTableKey));
150+
}
151+
} catch (RocksDBException exception) {
152+
System.err.println("Failed to update the RocksDB for the given path: " + parent.getDbPath());
153+
System.err.println(
154+
"Make sure that Ozone entity (OM, SCM or DN) is not running for the give dbPath and current host.");
155+
System.err.println(exception);
156+
} finally {
157+
IOUtils.closeQuietly(cfHandleList);
158+
}
159+
160+
return null;
161+
}
162+
163+
private Set<UUID> getSnapshotIdSet(ManagedRocksDB db, ColumnFamilyHandle snapshotInfoCfh)
164+
throws IOException {
165+
Set<UUID> snapshotIdSet = new HashSet<>();
166+
try (ManagedRocksIterator iterator = new ManagedRocksIterator(db.get().newIterator(snapshotInfoCfh))) {
167+
iterator.get().seekToFirst();
168+
169+
while (iterator.get().isValid()) {
170+
SnapshotInfo snapshotInfo = SnapshotInfo.getCodec().fromPersistedFormat(iterator.get().value());
171+
snapshotIdSet.add(snapshotInfo.getSnapshotId());
172+
iterator.get().next();
173+
}
174+
}
175+
return snapshotIdSet;
176+
}
177+
178+
private ColumnFamilyHandle getSnapshotInfoCfh(List<ColumnFamilyHandle> cfHandleList) throws RocksDBException {
179+
byte[] nameBytes = SNAPSHOT_INFO_TABLE.getBytes(StandardCharsets.UTF_8);
180+
181+
for (ColumnFamilyHandle cf : cfHandleList) {
182+
if (Arrays.equals(cf.getName(), nameBytes)) {
183+
return cf;
184+
}
185+
}
186+
187+
return null;
188+
}
189+
190+
private SnapshotInfo getSnapshotInfo(ManagedRocksDB db, ColumnFamilyHandle snapshotInfoCfh, String snapshotInfoLKey)
191+
throws IOException, RocksDBException {
192+
byte[] bytes = db.get().get(snapshotInfoCfh, StringCodec.get().toPersistedFormat(snapshotInfoLKey));
193+
return bytes != null ? SnapshotInfo.getCodec().fromPersistedFormat(bytes) : null;
194+
}
195+
196+
@Override
197+
public Class<?> getParentType() {
198+
return RDBRepair.class;
199+
}
200+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/**
20+
* OM related repair tools.
21+
*/
22+
package org.apache.hadoop.ozone.repair.om;

0 commit comments

Comments
 (0)