Skip to content

Commit 85b6dec

Browse files
committed
HDFS-17732. addExpectedReplicasToPending should add EC block expectedStorages in PendingReconstructionBlocks
1 parent 5067082 commit 85b6dec

File tree

3 files changed

+78
-18
lines changed

3 files changed

+78
-18
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,30 +1252,21 @@ public boolean commitOrCompleteLastBlock(BlockCollection bc,
12521252
/**
12531253
* If IBR is not sent from expected locations yet, add the datanodes to
12541254
* pendingReconstruction in order to keep RedundancyMonitor from scheduling
1255-
* the block. In case of erasure coding blocks, adds only in case there
1256-
* isn't any missing node.
1255+
* the block.
12571256
*/
12581257
public void addExpectedReplicasToPending(BlockInfo blk) {
1259-
boolean addForStriped = false;
12601258
DatanodeStorageInfo[] expectedStorages =
12611259
blk.getUnderConstructionFeature().getExpectedStorageLocations();
1262-
if (blk.isStriped()) {
1263-
BlockInfoStriped blkStriped = (BlockInfoStriped) blk;
1264-
addForStriped =
1265-
blkStriped.getRealTotalBlockNum() == expectedStorages.length;
1266-
}
1267-
if (!blk.isStriped() || addForStriped) {
1268-
if (expectedStorages.length - blk.numNodes() > 0) {
1269-
ArrayList<DatanodeStorageInfo> pendingNodes = new ArrayList<>();
1270-
for (DatanodeStorageInfo storage : expectedStorages) {
1271-
DatanodeDescriptor dnd = storage.getDatanodeDescriptor();
1272-
if (blk.findStorageInfo(dnd) == null) {
1273-
pendingNodes.add(storage);
1274-
}
1260+
if (expectedStorages.length - blk.numNodes() > 0) {
1261+
ArrayList<DatanodeStorageInfo> pendingNodes = new ArrayList<>();
1262+
for (DatanodeStorageInfo storage : expectedStorages) {
1263+
DatanodeDescriptor dnd = storage.getDatanodeDescriptor();
1264+
if (blk.findStorageInfo(dnd) == null) {
1265+
pendingNodes.add(storage);
12751266
}
1276-
pendingReconstruction.increment(blk,
1277-
pendingNodes.toArray(new DatanodeStorageInfo[pendingNodes.size()]));
12781267
}
1268+
pendingReconstruction.increment(blk,
1269+
pendingNodes.toArray(new DatanodeStorageInfo[pendingNodes.size()]));
12791270
}
12801271
}
12811272

@@ -3868,6 +3859,10 @@ private Block addStoredBlock(final BlockInfo block,
38683859
processExtraRedundancyBlock(storedBlock, fileRedundancy, node,
38693860
delNodeHint);
38703861
}
3862+
if (storedBlock.isStriped() && storedBlock.isComplete() && hasEnoughEffectiveReplicas(
3863+
storedBlock, num, 0)) {
3864+
pendingReconstruction.remove(storedBlock);
3865+
}
38713866
// If the file redundancy has reached desired value
38723867
// we can remove any corrupt replicas the block may have
38733868
int corruptReplicasCount = corruptReplicas.numCorruptReplicas(storedBlock);
@@ -5049,6 +5044,9 @@ public void checkRedundancy(BlockCollection bc) {
50495044
} else if (shouldProcessExtraRedundancy(n, expected)) {
50505045
processExtraRedundancyBlock(block, expected, null, null);
50515046
}
5047+
if (block.isStriped() && hasEnoughEffectiveReplicas(block, n, 0)) {
5048+
pendingReconstruction.remove(block);
5049+
}
50525050
}
50535051
}
50545052

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_CLIENT_TOPOLOGY_RESOLUTION_ENABLED;
2222
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_FILE_CLOSE_NUM_COMMITTED_ALLOWED_KEY;
23+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY;
24+
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY;
2325
import static org.apache.hadoop.hdfs.client.HdfsAdmin.TRASH_PERMISSION;
2426
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_CLIENT_CONTEXT;
2527
import static org.assertj.core.api.Assertions.assertThat;
@@ -107,10 +109,15 @@
107109
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
108110
import org.apache.hadoop.hdfs.protocol.HdfsConstants.StoragePolicySatisfierMode;
109111
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
112+
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
110113
import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
111114
import org.apache.hadoop.hdfs.protocol.OpenFilesIterator;
115+
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped;
116+
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
117+
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
112118
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
113119
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRackFaultTolerant;
120+
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
114121
import org.apache.hadoop.hdfs.server.datanode.DataNode;
115122
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
116123
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
@@ -2780,4 +2787,54 @@ public void testAllRackFailureDuringPipelineSetup() throws Exception {
27802787
}
27812788
}
27822789

2790+
@Test(timeout = 60000)
2791+
public void testECAddExpectedReplicasToPending() throws Exception {
2792+
HdfsConfiguration conf = new HdfsConfiguration();
2793+
conf.setInt(DFS_NAMENODE_FILE_CLOSE_NUM_COMMITTED_ALLOWED_KEY, 0);
2794+
conf.setInt(DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, 10);
2795+
conf.setInt(DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 3);
2796+
try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build()) {
2797+
cluster.waitActive();
2798+
final DistributedFileSystem dfs = cluster.getFileSystem();
2799+
Path dir = new Path("/dir");
2800+
dfs.mkdirs(dir);
2801+
dfs.enableErasureCodingPolicy("XOR-2-1-1024k");
2802+
dfs.setErasureCodingPolicy(dir, "XOR-2-1-1024k");
2803+
2804+
try (FSDataOutputStream str = dfs.create(new Path("/dir/file"));) {
2805+
DataNodeTestUtils.pauseIBR(cluster.getDataNodes().get(0));
2806+
DataNodeTestUtils.pauseIBR(cluster.getDataNodes().get(1));
2807+
Thread.sleep(1000);
2808+
for (int i = 0; i < 1024 * 1024; i++) {
2809+
str.write(i);
2810+
}
2811+
str.flush();
2812+
// Wait for dn2 IBR.
2813+
Thread.sleep(2000);
2814+
}
2815+
2816+
LocatedBlocks locatedBlocks = dfs.getClient().getBlockLocations("/dir/file", 1024 * 1024);
2817+
BlockManager blockManager = cluster.getNamesystem().getBlockManager();
2818+
BlockInfoStriped blockInfo = (BlockInfoStriped) blockManager.getStoredBlock(
2819+
locatedBlocks.getLocatedBlocks().get(0).getBlock().getLocalBlock());
2820+
assertEquals(1, blockInfo.numNodes());
2821+
int pendingNum =
2822+
BlockManagerTestUtil.getNumReplicasInPendingReconstruction(blockManager, blockInfo);
2823+
assertEquals(2, pendingNum);
2824+
2825+
DataNodeTestUtils.resumeIBR(cluster.getDataNodes().get(0));
2826+
DataNodeTestUtils.resumeIBR(cluster.getDataNodes().get(1));
2827+
// Wait for dn0 dn1 IBR.
2828+
Thread.sleep(2000);
2829+
pendingNum =
2830+
BlockManagerTestUtil.getNumReplicasInPendingReconstruction(blockManager, blockInfo);
2831+
assertEquals(0, pendingNum);
2832+
2833+
blockInfo = (BlockInfoStriped) blockManager.getStoredBlock(
2834+
locatedBlocks.getLocatedBlocks().get(0).getBlock().getLocalBlock());
2835+
assertEquals(2, blockInfo.numNodes());
2836+
assertEquals(BlockUCState.COMPLETE, blockInfo.getBlockUCState());
2837+
}
2838+
}
2839+
27832840
}

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ public static void wakeupPendingReconstructionTimerThread(
172172
blockManager.pendingReconstruction.getTimerThread().interrupt();
173173
}
174174

175+
public static int getNumReplicasInPendingReconstruction(final BlockManager blockManager,
176+
BlockInfo blockInfo) {
177+
return blockManager.pendingReconstruction.getNumReplicas(blockInfo);
178+
}
179+
175180
public static HeartbeatManager getHeartbeatManager(
176181
final BlockManager blockManager) {
177182
return blockManager.getDatanodeManager().getHeartbeatManager();

0 commit comments

Comments
 (0)