Skip to content

Commit 2d4fd39

Browse files
committed
Check MD5 while doing snapshot
S3 offers a feature that clients can use to verify data integrity on upload. Whenever an object is PUT to an S3 bucket, the client gets back the base64-encoded `MD5` digest of the uploaded object and can check that it matches the `MD5` digest computed locally. For reference, please see the [S3 PutObject API](http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html). Closes elastic#186.
1 parent 9206b0d commit 2d4fd39

File tree

1 file changed

+29
-2
lines changed

1 file changed

+29
-2
lines changed

src/main/java/org/elasticsearch/cloud/aws/blobstore/DefaultS3OutputStream.java

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,18 @@
2121

2222
import com.amazonaws.AmazonClientException;
2323
import com.amazonaws.services.s3.model.*;
24+
import com.amazonaws.util.Base64;
25+
import org.elasticsearch.common.logging.ESLogger;
26+
import org.elasticsearch.common.logging.Loggers;
2427
import org.elasticsearch.common.unit.ByteSizeUnit;
2528
import org.elasticsearch.common.unit.ByteSizeValue;
2629

2730
import java.io.ByteArrayInputStream;
2831
import java.io.IOException;
2932
import java.io.InputStream;
33+
import java.security.DigestInputStream;
34+
import java.security.MessageDigest;
35+
import java.security.NoSuchAlgorithmException;
3036
import java.util.ArrayList;
3137
import java.util.List;
3238

@@ -49,7 +55,7 @@
4955
public class DefaultS3OutputStream extends S3OutputStream {
5056

5157
private static final ByteSizeValue MULTIPART_MAX_SIZE = new ByteSizeValue(5, ByteSizeUnit.GB);
52-
58+
private static final ESLogger logger = Loggers.getLogger("cloud.aws");
5359
/**
5460
* Multipart Upload API data
5561
*/
@@ -120,7 +126,28 @@ protected void doUpload(S3BlobStore blobStore, String bucketName, String blobNam
120126
md.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
121127
}
122128
md.setContentLength(length);
123-
blobStore.client().putObject(bucketName, blobName, is, md);
129+
130+
InputStream inputStream = is;
131+
132+
// We try to compute a MD5 while reading it
133+
MessageDigest messageDigest;
134+
try {
135+
messageDigest = MessageDigest.getInstance("MD5");
136+
inputStream = new DigestInputStream(is, messageDigest);
137+
} catch (NoSuchAlgorithmException impossible) {
138+
// Every implementation of the Java platform is required to support MD5 (see MessageDigest)
139+
throw new RuntimeException(impossible);
140+
}
141+
PutObjectResult putObjectResult = blobStore.client().putObject(bucketName, blobName, inputStream, md);
142+
143+
String localMd5 = Base64.encodeAsString(messageDigest.digest());
144+
String remoteMd5 = putObjectResult.getContentMd5();
145+
if (!localMd5.equals(remoteMd5)) {
146+
logger.debug("MD5 local [{}], remote [{}] are not equal...", localMd5, remoteMd5);
147+
throw new AmazonS3Exception("MD5 local [" + localMd5 +
148+
"], remote [" + remoteMd5 +
149+
"] are not equal...");
150+
}
124151
}
125152

126153
private void initializeMultipart() {

0 commit comments

Comments
 (0)