Skip to content

Commit 74f1a4c

Browse files
committed
add s3 checksum algorithm
Signed-off-by: jorgee <jorge.ejarque@seqera.io>
1 parent 24903f2 commit 74f1a4c

File tree

15 files changed

+140
-22
lines changed

15 files changed

+140
-22
lines changed

docs/reference/config.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ aws {
101101
connectionTimeout = 10000
102102
uploadStorageClass = 'INTELLIGENT_TIERING'
103103
storageEncryption = 'AES256'
104+
checksumAlgorithm = 'SHA256'
104105
}
105106
batch {
106107
cliPath = '/home/ec2-user/miniconda/bin/aws'
@@ -194,6 +195,9 @@ The following settings are available:
194195
`aws.client.anonymous`
195196
: Allow the access of public S3 buckets without the need to provide AWS credentials (default: `false`). Any service that does not accept unsigned requests will return a service access error.
196197

198+
`aws.client.checksumAlgorithm`
199+
: The S3 checksum algorithm to use when uploading objects to S3. Can be one of `CRC32`, `CRC32C`, `SHA1`, `SHA256`, or `CRC64NVME`. This setting is ignored by command-line transfers performed with `s5cmd`.
200+
197201
`aws.client.s3Acl`
198202
: Allow the setting of predefined bucket permissions, also known as *canned ACL*. Permitted values are `Private`, `PublicRead`, `PublicReadWrite`, `AuthenticatedRead`, `LogDeliveryWrite`, `BucketOwnerRead`, `BucketOwnerFullControl`, and `AwsExecRead` (default: none). See [Amazon docs](https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl) for details.
199203

modules/nf-lang/src/main/java/nextflow/config/scopes/AwsClientConfig.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ public class AwsClientConfig implements ConfigScope {
2929
""")
3030
public boolean anonymous;
3131

32+
@ConfigOption
33+
@Description("""
34+
The S3 checksum algorithm to be used when saving objects on S3. Can be one of `CRC32`, `CRC32C`, `SHA1`, `SHA256` or `CRC64NVME`.
35+
""")
36+
public String checksumAlgorithm;
37+
3238
@ConfigOption
3339
@Description("""
3440
Specify predefined bucket permissions, also known as *canned ACL*. Can be one of `Private`, `PublicRead`, `PublicReadWrite`, `AuthenticatedRead`, `LogDeliveryWrite`, `BucketOwnerRead`, `BucketOwnerFullControl`, or `AwsExecRead`.

plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchExecutor.groovy

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,13 +370,17 @@ class AwsBatchExecutor extends Executor implements ExtensionPoint, TaskArrayExec
370370
final debug = opts.debug ? ' --debug' : ''
371371
final sse = opts.storageEncryption ? " --sse $opts.storageEncryption" : ''
372372
final kms = opts.storageKmsKeyId ? " --sse-kms-key-id $opts.storageKmsKeyId" : ''
373+
final checksum = opts.checksumAlgorithm ? " --checksum-algorithm $opts.checksumAlgorithm" : ''
373374
final requesterPays = opts.requesterPays ? ' --request-payer requester' : ''
374-
final aws = "$cli s3 cp --only-show-errors${sse}${kms}${debug}${requesterPays}"
375+
final aws = "$cli s3 cp --only-show-errors${sse}${kms}${checksum}${debug}${requesterPays}"
375376
final cmd = "trap \"{ ret=\$?; $aws ${TaskRun.CMD_LOG} ${workDir}/${TaskRun.CMD_LOG}||true; exit \$ret; }\" EXIT; $aws ${workDir}/${TaskRun.CMD_RUN} - | bash 2>&1 | tee ${TaskRun.CMD_LOG}"
376377
return cmd
377378
}
378379

379380
static String s5Cmd(String workDir, AwsOptions opts) {
381+
if( opts.checksumAlgorithm ){
382+
log.warn1("Checksum Algorithm is not supported by `s5cmd` command. This option will be ignored in command line operations.")
383+
}
380384
final cli = opts.getS5cmdPath()
381385
final sse = opts.storageEncryption ? " --sse $opts.storageEncryption" : ''
382386
final kms = opts.storageKmsKeyId ? " --sse-kms-key-id $opts.storageKmsKeyId" : ''

plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsOptions.groovy

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ class AwsOptions implements CloudTransferOptions {
116116
return awsConfig.s3Config.getStorageClass()
117117
}
118118

119+
/**
 * @return the checksum algorithm reported by the S3 section of the AWS configuration
 */
String getChecksumAlgorithm(){
    final s3 = awsConfig.s3Config
    return s3.getChecksumAlgorithm()
}
122+
119123
String getStorageEncryption() {
120124
return awsConfig.s3Config.getStorageEncryption()
121125
}

plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsS3Config.groovy

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package nextflow.cloud.aws.config
1919

20+
import software.amazon.awssdk.services.s3.model.ChecksumAlgorithm
21+
2022
import static nextflow.cloud.aws.util.AwsHelper.*
2123

2224
import software.amazon.awssdk.services.s3.model.ObjectCannedACL
@@ -41,6 +43,8 @@ class AwsS3Config {
4143

4244
private String storageKmsKeyId
4345

46+
private String checksumAlgorithm
47+
4448
private Boolean debug
4549

4650
private ObjectCannedACL s3Acl
@@ -59,6 +63,7 @@ class AwsS3Config {
5963
this.storageClass = parseStorageClass((opts.storageClass ?: opts.uploadStorageClass) as String) // 'uploadStorageClass' is kept for legacy purposes
6064
this.storageEncryption = parseStorageEncryption(opts.storageEncryption as String)
6165
this.storageKmsKeyId = opts.storageKmsKeyId
66+
this.checksumAlgorithm = parseChecksumAlgorithm(opts.checksumAlgorithm as String)
6267
this.pathStyleAccess = opts.s3PathStyleAccess as Boolean
6368
this.anonymous = opts.anonymous as Boolean
6469
this.s3Acl = parseS3Acl(opts.s3Acl as String)
@@ -85,6 +90,15 @@ class AwsS3Config {
8590
return null
8691
}
8792

93+
/**
 * Validates the configured S3 checksum algorithm.
 *
 * The value is compared against the names known to the AWS SDK
 * ({@code ChecksumAlgorithm.knownValues()}). Matching ignores letter case and
 * surrounding whitespace, and the canonical upper-case name is returned, so a
 * setting such as {@code sha256} is honoured instead of being dropped.
 *
 * @param value the raw setting taken from the configuration; may be {@code null} or empty
 * @return the canonical algorithm name, or {@code null} when the setting is
 *         missing or not supported (a warning is logged in the latter case)
 */
private String parseChecksumAlgorithm(String value) {
    if( !value )
        return null
    // the documented names (CRC32, CRC32C, SHA1, SHA256, CRC64NVME) are all upper case
    final normalized = value.trim().toUpperCase(Locale.ROOT)
    final supported = ChecksumAlgorithm.knownValues()*.toString()
    if( normalized in supported )
        return normalized
    log.warn "Unsupported AWS checksum algorithm: $value"
    return null
}
101+
88102
// ==== getters =====
89103
String getEndpoint() {
90104
return endpoint
@@ -102,6 +116,10 @@ class AwsS3Config {
102116
return storageKmsKeyId
103117
}
104118

119+
/**
 * @return the checksum algorithm name held by this configuration,
 *         or {@code null} when none was retained
 */
String getChecksumAlgorithm() {
    return this.checksumAlgorithm
}
122+
105123
Boolean getDebug() {
106124
return debug
107125
}

plugins/nf-amazon/src/main/nextflow/cloud/aws/nio/S3Client.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ public class S3Client {
7676

7777
private boolean global;
7878

79+
private ChecksumAlgorithm checksumAlgorithm;
80+
7981
public S3Client(AwsClientFactory factory, Properties props, boolean global) {
8082
S3SyncClientConfiguration clientConfig = S3SyncClientConfiguration.create(props);
8183
this.factory = factory;
@@ -155,6 +157,9 @@ private PutObjectRequest preparePutObjectRequest(PutObjectRequest.Builder reqBui
155157
if( storageEncryption!=null ) {
156158
reqBuilder.serverSideEncryption(storageEncryption);
157159
}
160+
if( checksumAlgorithm != null ) {
161+
reqBuilder.checksumAlgorithm(checksumAlgorithm);
162+
}
158163
if( contentType!=null ) {
159164
reqBuilder.contentType(contentType);
160165
}
@@ -183,6 +188,9 @@ public PutObjectResponse putObject(String bucket, String keyName, InputStream in
183188
if( storageEncryption!=null ) {
184189
reqBuilder.serverSideEncryption(storageEncryption);
185190
}
191+
if( checksumAlgorithm != null ) {
192+
reqBuilder.checksumAlgorithm(checksumAlgorithm);
193+
}
186194
if( contentType!=null ) {
187195
reqBuilder.contentType(contentType);
188196
}
@@ -217,6 +225,9 @@ public void copyObject(CopyObjectRequest.Builder reqBuilder, List<Tag> tags, Str
217225
if( kmsKeyId !=null ) {
218226
reqBuilder.ssekmsKeyId(kmsKeyId);
219227
}
228+
if( checksumAlgorithm != null ) {
229+
reqBuilder.checksumAlgorithm(checksumAlgorithm);
230+
}
220231
if( contentType!=null ) {
221232
reqBuilder.metadataDirective(MetadataDirective.REPLACE);
222233
reqBuilder.contentType(contentType);
@@ -281,6 +292,13 @@ public void setTransferManagerThreads(String value) {
281292
}
282293
}
283294

295+
public void setChecksumAlgorithm(String alg){
296+
if( alg == null )
297+
return;
298+
this.checksumAlgorithm = ChecksumAlgorithm.fromValue(alg);
299+
log.debug("Setting S3 ChecksumAlgorithm={}", alg);
300+
}
301+
284302
public ObjectCannedACL getCannedAcl() {
285303
return cannedAcl;
286304
}
@@ -340,6 +358,9 @@ public void multipartCopyObject(S3Path s3Source, S3Path s3Target, Long objectSiz
340358
if( kmsKeyId != null ) {
341359
reqBuilder.ssekmsKeyId(kmsKeyId);
342360
}
361+
if( checksumAlgorithm != null ) {
362+
reqBuilder.checksumAlgorithm(checksumAlgorithm);
363+
}
343364

344365
if( tags != null && tags.size()>0 ) {
345366
reqBuilder.tagging( Tagging.builder().tagSet(tags).build() );
@@ -548,6 +569,9 @@ private PutObjectRequest.Builder updateBuilder(PutObjectRequest.Builder porBuild
548569
porBuilder.serverSideEncryption(storageEncryption);
549570
if( kmsKeyId != null )
550571
porBuilder.ssekmsKeyId(kmsKeyId);
572+
if( checksumAlgorithm != null ) {
573+
porBuilder.checksumAlgorithm(checksumAlgorithm);
574+
}
551575
if( tags != null && !tags.isEmpty() )
552576
porBuilder.tagging(Tagging.builder().tagSet(tags).build());
553577
return porBuilder;
@@ -593,6 +617,9 @@ public void copyFile(CopyObjectRequest.Builder reqBuilder, List<Tag> tags, Strin
593617
if( kmsKeyId !=null ) {
594618
reqBuilder.ssekmsKeyId(kmsKeyId);
595619
}
620+
if( checksumAlgorithm != null ) {
621+
reqBuilder.checksumAlgorithm(checksumAlgorithm);
622+
}
596623
if( contentType!=null ) {
597624
reqBuilder.metadataDirective(MetadataDirective.REPLACE);
598625
reqBuilder.contentType(contentType);

plugins/nf-amazon/src/main/nextflow/cloud/aws/nio/S3FileSystemProvider.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ private S3OutputStream createUploaderOutputStream( S3Path fileToUpload ) {
340340
.setCannedAcl(s3.getCannedAcl())
341341
.setStorageClass(storageClass)
342342
.setStorageEncryption(props.getProperty("storage_encryption"))
343+
.setChecksumAlgorithm(props.getProperty("checksum_algorithm"))
343344
.setKmsKeyId(props.getProperty("storage_kms_key_id"))
344345
.setContentType(fileToUpload.getContentType())
345346
.setTags(fileToUpload.getTagsList());
@@ -742,6 +743,7 @@ protected S3FileSystem createFileSystem(URI uri, AwsConfig awsConfig) {
742743
// set the client acl
743744
client.setCannedAcl(getProp(props, "s_3_acl", "s3_acl", "s3acl", "s3Acl"));
744745
client.setStorageEncryption(props.getProperty("storage_encryption"));
746+
client.setChecksumAlgorithm(props.getProperty("checksum_algorithm"));
745747
client.setKmsKeyId(props.getProperty("storage_kms_key_id"));
746748
client.setTransferManagerThreads(props.getProperty("transfer_manager_threads"));
747749
client.setRequesterPaysEnabled(props.getProperty("requester_pays"));

plugins/nf-amazon/src/main/nextflow/cloud/aws/nio/S3OutputStream.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ public final class S3OutputStream extends OutputStream {
8787

8888
private String contentType;
8989

90+
private ChecksumAlgorithm checksumAlgorithm;
91+
9092
/**
9193
* Indicates if the stream has been closed.
9294
*/
@@ -201,6 +203,12 @@ public S3OutputStream setKmsKeyId(String kmsKeyId) {
201203
return this;
202204
}
203205

206+
public S3OutputStream setChecksumAlgorithm(String checksumAlgorithm){
207+
if( checksumAlgorithm !=null )
208+
this.checksumAlgorithm = ChecksumAlgorithm.fromValue(checksumAlgorithm);
209+
return this;
210+
}
211+
204212
public S3OutputStream setContentType(String type) {
205213
this.contentType = type;
206214
return this;
@@ -428,6 +436,10 @@ private CreateMultipartUploadResponse initiateMultipartUpload() throws IOExcepti
428436
reqBuilder.serverSideEncryption(storageEncryption);
429437
}
430438

439+
if( checksumAlgorithm != null ) {
440+
reqBuilder.checksumAlgorithm(checksumAlgorithm);
441+
}
442+
431443
if( contentType != null ) {
432444
reqBuilder.contentType(contentType);
433445
}
@@ -614,6 +626,10 @@ private void putObject(final InputStream content, final long contentLength, byte
614626
reqBuilder.serverSideEncryption( storageEncryption );
615627
}
616628

629+
if( checksumAlgorithm != null ) {
630+
reqBuilder.checksumAlgorithm(checksumAlgorithm);
631+
}
632+
617633
if( contentType != null ) {
618634
reqBuilder.contentType(contentType);
619635
}

plugins/nf-amazon/src/main/nextflow/cloud/aws/util/S3BashLib.groovy

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class S3BashLib extends BashFunLib<S3BashLib> {
3838
private String s5cmdPath
3939
private String acl = ''
4040
private String requesterPays = ''
41+
private String checksumAlgorithm = ''
4142

4243
S3BashLib withCliPath(String cliPath) {
4344
if( cliPath )
@@ -61,6 +62,11 @@ class S3BashLib extends BashFunLib<S3BashLib> {
6162
this.storageClass = value
6263
return this
6364
}
65+
/**
 * Sets the checksum algorithm option added to the generated {@code s3 cp} commands.
 * A {@code null} or empty value leaves the current setting unchanged.
 *
 * @param value the checksum algorithm name, e.g. {@code SHA256}
 * @return this object, to allow call chaining
 */
S3BashLib withChecksumAlgorithm(String value) {
    // the trailing blank is required: the fragment is concatenated directly with the next CLI option
    if( value )
        this.checksumAlgorithm = "--checksum-algorithm $value "
    return this
}
6470

6571
S3BashLib withStorageEncryption(String value) {
6672
if( value )
@@ -112,11 +118,11 @@ class S3BashLib extends BashFunLib<S3BashLib> {
112118
local name=\$1
113119
local s3path=\$2
114120
if [[ "\$name" == - ]]; then
115-
$cli s3 cp --only-show-errors ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass - "\$s3path"
121+
$cli s3 cp --only-show-errors ${debug}${acl}${storageEncryption}${storageKmsKeyId}${checksumAlgorithm}${requesterPays}--storage-class $storageClass - "\$s3path"
116122
elif [[ -d "\$name" ]]; then
117-
$cli s3 cp --only-show-errors --recursive ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
123+
$cli s3 cp --only-show-errors --recursive ${debug}${acl}${storageEncryption}${storageKmsKeyId}${checksumAlgorithm}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
118124
else
119-
$cli s3 cp --only-show-errors ${debug}${acl}${storageEncryption}${storageKmsKeyId}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
125+
$cli s3 cp --only-show-errors ${debug}${acl}${storageEncryption}${storageKmsKeyId}${checksumAlgorithm}${requesterPays}--storage-class $storageClass "\$name" "\$s3path/\$name"
120126
fi
121127
}
122128
@@ -187,6 +193,7 @@ class S3BashLib extends BashFunLib<S3BashLib> {
187193
.withMaxTransferAttempts( opts.maxTransferAttempts )
188194
.withCliPath( opts.awsCli )
189195
.withStorageClass(opts.storageClass )
196+
.withChecksumAlgorithm( opts.checksumAlgorithm )
190197
.withStorageEncryption( opts.storageEncryption )
191198
.withStorageKmsKeyId( opts.storageKmsKeyId )
192199
.withRetryMode( opts.retryMode )

plugins/nf-amazon/src/test/nextflow/cloud/aws/batch/AwsBatchFileCopyStrategyTest.groovy

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ class AwsBatchFileCopyStrategyTest extends Specification {
126126
1 * opts.getAwsCli() >> 'aws'
127127
1 * opts.getStorageClass() >> null
128128
1 * opts.getStorageEncryption() >> null
129+
1 * opts.getChecksumAlgorithm() >> null
129130

130131
script == '''\
131132
# bash helper functions
@@ -217,6 +218,7 @@ class AwsBatchFileCopyStrategyTest extends Specification {
217218
1 * opts.getAwsCli() >> '/foo/aws'
218219
1 * opts.getStorageClass() >> 'STANDARD_IA'
219220
1 * opts.getStorageEncryption() >> 'AES256'
221+
1 * opts.getChecksumAlgorithm() >> 'SHA256'
220222

221223
script == '''\
222224
# bash helper functions
@@ -281,11 +283,11 @@ class AwsBatchFileCopyStrategyTest extends Specification {
281283
local name=$1
282284
local s3path=$2
283285
if [[ "$name" == - ]]; then
284-
/foo/aws s3 cp --only-show-errors --sse AES256 --storage-class STANDARD_IA - "$s3path"
286+
/foo/aws s3 cp --only-show-errors --sse AES256 --checksum-algorithm SHA256 --storage-class STANDARD_IA - "$s3path"
285287
elif [[ -d "$name" ]]; then
286-
/foo/aws s3 cp --only-show-errors --recursive --sse AES256 --storage-class STANDARD_IA "$name" "$s3path/$name"
288+
/foo/aws s3 cp --only-show-errors --recursive --sse AES256 --checksum-algorithm SHA256 --storage-class STANDARD_IA "$name" "$s3path/$name"
287289
else
288-
/foo/aws s3 cp --only-show-errors --sse AES256 --storage-class STANDARD_IA "$name" "$s3path/$name"
290+
/foo/aws s3 cp --only-show-errors --sse AES256 --checksum-algorithm SHA256 --storage-class STANDARD_IA "$name" "$s3path/$name"
289291
fi
290292
}
291293

0 commit comments

Comments
 (0)