Skip to content

Commit 605b672

Browse files
committed
fix: use create time of database for retries
1 parent 1e9c4a6 commit 605b672

File tree

2 files changed

+93
-24
lines changed

2 files changed

+93
-24
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/GapicSpannerRpc.java

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@
148148
import java.util.concurrent.ScheduledExecutorService;
149149
import java.util.concurrent.ScheduledThreadPoolExecutor;
150150
import java.util.concurrent.ThreadFactory;
151+
import java.util.concurrent.TimeUnit;
151152
import javax.annotation.Nullable;
152153
import org.threeten.bp.Duration;
153154

@@ -391,22 +392,20 @@ public boolean shouldRetry(
391392

392393
private final class OperationFutureCallable<RequestT, ResponseT, MetadataT extends Message>
393394
implements Callable<OperationFuture<ResponseT, MetadataT>> {
394-
final Class<MetadataT> metadataClass;
395395
final OperationCallable<RequestT, ResponseT, MetadataT> operationCallable;
396396
final RequestT initialRequest;
397397
final String instanceName;
398398
final OperationsLister lister;
399-
final Function<MetadataT, Timestamp> getStartTimeFunction;
399+
final Function<Operation, Timestamp> getStartTimeFunction;
400+
Timestamp initialCallTime;
400401
boolean isRetry = false;
401402

402403
OperationFutureCallable(
403-
Class<MetadataT> metadataClass,
404404
OperationCallable<RequestT, ResponseT, MetadataT> operationCallable,
405405
RequestT initialRequest,
406406
String instanceName,
407407
OperationsLister lister,
408-
Function<MetadataT, Timestamp> getStartTimeFunction) {
409-
this.metadataClass = metadataClass;
408+
Function<Operation, Timestamp> getStartTimeFunction) {
410409
this.operationCallable = operationCallable;
411410
this.initialRequest = initialRequest;
412411
this.instanceName = instanceName;
@@ -422,11 +421,17 @@ public OperationFuture<ResponseT, MetadataT> call() throws Exception {
422421
if (isRetry) {
423422
// Query the backend to see if the operation was actually created, and that the
424423
// problem was caused by a network problem or other transient problem client side.
425-
Operation operation = mostRecentOperation(metadataClass, lister, getStartTimeFunction);
424+
Operation operation = mostRecentOperation(lister, getStartTimeFunction, initialCallTime);
426425
if (operation != null) {
427426
// Operation found, resume tracking that operation.
428427
operationName = operation.getName();
429428
}
429+
} else {
430+
initialCallTime =
431+
Timestamp.newBuilder()
432+
.setSeconds(
433+
TimeUnit.SECONDS.convert(System.currentTimeMillis(), TimeUnit.MILLISECONDS))
434+
.build();
430435
}
431436
isRetry = true;
432437

@@ -443,8 +448,10 @@ private interface OperationsLister {
443448
Paginated<Operation> listOperations(String nextPageToken);
444449
}
445450

446-
private <T extends Message> Operation mostRecentOperation(
447-
Class<T> metadataClass, OperationsLister lister, Function<T, Timestamp> getStartTimeFunction)
451+
private Operation mostRecentOperation(
452+
OperationsLister lister,
453+
Function<Operation, Timestamp> getStartTimeFunction,
454+
Timestamp initialCallTime)
448455
throws InvalidProtocolBufferException {
449456
Operation res = null;
450457
Timestamp currMaxStartTime = null;
@@ -453,16 +460,18 @@ private <T extends Message> Operation mostRecentOperation(
453460
do {
454461
operations = lister.listOperations(nextPageToken);
455462
for (Operation op : operations.getResults()) {
456-
T metadata = op.getMetadata().unpack(metadataClass);
457-
Timestamp startTime = getStartTimeFunction.apply(metadata);
458-
if (res == null || TimestampComparator.INSTANCE.compare(startTime, currMaxStartTime) > 0) {
463+
Timestamp startTime = getStartTimeFunction.apply(op);
464+
if (res == null
465+
|| (TimestampComparator.INSTANCE.compare(startTime, currMaxStartTime) > 0
466+
&& TimestampComparator.INSTANCE.compare(startTime, initialCallTime) >= 0)) {
459467
currMaxStartTime = startTime;
460468
res = op;
461469
}
462470
// If the operation does not report any start time, then the operation that is not yet done
463471
// is the one that is the most recent.
464472
if (startTime == null && currMaxStartTime == null && !op.getDone()) {
465473
res = op;
474+
break;
466475
}
467476
}
468477
} while (operations.getNextPageToken() != null);
@@ -684,7 +693,6 @@ public OperationFuture<Database, CreateDatabaseMetadata> createDatabase(
684693

685694
OperationFutureCallable<CreateDatabaseRequest, Database, CreateDatabaseMetadata> callable =
686695
new OperationFutureCallable<CreateDatabaseRequest, Database, CreateDatabaseMetadata>(
687-
CreateDatabaseMetadata.class,
688696
databaseAdminStub.createDatabaseOperationCallable(),
689697
request,
690698
instanceName,
@@ -701,9 +709,23 @@ public Paginated<Operation> listOperations(String nextPageToken) {
701709
nextPageToken);
702710
}
703711
},
704-
new Function<CreateDatabaseMetadata, Timestamp>() {
712+
new Function<Operation, Timestamp>() {
705713
@Override
706-
public Timestamp apply(CreateDatabaseMetadata input) {
714+
public Timestamp apply(Operation input) {
715+
if (input.getDone() && input.hasResponse()) {
716+
try {
717+
Timestamp createTime =
718+
input.getResponse().unpack(Database.class).getCreateTime();
719+
if (Timestamp.getDefaultInstance().equals(createTime)) {
720+
// Create time was not returned by the server (proto objects never return
721+
// null, instead they return the default instance). Return null from this
722+
// method to indicate that there is no known create time.
723+
return null;
724+
}
725+
} catch (InvalidProtocolBufferException e) {
726+
return null;
727+
}
728+
}
707729
return null;
708730
}
709731
});
@@ -790,7 +812,6 @@ public OperationFuture<Backup, CreateBackupMetadata> createBackup(
790812
.build();
791813
OperationFutureCallable<CreateBackupRequest, Backup, CreateBackupMetadata> callable =
792814
new OperationFutureCallable<CreateBackupRequest, Backup, CreateBackupMetadata>(
793-
CreateBackupMetadata.class,
794815
databaseAdminStub.createBackupOperationCallable(),
795816
request,
796817
instanceName,
@@ -807,10 +828,18 @@ public Paginated<Operation> listOperations(String nextPageToken) {
807828
nextPageToken);
808829
}
809830
},
810-
new Function<CreateBackupMetadata, Timestamp>() {
831+
new Function<Operation, Timestamp>() {
811832
@Override
812-
public Timestamp apply(CreateBackupMetadata input) {
813-
return input.getProgress().getStartTime();
833+
public Timestamp apply(Operation input) {
834+
try {
835+
return input
836+
.getMetadata()
837+
.unpack(CreateBackupMetadata.class)
838+
.getProgress()
839+
.getStartTime();
840+
} catch (InvalidProtocolBufferException e) {
841+
return null;
842+
}
814843
}
815844
});
816845
return RetryHelper.runWithRetries(
@@ -835,7 +864,6 @@ public final OperationFuture<Database, RestoreDatabaseMetadata> restoreDatabase(
835864

836865
OperationFutureCallable<RestoreDatabaseRequest, Database, RestoreDatabaseMetadata> callable =
837866
new OperationFutureCallable<RestoreDatabaseRequest, Database, RestoreDatabaseMetadata>(
838-
RestoreDatabaseMetadata.class,
839867
databaseAdminStub.restoreDatabaseOperationCallable(),
840868
request,
841869
databaseInstanceName,
@@ -852,10 +880,18 @@ public Paginated<Operation> listOperations(String nextPageToken) {
852880
nextPageToken);
853881
}
854882
},
855-
new Function<RestoreDatabaseMetadata, Timestamp>() {
883+
new Function<Operation, Timestamp>() {
856884
@Override
857-
public Timestamp apply(RestoreDatabaseMetadata input) {
858-
return input.getProgress().getStartTime();
885+
public Timestamp apply(Operation input) {
886+
try {
887+
return input
888+
.getMetadata()
889+
.unpack(RestoreDatabaseMetadata.class)
890+
.getProgress()
891+
.getStartTime();
892+
} catch (InvalidProtocolBufferException e) {
893+
return null;
894+
}
859895
}
860896
});
861897
return RetryHelper.runWithRetries(

google-cloud-spanner/src/test/java/com/google/cloud/spanner/it/ITDatabaseAdminTest.java

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ public void testRetryNonIdempotentRpcsReturningLongRunningOperations() throws Ex
269269

270270
List<Database> databases = new ArrayList<>();
271271
List<Backup> backups = new ArrayList<>();
272+
String initialDatabaseId;
273+
Timestamp initialDbCreateTime;
272274

273275
try {
274276
// CreateDatabase
@@ -277,14 +279,19 @@ public void testRetryNonIdempotentRpcsReturningLongRunningOperations() throws Ex
277279
SpannerOptions options =
278280
testHelper.getOptions().toBuilder().setInterceptorProvider(createDbInterceptor).build();
279281
try (Spanner spanner = options.getService()) {
280-
String databaseId = testHelper.getUniqueDatabaseId();
282+
initialDatabaseId = testHelper.getUniqueDatabaseId();
281283
DatabaseAdminClient client = spanner.getDatabaseAdminClient();
282284
OperationFuture<Database, CreateDatabaseMetadata> op =
283285
client.createDatabase(
284286
testHelper.getInstanceId().getInstance(),
285-
databaseId,
287+
initialDatabaseId,
286288
Collections.<String>emptyList());
287289
databases.add(op.get());
290+
// Keep track of the original create time of this database, as we will drop this database
291+
// later and create another one with the exact same name. That means that the ListOperations
292+
// call will return at least two CreateDatabase operations. The retry logic should always
293+
// pick the last one.
294+
initialDbCreateTime = op.get().getCreateTime();
288295
// Assert that the CreateDatabase RPC was called only once, and that the operation tracking
289296
// was resumed through a GetOperation call.
290297
assertThat(createDbInterceptor.methodCount.get()).isEqualTo(1);
@@ -363,6 +370,32 @@ public void testRetryNonIdempotentRpcsReturningLongRunningOperations() throws Ex
363370
}
364371
}
365372
}
373+
374+
// Create another database with the exact same name as the first database.
375+
createDbInterceptor = new InjectErrorInterceptorProvider("CreateDatabase");
376+
options =
377+
testHelper.getOptions().toBuilder().setInterceptorProvider(createDbInterceptor).build();
378+
try (Spanner spanner = options.getService()) {
379+
DatabaseAdminClient client = spanner.getDatabaseAdminClient();
380+
// First drop the initial database.
381+
client.dropDatabase(testHelper.getInstanceId().getInstance(), initialDatabaseId);
382+
// Now re-create a database with the exact same name.
383+
OperationFuture<Database, CreateDatabaseMetadata> op =
384+
client.createDatabase(
385+
testHelper.getInstanceId().getInstance(),
386+
initialDatabaseId,
387+
Collections.<String>emptyList());
388+
// Check that the second database was created and has a greater creation time than the
389+
// first.
390+
Timestamp secondCreationTime = op.get().getCreateTime();
391+
// TODO: Change this to greaterThan when the create time of a database is reported back by
392+
// the server.
393+
assertThat(secondCreationTime).isAtLeast(initialDbCreateTime);
394+
// Assert that the CreateDatabase RPC was called only once, and that the operation tracking
395+
// was resumed through a GetOperation call.
396+
assertThat(createDbInterceptor.methodCount.get()).isEqualTo(1);
397+
assertThat(createDbInterceptor.getOperationCount.get()).isAtLeast(1);
398+
}
366399
} finally {
367400
DatabaseAdminClient client = testHelper.getClient().getDatabaseAdminClient();
368401
for (Database database : databases) {

0 commit comments

Comments
 (0)