|
| 1 | +/* |
| 2 | + * Copyright (C) 2012-2015 DataStax Inc. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | +package com.datastax.driver.core; |
| 17 | + |
| 18 | +import com.datastax.driver.core.exceptions.NoHostAvailableException; |
| 19 | +import com.google.common.util.concurrent.FutureCallback; |
| 20 | +import com.google.common.util.concurrent.Futures; |
| 21 | +import com.google.common.util.concurrent.Uninterruptibles; |
| 22 | +import org.slf4j.Logger; |
| 23 | +import org.slf4j.LoggerFactory; |
| 24 | +import org.testng.annotations.AfterClass; |
| 25 | +import org.testng.annotations.BeforeClass; |
| 26 | +import org.testng.annotations.Test; |
| 27 | + |
| 28 | +import java.util.List; |
| 29 | +import java.util.Random; |
| 30 | +import java.util.concurrent.CountDownLatch; |
| 31 | +import java.util.concurrent.Semaphore; |
| 32 | +import java.util.concurrent.TimeUnit; |
| 33 | +import java.util.concurrent.atomic.AtomicInteger; |
| 34 | + |
| 35 | +import static org.assertj.core.api.Assertions.assertThat; |
| 36 | +import static org.testng.Assert.fail; |
| 37 | + |
| 38 | +public class ReusedStreamIdTest { |
| 39 | + |
| 40 | + static Logger logger = LoggerFactory.getLogger(ReusedStreamIdTest.class); |
| 41 | + |
| 42 | + private CCMBridge ccm; |
| 43 | + |
| 44 | + @BeforeClass(groups = "long") |
| 45 | + public void setupCluster() throws InterruptedException { |
| 46 | + ccm = CCMBridge.builder("test") |
| 47 | + .withNodes(2) |
| 48 | + .build(); |
| 49 | + } |
| 50 | + |
| 51 | + @AfterClass(groups = "long") |
| 52 | + public void shutdownCluster() { |
| 53 | + if (ccm != null) |
| 54 | + ccm.stop(); |
| 55 | + } |
| 56 | + |
| 57 | + /** |
| 58 | + * Ensures that if activity tied to future completion blocks netty io threads that this does not cause the |
| 59 | + * driver to possibly invoke the wrong handler for a response, as described in JAVA-1179. |
| 60 | + * <p> |
| 61 | + * This is accomplished by setting up a 2 node cluster and setting a low read timeout (1 second). The test |
| 62 | + * submits 10 concurrent requests repeatedly for up to 500 queries and on completion of each request may block |
| 63 | + * in a callback randomly (25% of the time) for 1 second, causing a retry on the next host to trigger. If a new |
| 64 | + * stream id is not allocated on the retry, its possible it could use an already used stream id and cause the driver |
| 65 | + * to invoke the wrong handlers for a response. The test checks for this by ensuring that the column returned in a |
| 66 | + * response matches the one queried. If the column received does not match, the test fails. In cases where this |
| 67 | + * bug is present, it should be detected within 10 seconds. |
| 68 | + * |
| 69 | + * @jira_ticket JAVA-1179 |
| 70 | + * @test_category queries:async_callback |
| 71 | + */ |
| 72 | + @Test(groups = "long") |
| 73 | + public void should_not_receive_wrong_response_when_callbacks_block_io_thread() { |
| 74 | + // Low Read Timeout to trigger retry behavior. |
| 75 | + Cluster cluster = Cluster.builder() |
| 76 | + .addContactPoint(CCMBridge.IP_PREFIX + '1') |
| 77 | + .withSocketOptions(new SocketOptions().setReadTimeoutMillis(1000)) |
| 78 | + .build(); |
| 79 | + |
| 80 | + int concurrency = 10; |
| 81 | + final Semaphore semaphore = new Semaphore(concurrency); |
| 82 | + // RNG to determine sleep times. |
| 83 | + final Random random = new Random(); |
| 84 | + |
| 85 | + try { |
| 86 | + Session session = cluster.connect(); |
| 87 | + |
| 88 | + // Use the system.local table and alternate between columns that are queried. |
| 89 | + List<ColumnMetadata> columnsToGrab = cluster.getMetadata().getKeyspace("system").getTable("local").getColumns(); |
| 90 | + assertThat(columnsToGrab.size()).isGreaterThan(1); |
| 91 | + |
| 92 | + final CountDownLatch errorTrigger = new CountDownLatch(1); |
| 93 | + |
| 94 | + long start = System.currentTimeMillis(); |
| 95 | + // 500 iterations will take roughly 1 minute. |
| 96 | + int iterations = 500; |
| 97 | + final AtomicInteger completed = new AtomicInteger(0); |
| 98 | + |
| 99 | + for (int i = 1; i <= iterations; i++) { |
| 100 | + try { |
| 101 | + if (errorTrigger.getCount() == 0) { |
| 102 | + fail(String.format("Error triggered at or before %d of %d requests after %dms.", i, iterations, |
| 103 | + System.currentTimeMillis() - start)); |
| 104 | + } |
| 105 | + semaphore.acquire(); |
| 106 | + final String column = columnsToGrab.get(i % columnsToGrab.size()).getName(); |
| 107 | + String query = String.format("select %s from system.local", column); |
| 108 | + ResultSetFuture future = session.executeAsync(query); |
| 109 | + |
| 110 | + Futures.addCallback(future, new FutureCallback<ResultSet>() { |
| 111 | + @Override |
| 112 | + public void onSuccess(ResultSet result) { |
| 113 | + semaphore.release(); |
| 114 | + // Expect the column that you queried to be present, if its not we got the wrong response |
| 115 | + // back. |
| 116 | + int columnIndex = result.getColumnDefinitions().getIndexOf(column); |
| 117 | + if (columnIndex == -1) { |
| 118 | + logger.error("Got response without column {}, got columns {} from Host {}.", column, |
| 119 | + result.getColumnDefinitions(), result.getExecutionInfo().getQueriedHost()); |
| 120 | + errorTrigger.countDown(); |
| 121 | + return; |
| 122 | + } |
| 123 | + completed.incrementAndGet(); |
| 124 | + // Block netty io thread 25% of the time. |
| 125 | + int num = random.nextInt(1); |
| 126 | + if (num == 0) { |
| 127 | + // Sleep exactly one second, should trigger retry. |
| 128 | + Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS); |
| 129 | + } |
| 130 | + } |
| 131 | + |
| 132 | + @Override |
| 133 | + public void onFailure(Throwable t) { |
| 134 | + semaphore.release(); |
| 135 | + // NHAEs are inevitable because of low query timeouts and blocked threads. |
| 136 | + if (!(t instanceof NoHostAvailableException)) { |
| 137 | + logger.error("Unexpected error encountered.", t); |
| 138 | + errorTrigger.countDown(); |
| 139 | + } |
| 140 | + } |
| 141 | + }); |
| 142 | + } catch (InterruptedException e) { |
| 143 | + fail("Test interrupted", e); |
| 144 | + } |
| 145 | + if (i % (iterations / 10) == 0) { |
| 146 | + logger.info("Submitted {} of {} requests. ({} completed successfully)", i, iterations, completed.get()); |
| 147 | + } |
| 148 | + } |
| 149 | + |
| 150 | + // Wait for 10 seconds for any remaining requests to possibly trigger an error, its likely |
| 151 | + // that if we get to this point this will not happen. |
| 152 | + Uninterruptibles.awaitUninterruptibly(errorTrigger, 10, TimeUnit.SECONDS); |
| 153 | + if (errorTrigger.getCount() == 0) { |
| 154 | + fail(String.format("Error triggered after %dms.", System.currentTimeMillis() - start)); |
| 155 | + } |
| 156 | + // Sanity check to ensure that at least some requests succeeded, we expect some failures if both |
| 157 | + // hosts timeout as its likely they could be blocked on the event loop. |
| 158 | + assertThat(completed.get()).isGreaterThan(0); |
| 159 | + } finally { |
| 160 | + try { |
| 161 | + // Acquire all permits to make sure all inflight requests complete. |
| 162 | + if (!semaphore.tryAcquire(concurrency, 10, TimeUnit.SECONDS)) { |
| 163 | + fail("Could not acquire all permits within 10 seconds of completion."); |
| 164 | + } |
| 165 | + } catch (InterruptedException e) { |
| 166 | + fail("Interrupted.", e); |
| 167 | + } |
| 168 | + cluster.close(); |
| 169 | + } |
| 170 | + } |
| 171 | +} |
0 commit comments