
Commit ec35f7f

PYTHON-3823 Audit benchmark data_size and calculate it dynamically where possible (mongodb#1439)
1 parent fc22053 commit ec35f7f

File tree

1 file changed: +51 -93 lines changed


test/performance/perf_test.py

Lines changed: 51 additions & 93 deletions
@@ -21,7 +21,7 @@
 import tempfile
 import time
 import warnings
-from typing import Any, List
+from typing import Any, List, Optional
 
 try:
     import simplejson as json
@@ -70,9 +70,8 @@ def __exit__(self, *args):
 
 
 class PerformanceTest:
-    dataset: Any
-    data_size: Any
-    do_task: Any
+    dataset: str
+    data_size: int
     fail: Any
 
     @classmethod
@@ -87,7 +86,9 @@ def tearDown(self):
         name = self.__class__.__name__[4:]
         median = self.percentile(50)
         megabytes_per_sec = self.data_size / median / 1000000
-        print(f"Running {self.__class__.__name__}. MEDIAN={self.percentile(50)}")
+        print(
+            f"Running {self.__class__.__name__}. MB/s={megabytes_per_sec}, MEDIAN={self.percentile(50)}"
+        )
         result_data.append(
             {
                 "info": {
@@ -105,6 +106,9 @@ def tearDown(self):
     def before(self):
         pass
 
+    def do_task(self):
+        raise NotImplementedError
+
     def after(self):
         pass
 
@@ -120,12 +124,13 @@ def percentile(self, percentile):
     def runTest(self):
         results = []
         start = time.monotonic()
-        self.max_iterations = NUM_ITERATIONS
         for i in range(NUM_ITERATIONS):
             if time.monotonic() - start > MAX_ITERATION_TIME:
                 with warnings.catch_warnings():
                     warnings.simplefilter("default")
-                    warnings.warn("Test timed out, completed %s iterations." % i)
+                    warnings.warn(
+                        f"Test timed out after {MAX_ITERATION_TIME}s, completed {i}/{NUM_ITERATIONS} iterations."
+                    )
                 break
             self.before()
             with Timer() as timer:
@@ -142,6 +147,7 @@ def setUp(self):
         # Location of test data.
         with open(os.path.join(TEST_PATH, os.path.join("extended_bson", self.dataset))) as data:
             self.document = loads(data.read())
+        self.data_size = len(encode(self.document)) * NUM_DOCS
 
     def do_task(self):
         for _ in range(NUM_DOCS):
@@ -154,44 +160,40 @@ def setUp(self):
         with open(os.path.join(TEST_PATH, os.path.join("extended_bson", self.dataset))) as data:
             self.document = encode(json.loads(data.read()))
 
+        self.data_size = len(self.document) * NUM_DOCS
+
     def do_task(self):
         for _ in range(NUM_DOCS):
             decode(self.document)
 
 
 class TestFlatEncoding(BsonEncodingTest, unittest.TestCase):
     dataset = "flat_bson.json"
-    data_size = 75310000
 
 
 class TestFlatDecoding(BsonDecodingTest, unittest.TestCase):
     dataset = "flat_bson.json"
-    data_size = 75310000
 
 
 class TestDeepEncoding(BsonEncodingTest, unittest.TestCase):
     dataset = "deep_bson.json"
-    data_size = 19640000
 
 
 class TestDeepDecoding(BsonDecodingTest, unittest.TestCase):
     dataset = "deep_bson.json"
-    data_size = 19640000
 
 
 class TestFullEncoding(BsonEncodingTest, unittest.TestCase):
     dataset = "full_bson.json"
-    data_size = 57340000
 
 
 class TestFullDecoding(BsonDecodingTest, unittest.TestCase):
     dataset = "full_bson.json"
-    data_size = 57340000
 
 
 # SINGLE-DOC BENCHMARKS
 class TestRunCommand(PerformanceTest, unittest.TestCase):
-    data_size = 160000
+    data_size = len(encode({"hello": True})) * NUM_DOCS
 
     def setUp(self):
         self.client = client_context.client
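Each hard-coded constant deleted above can now be recomputed from its dataset the same way the new setUp code does. A minimal sketch of that derivation, assuming NUM_DOCS and TEST_PATH match the suite's constants (their values are not shown in this diff); it mirrors the BsonDecodingTest.setUp pattern of encoding the parsed JSON:

    import json
    import os

    from bson import encode  # PyMongo's BSON codec

    NUM_DOCS = 10000  # assumed; defined elsewhere in perf_test.py
    TEST_PATH = "test/performance"  # assumed location of the test data

    def expected_data_size(dataset: str) -> int:
        # Size of one encoded document times the number of
        # documents processed per task.
        path = os.path.join(TEST_PATH, "extended_bson", dataset)
        with open(path) as f:
            document = json.loads(f.read())
        return len(encode(document)) * NUM_DOCS

    # e.g. expected_data_size("flat_bson.json") replaces the removed 75310000.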
@@ -200,7 +202,7 @@ def setUp(self):
     def do_task(self):
         command = self.client.perftest.command
         for _ in range(NUM_DOCS):
-            command("ping")
+            command("hello", True)
 
 
 class TestDocument(PerformanceTest):
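Note that this change and the new TestRunCommand.data_size expression are consistent: command("hello", True) sends the single-key document {"hello": True}, which is exactly what the class attribute measures. Its encoded size is easy to verify by hand:

    from bson import encode

    # 4-byte length prefix + 1 type byte + len("hello")+1 key bytes
    # + 1 boolean byte + 1 terminating NUL = 13 bytes
    assert len(encode({"hello": True})) == 13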
@@ -225,116 +227,83 @@ def after(self):
         self.client.perftest.drop_collection("corpus")
 
 
-class TestFindOneByID(TestDocument, unittest.TestCase):
-    data_size = 16220000
+class FindTest(TestDocument):
+    dataset = "tweet.json"
 
     def setUp(self):
-        self.dataset = "tweet.json"
         super().setUp()
-
+        self.data_size = len(encode(self.document)) * NUM_DOCS
         documents = [self.document.copy() for _ in range(NUM_DOCS)]
         self.corpus = self.client.perftest.corpus
         result = self.corpus.insert_many(documents)
         self.inserted_ids = result.inserted_ids
 
-    def do_task(self):
-        find_one = self.corpus.find_one
-        for _id in self.inserted_ids:
-            find_one({"_id": _id})
-
     def before(self):
         pass
 
     def after(self):
         pass
 
 
-class TestSmallDocInsertOne(TestDocument, unittest.TestCase):
-    data_size = 2750000
+class TestFindOneByID(FindTest, unittest.TestCase):
+    def do_task(self):
+        find_one = self.corpus.find_one
+        for _id in self.inserted_ids:
+            find_one({"_id": _id})
+
+
+class SmallDocInsertTest(TestDocument):
+    dataset = "small_doc.json"
 
     def setUp(self):
-        self.dataset = "small_doc.json"
         super().setUp()
-
+        self.data_size = len(encode(self.document)) * NUM_DOCS
         self.documents = [self.document.copy() for _ in range(NUM_DOCS)]
 
+
+class TestSmallDocInsertOne(SmallDocInsertTest, unittest.TestCase):
     def do_task(self):
         insert_one = self.corpus.insert_one
         for doc in self.documents:
             insert_one(doc)
 
 
-class TestLargeDocInsertOne(TestDocument, unittest.TestCase):
-    data_size = 27310890
+class LargeDocInsertTest(TestDocument):
+    dataset = "large_doc.json"
 
     def setUp(self):
-        self.dataset = "large_doc.json"
         super().setUp()
+        n_docs = 10
+        self.data_size = len(encode(self.document)) * n_docs
+        self.documents = [self.document.copy() for _ in range(n_docs)]
 
-        self.documents = [self.document.copy() for _ in range(10)]
 
+class TestLargeDocInsertOne(LargeDocInsertTest, unittest.TestCase):
     def do_task(self):
         insert_one = self.corpus.insert_one
         for doc in self.documents:
             insert_one(doc)
 
 
 # MULTI-DOC BENCHMARKS
-class TestFindManyAndEmptyCursor(TestDocument, unittest.TestCase):
-    data_size = 16220000
-
-    def setUp(self):
-        self.dataset = "tweet.json"
-        super().setUp()
-
-        for _ in range(10):
-            self.client.perftest.command("insert", "corpus", documents=[self.document] * 1000)
-        self.corpus = self.client.perftest.corpus
-
+class TestFindManyAndEmptyCursor(FindTest, unittest.TestCase):
     def do_task(self):
         list(self.corpus.find())
 
-    def before(self):
-        pass
-
-    def after(self):
-        pass
-
-
-class TestSmallDocBulkInsert(TestDocument, unittest.TestCase):
-    data_size = 2750000
-
-    def setUp(self):
-        self.dataset = "small_doc.json"
-        super().setUp()
-        self.documents = [self.document.copy() for _ in range(NUM_DOCS)]
-
-    def before(self):
-        self.corpus = self.client.perftest.create_collection("corpus")
 
+class TestSmallDocBulkInsert(SmallDocInsertTest, unittest.TestCase):
     def do_task(self):
         self.corpus.insert_many(self.documents, ordered=True)
 
 
-class TestLargeDocBulkInsert(TestDocument, unittest.TestCase):
-    data_size = 27310890
-
-    def setUp(self):
-        self.dataset = "large_doc.json"
-        super().setUp()
-        self.documents = [self.document.copy() for _ in range(10)]
-
-    def before(self):
-        self.corpus = self.client.perftest.create_collection("corpus")
-
+class TestLargeDocBulkInsert(LargeDocInsertTest, unittest.TestCase):
     def do_task(self):
         self.corpus.insert_many(self.documents, ordered=True)
 
 
-class TestGridFsUpload(PerformanceTest, unittest.TestCase):
-    data_size = 52428800
-
+class GridFsTest(PerformanceTest):
     def setUp(self):
+        super().setUp()
         self.client = client_context.client
         self.client.drop_database("perftest")
 
@@ -343,44 +312,33 @@ def setUp(self):
         )
         with open(gridfs_path, "rb") as data:
             self.document = data.read()
-
+        self.data_size = len(self.document)
         self.bucket = GridFSBucket(self.client.perftest)
 
     def tearDown(self):
         super().tearDown()
         self.client.drop_database("perftest")
 
+
+class TestGridFsUpload(GridFsTest, unittest.TestCase):
     def before(self):
+        # Create the bucket.
         self.bucket.upload_from_stream("init", b"x")
 
     def do_task(self):
         self.bucket.upload_from_stream("gridfstest", self.document)
 
 
-class TestGridFsDownload(PerformanceTest, unittest.TestCase):
-    data_size = 52428800
-
+class TestGridFsDownload(GridFsTest, unittest.TestCase):
     def setUp(self):
-        self.client = client_context.client
-        self.client.drop_database("perftest")
-
-        gridfs_path = os.path.join(
-            TEST_PATH, os.path.join("single_and_multi_document", "gridfs_large.bin")
-        )
-
-        self.bucket = GridFSBucket(self.client.perftest)
-        with open(gridfs_path, "rb") as gfile:
-            self.uploaded_id = self.bucket.upload_from_stream("gridfstest", gfile)
-
-    def tearDown(self):
-        super().tearDown()
-        self.client.drop_database("perftest")
+        super().setUp()
+        self.uploaded_id = self.bucket.upload_from_stream("gridfstest", self.document)
 
     def do_task(self):
         self.bucket.open_download_stream(self.uploaded_id).read()
 
 
-proc_client = None
+proc_client: Optional[MongoClient] = None
 
 
 def proc_init(*dummy):
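One more sizing note: unlike the per-document benchmarks, the GridFS tests transfer the file once per task, so GridFsTest sets data_size = len(self.document) with no multiplier. The removed constant 52428800 is exactly 50 MiB (50 * 1024 * 1024), presumably the size of gridfs_large.bin; reading the file and taking its length now keeps the figure correct even if the dataset changes.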
