Skip to content
Closed
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.benchmarks/
build/
env/
dist/
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: certs publish test
.PHONY: certs publish test bench

certs:
curl http://ci.kennethreitz.org/job/ca-bundle/lastSuccessfulBuild/artifact/cacerts.pem -o hyper/certs.pem
Expand All @@ -10,3 +10,6 @@ publish:

test:
py.test -n 4 --cov hyperframe test/

bench:
python -m pytest bench/ --benchmark-only --benchmark-group-by=name --benchmark-autosave --benchmark-compare
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be really good to move this into a tox environment, and then to have the makefile delegate to tox. Saves people the effort of managing their virtual environments.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can make this change.

Empty file added bench/__init__.py
Empty file.
19 changes: 19 additions & 0 deletions bench/test_flags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from bench.utils.factories import FRAME_FACTORIES
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be better to use relative imports here, I think, rather than rely on the local directory being placed in sys.path.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can change this.



class TestFlagBenchmarks:
def test_set_flags(self, benchmark):
frame = FRAME_FACTORIES["headers"]()
benchmark(frame.flags.add, "PADDED")

def test_get_flags(self, benchmark):
frame = FRAME_FACTORIES["headers"]()
frame.flags.add("PADDED")
benchmark(lambda: "PADDED" in frame.flags)

def test_remove_flags(self, benchmark):
frame = FRAME_FACTORIES["headers"]()
@benchmark
def _benchmark():
frame.flags.add("PADDED")
frame.flags.remove("PADDED")
57 changes: 57 additions & 0 deletions bench/test_parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from bench.utils.factories import FRAME_FACTORIES
from bench.utils import get_int


NUMBER_OF_FRAMES = 100


def _parse_function(frame_and_data):
frame, data = frame_and_data
data = memoryview(data[get_int(0, NUMBER_OF_FRAMES - 1)])
frame.parse_frame_header(data[:9])
frame.parse_body(data[9:])


def _create_frames(frame_type):
data = [FRAME_FACTORIES[frame_type]().serialize() for _ in range(100)]
frame = FRAME_FACTORIES[frame_type]()
frame = type(frame)(frame.stream_id)
return frame, data


class TestParseBenchmarks:
def test_parse_data_frame(self, benchmark):
benchmark(_parse_function, _create_frames("data"))

def test_parse_priority_frame(self, benchmark):
benchmark(_parse_function, _create_frames("priority"))

def test_parse_rst_stream_frame(self, benchmark):
benchmark(_parse_function, _create_frames("rst_stream"))

def test_parse_settings_frame(self, benchmark):
benchmark(_parse_function, _create_frames("settings"))

def test_parse_settings_ack_frame(self, benchmark):
benchmark(_parse_function, _create_frames("settings_ack"))

def test_parse_push_promise_frame(self, benchmark):
benchmark(_parse_function, _create_frames("push_promise"))

def test_parse_ping_frame(self, benchmark):
benchmark(_parse_function, _create_frames("ping"))

def test_parse_headers_frame(self, benchmark):
benchmark(_parse_function, _create_frames("headers"))

def test_parse_go_away_frame(self, benchmark):
benchmark(_parse_function, _create_frames("go_away"))

def test_parse_window_update_frame(self, benchmark):
benchmark(_parse_function, _create_frames("window_update"))

def test_parse_continuation_frame(self, benchmark):
benchmark(_parse_function, _create_frames("continuation"))

def test_parse_alt_svc_frame(self, benchmark):
benchmark(_parse_function, _create_frames("alt_svc"))
52 changes: 52 additions & 0 deletions bench/test_serialize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from bench.utils.factories import FRAME_FACTORIES
from bench.utils import get_int


NUMBER_OF_FRAMES = 100


def _serialize_function(frames):
frame = frames[get_int(0, NUMBER_OF_FRAMES - 1)]
frame.serialize()


def _create_frames(frame_type):
return [FRAME_FACTORIES[frame_type]() for _ in range(100)]


class TestSerializeBenchmarks:
def test_serialize_data_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("data"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that pytest is already in use elsewhere in the codebase, I wonder if it would be worth condensing this using @pytest.mark.parametrize, something like:

@pytest.mark.parametrize('frame_name', [  'data',  'priority',  ...,  'rst_stream' ]) def test_serialize_frame(frame_name, benchmark): benchmark(_serialize_function, _create_frames(frame_name))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, I will look into this tonight.


def test_serialize_priority_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("priority"))

def test_serialize_rst_stream_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("rst_stream"))

def test_serialize_settings_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("settings"))

def test_serialize_settings_ack_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("settings_ack"))

def test_serialize_push_promise_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("push_promise"))

def test_serialize_ping_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("ping"))

def test_serialize_headers_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("headers"))

def test_serialize_go_away_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("go_away"))

def test_serialize_window_update_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("window_update"))

def test_serialize_continuation_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("continuation"))

def test_serialize_alt_svc_frame(self, benchmark):
benchmark(_serialize_function, _create_frames("alt_svc"))
28 changes: 28 additions & 0 deletions bench/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
import struct
import sys
import time
import random
import warnings

# Seed was randomly generated with 128 bytes of entropy because old Python
# versions have low initial entropy when seeds have too many NULL bytes.
_SEED = open(os.path.join(os.path.dirname(__file__), "seed"), "rb").read()
_RANDOM = random.Random(_SEED)


def reset_rng():
global _RANDOM
_RANDOM = random.Random(_SEED)


def get_bool():
return True if get_int(0, 1) == 1 else False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given your own performance notes, let's reduce the overhead of this a bit by replacing it with return bool(get_int(0, 1))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True if x else False is faster than bool(x) according to my notes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, ok then. =)



def get_int(l, h):
return _RANDOM.randint(l, h)


def get_bytes(n):
return b''.join([struct.pack("@B", _RANDOM.randint(0, 255)) for _ in range(n)])
147 changes: 147 additions & 0 deletions bench/utils/factories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
from bench.utils import get_bytes, get_int, get_bool
from hyperframe.frame import *


MAX_STREAM_ID = 0x7FFFFFFF
MAX_ERROR_CODE = 0x7FFFFFFF
MAX_PAYLOAD_LENGTH = 0xFFFF
MAX_PADDING_LENGTH = 0xFF
MAX_STREAM_WEIGHT = 0xFF


def data_frame_factory():
frame = DataFrame(get_int(1, MAX_STREAM_ID))
frame.data = get_bytes(get_int(0, MAX_PAYLOAD_LENGTH))

if get_bool():
frame.flags.add("END_STREAM")

frame.flags.add("PADDED")
frame.pad_length = get_int(0, MAX_PADDING_LENGTH)

return frame


def priority_frame_factory():
frame = PriorityFrame(get_int(1, MAX_STREAM_ID))
frame.exclusive = get_bool()
frame.depends_on = get_int(0, MAX_STREAM_ID)
frame.stream_weight = get_int(0, MAX_STREAM_WEIGHT)

return frame


def rst_stream_frame_factory():
frame = RstStreamFrame(get_int(1, MAX_STREAM_ID))
frame.error_code = get_int(1, MAX_ERROR_CODE)

return frame


def settings_frame_factory():
frame = SettingsFrame()
for _ in range(get_int(0, 16)):
frame.settings[get_int(1, 0xFF)] = get_int(0, 0xFFFFFFFF)

return frame


def settings_frame_ack_factory():
frame = SettingsFrame()
frame.flags.add("ACK")
return frame


def push_promise_frame_factory():
frame = PushPromiseFrame(get_int(1, MAX_STREAM_ID))
frame.data = get_bytes(get_int(0, MAX_PAYLOAD_LENGTH))

if get_bool():
frame.flags.add("END_HEADERS")

if get_int(0, 10) != 0:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason the odds of putting padding on this are 9/10?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wanted to test both padded and non-padded but I figured most frames with optional padding should be padded. Should I just increase this to 100% of frames with optional padding will be padded?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we may as well. =)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, I will make this change.

frame.flags.add("PADDED")
frame.pad_length = get_int(0, MAX_PADDING_LENGTH)

return frame


def ping_frame_factory():
frame = PingFrame()
frame.data = get_bytes(8)

if get_bool():
frame.flags.add("ACK")

return frame


def go_away_frame_factory():
frame = GoAwayFrame()
frame.last_stream_id = get_int(1, MAX_STREAM_ID)
frame.error_code = get_int(1, MAX_ERROR_CODE)

return frame


def window_update_frame_factory():
frame = WindowUpdateFrame(get_int(0, MAX_STREAM_ID))
frame.window_increment = get_int(1, 0x7FFFFFFF)

return frame


def headers_frame_factory():
frame = HeadersFrame(get_int(1, MAX_STREAM_ID))
frame.data = get_bytes(get_int(0, MAX_PAYLOAD_LENGTH))

if get_bool():
frame.flags.add("END_HEADERS")
if get_bool():
frame.flags.add("END_STREAM")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic is not quite right. You can have a HEADERS frame with END_STREAM set but not END_HEADERS: specifically, if that HEADERS frame is going to be followed by a CONTINUATION frame. CONTINUATION frames do not carry END_STREAM.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahhh, I had them mixed up, I will change these around. Thanks for the clarification.

Copy link
Member Author

@sethmlarson sethmlarson Oct 13, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated side note: I wish there was some way that all caps could appear less threatening. So many years of Internet have conditioned me to see all caps comments as angry and that is my mind's initial knee-jerk reaction until I read HEADERS haha :)
You have done nothing wrong it was just a funny thing that I noticed. I know you would not get that livid about a PR. Although maybe you would, I dunno! ;)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Heh, that's ok, keep doing this programming lark and you'll get to the mindset I have, where ALL_CAPS means COMPILE_TIME_CONSTANT.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Lukasa Oh I've been programming a long time, I've only recently joined Github. Been at it for about 12 years now. ;) I understand the convention but apparently social rules come first mentally when reading comments haha

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

;) Nice. Happily that's not a problem I have, though it may just be that I only spend time in the parts of the internet where all caps isn't used much.


if get_bool():
frame.flags.add("PRIORITY")
frame.depends_on = get_int(0, MAX_STREAM_ID)
frame.exclusive = get_bool()
frame.stream_weight = get_int(0, MAX_STREAM_WEIGHT)

frame.flags.add("PADDED")
frame.pad_length = get_int(0, MAX_PADDING_LENGTH)

return frame


def continuation_frame_factory():
frame = ContinuationFrame(get_int(1, MAX_STREAM_ID))
frame.data = get_bytes(get_int(0, MAX_PAYLOAD_LENGTH))

if get_bool():
frame.flags.add("END_HEADERS")

return frame


def alt_svc_frame_factory():
frame = AltSvcFrame(get_int(1, MAX_STREAM_ID))

frame.origin = get_bytes(get_int(1, 0xFFFF))
frame.field = get_bytes(get_int(0, MAX_PAYLOAD_LENGTH - 2))

return frame


FRAME_FACTORIES = {
"data": data_frame_factory,
"priority": priority_frame_factory,
"rst_stream": rst_stream_frame_factory,
"settings": settings_frame_factory,
"settings_ack": settings_frame_ack_factory,
"push_promise": push_promise_frame_factory,
"ping": ping_frame_factory,
"go_away": go_away_frame_factory,
"window_update": window_update_frame_factory,
"headers": headers_frame_factory,
"continuation": continuation_frame_factory,
"alt_svc": alt_svc_frame_factory
}
Binary file added bench/utils/seed
Binary file not shown.
10 changes: 5 additions & 5 deletions hyperframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def serialize_padding_data(self):
def parse_padding_data(self, data):
if 'PADDED' in self.flags:
try:
self.pad_length = struct.unpack('!B', data[:1])[0]
self.pad_length = _STRUCT_B.unpack(data[:1])[0]
except struct.error:
raise InvalidFrameError("Invalid Padding data")
return 1
Expand Down Expand Up @@ -265,10 +265,10 @@ def serialize_body(self):

def parse_body(self, data):
padding_data_length = self.parse_padding_data(data)
self.body_len = len(data)
self.data = (
data[padding_data_length:len(data)-self.total_padding].tobytes()
data[padding_data_length:self.body_len-self.total_padding].tobytes()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did two bytes of indentation get stripped here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as below.

)
self.body_len = len(data)

if self.total_padding and self.total_padding >= self.body_len:
raise InvalidPaddingError("Padding is too long.")
Expand Down Expand Up @@ -549,7 +549,7 @@ def parse_body(self, data):

self.body_len = len(data)

if len(data) > 8:
if self.body_len > 8:
self.additional_data = data[8:].tobytes()


Expand Down Expand Up @@ -645,7 +645,7 @@ def parse_body(self, data):

self.body_len = len(data)
self.data = (
data[priority_data_length:len(data)-self.total_padding].tobytes()
data[priority_data_length:self.body_len-self.total_padding].tobytes()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did two bytes of indentation get stripped here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line length limits and being no good place to break the line.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can save the extra characters by changing priority_data_length to priority_data_len.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good plan, I will make that change here and above and revert the white space.

)

if self.total_padding and self.total_padding >= self.body_len:
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import itertools
import os
import re
import sys
Expand Down Expand Up @@ -54,5 +53,5 @@
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: Implementation :: CPython',
],
]
)