list
0 1 2 3 4 5 6 7 8 ... ┌─┬─┬─┬─┬─┬─┬─┬─┬─ │ │ │ │ │ │ │ │ │ ... └─┴─┴─┴─┴─┴─┴─┴─┴─ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │ │ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ │ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ │ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ ... │ Memory hierarchy (but see Gustavo Duarte’s What Your Computer Does While You Wait and Dan Luu’s How Misaligning Data Can Increase Performance 12×)
Why do I ignore those issues?
Why do I ignore those issues?
The Standard Library does
Record
┌─┬─┬─┬─┬─┬─┬─┬─┐ │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ │address of type│ → ... ├─┼─┼─┼─┼─┼─┼─┼─┤ ... ┌─┬─┬─┬─┬─┬─┬─┬─┐ │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ │address of type│ → <type 'int'> ├─┼─┼─┼─┼─┼─┼─┼─┤ │ 123 │ └─┴─┴─┴─┴─┴─┴─┴─┘ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ │address of type│ → <type 'float'> ├─┼─┼─┼─┼─┼─┼─┼─┤ │2.997924580e+08│ └─┴─┴─┴─┴─┴─┴─┴─┘ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ │address of type│ → <type 'str'> ├─┼─┼─┼─┼─┼─┼─┼─┤ │ length = 6 │ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ hash │ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ flags │ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ address = 0 │ ├─┼─┼─┼─┼─┼─┼─┴─┘ │H e l l o !│ └─┴─┴─┴─┴─┴─┘ ┌─┬─┬─┬─┬─┬─┬─┬─┐ a + 0 │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ a + 8 │address of type│ → <type 'int'> ├─┼─┼─┼─┼─┼─┼─┼─┤ a + 16 │ 123 │ └─┴─┴─┴─┴─┴─┴─┴─┘ struct
>>> from struct import pack, unpack >>> data = pack('IIf', 2, 9, 20.0) >>> len(data) 12 >>> data.encode('hex') '02000000090000000000a041' >>> unpack('IIf', data) (2, 9, 20.0) Array
a + 0 1 2 3 4 5 ┌─┬─┬─┬─┬─┬─┐ │H e l l o !│ └─┴─┴─┴─┴─┴─┘ ┌─┬─┬─┬─┬─┬─┬─┬─┐ a+0 a+4 │h |e | ├─┼─┼─┼─┼─┼─┼─┼─┤ a+8 a+12 │l |l │ ├─┼─┼─┼─┼─┼─┼─┼─┤ a+16 a+20 │o |! │ └─┴─┴─┴─┴─┴─┴─┴─┘ array
>>> from array import array >>> a = array('d', [1.0, 2.0, 3.0, 4.0, 5.0]) >>> a[3] 4.0 >>> a[3] = 44.0 >>> a array('d', [1.0, 2.0, 3.0, 44.0, 5.0]) >>> a.itemsize 8 >>> len(a) 5 >>> len(a) * a.itemsize 40 ┌─┬─┬─┬─┬─┬─┬─┬─┐ │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ │address of type│ → <type 'float'> ├─┼─┼─┼─┼─┼─┼─┼─┤ │ 1.234 e+00 │ └─┴─┴─┴─┴─┴─┴─┴─┘ ↑ ↓
┌─┬─┬─┬─┬─┬─┬─┬─┐ │ 1.234 │ 5.678 │ ├─┼─┼─┼─┼─┼─┼─┼─┤ >>> import numpy as np >>> a = np.arange(0, 5) >>> print(a) [0 1 2 3 4] >>> a.sum() 10 >>> print(a * a) [ 0 1 4 9 16] >>> print(a + 10) [10 11 12 13 14] tuple
>>> tup = (1, 2.0, 'Hello!') >>> tup = (1, 2.0, 'Hello!') >>> tup = (1, 2.0, 'Hello!') ┌─┬─┬─┬─┬─┬─┬─┬─┐ │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ │address of type│ → <type tuple> ├─┼─┼─┼─┼─┼─┼─┼─┤ │ 3 │ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ address │ → <int 1> ├─┼─┼─┼─┼─┼─┼─┼─┤ │ address │ → <float 2.0> ├─┼─┼─┼─┼─┼─┼─┼─┤ │ address │ → <str 'Hello!'> └─┴─┴─┴─┴─┴─┴─┴─┘ len(tup) tup[i] >>> from collections import namedtuple >>> City = namedtuple('City', 'name province') >>> m = City('Montréal', 'Québec') >>> m.name 'Montréal' >>> m.province 'Québec' tuple
>>> t = (1, 2, 3) >>> len(t) 3 >>> t[9] Traceback (most recent call last): ... IndexError: tuple index out of range list
─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─ #######│ list │####### ─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─ names references ↓ ─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─ #######│ list │####### ─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ │address of type│ → <type list> ├─┼─┼─┼─┼─┼─┼─┼─┤ │ 3 │ ├─┼─┼─┼─┼─┼─┼─┼─┤ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │ array address │ → │ address │ → ├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ 4 │ │ address │ → └─┴─┴─┴─┴─┴─┴─┴─┘ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ address │ → ├─┼─┼─┼─┼─┼─┼─┼─┤ │ - │ └─┴─┴─┴─┴─┴─┴─┴─┘ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │reference count│ ├─┼─┼─┼─┼─┼─┼─┼─┤ │address of type│ → <type list> ├─┼─┼─┼─┼─┼─┼─┼─┤ │ 3 │ ├─┼─┼─┼─┼─┼─┼─┼─┤ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │ array address │ → │ address │ → ├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ 4 │ │ address │ → └─┴─┴─┴─┴─┴─┴─┴─┘ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ address │ → ├─┼─┼─┼─┼─┼─┼─┼─┤ │ - │ └─┴─┴─┴─┴─┴─┴─┴─┘ >>> list(range(10)) [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] >>> sum(range(10)) 45 >>> sum(range(1000)) 499500 >>> sum(range(1000000)) 499999500000 >>> list(range(10)) [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] >>> sum(range(10)) 45 >>> sum(range(1000)) 499500 >>> sum(range(1000000)) 499999500000 | Appends | Reallocations | Address copies |
|---|---|---|
| 1,000 | 27 | 8,603 |
| 1,000,000 | 85 | 9,692,995 |
| 1,000,000,000 | 143 | 9,058,551,346 |
amortization
┌───────────────────────┬───────────┐ │ 991 item addresses │ 129 slots │ └───────────────────────┴───────────┘ Speed vs space
list
# Single-item: s[i] s[i] = v del s[i] s.append(v) s.insert(i, v) s.pop(i) v in s # Several-item: t = s[7:-7] s == t s.count('Montréal') s.index('Montréal') s.remove('Montréal') for item in s: ... problem
# Single-item operations that touch: # 1 item # n items s[i] s[i] = v del s[len(s)] del s[0] s.append(v) s.insert(len(s), v) s.insert(0, v) s.pop() s.pop(0) v in s # Reverse order while mylist: item = list.pop(0) print item for item in reversed(mylist): print item list
copy vs view
t = s[400:500] ┌─────────────────────────┐ │ 1,000 item list │ └─────────────────────────┘ 8 * 100 = 800 bytes copied ↓ ┌───────────────┐ │ 100 item list │ └───────────────┘ t = s[400:500] ┌────────────────────────┐ │ 1,000 floats │ └────────────────────────┘ ↑ ┌─────────┐ │ slice │ │+400 <500│ └─────────┘ ┌─┬─┬─┬─┬─┬─┬─┬─┐ s[0] │ address │ → item ├─┼─┼─┼─┼─┼─┼─┼─┤ s[1] │ address │ → item ├─┼─┼─┼─┼─┼─┼─┼─┤ s[2] │ address │ → item ├─┼─┼─┼─┼─┼─┼─┼─┤ s[3] │ address │ → item ├─┼─┼─┼─┼─┼─┼─┼─┤ ⋮ >>> d = {} >>> d[1] = 'un' >>> d[2.0] = 'deux' >>> d['three'] = 'trois' ┌─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┐ │ hash │ key address │ value address │ ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ │ hash │ key address │ value address │ ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ │ hash │ key address │ value address │ ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ │ ... │ ... │ ... │ see
Speed vs space
# Single item # Several item d[key] d == d2 d[key] = v d.update(d2) key in d list(d) d.get(key) for k in d.keys(): ... d.pop(key) for k in d.values(): ... del d[key] for k, v in d.items(): ... >>> from collections import defaultdict >>> >>> animals = defaultdict(list) >>> animals['Mammal'].append('dog') >>> animals['Reptile'].append('lizard') >>> animals['Mammal'].append('cat') {'Mammal': ['dog', 'cat'], 'Reptile': ['lizard']} >>> # Most common 2 letters in a phrase >>> >>> from collections import Counter >>> c = Counter('château frontenac, quebec') >>> c.most_common(2) [('e', 4), ('c', 3)] {('google.com', 80): 7125773, ('google.com', 443): 1520951, ('heroku.com', 22): 245080, ('linkedin.com', 443): 5724124, ('rackspace.com', 443): 5002620} project_languages = { frozenset(['python']): 328, frozenset(['python', 'js']): 217, frozenset(['ruby']): 274, frozenset(['ruby', 'js']): 261, frozenset(['js']): 79, } OrderedDict
>>> from collections import OrderedDict >>> >>> d = dict.fromkeys('Montréal') >>> list(d.keys()) ['a', 'r', 't', 'é', 'M', 'l', 'o', 'n'] >>> >>> d = OrderedDict.fromkeys('Montréal') >>> list(d.keys()) ['M', 'o', 'n', 't', 'r', 'é', 'a', 'l'] # Fast 1-member ops member in t t.add(member) t.remove(member) # Fast n-member ops t | u t & u t - u >>> a = [1.0, 2.2, 3.0, 3.4, 3.5, 4.0, 4.5] >>> import bisect >>> left = bisect.bisect_left(a, 3.0) >>> right = bisect.bisect_right(a, 4.0) >>> a[left:right] [3.0, 3.4, 3.5, 4.0] Use the deque
# Efficient operations: >>> from collections import deque >>> d = deque() >>> d.append(5) >>> d.appendleft(4) >>> d.pop() 5 >>> d.popleft() 4 # Great for threading # Great for multiprocessing from queue import Queue >>> import heapq >>> a = [6, 2, 4, 3, 1, 5, 7] >>> heapq.heapify(a) >>> a [1, 2, 4, 3, 6, 5, 7] >>> print(' {}\n {}\n{}' ... .format(a[0:1], a[1:3], a[3:7])) [1] [2, 4] [3, 6, 5, 7] >>> heapq.heappop(a) 1 >>> heapq.heappop(a) 2 >>> heapq.heappush(a, 0) >>> heapq.heappop(a) 0 # Linked list in the old days ┌─┬─┬─┬─┬─┬─┬─┬─┐ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │ next address │ → │ next address │ → ├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ data │ │ data │ ├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ ... │ │ ... │ ├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ ... │ │ ... │ ├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤ │ ... │ │ ... │ └─┴─┴─┴─┴─┴─┴─┴─┘ └─┴─┴─┴─┴─┴─┴─┴─┘ # Linked list when "everything is an object" ┌─┬─┬─┬─┬─┬─┬─┬─┐ ┌─┬─┬─┬─┬─┬─┬─┬─┐ │ next address │ → │ next address │ → ├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤ │object address │ │object address │ └─┴─┴─┴─┴─┴─┴─┴─┘ └─┴─┴─┴─┴─┴─┴─┴─┘ ↓ ↓ <object> <object> see
@brandon_rhodes