Skip to content

Commit 87dac86

Browse files
authored
Pythonic name, 1D output, unit tests
1 parent 2f95104 commit 87dac86

File tree

2 files changed

+120
-43
lines changed

2 files changed

+120
-43
lines changed

stationary_bootstrap.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import numpy as np
2+
3+
def stationary_bootstrap(data: np.ndarray, m: float, sample_length: int) -> np.ndarray:
    """
    Generate a bootstrapped sample of a time series using the stationary bootstrap
    method (Politis & Romano, 1994). Blocks of random length are resampled so that
    temporal dependency is preserved.

    At every step the sampler either continues the current block (moves to the next
    observation, wrapping around to the start of the series after the last one) or,
    with probability 1/m, starts a new block at a uniformly random position.

    Args:
        data (numpy.ndarray): The time-series data. Expected to be 1-dimensional;
            a single row or single column (e.g. shape (n, 1)) is flattened.
        m (float): The average block length for resampling. Must be positive.
        sample_length (int): The desired length of the bootstrapped sample.
            Must be positive.

    Returns:
        np.ndarray: A 1-dimensional float array of length `sample_length`
        containing the bootstrapped sample.

    Raises:
        ValueError: If m is not positive.
        ValueError: If sample_length is not positive.
        ValueError: If data is not a numpy array.
        ValueError: If data array is empty.
        ValueError: If data has more than one non-singleton dimension.

    Example of use (the output is random, so values differ between runs):
        import numpy as np
        data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
        sample = stationary_bootstrap(data, m=4, sample_length=12)
        # e.g. array([9., 3., 4., 5., 6., 7., 8., 7., 2., 3., 4., 2.])

    Reference:
        Dimitris N. Politis & Joseph P. Romano (1994) The Stationary Bootstrap,
        Journal of the American Statistical Association, 89:428, 1303-1313,
        DOI: 10.1080/01621459.1994.10476870

    Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021.
    """
    # Input validation
    if m <= 0:
        raise ValueError("Block length 'm' must be positive")
    if sample_length <= 0:
        raise ValueError("Sample length must be positive")
    if not isinstance(data, np.ndarray):
        raise ValueError("data needs to be as a numpy array")
    if data.size == 0:
        raise ValueError("Data array cannot be empty")
    if data.size != max(data.shape, default=1):
        raise ValueError(
            "data must be one-dimensional (a single row or column is also accepted)"
        )

    # Work on a flat view so that indexing always yields a scalar, never a
    # size-1 array (implicit array-to-scalar conversion is deprecated in NumPy).
    series = data.reshape(-1)
    data_length = series.shape[0]

    # Probability of starting a new block at each step.
    accept = 1 / m

    # Keep the position as a plain Python int for the whole loop.
    sample_index = int(np.random.randint(0, high=data_length))
    sample = np.zeros((sample_length,))
    for i_sample in range(sample_length):
        if np.random.uniform(0, 1) >= accept:
            # Continue the current block, wrapping around the end of the series.
            sample_index = (sample_index + 1) % data_length
        else:
            # Start a new block at a uniformly random position.
            sample_index = int(np.random.randint(0, high=data_length))

        sample[i_sample] = series[sample_index]
    return sample

tests.py

Lines changed: 57 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,84 +1,97 @@
1-
# Collection of tests that show how Stationary boostrap works. This will be written again in pytest
21
import numpy as np
32
import pytest
4-
from StationaryBootstrap import StationaryBootstrap
3+
from stationary_bootstrap import stationary_bootstrap
54

65

76
# Normal behaviour
87
def test_normal():
    """A valid call returns a NumPy array."""
    data = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2])  # Original time-series
    m = 4  # Average length of the block
    sample_length = 12  # Length of output sample
    ans = stationary_bootstrap(data, m, sample_length)
    # Use `assert cond, msg`; `assert (cond, msg)` tests a non-empty tuple and
    # therefore can never fail.
    assert isinstance(ans, np.ndarray), "Output is not a numpy ndarray."
1413

1514

1615
# Is output same length as sampleLength
1716
def test_correct_length():
    """The output has exactly the requested number of elements."""
    data = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2])  # Original time-series
    m = 4  # Average length of the block
    sample_length = 12  # Length of output sample
    ans = stationary_bootstrap(data, m, sample_length)
    # Use `assert cond, msg`; `assert (cond, msg)` tests a non-empty tuple and
    # therefore can never fail.
    assert len(ans) == sample_length, "Sample length does not match the specified sample length."
2322

23+
# Is output a 1-dimensional array of shape (sample_length,)
24+
def test_correct_shape():
    """The output is one-dimensional with shape (sample_length,)."""
    data = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2])  # Original time-series
    m = 4  # Average length of the block
    sample_length = 12  # Length of output sample
    ans = stationary_bootstrap(data, m, sample_length)
    # Use `assert cond, msg`; `assert (cond, msg)` tests a non-empty tuple and
    # therefore can never fail.
    assert ans.shape == (sample_length,), "Output is not the specified shape."
30+
31+
# Test if the output values are within the input data range
32+
def test_bootstrap_validity_of_values():
    """Every resampled value must be one of the original observations."""
    source_values = np.array([10, 20, 30, 40])
    resampled = stationary_bootstrap(source_values, 1.5, 15)
    all_from_source = np.isin(resampled, source_values).all()
    assert all_from_source, "Output contains values not in the original data."
2438

2539
# One element sampled always
2640
def test_one_element_always_sampled():
    """With a single observation, every output element equals that observation."""
    data = np.array([0.4])
    sample_length = 4
    m = 4
    ans = stationary_bootstrap(data, m, sample_length)
    # The function returns a 1-D array, so the expectation must be 1-D as well
    # (a (4, 1) column would never compare equal under np.array_equal).
    expected = np.full(sample_length, 0.4)
    # Use `assert cond, msg`; `assert (cond, msg)` tests a non-empty tuple and
    # therefore can never fail.
    assert np.array_equal(ans, expected), "Single element should be repeated in the output."
3346

3447
# Sample of length 1
3548
def test_sample_of_length_one():
    """A one-element series sampled once yields that single element."""
    expected = np.array([0.5])
    ans = stationary_bootstrap(np.array([0.5]), 4, 1)
    assert ans == expected
4154

42-
# Sampling empty data
43-
#data = np.array([])
44-
#sampleLength = 1
45-
#ans = StationaryBootstrap(data, m, sampleLength)
46-
#print(ans == np.array([0.5]))
47-
48-
# Negative sample length parameter
49-
#data = np.array([0.5])
50-
#sampleLength = -1
51-
#ans = StationaryBootstrap(data, m, sampleLength)
52-
#print(ans == np.array([0.5]))
53-
54-
55-
# negative average length
56-
#def test_negative_average_length():
57-
# data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series
58-
# m = -4 # Average length of the block
59-
# sampleLength = 12 # Length of output sample
60-
# ans = StationaryBootstrap(data, m, sampleLength)
61-
# print(ans)
62-
#print("Fix this")
63-
55+
# Test if an error is raised for non-positive block length (m)
56+
def test_invalid_block_length():
    """A non-positive average block length is rejected with ValueError."""
    series = np.array([1, 2, 3])
    invalid_m = 0  # Invalid block length
    with pytest.raises(ValueError, match="Block length 'm' must be positive"):
        stationary_bootstrap(series, invalid_m, 10)
62+
63+
# Test if an error is raised when data array is empty
64+
def test_empty_data_array():
    """An empty input array is rejected with ValueError."""
    empty_series = np.array([])
    with pytest.raises(ValueError, match="Data array cannot be empty"):
        stationary_bootstrap(empty_series, 2.0, 5)
70+
71+
# Test if an error is raised for invalid sample length
72+
def test_invalid_sample_length():
    """A negative requested sample length is rejected with ValueError."""
    series = np.array([1, 2, 3])
    invalid_length = -5  # Invalid sample length
    with pytest.raises(ValueError, match="Sample length must be positive"):
        stationary_bootstrap(series, 1.0, invalid_length)
6478

6579
# Average length longer than sample
6680
def test_average_length_longer_than_sample():
    """An average block length above the sample length still gives a full-length sample."""
    series = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2])  # Original time-series
    requested = 12  # Length of output sample
    # Average block length (20) deliberately exceeds the requested sample length.
    resampled = stationary_bootstrap(series, 20, requested)
    assert len(resampled) == requested
7286

73-
7487
# Data in columns
7588
def test_data_passed_in_column():
    """Column-shaped input produces an output of the same size as flat input."""
    flat = np.array([0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2])
    column = np.array([[0.4], [0.2], [0.1], [0.4], [0.3], [0.1], [0.3], [0.4], [0.2], [0.5], [0.1], [0.2]])  # Original time-series
    block_length = 4  # Average length of the block
    requested = 12  # Length of output sample
    from_column = stationary_bootstrap(column, block_length, requested)
    from_flat = stationary_bootstrap(flat, block_length, requested)
    assert from_column.size == from_flat.size
8396

8497

@@ -87,23 +100,24 @@ def test_negative_input_data():
87100
data = np.array([-0.4,0.2,-0.1,0.4,-0.3,0.1,-0.3,0.4,-0.2,-0.5,0.1,-0.2]) # Original time-series
88101
m = 4 # Average length of the block
89102
sampleLength = 12 # Length of output sample
90-
ans = StationaryBootstrap(data, m, sampleLength)
103+
ans = stationary_bootstrap(data, m, sampleLength)
91104
assert(len(ans)== sampleLength)
92105

93106

94107
# Data not in numpy array
95-
#data = [0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2] # Original time-series
96-
#m = 4 # Average length of the block
97-
#sampleLength = 12 # Length of output sample
98-
#ans = StationaryBootstrap(data, m, sampleLength)
99-
#print(ans)
108+
def test_data_not_numpy():
    """A plain Python list is rejected with ValueError."""
    as_list = [0.4, 0.2, 0.1, 0.4, 0.3, 0.1, 0.3, 0.4, 0.2, 0.5, 0.1, 0.2]  # Original time-series
    block_length = 4  # Average length of the block
    requested = 12  # Length of output sample
    with pytest.raises(ValueError, match="data needs to be as a numpy array"):
        stationary_bootstrap(as_list, block_length, requested)
100114

101115
# Data contains strings
102116
def test_string_number_input_data():
    """Numeric strings in the input still yield a sample of the requested length."""
    text_series = np.array(["-0.4", "0.2", "-0.1", "0.4", "-0.3", "0.1", "0.3", "0.4", "0.2", "0.5", "0.1", "0.2"])  # Original time-series
    requested = 12  # Length of output sample
    resampled = stationary_bootstrap(text_series, 4, requested)
    assert len(resampled) == requested
108122

109123

0 commit comments

Comments
 (0)