open-source-modelling
diff --git a/‎stationary_bootstrap.py‎
Lines changed: 63 additions & 0 deletions b/‎stationary_bootstrap.py‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎tests.py‎
Lines changed: 57 additions & 43 deletions b/‎tests.py‎
Lines changed: 57 additions & 43 deletions
@@ -0,0 +1,63 @@
+import numpy as np
+
+def stationary_bootstrap(data: np.ndarray, m: float, sample_length: int) -> np.ndarray:
+    """
+    Generate a bootstrapped sample of a time series using the stationary bootstrap method
+    (Politis & Romano, 1994).
+
+    The sample is assembled from blocks of consecutive observations. At every step the
+    current block is continued with probability 1 - 1/m, otherwise a new block is started
+    at a uniformly drawn position, so for m >= 1 the block length is geometric with mean m.
+    A block that runs past the last observation wraps around to the first one.
+
+    Args:
+        data (numpy.ndarray): The time-series data. Expected to be 1-dimensional; an array
+            holding a single row or column is flattened before resampling.
+        m (float): The average block length for resampling. Must be positive.
+        sample_length (int): The desired length of the bootstrapped sample. Must be positive.
+
+    Returns:
+        np.ndarray: A float array of shape `(sample_length,)` containing the bootstrapped sample.
+
+    Raises:
+        ValueError: If m is not positive.
+        ValueError: If sample_length is not positive.
+        ValueError: If data is not a numpy array.
+        ValueError: If data array is empty.
+        ValueError: If data holds more than one series (several rows and several columns).
+
+    Example of use (the values are random, only the shape is fixed):
+        >>> import numpy as np
+        >>> data = np.array([1,2,3,4,5,6,7,8,9,10])
+        >>> stationary_bootstrap(data, 4, 12).shape
+        (12,)
+
+    Reference:
+        Dimitris N. Politis & Joseph P. Romano (1994) The Stationary Bootstrap, Journal of the American Statistical
+        Association, 89:428, 1303-1313, DOI: 10.1080/01621459.1994.10476870
+
+    Implemented by Gregor Fabjan from Qnity Consultants on 12/11/2021.
+    """
+
+    # Input validation
+    if m <= 0:
+        raise ValueError("Block length 'm' must be positive")
+    if sample_length <= 0:
+        raise ValueError("Sample length must be positive")
+    if not isinstance(data, np.ndarray):
+        raise ValueError("data needs to be as a numpy array")
+    if data.size == 0:
+        raise ValueError("Data array cannot be empty")
+    if data.ndim > 1 and data.size != max(data.shape):
+        raise ValueError("data must hold a single series (1-D, or one row/column)")
+
+    # Use a flat series and plain integer indices so that every lookup yields a scalar;
+    # storing a size-1 array into one element of `sample` relies on an array-to-scalar
+    # conversion that recent NumPy releases deprecate.
+    series = data.reshape(-1)
+    data_length = series.size
+    restart_probability = 1 / m
+
+    sample = np.zeros((sample_length,))
+    sample_index = np.random.randint(data_length)
+    for i_sample in range(sample_length):
+        if np.random.uniform() >= restart_probability:
+            # Continue the current block, wrapping around at the end of the series.
+            sample_index = (sample_index + 1) % data_length
+        else:
+            # Start a new block at a uniformly drawn position.
+            sample_index = np.random.randint(data_length)
+        sample[i_sample] = series[sample_index]
+    return sample
@@ -1,84 +1,97 @@
-# Collection of tests that show how Stationary boostrap works. This will be written again in pytest
 import numpy as np
 import pytest
-from StationaryBootstrap import StationaryBootstrap
+from stationary_bootstrap import stationary_bootstrap
 
 
 # Normal behaviour
 def test_normal():
  data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series
  m = 4 # Average length of the block
  sampleLength = 12 # Length of output sample
- ans = StationaryBootstrap(data, m, sampleLength)
- assert(isinstance(ans, np.ndarray))
+ ans = stationary_bootstrap(data, m, sampleLength)
+ assert isinstance(ans, np.ndarray), "Output is not a numpy ndarray." # No parentheses: assert on a (condition, message) tuple always passes
 
 
 # Is output same length as sampleLength
 def test_correct_length():
  data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series
  m = 4 # Average length of the block
  sampleLength = 12 # Length of output sample
- ans = StationaryBootstrap(data, m, sampleLength)
- assert(len(ans)== sampleLength)
+ ans = stationary_bootstrap(data, m, sampleLength)
+ assert len(ans) == sampleLength, "Sample length does not match the specified sample length." # No parentheses: assert on a (condition, message) tuple always passes
 
+# Is output of shape (sample_length,)
+def test_correct_shape():
+    data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series
+    m = 4 # Average length of the block
+    sample_length = 12 # Length of output sample
+    ans = stationary_bootstrap(data, m, sample_length)
+    assert ans.shape == (sample_length,), "Output is not the specified shape." # No parentheses: assert on a (condition, message) tuple always passes
+
+# Test if the output values are within the input data range
+def test_bootstrap_validity_of_values():
+    """Every resampled value must also occur in the original observations."""
+    observations = np.array([10, 20, 30, 40])
+    resampled = stationary_bootstrap(observations, 1.5, 15)
+    is_foreign = ~np.isin(resampled, observations) # True where a value is absent from the input
+    assert not is_foreign.any(), "Output contains values not in the original data."
 
 # One element sampled always
 def test_one_element_always_sampled():
  data = np.array([0.4])
  sampleLength = 4
  m = 4
- ans = StationaryBootstrap(data, m, sampleLength)
- assert(ans == np.array([[0.4], [0.4], [0.4], [0.4]]))
-
+ ans = stationary_bootstrap(data, m, sampleLength)
+ assert np.array_equal(ans, np.array([0.4, 0.4, 0.4, 0.4])), "Single element should be repeated in the output." # Bare assert (a tuple always passes); expected is 1-D because the output has shape (sampleLength,)
 
 # Sample of length 1
 def test_sample_of_length_one():
  data = np.array([0.5])
  m = 4
  sampleLength = 1
- ans = StationaryBootstrap(data, m, sampleLength)
+ ans = stationary_bootstrap(data, m, sampleLength) # data has a single element, so the one drawn value can only be 0.5
  assert(ans == np.array([0.5]))
 
-# Sampling empty data
-#data = np.array([])
-#sampleLength = 1
-#ans = StationaryBootstrap(data, m, sampleLength)
-#print(ans == np.array([0.5]))
-
-# Negative sample length parameter
-#data = np.array([0.5])
-#sampleLength = -1
-#ans = StationaryBootstrap(data, m, sampleLength)
-#print(ans == np.array([0.5]))
-
-
-# negative average length 
-#def test_negative_average_length():
-# data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series
-# m = -4 # Average length of the block
-# sampleLength = 12 # Length of output sample
-# ans = StationaryBootstrap(data, m, sampleLength)
-# print(ans)
-#print("Fix this")
-
+# Test if an error is raised for non-positive block length (m)
+def test_invalid_block_length():
+    """A block length of zero must be rejected with a ValueError."""
+    series = np.array([1, 2, 3])
+    expected_message = "Block length 'm' must be positive"
+    with pytest.raises(ValueError, match=expected_message):
+        stationary_bootstrap(series, 0, 10)
+
+# Test if an error is raised when data array is empty
+def test_empty_data_array():
+    """An empty data array must be rejected with a ValueError."""
+    no_observations = np.array([])
+    expected_message = "Data array cannot be empty"
+    with pytest.raises(ValueError, match=expected_message):
+        stationary_bootstrap(no_observations, 2.0, 5)
+
+# Test if an error is raised for invalid sample length
+def test_invalid_sample_length():
+    """A negative sample length must be rejected with a ValueError."""
+    series = np.array([1, 2, 3])
+    expected_message = "Sample length must be positive"
+    with pytest.raises(ValueError, match=expected_message):
+        stationary_bootstrap(series, 1.0, -5)
 
 # Average length longer than sample 
 def test_average_length_longer_than_sample():
  data = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) # Original time-series
  m = 20 # Average length of the block
  sampleLength = 12 # Length of output sample
- ans = StationaryBootstrap(data, m, sampleLength)
+ ans = stationary_bootstrap(data, m, sampleLength) # m (20) exceeds sampleLength (12); only the output length is checked below
  assert(len(ans)== sampleLength)
 
-
 # Data in columns
 def test_data_passed_in_column():
  data = np.array([[0.4],[0.2],[0.1],[0.4],[0.3],[0.1],[0.3],[0.4],[0.2],[0.5],[0.1],[0.2]]) # Original time-series
  m = 4 # Average length of the block
  sampleLength = 12 # Length of output sample
- ans = StationaryBootstrap(data, m, sampleLength)
+ ans = stationary_bootstrap(data, m, sampleLength) # data is a (12, 1) column here
  data2 = np.array([0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2]) 
- ans2 = StationaryBootstrap(data2, m, sampleLength)
+ ans2 = stationary_bootstrap(data2, m, sampleLength) # same values as a flat 1-D array; only the two output sizes are compared
  assert(ans.size == ans2.size)
 
 
@@ -87,23 +100,24 @@ def test_negative_input_data():
  data = np.array([-0.4,0.2,-0.1,0.4,-0.3,0.1,-0.3,0.4,-0.2,-0.5,0.1,-0.2]) # Original time-series
  m = 4 # Average length of the block
  sampleLength = 12 # Length of output sample
- ans = StationaryBootstrap(data, m, sampleLength)
+ ans = stationary_bootstrap(data, m, sampleLength)
  assert(len(ans)== sampleLength)
 
 
 # Data not in numpy array
-#data = [0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2] # Original time-series
-#m = 4 # Average length of the block
-#sampleLength = 12 # Length of output sample
-#ans = StationaryBootstrap(data, m, sampleLength)
-#print(ans)
+def test_data_not_numpy():
+    """A plain Python list instead of a numpy array must be rejected with a ValueError."""
+    plain_list = [0.4,0.2,0.1,0.4,0.3,0.1,0.3,0.4,0.2,0.5,0.1,0.2] # Original time-series
+    expected_message = "data needs to be as a numpy array"
+    with pytest.raises(ValueError, match=expected_message):
+        stationary_bootstrap(plain_list, 4, 12)
 
 # Data contains strings
 def test_string_number_input_data():
  data = np.array(["-0.4","0.2","-0.1","0.4","-0.3","0.1","0.3","0.4","0.2","0.5","0.1","0.2"]) # Original time-series
  m = 4 # Average length of the block
  sampleLength = 12 # Length of output sample
- ans = StationaryBootstrap(data, m, sampleLength)
+ ans = stationary_bootstrap(data, m, sampleLength) # data holds numeric strings; only the output length is checked below
  assert(len(ans)== sampleLength)