# Load the Python libraries import math import random import timeit import numpy as np from scipy import stats as sci import statistics as stats from itertools import cycle
# Load plotting libraries import matplotlib.pyplot as plt
The greatest common divisor (GCD) of two or more integers, which are not all zero, is the largest positive integer that divides each of the integers. For two integers $ x, y $, the greatest common divisor of $x$ and $y$ is denoted $ gcd(x,y) $ [2]. For example, the GCD of 8 and 12 is 4, that is, $ gcd(8,12)=4 $.
# Example values m = 12000000 n = 76000000
# Function that returns the GCD of two values (intuitive algorithn) def gcd_simple(m, n): i = min(m, n) while (m % i != 0) or (n % i != 0): i -= 1 return i
start_time = timeit.default_timer() print(gcd_simple(m, n)) print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
4000000 >> elapsed time 1080.9609 ms
# Function that returns the GCD of two values (euclidean algorithm) def gcd_euclidean(m, n): m = min(m, n) n = max(m, n) while m > 0: t = m m = n % m n = t return n
start_time = timeit.default_timer() print(gcd_euclidean(m, n)) print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
4000000 >> elapsed time 1.6196000000001654 ms
In mathematics, the Fibonacci numbers, commonly denoted $ F_n $, form a sequence, called the Fibonacci sequence, such that each number is the sum of the two preceding ones, starting from 0 and 1. That is [3],
$ \quad F_{0}=0,\quad F_{1}=1, $
and
$ \quad F_{n}=F_{n-1}+F_{n-2} $
for n > 1.
The beginning of the sequence is thus:
$ \quad 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, ... $
# Example value n = 35
# Function that returns the n-fibonacci value (recursive algorithm) def fibo_rec(n): if n < 2: return n else: return fibo_rec(n - 1) + fibo_rec(n - 2)
start_time = timeit.default_timer() print(fibo_rec(n)) print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
9227465 >> elapsed time 5066.8688 ms
# Function that returns the n-fibonacci value (iterative algorithm) def fibo_iter(n): i, j = 1, 0 for k in range(n): j = i + j i = j - i return j
start_time = timeit.default_timer() print(fibo_iter(n)) print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
9227465 >> elapsed time 0.9330999999992429 ms
With the De Moivre's formula:
# Function that returns the n-fibonacci value (De Moivre equation) def fibo_de_moivre(n): golden_ratio = (1 + 5**(1/2)) / 2 f = (golden_ratio**n - (-golden_ratio)**(-n)) / 5**(1/2) return int(f)
start_time = timeit.default_timer() print(fibo_de_moivre(n)) print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
9227465 >> elapsed time 0.2905000000001934 ms
In number theory, integer factorization is the decomposition of a composite number into a product of smaller integers. If these factors are further restricted to prime numbers, the process is called prime factorization [4].
# Example values a, b = 104723, 104729 n = a * b n
10967535067
# Function that returns the factorization of an integer (ascending approach) def fact_int(n): nn = n**(1/2) m = 2 while m < nn: if n % m == 0: return (m, n // m) m += 1 return (1, n)
start_time = timeit.default_timer() print(fact_int(n)) print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
(104723, 104729) >> elapsed time 82.78020000000019 ms
# Function that returns the factorization of an integer (descending approach) def fact_int_2(n): nn = n**(1/2) m = int(nn) while m > 1: if n % m == 0: return (m, n // m) m -= 1 return (1, n)
start_time = timeit.default_timer() print(fact_int_2(n)) print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
(104723, 104729) >> elapsed time 0.9137999999992985 ms
The Tower of Hanoi is a mathematical game or puzzle. It consists of three rods and a number of disks of different sizes, which can slide onto any rod. The puzzle starts with the disks in a neat stack in ascending order of size on one rod, the smallest at the top, thus making a conical shape [5].
# Disk number between 2 and 64 (not recommended) n_disks = 5
# Move n disk from source to destination def hanoi_rec(n, source, aux, target): if n > 0: hanoi_rec(n - 1, source, target, aux) print('Move disk', n, 'from:', source, 'to:', target) hanoi_rec(n - 1, aux, source, target)
start_time = timeit.default_timer() print('>> number of movements:', (2**n_disks - 1)) hanoi_rec(n_disks, 'T1', 'T2', 'T3') print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
>> number of movements: 31 Move disk 1 from: T1 to: T3 Move disk 2 from: T1 to: T2 Move disk 1 from: T3 to: T2 Move disk 3 from: T1 to: T3 Move disk 1 from: T2 to: T1 Move disk 2 from: T2 to: T3 Move disk 1 from: T1 to: T3 Move disk 4 from: T1 to: T2 Move disk 1 from: T3 to: T2 Move disk 2 from: T3 to: T1 Move disk 1 from: T2 to: T1 Move disk 3 from: T3 to: T2 Move disk 1 from: T1 to: T3 Move disk 2 from: T1 to: T2 Move disk 1 from: T3 to: T2 Move disk 5 from: T1 to: T3 Move disk 1 from: T2 to: T1 Move disk 2 from: T2 to: T3 Move disk 1 from: T1 to: T3 Move disk 3 from: T2 to: T1 Move disk 1 from: T3 to: T2 Move disk 2 from: T3 to: T1 Move disk 1 from: T2 to: T1 Move disk 4 from: T2 to: T3 Move disk 1 from: T1 to: T3 Move disk 2 from: T1 to: T2 Move disk 1 from: T3 to: T2 Move disk 3 from: T1 to: T3 Move disk 1 from: T2 to: T1 Move disk 2 from: T2 to: T3 Move disk 1 from: T1 to: T3 >> elapsed time 17.174699999999987 ms
# Move n disk from source to destination def hanoi_iter(n, source, aux, target): n_movements = 2**n_disks - 1 print('>> number of movements:', n_movements) tw_src = [] for i in range(n_disks, 0, -1): tw_src.append(i) labels = [source, aux, target] towers = [tw_src, [], []] indexes = cycle([0, 1, 2] if n % 2 == 0 else [0, 2, 1]) temp = next(indexes) disk = 0 for i in range(1, n_movements + 1): if i % 2 == 1: disk = 1 s, t = temp, next(indexes) temp = t towers[t].append(towers[s].pop()) print('Move disk', disk, 'from:', labels[s], 'to:', labels[t]) else: i_t2, i_t3 = [i for i in range(3) if i != temp] v_t2 = towers[i_t2][len(towers[i_t2]) - 1] if len(towers[i_t2]) else math.inf v_t3 = towers[i_t3][len(towers[i_t3]) - 1] if len(towers[i_t3]) else math.inf if v_t2 < v_t3: s, t = i_t2, i_t3 else: s, t = i_t3, i_t2 disk = towers[s].pop() towers[t].append(disk) print('Move disk', disk, 'from:', labels[s], 'to:', labels[t])
start_time = timeit.default_timer() hanoi_iter(n_disks, 'T1', 'T2', 'T3') print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
>> number of movements: 31 Move disk 1 from: T1 to: T3 Move disk 2 from: T1 to: T2 Move disk 1 from: T3 to: T2 Move disk 3 from: T1 to: T3 Move disk 1 from: T2 to: T1 Move disk 2 from: T2 to: T3 Move disk 1 from: T1 to: T3 Move disk 4 from: T1 to: T2 Move disk 1 from: T3 to: T2 Move disk 2 from: T3 to: T1 Move disk 1 from: T2 to: T1 Move disk 3 from: T3 to: T2 Move disk 1 from: T1 to: T3 Move disk 2 from: T1 to: T2 Move disk 1 from: T3 to: T2 Move disk 5 from: T1 to: T3 Move disk 1 from: T2 to: T1 Move disk 2 from: T2 to: T3 Move disk 1 from: T1 to: T3 Move disk 3 from: T2 to: T1 Move disk 1 from: T3 to: T2 Move disk 2 from: T3 to: T1 Move disk 1 from: T2 to: T1 Move disk 4 from: T2 to: T3 Move disk 1 from: T1 to: T3 Move disk 2 from: T1 to: T2 Move disk 1 from: T3 to: T2 Move disk 3 from: T1 to: T3 Move disk 1 from: T2 to: T1 Move disk 2 from: T2 to: T3 Move disk 1 from: T1 to: T3 >> elapsed time 15.435800000000555 ms
Bubble sort, sometimes referred to as sinking sort, is a simple sorting algorithm that repeatedly steps through the list, compares adjacent elements and swaps them if they are in the wrong order. The pass through the list is repeated until the list is sorted. The algorithm, which is a comparison sort, is named for the way smaller or larger elements "bubble" to the top of the list [6].
# Bubble-sort: non-efficient sorting algorithm def bubble_sort(array): n = len(array) for i in range(n): for j in range(0, n-i-1): if array[j] > array[j+1]: array[j], array[j+1] = array[j+1], array[j] return array
With time complexity of $ \Theta(n^2) $ and space complexity of $ \Theta(n) $.
# Example values n = 100 raw_data = [] for i in range(n): raw_data.append(int(random.random() * n))
# Sorting data sorted_data = bubble_sort(raw_data.copy())
# Plotting fig = plt.figure(figsize = (16, 8)) fig.subplots_adjust(hspace = 0.15, wspace = 0.15) # Plotting results before sorting plt.subplot(1, 2, 1) plt.plot(raw_data, linewidth=0, marker="o", markersize=3, color="#3366cc") plt.title("Data Before Sorting", fontsize = 14) plt.ylabel('Value') plt.xlabel('Index') # Plotting results after sorting plt.subplot(1, 2, 2) plt.plot(sorted_data, linewidth=0, marker="o", markersize=3, color="#109618") plt.title("Data After Sorting", fontsize = 14) plt.ylabel('Value') plt.xlabel('Index') plt.show()
# Validation values x_data = [0] diff_raw_data = [0] diff_sorted_data = [0] for i in range(1, n): x_data.append(i) diff_raw_data.append(raw_data[i] - raw_data[i-1]) diff_sorted_data.append(sorted_data[i] - sorted_data[i-1])
# Showing gap differences raw_sum_abs = sum([abs(v) for v in diff_raw_data]) sorted_sum_abs = sum([abs(v) for v in diff_sorted_data]) print('Raw data - Gap sum:', raw_sum_abs) print('Sorted data - Gap sum:', sorted_sum_abs)
Raw data - Gap sum: 3085 Sorted data - Gap sum: 95
# Plotting fig = plt.figure(figsize = (16, 8)) fig.subplots_adjust(hspace = 0.15, wspace = 0.15) # Plotting results before sorting plt.subplot(1, 2, 1) plt.step(x_data, diff_raw_data, where='mid', color="#3366cc", alpha=0.6) plt.plot(diff_raw_data, linewidth=0, marker="o", markersize=3, color="#3366cc") plt.title("Differences between Raw points", fontsize = 14) plt.ylabel('Diff') plt.xlabel('Index') # Plotting results after sorting plt.subplot(1, 2, 2) plt.step(x_data, diff_sorted_data, where='mid', color="#109618", alpha=0.6) plt.plot(diff_sorted_data, linewidth=0, marker="o", markersize=3, color="#109618") plt.title("Differences between Sorted points", fontsize = 14) plt.ylabel('Diff') plt.xlabel('Index') plt.show()
Calculate the Standard Deviation of the residuals between the raw points:
# Calculate variance of the residuals stats.stdev(diff_raw_data)
39.243877473969356
Calculate the Standard Deviation of the residuals between the sorted points:
# Calculate variance of the residuals stats.stdev(diff_sorted_data)
1.0952145677879515
In geometry, the convex hull or convex envelope or convex closure of a shape is the smallest convex set that contains it. For a bounded subset of the plane, the convex hull may be visualized as the shape enclosed by a rubber band stretched around the subset [7].
Formally, the convex hull may be defined either as the intersection of all convex sets containing a given subset of a Euclidean space, or equivalently as the set of all convex combinations of points in the subset. Convex hulls of open sets are open, and convex hulls of compact sets are compact. Every convex set is the convex hull of its extreme points.
# Example values n = 1000 points = [] for i in range(n): x = int(random.random() * n) y = int(random.random() * n) points.append((x, y)) points = np.array(points)
Andrew’s monotone chain algorithm is used, which runs in $ \Theta(n \log_{}{n}) $ time in general, or $ \Theta(n) $ time if the input is already sorted [5].
# Returns the convex hull, assuming that each points[i] <= points[i + 1]. Runs in O(n) time def make_hull_presorted(points): if len(points) <= 1: return list(points) # Andrew's monotone chain algorithm upperhull = [] lowerhull = [] for hull in (upperhull, lowerhull): for p in (points if (hull is upperhull) else reversed(points)): while len(hull) >= 2: qx, qy = hull[-1] rx, ry = hull[-2] if (qx - rx) * (p[1] - ry) >= (qy - ry) * (p[0] - rx): del hull[-1] else: break hull.append(p) del hull[-1] if not (len(upperhull) == 1 and upperhull == lowerhull): upperhull.extend(lowerhull) return upperhull # This algorithm runs in O(n log n) time def convex_hull_greedy(points): sorted_points = sorted(points, key = lambda p: p[0]) hull = make_hull_presorted(sorted_points) hull.append(hull[0]) return hull
# Run algorithm start_time = timeit.default_timer() hull = convex_hull_greedy(points) print('>> elapsed time', (timeit.default_timer() - start_time) * 1000, 'ms')
>> elapsed time 42.057800000000256 ms
# Plotting convex hull results plt.figure(figsize = (8, 8)) plt.plot(points[:,0], points[:,1], linewidth=0, marker="o", markersize=2, color="black") for i in range(1, len(hull)): p1 = hull[i-1] p2 = hull[i] plt.plot([p1[0], p2[0]], [p1[1], p2[1]], color="#3366cc") plt.title("Convex Hull - Iterative", fontsize = 14) plt.ylabel('y') plt.xlabel('x') plt.show()
Practical example of the use of an algorithm to solve a specific problem.
# Initialize variables n = 256 data_raw = [25] * n total = sum(data_raw) print('Total values:', total)
Total values: 6400
# Creating target distribution alpha = 0.02 t_func = sci.norm(n/2, alpha*n) x = np.linspace(0, n, n) y = t_func.pdf(x) * total
# Create pretty x axis labels def get_x_labels(n): x_labels = [] for ix in range(n): if ix % 10 == 0: x_labels.append(str(ix)) else: x_labels.append('') return x_labels
# Function that plots a symbol distribution def plot_symbol_dist(data, x, y): # Prepare data n = len(data) y_pos = np.arange(n) symbols = get_x_labels(n) # Plot distribution plt.figure(figsize = (12, 5)) plt.plot(x, y, '-', color = '#ff7f0e', lw = 3, label = 'Real distribution') plt.bar(y_pos, data, align='center', alpha=0.5) plt.xticks(y_pos, symbols, fontsize = 10, rotation = 50) plt.ylabel('Value') plt.title('Distribution') plt.show()
# Plot current distribution plot_symbol_dist(data_raw, x, y)
# Function that calculates the new distribution def get_new_dist(data_raw, y): a = [] b = [] n = len(data_raw) carry = 0 for i in range(n // 2): y1 = data_raw[i] + carry y2 = max(math.floor(y[i]), 1) y_new = min(y1, y2) a.append(y_new) carry = y1 - y_new print('Forward carry:', carry) for i in range(n - 1, n // 2 - 1, -1): y1 = data_raw[i] + carry y2 = max(math.floor(y[i]), 1) y_new = min(y1, y2) b.append(y_new) carry = y1 - y_new print('backward carry:', carry) # Concatenate arrays data_new = a + b[::-1] return data_new
data_new = get_new_dist(data_raw, y) print('n temp:', len(data_new))
Forward carry: 0 backward carry: 0 n temp: 256
plot_symbol_dist(data_new, x, y)
Validation:
# Difference between distributions print('>> diff:', total - sum(data_new))
>> diff: 0