Generating Normally Distributed Data
Use NumPy's `np.random.normal` method to draw samples from a normal distribution.
import numpy as np

# Parameters: mean=0.98, standard deviation=4, number of samples=10000
mean = 0.98  # Mean
std_dev = 4  # Standard deviation
num_samples = 10000  # Number of samples

# loc is the mean and scale is the standard deviation of the distribution,
# so they carry the parameter values stated above (the call previously used
# loc=0, scale=1, which contradicted the stated parameters).
data = np.random.normal(loc=mean, scale=std_dev, size=num_samples)
print(data[:50])  # Print the first 50 data points

# Probability Density Function (PDF)
Use `norm.pdf` from the `scipy.stats` module to evaluate the probability density function of a normal distribution.
from scipy.stats import norm
import numpy as np

# Parameters: mean=0.98, standard deviation=4, number of samples=10000
mean = 0.98  # Mean
std_dev = 4  # Standard deviation

# Sample from the same distribution whose density is evaluated below, so the
# data and the PDF share the stated parameters (the samples were previously
# drawn with loc=0, scale=1).
data = np.random.normal(loc=mean, scale=std_dev, size=10000)

# Calculate PDF: density of N(mean, std_dev) evaluated at every sample
pdf = norm.pdf(data, loc=mean, scale=std_dev)
print(pdf)  # Probability Density

# Sample output from an earlier run (the samples are random, so the exact
# values will differ):
# [0.09838715 0.09937757 0.09967584 ... 0.09653173 0.07867429 0.09311813]

# PDF Plot
from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt

# Parameters: mean=0.98, standard deviation=4, number of samples=10000
mean = 0.98  # Mean
std_dev = 4  # Standard deviation
data = np.random.normal(loc=mean, scale=std_dev, size=10000)

# Draw everything on a single Axes object
fig, ax = plt.subplots()

# Histogram of the samples, normalised (density=True) so it is on the same
# scale as the probability density curve
ax.hist(data, bins=30, density=True, alpha=0.6, color='b', label='Histogram')

# Probability density function (PDF) evaluated over the observed data range
x = np.linspace(data.min(), data.max(), 10000)  # x-axis range
pdf = norm.pdf(x, loc=mean, scale=std_dev)
ax.plot(x, pdf, 'k', linewidth=2, label='PDF (Normal)')

# Title, axis labels and legend
ax.set_title('Normal Distribution')
ax.set_xlabel('Value')
ax.set_ylabel('Density')
ax.legend()

# Display the plot
plt.show()

# 'k' specifies a black curve. More options are available as shown in the table below:
| Code | Color | Description |
|---|---|---|
| 'b' | Blue | blue |
| 'r' | Red | red |
| 'g' | Green | green |
| 'c' | Cyan | cyan |
| 'm' | Magenta | magenta |
| 'y' | Yellow | yellow |
| 'k' | Black | black |
| 'w' | White | white |
Cumulative Distribution
norm.cdf calculates the cumulative distribution function for the normal distribution.
from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt

# Parameters: mean=0.98, standard deviation=4, number of samples=10000
mean = 0.98  # Mean
std_dev = 4  # Standard deviation
data = np.random.normal(loc=mean, scale=std_dev, size=10000)

# Plot the cumulative histogram of the samples. cumulative=True makes the
# bars accumulate up to 1, so they are on the same 0-1 scale as the CDF curve
# (a plain density histogram is not comparable with a CDF).
plt.hist(data, bins=30, density=True, cumulative=True, alpha=0.6, color='b',
         label='Cumulative histogram')

# Plot cumulative distribution function (CDF)
x = np.linspace(data.min(), data.max(), 10000)  # Define the x-axis range
cdf = norm.cdf(x, loc=mean, scale=std_dev)  # Calculate the cumulative distribution function
plt.plot(x, cdf, 'k', linewidth=2, label='CDF (Normal)')

# Add title and legend; the y-axis shows cumulative probability, not density
plt.title('Normal Distribution')
plt.xlabel('Value')
plt.ylabel('Cumulative probability')
plt.legend()

# Display the plot
plt.show()

# Normal Distribution Percent Point Function
norm.ppf calculates the quantile (inverse of CDF) for a given probability.
from scipy.stats import norm
import numpy as np

# Freeze the standard normal distribution (mean 0, standard deviation 1) once,
# then query its percent point function for each probability of interest.
standard_normal = norm(loc=0, scale=1)

# Quantiles for probabilities 0.018 and 0.819
q1 = standard_normal.ppf(0.018)
q2 = standard_normal.ppf(0.819)
print(f"0.018 quantile: {q1}, 0.819 quantile: {q2}")

# Terminal session:
# PS E:\learn\learnpy> & "D:/Program Files/Python311/python.exe" e:/learn/learnpy/learn.py
# 0.018 quantile: -2.0969274291643423, 0.819 quantile: 0.9115607350675405
# PS E:\learn\learnpy>

# Normal Distribution Fitting
Use `norm.fit` from `scipy.stats` to fit a normal distribution to data; it estimates the mean and standard deviation.
from scipy.stats import norm
import numpy as np

# Parameters of the distribution the samples are drawn from
true_mean = 3
true_std = 1.67
sample_count = 10000

# Generate normally distributed data
data = np.random.normal(loc=true_mean, scale=true_std, size=sample_count)

# Fit the data: norm.fit returns the estimated (mean, standard deviation)
fitted_params = norm.fit(data)
mu, sigma = fitted_params
print(f"Fitted mean: {mu}, Fitted standard deviation: {sigma}")

# Terminal session:
# PS E:\learn\learnpy> & "D:/Program Files/Python311/python.exe" e:/learn/learnpy/learn.py
# Fitted mean: 3.006577810135438, Fitted standard deviation: 1.672727044555993
Top comments (0)