1+ {
2+ "nbformat" : 4 ,
3+ "nbformat_minor" : 0 ,
4+ "metadata" : {
5+ "colab" : {
6+ "name" : " t-Test.ipynb" ,
7+ "version" : " 0.3.2" ,
8+ "provenance" : []
9+ },
10+ "kernelspec" : {
11+ "name" : " python3" ,
12+ "display_name" : " Python 3"
13+ }
14+ },
15+ "cells" : [
16+ {
17+ "cell_type" : " code" ,
18+ "metadata" : {
19+ "id" : " Y21N_2yv3Grl" ,
20+ "colab_type" : " code" ,
21+ "colab" : {}
22+ },
23+ "source" : [
24+ " ## Import the packages\n " ,
25+ " import numpy as np\n " ,
26+ " from scipy import stats"
27+ ],
28+ "execution_count" : 0 ,
29+ "outputs" : []
30+ },
31+ {
32+ "cell_type" : " code" ,
33+ "metadata" : {
34+ "id" : " Aga_0SM43OdO" ,
35+ "colab_type" : " code" ,
36+ "colab" : {
37+ "base_uri" : " https://localhost:8080/" ,
38+ "height" : 85
39+ },
40+ "outputId" : " 8d4b8f3d-a6df-4129-b3b6-867144288009"
41+ },
42+ "source" : [
43+ " ## Define 2 random distributions\n " ,
44+ " \n " ,
45+ " # Seed the legacy global RNG so that Restart & Run All draws the same samples every time\n " ,
46+ " np.random.seed(0)\n " ,
47+ " N = 10  # sample size of each group\n " ,
48+ " \n " ,
49+ " # Gaussian distributed data with mean = 2 and var = 1\n " ,
50+ " a = np.random.randn(N) + 2\n " ,
51+ " print(a)\n " ,
52+ " # Gaussian distributed data with mean = 0 and var = 1\n " ,
53+ " b = np.random.randn(N)\n " ,
54+ " print(b)"
55+ ],
56+ "execution_count" : 6 ,
57+ "outputs" : [
58+ {
59+ "output_type" : " stream" ,
60+ "text" : [
61+ " [3.41987841 2.4642942 1.3074381 1.88900262 1.5018451 2.08785958\n " ,
62+ " 4.18763608 2.76111147 1.25673154 1.22916177]\n " ,
63+ " [ 0.09625918 -0.426427 -0.81593085 -0.27386856 -0.19758738 0.71729565\n " ,
64+ " -0.44211666 0.07106772 -0.53144206 -0.21403634]\n "
65+ ],
66+ "name" : " stdout"
67+ }
68+ ]
69+ },
70+ {
71+ "cell_type" : " code" ,
72+ "metadata" : {
73+ "id" : " DGw_0SoQ2Uhj" ,
74+ "colab_type" : " code" ,
75+ "colab" : {
76+ "base_uri" : " https://localhost:8080/" ,
77+ "height" : 51
78+ },
79+ "outputId" : " b6caa6b7-64df-44e7-b626-13fb8165baeb"
80+ },
81+ "source" : [
82+ " ## Pooled standard deviation and t-statistic\n " ,
83+ " \n " ,
84+ " # Sample variances with Bessel's correction: ddof=1 divides by N-1, which gives the\n " ,
85+ " # unbiased estimator (the maximum-likelihood estimator divides by N and is biased).\n " ,
86+ " var_a = np.var(a, ddof=1)\n " ,
87+ " var_b = np.var(b, ddof=1)\n " ,
88+ " \n " ,
89+ " # Pooled variance taken as the plain average of the two sample variances\n " ,
90+ " pooled_var = (var_a + var_b)/2\n " ,
91+ " s = np.sqrt(pooled_var)\n " ,
92+ " \n " ,
93+ " print(\" Std Deviation:\" , s)\n " ,
94+ " \n " ,
95+ " # t-statistic: difference of the sample means divided by its standard error\n " ,
96+ " std_err = s*np.sqrt(2/N)\n " ,
97+ " t = (a.mean() - b.mean())/std_err\n " ,
98+ " print(\" T-value:\" , t)"
99+ ],
100+ "execution_count" : 8 ,
101+ "outputs" : [
102+ {
103+ "output_type" : " stream" ,
104+ "text" : [
105+ " Std Deviation: 0.7693967525636721\n " ,
106+ " T-value: 7.0104093570005945\n "
107+ ],
108+ "name" : " stdout"
109+ }
110+ ]
111+ },
112+ {
113+ "cell_type" : " code" ,
114+ "metadata" : {
115+ "id" : " 9atPC3HO3Z2U" ,
116+ "colab_type" : " code" ,
117+ "colab" : {
118+ "base_uri" : " https://localhost:8080/" ,
119+ "height" : 51
120+ },
121+ "outputId" : " 683ac7bd-8bd8-4e55-d3fc-7942748b66d1"
122+ },
123+ "source" : [
124+ " ## Compare with the critical t-value\n " ,
125+ " \n " ,
126+ " # Degrees of freedom for two samples of size N each\n " ,
127+ " df = 2*N - 2\n " ,
128+ " \n " ,
129+ " # One-tail probability beyond |t|. abs() keeps the result valid when t is negative,\n " ,
130+ " # and the survival function sf() avoids the rounding loss of computing 1 - cdf.\n " ,
131+ " p = stats.t.sf(abs(t), df=df)\n " ,
132+ " \n " ,
133+ " print(\" t-Score = \" + str(t))\n " ,
134+ " print(\" p-Value = \" + str(2*p))\n " ,
135+ " \n " ,
136+ " # The one-tail probability is doubled because this is a two-tailed t-test.\n " ,
137+ " # Interpretation: when the p-value is below the chosen significance level (commonly 0.05) we reject the\n " ,
138+ " # null hypothesis of equal means; the difference between the two sample means is then statistically\n " ,
139+ " # significant. The value printed above depends on the samples that were drawn for a and b."
140+ ],
141+ "execution_count" : 9 ,
142+ "outputs" : [
143+ {
144+ "output_type" : " stream" ,
145+ "text" : [
146+ " t-Score = 7.0104093570005945\n " ,
147+ " p-Value = 1.522899394812427e-06\n "
148+ ],
149+ "name" : " stdout"
150+ }
151+ ]
152+ },
153+ {
154+ "cell_type" : " code" ,
155+ "metadata" : {
156+ "id" : " I_ve3N6a3Mlo" ,
157+ "colab_type" : " code" ,
158+ "colab" : {
159+ "base_uri" : " https://localhost:8080/" ,
160+ "height" : 51
161+ },
162+ "outputId" : " cc8bcc64-e1a2-4c05-98b9-db0cf91a0a01"
163+ },
164+ "source" : [
165+ " ## Cross-check with scipy: ttest_ind already returns the two-sided p-value, so it must not be doubled\n " ,
166+ " t2, p2 = stats.ttest_ind(a,b)\n " ,
167+ " print(\" t = \" + str(t2))\n " ,
168+ " print(\" p = \" + str(p2))"
169+ ],
170+ "execution_count" : 10 ,
171+ "outputs" : [
172+ {
173+ "output_type" : " stream" ,
174+ "text" : [
175+ " t = 7.010409357000594\n " ,
176+ " p = 3.045798789679482e-06\n "
177+ ],
178+ "name" : " stdout"
179+ }
180+ ]
181+ }
182+ ]
183+ }
0 commit comments