Skip to content

Commit 7f51229

Browse files
committed
added all sol for course-16
1 parent 9961a69 commit 7f51229

File tree

4 files changed

+551
-7
lines changed

4 files changed

+551
-7
lines changed

16_introduction-to-databases-in-python/02_applying-filtering-ordering-and-grouping-to-queries.md

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,29 +133,98 @@ print(results[:20])
133133
### Counting distinct data
134134

135135
```
136+
# Build a query to count the distinct state values: stmt
137+
stmt = select([func.count(census.columns.state.distinct())])
138+
139+
# Execute the query and store the scalar result: distinct_state_count
140+
distinct_state_count = connection.execute(stmt).scalar()
141+
142+
# Print the distinct_state_count
143+
print(distinct_state_count)
136144
137145
```
138146

139147
### Count of records by state
140148

141149
```
150+
# Import func
151+
from sqlalchemy import func
152+
153+
# Build a query to select the state and count of ages by state: stmt
154+
stmt = select([census.columns.state, func.count(census.columns.age)])
155+
156+
# Group stmt by state
157+
stmt = stmt.group_by(census.columns.state)
158+
159+
# Execute the statement and store all the records: results
160+
results = connection.execute(stmt).fetchall()
161+
162+
# Print results
163+
print(results)
164+
165+
# Print the keys/column names of the results returned
166+
print(results[0].keys())
142167
143168
```
144169

145170
### Determining the population sum by state
146171

147172
```
173+
# Import func
174+
from sqlalchemy import func
175+
176+
# Build an expression to calculate the sum of pop2008 labeled as population
177+
pop2008_sum = func.sum(census.columns.pop2008).label('population')
178+
179+
# Build a query to select the state and sum of pop2008: stmt
180+
stmt = select([census.columns.state, pop2008_sum])
148181
182+
# Group stmt by state
183+
stmt = stmt.group_by(census.columns.state)
184+
185+
# Execute the statement and store all the records: results
186+
results = connection.execute(stmt).fetchall()
187+
188+
# Print results
189+
print(results)
190+
191+
# Print the keys/column names of the results returned
192+
print(results[0].keys())
149193
```
150194

151195
### ResultsSets and pandas DataFrames
152196

153197
```
198+
# import pandas
199+
import pandas as pd
200+
201+
# Create a DataFrame from the results: df
202+
df = pd.DataFrame(results)
154203
204+
# Set column names
205+
df.columns = results[0].keys()
206+
207+
# Print the DataFrame
208+
print(df)
155209
```
156210

157211
### From SQLAlchemy results to a plot
158212

159213
```
214+
# Import pyplot as plt from matplotlib
215+
import matplotlib.pyplot as plt
216+
217+
# Create a DataFrame from the results: df
218+
df = pd.DataFrame(results)
219+
220+
# Set Column names
221+
df.columns = results[0].keys()
222+
223+
# Print the DataFrame
224+
print(df)
225+
226+
# Plot the DataFrame
227+
df.plot.bar()
228+
plt.show()
160229
161230
```

16_introduction-to-databases-in-python/03_advanced-sqlalchemy-queries.md

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,53 +7,203 @@ In this chapter, you will learn to perform advanced—and incredibly useful—qu
77
### Connecting to a MySQL database
88

99
```
10+
# Import create_engine function
11+
from sqlalchemy import create_engine
12+
13+
# Create an engine to the census database
14+
engine = create_engine('mysql+pymysql://student:datacamp@courses.csrrinzqubik.us-east-1.rds.amazonaws.com:3306/census')
15+
16+
# Print the table names
17+
print(engine.table_names())
1018
1119
```
1220

1321
### Calculating a difference between two columns
1422

1523
```
24+
# Build query to return state names by population difference from 2008 to 2000: stmt
25+
stmt = select([census.columns.state, (census.columns.pop2008-census.columns.pop2000).label('pop_change')])
26+
27+
# Append group by for the state: stmt_grouped
28+
stmt_grouped = stmt.group_by(census.columns.state)
29+
30+
# Append order by for pop_change in descending order: stmt_ordered
31+
stmt_ordered = stmt_grouped.order_by(desc('pop_change'))
32+
33+
# Return only 5 results: stmt_top5
34+
stmt_top5 = stmt_ordered.limit(5)
35+
36+
# Use connection to execute stmt_top5 and fetch all results
37+
results = connection.execute(stmt_top5).fetchall()
38+
39+
# Print the state and population change for each record
40+
for result in results:
    # One output line per row, formatted as state:pop_change
    print('{}:{}'.format(result.state, result.pop_change))
1642
1743
```
1844

1945
### Determining the overall percentage of women
2046

2147
```
48+
# import case, cast and Float from sqlalchemy
49+
from sqlalchemy import case, cast, Float
50+
51+
# Build an expression to calculate female population in 2000
52+
female_pop2000 = func.sum(
53+
case([
54+
(census.columns.sex == 'F', census.columns.pop2000)
55+
], else_=0))
56+
57+
# Cast an expression to calculate total population in 2000 to Float
58+
total_pop2000 = cast(func.sum(census.columns.pop2000), Float)
59+
60+
# Build a query to calculate the percentage of women in 2000: stmt
61+
stmt = select([female_pop2000 / total_pop2000 * 100])
62+
63+
# Execute the query and store the scalar result: percent_female
64+
percent_female = connection.execute(stmt).scalar()
65+
66+
# Print the percentage
67+
print(percent_female)
2268
2369
```
2470

2571
### Automatic joins with an established relationship
2672

2773
```
74+
# Build a statement to join census and state_fact tables: stmt
75+
stmt = select([census.columns.pop2000, state_fact.columns.abbreviation])
76+
77+
# Execute the statement and get the first result: result
78+
result = connection.execute(stmt).first()
79+
80+
# Loop over the keys in the result object and print the key and value
81+
for key in result.keys():
    # Look the column value up on the row by its key name
    print(key, getattr(result, key))
2883
2984
```
3085

3186
### Joins
3287

3388
```
89+
# Build a statement to select the census and state_fact tables: stmt
90+
stmt = select([census, state_fact])
91+
92+
# Add a select_from clause that wraps a join for the census and state_fact
93+
# tables where the census state column and state_fact name column match
94+
stmt_join = stmt.select_from(
95+
census.join(state_fact, census.columns.state == state_fact.columns.name))
96+
97+
# Execute the statement and get the first result: result
98+
result = connection.execute(stmt_join).first()
99+
100+
# Loop over the keys in the result object and print the key and value
101+
for key in result.keys():
    # Look the column value up on the row by its key name
    print(key, getattr(result, key))
34103
35104
```
36105

37106
### More practice with joins
38107

39108
```
109+
# Build a statement to select the state, sum of 2008 population and census
110+
# division name: stmt
111+
stmt = select([
112+
census.columns.state,
113+
func.sum(census.columns.pop2008),
114+
state_fact.columns.census_division_name
115+
])
116+
117+
# Append select_from to join the census and state_fact tables by the census state and state_fact name columns
118+
stmt_joined = stmt.select_from(
119+
census.join(state_fact, census.columns.state == state_fact.columns.name)
120+
)
121+
122+
# Append a group by for the state_fact name column
123+
stmt_grouped = stmt_joined.group_by(state_fact.columns.name)
124+
125+
# Execute the statement and get the results: results
126+
results = connection.execute(stmt_grouped).fetchall()
127+
128+
# Loop over the results object and print each record.
129+
for record in results:
    # Each record is one row fetched from stmt_grouped
    print(record)
40131
41132
```
42133

43134
### Using alias to handle same table joined queries
44135

45136
```
137+
# Make an alias of the employees table: managers
138+
managers = employees.alias()
139+
140+
# Build a query to select names of managers and their employees: stmt
141+
stmt = select(
142+
[managers.columns.name.label('manager'),
143+
employees.columns.name.label('employee')]
144+
)
145+
146+
# Match managers id with employees mgr: stmt_matched
147+
stmt_matched = stmt.where(managers.columns.id == employees.columns.mgr)
148+
149+
# Order the statement by the managers name: stmt_ordered
150+
stmt_ordered = stmt_matched.order_by(managers.columns.name)
151+
152+
# Execute statement: results
153+
results = connection.execute(stmt_ordered).fetchall()
154+
155+
# Print records
156+
for record in results:
    # Each record is one row fetched from stmt_ordered
    print(record)
46158
47159
```
48160

49161
### Leveraging functions and group_bys with hierarchical data
50162

51163
```
164+
# Make an alias of the employees table: managers
165+
managers = employees.alias()
166+
167+
# Build a query to select names of managers and counts of their employees: stmt
168+
stmt = select([managers.columns.name, func.count(employees.columns.id)])
169+
170+
# Append a where clause that ensures the manager id and employee mgr are equal: stmt_matched
171+
stmt_matched = stmt.where(managers.columns.id == employees.columns.mgr)
172+
173+
# Group by Managers Name: stmt_grouped
174+
stmt_grouped = stmt_matched.group_by(managers.columns.name)
175+
176+
# Execute statement: results
177+
results = connection.execute(stmt_grouped).fetchall()
178+
179+
# print manager
180+
for record in results:
    # Each record is one row fetched from stmt_grouped
    print(record)
52182
53183
```
54184

55185
### Working on blocks of records
56186

57187
```
188+
# Start a while loop checking for more results
189+
# NOTE: more_results, results_proxy and state_count are not defined in this
# snippet; they are expected to be set up before it runs.
while more_results:
    # Fetch the next batch of up to 50 rows from the ResultProxy: partial_results
    partial_results = results_proxy.fetchmany(50)

    # An empty batch means nothing is left to read, so make this the last pass
    if partial_results == []:
        more_results = False

    # Loop over the fetched records and increment the count for the state
    for row in partial_results:
        if row.state in state_count:
            state_count[row.state] += 1
        else:
            # First time this state is seen: start its tally at 1
            state_count[row.state] = 1
203+
204+
# Close the ResultProxy, and thus the connection
205+
results_proxy.close()
58206
207+
# Print the count by state
208+
print(state_count)
59209
```

0 commit comments

Comments
 (0)