@@ -7,53 +7,203 @@ In this chapter, you will learn to perform advanced—and incredibly useful—qu
### Connecting to a MySQL database

```
# Import the create_engine and inspect functions
from sqlalchemy import create_engine, inspect

# Create an engine to the census database
engine = create_engine(
    'mysql+pymysql://student:datacamp@courses.csrrinzqubik.us-east-1.rds.amazonaws.com:3306/census'
)

# Print the table names. The Inspector is used because
# engine.table_names() is deprecated since SQLAlchemy 1.4 and is
# no longer available in 2.0; Inspector.get_table_names() works on both.
print(inspect(engine).get_table_names())

```

### Calculating a difference between two columns

```
# Difference between the 2008 and 2000 population columns, labelled 'pop_change'
pop_change = (census.columns.pop2008 - census.columns.pop2000).label('pop_change')

# Select each state together with that difference: stmt
stmt = select([census.columns.state, pop_change])

# Group the rows by state: stmt_grouped
# NOTE(review): pop_change is not wrapped in an aggregate, so the value
# reported per state depends on how the database resolves the group --
# confirm whether func.sum(...) was intended.
stmt_grouped = stmt.group_by(census.columns.state)

# Sort by pop_change, largest first: stmt_ordered
stmt_ordered = stmt_grouped.order_by(desc('pop_change'))

# Keep only the first 5 rows: stmt_top5
stmt_top5 = stmt_ordered.limit(5)

# Run stmt_top5 on the connection and collect every row
results = connection.execute(stmt_top5).fetchall()

# Print "state:pop_change" for each returned row
for row in results:
    print('{}:{}'.format(row.state, row.pop_change))

```

### Determining the overall percentage of women

```
# Bring in case, cast and Float from sqlalchemy
from sqlalchemy import case, cast, Float

# Per-row contribution: pop2000 where sex is 'F', otherwise 0
female_rows = case(
    [(census.columns.sex == 'F', census.columns.pop2000)],
    else_=0,
)

# Sum those contributions to get the female population in 2000
female_pop2000 = func.sum(female_rows)

# Total population in 2000, cast to Float
# (presumably so the division below yields a fractional result -- confirm)
total_pop2000 = cast(func.sum(census.columns.pop2000), Float)

# Query for the percentage of women in 2000: stmt
stmt = select([female_pop2000 / total_pop2000 * 100])

# Execute the query and keep the single scalar value: percent_female
percent_female = connection.execute(stmt).scalar()

# Show the percentage
print(percent_female)

```

### Automatic joins with an established relationship

```
# Select pop2000 from census and abbreviation from state_fact: stmt
# No explicit join is written here; per the section heading, the join
# relies on a relationship already established between the two tables.
selected_columns = [census.columns.pop2000, state_fact.columns.abbreviation]
stmt = select(selected_columns)

# Execute the statement and keep only the first row: result
result = connection.execute(stmt).first()

# Print every column name of the row next to its value
for key in result.keys():
    value = getattr(result, key)
    print(key, value)

```

### Joins

```
# Select every column of the census and state_fact tables: stmt
stmt = select([census, state_fact])

# Join condition: the census state column equals the state_fact name column
same_state = census.columns.state == state_fact.columns.name

# Wrap the join of census and state_fact in a select_from clause: stmt_join
census_with_facts = census.join(state_fact, same_state)
stmt_join = stmt.select_from(census_with_facts)

# Execute the joined statement and keep only the first row: result
result = connection.execute(stmt_join).first()

# Print every column name of the row next to its value
for key in result.keys():
    value = getattr(result, key)
    print(key, value)

```

### More practice with joins

```
# Columns to report: the state, the sum of the 2008 population and the
# census division name
selected_columns = [
    census.columns.state,
    func.sum(census.columns.pop2008),
    state_fact.columns.census_division_name,
]

# Build the select over those columns: stmt
stmt = select(selected_columns)

# Join census to state_fact where the census state column equals the
# state_fact name column, and select from that join: stmt_joined
census_with_facts = census.join(
    state_fact, census.columns.state == state_fact.columns.name
)
stmt_joined = stmt.select_from(census_with_facts)

# Group the joined rows by the state_fact name column: stmt_grouped
stmt_grouped = stmt_joined.group_by(state_fact.columns.name)

# Execute the grouped statement and collect every row: results
results = connection.execute(stmt_grouped).fetchall()

# Print each returned record
for record in results:
    print(record)

```

### Using alias to handle same table joined queries

```
# Alias the employees table so it can be referenced a second time: managers
managers = employees.alias()

# Name column from each side, labelled so the two can be told apart
manager_name = managers.columns.name.label('manager')
employee_name = employees.columns.name.label('employee')

# Select the manager name next to the employee name: stmt
stmt = select([manager_name, employee_name])

# Keep rows where the managers id equals the employees mgr: stmt_matched
stmt_matched = stmt.where(managers.columns.id == employees.columns.mgr)

# Sort by the managers name: stmt_ordered
stmt_ordered = stmt_matched.order_by(managers.columns.name)

# Execute the ordered statement and collect every row: results
results = connection.execute(stmt_ordered).fetchall()

# Print each returned record
for record in results:
    print(record)

```

### Leveraging functions and group_bys with hierarchical data

```
# Alias the employees table so it can be referenced a second time: managers
managers = employees.alias()

# Count of employee ids, to be reported next to each manager name
employee_count = func.count(employees.columns.id)

# Select the manager name and the employee count: stmt
stmt = select([managers.columns.name, employee_count])

# Keep rows where the managers id equals the employees mgr: stmt_matched
reports_to_manager = managers.columns.id == employees.columns.mgr
stmt_matched = stmt.where(reports_to_manager)

# Group by the managers name so the count is per manager: stmt_grouped
stmt_grouped = stmt_matched.group_by(managers.columns.name)

# Execute the grouped statement and collect every row: results
results = connection.execute(stmt_grouped).fetchall()

# Print each manager record
for record in results:
    print(record)

```

### Working on blocks of records

```
# Keep pulling batches for as long as the more_results flag is set
while more_results:
    # Fetch the next batch of up to 50 rows from the ResultProxy: partial_results
    partial_results = results_proxy.fetchmany(50)

    # An empty batch means nothing is left; clear the flag so the loop ends
    if not partial_results:
        more_results = False

    # Add one to the running count of each fetched row's state
    for row in partial_results:
        state_count[row.state] = state_count.get(row.state, 0) + 1

# Close the ResultProxy once every batch has been consumed
results_proxy.close()

# Print the count by state
print(state_count)
```
0 commit comments