elmoallistair
diff --git a/‎16_introduction-to-databases-in-python/02_applying-filtering-ordering-and-grouping-to-queries.md‎
Lines changed: 69 additions & 0 deletions b/‎16_introduction-to-databases-in-python/02_applying-filtering-ordering-and-grouping-to-queries.md‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎16_introduction-to-databases-in-python/03_advanced-sqlalchemy-queries.md‎
Lines changed: 150 additions & 0 deletions b/‎16_introduction-to-databases-in-python/03_advanced-sqlalchemy-queries.md‎
Lines changed: 150 additions & 0 deletions
@@ -133,29 +133,98 @@ print(results[:20])
 ### Counting distinct data
 
 ```
+# Build a query to count the distinct state values: stmt
+# (select and func are not imported in this snippet; presumably they come
+# from sqlalchemy in the exercise environment -- confirm)
+stmt = select([func.count(census.columns.state.distinct())])
+
+# Execute the query and store the scalar result: distinct_state_count
+distinct_state_count = connection.execute(stmt).scalar()
+
+# Print the distinct_state_count
+print(distinct_state_count)
 
 ```
 
 ### Count of records by state
 
 ```
+# Import func
+from sqlalchemy import func
+
+# Build a query that selects each state together with the count of its
+# age values, grouped by state: stmt
+stmt = select(
+    [census.columns.state, func.count(census.columns.age)]
+).group_by(census.columns.state)
+
+# Run the statement and fetch every record: results
+results = connection.execute(stmt).fetchall()
+
+# Show the fetched records
+print(results)
+
+# Show the keys/column names of the first returned record
+print(results[0].keys())
 
 ```
 
 ### Determining the population sum by state
 
 ```
+# Import func
+from sqlalchemy import func
+
+# Expression that sums pop2008 and exposes it under the label 'population'
+pop2008_sum = func.sum(census.columns.pop2008).label('population')
+
+# Query selecting the state and the pop2008 sum, grouped by state: stmt
+stmt = select([census.columns.state, pop2008_sum]).group_by(
+    census.columns.state
+)
 
+# Run the statement and fetch every record: results
+results = connection.execute(stmt).fetchall()
+
+# Show the fetched records
+print(results)
+
+# Show the keys/column names of the first returned record
+print(results[0].keys())
 ```
 
 ### ResultsSets and pandas DataFrames
 
 ```
+# Import pandas under the alias pd
+import pandas as pd
+
+# Create a DataFrame from the results: df
+# (results is not defined in this snippet; presumably the rows fetched by an
+# earlier query -- confirm)
+df = pd.DataFrame(results)
 
+# Set the column names from the keys of the first record
+df.columns = results[0].keys()
+
+# Print the DataFrame
+print(df)
 ```
 
 ### From SQLAlchemy results to a plot
 
 ```
+# Import pyplot as plt from matplotlib
+import matplotlib.pyplot as plt
+
+# Create a DataFrame from the results: df
+# (pd and results are not defined in this snippet; presumably pandas and the
+# rows fetched by an earlier query -- confirm)
+df = pd.DataFrame(results)
+
+# Set the column names from the keys of the first record
+df.columns = results[0].keys()
+
+# Print the DataFrame
+print(df)
+
+# Draw the DataFrame as a bar plot and display the figure
+df.plot.bar()
+plt.show()
 
 ```
@@ -7,53 +7,203 @@ In this chapter, you will learn to perform advanced—and incredibly useful—qu
 ### Connecting to a MySQL database
 
 ```
+# Import create_engine function
+from sqlalchemy import create_engine
+
+# Create an engine to the census database
+# (the URL selects the mysql+pymysql driver and embeds the user name and
+# password directly in the connection string)
+engine = create_engine('mysql+pymysql://student:datacamp@courses.csrrinzqubik.us-east-1.rds.amazonaws.com:3306/census')
+
+# Print the table names
+# NOTE(review): Engine.table_names() is deprecated as of SQLAlchemy 1.4;
+# inspect(engine).get_table_names() is the documented replacement.
+print(engine.table_names())
 
 ```
 
 ### Calculating a difference between two columns
 
 ```
+# Build query to return state names by population difference from 2008 to 2000: stmt
+# NOTE(review): pop_change is not wrapped in an aggregate although the query
+# is grouped by state below; strict SQL modes may reject this -- confirm the
+# intended semantics.
+stmt = select([
+    census.columns.state,
+    (census.columns.pop2008 - census.columns.pop2000).label('pop_change'),
+])
+
+# Append group by for the state: stmt_grouped
+stmt_grouped = stmt.group_by(census.columns.state)
+
+# Append order by for pop_change in descending order: stmt_ordered
+# (desc is not imported in this snippet; presumably sqlalchemy.desc -- confirm)
+stmt_ordered = stmt_grouped.order_by(desc('pop_change'))
+
+# Return only 5 results: stmt_top5
+stmt_top5 = stmt_ordered.limit(5)
+
+# Use connection to execute stmt_top5 and fetch all results
+results = connection.execute(stmt_top5).fetchall()
+
+# Print the state and population change for each record
+for result in results:
+    print('{}:{}'.format(result.state, result.pop_change))
 
 ```
 
 ### Determining the overall percentage of women
 
 ```
+# Import case, cast and Float from sqlalchemy
+from sqlalchemy import case, cast, Float
+
+# Build an expression to calculate female population in 2000:
+# rows whose sex is 'F' contribute pop2000, every other row contributes 0
+female_pop2000 = func.sum(
+    case([
+        (census.columns.sex == 'F', census.columns.pop2000)
+    ], else_=0))
+
+# Cast an expression to calculate total population in 2000 to Float
+total_pop2000 = cast(func.sum(census.columns.pop2000), Float)
+
+# Build a query to calculate the percentage of women in 2000: stmt
+stmt = select([female_pop2000 / total_pop2000 * 100])
+
+# Execute the query and store the scalar result: percent_female
+percent_female = connection.execute(stmt).scalar()
+
+# Print the percentage
+print(percent_female)
 
 ```
 
 ### Automatic joins with an established relationship
 
 ```
+# Build a statement selecting one column from each of the census and
+# state_fact tables: stmt
+stmt = select([census.columns.pop2000, state_fact.columns.abbreviation])
+
+# Execute the statement and get the first result: result
+result = connection.execute(stmt).first()
+
+# Loop over the keys in the result object and print the key and value
+for key in result.keys():
+    print(key, getattr(result, key))
 
 ```
 
 ### Joins
 
 ```
+# Build a statement to select the census and state_fact tables: stmt
+stmt = select([census, state_fact])
+
+# Add a select_from clause that wraps a join for the census and state_fact
+# tables where the census state column and state_fact name column match
+stmt_join = stmt.select_from(
+    census.join(state_fact, census.columns.state == state_fact.columns.name))
+
+# Execute the statement and get the first result: result
+result = connection.execute(stmt_join).first()
+
+# Loop over the keys in the result object and print the key and value
+for key in result.keys():
+    print(key, getattr(result, key))
 
 ```
 
 ### More practice with joins
 
 ```
+# Build a statement to select the state, sum of 2008 population and census
+# division name: stmt
+stmt = select([
+    census.columns.state,
+    func.sum(census.columns.pop2008),
+    state_fact.columns.census_division_name
+])
+
+# Append select_from to join the census and state_fact tables by the census
+# state and state_fact name columns
+stmt_joined = stmt.select_from(
+    census.join(state_fact, census.columns.state == state_fact.columns.name)
+)
+
+# Append a group by for the state_fact name column
+stmt_grouped = stmt_joined.group_by(state_fact.columns.name)
+
+# Execute the statement and get the results: results
+results = connection.execute(stmt_grouped).fetchall()
+
+# Loop over the results object and print each record.
+for record in results:
+    print(record)
 
 ```
 
 ### Using alias to handle same table joined queries
 
 ```
+# Make an alias of the employees table: managers
+managers = employees.alias()
+
+# Build a query to select names of managers and their employees: stmt
+stmt = select(
+    [managers.columns.name.label('manager'),
+     employees.columns.name.label('employee')]
+)
+
+# Match managers id with employees mgr: stmt_matched
+stmt_matched = stmt.where(managers.columns.id == employees.columns.mgr)
+
+# Order the statement by the managers name: stmt_ordered
+stmt_ordered = stmt_matched.order_by(managers.columns.name)
+
+# Execute statement: results
+results = connection.execute(stmt_ordered).fetchall()
+
+# Print records
+for record in results:
+    print(record)
 
 ```
 
 ### Leveraging functions and group_bys with hierarchical data
 
 ```
+# Make an alias of the employees table: managers
+managers = employees.alias()
+
+# Build a query to select names of managers and counts of their employees: stmt
+stmt = select([managers.columns.name, func.count(employees.columns.id)])
+
+# Append a where clause that ensures the manager id and employee mgr are equal: stmt_matched
+stmt_matched = stmt.where(managers.columns.id == employees.columns.mgr)
+
+# Group by managers name: stmt_grouped
+stmt_grouped = stmt_matched.group_by(managers.columns.name)
+
+# Execute statement: results
+results = connection.execute(stmt_grouped).fetchall()
+
+# Print each record (manager name and employee count)
+for record in results:
+    print(record)
 
 ```
 
 ### Working on blocks of records
 
 ```
+# Start a while loop checking for more results
+# (more_results, results_proxy and state_count are not defined in this
+# snippet; presumably a True flag, an executed ResultProxy and an empty dict
+# set up by the exercise -- confirm)
+while more_results:
+    # Fetch the next batch of up to 50 rows from the ResultProxy: partial_results
+    partial_results = results_proxy.fetchmany(50)
+
+    # An empty batch means the result set is exhausted: stop after this pass
+    if not partial_results:
+        more_results = False
+
+    # Loop over the fetched records and increment the count for the state
+    for row in partial_results:
+        state_count[row.state] = state_count.get(row.state, 0) + 1
+
+# Close the ResultProxy, and thus the connection
+results_proxy.close()
 
+# Print the count by state
+print(state_count)
 ```