
Commit 314c715

SireInsectus authored and committed
Publishing v2.0.1
1 parent c317da0 commit 314c715


59 files changed: +586 -602 lines changed

Note: large commits have some content hidden by default, so some of the changed files below are listed without their file paths.


Apache-Spark-Programming-with-Databricks/ASP 0 - Course Agenda.py

Lines changed: 3 additions & 3 deletions
@@ -22,11 +22,11 @@
  # MAGIC
  # MAGIC ## Spark Core
  # MAGIC * [ASP 2.1 - Spark SQL]($./ASP 2 - Spark Core/ASP 2.1 - Spark SQL)
+ # MAGIC * [ASP 2.1L - Spark SQL Lab]($./ASP 2 - Spark Core/Labs/ASP 2.1L - Spark SQL Lab)
  # MAGIC * [ASP 2.2 - Reader & Writer]($./ASP 2 - Spark Core/ASP 2.2 - Reader & Writer)
- # MAGIC * [ASP 2.2L - Spark SQL Lab]($./ASP 2 - Spark Core/Labs/ASP 2.2L - Spark SQL Lab)
+ # MAGIC * [ASP 2.2L - Ingesting Data Lab]($./ASP 2 - Spark Core/Labs/ASP 2.2L - Ingesting Data Lab)
  # MAGIC * [ASP 2.3 - DataFrame & Column]($./ASP 2 - Spark Core/ASP 2.3 - DataFrame & Column)
- # MAGIC * [ASP 2.3L - Ingesting Data Lab]($./ASP 2 - Spark Core/Labs/ASP 2.3L - Ingesting Data Lab)
- # MAGIC * [ASP 2.4L - Purchase Revenues Lab]($./ASP 2 - Spark Core/Labs/ASP 2.4L - Purchase Revenues Lab)
+ # MAGIC * [ASP 2.3L - Purchase Revenues Lab]($./ASP 2 - Spark Core/Labs/ASP 2.3L - Purchase Revenues Lab)

  # COMMAND ----------

Apache-Spark-Programming-with-Databricks/ASP 1 - Introductions/ASP 1.1 - Databricks Platform.py

Lines changed: 3 additions & 3 deletions
@@ -30,7 +30,7 @@
  # COMMAND ----------

  # MAGIC %md ### Setup
- # MAGIC Run classroom setup to mount Databricks training datasets and create your own database for BedBricks.
+ # MAGIC Run classroom setup to [mount](https://docs.databricks.com/data/databricks-file-system.html#mount-storage) Databricks training datasets and create your own database for BedBricks.
  # MAGIC
  # MAGIC Use the **`%run`** magic command to run another notebook within a notebook

@@ -189,7 +189,7 @@

  # MAGIC %md ## Our First Table
  # MAGIC
- # MAGIC Is located in the path identfied by **`eventsPath`** (a variable we created for you).
+ # MAGIC Is located in the path identfied by **`events_path`** (a variable we created for you).
  # MAGIC
  # MAGIC We can see those files by running the following cell

@@ -219,7 +219,7 @@
  # COMMAND ----------

  # MAGIC %sql
- # MAGIC CREATE TABLE IF NOT EXISTS events USING parquet OPTIONS (path "${c.events_path}");
+ # MAGIC CREATE TABLE IF NOT EXISTS events USING delta OPTIONS (path "${c.events_path}");

  # COMMAND ----------
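For reference (not part of this commit): the change above registers the `events` table over Delta files instead of Parquet. A minimal PySpark sketch of the same registration, assuming `events_path` is the classroom-setup variable pointing at a directory already written in Delta format:

```python
# Register an external table over an existing Delta directory.
# events_path is assumed to be defined by the classroom setup.
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS events
    USING DELTA
    OPTIONS (path "{events_path}")
""")

display(spark.table("events").limit(5))
```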

Apache-Spark-Programming-with-Databricks/ASP 1 - Introductions/Labs/ASP 1.1L - Explore Datasets Lab.py

Lines changed: 3 additions & 3 deletions
@@ -84,7 +84,7 @@
  # COMMAND ----------

  # MAGIC %md
- # MAGIC #### Q1: What products are available for purchase at BedBricks?
+ # MAGIC #### 4.1: What products are available for purchase at BedBricks?
  # MAGIC
  # MAGIC The **`products`** dataset contains the ID, name, and price of products on the BedBricks retail site.
  # MAGIC
@@ -105,7 +105,7 @@

  # COMMAND ----------

- # MAGIC %md #### Q2: What is the average purchase revenue for a transaction at BedBricks?
+ # MAGIC %md #### 4.2: What is the average purchase revenue for a transaction at BedBricks?
  # MAGIC
  # MAGIC The **`sales`** dataset contains order information representing successfully processed sales.
  # MAGIC Most fields correspond directly with fields from the clickstream data associated with a sale finalization event.
@@ -131,7 +131,7 @@

  # COMMAND ----------

- # MAGIC %md #### Q3: What types of events are recorded on the BedBricks website?
+ # MAGIC %md #### 4.3: What types of events are recorded on the BedBricks website?
  # MAGIC
  # MAGIC The **`events`** dataset contains two weeks worth of parsed JSON records, created by consuming updates to an operational database.
  # MAGIC Records are received whenever: (1) a new user visits the site, (2) a user provides their email for the first time.

Apache-Spark-Programming-with-Databricks/ASP 2 - Spark Core/ASP 2.1 - Spark SQL.py

Lines changed: 12 additions & 8 deletions
@@ -63,10 +63,12 @@

  # COMMAND ----------

- display(spark.table("products")
-         .select("name", "price")
-         .where("price < 200")
-         .orderBy("price"))
+ display(spark
+         .table("products")
+         .select("name", "price")
+         .where("price < 200")
+         .orderBy("price")
+        )

  # COMMAND ----------

@@ -153,10 +155,12 @@

  # COMMAND ----------

- budget_df = (spark.table("products")
-              .select("name", "price")
-              .where("price < 200")
-              .orderBy("price"))
+ budget_df = (spark
+              .table("products")
+              .select("name", "price")
+              .where("price < 200")
+              .orderBy("price")
+             )

  # COMMAND ----------
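For reference (not part of this commit): the reformatted chains above are the DataFrame API counterpart of a plain SQL query. A minimal sketch of the equivalent query through `spark.sql`, assuming the `products` table used in this notebook is registered:

```python
# spark.sql returns a DataFrame, just like the chained transformations above.
budget_df = spark.sql("""
    SELECT name, price
    FROM products
    WHERE price < 200
    ORDER BY price
""")

display(budget_df)
```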

Apache-Spark-Programming-with-Databricks/ASP 2 - Spark Core/ASP 2.2 - Reader & Writer.py

Lines changed: 7 additions & 7 deletions
@@ -245,7 +245,7 @@
  # MAGIC (df.write
  # MAGIC   .option("compression", "snappy")
  # MAGIC   .mode("overwrite")
- # MAGIC   .parquet(outPath)
+ # MAGIC   .parquet(output_dir)
  # MAGIC )
  # MAGIC ```
  # MAGIC
@@ -261,19 +261,19 @@

  # COMMAND ----------

- users_output_path = working_dir + "/users.parquet"
+ users_output_dir = working_dir + "/users.parquet"

  (users_df
    .write
    .option("compression", "snappy")
    .mode("overwrite")
-   .parquet(users_output_path)
+   .parquet(users_output_dir)
  )

  # COMMAND ----------

  display(
-   dbutils.fs.ls(users_output_path)
+   dbutils.fs.ls(users_output_dir)
  )

  # COMMAND ----------
@@ -285,7 +285,7 @@

  (users_df
    .write
-   .parquet(users_output_path, compression="snappy", mode="overwrite")
+   .parquet(users_output_dir, compression="snappy", mode="overwrite")
  )

  # COMMAND ----------
@@ -298,7 +298,7 @@

  # COMMAND ----------

- events_df.write.mode("overwrite").saveAsTable("events_p")
+ events_df.write.mode("overwrite").saveAsTable("events")

  # COMMAND ----------

@@ -334,7 +334,7 @@
  # MAGIC %md
  # MAGIC ### Write Results to a Delta Table
  # MAGIC
- # MAGIC Write **`events_df`** with the DataFrameWriter's **`save`** method and the following configurations: Delta format, overwrite mode
+ # MAGIC Write **`events_df`** with the DataFrameWriter's **`save`** method and the following configurations: Delta format & overwrite mode.

  # COMMAND ----------
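For reference (not part of this commit): a minimal sketch collecting the DataFrameWriter patterns touched in this file. `working_dir`, `users_df`, and `events_df` are assumed to exist as in the notebook; `delta_output_dir` is an illustrative name, not one from the course:

```python
# Parquet output with explicit options (mirrors the renamed users_output_dir cell).
users_output_dir = working_dir + "/users.parquet"
(users_df.write
    .option("compression", "snappy")
    .mode("overwrite")
    .parquet(users_output_dir)
)

# Managed table in the current database (mirrors the saveAsTable change).
events_df.write.mode("overwrite").saveAsTable("events")

# Delta format through the generic save method (the exercise described above).
delta_output_dir = working_dir + "/delta/events"   # illustrative path
(events_df.write
    .format("delta")
    .mode("overwrite")
    .save(delta_output_dir)
)
```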

Apache-Spark-Programming-with-Databricks/ASP 2 - Spark Core/ASP 2.3 - DataFrame & Column.py

Lines changed: 4 additions & 4 deletions
@@ -30,7 +30,7 @@

  # COMMAND ----------

- events_df = spark.read.parquet(events_path)
+ events_df = spark.read.format("delta").load(events_path)
  display(events_df)

  # COMMAND ----------
@@ -45,9 +45,9 @@

  from pyspark.sql.functions import col

- events_df.device
- events_df["device"]
- col("device")
+ print(events_df.device)
+ print(events_df["device"])
+ print(col("device"))

  # COMMAND ----------
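A side note on the `print` change above (a sketch added for reference, not part of this commit): each of the three expressions evaluates to a `Column` object rather than to data, so printing them only shows the column expression; values appear once the column is used in a transformation. Assuming `events_df` is loaded as in the cell above:

```python
from pyspark.sql.functions import col

# All three print a Column expression such as Column<'device'>, not rows of data.
print(events_df.device)
print(events_df["device"])
print(col("device"))

# Data is produced only when the Column is used in a query.
display(events_df.select(col("device")).distinct())
```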

Lines changed: 4 additions & 4 deletions
@@ -58,8 +58,8 @@

  # TODO
  mac_df = (events_df
-   .FILL_IN
- )
+     .FILL_IN
+ )

  # COMMAND ----------

@@ -74,7 +74,7 @@

  # COMMAND ----------

- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **4.1: CHECK YOUR WORK**

  # COMMAND ----------

@@ -99,7 +99,7 @@

  # COMMAND ----------

- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **5.1: CHECK YOUR WORK**
  # MAGIC - You should only see **`macOS`** values in the **`device`** column
  # MAGIC - The fifth row should be an event with timestamp **`1592539226602157`**
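One possible fill-in for the `mac_df` TODO above (a hedged sketch, not the published lab solution; the sort key is assumed from the timestamp mentioned in the check):

```python
from pyspark.sql.functions import col

# Keep only macOS events and order them by event timestamp
# (assumed to be the intended sort key).
mac_df = (events_df
    .where(col("device") == "macOS")
    .orderBy("event_timestamp")
)

display(mac_df)
```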

Lines changed: 6 additions & 6 deletions
@@ -38,11 +38,11 @@
  products_csv_path = f"{datasets_dir}/products/products.csv"
  products_df = FILL_IN

- productsDF.printSchema()
+ products_df.printSchema()

  # COMMAND ----------

- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **1.1: CHECK YOUR WORK**

  # COMMAND ----------

@@ -62,7 +62,7 @@

  # COMMAND ----------

- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **2.1: CHECK YOUR WORK**

  # COMMAND ----------

@@ -90,7 +90,7 @@


  # COMMAND ----------
- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **3.1: CHECK YOUR WORK**

  # COMMAND ----------

@@ -99,7 +99,7 @@
  # COMMAND ----------

  # MAGIC %md ### 4. Write to Delta
- # MAGIC Write **`productsDF`** to the filepath provided in the variable **`productsOutputPath`**
+ # MAGIC Write **`products_df`** to the filepath provided in the variable **`products_output_path`**

  # COMMAND ----------

@@ -109,7 +109,7 @@

  # COMMAND ----------

- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **4.1: CHECK YOUR WORK**

  # COMMAND ----------
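A hedged sketch of one possible fill-in for this lab (not the published solution); `datasets_dir` and `products_output_path` are assumed to come from the classroom setup, and the reader options are illustrative:

```python
# 1. Read the products CSV (header row, types inferred here for brevity).
products_csv_path = f"{datasets_dir}/products/products.csv"

products_df = (spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv(products_csv_path)
)

products_df.printSchema()

# 4. Write to Delta at the provided path.
(products_df.write
    .format("delta")
    .mode("overwrite")
    .save(products_output_path)
)
```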

Lines changed: 7 additions & 7 deletions
@@ -28,7 +28,7 @@

  # COMMAND ----------

- events_df = spark.read.parquet(events_path)
+ events_df = spark.read.format("delta").load(events_path)
  display(events_df)

  # COMMAND ----------
@@ -44,7 +44,7 @@


  # COMMAND ----------
- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **1.1: CHECK YOUR WORK**

  # COMMAND ----------

@@ -66,7 +66,7 @@


  # COMMAND ----------
- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **2.1: CHECK YOUR WORK**

  # COMMAND ----------

@@ -91,7 +91,7 @@

  # MAGIC %md
  # MAGIC ### 4. Drop unneeded column
- # MAGIC Since there's only one event type, drop **`event_name`** from **`purchasesDF`**.
+ # MAGIC Since there's only one event type, drop **`event_name`** from **`purchases_df`**.

  # COMMAND ----------

@@ -101,13 +101,13 @@

  # COMMAND ----------

- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **4.1: CHECK YOUR WORK**

  # COMMAND ----------

  expected_columns = {"device", "ecommerce", "event_previous_timestamp", "event_timestamp",
                      "geo", "items", "revenue", "traffic_source",
-                      "user_first_touch_timestamp", "user_id"}
+                     "user_first_touch_timestamp", "user_id"}
  assert(set(final_df.columns) == expected_columns)

  # COMMAND ----------
@@ -125,7 +125,7 @@

  # COMMAND ----------

- # MAGIC %md **CHECK YOUR WORK**
+ # MAGIC %md **5.1: CHECK YOUR WORK**

  # COMMAND ----------
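A hedged sketch of the drop step verified above (not the published solution); `purchases_df` is assumed to already hold only purchase events, and the expected column set comes directly from the assertion in the diff:

```python
# Drop the single-valued event_name column and verify the remaining schema.
final_df = purchases_df.drop("event_name")

expected_columns = {"device", "ecommerce", "event_previous_timestamp", "event_timestamp",
                    "geo", "items", "revenue", "traffic_source",
                    "user_first_touch_timestamp", "user_id"}
assert set(final_df.columns) == expected_columns
```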

Apache-Spark-Programming-with-Databricks/ASP 3 - Functions/ASP 3.1 - Aggregation.py

Lines changed: 9 additions & 10 deletions
@@ -30,7 +30,7 @@

  # COMMAND ----------

- df = spark.read.parquet(events_path)
+ df = spark.read.format("delta").load(events_path)
  display(df)

  # COMMAND ----------
@@ -140,10 +140,10 @@
  from pyspark.sql.functions import avg, approx_count_distinct

  state_aggregates_df = (df
-                        .groupBy("geo.state")
-                        .agg(avg("ecommerce.total_item_quantity").alias("avg_quantity"),
-                             approx_count_distinct("user_id").alias("distinct_users"))
-                        )
+     .groupBy("geo.state")
+     .agg(avg("ecommerce.total_item_quantity").alias("avg_quantity"),
+          approx_count_distinct("user_id").alias("distinct_users"))
+ )

  display(state_aggregates_df)

@@ -164,11 +164,10 @@

  from pyspark.sql.functions import cos, sqrt

- display(
-     spark.range(10)  # Create a DataFrame with a single column called "id" with a range of integer values
-     .withColumn("sqrt", sqrt("id"))
-     .withColumn("cos", cos("id"))
- )
+ display(spark.range(10)  # Create a DataFrame with a single column called "id" with a range of integer values
+         .withColumn("sqrt", sqrt("id"))
+         .withColumn("cos", cos("id"))
+ )

  # COMMAND ----------
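For reference (not part of this commit): the grouped aggregation reformatted above, restated as a minimal sketch with an exact distinct count added for comparison; `df` is assumed to be the events DataFrame loaded from `events_path`:

```python
from pyspark.sql.functions import avg, approx_count_distinct, countDistinct

state_aggregates_df = (df
    .groupBy("geo.state")  # "geo.state" addresses a field inside the geo struct column
    .agg(avg("ecommerce.total_item_quantity").alias("avg_quantity"),
         approx_count_distinct("user_id").alias("distinct_users"),   # approximate, cheaper at scale
         countDistinct("user_id").alias("exact_distinct_users"))     # exact, more expensive
)

display(state_aggregates_df)
```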
