Skip to content

Commit 435d468

Browse files
author
daboncanplay
committed
publishing v2.2.8
1 parent 6c81588 commit 435d468

24 files changed

+420
-262
lines changed

ASP 0 - Course Agenda.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
# MAGIC %md
1111
# MAGIC # Apache Spark Programming with Databricks
1212
# MAGIC ## Course Agenda
13+
# MAGIC
14+
# MAGIC Taught over 4 half-days or 2 full days.
1315

1416
# COMMAND ----------
1517

@@ -26,12 +28,15 @@
2628
# MAGIC * [ASP 2.2 - Reader & Writer]($./ASP 2 - Spark Core/ASP 2.2 - Reader & Writer)
2729
# MAGIC * [ASP 2.2L - Ingesting Data Lab]($./ASP 2 - Spark Core/ASP 2.2L - Ingesting Data Lab)
2830
# MAGIC * [ASP 2.3 - DataFrame & Column]($./ASP 2 - Spark Core/ASP 2.3 - DataFrame & Column)
29-
# MAGIC * [ASP 2.3L - Purchase Revenues Lab]($./ASP 2 - Spark Core/ASP 2.3L - Purchase Revenues Lab)
3031

3132
# COMMAND ----------
3233

3334
# MAGIC %md
3435
# MAGIC # Day 2
36+
# MAGIC
37+
# MAGIC ## Spark Core (Continued)
38+
# MAGIC * [ASP 2.3L - Purchase Revenues Lab]($./ASP 2 - Spark Core/ASP 2.3L - Purchase Revenues Lab)
39+
# MAGIC
3540
# MAGIC ## Functions
3641
# MAGIC * [ASP 3.1 - Aggregation]($./ASP 3 - Functions/ASP 3.1 - Aggregation)
3742
# MAGIC * [ASP 3.1L - Revenue by Traffic Lab]($./ASP 3 - Functions/ASP 3.1L - Revenue by Traffic Lab)
@@ -40,15 +45,17 @@
4045
# MAGIC * [ASP 3.3 - Complex Types]($./ASP 3 - Functions/ASP 3.3 - Complex Types)
4146
# MAGIC * [ASP 3.3L - Users]($./ASP 3 - Functions/ASP 3.3L - Users)
4247
# MAGIC * [ASP 3.4 - Additional Functions]($./ASP 3 - Functions/ASP 3.4 - Additional Functions)
43-
# MAGIC * [ASP 3.4L - Abandoned Carts Lab]($./ASP 3 - Functions/ASP 3.4L - Abandoned Carts Lab)
44-
# MAGIC * [ASP 3.5 - UDFs]($./ASP 3 - Functions/ASP 3.5 - UDFs)
45-
# MAGIC * [ASP 3.5L - Sort Day Lab]($./ASP 3 - Functions/ASP 3.5L - Sort Day Lab)
4648
# MAGIC
4749

4850
# COMMAND ----------
4951

5052
# MAGIC %md
5153
# MAGIC # Day 3
54+
# MAGIC ## Functions (Continued)
55+
# MAGIC * [ASP 3.4L - Abandoned Carts Lab]($./ASP 3 - Functions/ASP 3.4L - Abandoned Carts Lab)
56+
# MAGIC * [ASP 3.5 - UDFs]($./ASP 3 - Functions/ASP 3.5 - UDFs)
57+
# MAGIC * [ASP 3.5L - Sort Day Lab]($./ASP 3 - Functions/ASP 3.5L - Sort Day Lab)
58+
# MAGIC
5259
# MAGIC ## Performance
5360
# MAGIC * [ASP 4.1 - Query Optimization]($./ASP 4 - Performance/ASP 4.1 - Query Optimization)
5461
# MAGIC * [ASP 4.2 - Partitioning]($./ASP 4 - Performance/ASP 4.2 - Partitioning)

ASP 1 - Introductions/ASP 1.1 - Databricks Platform.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@
130130
# MAGIC | Horizontal Rule | `---` |
131131
# MAGIC | Code | ``` `code` ``` |
132132
# MAGIC | Link | `[text](https://www.example.com)` |
133-
# MAGIC | Image | `[alt text](image.jpg)`|
133+
# MAGIC | Image | `![alt text](image.jpg)`|
134134
# MAGIC | Ordered List | `1. First Item` <br> `2. Second Item` <br> `3. Third Item` |
135135
# MAGIC | Unordered List | `- First Item` <br> `- Second Item` <br> `- Third Item` |
136136
# MAGIC | Code Block | ```` ``` ```` <br> `code block` <br> ```` ``` ````|
@@ -265,7 +265,7 @@
265265

266266
# COMMAND ----------
267267

268-
print(f"Database Name: {DA.db_name}")
268+
print(f"Database Name: {DA.schema_name}")
269269

270270
# COMMAND ----------
271271

@@ -274,7 +274,7 @@
274274
# COMMAND ----------
275275

276276
# MAGIC %sql
277-
# MAGIC SHOW TABLES IN ${DA.db_name}
277+
# MAGIC SHOW TABLES IN ${DA.schema_name}
278278

279279
# COMMAND ----------
280280

ASP 2 - Spark Core/ASP 2.2 - Reader & Writer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@
307307

308308
# COMMAND ----------
309309

310-
print(f"Database Name: {DA.db_name}")
310+
print(f"Database Name: {DA.schema_name}")
311311

312312
# COMMAND ----------
313313

@@ -316,7 +316,7 @@
316316
# COMMAND ----------
317317

318318
# MAGIC %sql
319-
# MAGIC SHOW TABLES IN ${DA.db_name}
319+
# MAGIC SHOW TABLES IN ${DA.schema_name}
320320

321321
# COMMAND ----------
322322

Includes/Classroom-Setup-5.1a.py

Lines changed: 44 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33

44
# COMMAND ----------
55

6-
@TestHelper.monkey_patch
6+
@ValidationHelper.monkey_patch
77
def validate_1_1(self, df):
88
suite = DA.tests.new("5.1a-1.1")
99

10-
suite.test_true(df.isStreaming, description="The query is streaming")
10+
suite.test_true(actual_value=lambda: df.isStreaming, description="The query is streaming")
1111

1212
columns = ['order_id', 'email', 'transaction_timestamp', 'total_item_quantity', 'purchase_revenue_in_usd', 'unique_items', 'items']
13-
suite.test_sequence(actual_value=df.columns,
13+
suite.test_sequence(actual_value=lambda: df.columns,
1414
expected_value=columns,
1515
test_column_order=False,
1616
description=f"DataFrame contains all {len(columns)} columns",
@@ -21,44 +21,53 @@ def validate_1_1(self, df):
2121

2222
# COMMAND ----------
2323

24-
@TestHelper.monkey_patch
25-
def validate_2_1(self, schema:StructType):
26-
24+
@ValidationHelper.monkey_patch
25+
def validate_2_1(self, schema: StructType):
26+
2727
suite = DA.tests.new("5.1a-2.1")
28-
29-
suite.test_equals(actual_value=type(schema), expected_value=StructType, description="Schema is of type StructType", hint="Found [[ACTUAL_VALUE]]")
30-
31-
suite.test_length(schema.fieldNames(), 7, description="Schema contians seven fields", hint="Found [[LEN_ACTUAL_VALUE]]: [[ACTUAL_VALUE]]")
32-
33-
suite.test_struct_field(schema, "order_id", "LongType", None)
34-
suite.test_struct_field(schema, "email", "StringType", None)
35-
suite.test_struct_field(schema, "transaction_timestamp", "LongType", None)
36-
suite.test_struct_field(schema, "total_item_quantity", "LongType", None)
37-
suite.test_struct_field(schema, "purchase_revenue_in_usd", "DoubleType", None)
38-
suite.test_struct_field(schema, "unique_items", "LongType", None)
39-
suite.test_struct_field(schema, "items", "StructType", None)
40-
28+
29+
suite.test_equals(
30+
actual_value=lambda: type(schema),
31+
expected_value=StructType,
32+
description="Schema is of type StructType",
33+
hint="Found [[ACTUAL_VALUE]]",
34+
)
35+
36+
suite.test_length(
37+
lambda: schema.fieldNames(),
38+
expected_length=7,
39+
description="Schema contians seven fields",
40+
hint="Found [[LEN_ACTUAL_VALUE]]: [[ACTUAL_VALUE]]",
41+
)
42+
43+
suite.test_schema_field(lambda: schema, "order_id", "LongType", None)
44+
suite.test_schema_field(lambda: schema, "email", "StringType", None)
45+
suite.test_schema_field(lambda: schema, "transaction_timestamp", "LongType", None)
46+
suite.test_schema_field(lambda: schema, "total_item_quantity", "LongType", None)
47+
suite.test_schema_field(lambda: schema, "purchase_revenue_in_usd", "DoubleType", None)
48+
suite.test_schema_field(lambda: schema, "unique_items", "LongType", None)
49+
suite.test_schema_field(lambda: schema, "items", "StructType", None)
50+
4151
suite.display_results()
4252
assert suite.passed, "One or more tests failed."
4353

44-
4554
# COMMAND ----------
4655

47-
@TestHelper.monkey_patch
56+
@ValidationHelper.monkey_patch
4857
def validate_3_1(self, query):
4958
suite = DA.tests.new("5.1a-3.1")
5059

51-
suite.test_true(query.isActive, description="The query is active")
60+
suite.test_true(actual_value=lambda: query.isActive, description="The query is active")
5261

53-
suite.test_equals(coupon_sales_query.lastProgress["name"], "coupon_sales",
62+
suite.test_equals(lambda: coupon_sales_query.lastProgress["name"], "coupon_sales",
5463
description="The query name is \"coupon_sales\".")
5564

5665
coupons_output_path = f"{DA.paths.working_dir}/coupon-sales/output"
57-
suite.test(actual_value=None, test_function=lambda: len(dbutils.fs.ls(coupons_output_path)) > 0,
66+
suite.test(actual_value=lambda: None, test_function=lambda: len(dbutils.fs.ls(coupons_output_path)) > 0,
5867
description=f"Found at least one file in .../coupon-sales/output")
5968

6069
coupons_checkpoint_path = f"{DA.paths.checkpoints}/coupon-sales"
61-
suite.test(actual_value=None, test_function=lambda: len(dbutils.fs.ls(coupons_checkpoint_path)) > 0,
70+
suite.test(actual_value=lambda: None, test_function=lambda: len(dbutils.fs.ls(coupons_checkpoint_path)) > 0,
6271
description=f"Found at least one file in .../coupon-sales")
6372

6473
suite.display_results()
@@ -67,43 +76,43 @@ def validate_3_1(self, query):
6776

6877
# COMMAND ----------
6978

70-
@TestHelper.monkey_patch
79+
@ValidationHelper.monkey_patch
7180
def validate_4_1(self, query_id, query_status):
7281
suite = DA.tests.new("5.1a-4.1")
7382

74-
suite.test_sequence(actual_value=query_status.keys(),
83+
suite.test_sequence(actual_value=lambda: query_status.keys(),
7584
expected_value=["message", "isDataAvailable", "isTriggerActive"],
7685
test_column_order=False,
7786
description="Valid status value.")
7887

79-
suite.test_equals(type(query_id), str, description="Valid query_id value.")
88+
suite.test_equals(lambda: type(query_id), str, description="Valid query_id value.")
8089

8190
suite.display_results()
8291
assert suite.passed, "One or more tests failed."
8392

8493
# COMMAND ----------
8594

86-
@TestHelper.monkey_patch
95+
@ValidationHelper.monkey_patch
8796
def validate_5_1(self, query):
8897
suite = DA.tests.new("5.1a-5.1")
8998

90-
suite.test_false(query.isActive, description="The query is not active")
99+
suite.test_false(actual_value=lambda: query.isActive, description="The query is not active")
91100

92101
suite.display_results()
93102
assert suite.passed, "One or more tests failed."
94103

95104

96105
# COMMAND ----------
97106

98-
DA = DBAcademyHelper(**helper_arguments) # Create the DA object
99-
DA.reset_environment() # Reset by removing databases and files from other lessons
100-
DA.init(install_datasets=True, # Initialize, install and validate the datasets
101-
create_db=True) # Continue initialization, create the user-db
107+
DA = DBAcademyHelper(course_config, lesson_config)
108+
DA.reset_lesson()
109+
DA.init()
110+
DA.conclude_setup()
102111

103112
DA.paths.sales = f"{DA.paths.datasets}/ecommerce/sales/sales.delta"
104113
DA.paths.users = f"{DA.paths.datasets}/ecommerce/users/users.delta"
105114
DA.paths.events = f"{DA.paths.datasets}/ecommerce/events/events.delta"
106115
DA.paths.products = f"{DA.paths.datasets}/products/products.delta"
107116

108-
DA.conclude_setup() # Conclude setup by advertising environmental changes
117+
DA.conclude_setup()
109118

Includes/Classroom-Setup-5.1b.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,71 +3,71 @@
33

44
# COMMAND ----------
55

6-
@TestHelper.monkey_patch
6+
@ValidationHelper.monkey_patch
77
def validate_1_1(self, schema):
88
suite = DA.tests.new("5.1b-1.1")
99

10-
suite.test_equals(actual_value=type(schema), expected_value=StructType, description="Schema is of type StructType", hint="Found [[ACTUAL_VALUE]]")
10+
suite.test_equals(lambda: type(schema), expected_value=StructType, description="Schema is of type StructType", hint="Found [[ACTUAL_VALUE]]")
1111

12-
suite.test_length(schema.fieldNames(), 12, description="Schema contians 12 field", hint="Found [[LEN_ACTUAL_VALUE]]: [[ACTUAL_VALUE]]")
12+
suite.test_length(lambda: schema.fieldNames(), 12, description="Schema contians 12 field", hint="Found [[LEN_ACTUAL_VALUE]]: [[ACTUAL_VALUE]]")
1313

14-
suite.test_struct_field(schema, "device", "StringType", None)
15-
suite.test_struct_field(schema, "ecommerce", "StructType", None)
16-
suite.test_struct_field(schema, "event_name", "StringType", None)
17-
suite.test_struct_field(schema, "event_previous_timestamp", "LongType", None)
18-
suite.test_struct_field(schema, "event_timestamp", "LongType", None)
19-
suite.test_struct_field(schema, "geo", "StructType", None)
20-
suite.test_struct_field(schema, "items", "ArrayType", None)
21-
suite.test_struct_field(schema, "traffic_source", "StringType", None)
22-
suite.test_struct_field(schema, "user_first_touch_timestamp", "LongType", None)
23-
suite.test_struct_field(schema, "user_id", "StringType", None)
24-
suite.test_struct_field(schema, "hour", "IntegerType", None)
25-
suite.test_struct_field(schema, "createdAt", "TimestampType", None)
14+
suite.test_schema_field(lambda: schema, "device", "StringType", None)
15+
suite.test_schema_field(lambda: schema, "ecommerce", "StructType", None)
16+
suite.test_schema_field(lambda: schema, "event_name", "StringType", None)
17+
suite.test_schema_field(lambda: schema, "event_previous_timestamp", "LongType", None)
18+
suite.test_schema_field(lambda: schema, "event_timestamp", "LongType", None)
19+
suite.test_schema_field(lambda: schema, "geo", "StructType", None)
20+
suite.test_schema_field(lambda: schema, "items", "ArrayType", None)
21+
suite.test_schema_field(lambda: schema, "traffic_source", "StringType", None)
22+
suite.test_schema_field(lambda: schema, "user_first_touch_timestamp", "LongType", None)
23+
suite.test_schema_field(lambda: schema, "user_id", "StringType", None)
24+
suite.test_schema_field(lambda: schema, "hour", "IntegerType", None)
25+
suite.test_schema_field(lambda: schema, "createdAt", "TimestampType", None)
2626

2727
suite.display_results()
2828
assert suite.passed, "One or more tests failed."
2929

3030

3131
# COMMAND ----------
3232

33-
@TestHelper.monkey_patch
33+
@ValidationHelper.monkey_patch
3434
def validate_2_1(self, schema):
3535
suite = DA.tests.new("5.1b-2.1")
3636

37-
suite.test_equals(actual_value=type(schema), expected_value=StructType, description="Schema is of type StructType", hint="Found [[ACTUAL_VALUE]]")
37+
suite.test_equals(lambda: type(schema), expected_value=StructType, description="Schema is of type StructType", hint="Found [[ACTUAL_VALUE]]")
3838

39-
suite.test_length(schema.fieldNames(), 3, description="Schema contians three field", hint="Found [[LEN_ACTUAL_VALUE]]: [[ACTUAL_VALUE]]")
39+
suite.test_length(lambda: schema.fieldNames(), 3, description="Schema contians three field", hint="Found [[LEN_ACTUAL_VALUE]]: [[ACTUAL_VALUE]]")
4040

41-
suite.test_struct_field(schema, "traffic_source", "StringType", None)
42-
suite.test_struct_field(schema, "active_users", "LongType", None)
43-
suite.test_struct_field(schema, "hour", "IntegerType", None)
41+
suite.test_schema_field(lambda: schema, "traffic_source", "StringType", None)
42+
suite.test_schema_field(lambda: schema, "active_users", "LongType", None)
43+
suite.test_schema_field(lambda: schema, "hour", "IntegerType", None)
4444

4545
suite.display_results()
4646
assert suite.passed, "One or more tests failed."
4747

4848

4949
# COMMAND ----------
5050

51-
@TestHelper.monkey_patch
51+
@ValidationHelper.monkey_patch
5252
def validate_4_1(self):
5353
suite = DA.tests.new("5.1b-4.1")
5454

55-
suite.test_length(spark.streams.active, 0, description="All queries have stopped streaming")
55+
suite.test_length(lambda: spark.streams.active, 0, description="All queries have stopped streaming")
5656

5757
suite.display_results()
5858
assert suite.passed, "One or more tests failed."
5959

6060
# COMMAND ----------
6161

62-
DA = DBAcademyHelper(**helper_arguments) # Create the DA object
63-
DA.reset_environment() # Reset by removing databases and files from other lessons
64-
DA.init(install_datasets=True, # Initialize, install and validate the datasets
65-
create_db=True) # Continue initialization, create the user-db
62+
DA = DBAcademyHelper(course_config, lesson_config)
63+
DA.reset_lesson()
64+
DA.init()
65+
DA.conclude_setup()
6666

6767
DA.paths.sales = f"{DA.paths.datasets}/ecommerce/sales/sales.delta"
6868
DA.paths.users = f"{DA.paths.datasets}/ecommerce/users/users.delta"
6969
DA.paths.events = f"{DA.paths.datasets}/ecommerce/events/events.delta"
7070
DA.paths.products = f"{DA.paths.datasets}/products/products.delta"
7171

72-
DA.conclude_setup() # Conclude setup by advertising environmental changes
72+
DA.conclude_setup()
7373

0 commit comments

Comments
 (0)