Apache-Spark-Programming-with-Databricks: 7 files changed, +27 / -23 lines
In the Reader & Writer notebook, the commit adds an empty cell after the course header and inlines the fenced reader example as inline code:

```diff
@@ -5,6 +5,10 @@
 # MAGIC <img src="https://databricks.com/wp-content/uploads/2018/03/db-academy-rgb-1200px.png" alt="Databricks Learning" style="width: 600px">
 # MAGIC </div>
 
+# COMMAND ----------
+
+
+
 # COMMAND ----------
 
 # MAGIC %md # Reader & Writer
@@ -32,9 +36,7 @@
 # MAGIC %md ## DataFrameReader
 # MAGIC Interface used to load a DataFrame from external storage systems
 # MAGIC
-# MAGIC ```
-# MAGIC spark.read.parquet("path/to/files")
-# MAGIC ```
+# MAGIC **`spark.read.parquet("path/to/files")`**
 # MAGIC
 # MAGIC DataFrameReader is accessible through the SparkSession attribute **`read`**. This class includes methods to load DataFrames from different external storage systems.
 
```
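Since the notebook markdown only names the reader entry point, here is a minimal sketch of the two equivalent read styles this commit moves between. It assumes the ambient `spark` session of a Databricks notebook; the paths are placeholders, not real datasets:

```python
# Format-specific shortcut, as shown in the notebook markdown.
df_shortcut = spark.read.parquet("path/to/files")

# General form: declare the format, then load the path.
# This is the style the commit adopts for the events read below.
df_general = spark.read.format("parquet").load("path/to/files")

# Readers also take per-format options, e.g. for CSV (placeholder path):
csv_df = (spark.read
          .option("header", "true")       # treat the first line as a header
          .option("inferSchema", "true")  # sample the data to infer column types
          .csv("path/to/file.csv"))
```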
In the Delta notebook, the events read moves to the general format/load form and the concatenated paths become f-strings:

```diff
@@ -33,7 +33,7 @@
 
 # COMMAND ----------
 
-events_df = spark.read.parquet(datasets_dir + "/events/events.parquet/")
+events_df = spark.read.format("parquet").load(f"{datasets_dir}/events/events.parquet")
 display(events_df)
 
 # COMMAND ----------
@@ -43,7 +43,7 @@
 
 # COMMAND ----------
 
-delta_path = working_dir + "/delta-events"
+delta_path = f"{working_dir}/delta-events"
 events_df.write.format("delta").mode("overwrite").save(delta_path)
 
 # COMMAND ----------
```
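The write in that hunk can be verified by reading the table back. A minimal sketch, assuming `events_df` and `delta_path` from the diff and a runtime where Delta Lake is available (as it is on Databricks):

```python
# Write the DataFrame as a Delta table, overwriting any previous contents.
events_df.write.format("delta").mode("overwrite").save(delta_path)

# Read it back and sanity-check that the row count survived the round trip.
delta_df = spark.read.format("delta").load(delta_path)
assert delta_df.count() == events_df.count()
```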
The transaction-log inspection cells get the same f-string treatment:

```diff
@@ -97,7 +97,7 @@
 
 # COMMAND ----------
 
-display(dbutils.fs.ls(delta_path + "/_delta_log/"))
+display(dbutils.fs.ls(f"{delta_path}/_delta_log/"))
 
 # COMMAND ----------
 
@@ -112,7 +112,7 @@
 
 # COMMAND ----------
 
-display(spark.read.json(delta_path + "/_delta_log/00000000000000000000.json"))
+display(spark.read.json(f"{delta_path}/_delta_log/00000000000000000000.json"))
 
 # COMMAND ----------
 
@@ -122,7 +122,7 @@
 
 # COMMAND ----------
 
-display(spark.read.json(delta_path + "/_delta_log/00000000000000000001.json"))
+display(spark.read.json(f"{delta_path}/_delta_log/00000000000000000001.json"))
 
 # COMMAND ----------
```
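The commit files read above each hold one JSON action per line (`commitInfo`, `add`, `remove`, ...). A sketch of pulling the data files registered by the latest commit, assuming the `delta_path` and `dbutils` from the diff; the `add.path` field comes from the Delta commit protocol:

```python
# List the JSON commit files in the transaction log, newest last.
log_files = sorted(f.path for f in dbutils.fs.ls(f"{delta_path}/_delta_log/")
                   if f.path.endswith(".json"))

# Each record is one action; `add` is non-null for newly registered files.
latest_commit = spark.read.json(log_files[-1])
latest_commit.where("add is not null").select("add.path").show(truncate=False)
```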
The partition listings before and after the overwrite get the same conversion:

```diff
@@ -129,8 +129,8 @@
 # MAGIC %md Finally, let's take a look at the files inside one of the state partitions. The files inside correspond to the partition commit (file 01) in the _delta_log directory.
 
 # COMMAND ----------
 
-display(dbutils.fs.ls(delta_path + "/state=CA/"))
+display(dbutils.fs.ls(f"{delta_path}/state=CA/"))
 
 # COMMAND ----------
 
@@ -167,7 +167,7 @@
 
 # COMMAND ----------
 
-display(dbutils.fs.ls(delta_path + "/state=CA/"))
+display(dbutils.fs.ls(f"{delta_path}/state=CA/"))
 
 # COMMAND ----------
```
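To quantify what those listings show, a small sketch that tallies the Parquet files in the `state=CA` partition; it assumes the same `delta_path` and relies on the `path` and `size` fields of the `FileInfo` objects that `dbutils.fs.ls` returns:

```python
# Tally the data files in one partition directory.
files = dbutils.fs.ls(f"{delta_path}/state=CA/")
parquet_files = [f for f in files if f.path.endswith(".parquet")]
print(f"{len(parquet_files)} files, {sum(f.size for f in parquet_files)} bytes")
```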
In the sales notebook, the commit repairs a read whose path string was left without its closing quote:

```diff
@@ -21,7 +21,7 @@
 
 # COMMAND ----------
 
-sales_df = spark.read.parquet(datasets_dir + "/sales/sales.parquet/)
+sales_df = spark.read.parquet(datasets_dir + "/sales/sales.parquet")
 delta_sales_path = working_dir + "/delta-sales"
 
 # COMMAND ----------
```
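A hedged sketch of the corrected read, written in the f-string style the commit adopts elsewhere, where the unbalanced quote would have been just as easy to spot; it assumes the course's `datasets_dir` variable:

```python
# Corrected read, in the f-string style used in the other notebooks.
sales_df = spark.read.parquet(f"{datasets_dir}/sales/sales.parquet")
sales_df.printSchema()  # quick confirmation that the load succeeded
```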
Finally, the version notebook records the new build timestamp:

```diff
@@ -11,7 +11,7 @@
 # MAGIC
 # MAGIC * Name: **Apache Spark Programming with Databricks**
 # MAGIC * Version: **2.0.1**
-# MAGIC * Built On: **Jan 28, 2022 at 20:38:07 UTC**
+# MAGIC * Built On: **Jan 28, 2022 at 22:18:24 UTC**
 
 # COMMAND ----------
```