There was an error while loading. Please reload this page.
1 parent b56b6da commit 7b6ffeaCopy full SHA for 7b6ffea
gen-avro-parquet.sh
@@ -1,6 +1,6 @@
1
#This script transforms the HDFS-stored CSV files to Avro and Parquet formats using spark-shell and a small Scala script
2
#work from /tmp; stop here if it cannot be entered so nothing is downloaded or run in an unexpected directory
cd /tmp || exit 1
3
-#gets the ETL script into /tmp, the saved file will be called etl.scala
+#gets the ETL script into /tmp, the saved file will be called avro-parquet.scala
4
#-O overwrites any copy left by a previous run (plain wget would save a re-download as avro-parquet.scala.1, leaving the stale file in use); abort if the download fails
wget -q -O avro-parquet.scala https://raw.githubusercontent.com/academyofdata/clusterdock/master/avro-parquet.scala || exit 1
5
#runs spark-shell providing the script as input
6
#--packages takes ONE comma-separated list of Maven coordinates; with a space between them the second coordinate is not parsed as a package
HADOOP_USER_NAME=spark spark-shell --packages com.databricks:spark-csv_2.10:1.5.0,com.databricks:spark-avro_2.10:3.2.0 -i avro-parquet.scala
0 commit comments