Commit b61242f ("ch 10 changes")
1 parent e3bd183

File tree: 5 files changed, +469 -0 lines changed
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
.idea
target
cookbook-app.iml
cookbookapp.iml
cookbook-app.iws
cookbook-app.ipr
workspace.xml
dependency-reduced-pom.xml
model.zip
LocalExecuteExample.csv
Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.javadeeplearningcookbook.app</groupId>
    <artifactId>cookbookapp</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <!--<plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.javadeeplearningcookbook.examples.CustomerRetentionPredictionExample</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>-->
            <!--<plugin>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>
                                com.javadeeplearningcookbook.examples.CustomerRetentionPredictionExample
                            </mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>-->
            <!--<plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <version>3.1.1</version>
                <executions>
                    <execution>
                        <id>Analyze</id>
                        <goals>
                            <goal>analyze</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>-->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.javacookbook.app.SparkExample</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.3.3</version>
        </dependency>
        <dependency>
            <groupId>org.datavec</groupId>
            <artifactId>datavec-spark_2.11</artifactId>
            <version>1.0.0-beta4_spark_2</version>
        </dependency>
        <dependency>
            <groupId>org.deeplearning4j</groupId>
            <artifactId>dl4j-spark_2.11</artifactId>
            <version>1.0.0-beta4_spark_2</version>
        </dependency>
        <dependency>
            <groupId>org.deeplearning4j</groupId>
            <artifactId>dl4j-spark-parameterserver_2.11</artifactId>
            <version>1.0.0-beta4_spark_2</version>
        </dependency>
        <dependency>
            <groupId>org.nd4j</groupId>
            <artifactId>nd4j-native-platform</artifactId>
            <version>1.0.0-beta4</version>
        </dependency>
        <dependency>
            <groupId>org.datavec</groupId>
            <artifactId>datavec-api</artifactId>
            <version>1.0.0-beta4</version>
        </dependency>
        <dependency>
            <groupId>org.deeplearning4j</groupId>
            <artifactId>deeplearning4j-zoo</artifactId>
            <version>1.0.0-beta4</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>1.8.0-beta4</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.8.0-beta4</version>
        </dependency>
        <dependency>
            <groupId>com.beust</groupId>
            <artifactId>jcommander</artifactId>
            <version>1.72</version>
        </dependency>
    </dependencies>
    <!-- Uncomment to use the snapshot version. The updatePolicy element is optional (update daily). -->
    <!--<repositories>
        <repository>
            <id>snapshots-repo</id>
            <url>https://oss.sonatype.org/content/repositories/snapshots</url>
            <releases>
                <enabled>false</enabled>
            </releases>
            <snapshots>
                <enabled>true</enabled>
                <updatePolicy>daily</updatePolicy>
            </snapshots>
        </repository>
    </repositories>-->
</project>
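
Since the shade plugin above is bound to the package phase, building the runnable fat JAR should need nothing more than the standard Maven invocation below; the ManifestResourceTransformer sets the manifest's main class to com.javacookbook.app.SparkExample.

mvn clean package

As a side effect, the shade plugin writes the dependency-reduced-pom.xml file that the first file in this commit adds to the ignore list.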
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
package com.javacookbook.app;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import org.datavec.image.loader.NativeImageLoader;
import org.deeplearning4j.common.resources.DL4JResources;
import org.deeplearning4j.common.resources.ResourceType;
import org.deeplearning4j.datasets.fetchers.TinyImageNetFetcher;
import org.deeplearning4j.spark.util.SparkDataUtils;

import java.io.File;

/*
Use this source code to pre-process the data and save the batch files to your local disk.
You would then need to manually transfer them to HDFS.
*/
public class PreProcessLocal {

    // Replace this with a local directory of your choice.
    private String localSaveDir = "D:/Application/imagenet-preprocessed/";

    @Parameter(names = {"--batchSize"}, description = "Batch size for saving the data", required = false)
    private int batchSize = 32;

    public static void main(String[] args) throws Exception {
        new PreProcessLocal().entryPoint(args);
    }

    protected void entryPoint(String[] args) throws Exception {
        // Parse the command-line arguments so that --batchSize actually takes effect.
        JCommander jcmdr = new JCommander(this);
        jcmdr.parse(args);

        // Download and extract TinyImageNet into the local DL4J cache.
        TinyImageNetFetcher f = new TinyImageNetFetcher();
        f.downloadAndExtract();

        // Preprocess the training set into batch files.
        File baseDirTrain = DL4JResources.getDirectory(ResourceType.DATASET, f.localCacheName() + "/train");
        File saveDirTrain = new File(localSaveDir, "train");
        if (!saveDirTrain.exists()) {
            saveDirTrain.mkdirs();
        }
        SparkDataUtils.createFileBatchesLocal(baseDirTrain, NativeImageLoader.ALLOWED_FORMATS, true, saveDirTrain, batchSize);

        // Preprocess the test set into batch files.
        File baseDirTest = DL4JResources.getDirectory(ResourceType.DATASET, f.localCacheName() + "/test");
        File saveDirTest = new File(localSaveDir, "test");
        if (!saveDirTest.exists()) {
            saveDirTest.mkdirs();
        }
        SparkDataUtils.createFileBatchesLocal(baseDirTest, NativeImageLoader.ALLOWED_FORMATS, true, saveDirTest, batchSize);

        System.out.println("----- Data Preprocessing Complete -----");
    }
}
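
The header comment says the saved batch files must be transferred to HDFS by hand. Below is a minimal sketch of that transfer using Hadoop's FileSystem Java API. The helper class name (CopyBatchesToHdfs), the namenode URI, and the target path are assumptions (the URI and path are borrowed from the sample arguments in PreprocessSpark further down); the equivalent shell approach would be hdfs dfs -put.

package com.javacookbook.app;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

// Hypothetical helper: copies the locally preprocessed batch files to HDFS.
// The namenode URI and the source/target paths are assumptions; adjust them to your cluster.
public class CopyBatchesToHdfs {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
        // copyFromLocalFile copies directories recursively (delSrc=false, overwrite=true).
        fs.copyFromLocalFile(false, true,
                new Path("D:/Application/imagenet-preprocessed/train"),
                new Path("/user/hadoop/batches/train"));
        fs.copyFromLocalFile(false, true,
                new Path("D:/Application/imagenet-preprocessed/test"),
                new Path("/user/hadoop/batches/test"));
        fs.close();
        System.out.println("Batch files copied to HDFS");
    }
}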
Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
package com.javacookbook.app;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.datavec.image.loader.NativeImageLoader;
import org.deeplearning4j.spark.util.SparkDataUtils;
import org.deeplearning4j.spark.util.SparkUtils;

/**
 * This file is for preparing the training data for the tiny imagenet CNN example.
 * Either this class OR PreProcessLocal (but not both) must be run before training can be run on a cluster via TrainSpark.
 *
 * PreprocessSpark requires that the tiny imagenet source image files (.jpg format) are available on network storage that
 * Spark can access, such as HDFS, Azure blob storage, or S3.
 *
 * To get these image files, you have two options:
 *
 * Option 1: Direct download (we followed this approach in this source)
 * Step 1: Download https://deeplearning4jblob.blob.core.windows.net/datasets/tinyimagenet_200_dl4j.v1.zip or http://cs231n.stanford.edu/tiny-imagenet-200.zip
 * Step 2: Extract the files locally
 * Step 3: Copy the contents (in their existing train/test subdirectories) to remote storage (for example, using Hadoop FS utils or similar)
 *
 * Option 2: Use TinyImageNetFetcher to download
 * Step 1: Run {@code new TinyImageNetFetcher().downloadAndExtract()} to download the files
 * Step 2: Copy the contents of the following directory to remote storage (for example, using Hadoop FS utils or similar)
 * Windows: C:\Users\<username>\.deeplearning4j\data\TINYIMAGENET_200
 * Linux: ~/.deeplearning4j/data/TINYIMAGENET_200
 *
 * After completing the steps of option 1 or option 2, run this script to preprocess the data.
 *
 * @author Alex Black
 */
public class PreprocessSpark {

    // Sample argument: --sourceDir="hdfs://localhost:9000/user/hadoop/tiny-imagenet-200/"
    @Parameter(names = {"--sourceDir"}, description = "Directory of the source image files", required = true)
    private String sourceDir = null;

    // Sample argument: --saveDir="hdfs://localhost:9000/user/hadoop/batches/"
    @Parameter(names = {"--saveDir"}, description = "Directory to save the preprocessed data files on remote storage (for example, HDFS)", required = true)
    private String saveDir = null;

    @Parameter(names = {"--batchSize"}, description = "Batch size for saving the data", required = false)
    private int batchSize = 32;

    public static void main(String[] args) throws Exception {
        new PreprocessSpark().entryPoint(args);
    }

    protected void entryPoint(String[] args) throws Exception {
        JCommander jcmdr = new JCommander(this);
        jcmdr.parse(args);

        SparkConf conf = new SparkConf();
        // Runs Spark in local mode; remove this line to let spark-submit control the master on a cluster.
        conf.setMaster("local[*]");
        conf.setAppName("DL4JTinyImageNetSparkPreproc");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Create the training set batches. Train and test batches are written to separate
        // subdirectories so they stay distinguishable, mirroring PreProcessLocal.
        JavaRDD<String> filePathsTrain = SparkUtils.listPaths(sc, sourceDir + "/train", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTrain, saveDir + "/train", batchSize, sc);

        // Create the test set batches.
        JavaRDD<String> filePathsTest = SparkUtils.listPaths(sc, sourceDir + "/test", true, NativeImageLoader.ALLOWED_FORMATS);
        SparkDataUtils.createFileBatchesSpark(filePathsTest, saveDir + "/test", batchSize, sc);

        System.out.println("----- Data Preprocessing Complete -----");
    }
}
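
For a quick local test, the class can be driven directly from another main method with the sample arguments given in the comments above; on a real cluster you would instead submit the shaded JAR built by the POM with spark-submit. The driver class name below is hypothetical, and the HDFS paths are the sample values from the source, assuming a namenode at localhost:9000.

package com.javacookbook.app;

// Hypothetical driver showing how PreprocessSpark is invoked. The HDFS paths
// are the sample values from the comments above, not required locations.
public class PreprocessSparkDriver {
    public static void main(String[] args) throws Exception {
        PreprocessSpark.main(new String[]{
                "--sourceDir=hdfs://localhost:9000/user/hadoop/tiny-imagenet-200/",
                "--saveDir=hdfs://localhost:9000/user/hadoop/batches/"
        });
    }
}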
