achmand
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎1_word_count/README.md‎
Lines changed: 1 addition & 1 deletion b/‎1_word_count/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎1_word_count/src/main/java/tutorial/WordCount.java‎
Lines changed: 3 additions & 2 deletions b/‎1_word_count/src/main/java/tutorial/WordCount.java‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 2 deletions b/‎README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎graph_api/ouput‎ b/‎graph_api/ouput‎
diff --git a/‎graph_api/pom.xml‎
Lines changed: 51 additions & 0 deletions b/‎graph_api/pom.xml‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎graph_api/src/main/java/tutorial/Author.java‎
Lines changed: 23 additions & 0 deletions b/‎graph_api/src/main/java/tutorial/Author.java‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎graph_api/src/main/java/tutorial/DegreeSeparation.java‎
Lines changed: 126 additions & 0 deletions b/‎graph_api/src/main/java/tutorial/DegreeSeparation.java‎
Lines changed: 126 additions & 0 deletions
diff --git a/‎graph_api/src/main/java/tutorial/Publication.java‎
Lines changed: 35 additions & 0 deletions b/‎graph_api/src/main/java/tutorial/Publication.java‎
Lines changed: 35 additions & 0 deletions
@@ -3,7 +3,7 @@
 
 .idea
 target
-*/META-INF/*
+META-INF
 
 # User-specific stuff
 .idea/**/workspace.xml
 
@@ -148,7 +148,7 @@ Now open a terminal and execute the following commands to run the JAR file on Ap
 
 *NOTE: Make sure the paths match your locations on your machine.*
 
-Once the cluster is running on the machine execute the JAR file by executing the following (replace with your paths) `FLINKPATH RUN -c MAINCLASS JARPATH --input INPUTPATH --output OUTPUTPATH`. 
+Once the cluster is running on the machine, submit the JAR file with the following command (replace the placeholders with your own paths): `FLINKPATH run -c MAINCLASS JARPATH --input INPUTPATH --output OUTPUTPATH`.
 
 On our machine this command was as follows:
 `flink/build-target/bin/flink run -c tutorial.WordCount flink-java-tutorials/1_word_count/out/artifacts/1_word_count_jar/1_word_count.jar --input flink-java-tutorials/1_word_count/pride_and_prejudice.txt --output flink-java-tutorials/1_word_count/ouput`.
 
@@ -1,6 +1,7 @@
 package tutorial;
 
 // importing packages
+
 import org.apache.flink.api.common.functions.FlatMapFunction;
 import org.apache.flink.api.java.DataSet;
 import org.apache.flink.api.java.ExecutionEnvironment;
@@ -13,7 +14,7 @@
 */
 public class WordCount {
 
- public static void main(String[] args) throws Exception{
+ public static void main(String[] args) throws Exception {
 
  // returns the execution environment (the context 'Local or Remote' in which a program is executed)
  // LocalEnvironment will cause execution in the current JVM
@@ -42,7 +43,7 @@ public static void main(String[] args) throws Exception{
 
  // output the final result
  // check that the argument 'output' was passed to save in that path
- if(parameters.has("output")){
+ if (parameters.has("output")) {
  // write result as CSV row delimiter is a line break, field delimiter is a space
  result.writeAsCsv(parameters.get("output"), "\n", " ");
 
 
@@ -68,10 +68,10 @@ sudo tar -xvzf ~/Downloads/apache-maven-3.2.5-bin.tar.gz
 sudo nano /etc/environment 
 
  # add the following environment variable
-M2_HOME="/opt/apache-maven-3.6.0"
+M2_HOME="/opt/apache-maven-3.2.5"
 
 # append the bin directory to the PATH variable
-/opt/apache-maven-3.6.0/bin
+/opt/apache-maven-3.2.5/bin
 
 # so the result should be something similar to the below
 PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/opt/apache-maven-3.2.5/bin"
 
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Maven build for the Gelly (graph API) tutorial module. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>gg</groupId>
+    <artifactId>gg</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <!-- Flink 1.8 needs Java 8; source/target 6 is also rejected by recent JDKs -->
+                    <source>1.8</source>
+                    <target>1.8</target>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <dependencies>
+        <!-- JSON parsing of the publication records -->
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+            <version>2.8.5</version>
+        </dependency>
+        <!-- Flink batch (DataSet) API -->
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-java</artifactId>
+            <version>1.8.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-streaming-java_2.12</artifactId>
+            <version>1.8.0</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-clients_2.12</artifactId>
+            <version>1.8.0</version>
+        </dependency>
+        <!-- Gelly: Flink's graph processing library -->
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-gelly_2.12</artifactId>
+            <version>1.8.0</version>
+        </dependency>
+    </dependencies>
+</project>
@@ -0,0 +1,23 @@
+package tutorial;
+
+/**
+ * Plain data holder for one author: a name and an {@code org} value
+ * (presumably the author's organisation - confirm against the input data).
+ *
+ * <p>Both fields are public and mutable; the accessors below only read or
+ * overwrite them.
+ */
+public class Author {
+
+    /** Author name; {@code null} until assigned. */
+    public String name;
+
+    /** Value of the {@code org} attribute; {@code null} until assigned. */
+    public String org;
+
+    /** Replaces the stored name. */
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    /** Replaces the stored {@code org} value. */
+    public void setOrg(String org) {
+        this.org = org;
+    }
+
+    /** @return the stored name, possibly {@code null} */
+    public String getName() {
+        return this.name;
+    }
+
+    /** @return the stored {@code org} value, possibly {@code null} */
+    public String getOrg() {
+        return this.org;
+    }
+}
@@ -0,0 +1,126 @@
+package tutorial;
+
+// importing packages
+
+import com.google.gson.Gson;
+import org.apache.flink.api.common.functions.FilterFunction;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.utils.ParameterTool;
+import org.apache.flink.core.fs.FileSystem;
+import org.apache.flink.graph.Edge;
+import org.apache.flink.graph.Graph;
+import org.apache.flink.graph.Vertex;
+import org.apache.flink.graph.library.SingleSourceShortestPaths;
+import org.apache.flink.types.NullValue;
+import org.apache.flink.util.Collector;
+import scala.Tuple2;
+
+import java.util.ArrayList;
+
+// bipartite graph -> use projection -> for recommendations
+
+/**
+ * Implements "degree of separation" with Flink's Gelly Graph API: finds every
+ * author whose shortest collaboration path to a given author has length two
+ * (collaborators of collaborators, similar to "friends of friends").
+ *
+ * <p>Program arguments, parsed with {@link ParameterTool}:
+ * <ul>
+ *   <li>{@code --input}  text file with one JSON publication per line (required)</li>
+ *   <li>{@code --author} name of the source author (required)</li>
+ *   <li>{@code --output} path the matching vertices are written to (optional)</li>
+ * </ul>
+ */
+public class DegreeSeparation {
+    static final Gson gson = new Gson();
+
+    // upper bound on the iterations SingleSourceShortestPaths is allowed to run
+    private static final int MAX_ITERATIONS = 1000;
+
+    // with unit edge weights, a shortest-path distance of 2 marks a collaborator of a collaborator
+    private static final double TARGET_DISTANCE = 2.0;
+
+    /**
+     * Builds the collaboration graph from the input file and runs the shortest-path query.
+     *
+     * @param args command line arguments, see the class description
+     * @throws Exception if a required argument is missing or the Flink job fails
+     */
+    public static void main(String[] args) throws Exception {
+
+        // returns the execution environment (the context 'Local or Remote' in which a program is executed)
+        // LocalEnvironment will cause execution in the current JVM
+        // RemoteEnvironment will cause execution on a remote setup
+        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
+
+        // provides utility methods for reading and parsing the program arguments
+        final ParameterTool parameters = ParameterTool.fromArgs(args);
+
+        // register parameters globally so they are available on each node in the cluster
+        environment.getConfig().setGlobalJobParameters(parameters);
+
+        // fail fast with a descriptive message instead of passing null paths / vertex ids downstream
+        final String inputPath = parameters.getRequired("input");
+        final String sourceAuthor = parameters.getRequired("author");
+
+        // read the input file line-by-line; every line is returned as a String
+        DataSet<String> textLines = environment.readTextFile(inputPath);
+
+        // Author -> Collaborating Author
+        // NOTE(review): Tuple2 resolves to scala.Tuple2 through the file's imports; Flink's own
+        // org.apache.flink.api.java.tuple.Tuple2 would be the usual choice - confirm before switching.
+        DataSet<Tuple2<String, String>> authors = textLines.flatMap(new Tokenizer());
+
+        // convert the dataset to edges in a graph
+        DataSet<Edge<String, NullValue>> edges = authors.map(new MapFunction<Tuple2<String, String>, Edge<String, NullValue>>() {
+            @Override
+            public Edge<String, NullValue> map(Tuple2<String, String> value) {
+                // source = author, target = collaborator; the edge value must be the NullValue
+                // singleton because Flink tuples cannot be serialized with null fields
+                return new Edge<String, NullValue>(value._1(), value._2(), NullValue.getInstance());
+            }
+        });
+
+        // creates graph from the edges generated
+        Graph<String, NullValue, NullValue> collaborationGraph = Graph.fromDataSet(edges, environment);
+
+        // we need to add weights since we will apply SingleSourceShortestPaths;
+        // every collaboration counts as one step
+        Graph<String, NullValue, Double> wCollaborationGraph = collaborationGraph.mapEdges(new MapFunction<Edge<String, NullValue>, Double>() {
+            @Override
+            public Double map(Edge<String, NullValue> edge) {
+                return 1.0;
+            }
+        });
+
+        // shortest-path distance from the requested author to every other vertex
+        SingleSourceShortestPaths<String, NullValue> singleSourceShortestPaths =
+                new SingleSourceShortestPaths<String, NullValue>(sourceAuthor, MAX_ITERATIONS);
+        DataSet<Vertex<String, Double>> result = singleSourceShortestPaths.run(wCollaborationGraph);
+
+        // NOTE(review): count() eagerly runs the job on its own, so the plan is executed a second
+        // time by execute() below - drop this line if the vertex count is not needed.
+        System.out.println(result.count());
+
+        // keep only the authors that are exactly two collaborations away from the source author
+        DataSet<Vertex<String, Double>> resultAuthor = result.filter(new FilterFunction<Vertex<String, Double>>() {
+            @Override
+            public boolean filter(Vertex<String, Double> value) {
+                Double distance = value.getValue();
+                return distance != null && distance == TARGET_DISTANCE;
+            }
+        });
+
+        // output the final result
+        // check that the argument 'output' was passed to save in that path
+        if (parameters.has("output")) {
+            resultAuthor.writeAsText(parameters.get("output"), FileSystem.WriteMode.OVERWRITE);
+            environment.execute("Graph API Tutorial");
+        }
+    }
+
+    /**
+     * Parses one JSON publication and emits a pair for every two distinct
+     * positions in its author list, in both directions.
+     */
+    public static class Tokenizer implements FlatMapFunction<String, Tuple2<String, String>> {
+
+        /**
+         * @param value one line of the input file, expected to be a JSON publication
+         * @param out   receives (author, collaborator) and (collaborator, author) pairs
+         */
+        @Override
+        public void flatMap(String value, Collector<Tuple2<String, String>> out) {
+            Publication publication = gson.fromJson(value, Publication.class);
+
+            // Gson yields null for an empty line and leaves 'authors' null when the key is absent
+            if (publication == null || publication.getAuthors() == null) {
+                return;
+            }
+
+            // collect usable names; entries without a name cannot become vertex ids
+            ArrayList<String> names = new ArrayList<String>(publication.getAuthors().size());
+            for (Author author : publication.getAuthors()) {
+                if (author != null && author.getName() != null) {
+                    names.add(author.getName());
+                }
+            }
+
+            // no collaboration (one author)
+            if (names.size() <= 1) {
+                return;
+            }
+
+            for (int i = 0; i < names.size() - 1; i++) {
+                String currentAuthor = names.get(i);
+                for (int j = i + 1; j < names.size(); j++) {
+                    String collaboration = names.get(j);
+
+                    // must output two tuples since we need to create
+                    // two edges for an undirected edge
+                    out.collect(new Tuple2<String, String>(currentAuthor, collaboration));
+                    out.collect(new Tuple2<String, String>(collaboration, currentAuthor));
+                }
+            }
+        }
+    }
+}
@@ -0,0 +1,35 @@
+package tutorial;
+
+import java.util.ArrayList;
+
+/**
+ * Plain data holder for one publication record: an id, a title and the list
+ * of its authors. All three values are {@code null} until assigned.
+ */
+public class Publication {
+
+    private String id;
+    private String title;
+    private ArrayList<Author> authors;
+
+    // ---- readers ----
+
+    /** @return the publication id, possibly {@code null} */
+    public String getId() {
+        return this.id;
+    }
+
+    /** @return the publication title, possibly {@code null} */
+    public String getTitle() {
+        return this.title;
+    }
+
+    /** @return the stored author list itself (not a copy), possibly {@code null} */
+    public ArrayList<Author> getAuthors() {
+        return this.authors;
+    }
+
+    // ---- writers ----
+
+    /** Replaces the publication id. */
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    /** Replaces the publication title. */
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    /** Replaces the author list; the given list is stored as-is. */
+    public void setAuthors(ArrayList<Author> authors) {
+        this.authors = authors;
+    }
+
+}