
Commit 5a9e58f

sync to 0.24.0

1 parent: 61bc069

File tree: 527 files changed (+35063 / -149 lines)


CHANGELOG.md

Lines changed: 13 additions & 5 deletions
@@ -1,15 +1,23 @@
+# 0.24.0
+- improve sync instance
+- support external table
+- copy task support tunnel endpoint router
+- fix `user.reload` bug, mk it work
+- add instance.getStatus(boolean )
+- optimize mr multi output
+
 # 0.23.2
 - make `OdpsHooks` thread safe

 # 0.23.1
 - add `tables.loadTables` and `tables.reloadTables`

-# 0.22.3
-- add `listRolesForUserName` and `listRolesForUserID`
-- refine `OnlineModel`
+# 0.22.3
+- add `listRolesForUserName` and `listRolesForUserID`
+- refine `OnlineModel`

-# 0.22.2
-- fix mr get summary hang
+# 0.22.2
+- fix mr get summary hang

 # 0.22.1
 - revert instance retry, rm guid in job model
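
Of the 0.24.0 entries above, `instance.getStatus(boolean)` is the most visible API addition. The snippet below is a minimal, hypothetical usage sketch of that overload, not code from this commit: the endpoint, project, and credential placeholders are illustrative, and it is only an assumption here that passing true asks the service for a fresh status rather than the locally cached one.

// Hypothetical sketch (not part of this commit): checking an instance's status
// with the getStatus(boolean) overload listed in the 0.24.0 changelog.
import com.aliyun.odps.Instance;
import com.aliyun.odps.Odps;
import com.aliyun.odps.account.Account;
import com.aliyun.odps.account.AliyunAccount;

public class InstanceStatusExample {
  public static void main(String[] args) throws Exception {
    Account account = new AliyunAccount("<accessId>", "<accessKey>");
    Odps odps = new Odps(account);
    odps.setEndpoint("<odps endpoint>");
    odps.setDefaultProject("<project>");

    Instance instance = odps.instances().get("<instanceId>");
    // Assumption: true forces a reload from the service instead of
    // returning the cached status.
    Instance.Status status = instance.getStatus(true);
    System.out.println(status);
  }
}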
pom.xml (odps-mapreduce-examples)

Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
<?xml version="1.0"?>
<project
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
    xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.aliyun.odps</groupId>
  <artifactId>odps-mapreduce-examples</artifactId>
  <packaging>jar</packaging>
  <version>1.0</version>
  <name>Mapreduce examples</name>
  <url>http://maven.apache.org</url>

  <dependencies>
    <dependency>
      <groupId>com.aliyun.odps</groupId>
      <artifactId>odps-sdk-mapred</artifactId>
      <version>0.21.3-public</version>
    </dependency>
  </dependencies>
</project>
Grep.java (new file)

Lines changed: 193 additions & 0 deletions

@@ -0,0 +1,193 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.aliyun.odps.mapred.open.example;

import java.io.IOException;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.mapred.JobClient;
import com.aliyun.odps.mapred.Mapper;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.ReducerBase;
import com.aliyun.odps.mapred.RunningJob;
import com.aliyun.odps.mapred.TaskContext;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;

/**
 * Extracts matching regexes from the input tables and counts them.
 */
public class Grep {

  /**
   * RegexMapper. Declared static so the framework can instantiate it without an enclosing Grep
   * instance.
   */
  public static class RegexMapper extends MapperBase {
    private Pattern pattern;
    private int group;

    private Record word;
    private Record one;

    @Override
    public void setup(TaskContext context) throws IOException {
      JobConf job = (JobConf) context.getJobConf();
      pattern = Pattern.compile(job.get("mapred.mapper.regex"));
      group = job.getInt("mapred.mapper.regex.group", 0);

      word = context.createMapOutputKeyRecord();
      one = context.createMapOutputValueRecord();
      one.set(new Object[] {1L});
    }

    @Override
    public void map(long recordNum, Record record, TaskContext context) throws IOException {
      for (int i = 0; i < record.getColumnCount(); ++i) {
        String text = record.get(i).toString();
        Matcher matcher = pattern.matcher(text);
        while (matcher.find()) {
          word.set(new Object[] {matcher.group(group)});
          context.write(word, one);
        }
      }
    }
  }

  /**
   * LongSumReducer
   */
  public static class LongSumReducer extends ReducerBase {
    private Record result = null;

    @Override
    public void setup(TaskContext context) throws IOException {
      result = context.createOutputRecord();
    }

    @Override
    public void reduce(Record key, Iterator<Record> values, TaskContext context) throws IOException {
      long count = 0;
      while (values.hasNext()) {
        Record val = values.next();
        count += (Long) val.get(0);
      }
      result.set(0, key.get(0));
      result.set(1, count);
      context.write(result);
    }
  }

  /**
   * A {@link Mapper} that swaps keys and values.
   */
  public static class InverseMapper extends MapperBase {
    private Record word;
    private Record count;

    @Override
    public void setup(TaskContext context) throws IOException {
      word = context.createMapOutputValueRecord();
      count = context.createMapOutputKeyRecord();
    }

    /**
     * The inverse function. Input keys and values are swapped.
     */
    @Override
    public void map(long recordNum, Record record, TaskContext context) throws IOException {
      word.set(new Object[] {record.get(0).toString()});
      count.set(new Object[] {(Long) record.get(1)});
      context.write(count, word);
    }
  }

  /**
   * IdentityReducer
   */
  public static class IdentityReducer extends ReducerBase {
    private Record result = null;

    @Override
    public void setup(TaskContext context) throws IOException {
      result = context.createOutputRecord();
    }

    /** Writes all keys and values directly to output. */
    @Override
    public void reduce(Record key, Iterator<Record> values, TaskContext context) throws IOException {
      result.set(0, key.get(0));

      while (values.hasNext()) {
        Record val = values.next();
        result.set(1, val.get(0));
        context.write(result);
      }
    }
  }

  public static void main(String[] args) throws Exception {
    if (args.length < 4) {
      System.err.println("Grep <inDir> <tmpDir> <outDir> <regex> [<group>]");
      System.exit(2);
    }

    // First job: extract matches and count them into the tmp table.
    JobConf grepJob = new JobConf();

    grepJob.setMapperClass(RegexMapper.class);
    grepJob.setReducerClass(LongSumReducer.class);

    grepJob.setMapOutputKeySchema(SchemaUtils.fromString("word:string"));
    grepJob.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint"));

    InputUtils.addTable(TableInfo.builder().tableName(args[0]).build(), grepJob);
    OutputUtils.addTable(TableInfo.builder().tableName(args[1]).build(), grepJob);

    grepJob.set("mapred.mapper.regex", args[3]);
    if (args.length == 5) {
      grepJob.set("mapred.mapper.regex.group", args[4]);
    }

    @SuppressWarnings("unused")
    RunningJob rjGrep = JobClient.runJob(grepJob);

    // Second job: invert (word, count) and sort by count into the output table.
    JobConf sortJob = new JobConf();

    sortJob.setMapperClass(InverseMapper.class);
    sortJob.setReducerClass(IdentityReducer.class);

    sortJob.setMapOutputKeySchema(SchemaUtils.fromString("count:bigint"));
    sortJob.setMapOutputValueSchema(SchemaUtils.fromString("word:string"));

    InputUtils.addTable(TableInfo.builder().tableName(args[1]).build(), sortJob);
    OutputUtils.addTable(TableInfo.builder().tableName(args[2]).build(), sortJob);

    sortJob.setNumReduceTasks(1); // write a single file
    sortJob.setOutputKeySortColumns(new String[] {"count"});

    @SuppressWarnings("unused")
    RunningJob rjSort = JobClient.runJob(sortJob);
  }
}
Join.java (new file)

Lines changed: 135 additions & 0 deletions

@@ -0,0 +1,135 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.aliyun.odps.mapred.open.example;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.mapred.JobClient;
import com.aliyun.odps.mapred.MapperBase;
import com.aliyun.odps.mapred.ReducerBase;
import com.aliyun.odps.mapred.TaskContext;
import com.aliyun.odps.mapred.conf.JobConf;
import com.aliyun.odps.mapred.utils.InputUtils;
import com.aliyun.odps.mapred.utils.OutputUtils;
import com.aliyun.odps.mapred.utils.SchemaUtils;

/**
 * Join example: joins mr_join_src1 and mr_join_src2 (key bigint, value string) into
 * mr_join_out (key bigint, value1 string, value2 string).
 */
public class Join {

  public static final Log LOG = LogFactory.getLog(Join.class);

  public static class JoinMapper extends MapperBase {

    private Record mapkey;
    private Record mapvalue;
    private long tag;

    @Override
    public void setup(TaskContext context) throws IOException {
      mapkey = context.createMapOutputKeyRecord();
      mapvalue = context.createMapOutputValueRecord();
      // Rows from the table labeled "left" are tagged 0, rows from the other table 1.
      tag = context.getInputTableInfo().getLabel().equals("left") ? 0 : 1;
    }

    @Override
    public void map(long key, Record record, TaskContext context) throws IOException {
      mapkey.set(0, record.get(0));
      mapkey.set(1, tag);

      for (int i = 1; i < record.getColumnCount(); i++) {
        mapvalue.set(i - 1, record.get(i));
      }
      context.write(mapkey, mapvalue);
    }

  }

  public static class JoinReducer extends ReducerBase {

    private Record result = null;

    @Override
    public void setup(TaskContext context) throws IOException {
      result = context.createOutputRecord();
    }

    @Override
    public void reduce(Record key, Iterator<Record> values, TaskContext context) throws IOException {
      long k = key.getBigint(0);
      List<Object[]> leftValues = new ArrayList<Object[]>();

      // Values are grouped by "key" but sorted by (key, tag), so all left rows (tag 0)
      // arrive first and are buffered, then each right row (tag 1) is crossed with them.
      while (values.hasNext()) {
        Record value = values.next();
        long tag = (Long) key.get(1);

        if (tag == 0) {
          leftValues.add(value.toArray().clone());
        } else {
          for (Object[] leftValue : leftValues) {
            int index = 0;
            result.set(index++, k);
            for (int i = 0; i < leftValue.length; i++) {
              result.set(index++, leftValue[i]);
            }
            for (int i = 0; i < value.getColumnCount(); i++) {
              result.set(index++, value.get(i));
            }
            context.write(result);
          }
        }
      }

    }

  }

  public static void main(String[] args) throws Exception {
    if (args.length != 3) {
      System.err.println("Usage: Join <input table1> <input table2> <out>");
      System.exit(2);
    }
    JobConf job = new JobConf();

    job.setMapperClass(JoinMapper.class);
    job.setReducerClass(JoinReducer.class);

    job.setMapOutputKeySchema(SchemaUtils.fromString("key:bigint,tag:bigint"));
    job.setMapOutputValueSchema(SchemaUtils.fromString("value:string"));

    job.setPartitionColumns(new String[] {"key"});
    job.setOutputKeySortColumns(new String[] {"key", "tag"});
    job.setOutputGroupingColumns(new String[] {"key"});
    job.setNumReduceTasks(1);

    InputUtils.addTable(TableInfo.builder().tableName(args[0]).label("left").build(), job);
    InputUtils.addTable(TableInfo.builder().tableName(args[1]).label("right").build(), job);
    OutputUtils.addTable(TableInfo.builder().tableName(args[2]).build(), job);

    JobClient.runJob(job);
  }
}
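
The Javadoc above fixes the table shapes the Join example expects: two inputs with (key bigint, value string) and an output with (key bigint, value1 string, value2 string). Below is a minimal sketch, not part of this commit, of creating those tables through the SDK; the table names follow the Javadoc, while the connection placeholders and the use of `Tables.create` here are illustrative assumptions.

// Illustrative setup sketch (not from this commit): create the tables that the
// Join example's Javadoc describes, using the plain ODPS SDK.
import com.aliyun.odps.Column;
import com.aliyun.odps.Odps;
import com.aliyun.odps.OdpsType;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.account.AliyunAccount;

public class JoinTableSetup {
  public static void main(String[] args) throws Exception {
    Odps odps = new Odps(new AliyunAccount("<accessId>", "<accessKey>"));
    odps.setEndpoint("<odps endpoint>");
    odps.setDefaultProject("<project>");

    // mr_join_src1 / mr_join_src2: (key bigint, value string)
    TableSchema src = new TableSchema();
    src.addColumn(new Column("key", OdpsType.BIGINT));
    src.addColumn(new Column("value", OdpsType.STRING));
    odps.tables().create("mr_join_src1", src);
    odps.tables().create("mr_join_src2", src);

    // mr_join_out: (key bigint, value1 string, value2 string)
    TableSchema out = new TableSchema();
    out.addColumn(new Column("key", OdpsType.BIGINT));
    out.addColumn(new Column("value1", OdpsType.STRING));
    out.addColumn(new Column("value2", OdpsType.STRING));
    odps.tables().create("mr_join_out", out);
  }
}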
