Skip to content

Commit 5024db6

Browse files
authored
Merge pull request #154 from Yancey1989/add_secret
Support registry resource
2 parents 6178f9d + efc94db commit 5024db6

File tree

11 files changed

+386
-13
lines changed

11 files changed

+386
-13
lines changed

doc/usage_cn.md

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,21 @@ Usage: paddlecloud <flags> <subcommand> <subcommand args>
3333

3434
Subcommands:
3535
commands list all command names
36+
delete Delete the specify resource.
37+
file Simple file operations.
3638
get Print resources
3739
help describe subcommands and their syntax
3840
kill Stop the job. -rm will remove the job from history.
3941
logs Print logs of the job.
42+
registry Add registry secret on paddlecloud.
4043
submit Submit job to PaddlePaddle Cloud.
4144

45+
Subcommands for PFS:
46+
cp uoload or download files
47+
ls List files on PaddlePaddle Cloud
48+
mkdir mkdir directoies on PaddlePaddle Cloud
49+
rm rm files on PaddlePaddle Cloud
50+
4251

4352
Use "paddlecloud flags" for a list of top-level flags
4453
```
@@ -123,13 +132,30 @@ scp -r my_training_package/ user@tunnel-server:/mnt/hdfs_mulan/idl/idl-dl/mypack
123132
- 提交基于V1 API的训练任务
124133

125134
```bash
126-
paddlecloud submit -jobname my-paddlecloud-job -cpu 1 -gpu 0 -memory 1Gi -parallelism 10 -pscpu 1 -pservers 3 -psmemory 1Gi -passes 1 -topology trainer_config.py /pfs/[datacenter_name]/home/[username]/ctr_demo_package
135+
paddlecloud submit -jobname my-paddlecloud-job \
136+
-cpu 1 \
137+
-gpu 0 \
138+
-memory 1Gi \
139+
-parallelism 10 \
140+
-pscpu 1 \
141+
-pservers 3 \
142+
-psmemory 1Gi \
143+
-passes 1 \
144+
-topology trainer_config.py /pfs/[datacenter_name]/home/[username]/ctr_demo_package
127145
```
128146

129147
- 提交基于V2 API的训练任务
130148

131149
```bash
132-
paddlecloud submit -jobname my-paddlecloud-job -cpu 1 -gpu 0 -memory 1Gi -parallelism 10 -pscpu 1 -pservers 3 -psmemory 1Gi -passes 1 -entry "python trainer_config.py" /pfs/[datacenter_name]/home/[username]/ctr_demo_package
150+
paddlecloud submit -jobname my-paddlecloud-job \
151+
-cpu 1 \
152+
-gpu 0 \
153+
-memory 1Gi \
154+
-parallelism 10 \
155+
-pscpu 1 \
156+
-pservers 3 \
157+
-psmemory 1Gi \
158+
-entry "python trainer_config.py" /pfs/[datacenter_name]/home/[username]/ctr_demo_package
133159
```
134160

135161
参数说明:
@@ -147,6 +173,43 @@ paddlecloud submit -jobname my-paddlecloud-job -cpu 1 -gpu 0 -memory 1Gi -parall
147173
- `-passes`:执行训练的pass个数
148174
- `package`:HDFS 训练任务package的路径
149175

176+
### 使用自定义的Runtime Docker Image
177+
runtime Docker Image是实际被Kubernetes调度的Docker Image,如果在某些情况下需要自定义属于某个任务的Docker Image可以通过以下方式
178+
- 自定义Runtime Docker Image
179+
```bash
180+
git clone https://github.com/PaddlePaddle/cloud.git && cd cloud/docker
181+
./build_docker.sh {PaddlePaddle production image} {runtime Docker image}
182+
docker push {runtime Docker image}
183+
```
184+
- 使用自定义的runtime Docker Image来运行Job
185+
```bash
186+
paddlecloud submit -image {runtime Docker image} -jobname ...
187+
```
188+
189+
- 使用私有registry的runtime Docker image
190+
- 在PaddleCloud上添加registry认证信息
191+
```bash
192+
paddlecloud registry \
193+
-username {your username} \
194+
-password {your password} \
195+
-server {your registry server} \
196+
-name {your registry name}
197+
```
198+
- 使用私有registry提交任务
199+
```bash
200+
paddlecloud submit \
201+
-image {runtime Docker image} \
202+
-registry {your registry name}
203+
```
204+
- 查看所有的registry
205+
```bash
206+
paddlecloud get registry
207+
```
208+
- 删除指定的registry
209+
```bash
210+
paddlecloud delete registry {your registry name}
211+
```
212+
150213
## 查看任务状态
151214

152215
用户可以查看任务、任务节点、用户空间配额的当前状态。

docker/build_docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ docker run --rm -it -v $PWD:/cloud $base_image \
2525
#Build Docker Image
2626
cat > Dockerfile <<EOF
2727
FROM ${base_image}
28-
RUN pip install -U kubernetes && apt-get install -y iputils-ping
28+
RUN pip install -U kubernetes && apt-get update -y && apt-get install -y iputils-ping
2929
ADD ./paddle_k8s /usr/bin
3030
ADD ./k8s_tools.py /root/
3131
ADD ./python/dist/pcloud-0.1.1-py2-none-any.whl /tmp/

go/cmd/paddlecloud/paddlecloud.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@ func main() {
1818
subcommands.Register(&paddlecloud.GetCommand{}, "")
1919
subcommands.Register(&paddlecloud.KillCommand{}, "")
2020
subcommands.Register(&paddlecloud.SimpleFileCmd{}, "")
21-
subcommands.Register(&pfsmod.LsCmd{}, "")
22-
subcommands.Register(&pfsmod.CpCmd{}, "")
23-
subcommands.Register(&pfsmod.RmCmd{}, "")
24-
subcommands.Register(&pfsmod.MkdirCmd{}, "")
21+
subcommands.Register(&paddlecloud.RegistryCmd{}, "")
22+
subcommands.Register(&paddlecloud.DeleteCommand{}, "")
23+
subcommands.Register(&pfsmod.LsCmd{}, "PFS")
24+
subcommands.Register(&pfsmod.CpCmd{}, "PFS")
25+
subcommands.Register(&pfsmod.RmCmd{}, "PFS")
26+
subcommands.Register(&pfsmod.MkdirCmd{}, "PFS")
2527

2628
flag.Parse()
2729
ctx := context.Background()

go/paddlecloud/delete.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package paddlecloud
2+
3+
import (
4+
"context"
5+
"flag"
6+
"fmt"
7+
"os"
8+
9+
"github.com/google/subcommands"
10+
)
11+
12+
// DeleteCommand implements the "delete" subcommand, which removes a named
// resource from PaddleCloud. It carries no state of its own.
type DeleteCommand struct{}
15+
16+
// Name is subcommands name
17+
func (*DeleteCommand) Name() string { return "delete" }
18+
19+
// Synopsis is subcommands synopsis
20+
func (*DeleteCommand) Synopsis() string { return "Delete the specify resource." }
21+
22+
// Usage is subcommands usage
23+
func (*DeleteCommand) Usage() string {
24+
return `delete registry [registry-name]
25+
`
26+
}
27+
28+
// SetFlags registers subcommands flags
29+
func (p *DeleteCommand) SetFlags(f *flag.FlagSet) {
30+
}
31+
32+
// Execute kill command
33+
func (p *DeleteCommand) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
34+
if f.NArg() != 2 {
35+
f.Usage()
36+
return subcommands.ExitFailure
37+
}
38+
if f.Arg(0) == RegistryCmdName {
39+
name := f.Arg(1)
40+
r := RegistryCmd{SecretName: KubeRegistryName(name)}
41+
err := r.Delete()
42+
if err != nil {
43+
fmt.Fprintf(os.Stderr, "error delete registry: %v\n", err)
44+
return subcommands.ExitFailure
45+
}
46+
fmt.Fprintf(os.Stdout, "registry: [%s] is deleted\n", name)
47+
} else {
48+
f.Usage()
49+
return subcommands.ExitFailure
50+
}
51+
return subcommands.ExitSuccess
52+
}

go/paddlecloud/get.go

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ func (*GetCommand) Synopsis() string { return "Print resources" }
2727

2828
// Usage is subcommands usage
2929
func (*GetCommand) Usage() string {
30-
return `get [jobs|workers [jobname]|quota]:
30+
return `get [jobs|workers|registry [jobname]|quota]:
3131
Print resources.
3232
`
3333
}
@@ -47,6 +47,8 @@ func (p *GetCommand) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{
4747
jobs()
4848
} else if f.Arg(0) == "quota" {
4949
quota()
50+
} else if f.Arg(0) == "registry" {
51+
registry()
5052
} else if f.Arg(0) == "workers" {
5153
if f.NArg() != 2 {
5254
f.Usage()
@@ -91,7 +93,35 @@ func workers(jobname string) error {
9193
w.Flush()
9294
return nil
9395
}
94-
96+
func registry() error {
97+
respBody, err := utils.GetCall(utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/", nil)
98+
if err != nil {
99+
fmt.Fprintf(os.Stderr, "err getting registry secret: %v\n", err)
100+
return err
101+
}
102+
var respObj interface{}
103+
err = json.Unmarshal(respBody, &respObj)
104+
if err != nil {
105+
return err
106+
}
107+
items := respObj.(map[string]interface{})["msg"].(map[string]interface{})["items"].([]interface{})
108+
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
109+
if len(items) >= 0 {
110+
fmt.Fprintf(w, "ID\tNAME\tDATA\n")
111+
}
112+
idx := 0
113+
for _, r := range items {
114+
metadata := r.(map[string]interface{})["metadata"].(map[string]interface{})
115+
name := RegistryName(metadata["name"].(string))
116+
if len(name) != 0 {
117+
cTime := metadata["creation_timestamp"].(string)
118+
fmt.Fprintf(w, "%d\t%s\t%s\n", idx, name, cTime)
119+
idx++
120+
}
121+
}
122+
w.Flush()
123+
return err
124+
}
95125
func jobs() error {
96126
respBody, err := utils.GetCall(utils.Config.ActiveConfig.Endpoint+"/api/v1/jobs/", nil)
97127
if err != nil {

go/paddlecloud/registry.go

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package paddlecloud
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"errors"
7+
"flag"
8+
"fmt"
9+
"os"
10+
"strings"
11+
12+
"github.com/PaddlePaddle/cloud/go/utils"
13+
"github.com/golang/glog"
14+
"github.com/google/subcommands"
15+
)
16+
17+
// RegistryCmdName is the command-line name of the registry subcommand; it is
// also the resource kind accepted by the delete subcommand.
const RegistryCmdName = "registry"

// RegistryPrefix is prepended (followed by "-") to a user-chosen registry
// name to form the Kubernetes secret name.
const RegistryPrefix = "pcloud-registry"
23+
24+
// RegistryCmd holds the credentials of a Docker registry. It is both the
// flag target of the registry subcommand and the JSON payload sent to the
// registry API.
type RegistryCmd struct {
	// SecretName is the name of the registry secret.
	SecretName string `json:"name"`
	// Username is the Docker registry account name.
	Username string `json:"username"`
	// Password is the Docker registry account password.
	Password string `json:"password"`
	// Server is the address of the Docker registry.
	Server string `json:"server"`
}
31+
32+
// Name is the subcommand name
33+
func (r *RegistryCmd) Name() string { return RegistryCmdName }
34+
35+
// Synopsis is the subcommand's synopsis
36+
func (r *RegistryCmd) Synopsis() string { return "Add registry secret on paddlecloud." }
37+
38+
// Usage is the subcommand's usage
39+
func (r *RegistryCmd) Usage() string {
40+
return `registry <options> [add|del]:
41+
`
42+
}
43+
44+
// SetFlags registers subcommands flags.
45+
func (r *RegistryCmd) SetFlags(f *flag.FlagSet) {
46+
f.StringVar(&r.SecretName, "name", "", "registry secret name")
47+
f.StringVar(&r.Username, "username", "", "your Docker registry username")
48+
f.StringVar(&r.Password, "password", "", "your Docker registry password")
49+
f.StringVar(&r.Server, "server", "", "your Docker registry Server")
50+
}
51+
func (r *RegistryCmd) addRegistrySecret() error {
52+
jsonString, err := json.Marshal(r)
53+
if err != nil {
54+
return err
55+
}
56+
glog.V(10).Infof("Add registry secret: %s to %s\n", jsonString, utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/")
57+
respBody, err := utils.PostCall(utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/", jsonString)
58+
if err != nil {
59+
return err
60+
}
61+
var respObj interface{}
62+
if err = json.Unmarshal(respBody, &respObj); err != nil {
63+
return err
64+
}
65+
// FIXME: Return an error if error message is not empty. Use response code instead
66+
errMsg := respObj.(map[string]interface{})["msg"].(string)
67+
if len(errMsg) > 0 {
68+
return errors.New(errMsg)
69+
}
70+
return nil
71+
}
72+
73+
// Delete the specify registry
74+
func (r *RegistryCmd) Delete() error {
75+
jsonString, err := json.Marshal(r)
76+
if err != nil {
77+
return err
78+
}
79+
glog.V(10).Infof("Delete registry secret: %s to %s\n", jsonString, utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/")
80+
respBody, err := utils.DeleteCall(utils.Config.ActiveConfig.Endpoint+"/api/v1/registry/", jsonString)
81+
if err != nil {
82+
return err
83+
}
84+
85+
var respObj interface{}
86+
if err = json.Unmarshal(respBody, &respObj); err != nil {
87+
return err
88+
}
89+
// FIXME: Return an error if error message is not empty. Use response code instead
90+
errMsg := respObj.(map[string]interface{})["msg"].(string)
91+
if len(errMsg) > 0 {
92+
return errors.New(errMsg)
93+
}
94+
return nil
95+
}
96+
func (r *RegistryCmd) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
97+
if r.SecretName == "" || r.Username == "" || r.Password == "" || r.Server == "" {
98+
f.Usage()
99+
return subcommands.ExitFailure
100+
}
101+
r.SecretName = strings.Join([]string{RegistryPrefix, r.SecretName}, "-")
102+
err := r.addRegistrySecret()
103+
if err != nil {
104+
fmt.Fprintf(os.Stderr, "add registry secret failed: %s\n", err)
105+
return subcommands.ExitFailure
106+
}
107+
return subcommands.ExitSuccess
108+
}
109+
110+
// KubeRegistryName add a prefix for the name
111+
func KubeRegistryName(name string) string {
112+
return RegistryPrefix + "-" + name
113+
}
114+
115+
// RegistryName is registry secret name for PaddleCloud
116+
func RegistryName(name string) string {
117+
if strings.HasPrefix(name, RegistryPrefix) {
118+
return name[len(RegistryPrefix)+1 : len(name)]
119+
}
120+
return ""
121+
}

go/paddlecloud/submit.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ type SubmitCmd struct {
3030
Topology string `json:"topology"`
3131
Datacenter string `json:"datacenter"`
3232
Passes int `json:"passes"`
33+
Image string `json:"image"`
34+
Registry string `json:"registry"`
3335
}
3436

3537
// Name is subcommands name.
@@ -59,6 +61,8 @@ func (p *SubmitCmd) SetFlags(f *flag.FlagSet) {
5961
f.StringVar(&p.Entry, "entry", "", "Command of starting trainer process. Defaults to paddle train")
6062
f.StringVar(&p.Topology, "topology", "", "Will Be Deprecated .py file contains paddle v1 job configs")
6163
f.IntVar(&p.Passes, "passes", 1, "Pass count for training job")
64+
f.StringVar(&p.Image, "image", "", "Runtime Docker image for the job")
65+
f.StringVar(&p.Registry, "registry", "", "Registry secret name for the runtime Docker image")
6266
}
6367

6468
// Execute submit command.

paddlecloud/paddlecloud/urls.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
url(r"^api/v1/workers/", paddlejob.views.WorkersView.as_view()),
3333
url(r"^api/v1/quota/", paddlejob.views.QuotaView.as_view()),
3434
url(r"^api/v1/file/", paddlejob.views.SimpleFileView.as_view()),
35+
url(r"^api/v1/registry/", paddlejob.registry.RegistryView.as_view()),
3536
]
3637

3738
urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)

paddlecloud/paddlejob/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from paddle_job import PaddleJob
2-
__all__ = ["PaddleJob"]
2+
import registry
3+
__all__ = ["PaddleJob", "registry"]

0 commit comments

Comments
 (0)