Skip to content
This repository was archived by the owner on Jan 24, 2024. It is now read-only.

Commit 6b68be7

Browse files
authored
Merge pull request #40 from NHZlX/mobilenet_ssd_pascal_model
add mobilenet ssd pascal model
2 parents 0d2c250 + 6bb6ee9 commit 6b68be7

File tree

7 files changed

+644
-0
lines changed

7 files changed

+644
-0
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Mobilenet SSD
2+
3+
We provide a MobileNet (1.0) SSD model trained on the PASCAL VOC0712 dataset. This model can be deployed on embedded systems,
4+
and you can modify the network to adapt to your own application.
5+
6+
## run the demo
7+
1. Install PaddlePaddle (see: [PaddlePaddle installation instructions](http://paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/index_en.html))
8+
9+
2. Download the [parameters](https://pan.baidu.com/s/1o7S8yWq) trained on PASCAL VOC0712.
10+
11+
3. `python infer.py`
12+
13+
14+
## train on your own dataset
15+
You can modify the network to adapt it to your own application. PaddlePaddle provides a detailed document showing how to train your model with SSD; refer to the document [here](https://github.com/PaddlePaddle/models/tree/develop/ssd).
16+

models/mobilenet_ssd_pascal/config/__init__.py

Whitespace-only changes.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
background
2+
aeroplane
3+
bicycle
4+
bird
5+
boat
6+
bottle
7+
bus
8+
car
9+
cat
10+
chair
11+
cow
12+
diningtable
13+
dog
14+
horse
15+
motorbike
16+
person
17+
pottedplant
18+
sheep
19+
sofa
20+
train
21+
tvmonitor
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from easydict import EasyDict as edict
2+
import numpy as np
3+
4+
# Root configuration node. Other modules import it under the name ``cfg``.
__C = edict()
cfg = __C

# Input image geometry and label space.
__C.IMG_WIDTH = 300
__C.IMG_HEIGHT = 300
__C.IMG_CHANNEL = 3
__C.CLASS_NUM = 21
__C.BACKGROUND_ID = 0

# training settings
__C.TRAIN = edict(
    MOMENTUM=0.9,
    BATCH_SIZE=32,
    LEARNING_RATE=0.0005 / 4,
    NUM_PASS=50000,
    L2REGULARIZATION=0.0005 * 4,
    LEARNING_RATE_DECAY_A=0.1,
    LEARNING_RATE_DECAY_B=16551 * 80,
    LEARNING_RATE_SCHEDULE='discexp',
)

__C.NET = edict()

# configuration for multibox_loss_layer
__C.NET.MBLOSS = edict(
    OVERLAP_THRESHOLD=0.5,
    NEG_POS_RATIO=3.0,
    NEG_OVERLAP=0.5,
)

# configuration for detection_map
__C.NET.DETMAP = edict(
    OVERLAP_THRESHOLD=0.5,
    EVAL_DIFFICULT=False,
    AP_TYPE="11point",
)

# configuration for detection_output_layer
__C.NET.DETOUT = edict(
    CONFIDENCE_THRESHOLD=0.01,
    NMS_THRESHOLD=0.45,
    NMS_TOP_K=400,
    KEEP_TOP_K=200,
)


def _prior_box_layer(min_size, aspect_ratio, max_size=None):
    """Return a layer node whose ``PB`` child holds the prior-box settings.

    ``MAX_SIZE`` is only set when ``max_size`` is given, so a layer built
    without it has no ``MAX_SIZE`` attribute at all. Every call creates its
    own ``VARIANCE`` list, so layers never share mutable state.
    """
    prior_box = edict()
    prior_box.MIN_SIZE = [min_size]
    if max_size is not None:
        prior_box.MAX_SIZE = [max_size]
    prior_box.ASPECT_RATIO = list(aspect_ratio)
    prior_box.VARIANCE = [0.1, 0.1, 0.2, 0.2]

    layer = edict()
    layer.PB = prior_box
    return layer


# Prior-box settings, one node per layer. CONV11 is the only layer that
# defines no MAX_SIZE and uses a single aspect ratio.
__C.NET.CONV11 = _prior_box_layer(60, [2.])
__C.NET.CONV13 = _prior_box_layer(105, [2., 3.], max_size=150)
__C.NET.CONV14_2 = _prior_box_layer(150, [2., 3.], max_size=195)
__C.NET.CONV15_2 = _prior_box_layer(195, [2., 3.], max_size=240)
__C.NET.CONV16_2 = _prior_box_layer(240, [2., 3.], max_size=285)
__C.NET.CONV17_2 = _prior_box_layer(285, [2., 3.], max_size=300)
94+
133 KB
Loading
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import numpy as np
2+
import gzip
3+
import copy
4+
import cv2, os
5+
6+
import paddle.v2 as paddle
7+
from mobilenet_ssd_pascal import net_conf
8+
from config.pascal_voc_conf import cfg
9+
10+
# Class names, one per line. Trailing newlines are kept here and stripped
# by the code that renders a label. The file is read inside ``with`` so the
# handle is closed as soon as the lines are loaded.
with open('./config/label_list') as label_file:
    label_lists = label_file.readlines()
11+
12+
def _infer(inferer, infer_data, threshold):
    """Run the network and keep detections scoring at least ``threshold``.

    Args:
        inferer: object exposing ``infer(input=...)``; the result is indexed
            as a 2-D array whose column 2 is compared against ``threshold``.
        infer_data: batch forwarded unchanged to ``inferer.infer``.
        threshold: minimum value of column 2 for a row to be kept.

    Returns:
        A list with one 7-element list per kept row, holding columns 0-6 of
        that row in order, with column 1 (the label) decremented by one.
    """
    detections = inferer.infer(input=infer_data)
    kept_rows = np.where(detections[:, 2] >= threshold)[0]
    return [
        [row[0], row[1] - 1, row[2], row[3], row[4], row[5], row[6]]
        for row in (detections[i] for i in kept_rows)
    ]
23+
24+
def draw_result(frame, ret_res, h, w):
    """Draw a box and a class name on ``frame`` for every detection.

    ``frame`` is modified in place through ``cv2.rectangle`` and
    ``cv2.putText``; nothing is returned.

    Args:
        frame: image to draw on.
        ret_res: iterable of detections indexed as
            ``[_, label, _, xmin, ymin, xmax, ymax]``. The x coordinates are
            multiplied by ``w`` and the y coordinates by ``h`` to get pixels.
        h: height used to scale the y coordinates.
        w: width used to scale the x coordinates.
    """
    # Parenthesised form prints the same on Python 2 and also parses on
    # Python 3.
    print(ret_res)

    # Loop-invariant, so looked up once.
    font = cv2.FONT_HERSHEY_SIMPLEX

    for det_res in ret_res:
        label = int(det_res[1])
        xmin = int(round(det_res[3] * w))
        ymin = int(round(det_res[4] * h))
        xmax = int(round(det_res[5] * w))
        ymax = int(round(det_res[6] * h))

        # NOTE(review): xmin is a pixel coordinate at this point, so these
        # channel values are generally outside 0-255. The expression is
        # kept unchanged to preserve the rendered output; confirm the
        # intended colour.
        box_color = (0, (1 - xmin) * 255, xmin * 255)
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), box_color, 2)

        # label + 1 indexes label_lists one entry past the label value
        # carried by the detection.
        cv2.putText(frame, label_lists[label + 1].strip(),
                    (xmin + 10, ymin + 10), font, 1.0, (255, 0, 0), 2)
38+
39+
40+
def pre_process(img):
    """Convert an image into the flat float32 vector fed to the network.

    Steps: resize to the configured input size, reverse the channel axis,
    reorder HWC to CHW, cast to float32, subtract a per-channel mean and
    flatten.

    Args:
        img: H x W x C image array. The channel axis is reversed, which the
            original comment describes as producing RGB.

    Returns:
        1-D float32 array of the mean-subtracted CHW image.
    """
    # cv2.resize expects dsize as (width, height). The previous
    # (height, width) order only worked because both are currently 300.
    img = cv2.resize(img, (cfg.IMG_WIDTH, cfg.IMG_HEIGHT),
                     interpolation=cv2.INTER_AREA)
    # image should be RGB format
    img = img[:, :, ::-1]
    # image should be in CHW format
    img = img.transpose((2, 0, 1)).astype('float32')

    # One mean value per channel, broadcast over height and width.
    # NOTE(review): the values are applied to channels 0, 1, 2 after the
    # channel flip; confirm this order matches the one used in training.
    img_mean = np.array([104, 117, 124],
                        dtype='float32')[:, np.newaxis, np.newaxis]
    img -= img_mean
    return img.flatten()
54+
55+
def infer(model_path, threshold):
    """Detect objects in ./images/example.jpg and save an annotated copy.

    Builds the inference network, loads parameters from ``model_path``, runs
    the example image through it, draws the kept detections and writes the
    result to ./images/result.jpg.

    Args:
        model_path: path to the gzip-compressed tar of model parameters.
        threshold: minimum score a detection needs to be drawn.

    Raises:
        AssertionError: if ``model_path`` is not an existing file.
        IOError: if the example image cannot be read.
    """
    net = net_conf(mode='infer')

    assert os.path.isfile(model_path), 'Invalid model.'
    # Close the archive as soon as the parameters are loaded instead of
    # leaving the handle open.
    with gzip.open(model_path) as model_file:
        parameters = paddle.parameters.Parameters.from_tar(model_file)

    # build the inference network
    inferer = paddle.inference.Inference(
        output_layer=net, parameters=parameters)

    image_path = './images/example.jpg'
    frame = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file is missing
    # or unreadable; fail here with a clear message rather than at
    # frame.shape below.
    if frame is None:
        raise IOError('Cannot read image: %s' % image_path)

    h, w, _ = frame.shape

    # preprocess a copy so the original frame stays intact for drawing
    img = pre_process(copy.deepcopy(frame))
    test_data = [[img]]

    # the forward process
    ret_res = _infer(inferer, test_data, threshold)

    draw_result(frame, ret_res, h, w)
    cv2.imwrite('./images/result.jpg', frame)
82+
83+
if __name__ == "__main__":
    # init paddle environment
    # NOTE(review): gpu_id is passed even though use_gpu is False; confirm
    # whether it has any effect in this mode.
    paddle.init(use_gpu=False, trainer_count=1, gpu_id=3)

    model_archive = './mobilenet_ssd_pascal.tar.gz'
    score_threshold = 0.3
    infer(model_path=model_archive, threshold=score_threshold)

0 commit comments

Comments
 (0)