
Commit d0bdf7b

Create make_ted_dataset.py
1 parent c190923 commit d0bdf7b

1 file changed: +165 -0 lines changed

@@ -0,0 +1,165 @@
# ------------------------------------------------------------------------------
# Copyright (c) ETRI. All rights reserved.
# Licensed under the BSD 3-Clause License.
# This file is part of Youtube-Gesture-Dataset, a sub-project of AIR(AI for Robots) project.
# You can refer to details of AIR project at https://aiforrobots.github.io
# Written by Youngwoo Yoon (youngwoo@etri.re.kr)
# ------------------------------------------------------------------------------

import os

from tqdm import tqdm_gui
import unicodedata

from data_utils import *


def read_subtitle(vid):
    postfix_in_filename = '-en.vtt'
    file_list = glob.glob(my_config.SUBTITLE_PATH + '/*' + vid + postfix_in_filename)
    if len(file_list) > 1:
        print('more than one subtitle. check this.', file_list)
        assert False
    if len(file_list) == 1:
        return WebVTT().read(file_list[0])
    else:
        return []


# turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicode_to_ascii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )


# lowercase, trim, and remove non-letter characters
def normalize_string(s):
    s = unicode_to_ascii(s.lower().strip())
    s = re.sub(r"([,.!?])", r" \1 ", s)  # isolate some marks
    s = re.sub(r"(['])", r"", s)  # remove apostrophe
    s = re.sub(r"[^a-zA-Z,.!?]+", r" ", s)  # replace other characters with whitespace
    s = re.sub(r"\s+", r" ", s).strip()
    return s


def normalize_subtitle(vtt_subtitle):
    for i, sub in enumerate(vtt_subtitle):
        vtt_subtitle[i].text = normalize_string(vtt_subtitle[i].text)
    return vtt_subtitle


def make_ted_gesture_dataset():
    dataset_train = []
    dataset_val = []
    dataset_test = []
    n_saved_clips = [0, 0, 0]

    video_files = sorted(glob.glob(my_config.VIDEO_PATH + "/*.mp4"), key=os.path.getmtime)
    for v_i, video_file in enumerate(tqdm_gui(video_files)):
        vid = os.path.split(video_file)[1][-15:-4]
        print(vid)

        # load clip, video, and subtitle
        clip_data = load_clip_data(vid)
        if clip_data is None:
            print('[ERROR] clip data file does not exist!')
            break

        video_wrapper = read_video(my_config.VIDEO_PATH, vid)

        subtitle_type = my_config.SUBTITLE_TYPE
        subtitle = SubtitleWrapper(vid, subtitle_type).get()

        if subtitle is None:
            print('[WARNING] subtitle does not exist! skipping this video.')
            continue

        dataset_train.append({'vid': vid, 'clips': []})
        dataset_val.append({'vid': vid, 'clips': []})
        dataset_test.append({'vid': vid, 'clips': []})

        word_index = 0
        valid_clip_count = 0
        for ia, clip in enumerate(clip_data):
            start_frame_no, end_frame_no, clip_pose_all = clip['clip_info'][0], clip['clip_info'][1], clip['frames']
            clip_word_list = []

            # skip FALSE clips
            if not clip['clip_info'][2]:
                continue

            # train/val/test split
            if valid_clip_count % 10 == 9:
                dataset = dataset_test
                dataset_idx = 2
            elif valid_clip_count % 10 == 8:
                dataset = dataset_val
                dataset_idx = 1
            else:
                dataset = dataset_train
                dataset_idx = 0
            valid_clip_count += 1

            # get subtitle that fits clip
            for ib in range(word_index - 1, len(subtitle)):
                if ib < 0:
                    continue

                word_s = video_wrapper.second2frame(subtitle[ib]['start'])
                word_e = video_wrapper.second2frame(subtitle[ib]['end'])
                word = subtitle[ib]['word']

                if word_s >= end_frame_no:
                    word_index = ib
                    break

                if word_e <= start_frame_no:
                    continue

                word = normalize_string(word)
                clip_word_list.append([word, word_s, word_e])

            if clip_word_list:
                clip_skeleton = []

                # get skeletons of the upper body in the clip
                for frame in clip_pose_all:
                    if frame:
                        clip_skeleton.append(get_skeleton_from_frame(frame)[:24])
                    else:  # frame with no skeleton
                        clip_skeleton.append([0] * 24)

                # proceed if skeleton list is not empty
                if len(clip_skeleton) > 0:
                    # save subtitles and skeletons corresponding to clips
                    n_saved_clips[dataset_idx] += 1
                    dataset[-1]['clips'].append({'words': clip_word_list,
                                                 'skeletons': clip_skeleton,
                                                 'start_frame_no': start_frame_no, 'end_frame_no': end_frame_no,
                                                 'vid': vid
                                                 })
                    print('{} ({}, {})'.format(vid, start_frame_no, end_frame_no))
                else:
                    print('{} ({}, {}) - consecutive missing frames'.format(vid, start_frame_no, end_frame_no))

        # for debugging
        # if vid == 'yq3TQoMjXTw':
        #     break

    print('writing to pickle...')
    with open('ted_gesture_dataset_train.pickle', 'wb') as f:
        pickle.dump(dataset_train, f)
    with open('ted_gesture_dataset_train_small.pickle', 'wb') as f:  # for debugging
        pickle.dump(dataset_train[0:10], f)
    with open('ted_gesture_dataset_val.pickle', 'wb') as f:
        pickle.dump(dataset_val, f)
    with open('ted_gesture_dataset_test.pickle', 'wb') as f:
        pickle.dump(dataset_test, f)

    print('no. of saved clips: train {}, val {}, test {}'.format(n_saved_clips[0], n_saved_clips[1], n_saved_clips[2]))


if __name__ == '__main__':
    make_ted_gesture_dataset()
