
Commit cd8a64c

add GPT-2 run files
1 parent a84ab71 commit cd8a64c

File tree

convert.py
run.py

2 files changed: +77 -0


convert.py

Lines changed: 11 additions & 0 deletions
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# load the pretrained GPT-2 tokenizer and model from the Hugging Face hub
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# tokenize the sample text; tracing bakes this sequence length into the graph
with open("text.en", "r") as f:
    input_encoder = tokenizer(f.read(), return_tensors="pt")

print(":: Converting to ONNX")
torch.onnx.export(model, input_encoder["input_ids"], "gpt2.onnx")
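
torch.onnx.export traces the model with the example input above, so the exported graph is locked to that batch size and sequence length. A minimal sketch of the same export with named tensors and dynamic axes (the names "input_ids"/"logits" and the axis labels are illustrative assumptions, not part of this commit):

# sketch: same export, with named tensors and dynamic batch/sequence axes
# (tensor names and axis labels are assumptions, not from this commit)
torch.onnx.export(
    model,
    input_encoder["input_ids"],
    "gpt2.onnx",
    input_names=["input_ids"],
    output_names=["logits"],
    dynamic_axes={"input_ids": {0: "batch", 1: "sequence"}},
)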

run.py

Lines changed: 66 additions & 0 deletions
#!/usr/bin/env python
"""
Copyright (c) 2018 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import time
from argparse import ArgumentParser

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from openvino.inference_engine import IECore

if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument("--model", help="Path to an .xml file with a trained model.", default="gpt2.xml", type=str)
    args = parser.parse_args()

    print("-" * 70)
    print("Loading PyTorch model")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    with open("text.en", "r") as f:
        input_encoder = tokenizer(f.read(), return_tensors="pt")

    # time one forward pass of the eager PyTorch model (no_grad for a fair
    # inference-only measurement)
    st = time.time()
    with torch.no_grad():
        model(input_encoder["input_ids"])
    print(f"PyTorch inference in {time.time() - st:.5f}s")
    del model, tokenizer

    print("-" * 70)
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for the specified device.
    print("Creating Inference Engine...")
    ie = IECore()

    # Read the IR: the .xml topology plus its matching .bin weights.
    print("Loading network")
    net = ie.read_network(model_xml, model_bin)

    print("Loading IR to the plugin...")
    exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
    print(f"exec_net: {exec_net}")
    print("-" * 70)

    # This is a bit tricky: the input name comes from the ONNX graph. IECore
    # builds an internal graph of the computation, and .infer() feeds the dict
    # you pass through it. If you are unsure of what to pass, you can always
    # check the <model>.xml file; for PyTorch-exported models the value
    # "input.1" is the usual suspect. Happy hunting!
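    # A hedged way to list the IR's actual input names at runtime (assumption:
    # newer openvino.inference_engine releases expose `input_info`, older ones
    # `inputs`; adjust to the installed version):
    names = list(net.input_info) if hasattr(net, "input_info") else list(net.inputs)
    print(f"IR input names: {names}")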

    # Feed the tokenized ids to the IR. The exported graph has a fixed input
    # shape, so the sequence must match the length used at export time (the
    # name "0" is what this particular export produced).
    inputs = input_encoder["input_ids"].tolist()
    st = time.time()
    out = exec_net.infer(inputs={"0": inputs})
    print(f"OpenVINO inference in {time.time() - st:.5f}s")
    print("-" * 70)
