#!/usr/bin/env python
"""
 Copyright (c) 2018 Intel Corporation
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
"""
import os
import time
from argparse import ArgumentParser

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from openvino.inference_engine import IECore

if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument("--model", help="Path to an .xml file with a trained model.",
                        default="gpt2.xml", type=str)
    args = parser.parse_args()

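    # How the IR might have been produced (an assumption, not part of this
    # script): export GPT-2 to ONNX with torch.onnx.export, then convert the
    # result with OpenVINO's Model Optimizer, e.g.
    #   torch.onnx.export(model, torch.ones(1, 127, dtype=torch.long),
    #                     "gpt2.onnx", opset_version=11)
    #   mo.py --input_model gpt2.onnx
    # The sequence length chosen at export time (127 here) becomes the fixed
    # input shape of the IR.
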
    print("-" * 70)
    print("Loading PyTorch model")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    with open("text.en", "r") as f:
        input_encoder = tokenizer(f.read(), return_tensors="pt")
    st = time.time()
    with torch.no_grad():  # no gradients needed for a pure inference timing
        model(input_encoder["input_ids"])
    print(f"PyTorch inference in {time.time() - st:.5f}s")
    del model, tokenizer

    print("-" * 70)
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for the specified device.
    print("Creating Inference Engine...")
    ie = IECore()

    # Read the IR (network topology in .xml, weights in .bin).
    print("Loading network")
    net = ie.read_network(model=model_xml, weights=model_bin)

    print("Loading IR to the plugin...")
    exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2)
    print(f"exec_net: {exec_net}")
    print("-" * 70)
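
    # A quick sanity check (a sketch assuming the OpenVINO 2021.x Python API,
    # where inputs/outputs are exposed as net.input_info / net.outputs; older
    # releases use the deprecated net.inputs dict): print what the IR expects,
    # so the blob name and shape used below are not guesswork.
    for name, info in net.input_info.items():
        print(f"IR input:  {name} -> shape {info.input_data.shape}")
    for name, data in net.outputs.items():
        print(f"IR output: {name} -> shape {data.shape}")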

    # This is a bit tricky: the blob name passed to .infer() must match an
    # input of the ONNX graph the IR was converted from. If you are unsure
    # what to pass, check the <model>.xml file (or the printout above). For
    # ONNX-exported PyTorch models, names like "input.1" or "0" are the usual
    # suspects. Happy hunting!
    input_name = next(iter(net.input_info))  # resolve instead of hard-coding "0"
    inputs = input_encoder["input_ids"].tolist()
    st = time.time()
    # Note: the IR has a fixed input shape baked in at export time, so the
    # tokenized sequence must match that length (pad or truncate if needed).
    out = exec_net.infer(inputs={input_name: inputs})
    print(f"OpenVINO inference in {time.time() - st:.5f}s")
    print("-" * 70)
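
    # Since the network was loaded with num_requests=2, here is a hedged
    # sketch of the asynchronous API (again assuming the 2021.x Python
    # inference engine): launch both requests, then block until each is done.
    # On a multi-core CPU the two requests can overlap.
    exec_net.start_async(request_id=0, inputs={input_name: inputs})
    exec_net.start_async(request_id=1, inputs={input_name: inputs})
    for request in exec_net.requests:
        request.wait(-1)  # -1 waits indefinitely for the result
    print("Async requests finished")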