- Notifications
You must be signed in to change notification settings - Fork 340
Open
Description
1. System information
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Linux Ubuntu 18.04
- TensorFlow installation (pip package or built from source): pip
- TensorFlow library (version, if pip package or github SHA, if built from source): 1.15.5
2. Code
Provide code to help us reproduce your issues using one of the following options:
Pretrained model: mask_rcnn_inception_v2_coco
pipeline.config: pipeline.config
Training script: model_main.py
Original config file downloaded from: here
3. Failure after conversion
After adding the below code for quantization aware training in pipeline config file:
graph_rewriter { quantization { delay: 48000 weight_bits: 8 activation_bits: 8 } }
The training failed with :
INFO:tensorflow:Graph was finalized. I0823 06:00:53.102821 139930893776704 monitored_session.py:240] Graph was finalized. INFO:tensorflow:Restoring parameters from retrain/f1_mask_rcnn/model.ckpt-2 I0823 06:00:53.103820 139930893776704 saver.py:1284] Restoring parameters from retrain/f1_mask_rcnn/model.ckpt-2 2021-08-23 06:00:53.974053: W tensorflow/core/framework/op_kernel.cc:1651] OP_REQUIRES failed at save_restore_v2_ops.cc:184 : Not found: Key SecondStageFeatureExtractor_1/InceptionV2/Mixed_5a/Branch_0/Conv2d_0a_1x1/act_quant/max not found in checkpoint Traceback (most recent call last): File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1365, in _do_call return fn(*args) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1350, in _run_fn target_list, run_metadata) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1443, in _call_tf_sessionrun run_metadata) tensorflow.python.framework.errors_impl.NotFoundError: Key SecondStageFeatureExtractor_1/InceptionV2/Mixed_5a/Branch_0/Conv2d_0a_1x1/act_quant/max not found in checkpoint [[{{node save/RestoreV2}}]] During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 1290, in restore {self.saver_def.filename_tensor_name: save_path}) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 956, in run run_metadata_ptr) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1180, in _run feed_dict_tensor, options, run_metadata) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1359, in _do_run run_metadata) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py", line 1384, in _do_call raise type(e)(node_def, op, message) tensorflow.python.framework.errors_impl.NotFoundError: Key SecondStageFeatureExtractor_1/InceptionV2/Mixed_5a/Branch_0/Conv2d_0a_1x1/act_quant/max not found in checkpoint [[node save/RestoreV2 (defined at /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py:1748) ]] Original stack trace for 'save/RestoreV2': File "object_detection/model_main.py", line 109, in <module> tf.app.run() File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/platform/app.py", line 40, in run _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef) File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 303, in run _run_main(main, args) File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 251, in _run_main sys.exit(main(argv)) File "object_detection/model_main.py", line 105, in main tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0]) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 473, in train_and_evaluate return executor.run() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 613, in run return self.run_local() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 714, in run_local saving_listeners=saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 370, in train loss = self._train_model(input_fn, hooks, saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1161, in _train_model return self._train_model_default(input_fn, hooks, saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1195, in _train_model_default saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1495, in _train_with_estimator_spec any_step_done = True File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 861, in __exit__ self._close_internal(exception_type) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 894, in _close_internal h.end(self._coordinated_creator.tf_sess) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/basic_session_run_hooks.py", line 600, in end self._save(session, last_step) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/basic_session_run_hooks.py", line 619, in _save if l.after_save(session, step): File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 519, in after_save self._evaluate(global_step_value) # updates self.eval_result File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 539, in _evaluate self._evaluator.evaluate_and_export()) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 920, in evaluate_and_export hooks=self._eval_spec.hooks) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 480, in evaluate name=name) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 522, in _actual_eval return _evaluate() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 504, in _evaluate self._evaluate_build_graph(input_fn, hooks, checkpoint_path)) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1511, in _evaluate_build_graph self._call_model_fn_eval(input_fn, self.config)) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1547, in _call_model_fn_eval features, labels, ModeKeys.EVAL, config) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1149, in _call_model_fn model_fn_results = self._model_fn(features=features, **kwargs) File "/tensorflow/models/research/object_detection/model_lib.py", line 475, in model_fn save_relative_paths=True) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 828, in __init__ self.build() File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 840, in build self._build(self._filename, build_save=True, build_restore=True) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 878, in _build build_restore=build_restore) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 502, in _build_internal restore_sequentially, reshape) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 381, in _AddShardedRestoreOps name="restore_shard")) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 328, in _AddRestoreOps restore_sequentially) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 575, in bulk_restore return io_ops.restore_v2(filename_tensor, names, slices, dtypes) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gen_io_ops.py", line 1696, in restore_v2 name=name) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper op_def=op_def) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func return func(*args, **kwargs) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op attrs, op_def, compute_device) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal op_def=op_def) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__ self._traceback = tf_stack.extract_stack() During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 1300, in restore names_to_keys = object_graph_key_mapping(save_path) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 1618, in object_graph_key_mapping object_graph_string = reader.get_tensor(trackable.OBJECT_GRAPH_PROTO_KEY) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/pywrap_tensorflow_internal.py", line 915, in get_tensor return CheckpointReader_GetTensor(self, compat.as_bytes(tensor_str)) tensorflow.python.framework.errors_impl.NotFoundError: Key _CHECKPOINTABLE_OBJECT_GRAPH not found in checkpoint During handling of the above exception, another exception occurred: Traceback (most recent call last): File "object_detection/model_main.py", line 109, in <module> tf.app.run() File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/platform/app.py", line 40, in run _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef) File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 303, in run _run_main(main, args) File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 251, in _run_main sys.exit(main(argv)) File "object_detection/model_main.py", line 105, in main tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0]) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 473, in train_and_evaluate return executor.run() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 613, in run return self.run_local() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 714, in run_local saving_listeners=saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 370, in train loss = self._train_model(input_fn, hooks, saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1161, in _train_model return self._train_model_default(input_fn, hooks, saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1195, in _train_model_default saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1495, in _train_with_estimator_spec any_step_done = True File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 861, in __exit__ self._close_internal(exception_type) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 894, in _close_internal h.end(self._coordinated_creator.tf_sess) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/basic_session_run_hooks.py", line 600, in end self._save(session, last_step) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/basic_session_run_hooks.py", line 619, in _save if l.after_save(session, step): File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 519, in after_save self._evaluate(global_step_value) # updates self.eval_result File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 539, in _evaluate self._evaluator.evaluate_and_export()) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 920, in evaluate_and_export hooks=self._eval_spec.hooks) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 480, in evaluate name=name) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 522, in _actual_eval return _evaluate() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 511, in _evaluate output_dir=self.eval_dir(name)) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1619, in _evaluate_run config=self._session_config) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/evaluation.py", line 269, in _evaluate_once session_creator=session_creator, hooks=hooks) as session: File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1014, in __init__ stop_grace_period_secs=stop_grace_period_secs) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 725, in __init__ self._sess = _RecoverableSession(self._coordinated_creator) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1207, in __init__ _WrappedSession.__init__(self, self._create_session()) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1212, in _create_session return self._sess_creator.create_session() File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 878, in create_session self.tf_sess = self._session_creator.create_session() File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 647, in create_session init_fn=self._scaffold.init_fn) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/session_manager.py", line 290, in prepare_session config=config) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/session_manager.py", line 204, in _restore_checkpoint saver.restore(sess, checkpoint_filename_with_path) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 1306, in restore err, "a Variable name or other graph key that is missing") tensorflow.python.framework.errors_impl.NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error: Key SecondStageFeatureExtractor_1/InceptionV2/Mixed_5a/Branch_0/Conv2d_0a_1x1/act_quant/max not found in checkpoint [[node save/RestoreV2 (defined at /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py:1748) ]] Original stack trace for 'save/RestoreV2': File "object_detection/model_main.py", line 109, in <module> tf.app.run() File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/platform/app.py", line 40, in run _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef) File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 303, in run _run_main(main, args) File "/usr/local/lib/python3.6/dist-packages/absl/app.py", line 251, in _run_main sys.exit(main(argv)) File "object_detection/model_main.py", line 105, in main tf.estimator.train_and_evaluate(estimator, train_spec, eval_specs[0]) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 473, in train_and_evaluate return executor.run() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 613, in run return self.run_local() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 714, in run_local saving_listeners=saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 370, in train loss = self._train_model(input_fn, hooks, saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1161, in _train_model return self._train_model_default(input_fn, hooks, saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1195, in _train_model_default saving_listeners) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1495, in _train_with_estimator_spec any_step_done = True File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 861, in __exit__ self._close_internal(exception_type) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 894, in _close_internal h.end(self._coordinated_creator.tf_sess) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/basic_session_run_hooks.py", line 600, in end self._save(session, last_step) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/basic_session_run_hooks.py", line 619, in _save if l.after_save(session, step): File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 519, in after_save self._evaluate(global_step_value) # updates self.eval_result File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 539, in _evaluate self._evaluator.evaluate_and_export()) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/training.py", line 920, in evaluate_and_export hooks=self._eval_spec.hooks) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 480, in evaluate name=name) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 522, in _actual_eval return _evaluate() File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 504, in _evaluate self._evaluate_build_graph(input_fn, hooks, checkpoint_path)) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1511, in _evaluate_build_graph self._call_model_fn_eval(input_fn, self.config)) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1547, in _call_model_fn_eval features, labels, ModeKeys.EVAL, config) File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1149, in _call_model_fn model_fn_results = self._model_fn(features=features, **kwargs) File "/tensorflow/models/research/object_detection/model_lib.py", line 475, in model_fn save_relative_paths=True) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 828, in __init__ self.build() File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 840, in build self._build(self._filename, build_save=True, build_restore=True) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 878, in _build build_restore=build_restore) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 502, in _build_internal restore_sequentially, reshape) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 381, in _AddShardedRestoreOps name="restore_shard")) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 328, in _AddRestoreOps restore_sequentially) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/saver.py", line 575, in bulk_restore return io_ops.restore_v2(filename_tensor, names, slices, dtypes) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gen_io_ops.py", line 1696, in restore_v2 name=name) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper op_def=op_def) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func return func(*args, **kwargs) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op attrs, op_def, compute_device) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal op_def=op_def) File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__ self._traceback = tf_stack.extract_stack() Metadata
Metadata
Assignees
Labels
No labels