Finrodx
diff --git a/Context.cpp b/Context.cpp
Lines changed: 9 additions & 7 deletions
diff --git a/Context.h b/Context.h
Lines changed: 21 additions & 0 deletions
@@ -20,11 +20,17 @@ static inline void argErrorHandler(int arg, const char * msg, void * data) {
  throw std::runtime_error(new_error.str());
 }
 
-Context::Context() {
+Context::Context() // installs the TH error handlers and the CPU generator; CUDA setup is left to doInitCUDA()
+: thc_state(nullptr) { // stays null unless doInitCUDA() allocates it; ~Context() checks for null before freeing
 
  THSetDefaultErrorHandler(errorHandler,nullptr);
  THSetDefaultArgErrorHandler(argErrorHandler,nullptr);
 
+ generator_registry[static_cast<int>(Backend::CPU)]
+ .reset(new CPUGenerator(this));
+ Type::registerAll(this); // NOTE(review): presumably fills type_registry — confirm in Type::registerAll
+}
+void Context::doInitCUDA() {
 #ifdef AT_CUDA_ENABLED
  thc_state = THCState_alloc();
  THCState_setDeviceAllocator(thc_state, THCCachingAllocator_get());
@@ -33,15 +39,11 @@ Context::Context() {
  generator_registry[static_cast<int>(Backend::CUDA)]
  .reset(new CUDAGenerator(this));
 #endif
-
- generator_registry[static_cast<int>(Backend::CPU)]
- .reset(new CPUGenerator(this));
- Type::registerAll(this);
 }
-
 Context::~Context() {
 #ifdef AT_CUDA_ENABLED
- THCState_free(thc_state);
+ if(thc_state) // still null when doInitCUDA() never ran, so there is nothing to free
+ THCState_free(thc_state);
 #endif
 }
 
 
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <memory>
+#include <mutex>
 #include "ATen/Generator.h"
 #include "ATen/Type.h"
 #include "ATen/Utils.h"
@@ -13,29 +14,49 @@ class Context {
 public:
  Context();
  Type & getType(Backend p, ScalarType s) {
+ initCUDAIfNeeded(p); // for Backend::CUDA, run the one-time CUDA setup before looking up the registered Type
  auto & type = type_registry[static_cast<int>(p)][static_cast<int>(s)];
  if(!type)
- runtime_error("%s%s%sType is not enabled.",toString(p),toString(s));
+ runtime_error("%s%sType is not enabled.",toString(p),toString(s)); // two %s for the two arguments; the removed line had a third %s with no matching argument
  return *type;
  }
  Generator & defaultGenerator(Backend p) {
+ initCUDAIfNeeded(p); // for Backend::CUDA this runs doInitCUDA() once, which is where the CUDA generator gets registered
  auto & generator = generator_registry[static_cast<int>(p)];
  if(!generator)
  runtime_error("%s backend type not enabled.",toString(p));
  return *generator;
  }
  bool hasCUDA() const;
+ // Runs doInitCUDA() through std::call_once on thc_init, then returns thc_state.
+ // Defined in the header so the frequently called getType can inline the call_once check.
+ THCState* lazyInitCUDA() {
+ std::call_once(thc_init,[&] {
+ doInitCUDA();
+ });
+ return thc_state;
+ }
  ~Context();
  std::unique_ptr<Generator>
  generator_registry[static_cast<int>(Backend::NumOptions)];
  std::unique_ptr<Type> type_registry
  [static_cast<int>(Backend::NumOptions)]
  [static_cast<int>(ScalarType::NumOptions)];
  THCState * thc_state;
+private:
+ void initCUDAIfNeeded(Backend p) { // does nothing for any backend other than Backend::CUDA
+ if(p == Backend::CUDA)
+ lazyInitCUDA(); // the returned THCState* is not used here
+ }
+ void doInitCUDA();
+ std::once_flag thc_init;
 };
 
 Context & globalContext();
 
+static inline void init() { // NOTE(review): presumably forces the global Context to be created up front — confirm
+ globalContext(); // the returned reference is discarded
+}
 
 static inline Type& getType(Backend p, ScalarType s) {
  return globalContext().getType(p,s);