TFP Probabilistic Layers: Regression


In this example we show how to fit regression models using TFP's "probabilistic layers."

Dependencies & Prerequisites

Import
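The import cell is collapsed in this rendering. A minimal sketch of the imports the rest of the notebook assumes (the tf_keras alias comes from the standalone tf-keras package, and tfd abbreviates tfp.distributions):

# Assumed imports; the original import cell is hidden.
# Requires roughly: pip install tensorflow tensorflow-probability tf-keras matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import tf_keras

tfd = tfp.distributions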

Make things Fast!

Before we dive in, let's make sure we're using a GPU for this demo.

To do this, select "Runtime" -> "Change runtime type" -> "Hardware accelerator" -> "GPU".

The following snippet will verify that we have access to a GPU.

if tf.test.gpu_device_name() != '/device:GPU:0':
  print('WARNING: GPU device not found.')
else:
  print('SUCCESS: Found GPU: {}'.format(tf.test.gpu_device_name()))
 WARNING: GPU device not found. 

Motivation

Wouldn't it be great if we could use TFP to specify a probabilistic model and then simply minimize the negative log-likelihood, i.e.,

negloglik = lambda y, rv_y: -rv_y.log_prob(y) 

Well, not only is it possible, but this colab shows how! (In the context of linear regression problems.)

Synthesize dataset.
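The data-generating cell is collapsed; the later cells only assume the training arrays x and y, a test grid x_tst, and the interval x_range. A minimal sketch of such a generator, assuming a roughly linear trend with input-dependent (heteroscedastic) noise; the constants here are assumptions, not the notebook's exact values:

# Sketch of a synthetic dataset with input-dependent noise.
# Dtypes are left as NumPy defaults (float64), matching the float64 setting used in Case 5.
w0, b0 = 0.125, 5.
x_range = [-20, 60]

def load_dataset(n=150, n_tst=150):
  np.random.seed(43)
  def s(x):  # noise scale grows with x
    g = (x - x_range[0]) / (x_range[1] - x_range[0])
    return 3 * (0.25 + g**2.)
  x = (x_range[1] - x_range[0]) * np.random.rand(n) + x_range[0]
  eps = np.random.randn(n) * s(x)
  y = (w0 * x * (1. + np.sin(x)) + b0) + eps
  x = x[..., np.newaxis]
  x_tst = np.linspace(*x_range, num=n_tst)[..., np.newaxis]
  return y, x, x_tst

y, x, x_tst = load_dataset()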

Case 1: No Uncertainty

# Build model.
model = tf_keras.Sequential([
  tf_keras.layers.Dense(1),
  tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1)),
])

# Do inference.
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
[print(np.squeeze(w.numpy())) for w in model.weights];
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
 0.13032457 5.13029 

Figure 1: No uncertainty.


Case 2: Aleatoric Uncertainty

# Build model.
model = tf_keras.Sequential([
  tf_keras.layers.Dense(1 + 1),
  tfp.layers.DistributionLambda(
      lambda t: tfd.Normal(loc=t[..., :1],
                           scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:]))),
])

# Do inference.
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
[print(np.squeeze(w.numpy())) for w in model.weights];
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
 [0.14738432 0.1815331 ] [4.4812164 1.2219843] 

Figure 2: Aleatoric Uncertainty
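Since yhat is a tfd.Distribution, the mean and the learned noise band in Figure 2 can be read off directly. A minimal sketch of how such a plot could be produced, using the plt imported above (the plotting calls are assumptions, not part of the original cell):

# Extract the predictive mean and +/- 2 standard deviations from `yhat`.
m = yhat.mean().numpy().squeeze()
s = yhat.stddev().numpy().squeeze()
plt.scatter(x, y, s=10, alpha=0.5)
plt.plot(x_tst, m, 'r', label='mean')
plt.plot(x_tst, m + 2 * s, 'g', label='mean + 2 stddev')
plt.plot(x_tst, m - 2 * s, 'g', label='mean - 2 stddev')
plt.legend();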


Case 3: Epistemic Uncertainty

# Specify the surrogate posterior over `keras.layers.Dense` `kernel` and `bias`.
def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
  n = kernel_size + bias_size
  c = np.log(np.expm1(1.))
  return tf_keras.Sequential([
      tfp.layers.VariableLayer(2 * n, dtype=dtype),
      tfp.layers.DistributionLambda(lambda t: tfd.Independent(
          tfd.Normal(loc=t[..., :n],
                     scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
          reinterpreted_batch_ndims=1)),
  ])
# Specify the prior over `keras.layers.Dense` `kernel` and `bias`.
def prior_trainable(kernel_size, bias_size=0, dtype=None):
  n = kernel_size + bias_size
  return tf_keras.Sequential([
      tfp.layers.VariableLayer(n, dtype=dtype),
      tfp.layers.DistributionLambda(lambda t: tfd.Independent(
          tfd.Normal(loc=t, scale=1),
          reinterpreted_batch_ndims=1)),
  ])
# Build model.
model = tf_keras.Sequential([
  tfp.layers.DenseVariational(1, posterior_mean_field, prior_trainable,
                              kl_weight=1/x.shape[0]),
  tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1)),
])

# Do inference.
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
[print(np.squeeze(w.numpy())) for w in model.weights];
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
 [ 0.1387333 5.125723 -4.112224 -2.2171402] [0.12476114 5.147452 ] 

Figure 3: Epistemic Uncertainty
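With DenseVariational, the weights are resampled from the surrogate posterior on every forward pass, so calling the model repeatedly yields different regression lines; Figure 3 overlays such draws. A minimal sketch of producing that overlay (the number of draws is an assumption):

# Each call to `model(x_tst)` samples fresh weights from the posterior,
# so the overlaid lines visualize epistemic uncertainty.
plt.scatter(x, y, s=10, alpha=0.5)
for _ in range(25):
  yhat_ = model(x_tst)
  plt.plot(x_tst, yhat_.mean().numpy().squeeze(), 'r', alpha=0.2)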


Case 4: Aleatoric & Epistemic Uncertainty

# Build model.
model = tf_keras.Sequential([
  tfp.layers.DenseVariational(1 + 1, posterior_mean_field, prior_trainable,
                              kl_weight=1/x.shape[0]),
  tfp.layers.DistributionLambda(
      lambda t: tfd.Normal(loc=t[..., :1],
                           scale=1e-3 + tf.math.softplus(0.01 * t[..., 1:]))),
])

# Do inference.
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=negloglik)
model.fit(x, y, epochs=1000, verbose=False);

# Profit.
[print(np.squeeze(w.numpy())) for w in model.weights];
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)
 [ 0.12753433 2.7504077 5.160624 3.8251898 -3.4283297 -0.8961645 -2.2378397 0.1496858 ] [0.14511648 2.7104297 5.1248145 3.7724588 ] 

Figure 4: Both Aleatoric & Epistemic Uncertainty


Case 5: Functional Uncertainty

Custom PSD Kernel
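The VariationalGaussianProcess layer below takes a kernel_provider object exposing a trainable kernel property. The defining cell is collapsed here; a minimal sketch of such a provider built on tfp.math.psd_kernels.ExponentiatedQuadratic (the softplus scaling constants are assumptions):

# Sketch of the hidden `RBFKernelFn`: a Keras layer used only as a
# container for the kernel's trainable amplitude and length scale.
class RBFKernelFn(tf_keras.layers.Layer):
  def __init__(self, **kwargs):
    super(RBFKernelFn, self).__init__(**kwargs)
    dtype = kwargs.get('dtype', None)
    self._amplitude = self.add_weight(
        initializer=tf.constant_initializer(0),
        dtype=dtype,
        name='amplitude')
    self._length_scale = self.add_weight(
        initializer=tf.constant_initializer(0),
        dtype=dtype,
        name='length_scale')

  def call(self, x):
    # Never used for computation; the layer exists only to hold variables.
    return x

  @property
  def kernel(self):
    return tfp.math.psd_kernels.ExponentiatedQuadratic(
        amplitude=tf.nn.softplus(0.1 * self._amplitude),
        length_scale=tf.nn.softplus(5. * self._length_scale))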

# For numeric stability, set the default floating-point dtype to float64.
tf_keras.backend.set_floatx('float64')

# Build model.
num_inducing_points = 40
model = tf_keras.Sequential([
    tf_keras.layers.InputLayer(input_shape=[1]),
    tf_keras.layers.Dense(1, kernel_initializer='ones', use_bias=False),
    tfp.layers.VariationalGaussianProcess(
        num_inducing_points=num_inducing_points,
        kernel_provider=RBFKernelFn(),
        event_shape=[1],
        inducing_index_points_initializer=tf.constant_initializer(
            np.linspace(*x_range, num=num_inducing_points,
                        dtype=x.dtype)[..., np.newaxis]),
        unconstrained_observation_noise_variance_initializer=(
            tf.constant_initializer(np.array(0.54).astype(x.dtype))),
    ),
])

# Do inference.
batch_size = 32
loss = lambda y, rv_y: rv_y.variational_loss(
    y, kl_weight=np.array(batch_size, x.dtype) / x.shape[0])
model.compile(optimizer=tf_keras.optimizers.Adam(learning_rate=0.01), loss=loss)
model.fit(x, y, batch_size=batch_size, epochs=1000, verbose=False)

# Profit.
yhat = model(x_tst)
assert isinstance(yhat, tfd.Distribution)

Figure 5: Functional Uncertainty
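Here yhat is a variational Gaussian process posterior, so whole curves, not just pointwise moments, can be drawn from it; Figure 5 overlays such function samples. A minimal sketch (the sample count is an assumption):

# Draw whole functions from the posterior predictive and overlay them.
num_samples = 7
samples = yhat.sample(num_samples).numpy()
plt.scatter(x, y, s=10, alpha=0.5)
for i in range(num_samples):
  plt.plot(x_tst, samples[i, ..., 0], 'r', alpha=0.4)
plt.show()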
