fhaque
diff --git a/‎advanced_functionality/scikit_bring_your_own/container/decision_trees/predictor.py‎
Lines changed: 0 additions & 3 deletions b/‎advanced_functionality/scikit_bring_your_own/container/decision_trees/predictor.py‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎advanced_functionality/scikit_bring_your_own/scikit_bring_your_own.ipynb‎
Lines changed: 28 additions & 6 deletions b/‎advanced_functionality/scikit_bring_your_own/scikit_bring_your_own.ipynb‎
Lines changed: 28 additions & 6 deletions
@@ -72,9 +72,6 @@ def transformation():
 
  print('Invoked with {} records'.format(data.shape[0]))
 
- # Drop first column, since sample notebook uses training data to show case predictions
- data.drop(data.columns[[0]],axis=1,inplace=True)
-
  # Do the prediction
  predictions = ScoringService.predict(data)
 
 
@@ -292,8 +292,8 @@
  "\n",
  "The scripts are:\n",
  "\n",
- "* `train_local.sh`: Run this with the name of the image and it will run training on the local tree. You'll want to modify the directory `test_dir/input/data/...` to be set up with the correct channels and data for your algorithm. Also, you'll want to modify the file `input/config/hyperparameters.json` to have the hyperparameter settings that you want to test (as strings).\n",
- "* `serve_local.sh`: Run this with the name of the image once you've trained the model and it should serve the model. It will run and wait for requests. Simply use the keyboard interrupt to stop it.\n",
+ "* `train_local.sh`: Run this with the name of the image and it will run training on the local tree. For example, you can run `$ ./train_local.sh sagemaker-decision-trees`. It will generate a model under the `test_dir/model` directory. You'll want to modify the directory `test_dir/input/data/...` to be set up with the correct channels and data for your algorithm. Also, you'll want to modify the file `input/config/hyperparameters.json` to have the hyperparameter settings that you want to test (as strings).\n",
+ "* `serve_local.sh`: Run this with the name of the image once you've trained the model and it should serve the model. For example, you can run `$ ./serve_local.sh sagemaker-decision-trees`. It will run and wait for requests. Simply use the keyboard interrupt to stop it.\n",
  "* `predict.sh`: Run this with the name of a payload file and (optionally) the HTTP content type you want. The content type will default to `text/csv`. For example, you can run `$ ./predict.sh payload.csv text/csv`.\n",
  "\n",
  "The directories as shipped are set up to test the decision trees sample algorithm presented here."
@@ -459,7 +459,26 @@
  "outputs": [],
  "source": [
  "shape=pd.read_csv(\"data/iris.csv\", header=None)\n",
- "\n",
+ "shape.sample(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# drop the label column in the training set\n",
+ "shape.drop(shape.columns[[0]],axis=1,inplace=True)\n",
+ "shape.sample(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
  "import itertools\n",
  "\n",
  "a = [50*i for i in range(3)]\n",
@@ -533,7 +552,9 @@
  "\n",
  "transformer = tree.transformer(instance_count=1,\n",
  " instance_type='ml.m4.xlarge',\n",
- " output_path=output_path)"
+ " output_path=output_path,\n",
+ " assemble_with='Line',\n",
+ " accept='text/csv')"
  ]
  },
  {
@@ -544,7 +565,8 @@
  "\n",
  "* The __data_location__ which is the location of input data\n",
  "* The __content_type__ which is the content type set when making HTTP request to container to get prediction\n",
- "* The __split_type__ which is the delimiter used for splitting input data "
+ "* The __split_type__ which is the delimiter used for splitting input data \n",
+ "* The __input_filter__ which indicates that the first column (ID) of the input will be dropped before making the HTTP request to the container"
  ]
  },
  {
@@ -553,7 +575,7 @@
  "metadata": {},
  "outputs": [],
  "source": [
- "transformer.transform(data_location, content_type='text/csv', split_type='Line')\n",
+ "transformer.transform(data_location, content_type='text/csv', split_type='Line', input_filter='$[1:]')\n",
  "transformer.wait()"
  ]
  },