oracle · darenr · Jun 21, 2022 · Jun 21, 2022
@@ -1,3 +1,35 @@
+# Branch of the advanced-ds repository that prepare-ads clones.
+RELEASE_BRANCH := release/ads
+# Branch of the ads_docs repository that prepare-docs clones.
+DOCS_RELEASE_BRANCH := release
+# Destination directory of the advanced-ds clone.
+CLONE_DIR := /tmp/advanced-ds
+# Destination directory of the ads_docs clone.
+DOCS_CLONE_DIR := /tmp/ads-docs
+# Files copied from $(CLONE_DIR) into the current directory by prepare-ads.
+COPY_INVENTORY := setup.py CONTRIBUTING.md LICENSE.txt MANIFEST.in README-development.md README.md SECURITY.md THIRD_PARTY_LICENSES.txt
+
+# Create a new branch release/$(RELEASE_VERSION) from an up-to-date master.
+#
+# RELEASE_VERSION must be supplied by the caller, for example:
+#   make prepare-release-branch RELEASE_VERSION=<version>
+# The guard on the first recipe line aborts before any git command runs when
+# RELEASE_VERSION is empty; otherwise the checkout, clean and pull would all
+# execute and only the final `git checkout -b release/` would fail.
+#
+# WARNING: `git clean -xdf` removes every untracked and ignored file from the
+# working tree.
+.PHONY: prepare-release-branch
+prepare-release-branch: clean
+	$(if $(strip $(RELEASE_VERSION)),,$(error RELEASE_VERSION is not set. Usage: make $@ RELEASE_VERSION=<version>))
+	@git checkout master
+	@git clean -xdf
+	@git pull
+	git checkout -b release/$(RELEASE_VERSION)
+
+# Shallow-clone branch $(RELEASE_BRANCH) of advanced-ds into $(CLONE_DIR), then
+# copy the `ads` directory and every file listed in COPY_INVENTORY into the
+# current directory.
+#
+# $(CLONE_DIR) is removed first because `git clone` refuses to clone into an
+# existing non-empty directory, which would break a re-run after a failure.
+#
+# The inventory is copied with a single `cp` call so that a missing file makes
+# the target fail; a `;`-joined list of cp commands only reports the status of
+# the last one.
+.PHONY: prepare-ads
+prepare-ads:
+	@echo "Started advanced-ds clone at $$(date)"
+	@rm -rf $(CLONE_DIR)
+	@git clone ssh://git@bitbucket.oci.oraclecorp.com:7999/odsc/advanced-ds.git --branch $(RELEASE_BRANCH) --depth 1 $(CLONE_DIR)
+	@echo "Finished cloning at $$(date)"
+	cp -r $(CLONE_DIR)/ads .
+	cp $(addprefix $(CLONE_DIR)/,$(COPY_INVENTORY)) .
+
+# Shallow-clone branch $(DOCS_RELEASE_BRANCH) of ads_docs into
+# $(DOCS_CLONE_DIR), then copy its `source` directory and requirements.txt
+# into ./docs.
+#
+# $(DOCS_CLONE_DIR) is removed first because `git clone` refuses to clone into
+# an existing non-empty directory. `mkdir -p docs` guarantees the destination
+# exists, so `source` always lands at docs/source and requirements.txt at
+# docs/requirements.txt.
+.PHONY: prepare-docs
+prepare-docs:
+	@echo "Started ads_docs clone at $$(date)"
+	@rm -rf $(DOCS_CLONE_DIR)
+	@git clone ssh://git@bitbucket.oci.oraclecorp.com:7999/odsc/ads_docs.git --branch $(DOCS_RELEASE_BRANCH) --depth 1 $(DOCS_CLONE_DIR)
+	@echo "Finished cloning at $$(date)"
+	@mkdir -p docs
+	cp -r $(DOCS_CLONE_DIR)/source docs/
+	cp $(DOCS_CLONE_DIR)/requirements.txt docs
+
+# Run the three preparation steps strictly in sequence: create the release
+# branch, then copy in the ADS sources, then copy in the docs.
+#
+# The steps are invoked through $(MAKE) rather than listed as prerequisites
+# because prerequisites may run concurrently under `make -j`, which would let
+# the copy steps race with the `git clean -xdf` of the branch step.
+# $(MAKE) also keeps `make -n` and command-line variables such as
+# RELEASE_VERSION working in the sub-invocations.
+.PHONY: prepare
+prepare:
+	$(MAKE) prepare-release-branch
+	$(MAKE) prepare-ads
+	$(MAKE) prepare-docs
+
+# Commit everything in the working tree and push it to origin, but only when
+# the currently checked-out branch is release/$(RELEASE_VERSION); otherwise
+# stop with an error before anything is staged.
+push: clean
+	@if [ "$$(git branch | grep \*)" != "* release/$(RELEASE_VERSION)" ]; then \
+		echo "Set proper value to RELEASE_VERSION"; \
+		exit 1; \
+	fi; \
+	echo "Version matching current branch"
+	@git add .
+	@git commit -m "Release version: $(RELEASE_VERSION)"
+	@git push --set-upstream origin release/$(RELEASE_VERSION)
 
 dist: clean
 @python3 setup.py sdist bdist_wheel
@@ -10,3 +42,5 @@ clean:
 @find ./ -name '*.pyc' -exec rm -f {} \;
 @find ./ -name 'Thumbs.db' -exec rm -f {} \;
 @find ./ -name '*~' -exec rm -f {} \;
+@rm -rf $(CLONE_DIR)
+@rm -rf $(DOCS_CLONE_DIR)
@@ -2,19 +2,19 @@
 
 [![PyPI](https://img.shields.io/pypi/v/oracle-ads.svg)](https://pypi.org/project/oracle-ads/)
 
-The [Oracle Accelerated Data Science (ADS) SDK](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html) is maintained by the [Oracle Cloud Infrastructure Data Science service](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm) team. It speeds up common data science activities by providing tools that automate and/or simplify common data science tasks, along with providing a data scientist friendly pythonic interface to Oracle Cloud Infrastructure (OCI) services, most notably OCI Data Science, Data Flow, Object storage, and the Autonomous Database. ADS gives you an interface to manage the lifecycle of machine learning models, from data acquisition to model evaluation, interpretation, and model deployment. 
+The [Oracle Accelerated Data Science (ADS) SDK](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html) is maintained by the Oracle Cloud Infrastructure (OCI) [Data Science service](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm) team. It speeds up common data science activities by providing tools that automate and simplify common data science tasks. Additionally, it provides data scientists with a friendly pythonic interface to OCI services. Some of the more notable services are OCI Data Science, Model Catalog, Model Deployment, Jobs, Data Flow, Object Storage, Vault, Big Data Service, Data Catalog, and the Autonomous Database. ADS gives you an interface to manage the life cycle of machine learning models, from data acquisition to model evaluation, interpretation, and model deployment.
 
 With ADS you can:
 
  - Read datasets from Oracle Object Storage, Oracle RDBMS (ATP/ADW/On-prem), AWS S3 and other sources into `Pandas dataframes`.
- - Easily compute summary statistics on your dataframes and perform data profiling. 
- - Tune models using hyperparameter optimization with the `ADSTuner` tool. 
- - Generate detailed evaluation reports of your model candidates with the `ADSEvaluator` module. 
+ - Use feature types to characterize your data, create meaningful summary statistics and plots. Use the warning and validation system to test the quality of your data.
+ - Tune models using hyperparameter optimization with the `ADSTuner` tool.
+ - Generate detailed evaluation reports of your model candidates with the `ADSEvaluator` module.
  - Save machine learning models to the [OCI Data Science Model Catalog](https://docs.oracle.com/en-us/iaas/data-science/using/models-about.htm).
- - Deploy those models as HTTP endpoints with [Model Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm).
+ - Deploy models as HTTP endpoints with [Model Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm).
  - Launch distributed ETL, data processing, and model training jobs in Spark with [OCI Data Flow](https://docs.oracle.com/en-us/iaas/data-flow/using/home.htm).
- - Train machine learning models in OCI Data Science [Jobs](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm). 
- - Manage the lifecycle of conda environments through the `ads conda` command line interface (CLI). 
+ - Train machine learning models in OCI Data Science [Jobs](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm).
+ - Manage the life cycle of conda environments through the `ads conda` command line interface (CLI).
 
 ## Installation
 
@@ -28,44 +28,98 @@ You have various options when installing ADS.
 
 ### Installing extras libraries
 
-To use ADS within a [Notebook Session](https://docs.oracle.com/en-us/iaas/data-science/using/manage-notebook-sessions.htm) of the OCI Data Science service: 
+The `all-optional` module will install all optional dependencies.
 
 ```bash
- $ python3 -m pip install oracle-ads[notebook]
+ $ python3 -m pip install oracle-ads[all-optional]
 ```
 
-For machine learning tasks install
+To work with gradient boosting models, install the `boosted` module. This module includes XGBoost and LightGBM model classes.
 
 ```bash
  $ python3 -m pip install oracle-ads[boosted]
 ```
 
-To work on text related tasks run
+For big data use cases using Oracle Big Data Service (BDS), install the `bds` module. It includes the following libraries, `ibis-framework[impala]`, `hdfs[kerberos]` and `sqlalchemy`.
 
 ```bash
- $ python3 -m pip install oracle-ads[text]
+ $ python3 -m pip install oracle-ads[bds]
 ```
 
-For access to a broad set of data formats (for example, Excel, Avro, etc.) run
+To work with a broad set of data formats (for example, Excel, Avro, etc.) install the `data` module. It includes the `fastavro`, `openpyxl`, `pandavro`, `asteval`, `datefinder`, `htmllistparse`, and `sqlalchemy` libraries.
 
 ```bash
  $ python3 -m pip install oracle-ads[data]
 ```
 
+To work with geospatial data, install the `geo` module. It includes `geopandas` and libraries from the `viz` module.
+
+```bash
+ $ python3 -m pip install oracle-ads[geo]
+```
+
+Install the `notebook` module to use ADS within an OCI Data Science service [notebook session](https://docs.oracle.com/en-us/iaas/data-science/using/manage-notebook-sessions.htm). This module installs `ipywidgets` and `ipython` libraries.
+
+```bash
+ $ python3 -m pip install oracle-ads[notebook]
+```
+
+To work with ONNX-compatible run times and libraries designed to maximize performance and model portability, install the `onnx` module. It includes the following libraries, `onnx`, `onnxruntime`, `onnxmltools`, `skl2onnx`, `xgboost`, `lightgbm` and libraries from the `viz` module.
+
+```bash
+ $ python3 -m pip install oracle-ads[onnx]
+```
+
+For infrastructure tasks, install the `opctl` module. It includes the following libraries, `oci-cli`, `docker`, `conda-pack`, `nbconvert`, `nbformat`, and `inflection`.
+
+```bash
+ $ python3 -m pip install oracle-ads[opctl]
+```
+
+For hyperparameter optimization tasks, install the `optuna` module. It includes `optuna` and libraries from the `viz` module.
+
+```bash
+ $ python3 -m pip install oracle-ads[optuna]
+```
+
+Install the `tensorflow` module to include `tensorflow` and libraries from the `viz` module.
+
+```bash
+ $ python3 -m pip install oracle-ads[tensorflow]
+```
+
+For text-related tasks, install the `text` module. This includes the `wordcloud` and `spacy` libraries.
+
+```bash
+ $ python3 -m pip install oracle-ads[text]
+```
+
+Install the `torch` module to include `pytorch` and libraries from the `viz` module.
+
+```bash
+ $ python3 -m pip install oracle-ads[torch]
+```
+
+Install the `viz` module to include libraries for visualization tasks. Some of the key packages are `bokeh`, `folium`, `seaborn` and related packages.
+
+```bash
+ $ python3 -m pip install oracle-ads[viz]
+```
+
 **Note**
 
 Multiple extra dependencies can be installed together. For example:
 
 ```bash
- $ python3 -m pip install oracle-ads[notebook,boosted,text]
+ $ python3 -m pip install oracle-ads[notebook,viz,text]
 ```
 
 ## Documentation
 
  - [Oracle Accelerated Data Science SDK (ADS) Documentation](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html)
- - [Oracle Cloud Infrastructure Data Science and AI services Examples](https://github.com/oracle/oci-data-science-ai-samples)
+ - [OCI Data Science and AI services Examples](https://github.com/oracle/oci-data-science-ai-samples)
  - [Oracle AI & Data Science Blog](https://blogs.oracle.com/ai-and-datascience/)
- - [Oracle Cloud Infrastructure Documentation](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm)
+ - [OCI Documentation](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm)
 
 ## Examples
 
@@ -75,48 +129,37 @@ Multiple extra dependencies can be installed together. For example:
  import ads
  from ads.common.auth import default_signer
  import oci
+ import pandas as pd
 
  ads.set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT")
- bucket_name = <bucket-name>
- file_name = <file-name>
+ bucket_name = <bucket_name>
+ key = <key>
  namespace = <namespace>
- df = pd.read_csv(f"oci://{bucket_name}@{namespace}/{file_name}", storage_options=default_signer())
+ df = pd.read_csv(f"oci://{bucket_name}@{namespace}/{key}", storage_options=default_signer())
 ```
 
-### Load data from ADB (simple)
+### Load data from ADB 
+
+This example uses SQL-injection-safe bind variables.
 
 ```python
+ import ads
+ import pandas as pd
+
  connection_parameters = {
- "user_name": "<username>",
+ "user_name": "<user_name>",
  "password": "<password>",
- "service_name": "<service_name_{high|med|low}>",
- "wallet_location": "/full/path/to/my_wallet.zip",
+ "service_name": "<tns_name>",
+ "wallet_location": "<file_path>",
  }
- import pandas as pd
- import ads
 
- # simple read of a SQL query into a dataframe with no bind variables
- df = pd.DataFrame.ads.read_sql(
- "SELECT * FROM SH.SALES",
- connection_parameters=connection_parameters,
- )
-```
-
-### Load data from ADB (using sql-injection-safe bind variables)
-
-```python
  df = pd.DataFrame.ads.read_sql(
  """
- SELECT
- *
- FROM
- SH.SALES
- WHERE
- ROWNUM <= :max_rows
+ SELECT *
+ FROM SH.SALES
+ WHERE ROWNUM <= :max_rows
  """,
- bind_variables={
- max_rows : 100
- },
+ bind_variables={ "max_rows" : 100 },
  connection_parameters=connection_parameters,
  )
 ```
@@ -129,8 +172,8 @@ Find Getting Started instructions for developers in [README-development.md](http
 
 ## Security
 
-Please consult the security guide [SECURITY.md](https://github.com/oracle/accelerated-data-science/blob/main/SECURITY.md) for our responsible security vulnerability disclosure process.
+Consult the security guide [SECURITY.md](https://github.com/oracle/accelerated-data-science/blob/main/SECURITY.md) for our responsible security vulnerability disclosure process.
 
 ## License
 
-Copyright (c) 2020, 2022 Oracle and/or its affiliates. Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+Copyright (c) 2020, 2022 Oracle and/or its affiliates. Licensed under the [Universal Permissive License v1.0](https://oss.oracle.com/licenses/upl/)
@@ -9,18 +9,18 @@
 import logging
 import sys
 
-import IPython
-from IPython import get_ipython
-from IPython.core.error import UsageError
+import oci
+
 import matplotlib.font_manager # causes matplotlib to regenerate its fonts
 import json
 
 import ocifs
-import oci
 from ads.common.decorator.deprecate import deprecated
+from ads.common.ipython import configure_plotting, _log_traceback
 from ads.feature_engineering.accessor.series_accessor import ADSSeriesAccessor
 from ads.feature_engineering.accessor.dataframe_accessor import ADSDataFrameAccessor
 
+
 os.environ["GIT_PYTHON_REFRESH"] = "quiet"
 
 __version__ = ""
@@ -35,9 +35,12 @@
 oci_key_profile = "DEFAULT"
 test_mode = os.environ.get("TEST_MODE", False)
 resource_principal_mode = bool(os.environ.get("RESOURCE_PRINCIPAL_MODE", False))
+orig_ipython_traceback = None
 
 
-def set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT"):
+def set_auth(
+ auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT"
+):
  """
  Enable/disable resource principal identity or keypair identity in a notebook session.
 
@@ -92,7 +95,11 @@ def set_debug_mode(mode=True):
  """
  global debug_mode
  debug_mode = mode
+ import IPython
+
  if debug_mode:
+ from ads.common.ipython import orig_ipython_traceback
+
  IPython.core.interactiveshell.InteractiveShell.showtraceback = (
  orig_ipython_traceback
  )
@@ -169,37 +176,4 @@ def hello():
  )
 
 
-def _log_traceback(self, exc_tuple=None, **kwargs):
- try:
- etype, value, tb = self._get_exc_info(exc_tuple)
- except ValueError:
- print("No traceback available to show.", file=sys.stderr)
- return
- msg = etype.__name__, str(value)
- logger.error("ADS Exception", exc_info=(etype, value, tb))
- sys.stderr.write("{0}: {1}".format(*msg))
-
-
-if IPython.core.interactiveshell.InteractiveShell.showtraceback != _log_traceback:
- orig_ipython_traceback = (
- IPython.core.interactiveshell.InteractiveShell.showtraceback
- )
-
-# Override the default showtraceback behavior of ipython, to show only the error message and log the stacktrace
-IPython.core.interactiveshell.InteractiveShell.showtraceback = _log_traceback
-
-ipy = get_ipython()
-if ipy is not None:
- try:
- # show matplotlib plots inline
- ipy.run_line_magic("matplotlib", "inline")
- except UsageError:
- # ignore error and use the default matplotlib mode
- pass
-else:
- import matplotlib as mpl
-
- mpl.rcParams["backend"] = "agg"
- import matplotlib.pyplot as plt
-
- plt.switch_backend("agg")
+configure_plotting()
@@ -1,3 +1,3 @@
 {
- "version": "2.6.1"
+ "version": "2.6.2"
 }