Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,35 @@
# Branch passed to `git clone --branch` when prepare-ads clones advanced-ds.
RELEASE_BRANCH := release/ads
# Branch passed to `git clone --branch` when prepare-docs clones ads_docs.
DOCS_RELEASE_BRANCH := release
# Destination directory of the advanced-ds clone; removed again by `clean`.
CLONE_DIR := /tmp/advanced-ds
# Destination directory of the ads_docs clone; removed again by `clean`.
DOCS_CLONE_DIR := /tmp/ads-docs
# Files that prepare-ads copies from $(CLONE_DIR) into the current directory.
COPY_INVENTORY := setup.py CONTRIBUTING.md LICENSE.txt MANIFEST.in README-development.md README.md SECURITY.md THIRD_PARTY_LICENSES.txt

# Create the branch release/$(RELEASE_VERSION) from an up-to-date master.
# Usage: make prepare-release-branch RELEASE_VERSION=<version>
# WARNING: `git clean -xdf` deletes every untracked and ignored file in the
# working tree, so RELEASE_VERSION is validated before anything destructive runs.
.PHONY: prepare-release-branch
prepare-release-branch: clean
	@test -n "$(strip $(RELEASE_VERSION))" || { echo "Set proper value to RELEASE_VERSION" >&2; exit 1; }
	@git checkout master
	@git clean -xdf
	@git pull
	git checkout -b release/$(RELEASE_VERSION)

# Shallow-clone the $(RELEASE_BRANCH) branch of advanced-ds into $(CLONE_DIR),
# then copy the `ads` package and every file in COPY_INVENTORY into the
# current directory.
# The inventory is copied with a single `cp` so that a missing file makes the
# target fail; a `;`-separated chain would only report the last copy's status.
.PHONY: prepare-ads
prepare-ads:
	@echo "Started advanced-ds clone at $$(date)"
	@git clone ssh://git@bitbucket.oci.oraclecorp.com:7999/odsc/advanced-ds.git --branch $(RELEASE_BRANCH) --depth 1 $(CLONE_DIR)
	@echo "Finished cloning at $$(date)"
	cp -r $(CLONE_DIR)/ads .
	cp $(addprefix $(CLONE_DIR)/,$(COPY_INVENTORY)) .

# Shallow-clone the $(DOCS_RELEASE_BRANCH) branch of ads_docs into
# $(DOCS_CLONE_DIR), then copy its `source` tree and requirements.txt into docs/.
# `mkdir -p docs` makes the result independent of whether docs/ already exists:
# the tree always lands in docs/source and requirements.txt in docs/.
.PHONY: prepare-docs
prepare-docs:
	@echo "Started ads_docs clone at $$(date)"
	@git clone ssh://git@bitbucket.oci.oraclecorp.com:7999/odsc/ads_docs.git --branch $(DOCS_RELEASE_BRANCH) --depth 1 $(DOCS_CLONE_DIR)
	@echo "Finished cloning at $$(date)"
	mkdir -p docs
	cp -r $(DOCS_CLONE_DIR)/source docs/ && cp $(DOCS_CLONE_DIR)/requirements.txt docs

# Run the three preparation phases strictly in sequence.
# They are invoked as sub-makes rather than listed as prerequisites because the
# clones must not start until prepare-release-branch (and the `clean` it depends
# on) has finished; prerequisite order gives no such guarantee under `make -j`.
.PHONY: prepare
prepare:
	$(MAKE) prepare-release-branch
	$(MAKE) prepare-ads
	$(MAKE) prepare-docs

# Commit everything in the working tree and push it to origin as
# release/$(RELEASE_VERSION).
# Usage: make push RELEASE_VERSION=<version>
# The current branch is read with `git rev-parse --abbrev-ref HEAD` instead of
# parsing `git branch` output, which changes with colour settings; the check
# uses POSIX `[` so it runs under the default /bin/sh.
.PHONY: push
push: clean
	@current_branch="$$(git rev-parse --abbrev-ref HEAD)"; \
	if [ -n "$(strip $(RELEASE_VERSION))" ] && [ "$$current_branch" = "release/$(RELEASE_VERSION)" ]; then \
		echo "Version matching current branch"; \
	else \
		echo "Set proper value to RELEASE_VERSION"; \
		exit 1; \
	fi
	@git add .
	@git commit -m "Release version: $(RELEASE_VERSION)"
	@git push --set-upstream origin release/$(RELEASE_VERSION)

# Build the source distribution and the wheel after cleaning the tree.
# Declared phony because the recipe itself creates a `dist/` directory, which
# would otherwise be mistaken for this target's output file.
.PHONY: dist
dist: clean
	@python3 setup.py sdist bdist_wheel
Expand All @@ -10,3 +42,5 @@ clean:
@find ./ -name '*.pyc' -exec rm -f {} \;
@find ./ -name 'Thumbs.db' -exec rm -f {} \;
@find ./ -name '*~' -exec rm -f {} \;
@rm -rf $(CLONE_DIR)
@rm -rf $(DOCS_CLONE_DIR)
135 changes: 89 additions & 46 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@

[![PyPI](https://img.shields.io/pypi/v/oracle-ads.svg)](https://pypi.org/project/oracle-ads/)

The [Oracle Accelerated Data Science (ADS) SDK](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html) is maintained by the [Oracle Cloud Infrastructure Data Science service](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm) team. It speeds up common data science activities by providing tools that automate and/or simplify common data science tasks, along with providing a data scientist friendly pythonic interface to Oracle Cloud Infrastructure (OCI) services, most notably OCI Data Science, Data Flow, Object storage, and the Autonomous Database. ADS gives you an interface to manage the lifecycle of machine learning models, from data acquisition to model evaluation, interpretation, and model deployment.
The [Oracle Accelerated Data Science (ADS) SDK](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html) is maintained by the Oracle Cloud Infrastructure (OCI) [Data Science service](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm) team. It speeds up common data science activities by providing tools that automate and simplify common data science tasks. Additionally, it provides data scientists with a friendly Pythonic interface to OCI services. Some of the more notable services are OCI Data Science, Model Catalog, Model Deployment, Jobs, Data Flow, Object Storage, Vault, Big Data Service, Data Catalog, and the Autonomous Database. ADS gives you an interface to manage the life cycle of machine learning models, from data acquisition to model evaluation, interpretation, and model deployment.

With ADS you can:

- Read datasets from Oracle Object Storage, Oracle RDBMS (ATP/ADW/On-prem), AWS S3 and other sources into `Pandas dataframes`.
- Easily compute summary statistics on your dataframes and perform data profiling.
- Tune models using hyperparameter optimization with the `ADSTuner` tool.
- Generate detailed evaluation reports of your model candidates with the `ADSEvaluator` module.
- Use feature types to characterize your data and create meaningful summary statistics and plots. Use the warning and validation system to test the quality of your data.
- Tune models using hyperparameter optimization with the `ADSTuner` tool.
- Generate detailed evaluation reports of your model candidates with the `ADSEvaluator` module.
- Save machine learning models to the [OCI Data Science Model Catalog](https://docs.oracle.com/en-us/iaas/data-science/using/models-about.htm).
- Deploy those models as HTTP endpoints with [Model Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm).
- Deploy models as HTTP endpoints with [Model Deployment](https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-about.htm).
- Launch distributed ETL, data processing, and model training jobs in Spark with [OCI Data Flow](https://docs.oracle.com/en-us/iaas/data-flow/using/home.htm).
- Train machine learning models in OCI Data Science [Jobs](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm).
- Manage the lifecycle of conda environments through the `ads conda` command line interface (CLI).
- Train machine learning models in OCI Data Science [Jobs](https://docs.oracle.com/en-us/iaas/data-science/using/jobs-about.htm).
- Manage the life cycle of conda environments through the `ads conda` command line interface (CLI).

## Installation

Expand All @@ -28,44 +28,98 @@ You have various options when installing ADS.

### Installing extras libraries

To use ADS within a [Notebook Session](https://docs.oracle.com/en-us/iaas/data-science/using/manage-notebook-sessions.htm) of the OCI Data Science service:
The `all-optional` module will install all optional dependencies.

```bash
$ python3 -m pip install oracle-ads[notebook]
$ python3 -m pip install oracle-ads[all-optional]
```

For machine learning tasks install
To work with gradient boosting models, install the `boosted` module. This module includes XGBoost and LightGBM model classes.

```bash
$ python3 -m pip install oracle-ads[boosted]
```

To work on text related tasks run
For big data use cases using Oracle Big Data Service (BDS), install the `bds` module. It includes the following libraries, `ibis-framework[impala]`, `hdfs[kerberos]` and `sqlalchemy`.

```bash
$ python3 -m pip install oracle-ads[text]
$ python3 -m pip install oracle-ads[bds]
```

For access to a broad set of data formats (for example, Excel, Avro, etc.) run
To work with a broad set of data formats (for example, Excel, Avro, etc.) install the `data` module. It includes the `fastavro`, `openpyxl`, `pandavro`, `asteval`, `datefinder`, `htmllistparse`, and `sqlalchemy` libraries.

```bash
$ python3 -m pip install oracle-ads[data]
```

To work with geospatial data, install the `geo` module. It includes `geopandas` and libraries from the `viz` module.

```bash
$ python3 -m pip install oracle-ads[geo]
```

Install the `notebook` module to use ADS within an OCI Data Science service [notebook session](https://docs.oracle.com/en-us/iaas/data-science/using/manage-notebook-sessions.htm). This module installs the `ipywidgets` and `ipython` libraries.

```bash
$ python3 -m pip install oracle-ads[notebook]
```

To work with ONNX-compatible runtimes and libraries designed to maximize performance and model portability, install the `onnx` module. It includes the following libraries: `onnx`, `onnxruntime`, `onnxmltools`, `skl2onnx`, `xgboost`, `lightgbm`, and libraries from the `viz` module.

```bash
$ python3 -m pip install oracle-ads[onnx]
```

For infrastructure tasks, install the `opctl` module. It includes the following libraries, `oci-cli`, `docker`, `conda-pack`, `nbconvert`, `nbformat`, and `inflection`.

```bash
$ python3 -m pip install oracle-ads[opctl]
```

For hyperparameter optimization tasks, install the `optuna` module. It includes `optuna` and libraries from the `viz` module.

```bash
$ python3 -m pip install oracle-ads[optuna]
```

Install the `tensorflow` module to include `tensorflow` and libraries from the `viz` module.

```bash
$ python3 -m pip install oracle-ads[tensorflow]
```

For text-related tasks, install the `text` module. It includes the `wordcloud` and `spacy` libraries.

```bash
$ python3 -m pip install oracle-ads[text]
```

Install the `torch` module to include `pytorch` and libraries from the `viz` module.

```bash
$ python3 -m pip install oracle-ads[torch]
```

Install the `viz` module to include libraries for visualization tasks. Some of the key packages are `bokeh`, `folium`, `seaborn` and related packages.

```bash
$ python3 -m pip install oracle-ads[viz]
```

**Note**

Multiple extra dependencies can be installed together. For example:

```bash
$ python3 -m pip install oracle-ads[notebook,boosted,text]
$ python3 -m pip install oracle-ads[notebook,viz,text]
```

## Documentation

- [Oracle Accelerated Data Science SDK (ADS) Documentation](https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/index.html)
- [Oracle Cloud Infrastructure Data Science and AI services Examples](https://github.com/oracle/oci-data-science-ai-samples)
- [OCI Data Science and AI services Examples](https://github.com/oracle/oci-data-science-ai-samples)
- [Oracle AI & Data Science Blog](https://blogs.oracle.com/ai-and-datascience/)
- [Oracle Cloud Infrastructure Documentation](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm)
- [OCI Documentation](https://docs.oracle.com/en-us/iaas/data-science/using/data-science.htm)

## Examples

Expand All @@ -75,48 +129,37 @@ Multiple extra dependencies can be installed together. For example:
import ads
from ads.common.auth import default_signer
import oci
import pandas as pd

ads.set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT")
bucket_name = <bucket-name>
file_name = <file-name>
bucket_name = <bucket_name>
key = <key>
namespace = <namespace>
df = pd.read_csv(f"oci://{bucket_name}@{namespace}/{file_name}", storage_options=default_signer())
df = pd.read_csv(f"oci://{bucket_name}@{namespace}/{key}", storage_options=default_signer())
```

### Load data from ADB (simple)
### Load data from ADB

This example uses bind variables, which protect the query against SQL injection.

```python
import ads
import pandas as pd

connection_parameters = {
"user_name": "<username>",
"user_name": "<user_name>",
"password": "<password>",
"service_name": "<service_name_{high|med|low}>",
"wallet_location": "/full/path/to/my_wallet.zip",
"service_name": "<tns_name>",
"wallet_location": "<file_path>",
}
import pandas as pd
import ads

# simple read of a SQL query into a dataframe with no bind variables
df = pd.DataFrame.ads.read_sql(
"SELECT * FROM SH.SALES",
connection_parameters=connection_parameters,
)
```

### Load data from ADB (using sql-injection-safe bind variables)

```python
df = pd.DataFrame.ads.read_sql(
"""
SELECT
*
FROM
SH.SALES
WHERE
ROWNUM <= :max_rows
SELECT *
FROM SH.SALES
WHERE ROWNUM <= :max_rows
""",
bind_variables={
max_rows : 100
},
bind_variables={"max_rows": 100},
connection_parameters=connection_parameters,
)
```
Expand All @@ -129,8 +172,8 @@ Find Getting Started instructions for developers in [README-development.md](http

## Security

Please consult the security guide [SECURITY.md](https://github.com/oracle/accelerated-data-science/blob/main/SECURITY.md) for our responsible security vulnerability disclosure process.
Consult the security guide [SECURITY.md](https://github.com/oracle/accelerated-data-science/blob/main/SECURITY.md) for our responsible security vulnerability disclosure process.

## License

Copyright (c) 2020, 2022 Oracle and/or its affiliates. Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
Copyright (c) 2020, 2022 Oracle and/or its affiliates. Licensed under the [Universal Permissive License v1.0](https://oss.oracle.com/licenses/upl/)
52 changes: 13 additions & 39 deletions ads/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,18 @@
import logging
import sys

import IPython
from IPython import get_ipython
from IPython.core.error import UsageError
import oci

import matplotlib.font_manager # causes matplotlib to regenerate its fonts
import json

import ocifs
import oci
from ads.common.decorator.deprecate import deprecated
from ads.common.ipython import configure_plotting, _log_traceback
from ads.feature_engineering.accessor.series_accessor import ADSSeriesAccessor
from ads.feature_engineering.accessor.dataframe_accessor import ADSDataFrameAccessor


os.environ["GIT_PYTHON_REFRESH"] = "quiet"

__version__ = ""
Expand All @@ -35,9 +35,12 @@
oci_key_profile = "DEFAULT"
test_mode = os.environ.get("TEST_MODE", False)
resource_principal_mode = bool(os.environ.get("RESOURCE_PRINCIPAL_MODE", False))
orig_ipython_traceback = None


def set_auth(auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT"):
def set_auth(
auth="api_key", oci_config_location=oci.config.DEFAULT_LOCATION, profile="DEFAULT"
):
"""
Enable/disable resource principal identity or keypair identity in a notebook session.

Expand Down Expand Up @@ -92,7 +95,11 @@ def set_debug_mode(mode=True):
"""
global debug_mode
debug_mode = mode
import IPython

if debug_mode:
from ads.common.ipython import orig_ipython_traceback

IPython.core.interactiveshell.InteractiveShell.showtraceback = (
orig_ipython_traceback
)
Expand Down Expand Up @@ -169,37 +176,4 @@ def hello():
)


def _log_traceback(self, exc_tuple=None, **kwargs):
try:
etype, value, tb = self._get_exc_info(exc_tuple)
except ValueError:
print("No traceback available to show.", file=sys.stderr)
return
msg = etype.__name__, str(value)
logger.error("ADS Exception", exc_info=(etype, value, tb))
sys.stderr.write("{0}: {1}".format(*msg))


if IPython.core.interactiveshell.InteractiveShell.showtraceback != _log_traceback:
orig_ipython_traceback = (
IPython.core.interactiveshell.InteractiveShell.showtraceback
)

# Override the default showtraceback behavior of ipython, to show only the error message and log the stacktrace
IPython.core.interactiveshell.InteractiveShell.showtraceback = _log_traceback

ipy = get_ipython()
if ipy is not None:
try:
# show matplotlib plots inline
ipy.run_line_magic("matplotlib", "inline")
except UsageError:
# ignore error and use the default matplotlib mode
pass
else:
import matplotlib as mpl

mpl.rcParams["backend"] = "agg"
import matplotlib.pyplot as plt

plt.switch_backend("agg")
configure_plotting()
2 changes: 1 addition & 1 deletion ads/ads_version.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"version": "2.6.1"
"version": "2.6.2"
}
Loading