|
| 1 | +""" Utilities for processing Jupyter notebooks. """ |
| 2 | + |
| 3 | +import json |
| 4 | +import warnings |
| 5 | +from pathlib import Path |
| 6 | +from typing import Any |
| 7 | + |
| 8 | + |
def process_notebook(file: Path) -> str:
    """
    Process a Jupyter notebook file and return an executable Python script as a string.

    Code cells are emitted verbatim. Markdown, raw and (legacy nbformat v3) heading
    cells are wrapped in triple-quoted string literals, with backslashes and embedded
    triple quotes escaped so that the literal always parses and evaluates back to the
    original cell text. Cells whose source is empty are skipped, and the remaining
    cells are separated by a blank line.

    Parameters
    ----------
    file : Path
        The path to the Jupyter notebook file.

    Returns
    -------
    str
        The executable Python script as a string.

    Raises
    ------
    ValueError
        If an unexpected cell type is encountered.

    Warns
    -----
    DeprecationWarning
        If the notebook uses the legacy ``worksheets`` layout.
    UserWarning
        If the notebook has more than one worksheet; only the first one is processed.
    """
    with file.open(encoding="utf-8") as f:
        notebook: dict[str, Any] = json.load(f)

    # Check if the notebook contains worksheets
    if worksheets := notebook.get("worksheets"):
        # https://github.com/ipython/ipython/wiki/IPEP-17:-Notebook-Format-4#remove-multiple-worksheets
        # "The `worksheets` field is a list, but we have no UI to support multiple worksheets.
        # Our design has since shifted to heading-cell based structure, so we never intend to
        # support the multiple worksheet model. The worksheets list of lists shall be replaced
        # with a single list, called `cells`."
        # stacklevel=2 attributes the warning to the caller rather than to this module.
        warnings.warn(
            "Worksheets are deprecated as of IPEP-17.",
            DeprecationWarning,
            stacklevel=2,
        )

        if len(worksheets) > 1:
            warnings.warn(
                "Multiple worksheets are not supported. Only the first worksheet will be processed.",
                UserWarning,
                stacklevel=2,
            )

        notebook = worksheets[0]

    result: list[str] = []

    for cell in notebook["cells"]:
        cell_type = cell.get("cell_type")

        # Validate cell type and handle unexpected types.
        # "heading" only exists in the legacy (worksheet-based) format.
        if cell_type not in ("markdown", "code", "raw", "heading"):
            raise ValueError(f"Unknown cell type: {cell_type}")

        # Legacy code cells keep their text under "input" instead of "source".
        source = cell.get("source")
        if source is None:
            source = cell.get("input", [])

        # "".join works both for a list of lines and for a single string.
        text = "".join(source)
        if not text:
            continue

        # Convert non-code cells to multi-line string literals
        if cell_type != "code":
            # Escape backslashes first so the backslashes introduced for the quotes are
            # not doubled; without this, text such as ``\xi`` or an embedded ``"""``
            # would make the generated script unparsable.
            escaped = text.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')
            text = f'"""\n{escaped}\n"""'

        result.append(text)

    return "\n\n".join(result)
0 commit comments