Source code for dataworkspaces.kits.jupyter

"""
Integration with Jupyter notebooks. This module provides a
:class:`~LineageBuilder` subclass to simplify Lineage for Notebooks.

It also provides a collection of IPython *magics* (macros) for working
in Jupyter notebooks.
"""
import os
import sys
import ipykernel # type: ignore
from IPython.core.getipython import get_ipython # type: ignore
from IPython.core.magic import (Magics, magics_class, line_magic) # type: ignore
from IPython.core.display import display # type: ignore
from IPython.display import IFrame, HTML, Markdown # type: ignore

import requests
import json
from urllib.parse import urljoin
import re
from os.path import join, basename, dirname, abspath, expanduser, curdir, exists
from notebook.notebookapp import list_running_servers # type: ignore
from typing import Optional, List, Any, Dict, Tuple, Callable, Sequence
assert Dict # keep pyflakes happy
import shlex
import argparse

from collections import namedtuple


from dataworkspaces.lineage import LineageBuilder
from dataworkspaces.workspace import _find_containing_workspace
from dataworkspaces.api import take_snapshot, get_snapshot_history,\
                               make_lineage_table, make_lineage_graph,\
                               get_results, get_resource_info
from dataworkspaces.errors import ConfigurationError


def _get_notebook_name(verbose=False) -> Optional[str]:
    """
    Return the full path of the jupyter notebook.
    See https://github.com/jupyter/notebook/issues/1000

    Two strategies are tried, in order:

    1. Evaluate ``DWS_JUPYTER_INFO`` in the IPython user namespace and
       return its ``notebook_path`` attribute.
    2. Extract the kernel id from the kernel connection file name and ask
       every running notebook server (``api/sessions``) for a session whose
       kernel id matches.

    In some situations (e.g. running on the command line via nbconvert),
    the notebook name is not available. We return None in those cases.

    :param verbose: if true, print the reason each strategy failed.
    :return: the notebook path, or None if it could not be determined.
    """
    try:
        ipy = get_ipython()
        info = ipy.ev("DWS_JUPYTER_INFO")
        return info.notebook_path
    except Exception as e:
        # Best effort: fall through to the notebook server API below.
        if verbose:
            print("DWS Jupyter extension was not loaded: %s" % e)
    # Without a timeout, an unresponsive notebook server would block the
    # kernel forever.
    timeout_secs = 5
    try:
        connection_file = ipykernel.connect.get_connection_file()
        # Raw string with the dot escaped, so only a literal ".json" matches.
        mo = re.search(r'kernel-(.*)\.json', connection_file)
        if mo is not None:
            kernel_id = mo.group(1)
            for ss in list_running_servers():
                try:
                    response = requests.get(urljoin(ss['url'], 'api/sessions'),
                                            params={'token': ss.get('token', '')},
                                            timeout=timeout_secs)
                except requests.RequestException as e:
                    # One unreachable server should not prevent us from
                    # checking the remaining ones.
                    if verbose:
                        print("Unable to query notebook server %s: %s" %
                              (ss['url'], e))
                    continue
                for nn in json.loads(response.text):
                    if nn['kernel']['id'] == kernel_id:
                        relative_path = nn['notebook']['path']
                        return join(ss['notebook_dir'], relative_path)
            print("Did not find a matching notebook server for %s" % connection_file)
            return None
    except Exception as e:
        if verbose:
            print("Unable to use notebook API to access session info: %s" % e)
    # all our attempts failed
    return None


def _remove_notebook_extn(notebook_name):
    if notebook_name.endswith('.ipynb'):
        return notebook_name[0:-6]
    elif notebook_name.endswith('.py'):
        return notebook_name[0:-3]
    else:
        return notebook_name

def get_step_name_for_notebook() -> Optional[str]:
    """
    Get the step name for a notebook by getting the path and then
    extracting the base name (with any ``.ipynb`` / ``.py`` extension
    removed).

    In some situations (e.g. running on the command line via nbconvert),
    the notebook name is not available. We return None in those cases.
    """
    notebook_path = _get_notebook_name()
    if notebook_path is None:
        return None
    return _remove_notebook_extn(basename(notebook_path))
def is_notebook() -> bool:
    """Return true if this code is running in a notebook.

    The check looks at the class name of the active IPython shell:
    ``ZMQInteractiveShell`` (Jupyter notebook or qtconsole) yields True;
    anything else, including ``TerminalInteractiveShell`` (terminal IPython),
    yields False.
    """
    try:
        # if running in ipython, get_ipython() will be in the global context
        shell = get_ipython().__class__.__name__ # type: ignore
    except NameError:
        return False      # Probably standard Python interpreter or a script
    return shell == 'ZMQInteractiveShell'
def get_notebook_directory():
    """Return the directory containing the current notebook.

    Falls back to ``os.path.curdir`` when the notebook path cannot be
    determined.
    """
    notebook_path = _get_notebook_name()
    if notebook_path is not None:
        return dirname(notebook_path)
    else:
        return curdir
class NotebookLineageBuilder(LineageBuilder):
    """Notebooks are the final step in a pipeline
    (and potentially the only step). We customize
    the standard lineage builder to get the step
    name from the notebook's name and to always have
    a results directory.

    If you are not running this notebook in a server
    context (e.g. via nbconvert), the step name won't be
    available. In that case, you can explicitly pass in
    the step name to the constructor.

    :param results_dir: stored as ``self.results_dir``.
    :param step_name: explicit step name; when None, the name is derived
        from the notebook's file name.
    :param run_description: stored as ``self.run_description``.
    :raises ConfigurationError: if ``step_name`` is None and the notebook
        name cannot be determined.
    """
    def __init__(self, results_dir:str, step_name:Optional[str]=None,
                 run_description:Optional[str]=None):
        super().__init__()
        # Look the notebook path up only once: the lookup may issue HTTP
        # requests to the running notebook servers.
        notebook_path = _get_notebook_name()
        if step_name is not None:
            self.step_name = step_name
        elif notebook_path is not None:
            self.step_name = _remove_notebook_extn(basename(notebook_path))
        else:
            raise ConfigurationError("Unable to infer the name of this notebook. "+
                                     "Please either use the DWS notebook magic or pass the name in explicitly to the lineage builder.")
        if notebook_path is not None:
            self.code.append(notebook_path)
        else:
            # if we are not running in a server context,
            # use the current directory as the code resource path
            self.code.append(abspath(curdir))
        self.results_dir = results_dir
        self.run_description = run_description
############################################################################
#                   Code for IPython magic extension                       #
############################################################################

# Value pushed into the notebook's namespace as DWS_JUPYTER_INFO once the
# browser side has reported the notebook's name and path.
DwsJupyterInfo = namedtuple('DwsJupyterInfo',
                            ['notebook_name', 'notebook_path', 'workspace_dir',
                             'notebook_server_dir', 'error'])

# Javascript run in the browser when the extension is loaded. It opens a comm
# channel ('dws_comm_target') to the kernel, sends an 'init' message with the
# notebook name/path, and renders replies from the kernel into the cell that
# triggered them. This is a raw string so that the \n escapes reach
# Javascript untouched.
init_jscode=r"""%%javascript
var dws_initialization_msg = "Ran DWS initialization. The following magic commands have been added to your notebook:\n- `%dws_info` - print information about your dws environment\n- `%dws_history` - print a history of snapshots in this workspace\n- `%dws_snapshot` - save and create a new snapshot\n- `%dws_lineage_table` - show a table of lineage for the workspace resources\n- `%dws_lineage_graph` - show a graph of lineage for a resource\n- `%dws_results` - show results from a run (results.json file)\n\nRun any command with the `--help` option to see a list\nof options for that command.\n\nThe variable `DWS_JUPYTER_INFO` has been added to\nyour variables, for use in future DWS calls.\n\nIf you want to disable the DWS magic commands (e.g. when running in a batch context), set the variable `DWS_MAGIC_DISABLE` to `True` ahead of the `%load_ext` call.";
if (typeof Jupyter == "undefined") {
    alert("Unable to initialize DWS magic. This version only works with Jupyter Notebooks, not nbconvert or JupyterLab.");
    throw "Unable to initialize DWS magic. This version only works with Jupyter Notebooks, not nbconvert or JupyterLab.";
}
else if (Jupyter.notebook.hasOwnProperty('kernel') && Jupyter.notebook.kernel!=null) {
    var DWSComm = Jupyter.notebook.kernel.comm_manager.new_comm('dws_comm_target', {})
    DWSComm.on_msg(function(msg) {
        console.log("DWS Got msg status: " + msg.content.data.status);
        console.log("DWS msg type: " + msg.content.data.msg_type);
        if (msg.content.data.status != 'ok') {
            if (msg.content.data.hasOwnProperty('cell')) {
                var cell = Jupyter.notebook.get_cell(msg.content.data.cell-1);
                cell.output_area.append_output({'data':{'text/plain':msg.content.data.status}, 'metadata':{}, 'output_type':'display_data' });
            }
            alert(msg.content.data.status);
            return;
        }
        if (msg.content.data.msg_type == "snapshot-result") {
            var cell = Jupyter.notebook.get_cell(msg.content.data.cell-1);
            cell.output_area.append_output({'data':{'text/plain':msg.content.data.message}, 'metadata':{}, 'output_type':'display_data' });
            alert(msg.content.data.message);
        } else if (msg.content.data.msg_type == "init-ack") {
            var cell = Jupyter.notebook.get_cell(msg.content.data.cell-1);
            cell.output_area.append_output({'data':{'text/markdown':dws_initialization_msg}, 'metadata':{}, 'output_type':'display_data' });
            //alert(dws_initialization_msg);
        }
    });
    // Send data
    var cellno = Jupyter.notebook.find_cell_index(Jupyter.notebook.get_selected_cell());
    DWSComm.send({'msg_type':'init',
                  'notebook_name': Jupyter.notebook.notebook_name,
                  'notebook_path': Jupyter.notebook.notebook_path,
                  'cell':cellno});
    window.DWSComm = DWSComm;
} else {
    // this happens when evaluating the javascript upon loading the notebook
    console.log("kernal was null");
}
"""

# First half of a snapshot: ask the browser to save the notebook.
snapshot_jscode="""%%javascript
Jupyter.notebook.save_notebook();
"""

# Second half of a snapshot: tell the kernel (over the comm channel opened
# by init_jscode) to take the snapshot.
snapshot_jscode2="""%%javascript
if (window.hasOwnProperty('DWSComm')) {
    window.DWSComm.send({'msg_type':'snapshot',
                         'cell':Jupyter.notebook.find_cell_index(Jupyter.notebook.get_selected_cell())});
    console.log("sending snapshot");
}
"""

class DwsMagicError(ConfigurationError):
    """Raised when the arguments given to a magic command cannot be parsed."""
    pass

class DwsMagicArgParseExit(Exception):
    """Throw this in our overriding the exit method"""
    pass

class DwsMagicParseArgs(argparse.ArgumentParser):
    """Specialized version of the argument parser that can
    work in the context of ipython magic commands. Should
    never call sys.exit() and needs its own line parsing.
    """
    def parse_magic_line(self, line):
        """Split ``line`` shell-style and parse the resulting arguments."""
        return self.parse_args(shlex.split(line))

    def error(self, msg):
        """Report a parse error by raising DwsMagicError instead of exiting."""
        raise DwsMagicError(msg)

    def exit(self, status=0, message=None):
        """Raise DwsMagicArgParseExit instead of terminating the process."""
        # error() is overridden above, so only zero-status exits are expected
        # to reach this method.
        assert status==0, "Expecting a status of 0"
        raise DwsMagicArgParseExit()


# Colormaps for heatmaps
# These were generated using seaborn:
# def tobyte(c):
#     return int(round(255*c))
# MINIMIZE_COLORMAP = ['rgb(%s,%s,%s)'%(tobyte(c[0]),tobyte(c[1]),tobyte(c[2]))
#                      for c in seaborn.diverging_palette(150, 10, s=50, l=50, n=7)]
# MAXIMIZE_COLORMAP = ['rgb(%s,%s,%s)'%(tobyte(c[0]),tobyte(c[1]),tobyte(c[2]))
#                      for c in seaborn.diverging_palette(10, 150, s=50, l=50, n=7)]
# The two maps are just the reverse of each other with maximize having greener colors toward
# the high indexes and redder colors toward the low indexes, and minimize being the opposite.
# By pre-generating the colormaps, we avoid a dependency on seaborn.
MINIMIZE_COLORMAP=['rgb(84,128,107)', 'rgb(138,168,153)', 'rgb(193,210,201)',
                   'rgb(242,242,242)', 'rgb(232,190,192)', 'rgb(212,136,140)',
                   'rgb(193,84,89)']
MAXIMIZE_COLORMAP=['rgb(193,84,89)', 'rgb(212,136,140)', 'rgb(232,190,192)',
                   'rgb(242,242,242)', 'rgb(193,210,201)', 'rgb(138,168,153)',
                   'rgb(84,128,107)']

def _fmt_scalar(s):
    """Helper function to round metrics.

    Python floats and values whose ``dtype`` equals ``'f'`` are rounded to
    one decimal place when >= 1.0 and to three when >= 0.01. Everything
    else (including smaller or negative floats) is returned unchanged.
    """
    if not isinstance(s, float) and (not hasattr(s, 'dtype') or s.dtype!='f'):
        return s # non-floats left alone
    elif s >=1.0:
        return round(s, 1)
    elif s>=0.01:
        return round(s, 3)
    else:
        return s

# For a given number of bins, the colormap indexes (0 through 6) to assign
# to the bins. The indexes are kept symmetric around the neutral middle (3).
_BINS_TO_LABELS={
    1: [3],
    2: [2,4],
    3: [2,3,4],
    4: [1,2,4,5],
    5: [1,2,3,4,5],
    6: [0,1,2,4,5,6],
    7: [0,1,2,3,4,5,6]
} # type: Dict[int,Sequence[int]]

def _metric_col_to_colormap(col):
    """Given a metric column, return a series representing the heatmap
    indexes (0 through 6). Returns a series with the same number of
    elements as the column. Missing values are mapped to -1.
    """
    import pandas as pd # type: ignore
    import numpy as np # type: ignore
    nunique = len(col.dropna().unique())
    num_bins = min(nunique, 7)
    if num_bins<2:
        return col.apply(lambda v: -1 if pd.isna(v) else 3)
    elif num_bins==2:
        minval=col.min()
        return col.apply(lambda v: -1 if pd.isna(v) else (2 if v==minval else 4))
    # qcut() may collapse bins, so we need to figure out how many bins it will
    # actually give us.
    num_actual_bins = len(pd.qcut(col, num_bins, duplicates='drop').dtype.categories)
    labels=_BINS_TO_LABELS[num_actual_bins]
    try:
        return pd.qcut(col, num_bins, labels=labels, duplicates='drop').astype(np.float32).fillna(-1.0).astype(np.int32)
    except Exception as e:
        print(e, file=sys.stderr)
        print("problem binning columns, unique=%s, num_actual_bins=%s, labels=%s"%
              (nunique, num_actual_bins, labels), file=sys.stderr)
        print("col: %s" % repr(col), file=sys.stderr)
        raise


@magics_class
class DwsMagics(Magics):
    """IPython line magics (``%dws_info``, ``%dws_snapshot``, ``%dws_history``,
    ``%dws_lineage_table``, ``%dws_lineage_graph``, ``%dws_results``).

    If the variable ``DWS_MAGIC_DISABLE`` evaluates to a true value in the
    notebook namespace when the extension is loaded, the magics are
    registered but only print a notice.
    """
    _DISABLED_NOTICE = "DWS magic commands are disabled. To enable, set `DWS_MAGIC_DISABLE` to `False` and restart kernel."

    def __init__(self, shell):
        super().__init__(shell)
        try:
            self.disabled = get_ipython().ev('DWS_MAGIC_DISABLE')
        except NameError:
            self.disabled = False
        if self.disabled:
            print("Loaded Data Workspaces magic commands in disabled state.", file=sys.stderr)
            return
        # Arguments of a pending %dws_snapshot; consumed when the browser
        # sends the 'snapshot' message.
        self._snapshot_args = None # type: Optional[argparse.Namespace]
        def target_func(comm, open_msg):
            self.comm = comm
            @comm.on_msg
            def _recv(msg):
                ipy = get_ipython()
                data = msg['content']['data']
                msg_type = data['msg_type']
                if msg_type=='init':
                    # It looks like the notebook is always running with the cwd set to the notebook
                    # However, the notebook path from the browser is relative to where the
                    # notebook server was started
                    #npath = data['notebook_path']
                    #if not isabs(npath):
                    #    npath = join(abspath(expanduser(curdir)), npath)
                    abscwd = abspath(expanduser(os.getcwd()))
                    npath = join(abscwd, data['notebook_name'])
                    assert exists(npath), "Wrong calculation for absolute notebook path, got %s" % npath
                    assert npath.endswith(data['notebook_path']), \
                        "Unexpacted notebook path from client, got %s, but absolute is %s" %\
                        (data['notebook_path'], npath)
                    notebook_server_dir = npath[0:-(len(data['notebook_path'])+1)]
                    notebook_dir=dirname(npath)
                    workspace_dir = _find_containing_workspace(notebook_dir)
                    error = None
                    if workspace_dir is None:
                        error = "Unable to find a containing workspace for note book at %s" % npath
                    DWS_JUPYTER_INFO=DwsJupyterInfo(data['notebook_name'], npath,
                                                    workspace_dir, notebook_server_dir,
                                                    error)
                    ipy.push({'DWS_JUPYTER_INFO': DWS_JUPYTER_INFO})
                    # Record the info before reporting a failure, so that
                    # %dws_info can still show the error message.
                    self.dws_jupyter_info = DWS_JUPYTER_INFO
                    if error:
                        comm.send({'status':error})
                        raise Exception(error)
                    else:
                        comm.send({'status':'ok', 'msg_type':'init-ack', 'cell':data['cell']})
                elif msg_type=='snapshot':
                    cell = data['cell']
                    try:
                        assert self._snapshot_args is not None
                        r = take_snapshot(self.dws_jupyter_info.workspace_dir,
                                          tag=self._snapshot_args.tag,
                                          message=self._snapshot_args.message)
                        self._snapshot_args = None
                        comm.send({'msg_type':'snapshot-result',
                                   'status':'ok',
                                   'message':'Successfully completed snapshot. Hash is %s'%r[0:8],
                                   'cell':cell})
                    except Exception as e:
                        comm.send({'msg_type':'snapshot-result',
                                   'status':"Snapshot failed with error '%s'"% e,
                                   'cell':cell})
                        raise
                else:
                    raise Exception("Unknown message type %s" % msg_type)
        self.shell.kernel.comm_manager.register_target('dws_comm_target', target_func)
        self.shell.run_cell(init_jscode, store_history=False, silent=True)

    def _show_disabled_notice(self) -> bool:
        """If the magics are disabled, display a notice and return True."""
        if self.disabled:
            display(Markdown(self._DISABLED_NOTICE))
            return True
        return False

    async def _call_snapshot(self):
        """Run the two snapshot javascript cells: save, then snapshot."""
        await self.shell.run_cell_async(snapshot_jscode)
        await self.shell.run_cell_async(snapshot_jscode2)

    @line_magic
    def dws_info(self, line):
        """Print notebook/workspace information and a table of resources."""
        import pandas as pd # TODO: support case where pandas wasn't installed
        parser = DwsMagicParseArgs("dws_info",
                                   description="Print some information about this workspace")
        try:
            parser.parse_magic_line(line)
        except DwsMagicArgParseExit:
            return # user asked for help
        if self._show_disabled_notice():
            return
        print("Notebook name: %s" % self.dws_jupyter_info.notebook_name)
        print("Notebook path: %s" % self.dws_jupyter_info.notebook_path)
        print("Workspace directory: %s" % self.dws_jupyter_info.workspace_dir)
        print("Notebook server dir: %s" % self.dws_jupyter_info.notebook_server_dir)
        if self.dws_jupyter_info.error is not None:
            print("Error message: %s" % self.dws_jupyter_info.error)
            return
        resources = get_resource_info(self.dws_jupyter_info.workspace_dir)
        df = pd.DataFrame({
            'Resource':[r.name for r in resources],
            'Role':[r.role for r in resources],
            'Type':[r.resource_type for r in resources],
            'Local Path':[r.local_path for r in resources]
        })
        with pd.option_context('display.max_colwidth', 80):
            display(df)

    @line_magic
    def dws_snapshot(self, line):
        """Save the notebook and schedule the creation of a new snapshot."""
        parser = DwsMagicParseArgs("dws_snapshot",
                                   description="Save the notebook and create a new snapshot")
        parser.add_argument('-m', '--message', type=str, default=None,
                            help="Message describing the snapshot")
        parser.add_argument('-t', '--tag', type=str, default=None,
                            help="Tag for the snapshot. Note that a given tag can "+
                                 "only be used once (without deleting the old one).")
        try:
            args = parser.parse_magic_line(line)
        except DwsMagicArgParseExit:
            return # user asked for help
        if self._show_disabled_notice():
            return
        self._snapshot_args = args
        msg = "Initiating snapshot"
        if args.tag:
            msg += " with tag '%s'" % args.tag
        if args.message:
            msg += " with message '%s'" % args.message
        print(msg + '...')
        # The javascript cells are run asynchronously from the event loop,
        # after this magic has returned.
        import tornado.ioloop
        tornado.ioloop.IOLoop.current().spawn_callback(self._call_snapshot)

    @line_magic
    def dws_history(self, line):
        """Return a table of snapshots, optionally styled against a baseline
        snapshot or as a heatmap of the metrics columns."""
        parser = DwsMagicParseArgs("dws_history",
                                   description="Print a history of snapshots in this workspace")
        parser.add_argument('--max-count', type=int, default=None,
                            help="Maximum number of snapshots to show")
        parser.add_argument('--tail', default=False, action='store_true',
                            help="Just show the last 10 entries in reverse order")
        parser.add_argument('--baseline', default=None, type=str,
                            help="Snapshot tag or hash to use as a basis for metrics comparison. "+
                                 "Will color the fonts of values green or red, "+
                                 "depending on whether they are better (worse) than "+
                                 "the baseline.")
        parser.add_argument('--heatmap', default=False, action='store_true',
                            help="Show a heatmap for metrics columns")
        parser.add_argument('--maximize-metrics', default=None, type=str,
                            help="Metrics where larger values are better (e.g. accuracy)")
        parser.add_argument('--minimize-metrics', default=None, type=str,
                            help="Metrics where smaller values are better (e.g. loss)")
        # TODO: future feature
        # parser.add_argument('--round-metrics', type=int, default=None,
        #                     help="If specified, round metrics to this many decimal places")
        try:
            args = parser.parse_magic_line(line)
        except DwsMagicArgParseExit:
            return # user asked for help
        if self._show_disabled_notice():
            return
        import pandas as pd # TODO: support case where pandas wasn't installed
        import numpy as np # type: ignore
        if args.heatmap:
            if args.baseline is not None:
                print("Cannot specify both --baseline and --heatmap", file=sys.stderr)
                return
        # An explicit --max-count applies with or without --tail (it used to
        # be silently ignored unless --tail was also given); --tail alone
        # defaults to 10 entries.
        if args.max_count:
            max_count = args.max_count
        elif args.tail:
            max_count = 10
        else:
            max_count = None
        history = get_snapshot_history(self.dws_jupyter_info.workspace_dir,
                                       max_count=max_count, reverse=args.tail)
        entries = []
        index = []
        columns = ['timestamp', 'hash', 'tags', 'message']
        baseline_snapshot = None # type: Optional[int]
        # not every snapshot has the same metrics, so we build an inclusive list
        metrics = [] # type: List[str]
        for s in history:
            d = {'timestamp':s.timestamp[0:19],
                 'hash':s.hashval[0:8],
                 'tags':', '.join([tag for tag in s.tags]),
                 'message':s.message if s.message is not None else ''}
            if s.metrics is not None:
                for (m, v) in s.metrics.items():
                    d[m] = v
                    if m not in columns:
                        columns.append(m)
                        metrics.append(m)
            entries.append(d)
            index.append(s.snapshot_number)
            if (args.baseline is not None):
                if args.baseline in s.tags:
                    baseline_snapshot = s.snapshot_number
                elif s.hashval[0:min(len(args.baseline),8)]==args.baseline[0:8]:
                    baseline_snapshot = s.snapshot_number
        if (args.baseline is not None) and (baseline_snapshot is None):
            print("Did not find a tag or hash corresponding to baseline '%s'" % args.baseline,
                  file=sys.stderr)
            return
        history_df = pd.DataFrame(entries, index=index, columns=columns)
        maximize_metrics = set(['accuracy', 'precision', 'recall'])
        if args.maximize_metrics:
            maximize_metrics = maximize_metrics.union(set(args.maximize_metrics.split(',')))
        minimize_metrics = set(['loss'])
        if args.minimize_metrics:
            minimize_metrics = minimize_metrics.union(set(args.minimize_metrics.split(',')))

        def truncate(v, l=30):
            s = repr(v)
            return s if len(s)<=(l-3) else s[0:l-3]+'...'

        def cleanup_dict_or_string_metric(val):
            if isinstance(val, dict) or isinstance(val, str):
                return truncate(val)
            else:
                return val

        element_styling_fns = [] # type: List[Tuple[str, Callable[[Any], None]]]
        if args.heatmap:
            heatmap_maximize_cols = [] # type: List[str]
            heatmap_minimize_cols = [] # type: List[str]
            color_templ="border: 1px solid darkgrey; background-color: %s; color: %s"
            # TODO: split this out to a separate function
            def color_max_metric_col(col):
                bins = _metric_col_to_colormap(col)
                return bins.apply(lambda b: color_templ%(MAXIMIZE_COLORMAP[b],
                                                         'white' if b<2 or b>4 else 'black')
                                  if b!=-1 else color_templ%('lightgrey', 'black'))
            def color_min_metric_col(col):
                bins = _metric_col_to_colormap(col)
                return bins.apply(lambda b: color_templ%(MINIMIZE_COLORMAP[b],
                                                         'white' if b<2 or b>4 else 'black')
                                  if b!=-1 else color_templ%('lightgrey', 'black'))

        class BaselineElementStyle:
            """Callable returning the CSS style of one metric value relative
            to the baseline value."""
            def __init__(self, metric:str, baseline, maximize:bool):
                self.metric=metric
                self.baseline=baseline
                self.baseline_round = abs(self.baseline*0.005)
                self.maximize=maximize
            def __call__(self, val):
                # if a value is within 0.5% of the baseline, we consider it baseline
                if pd.isna(val):
                    return 'color: grey'
                elif val>(self.baseline+self.baseline_round):
                    return 'color: green' if self.maximize else 'color: red'
                elif val<(self.baseline-self.baseline_round):
                    return 'color: red' if self.maximize else 'color: green'
                else: # within baseline rounding
                    return 'color: black; font-weight: bold'

        for metric in metrics:
            if history_df[metric].dtype.kind in ('f', 'i'): # float or int
                if baseline_snapshot is not None:
                    baseline_val = history_df.loc[baseline_snapshot][metric]
                    if metric in maximize_metrics:
                        element_styling_fns.append((metric,
                                                    BaselineElementStyle(metric, baseline_val,
                                                                         maximize=True)))
                    elif metric in minimize_metrics:
                        element_styling_fns.append((metric,
                                                    BaselineElementStyle(metric, baseline_val,
                                                                         maximize=False)))
                elif args.heatmap:
                    if metric in maximize_metrics:
                        heatmap_maximize_cols.append(metric)
                    elif metric in minimize_metrics:
                        heatmap_minimize_cols.append(metric)
            elif history_df[metric].dtype==np.dtype('object'):
                history_df[metric] = history_df[metric].apply(cleanup_dict_or_string_metric)

        result = history_df
        def get_style(df_or_style):
            # the first styling call works on the DataFrame's .style,
            # subsequent ones chain on the returned Styler
            return df_or_style.style if isinstance(df_or_style, pd.DataFrame) else df_or_style
        for (metric, styling_fn) in element_styling_fns:
            result = get_style(result).applymap(styling_fn, subset=[metric])
        if args.heatmap:
            result = get_style(result).apply(color_max_metric_col, subset=heatmap_maximize_cols)
            result = get_style(result).apply(color_min_metric_col, subset=heatmap_minimize_cols)
        return result

    @line_magic
    def dws_lineage_table(self, line):
        """Return a DataFrame of lineage for the workspace's resources."""
        import pandas as pd # TODO: support case where pandas wasn't installed
        parser = DwsMagicParseArgs("dws_lineage_table",
                                   description="Show a table of lineage for the workspace's resources")
        parser.add_argument('--snapshot', default=None, type=str,
                            help="If specified, print lineage as of the specified snapshot hash or tag")
        try:
            args = parser.parse_magic_line(line)
        except DwsMagicArgParseExit:
            return # user asked for help
        if self._show_disabled_notice():
            return
        rows = [r for r in make_lineage_table(self.dws_jupyter_info.workspace_dir, args.snapshot)]
        return pd.DataFrame(rows, columns=['Resource', 'Lineage Type', 'Details', 'Inputs']).set_index('Resource')

    @line_magic
    def dws_lineage_graph(self, line):
        """Write a lineage graph to an html file next to the notebook and
        display it in an IFrame."""
        # The program name shown in --help must match this magic's name.
        parser = DwsMagicParseArgs("dws_lineage_graph",
                                   description="Show a graph of lineage for a resource")
        parser.add_argument('--resource', default=None, type=str,
                            help="Graph lineage from this resource. Defaults to the results resource. Error if not specified and there is more than one.")
        parser.add_argument('--snapshot', default=None, type=str,
                            help="If specified, graph lineage as of the specified snapshot hash or tag")
        try:
            args = parser.parse_magic_line(line)
        except DwsMagicArgParseExit:
            return # user asked for help
        if self._show_disabled_notice():
            return
        output_file = join(dirname(self.dws_jupyter_info.notebook_path),
                           'lineage_'+_remove_notebook_extn(self.dws_jupyter_info.notebook_name)+'.html')
        make_lineage_graph(output_file, self.dws_jupyter_info.workspace_dir,
                           resource_name=args.resource, tag_or_hash=args.snapshot,
                           width=780, height=380)
        return display(IFrame(basename(output_file), width=800, height=400))

    @line_magic
    def dws_results(self, line):
        """Return an HTML rendering of a run's results.json file."""
        parser = DwsMagicParseArgs("dws_results",
                                   description="Show results from a run (results.json file)")
        parser.add_argument('--resource', default=None, type=str,
                            help="Look for the results.json file in this resource. Otherwise, will look in all results resources and return the first match.")
        parser.add_argument('--snapshot', default=None, type=str,
                            help="If specified, get results as of the specified snapshot or tag. Otherwise, looks at current workspace and then most recent snapshot.")
        try:
            args = parser.parse_magic_line(line)
        except DwsMagicArgParseExit:
            return # user asked for help
        if self._show_disabled_notice():
            return
        rtn = get_results(self.dws_jupyter_info.workspace_dir,
                          tag_or_hash=args.snapshot, resource_name=args.resource)
        if rtn is None:
            print("Did not find a results.json file.", file=sys.stderr)
            return
        (results, rpath) = rtn
        import pandas as pd
        html_list = ['<h3>%s</h3>' % rpath]

        def truncate_dict(d, maxlen=50, roundme=False):
            # repr of the (optionally rounded) dict, cut to maxlen characters
            d2 = {}
            for (k, v) in d.items():
                if roundme:
                    d2[k] = _fmt_scalar(v)
                else:
                    d2[k] = v
            s = repr(d2)
            if len(s)>maxlen:
                return s[0:(maxlen-3)]+'...'
            else:
                return s

        def subdict_to_df(d, parent_name, name, roundme=False):
            # render a nested dict as its own table; deeper dicts are truncated
            keys=[]
            values = []
            for (k, v) in d.items():
                if not isinstance(v, dict):
                    keys.append(k)
                    if roundme:
                        values.append(_fmt_scalar(v))
                    else:
                        values.append(v)
                else:
                    keys.append(k)
                    values.append(truncate_dict(v, roundme=roundme))
            df = pd.DataFrame({'Property':keys, 'Value':values}).set_index('Property')
            html_list.append("<h5>%s: %s</h5>"% (parent_name, name))
            html_list.append(df.to_html())

        def dict_to_df(d, name, roundme=False):
            # scalar entries go into one table; nested dicts (other than
            # 'parameters' and 'metrics', which get their own sections)
            # are rendered as sub-tables
            keys=[]
            values = []
            subdicts = []
            for (k, v) in d.items():
                if not isinstance(v, dict):
                    keys.append(k)
                    if roundme:
                        values.append(_fmt_scalar(v))
                    else:
                        values.append(v)
                elif k not in ('parameters', 'metrics'):
                    subdicts.append((k, v))
            df = pd.DataFrame({'Property':keys, 'Value':values}).set_index('Property')
            html_list.append("<h4>%s</h4>"% name)
            html_list.append(df.to_html())
            for (k, v) in subdicts:
                subdict_to_df(v, name, k, roundme=roundme)

        dict_to_df(results, 'General Properties')
        if 'parameters' in results:
            dict_to_df(results['parameters'], 'Parameters')
        if 'metrics' in results:
            dict_to_df(results['metrics'], 'Metrics', roundme=True)
        return HTML('\n'.join(html_list))


def load_ipython_extension(ipython):
    """Entry point used by ``%load_ext``: register the DWS magics."""
    ipython.register_magics(DwsMagics)