资源简介

Python数据科学手册源代码

资源截图

代码片段和文件信息


import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from ipywidgets import interact


def visualize_tree(estimator, X, y, boundaries=True,
                   xlim=None, ylim=None, ax=None):
    """Fit ``estimator`` on ``(X, y)`` and draw its predictions on an axes.

    The training points are scattered (coloured by ``y``), the estimator is
    fitted, and its predictions over a 200 x 200 grid spanning
    ``xlim`` x ``ylim`` are drawn as translucent filled contours.  When
    ``boundaries`` is true, the split lines stored in ``estimator.tree_``
    are drawn on top.

    Parameters
    ----------
    estimator : object
        Must provide ``fit(X, y)`` and ``predict(points)``.  When
        ``boundaries`` is true it must also expose ``tree_`` with
        ``feature``, ``threshold``, ``children_left`` and
        ``children_right`` indexable by node id.
    X : array
        Indexed as ``X[:, 0]`` and ``X[:, 1]``; only these two columns
        are plotted.
    y : array
        Point labels; must provide ``min()`` and ``max()``.
    boundaries : bool, default True
        Whether to draw the tree's split lines.
    xlim, ylim : pair of numbers, optional
        Plot extents.  Default to the axes limits after the scatter.
    ax : axes object, optional
        Target axes.  Defaults to ``plt.gca()``.

    Returns
    -------
    None
        The function only draws on ``ax`` and fits ``estimator`` in place.
    """
    if ax is None:
        ax = plt.gca()

    # Plot the training points
    ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap='viridis',
               clim=(y.min(), y.max()), zorder=3)
    ax.axis('tight')
    ax.axis('off')
    if xlim is None:
        xlim = ax.get_xlim()
    if ylim is None:
        ylim = ax.get_ylim()

    # Fit the estimator and evaluate it on a regular grid
    estimator.fit(X, y)
    xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                         np.linspace(*ylim, num=200))
    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot.  Level edges sit at -0.5, 0.5, ...,
    # n_classes - 0.5, so this presumably expects integer labels
    # 0..n_classes-1 -- TODO confirm against callers.
    # NOTE: no ``clim`` here; contourf does not consume it and the bands
    # are already determined by ``levels``.
    n_classes = len(np.unique(y))
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.3,
                levels=np.arange(n_classes + 1) - 0.5,
                cmap='viridis', zorder=1)

    ax.set(xlim=xlim, ylim=ylim)

    def plot_boundaries(i, xlim, ylim):
        """Draw the split at node ``i`` clipped to ``xlim`` x ``ylim``, then recurse."""
        if i < 0:
            # Negative child ids end the recursion.
            return

        tree = estimator.tree_
        threshold = tree.threshold[i]

        if tree.feature[i] == 0:
            # Split on the first feature: vertical line across the y-range.
            ax.plot([threshold, threshold], ylim, '-k', zorder=2)
            plot_boundaries(tree.children_left[i],
                            [xlim[0], threshold], ylim)
            plot_boundaries(tree.children_right[i],
                            [threshold, xlim[1]], ylim)
        elif tree.feature[i] == 1:
            # Split on the second feature: horizontal line across the x-range.
            ax.plot(xlim, [threshold, threshold], '-k', zorder=2)
            plot_boundaries(tree.children_left[i], xlim,
                            [ylim[0], threshold])
            plot_boundaries(tree.children_right[i], xlim,
                            [threshold, ylim[1]])
        # Any other feature value draws nothing and is not descended into.

    if boundaries:
        plot_boundaries(0, xlim, ylim)


def plot_tree_interactive(X, y):
    """Show an interactive decision-tree plot with a selectable depth.

    Each time the ``depth`` control changes, a fresh
    ``DecisionTreeClassifier(max_depth=depth, random_state=0)`` is fitted
    on ``(X, y)`` and drawn with ``visualize_tree``.

    Returns whatever ``interact`` returns for the registered callback.
    """
    def interactive_tree(depth=5):
        clf = DecisionTreeClassifier(max_depth=depth, random_state=0)
        visualize_tree(clf, X, y)

    # A (min, max) tuple requests an integer slider; a list would be read
    # by current ipywidgets as a two-option dropdown.
    return interact(interactive_tree, depth=(1, 5))


def randomized_tree_interactive(X, y):
    """Show an interactive plot of trees fitted on random 75% subsets.

    For each ``random_state`` chosen in the widget, the sample indices are
    shuffled with ``np.random.RandomState(random_state)``, the first 75% are
    kept, and a ``DecisionTreeClassifier(max_depth=15)`` fitted on that
    subset is drawn with ``visualize_tree`` (split lines disabled).  The
    plot extents are fixed to the range of the full ``X`` so that successive
    plots share the same frame.

    Returns None; the widget is displayed as a side effect of ``interact``.
    """
    N = int(0.75 * X.shape[0])

    xlim = (X[:, 0].min(), X[:, 0].max())
    ylim = (X[:, 1].min(), X[:, 1].max())

    def fit_randomized_tree(random_state=0):
        clf = DecisionTreeClassifier(max_depth=15)
        i = np.arange(len(y))
        rng = np.random.RandomState(random_state)
        rng.shuffle(i)
        visualize_tree(clf, X[i[:N]], y[i[:N]], boundaries=False,
                       xlim=xlim, ylim=ylim)

    # A (min, max) tuple requests an integer slider; a list would be read
    # by current ipywidgets as a two-option dropdown.
    interact(fit_randomized_tree, random_state=(0, 100))

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2018-08-30 17:23  PythonDataScienceHandbook-master\
     文件        1240  2018-08-30 17:23  PythonDataScienceHandbook-master\.gitignore
     文件         259  2018-08-30 17:23  PythonDataScienceHandbook-master\.gitmodules
     文件        1083  2018-08-30 17:23  PythonDataScienceHandbook-master\LICENSE-CODE
     文件       18650  2018-08-30 17:23  PythonDataScienceHandbook-master\LICENSE-TEXT
     文件        3699  2018-08-30 17:23  PythonDataScienceHandbook-master\README.md
     文件         117  2018-08-30 17:23  PythonDataScienceHandbook-master\environment.yml
     目录           0  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\
     文件       13987  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\00.00-Preface.ipynb
     文件        8510  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.00-IPython-Beyond-Normal-Python.ipynb
     文件       15448  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.01-Help-And-Documentation.ipynb
     文件       10620  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.02-Shell-Keyboard-Shortcuts.ipynb
     文件        9937  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.03-Magic-Commands.ipynb
     文件        9131  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.04-Input-Output-History.ipynb
     文件       11580  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.05-IPython-And-Shell-Commands.ipynb
     文件       21251  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.06-Errors-and-Debugging.ipynb
     文件       18952  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.07-Timing-and-Profiling.ipynb
     文件        5629  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\01.08-More-IPython-Resources.ipynb
     文件        7586  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.00-Introduction-to-NumPy.ipynb
     文件       23921  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.01-Understanding-Data-Types.ipynb
     文件       33517  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.02-The-Basics-Of-NumPy-Arrays.ipynb
     文件       32076  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.03-Computation-on-arrays-ufuncs.ipynb
     文件       31322  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.04-Computation-on-arrays-aggregates.ipynb
     文件      102239  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.05-Computation-on-arrays-broadcasting.ipynb
     文件       41172  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.06-Boolean-Arrays-and-Masks.ipynb
     文件       63008  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.07-Fancy-Indexing.ipynb
     文件       62237  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.08-Sorting.ipynb
     文件       17253  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\02.09-Structured-Data-NumPy.ipynb
     文件        7073  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\03.00-Introduction-to-Pandas.ipynb
     文件       40120  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\03.01-Introducing-Pandas-objects.ipynb
     文件       41324  2018-08-30 17:23  PythonDataScienceHandbook-master\notebooks\03.02-Data-Indexing-and-Selection.ipynb
............此处省略135个文件信息

评论

共有 条评论