import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

from sklearn import datasets
from sklearn import linear_model
# sklearn.cross_validation was renamed: train_test_split and cross_val_score
# now live in sklearn.model_selection (scikit-learn >= 0.18)
from sklearn import model_selection
from sklearn import naive_bayes
from sklearn import neighbors
from sklearn import svm
from sklearn import tree
    
    
    
def create_random_data(num_instances, average, deviation):
    # draw num_instances normally distributed samples as a (num_instances, 1) column
    return np.random.randn(num_instances, 1) * deviation + average
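
# Illustrative usage (not in the original script): create_random_data(5, 190, 10)
# returns a (5, 1) column of values drawn from a normal distribution with
# mean 190 and standard deviation 10.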
    
def create_hoops_dataset():
    # basketball players' heights (cm) and weights (kg)
    bheights = create_random_data(100, 190, 10)
    bweights = create_random_data(100, 110, 8)
    # non-players' heights and weights
    nheights = create_random_data(100, 170, 10)
    nweights = create_random_data(100, 80, 12)
    return [bheights, bweights, nheights, nweights]
    
    
def show_various_plots(bh, bw, nh, nw):
    plt.figure()
    # line plots of the raw height samples per group
    plt.subplot(2, 2, 1)
    plt.title('Heights')
    plt.ylabel('Height (cm)')
    plt.plot(bh, linestyle='-', marker='o', color='b')
    plt.plot(nh, linestyle='-', marker='o', color='g')
    # line plots of the raw weight samples per group
    plt.subplot(2, 2, 2)
    plt.title('Weights')
    plt.ylabel('Weight (kg)')
    plt.plot(bw, linestyle='-', marker='o', color='b')
    plt.plot(nw, linestyle='-', marker='o', color='g')
    # histograms of the same data
    plt.subplot(2, 2, 3)
    plt.xlabel('Height (cm)')
    plt.hist([bh.ravel(), nh.ravel()], 20)
    plt.subplot(2, 2, 4)
    plt.xlabel('Weight (kg)')
    plt.hist([bw.ravel(), nw.ravel()], 20)
    	
def assemble_feature_matrix(bh, bw, nh, nw):
    # column vector of heights for everyone
    h = np.vstack([bh, nh])
    # column vector of weights for everyone
    w = np.vstack([bw, nw])
    # class labels: 0 = basketball player, 1 = non-player
    cl = np.array([0] * bh.shape[0] + [1] * nh.shape[0])
    # feature matrix has weight in column 0 and height in column 1
    return np.hstack([w, h]), cl
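
# Sanity check (an illustrative addition, not part of the original script):
# with 100 samples per group, the feature matrix should be (200, 2) and the
# label vector (200,).
_X_chk, _y_chk = assemble_feature_matrix(*create_hoops_dataset())
assert _X_chk.shape == (200, 2) and _y_chk.shape == (200,)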
    	
    
def plot_scatter(X, y):
    # scatter plot of the two features, colored by class label
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
    	
    
def plot_scatter_with_classifiers(X, y):
    # create color maps: light for decision regions, bold for data points
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

    classifier_dict = {'KNN1': neighbors.KNeighborsClassifier(1),
                       'KNN3': neighbors.KNeighborsClassifier(3),
                       'KNN5': neighbors.KNeighborsClassifier(5),
                       'SVM': svm.SVC(kernel='linear', C=1),
                       'NBG': naive_bayes.GaussianNB(),
                       'DTREE': tree.DecisionTreeClassifier(),
                       'PRCPTRON': linear_model.SGDClassifier(loss='perceptron',
                                                              eta0=1,
                                                              learning_rate='constant',
                                                              penalty=None)}

    for clf_name in sorted(classifier_dict):
        clf = classifier_dict[clf_name]
        clf.fit(X, y)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max] x [y_min, y_max].
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        hx = (x_max - x_min) / 100.0
        hy = (y_max - y_min) / 100.0
        xx, yy = np.meshgrid(np.arange(x_min, x_max, hx),
                             np.arange(y_min, y_max, hy))
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        plt.figure()
        plt.title(clf_name)
        plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

        # Plot also the training points
        plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
        plt.xlim(xx.min(), xx.max())
        plt.ylim(yy.min(), yy.max())
        plt.show()
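
# A possible extension (a sketch, not part of the original script): rather than
# eyeballing decision boundaries, score a few of the same estimators with
# k-fold cross-validation. `compare_classifiers` is a hypothetical helper name
# introduced here for illustration.
def compare_classifiers(X, y, cv=5):
    candidates = {'KNN3': neighbors.KNeighborsClassifier(3),
                  'SVM': svm.SVC(kernel='linear', C=1),
                  'NBG': naive_bayes.GaussianNB(),
                  'DTREE': tree.DecisionTreeClassifier()}
    for name in sorted(candidates):
        scores = model_selection.cross_val_score(candidates[name], X, y, cv=cv)
        print("%-8s accuracy: %0.2f (+/- %0.2f)" % (name, scores.mean(), scores.std() * 2))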
    
    
    
[bh, bw, nh, nw] = create_hoops_dataset()
show_various_plots(bh, bw, nh, nw)
X, y = assemble_feature_matrix(bh, bw, nh, nw)
plot_scatter_with_classifiers(X, y)

X, y = datasets.make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
plot_scatter_with_classifiers(X, y)
    
    
    
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.4, random_state=0)
print(X_train.shape, y_train.shape)

# train an SVM classifier and score it on the held-out test set
clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
print("SVM test accuracy: %0.2f" % clf.score(X_test, y_test))

# train a k-nearest-neighbors classifier and score it on the same test set
clf = neighbors.KNeighborsClassifier(3).fit(X_train, y_train)
print("KNN test accuracy: %0.2f" % clf.score(X_test, y_test))

# do 5-fold cross-validation and report the resulting score
scores = model_selection.cross_val_score(clf, X, y, cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))