This repository was archived by the owner on Jan 25, 2023. It is now read-only.
forked from scikit-learn/scikit-learn
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_ard.py
More file actions
82 lines (68 loc) · 2.53 KB
/
plot_ard.py
File metadata and controls
82 lines (68 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""
==================================================
Automatic Relevance Determination Regression (ARD)
==================================================
Fit regression model with Bayesian Ridge Regression.
See :ref:`bayesian_ridge_regression` for more information on the regressor.
Compared to the OLS (ordinary least squares) estimator, the coefficient
weights are slightly shifted toward zeros, which stabilises them.
The histogram of the estimated weights is very peaked, as a sparsity-inducing
prior is implied on the weights.
The estimation of the model is done by iteratively maximizing the
marginal log-likelihood of the observations.
"""
print(__doc__)
import numpy as np
import pylab as pl
from scipy import stats
from sklearn.linear_model import ARDRegression, LinearRegression
###############################################################################
# Generating simulated data with Gaussian weigthts
# Parameters of the example
np.random.seed(0)
n_samples, n_features = 100, 100
# Create gaussian data
X = np.random.randn(n_samples, n_features)
# Create weigts with a precision lambda_ of 4.
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
relevant_features = np.random.randint(0, n_features, 10)
for i in relevant_features:
w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_))
# Create noite with a precision alpha of 50.
alpha_ = 50.
noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples)
# Create the target
y = np.dot(X, w) + noise
###############################################################################
# Fit the ARD Regression
clf = ARDRegression(compute_score=True)
clf.fit(X, y)
ols = LinearRegression()
ols.fit(X, y)
###############################################################################
# Plot the true weights, the estimated weights and the histogram of the
# weights
pl.figure(figsize=(6, 5))
pl.title("Weights of the model")
pl.plot(clf.coef_, 'b-', label="ARD estimate")
pl.plot(ols.coef_, 'r--', label="OLS estimate")
pl.plot(w, 'g-', label="Ground truth")
pl.xlabel("Features")
pl.ylabel("Values of the weights")
pl.legend(loc=1)
pl.figure(figsize=(6, 5))
pl.title("Histogram of the weights")
pl.hist(clf.coef_, bins=n_features, log=True)
pl.plot(clf.coef_[relevant_features], 5 * np.ones(len(relevant_features)),
'ro', label="Relevant features")
pl.ylabel("Features")
pl.xlabel("Values of the weights")
pl.legend(loc=1)
pl.figure(figsize=(6, 5))
pl.title("Marginal log-likelihood")
pl.plot(clf.scores_)
pl.ylabel("Score")
pl.xlabel("Iterations")
pl.show()