import pandas as pd
# Raise the display row limit to 500 so long tables are printed without truncation.
pd.options.display.max_rows = 500
# 定義したfrequency_tableをimport
from frequency_table import frequency_table
サンプルデータ: data を作成します。
# Build the sample DataFrame `data` from the Boston housing dataset.
# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so fall
# back to the original StatLib copy of the data when it cannot be imported.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    import numpy as np

    # The raw file starts with 22 lines of description; after that, each
    # record is wrapped over two physical lines (11 values, then 3 values,
    # the last of which is the target).
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    # Plain dict exposing the same keys this cell reads from the sklearn
    # result ('data', 'feature_names', 'target').
    boston = {
        'data': np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        'feature_names': np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
        'target': raw_df.values[1::2, 2],
    }

# One column per feature, plus the target stored under 'y'.
data = pd.DataFrame(
    boston['data'],
    columns=boston['feature_names'],
)
data['y'] = boston['target']
# Show the first 10 rows as the cell output.
data.head(10)
| | CRIM | ZN | INDUS | CHAS | NOX | RM | AGE | DIS | RAD | TAX | PTRATIO | B | LSTAT | y |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.00632 | 18.0 | 2.31 | 0.0 | 0.538 | 6.575 | 65.2 | 4.0900 | 1.0 | 296.0 | 15.3 | 396.90 | 4.98 | 24.0 |
1 | 0.02731 | 0.0 | 7.07 | 0.0 | 0.469 | 6.421 | 78.9 | 4.9671 | 2.0 | 242.0 | 17.8 | 396.90 | 9.14 | 21.6 |
2 | 0.02729 | 0.0 | 7.07 | 0.0 | 0.469 | 7.185 | 61.1 | 4.9671 | 2.0 | 242.0 | 17.8 | 392.83 | 4.03 | 34.7 |
3 | 0.03237 | 0.0 | 2.18 | 0.0 | 0.458 | 6.998 | 45.8 | 6.0622 | 3.0 | 222.0 | 18.7 | 394.63 | 2.94 | 33.4 |
4 | 0.06905 | 0.0 | 2.18 | 0.0 | 0.458 | 7.147 | 54.2 | 6.0622 | 3.0 | 222.0 | 18.7 | 396.90 | 5.33 | 36.2 |
5 | 0.02985 | 0.0 | 2.18 | 0.0 | 0.458 | 6.430 | 58.7 | 6.0622 | 3.0 | 222.0 | 18.7 | 394.12 | 5.21 | 28.7 |
6 | 0.08829 | 12.5 | 7.87 | 0.0 | 0.524 | 6.012 | 66.6 | 5.5605 | 5.0 | 311.0 | 15.2 | 395.60 | 12.43 | 22.9 |
7 | 0.14455 | 12.5 | 7.87 | 0.0 | 0.524 | 6.172 | 96.1 | 5.9505 | 5.0 | 311.0 | 15.2 | 396.90 | 19.15 | 27.1 |
8 | 0.21124 | 12.5 | 7.87 | 0.0 | 0.524 | 5.631 | 100.0 | 6.0821 | 5.0 | 311.0 | 15.2 | 386.63 | 29.93 | 16.5 |
9 | 0.17004 | 12.5 | 7.87 | 0.0 | 0.524 | 6.004 | 85.9 | 6.5921 | 5.0 | 311.0 | 15.2 | 386.71 | 17.10 | 18.9 |
定義したfrequency_tableで度数分布表を作成します。
# Build the frequency table from the sample DataFrame.
freq_table = frequency_table(data)
# Preview the first five rows of each "Variable" group.
freq_table.groupby("Variable").head(5)
| | Variable | Value | Count | Proportion |
---|---|---|---|---|
0 | CRIM | (-0.08360000000000001, 8.903] | 439.0 | 0.867589 |
1 | CRIM | (8.903, 17.8] | 44.0 | 0.086957 |
2 | CRIM | (17.8, 26.697] | 14.0 | 0.027668 |
3 | CRIM | (35.594, 44.491] | 3.0 | 0.005929 |
4 | CRIM | (44.491, 53.388] | 2.0 | 0.003953 |
10 | ZN | (-0.101, 10.0] | 372.0 | 0.735178 |
11 | ZN | (20.0, 30.0] | 33.0 | 0.065217 |
12 | ZN | (10.0, 20.0] | 33.0 | 0.065217 |
13 | ZN | (70.0, 80.0] | 18.0 | 0.035573 |
14 | ZN | (30.0, 40.0] | 17.0 | 0.033597 |
20 | INDUS | (16.828, 19.556] | 132.0 | 0.260870 |
21 | INDUS | (5.916, 8.644] | 99.0 | 0.195652 |
22 | INDUS | (3.188, 5.916] | 95.0 | 0.187747 |
23 | INDUS | (0.432, 3.188] | 56.0 | 0.110672 |
24 | INDUS | (19.556, 22.284] | 45.0 | 0.088933 |
30 | CHAS | (-0.002, 0.1] | 471.0 | 0.930830 |
31 | CHAS | (0.9, 1.0] | 35.0 | 0.069170 |
32 | CHAS | (0.8, 0.9] | 0.0 | 0.000000 |
33 | CHAS | (0.7, 0.8] | 0.0 | 0.000000 |
34 | CHAS | (0.6, 0.7] | 0.0 | 0.000000 |
40 | NOX | (0.482, 0.531] | 89.0 | 0.175889 |
41 | NOX | (0.434, 0.482] | 80.0 | 0.158103 |
42 | NOX | (0.579, 0.628] | 78.0 | 0.154150 |
43 | NOX | (0.384, 0.434] | 75.0 | 0.148221 |
44 | NOX | (0.531, 0.579] | 60.0 | 0.118577 |
50 | RM | (5.649, 6.17] | 177.0 | 0.349802 |
51 | RM | (6.17, 6.692] | 151.0 | 0.298419 |
52 | RM | (6.692, 7.214] | 69.0 | 0.136364 |
53 | RM | (5.127, 5.649] | 45.0 | 0.088933 |
54 | RM | (7.214, 7.736] | 22.0 | 0.043478 |
60 | AGE | (90.29, 100.0] | 168.0 | 0.332016 |
61 | AGE | (80.58, 90.29] | 71.0 | 0.140316 |
62 | AGE | (70.87, 80.58] | 42.0 | 0.083004 |
63 | AGE | (32.03, 41.74] | 42.0 | 0.083004 |
64 | AGE | (61.16, 70.87] | 39.0 | 0.077075 |
70 | DIS | (1.118, 2.229] | 150.0 | 0.296443 |
71 | DIS | (2.229, 3.329] | 111.0 | 0.219368 |
72 | DIS | (3.329, 4.429] | 81.0 | 0.160079 |
73 | DIS | (4.429, 5.528] | 61.0 | 0.120553 |
74 | DIS | (5.528, 6.628] | 46.0 | 0.090909 |
80 | RAD | (3.3, 5.6] | 225.0 | 0.444664 |
81 | RAD | (21.7, 24.0] | 132.0 | 0.260870 |
82 | RAD | (0.976, 3.3] | 82.0 | 0.162055 |
83 | RAD | (5.6, 7.9] | 43.0 | 0.084980 |
84 | RAD | (7.9, 10.2] | 24.0 | 0.047431 |
90 | TAX | (658.6, 711.0] | 137.0 | 0.270751 |
91 | TAX | (291.8, 344.2] | 108.0 | 0.213439 |
92 | TAX | (239.4, 291.8] | 100.0 | 0.197628 |
93 | TAX | (396.6, 449.0] | 74.0 | 0.146245 |
94 | TAX | (186.475, 239.4] | 54.0 | 0.106719 |
100 | PTRATIO | (20.12, 21.06] | 178.0 | 0.351779 |
101 | PTRATIO | (18.24, 19.18] | 76.0 | 0.150198 |
102 | PTRATIO | (17.3, 18.24] | 68.0 | 0.134387 |
103 | PTRATIO | (14.48, 15.42] | 58.0 | 0.114625 |
104 | PTRATIO | (19.18, 20.12] | 40.0 | 0.079051 |
110 | B | (357.242, 396.9] | 413.0 | 0.816206 |
111 | B | (317.584, 357.242] | 33.0 | 0.065217 |
112 | B | (-0.0776, 39.978] | 18.0 | 0.035573 |
113 | B | (277.926, 317.584] | 11.0 | 0.021739 |
114 | B | (79.636, 119.294] | 10.0 | 0.019763 |
120 | LSTAT | (5.354, 8.978] | 108.0 | 0.213439 |
121 | LSTAT | (8.978, 12.602] | 99.0 | 0.195652 |
122 | LSTAT | (12.602, 16.226] | 84.0 | 0.166008 |
123 | LSTAT | (1.693, 5.354] | 75.0 | 0.148221 |
124 | LSTAT | (16.226, 19.85] | 64.0 | 0.126482 |
130 | y | (18.5, 23.0] | 154.0 | 0.304348 |
131 | y | (14.0, 18.5] | 85.0 | 0.167984 |
132 | y | (23.0, 27.5] | 84.0 | 0.166008 |
133 | y | (9.5, 14.0] | 55.0 | 0.108696 |
134 | y | (27.5, 32.0] | 39.0 | 0.077075 |
# Display the complete frequency table as the cell output.
freq_table
| | Variable | Value | Count | Proportion |
---|---|---|---|---|
0 | CRIM | (-0.08360000000000001, 8.903] | 439.0 | 0.867589 |
1 | CRIM | (8.903, 17.8] | 44.0 | 0.086957 |
2 | CRIM | (17.8, 26.697] | 14.0 | 0.027668 |
3 | CRIM | (35.594, 44.491] | 3.0 | 0.005929 |
4 | CRIM | (44.491, 53.388] | 2.0 | 0.003953 |
5 | CRIM | (80.079, 88.976] | 1.0 | 0.001976 |
6 | CRIM | (71.182, 80.079] | 1.0 | 0.001976 |
7 | CRIM | (62.285, 71.182] | 1.0 | 0.001976 |
8 | CRIM | (26.697, 35.594] | 1.0 | 0.001976 |
9 | CRIM | (53.388, 62.285] | 0.0 | 0.000000 |
10 | ZN | (-0.101, 10.0] | 372.0 | 0.735178 |
11 | ZN | (20.0, 30.0] | 33.0 | 0.065217 |
12 | ZN | (10.0, 20.0] | 33.0 | 0.065217 |
13 | ZN | (70.0, 80.0] | 18.0 | 0.035573 |
14 | ZN | (30.0, 40.0] | 17.0 | 0.033597 |
15 | ZN | (50.0, 60.0] | 10.0 | 0.019763 |
16 | ZN | (80.0, 90.0] | 9.0 | 0.017787 |
17 | ZN | (40.0, 50.0] | 6.0 | 0.011858 |
18 | ZN | (90.0, 100.0] | 5.0 | 0.009881 |
19 | ZN | (60.0, 70.0] | 3.0 | 0.005929 |
20 | INDUS | (16.828, 19.556] | 132.0 | 0.260870 |
21 | INDUS | (5.916, 8.644] | 99.0 | 0.195652 |
22 | INDUS | (3.188, 5.916] | 95.0 | 0.187747 |
23 | INDUS | (0.432, 3.188] | 56.0 | 0.110672 |
24 | INDUS | (19.556, 22.284] | 45.0 | 0.088933 |
25 | INDUS | (8.644, 11.372] | 44.0 | 0.086957 |
26 | INDUS | (11.372, 14.1] | 20.0 | 0.039526 |
27 | INDUS | (25.012, 27.74] | 12.0 | 0.023715 |
28 | INDUS | (14.1, 16.828] | 3.0 | 0.005929 |
29 | INDUS | (22.284, 25.012] | 0.0 | 0.000000 |
30 | CHAS | (-0.002, 0.1] | 471.0 | 0.930830 |
31 | CHAS | (0.9, 1.0] | 35.0 | 0.069170 |
32 | CHAS | (0.8, 0.9] | 0.0 | 0.000000 |
33 | CHAS | (0.7, 0.8] | 0.0 | 0.000000 |
34 | CHAS | (0.6, 0.7] | 0.0 | 0.000000 |
35 | CHAS | (0.5, 0.6] | 0.0 | 0.000000 |
36 | CHAS | (0.4, 0.5] | 0.0 | 0.000000 |
37 | CHAS | (0.3, 0.4] | 0.0 | 0.000000 |
38 | CHAS | (0.2, 0.3] | 0.0 | 0.000000 |
39 | CHAS | (0.1, 0.2] | 0.0 | 0.000000 |
40 | NOX | (0.482, 0.531] | 89.0 | 0.175889 |
41 | NOX | (0.434, 0.482] | 80.0 | 0.158103 |
42 | NOX | (0.579, 0.628] | 78.0 | 0.154150 |
43 | NOX | (0.384, 0.434] | 75.0 | 0.148221 |
44 | NOX | (0.531, 0.579] | 60.0 | 0.118577 |
45 | NOX | (0.677, 0.725] | 57.0 | 0.112648 |
46 | NOX | (0.628, 0.677] | 30.0 | 0.059289 |
47 | NOX | (0.725, 0.774] | 21.0 | 0.041502 |
48 | NOX | (0.822, 0.871] | 16.0 | 0.031621 |
49 | NOX | (0.774, 0.822] | 0.0 | 0.000000 |
50 | RM | (5.649, 6.17] | 177.0 | 0.349802 |
51 | RM | (6.17, 6.692] | 151.0 | 0.298419 |
52 | RM | (6.692, 7.214] | 69.0 | 0.136364 |
53 | RM | (5.127, 5.649] | 45.0 | 0.088933 |
54 | RM | (7.214, 7.736] | 22.0 | 0.043478 |
55 | RM | (4.605, 5.127] | 14.0 | 0.027668 |
56 | RM | (7.736, 8.258] | 13.0 | 0.025692 |
57 | RM | (8.258, 8.78] | 9.0 | 0.017787 |
58 | RM | (4.083, 4.605] | 4.0 | 0.007905 |
59 | RM | (3.555, 4.083] | 2.0 | 0.003953 |
60 | AGE | (90.29, 100.0] | 168.0 | 0.332016 |
61 | AGE | (80.58, 90.29] | 71.0 | 0.140316 |
62 | AGE | (70.87, 80.58] | 42.0 | 0.083004 |
63 | AGE | (32.03, 41.74] | 42.0 | 0.083004 |
64 | AGE | (61.16, 70.87] | 39.0 | 0.077075 |
65 | AGE | (51.45, 61.16] | 38.0 | 0.075099 |
66 | AGE | (41.74, 51.45] | 32.0 | 0.063241 |
67 | AGE | (12.61, 22.32] | 31.0 | 0.061265 |
68 | AGE | (22.32, 32.03] | 29.0 | 0.057312 |
69 | AGE | (2.802, 12.61] | 14.0 | 0.027668 |
70 | DIS | (1.118, 2.229] | 150.0 | 0.296443 |
71 | DIS | (2.229, 3.329] | 111.0 | 0.219368 |
72 | DIS | (3.329, 4.429] | 81.0 | 0.160079 |
73 | DIS | (4.429, 5.528] | 61.0 | 0.120553 |
74 | DIS | (5.528, 6.628] | 46.0 | 0.090909 |
75 | DIS | (6.628, 7.728] | 26.0 | 0.051383 |
76 | DIS | (7.728, 8.827] | 19.0 | 0.037549 |
77 | DIS | (8.827, 9.927] | 7.0 | 0.013834 |
78 | DIS | (9.927, 11.027] | 4.0 | 0.007905 |
79 | DIS | (11.027, 12.126] | 1.0 | 0.001976 |
80 | RAD | (3.3, 5.6] | 225.0 | 0.444664 |
81 | RAD | (21.7, 24.0] | 132.0 | 0.260870 |
82 | RAD | (0.976, 3.3] | 82.0 | 0.162055 |
83 | RAD | (5.6, 7.9] | 43.0 | 0.084980 |
84 | RAD | (7.9, 10.2] | 24.0 | 0.047431 |
85 | RAD | (19.4, 21.7] | 0.0 | 0.000000 |
86 | RAD | (17.1, 19.4] | 0.0 | 0.000000 |
87 | RAD | (14.8, 17.1] | 0.0 | 0.000000 |
88 | RAD | (12.5, 14.8] | 0.0 | 0.000000 |
89 | RAD | (10.2, 12.5] | 0.0 | 0.000000 |
90 | TAX | (658.6, 711.0] | 137.0 | 0.270751 |
91 | TAX | (291.8, 344.2] | 108.0 | 0.213439 |
92 | TAX | (239.4, 291.8] | 100.0 | 0.197628 |
93 | TAX | (396.6, 449.0] | 74.0 | 0.146245 |
94 | TAX | (186.475, 239.4] | 54.0 | 0.106719 |
95 | TAX | (344.2, 396.6] | 32.0 | 0.063241 |
96 | TAX | (449.0, 501.4] | 1.0 | 0.001976 |
97 | TAX | (606.2, 658.6] | 0.0 | 0.000000 |
98 | TAX | (553.8, 606.2] | 0.0 | 0.000000 |
99 | TAX | (501.4, 553.8] | 0.0 | 0.000000 |
100 | PTRATIO | (20.12, 21.06] | 178.0 | 0.351779 |
101 | PTRATIO | (18.24, 19.18] | 76.0 | 0.150198 |
102 | PTRATIO | (17.3, 18.24] | 68.0 | 0.134387 |
103 | PTRATIO | (14.48, 15.42] | 58.0 | 0.114625 |
104 | PTRATIO | (19.18, 20.12] | 40.0 | 0.079051 |
105 | PTRATIO | (16.36, 17.3] | 36.0 | 0.071146 |
106 | PTRATIO | (21.06, 22.0] | 18.0 | 0.035573 |
107 | PTRATIO | (15.42, 16.36] | 15.0 | 0.029644 |
108 | PTRATIO | (12.59, 13.54] | 15.0 | 0.029644 |
109 | PTRATIO | (13.54, 14.48] | 2.0 | 0.003953 |
110 | B | (357.242, 396.9] | 413.0 | 0.816206 |
111 | B | (317.584, 357.242] | 33.0 | 0.065217 |
112 | B | (-0.0776, 39.978] | 18.0 | 0.035573 |
113 | B | (277.926, 317.584] | 11.0 | 0.021739 |
114 | B | (79.636, 119.294] | 10.0 | 0.019763 |
115 | B | (238.268, 277.926] | 7.0 | 0.013834 |
116 | B | (39.978, 79.636] | 6.0 | 0.011858 |
117 | B | (198.61, 238.268] | 3.0 | 0.005929 |
118 | B | (158.952, 198.61] | 3.0 | 0.005929 |
119 | B | (119.294, 158.952] | 2.0 | 0.003953 |
120 | LSTAT | (5.354, 8.978] | 108.0 | 0.213439 |
121 | LSTAT | (8.978, 12.602] | 99.0 | 0.195652 |
122 | LSTAT | (12.602, 16.226] | 84.0 | 0.166008 |
123 | LSTAT | (1.693, 5.354] | 75.0 | 0.148221 |
124 | LSTAT | (16.226, 19.85] | 64.0 | 0.126482 |
125 | LSTAT | (19.85, 23.474] | 30.0 | 0.059289 |
126 | LSTAT | (23.474, 27.098] | 21.0 | 0.041502 |
127 | LSTAT | (27.098, 30.722] | 16.0 | 0.031621 |
128 | LSTAT | (34.346, 37.97] | 5.0 | 0.009881 |
129 | LSTAT | (30.722, 34.346] | 4.0 | 0.007905 |
130 | y | (18.5, 23.0] | 154.0 | 0.304348 |
131 | y | (14.0, 18.5] | 85.0 | 0.167984 |
132 | y | (23.0, 27.5] | 84.0 | 0.166008 |
133 | y | (9.5, 14.0] | 55.0 | 0.108696 |
134 | y | (27.5, 32.0] | 39.0 | 0.077075 |
135 | y | (32.0, 36.5] | 29.0 | 0.057312 |
136 | y | (4.954, 9.5] | 22.0 | 0.043478 |
137 | y | (45.5, 50.0] | 21.0 | 0.041502 |
138 | y | (41.0, 45.5] | 10.0 | 0.019763 |
139 | y | (36.5, 41.0] | 7.0 | 0.013834 |