In [1]:
import pandas as pd
# Raise pandas' row display limit so that long tables (such as the full
# frequency table shown later in this notebook) are not truncated.
pd.set_option('display.max_rows', 500)

# Import the project-defined frequency_table helper.
from frequency_table import frequency_table

サンプルデータ作成

サンプルデータとして、Boston Housing データセットから DataFrame `data` を作成します(目的変数は列 `y` に格納します)。

In [2]:
# Load the Boston housing data into `boston`, a mapping that exposes the
# 'data', 'feature_names' and 'target' entries used below.
#
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# importing it raises ImportError. To keep this notebook runnable on current
# scikit-learn, fall back to reading the original StatLib file with pandas.
try:
    from sklearn.datasets import load_boston
except ImportError:
    # In the raw file each record spans two physical lines: the first carries
    # 11 feature values, the second carries the last 2 features followed by
    # the target. The first 22 lines are a free-text header.
    boston_raw = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    first_lines = boston_raw.iloc[::2].reset_index(drop=True)
    second_lines = boston_raw.iloc[1::2].reset_index(drop=True)
    boston = {
        'data': pd.concat(
            [first_lines, second_lines.iloc[:, :2]],
            axis=1,
        ).to_numpy(),
        'feature_names': [
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ],
        'target': second_lines.iloc[:, 2].to_numpy(),
    }
else:
    boston = load_boston()

# Assemble the feature matrix into a DataFrame and append the target as `y`.
data = pd.DataFrame(
    boston['data'],
    columns=boston['feature_names'],
)
data['y'] = boston['target']
data.head(10)
Out[2]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT y
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33 36.2
5 0.02985 0.0 2.18 0.0 0.458 6.430 58.7 6.0622 3.0 222.0 18.7 394.12 5.21 28.7
6 0.08829 12.5 7.87 0.0 0.524 6.012 66.6 5.5605 5.0 311.0 15.2 395.60 12.43 22.9
7 0.14455 12.5 7.87 0.0 0.524 6.172 96.1 5.9505 5.0 311.0 15.2 396.90 19.15 27.1
8 0.21124 12.5 7.87 0.0 0.524 5.631 100.0 6.0821 5.0 311.0 15.2 386.63 29.93 16.5
9 0.17004 12.5 7.87 0.0 0.524 6.004 85.9 6.5921 5.0 311.0 15.2 386.71 17.10 18.9

度数分布の作成

定義したfrequency_tableで度数分布表を作成します。

In [3]:
# Build the frequency table for every column of `data` with the imported
# project helper. As displayed below, the result is a long-format DataFrame
# with the columns Variable / Value / Count / Proportion.
freq_table = frequency_table(data)

確認

  • 作成した度数分布表を確認します。
  • 要素数が多い変数で出力が膨大にならないように、ここでは head() で変数ごとに先頭5行のみを表示しています。
  • 最後に、head() を使わずに全変数の結果を出力します。
In [4]:
# Preview only the first 5 rows of each variable's section of the table
# (5 is the default of GroupBy.head, spelled out here for the reader).
freq_table.groupby("Variable").head(n=5)
Out[4]:
Variable Value Count Proportion
0 CRIM (-0.08360000000000001, 8.903] 439.0 0.867589
1 CRIM (8.903, 17.8] 44.0 0.086957
2 CRIM (17.8, 26.697] 14.0 0.027668
3 CRIM (35.594, 44.491] 3.0 0.005929
4 CRIM (44.491, 53.388] 2.0 0.003953
10 ZN (-0.101, 10.0] 372.0 0.735178
11 ZN (20.0, 30.0] 33.0 0.065217
12 ZN (10.0, 20.0] 33.0 0.065217
13 ZN (70.0, 80.0] 18.0 0.035573
14 ZN (30.0, 40.0] 17.0 0.033597
20 INDUS (16.828, 19.556] 132.0 0.260870
21 INDUS (5.916, 8.644] 99.0 0.195652
22 INDUS (3.188, 5.916] 95.0 0.187747
23 INDUS (0.432, 3.188] 56.0 0.110672
24 INDUS (19.556, 22.284] 45.0 0.088933
30 CHAS (-0.002, 0.1] 471.0 0.930830
31 CHAS (0.9, 1.0] 35.0 0.069170
32 CHAS (0.8, 0.9] 0.0 0.000000
33 CHAS (0.7, 0.8] 0.0 0.000000
34 CHAS (0.6, 0.7] 0.0 0.000000
40 NOX (0.482, 0.531] 89.0 0.175889
41 NOX (0.434, 0.482] 80.0 0.158103
42 NOX (0.579, 0.628] 78.0 0.154150
43 NOX (0.384, 0.434] 75.0 0.148221
44 NOX (0.531, 0.579] 60.0 0.118577
50 RM (5.649, 6.17] 177.0 0.349802
51 RM (6.17, 6.692] 151.0 0.298419
52 RM (6.692, 7.214] 69.0 0.136364
53 RM (5.127, 5.649] 45.0 0.088933
54 RM (7.214, 7.736] 22.0 0.043478
60 AGE (90.29, 100.0] 168.0 0.332016
61 AGE (80.58, 90.29] 71.0 0.140316
62 AGE (70.87, 80.58] 42.0 0.083004
63 AGE (32.03, 41.74] 42.0 0.083004
64 AGE (61.16, 70.87] 39.0 0.077075
70 DIS (1.118, 2.229] 150.0 0.296443
71 DIS (2.229, 3.329] 111.0 0.219368
72 DIS (3.329, 4.429] 81.0 0.160079
73 DIS (4.429, 5.528] 61.0 0.120553
74 DIS (5.528, 6.628] 46.0 0.090909
80 RAD (3.3, 5.6] 225.0 0.444664
81 RAD (21.7, 24.0] 132.0 0.260870
82 RAD (0.976, 3.3] 82.0 0.162055
83 RAD (5.6, 7.9] 43.0 0.084980
84 RAD (7.9, 10.2] 24.0 0.047431
90 TAX (658.6, 711.0] 137.0 0.270751
91 TAX (291.8, 344.2] 108.0 0.213439
92 TAX (239.4, 291.8] 100.0 0.197628
93 TAX (396.6, 449.0] 74.0 0.146245
94 TAX (186.475, 239.4] 54.0 0.106719
100 PTRATIO (20.12, 21.06] 178.0 0.351779
101 PTRATIO (18.24, 19.18] 76.0 0.150198
102 PTRATIO (17.3, 18.24] 68.0 0.134387
103 PTRATIO (14.48, 15.42] 58.0 0.114625
104 PTRATIO (19.18, 20.12] 40.0 0.079051
110 B (357.242, 396.9] 413.0 0.816206
111 B (317.584, 357.242] 33.0 0.065217
112 B (-0.0776, 39.978] 18.0 0.035573
113 B (277.926, 317.584] 11.0 0.021739
114 B (79.636, 119.294] 10.0 0.019763
120 LSTAT (5.354, 8.978] 108.0 0.213439
121 LSTAT (8.978, 12.602] 99.0 0.195652
122 LSTAT (12.602, 16.226] 84.0 0.166008
123 LSTAT (1.693, 5.354] 75.0 0.148221
124 LSTAT (16.226, 19.85] 64.0 0.126482
130 y (18.5, 23.0] 154.0 0.304348
131 y (14.0, 18.5] 85.0 0.167984
132 y (23.0, 27.5] 84.0 0.166008
133 y (9.5, 14.0] 55.0 0.108696
134 y (27.5, 32.0] 39.0 0.077075
In [5]:
# Display the complete frequency table for all variables (no head()).
# The raised display.max_rows setting keeps it from being truncated.
freq_table
Out[5]:
Variable Value Count Proportion
0 CRIM (-0.08360000000000001, 8.903] 439.0 0.867589
1 CRIM (8.903, 17.8] 44.0 0.086957
2 CRIM (17.8, 26.697] 14.0 0.027668
3 CRIM (35.594, 44.491] 3.0 0.005929
4 CRIM (44.491, 53.388] 2.0 0.003953
5 CRIM (80.079, 88.976] 1.0 0.001976
6 CRIM (71.182, 80.079] 1.0 0.001976
7 CRIM (62.285, 71.182] 1.0 0.001976
8 CRIM (26.697, 35.594] 1.0 0.001976
9 CRIM (53.388, 62.285] 0.0 0.000000
10 ZN (-0.101, 10.0] 372.0 0.735178
11 ZN (20.0, 30.0] 33.0 0.065217
12 ZN (10.0, 20.0] 33.0 0.065217
13 ZN (70.0, 80.0] 18.0 0.035573
14 ZN (30.0, 40.0] 17.0 0.033597
15 ZN (50.0, 60.0] 10.0 0.019763
16 ZN (80.0, 90.0] 9.0 0.017787
17 ZN (40.0, 50.0] 6.0 0.011858
18 ZN (90.0, 100.0] 5.0 0.009881
19 ZN (60.0, 70.0] 3.0 0.005929
20 INDUS (16.828, 19.556] 132.0 0.260870
21 INDUS (5.916, 8.644] 99.0 0.195652
22 INDUS (3.188, 5.916] 95.0 0.187747
23 INDUS (0.432, 3.188] 56.0 0.110672
24 INDUS (19.556, 22.284] 45.0 0.088933
25 INDUS (8.644, 11.372] 44.0 0.086957
26 INDUS (11.372, 14.1] 20.0 0.039526
27 INDUS (25.012, 27.74] 12.0 0.023715
28 INDUS (14.1, 16.828] 3.0 0.005929
29 INDUS (22.284, 25.012] 0.0 0.000000
30 CHAS (-0.002, 0.1] 471.0 0.930830
31 CHAS (0.9, 1.0] 35.0 0.069170
32 CHAS (0.8, 0.9] 0.0 0.000000
33 CHAS (0.7, 0.8] 0.0 0.000000
34 CHAS (0.6, 0.7] 0.0 0.000000
35 CHAS (0.5, 0.6] 0.0 0.000000
36 CHAS (0.4, 0.5] 0.0 0.000000
37 CHAS (0.3, 0.4] 0.0 0.000000
38 CHAS (0.2, 0.3] 0.0 0.000000
39 CHAS (0.1, 0.2] 0.0 0.000000
40 NOX (0.482, 0.531] 89.0 0.175889
41 NOX (0.434, 0.482] 80.0 0.158103
42 NOX (0.579, 0.628] 78.0 0.154150
43 NOX (0.384, 0.434] 75.0 0.148221
44 NOX (0.531, 0.579] 60.0 0.118577
45 NOX (0.677, 0.725] 57.0 0.112648
46 NOX (0.628, 0.677] 30.0 0.059289
47 NOX (0.725, 0.774] 21.0 0.041502
48 NOX (0.822, 0.871] 16.0 0.031621
49 NOX (0.774, 0.822] 0.0 0.000000
50 RM (5.649, 6.17] 177.0 0.349802
51 RM (6.17, 6.692] 151.0 0.298419
52 RM (6.692, 7.214] 69.0 0.136364
53 RM (5.127, 5.649] 45.0 0.088933
54 RM (7.214, 7.736] 22.0 0.043478
55 RM (4.605, 5.127] 14.0 0.027668
56 RM (7.736, 8.258] 13.0 0.025692
57 RM (8.258, 8.78] 9.0 0.017787
58 RM (4.083, 4.605] 4.0 0.007905
59 RM (3.555, 4.083] 2.0 0.003953
60 AGE (90.29, 100.0] 168.0 0.332016
61 AGE (80.58, 90.29] 71.0 0.140316
62 AGE (70.87, 80.58] 42.0 0.083004
63 AGE (32.03, 41.74] 42.0 0.083004
64 AGE (61.16, 70.87] 39.0 0.077075
65 AGE (51.45, 61.16] 38.0 0.075099
66 AGE (41.74, 51.45] 32.0 0.063241
67 AGE (12.61, 22.32] 31.0 0.061265
68 AGE (22.32, 32.03] 29.0 0.057312
69 AGE (2.802, 12.61] 14.0 0.027668
70 DIS (1.118, 2.229] 150.0 0.296443
71 DIS (2.229, 3.329] 111.0 0.219368
72 DIS (3.329, 4.429] 81.0 0.160079
73 DIS (4.429, 5.528] 61.0 0.120553
74 DIS (5.528, 6.628] 46.0 0.090909
75 DIS (6.628, 7.728] 26.0 0.051383
76 DIS (7.728, 8.827] 19.0 0.037549
77 DIS (8.827, 9.927] 7.0 0.013834
78 DIS (9.927, 11.027] 4.0 0.007905
79 DIS (11.027, 12.126] 1.0 0.001976
80 RAD (3.3, 5.6] 225.0 0.444664
81 RAD (21.7, 24.0] 132.0 0.260870
82 RAD (0.976, 3.3] 82.0 0.162055
83 RAD (5.6, 7.9] 43.0 0.084980
84 RAD (7.9, 10.2] 24.0 0.047431
85 RAD (19.4, 21.7] 0.0 0.000000
86 RAD (17.1, 19.4] 0.0 0.000000
87 RAD (14.8, 17.1] 0.0 0.000000
88 RAD (12.5, 14.8] 0.0 0.000000
89 RAD (10.2, 12.5] 0.0 0.000000
90 TAX (658.6, 711.0] 137.0 0.270751
91 TAX (291.8, 344.2] 108.0 0.213439
92 TAX (239.4, 291.8] 100.0 0.197628
93 TAX (396.6, 449.0] 74.0 0.146245
94 TAX (186.475, 239.4] 54.0 0.106719
95 TAX (344.2, 396.6] 32.0 0.063241
96 TAX (449.0, 501.4] 1.0 0.001976
97 TAX (606.2, 658.6] 0.0 0.000000
98 TAX (553.8, 606.2] 0.0 0.000000
99 TAX (501.4, 553.8] 0.0 0.000000
100 PTRATIO (20.12, 21.06] 178.0 0.351779
101 PTRATIO (18.24, 19.18] 76.0 0.150198
102 PTRATIO (17.3, 18.24] 68.0 0.134387
103 PTRATIO (14.48, 15.42] 58.0 0.114625
104 PTRATIO (19.18, 20.12] 40.0 0.079051
105 PTRATIO (16.36, 17.3] 36.0 0.071146
106 PTRATIO (21.06, 22.0] 18.0 0.035573
107 PTRATIO (15.42, 16.36] 15.0 0.029644
108 PTRATIO (12.59, 13.54] 15.0 0.029644
109 PTRATIO (13.54, 14.48] 2.0 0.003953
110 B (357.242, 396.9] 413.0 0.816206
111 B (317.584, 357.242] 33.0 0.065217
112 B (-0.0776, 39.978] 18.0 0.035573
113 B (277.926, 317.584] 11.0 0.021739
114 B (79.636, 119.294] 10.0 0.019763
115 B (238.268, 277.926] 7.0 0.013834
116 B (39.978, 79.636] 6.0 0.011858
117 B (198.61, 238.268] 3.0 0.005929
118 B (158.952, 198.61] 3.0 0.005929
119 B (119.294, 158.952] 2.0 0.003953
120 LSTAT (5.354, 8.978] 108.0 0.213439
121 LSTAT (8.978, 12.602] 99.0 0.195652
122 LSTAT (12.602, 16.226] 84.0 0.166008
123 LSTAT (1.693, 5.354] 75.0 0.148221
124 LSTAT (16.226, 19.85] 64.0 0.126482
125 LSTAT (19.85, 23.474] 30.0 0.059289
126 LSTAT (23.474, 27.098] 21.0 0.041502
127 LSTAT (27.098, 30.722] 16.0 0.031621
128 LSTAT (34.346, 37.97] 5.0 0.009881
129 LSTAT (30.722, 34.346] 4.0 0.007905
130 y (18.5, 23.0] 154.0 0.304348
131 y (14.0, 18.5] 85.0 0.167984
132 y (23.0, 27.5] 84.0 0.166008
133 y (9.5, 14.0] 55.0 0.108696
134 y (27.5, 32.0] 39.0 0.077075
135 y (32.0, 36.5] 29.0 0.057312
136 y (4.954, 9.5] 22.0 0.043478
137 y (45.5, 50.0] 21.0 0.041502
138 y (41.0, 45.5] 10.0 0.019763
139 y (36.5, 41.0] 7.0 0.013834