Bạn có thể làm như sau:
# Build a random 10x10 frame with columns A..J.
df = pd.DataFrame(np.random.randn(10, 10), columns=list('ABCDEFGHIJ'))
# Sort the rows by column A (ascending).
df = df.sort_values(by='A', ascending=True)
# Work on the transposed frame so that the original columns become rows.
transposed = df.T
# Split the row positions of the transposed frame into 5 contiguous bins.
# Splitting plain integer positions (instead of handing the DataFrame itself
# to np.array_split) keeps numpy from calling DataFrame.swapaxes.
row_bins = np.array_split(np.arange(len(transposed)), 5)
# Transposed pieces: each one holds the rows (= original columns) of one bin.
new = [transposed.iloc[rows] for rows in row_bins]
# Transpose every piece back so each result has the original row layout.
dfs = [piece.T for piece in new]
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]6
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]9
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]32
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]1
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]2
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]35
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]36
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]37
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]38
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]39
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]90
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]36
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]92
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]93
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]94
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]95
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]97
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]98
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]99
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]93______27
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]7
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]38
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
025equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]6
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
060equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]9
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
10Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
12Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]0
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
069_______95 Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]4
# Build a random 1000x5 frame with columns A..E.
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# Sort the rows by column A, in place.
df.sort_values('A', inplace=True)
# Assign every row to one of 20 equal-width intervals of A.
df['bin'] = pd.cut(df['A'], 20, include_lowest=True)
# Group on the interval; observed=True leaves out intervals with no rows.
group = df.groupby('bin', observed=True)
# One DataFrame per non-empty interval, taken directly from the groupby.
dfs = [frame for _, frame in group]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
102# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
103____30# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
105 # random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
106# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
107Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]6
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]7
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
06Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]51
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
106Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]0
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
06equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]7____95
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]9
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
06Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]9
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]0
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]1
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]2
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]3
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]4
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
126equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]38
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]39
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
129equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]6
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]7
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]7
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]38
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]05
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]38
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
069equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]36
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]09
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
06Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]11____95
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]9
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]14
# random df
df = pd.DataFrame[np.random.randn[1000, 5], columns=list['ABCDE']]
# sort on A
df.sort_values['A', inplace=True]
# create bins
df['bin'] = pd.cut[df['A'], 20, include_lowest = True]
# group on bin
group = df.groupby['bin']
# list comprehension to split groups into list of dataframes
dfs = [group.get_group[x] for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
06Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]9
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]0
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]1
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]2
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]3
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]4
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]5
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]6
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]24
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]9
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]6
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]9
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]32
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]1
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]31
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]93
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]94
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]95
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]36
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]37
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]38
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]40
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]38
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]39
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]43
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]98
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]24
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]38
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]48
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]6
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]11
equal frequency binning [5, 10, 11, 13] [15, 35, 50, 55] [72, 92, 204, 215] equal width binning [[5, 10, 11, 13, 15, 35, 50, 55, 72], [92], [204, 215]]38
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]53
# random df
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# sort on A
df.sort_values('A', inplace=True)
# create bins
df['bin'] = pd.cut(df['A'], 20, include_lowest = True)
# group on bin
group = df.groupby('bin')
# list comprehension to split groups into list of dataframes
dfs = [group.get_group(x) for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
06# random df
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# sort on A
df.sort_values('A', inplace=True)
# create bins
df['bin'] = pd.cut(df['A'], 20, include_lowest = True)
# group on bin
group = df.groupby('bin')
# list comprehension to split groups into list of dataframes
dfs = [group.get_group(x) for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
027______356# random df
df = pd.DataFrame(np.random.randn(1000, 5), columns=list('ABCDE'))
# sort on A
df.sort_values('A', inplace=True)
# create bins
df['bin'] = pd.cut(df['A'], 20, include_lowest = True)
# group on bin
group = df.groupby('bin')
# list comprehension to split groups into list of dataframes
dfs = [group.get_group(x) for x in group.groups]
[ A B C D E bin
218 -2.716093 0.833726 -0.771400 0.691251 0.162448 [-2.723, -2.413]
207 -2.581388 -2.318333 -0.001467 0.035277 1.219666 [-2.723, -2.413]
380 -2.499710 1.946709 -0.519070 1.653383 0.309689 [-2.723, -2.413]
866 -2.492050 0.246500 -0.596392 0.872888 2.371652 [-2.723, -2.413]
876 -2.469238 -0.156470 -0.841065 -1.248793 -0.489665 [-2.723, -2.413]
314 -2.456308 0.630691 -0.072146 1.139697 0.663674 [-2.723, -2.413]
310 -2.455353 0.075842 0.589515 -0.427233 1.207979 [-2.723, -2.413]
660 -2.427255 0.890125 -0.042716 -1.038401 0.651324 [-2.723, -2.413],
A B C D E bin
571 -2.355430 0.383794 -1.266575 -1.214833 -0.862611 [-2.413, -2.11]
977 -2.354416 -1.964189 0.440376 0.028032 -0.181360 [-2.413, -2.11]
83 -2.276908 0.288462 0.370555 -0.546359 -2.033892 [-2.413, -2.11]
196 -2.213729 -1.087783 -0.592884 1.233886 1.051164 [-2.413, -2.11]
227 -2.146631 0.365183 -0.095293 -0.882414 0.385117 [-2.413, -2.11]
39 -2.136800 -1.150065 0.180182 -0.424071 0.040370 [-2.413, -2.11],
A B C D E bin
104 -2.108961 -0.396602 -1.014224 -1.277124 0.001030 [-2.11, -1.806]
360 -2.098928 1.093483 1.438421 -0.980215 0.010359 [-2.11, -1.806]
530 -2.088592 1.043201 -0.522468 0.482176 -0.680166 [-2.11, -1.806]
158 -2.062759 2.070387 2.124621 -2.751532 0.674055 [-2.11, -1.806]
971 -2.053039 0.347577 -0.498513 1.917305 -1.746493 [-2.11, -1.806]
658 -2.002482 -1.222292 -0.398816 0.279228 -1.485782 [-2.11, -1.806]
90 -1.985261 3.499251 -2.089028 1.238524 -1.781089 [-2.11, -1.806]
466 -1.973640 -1.609920 -1.029454 0.809143 -0.228893 [-2.11, -1.806]
40 -1.966016 -1.479240 -1.564966 -0.310133 1.338023 [-2.11, -1.806]
279 -1.943666 0.762493 0.060038 0.449159 0.244411 [-2.11, -1.806]
204 -1.940045 0.844901 -0.343691 -1.144836 1.385915 [-2.11, -1.806]
780 -1.918548 0.212452 0.225789 0.216110 1.710532 [-2.11, -1.806]
289 -1.897438 0.847664 0.689778 -0.454152 -0.747836 [-2.11, -1.806]
159 -1.848425 0.477726 0.391384 -0.477804 0.168160 [-2.11, -1.806],
. . .
029Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]58
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]59
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]5
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]61
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]62
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]64
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]66
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]68
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]70
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]72
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]74
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]76
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]78
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]80
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]82
Input: [5, 10, 11, 13, 15, 35, 50, 55, 72, 92, 204, 215] Output: [5, 10, 11, 13, 15, 35, 50, 55, 72] [92] [204, 215]63_____3858
Binning khoảng thời gian bằng nhau là gì?
Phân nhóm theo tần số bằng nhau (equal-frequency binning) chia tập dữ liệu thành các ngăn có cùng số lượng mẫu. Phân nhóm theo phân vị (quantile binning) gán cùng một số lượng quan sát cho mỗi ngăn.
Tại sao sử dụng tính năng tạo tần số bằng nhau?
Ngược lại, phân nhóm theo tần số bằng nhau đảm bảo rằng mọi ngăn chứa lượng dữ liệu gần như giống nhau, điều này thường thích hợp hơn nếu sau đó bạn phải sử dụng tiếp các ngăn đó.
Làm cách nào để chia dữ liệu thành các ngăn (bin) trong pandas?
Sử dụng hàm `pd.cut` khi bạn cần phân đoạn và sắp xếp các giá trị dữ liệu vào các ngăn. Hàm này cũng hữu ích để chuyển từ biến liên tục sang biến phân loại. Ví dụ: `cut` có thể chuyển đổi độ tuổi thành các nhóm độ tuổi. Hàm hỗ trợ chia dữ liệu thành một số lượng ngăn bằng nhau hoặc theo một mảng các ngăn được chỉ định trước.
Ba chiến lược tạo thùng phổ biến có sẵn là gì?
Binning không giám sát: binning theo độ rộng bằng nhau (equal-width) và binning theo tần số bằng nhau (equal-frequency). Binning có giám sát: binning dựa trên entropy.