pd.crosstab
"Create a cross tabulation using pivot_table."
import sys
import random
import pandas as pd
data = [
[42, "Eighth", 4280],
[42, "Eighth", 4281],
[42, "Eighth", 4282],
[42, "Seventh", 4270],
[42, "Seventh", 4271],
[42, "Seventh", 4272],
[42, "Sixth", 4260],
[42, "Sixth", 4261],
[42, "Sixth", 4262],
[43, "Eighth", 4380],
[43, "Eighth", 4381],
[43, "Eighth", 4382],
[43, "Seventh", 4370],
[43, "Seventh", 4371],
[43, "Seventh", 4372],
[43, "Sixth", 4360],
[43, "Sixth", 4361],
[43, "Sixth", 4362],
[44, "Eighth", 4480],
[44, "Eighth", 4481],
[44, "Eighth", 4482],
[44, "Seventh", 4470],
[44, "Seventh", 4471],
[44, "Seventh", 4472],
[44, "Sixth", 4460],
[44, "Sixth", 4461],
[44, "Sixth", 4462]
]
random.shuffle(data)
columns = pd.Index(data = ["street", "avenue", "accidents"])
df = pd.DataFrame(data = data, columns = columns)
print(df)
print()
crosstab = df.pivot_table(
index = "street",
columns = "avenue",
aggfunc = "count",
margins = True
)
print(crosstab)
print()
crosstab = crosstab.droplevel(axis = 1, level = 0)
print(crosstab)
sys.exit(0)
accidents avenue Eighth Seventh Sixth All street 42 3 3 3 9 43 3 3 3 9 44 3 3 3 9 All 9 9 9 27 avenue Eighth Seventh Sixth All street 42 3 3 3 9 43 3 3 3 9 44 3 3 3 9 All 9 9 9 27
Three reports have come in about the number of accidents at Eighth Avenue & 42nd Street.
"Create a cross tabulation."
import sys
import random
import pandas as pd
data = [
[42, "Eighth", 4280],
[42, "Eighth", 4281],
[42, "Eighth", 4282],
[42, "Seventh", 4270],
[42, "Seventh", 4271],
[42, "Seventh", 4272],
[42, "Sixth", 4260],
[42, "Sixth", 4261],
[42, "Sixth", 4262],
[43, "Eighth", 4380],
[43, "Eighth", 4381],
[43, "Eighth", 4382],
[43, "Seventh", 4370],
[43, "Seventh", 4371],
[43, "Seventh", 4372],
[43, "Sixth", 4360],
[43, "Sixth", 4361],
[43, "Sixth", 4362],
[44, "Eighth", 4480],
[44, "Eighth", 4481],
[44, "Eighth", 4482],
[44, "Seventh", 4470],
[44, "Seventh", 4471],
[44, "Seventh", 4472],
[44, "Sixth", 4460],
[44, "Sixth", 4461],
[44, "Sixth", 4462]
]
random.shuffle(data)
columns = pd.Index(data = ["street", "avenue", "accidents"])
df = pd.DataFrame(data = data, columns = columns)
print(df)
print()
crosstab = pd.crosstab(
index = df.street,
columns = df.avenue,
margins = True
)
print(crosstab)
sys.exit(0)
street avenue accidents
0 42 Seventh 4271
1 42 Seventh 4270
2 43 Eighth 4382
3 44 Eighth 4481
4 43 Seventh 4371
5 43 Sixth 4362
6 42 Eighth 4280
7 43 Sixth 4361
8 43 Sixth 4360
9 42 Sixth 4261
10 42 Eighth 4281
11 44 Eighth 4482
12 44 Seventh 4471
13 44 Sixth 4460
14 44 Eighth 4480
15 43 Seventh 4372
16 42 Sixth 4262
17 42 Eighth 4282
18 44 Seventh 4472
19 43 Seventh 4370
20 43 Eighth 4380
21 42 Sixth 4260
22 43 Eighth 4381
23 42 Seventh 4272
24 44 Sixth 4462
25 44 Seventh 4470
26 44 Sixth 4461
avenue Eighth Seventh Sixth All
street
42 3 3 3 9
43 3 3 3 9
44 3 3 3 9
All 9 9 9 27
df.accidents.
To compute the mean number of accidents per row,
crosstab = pd.crosstab( index = df.street, columns = df.avenue, values = df.accidents, aggfunc = "mean", margins = True ) print(crosstab)
avenue Eighth Seventh Sixth All street 42 4281 4271 4261 4271 43 4381 4371 4361 4371 44 4481 4471 4461 4471 All 4381 4371 4361 4371But this is as far as we can go with a
crosstab.
To compute the mean number of accidents and vehicles per row,
we would have to go back to a
pivot_table.
[44, "Sixth", 4462]
import matplotlib.pyplot as plt
crosstab = pd.crosstab( index = df.street, columns = df.avenue, margins = False #Don't output the "All" row and column. ) print(crosstab) axes = crosstab.plot.bar(rot = 0) #also try crosstab.T.plot.bar(rot = 0) figure = plt.gcf() figure.canvas.set_window_title("crosstab") axes.set_title("Cross Tabulation") axes.set_ylabel("Number of observations at each intersection") plt.show()