In [1]:
import pandas as pd
import numpy as np
In [2]:
df = pd.read_excel('../../data/02. sales-funnel.xlsx')
df.head()
Out[2]:
Account | Name | Rep | Manager | Product | Quantity | Price | Status | |
---|---|---|---|---|---|---|---|---|
0 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | CPU | 1 | 30000 | presented |
1 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | Software | 1 | 10000 | presented |
2 | 714466 | Trantow-Barrows | Craig Booker | Debra Henley | Maintenance | 2 | 5000 | pending |
3 | 737550 | Fritsch, Russel and Anderson | Craig Booker | Debra Henley | CPU | 1 | 35000 | declined |
4 | 146832 | Kiehn-Spinka | Daniel Hilton | Debra Henley | CPU | 2 | 65000 | won |
In [3]:
pd.pivot_table(df, index=["Name"])
C:\Users\Playdata\AppData\Local\Temp\ipykernel_7024\439457872.py:1: FutureWarning: pivot_table dropped a column because it failed to aggregate. This behavior is deprecated and will raise in a future version of pandas. Select only the columns that can be aggregated.
pd.pivot_table(df, index=["Name"])
Out[3]:
Account | Price | Quantity | |
---|---|---|---|
Name | |||
Barton LLC | 740150 | 35000 | 1.000000 |
Fritsch, Russel and Anderson | 737550 | 35000 | 1.000000 |
Herman LLC | 141962 | 65000 | 2.000000 |
Jerde-Hilpert | 412290 | 5000 | 2.000000 |
Kassulke, Ondricka and Metz | 307599 | 7000 | 3.000000 |
Keeling LLC | 688981 | 100000 | 5.000000 |
Kiehn-Spinka | 146832 | 65000 | 2.000000 |
Koepp Ltd | 729833 | 35000 | 2.000000 |
Kulas Inc | 218895 | 25000 | 1.500000 |
Purdy-Kunde | 163416 | 30000 | 1.000000 |
Stokes LLC | 239344 | 7500 | 1.000000 |
Trantow-Barrows | 714466 | 15000 | 1.333333 |
- 중복된 Name 하나로 합쳐짐
- price : (30,000+10,000+5,000) / 3 = 15,000
- quantity : (1+1+2) / 3 = 1.33333
In [4]:
pd.pivot_table(df, index=['Name', 'Rep', 'Manager'])
C:\Users\Playdata\AppData\Local\Temp\ipykernel_7024\2628239784.py:1: FutureWarning: pivot_table dropped a column because it failed to aggregate. This behavior is deprecated and will raise in a future version of pandas. Select only the columns that can be aggregated.
pd.pivot_table(df, index=['Name', 'Rep', 'Manager'])
Out[4]:
Account | Price | Quantity | |||
---|---|---|---|---|---|
Name | Rep | Manager | |||
Barton LLC | John Smith | Debra Henley | 740150 | 35000 | 1.000000 |
Fritsch, Russel and Anderson | Craig Booker | Debra Henley | 737550 | 35000 | 1.000000 |
Herman LLC | Cedric Moss | Fred Anderson | 141962 | 65000 | 2.000000 |
Jerde-Hilpert | John Smith | Debra Henley | 412290 | 5000 | 2.000000 |
Kassulke, Ondricka and Metz | Wendy Yule | Fred Anderson | 307599 | 7000 | 3.000000 |
Keeling LLC | Wendy Yule | Fred Anderson | 688981 | 100000 | 5.000000 |
Kiehn-Spinka | Daniel Hilton | Debra Henley | 146832 | 65000 | 2.000000 |
Koepp Ltd | Wendy Yule | Fred Anderson | 729833 | 35000 | 2.000000 |
Kulas Inc | Daniel Hilton | Debra Henley | 218895 | 25000 | 1.500000 |
Purdy-Kunde | Cedric Moss | Fred Anderson | 163416 | 30000 | 1.000000 |
Stokes LLC | Cedric Moss | Fred Anderson | 239344 | 7500 | 1.000000 |
Trantow-Barrows | Craig Booker | Debra Henley | 714466 | 15000 | 1.333333 |
In [5]:
pd.pivot_table(df, index=['Manager', 'Rep'])
C:\Users\Playdata\AppData\Local\Temp\ipykernel_7024\4047874417.py:1: FutureWarning: pivot_table dropped a column because it failed to aggregate. This behavior is deprecated and will raise in a future version of pandas. Select only the columns that can be aggregated.
pd.pivot_table(df, index=['Manager', 'Rep'])
Out[5]:
Account | Price | Quantity | ||
---|---|---|---|---|
Manager | Rep | |||
Debra Henley | Craig Booker | 720237.0 | 20000.000000 | 1.250000 |
Daniel Hilton | 194874.0 | 38333.333333 | 1.666667 | |
John Smith | 576220.0 | 20000.000000 | 1.500000 | |
Fred Anderson | Cedric Moss | 196016.5 | 27500.000000 | 1.250000 |
Wendy Yule | 614061.5 | 44250.000000 | 3.000000 |
In [6]:
# Craig Bookr : (30000+ 35000 + 5000+ 10000) / 4 = 20000.000
# default 평균
pd.pivot_table(df, index=['Manager', 'Rep'], values=['Price'])
Out[6]:
Price | ||
---|---|---|
Manager | Rep | |
Debra Henley | Craig Booker | 20000.000000 |
Daniel Hilton | 38333.333333 | |
John Smith | 20000.000000 | |
Fred Anderson | Cedric Moss | 27500.000000 |
Wendy Yule | 44250.000000 |
In [7]:
# Craig Booker : 30000+35000+5000+10000 = 80000
# 합게를 사용하려면 aggfunc 옵션 사용
pd.pivot_table(df, index=['Manager', 'Rep'], values=['Price'],
aggfunc=np.sum)
Out[7]:
Price | ||
---|---|---|
Manager | Rep | |
Debra Henley | Craig Booker | 80000 |
Daniel Hilton | 115000 | |
John Smith | 40000 | |
Fred Anderson | Cedric Moss | 110000 |
Wendy Yule | 177000 |
In [8]:
pd.pivot_table(df, index = ["Manager", 'Rep'],values=['Price'],
aggfunc=[np.mean, len])
Out[8]:
mean | len | ||
---|---|---|---|
Price | Price | ||
Manager | Rep | ||
Debra Henley | Craig Booker | 20000.000000 | 4 |
Daniel Hilton | 38333.333333 | 3 | |
John Smith | 20000.000000 | 2 | |
Fred Anderson | Cedric Moss | 27500.000000 | 4 |
Wendy Yule | 44250.000000 | 4 |
In [9]:
# NaN은 0으로 채움
pd.pivot_table(df, index = ["Manager", 'Rep'],values=['Price'],
columns=['Product'], aggfunc=[np.sum], fill_value=0)
Out[9]:
sum | |||||
---|---|---|---|---|---|
Price | |||||
Product | CPU | Maintenance | Monitor | Software | |
Manager | Rep | ||||
Debra Henley | Craig Booker | 65000 | 5000 | 0 | 10000 |
Daniel Hilton | 105000 | 0 | 0 | 10000 | |
John Smith | 35000 | 5000 | 0 | 0 | |
Fred Anderson | Cedric Moss | 95000 | 5000 | 0 | 10000 |
Wendy Yule | 165000 | 7000 | 5000 | 0 |
In [10]:
# margins=True : 총계 출력
pd.pivot_table(df, index = ["Manager", 'Rep','Product'],
values=['Price', 'Quantity'],
aggfunc=[np.sum, np.mean], fill_value=0, margins=True)
Out[10]:
sum | mean | |||||
---|---|---|---|---|---|---|
Price | Quantity | Price | Quantity | |||
Manager | Rep | Product | ||||
Debra Henley | Craig Booker | CPU | 65000 | 2 | 32500.000000 | 1.000000 |
Maintenance | 5000 | 2 | 5000.000000 | 2.000000 | ||
Software | 10000 | 1 | 10000.000000 | 1.000000 | ||
Daniel Hilton | CPU | 105000 | 4 | 52500.000000 | 2.000000 | |
Software | 10000 | 1 | 10000.000000 | 1.000000 | ||
John Smith | CPU | 35000 | 1 | 35000.000000 | 1.000000 | |
Maintenance | 5000 | 2 | 5000.000000 | 2.000000 | ||
Fred Anderson | Cedric Moss | CPU | 95000 | 3 | 47500.000000 | 1.500000 |
Maintenance | 5000 | 1 | 5000.000000 | 1.000000 | ||
Software | 10000 | 1 | 10000.000000 | 1.000000 | ||
Wendy Yule | CPU | 165000 | 7 | 82500.000000 | 3.500000 | |
Maintenance | 7000 | 3 | 7000.000000 | 3.000000 | ||
Monitor | 5000 | 2 | 5000.000000 | 2.000000 | ||
All | 522000 | 30 | 30705.882353 | 1.764706 |
반응형
'데이터분석' 카테고리의 다른 글
[23.06.19] Python groupby, aggregate, transform, filter - 12(1) (0) | 2023.06.19 |
---|---|
[23.06.16] Python apply, 문제 - 11(4) (0) | 2023.06.16 |
[23.06.16] Python melt - 11(1) (0) | 2023.06.16 |
[23.06.15] Python 누락 값 처리 - 10(3) (0) | 2023.06.15 |
[23.06.15] Python merge - 10(2) (0) | 2023.06.15 |