Pandas自动化
# 课程
Python自动化办公--Pandas玩转Excel(全30集) (opens new window)
整理了 25 个 Pandas 实用技巧,拿走不谢 (opens new window)
# 1.创建文件
import pandas as pd
#创建数据帧(类似表格中的sheet)
df = pd.DataFrame({'ID': [1, 2, 3], 'name': ['aa', 'bb', 'cc']})
#设置索引
df = df.set_index('ID')
print(df)
#创建表格
df.to_excel('C:/Temp/output.xlsx')
print('done')
1
2
3
4
5
6
7
8
9
10
11
12
13
2
3
4
5
6
7
8
9
10
11
12
13
# 3.行,列, 单元格
Series 序列
import pandas as pd
# 字典
d = {'x': 100, 'y': 200, 'z': 300}
print(d.keys()) #dict_keys(['x', 'y', 'z'])
print(d.values()) #dict_values([100, 200, 300])
print(d['x']) #100
# 序列
s1 = pd.Series(d)
print(s1.index) #Index(['x', 'y', 'z'], dtype='object')
print(s1.values) #[100 200 300]
1
2
3
4
5
6
7
8
9
10
11
12
2
3
4
5
6
7
8
9
10
11
12
创建Series
import pandas as pd
s1 = pd.Series([100, 200, 300], index=['x', 'y', 'z'])
print(s1)
# x 100
# y 200
# z 300
# dtype: int64
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
列和行
import pandas as pd
s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[1, 2, 3], name='C')
# 将序列 当做一列
df = pd.DataFrame({s1.name: s1, s2.name: s2, s3.name: s3})
print(df)
# A B C
# 1 1 10 100
# 2 2 20 200
# 3 3 30 300
# 将序列 当做一行
df2 = pd.DataFrame([s1, s2, s3])
print(df2)
# 1 2 3
# A 1 2 3
# B 10 20 30
# C 100 200 300
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
错位情况
s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[2, 3, 4], name='C')
#当做一列
df = pd.DataFrame({s1.name: s1, s2.name: s2, s3.name: s3})
print(df)
# A B C
# 1 1.0 10.0 NaN
# 2 2.0 20.0 100.0
# 3 3.0 30.0 200.0
# 4 NaN NaN 300.0
1
2
3
4
5
6
7
8
9
10
11
12
13
2
3
4
5
6
7
8
9
10
11
12
13
# 4.数据区域读取填充数字
原始数据
import pandas as pd
from datetime import date, timedelta
#skiprows=3 跳过前三个空行
#usecols="C:F" 数据在C-F 列
#dtype={'ID': str} 设为字符串 (序列存在NaN时 默认转为float64)
books = pd.read_excel('C:/Temp/Books.xlsx', skiprows=3, usecols="C:F",
index_col=None, dtype={'ID': str, 'InStore': str, 'Date': str})
# print(type(books['ID'])) #<class 'pandas.core.series.Series'>
# books['ID'].at[0] = 100 #ID 的第零行 设为 100
# print(books['ID'])
# 循环填充
start = date(2018, 1, 1)
for i in books.index:
books['ID'].at[i] = i + 1
books['InStore'].at[i] = 'Yes' if i % 2 == 0 else 'No'
books['Date'].at[i] = start
print(books)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# 5.填充日期序列
# 6.函数填充 计算列
import pandas as pd
books = pd.read_excel('C:/Temp/Books.xlsx', index_col='ID')
# books["Price"] = books["ListPrice"] * books["Discount"]
#区间填充
# for i in range(5, 16):
# books["Price"].at[i] = books["ListPrice"].at[i] * books["Discount"].at[i]
#函数(价格+2)
def add_2(x):
return x+2
books['ListPrice'] = books['ListPrice'].apply(add_2) #apply(函数名)
#books['ListPrice'] = books['ListPrice'].apply(lambda x:x+2) #使用匿名函数
print(books)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
去GitHub编辑 (opens new window)
上次更新: 2022/10/21, 22:31:13