Pandas自动化

# 课程

Python自动化办公--Pandas玩转Excel（全30集） (opens new window)

整理了 25 个 Pandas 实用技巧，拿走不谢 (opens new window)

# 1.创建文件

import pandas as pd

#创建数据帧(类似表格中的sheet)
df = pd.DataFrame({'ID': [1, 2, 3], 'name': ['aa', 'bb', 'cc']})

#设置索引
df = df.set_index('ID')
print(df)

#创建表格
df.to_excel('C:/Temp/output.xlsx')

print('done')

1
2
3
4
5
6
7
8
9
10
11
12
13

# 3.行，列，单元格

Series 序列

import pandas as pd

# 字典
d = {'x': 100, 'y': 200, 'z': 300}
print(d.keys()) #dict_keys(['x', 'y', 'z'])
print(d.values()) #dict_values([100, 200, 300])
print(d['x']) #100

# 序列
s1 = pd.Series(d)
print(s1.index) #Index(['x', 'y', 'z'], dtype='object')
print(s1.values) #[100 200 300]

1
2
3
4
5
6
7
8
9
10
11
12

创建Series

import pandas as pd

s1 = pd.Series([100, 200, 300], index=['x', 'y', 'z'])
print(s1)
# x    100
# y    200
# z    300
# dtype: int64

1
2
3
4
5
6
7
8

列和行

import pandas as pd

s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[1, 2, 3], name='C')

# 将序列 当做一列
df = pd.DataFrame({s1.name: s1, s2.name: s2, s3.name: s3})
print(df)
#    A   B    C
# 1  1  10  100
# 2  2  20  200
# 3  3  30  300

# 将序列 当做一行
df2 = pd.DataFrame([s1, s2, s3])
print(df2)
#      1    2    3
# A    1    2    3
# B   10   20   30
# C  100  200  300

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

错位情况

s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[2, 3, 4], name='C')

#当做一列
df = pd.DataFrame({s1.name: s1, s2.name: s2, s3.name: s3})
print(df)

#      A     B      C
# 1  1.0  10.0    NaN
# 2  2.0  20.0  100.0
# 3  3.0  30.0  200.0
# 4  NaN   NaN  300.0

1
2
3
4
5
6
7
8
9
10
11
12
13

# 4.数据区域读取填充数字

原始数据

import pandas as pd
from datetime import date, timedelta

#skiprows=3 跳过前三个空行
#usecols="C:F" 数据在C-F 列
#dtype={'ID': str} 设为字符串 (序列存在NaN时 默认转为float64)
books = pd.read_excel('C:/Temp/Books.xlsx', skiprows=3, usecols="C:F",
                      index_col=None, dtype={'ID': str, 'InStore': str, 'Date': str})

# print(type(books['ID'])) #<class 'pandas.core.series.Series'>

# books['ID'].at[0] = 100 #ID 的第零行 设为 100
# print(books['ID'])

# 循环填充

start = date(2018, 1, 1)
for i in books.index:
    books['ID'].at[i] = i + 1
    books['InStore'].at[i] = 'Yes' if i % 2 == 0 else 'No'
    books['Date'].at[i] = start
print(books)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22

# 5.填充日期序列

# 6.函数填充计算列

import pandas as pd

books = pd.read_excel('C:/Temp/Books.xlsx', index_col='ID')

# books["Price"] = books["ListPrice"] * books["Discount"]

#区间填充
# for i in range(5, 16):
#     books["Price"].at[i] = books["ListPrice"].at[i] * books["Discount"].at[i]

#函数（价格+2）
def add_2(x):
    return x+2

books['ListPrice'] = books['ListPrice'].apply(add_2) #apply(函数名)

#books['ListPrice'] = books['ListPrice'].apply(lambda x:x+2) #使用匿名函数

print(books)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19

去GitHub编辑

上次更新: 2022/10/21, 22:31:13

← HTML网页转成PDF wordpress批量发布文章(python-tk)→