Python中pandas库的常用操作实例

 更新时间:2023年07月29日 09:29:35   作者:菜菜01  
这篇文章主要介绍了Python中pandas库的常用操作实例,Pandas 库是一个免费、开源的第三方 Python 库,是 Python 数据分析必不可少的工具之一,它为 Python 数据分析提供了高性能,且易于使用的数据结构,即 Series 和 DataFrame,需要的朋友可以参考下

pandas库常用操作

Series

使用列表创建

import pandas as pd

# Build a Series from a plain list, giving every value an explicit string label.
data = pd.Series(list(range(4)), index=list("abcd"))
data  # notebook-style echo of the result

使用字典创建

import pandas as pd

# A dict maps straight onto a Series: keys become the index, values the data.
dict1 = dict(a=0, b=1, c=2, d=3)
data = pd.Series(data=dict1)
data  # notebook-style echo of the result

DataFrame

使用数组创建

import numpy as np
import pandas as pd

# Twelve consecutive integers laid out as 3 rows x 4 columns.
arr = np.array(list(range(12))).reshape(3, 4)
# Without explicit labels the frame gets default integer row and column labels.
data = pd.DataFrame(data=arr)
data  # notebook-style echo of the result

指定列索引

import numpy as np
import pandas as pd

# Same 3 x 4 block of integers, this time with named columns.
arr = np.array(list(range(12))).reshape(3, 4)
data = pd.DataFrame(data=arr, columns=list("abcd"))
data  # notebook-style echo of the result

index

查询索引

import numpy as np
import pandas as pd

# Series: the row labels are exposed through .index
data = pd.Series(list(range(3)), index=list("abc"))
data.index

# DataFrame: the column labels are exposed through .columns
arr = np.array(list(range(9))).reshape(3, 3)
data = pd.DataFrame(data=arr, columns=list("abc"))
data.columns

按索引查询数据

# Series: select values by position (.iloc) or by label ([])
import pandas as pd
data = pd.Series([i for i in range(3)], index=['a', 'b', 'c'])
data.iloc[2]        # 2        position 2; plain data[2] on a labelled index is deprecated
data['c']           # 2        label 'c'
data.iloc[0:2]      # 0, 1     positional slice, end position excluded
data['a':'c']       # 0, 1, 2  label slice, end label INCLUDED
data.iloc[[0, 2]]   # 0, 2     list of positions
data[['a', 'c']]    # 0, 2     list of labels
# DataFrame: select columns and rows
import numpy as np
arr = np.array([i for i in range(9)]).reshape(3, 3)
data = pd.DataFrame(arr, columns=['a', 'b', 'c'])
data['a']           # one column, returned as a Series
# 0    0
# 1    3
# 2    6
data[['a', 'c']]    # several columns, returned as a DataFrame
#    a  c
# 0  0  2
# 1  3  5
# 2  6  8
data[:2]            # a slice inside [] selects rows (the first two here)
#    a  b  c
# 0  0  1  2
# 1  3  4  5
# loc selects by label, iloc selects by integer position
data.loc[:, ["a", "c"]]
data.iloc[:, [0, 2]]

sort

sort by index

import numpy as np
import pandas as pd

# Series: order the entries by their index labels
data = pd.Series([1, 3, 0], index=list("abc"))
data.sort_index(axis=0)                   # ascending labels
data.sort_index(axis=0, ascending=False)  # descending labels

# DataFrame: called without arguments, sort_index orders rows by the row index
grid = np.arange(9).reshape(3, 3)
data = pd.DataFrame(grid, columns=list("abc"))
data.sort_index()

sort by values

# Series: sort by value (a Series has a single axis, so no column is needed)
import pandas as pd
data = pd.Series([1, 3, 0], index=['a', 'b', 'c'])
data.sort_values(axis=0)
data.sort_values(axis=0, ascending=False)
# DataFrame: `by` is a required argument -- calling data.sort_values() with
# no column name raises TypeError, so always say which column(s) to sort on.
import numpy as np
data = pd.DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'])
data.sort_values(by='a')
data.sort_values(by='a', ascending=False)

MultiIndex

read / write files

import numpy as np  # needed for np.arange below; the snippet must import it itself
import pandas as pd
# Round-trip a small frame through a CSV file in the current directory.
data = pd.DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'])
# index=False keeps the row index out of the file, so reading it back
# yields the same three columns and no extra index column.
data.to_csv(r'./data.csv', index=False)
data = pd.read_csv(r'./data.csv')
data
import numpy as np  # needed for np.arange below; the snippet must import it itself
import pandas as pd
# Round-trip a small frame through an Excel workbook in the current directory.
# NOTE: pandas needs an Excel engine (e.g. openpyxl) installed for .xlsx files.
data = pd.DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'])
# index=False keeps the row index out of the sheet.
data.to_excel(r'./data.xlsx', index=False)
data = pd.read_excel(r'./data.xlsx')
data

空值和缺失值处理

import pandas as pd
import numpy as np
# Use np.nan for the missing value: the `NaN` alias was removed in NumPy 2.0,
# so `from numpy import NaN` fails on current NumPy releases.
data = pd.Series([1, np.nan, 3])
# Boolean mask: True where the value is missing
data.isnull()
# Drop the missing entries (returns a new Series)
data.dropna()
# Replace the missing entries with a fixed value (returns a new Series)
data.fillna(6)
# Handling duplicate rows
import pandas as pd
data = pd.DataFrame({'id': [1, 2, 3, 4, 4, 5],
                            'name': ['小铭', '小月月', '彭岩', '刘华', '刘华', '周华'],
                            'age': [18, 18, 29, 58, 58, 36],
                            'height': [180, 180, 185, 175, 175, 178],
                            'gender': ['女', '女', '男', '男', '男', '男']})
# Boolean mask: True for every row that repeats an earlier row in all columns
data.duplicated()
# Remove the repeated rows, keeping the first occurrence (returns a new frame).
# The method is drop_duplicates(); `duplicateds` does not exist.
data.drop_duplicates()

更改数据类型

import pandas as pd

# Every value in the source dict is a string, not a number.
dict1 = {'A': ['5', '6', '7'], 'B': ['3', '2', '1']}
data = pd.DataFrame(data=dict1)
data.dtypes  # inspect the per-column dtypes of the string-valued frame

# Ask for an integer dtype while constructing the frame ...
data = pd.DataFrame(data=dict1, dtype='int')
data.dtypes

# ... or cast an existing frame; astype returns a new frame.
data.astype('float')

数据合并

import pandas as pd

# Two frames of different length: three rows and four rows.
data1 = pd.DataFrame({
    'A': ['A0', 'A0', 'A1'],
    'B': ['B0', 'B0', 'B1'],
})
data2 = pd.DataFrame({
    'C': ['C0', 'C0', 'C1', 'C3'],
    'D': ['D0', 'D2', 'D2', 'D3'],
})
# Stack data1 and data2 side by side (axis=1) using an outer join on the row index.
pd.concat(objs=[data1, data2], axis=1, join='outer')

数据聚合和组内运算

import pandas as pd
import numpy as np
dict1 = {"Key":['C','B','C','A','B','B','A','C','A'],"Data":[2,4,6,8,10,1,14,16,18]}
data = pd.DataFrame(dict1)
data
# groupby is a DataFrame method (pandas has no top-level pd.groupby function).
# Iterating the grouped object yields one (key, sub-frame) tuple per distinct key.
for i in data.groupby(by='Key'):
    print(i)

到此这篇关于Python中pandas库的常用操作实例的文章就介绍到这了,更多相关pandas库的常用操作内容请搜索脚本之家以前的文章或继续浏览下面的相关文章,希望大家以后多多支持脚本之家!

相关文章

最新评论