Pandas中loc,iloc函数的用法

用loc,iloc,直接取值三种方法;对DataFrame,Series,行和列进行操作 

import pandas as pd
#读取college数据集
college = pd.read_csv('data/college.csv', index_col='INSTNM')

iloc通过行位置取数(整数下标,从0开始)

# 选取第61行
pd.options.display.max_rows = 6
college.iloc[60]
'''
CITY                  Anchorage
STABBR                       AK
HBCU                          0
                        ...    
UG25ABV                  0.4386
MD_EARN_WNE_P10           42500
GRAD_DEBT_MDN_SUPP      19449.5
Name: University of Alaska Anchorage, Length: 26, dtype: object
'''


# 选取多个不连续的行
college.iloc[[60, 99, 3]] #在DataFrame中按位置取第61行、第100行、第4行
  CITY STABBR HBCU MENONLY WOMENONLY RELAFFIL SATVRMID SATMTMID DISTANCEONLY UGDS ... UGDS_2MOR UGDS_NRA UGDS_UNKN PPTUG_EF CURROPER PCTPELL PCTFLOAN UG25ABV MD_EARN_WNE_P10 GRAD_DEBT_MDN_SUPP
INSTNM                                          
University of Alaska Anchorage Anchorage AK 0.0 0.0 0.0 0 NaN NaN 0.0 12865.0 ... 0.0980 0.0181 0.0457 0.4539 1 0.2385 0.2647 0.4386 42500 19449.5
International Academy of Hair Design Tempe AZ 0.0 0.0 0.0 0 NaN NaN 0.0 188.0 ... 0.0160 0.0000 0.0638 0.0000 0 0.7185 0.7346 0.3905 22200 10556
University of Alabama in Huntsville Huntsville AL 0.0 0.0 0.0 0 595.0 590.0 0.0 5451.0 ... 0.0172 0.0332 0.0350 0.2146 1 0.3072 0.4596 0.2640 45500 24097

3 rows × 26 columns

# iloc可以用切片连续选取 
college.iloc[99:102] #选取位置99到101的行(99,100,101),不包含102
  CITY STABBR HBCU MENONLY WOMENONLY RELAFFIL SATVRMID SATMTMID DISTANCEONLY UGDS ... UGDS_2MOR UGDS_NRA UGDS_UNKN PPTUG_EF CURROPER PCTPELL PCTFLOAN UG25ABV MD_EARN_WNE_P10 GRAD_DEBT_MDN_SUPP
INSTNM                                          
International Academy of Hair Design Tempe AZ 0.0 0.0 0.0 0 NaN NaN 0.0 188.0 ... 0.0160 0.0000 0.0638 0.0000 0 0.7185 0.7346 0.3905 22200 10556
GateWay Community College Phoenix AZ 0.0 0.0 0.0 0 NaN NaN 0.0 5211.0 ... 0.0127 0.0161 0.0702 0.7465 1 0.3270 0.2189 0.5832 29800 7283
Mesa Community College Mesa AZ 0.0 0.0 0.0 0 NaN NaN 0.0 19055.0 ... 0.0205 0.0257 0.0682 0.6457 1 0.3423 0.2207 0.4010 35200 8000

3 rows × 26 columns

 

loc通过行标签取数 索引值

# 也可以通过行标签选取
college.loc['University of Alaska Anchorage']
'''
CITY                  Anchorage
STABBR                       AK
HBCU                          0
MENONLY                       0
WOMENONLY                     0
                        ...    
PCTPELL                  0.2385
PCTFLOAN                 0.2647
UG25ABV                  0.4386
MD_EARN_WNE_P10           42500
GRAD_DEBT_MDN_SUPP      19449.5
Name: University of Alaska Anchorage, Length: 26, dtype: object
'''

用loc加列表来选取 

# 也可以用loc加列表来选取
labels = ['University of Alaska Anchorage','International Academy of Hair Design','University of Alabama in Huntsville']
college.loc[labels]
  CITY STABBR HBCU MENONLY WOMENONLY RELAFFIL SATVRMID SATMTMID DISTANCEONLY UGDS ... UGDS_2MOR UGDS_NRA UGDS_UNKN PPTUG_EF CURROPER PCTPELL PCTFLOAN UG25ABV MD_EARN_WNE_P10 GRAD_DEBT_MDN_SUPP
INSTNM                                          
University of Alaska Anchorage Anchorage AK 0.0 0.0 0.0 0 NaN NaN 0.0 12865.0 ... 0.0980 0.0181 0.0457 0.4539 1 0.2385 0.2647 0.4386 42500 19449.5
International Academy of Hair Design Tempe AZ 0.0 0.0 0.0 0 NaN NaN 0.0 188.0 ... 0.0160 0.0000 0.0638 0.0000 0 0.7185 0.7346 0.3905 22200 10556
University of Alabama in Huntsville Huntsville AL 0.0 0.0 0.0 0 595.0 590.0 0.0 5451.0 ... 0.0172 0.0332 0.0350 0.2146 1 0.3072 0.4596 0.2640 45500 24097

3 rows × 26 columns

loc可以用标签连续选取start-stop

# loc可以用标签连续选取start-stop
start = 'Amridge University'
stop = 'Athens State University'
college.loc[start:stop]
  CITY STABBR HBCU MENONLY WOMENONLY RELAFFIL SATVRMID SATMTMID DISTANCEONLY UGDS ... UGDS_2MOR UGDS_NRA UGDS_UNKN PPTUG_EF CURROPER PCTPELL PCTFLOAN UG25ABV MD_EARN_WNE_P10 GRAD_DEBT_MDN_SUPP
INSTNM                                          
Amridge University Montgomery AL 0.0 0.0 0.0 1 NaN NaN 1.0 291.0 ... 0.0000 0.0000 0.2715 0.4536 1 0.6801 0.7795 0.8540 40100 23370
University of Alabama in Huntsville Huntsville AL 0.0 0.0 0.0 0 595.0 590.0 0.0 5451.0 ... 0.0172 0.0332 0.0350 0.2146 1 0.3072 0.4596 0.2640 45500 24097
Alabama State University Montgomery AL 1.0 0.0 0.0 0 425.0 430.0 0.0 4811.0 ... 0.0098 0.0243 0.0137 0.0892 1 0.7347 0.7554 0.1270 26600 33118.5
The University of Alabama Tuscaloosa AL 0.0 0.0 0.0 0 555.0 565.0 0.0 29851.0 ... 0.0261 0.0268 0.0026 0.0844 1 0.2040 0.4010 0.0853 41900 23750
Central Alabama Community College Alexander City AL 0.0 0.0 0.0 0 NaN NaN 0.0 1592.0 ... 0.0000 0.0000 0.0019 0.3882 1 0.5892 0.3977 0.3153 27500 16127
Athens State University Athens AL 0.0 0.0 0.0 0 NaN NaN 0.0 2991.0 ... 0.0174 0.0057 0.0334 0.5517 1 0.4088 0.6296 0.6410 39000 18595

6 rows × 26 columns

index.tolist()提取行索引生成列表 

#index.tolist()把所选行的行索引标签提取为列表;选取了几行,列表中就有几个标签
college.iloc[[60, 49, 3]].index.tolist()#选了三行
['University of Alaska Anchorage',
 'Snead State Community College',
 'University of Alabama in Huntsville']

用iloc选取前3行和前4列 

# 读取college数据集,给行索引命名为INSTNM;选取前3行和前4列
college = pd.read_csv('data/college.csv', index_col='INSTNM')
college.iloc[:3, :4]
 
  CITY STABBR HBCU MENONLY
INSTNM        
Alabama A & M University Normal AL 1.0 0.0
University of Alabama at Birmingham Birmingham AL 0.0 0.0
Amridge University Montgomery AL 0.0 0.0

用loc选取前3行和前4列

# loc的标签切片包含首尾两端
college.loc[:'Amridge University', :'MENONLY']

  CITY STABBR HBCU MENONLY
INSTNM        
Alabama A & M University Normal AL 1.0 0.0
University of Alabama at Birmingham Birmingham AL 0.0 0.0
Amridge University Montgomery AL 0.0 0.0

选取两列的所有的行

college.iloc[:, [4,6]].head()
college.loc[:, ['WOMENONLY', 'SATVRMID']].head()

 

  WOMENONLY SATVRMID
INSTNM    
Alabama A & M University 0.0 424.0
University of Alabama at Birmingham 0.0 570.0
Amridge University 0.0 NaN
University of Alabama in Huntsville 0.0 595.0
Alabama State University 0.0 425.0

选取不连续的行和列

# 选取不连续的行和列
college.iloc[[100, 200], [7, 15]]

	                                  SATMTMID	UGDS_NHPI
INSTNM		
GateWay Community College	            NaN	     0.0029
American Baptist Seminary of the West	NaN	     NaN

 

不用loc,iloc行切片

 

# 读取college数据集;直接切片按位置取第10到19行(不含20),步长为2,即每隔一行取一行
college = pd.read_csv('data/college.csv', index_col='INSTNM')
college[10:20:2]
  CITY STABBR HBCU MENONLY WOMENONLY RELAFFIL SATVRMID SATMTMID DISTANCEONLY UGDS ... UGDS_2MOR UGDS_NRA UGDS_UNKN PPTUG_EF CURROPER PCTPELL PCTFLOAN UG25ABV MD_EARN_WNE_P10 GRAD_DEBT_MDN_SUPP
INSTNM                                          
Birmingham Southern College Birmingham AL 0.0 0.0 0.0 1 560.0 560.0 0.0 1180.0 ... 0.0051 0.0000 0.0051 0.0017 1 0.1920 0.4809 0.0152 44200 27000
Concordia College Alabama Selma AL 1.0 0.0 0.0 1 420.0 400.0 0.0 322.0 ... 0.0031 0.0466 0.0000 0.1056 1 0.8667 0.9333 0.2367 19900 PrivacySuppressed
Enterprise State Community College Enterprise AL 0.0 0.0 0.0 0 NaN NaN 0.0 1729.0 ... 0.0254 0.0012 0.0069 0.3823 1 0.4895 0.2263 0.3399 24600 8273
Faulkner University Montgomery AL 0.0 0.0 0.0 1 NaN NaN 0.0 2367.0 ... 0.0173 0.0182 0.0258 0.2302 1 0.5812 0.7253 0.4589 37200 22000
New Beginning College of Cosmetology Albertville AL 0.0 0.0 0.0 0 NaN NaN 0.0 115.0 ... 0.0000 0.0000 0.0000 0.0783 1 0.8224 0.8553 0.3933 NaN 5500

5 rows × 26 columns

Series切片:取位置10到19之间、步长为2的值

# Series也可以进行同样的切片
city = college['CITY']
city[10:20:2]
'''
INSTNM
Birmingham Southern College              Birmingham
Concordia College Alabama                     Selma
Enterprise State Community College       Enterprise
Faulkner University                      Montgomery
New Beginning College of Cosmetology    Albertville
Name: CITY, dtype: object
'''

查看第4002个行索引标签

# 查看第4002个行索引标签
college.index[4001]
#'Spokane Community College'

对DataFrame用标签切片

# Series和DataFrame都可以用标签进行切片。下面是对DataFrame用标签切片
start = 'Mesa Community College'
stop = 'Spokane Community College'
college[start:stop:1500]
CITY STABBR HBCU MENONLY WOMENONLY RELAFFIL SATVRMID SATMTMID DISTANCEONLY UGDS ... UGDS_2MOR UGDS_NRA UGDS_UNKN PPTUG_EF CURROPER PCTPELL PCTFLOAN UG25ABV MD_EARN_WNE_P10 GRAD_DEBT_MDN_SUPP
INSTNM                                          
Mesa Community College Mesa AZ 0.0 0.0 0.0 0 NaN NaN 0.0 19055.0 ... 0.0205 0.0257 0.0682 0.6457 1 0.3423 0.2207 0.4010 35200 8000
Hair Academy Inc-New Carrollton New Carrollton MD 0.0 0.0 0.0 0 NaN NaN 0.0 504.0 ... 0.0000 0.0000 0.0000 0.4683 1 0.9756 1.0000 0.5882 15200 9666
National College of Natural Medicine Portland OR 0.0 0.0 0.0 0 NaN NaN 0.0 NaN ... NaN NaN NaN NaN 1 NaN NaN NaN NaN PrivacySuppressed

3 rows × 26 columns

对Series用标签切片

# 下面是对Series用标签切片
city[start:stop:1500]
'''
INSTNM
Mesa Community College                            Mesa
Hair Academy Inc-New Carrollton         New Carrollton
National College of Natural Medicine          Portland
Name: CITY, dtype: object
'''

直接切片不能用于列,只能用于DataFrame的行和Series,也不能同时选取行和列。

# 下面尝试选取两列,导致错误
# college[:10, ['CITY', 'STABBR']]
# TypeError: '(slice(None, 10, None), ['CITY', 'STABBR'])' is an invalid key
# 只能用.loc和.iloc选取
first_ten_instnm = college.index[:10]
college.loc[first_ten_instnm, ['CITY', 'STABBR']]
  CITY STABBR
INSTNM    
A & W Healthcare Educators New Orleans LA
A T Still University of Health Sciences Kirksville MO
ABC Beauty Academy Garland TX
ABC Beauty College Inc Arkadelphia AR
AI Miami International University of Art and Design Miami FL

 

上一篇:SpringMVC简介


下一篇:软件工程第二节课 《springboot 连接 sql数据库,实现 增删改查功能》