用loc,iloc,直接取值三种方法;对DataFrame,Series,行和列进行操作
import pandas as pd
#读取college数据集
college = pd.read_csv('data/college.csv', index_col='INSTNM')
iloc通过整数位置取数(索引值的下标,从0开始)
# 选取第61行
pd.options.display.max_rows = 6
college.iloc[60]
'''
CITY Anchorage
STABBR AK
HBCU 0
...
UG25ABV 0.4386
MD_EARN_WNE_P10 42500
GRAD_DEBT_MDN_SUPP 19449.5
Name: University of Alaska Anchorage, Length: 26, dtype: object
'''
# 选取多个不连续的行
college.iloc[[60, 99, 3]] #对DataFrame按位置取第61行、第100行、第4行,返回顺序与列表中的顺序一致
CITY | STABBR | HBCU | MENONLY | WOMENONLY | RELAFFIL | SATVRMID | SATMTMID | DISTANCEONLY | UGDS | ... | UGDS_2MOR | UGDS_NRA | UGDS_UNKN | PPTUG_EF | CURROPER | PCTPELL | PCTFLOAN | UG25ABV | MD_EARN_WNE_P10 | GRAD_DEBT_MDN_SUPP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INSTNM | |||||||||||||||||||||
University of Alaska Anchorage | Anchorage | AK | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 12865.0 | ... | 0.0980 | 0.0181 | 0.0457 | 0.4539 | 1 | 0.2385 | 0.2647 | 0.4386 | 42500 | 19449.5 |
International Academy of Hair Design | Tempe | AZ | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 188.0 | ... | 0.0160 | 0.0000 | 0.0638 | 0.0000 | 0 | 0.7185 | 0.7346 | 0.3905 | 22200 | 10556 |
University of Alabama in Huntsville | Huntsville | AL | 0.0 | 0.0 | 0.0 | 0 | 595.0 | 590.0 | 0.0 | 5451.0 | ... | 0.0172 | 0.0332 | 0.0350 | 0.2146 | 1 | 0.3072 | 0.4596 | 0.2640 | 45500 | 24097 |
3 rows × 26 columns
# iloc可以用切片连续选取
college.iloc[99:102] #选取位置99、100、101的行(即第100到102行),切片不包含末端102
CITY | STABBR | HBCU | MENONLY | WOMENONLY | RELAFFIL | SATVRMID | SATMTMID | DISTANCEONLY | UGDS | ... | UGDS_2MOR | UGDS_NRA | UGDS_UNKN | PPTUG_EF | CURROPER | PCTPELL | PCTFLOAN | UG25ABV | MD_EARN_WNE_P10 | GRAD_DEBT_MDN_SUPP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INSTNM | |||||||||||||||||||||
International Academy of Hair Design | Tempe | AZ | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 188.0 | ... | 0.0160 | 0.0000 | 0.0638 | 0.0000 | 0 | 0.7185 | 0.7346 | 0.3905 | 22200 | 10556 |
GateWay Community College | Phoenix | AZ | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 5211.0 | ... | 0.0127 | 0.0161 | 0.0702 | 0.7465 | 1 | 0.3270 | 0.2189 | 0.5832 | 29800 | 7283 |
Mesa Community College | Mesa | AZ | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 19055.0 | ... | 0.0205 | 0.0257 | 0.0682 | 0.6457 | 1 | 0.3423 | 0.2207 | 0.4010 | 35200 | 8000 |
3 rows × 26 columns
loc通过行标签取数 索引值
# 也可以通过行标签选取
college.loc['University of Alaska Anchorage']
'''
CITY Anchorage
STABBR AK
HBCU 0
MENONLY 0
WOMENONLY 0
...
PCTPELL 0.2385
PCTFLOAN 0.2647
UG25ABV 0.4386
MD_EARN_WNE_P10 42500
GRAD_DEBT_MDN_SUPP 19449.5
Name: University of Alaska Anchorage, Length: 26, dtype: object
'''
用loc加列表来选取
# 也可以用loc加列表来选取
labels = ['University of Alaska Anchorage','International Academy of Hair Design','University of Alabama in Huntsville']
college.loc[labels]
CITY | STABBR | HBCU | MENONLY | WOMENONLY | RELAFFIL | SATVRMID | SATMTMID | DISTANCEONLY | UGDS | ... | UGDS_2MOR | UGDS_NRA | UGDS_UNKN | PPTUG_EF | CURROPER | PCTPELL | PCTFLOAN | UG25ABV | MD_EARN_WNE_P10 | GRAD_DEBT_MDN_SUPP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INSTNM | |||||||||||||||||||||
University of Alaska Anchorage | Anchorage | AK | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 12865.0 | ... | 0.0980 | 0.0181 | 0.0457 | 0.4539 | 1 | 0.2385 | 0.2647 | 0.4386 | 42500 | 19449.5 |
International Academy of Hair Design | Tempe | AZ | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 188.0 | ... | 0.0160 | 0.0000 | 0.0638 | 0.0000 | 0 | 0.7185 | 0.7346 | 0.3905 | 22200 | 10556 |
University of Alabama in Huntsville | Huntsville | AL | 0.0 | 0.0 | 0.0 | 0 | 595.0 | 590.0 | 0.0 | 5451.0 | ... | 0.0172 | 0.0332 | 0.0350 | 0.2146 | 1 | 0.3072 | 0.4596 | 0.2640 | 45500 | 24097 |
3 rows × 26 columns
loc可以用标签连续选取start-stop
# loc可以用标签连续选取start-stop;与iloc切片不同,结果包含stop标签所在的行
start = 'Amridge University'
stop = 'Athens State University'
college.loc[start:stop]
CITY | STABBR | HBCU | MENONLY | WOMENONLY | RELAFFIL | SATVRMID | SATMTMID | DISTANCEONLY | UGDS | ... | UGDS_2MOR | UGDS_NRA | UGDS_UNKN | PPTUG_EF | CURROPER | PCTPELL | PCTFLOAN | UG25ABV | MD_EARN_WNE_P10 | GRAD_DEBT_MDN_SUPP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INSTNM | |||||||||||||||||||||
Amridge University | Montgomery | AL | 0.0 | 0.0 | 0.0 | 1 | NaN | NaN | 1.0 | 291.0 | ... | 0.0000 | 0.0000 | 0.2715 | 0.4536 | 1 | 0.6801 | 0.7795 | 0.8540 | 40100 | 23370 |
University of Alabama in Huntsville | Huntsville | AL | 0.0 | 0.0 | 0.0 | 0 | 595.0 | 590.0 | 0.0 | 5451.0 | ... | 0.0172 | 0.0332 | 0.0350 | 0.2146 | 1 | 0.3072 | 0.4596 | 0.2640 | 45500 | 24097 |
Alabama State University | Montgomery | AL | 1.0 | 0.0 | 0.0 | 0 | 425.0 | 430.0 | 0.0 | 4811.0 | ... | 0.0098 | 0.0243 | 0.0137 | 0.0892 | 1 | 0.7347 | 0.7554 | 0.1270 | 26600 | 33118.5 |
The University of Alabama | Tuscaloosa | AL | 0.0 | 0.0 | 0.0 | 0 | 555.0 | 565.0 | 0.0 | 29851.0 | ... | 0.0261 | 0.0268 | 0.0026 | 0.0844 | 1 | 0.2040 | 0.4010 | 0.0853 | 41900 | 23750 |
Central Alabama Community College | Alexander City | AL | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 1592.0 | ... | 0.0000 | 0.0000 | 0.0019 | 0.3882 | 1 | 0.5892 | 0.3977 | 0.3153 | 27500 | 16127 |
Athens State University | Athens | AL | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 2991.0 | ... | 0.0174 | 0.0057 | 0.0334 | 0.5517 | 1 | 0.4088 | 0.6296 | 0.6410 | 39000 | 18595 |
6 rows × 26 columns
index.tolist()提取行索引生成列表
#index.tolist()提取行索引生成列表;先用iloc选出若干行,再取这些行的行标签,选了几行,列表里就有几个标签
college.iloc[[60, 49, 3]].index.tolist()#选了三行
['University of Alaska Anchorage',
'Snead State Community College',
'University of Alabama in Huntsville']
用iloc选取前3行和前4列
# 读取college数据集,将INSTNM列设为行索引;选取前3行和前4列
college = pd.read_csv('data/college.csv', index_col='INSTNM')
college.iloc[:3, :4]
CITY | STABBR | HBCU | MENONLY | |
---|---|---|---|---|
INSTNM | ||||
Alabama A & M University | Normal | AL | 1.0 | 0.0 |
University of Alabama at Birmingham | Birmingham | AL | 0.0 | 0.0 |
Amridge University | Montgomery | AL | 0.0 | 0.0 |
用loc选取前3行和前4列
# loc的标签切片包含两端,因此切片终点写第3行和第4列的标签
college.loc[:'Amridge University', :'MENONLY']
CITY | STABBR | HBCU | MENONLY | |
---|---|---|---|---|
INSTNM | ||||
Alabama A & M University | Normal | AL | 1.0 | 0.0 |
University of Alabama at Birmingham | Birmingham | AL | 0.0 | 0.0 |
Amridge University | Montgomery | AL | 0.0 | 0.0 |
选取两列的所有的行
college.iloc[:, [4,6]].head()
college.loc[:, ['WOMENONLY', 'SATVRMID']].head()
WOMENONLY | SATVRMID | |
---|---|---|
INSTNM | ||
Alabama A & M University | 0.0 | 424.0 |
University of Alabama at Birmingham | 0.0 | 570.0 |
Amridge University | 0.0 | NaN |
University of Alabama in Huntsville | 0.0 | 595.0 |
Alabama State University | 0.0 | 425.0 |
选取不连续的行和列
# 选取不连续的行和列
college.iloc[[100, 200], [7, 15]]
SATMTMID UGDS_NHPI
INSTNM
GateWay Community College NaN 0.0029
American Baptist Seminary of the West NaN NaN
不用loc,iloc行切片
#从行索引10到20,每隔一个取一行
# 读取college数据集;从行索引10到20,每隔一个取一行
college = pd.read_csv('data/college.csv', index_col='INSTNM')
college[10:20:2]
CITY | STABBR | HBCU | MENONLY | WOMENONLY | RELAFFIL | SATVRMID | SATMTMID | DISTANCEONLY | UGDS | ... | UGDS_2MOR | UGDS_NRA | UGDS_UNKN | PPTUG_EF | CURROPER | PCTPELL | PCTFLOAN | UG25ABV | MD_EARN_WNE_P10 | GRAD_DEBT_MDN_SUPP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INSTNM | |||||||||||||||||||||
Birmingham Southern College | Birmingham | AL | 0.0 | 0.0 | 0.0 | 1 | 560.0 | 560.0 | 0.0 | 1180.0 | ... | 0.0051 | 0.0000 | 0.0051 | 0.0017 | 1 | 0.1920 | 0.4809 | 0.0152 | 44200 | 27000 |
Concordia College Alabama | Selma | AL | 1.0 | 0.0 | 0.0 | 1 | 420.0 | 400.0 | 0.0 | 322.0 | ... | 0.0031 | 0.0466 | 0.0000 | 0.1056 | 1 | 0.8667 | 0.9333 | 0.2367 | 19900 | PrivacySuppressed |
Enterprise State Community College | Enterprise | AL | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 1729.0 | ... | 0.0254 | 0.0012 | 0.0069 | 0.3823 | 1 | 0.4895 | 0.2263 | 0.3399 | 24600 | 8273 |
Faulkner University | Montgomery | AL | 0.0 | 0.0 | 0.0 | 1 | NaN | NaN | 0.0 | 2367.0 | ... | 0.0173 | 0.0182 | 0.0258 | 0.2302 | 1 | 0.5812 | 0.7253 | 0.4589 | 37200 | 22000 |
New Beginning College of Cosmetology | Albertville | AL | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 115.0 | ... | 0.0000 | 0.0000 | 0.0000 | 0.0783 | 1 | 0.8224 | 0.8553 | 0.3933 | NaN | 5500 |
5 rows × 26 columns
Series切片:取位置10到19之间、步长为2的值
# Series也可以进行同样的切片
city = college['CITY']
city[10:20:2]
'''
INSTNM
Birmingham Southern College Birmingham
Concordia College Alabama Selma
Enterprise State Community College Enterprise
Faulkner University Montgomery
New Beginning College of Cosmetology Albertville
Name: CITY, dtype: object
'''
查看第4002个行索引标签
# 查看第4002个行索引标签
college.index[4001]
#'Spokane Community College'
对DataFrame用标签切片
# Series和DataFrame都可以用标签进行切片。下面是对DataFrame用标签切片
start = 'Mesa Community College'
stop = 'Spokane Community College'
college[start:stop:1500]
CITY | STABBR | HBCU | MENONLY | WOMENONLY | RELAFFIL | SATVRMID | SATMTMID | DISTANCEONLY | UGDS | ... | UGDS_2MOR | UGDS_NRA | UGDS_UNKN | PPTUG_EF | CURROPER | PCTPELL | PCTFLOAN | UG25ABV | MD_EARN_WNE_P10 | GRAD_DEBT_MDN_SUPP | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INSTNM | |||||||||||||||||||||
Mesa Community College | Mesa | AZ | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 19055.0 | ... | 0.0205 | 0.0257 | 0.0682 | 0.6457 | 1 | 0.3423 | 0.2207 | 0.4010 | 35200 | 8000 |
Hair Academy Inc-New Carrollton | New Carrollton | MD | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | 504.0 | ... | 0.0000 | 0.0000 | 0.0000 | 0.4683 | 1 | 0.9756 | 1.0000 | 0.5882 | 15200 | 9666 |
National College of Natural Medicine | Portland | OR | 0.0 | 0.0 | 0.0 | 0 | NaN | NaN | 0.0 | NaN | ... | NaN | NaN | NaN | NaN | 1 | NaN | NaN | NaN | NaN | PrivacySuppressed |
3 rows × 26 columns
对Series用标签切片
# 下面是对Series用标签切片
city[start:stop:1500]
'''
INSTNM
Mesa Community College Mesa
Hair Academy Inc-New Carrollton New Carrollton
National College of Natural Medicine Portland
Name: CITY, dtype: object
'''
直接切片不能用于列,只能用于DataFrame的行和Series,也不能同时选取行和列。
# 下面尝试选取两列,导致错误
# college[:10, ['CITY', 'STABBR']]
# TypeError: '(slice(None, 10, None), ['CITY', 'STABBR'])' is an invalid key
# 只能用.loc和.iloc选取
first_ten_instnm = college.index[:10]
college.loc[first_ten_instnm, ['CITY', 'STABBR']]
CITY | STABBR | |
---|---|---|
INSTNM | ||
A & W Healthcare Educators | New Orleans | LA |
A T Still University of Health Sciences | Kirksville | MO |
ABC Beauty Academy | Garland | TX |
ABC Beauty College Inc | Arkadelphia | AR |
AI Miami International University of Art and Design | Miami | FL |