< 누락된 값 처리하기 >
# isna() 메서드
누락된 값 개수 확인하기
[ 코드 ]
ns_book.isna().sum()
[ 결과 ]
번호 0
도서명 403
저자 201
출판사 4795
발행년도 17
ISBN 0
세트 ISBN 339811
부가기호 76648
권 333415
도서권수 0
대출건수 0
등록일자 0
dtype: int64
# ๋๋ฝ๋ ๊ฐ์ผ๋ก ํ์ํ๊ธฐ : None๊ณผ np.nan
import numpy as np
ns_book.loc[0, '부가기호'] = np.nan
ns_book.head(2)
# 다수의 열의 dtype 변경
다수의 열의 변경도 딕셔너리 형식을 이용하면 됩니다.
df1 = df.astype({'col1':'int32', 'col3':'int64'})
print(df1.dtypes)
>>
col1 int32
col2 object
col3 int64
col4 bool
dtype: object
# fillna() 메서드
누락된 값 바꾸기
[ 코드 ]
col = ['col1','col2','col3','col4','col5']
row = ['row1','row2','row3','row4','row5']
na = np.nan
data = [[na, 2,na, 4,na],
[ 6, 7,na, 9,na],
[11,na,na,14,15],
[na,17,na,na,20],
[na,22,na,na,25]]
df = pd.DataFrame(data,row,col)
print(df)
[ 결과 ]
col1 col2 col3 col4 col5
row1 NaN 2.0 NaN 4.0 NaN
row2 6.0 7.0 NaN 9.0 NaN
row3 11.0 NaN NaN 14.0 15.0
row4 NaN 17.0 NaN NaN 20.0
row5 NaN 22.0 NaN NaN 25.0
value์ ํ์์ ๋ฐ๋ฅธ ์ฌ์ฉ
value๊ฐ ์ซ์๋ ๋ฌธ์์ผ ๊ฒฝ์ฐ ๊ทธ๋๋ก ๊ฒฐ์ธก๊ฐ์ ๋์ฒดํ๊ฒ ๋ฉ๋๋ค. ์ฌ๊ธฐ์๋ A๋ก ๋ฐ๊ฟ๋ณด๊ฒ ์ต๋๋ค.
print(df.fillna('A'))
>>
col1 col2 col3 col4 col5
row1 A 2.0 A 4.0 A
row2 6.0 7.0 A 9.0 A
row3 11.0 A A 14.0 15.0
row4 A 17.0 A A 20.0
row5 A 22.0 A A 25.0
dictํํ๋ก ์ ๋ ฅํ ๊ฒฝ์ฐ ๊ฐ๊ฐ ๋ ์ด๋ธ๊ฐ์ ๋ํด ์ํ๋ ๊ฐ์ผ๋ก์ ๋ณ๊ฒฝ์ด ๊ฐ๋ฅํฉ๋๋ค.
dict = {'col1':'A','col2':'B','col3':'C','col4':'D','col5':'E'}
print(df.fillna(value=dict))
>>
col1 col2 col3 col4 col5
row1 A 2.0 C 4.0 E
row2 6.0 7.0 C 9.0 E
row3 11.0 B C 14.0 15.0
row4 A 17.0 C D 20.0
row5 A 22.0 C D 25.0
# replace() ๋ฉ์๋
NaN์ ๋ฌผ๋ก ์ด๋ค ๊ฐ๋ ๋ฐ๊ฟ ์ ์๋ ํธ๋ฆฌํ ๋ฉ์๋์ด๋ค
[ ์์ 1 ]
1์ 99๋ก ๋ฐ๊พธ๊ธฐ
print(df.replace(to_replace=1,value=99))
>>
col1 col2 col3 col4
row1 A w 99 alpha
row2 B x 2 beta
row3 C y 3 gamma
row4 D z 4 delta
- 형식
replace( 원래 값, 새로운 값 )
ns_book.replace(np.nan, '없음')
[ 결과 ]
[ ์์ 2 ]
to_replace๊ฐ์ด listํํ์ด๊ณ value๊ฐ์ด ๋จ์ผ๊ฐ์ด๋ฉด ์ ์ฒด๊ฐ ๋์ผํ๊ฒ ๋ณ๊ฒฝ๋ฉ๋๋ค.
A, B, y, z๋ฅผ -๋ก ๋ณ๊ฒฝํด๋ณด๊ฒ ์ต๋๋ค.
print(df.replace(to_replace=['A','B','y','z'],value='-'))
>>
col1 col2 col3 col4
row1 - w 1 alpha
row2 - x 2 beta
row3 C - 3 gamma
row4 D - 4 delta
- 형식
replace( [ 원래 값1, 원래 값2], [새로운 값1, 새로운 값2] )
ns_book.replace([np.nan, '2021'], ['없음','21'])
[ 결과 ]
[ ์์ 3 ]
to_replace๊ฐ๊ณผ value๊ฐ์ด ๋ชจ๋ ๊ฐ์ ๊ธธ์ด์ listํํ์ผ ๊ฒฝ์ฐ ๊ฐ๊ฐ ๊ฐ์ ์์์ ๊ฐ์ผ๋ก ๋ณ๊ฒฝ๋ฉ๋๋ค.
A, B, y, z๋ฅผ ๊ฐ๊ฐ a, b, Y, Z๋ก ๋ณ๊ฒฝํด๋ณด๊ฒ ์ต๋๋ค.
print(df.replace(to_replace=['A','B','y','z'],value=['a','b','Y','Z']))
>>
col1 col2 col3 col4
row1 a w 1 alpha
row2 b x 2 beta
row3 C Y 3 gamma
row4 D Z 4 delta
- 형식
replace ( { 열 이름 : { 원래 값 : 새로운 값 },
열 이름2 : { 원래 값2 : 새로운 값2 } } )
ns_book.replace({'부가기호' : {np.nan : '없음'},
'발행년도' : {'2021' : '21'}}).head()
# Reference
09-13. 값 변경 (replace)
####DataFrame.replace(to_replace=None, value=None, inplace=False, limit=None, regex=False, method='…
wikidocs.net