39번
Cannot mask with non-boolean array containing NA / NaN values
Ans = df.loc[df.choice_description.str.contains('Black')]
Ans = df.loc[df.choice_description.str.contains('Black',na=False)]
Ans.head(5)
필터 표현식 앞에 물결표 연산자를 추가하면 조건에 맞지 않는 행이 반환됩니다.
Ans = df.loc[~df.choice_description.str.contains('Vegetables',na=True)]
Ans
* 데이터 타입으로 컬럼 선택
- df.select_dtypes(include=..., exclude=...) : 데이터 타입을 기준으로 컬럼을 선택하는 메서드
Ans = df.select_dtypes(exclude = object)
Ans
id 거주인구 근무인구 방문인구 총 유동인구 평균 속도 평균 소요 시간 평균 기온 일강수량 평균 풍속
0 22448 32249.987 3418.266 102709.092 138377.345 39.556 29.167 5.0 0.0 2.50
1 22449 213500.997 10341.172 112692.789 336534.958 32.900 30.900 5.0 0.0 2.50
2 22450 1212382.218 96920.834 541194.481 1850497.533 29.538 35.692 2.9 0.0 2.40
3 22451 33991.653 6034.253 72155.919 112181.825 30.000 23.500 2.9 0.0 2.40
4 22452 155036.925 9403.969 150882.409 315323.303 41.583 14.375 5.1 0.0 2.30
... ... ... ... ... ... ... ... ... ... ...
9616 32064 228260.005 16891.732 152832.449 397984.186 44.514 34.054 18.1 0.0 4.70
9617 32065 459959.064 26007.122 249700.419 735666.605 48.609 61.377 18.0 0.0 4.35
9618 32066 28397.481 3144.895 84052.697 115595.073 41.053 29.421 20.3 0.0 3.00
9619 32067 348037.846 29106.286 251129.660 628273.792 46.595 49.189 17.6 0.0 3.50
9620 32068 1010643.372 65673.477 447622.068 1523938.917 40.863 27.765 14.1 0.0 4.80
9621 rows × 10 columns
df와 df.loc의 차이
-df[] : 열 이름으로 열을 선택 -> 지정한 열의 모든 행 값을 가져옴 (df[행, 열]처럼 행과 열을 함께 지정할 수는 없음)
df['거주인구']
0 32249.987
1 213500.997
2 1212382.218
3 33991.653
4 155036.925
...
9616 228260.005
9617 459959.064
9618 28397.481
9619 348037.846
9620 1010643.372
Name: 거주인구, Length: 9621, dtype: float64
df[:,'거주인구']
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4204/835568591.py in <module>
----> 1 df[:,'거주인구']
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3456 if self.columns.nlevels > 1:
3457 return self._getitem_multilevel(key)
-> 3458 indexer = self.columns.get_loc(key)
3459 if is_integer(indexer):
3460 indexer = [indexer]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3359 casted_key = self._maybe_cast_indexer(key)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
3363 raise KeyError(key) from err
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
TypeError: '(slice(None, None, None), '거주인구')' is an invalid key
- df.loc[r,c] : 행과 열 모두로 찾는 것 (object일 때)
df.loc['거주인구']
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4204/3390504387.py in <module>
----> 1 df.loc['거주인구']
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
929
930 maybe_callable = com.apply_if_callable(key, self.obj)
--> 931 return self._getitem_axis(maybe_callable, axis=axis)
932
933 def _is_scalar_access(self, key: tuple):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1162 # fall thru to straight lookup
1163 self._validate_key(key, axis)
-> 1164 return self._get_label(key, axis=axis)
1165
1166 def _get_slice_axis(self, slice_obj: slice, axis: int):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
1111 def _get_label(self, label, axis: int):
1112 # GH#5667 this will fail if the label is not present in the axis.
-> 1113 return self.obj.xs(label, axis=axis)
1114
1115 def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3774 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
3775 else:
-> 3776 loc = index.get_loc(key)
3777
3778 if isinstance(loc, np.ndarray):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\range.py in get_loc(self, key, method, tolerance)
386 except ValueError as err:
387 raise KeyError(key) from err
--> 388 raise KeyError(key)
389 return super().get_loc(key, method=method, tolerance=tolerance)
390
KeyError: '거주인구'
df.loc[:,'거주인구']
0 32249.987
1 213500.997
2 1212382.218
3 33991.653
4 155036.925
...
9616 228260.005
9617 459959.064
9618 28397.481
9619 348037.846
9620 1010643.372
Name: 거주인구, Length: 9621, dtype: float64
* 결측치 확인
isnull().sum() / info()
*drop=True : 원래의 index를 버림