first commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,284 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays.categorical import CategoricalAccessor
|
||||
from pandas.core.indexes.accessors import Properties
|
||||
|
||||
|
||||
class TestCatAccessor:
|
||||
@pytest.mark.parametrize(
|
||||
"method",
|
||||
[
|
||||
lambda x: x.cat.set_categories([1, 2, 3]),
|
||||
lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True),
|
||||
lambda x: x.cat.rename_categories([1, 2, 3]),
|
||||
lambda x: x.cat.remove_unused_categories(),
|
||||
lambda x: x.cat.remove_categories([2]),
|
||||
lambda x: x.cat.add_categories([4]),
|
||||
lambda x: x.cat.as_ordered(),
|
||||
lambda x: x.cat.as_unordered(),
|
||||
],
|
||||
)
|
||||
def test_getname_categorical_accessor(self, method):
|
||||
# GH#17509
|
||||
ser = Series([1, 2, 3], name="A").astype("category")
|
||||
expected = "A"
|
||||
result = method(ser).name
|
||||
assert result == expected
|
||||
|
||||
def test_cat_accessor(self):
|
||||
ser = Series(Categorical(["a", "b", np.nan, "a"]))
|
||||
tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
|
||||
assert not ser.cat.ordered, False
|
||||
|
||||
exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"])
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||||
# issue #37643 inplace kwarg deprecated
|
||||
return_value = ser.cat.set_categories(["b", "a"], inplace=True)
|
||||
|
||||
assert return_value is None
|
||||
tm.assert_categorical_equal(ser.values, exp)
|
||||
|
||||
res = ser.cat.set_categories(["b", "a"])
|
||||
tm.assert_categorical_equal(res.values, exp)
|
||||
|
||||
ser[:] = "a"
|
||||
ser = ser.cat.remove_unused_categories()
|
||||
tm.assert_index_equal(ser.cat.categories, Index(["a"]))
|
||||
|
||||
def test_cat_accessor_api(self):
|
||||
# GH#9322
|
||||
|
||||
assert Series.cat is CategoricalAccessor
|
||||
ser = Series(list("aabbcde")).astype("category")
|
||||
assert isinstance(ser.cat, CategoricalAccessor)
|
||||
|
||||
invalid = Series([1])
|
||||
with pytest.raises(AttributeError, match="only use .cat accessor"):
|
||||
invalid.cat
|
||||
assert not hasattr(invalid, "cat")
|
||||
|
||||
def test_cat_accessor_no_new_attributes(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/10673
|
||||
cat = Series(list("aabbcde")).astype("category")
|
||||
with pytest.raises(AttributeError, match="You cannot add any new attribute"):
|
||||
cat.cat.xlabel = "a"
|
||||
|
||||
def test_cat_accessor_updates_on_inplace(self):
|
||||
ser = Series(list("abc")).astype("category")
|
||||
return_value = ser.drop(0, inplace=True)
|
||||
assert return_value is None
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
return_value = ser.cat.remove_unused_categories(inplace=True)
|
||||
|
||||
assert return_value is None
|
||||
assert len(ser.cat.categories) == 2
|
||||
|
||||
def test_categorical_delegations(self):
|
||||
|
||||
# invalid accessor
|
||||
msg = r"Can only use \.cat accessor with a 'category' dtype"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series([1, 2, 3]).cat
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series([1, 2, 3]).cat()
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series(["a", "b", "c"]).cat
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series(np.arange(5.0)).cat
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
Series([Timestamp("20130101")]).cat
|
||||
|
||||
# Series should delegate calls to '.categories', '.codes', '.ordered'
|
||||
# and the methods '.set_categories()' 'drop_unused_categories()' to the
|
||||
# categorical
|
||||
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
|
||||
exp_categories = Index(["a", "b", "c"])
|
||||
tm.assert_index_equal(ser.cat.categories, exp_categories)
|
||||
ser.cat.categories = [1, 2, 3]
|
||||
exp_categories = Index([1, 2, 3])
|
||||
tm.assert_index_equal(ser.cat.categories, exp_categories)
|
||||
|
||||
exp_codes = Series([0, 1, 2, 0], dtype="int8")
|
||||
tm.assert_series_equal(ser.cat.codes, exp_codes)
|
||||
|
||||
assert ser.cat.ordered
|
||||
ser = ser.cat.as_unordered()
|
||||
assert not ser.cat.ordered
|
||||
return_value = ser.cat.as_ordered(inplace=True)
|
||||
assert return_value is None
|
||||
assert ser.cat.ordered
|
||||
|
||||
# reorder
|
||||
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
|
||||
exp_categories = Index(["c", "b", "a"])
|
||||
exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
|
||||
ser = ser.cat.set_categories(["c", "b", "a"])
|
||||
tm.assert_index_equal(ser.cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
|
||||
tm.assert_numpy_array_equal(ser.__array__(), exp_values)
|
||||
|
||||
# remove unused categories
|
||||
ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
|
||||
exp_categories = Index(["a", "b"])
|
||||
exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
|
||||
ser = ser.cat.remove_unused_categories()
|
||||
tm.assert_index_equal(ser.cat.categories, exp_categories)
|
||||
tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
|
||||
tm.assert_numpy_array_equal(ser.__array__(), exp_values)
|
||||
|
||||
# This method is likely to be confused, so test that it raises an error
|
||||
# on wrong inputs:
|
||||
msg = "'Series' object has no attribute 'set_categories'"
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
ser.set_categories([4, 3, 2, 1])
|
||||
|
||||
# right: ser.cat.set_categories([4,3,2,1])
|
||||
|
||||
# GH#18862 (let Series.cat.rename_categories take callables)
|
||||
ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
|
||||
result = ser.cat.rename_categories(lambda x: x.upper())
|
||||
expected = Series(
|
||||
Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"idx",
|
||||
[
|
||||
date_range("1/1/2015", periods=5),
|
||||
date_range("1/1/2015", periods=5, tz="MET"),
|
||||
period_range("1/1/2015", freq="D", periods=5),
|
||||
timedelta_range("1 days", "10 days"),
|
||||
],
|
||||
)
|
||||
def test_dt_accessor_api_for_categorical(self, idx):
|
||||
# https://github.com/pandas-dev/pandas/issues/10661
|
||||
|
||||
ser = Series(idx)
|
||||
cat = ser.astype("category")
|
||||
|
||||
# only testing field (like .day)
|
||||
# and bool (is_month_start)
|
||||
attr_names = type(ser._values)._datetimelike_ops
|
||||
|
||||
assert isinstance(cat.dt, Properties)
|
||||
|
||||
special_func_defs = [
|
||||
("strftime", ("%Y-%m-%d",), {}),
|
||||
("round", ("D",), {}),
|
||||
("floor", ("D",), {}),
|
||||
("ceil", ("D",), {}),
|
||||
("asfreq", ("D",), {}),
|
||||
]
|
||||
if idx.dtype == "M8[ns]":
|
||||
# exclude dt64tz since that is already localized and would raise
|
||||
tup = ("tz_localize", ("UTC",), {})
|
||||
special_func_defs.append(tup)
|
||||
elif idx.dtype.kind == "M":
|
||||
# exclude dt64 since that is not localized so would raise
|
||||
tup = ("tz_convert", ("EST",), {})
|
||||
special_func_defs.append(tup)
|
||||
|
||||
_special_func_names = [f[0] for f in special_func_defs]
|
||||
|
||||
_ignore_names = ["components", "tz_localize", "tz_convert"]
|
||||
|
||||
func_names = [
|
||||
fname
|
||||
for fname in dir(ser.dt)
|
||||
if not (
|
||||
fname.startswith("_")
|
||||
or fname in attr_names
|
||||
or fname in _special_func_names
|
||||
or fname in _ignore_names
|
||||
)
|
||||
]
|
||||
|
||||
func_defs = [(fname, (), {}) for fname in func_names]
|
||||
|
||||
for f_def in special_func_defs:
|
||||
if f_def[0] in dir(ser.dt):
|
||||
func_defs.append(f_def)
|
||||
|
||||
for func, args, kwargs in func_defs:
|
||||
with warnings.catch_warnings():
|
||||
if func == "to_period":
|
||||
# dropping TZ
|
||||
warnings.simplefilter("ignore", UserWarning)
|
||||
res = getattr(cat.dt, func)(*args, **kwargs)
|
||||
exp = getattr(ser.dt, func)(*args, **kwargs)
|
||||
|
||||
tm.assert_equal(res, exp)
|
||||
|
||||
for attr in attr_names:
|
||||
if attr in ["week", "weekofyear"]:
|
||||
# GH#33595 Deprecate week and weekofyear
|
||||
continue
|
||||
res = getattr(cat.dt, attr)
|
||||
exp = getattr(ser.dt, attr)
|
||||
|
||||
tm.assert_equal(res, exp)
|
||||
|
||||
def test_dt_accessor_api_for_categorical_invalid(self):
|
||||
invalid = Series([1, 2, 3]).astype("category")
|
||||
msg = "Can only use .dt accessor with datetimelike"
|
||||
|
||||
with pytest.raises(AttributeError, match=msg):
|
||||
invalid.dt
|
||||
assert not hasattr(invalid, "str")
|
||||
|
||||
def test_reorder_categories_updates_dtype(self):
|
||||
# GH#43232
|
||||
ser = Series(["a", "b", "c"], dtype="category")
|
||||
orig_dtype = ser.dtype
|
||||
|
||||
# Need to construct this before calling reorder_categories inplace
|
||||
expected = ser.cat.reorder_categories(["c", "b", "a"])
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning, match="`inplace` parameter"):
|
||||
ser.cat.reorder_categories(["c", "b", "a"], inplace=True)
|
||||
|
||||
assert not orig_dtype.categories.equals(ser.dtype.categories)
|
||||
assert not orig_dtype.categories.equals(expected.dtype.categories)
|
||||
assert ser.dtype == expected.dtype
|
||||
assert ser.dtype.categories.equals(expected.dtype.categories)
|
||||
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def test_set_categories_setitem(self):
|
||||
# GH#43334
|
||||
|
||||
df = DataFrame({"Survived": [1, 0, 1], "Sex": [0, 1, 1]}, dtype="category")
|
||||
|
||||
# change the dtype in-place
|
||||
df["Survived"].cat.categories = ["No", "Yes"]
|
||||
df["Sex"].cat.categories = ["female", "male"]
|
||||
|
||||
# values should not be coerced to NaN
|
||||
assert list(df["Sex"]) == ["female", "male", "male"]
|
||||
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
|
||||
|
||||
df["Sex"] = Categorical(df["Sex"], categories=["female", "male"], ordered=False)
|
||||
df["Survived"] = Categorical(
|
||||
df["Survived"], categories=["No", "Yes"], ordered=False
|
||||
)
|
||||
|
||||
# values should not be coerced to NaN
|
||||
assert list(df["Sex"]) == ["female", "male", "male"]
|
||||
assert list(df["Survived"]) == ["Yes", "No", "Yes"]
|
||||
@@ -0,0 +1,796 @@
|
||||
import calendar
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
time,
|
||||
)
|
||||
import locale
|
||||
import unicodedata
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs.timezones import maybe_get_tz
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_integer_dtype,
|
||||
is_list_like,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
DatetimeArray,
|
||||
PeriodArray,
|
||||
TimedeltaArray,
|
||||
)
|
||||
import pandas.core.common as com
|
||||
|
||||
# Names each ``.dt`` flavour is expected to list: the ``*_ops`` values come
# straight from the array classes' ``_datetimelike_ops``; the ``*_methods``
# lists are the additional method names the tests below look up.

# datetime64 (tz-naive and tz-aware)
ok_for_dt = DatetimeArray._datetimelike_ops
ok_for_dt_methods = [
    "to_period", "to_pydatetime", "tz_localize", "tz_convert",
    "normalize", "strftime", "round", "floor",
    "ceil", "day_name", "month_name", "isocalendar",
]  # fmt: skip

# timedelta64
ok_for_td = TimedeltaArray._datetimelike_ops
ok_for_td_methods = [
    "components", "to_pytimedelta", "total_seconds",
    "round", "floor", "ceil",
]  # fmt: skip

# period
ok_for_period = PeriodArray._datetimelike_ops
ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"]
|
||||
|
||||
|
||||
def get_dir(ser):
    """Return the sorted, de-duplicated public names from ``ser.dt.__dir__()``.

    Names starting with an underscore are dropped.
    """
    # check limited display api
    public_names = {attr for attr in ser.dt.__dir__() if not attr.startswith("_")}
    return sorted(public_names)
|
||||
|
||||
|
||||
class TestSeriesDatetimeValues:
|
||||
def _compare(self, ser, name):
|
||||
# GH 7207, 11128
|
||||
# test .dt namespace accessor
|
||||
|
||||
def get_expected(ser, prop):
|
||||
result = getattr(Index(ser._values), prop)
|
||||
if isinstance(result, np.ndarray):
|
||||
if is_integer_dtype(result):
|
||||
result = result.astype("int64")
|
||||
elif not is_list_like(result) or isinstance(result, DataFrame):
|
||||
return result
|
||||
return Series(result, index=ser.index, name=ser.name)
|
||||
|
||||
left = getattr(ser.dt, name)
|
||||
right = get_expected(ser, name)
|
||||
if not (is_list_like(left) and is_list_like(right)):
|
||||
assert left == right
|
||||
elif isinstance(left, DataFrame):
|
||||
tm.assert_frame_equal(left, right)
|
||||
else:
|
||||
tm.assert_series_equal(left, right)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "s", "ms"])
def test_dt_namespace_accessor_datetime64(self, freq):
    """Exercise the ``.dt`` accessor on a tz-naive datetime64 Series.

    Compares every property in ``ok_for_dt`` via ``_compare``, looks up every
    name in ``ok_for_dt_methods``, then checks ``to_pydatetime``,
    ``tz_localize``, ``tz``, ``freq`` and a localize-then-convert chain.
    """
    # GH#7207, GH#11128
    # test .dt namespace accessor

    # datetimeindex
    ser = Series(date_range("20130101", periods=5, freq=freq), name="xxx")

    # "freq" is asserted separately below; "week" and "weekofyear" are
    # ignored because they are deprecated
    skipped = ("freq", "week", "weekofyear")
    for prop in ok_for_dt:
        if prop in skipped:
            continue
        self._compare(ser, prop)

    # attribute lookup only; the methods are not called in this loop
    for method_name in ok_for_dt_methods:
        getattr(ser.dt, method_name)

    as_pydatetime = ser.dt.to_pydatetime()
    assert isinstance(as_pydatetime, np.ndarray)
    assert as_pydatetime.dtype == object

    localized = ser.dt.tz_localize("US/Eastern")
    eastern_index = DatetimeIndex(ser.values).tz_localize("US/Eastern")
    expected_localized = Series(eastern_index, index=ser.index, name="xxx")
    tm.assert_series_equal(localized, expected_localized)

    assert str(localized.dt.tz) == "US/Eastern"
    assert ser.dt.freq == DatetimeIndex(ser.values, freq="infer").freq

    # let's localize, then convert
    converted = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")
    utc_index = DatetimeIndex(ser.values).tz_localize("UTC")
    expected_converted = Series(
        utc_index.tz_convert("US/Eastern"), index=ser.index, name="xxx"
    )
    tm.assert_series_equal(converted, expected_converted)
|
||||
|
||||
def test_dt_namespace_accessor_datetime64tz(self):
    """Exercise the ``.dt`` accessor on a tz-aware (US/Eastern) datetime64 Series."""
    # GH#7207, GH#11128
    # test .dt namespace accessor

    # datetimeindex with tz
    ser = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx")

    # "freq" is asserted separately below; "week" and "weekofyear" are
    # ignored because they are deprecated
    skipped = ("freq", "week", "weekofyear")
    for prop in ok_for_dt:
        if prop in skipped:
            continue
        self._compare(ser, prop)

    # attribute lookup only; the methods are not called in this loop
    for method_name in ok_for_dt_methods:
        getattr(ser.dt, method_name)

    as_pydatetime = ser.dt.to_pydatetime()
    assert isinstance(as_pydatetime, np.ndarray)
    assert as_pydatetime.dtype == object

    converted = ser.dt.tz_convert("CET")
    cet_values = ser._values.tz_convert("CET")
    tm.assert_series_equal(
        converted, Series(cet_values, index=ser.index, name="xxx")
    )

    assert str(converted.dt.tz) == "CET"
    assert ser.dt.freq == DatetimeIndex(ser.values, freq="infer").freq
|
||||
|
||||
def test_dt_namespace_accessor_timedelta(self):
    """Exercise the ``.dt`` accessor on three timedelta Series of differing freq."""
    # GH#7207, GH#11128
    # test .dt namespace accessor

    # timedelta index
    daily = Series(
        timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"
    )
    per_second = Series(
        timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"
    )
    per_millisecond = Series(
        timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"),
        name="xxx",
    )

    for ser in (daily, per_second, per_millisecond):
        # "freq" is asserted at the end of each iteration
        for prop in ok_for_td:
            if prop == "freq":
                continue
            self._compare(ser, prop)

        # attribute lookup only; nothing is called in this loop
        for method_name in ok_for_td_methods:
            getattr(ser.dt, method_name)

        components = ser.dt.components
        assert isinstance(components, DataFrame)
        tm.assert_index_equal(components.index, ser.index)

        as_pytimedelta = ser.dt.to_pytimedelta()
        assert isinstance(as_pytimedelta, np.ndarray)
        assert as_pytimedelta.dtype == object

        seconds = ser.dt.total_seconds()
        assert isinstance(seconds, Series)
        assert seconds.dtype == "float64"

        assert ser.dt.freq == TimedeltaIndex(ser.values, freq="infer").freq
|
||||
|
||||
def test_dt_namespace_accessor_period(self):
    """Exercise the ``.dt`` accessor on a daily period Series."""
    # GH#7207, GH#11128
    # test .dt namespace accessor

    # periodindex
    ser = Series(period_range("20130101", periods=5, freq="D"), name="xxx")

    # "freq" is asserted on its own below
    for prop in ok_for_period:
        if prop == "freq":
            continue
        self._compare(ser, prop)

    # attribute lookup only; nothing is called in this loop
    for method_name in ok_for_period_methods:
        getattr(ser.dt, method_name)

    assert ser.dt.freq == PeriodIndex(ser.values).freq
|
||||
|
||||
def test_dt_namespace_accessor_index_and_values(self):
    """``.dt`` results read the Series values and keep the Series index.

    The Series holds second-spaced timestamps but is labelled by an unrelated
    daily DatetimeIndex, so a result derived from the index by mistake would
    not match the expectations below.
    """

    # both
    index = date_range("20130101", periods=3, freq="D")
    dti = date_range("20140204", periods=3, freq="s")
    ser = Series(dti, index=index, name="xxx")
    exp = Series(
        np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx"
    )
    tm.assert_series_equal(ser.dt.year, exp)

    exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx")
    tm.assert_series_equal(ser.dt.month, exp)

    exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx")
    tm.assert_series_equal(ser.dt.second, exp)

    # Use explicit positional access: ``ser[0]`` on a DatetimeIndex-labelled
    # Series only works through the positional fallback of ``[]``.
    exp = Series([ser.iloc[0]] * 3, index=index, name="xxx")
    tm.assert_series_equal(ser.dt.normalize(), exp)
|
||||
|
||||
def test_dt_accessor_limited_display_api(self):
    """``get_dir`` on each ``.dt`` flavour lists exactly the expected public names."""
    expected_dt_names = sorted(set(ok_for_dt + ok_for_dt_methods))

    # tznaive
    naive = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
    tm.assert_almost_equal(get_dir(naive), expected_dt_names)

    # tzaware
    aware = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx")
    aware = aware.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
    tm.assert_almost_equal(get_dir(aware), expected_dt_names)

    # Period
    periods_as_object = period_range(
        "20130101", periods=5, freq="D", name="xxx"
    ).astype(object)
    listed = get_dir(Series(periods_as_object))
    tm.assert_almost_equal(
        listed, sorted(set(ok_for_period + ok_for_period_methods))
    )
|
||||
|
||||
def test_dt_accessor_ambiguous_freq_conversions(self):
|
||||
# GH#11295
|
||||
# ambiguous time error on the conversions
|
||||
ser = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx")
|
||||
ser = ser.dt.tz_localize("UTC").dt.tz_convert("America/Chicago")
|
||||
|
||||
exp_values = date_range(
|
||||
"2015-01-01", "2016-01-01", freq="T", tz="UTC"
|
||||
).tz_convert("America/Chicago")
|
||||
# freq not preserved by tz_localize above
|
||||
exp_values = exp_values._with_freq(None)
|
||||
expected = Series(exp_values, name="xxx")
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def test_dt_accessor_not_writeable(self):
    """Assigning to a ``.dt`` property, or into its result, raises."""
    # no setting allowed
    ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx")
    with pytest.raises(ValueError, match="modifications"):
        ser.dt.hour = 5

    # trying to set a copy
    msg = "modifications to a property of a datetimelike.+not supported"
    with pd.option_context("chained_assignment", "raise"), pytest.raises(
        com.SettingWithCopyError, match=msg
    ):
        ser.dt.hour[0] = 5
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method, dates",
|
||||
[
|
||||
["round", ["2012-01-02", "2012-01-02", "2012-01-01"]],
|
||||
["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]],
|
||||
["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]],
|
||||
],
|
||||
)
|
||||
def test_dt_round(self, method, dates):
|
||||
# round
|
||||
ser = Series(
|
||||
pd.to_datetime(
|
||||
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
|
||||
),
|
||||
name="xxx",
|
||||
)
|
||||
result = getattr(ser.dt, method)("D")
|
||||
expected = Series(pd.to_datetime(dates), name="xxx")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_round_tz(self):
|
||||
ser = Series(
|
||||
pd.to_datetime(
|
||||
["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"]
|
||||
),
|
||||
name="xxx",
|
||||
)
|
||||
result = ser.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D")
|
||||
|
||||
exp_values = pd.to_datetime(
|
||||
["2012-01-01", "2012-01-01", "2012-01-01"]
|
||||
).tz_localize("US/Eastern")
|
||||
expected = Series(exp_values, name="xxx")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("method", ["ceil", "round", "floor"])
def test_dt_round_tz_ambiguous(self, method):
    """Check each ``ambiguous`` option when rounding across the Madrid fall-back hour."""
    # GH 18946 round near "fall back" DST
    utc_stamps = [
        pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True),
        pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True),
        pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True),
    ]
    df1 = DataFrame(utc_stamps, columns=["date"])
    df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid")

    def apply_hourly(ambiguous):
        # a fresh accessor is fetched for every call
        return getattr(df1.date.dt, method)("H", ambiguous=ambiguous)

    # infer
    unchanged = df1["date"]
    tm.assert_series_equal(apply_hourly("infer"), unchanged)

    # bool-array
    tm.assert_series_equal(apply_hourly([True, False, False]), unchanged)

    # NaT
    result = apply_hourly("NaT")
    with_nat = df1["date"].copy()
    with_nat.iloc[0:2] = pd.NaT
    tm.assert_series_equal(result, with_nat)

    # raise
    with tm.external_error_raised(pytz.AmbiguousTimeError):
        apply_hourly("raise")
|
||||
|
||||
@pytest.mark.parametrize(
    "method, ts_str, freq",
    [
        ["ceil", "2018-03-11 01:59:00-0600", "5min"],
        ["round", "2018-03-11 01:59:00-0600", "5min"],
        ["floor", "2018-03-11 03:01:00-0500", "2H"],
    ],
)
def test_dt_round_tz_nonexistent(self, method, ts_str, freq):
    """Check each ``nonexistent`` option when rounding into the Chicago spring-forward gap."""
    # GH 23324 round near "spring forward" DST
    ser = Series([pd.Timestamp(ts_str, tz="America/Chicago")])

    def apply(nonexistent):
        # a fresh accessor is fetched for every call
        return getattr(ser.dt, method)(freq, nonexistent=nonexistent)

    shifted = apply("shift_forward")
    after_gap = pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")
    tm.assert_series_equal(shifted, Series([after_gap]))

    as_nat = apply("NaT")
    tm.assert_series_equal(as_nat, Series([pd.NaT]).dt.tz_localize(as_nat.dt.tz))

    with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"):
        apply("raise")
|
||||
|
||||
def test_dt_namespace_accessor_categorical(self):
    """``.dt.year`` works on a Series backed by a Categorical of datetimes."""
    # GH 19468
    repeated = DatetimeIndex(["20171111", "20181212"]).repeat(2)
    ser = Series(pd.Categorical(repeated), name="foo")
    years = ser.dt.year
    tm.assert_series_equal(years, Series([2017, 2017, 2018, 2018], name="foo"))
|
||||
|
||||
def test_dt_tz_localize_categorical(self, tz_aware_fixture):
|
||||
# GH 27952
|
||||
tz = tz_aware_fixture
|
||||
datetimes = Series(
|
||||
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]"
|
||||
)
|
||||
categorical = datetimes.astype("category")
|
||||
result = categorical.dt.tz_localize(tz)
|
||||
expected = datetimes.dt.tz_localize(tz)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_tz_convert_categorical(self, tz_aware_fixture):
|
||||
# GH 27952
|
||||
tz = tz_aware_fixture
|
||||
datetimes = Series(
|
||||
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]"
|
||||
)
|
||||
categorical = datetimes.astype("category")
|
||||
result = categorical.dt.tz_convert(tz)
|
||||
expected = datetimes.dt.tz_convert(tz)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("accessor", ["year", "month", "day"])
|
||||
def test_dt_other_accessors_categorical(self, accessor):
|
||||
# GH 27952
|
||||
datetimes = Series(
|
||||
["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]"
|
||||
)
|
||||
categorical = datetimes.astype("category")
|
||||
result = getattr(categorical.dt, accessor)
|
||||
expected = getattr(datetimes.dt, accessor)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_no_new_attributes(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/10673
|
||||
ser = Series(date_range("20130101", periods=5, freq="D"))
|
||||
with pytest.raises(AttributeError, match="You cannot add any new attribute"):
|
||||
ser.dt.xlabel = "a"
|
||||
|
||||
# error: Unsupported operand types for + ("List[None]" and "List[str]")
@pytest.mark.parametrize(
    "time_locale", [None] + (tm.get_locales() or [])  # type: ignore[operator]
)
def test_dt_accessor_datetime_name_accessors(self, time_locale):
    """``day_name``/``month_name`` return locale-specific names, NaN for NaT."""
    english_days = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]

    # Test Monday -> Sunday and January -> December, in that sequence
    if time_locale is not None:
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_days = calendar.day_name[:]
            expected_months = calendar.month_name[1:]
    else:
        # If the time_locale is None, day-name and month_name should
        # return the english attributes
        expected_days = list(english_days)
        expected_months = [
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ]

    ser = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365))
    # positions 4..10 of the series are paired with Monday..Sunday
    day_pairs = zip(expected_days, english_days)
    for day, (name, eng_name) in enumerate(day_pairs, start=4):
        name = name.capitalize()
        assert ser.dt.day_name(locale=time_locale)[day] == name
        assert ser.dt.day_name(locale=None)[day] == eng_name
    ser = pd.concat([ser, Series([pd.NaT])])
    assert np.isnan(ser.dt.day_name(locale=time_locale).iloc[-1])

    ser = Series(date_range(freq="M", start="2012", end="2013"))
    month_names = ser.dt.month_name(locale=time_locale)
    capitalized_months = Series([month.capitalize() for month in expected_months])

    # work around https://github.com/pandas-dev/pandas/issues/22342
    month_names = month_names.str.normalize("NFD")
    capitalized_months = capitalized_months.str.normalize("NFD")

    tm.assert_series_equal(month_names, capitalized_months)

    for s_date, month in zip(ser, expected_months):
        found = unicodedata.normalize("NFD", s_date.month_name(locale=time_locale))
        wanted = unicodedata.normalize("NFD", month.capitalize())

        assert found == wanted

    ser = pd.concat([ser, Series([pd.NaT])])
    assert np.isnan(ser.dt.month_name(locale=time_locale).iloc[-1])
|
||||
|
||||
def test_strftime(self):
|
||||
# GH 10086
|
||||
ser = Series(date_range("20130101", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(date_range("2015-02-03 11:22:33.4567", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2015/02/03 11-22-33",
|
||||
"2015/02/04 11-22-33",
|
||||
"2015/02/05 11-22-33",
|
||||
"2015/02/06 11-22-33",
|
||||
"2015/02/07 11-22-33",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(period_range("20130101", periods=5))
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H-%M-%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2015/02/03 11-22-33",
|
||||
"2015/02/03 11-22-34",
|
||||
"2015/02/03 11-22-35",
|
||||
"2015/02/03 11-22-36",
|
||||
"2015/02/03 11-22-37",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_dt64_days(self):
|
||||
ser = Series(date_range("20130101", periods=5))
|
||||
ser.iloc[0] = pd.NaT
|
||||
result = ser.dt.strftime("%Y/%m/%d")
|
||||
expected = Series(
|
||||
[np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
datetime_index = date_range("20150301", periods=5)
|
||||
result = datetime_index.strftime("%Y/%m/%d")
|
||||
|
||||
expected = Index(
|
||||
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
|
||||
dtype=np.object_,
|
||||
)
|
||||
# dtype may be S10 or U10 depending on python version
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_strftime_period_days(self):
|
||||
period_index = period_range("20150301", periods=5)
|
||||
result = period_index.strftime("%Y/%m/%d")
|
||||
expected = Index(
|
||||
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
|
||||
dtype="=U10",
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_strftime_dt64_microsecond_resolution(self):
|
||||
ser = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)])
|
||||
result = ser.dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_period_hours(self):
|
||||
ser = Series(period_range("20130101", periods=4, freq="H"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S")
|
||||
expected = Series(
|
||||
[
|
||||
"2013/01/01 00:00:00",
|
||||
"2013/01/01 01:00:00",
|
||||
"2013/01/01 02:00:00",
|
||||
"2013/01/01 03:00:00",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_strftime_period_minutes(self):
|
||||
ser = Series(period_range("20130101", periods=4, freq="L"))
|
||||
result = ser.dt.strftime("%Y/%m/%d %H:%M:%S.%l")
|
||||
expected = Series(
|
||||
[
|
||||
"2013/01/01 00:00:00.000",
|
||||
"2013/01/01 00:00:00.001",
|
||||
"2013/01/01 00:00:00.002",
|
||||
"2013/01/01 00:00:00.003",
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data",
|
||||
[
|
||||
DatetimeIndex(["2019-01-01", pd.NaT]),
|
||||
PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"),
|
||||
],
|
||||
)
|
||||
def test_strftime_nat(self, data):
|
||||
# GH 29578
|
||||
ser = Series(data)
|
||||
result = ser.dt.strftime("%Y-%m-%d")
|
||||
expected = Series(["2019-01-01", np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_valid_dt_with_missing_values(self):
|
||||
|
||||
from datetime import (
|
||||
date,
|
||||
time,
|
||||
)
|
||||
|
||||
# GH 8689
|
||||
ser = Series(date_range("20130101", periods=5, freq="D"))
|
||||
ser.iloc[2] = pd.NaT
|
||||
|
||||
for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]:
|
||||
expected = getattr(ser.dt, attr).copy()
|
||||
expected.iloc[2] = np.nan
|
||||
result = getattr(ser.dt, attr)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser.dt.date
|
||||
expected = Series(
|
||||
[
|
||||
date(2013, 1, 1),
|
||||
date(2013, 1, 2),
|
||||
np.nan,
|
||||
date(2013, 1, 4),
|
||||
date(2013, 1, 5),
|
||||
],
|
||||
dtype="object",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser.dt.time
|
||||
expected = Series([time(0), time(0), np.nan, time(0), time(0)], dtype="object")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dt_accessor_api(self):
|
||||
# GH 9322
|
||||
from pandas.core.indexes.accessors import (
|
||||
CombinedDatetimelikeProperties,
|
||||
DatetimeProperties,
|
||||
)
|
||||
|
||||
assert Series.dt is CombinedDatetimelikeProperties
|
||||
|
||||
ser = Series(date_range("2000-01-01", periods=3))
|
||||
assert isinstance(ser.dt, DatetimeProperties)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ser", [Series(np.arange(5)), Series(list("abcde")), Series(np.random.randn(5))]
|
||||
)
|
||||
def test_dt_accessor_invalid(self, ser):
|
||||
# GH#9322 check that series with incorrect dtypes don't have attr
|
||||
with pytest.raises(AttributeError, match="only use .dt accessor"):
|
||||
ser.dt
|
||||
assert not hasattr(ser, "dt")
|
||||
|
||||
def test_dt_accessor_updates_on_inplace(self):
|
||||
ser = Series(date_range("2018-01-01", periods=10))
|
||||
ser[2] = None
|
||||
return_value = ser.fillna(pd.Timestamp("2018-01-01"), inplace=True)
|
||||
assert return_value is None
|
||||
result = ser.dt.date
|
||||
assert result[0] == result[2]
|
||||
|
||||
def test_date_tz(self):
|
||||
# GH11757
|
||||
rng = DatetimeIndex(
|
||||
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"],
|
||||
tz="US/Eastern",
|
||||
)
|
||||
ser = Series(rng)
|
||||
expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)])
|
||||
tm.assert_series_equal(ser.dt.date, expected)
|
||||
tm.assert_series_equal(ser.apply(lambda x: x.date()), expected)
|
||||
|
||||
def test_dt_timetz_accessor(self, tz_naive_fixture):
|
||||
# GH21358
|
||||
tz = maybe_get_tz(tz_naive_fixture)
|
||||
|
||||
dtindex = DatetimeIndex(
|
||||
["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz
|
||||
)
|
||||
ser = Series(dtindex)
|
||||
expected = Series(
|
||||
[time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)]
|
||||
)
|
||||
result = ser.dt.timetz
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input_series, expected_output",
|
||||
[
|
||||
[["2020-01-01"], [[2020, 1, 3]]],
|
||||
[[pd.NaT], [[np.NaN, np.NaN, np.NaN]]],
|
||||
[["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
|
||||
[["2010-01-01", pd.NaT], [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]]],
|
||||
# see GH#36032
|
||||
[["2016-01-08", "2016-01-04"], [[2016, 1, 5], [2016, 1, 1]]],
|
||||
[["2016-01-07", "2016-01-01"], [[2016, 1, 4], [2015, 53, 5]]],
|
||||
],
|
||||
)
|
||||
@pytest.mark.filterwarnings("ignore:Inferring datetime64:FutureWarning")
|
||||
def test_isocalendar(self, input_series, expected_output):
|
||||
result = pd.to_datetime(Series(input_series)).dt.isocalendar()
|
||||
expected_frame = DataFrame(
|
||||
expected_output, columns=["year", "week", "day"], dtype="UInt32"
|
||||
)
|
||||
tm.assert_frame_equal(result, expected_frame)
|
||||
|
||||
def test_hour_index(self):
|
||||
dt_series = Series(
|
||||
date_range(start="2021-01-01", periods=5, freq="h"),
|
||||
index=[2, 6, 7, 8, 11],
|
||||
dtype="category",
|
||||
)
|
||||
result = dt_series.dt.hour
|
||||
expected = Series(
|
||||
[0, 1, 2, 3, 4],
|
||||
index=[2, 6, 7, 8, 11],
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestSeriesPeriodValuesDtAccessor:
    """Tests for ``.dt`` on Series of periods, plus ``.dt.to_period``."""

    @pytest.mark.parametrize(
        "input_vals",
        [
            [Period("2016-01", freq="M"), Period("2016-02", freq="M")],
            [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")],
            [
                Period("2016-01-01 00:00:00", freq="H"),
                Period("2016-01-01 01:00:00", freq="H"),
            ],
            [
                # Minute frequency: the two stamps are one minute apart.  With
                # freq="M" (month) both collapsed to the same monthly period
                # and minute frequency was never exercised.
                Period("2016-01-01 00:00:00", freq="T"),
                Period("2016-01-01 00:01:00", freq="T"),
            ],
            [
                Period("2016-01-01 00:00:00", freq="S"),
                Period("2016-01-01 00:00:01", freq="S"),
            ],
        ],
    )
    def test_end_time_timevalues(self, input_vals):
        """``ser.dt.end_time`` equals ``end_time`` applied to each Period.

        Parameters
        ----------
        input_vals : list of Period
            Two periods sharing one frequency.
        """
        # GH#17157
        # Check that the time part of the Period is adjusted by end_time
        # when using the dt accessor on a Series
        input_vals = PeriodArray._from_sequence(np.asarray(input_vals))

        ser = Series(input_vals)
        result = ser.dt.end_time
        expected = ser.apply(lambda x: x.end_time)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("input_vals", ["2001", "NaT"])
    def test_to_period(self, input_vals):
        """``.dt.to_period("D")`` matches a Series constructed as ``Period[D]``.

        Parameters
        ----------
        input_vals : str
            A date string or ``"NaT"``.
        """
        # GH#21205
        expected = Series([input_vals], dtype="Period[D]")
        result = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D")
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_week_and_weekofyear_are_deprecated():
|
||||
# GH#33595 Deprecate week and weekofyear
|
||||
series = pd.to_datetime(Series(["2020-01-01"]))
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
series.dt.week
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
series.dt.weekofyear
|
||||
|
||||
|
||||
def test_normalize_pre_epoch_dates():
    """``.dt.normalize()`` drops the time of day for a 1969 and a 2016 timestamp."""
    # GH: 36294
    with_times = pd.to_datetime(Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"]))
    midnight = pd.to_datetime(Series(["1969-01-01", "2016-01-01"]))
    tm.assert_series_equal(with_times.dt.normalize(), midnight)
|
||||
@@ -0,0 +1,9 @@
|
||||
from pandas import Series
|
||||
|
||||
|
||||
class TestSparseAccessor:
    """Checks on the ``.sparse`` accessor of a Series."""

    def test_sparse_accessor_updates_on_inplace(self):
        """``.sparse`` reflects the data left after an in-place ``drop``."""
        sparse = Series([1, 1, 2, 3], dtype="Sparse[int]")
        outcome = sparse.drop([0, 1], inplace=True)
        assert outcome is None
        assert sparse.sparse.density == 1.0
|
||||
@@ -0,0 +1,25 @@
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestStrAccessor:
    """Checks on the ``.str`` accessor of a Series."""

    def test_str_attribute(self):
        """Strip methods called through ``Series.str`` match the ``str`` builtins."""
        # GH#9068
        names = Series([" jack", "jill ", " jesse ", "frank"])
        for method_name in ("strip", "rstrip", "lstrip"):
            builtin = getattr(str, method_name)
            wanted = Series([builtin(value) for value in names.values])
            accessor_method = getattr(Series.str, method_name)
            tm.assert_series_equal(accessor_method(names.str), wanted)

        # str accessor only valid with string values
        numbers = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            numbers.str.repeat(2)

    def test_str_accessor_updates_on_inplace(self):
        """``.str`` reflects the data left after an in-place ``drop``."""
        letters = Series(list("abc"))
        outcome = letters.drop([0], inplace=True)
        assert outcome is None
        assert len(letters.str.lower()) == 2
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,484 @@
|
||||
"""
|
||||
Also test support for datetime64[ns] in Series / DataFrame
|
||||
"""
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
import re
|
||||
|
||||
from dateutil.tz import (
|
||||
gettz,
|
||||
tzutc,
|
||||
)
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_fancy_getitem():
    """Scalar and slice lookups on a Series indexed by a ``WOM-1FRI`` date range."""
    fridays = date_range(
        start=datetime(2005, 1, 1), end=datetime(2010, 1, 1), freq="WOM-1FRI"
    )
    ser = Series(np.arange(len(fridays)), index=fridays)

    # The integer 48 and every spelling of 2009-01-02 select the same element.
    assert ser[48] == 48
    for spelling in ("1/2/2009", "2009-1-2"):
        assert ser[spelling] == 48
    assert ser[datetime(2009, 1, 2)] == 48
    assert ser[Timestamp(datetime(2009, 1, 2))] == 48

    with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
        ser["2009-1-3"]

    by_strings = ser["3/6/2009":"2009-06-05"]
    by_datetimes = ser[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
    tm.assert_series_equal(by_strings, by_datetimes)
|
||||
|
||||
|
||||
def test_fancy_setitem():
    """Integer, string and string-slice assignment on a ``WOM-1FRI`` indexed Series."""
    fridays = date_range(
        start=datetime(2005, 1, 1), end=datetime(2010, 1, 1), freq="WOM-1FRI"
    )
    ser = Series(np.arange(len(fridays)), index=fridays)

    ser[48] = -1
    assert ser[48] == -1

    ser["1/2/2009"] = -2
    assert ser[48] == -2

    ser["1/2/2009":"2009-06-05"] = -3
    window = ser[48:54]
    assert (window == -3).all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
def test_getitem_setitem_datetime_tz(tz_source):
    """Overwrite-then-restore round trips using tz-aware keys of several kinds."""
    if tz_source == "pytz":
        tzget = pytz.timezone
    else:
        # handle special case for utc in dateutil
        def tzget(name):
            return tzutc() if name == "UTC" else gettz(name)

    count = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=count, freq="H", tz=tzget("US/Eastern"))
    ts = Series(np.random.randn(count), index=rng)

    central = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
    keys = [
        # also test Timestamp tz handling, GH #2789
        "1990-01-01 09:00:00+00:00",
        "1990-01-01 03:00:00-06:00",
        # repeat with datetimes
        datetime(1990, 1, 1, 9, tzinfo=tzget("UTC")),
        central.to_pydatetime(),
    ]
    for key in keys:
        scratch = ts.copy()
        scratch[key] = 0
        scratch[key] = ts[4]
        tm.assert_series_equal(scratch, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_datetimeindex():
    """Exercise get/set on a Series with a tz-aware hourly DatetimeIndex.

    Each section either compares a lookup against the matching integer
    selection (``ts[4]`` / ``ts[4:8]`` / ``ts[24:48]``) or overwrites values
    in a copy and restores them, then checks that the copy equals ``ts``.
    """
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(N), index=rng)

    # scalar string key
    result = ts["1990-01-01 04:00:00"]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts[4]
    tm.assert_series_equal(result, ts)

    # string slice
    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    tm.assert_series_equal(result, ts)

    # boolean mask built by comparing the index with strings
    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # same bounds spelled with an explicit UTC offset
    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # But we do not give datetimes a pass on tzawareness compat
    # NOTE(review): this message is never matched; ``msg`` is reassigned
    # further down before its first use.
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    naive = datetime(1990, 1, 1, 4)
    for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
        with tm.assert_produces_warning(FutureWarning):
            # GH#36148 will require tzawareness compat
            result = ts[key]
        expected = ts[4]
        assert result == expected

    result = ts.copy()
    with tm.assert_produces_warning(FutureWarning):
        # GH#36148 will require tzawareness compat
        result[datetime(1990, 1, 1, 4)] = 0
    with tm.assert_produces_warning(FutureWarning):
        # GH#36148 will require tzawareness compat
        result[datetime(1990, 1, 1, 4)] = ts[4]
    tm.assert_series_equal(result, ts)

    with tm.assert_produces_warning(FutureWarning):
        # GH#36148 will require tzawareness compat
        result = ts[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    with tm.assert_produces_warning(FutureWarning):
        # GH#36148 will require tzawareness compat
        result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = 0
    with tm.assert_produces_warning(FutureWarning):
        # GH#36148 will require tzawareness compat
        result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = ts[4:8]
    tm.assert_series_equal(result, ts)

    # comparing the tz-aware index with naive datetimes must raise
    lb = datetime(1990, 1, 1, 4)
    rb = datetime(1990, 1, 1, 7)
    msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    # bounds localized to the index's own tzinfo compare fine
    lb = Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
    rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # keys taken from the index itself
    result = ts[ts.index[4]]
    expected = ts[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)

    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    tm.assert_series_equal(result, ts)
|
||||
|
||||
|
||||
def test_getitem_setitem_periodindex():
    """Get/set round trips on a Series indexed by ``period_range(..., freq="H")``."""
    count = 50
    hours = period_range("1/1/1990", periods=count, freq="H")
    ts = Series(np.random.randn(count), index=hours)

    # scalar string key
    assert ts["1990-01-01 04"] == ts[4]

    scratch = ts.copy()
    scratch["1990-01-01 04"] = 0
    scratch["1990-01-01 04"] = ts[4]
    tm.assert_series_equal(scratch, ts)

    # string slice
    window = ts["1990-01-01 04":"1990-01-01 07"]
    tm.assert_series_equal(window, ts[4:8])

    scratch = ts.copy()
    scratch["1990-01-01 04":"1990-01-01 07"] = 0
    scratch["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    tm.assert_series_equal(scratch, ts)

    # boolean mask from string comparisons against the index
    lower, upper = "1990-01-01 04", "1990-01-01 07"
    masked = ts[(ts.index >= lower) & (ts.index <= upper)]
    tm.assert_series_equal(masked, ts[4:8])

    # GH 2782
    assert ts[ts.index[4]] == ts[4]

    by_index_slice = ts[ts.index[4:8]]
    tm.assert_series_equal(by_index_slice, ts[4:8])

    scratch = ts.copy()
    scratch[ts.index[4:8]] = 0
    scratch.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(scratch, ts)
|
||||
|
||||
|
||||
def test_datetime_indexing():
    """A missing Timestamp raises KeyError on lookup and can then be assigned."""
    index = date_range("1/1/2000", "1/7/2000").repeat(3)
    stamp = Timestamp("1/8/2000")

    # Second pass reverses the Series so that its index is not monotonic.
    for reverse in (False, True):
        ser = Series(len(index), index=index)
        if reverse:
            # not monotonic
            ser = ser[::-1]

        with pytest.raises(KeyError, match=re.escape(repr(stamp))):
            ser[stamp]
        ser[stamp] = 0
        assert ser[stamp] == 0
|
||||
|
||||
|
||||
"""
|
||||
test duplicates in time series
|
||||
"""
|
||||
|
||||
|
||||
def test_indexing_with_duplicate_datetimeindex(
|
||||
rand_series_with_duplicate_datetimeindex,
|
||||
):
|
||||
ts = rand_series_with_duplicate_datetimeindex
|
||||
|
||||
uniques = ts.index.unique()
|
||||
for date in uniques:
|
||||
result = ts[date]
|
||||
|
||||
mask = ts.index == date
|
||||
total = (ts.index == date).sum()
|
||||
expected = ts[mask]
|
||||
if total > 1:
|
||||
tm.assert_series_equal(result, expected)
|
||||
else:
|
||||
tm.assert_almost_equal(result, expected[0])
|
||||
|
||||
cp = ts.copy()
|
||||
cp[date] = 0
|
||||
expected = Series(np.where(mask, 0, ts), index=ts.index)
|
||||
tm.assert_series_equal(cp, expected)
|
||||
|
||||
key = datetime(2000, 1, 6)
|
||||
with pytest.raises(KeyError, match=re.escape(repr(key))):
|
||||
ts[key]
|
||||
|
||||
# new index
|
||||
ts[datetime(2000, 1, 6)] = 0
|
||||
assert ts[datetime(2000, 1, 6)] == 0
|
||||
|
||||
|
||||
def test_loc_getitem_over_size_cutoff(monkeypatch):
    """``.loc`` lookups work with ``libindex._SIZE_CUTOFF`` patched to 1000."""
    # #1821

    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    # create large list of non periodic datetime
    one_sec = timedelta(seconds=1)
    half_sec = timedelta(microseconds=500000)
    cursor = datetime(2011, 12, 5, 20, 30)
    n = 1100
    dates = []
    for _ in range(n):
        dates.extend(
            [
                cursor,
                cursor + one_sec,
                cursor + one_sec + half_sec,
                cursor + one_sec + one_sec + half_sec,
            ]
        )
        cursor += 3 * one_sec

    # duplicate some values in the list
    for position in np.random.randint(0, len(dates) - 1, 20):
        dates[position + 1] = dates[position]

    frame = DataFrame(
        np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")
    )

    timestamp = frame.index[n * 3]
    assert timestamp in frame.index

    # it works!
    frame.loc[timestamp]
    assert len(frame.loc[[timestamp]]) > 0
|
||||
|
||||
|
||||
def test_indexing_over_size_cutoff_period_index(monkeypatch):
    """Lookups on a PeriodIndex work with ``libindex._SIZE_CUTOFF`` patched to 1000."""
    # GH 27136

    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)

    n = 1100
    periods = period_range("1/1/2000", freq="T", periods=n)
    assert periods._engine.over_size_threshold

    ser = Series(np.random.randn(len(periods)), index=periods)

    last = periods[n - 1]
    assert last in ser.index

    # it works!
    ser[last]
    assert len(ser.loc[[last]]) > 0
|
||||
|
||||
|
||||
def test_indexing_unordered():
    """Lookups on a reordered copy of a date-indexed Series match the original.

    ``ts`` is indexed by consecutive dates; ``ts2`` holds the same rows
    concatenated in a different order.
    """
    # GH 2437
    rng = date_range(start="2011-01-01", end="2011-01-15")
    ts = Series(np.random.rand(len(rng)), index=rng)
    # first four rows, then the last four, then the middle block
    ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])

    # scalar lookups agree for every date
    for t in ts.index:

        expected = ts[t]
        result = ts2[t]
        assert expected == result

    # GH 3448 (ranges)
    def compare(slobj):
        # Slice both Series with ``slobj``; sort the reordered result and
        # clear the freq on the expected index before comparing.
        result = ts2[slobj].copy()
        result = result.sort_index()
        expected = ts[slobj]
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    compare(slice("2011-01-01", "2011-01-15"))
    # the start label "2010-12-30" is not in the index
    with tm.assert_produces_warning(FutureWarning):
        compare(slice("2010-12-30", "2011-01-15"))
    compare(slice("2011-01-01", "2011-01-16"))

    # partial ranges
    compare(slice("2011-01-01", "2011-01-6"))
    compare(slice("2011-01-06", "2011-01-8"))
    compare(slice("2011-01-06", "2011-01-12"))

    # single values
    result = ts2["2011"].sort_index()
    expected = ts["2011"]
    expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_indexing_unordered2():
    """Year-string lookup on a shuffled ``freq="M"`` Series returns only that year."""
    # diff freq
    month_ends = date_range(datetime(2005, 1, 1), periods=20, freq="M")
    ordered = Series(np.arange(len(month_ends)), index=month_ends)
    shuffled = ordered.take(np.random.permutation(20))

    selected = shuffled["2005"]
    assert all(stamp.year == 2005 for stamp in selected.index)
|
||||
|
||||
|
||||
def test_indexing():
|
||||
idx = date_range("2001-1-1", periods=20, freq="M")
|
||||
ts = Series(np.random.rand(len(idx)), index=idx)
|
||||
|
||||
# getting
|
||||
|
||||
# GH 3070, make sure semantics work on Series/Frame
|
||||
expected = ts["2001"]
|
||||
expected.name = "A"
|
||||
|
||||
df = DataFrame({"A": ts})
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# GH#36179 string indexing on rows for DataFrame deprecated
|
||||
result = df["2001"]["A"]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
# setting
|
||||
ts["2001"] = 1
|
||||
expected = ts["2001"]
|
||||
expected.name = "A"
|
||||
|
||||
df.loc["2001", "A"] = 1
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# GH#36179 string indexing on rows for DataFrame deprecated
|
||||
result = df["2001"]["A"]
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
|
||||
def test_getitem_str_month_with_datetimeindex():
    """A month string selects every row that falls on the month's last day."""
    # GH3546 (not including times on the last day)
    cases = (
        ("2013-05-31 23:00", "H"),
        ("2013-05-31 23:59", "S"),
    )
    for end, freq in cases:
        idx = date_range(start="2013-05-31 00:00", end=end, freq=freq)
        ts = Series(range(len(idx)), index=idx)
        whole_month = ts["2013-05"]
        tm.assert_series_equal(whole_month, ts)
|
||||
|
||||
|
||||
def test_getitem_str_year_with_datetimeindex():
    """A year string selects both the first and the last instant of 2013-05-31."""
    day_start = Timestamp("2013-05-31 00:00")
    day_end = Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999))
    stamps = [day_start, day_end]
    ts = Series(range(len(stamps)), index=stamps)
    tm.assert_series_equal(ts["2013"], ts)
|
||||
|
||||
|
||||
def test_getitem_str_second_with_datetimeindex():
|
||||
# GH14826, indexing with a seconds resolution string / datetime object
|
||||
df = DataFrame(
|
||||
np.random.rand(5, 5),
|
||||
columns=["open", "high", "low", "close", "volume"],
|
||||
index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
|
||||
)
|
||||
|
||||
# this is a single date, so will raise
|
||||
with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
|
||||
df["2012-01-02 18:01:02"]
|
||||
|
||||
msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
df[df.index[2]]
|
||||
@@ -0,0 +1,73 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesDelItem:
    """Tests for ``del series[key]``."""

    def test_delitem(self):
        """Deleting by label shrinks the Series in place."""
        # GH#5542
        # should delete the item inplace
        ser = Series(range(5))
        for label in (0, 1):
            del ser[label]
            remaining = range(label + 1, 5)
            tm.assert_series_equal(ser, Series(remaining, index=remaining))

        def empty_int64():
            return Series(dtype="int64", index=Index([], dtype="int64"))

        # only 1 left, del, add, del
        ser = Series(1)
        del ser[0]
        tm.assert_series_equal(ser, empty_int64())
        ser[0] = 1
        tm.assert_series_equal(ser, Series(1))
        del ser[0]
        tm.assert_series_equal(ser, empty_int64())

    def test_delitem_object_index(self):
        """Deleting the only entry under a string label leaves an object index."""

        def empty_object_indexed():
            return Series(dtype="int64", index=Index([], dtype="object"))

        # Index(dtype=object)
        ser = Series(1, index=["a"])
        del ser["a"]
        tm.assert_series_equal(ser, empty_object_indexed())
        ser["a"] = 1
        tm.assert_series_equal(ser, Series(1, index=["a"]))
        del ser["a"]
        tm.assert_series_equal(ser, empty_object_indexed())

    def test_delitem_missing_key(self):
        """Deleting from an empty Series raises KeyError for the key."""
        # empty
        nothing = Series(dtype=object)

        with pytest.raises(KeyError, match=r"^0$"):
            del nothing[0]

    def test_delitem_extension_dtype(self):
        """Deleting an element keeps the tz-aware / period dtype."""
        # GH#40386
        # DatetimeTZDtype
        dti = date_range("2016-01-01", periods=3, tz="US/Pacific")
        aware = Series(dti)

        wanted = aware[[0, 2]]
        del aware[1]
        assert aware.dtype == dti.dtype
        tm.assert_series_equal(aware, wanted)

        # PeriodDtype
        pi = dti.tz_localize(None).to_period("D")
        periods = Series(pi)

        wanted = periods[:2]
        del periods[2]
        assert periods.dtype == pi.dtype
        tm.assert_series_equal(periods, wanted)
|
||||
@@ -0,0 +1,214 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import Float64Index
|
||||
|
||||
|
||||
def test_get():
|
||||
# GH 6383
|
||||
s = Series(
|
||||
np.array(
|
||||
[
|
||||
43,
|
||||
48,
|
||||
60,
|
||||
48,
|
||||
50,
|
||||
51,
|
||||
50,
|
||||
45,
|
||||
57,
|
||||
48,
|
||||
56,
|
||||
45,
|
||||
51,
|
||||
39,
|
||||
55,
|
||||
43,
|
||||
54,
|
||||
52,
|
||||
51,
|
||||
54,
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
result = s.get(25, 0)
|
||||
expected = 0
|
||||
assert result == expected
|
||||
|
||||
s = Series(
|
||||
np.array(
|
||||
[
|
||||
43,
|
||||
48,
|
||||
60,
|
||||
48,
|
||||
50,
|
||||
51,
|
||||
50,
|
||||
45,
|
||||
57,
|
||||
48,
|
||||
56,
|
||||
45,
|
||||
51,
|
||||
39,
|
||||
55,
|
||||
43,
|
||||
54,
|
||||
52,
|
||||
51,
|
||||
54,
|
||||
]
|
||||
),
|
||||
index=Float64Index(
|
||||
[
|
||||
25.0,
|
||||
36.0,
|
||||
49.0,
|
||||
64.0,
|
||||
81.0,
|
||||
100.0,
|
||||
121.0,
|
||||
144.0,
|
||||
169.0,
|
||||
196.0,
|
||||
1225.0,
|
||||
1296.0,
|
||||
1369.0,
|
||||
1444.0,
|
||||
1521.0,
|
||||
1600.0,
|
||||
1681.0,
|
||||
1764.0,
|
||||
1849.0,
|
||||
1936.0,
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
result = s.get(25, 0)
|
||||
expected = 43
|
||||
assert result == expected
|
||||
|
||||
# GH 7407
|
||||
# with a boolean accessor
|
||||
df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3})
|
||||
vc = df.i.value_counts()
|
||||
result = vc.get(99, default="Missing")
|
||||
assert result == "Missing"
|
||||
|
||||
vc = df.b.value_counts()
|
||||
result = vc.get(False, default="Missing")
|
||||
assert result == 3
|
||||
|
||||
result = vc.get(True, default="Missing")
|
||||
assert result == "Missing"
|
||||
|
||||
|
||||
def test_get_nan():
|
||||
# GH 8569
|
||||
s = Float64Index(range(10)).to_series()
|
||||
assert s.get(np.nan) is None
|
||||
assert s.get(np.nan, default="Missing") == "Missing"
|
||||
|
||||
|
||||
def test_get_nan_multiple():
|
||||
# GH 8569
|
||||
# ensure that fixing "test_get_nan" above hasn't broken get
|
||||
# with multiple elements
|
||||
s = Float64Index(range(10)).to_series()
|
||||
|
||||
idx = [2, 30]
|
||||
assert s.get(idx) is None
|
||||
|
||||
idx = [2, np.nan]
|
||||
assert s.get(idx) is None
|
||||
|
||||
# GH 17295 - all missing keys
|
||||
idx = [20, 30]
|
||||
assert s.get(idx) is None
|
||||
|
||||
idx = [np.nan, np.nan]
|
||||
assert s.get(idx) is None
|
||||
|
||||
|
||||
def test_get_with_default():
    """``get`` ignores the default for present keys and returns it for absent ones."""
    # GH#7725
    letters = ["a", "b", "c", "d"]
    numbers = np.arange(4, dtype="int64")
    absent_keys = ["e", 10]

    # Each list serves once as the data and once as the index.
    for data, index in [(letters, numbers), (numbers, letters)]:
        ser = Series(data, index=index)
        for label, value in zip(index, data):
            assert ser.get(label) == value
            assert ser.get(label, value) == value
            assert ser.get(label, "z") == value
            for absent in absent_keys:
                assert ser.get(absent, "z") == "z"
                assert ser.get(absent, absent) == absent
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr",
    [np.random.randn(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")],
)
def test_get_with_ea(arr):
    """``Series.get`` with scalar, list and slice keys on integer and string indexes."""
    # GH#21260
    even_labels = list(range(0, 2 * len(arr), 2))
    ser = Series(arr, index=even_labels)
    assert ser.get(4) == ser.iloc[2]

    by_list = ser.get([4, 6])
    tm.assert_series_equal(by_list, ser.iloc[[2, 3]])

    by_slice = ser.get(slice(2))
    tm.assert_series_equal(by_slice, ser.iloc[[0, 1]])

    assert ser.get(-1) is None
    assert ser.get(ser.index.max() + 1) is None

    ser = Series(arr[:6], index=list("abcdef"))
    assert ser.get("c") == ser.iloc[2]

    by_label_slice = ser.get(slice("b", "d"))
    tm.assert_series_equal(by_label_slice, ser.iloc[[1, 2, 3]])

    missing = ser.get("Z")
    assert missing is None

    # integer keys against the string-labelled Series
    assert ser.get(4) == ser.iloc[4]
    assert ser.get(-1) == ser.iloc[-1]
    assert ser.get(len(ser)) is None

    # GH#21257
    every_other = Series(arr)[::2]
    assert every_other.get(1) is None
|
||||
|
||||
|
||||
def test_getitem_get(string_series, object_series):
|
||||
for obj in [string_series, object_series]:
|
||||
idx = obj.index[5]
|
||||
|
||||
assert obj[idx] == obj.get(idx)
|
||||
assert obj[idx] == obj[5]
|
||||
|
||||
assert string_series.get(-1) == string_series.get(string_series.index[-1])
|
||||
assert string_series[5] == string_series.get(string_series.index[5])
|
||||
|
||||
|
||||
def test_get_none():
    """``get(None)`` returns None with or without an index on the Series."""
    # GH#5652
    unindexed = Series(dtype=object)
    indexed = Series(dtype=object, index=list("abc"))
    for ser in (unindexed, indexed):
        assert ser.get(None) is None
|
||||
@@ -0,0 +1,714 @@
|
||||
"""
|
||||
Series.__getitem__ test classes are organized by the type of key passed.
|
||||
"""
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
time,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import (
|
||||
conversion,
|
||||
timezones,
|
||||
)
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexing import IndexingError
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
class TestSeriesGetitemScalars:
    """``Series.__getitem__`` called with a single scalar key."""

    def test_getitem_object_index_float_string(self):
        """String and float labels are both found in a mixed object Index."""
        # GH#17286
        labels = Index(["a", "b", "c", 1.0])
        ser = Series([1] * 4, index=labels)
        assert ser["a"] == 1
        assert ser[1.0] == 1

    def test_getitem_float_keys_tuple_values(self):
        """A float key returns the tuple value, or a Series when the label repeats."""
        # see GH#13509

        # unique Index
        unique = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo")
        assert unique[0.0] == (1, 1)

        # non-unique Index
        duplicated = Series(
            [(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo"
        )
        expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
        tm.assert_series_equal(duplicated[0.0], expected)

    def test_getitem_unrecognized_scalar(self):
        """A ``np.dtype`` label works as a key."""
        # GH#32684 a scalar key that is not recognized by lib.is_scalar

        # a series that might be produced via `frame.dtypes`
        ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")])

        second_label = ser.index[1]
        assert ser[second_label] == 2

    def test_getitem_negative_out_of_bounds(self):
        """A negative position beyond the length raises ``IndexError``."""
        ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10))

        expected_msg = "index -11 is out of bounds for axis 0 with size 10"
        with pytest.raises(IndexError, match=expected_msg):
            ser[-11]

    def test_getitem_out_of_bounds_indexerror(self, datetime_series):
        """Indexing one past the end raises ``IndexError``."""
        # don't segfault, GH#495
        one_past_end = len(datetime_series)
        pattern = r"index \d+ is out of bounds for axis 0 with size \d+"
        with pytest.raises(IndexError, match=pattern):
            datetime_series[one_past_end]

    def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self):
        """An int key on an empty Series raises ``KeyError``."""
        # GH#917
        # With a RangeIndex, an int key gives a KeyError
        empty = Series([], dtype=object)
        with pytest.raises(KeyError, match="-1"):
            empty[-1]

    def test_getitem_keyerror_with_int64index(self):
        """Missing int and str keys raise ``KeyError`` on duplicated int labels."""
        monotonic = [0, 0, 1, 1, 2, 2]
        not_monotonic = [2, 2, 0, 0, 1, 1]
        for labels in (monotonic, not_monotonic):
            ser = Series(np.random.randn(6), index=labels)

            with pytest.raises(KeyError, match=r"^5$"):
                ser[5]

            with pytest.raises(KeyError, match=r"^'c'$"):
                ser["c"]

    def test_getitem_int64(self, datetime_series):
        """``np.int64`` and ``int`` keys fetch the same element."""
        assert datetime_series[np.int64(5)] == datetime_series[5]

    def test_getitem_full_range(self):
        """Selecting every label with a list returns an equal Series."""
        # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7
        ser = Series(range(5), index=list(range(5)))
        every_label = list(range(5))
        tm.assert_series_equal(ser[every_label], ser)

    # ------------------------------------------------------------------
    # Series with DatetimeIndex

    @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"])
    def test_getitem_pydatetime_tz(self, tzstr):
        """A tz-aware ``Timestamp`` and a localized ``datetime`` give equal results."""
        hours = date_range(
            start="2012-12-24 16:00", end="2012-12-24 18:00", freq="H", tz=tzstr
        )
        ts = Series(index=hours, data=hours.hour)

        as_timestamp = Timestamp("2012-12-24 17:00", tz=tzstr)
        as_pydatetime = conversion.localize_pydatetime(
            datetime(2012, 12, 24, 17, 0), timezones.maybe_get_tz(tzstr)
        )
        assert ts[as_timestamp] == ts[as_pydatetime]

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_string_index_alias_tz_aware(self, tz):
        """A date string key on a tz-aware index matches ``ser[2]``."""
        rng = date_range("1/1/2000", periods=10, tz=tz)
        ser = Series(np.random.randn(len(rng)), index=rng)

        tm.assert_almost_equal(ser["1/3/2000"], ser[2])

    def test_getitem_time_object(self):
        """A ``datetime.time`` key selects the rows at that time of day."""
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = Series(np.random.randn(len(rng)), index=rng)

        half_past_nine = (rng.hour == 9) & (rng.minute == 30)
        expected = ts[half_past_nine]

        result = ts[time(9, 30)]
        result.index = result.index._with_freq(None)
        tm.assert_series_equal(result, expected)

    # ------------------------------------------------------------------
    # Series with CategoricalIndex

    def test_getitem_scalar_categorical_index(self):
        """A category value used as key returns the matching element."""
        cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
        ser = Series([1, 2], index=cats)

        first_label = cats[0]
        assert ser[first_label] == ser.iloc[0]

    def test_getitem_numeric_categorical_listlike_matches_scalar(self):
        """Int keys and int lists are both treated as labels here."""
        # GH#15470
        ser = Series(["a", "b", "c"], index=pd.CategoricalIndex([2, 1, 0]))

        # 0 is treated as a label
        assert ser[0] == "c"

        # the listlike analogue should also be treated as labels
        tm.assert_series_equal(ser[[0]], ser.iloc[-1:])

        tm.assert_series_equal(ser[[0, 1, 2]], ser.iloc[::-1])

    def test_getitem_integer_categorical_not_positional(self):
        """Key 3 is looked up as a label by both ``get`` and ``[]``."""
        # GH#14865
        labels = Index([1, 2, 3], dtype="category")
        ser = Series(["a", "b", "c"], index=labels)
        assert ser.get(3) == "c"
        assert ser[3] == "c"

    def test_getitem_str_with_timedeltaindex(self):
        """String keys resolve on a TimedeltaIndex; a missing one raises ``KeyError``."""
        rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(rng)), index=rng)

        key = "6 days, 23:11:12"
        assert rng.get_loc(key) == 133
        assert ser[key] == ser.iloc[133]

        missing_msg = r"^Timedelta\('50 days 00:00:00'\)$"
        with pytest.raises(KeyError, match=missing_msg):
            rng.get_loc("50 days")
        with pytest.raises(KeyError, match=missing_msg):
            ser["50 days"]
|
||||
|
||||
|
||||
class TestSeriesGetitemSlices:
    """``Series.__getitem__`` called with slice keys."""

    def test_getitem_partial_str_slice_with_datetimeindex(self):
        """The key ``"2008"`` selects the 2008 dates of a daily series."""
        # GH#34860
        ser = date_range("1/1/2008", "1/1/2009").to_series()

        days_2008 = date_range(start="2008-01-01", end="2008-12-31")
        expected = Series(days_2008, index=days_2008)

        tm.assert_series_equal(ser["2008"], expected)

    def test_getitem_slice_strings_with_datetimeindex(self):
        """Date-string slices match positional slices on a duplicated index."""
        idx = DatetimeIndex(
            ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]
        )
        ts = Series(np.random.randn(len(idx)), index=idx)

        tm.assert_series_equal(ts["1/2/2000":], ts[1:])
        tm.assert_series_equal(ts["1/2/2000":"1/3/2000"], ts[1:4])

    def test_getitem_partial_str_slice_with_timedeltaindex(self):
        """Day-resolution string slices map to the expected positional ranges."""
        rng = timedelta_range("1 day 10:11:12", freq="h", periods=500)
        ser = Series(np.arange(len(rng)), index=rng)

        cases = [
            (slice("5 day", "6 day"), slice(86, 134)),
            (slice("5 day", None), slice(86, None)),
            (slice(None, "6 day"), slice(None, 134)),
        ]
        for by_label, by_position in cases:
            tm.assert_series_equal(ser[by_label], ser.iloc[by_position])

    def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self):
        """String slices and keys on a microsecond-frequency TimedeltaIndex."""
        # higher reso
        rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000)
        ser = Series(np.arange(len(rng)), index=rng)

        tm.assert_series_equal(ser["1 day 10:11:12":], ser.iloc[0:])
        tm.assert_series_equal(ser["1 day 10:11:12.001":], ser.iloc[1000:])

        assert ser["1 days, 10:11:12.001001"] == ser.iloc[1001]

    def test_getitem_slice_2d(self, datetime_series):
        """``[:, np.newaxis]`` warns and matches the same indexing on ``.values``."""
        # GH#30588 multi-dimensional indexing deprecated
        warning_msg = "Support for multi-dimensional indexing"
        with tm.assert_produces_warning(FutureWarning, match=warning_msg):
            # GH#30867 Don't want to support this long-term, but
            # for now ensure that the warning from Index
            # doesn't come through via Series.__getitem__.
            result = datetime_series[:, np.newaxis]
        tm.assert_almost_equal(result, datetime_series.values[:, np.newaxis])

    # FutureWarning from NumPy.
    @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
    def test_getitem_median_slice_bug(self):
        """A one-slice list key warns and behaves like the bare slice."""
        index = date_range("20090415", "20090519", freq="2B")
        ser = Series(np.random.randn(13), index=index)

        slice_in_list = [slice(6, 7, None)]
        with tm.assert_produces_warning(FutureWarning):
            # GH#31299
            result = ser[slice_in_list]
        tm.assert_series_equal(result, ser[slice_in_list[0]])

    @pytest.mark.parametrize(
        "slc, positions",
        [
            [slice(date(2018, 1, 1), None), [0, 1, 2]],
            [slice(date(2019, 1, 2), None), [2]],
            [slice(date(2020, 1, 1), None), []],
            [slice(None, date(2020, 1, 1)), [0, 1, 2]],
            [slice(None, date(2019, 1, 1)), [0]],
        ],
    )
    def test_getitem_slice_date(self, slc, positions):
        """Slicing with ``datetime.date`` bounds selects the listed positions."""
        # https://github.com/pandas-dev/pandas/issues/31501
        stamps = DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"])
        ser = Series([0, 1, 2], stamps)
        tm.assert_series_equal(ser[slc], ser.take(positions))

    def test_getitem_slice_float_raises(self, datetime_series):
        """Float slice bounds on a DatetimeIndex raise ``TypeError``."""
        template = (
            "cannot do slice indexing on DatetimeIndex with these indexers "
            r"\[{key}\] of type float"
        )
        with pytest.raises(TypeError, match=template.format(key=r"4\.0")):
            datetime_series[4.0:10.0]

        with pytest.raises(TypeError, match=template.format(key=r"4\.5")):
            datetime_series[4.5:10.0]

    def test_getitem_slice_bug(self):
        """Negative slice bounds beyond the length are clipped."""
        ser = Series(range(10), index=list(range(10)))

        tm.assert_series_equal(ser[-12:], ser)
        tm.assert_series_equal(ser[-7:], ser[3:])
        tm.assert_series_equal(ser[:-12], ser[:0])

    def test_getitem_slice_integers(self):
        """``ser[:4]`` takes the first four rows of an int-labelled Series."""
        ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])

        expected = Series(ser.values[:4], index=[2, 4, 6, 8])
        tm.assert_series_equal(ser[:4], expected)
|
||||
|
||||
|
||||
class TestSeriesGetitemListLike:
    """``Series.__getitem__`` called with list-like keys."""

    @pytest.mark.parametrize("box", [list, np.array, Index, Series])
    def test_getitem_no_matches(self, box):
        """A list-like key with no matching label raises ``KeyError`` for every box."""
        # GH#33462 we expect the same behavior for list/ndarray/Index/Series
        ser = Series(["A", "B"])

        key = box(Series(["C"], dtype=object))

        msg = r"None of \[Index\(\['C'\], dtype='object'\)\] are in the \[index\]"
        with pytest.raises(KeyError, match=msg):
            ser[key]

    def test_getitem_intlist_intindex_periodvalues(self):
        """An int-list key on Period values keeps the ``Period[D]`` dtype."""
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))

        picked = ser[[2, 4]]
        expected = Series(
            [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")],
            index=[2, 4],
            dtype="Period[D]",
        )
        tm.assert_series_equal(picked, expected)
        assert picked.dtype == "Period[D]"

    @pytest.mark.parametrize("box", [list, np.array, Index])
    def test_getitem_intlist_intervalindex_non_int(self, box):
        """``[0]`` on a datetime IntervalIndex selects the first row."""
        # GH#33404 fall back to positional since ints are unambiguous
        dti = date_range("2000-01-03", periods=3)._with_freq(None)
        intervals = pd.IntervalIndex.from_breaks(dti)
        ser = Series(range(len(intervals)), index=intervals)

        tm.assert_series_equal(ser[box([0])], ser.iloc[:1])

    @pytest.mark.parametrize("box", [list, np.array, Index])
    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64])
    def test_getitem_intlist_multiindex_numeric_level(self, dtype, box):
        """``[5]`` on a MultiIndex with a numeric first level raises ``KeyError``."""
        # GH#33404 do _not_ fall back to positional since ints are ambiguous
        numeric_level = Index(range(4)).astype(dtype)
        date_level = date_range("2000-01-03", periods=3)
        mi = pd.MultiIndex.from_product([numeric_level, date_level])
        ser = Series(range(len(mi))[::-1], index=mi)

        key = box([5])
        with pytest.raises(KeyError, match="5"):
            ser[key]

    def test_getitem_uint_array_key(self, any_unsigned_int_numpy_dtype):
        """An unsigned-int array key holding a missing label raises ``KeyError``."""
        # GH #37218
        ser = Series([1, 2, 3])
        key = np.array([4], dtype=any_unsigned_int_numpy_dtype)

        with pytest.raises(KeyError, match="4"):
            ser[key]
        with pytest.raises(KeyError, match="4"):
            ser.loc[key]
|
||||
|
||||
|
||||
class TestGetitemBooleanMask:
    """``Series.__getitem__`` called with boolean masks."""

    def test_getitem_boolean(self, string_series):
        """A plain list of booleans selects the same rows as the boolean Series."""
        ser = string_series
        above_median = ser > ser.median()

        # passing list is OK
        from_list = ser[list(above_median)]
        tm.assert_series_equal(from_list, ser[above_median])
        tm.assert_index_equal(from_list.index, ser.index[above_median])

    def test_getitem_boolean_empty(self):
        """Empty masks keep name and dtype; unalignable masks raise ``IndexingError``."""
        empty = Series([], dtype=np.int64)
        empty.index.name = "index_name"
        empty = empty[empty.isna()]
        assert empty.index.name == "index_name"
        assert empty.dtype == np.int64

        # GH#5877
        # indexing with empty series
        ser = Series(["A", "B"])
        expected = Series(dtype=object, index=Index([], dtype="int64"))
        tm.assert_series_equal(ser[Series([], dtype=object)], expected)

        # invalid because of the boolean indexer
        # that's empty or not-aligned
        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        with pytest.raises(IndexingError, match=msg):
            ser[Series([], dtype=bool)]

        with pytest.raises(IndexingError, match=msg):
            ser[Series([True], dtype=bool)]

    def test_getitem_boolean_object(self, string_series):
        """Object-dtype masks work like bool masks; masks holding NaN raise."""
        # using column from DataFrame

        ser = string_series
        mask = ser > ser.median()
        omask = mask.astype(object)

        # getitem
        tm.assert_series_equal(ser[omask], ser[mask])

        # setitem
        via_bool = ser.copy()
        via_object = ser.copy()
        via_object[omask] = 5
        via_bool[mask] = 5
        tm.assert_series_equal(via_object, via_bool)

        # nans raise exception
        omask[5:10] = np.nan
        msg = "Cannot mask with non-boolean array containing NA / NaN values"
        with pytest.raises(ValueError, match=msg):
            ser[omask]
        with pytest.raises(ValueError, match=msg):
            ser[omask] = 5

    def test_getitem_boolean_dt64_copies(self):
        """The masked result's underlying array has no ``base``."""
        # GH#36210
        dti = date_range("2016-01-01", periods=4, tz="US/Pacific")
        key = np.array([True, True, False, False])

        tz_ser = Series(dti._data)
        assert tz_ser[key]._values._data.base is None

        # compare with numeric case for reference
        int_ser = Series(range(4))
        assert int_ser[key]._values.base is None

    def test_getitem_boolean_corner(self, datetime_series):
        """A mask with a shifted index raises for both ``[]`` and ``.loc``."""
        ts = datetime_series
        mask_shifted = ts.shift(1, freq=BDay()) > ts.median()

        msg = (
            r"Unalignable boolean Series provided as indexer \(index of "
            r"the boolean Series and of the indexed object do not match"
        )
        for indexer in (ts, ts.loc):
            with pytest.raises(IndexingError, match=msg):
                indexer[mask_shifted]

    def test_getitem_boolean_different_order(self, string_series):
        """A mask built from a reordered copy selects the same rows."""
        reordered = string_series.sort_values()

        selected = string_series[reordered > 0]
        tm.assert_series_equal(selected, string_series[string_series > 0])

    def test_getitem_boolean_contiguous_preserve_freq(self):
        """A contiguous mask keeps the index freq; a gap drops it."""
        rng = date_range("1/1/2000", "3/1/2000", freq="B")

        mask = np.zeros(len(rng), dtype=bool)
        mask[10:20] = True

        contiguous = rng[mask]
        expected = rng[10:20]
        assert expected.freq == rng.freq
        tm.assert_index_equal(contiguous, expected)

        mask[22] = True
        with_gap = rng[mask]
        assert with_gap.freq is None
|
||||
|
||||
|
||||
class TestGetitemCallable:
    """``Series.__getitem__`` called with a callable key."""

    def test_getitem_callable(self):
        """The callable's return value is used as the key."""
        # GH#12533
        ser = Series(4, index=list("ABCD"))

        # scalar label
        assert ser[lambda x: "A"] == ser.loc["A"]

        # list of labels
        by_labels = ser[lambda x: ["A", "B"]]
        tm.assert_series_equal(by_labels, ser.loc[["A", "B"]])

        # boolean list
        by_mask = ser[lambda x: [True, False, True, True]]
        tm.assert_series_equal(by_mask, ser.iloc[[0, 2, 3]])
|
||||
|
||||
|
||||
def test_getitem_generator(string_series):
|
||||
gen = (x > 0 for x in string_series)
|
||||
result = string_series[gen]
|
||||
result2 = string_series[iter(string_series > 0)]
|
||||
expected = string_series[string_series > 0]
|
||||
tm.assert_series_equal(result, expected)
|
||||
tm.assert_series_equal(result2, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series",
    [
        Series([0, 1]),
        Series(date_range("2012-01-01", periods=2)),
        Series(date_range("2012-01-01", periods=2, tz="CET")),
    ],
)
def test_getitem_ndim_deprecated(series):
    """``series[:, None]`` warns and equals the same indexing on the ndarray."""
    warning_msg = "Support for multi-dimensional indexing"
    with tm.assert_produces_warning(FutureWarning, match=warning_msg):
        result = series[:, None]

    tm.assert_numpy_array_equal(result, np.asarray(series)[:, None])
|
||||
|
||||
|
||||
def test_getitem_multilevel_scalar_slice_not_implemented(
    multiindex_year_month_day_dataframe_random_data,
):
    """A ``(scalar, slice)`` key on column ``"A"`` raises ``TypeError``."""
    # not implementing this for now
    column = multiindex_year_month_day_dataframe_random_data["A"]

    expected_msg = r"\(2000, slice\(3, 4, None\)\)"
    with pytest.raises(TypeError, match=expected_msg):
        column[2000, 3:4]
|
||||
|
||||
|
||||
def test_getitem_dataframe_raises():
    """Indexing a Series with a boolean DataFrame raises ``TypeError``."""
    labels = list(range(10))
    ser = Series(10, index=labels)
    frame = DataFrame(labels, index=labels)
    expected_msg = (
        "Indexing a Series with DataFrame is not supported, "
        "use the appropriate DataFrame column"
    )
    with pytest.raises(TypeError, match=expected_msg):
        ser[frame > 5]
|
||||
|
||||
|
||||
def test_getitem_assignment_series_aligment():
    """Values of a Series assigned through an int-array key are placed in order."""
    # https://github.com/pandas-dev/pandas/issues/37427
    # with getitem, when assigning with a Series, it is not first aligned
    target = Series(range(10))
    positions = np.array([2, 4, 9])
    target[positions] = Series([10, 11, 12])
    tm.assert_series_equal(target, Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12]))
|
||||
|
||||
|
||||
def test_getitem_duplicate_index_mistyped_key_raises_keyerror():
    """A ``None`` key raises ``KeyError`` from the Series, its Index and the engine."""
    # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError
    ser = Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0])
    lookups = [
        lambda: ser[None],
        lambda: ser.index.get_loc(None),
        lambda: ser.index._engine.get_loc(None),
    ]
    for lookup in lookups:
        with pytest.raises(KeyError, match="None"):
            lookup()
|
||||
|
||||
|
||||
def test_getitem_1tuple_slice_without_multiindex():
    """A 1-tuple holding a slice gives the same result as the bare slice."""
    ser = Series(range(5))
    wrapped = (slice(3),)

    tm.assert_series_equal(ser[wrapped], ser[wrapped[0]])
|
||||
|
||||
|
||||
def test_getitem_preserve_name(datetime_series):
|
||||
result = datetime_series[datetime_series > 0]
|
||||
assert result.name == datetime_series.name
|
||||
|
||||
result = datetime_series[[0, 2, 4]]
|
||||
assert result.name == datetime_series.name
|
||||
|
||||
result = datetime_series[5:10]
|
||||
assert result.name == datetime_series.name
|
||||
|
||||
|
||||
def test_getitem_with_integer_labels():
    """Int keys missing from an int index raise ``KeyError`` for list and ndarray."""
    # integer indexes, be careful
    ser = Series(np.random.randn(10), index=list(range(0, 20, 2)))
    as_list = [0, 2, 5, 7, 8]
    as_array = np.array([0, 2, 5, 7, 8])
    for key in (as_list, as_array):
        with pytest.raises(KeyError, match="not in index"):
            ser[key]
|
||||
|
||||
|
||||
def test_getitem_missing(datetime_series):
    """A timestamp one business day before the first label raises ``KeyError``."""
    # missing
    before_start = datetime_series.index[0] - BDay()
    expected_msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)"
    with pytest.raises(KeyError, match=expected_msg):
        datetime_series[before_start]
|
||||
|
||||
|
||||
def test_getitem_fancy(string_series, object_series):
|
||||
slice1 = string_series[[1, 2, 3]]
|
||||
slice2 = object_series[[1, 2, 3]]
|
||||
assert string_series.index[2] == slice1.index[1]
|
||||
assert object_series.index[2] == slice2.index[1]
|
||||
assert string_series[2] == slice1[1]
|
||||
assert object_series[2] == slice2[1]
|
||||
|
||||
|
||||
def test_getitem_box_float64(datetime_series):
|
||||
value = datetime_series[5]
|
||||
assert isinstance(value, np.float64)
|
||||
|
||||
|
||||
def test_getitem_unordered_dup():
    """A label that occurs once yields a scalar even when other labels repeat."""
    ser = Series(range(5), index=["c", "a", "a", "b", "b"])
    value = ser["c"]
    assert is_scalar(value)
    assert ser["c"] == 0
|
||||
|
||||
|
||||
def test_getitem_dups():
    """A repeated label returns every matching row as a Series."""
    labels = ["A", "A", "B", "C", "C"]
    ser = Series(range(5), index=labels, dtype=np.int64)
    expected = Series([3, 4], index=["C", "C"], dtype=np.int64)
    tm.assert_series_equal(ser["C"], expected)
|
||||
|
||||
|
||||
def test_getitem_categorical_str():
    """A str key on a categorical index returns all rows carrying that category."""
    # GH#31765
    ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"]))
    expected = ser.iloc[[0, 3]]
    tm.assert_series_equal(ser["a"], expected)

    # Check the intermediate steps work as expected
    with tm.assert_produces_warning(FutureWarning):
        via_index = ser.index.get_value(ser, "a")
    tm.assert_series_equal(via_index, expected)
|
||||
|
||||
|
||||
def test_slice_can_reorder_not_uniquely_indexed():
    """Reversing a Series with duplicate labels via ``[::-1]`` does not raise."""
    labels = ["a", "a", "b", "b", "c"]
    ser = Series(1, index=labels)
    ser[::-1]  # it works!
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index_vals", ["aabcd", "aadcb"])
def test_duplicated_index_getitem_positional_indexer(index_vals):
    """Key 3 returns the value 3 on a str index containing duplicates."""
    # GH 11747
    ser = Series(range(5), index=list(index_vals))
    assert ser[3] == 3
|
||||
|
||||
|
||||
class TestGetitemDeprecatedIndexers:
    """Set and dict keys emit a ``FutureWarning`` on get and set (GH#42825)."""

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        """Reading with a set or dict key warns."""
        # GH#42825
        obj = Series([1, 2, 3])
        with tm.assert_produces_warning(FutureWarning):
            obj[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_deprecated(self, key):
        """Writing with a set or dict key warns."""
        # GH#42825
        obj = Series([1, 2, 3])
        with tm.assert_produces_warning(FutureWarning):
            obj[key] = 1
|
||||
@@ -0,0 +1,360 @@
|
||||
""" test get/set & misc """
|
||||
from datetime import timedelta
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
concat,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_basic_indexing():
    """Out-of-range int keys and a missing label raise, before and after sorting."""
    ser = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"])

    out_of_bounds = "index 5 is out of bounds for axis 0 with size 5"
    with pytest.raises(IndexError, match=out_of_bounds):
        ser[5]
    with pytest.raises(IndexError, match=out_of_bounds):
        ser[5] = 0

    with pytest.raises(KeyError, match=r"^'c'$"):
        ser["c"]

    ser = ser.sort_index()

    with pytest.raises(IndexError, match=out_of_bounds):
        ser[5]
    # the setitem message is matched against either of two accepted forms
    out_of_bounds_set = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
    with pytest.raises(IndexError, match=out_of_bounds_set):
        ser[5] = 0
|
||||
|
||||
|
||||
def test_basic_getitem_with_labels(datetime_series):
|
||||
indices = datetime_series.index[[5, 10, 15]]
|
||||
|
||||
result = datetime_series[indices]
|
||||
expected = datetime_series.reindex(indices)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = datetime_series[indices[0] : indices[2]]
|
||||
expected = datetime_series.loc[indices[0] : indices[2]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_basic_getitem_dt64tz_values():
    """``.loc``, ``.iloc`` and ``[]`` each return the tz-aware Timestamp."""
    # GH12089
    # with tz for values
    stamps = date_range("2011-01-01", periods=3, tz="US/Eastern")
    ser = Series(stamps, index=["a", "b", "c"])
    expected = Timestamp("2011-01-01", tz="US/Eastern")

    assert ser.loc["a"] == expected
    assert ser.iloc[0] == expected
    assert ser["a"] == expected
|
||||
|
||||
|
||||
def test_getitem_setitem_ellipsis():
    """``ser[...]`` equals the Series and reflects a later ``ser[...] = 5``."""
    ser = Series(np.random.randn(10))

    np.fix(ser)

    everything = ser[...]
    tm.assert_series_equal(everything, ser)

    ser[...] = 5
    # the object fetched earlier is expected to show the assignment
    assert (everything == 5).all()
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:.*append method is deprecated.*:FutureWarning")
@pytest.mark.parametrize(
    "result_1, duplicate_item, expected_1",
    [
        [
            Series({1: 12, 2: [1, 2, 2, 3]}),
            Series({1: 313}),
            Series({1: 12}, dtype=object),
        ],
        [
            Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
            Series({1: [1, 2, 3]}),
            Series({1: [1, 2, 3]}),
        ],
    ],
)
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
    """After appending a duplicate label, lookups return the expected rows."""
    # GH 17610
    combined = result_1.append(duplicate_item)
    expected_dups = expected_1.append(duplicate_item)
    tm.assert_series_equal(combined[1], expected_dups)
    assert combined[2] == result_1[2]
|
||||
|
||||
|
||||
def test_getitem_setitem_integers():
    """``iloc[0]`` and label ``"a"`` address the same element for get and set."""
    # caused bug without test
    ser = Series([1, 2, 3], ["a", "b", "c"])

    assert ser.iloc[0] == ser["a"]
    ser.iloc[0] = 5
    tm.assert_almost_equal(ser["a"], 5)
|
||||
|
||||
|
||||
def test_series_box_timestamp():
    """Every scalar accessor returns ``Timestamp`` for datetime values."""
    rng = date_range("20090415", "20090519", freq="B")

    # default integer index
    by_position = Series(rng)
    assert isinstance(by_position[0], Timestamp)
    assert isinstance(by_position.at[1], Timestamp)
    assert isinstance(by_position.iat[2], Timestamp)
    assert isinstance(by_position.loc[3], Timestamp)
    assert isinstance(by_position.iloc[4], Timestamp)

    # datetime index
    by_date = Series(rng, index=rng)
    assert isinstance(by_date[0], Timestamp)
    assert isinstance(by_date.at[rng[1]], Timestamp)
    assert isinstance(by_date.iat[2], Timestamp)
    assert isinstance(by_date.loc[rng[3]], Timestamp)
    assert isinstance(by_date.iloc[4], Timestamp)
|
||||
|
||||
|
||||
def test_series_box_timedelta():
    """Every scalar accessor returns ``Timedelta`` for timedelta values."""
    deltas = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))
    fetched = (
        deltas[0],
        deltas.at[1],
        deltas.iat[2],
        deltas.loc[3],
        deltas.iloc[4],
    )
    for value in fetched:
        assert isinstance(value, Timedelta)
|
||||
|
||||
|
||||
def test_getitem_ambiguous_keyerror(indexer_sl):
    """Key 1 is absent from the even-int labels, so the lookup raises ``KeyError``."""
    even_labels = list(range(0, 20, 2))
    ser = Series(range(10), index=even_labels)
    with pytest.raises(KeyError, match=r"^1$"):
        indexer_sl(ser)[1]
|
||||
|
||||
|
||||
def test_getitem_dups_with_missing(indexer_sl):
    """A list key with one absent label raises ``KeyError`` naming that label."""
    # breaks reindex, so need to use .loc internally
    # GH 4246
    ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
    wanted = ["foo", "bar", "bah", "bam"]
    with pytest.raises(KeyError, match=re.escape("['bam'] not in index")):
        indexer_sl(ser)[wanted]
|
||||
|
||||
|
||||
def test_setitem_ambiguous_keyerror(indexer_sl):
|
||||
s = Series(range(10), index=list(range(0, 20, 2)))
|
||||
|
||||
# equivalent of an append
|
||||
s2 = s.copy()
|
||||
indexer_sl(s2)[1] = 5
|
||||
expected = concat([s, Series([5], index=[1])])
|
||||
tm.assert_series_equal(s2, expected)
|
||||
|
||||
|
||||
def test_setitem(datetime_series, string_series):
|
||||
datetime_series[datetime_series.index[5]] = np.NaN
|
||||
datetime_series[[1, 2, 17]] = np.NaN
|
||||
datetime_series[6] = np.NaN
|
||||
assert np.isnan(datetime_series[6])
|
||||
assert np.isnan(datetime_series[2])
|
||||
datetime_series[np.isnan(datetime_series)] = 5
|
||||
assert not np.isnan(datetime_series[2])
|
||||
|
||||
|
||||
def test_setslice(datetime_series):
|
||||
sl = datetime_series[5:20]
|
||||
assert len(sl) == len(sl.index)
|
||||
assert sl.index.is_unique is True
|
||||
|
||||
|
||||
# FutureWarning from NumPy about indexing with [slice(None, 5)].
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
def test_basic_getitem_setitem_corner(datetime_series):
    """Tuple keys raise ``KeyError``; lists that mix in slices warn or raise."""
    # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
    tuple_msg = "key of type tuple not found and not a MultiIndex"
    with pytest.raises(KeyError, match=tuple_msg):
        datetime_series[:, 2]
    with pytest.raises(KeyError, match=tuple_msg):
        datetime_series[:, 2] = 2

    # weird lists. [slice(0, 5)] will work but not two slices
    with tm.assert_produces_warning(FutureWarning):
        # GH#31299
        result = datetime_series[[slice(None, 5)]]
    tm.assert_series_equal(result, datetime_series[:5])

    # OK
    unhashable_msg = r"unhashable type(: 'slice')?"
    with pytest.raises(TypeError, match=unhashable_msg):
        datetime_series[[5, slice(None, None)]]
    with pytest.raises(TypeError, match=unhashable_msg):
        datetime_series[[5, slice(None, None)]] = 2
|
||||
|
||||
|
||||
def test_slice(string_series, object_series):
    """Positional slices pick the expected labels, and writes reach the source."""
    middle = string_series[10:20]
    tail = string_series[-10:]
    obj_middle = object_series[10:20]

    # position 9 lies just before the slice start
    assert string_series.index[9] not in middle.index
    assert object_series.index[9] not in obj_middle.index

    assert len(middle) == len(middle.index)
    first_label = middle.index[0]
    assert string_series[first_label] == middle[first_label]

    assert middle.index[1] == string_series.index[11]
    assert tm.equalContents(tail, np.array(string_series)[-10:])

    # Test return view.
    view = string_series[10:20]
    view[:] = 0

    assert (string_series[10:20] == 0).all()
|
||||
|
||||
|
||||
def test_timedelta_assignment():
    """``timedelta`` values assigned through ``.loc`` are stored as ``Timedelta``."""
    # GH 8209
    ser = Series([], dtype=object)
    ser.loc["B"] = timedelta(1)
    tm.assert_series_equal(ser, Series(Timedelta("1 days"), index=["B"]))

    # add a new leading label "A" by reindexing
    ser = ser.reindex(ser.index.insert(0, "A"))
    with_gap = Series([np.nan, Timedelta("1 days")], index=["A", "B"])
    tm.assert_series_equal(ser, with_gap)

    ser.loc["A"] = timedelta(1)
    filled = Series(Timedelta("1 days"), index=["A", "B"])
    tm.assert_series_equal(ser, filled)
|
||||
|
||||
|
||||
def test_underlying_data_conversion():
    """``df["val"].update(s)`` with a tuple-labelled Series must show up in ``df``."""
    # GH 4080
    df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
    return_value = df.set_index(["a", "b", "c"], inplace=True)
    # inplace=True returns None
    assert return_value is None
    s = Series([1], index=[(2, 2, 2)])
    df["val"] = 0
    # (a stray no-op ``df`` expression statement used to sit here; removed)
    df["val"].update(s)

    expected = DataFrame(
        {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]}
    )
    return_value = expected.set_index(["a", "b", "c"], inplace=True)
    assert return_value is None
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_preserve_refs(datetime_series):
|
||||
seq = datetime_series[[5, 10, 15]]
|
||||
seq[1] = np.NaN
|
||||
assert not np.isnan(datetime_series[10])
|
||||
|
||||
|
||||
def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl):
|
||||
index = lexsorted_two_level_string_multiindex
|
||||
ser = Series(np.random.randn(len(index)), index=index, name="sth")
|
||||
|
||||
result = indexer_sl(ser)["foo"]
|
||||
assert result.name == ser.name
|
||||
|
||||
|
||||
"""
|
||||
miscellaneous methods
|
||||
"""
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index",
    [
        date_range("2014-01-01", periods=20, freq="MS"),
        period_range("2014-01", periods=20, freq="M"),
        timedelta_range("0", periods=20, freq="H"),
    ],
)
def test_slice_with_negative_step(index):
    """Negative-step label slices are equivalent to the positional slices."""
    ser = Series(np.arange(20), index)
    SLC = IndexSlice

    # each bound is tried both as its string form and as the index element
    lower_keys = [str(index[9]), index[9]]
    upper_keys = [str(index[13]), index[13]]

    for low in lower_keys:
        tm.assert_indexing_slices_equivalent(ser, SLC[low::-1], SLC[9::-1])
        tm.assert_indexing_slices_equivalent(ser, SLC[:low:-1], SLC[:8:-1])

        for high in upper_keys:
            tm.assert_indexing_slices_equivalent(ser, SLC[high:low:-1], SLC[13:8:-1])
            tm.assert_indexing_slices_equivalent(ser, SLC[low:high:-1], SLC[0:0:-1])
|
||||
|
||||
|
||||
def test_tuple_index():
    # GH 35534 - Selecting values when a Series has an Index of tuples
    key_a, key_b = ("a",), ("b",)
    ser = Series([1, 2], index=[key_a, key_b])

    assert ser[key_a] == 1
    assert ser[key_b] == 2

    # assignment through a tuple label overwrites the existing entry
    ser[key_b] = 3
    assert ser[key_b] == 3
|
||||
|
||||
|
||||
def test_frozenset_index():
    # GH35747 - Selecting values when a Series has an Index of frozenset
    first = frozenset("a")
    second = frozenset("b")
    ser = Series([1, 2], index=[first, second])

    assert ser[first] == 1
    assert ser[second] == 2

    # assignment through a frozenset label overwrites the existing entry
    ser[second] = 3
    assert ser[second] == 3
|
||||
|
||||
|
||||
def test_loc_setitem_all_false_indexer():
    # GH#45778: assigning through an all-False boolean mask changes nothing
    ser = Series([1, 2], index=["a", "b"])
    untouched = ser.copy()
    replacement = Series([6, 7], index=["a", "b"])

    ser.loc[ser > 100] = replacement
    tm.assert_series_equal(ser, untouched)
|
||||
|
||||
|
||||
class TestDepreactedIndexers:
    # GH#42825: each test passes a set or dict key (alone or inside a tuple)
    # to ``.loc`` and asserts that a FutureWarning is produced.

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_getitem_dict_and_set_deprecated(self, key):
        obj = Series([1, 2])
        with tm.assert_produces_warning(FutureWarning):
            obj.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_getitem_dict_and_set_deprecated_multiindex(self, key):
        mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
        obj = Series([1, 2], index=mi)
        with tm.assert_produces_warning(FutureWarning):
            obj.loc[key]

    @pytest.mark.parametrize("key", [{1}, {1: 1}])
    def test_setitem_dict_and_set_deprecated(self, key):
        obj = Series([1, 2])
        with tm.assert_produces_warning(FutureWarning):
            obj.loc[key] = 1

    @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
    def test_setitem_dict_and_set_deprecated_multiindex(self, key):
        mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
        obj = Series([1, 2], index=mi)
        with tm.assert_produces_warning(FutureWarning):
            obj.loc[key] = 1
|
||||
@@ -0,0 +1,69 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_mask():
    # mask(cond) is compared against where(~cond); the where results
    # themselves are covered in test_where
    ser = Series(np.random.randn(5))
    positive = ser > 0

    tm.assert_series_equal(ser.where(~positive, np.nan), ser.mask(positive))
    tm.assert_series_equal(ser.where(~positive), ser.mask(positive))
    tm.assert_series_equal(ser.where(~positive, -ser), ser.mask(positive, -ser))

    # condition that only covers the first three labels
    flags = Series([True, False, False, True, False], index=ser.index)
    partial = flags[:3]
    negated = -(ser.abs())

    tm.assert_series_equal(negated.where(~partial), negated.mask(partial))
    tm.assert_series_equal(
        negated.where(~partial, -negated), negated.mask(partial, -negated)
    )

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        ser.mask(1)
    with pytest.raises(ValueError, match=msg):
        ser.mask(partial.values, -ser)
|
||||
|
||||
|
||||
def test_mask_casts():
    # dtype changes: masking integers with NaN is compared to a float result
    ints = Series([1, 2, 3, 4])
    masked = ints.mask(ints > 2, np.nan)
    tm.assert_series_equal(masked, Series([1, 2, np.nan, np.nan]))
|
||||
|
||||
|
||||
def test_mask_casts2():
    # see gh-21891
    ints = Series([1, 2])
    masked = ints.mask([True, False])

    tm.assert_series_equal(masked, Series([np.nan, 2]))
|
||||
|
||||
|
||||
def test_mask_inplace():
    ser = Series(np.random.randn(5))
    positive = ser > 0

    # in-place mask with the default replacement
    target = ser.copy()
    target.mask(positive, inplace=True)
    tm.assert_series_equal(target.dropna(), ser[~positive])
    tm.assert_series_equal(target, ser.mask(positive))

    # in-place mask with an explicit replacement
    target = ser.copy()
    target.mask(positive, -ser, inplace=True)
    tm.assert_series_equal(target, ser.mask(positive, -ser))
|
||||
@@ -0,0 +1,45 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_series_set_value():
    # GH#1561
    first, second = datetime(2001, 1, 1), datetime(2001, 1, 2)

    # populate an empty object Series one datetime label at a time
    ser = Series(dtype=object)
    ser._set_value(first, 1.0)
    ser._set_value(second, np.nan)

    expected = Series([1.0, np.nan], index=DatetimeIndex([first, second]))
    tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_set_value_dt64(datetime_series):
|
||||
idx = datetime_series.index[10]
|
||||
res = datetime_series._set_value(idx, 0)
|
||||
assert res is None
|
||||
assert datetime_series[idx] == 0
|
||||
|
||||
|
||||
def test_set_value_str_index(string_series):
|
||||
# equiv
|
||||
ser = string_series.copy()
|
||||
res = ser._set_value("foobar", 0)
|
||||
assert res is None
|
||||
assert ser.index[-1] == "foobar"
|
||||
assert ser["foobar"] == 0
|
||||
|
||||
ser2 = string_series.copy()
|
||||
ser2.loc["foobar"] = 0
|
||||
assert ser2.index[-1] == "foobar"
|
||||
assert ser2["foobar"] == 0
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,33 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_take():
    ser = Series([-1, 5, 6, 2, 4])

    # (positions passed to take, expected values, expected index labels)
    cases = [
        ([1, 3, 4], [5, 2, 4], [1, 3, 4]),
        ([-1, 3, 4], [4, 2, 4], [4, 3, 4]),
    ]
    for positions, values, labels in cases:
        actual = ser.take(positions)
        tm.assert_series_equal(actual, Series(values, index=labels))

    def out_of_bounds(pos):
        # regex accepting both spellings of the out-of-bounds message
        return f"index {pos} is out of bounds for( axis 0 with)? size 5"

    with pytest.raises(IndexError, match=out_of_bounds(10)):
        ser.take([1, 10])
    with pytest.raises(IndexError, match=out_of_bounds(5)):
        ser.take([2, 5])
|
||||
|
||||
|
||||
def test_take_categorical():
    # https://github.com/pandas-dev/pandas/issues/20664
    cat_ser = Series(pd.Categorical(["a", "b", "c"]))
    taken = cat_ser.take([-2, -2, 0])

    expected_values = pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"])
    expected = Series(expected_values, index=[1, 1, 0])
    tm.assert_series_equal(taken, expected)
|
||||
@@ -0,0 +1,466 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_where_unsafe_int(any_signed_int_numpy_dtype):
|
||||
s = Series(np.arange(10), dtype=any_signed_int_numpy_dtype)
|
||||
mask = s < 5
|
||||
|
||||
s[mask] = range(2, 7)
|
||||
expected = Series(
|
||||
list(range(2, 7)) + list(range(5, 10)),
|
||||
dtype=any_signed_int_numpy_dtype,
|
||||
)
|
||||
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
|
||||
def test_where_unsafe_float(float_numpy_dtype):
|
||||
s = Series(np.arange(10), dtype=float_numpy_dtype)
|
||||
mask = s < 5
|
||||
|
||||
s[mask] = range(2, 7)
|
||||
data = list(range(2, 7)) + list(range(5, 10))
|
||||
expected = Series(data, dtype=float_numpy_dtype)
|
||||
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [
        (np.int8, np.float64),
        (np.int16, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
        (np.float32, np.float32),
        (np.float64, np.float64),
    ],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    # see gh-9743
    ser = Series(np.arange(10), dtype=dtype)
    fractional = [2.5, 3.5, 4.5, 5.5, 6.5]
    below_five = ser < 5

    # the first five entries are replaced; the rest keep their values
    expected = Series(fractional + list(range(5, 10)), dtype=expected_dtype)
    ser[below_five] = fractional
    tm.assert_series_equal(ser, expected)
|
||||
|
||||
|
||||
def test_where_unsafe():
    # see gh-9731
    ser = Series(np.arange(10), dtype="int64")
    fractional = [2.5, 3.5, 4.5, 5.5]

    above_five = ser > 5
    expected = Series(list(range(6)) + fractional, dtype="float64")

    ser[above_five] = fractional
    tm.assert_series_equal(ser, expected)

    # see gh-3235
    ser = Series(np.arange(10), dtype="int64")
    ser[ser < 5] = range(2, 7)
    expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
    tm.assert_series_equal(ser, expected)
    assert ser.dtype == expected.dtype

    ser = Series(np.arange(10), dtype="int64")
    ser[ser > 5] = [0] * 4
    expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
    tm.assert_series_equal(ser, expected)

    # five values for a mask that selects four positions
    ser = Series(np.arange(10))
    above_five = ser > 5

    msg = "cannot set using a list-like indexer with a different length than the value"
    for wrong_length in ([5, 4, 3, 2, 1], [0] * 5):
        with pytest.raises(ValueError, match=msg):
            ser[above_five] = wrong_length

    # dtype changes
    ser = Series([1, 2, 3, 4])
    result = ser.where(ser > 2, np.nan)
    tm.assert_series_equal(result, Series([np.nan, np.nan, 3, 4]))

    # GH 4667
    # setting with None changes dtype
    ser = Series(range(10)).astype(float)
    ser[8] = None
    assert isna(ser[8])

    ser = Series(range(10)).astype(float)
    ser[ser > 8] = None
    result = ser[isna(ser)]
    tm.assert_series_equal(result, Series(np.nan, index=[9]))
|
||||
|
||||
|
||||
def test_where():
    ser = Series(np.random.randn(5))
    positive = ser > 0

    tm.assert_series_equal(ser.where(positive).dropna(), ser[positive])
    tm.assert_series_equal(ser.where(positive, -ser), ser.abs())

    # where returns a new object of the same shape
    result = ser.where(positive)
    assert ser.shape == result.shape
    assert result is not ser

    # test alignment
    flags = Series([True, False, False, True, False], index=ser.index)
    negated = -(ser.abs())

    expected = negated[flags].reindex(negated.index[:3]).reindex(negated.index)
    tm.assert_series_equal(negated.where(flags[:3]), expected)

    expected = negated.abs()
    expected.iloc[0] = negated[0]
    tm.assert_series_equal(negated.where(flags[:3], -negated), expected)
|
||||
|
||||
|
||||
def test_where_error():
    ser = Series(np.random.randn(5))
    positive = ser > 0

    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        ser.where(1)
    with pytest.raises(ValueError, match=msg):
        ser.where(positive[:3].values, -ser)

    # GH 2745
    ser = Series([1, 2])
    ser[[True, False]] = [0, 1]
    tm.assert_series_equal(ser, Series([0, 2]))

    # failures
    msg = "cannot set using a list-like indexer with a different length than the value"
    for bad_value in ([0, 2, 3], []):
        with pytest.raises(ValueError, match=msg):
            ser[[True, False]] = bad_value
|
||||
|
||||
|
||||
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    # see gh-15414
    ser = Series([1, 2, 3])
    boxed_cond = klass([False, True, True])

    result = ser.where(boxed_cond)
    tm.assert_series_equal(result, Series([np.nan, 2, 3]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    # see gh-15414: only boolean arrays accepted
    ser = Series([1, 2, 3])

    non_bool_msg = "Boolean array expected for the condition"
    with pytest.raises(ValueError, match=non_bool_msg):
        ser.where(cond)

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        ser.where([True])
|
||||
|
||||
|
||||
def test_where_ndframe_align():
    msg = "Array conditional must be same shape as self"
    ser = Series([1, 2, 3])

    # (raw condition of the wrong length, expected result once the same
    # condition is wrapped in a Series)
    cases = [
        ([True], Series([1, np.nan, np.nan])),
        (np.array([False, True, False, True]), Series([np.nan, 2, np.nan])),
    ]
    for cond, expected in cases:
        with pytest.raises(ValueError, match=msg):
            ser.where(cond)

        out = ser.where(Series(cond))
        tm.assert_series_equal(out, expected)
|
||||
|
||||
|
||||
def test_where_setitem_invalid():
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment

    def length_mismatch(kind):
        return (
            f"cannot set using a {kind} indexer with a "
            "different length than the value"
        )

    oversized = list(range(27))

    # slice
    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_mismatch("slice")):
        ser[0:3] = oversized

    ser[0:3] = list(range(3))
    tm.assert_series_equal(ser.astype(np.int64), Series([0, 1, 2]))

    # slice with step
    ser = Series(list("abcdef"))
    with pytest.raises(ValueError, match=length_mismatch("slice")):
        ser[0:4:2] = oversized

    ser = Series(list("abcdef"))
    ser[0:4:2] = list(range(2))
    tm.assert_series_equal(ser, Series([0, "b", 1, "d", "e", "f"]))

    # neg slices
    ser = Series(list("abcdef"))
    with pytest.raises(ValueError, match=length_mismatch("slice")):
        ser[:-1] = oversized

    ser[-3:-1] = list(range(2))
    tm.assert_series_equal(ser, Series(["a", "b", "c", 0, 1, "f"]))

    # list
    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_mismatch("list-like")):
        ser[[0, 1, 2]] = oversized

    ser = Series(list("abc"))
    with pytest.raises(ValueError, match=length_mismatch("list-like")):
        ser[[0, 1, 2]] = list(range(2))

    # scalar
    ser = Series(list("abc"))
    ser[0] = list(range(10))
    tm.assert_series_equal(ser, Series([list(range(10)), "b", "c"]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("size", range(2, 6))
@pytest.mark.parametrize(
    "mask", [[True, False, False, False, False], [True, False], [False]]
)
@pytest.mark.parametrize(
    "item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min]
)
# Test numpy arrays, lists and tuples as the input to be
# broadcast
@pytest.mark.parametrize(
    "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
)
def test_broadcast(size, mask, item, box):
    # GH#8801, GH#4195
    selection = np.resize(mask, size)
    data = np.arange(size, dtype=float)

    # Build the expected values first: pick ``item`` where the selection
    # is set and the source value everywhere else.
    expected = Series(
        [item if chosen else value for chosen, value in zip(selection, data)]
    )

    # boolean setitem with a scalar
    ser = Series(data)
    ser[selection] = item
    tm.assert_series_equal(ser, expected)

    # where with the inverted selection and a boxed one-element replacement
    ser = Series(data)
    result = ser.where(~selection, box(item))
    tm.assert_series_equal(result, expected)

    # mask with the selection and a boxed one-element replacement
    ser = Series(data)
    result = ser.mask(selection, box(item))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_inplace():
    ser = Series(np.random.randn(5))
    positive = ser > 0

    # in-place where with the default replacement
    target = ser.copy()
    target.where(positive, inplace=True)
    tm.assert_series_equal(target.dropna(), ser[positive])
    tm.assert_series_equal(target, ser.where(positive))

    # in-place where with an explicit replacement
    target = ser.copy()
    target.where(positive, -ser, inplace=True)
    tm.assert_series_equal(target, ser.where(positive, -ser))
|
||||
|
||||
|
||||
def test_where_dups():
    # GH 4550
    # where crashes with dups in index
    dup_labels = [0, 1, 2, 0, 1, 2]
    comb = pd.concat([Series(list(range(3))), Series(list(range(3)))])

    result = comb.where(comb < 2)
    tm.assert_series_equal(
        result, Series([0, 1, np.nan, 0, 1, np.nan], index=dup_labels)
    )

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    tm.assert_series_equal(comb, Series([5, 1, 2, 5, 1, 2], index=dup_labels))

    comb[comb < 2] += 10
    tm.assert_series_equal(comb, Series([5, 11, 2, 5, 11, 2], index=dup_labels))
|
||||
|
||||
|
||||
def test_where_numeric_with_string():
    # GH 9280
    ser = Series([1, 2, 3])

    # the same checks are applied to a scalar, a list and an ndarray of
    # string replacements
    replacements = ("X", ["X", "Y", "Z"], np.array(["X", "Y", "Z"]))
    for other in replacements:
        w = ser.where(ser > 1, other)

        assert not is_integer(w[0])
        assert is_integer(w[1])
        assert is_integer(w[2])
        assert isinstance(w[0], str)
        assert w.dtype == "object"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["timedelta64[ns]", "datetime64[ns]"])
def test_where_datetimelike_coerce(dtype):
    ser = Series([1, 2], dtype=dtype)
    mask = np.array([False, False])

    # every position is replaced, so the result is compared to plain tens
    expected = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        rs = ser.where(mask, other)
        tm.assert_series_equal(rs, expected)

    # replacement containing NaN is compared to an object-dtype result
    rs = ser.where(mask, [10.0, np.nan])
    expected = Series([10, None], dtype="object")
    tm.assert_series_equal(rs, expected)
|
||||
|
||||
|
||||
def test_where_datetimetz():
    # GH 15701
    tz_dtype = "datetime64[ns, UTC]"
    stamps = [
        Timestamp(text)
        for text in ("2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00")
    ]
    ser = Series(stamps, dtype=tz_dtype)

    result = ser.where(Series([False, True]))
    expected = Series([pd.NaT, ser[1]], dtype=tz_dtype)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_sparse():
    # GH#17198 make sure we don't get an AttributeError for sp_index
    sparse_ser = Series(pd.arrays.SparseArray([1, 2]))
    replaced = sparse_ser.where(sparse_ser >= 2, 0)
    tm.assert_series_equal(replaced, Series(pd.arrays.SparseArray([0, 2])))
|
||||
|
||||
|
||||
def test_where_empty_series_and_empty_cond_having_non_bool_dtypes():
    # https://github.com/pandas-dev/pandas/issues/34592
    empty = Series([], dtype=float)
    # an empty list condition on an empty Series returns an equal Series
    tm.assert_series_equal(empty.where([]), empty)
|
||||
|
||||
|
||||
def test_where_categorical(frame_or_series):
|
||||
# https://github.com/pandas-dev/pandas/issues/18888
|
||||
exp = frame_or_series(
|
||||
pd.Categorical(["A", "A", "B", "B", np.nan], categories=["A", "B", "C"]),
|
||||
dtype="category",
|
||||
)
|
||||
df = frame_or_series(["A", "A", "B", "B", "C"], dtype="category")
|
||||
res = df.where(df != "C")
|
||||
tm.assert_equal(exp, res)
|
||||
|
||||
|
||||
def test_where_datetimelike_categorical(request, tz_naive_fixture):
|
||||
# GH#37682
|
||||
tz = tz_naive_fixture
|
||||
|
||||
dr = date_range("2001-01-01", periods=3, tz=tz)._with_freq(None)
|
||||
lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT])
|
||||
rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])
|
||||
|
||||
mask = np.array([True, True, False])
|
||||
|
||||
# DatetimeIndex.where
|
||||
res = lvals.where(mask, rvals)
|
||||
tm.assert_index_equal(res, dr)
|
||||
|
||||
# DatetimeArray.where
|
||||
res = lvals._data._where(mask, rvals)
|
||||
tm.assert_datetime_array_equal(res, dr._data)
|
||||
|
||||
# Series.where
|
||||
res = Series(lvals).where(mask, rvals)
|
||||
tm.assert_series_equal(res, Series(dr))
|
||||
|
||||
# DataFrame.where
|
||||
res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))
|
||||
|
||||
tm.assert_frame_equal(res, pd.DataFrame(dr))
|
||||
@@ -0,0 +1,81 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_xs_datetimelike_wrapping():
|
||||
# GH#31630 a case where we shouldn't wrap datetime64 in Timestamp
|
||||
arr = date_range("2016-01-01", periods=3)._data._data
|
||||
|
||||
ser = Series(arr, dtype=object)
|
||||
for i in range(len(ser)):
|
||||
ser.iloc[i] = arr[i]
|
||||
assert ser.dtype == object
|
||||
assert isinstance(ser[0], np.datetime64)
|
||||
|
||||
result = ser.xs(0)
|
||||
assert isinstance(result, np.datetime64)
|
||||
|
||||
|
||||
class TestXSWithMultiIndex:
|
||||
def test_xs_level_series(self, multiindex_dataframe_random_data):
|
||||
df = multiindex_dataframe_random_data
|
||||
ser = df["A"]
|
||||
expected = ser[:, "two"]
|
||||
result = df.xs("two", level=1)["A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_series_getitem_multiindex_xs_by_label(self):
|
||||
# GH#5684
|
||||
idx = MultiIndex.from_tuples(
|
||||
[("a", "one"), ("a", "two"), ("b", "one"), ("b", "two")]
|
||||
)
|
||||
ser = Series([1, 2, 3, 4], index=idx)
|
||||
return_value = ser.index.set_names(["L1", "L2"], inplace=True)
|
||||
assert return_value is None
|
||||
expected = Series([1, 3], index=["a", "b"])
|
||||
return_value = expected.index.set_names(["L1"], inplace=True)
|
||||
assert return_value is None
|
||||
|
||||
result = ser.xs("one", level="L2")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_series_getitem_multiindex_xs(xs):
|
||||
# GH#6258
|
||||
dt = list(date_range("20130903", periods=3))
|
||||
idx = MultiIndex.from_product([list("AB"), dt])
|
||||
ser = Series([1, 3, 4, 1, 3, 4], index=idx)
|
||||
expected = Series([1, 1], index=list("AB"))
|
||||
|
||||
result = ser.xs("20130903", level=1)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_series_xs_droplevel_false(self):
|
||||
# GH: 19056
|
||||
mi = MultiIndex.from_tuples(
|
||||
[("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
|
||||
)
|
||||
ser = Series([1, 1, 1], index=mi)
|
||||
result = ser.xs("a", axis=0, drop_level=False)
|
||||
expected = Series(
|
||||
[1, 1],
|
||||
index=MultiIndex.from_tuples(
|
||||
[("a", "x"), ("a", "y")], names=["level1", "level2"]
|
||||
),
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_xs_key_as_list(self):
|
||||
# GH#41760
|
||||
mi = MultiIndex.from_tuples([("a", "x")], names=["level1", "level2"])
|
||||
ser = Series([1], index=mi)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
ser.xs(["a", "x"], axis=0, drop_level=False)
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
ser.xs(["a"], axis=0, drop_level=False)
|
||||
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
Test files dedicated to individual (stand-alone) Series methods
|
||||
|
||||
Ideally these files/tests should correspond 1-to-1 with tests.frame.methods
|
||||
|
||||
These may also present opportunities for sharing/de-duplicating test code.
|
||||
"""
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user