first commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,190 @@
|
||||
""" common utilities """
|
||||
import itertools
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import (
|
||||
Float64Index,
|
||||
UInt64Index,
|
||||
)
|
||||
|
||||
|
||||
def _mklbl(prefix, n):
    """Return ``n`` string labels: ``prefix`` followed by 0, 1, ..., n - 1."""
    labels = []
    for pos in range(n):
        labels.append(f"{prefix}{pos}")
    return labels
|
||||
|
||||
|
||||
def _axify(obj, key, axis):
    """
    Build a tuple indexer for ``obj``.

    The tuple has one entry per dimension of ``obj`` (``obj.ndim``): ``key``
    at position ``axis`` and ``slice(None)`` everywhere else.
    """
    indexer = [slice(None) for _ in range(obj.ndim)]
    indexer[axis] = key
    return tuple(indexer)
|
||||
|
||||
|
||||
class Base:
    """
    Shared base class for the indexing test-suites.

    ``setup_method`` creates one Series and one DataFrame for every index
    flavour named in ``_typs`` (stored as ``self.<kind>_<typ>``) and then
    groups them into the dicts ``self.series`` and ``self.frame``, keyed by
    flavour name.
    """

    # kinds of object built in setup_method; also the names of the grouping dicts
    _kinds = {"series", "frame"}
    # index flavours; setup_method defines a ``<kind>_<typ>`` attribute for each
    _typs = {
        "ints",
        "uints",
        "labels",
        "mixed",
        "ts",
        "floats",
        "empty",
        "ts_rev",
        "multi",
    }

    def setup_method(self, method):
        """Build the per-flavour Series/DataFrame objects and group them by kind."""
        self.series_ints = Series(np.random.rand(4), index=np.arange(0, 8, 2))
        self.frame_ints = DataFrame(
            np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3)
        )

        self.series_uints = Series(
            np.random.rand(4), index=UInt64Index(np.arange(0, 8, 2))
        )
        self.frame_uints = DataFrame(
            np.random.randn(4, 4),
            index=UInt64Index(range(0, 8, 2)),
            columns=UInt64Index(range(0, 12, 3)),
        )

        self.series_floats = Series(
            np.random.rand(4), index=Float64Index(range(0, 8, 2))
        )
        self.frame_floats = DataFrame(
            np.random.randn(4, 4),
            index=Float64Index(range(0, 8, 2)),
            columns=Float64Index(range(0, 12, 3)),
        )

        m_idces = [
            MultiIndex.from_product([[1, 2], [3, 4]]),
            MultiIndex.from_product([[5, 6], [7, 8]]),
            MultiIndex.from_product([[9, 10], [11, 12]]),
        ]

        self.series_multi = Series(np.random.rand(4), index=m_idces[0])
        self.frame_multi = DataFrame(
            np.random.randn(4, 4), index=m_idces[0], columns=m_idces[1]
        )

        self.series_labels = Series(np.random.randn(4), index=list("abcd"))
        self.frame_labels = DataFrame(
            np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")
        )

        self.series_mixed = Series(np.random.randn(4), index=[2, 4, "null", 8])
        self.frame_mixed = DataFrame(np.random.randn(4, 4), index=[2, 4, "null", 8])

        self.series_ts = Series(
            np.random.randn(4), index=date_range("20130101", periods=4)
        )
        self.frame_ts = DataFrame(
            np.random.randn(4, 4), index=date_range("20130101", periods=4)
        )

        dates_rev = date_range("20130101", periods=4).sort_values(ascending=False)
        self.series_ts_rev = Series(np.random.randn(4), index=dates_rev)
        self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev)

        self.frame_empty = DataFrame()
        self.series_empty = Series(dtype=object)

        # form agglomerates: self.series / self.frame map typ -> object
        for kind in self._kinds:
            setattr(
                self,
                kind,
                {typ: getattr(self, f"{kind}_{typ}") for typ in self._typs},
            )

    def generate_indices(self, f, values=False):
        """
        Generate every index tuple of ``f`` (the product over its axes).

        If ``values`` is False (default), the tuples are built from the axis
        labels of ``f``; if ``values`` is True, they are built from the
        positional ranges ``0..len(axis) - 1`` instead.
        """
        axes = f.axes
        if values:
            axes = (list(range(len(ax))) for ax in axes)

        return itertools.product(*axes)

    def get_value(self, name, f, i, values=False):
        """
        Return the value of ``f`` at location ``i``.

        With ``values=True`` the underlying ``f.values`` array is indexed;
        otherwise ``name`` selects positional (``"iat"`` -> ``iloc``) or
        label-based (``"at"`` -> ``loc``) lookup.
        """
        # check against values
        if values:
            return f.values[i]

        if name == "iat":
            return f.iloc[i]

        assert name == "at"
        return f.loc[i]

    def check_values(self, f, func, values=False):
        """
        Check the indexer named ``func`` against an independent lookup.

        For every label tuple of ``f``, ``getattr(f, func)[i]`` must be almost
        equal to either ``f.values[i]`` (``values=True``) or to the result of
        chained ``__getitem__`` calls applied in reverse axis order.
        Does nothing when ``f`` is None.
        """
        if f is None:
            return

        for i in itertools.product(*f.axes):
            result = getattr(f, func)[i]

            # check against values
            if values:
                expected = f.values[i]
            else:
                expected = f
                for a in reversed(i):
                    expected = expected[a]

            tm.assert_almost_equal(result, expected)

    def check_result(self, method, key, typs=None, axes=None, fails=None):
        """
        Apply ``key`` through the indexer ``method`` to the prepared objects.

        Parameters
        ----------
        method : str
            Name of the indexer attribute (looked up with ``getattr``).
        key : object
            Key to place on the axis under test.
        typs : iterable of str, optional
            Index flavours to exercise; defaults to all of ``_typs``.
        axes : {0, 1}, optional
            Single axis to exercise; defaults to both.
        fails : exception type or tuple of types, optional
            IndexError/TypeError/KeyError instances of these types are
            tolerated; any other such error is re-raised.
        """

        def _eq(axis, obj, key):
            """Index ``obj`` with ``key`` on ``axis``; swallow only expected errors."""
            axified = _axify(obj, key, axis)
            try:
                getattr(obj, method).__getitem__(axified)

            except (IndexError, TypeError, KeyError) as detail:
                # if the error type is listed in ``fails`` it is expected,
                # otherwise re-raise it
                if fails is not None and isinstance(detail, fails):
                    return
                raise

        if typs is None:
            typs = self._typs

        if axes is None:
            axes = [0, 1]
        else:
            assert axes in [0, 1]
            axes = [axes]

        # check
        for kind in self._kinds:
            d = getattr(self, kind)
            for ax in axes:
                for typ in typs:
                    assert typ in self._typs

                    obj = d[typ]
                    # a Series has no axis 1, so skip axes the object lacks
                    if ax < obj.ndim:
                        _eq(axis=ax, obj=obj, key=key)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,175 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """Indexing tests for objects whose index is (or wraps) an IntervalIndex."""

    @pytest.fixture
    def series_with_interval_index(self):
        """Series of 0..4 indexed by the intervals built from the breaks 0..5."""
        breaks = np.arange(6)
        return Series(np.arange(5), IntervalIndex.from_breaks(breaks))

    def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl):
        """Scalar slices, label lists and boolean masks select the expected rows."""
        ser = series_with_interval_index.copy()

        first_three = ser.iloc[:3]
        for key in (slice(None, 3), slice(None, 2.5), slice(0.1, 2.5)):
            tm.assert_series_equal(first_three, indexer_sl(ser)[key])
        if indexer_sl is tm.loc:
            tm.assert_series_equal(first_three, ser.loc[-1:3])

        middle = ser.iloc[1:4]
        for labels in ([1.5, 2.5, 3.5], [2, 3, 4], [1.5, 3, 4]):
            tm.assert_series_equal(middle, indexer_sl(ser)[labels])

        tail = ser.iloc[2:5]
        tm.assert_series_equal(tail, indexer_sl(ser)[ser >= 2])

    @pytest.mark.parametrize("direction", ["increasing", "decreasing"])
    def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl):
        """Endpoints match only on a closed side; midpoints always match."""
        tpls = [(0, 1), (2, 3), (4, 5)]
        if direction == "decreasing":
            tpls = tpls[::-1]

        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        ser = Series(list("abc"), idx)

        # left endpoints first, then right endpoints
        for side in ("left", "right"):
            endpoints = getattr(idx, side)
            for key, expected in zip(endpoints, ser):
                if getattr(idx, f"closed_{side}"):
                    assert indexer_sl(ser)[key] == expected
                else:
                    with pytest.raises(KeyError, match=str(key)):
                        indexer_sl(ser)[key]

        for key, expected in zip(idx.mid, ser):
            assert indexer_sl(ser)[key] == expected

    def test_getitem_non_matching(self, series_with_interval_index, indexer_sl):
        """A list key with an entry that falls in no interval raises KeyError."""
        ser = series_with_interval_index.copy()

        # this is a departure from our current
        # indexing scheme, but simpler
        for labels in ([-1, 3, 4, 5], [-1, 3]):
            with pytest.raises(KeyError, match=r"\[-1\] not in index"):
                indexer_sl(ser)[labels]

    @pytest.mark.slow
    def test_loc_getitem_large_series(self):
        """Equivalent spellings of the same slice give equal results."""
        ser = Series(
            np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001))
        )

        no_start = ser.loc[:80000]
        with_start = ser.loc[0:80000]
        with_step = ser.loc[0:80000:1]
        tm.assert_series_equal(no_start, with_start)
        tm.assert_series_equal(no_start, with_step)

    def test_loc_getitem_frame(self):
        """Scalar and list lookups on a frame indexed by the result of ``pd.cut``."""
        # CategoricalIndex with IntervalIndex categories
        df = DataFrame({"A": range(10)})
        df["B"] = pd.cut(df.A, 5)
        df = df.set_index("B")

        result = df.loc[4]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError, match="10"):
            df.loc[10]

        # single list-like
        result = df.loc[[4]]
        expected = df.iloc[4:6]
        tm.assert_frame_equal(result, expected)

        # non-unique
        result = df.loc[[4, 5]]
        expected = df.take([4, 5, 4, 5])
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError, match=r"None of \[\[10\]\] are"):
            df.loc[[10]]

        # partial missing
        with pytest.raises(KeyError, match=r"\[10\] not in index"):
            df.loc[[10, 4]]

    def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
        """Indexing an all-NaN IntervalIndex with one of its NaN entries."""
        # GH#41831
        index = IntervalIndex([np.nan, np.nan])
        key = index[:-1]

        obj = frame_or_series(range(2), index=index)
        needs_transpose = frame_or_series is DataFrame and indexer_sl is tm.setitem
        if needs_transpose:
            obj = obj.T

        result = indexer_sl(obj)[key]
        expected = obj

        tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
class TestIntervalIndexInsideMultiIndex:
    """``loc`` on a MultiIndex that has an IntervalIndex as one of its levels."""

    def test_mi_intervalindex_slicing_with_scalar(self):
        """Float keys on the interval level resolve to the containing interval."""
        # GH#27456
        left_edges = [0, 1, 10, 11, 0, 1, 10, 11]
        right_edges = [1, 2, 11, 12, 1, 2, 11, 12]
        ii = IntervalIndex.from_arrays(left_edges, right_edges, name="MP")

        item_level = pd.Index(["FC"] * 4 + ["OWNER"] * 4)
        rid_level = pd.Index(["RID1", "RID1", "RID2", "RID2"] * 2)
        idx = pd.MultiIndex.from_arrays([item_level, rid_level, ii])
        idx.names = ["Item", "RID", "MP"]

        df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]})
        df.index = idx

        query_df = DataFrame(
            {
                "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"],
                "RID": ["RID1", "RID1", "RID1", "RID2", "RID2"],
                "MP": [0.2, 1.5, 1.6, 11.1, 10.9],
            }
        ).sort_index()

        idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP])
        query_df.index = idx
        result = df.value.loc[query_df.index]

        # the IntervalIndex level is indexed with floats, which map to
        # the intervals containing them. Matching the behavior we would get
        # with _only_ an IntervalIndex, we get an IntervalIndex level back.
        sliced_level = ii.take([0, 1, 1, 3, 2])
        outer_levels = [idx.get_level_values(0), idx.get_level_values(1)]
        expected_index = pd.MultiIndex.from_arrays([*outer_levels, sliced_level])
        expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
        tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,209 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIntervalIndex:
    """``loc`` / ``__getitem__`` semantics on a Series indexed by an IntervalIndex."""

    @pytest.fixture
    def series_with_interval_index(self):
        """Series of 0..4 indexed by the intervals built from the breaks 0..5."""
        return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6)))

    def test_loc_with_interval(self, series_with_interval_index, indexer_sl):
        """Interval keys select only exactly-matching index entries."""

        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        ser = series_with_interval_index.copy()

        expected = 0
        result = indexer_sl(ser)[Interval(0, 1)]
        assert result == expected

        expected = ser.iloc[3:5]
        result = indexer_sl(ser)[[Interval(3, 4), Interval(4, 5)]]
        tm.assert_series_equal(expected, result)

        # missing or not exact
        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")):
            indexer_sl(ser)[Interval(3, 5, closed="left")]

        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
            indexer_sl(ser)[Interval(3, 5)]

        with pytest.raises(
            KeyError, match=re.escape("Interval(-2, 0, closed='right')")
        ):
            indexer_sl(ser)[Interval(-2, 0)]

        with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")):
            indexer_sl(ser)[Interval(5, 6)]

    def test_loc_with_scalar(self, series_with_interval_index, indexer_sl):
        """Scalar keys select the interval(s) that contain them."""

        # loc with single label / list of labels:
        #   - Intervals: only exact matches
        #   - scalars: those that contain it

        ser = series_with_interval_index.copy()

        assert indexer_sl(ser)[1] == 0
        assert indexer_sl(ser)[1.5] == 1
        assert indexer_sl(ser)[2] == 1

        expected = ser.iloc[1:4]
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]])
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]])

        expected = ser.iloc[[1, 1, 2, 1]]
        tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2, 2.5, 1.5]])

        expected = ser.iloc[2:5]
        tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2])

    def test_loc_with_slices(self, series_with_interval_index, indexer_sl):
        """Slices bounded by Interval objects require exact matches."""

        # loc with slices:
        #   - Interval objects: only works with exact matches
        #   - scalars: only works for non-overlapping, monotonic intervals,
        #     and start/stop select location based on the interval that
        #     contains them:
        #    (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop))

        ser = series_with_interval_index.copy()

        # slice of interval

        expected = ser.iloc[:3]
        result = indexer_sl(ser)[Interval(0, 1) : Interval(2, 3)]
        tm.assert_series_equal(expected, result)

        expected = ser.iloc[3:]
        result = indexer_sl(ser)[Interval(3, 4) :]
        tm.assert_series_equal(expected, result)

        msg = "Interval objects are not currently supported"
        with pytest.raises(NotImplementedError, match=msg):
            indexer_sl(ser)[Interval(3, 6) :]

        with pytest.raises(NotImplementedError, match=msg):
            indexer_sl(ser)[Interval(3, 4, closed="left") :]

    def test_slice_step_ne1(self, series_with_interval_index):
        """An integer slice with a step matches the equivalent ``iloc`` slice."""
        # GH#31658 slice of scalar with step != 1
        ser = series_with_interval_index.copy()
        expected = ser.iloc[0:4:2]

        result = ser[0:4:2]
        tm.assert_series_equal(result, expected)

        result2 = ser[0:4][::2]
        tm.assert_series_equal(result2, expected)

    def test_slice_float_start_stop(self, series_with_interval_index):
        """A float-bounded slice with step != 1 raises ValueError."""
        # GH#31658 slicing with integers is positional, with floats is not
        #  supported
        ser = series_with_interval_index.copy()

        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            ser[1.5:9.5:2]

    def test_slice_interval_step(self, series_with_interval_index):
        """An Interval used as the slice step raises ValueError."""
        # GH#31658 allows for integer step!=1, not Interval step
        ser = series_with_interval_index.copy()
        msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            ser[0 : 4 : Interval(0, 1)]

    def test_loc_with_overlap(self, indexer_sl):
        """Scalar, Interval and slice lookups on an overlapping IntervalIndex."""

        idx = IntervalIndex.from_tuples([(1, 5), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        # scalar
        expected = ser
        result = indexer_sl(ser)[4]
        tm.assert_series_equal(expected, result)

        result = indexer_sl(ser)[[4]]
        tm.assert_series_equal(expected, result)

        # interval
        expected = 0
        result = indexer_sl(ser)[Interval(1, 5)]
        # this comparison previously had no ``assert`` and so checked nothing
        assert result == expected

        expected = ser
        result = indexer_sl(ser)[[Interval(1, 5), Interval(3, 7)]]
        tm.assert_series_equal(expected, result)

        with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")):
            indexer_sl(ser)[Interval(3, 5)]

        msg = r"None of \[\[Interval\(3, 5, closed='right'\)\]\]"
        with pytest.raises(KeyError, match=msg):
            indexer_sl(ser)[[Interval(3, 5)]]

        # slices with interval (only exact matches)
        expected = ser
        result = indexer_sl(ser)[Interval(1, 5) : Interval(3, 7)]
        tm.assert_series_equal(expected, result)

        # parenthesised so that both literals are concatenated into ``msg``;
        # as two separate statements the second half was silently discarded
        msg = (
            "'can only get slices from an IntervalIndex if bounds are"
            " non-overlapping and all monotonic increasing or decreasing'"
        )
        with pytest.raises(KeyError, match=msg):
            indexer_sl(ser)[Interval(1, 6) : Interval(3, 8)]

        if indexer_sl is tm.loc:
            # slices with scalar raise for overlapping intervals
            # TODO KeyError is the appropriate error?
            with pytest.raises(KeyError, match=msg):
                ser.loc[1:4]

    def test_non_unique(self, indexer_sl):
        """Exact Interval lookups when intervals share an endpoint."""

        idx = IntervalIndex.from_tuples([(1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        result = indexer_sl(ser)[Interval(1, 3)]
        assert result == 0

        result = indexer_sl(ser)[[Interval(1, 3)]]
        expected = ser.iloc[0:1]
        tm.assert_series_equal(expected, result)

    def test_non_unique_moar(self, indexer_sl):
        """Exact Interval lookups when the index contains a duplicated interval."""

        idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)])
        ser = Series(range(len(idx)), index=idx)

        expected = ser.iloc[[0, 1]]
        result = indexer_sl(ser)[Interval(1, 3)]
        tm.assert_series_equal(expected, result)

        expected = ser
        result = indexer_sl(ser)[Interval(1, 3) :]
        tm.assert_series_equal(expected, result)

        expected = ser.iloc[[0, 1]]
        result = indexer_sl(ser)[[Interval(1, 3)]]
        tm.assert_series_equal(expected, result)

    def test_loc_getitem_missing_key_error_message(
        self, frame_or_series, series_with_interval_index
    ):
        """The KeyError for a partially-missing list key names the missing label."""
        # GH#27365
        ser = series_with_interval_index.copy()
        obj = frame_or_series(ser)
        with pytest.raises(KeyError, match=r"\[6\]"):
            obj.loc[[4, 5, 6]]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,73 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
import pandas.core.common as com
|
||||
|
||||
|
||||
def test_detect_chained_assignment():
    """In-place ``fillna`` through chained column access raises SettingWithCopyError."""
    # Inplace ops, originally from:
    # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")]
    column_values = ([12, 23], [123, None], [1234, 2345], [12345, 23456])
    events = dict(zip(tuples, column_values))

    multiind = MultiIndex.from_tuples(tuples, names=["part", "side"])
    zed = DataFrame(events, index=["a", "b"], columns=multiind)

    msg = "A value is trying to be set on a copy of a slice from a DataFrame"
    with pytest.raises(com.SettingWithCopyError, match=msg):
        zed["eyes"]["right"].fillna(value=555, inplace=True)
|
||||
|
||||
|
||||
@td.skip_array_manager_invalid_test  # with ArrayManager df.loc[0] is not a view
def test_cache_updating():
    """Values set through a chained view and through ``loc`` are both read back."""
    # 5216
    # make sure that we don't try to set a dead cache
    data = np.random.rand(10, 3)
    df = DataFrame(data, columns=["x", "y", "z"])
    pairs = [(outer, inner) for outer in range(5) for inner in range(2)]
    df.index = MultiIndex.from_tuples(pairs)

    # setting via chained assignment
    # but actually works, since everything is a view
    df.loc[0]["z"].iloc[0] = 1.0
    assert df.loc[(0, 0), "z"] == 1

    # correct setting
    df.loc[(0, 0), "z"] = 2
    assert df.loc[(0, 0), "z"] == 2
|
||||
|
||||
|
||||
@pytest.mark.slow
def test_indexer_caching():
    """Boolean-mask setitem on a very large MultiIndex-ed Series sets every value."""
    # GH5727
    # make sure that indexers are in the _internal_names_set
    n = 1000001
    index = MultiIndex.from_tuples(zip(range(n), range(n)))
    s = Series(np.zeros(n), index=index)
    str(s)

    # setitem
    expected = Series(np.ones(n), index=index)
    s = Series(np.zeros(n), index=index)
    is_zero = s == 0
    s[is_zero] = 1
    tm.assert_series_equal(s, expected)
|
||||
@@ -0,0 +1,50 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Period,
|
||||
Series,
|
||||
period_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_multiindex_period_datetime():
    """A Period level can be addressed with either a Period or a datetime key."""
    # GH4861, using datetime in period of multiindex raises exception

    idx1 = Index(["a", "a", "a", "b", "b"])
    idx2 = period_range("2012-01", periods=len(idx1), freq="M")
    s = Series(np.random.randn(len(idx1)), [idx1, idx2])

    expected = s.iloc[0]

    # try Period as index, then datetime as index
    for second_level_key in (Period("2012-01"), datetime(2012, 1, 1)):
        result = s.loc["a", second_level_key]
        assert result == expected
|
||||
|
||||
|
||||
def test_multiindex_datetime_columns():
    """Empty frames with datetime MultiIndex columns compare equal."""
    # GH35015, using datetime as column indices raises exception
    names = ["a", "b"]
    first = to_datetime("02/29/2020")
    second = to_datetime("03/01/2020")

    from_tuples = MultiIndex.from_tuples([(first, second)], names=names)
    df = DataFrame([], columns=from_tuples)

    from_arrays = MultiIndex.from_arrays([[first], [second]], names=names)
    expected_df = DataFrame([], columns=from_arrays)

    tm.assert_frame_equal(df, expected_df)
|
||||
@@ -0,0 +1,394 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexing import IndexingError
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of Series with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "access_method",
    [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)],
)
@pytest.mark.parametrize(
    "level1_value, expected",
    [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))],
)
def test_series_getitem_multiindex(access_method, level1_value, expected):
    """Selecting on the second level via ``[]``, ``loc`` and ``xs`` agrees."""
    # GH 6018
    # series regression getitem with a multi-index
    index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)], names=["A", "B"])
    ser = Series([1, 2, 3], index=index)
    # the parametrized ``expected`` is built without an index name; set it here
    expected.index.name = "A"

    tm.assert_series_equal(access_method(ser, level1_value), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("level0_value", ["D", "A"])
def test_series_getitem_duplicates_multiindex(level0_value):
    """Missing first-level keys raise KeyError; a present key returns its rows."""
    # GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise
    # the appropriate error, only in PY3 of course!
    index = MultiIndex(
        levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
        codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
        names=["tag", "day"],
    )
    arr = np.random.randn(len(index), 1)
    df = DataFrame(arr, index=index, columns=["val"])

    # confirm indexing on missing value raises KeyError
    if level0_value != "A":
        with pytest.raises(KeyError, match=r"^'A'$"):
            df.val["A"]

    with pytest.raises(KeyError, match=r"^'X'$"):
        df.val["X"]

    result = df.val[level0_value]
    first_three = arr.ravel()[0:3]
    day_index = Index([26, 37, 57], name="day")
    expected = Series(first_three, name="val", index=day_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl):
    """A two-element key on a three-level index returns rows keyed by the last level."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]

    expected = ser.reindex(ser.index[42:65])
    # drop the first level twice, leaving only the innermost level
    without_first = expected.index.droplevel(0)
    expected.index = without_first.droplevel(0)

    result = indexer_sl(ser)[2000, 3]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_getitem_returns_scalar(
    multiindex_year_month_day_dataframe_random_data, indexer_sl
):
    """A full three-element key returns the single matching scalar."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    expected = ser.iloc[49]

    assert indexer_sl(ser)[2000, 3, 10] == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_error,expected_error_msg",
    [
        (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"),
        (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"),
        (lambda s: s.__getitem__(len(s)), KeyError, ""),  # match should include len(s)
        (lambda s: s[len(s)], KeyError, ""),  # match should include len(s)
        (
            lambda s: s.iloc[len(s)],
            IndexError,
            "single positional indexer is out-of-bounds",
        ),
    ],
)
def test_series_getitem_indexing_errors(
    multiindex_year_month_day_dataframe_random_data,
    indexer,
    expected_error,
    expected_error_msg,
):
    """Each invalid lookup raises the parametrized error type and message."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    with pytest.raises(expected_error, match=expected_error_msg):
        indexer(ser)
|
||||
|
||||
|
||||
def test_series_getitem_corner_generator(
    multiindex_year_month_day_dataframe_random_data,
):
    """A generator of booleans as key matches the equivalent boolean mask."""
    ser = multiindex_year_month_day_dataframe_random_data["A"]
    result = ser[(value > 0 for value in ser)]
    expected = ser[ser > 0]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_getitem_simple(multiindex_dataframe_random_data):
    """The ("foo", "one") column of the transposed frame equals its first column."""
    transposed = multiindex_dataframe_random_data.T
    expected = transposed.values[:, 0]
    result = transposed["foo", "one"].values
    tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_error_msg",
    [
        (lambda df: df[("foo", "four")], r"^\('foo', 'four'\)$"),
        (lambda df: df["foobar"], r"^'foobar'$"),
    ],
)
def test_frame_getitem_simple_key_error(
    multiindex_dataframe_random_data, indexer, expected_error_msg
):
    """Missing column keys on the transposed frame raise KeyError naming the key."""
    transposed = multiindex_dataframe_random_data.T
    with pytest.raises(KeyError, match=expected_error_msg):
        indexer(transposed)
|
||||
|
||||
|
||||
def test_frame_getitem_multicolumn_empty_level():
    """Selecting a top-level column whose second level is "" keeps only level 3."""
    df = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
    level1 = ["level1 item1", "level1 item2"]
    level2 = ["", "level2 item2"]
    level3 = ["level3 item1", "level3 item2"]
    df.columns = [level1, level2, level3]

    result = df["level1 item1"]
    expected = DataFrame(
        [["1"], ["2"], ["3"]], index=df.index, columns=["level3 item1"]
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected_slice",
    [
        (lambda df: df["foo"], slice(3)),
        (lambda df: df["bar"], slice(3, 5)),
        (lambda df: df.loc[:, "bar"], slice(3, 5)),
    ],
)
def test_frame_getitem_toplevel(
    multiindex_dataframe_random_data, indexer, expected_slice
):
    """Selecting a top-level column label returns its sub-frame without that level."""
    transposed = multiindex_dataframe_random_data.T
    wanted_columns = transposed.columns[expected_slice]
    expected = transposed.reindex(columns=wanted_columns)
    expected.columns = expected.columns.droplevel(0)
    result = indexer(transposed)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mixed_depth_get():
    """Partial keys reach columns whose remaining levels are empty strings."""
    arrays = [
        ["a", "top", "top", "routine1", "routine1", "routine2"],
        ["", "OD", "OD", "result1", "result2", "result1"],
        ["", "wx", "wy", "", "", ""],
    ]

    columns = MultiIndex.from_tuples(sorted(zip(*arrays)))
    df = DataFrame(np.random.randn(4, 6), columns=columns)

    # one-level key
    result = df["a"]
    expected = df["a", "", ""].rename("a")
    tm.assert_series_equal(result, expected)

    # two-level key
    partial_key = ("routine1", "result1")
    result = df[partial_key]
    expected = df["routine1", "result1", ""].rename(partial_key)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_getitem_nan_multiindex(nulls_fixture):
    """``loc`` slicing with tuple bounds on a MultiIndex whose 2nd level is null."""
    # GH#29751
    # loc on a multiindex containing nan values
    n = nulls_fixture  # for code readability
    cols = ["a", "b", "c"]

    def _frame(first_values):
        # one row ``[a, n, a + 2]`` per value, indexed by ("a", "b"), int64 "c"
        rows = [[first, n, first + 2] for first in first_values]
        frame = DataFrame(rows, columns=cols).set_index(["a", "b"])
        frame["c"] = frame["c"].astype("int64")
        return frame

    df = _frame([11, 21, 31, 41])

    idx = (21, n)
    result = df.loc[:idx]
    tm.assert_frame_equal(result, _frame([11, 21]))

    result = df.loc[idx:]
    tm.assert_frame_equal(result, _frame([21, 31, 41]))

    idx1, idx2 = (21, n), (31, n)
    result = df.loc[idx1:idx2]
    tm.assert_frame_equal(result, _frame([21, 31]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer,expected",
    [
        (
            (["b"], ["bar", np.nan]),
            (
                DataFrame(
                    [[2, 3], [5, 6]],
                    columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
                    dtype="int64",
                )
            ),
        ),
        (
            (["a", "b"]),
            (
                DataFrame(
                    [[1, 2, 3], [4, 5, 6]],
                    columns=MultiIndex.from_tuples(
                        [("a", "foo"), ("b", "bar"), ("b", np.nan)]
                    ),
                    dtype="int64",
                )
            ),
        ),
        (
            (["b"]),
            (
                DataFrame(
                    [[2, 3], [5, 6]],
                    columns=MultiIndex.from_tuples([("b", "bar"), ("b", np.nan)]),
                    dtype="int64",
                )
            ),
        ),
        (
            (["b"], ["bar"]),
            (
                DataFrame(
                    [[2], [5]],
                    columns=MultiIndex.from_tuples([("b", "bar")]),
                    dtype="int64",
                )
            ),
        ),
        (
            (["b"], [np.nan]),
            (
                DataFrame(
                    [[3], [6]],
                    columns=MultiIndex(
                        codes=[[1], [-1]], levels=[["a", "b"], ["bar", "foo"]]
                    ),
                    dtype="int64",
                )
            ),
        ),
        (("b", np.nan), Series([3, 6], dtype="int64", name=("b", np.nan))),
    ],
)
def test_frame_getitem_nan_cols_multiindex(
    indexer,
    expected,
    nulls_fixture,
):
    """Check ``df.loc[:, indexer]`` on column MultiIndex levels containing nulls."""
    # Slicing MultiIndex including levels with nan values, for more information
    # see GH#25154
    df = DataFrame(
        [[1, 2, 3], [4, 5, 6]],
        columns=MultiIndex.from_tuples(
            [("a", "foo"), ("b", "bar"), ("b", nulls_fixture)]
        ),
        dtype="int64",
    )

    result = df.loc[:, indexer]
    # tm.assert_equal is used because ``expected`` is a DataFrame or a Series
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# test indexing of DataFrame with multi-level Index with duplicates
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def dataframe_with_duplicate_index():
    """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
    data = [["a", "d", "e", "c", "f", "b"], [1, 4, 5, 3, 6, 2], [1, 4, 5, 3, 6, 2]]
    index = ["h1", "h3", "h5"]
    # codes repeat ("A", "B2"), so the column MultiIndex contains a duplicate
    columns = MultiIndex(
        levels=[["A", "B"], ["A1", "A2", "B1", "B2"]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
        names=["main", "sub"],
    )
    return DataFrame(data, index=index, columns=columns)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [lambda df: df[("A", "A1")], lambda df: df.loc[:, ("A", "A1")]]
)
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
    """Check a full column tuple returns a one-column frame via ``[]`` and ``.loc``."""
    # GH 4145
    df = dataframe_with_duplicate_index
    index = Index(["h1", "h3", "h5"])
    columns = MultiIndex.from_tuples([("A", "A1")], names=["main", "sub"])
    # built transposed and flipped back with .T
    expected = DataFrame([["a", 1, 1]], index=columns, columns=index).T

    result = indexer(df)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
    """Check chained ``df["A"]["A1"]`` returns a Series named "A1"."""
    # GH 4146, not returning a block manager when selecting a unique index
    # from a duplicate index
    # as of 4879, this returns a Series (which is similar to what happens
    # with a non-unique)
    df = dataframe_with_duplicate_index
    expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1")
    result = df["A"]["A1"]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
    """Check chained ``df["A"]["B2"]`` returns a frame with both "B2" columns."""
    # selecting a non_unique from the 2nd level
    df = dataframe_with_duplicate_index
    # built transposed and flipped back with .T
    expected = DataFrame(
        [["d", 4, 4], ["e", 5, 5]],
        index=Index(["B2", "B2"], name="sub"),
        columns=["h1", "h3", "h5"],
    ).T
    result = df["A"]["B2"]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_mi_empty_slice():
    """Check ``df[[]]`` keeps the rows and yields empty MultiIndex columns."""
    # GH 15454
    df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]]))
    result = df[[]]
    # the expected columns keep the original levels but have no codes
    expected = DataFrame(
        index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []])
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_empty_multiindex():
    """Check ``.loc`` get and set with an empty MultiIndex as the row indexer."""
    # GH#36936
    arrays = [["a", "a", "b", "a"], ["a", "a", "b", "b"]]
    index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
    df = DataFrame([1, 2, 3, 4], index=index, columns=["value"])

    # loc on empty multiindex == loc with False mask
    empty_multiindex = df.loc[df.loc[:, "value"] == 0, :].index
    result = df.loc[empty_multiindex, :]
    expected = df.loc[[False] * len(df.index), :]
    tm.assert_frame_equal(result, expected)

    # replacing value with loc on empty multiindex
    # (no row is selected, so the frame must stay unchanged)
    df.loc[df.loc[df.loc[:, "value"] == 0].index, "value"] = 5
    result = df
    expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,171 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def simple_multiindex_dataframe():
    """
    Fixture returning a simple 3 x 3 DataFrame of random data with
    a two-level MultiIndex on both the rows and the columns.
    """

    data = np.random.randn(3, 3)
    return DataFrame(
        data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]]
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, expected",
    [
        (
            lambda df: df.iloc[0],
            lambda arr: Series(arr[0], index=[[2, 2, 4], [6, 8, 10]], name=(4, 8)),
        ),
        (
            lambda df: df.iloc[2],
            lambda arr: Series(arr[2], index=[[2, 2, 4], [6, 8, 10]], name=(8, 12)),
        ),
        (
            lambda df: df.iloc[:, 2],
            lambda arr: Series(arr[:, 2], index=[[4, 4, 8], [8, 10, 12]], name=(4, 10)),
        ),
    ],
)
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
    """Check a single positional row or column comes back as a Series."""
    df = simple_multiindex_dataframe
    arr = df.values
    result = indexer(df)
    # ``expected`` is a factory: the fixture data is random, so the expected
    # Series has to be built from the frame's actual values
    expected = expected(arr)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_returns_dataframe(simple_multiindex_dataframe):
    """Check ``iloc`` with a list of positions matches ``xs(4, drop_level=False)``."""
    df = simple_multiindex_dataframe
    result = df.iloc[[0, 1]]
    expected = df.xs(4, drop_level=False)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_returns_scalar(simple_multiindex_dataframe):
    """Check ``iloc[row, col]`` returns the same scalar as the underlying array."""
    df = simple_multiindex_dataframe
    arr = df.values
    result = df.iloc[2, 2]
    expected = arr[2, 2]
    assert result == expected
|
||||
|
||||
|
||||
def test_iloc_getitem_multiple_items():
    """Check ``iloc`` with a list of positions on a row MultiIndex."""
    # GH 5528
    tup = zip(*[["a", "a", "b", "b"], ["x", "y", "x", "y"]])
    index = MultiIndex.from_tuples(tup)
    df = DataFrame(np.random.randn(4, 4), index=index)
    result = df.iloc[[2, 3]]
    # positions 2 and 3 are exactly the rows under first-level label "b"
    expected = df.xs("b", drop_level=False)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_getitem_labels():
    """Check scalar ``iloc`` access on a frame with duplicated MultiIndex labels."""
    # this is basically regular indexing
    arr = np.random.randn(4, 3)
    df = DataFrame(
        arr,
        columns=[["i", "i", "j"], ["A", "A", "B"]],
        index=[["i", "i", "j", "k"], ["X", "X", "Y", "Y"]],
    )
    result = df.iloc[2, 2]
    expected = arr[2, 2]
    assert result == expected
|
||||
|
||||
|
||||
def test_frame_getitem_slice(multiindex_dataframe_random_data):
    """Check ``df.iloc[:4]`` and ``df[:4]`` select the same rows."""
    df = multiindex_dataframe_random_data
    result = df.iloc[:4]
    expected = df[:4]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_frame_setitem_slice(multiindex_dataframe_random_data):
    """Check assigning through ``iloc[:4]`` changes only the first four rows."""
    df = multiindex_dataframe_random_data
    df.iloc[:4] = 0

    assert (df.values[:4] == 0).all()
    # the remaining rows must keep their non-zero values
    assert (df.values[4:] != 0).all()
|
||||
|
||||
|
||||
def test_indexing_ambiguity_bug_1678():
    """Check positional column 1 equals the label lookup ("Ohio", "Red")."""
    # GH 1678
    columns = MultiIndex.from_tuples(
        [("Ohio", "Green"), ("Ohio", "Red"), ("Colorado", "Green")]
    )
    index = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)])

    df = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns)

    result = df.iloc[:, 1]
    expected = df.loc[:, ("Ohio", "Red")]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_iloc_integer_locations():
    """Check scalar ``iloc[r, c]`` reads each cell under a duplicated MultiIndex."""
    # GH 13797
    data = [
        ["str00", "str01"],
        ["str10", "str11"],
        ["str20", "srt21"],
        ["str30", "str31"],
        ["str40", "str41"],
    ]

    index = MultiIndex.from_tuples(
        [("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
    )

    expected = DataFrame(data)
    df = DataFrame(data, index=index)

    # rebuild the frame one cell at a time through positional access
    result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, indexes, values, expected_k",
    [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], [8, 15, 13]),
    ],
)
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    """Check in-place ``series.iloc[i] += v`` on an integer MultiIndex Series."""
    # GH17148
    df = DataFrame(data=data, columns=["i", "j", "k"])
    df = df.set_index(["i", "j"])

    series = df.k.copy()
    for i, v in zip(indexes, values):
        series.iloc[i] += v

    df["k"] = expected_k
    expected = df.k
    tm.assert_series_equal(series, expected)
|
||||
|
||||
|
||||
def test_getitem_iloc(multiindex_dataframe_random_data):
    """Check ``df.iloc[2]`` equals ``df.xs`` of the label at position 2."""
    df = multiindex_dataframe_random_data
    result = df.iloc[2]
    expected = df.xs(df.index[2])
    tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,97 @@
|
||||
from typing import (
|
||||
Any,
|
||||
List,
|
||||
)
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# m: number of random lookup keys, n: number of data rows
m = 50
n = 1000
cols = ["jim", "joe", "jolie", "joline", "jolia"]

# one array per column in ``cols``, transposed below into n row tuples
vals: List[Any] = [
    np.random.randint(0, 10, n),
    np.random.choice(list("abcdefghij"), n),
    np.random.choice(pd.date_range("20141009", periods=10).tolist(), n),
    np.random.choice(list("ZYXWVUTSRQ"), n),
    np.random.randn(n),
]
vals = list(map(tuple, zip(*vals)))

# bunch of keys for testing
# (each pool is one value larger than the matching data pool above)
keys: List[Any] = [
    np.random.randint(0, 11, m),
    np.random.choice(list("abcdefghijk"), m),
    np.random.choice(pd.date_range("20141009", periods=11).tolist(), m),
    np.random.choice(list("ZYXWVUTSRQP"), m),
]
keys = list(map(tuple, zip(*keys)))
# add keys taken from the data itself: every (n // m)-th row minus its last column
keys += list(map(lambda t: t[:-1], vals[:: n // m]))


# covers both unique index and non-unique index
df = DataFrame(vals, columns=cols)
a = pd.concat([df, df])
b = df.drop_duplicates(subset=cols[:-1])
|
||||
|
||||
|
||||
def validate(mi, df, key):
    """
    Check ``mi.loc`` for every prefix of ``key`` against boolean masking of ``df``.

    ``mi`` is indexed with ``key[: i + 1]`` for each prefix length, and the
    result is compared with the rows of ``df`` whose leading columns equal
    that prefix. Relies on the module-level ``cols`` list.
    """
    # check indexing into a multi-index before & past the lexsort depth

    mask = np.ones(len(df)).astype("bool")

    # test for all partials of this key
    for i, k in enumerate(key):
        mask &= df.iloc[:, i] == k

        if not mask.any():
            assert key[: i + 1] not in mi.index
            continue

        assert key[: i + 1] in mi.index
        right = df[mask].copy()

        if i + 1 != len(key):  # partial key
            return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
            assert return_value is None
            return_value = right.set_index(cols[i + 1 : -1], inplace=True)
            assert return_value is None
            tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

        else:  # full key
            return_value = right.set_index(cols[:-1], inplace=True)
            assert return_value is None
            if len(right) == 1:  # single hit
                right = Series(
                    right["jolia"].values, name=right.index[0], index=["jolia"]
                )
                tm.assert_series_equal(mi.loc[key[: i + 1]], right)
            else:  # multi hit
                tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("key", keys)
@pytest.mark.parametrize("frame", [a, b])
def test_multiindex_get_loc(lexsort_depth, key, frame):
    """Run ``validate`` on frames sorted by the first ``lexsort_depth`` columns."""
    # GH7724, GH2646

    with warnings.catch_warnings(record=True):
        if lexsort_depth == 0:
            df = frame.copy()
        else:
            df = frame.sort_values(by=cols[:lexsort_depth])

        mi = df.set_index(cols[:-1])
        # the index must be lexsorted at least as deep as the sort above
        assert not mi.index._lexsort_depth < lexsort_depth
        validate(mi, df, key)
|
||||
@@ -0,0 +1,945 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexing import IndexingError
|
||||
|
||||
|
||||
@pytest.fixture
def single_level_multiindex():
    """single level MultiIndex"""
    return MultiIndex(
        levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
    )
|
||||
|
||||
|
||||
@pytest.fixture
def frame_random_data_integer_multi_index():
    """6 x 2 DataFrame of random data with a two-level integer row MultiIndex."""
    levels = [[0, 1], [0, 1, 2]]
    codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
    index = MultiIndex(levels=levels, codes=codes)
    return DataFrame(np.random.randn(6, 2), index=index)
|
||||
|
||||
|
||||
class TestMultiIndexLoc:
    """Tests for ``.loc`` on Series and DataFrames carrying a MultiIndex."""

    def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
        """Check scalar ``.loc`` assignment with a row tuple and a column label."""
        frame = multiindex_dataframe_random_data
        frame.loc[("bar", "two"), "B"] = 5
        assert frame.loc[("bar", "two"), "B"] == 5

        # with integer labels
        df = frame.copy()
        df.columns = list(range(3))
        df.loc[("bar", "two"), 1] = 7
        assert df.loc[("bar", "two"), 1] == 7

    def test_loc_getitem_general(self):
        """Check tuple lookup before and after sorting the index."""

        # GH#2817
        data = {
            "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
            "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
            "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012},
        }
        df = DataFrame(data).set_index(keys=["col", "year"])
        key = 4.0, 2012

        # emits a PerformanceWarning, ok
        with tm.assert_produces_warning(PerformanceWarning):
            tm.assert_frame_equal(df.loc[key], df.iloc[2:])

        # this is ok
        return_value = df.sort_index(inplace=True)
        assert return_value is None
        res = df.loc[key]

        # col has float dtype, result should be Float64Index
        index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"])
        expected = DataFrame({"amount": [222, 333, 444]}, index=index)
        tm.assert_frame_equal(res, expected)

    def test_loc_getitem_multiindex_missing_label_raises(self):
        """Check a scalar label missing from the first level raises KeyError."""
        # GH#21593
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with pytest.raises(KeyError, match=r"^2$"):
            df.loc[2]

    def test_loc_getitem_list_of_tuples_with_multiindex(
        self, multiindex_year_month_day_dataframe_random_data
    ):
        """Check ``.loc`` with a list of full tuples matches ``reindex``."""
        ser = multiindex_year_month_day_dataframe_random_data["A"]
        expected = ser.reindex(ser.index[49:51])
        result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_series(self):
        """Check a Series key selects on the first level using its values."""
        # GH14730
        # passing a series as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = Series([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
            dtype=np.float64,
        )
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        result = x.loc[[1, 3]]
        tm.assert_series_equal(result, expected)

        # GH15424
        # same values as ``y`` but with a different index on the key
        y1 = Series([1, 3], index=[1, 2])
        result = x.loc[y1]
        tm.assert_series_equal(result, expected)

        empty = Series(data=[], dtype=np.float64)
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
            dtype=np.float64,
        )
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_array(self):
        """Check ndarray, empty-array and NumPy-scalar keys on a MultiIndex."""
        # GH15434
        # passing an array as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = np.array([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]),
            dtype=np.float64,
        )
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        # empty array:
        empty = np.array([])
        expected = Series(
            [],
            index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64),
            dtype="float64",
        )
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

        # 0-dim array (scalar):
        scalar = np.int64(1)
        expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64)
        result = x.loc[scalar]
        tm.assert_series_equal(result, expected)

    def test_loc_multiindex_labels(self):
        """Check first-level label selection on rows and columns."""
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[["i", "i", "j"], ["A", "A", "B"]],
            index=[["i", "i", "j"], ["X", "X", "Y"]],
        )

        # the first 2 rows
        expected = df.iloc[[0, 1]].droplevel(0)
        result = df.loc["i"]
        tm.assert_frame_equal(result, expected)

        # 2nd (last) column
        expected = df.iloc[:, [2]].droplevel(0, axis=1)
        result = df.loc[:, "j"]
        tm.assert_frame_equal(result, expected)

        # bottom right corner
        expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1)
        result = df.loc["j"].loc[:, "j"]
        tm.assert_frame_equal(result, expected)

        # with a tuple
        expected = df.iloc[[0, 1]]
        result = df.loc[("i", "X")]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_ints(self):
        """Check an integer first-level label selects rows and drops that level."""
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )
        expected = df.iloc[[0, 1]].droplevel(0)
        result = df.loc[4]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_missing_label_raises(self):
        """Check an integer label missing from the row index raises KeyError."""
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with pytest.raises(KeyError, match=r"^2$"):
            df.loc[2]

    @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])])
    def test_loc_multiindex_list_missing_label(self, key, pos):
        """
        Check a list key containing a missing label raises KeyError.

        ``pos`` is part of the parametrization but is not used by the test.
        """
        # GH 27148 - lists with missing labels _do_ raise
        df = DataFrame(
            np.random.randn(3, 3),
            columns=[[2, 2, 4], [6, 8, 10]],
            index=[[4, 4, 8], [8, 10, 12]],
        )

        with pytest.raises(KeyError, match="not in index"):
            df.loc[key]

    def test_loc_multiindex_too_many_dims_raises(self):
        """Check invalid keys raise KeyError and too many indexers IndexingError."""
        # GH 14885
        s = Series(
            range(8),
            index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
        )

        with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"):
            s.loc["a", "b"]
        with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"):
            s.loc["a", "d", "g"]
        # four indexers on a three-level index
        with pytest.raises(IndexingError, match="Too many indexers"):
            s.loc["a", "d", "g", "j"]

    def test_loc_multiindex_indexer_none(self):
        """Check list keys that cover a whole first level select everything."""

        # GH6788
        # multi-index indexer is None (meaning take all)
        attributes = ["Attribute" + str(i) for i in range(1)]
        attribute_values = ["Value" + str(i) for i in range(5)]

        index = MultiIndex.from_product([attributes, attribute_values])
        df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
        df = DataFrame(df, columns=index)
        result = df[attributes]
        tm.assert_frame_equal(result, df)

        # GH 7349
        # loc with a multi-index seems to be doing fallback
        df = DataFrame(
            np.arange(12).reshape(-1, 1),
            index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]),
        )

        expected = df.loc[([1, 2],), :]
        result = df.loc[[1, 2]]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_incomplete(self):
        """Check slice pairs with open or stepped bounds on the first level."""

        # GH 7399
        # incomplete indexers
        s = Series(
            np.arange(15, dtype="int64"),
            MultiIndex.from_product([range(5), ["a", "b", "c"]]),
        )
        expected = s.loc[:, "a":"c"]

        result = s.loc[0:4, "a":"c"]
        tm.assert_series_equal(result, expected)

        result = s.loc[:4, "a":"c"]
        tm.assert_series_equal(result, expected)

        result = s.loc[0:, "a":"c"]
        tm.assert_series_equal(result, expected)

        # GH 7400
        # multiindexer getitem with list of indexers skips wrong element
        s = Series(
            np.arange(15, dtype="int64"),
            MultiIndex.from_product([range(5), ["a", "b", "c"]]),
        )
        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
        result = s.loc[2:4:2, "a":"c"]
        tm.assert_series_equal(result, expected)

    def test_get_loc_single_level(self, single_level_multiindex):
        """Smoke test: ``s[k]`` must not raise for any key of a one-level index."""
        single_level = single_level_multiindex
        s = Series(np.random.randn(len(single_level)), index=single_level)
        for k in single_level.values:
            # no assertion: the lookup only has to succeed
            s[k]

    def test_loc_getitem_int_slice(self):
        """Check integer slices in ``.loc`` are treated as label slices."""
        # GH 3053
        # loc should treat integer slices like label slices

        index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]])
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[6:8, :]
        expected = df
        tm.assert_frame_equal(result, expected)

        index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]])
        df = DataFrame(np.random.randn(6, 6), index, index)
        result = df.loc[20:30, :]
        expected = df.iloc[2:]
        tm.assert_frame_equal(result, expected)

        # doc examples
        result = df.loc[10, :]
        expected = df.iloc[0:2]
        expected.index = ["a", "b"]
        tm.assert_frame_equal(result, expected)

        result = df.loc[:, 10]
        expected = df[10]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index)
    )
    @pytest.mark.parametrize(
        "indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index)
    )
    def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
        """Check every pairing of list-like or slice indexer types, one per level."""
        # GH #19686
        # .loc should work with nested indexers which can be
        # any list-like objects (see `is_list_like` (`pandas.api.types`)) or slices

        def convert_nested_indexer(indexer_type, keys):
            if indexer_type == np.ndarray:
                return np.array(keys)
            if indexer_type == slice:
                return slice(*keys)
            return indexer_type(keys)

        a = [10, 20, 30]
        b = [1, 2, 3]
        index = MultiIndex.from_product([a, b])
        df = DataFrame(
            np.arange(len(index), dtype="int64"), index=index, columns=["Data"]
        )

        keys = ([10, 20], [2, 3])
        types = (indexer_type_1, indexer_type_2)

        # check indexers with all the combinations of nested objects
        # of all the valid types
        indexer = tuple(
            convert_nested_indexer(indexer_type, k)
            for indexer_type, k in zip(types, keys)
        )
        # a set on either level is expected to emit a FutureWarning
        if indexer_type_1 is set or indexer_type_2 is set:
            with tm.assert_produces_warning(FutureWarning):
                result = df.loc[indexer, "Data"]
        else:
            result = df.loc[indexer, "Data"]
        expected = Series(
            [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys)
        )

        tm.assert_series_equal(result, expected)

    def test_multiindex_loc_one_dimensional_tuple(self, frame_or_series):
        """Check a one-element tuple key assigns to rows under that label."""
        # GH#37711
        mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
        obj = frame_or_series([1, 2], index=mi)
        obj.loc[("a",)] = 0
        expected = frame_or_series([0, 2], index=mi)
        tm.assert_equal(obj, expected)

    @pytest.mark.parametrize("indexer", [("a",), ("a")])
    def test_multiindex_one_dimensional_tuple_columns(self, indexer):
        """Check ``obj.loc[indexer, :] = 0`` with a tuple or plain string key."""
        # GH#37711
        mi = MultiIndex.from_tuples([("a", "A"), ("b", "A")])
        obj = DataFrame([1, 2], index=mi)
        obj.loc[indexer, :] = 0
        expected = DataFrame([0, 2], index=mi)
        tm.assert_frame_equal(obj, expected)

    @pytest.mark.parametrize(
        "indexer, exp_value", [(slice(None), 1.0), ((1, 2), np.nan)]
    )
    def test_multiindex_setitem_columns_enlarging(self, indexer, exp_value):
        """Check ``.loc`` assignment to new columns adds them to the frame."""
        # GH#39147
        mi = MultiIndex.from_tuples([(1, 2), (3, 4)])
        df = DataFrame([[1, 2], [3, 4]], index=mi, columns=["a", "b"])
        df.loc[indexer, ["c", "d"]] = 1.0
        # with the (1, 2) indexer only the first row is set; the other stays NaN
        expected = DataFrame(
            [[1, 2, 1.0, 1.0], [3, 4, exp_value, exp_value]],
            index=mi,
            columns=["a", "b", "c", "d"],
        )
        tm.assert_frame_equal(df, expected)

    def test_sorted_multiindex_after_union(self):
        """Check date-string slicing on frames built from differently indexed Series."""
        # GH#44752
        midx = MultiIndex.from_product(
            [pd.date_range("20110101", periods=2), Index(["a", "b"])]
        )
        ser1 = Series(1, index=midx)
        ser2 = Series(1, index=midx[:2])
        df = pd.concat([ser1, ser2], axis=1)
        expected = df.copy()
        result = df.loc["2011-01-01":"2011-01-02"]
        tm.assert_frame_equal(result, expected)

        df = DataFrame({0: ser1, 1: ser2})
        result = df.loc["2011-01-01":"2011-01-02"]
        tm.assert_frame_equal(result, expected)

        df = pd.concat([ser1, ser2.reindex(ser1.index)], axis=1)
        result = df.loc["2011-01-01":"2011-01-02"]
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, pos",
    [
        ([], []),  # empty ok
        (["A"], slice(3)),
        (["A", "D"], []),  # "D" isn't present -> raise
        (["D", "E"], []),  # no values found -> raise
        (["D"], []),  # same, with single item list: GH 27148
        (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)),
        (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)),
    ],
)
def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
    """
    Check ``ser.loc[indexer]`` when some requested labels are absent.

    A non-empty ``indexer`` whose expected result is empty must raise
    KeyError; otherwise the result must equal ``ser.iloc[pos]``.
    """
    # GH 7866
    # multi-index slicing with missing indexers
    idx = MultiIndex.from_product(
        [["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
    )
    ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
    expected = ser.iloc[pos]

    if expected.size == 0 and indexer != []:
        with pytest.raises(KeyError, match=str(indexer)):
            ser.loc[indexer]
    else:
        warn = None
        msg = "MultiIndex with a nested sequence"
        if indexer == (slice(None), ["foo", "bah"]):
            # "bah" is not in idx.levels[1], so is ignored, will raise KeyError
            warn = FutureWarning

        with tm.assert_produces_warning(warn, match=msg):
            result = ser.loc[indexer]
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("columns_indexer", [([], slice(None)), (["foo"], [])])
def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
    """Check a column indexer with an empty list on any level selects no columns."""
    # GH 8737
    # empty indexer
    multi_index = MultiIndex.from_product((["foo", "bar", "baz"], ["alpha", "beta"]))
    df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index)
    df = df.sort_index(level=0, axis=1)

    expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
    result = df.loc[:, columns_indexer]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_duplicates_multiindex_non_scalar_type_object():
    """Check ``.loc`` returns a stored function object from an object frame."""
    # regression from < 0.14.0
    # GH 7914
    df = DataFrame(
        [[np.mean, np.median], ["mean", "median"]],
        columns=MultiIndex.from_tuples([("functs", "mean"), ("functs", "median")]),
        index=["function", "name"],
    )
    result = df.loc["function", ("functs", "mean")]
    expected = np.mean
    assert result == expected
|
||||
|
||||
|
||||
def test_loc_getitem_tuple_plus_slice():
    """Check ``df.loc[(0, 0), :]`` matches ``df.loc[0, 0]``."""
    # GH 671
    df = DataFrame(
        {
            "a": np.arange(10),
            "b": np.arange(10),
            "c": np.random.randn(10),
            "d": np.random.randn(10),
        }
    ).set_index(["a", "b"])
    expected = df.loc[0, 0]
    result = df.loc[(0, 0), :]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int(frame_random_data_integer_multi_index):
    """Check ``df.loc[1]`` returns the last three rows with level 0 dropped."""
    df = frame_random_data_integer_multi_index
    result = df.loc[1]
    expected = df[-3:]
    expected.index = expected.index.droplevel(0)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_int_raises_exception(frame_random_data_integer_multi_index):
    """Check ``df.loc[3]`` raises KeyError on the integer MultiIndex fixture."""
    df = frame_random_data_integer_multi_index
    with pytest.raises(KeyError, match=r"^3$"):
        df.loc[3]
|
||||
|
||||
|
||||
def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data):
    """Check a row tuple added by ``.loc`` assignment is readable after sorting."""
    df = multiindex_dataframe_random_data

    # test setup - check key not in dataframe
    with pytest.raises(KeyError, match=r"^\('bar', 'three'\)$"):
        df.loc[("bar", "three"), "B"]

    # in theory should be inserting in a sorted space????
    df.loc[("bar", "three"), "B"] = 0
    expected = 0
    result = df.sort_index().loc[("bar", "three"), "B"]
    assert result == expected
|
||||
|
||||
|
||||
def test_loc_setitem_single_column_slice():
    """Check ``df.loc[:, key] = values`` with a first-level key on MultiIndex columns."""
    # case from https://github.com/pandas-dev/pandas/issues/27841
    df = DataFrame(
        "string",
        index=list("abcd"),
        columns=MultiIndex.from_product([["Main"], ("another", "one")]),
    )
    df["labels"] = "a"
    df.loc[:, "labels"] = df.index
    tm.assert_numpy_array_equal(np.asarray(df["labels"]), np.asarray(df.index))

    # test with non-object block
    df = DataFrame(
        np.nan,
        index=range(4),
        columns=MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]),
    )
    expected = df.copy()
    df.loc[:, "B"] = np.arange(4)
    # ("B", "1") is the third column, so the positional write must match
    expected.iloc[:, 2] = np.arange(4)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_loc_nan_multiindex():
    """Chained ``.loc`` lookups on a three-level index whose last level contains NaN."""
    # GH 5286
    column_labels = ["d1", "d2", "d3", "d4"]
    tups = [
        ("Good Things", "C", np.nan),
        ("Good Things", "R", np.nan),
        ("Bad Things", "C", np.nan),
        ("Bad Things", "T", np.nan),
        ("Okay Things", "N", "B"),
        ("Okay Things", "N", "D"),
        ("Okay Things", "B", np.nan),
        ("Okay Things", "D", np.nan),
    ]
    row_index = MultiIndex.from_tuples(tups, names=["u1", "u2", "u3"])
    df = DataFrame(np.ones((8, 4)), columns=Index(column_labels), index=row_index)

    result = df.loc["Good Things"].loc["C"]

    expected = DataFrame(
        np.ones((1, 4)),
        index=Index([np.nan], dtype="object", name="u3"),
        columns=Index(column_labels, dtype="object"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_period_string_indexing():
    """A quarter string and the matching Period object both select one NaN cell."""
    # GH 9892
    quarters = pd.period_range("2013Q1", "2013Q4", freq="Q")
    cvr_numbers = (1111, 2222, 3333)
    idx = MultiIndex.from_product((quarters, cvr_numbers), names=("Period", "CVR"))
    column_names = (
        "OMS",
        "OMK",
        "RES",
        "DRIFT_IND",
        "OEVRIG_IND",
        "FIN_IND",
        "VARE_UD",
        "LOEN_UD",
        "FIN_UD",
    )
    df = DataFrame(index=idx, columns=column_names)
    result = df.loc[("2013Q1", 1111), "OMS"]

    alt = df.loc[(quarters[0], 1111), "OMS"]
    assert np.isnan(alt)

    # Because the resolution of the string matches, it is an exact lookup,
    # not a slice
    assert np.isnan(result)

    # TODO: should it figure this out?
    # alt = df.loc["2013Q1", 1111, "OMS"]
    # assert np.isnan(alt)
|
||||
|
||||
|
||||
def test_loc_datetime_mask_slicing():
    """Combine a scalar timestamp with a boolean mask inside one ``.loc`` row key."""
    # GH 16699
    dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"])
    m_idx = MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"])
    df = DataFrame(
        data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"]
    )

    second_level_mask = df.index.get_level_values(1) > "2017-05-04"
    result = df.loc[(dt_idx[0], second_level_mask), "C1"]

    expected_index = MultiIndex.from_tuples(
        [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))],
        names=["Idx1", "Idx2"],
    )
    expected = Series([3], name="C1", index=expected_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_datetime_series_tuple_slicing():
    """``ser.loc[:, [timestamp]]`` on a one-row MultiIndex Series returns the Series."""
    # https://github.com/pandas-dev/pandas/issues/35858
    stamp = pd.Timestamp("2000")
    mi = MultiIndex.from_tuples([("a", stamp)], names=["a", "b"])
    ser = Series(1, index=mi, name="c")
    result = ser.loc[:, [stamp]]
    tm.assert_series_equal(result, ser)
|
||||
|
||||
|
||||
def test_loc_with_mi_indexer():
    """Select rows with a MultiIndex as the indexer; duplicate rows are all returned."""
    # https://github.com/pandas-dev/pandas/issues/35351
    level_names = ["index", "date"]
    value_columns = ["author", "price"]
    df = DataFrame(
        data=[["a", 1], ["a", 0], ["b", 1], ["c", 2]],
        index=MultiIndex.from_tuples(
            [(0, 1), (1, 0), (1, 1), (1, 1)], names=level_names
        ),
        columns=value_columns,
    )
    idx = MultiIndex.from_tuples([(0, 1), (1, 1)], names=level_names)
    result = df.loc[idx, :]

    expected = DataFrame(
        [["a", 1], ["b", 1], ["c", 2]],
        index=MultiIndex.from_tuples([(0, 1), (1, 1), (1, 1)], names=level_names),
        columns=value_columns,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_mi_with_level1_named_0():
    """``.loc`` with a leading-level key works when the second level is named ``0``."""
    # GH#37194
    dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
    first_stamp = dti[0]

    df = Series(range(3), index=dti).to_frame()
    df[1] = dti

    df2 = df.set_index(0, append=True)
    assert df2.index.names == (None, 0)
    df2.index.get_loc(first_stamp)  # smoke test

    # DataFrame lookup
    result = df2.loc[first_stamp]
    expected = df2.iloc[[0]].droplevel(None)
    tm.assert_frame_equal(result, expected)

    # Series lookup on the remaining column
    ser2 = df2[1]
    assert ser2.index.names == (None, 0)

    result = ser2.loc[first_stamp]
    expected = ser2.iloc[[0]].droplevel(None)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_str_slice(datapath):
    """A string slice on the second level matches slicing after selecting the first."""
    # GH#15928
    csv_path = datapath("reshape", "merge", "data", "quotes2.csv")
    quotes = pd.read_csv(csv_path, parse_dates=["time"])
    df2 = quotes.set_index(["ticker", "time"]).sort_index()

    upto = slice("2016-05-25 13:30:00")
    res = df2.loc[("AAPL", upto), :].droplevel(0)
    expected = df2.loc["AAPL"].loc[upto, :]
    tm.assert_frame_equal(res, expected)
|
||||
|
||||
|
||||
def test_3levels_leading_period_index():
    """Full-tuple ``.loc`` lookup on a three-level index led by a PeriodIndex."""
    # GH#24091
    pi = pd.PeriodIndex(
        ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"],
        name="datetime",
        freq="B",
    )
    second_level = ["A", "A", "Z", "W"]
    third_level = ["B", "C", "Q", "F"]
    mi = MultiIndex.from_arrays([pi, second_level, third_level])

    ser = Series(range(4), index=mi, dtype=np.float64)
    lookup_key = (pi[0], "A", "B")
    result = ser.loc[lookup_key]
    assert result == 0.0
|
||||
|
||||
|
||||
class TestKeyErrorsWithMultiIndex:
    """Missing MultiIndex keys must surface as ``KeyError``."""

    def test_missing_keys_raises_keyerror(self):
        """A tuple whose second element is absent raises ``KeyError``."""
        # GH#27420 KeyError, not TypeError
        flat = DataFrame(np.arange(12).reshape(4, 3), columns=["A", "B", "C"])
        df2 = flat.set_index(["A", "B"])

        with pytest.raises(KeyError, match="1"):
            df2.loc[(1, 6)]

    def test_missing_key_raises_keyerror2(self):
        """Series ``.loc[0, 3]`` with no such second-level label raises ``KeyError``."""
        # GH#21168 KeyError, not "IndexingError: Too many indexers"
        mi = MultiIndex.from_product([[0, 1]] * 2)
        ser = Series(-1, index=mi)

        with pytest.raises(KeyError, match=r"\(0, 3\)"):
            ser.loc[0, 3]

    def test_missing_key_combination(self):
        """A level combination not present in the index raises ``KeyError``."""
        # GH: 19556
        level_arrays = [
            np.array(["a", "a", "b", "b"]),
            np.array(["1", "2", "2", "3"]),
            np.array(["c", "d", "c", "d"]),
        ]
        mi = MultiIndex.from_arrays(level_arrays, names=["one", "two", "three"])
        df = DataFrame(np.random.rand(4, 3), index=mi)

        msg = r"\('b', '1', slice\(None, None, None\)\)"
        with pytest.raises(KeyError, match=msg):
            df.loc[("b", "1", slice(None)), :]
        with pytest.raises(KeyError, match=msg):
            df.index.get_locs(("b", "1", slice(None)))
        with pytest.raises(KeyError, match=r"\('b', '1'\)"):
            df.loc[("b", "1"), :]
|
||||
|
||||
|
||||
def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_data):
|
||||
df = multiindex_year_month_day_dataframe_random_data
|
||||
ser = df["A"]
|
||||
result = ser[2000, 5]
|
||||
expected = df.loc[2000, 5]["A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_with_nan():
    """List and scalar ``.loc`` lookups work when another index row holds NaN."""
    # GH: 27104
    raw = DataFrame({"col": [1, 2, 5], "ind1": ["a", "d", np.nan], "ind2": [1, 4, 5]})
    df = raw.set_index(["ind1", "ind2"])

    # list key keeps both levels
    result = df.loc[["a"]]
    kept_index = MultiIndex.from_tuples([("a", 1)], names=["ind1", "ind2"])
    expected = DataFrame({"col": [1]}, index=kept_index)
    tm.assert_frame_equal(result, expected)

    # scalar key drops the first level
    result = df.loc["a"]
    expected = DataFrame({"col": [1]}, index=Index([1], name="ind2"))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_non_found_tuple():
    """A full-depth tuple that is not in the index raises ``KeyError``."""
    # GH: 25236
    flat = DataFrame([[1, 2, 3, 4]], columns=["a", "b", "c", "d"])
    df = flat.set_index(["a", "b", "c"])
    with pytest.raises(KeyError, match=r"\(2\.0, 2\.0, 3\.0\)"):
        df.loc[(2.0, 2.0, 3.0)]
|
||||
|
||||
|
||||
def test_get_loc_datetime_index():
    """Compare ``get_loc`` with a month string on a one-level MultiIndex and its Index."""
    # GH#24263
    index = pd.date_range("2001-01-01", periods=100)
    mi = MultiIndex.from_arrays([index])
    month = "2001-01"

    # Check if get_loc matches for Index and MultiIndex
    assert mi.get_loc(month) == slice(0, 31, None)
    assert index.get_loc(month) == slice(0, 31, None)

    # every other element
    loc = mi[::2].get_loc(month)
    expected = index[::2].get_loc(month)
    assert loc == expected

    # each element repeated
    loc = mi.repeat(2).get_loc(month)
    expected = index.repeat(2).get_loc(month)
    assert loc == expected

    # index appended to itself
    loc = mi.append(mi).get_loc(month)
    expected = index.append(index).get_loc(month)
    # TODO: standardize return type for MultiIndex.get_loc
    tm.assert_numpy_array_equal(loc.nonzero()[0], expected)
|
||||
|
||||
|
||||
def test_loc_setitem_indexer_differently_ordered():
    """Setting with second-level labels listed out of index order follows the key order."""
    # GH#34603
    mi = MultiIndex.from_product([["a", "b"], [0, 1]])
    df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)

    indexer = ("a", [1, 0])
    new_rows = np.array([[9, 10], [11, 12]])
    df.loc[indexer, :] = new_rows

    expected = DataFrame([[11, 12], [9, 10], [5, 6], [7, 8]], index=mi)
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_index_differently_ordered_slice_none():
    """``(slice(None), [2, 1])`` returns rows in the order of the listed labels."""
    # GH#31330
    column_names = ["a", "b"]
    df = DataFrame(
        [[1, 2], [3, 4], [5, 6], [7, 8]],
        index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
        columns=column_names,
    )
    row_key = (slice(None), [2, 1])
    result = df.loc[row_key, :]

    expected = DataFrame(
        [[3, 4], [7, 8], [1, 2], [5, 6]],
        index=[["a", "b", "a", "b"], [2, 2, 1, 1]],
        columns=column_names,
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("indexer", [[1, 2, 7, 6, 2, 3, 8, 7], [1, 2, 7, 6, 3, 8]])
def test_loc_getitem_index_differently_ordered_slice_none_duplicates(indexer):
    """List key on level 1 (with or without repeated labels) against a fixed expectation."""
    # GH#40978
    source_index = MultiIndex.from_tuples(
        [(1, 1), (1, 2), (1, 7), (1, 6), (2, 2), (2, 3), (2, 8), (2, 7)]
    )
    df = DataFrame([1] * 8, index=source_index, columns=["a"])

    result = df.loc[(slice(None), indexer), :]
    expected = DataFrame(
        [1] * 8,
        index=[[1, 1, 2, 1, 2, 1, 2, 2], [1, 2, 2, 7, 7, 6, 3, 8]],
        columns=["a"],
    )
    tm.assert_frame_equal(result, expected)

    # A boolean mask built with isin keeps the original row order.
    level1_mask = df.index.isin(indexer, level=1)
    result = df.loc[level1_mask, :]
    tm.assert_frame_equal(result, df)
|
||||
|
||||
|
||||
def test_loc_getitem_drops_levels_for_one_row_dataframe():
    """Scalar keys on levels ``a`` and ``c`` drop those levels, for frame and series."""
    # GH#10521 "x" and "z" are both scalar indexing, so those levels are dropped
    mi = MultiIndex.from_arrays([["x"], ["y"], ["z"]], names=["a", "b", "c"])

    # DataFrame
    df = DataFrame({"d": [0]}, index=mi)
    result = df.loc["x", :, "z"]
    expected = df.droplevel([0, 2])
    tm.assert_frame_equal(result, expected)

    # Series
    ser = Series([0], index=mi)
    result = ser.loc["x", :, "z"]
    expected = Series([0], index=Index(["y"], name="b"))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_mi_columns_loc_list_label_order():
    """A list of top-level column labels returns columns in the listed order."""
    # GH 10710
    cols = MultiIndex.from_product([["A", "B", "C"], [1, 2]])
    df = DataFrame(np.zeros((5, 6)), columns=cols)

    result = df.loc[:, ["B", "A"]]

    reordered = MultiIndex.from_tuples([("B", 1), ("B", 2), ("A", 1), ("A", 2)])
    expected = DataFrame(np.zeros((5, 4)), columns=reordered)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_mi_partial_indexing_list_raises():
    """A list key mixing labels from different levels raises ``KeyError``."""
    # GH 13501
    row_levels = [["a", "a", "b", "b"], [1, 2, 1, 2]]
    col_levels = [["Ohio", "Ohio", "Colorado"], ["Green", "Red", "Green"]]
    frame = DataFrame(
        np.arange(12).reshape((4, 3)), index=row_levels, columns=col_levels
    )
    frame.index.names = ["key1", "key2"]
    frame.columns.names = ["state", "color"]
    with pytest.raises(KeyError, match="\\[2\\] not in index"):
        frame.loc[["b", 2], "Colorado"]
|
||||
|
||||
|
||||
def test_mi_indexing_list_nonexistent_raises():
    """A list of labels that are all absent raises ``KeyError``."""
    # GH 15452
    mi = MultiIndex.from_product([[1, 2], ["a", "b"]])
    s = Series(range(4), index=mi)
    with pytest.raises(KeyError, match="\\['not' 'found'\\] not in index"):
        s.loc[["not", "found"]]
|
||||
|
||||
|
||||
def test_mi_add_cell_missing_row_non_unique():
    """Add rows and single cells via ``.loc`` to a frame with a duplicated row label."""
    # GH 16018
    result = DataFrame(
        [[1, 2, 5, 6], [3, 4, 7, 8]],
        index=["a", "a"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    # new row "c" filled with -1, then one cell overwritten
    result.loc["c"] = -1
    result.loc["c", (1, "A")] = 3
    # new row "d" created by setting a single cell
    result.loc["d", (1, "A")] = 3

    expected_rows = [
        [1.0, 2.0, 5.0, 6.0],
        [3.0, 4.0, 7.0, 8.0],
        [3.0, -1.0, -1, -1],
        [3.0, np.nan, np.nan, np.nan],
    ]
    expected = DataFrame(
        expected_rows,
        index=["a", "a", "c", "d"],
        columns=MultiIndex.from_product([[1, 2], ["A", "B"]]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_get_scalar_casting_to_float():
    """An integer cell next to a float column comes back as ``np.int64``."""
    # GH#41369
    mi = MultiIndex.from_arrays([[3], [4]], names=["c", "d"])
    df = DataFrame({"a": 1.0, "b": 2}, index=mi)

    # scalar lookup with a tuple key
    result = df.loc[(3, 4), "b"]
    assert result == 2
    assert isinstance(result, np.int64)

    # list-of-tuples lookup followed by positional access
    result = df.loc[[(3, 4)], "b"].iloc[0]
    assert result == 2
    assert isinstance(result, np.int64)
|
||||
|
||||
|
||||
def test_loc_empty_single_selector_with_names():
    """Selecting on the first level works when the levels are named ``1`` and ``0``."""
    # GH 19517
    idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=[1, 0])
    s2 = Series(index=idx, dtype=np.float64)

    result = s2.loc["a"]

    remaining_level = Index(["A", "B"], name=0)
    expected = Series([np.nan, np.nan], index=remaining_level)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_keyerror_rightmost_key_missing():
    """With a valid first level and a missing second, the ``KeyError`` names only ``1``."""
    # GH 20951
    columns = {
        "A": [100, 100, 200, 200, 300, 300],
        "B": [10, 10, 20, 21, 31, 33],
        "C": range(6),
    }
    df = DataFrame(columns).set_index(["A", "B"])
    with pytest.raises(KeyError, match="^1$"):
        df.loc[(100, 1)]
|
||||
|
||||
|
||||
def test_multindex_series_loc_with_tuple_label():
    """A level label that is itself a tuple can be looked up via ``.loc``."""
    # GH#43908
    nested_key = (3, (4, 5))
    mi = MultiIndex.from_tuples([(1, 2), nested_key])
    ser = Series([1, 2], index=mi)
    result = ser.loc[nested_key]
    assert result == 2
|
||||
@@ -0,0 +1,151 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas._libs.index as _index
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexBasic:
    """Basic construction and indexing checks for MultiIndex-backed objects."""

    def test_multiindex_perf_warn(self):
        """``.loc`` lookups on this two-level index emit ``PerformanceWarning``."""
        df = DataFrame(
            {
                "jim": [0, 0, 1, 1],
                "joe": ["x", "x", "z", "y"],
                "jolie": np.random.rand(4),
            }
        ).set_index(["jim", "joe"])

        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(1, "z")]

        # Reorder the rows and look up by the first level only.
        df = df.iloc[[2, 1, 3, 0]]
        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(0,)]

    def test_indexing_over_hashtable_size_cutoff(self):
        """Tuple lookups work while ``_index._SIZE_CUTOFF`` is overridden to 20000.

        ``_SIZE_CUTOFF`` is a module-level global, so the original value is
        restored in a ``finally`` block; otherwise a failing assertion would
        leave the override in place for every test that runs afterwards.
        """
        n = 10000

        old_cutoff = _index._SIZE_CUTOFF
        _index._SIZE_CUTOFF = 20000
        try:
            s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n))))

            # hai it works!
            assert s[("a", 5)] == 5
            assert s[("a", 6)] == 6
            assert s[("a", 7)] == 7
        finally:
            _index._SIZE_CUTOFF = old_cutoff

    def test_multi_nan_indexing(self):
        """``set_index(..., drop=False)`` keeps NaN labels in the resulting index."""
        # GH 3588
        df = DataFrame(
            {
                "a": ["R1", "R2", np.nan, "R4"],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            }
        )
        result = df.set_index(["a", "b"], drop=False)
        expected = DataFrame(
            {
                "a": ["R1", "R2", np.nan, "R4"],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            },
            index=[
                Index(["R1", "R2", np.nan, "R4"], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(result, expected)

    def test_exclusive_nat_column_indexing(self):
        """``set_index`` works when one of the key columns contains only NaT."""
        # GH 38025
        # test multi indexing when one column exclusively contains NaT values
        df = DataFrame(
            {
                "a": [pd.NaT, pd.NaT, pd.NaT, pd.NaT],
                "b": ["C1", "C2", "C3", "C4"],
                "c": [10, 15, np.nan, 20],
            }
        )
        df = df.set_index(["a", "b"])
        expected = DataFrame(
            {
                "c": [10, 15, np.nan, 20],
            },
            index=[
                Index([pd.NaT, pd.NaT, pd.NaT, pd.NaT], name="a"),
                Index(["C1", "C2", "C3", "C4"], name="b"),
            ],
        )
        tm.assert_frame_equal(df, expected)

    def test_nested_tuples_duplicates(self):
        """Setting via a tuple key or a one-tuple list updates every duplicate row."""
        # GH#30892

        dti = pd.to_datetime(["20190101", "20190101", "20190102"])
        idx = Index(["a", "a", "c"])
        mi = MultiIndex.from_arrays([dti, idx], names=["index1", "index2"])

        df = DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi)

        expected = DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi)

        # tuple key
        df2 = df.copy(deep=True)
        df2.loc[(dti[0], "a"), "c2"] = 1.0
        tm.assert_frame_equal(df2, expected)

        # list containing the same tuple
        df3 = df.copy(deep=True)
        df3.loc[[(dti[0], "a")], "c2"] = 1.0
        tm.assert_frame_equal(df3, expected)

    def test_multiindex_with_datatime_level_preserves_freq(self):
        """Selecting the outer level returns the inner DatetimeIndex with its freq."""
        # https://github.com/pandas-dev/pandas/issues/35563
        idx = Index(range(2), name="A")
        dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
        mi = MultiIndex.from_product([idx, dti])
        df = DataFrame(np.random.randn(14, 2), index=mi)
        result = df.loc[0].index
        tm.assert_index_equal(result, dti)
        assert result.freq == dti.freq

    def test_multiindex_complex(self):
        """``set_index`` with a complex-valued column matches ``from_arrays``."""
        # GH#42145
        complex_data = [1 + 2j, 4 - 3j, 10 - 1j]
        non_complex_data = [3, 4, 5]
        result = DataFrame(
            {
                "x": complex_data,
                "y": non_complex_data,
                "z": non_complex_data,
            }
        )
        result.set_index(["x", "y"], inplace=True)
        expected = DataFrame(
            {"z": non_complex_data},
            index=MultiIndex.from_arrays(
                [complex_data, non_complex_data],
                names=("x", "y"),
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_rename_multiindex_with_duplicates(self):
        """Renaming a level-0 label works when the index has duplicate entries."""
        # GH 38015
        mi = MultiIndex.from_tuples([("A", "cat"), ("B", "cat"), ("B", "cat")])
        df = DataFrame(index=mi)
        df = df.rename(index={"A": "Apple"}, level=0)

        mi2 = MultiIndex.from_tuples([("Apple", "cat"), ("B", "cat"), ("B", "cat")])
        expected = DataFrame(index=mi2)
        tm.assert_frame_equal(df, expected)
|
||||
@@ -0,0 +1,252 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
date_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import (
|
||||
Float64Index,
|
||||
Int64Index,
|
||||
)
|
||||
|
||||
|
||||
class TestMultiIndexPartial:
    """Partial (leading-level) selection and assignment on MultiIndex axes."""

    def test_getitem_partial_int(self):
        """``df[20]`` and ``df[[20]]`` select on the first column level."""
        # GH 12416
        outer = [10, 20]
        inner = ["a", "b"]
        df = DataFrame(index=range(2), columns=MultiIndex.from_product([outer, inner]))

        # with single item
        expected = DataFrame(index=range(2), columns=inner)
        result = df[20]
        tm.assert_frame_equal(result, expected)

        # with list
        kept_columns = MultiIndex.from_product([outer[1:], inner])
        expected = DataFrame(index=range(2), columns=kept_columns)
        result = df[[20]]
        tm.assert_frame_equal(result, expected)

        # missing item:
        with pytest.raises(KeyError, match="1"):
            df[1]
        with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
            df[[1]]

    def test_series_slice_partial(self):
        """Placeholder with no assertions."""
        pass

    def test_xs_partial(
        self,
        multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data,
    ):
        """``xs`` with a partial key agrees with the equivalent ``.loc`` lookup."""
        frame = multiindex_dataframe_random_data
        ymd = multiindex_year_month_day_dataframe_random_data

        via_xs = frame.xs("foo")
        via_loc = frame.loc["foo"]
        expected = frame.T["foo"].T
        tm.assert_frame_equal(via_xs, expected)
        tm.assert_frame_equal(via_xs, via_loc)

        result = ymd.xs((2000, 4))
        expected = ymd.loc[2000, 4]
        tm.assert_frame_equal(result, expected)

        # ex from #1796
        index = MultiIndex(
            levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
            codes=[
                [0, 0, 0, 0, 1, 1, 1, 1],
                [0, 0, 1, 1, 0, 0, 1, 1],
                [0, 1, 0, 1, 0, 1, 0, 1],
            ],
        )
        df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd"))

        # A list key is expected to emit FutureWarning here.
        with tm.assert_produces_warning(FutureWarning):
            result = df.xs(["foo", "one"])
        expected = df.loc["foo", "one"]
        tm.assert_frame_equal(result, expected)

    def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
        """``__getitem__`` with a two-element key on three-level columns."""
        ymd = multiindex_year_month_day_dataframe_random_data.T
        result = ymd[2000, 2]

        # Columns whose second-level code is 1, with the first two levels dropped.
        second_level_match = ymd.columns.codes[1] == 1
        expected = ymd.reindex(columns=ymd.columns[second_level_match])
        expected.columns = expected.columns.droplevel(0).droplevel(0)
        tm.assert_frame_equal(result, expected)

    def test_fancy_slice_partial(
        self,
        multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data,
    ):
        """Label slices on leading levels match positional / code-based selection."""
        frame = multiindex_dataframe_random_data
        result = frame.loc["bar":"baz"]
        expected = frame[3:7]
        tm.assert_frame_equal(result, expected)

        ymd = multiindex_year_month_day_dataframe_random_data
        result = ymd.loc[(2000, 2):(2000, 4)]
        month_codes = ymd.index.codes[1]
        expected = ymd[(month_codes >= 1) & (month_codes <= 3)]
        tm.assert_frame_equal(result, expected)

    def test_getitem_partial_column_select(self):
        """Partial row tuple combined with a column selector in ``.loc``."""
        idx = MultiIndex(
            codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
            levels=[["a", "b"], ["x", "y"], ["p", "q"]],
        )
        df = DataFrame(np.random.rand(3, 2), index=idx)
        row_key = ("a", "y")

        result = df.loc[row_key, :]
        expected = df.loc[row_key]
        tm.assert_frame_equal(result, expected)

        result = df.loc[row_key, [1, 0]]
        expected = df.loc[row_key][[1, 0]]
        tm.assert_frame_equal(result, expected)

        with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
            df.loc[("a", "foo"), :]

    # TODO(ArrayManager) rewrite test to not use .values
    # exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
    @td.skip_array_manager_invalid_test
    def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
        """Partial-key assignment compared with writes through ``.values``."""
        # GH #397
        ymd = multiindex_year_month_day_dataframe_random_data
        df = ymd.copy()
        exp = ymd.copy()

        # The statement order below is significant: each step mutates df/exp in place.
        df.loc[2000, 4] = 0
        exp.loc[2000, 4].values[:] = 0
        tm.assert_frame_equal(df, exp)

        df["A"].loc[2000, 4] = 1
        exp["A"].loc[2000, 4].values[:] = 1
        tm.assert_frame_equal(df, exp)

        df.loc[2000] = 5
        exp.loc[2000].values[:] = 5
        tm.assert_frame_equal(df, exp)

        # this works...for now
        df["A"].iloc[14] = 5
        assert df["A"].iloc[14] == 5

    @pytest.mark.parametrize("dtype", [int, float])
    def test_getitem_intkey_leading_level(
        self, multiindex_year_month_day_dataframe_random_data, dtype
    ):
        """Integer key absent from a numeric leading level raises ``KeyError``."""
        # GH#33355 dont fall-back to positional when leading level is int
        ymd = multiindex_year_month_day_dataframe_random_data
        levels = ymd.index.levels
        ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:])
        ser = ymd["A"]
        mi = ser.index
        assert isinstance(mi, MultiIndex)

        expected_level_type = Int64Index if dtype is int else Float64Index
        assert isinstance(mi.levels[0], expected_level_type)

        assert 14 not in mi.levels[0]
        assert not mi.levels[0]._should_fallback_to_positional
        assert not mi._should_fallback_to_positional

        with pytest.raises(KeyError, match="14"):
            ser[14]
        with pytest.raises(KeyError, match="14"):
            with tm.assert_produces_warning(FutureWarning):
                mi.get_value(ser, 14)

    # ---------------------------------------------------------------------

    def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
        """List and slice keys over ``foo``/``bar`` match two single-label assignments."""
        frame = multiindex_dataframe_random_data

        # DataFrame, list key
        result = frame.copy()
        expected = frame.copy()
        result.loc[["foo", "bar"]] = 0
        expected.loc["foo"] = 0
        expected.loc["bar"] = 0
        tm.assert_frame_equal(result, expected)

        # DataFrame, slice key
        result = frame.copy()
        expected = frame.copy()
        result.loc["foo":"bar"] = 0
        expected.loc["foo"] = 0
        expected.loc["bar"] = 0
        tm.assert_frame_equal(result, expected)

        # Series, list key
        result = frame["A"].copy()
        expected = frame["A"].copy()
        result.loc[["foo", "bar"]] = 0
        expected.loc["foo"] = 0
        expected.loc["bar"] = 0
        tm.assert_series_equal(result, expected)

        # Series, slice key
        result = frame["A"].copy()
        expected = frame["A"].copy()
        result.loc["foo":"bar"] = 0
        expected.loc["foo"] = 0
        expected.loc["bar"] = 0
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer, exp_idx, exp_values",
        [
            (slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]),
            (
                slice(None, "2019-2"),
                date_range("2019", periods=2, freq="MS"),
                [0, 1, 2, 3],
            ),
        ],
    )
    def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
        """Partial date-string slices give the same rows through every access path."""
        # GH: 25165
        level_names = ["x", "y"]
        date_idx = date_range("2019", periods=2, freq="MS")
        df = DataFrame(
            list(range(4)),
            index=MultiIndex.from_product([date_idx, [0, 1]], names=level_names),
        )
        expected = DataFrame(
            exp_values,
            index=MultiIndex.from_product([exp_idx, [0, 1]], names=level_names),
        )

        result = df[indexer]
        tm.assert_frame_equal(result, expected)

        result = df.loc[indexer]
        tm.assert_frame_equal(result, expected)

        result = df.loc(axis=0)[indexer]
        tm.assert_frame_equal(result, expected)

        result = df.loc[indexer, :]
        tm.assert_frame_equal(result, expected)

        # Same check with the date level moved to second position.
        df2 = df.swaplevel(0, 1).sort_index()
        expected = expected.swaplevel(0, 1).sort_index()

        result = df2.loc[:, indexer, :]
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_loc_getitem_partial_both_axis():
    """``df.loc["a", "b"]`` applies a partial key to rows and to columns."""
    # gh-12660
    iterables = [["a", "b"], [2, 1]]
    rows = MultiIndex.from_product(iterables, names=["row1", "row2"])
    columns = MultiIndex.from_product(iterables, names=["col1", "col2"])
    df = DataFrame(np.random.randn(4, 4), index=rows, columns=columns)

    result = df.loc["a", "b"]

    # First two rows, last two columns, with the selected levels dropped.
    block = df.iloc[:2, 2:]
    expected = block.droplevel("row1").droplevel("col1", axis=1)
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,509 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
import pandas.core.common as com
|
||||
|
||||
|
||||
def assert_equal(a, b):
    """Assert that ``a == b`` is truthy."""
    is_equal = a == b
    assert is_equal
|
||||
|
||||
|
||||
class TestMultiIndexSetItem:
|
||||
def check(self, target, indexers, value, compare_fn=assert_equal, expected=None):
    """Assign ``value`` at ``target.loc[indexers]`` and compare what is read back.

    The value read from the same location is passed to ``compare_fn`` along
    with ``expected``; when ``expected`` is None, ``value`` itself is used.
    """
    target.loc[indexers] = value
    read_back = target.loc[indexers]
    wanted = value if expected is None else expected
    compare_fn(read_back, wanted)
|
||||
|
||||
def test_setitem_multiindex(self):
    """Set one cell addressed by ``((time, firm), column)`` on differently filled frames."""
    # GH#7190
    cols = ["A", "w", "l", "a", "x", "X", "d", "profit"]
    index = MultiIndex.from_product(
        [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"]
    )
    t, n = 0, 2
    cell = ((t, n), "X")

    # NaN-filled frame
    df = DataFrame(np.nan, columns=cols, index=index)
    self.check(target=df, indexers=cell, value=0)

    # integer-filled frame
    df = DataFrame(-999, columns=cols, index=index)
    self.check(target=df, indexers=cell, value=1)

    # frame built without data
    df = DataFrame(columns=cols, index=index)
    self.check(target=df, indexers=cell, value=2)

    # gh-7218: assigning with 0-dim arrays
    df = DataFrame(-999, columns=cols, index=index)
    self.check(target=df, indexers=cell, value=np.array(3), expected=3)
|
||||
|
||||
def test_setitem_multiindex2(self):
    """Assign a Series to a boolean-row / list-column selection."""
    # GH#5206
    df = DataFrame(
        np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float
    )
    df["F"] = 99
    row_selection = df["A"] % 2 == 0
    col_selection = ["B", "C"]
    selection = (row_selection, col_selection)

    df.loc[row_selection, col_selection] = df["F"]
    output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"])
    tm.assert_frame_equal(df.loc[row_selection, col_selection], output)

    # Repeat the same assignment through the shared helper.
    self.check(
        target=df,
        indexers=selection,
        value=df["F"],
        compare_fn=tm.assert_frame_equal,
        expected=output,
    )
|
||||
|
||||
def test_setitem_multiindex3(self):
    """Assign DataFrames into a frame whose rows and columns are both MultiIndex."""
    # GH#11372
    idx = MultiIndex.from_product(
        [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")]
    )
    cols = MultiIndex.from_product(
        [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")]
    )

    df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols)

    subidx = MultiIndex.from_tuples(
        [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))]
    )
    subcols = MultiIndex.from_tuples(
        [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))]
    )

    # set a sub-block of rows and columns
    vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols)
    self.check(
        target=df,
        indexers=(subidx, subcols),
        value=vals,
        compare_fn=tm.assert_frame_equal,
    )

    # set all columns
    vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols)
    self.check(
        target=df,
        indexers=(subidx, slice(None, None, None)),
        value=vals,
        compare_fn=tm.assert_frame_equal,
    )

    # identity
    snapshot = df.copy()
    self.check(
        target=df,
        indexers=(df.index, df.columns),
        value=df,
        compare_fn=tm.assert_frame_equal,
        expected=snapshot,
    )
|
||||
|
||||
# TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
# all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
@td.skip_array_manager_not_yet_implemented
def test_multiindex_setitem(self):
    """In-place multiply via a list key works; via a scalar key it raises TypeError."""
    # GH 3738
    # setting with a multi-index right hand side
    arrays = [
        np.array(["bar", "bar", "baz", "qux", "qux", "bar"]),
        np.array(["one", "two", "one", "one", "two", "one"]),
        np.arange(0, 6, 1),
    ]
    unsorted = DataFrame(np.random.randn(6, 3), index=arrays, columns=["A", "B", "C"])
    df_orig = unsorted.sort_index()

    expected = df_orig.loc[["bar"]] * 2
    df = df_orig.copy()
    df.loc[["bar"]] *= 2
    tm.assert_frame_equal(df.loc[["bar"]], expected)

    # raise because these have differing levels
    msg = "cannot align on a multi-index with out specifying the join levels"
    with pytest.raises(TypeError, match=msg):
        df.loc["bar"] *= 2
|
||||
|
||||
def test_multiindex_setitem2(self):
|
||||
|
||||
# from SO
|
||||
# https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation
|
||||
df_orig = DataFrame.from_dict(
|
||||
{
|
||||
"price": {
|
||||
("DE", "Coal", "Stock"): 2,
|
||||
("DE", "Gas", "Stock"): 4,
|
||||
("DE", "Elec", "Demand"): 1,
|
||||
("FR", "Gas", "Stock"): 5,
|
||||
("FR", "Solar", "SupIm"): 0,
|
||||
("FR", "Wind", "SupIm"): 0,
|
||||
}
|
||||
}
|
||||
)
|
||||
df_orig.index = MultiIndex.from_tuples(
|
||||
df_orig.index, names=["Sit", "Com", "Type"]
|
||||
)
|
||||
|
||||
expected = df_orig.copy()
|
||||
expected.iloc[[0, 2, 3]] *= 2
|
||||
|
||||
idx = pd.IndexSlice
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, "Stock"], :] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[idx[:, :, "Stock"], "price"] *= 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_assignment(self):
|
||||
|
||||
# GH3777 part 2
|
||||
|
||||
# mixed dtype
|
||||
df = DataFrame(
|
||||
np.random.randint(5, 10, size=9).reshape(3, 3),
|
||||
columns=list("abc"),
|
||||
index=[[4, 4, 8], [8, 10, 12]],
|
||||
)
|
||||
df["d"] = np.nan
|
||||
arr = np.array([0.0, 1.0])
|
||||
|
||||
df.loc[4, "d"] = arr
|
||||
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
|
||||
|
||||
def test_multiindex_assignment_single_dtype(self, using_array_manager):
    """Partial-key ``.loc`` assignment into an all-int64 frame (GH3777 part 2b).

    Exercised in order: values that cast losslessly to int, values that
    force an upcast, a scalar, two list values of the wrong length that
    must raise, and a length-1 list-like column indexer.
    """
    int_like = np.array([0.0, 1.0])
    df = DataFrame(
        np.random.randint(5, 10, size=9).reshape(3, 3),
        index=[[4, 4, 8], [8, 10, 12]],
        columns=list("abc"),
        dtype=np.int64,
    )
    # values of the first two rows of "c", grabbed before any assignment
    view = df["c"].iloc[:2].values

    # int_like can be losslessly cast to int, so this setitem is inplace
    df.loc[4, "c"] = int_like
    exp = Series(int_like, index=[8, 10], name="c", dtype="int64")
    tm.assert_series_equal(df.loc[4, "c"], exp)
    if not using_array_manager:
        # FIXME(ArrayManager): this correctly preserves dtype,
        #  but incorrectly is not inplace.
        # extra check for inplace-ness
        tm.assert_numpy_array_equal(view, exp.values)

    # int_like + 0.5 cannot be cast losslessly to int, so we upcast
    df.loc[4, "c"] = int_like + 0.5
    exp = exp + 0.5
    tm.assert_series_equal(df.loc[4, "c"], exp)

    # scalar ok
    df.loc[4, "c"] = 10
    exp = Series(10, index=[8, 10], name="c", dtype="float64")
    tm.assert_series_equal(df.loc[4, "c"], exp)

    # invalid assignments: too many values, then too few
    msg = "Must have equal len keys and value when setting with an iterable"
    for wrong_length in ([0, 1, 2, 3], [0]):
        with pytest.raises(ValueError, match=msg):
            df.loc[4, "c"] = wrong_length

    # But with a length-1 listlike column indexer this behaves like
    #  `df.loc[4, "c"] = 0`
    df.loc[4, ["c"]] = [0]
    assert (df.loc[4, "c"] == 0).all()
|
||||
|
||||
def test_groupby_example(self):
|
||||
# groupby example
|
||||
NUM_ROWS = 100
|
||||
NUM_COLS = 10
|
||||
col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())]
|
||||
index_cols = col_names[:5]
|
||||
|
||||
df = DataFrame(
|
||||
np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
|
||||
dtype=np.int64,
|
||||
columns=col_names,
|
||||
)
|
||||
df = df.set_index(index_cols).sort_index()
|
||||
grp = df.groupby(level=index_cols[:4])
|
||||
df["new_col"] = np.nan
|
||||
|
||||
# we are actually operating on a copy here
|
||||
# but in this case, that's ok
|
||||
for name, df2 in grp:
|
||||
new_vals = np.arange(df2.shape[0])
|
||||
df.loc[name, "new_col"] = new_vals
|
||||
|
||||
def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data):
    """Set values on column "A" of the fixture through partial and full tuple keys."""
    ymd = multiindex_year_month_day_dataframe_random_data
    ser = ymd["A"]

    # two-element key: only positions 42..64 are expected to become NaN
    ser[2000, 3] = np.nan
    assert isna(ser.values[slice(42, 65)]).all()
    for untouched in (slice(None, 42), slice(65, None)):
        assert notna(ser.values[untouched]).all()

    # three-element key: a single position
    ser[2000, 3, 10] = np.nan
    assert isna(ser.iloc[49])

    with pytest.raises(KeyError, match="49"):
        # GH#33355 dont fall-back to positional when leading level is int
        ser[49]
|
||||
|
||||
def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data):
    """Boolean-frame ``[]`` get/set on a copy of the transposed fixture.

    Each masked assignment on the frame is replayed on ``mirror`` (the
    array taken from ``df.values`` up front) and the two are compared.
    """
    df = multiindex_dataframe_random_data.T.copy()
    mirror = df.values

    # getitem with a boolean frame behaves like ``where``
    selected = df[df > 0]
    tm.assert_frame_equal(selected, df.where(df > 0))

    # setitem with a boolean frame built from df itself
    for matches, replacement in (
        (lambda data: data > 0, 5),
        (lambda data: data == 5, 0),
    ):
        df[matches(df)] = replacement
        mirror[matches(mirror)] = replacement
        tm.assert_almost_equal(df.values, mirror)

    # a df that needs alignment first
    df[df[:-1] < 0] = 2
    all_but_last = mirror[:-1]
    all_but_last[all_but_last < 0] = 2
    tm.assert_almost_equal(df.values, mirror)

    with pytest.raises(TypeError, match="boolean values only"):
        df[df * 0] = 2
|
||||
|
||||
def test_frame_getitem_setitem_multislice(self):
|
||||
levels = [["t1", "t2"], ["a", "b", "c"]]
|
||||
codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]]
|
||||
midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"])
|
||||
df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx)
|
||||
|
||||
result = df.loc[:, "value"]
|
||||
tm.assert_series_equal(df["value"], result)
|
||||
|
||||
result = df.loc[df.index[1:3], "value"]
|
||||
tm.assert_series_equal(df["value"][1:3], result)
|
||||
|
||||
result = df.loc[:, :]
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
result = df
|
||||
df.loc[:, "value"] = 10
|
||||
result["value"] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
df.loc[:, :] = 10
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
def test_frame_setitem_multi_column(self):
|
||||
df = DataFrame(
|
||||
np.random.randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]]
|
||||
)
|
||||
|
||||
cp = df.copy()
|
||||
cp["a"] = cp["b"]
|
||||
tm.assert_frame_equal(cp["a"], cp["b"])
|
||||
|
||||
# set with ndarray
|
||||
cp = df.copy()
|
||||
cp["a"] = cp["b"].values
|
||||
tm.assert_frame_equal(cp["a"], cp["b"])
|
||||
|
||||
def test_frame_setitem_multi_column2(self):
|
||||
|
||||
# ---------------------------------------
|
||||
# GH#1803
|
||||
columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
|
||||
df = DataFrame(index=[1, 3, 5], columns=columns)
|
||||
|
||||
# Works, but adds a column instead of updating the two existing ones
|
||||
df["A"] = 0.0 # Doesn't work
|
||||
assert (df["A"].values == 0).all()
|
||||
|
||||
# it broadcasts
|
||||
df["B", "1"] = [1, 2, 3]
|
||||
df["A"] = df["B", "1"]
|
||||
|
||||
sliced_a1 = df["A", "1"]
|
||||
sliced_a2 = df["A", "2"]
|
||||
sliced_b1 = df["B", "1"]
|
||||
tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False)
|
||||
tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False)
|
||||
assert sliced_a1.name == ("A", "1")
|
||||
assert sliced_a2.name == ("A", "2")
|
||||
assert sliced_b1.name == ("B", "1")
|
||||
|
||||
def test_loc_getitem_tuple_plus_columns(
|
||||
self, multiindex_year_month_day_dataframe_random_data
|
||||
):
|
||||
# GH #1013
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
df = ymd[:5]
|
||||
|
||||
result = df.loc[(2000, 1, 6), ["A", "B", "C"]]
|
||||
expected = df.loc[2000, 1, 6][["A", "B", "C"]]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_setitem_slice_integers(self, frame_or_series):
|
||||
index = MultiIndex(
|
||||
levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
|
||||
)
|
||||
|
||||
obj = DataFrame(
|
||||
np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"]
|
||||
)
|
||||
obj = tm.get_obj(obj, frame_or_series)
|
||||
|
||||
res = obj.loc[1:2]
|
||||
exp = obj.reindex(obj.index[2:])
|
||||
tm.assert_equal(res, exp)
|
||||
|
||||
obj.loc[1:2] = 7
|
||||
assert (obj.loc[1:2] == 7).values.all()
|
||||
|
||||
def test_setitem_change_dtype(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
dft = frame.T
|
||||
s = dft["foo", "two"]
|
||||
dft["foo", "two"] = s > s.median()
|
||||
tm.assert_series_equal(dft["foo", "two"], s > s.median())
|
||||
# assert isinstance(dft._data.blocks[1].items, MultiIndex)
|
||||
|
||||
reindexed = dft.reindex(columns=[("foo", "two")])
|
||||
tm.assert_series_equal(reindexed["foo", "two"], s > s.median())
|
||||
|
||||
def test_set_column_scalar_with_loc(self, multiindex_dataframe_random_data):
|
||||
frame = multiindex_dataframe_random_data
|
||||
subset = frame.index[[1, 4, 5]]
|
||||
|
||||
frame.loc[subset] = 99
|
||||
assert (frame.loc[subset].values == 99).all()
|
||||
|
||||
col = frame["B"]
|
||||
col[subset] = 97
|
||||
assert (frame.loc[subset, "B"] == 97).all()
|
||||
|
||||
def test_nonunique_assignment_1750(self):
|
||||
df = DataFrame(
|
||||
[[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD")
|
||||
)
|
||||
|
||||
df = df.set_index(["A", "B"])
|
||||
mi = MultiIndex.from_tuples([(1, 1)])
|
||||
|
||||
df.loc[mi, "C"] = "_"
|
||||
|
||||
assert (df.xs((1, 1))["C"] == "_").all()
|
||||
|
||||
def test_astype_assignment_with_dups(self):
|
||||
|
||||
# GH 4686
|
||||
# assignment with dups that has a dtype change
|
||||
cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")])
|
||||
df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object)
|
||||
index = df.index.copy()
|
||||
|
||||
df["A"] = df["A"].astype(np.float64)
|
||||
tm.assert_index_equal(df.index, index)
|
||||
|
||||
def test_setitem_nonmonotonic(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/31449
|
||||
index = MultiIndex.from_tuples(
|
||||
[("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"]
|
||||
)
|
||||
df = DataFrame(data=[0, 1, 2], index=index, columns=["e"])
|
||||
df.loc["a", "e"] = np.arange(99, 101, dtype="int64")
|
||||
expected = DataFrame({"e": [99, 1, 100]}, index=index)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
class TestSetitemWithExpansionMultiIndex:
    """``__setitem__`` calls that add a new column to a frame involving a MultiIndex."""

    def test_setitem_new_column_mixed_depth(self):
        """A bare string key gives the same frame as the key padded with ""."""
        arrays = [
            ["a", "top", "top", "routine1", "routine1", "routine2"],
            ["", "OD", "OD", "result1", "result2", "result1"],
            ["", "wx", "wy", "", "", ""],
        ]
        columns = MultiIndex.from_tuples(sorted(zip(*arrays)))
        df = DataFrame(np.random.randn(4, 6), columns=columns)
        new_values = [1, 2, 3, 4]

        result = df.copy()
        result["b"] = new_values

        expected = df.copy()
        expected["b", "", ""] = new_values

        tm.assert_frame_equal(result, expected)

    def test_setitem_new_column_all_na(self):
        """A Series sharing no index entries with the frame yields an all-NaN column.

        GH#1534
        """
        mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")])
        df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
        unrelated = Series({(1, 1): 1, (1, 2): 2})

        df["new"] = unrelated

        assert df["new"].isna().all()
|
||||
|
||||
|
||||
# df["foo"] select multiple columns -> .values is not a view
@td.skip_array_manager_invalid_test
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
    """Zero the array behind ``df["foo"]`` and read the zeros back from the frame."""
    transposed = multiindex_dataframe_random_data.T

    # this works because we are modifying the underlying array
    # really a no-no
    transposed["foo"].values[:] = 0

    assert (transposed["foo"].values == 0).all()
|
||||
|
||||
|
||||
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
    """Chained assignment on the transposed fixture raises ``SettingWithCopyError``."""
    transposed = multiindex_dataframe_random_data.T
    expected_message = (
        "A value is trying to be set on a copy of a slice from a DataFrame"
    )
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        # will raise/warn as its chained assignment
        transposed["foo"]["one"] = 2
|
||||
|
||||
|
||||
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
    """A chained assignment that raises must leave the frame's contents unchanged."""
    reference = multiindex_dataframe_random_data.T
    working = reference.copy()

    expected_message = (
        "A value is trying to be set on a copy of a slice from a DataFrame"
    )
    with pytest.raises(com.SettingWithCopyError, match=expected_message):
        working["foo"]["one"] = 2

    # the copy must still equal the frame it was copied from
    tm.assert_frame_equal(working, reference)
|
||||
@@ -0,0 +1,769 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import UnsortedIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.indexing.common import _mklbl
|
||||
|
||||
|
||||
class TestMultiIndexSlicers:
|
||||
def test_per_axis_per_level_getitem(self):
    """Per-axis, per-level ``.loc`` selection on MultiIndexed objects (GH6134)."""

    def keys_where(obj, first, third):
        # index tuples whose level-0 label is in `first` and level-2 label in `third`
        return [
            key for key in obj.index.values if key[0] in first and key[2] in third
        ]

    everything = slice(None)
    a1_to_a3 = ("A1", "A2", "A3")

    # example test case
    ix = MultiIndex.from_product(
        [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
    )
    df = DataFrame(np.arange(len(ix)), index=ix)

    # list of labels on the third level
    result = df.loc[(slice("A1", "A3"), everything, ["C1", "C3"]), :]
    expected = df.loc[keys_where(df, a1_to_a3, ("C1", "C3"))]
    tm.assert_frame_equal(result, expected)

    # label slice on the third level
    expected = df.loc[keys_where(df, a1_to_a3, ("C1", "C2", "C3"))]
    result = df.loc[(slice("A1", "A3"), everything, slice("C1", "C3")), :]
    tm.assert_frame_equal(result, expected)

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples(
        [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
    )
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    df = DataFrame(
        np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
    )
    df = df.sort_index(axis=0).sort_index(axis=1)

    # identity
    for full_key in (
        ((everything, everything), everything),
        ((everything, everything), (everything, everything)),
        (everything, (everything, everything)),
    ):
        tm.assert_frame_equal(df.loc[full_key], df)

    # pairs of (.loc key, equivalent .iloc key)
    for loc_key, iloc_key in (
        # index
        (((everything, [1]), everything), [0, 3]),
        (((everything, 1), everything), [0, 3]),
        # columns
        ((everything, (everything, ["foo"])), (everything, [1, 3])),
        # both
        (((everything, 1), (everything, ["foo"])), ([0, 3], [1, 3])),
    ):
        tm.assert_frame_equal(df.loc[loc_key], df.iloc[iloc_key])

    # scalar on the first level of each axis
    result = df.loc["A", "a"]
    expected = DataFrame(
        {"bar": [1, 5, 9], "foo": [0, 4, 8]},
        index=Index([1, 2, 3], name="two"),
        columns=Index(["bar", "foo"], name="lvl1"),
    )
    tm.assert_frame_equal(result, expected)

    result = df.loc[(everything, [1, 2]), :]
    tm.assert_frame_equal(result, df.iloc[[0, 1, 3]])

    # multi-level series
    s = Series(np.arange(len(ix)), index=ix)
    result = s.loc["A1":"A3", :, ["C1", "C3"]]
    expected = s.loc[keys_where(s, a1_to_a3, ("C1", "C3"))]
    tm.assert_series_equal(result, expected)

    # boolean indexers
    result = df.loc[(everything, df.loc[:, ("a", "bar")] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[2, 3]])

    msg = (
        "cannot index with a boolean indexer "
        "that is not the same length as the index"
    )
    with pytest.raises(ValueError, match=msg):
        df.loc[(everything, np.array([True, False])), :]

    with pytest.raises(KeyError, match=r"\[1\] not in index"):
        # slice(None) is on the index, [1] is on the columns, but 1 is
        # not in the columns, so we raise
        # This used to treat [1] as positional GH#16396
        df.loc[everything, [1]]

    # not lexsorted
    assert df.index._lexsort_depth == 2
    df = df.sort_index(level=1, axis=0)
    assert df.index._lexsort_depth == 0

    msg = (
        "MultiIndex slicing requires the index to be "
        r"lexsorted: slicing on levels \[1\], lexsort depth 0"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc[(everything, slice("bar")), :]

    # GH 16734: not sorted, but no real slicing
    result = df.loc[(everything, df.loc[:, ("a", "bar")] > 5), :]
    tm.assert_frame_equal(result, df.iloc[[1, 3], :])
|
||||
|
||||
def test_multiindex_slicers_non_unique(self):
|
||||
|
||||
# GH 7106
|
||||
# non-unique mi index support
|
||||
df = (
|
||||
DataFrame(
|
||||
{
|
||||
"A": ["foo", "foo", "foo", "foo"],
|
||||
"B": ["a", "a", "a", "a"],
|
||||
"C": [1, 2, 1, 3],
|
||||
"D": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# this is equivalent of an xs expression
|
||||
result = df.xs(1, level=2, drop_level=False)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = (
|
||||
DataFrame(
|
||||
{
|
||||
"A": ["foo", "foo", "foo", "foo"],
|
||||
"B": ["a", "a", "a", "a"],
|
||||
"C": [1, 2, 1, 2],
|
||||
"D": [1, 2, 3, 4],
|
||||
}
|
||||
)
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
assert not df.index.is_unique
|
||||
expected = (
|
||||
DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
|
||||
.set_index(["A", "B", "C"])
|
||||
.sort_index()
|
||||
)
|
||||
result = df.loc[(slice(None), slice(None), 1), :]
|
||||
assert not result.index.is_unique
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH12896
|
||||
# numpy-implementation dependent bug
|
||||
ints = [
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
14,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
200000,
|
||||
200000,
|
||||
]
|
||||
n = len(ints)
|
||||
idx = MultiIndex.from_arrays([["a"] * n, ints])
|
||||
result = Series([1] * n, index=idx)
|
||||
result = result.sort_index()
|
||||
result = result.loc[(slice(None), slice(100000))]
|
||||
expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_datetimelike(self):
|
||||
|
||||
# GH 7429
|
||||
# buggy/inconsistent behavior when slicing with datetime-like
|
||||
import datetime
|
||||
|
||||
dates = [
|
||||
datetime.datetime(2012, 1, 1, 12, 12, 12) + datetime.timedelta(days=i)
|
||||
for i in range(6)
|
||||
]
|
||||
freq = [1, 2]
|
||||
index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
|
||||
|
||||
df = DataFrame(
|
||||
np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
|
||||
index=index,
|
||||
columns=list("ABCD"),
|
||||
)
|
||||
|
||||
# multi-axis slicing
|
||||
idx = pd.IndexSlice
|
||||
expected = df.iloc[[0, 2, 4], [0, 1]]
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
slice(1, 1),
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
idx[
|
||||
Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
|
||||
],
|
||||
idx[1:1],
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(
|
||||
slice(
|
||||
Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
|
||||
),
|
||||
1,
|
||||
),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# with strings
|
||||
result = df.loc[
|
||||
(slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
|
||||
slice("A", "B"),
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[
|
||||
(idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
|
||||
]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_multiindex_slicers_edges(self):
|
||||
# GH 8132
|
||||
# various edge cases
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
|
||||
"B": ["B0", "B0", "B1", "B1", "B2"] * 3,
|
||||
"DATE": [
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-06-11",
|
||||
"2013-07-02",
|
||||
"2013-07-09",
|
||||
"2013-07-30",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
"2013-10-01",
|
||||
"2013-07-09",
|
||||
"2013-08-06",
|
||||
"2013-09-03",
|
||||
],
|
||||
"VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
|
||||
}
|
||||
)
|
||||
|
||||
df["DATE"] = pd.to_datetime(df["DATE"])
|
||||
df1 = df.set_index(["A", "B", "DATE"])
|
||||
df1 = df1.sort_index()
|
||||
|
||||
# A1 - Get all values under "A0" and "A1"
|
||||
result = df1.loc[(slice("A1")), :]
|
||||
expected = df1.iloc[0:10]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A2 - Get all values from the start to "A2"
|
||||
result = df1.loc[(slice("A2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A3 - Get all values under "B1" or "B2"
|
||||
result = df1.loc[(slice(None), slice("B1", "B2")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A4 - Get all values between 2013-07-02 and 2013-07-09
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B1 - Get all values in B0 that are also under A0, A1 and A2
|
||||
result = df1.loc[(slice("A2"), slice("B0")), :]
|
||||
expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
|
||||
# the As)
|
||||
result = df1.loc[(slice(None), slice("B2")), :]
|
||||
expected = df1
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B3 - Get all values from B1 to B2 and up to 2013-08-06
|
||||
result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
|
||||
expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# B4 - Same as A4 but the start of the date slice is not a key.
|
||||
# shows indexing on a partial selection slice
|
||||
result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
|
||||
expected = df1.iloc[[1, 2, 6, 7, 12]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_doc_examples(self):
    """Slicer examples from indexing.rst / advanced, tuple and ``IndexSlice`` forms."""
    # test index maker
    idx = pd.IndexSlice
    everything = slice(None)
    c1_c3 = ("C1", "C3")

    # from indexing.rst / advanced
    index = MultiIndex.from_product(
        [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
    )
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    n_rows, n_cols = len(index), len(columns)
    df = DataFrame(
        np.arange(n_rows * n_cols, dtype="int64").reshape((n_rows, n_cols)),
        index=index,
        columns=columns,
    )

    # A1..A3 on the first level, C1/C3 on the third
    result = df.loc[(slice("A1", "A3"), everything, ["C1", "C3"]), :]
    expected = df.loc[
        [
            key
            for key in df.index.values
            if key[0] in ("A1", "A2", "A3") and key[2] in c1_c3
        ]
    ]
    tm.assert_frame_equal(result, expected)
    result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :]
    tm.assert_frame_equal(result, expected)

    # C1/C3 on the third level only
    result = df.loc[(everything, everything, ["C1", "C3"]), :]
    expected = df.loc[[key for key in df.index.values if key[2] in c1_c3]]
    tm.assert_frame_equal(result, expected)
    result = df.loc[idx[:, :, ["C1", "C3"]], :]
    tm.assert_frame_equal(result, expected)

    # not sorted
    msg = (
        "MultiIndex slicing requires the index to be lexsorted: "
        r"slicing on levels \[1\], lexsort depth 1"
    )
    with pytest.raises(UnsortedIndexError, match=msg):
        df.loc["A1", ("a", slice("foo"))]

    # GH 16734: not sorted, but no real slicing
    tm.assert_frame_equal(
        df.loc["A1", (everything, "foo")], df.loc["A1"].iloc[:, [0, 2]]
    )

    df = df.sort_index(axis=1)

    # slicing (results are not checked, these only have to run)
    df.loc["A1", (everything, "foo")]
    df.loc[(everything, everything, ["C1", "C3"]), (everything, "foo")]

    # setitem
    df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
|
||||
|
||||
def test_loc_axis_arguments(self):
    """``df.loc(axis=...)`` with integer and string axis names, plus invalid axes."""
    index = MultiIndex.from_product(
        [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
    )
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    n_rows, n_cols = len(index), len(columns)
    unsorted = DataFrame(
        np.arange(n_rows * n_cols, dtype="int64").reshape((n_rows, n_cols)),
        index=index,
        columns=columns,
    )
    df = unsorted.sort_index().sort_index(axis=1)
    c1_c3 = ("C1", "C3")

    # axis 0
    result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
    expected = df.loc[
        [
            key
            for key in df.index.values
            if key[0] in ("A1", "A2", "A3") and key[2] in c1_c3
        ]
    ]
    tm.assert_frame_equal(result, expected)

    result = df.loc(axis="index")[:, :, ["C1", "C3"]]
    expected = df.loc[[key for key in df.index.values if key[2] in c1_c3]]
    tm.assert_frame_equal(result, expected)

    # axis 1, by number and by name
    for column_axis in (1, "columns"):
        result = df.loc(axis=column_axis)[:, "foo"]
        expected = df.loc[:, (slice(None), "foo")]
        tm.assert_frame_equal(result, expected)

    # invalid axis
    for i in [-1, 2, "foo"]:
        msg = f"No axis named {i} for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.loc(axis=i)[:, :, ["C1", "C3"]]
|
||||
|
||||
def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self):
|
||||
|
||||
# GH29519
|
||||
df = DataFrame(
|
||||
np.arange(27).reshape(3, 9),
|
||||
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
|
||||
)
|
||||
result = df.loc(axis=1)["a1":"a2"]
|
||||
expected = df.iloc[:, :-3]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self):
|
||||
|
||||
# GH29519
|
||||
df = DataFrame(
|
||||
np.arange(27).reshape(3, 9),
|
||||
columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
|
||||
)
|
||||
result = df.loc(axis=1)["a1"]
|
||||
expected = df.iloc[:, :3]
|
||||
expected.columns = ["b1", "b2", "b3"]
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_ax_single_level_indexer_simple_df(self):
|
||||
|
||||
# GH29519
|
||||
# test single level indexing on single index column data frame
|
||||
df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"])
|
||||
result = df.loc(axis=1)["a"]
|
||||
expected = Series(np.array([0, 3, 6]), name="a")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_per_axis_per_level_setitem(self):
    """Per-axis, per-level ``.loc`` assignment on a frame MultiIndexed on both axes.

    Each ``.loc`` assignment is checked against the same assignment made
    through ``.iloc`` on a fresh copy of the starting frame.
    """
    # test index maker
    idx = pd.IndexSlice
    everything = slice(None)
    whole = (everything, everything)
    rows_where_two_is_1 = (everything, 1)
    foo_columns = (everything, ["foo"])

    # test multi-index slicing with per axis and per index controls
    index = MultiIndex.from_tuples(
        [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
    )
    columns = MultiIndex.from_tuples(
        [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
        names=["lvl0", "lvl1"],
    )
    df_orig = DataFrame(
        np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns
    )
    df_orig = df_orig.sort_index(axis=0).sort_index(axis=1)

    def assert_sets_100(loc_key, iloc_key, axis=None):
        # assigning 100 via .loc (optionally .loc(axis=...)) must equal
        # assigning 100 via .iloc at `iloc_key`
        df = df_orig.copy()
        setter = df.loc if axis is None else df.loc(axis=axis)
        setter[loc_key] = 100
        expected = df_orig.copy()
        expected.iloc[iloc_key] = 100
        tm.assert_frame_equal(df, expected)

    # identity
    assert_sets_100((whole, everything), whole)
    assert_sets_100(whole, whole, axis=0)
    assert_sets_100((whole, whole), whole)
    assert_sets_100((everything, whole), whole)

    # index
    assert_sets_100(((everything, [1]), everything), [0, 3])
    assert_sets_100((rows_where_two_is_1, everything), [0, 3])
    assert_sets_100((everything, 1), [0, 3], axis=0)

    # columns
    assert_sets_100((everything, foo_columns), (everything, [1, 3]))

    # both
    assert_sets_100((rows_where_two_is_1, foo_columns), ([0, 3], [1, 3]))
    assert_sets_100((idx[:, 1], idx[:, ["foo"]]), ([0, 3], [1, 3]))
    assert_sets_100(("A", "a"), (slice(0, 3), slice(0, 2)))

    # setting with a list-like
    df = df_orig.copy()
    df.loc[rows_where_two_is_1, foo_columns] = np.array(
        [[100, 100], [100, 100]], dtype="int64"
    )
    expected = df_orig.copy()
    expected.iloc[[0, 3], [1, 3]] = 100
    tm.assert_frame_equal(df, expected)

    # not enough values
    df = df_orig.copy()

    msg = "setting an array element with a sequence."
    with pytest.raises(ValueError, match=msg):
        df.loc[rows_where_two_is_1, foo_columns] = np.array(
            [[100], [100, 100]], dtype="int64"
        )

    msg = "Must have equal len keys and value when setting with an iterable"
    with pytest.raises(ValueError, match=msg):
        df.loc[rows_where_two_is_1, foo_columns] = np.array(
            [100, 100, 100, 100], dtype="int64"
        )

    # with an alignable rhs
    df = df_orig.copy()
    df.loc[rows_where_two_is_1, foo_columns] = (
        df.loc[rows_where_two_is_1, foo_columns] * 5
    )
    expected = df_orig.copy()
    expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
    tm.assert_frame_equal(df, expected)

    # in-place multiply by the selection itself
    df = df_orig.copy()
    df.loc[rows_where_two_is_1, foo_columns] *= df.loc[
        rows_where_two_is_1, foo_columns
    ]
    expected = df_orig.copy()
    expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
    tm.assert_frame_equal(df, expected)

    # rhs carrying an extra ("c", "bah") column; same expected result as above
    rhs = df_orig.loc[rows_where_two_is_1, foo_columns].copy()
    rhs.loc[:, ("c", "bah")] = 10
    df = df_orig.copy()
    df.loc[rows_where_two_is_1, foo_columns] *= rhs
    expected = df_orig.copy()
    expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_multiindex_label_slicing_with_negative_step(self):
|
||||
ser = Series(
|
||||
np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
|
||||
)
|
||||
SLC = pd.IndexSlice
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0])
|
||||
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1])
|
||||
tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1])
|
||||
tm.assert_indexing_slices_equivalent(
|
||||
ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1]
|
||||
)
|
||||
|
||||
def test_multiindex_slice_first_level(self):
|
||||
# GH 12697
|
||||
freq = ["a", "b", "c", "d"]
|
||||
idx = MultiIndex.from_product([freq, np.arange(500)])
|
||||
df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
|
||||
df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
|
||||
result = df_slice.loc["a"]
|
||||
expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df_slice.loc["d"]
|
||||
expected = DataFrame(
|
||||
list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
|
||||
ymd = multiindex_year_month_day_dataframe_random_data
|
||||
s = ymd["A"]
|
||||
result = s[5:]
|
||||
expected = s.reindex(s.index[5:])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
exp = ymd["A"].copy()
|
||||
s[5:] = 0
|
||||
exp.values[5:] = 0
|
||||
tm.assert_numpy_array_equal(s.values, exp.values)
|
||||
|
||||
result = ymd[5:]
|
||||
expected = ymd.reindex(s.index[5:])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_slice_negative_stepsize(self):
|
||||
# GH#38071
|
||||
mi = MultiIndex.from_product([["a", "b"], [0, 1]])
|
||||
df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], index=mi)
|
||||
result = df.loc[("a", slice(None, None, -1)), :]
|
||||
expected = DataFrame(
|
||||
[[3, 4], [1, 2]], index=MultiIndex.from_tuples([("a", 1), ("a", 0)])
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,127 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMultiIndexSorted:
    """Indexing and sorting checks on MultiIndex objects that are not sorted."""

    def test_getitem_multilevel_index_tuple_not_sorted(self):
        # Look up the first row through a slice of the frame's own index.
        level_names = list("abc")
        frame = DataFrame(
            [[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=level_names + ["data"]
        ).set_index(level_names)
        result = frame.loc[frame.index[:1], "data"]

        expected = Series(
            ["x"],
            index=MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"]),
            name="data",
        )
        tm.assert_series_equal(result, expected)

    def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
        transposed = multiindex_dataframe_random_data.sort_index(level=1).T

        # buglet with int typechecking: the slice bound is a numpy integer
        stop = np.int32(3)
        result = transposed.iloc[:, :stop]
        expected = transposed.reindex(columns=transposed.columns[:3])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("key", [None, lambda x: x])
    def test_frame_getitem_not_sorted2(self, key):
        # 13431
        flat = DataFrame(
            {
                "col1": ["b", "d", "b", "a"],
                "col2": [3, 1, 1, 2],
                "data": ["one", "two", "three", "four"],
            }
        )
        df2 = flat.set_index(["col1", "col2"])
        df2_original = df2.copy()

        # Rewrite levels and codes of "col1" in place; each call is expected
        # to emit a FutureWarning and to return None.
        with tm.assert_produces_warning(FutureWarning):
            levels_result = df2.index.set_levels(
                ["b", "d", "a"], level="col1", inplace=True
            )
        assert levels_result is None
        with tm.assert_produces_warning(FutureWarning):
            codes_result = df2.index.set_codes(
                [0, 1, 0, 2], level="col1", inplace=True
            )
        assert codes_result is None

        # same labels as before, but no longer monotonic
        assert not df2.index.is_monotonic
        assert df2_original.index.equals(df2.index)

        expected = df2.sort_index(key=key)
        assert expected.index.is_monotonic

        result = df2.sort_index(level=0, key=key)
        assert result.index.is_monotonic
        tm.assert_frame_equal(result, expected)

    def test_sort_values_key(self, multiindex_dataframe_random_data):
        inner = ["one", "two", "one", "two", "one", "two", "one", "two"]

        outer = ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"]
        unsorted = MultiIndex.from_tuples(zip(outer, inner))
        # sort by third letter
        by_third_letter = unsorted.sort_values(
            key=lambda labels: labels.map(lambda label: label[2])
        )
        result = DataFrame(range(8), index=by_third_letter)

        expected_outer = ["foo", "foo", "bar", "bar", "qux", "qux", "baz", "baz"]
        expected_index = MultiIndex.from_tuples(zip(expected_outer, inner))
        expected = DataFrame(range(8), index=expected_index)

        tm.assert_frame_equal(result, expected)

    def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
        wide = multiindex_dataframe_random_data.T
        wide["foo", "four"] = "foo"

        # boolean mask over the first-level labels of the columns
        first_level = np.array([labels[0] for labels in wide.columns.values])
        is_foo = first_level == "foo"

        # column-wise selection
        expected = wide.reindex(columns=wide.columns[is_foo])
        result = wide["foo"]
        result2 = wide.loc[:, "foo"]
        expected.columns = expected.columns.droplevel(0)
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result2, expected)

        # the same selection row-wise on the transpose
        tall = wide.T
        result = tall.xs("foo")
        result2 = tall.loc["foo"]
        expected = tall.reindex(tall.index[is_foo])
        expected.index = expected.index.droplevel(0)
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result2, expected)

    def test_series_getitem_not_sorted(self):
        outer = ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"]
        inner = ["one", "two", "one", "two", "one", "two", "one", "two"]
        index = MultiIndex.from_tuples(zip(outer, inner))
        ser = Series(np.random.randn(8), index=index)

        # boolean mask over the first-level labels
        first_level = np.array([labels[0] for labels in index.values])

        result = ser["qux"]
        result2 = ser.loc["qux"]
        expected = ser[first_level == "qux"]
        expected.index = expected.index.droplevel(0)
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(result2, expected)
|
||||
@@ -0,0 +1,178 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timezone,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_at_timezone():
    # https://github.com/pandas-dev/pandas/issues/33544
    # Write a tz-aware datetime into a column created from a naive one and
    # compare with an object-dtype frame holding that value.
    aware = datetime(2000, 1, 2, tzinfo=timezone.utc)

    frame = DataFrame({"foo": [datetime(2000, 1, 1)]})
    frame.at[0, "foo"] = aware

    tm.assert_frame_equal(frame, DataFrame({"foo": [aware]}, dtype=object))
|
||||
|
||||
|
||||
def test_selection_methods_of_assigned_col():
    # GH 29282
    frame = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]})
    # column "c" comes from a frame whose index is in reverse order, so the
    # assignment aligns on labels
    reversed_source = DataFrame(data={"c": [7, 8, 9]}, index=[2, 1, 0])
    frame["c"] = reversed_source["c"]
    frame.at[1, "c"] = 11

    col_c = [9, 11, 7]

    # whole frame
    tm.assert_frame_equal(
        frame, DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col_c})
    )
    # scalar access
    assert frame.at[1, "c"] == 11
    # single column as a Series
    tm.assert_series_equal(frame["c"], Series(col_c, name="c"))
    # single column as a one-column frame
    tm.assert_frame_equal(frame[["c"]], DataFrame({"c": col_c}))
|
||||
|
||||
|
||||
class TestAtSetItem:
    """Assignments through ``.at`` on existing labels."""

    def test_at_setitem_mixed_index_assignment(self):
        # GH#19860
        ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])

        # (label written through .at, position read back through .iat, value)
        for label, position, value in (("a", 0, 11), (1, 3, 22)):
            ser.at[label] = value
            assert ser.iat[position] == value

    def test_at_setitem_categorical_missing(self):
        dtype = CategoricalDtype(["foo", "bar"])
        df = DataFrame(index=range(3), columns=range(3), dtype=dtype)
        df.at[1, 1] = "foo"

        # only the centre cell is populated
        all_missing = [np.nan, np.nan, np.nan]
        expected = DataFrame(
            [all_missing, [np.nan, "foo", np.nan], all_missing],
            dtype=dtype,
        )
        tm.assert_frame_equal(df, expected)
|
||||
|
||||
|
||||
class TestAtSetItemWithExpansion:
|
||||
def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture):
|
||||
# GH#25506
|
||||
ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture)
|
||||
result = Series(ts)
|
||||
result.at[1] = ts
|
||||
expected = Series([ts, ts])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestAtWithDuplicates:
    """``.at`` on a frame whose column labels are duplicated."""

    def test_at_with_duplicate_axes_requires_scalar_lookup(self):
        # GH#33041 check that falling back to loc doesn't allow non-scalar
        #  args to slip in
        values = np.random.randn(6).reshape(3, 2)
        df = DataFrame(values, columns=["A", "A"])

        msg = "Invalid call for scalar access"
        # a list of rows, a list of columns, and a row slice
        non_scalar_keys = [[1, 2], (1, ["A"]), (slice(None), "A")]

        # getting
        for key in non_scalar_keys:
            with pytest.raises(ValueError, match=msg):
                df.at[key]

        # setting
        for key in non_scalar_keys:
            with pytest.raises(ValueError, match=msg):
                df.at[key] = 1
|
||||
|
||||
|
||||
class TestAtErrors:
    """Missing labels must raise KeyError rather than fall back to positions."""

    # TODO: De-duplicate/parametrize
    #  test_at_series_raises_key_error2, test_at_frame_raises_key_error2

    def test_at_series_raises_key_error(self, indexer_al):
        # GH#31724 .at should match .loc
        ser = Series([1, 2, 3], index=[3, 2, 1])
        accessor = indexer_al(ser)
        assert accessor[1] == 3

        with pytest.raises(KeyError, match="a"):
            indexer_al(ser)["a"]

    def test_at_frame_raises_key_error(self, indexer_al):
        # GH#31724 .at should match .loc
        df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1])
        accessor = indexer_al(df)
        assert accessor[1, 0] == 3

        # missing row label, then missing column label
        for missing_key in (("a", 0), (1, "a")):
            with pytest.raises(KeyError, match="a"):
                indexer_al(df)[missing_key]

    def test_at_series_raises_key_error2(self, indexer_al):
        # at should not fallback
        # GH#7814
        # GH#31724 .at should match .loc
        ser = Series([1, 2, 3], index=list("abc"))
        accessor = indexer_al(ser)
        assert accessor["a"] == 1

        with pytest.raises(KeyError, match="^0$"):
            indexer_al(ser)[0]

    def test_at_frame_raises_key_error2(self, indexer_al):
        # GH#31724 .at should match .loc
        df = DataFrame({"A": [1, 2, 3]}, index=list("abc"))
        accessor = indexer_al(df)
        assert accessor["a", "A"] == 1

        with pytest.raises(KeyError, match="^0$"):
            indexer_al(df)["a", 0]

    def test_at_getitem_mixed_index_no_fallback(self):
        # GH#19860
        ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])

        # 0 and 4 are valid positions but not labels of the index
        for missing, pattern in ((0, "^0$"), (4, "^4$")):
            with pytest.raises(KeyError, match=pattern):
                ser.at[missing]

    def test_at_categorical_integers(self):
        # CategoricalIndex with integer categories that don't happen to match
        #  the Categorical's codes
        ci = CategoricalIndex([3, 4])
        frame = DataFrame(np.arange(4).reshape(2, 2), index=ci)

        # check both orientations
        for df in (frame, frame.T):
            for key in (0, 1):
                with pytest.raises(KeyError, match=str(key)):
                    df.at[key, key]
|
||||
@@ -0,0 +1,555 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_categorical_dtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
Interval,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import CategoricalDtype as CDT
|
||||
|
||||
|
||||
class TestCategoricalIndex:
|
||||
def setup_method(self, method):
|
||||
|
||||
self.df = DataFrame(
|
||||
{
|
||||
"A": np.arange(6, dtype="int64"),
|
||||
},
|
||||
index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cab")), name="B"),
|
||||
)
|
||||
self.df2 = DataFrame(
|
||||
{
|
||||
"A": np.arange(6, dtype="int64"),
|
||||
},
|
||||
index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"),
|
||||
)
|
||||
|
||||
def test_loc_scalar(self):
    cat_dtype = CDT(list("cab"))

    # getting: a scalar label that occurs three times returns three rows
    selected = self.df.loc["a"]
    labels_a = Series(list("aaa"), name="B").astype(cat_dtype)
    assert labels_a.dtype == cat_dtype
    tm.assert_frame_equal(
        selected, DataFrame({"A": [0, 1, 5]}, index=Index(labels_a))
    )

    # setting: every "a" row is overwritten, the index is unchanged
    df = self.df.copy()
    df.loc["a"] = 20
    labels_all = Series(list("aabbca"), name="B").astype(cat_dtype)
    assert labels_all.dtype == cat_dtype
    tm.assert_frame_equal(
        df, DataFrame({"A": [20, 20, 2, 3, 4, 20]}, index=Index(labels_all))
    )

    # value not in the categories
    with pytest.raises(KeyError, match=r"^'d'$"):
        df.loc["d"]

    # setting a label outside the categories is compared with doing the same
    # on a copy whose index was cast to object first
    enlarged = df.copy()
    expected = enlarged.copy()
    expected.index = expected.index.astype(object)
    expected.loc["d"] = 10
    enlarged.loc["d"] = 10
    tm.assert_frame_equal(enlarged, expected)
|
||||
|
||||
def test_loc_setitem_with_expansion_non_category(self):
|
||||
# Setting-with-expansion with a new key "d" that is not among caegories
|
||||
df = self.df
|
||||
df.loc["a"] = 20
|
||||
|
||||
# Setting a new row on an existing column
|
||||
df3 = df.copy()
|
||||
df3.loc["d", "A"] = 10
|
||||
bidx3 = Index(list("aabbcad"), name="B")
|
||||
expected3 = DataFrame(
|
||||
{
|
||||
"A": [20, 20, 2, 3, 4, 20, 10.0],
|
||||
},
|
||||
index=Index(bidx3),
|
||||
)
|
||||
tm.assert_frame_equal(df3, expected3)
|
||||
|
||||
# Settig a new row _and_ new column
|
||||
df4 = df.copy()
|
||||
df4.loc["d", "C"] = 10
|
||||
expected3 = DataFrame(
|
||||
{
|
||||
"A": [20, 20, 2, 3, 4, 20, np.nan],
|
||||
"C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 10],
|
||||
},
|
||||
index=Index(bidx3),
|
||||
)
|
||||
tm.assert_frame_equal(df4, expected3)
|
||||
|
||||
def test_loc_getitem_scalar_non_category(self):
    # the integer 1 is not one of the index labels
    frame = self.df
    with pytest.raises(KeyError, match="^1$"):
        frame.loc[1]
|
||||
|
||||
def test_slicing(self):
|
||||
cat = Series(Categorical([1, 2, 3, 4]))
|
||||
reverse = cat[::-1]
|
||||
exp = np.array([4, 3, 2, 1], dtype=np.int64)
|
||||
tm.assert_numpy_array_equal(reverse.__array__(), exp)
|
||||
|
||||
df = DataFrame({"value": (np.arange(100) + 1).astype("int64")})
|
||||
df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
|
||||
|
||||
expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10)
|
||||
result = df.iloc[10]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
{"value": np.arange(11, 21).astype("int64")},
|
||||
index=np.arange(10, 20).astype("int64"),
|
||||
)
|
||||
expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
|
||||
result = df.iloc[10:20]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8)
|
||||
result = df.loc[8]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_slicing_and_getting_ops(self):
|
||||
|
||||
# systematically test the slicing operations:
|
||||
# for all slicing ops:
|
||||
# - returning a dataframe
|
||||
# - returning a column
|
||||
# - returning a row
|
||||
# - returning a single value
|
||||
|
||||
cats = Categorical(
|
||||
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]
|
||||
)
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 3, 4, 5, 6, 7]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
# the expected values
|
||||
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
|
||||
idx2 = Index(["j", "k"])
|
||||
values2 = [3, 4]
|
||||
|
||||
# 2:4,: | "j":"k",:
|
||||
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
|
||||
|
||||
# :,"cats" | :,0
|
||||
exp_col = Series(cats, index=idx, name="cats")
|
||||
|
||||
# "j",: | 2,:
|
||||
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j")
|
||||
|
||||
# "j","cats | 2,0
|
||||
exp_val = "b"
|
||||
|
||||
# iloc
|
||||
# frame
|
||||
res_df = df.iloc[2:4, :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"].dtype)
|
||||
|
||||
# row
|
||||
res_row = df.iloc[2, :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col.dtype)
|
||||
|
||||
# single value
|
||||
res_val = df.iloc[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# loc
|
||||
# frame
|
||||
res_df = df.loc["j":"k", :]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"].dtype)
|
||||
|
||||
# row
|
||||
res_row = df.loc["j", :]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
# col
|
||||
res_col = df.loc[:, "cats"]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col.dtype)
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# single value
|
||||
res_val = df.loc["j", df.columns[0]]
|
||||
assert res_val == exp_val
|
||||
|
||||
# iat
|
||||
res_val = df.iat[2, 0]
|
||||
assert res_val == exp_val
|
||||
|
||||
# at
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# fancy indexing
|
||||
exp_fancy = df.iloc[[2]]
|
||||
|
||||
res_fancy = df[df["cats"] == "b"]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
res_fancy = df[df["values"] == 3]
|
||||
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||||
|
||||
# get_value
|
||||
res_val = df.at["j", "cats"]
|
||||
assert res_val == exp_val
|
||||
|
||||
# i : int, slice, or sequence of integers
|
||||
res_row = df.iloc[2]
|
||||
tm.assert_series_equal(res_row, exp_row)
|
||||
assert isinstance(res_row["cats"], str)
|
||||
|
||||
res_df = df.iloc[slice(2, 4)]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"].dtype)
|
||||
|
||||
res_df = df.iloc[[2, 3]]
|
||||
tm.assert_frame_equal(res_df, exp_df)
|
||||
assert is_categorical_dtype(res_df["cats"].dtype)
|
||||
|
||||
res_col = df.iloc[:, 0]
|
||||
tm.assert_series_equal(res_col, exp_col)
|
||||
assert is_categorical_dtype(res_col.dtype)
|
||||
|
||||
res_df = df.iloc[:, slice(0, 2)]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert is_categorical_dtype(res_df["cats"].dtype)
|
||||
|
||||
res_df = df.iloc[:, [0, 1]]
|
||||
tm.assert_frame_equal(res_df, df)
|
||||
assert is_categorical_dtype(res_df["cats"].dtype)
|
||||
|
||||
def test_slicing_doc_examples(self):
|
||||
|
||||
# GH 7918
|
||||
cats = Categorical(
|
||||
["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"]
|
||||
)
|
||||
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||||
values = [1, 2, 2, 2, 3, 4, 5]
|
||||
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||||
|
||||
result = df.iloc[2:4, :]
|
||||
expected = DataFrame(
|
||||
{
|
||||
"cats": Categorical(["b", "b"], categories=["a", "b", "c"]),
|
||||
"values": [2, 2],
|
||||
},
|
||||
index=["j", "k"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.iloc[2:4, :].dtypes
|
||||
expected = Series(["category", "int64"], ["cats", "values"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", "cats"]
|
||||
expected = Series(
|
||||
Categorical(["a", "b", "b"], categories=["a", "b", "c"]),
|
||||
index=["h", "i", "j"],
|
||||
name="cats",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc["h":"j", df.columns[0:1]]
|
||||
expected = DataFrame(
|
||||
{"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])},
|
||||
index=["h", "i", "j"],
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_getitem_listlike_labels(self):
|
||||
# list of labels
|
||||
result = self.df.loc[["c", "a"]]
|
||||
expected = self.df.iloc[[4, 0, 1, 5]]
|
||||
tm.assert_frame_equal(result, expected, check_index_type=True)
|
||||
|
||||
def test_loc_getitem_listlike_unused_category(self):
    # GH#37901 a label that is in index.categories but not in index
    # listlike containing an element in the categories but not in the values
    labels = ["a", "b", "e"]
    pattern = re.escape("['e'] not in index")
    with pytest.raises(KeyError, match=pattern):
        self.df2.loc[labels]
|
||||
|
||||
def test_loc_getitem_label_unused_category(self):
    # element in the categories but not in the values
    frame = self.df2
    with pytest.raises(KeyError, match=r"^'e'$"):
        frame.loc["e"]
|
||||
|
||||
def test_loc_getitem_non_category(self):
    # not all labels in the categories
    labels = ["a", "d"]
    pattern = re.escape("['d'] not in index")
    with pytest.raises(KeyError, match=pattern):
        self.df2.loc[labels]
|
||||
|
||||
def test_loc_setitem_expansion_label_unused_category(self):
|
||||
# assigning with a label that is in the categories but not in the index
|
||||
df = self.df2.copy()
|
||||
df.loc["e"] = 20
|
||||
result = df.loc[["a", "b", "e"]]
|
||||
exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B")
|
||||
expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_listlike_dtypes(self):
    # GH 11586

    # unique categories and codes
    index = CategoricalIndex(["a", "b", "c"])
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)

    # unique slice
    unique_pick = df.loc[["a", "b"]]
    unique_expected = DataFrame(
        {"A": [1, 2], "B": [4, 5]},
        index=CategoricalIndex(["a", "b"], categories=index.categories),
    )
    tm.assert_frame_equal(unique_pick, unique_expected, check_index_type=True)

    # duplicated slice
    repeated_pick = df.loc[["a", "a", "b"]]
    repeated_expected = DataFrame(
        {"A": [1, 1, 2], "B": [4, 4, 5]},
        index=CategoricalIndex(["a", "a", "b"], categories=index.categories),
    )
    tm.assert_frame_equal(repeated_pick, repeated_expected, check_index_type=True)

    # a label that is not in the index at all
    with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
        df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_listlike_dtypes_duplicated_categories_and_codes(self):
    # duplicated categories and codes
    index = CategoricalIndex(["a", "b", "a"])
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index)

    # unique slice: both "a" rows are returned for the single "a" key
    unique_pick = df.loc[["a", "b"]]
    unique_expected = DataFrame(
        {"A": [1, 3, 2], "B": [4, 6, 5]},
        index=CategoricalIndex(["a", "a", "b"]),
    )
    tm.assert_frame_equal(unique_pick, unique_expected, check_index_type=True)

    # duplicated slice: the "a" rows are returned once per "a" key
    repeated_pick = df.loc[["a", "a", "b"]]
    repeated_expected = DataFrame(
        {"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]},
        index=CategoricalIndex(["a", "a", "a", "a", "b"]),
    )
    tm.assert_frame_equal(repeated_pick, repeated_expected, check_index_type=True)

    # a label that is not in the index at all
    with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
        df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_listlike_dtypes_unused_category(self):
    # contains unused category
    all_categories = list("abcde")
    index = CategoricalIndex(["a", "b", "a", "c"], categories=all_categories)
    df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)

    # unique slice; the result index keeps the full category list
    unique_pick = df.loc[["a", "b"]]
    unique_expected = DataFrame(
        {"A": [1, 3, 2], "B": [5, 7, 6]},
        index=CategoricalIndex(["a", "a", "b"], categories=all_categories),
    )
    tm.assert_frame_equal(unique_pick, unique_expected, check_index_type=True)

    # duplicated slice
    repeated_pick = df.loc[["a", "a", "b"]]
    repeated_expected = DataFrame(
        {"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]},
        index=CategoricalIndex(
            ["a", "a", "a", "a", "b"], categories=all_categories
        ),
    )
    tm.assert_frame_equal(repeated_pick, repeated_expected, check_index_type=True)

    # a label that is not in the index at all
    with pytest.raises(KeyError, match=re.escape("['x'] not in index")):
        df.loc[["a", "x"]]
|
||||
|
||||
def test_loc_getitem_listlike_unused_category_raises_keyerror(self):
    # key that is an *unused* category raises
    index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde"))
    df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index)

    # For comparison, check the scalar behavior
    with pytest.raises(KeyError, match="e"):
        df.loc["e"]

    # list-like key containing the unused category
    pattern = re.escape("['e'] not in index")
    with pytest.raises(KeyError, match=pattern):
        df.loc[["a", "e"]]
|
||||
|
||||
def test_ix_categorical_index(self):
|
||||
# GH 12531
|
||||
df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ"))
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
expect = Series(df.loc["A", :], index=cdf.columns, name="A")
|
||||
tm.assert_series_equal(cdf.loc["A", :], expect)
|
||||
|
||||
expect = Series(df.loc[:, "X"], index=cdf.index, name="X")
|
||||
tm.assert_series_equal(cdf.loc[:, "X"], expect)
|
||||
|
||||
exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"])
|
||||
expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index)
|
||||
tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"])
|
||||
expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns)
|
||||
tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
|
||||
|
||||
def test_ix_categorical_index_non_unique(self):
|
||||
|
||||
# non-unique
|
||||
df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX"))
|
||||
cdf = df.copy()
|
||||
cdf.index = CategoricalIndex(df.index)
|
||||
cdf.columns = CategoricalIndex(df.columns)
|
||||
|
||||
exp_index = CategoricalIndex(list("AA"), categories=["A", "B"])
|
||||
expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index)
|
||||
tm.assert_frame_equal(cdf.loc["A", :], expect)
|
||||
|
||||
exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"])
|
||||
expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns)
|
||||
tm.assert_frame_equal(cdf.loc[:, "X"], expect)
|
||||
|
||||
expect = DataFrame(
|
||||
df.loc[["A", "B"], :],
|
||||
columns=cdf.columns,
|
||||
index=CategoricalIndex(list("AAB")),
|
||||
)
|
||||
tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect)
|
||||
|
||||
expect = DataFrame(
|
||||
df.loc[:, ["X", "Y"]],
|
||||
index=cdf.index,
|
||||
columns=CategoricalIndex(list("XXY")),
|
||||
)
|
||||
tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect)
|
||||
|
||||
def test_loc_slice(self):
    # GH9748
    frame = self.df

    # integer bounds are rejected on a CategoricalIndex
    msg = (
        "cannot do slice indexing on CategoricalIndex with these "
        r"indexers \[1\] of type int"
    )
    with pytest.raises(TypeError, match=msg):
        frame.loc[1:5]

    # label bounds select the rows at positions 2, 3 and 4
    by_label = frame.loc["b":"c"]
    by_position = frame.iloc[[2, 3, 4]]
    tm.assert_frame_equal(by_label, by_position)
|
||||
|
||||
def test_loc_and_at_with_categorical_index(self):
|
||||
# GH 20629
|
||||
df = DataFrame(
|
||||
[[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"])
|
||||
)
|
||||
|
||||
s = df[0]
|
||||
assert s.loc["A"] == 1
|
||||
assert s.at["A"] == 1
|
||||
|
||||
assert df.loc["B", 1] == 4
|
||||
assert df.at["B", 1] == 4
|
||||
|
||||
@pytest.mark.parametrize(
    "idx_values",
    [
        # python types
        [1, 2, 3],
        [-1, -2, -3],
        [1.5, 2.5, 3.5],
        [-1.5, -2.5, -3.5],
        # numpy int/uint
        *(np.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_NUMPY_DTYPES),
        # numpy floats
        *(np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in tm.FLOAT_NUMPY_DTYPES),
        # numpy object
        np.array([1, "b", 3.5], dtype=object),
        # pandas scalars
        [Interval(1, 4), Interval(4, 6), Interval(6, 9)],
        [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)],
        [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")],
        # pandas Integer arrays
        *(pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES),
        # other pandas arrays
        pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array,
        pd.date_range("2019-01-01", periods=3).array,
        pd.timedelta_range(start="1d", periods=3).array,
    ],
)
def test_loc_getitem_with_non_string_categories(self, idx_values, ordered):
    # GH-17569
    cat_idx = CategoricalIndex(idx_values, ordered=ordered)
    df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx)

    first_label = idx_values[0]
    first_two = idx_values[:2]
    label_slice = slice(idx_values[0], idx_values[1])

    # scalar selection
    tm.assert_series_equal(
        df.loc[first_label], Series(["foo"], index=["A"], name=idx_values[0])
    )

    # list selection and slice selection return the same first two rows
    first_two_rows = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"])
    tm.assert_frame_equal(df.loc[first_two], first_two_rows)
    tm.assert_frame_equal(df.loc[label_slice], first_two_rows)

    # scalar assignment
    assigned = df.copy()
    assigned.loc[first_label] = "qux"
    tm.assert_frame_equal(
        assigned, DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx)
    )

    after_pair_write = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)

    # list assignment
    assigned = df.copy()
    assigned.loc[first_two, "A"] = ["qux", "qux2"]
    tm.assert_frame_equal(assigned, after_pair_write)

    # slice assignment
    assigned = df.copy()
    assigned.loc[label_slice, "A"] = ["qux", "qux2"]
    tm.assert_frame_equal(assigned, after_pair_write)
|
||||
|
||||
def test_getitem_categorical_with_nan(self):
|
||||
# GH#41933
|
||||
ci = CategoricalIndex(["A", "B", np.nan])
|
||||
|
||||
ser = Series(range(3), index=ci)
|
||||
|
||||
assert ser[np.nan] == 2
|
||||
assert ser.loc[np.nan] == 2
|
||||
|
||||
df = DataFrame(ser)
|
||||
assert df.loc[np.nan, 0] == 2
|
||||
assert df.loc[np.nan][0] == 2
|
||||
@@ -0,0 +1,516 @@
|
||||
from string import ascii_letters as letters
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
import pandas.core.common as com
|
||||
|
||||
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
|
||||
|
||||
|
||||
def random_text(nobs=100):
    """Return a DataFrame with one column, "letters", of random slices of ``letters``.

    Parameters
    ----------
    nobs : int, default 100
        Number of rows to generate.
    """
    # two random cut points per row, ordered so each pair reads (start, stop)
    cuts = np.sort(np.random.randint(len(letters), size=(nobs, 2)), axis=1)
    pieces = [letters[start:stop] for start, stop in cuts]
    return DataFrame(pieces, columns=["letters"])
|
||||
|
||||
|
||||
class TestCaching:
|
||||
def test_slice_consolidate_invalidate_item_cache(self):
|
||||
|
||||
# this is chained assignment, but will 'work'
|
||||
with option_context("chained_assignment", None):
|
||||
|
||||
# #3970
|
||||
df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})
|
||||
|
||||
# Creates a second float block
|
||||
df["cc"] = 0.0
|
||||
|
||||
# caches a reference to the 'bb' series
|
||||
df["bb"]
|
||||
|
||||
# repr machinery triggers consolidation
|
||||
repr(df)
|
||||
|
||||
# Assignment to wrong series
|
||||
df["bb"].iloc[0] = 0.17
|
||||
df._clear_item_cache()
|
||||
tm.assert_almost_equal(df["bb"][0], 0.17)
|
||||
|
||||
@pytest.mark.parametrize("do_ref", [True, False])
|
||||
def test_setitem_cache_updating(self, do_ref):
|
||||
# GH 5424
|
||||
cont = ["one", "two", "three", "four", "five", "six", "seven"]
|
||||
|
||||
df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)})
|
||||
|
||||
# ref the cache
|
||||
if do_ref:
|
||||
df.loc[0, "c"]
|
||||
|
||||
# set it
|
||||
df.loc[7, "c"] = 1
|
||||
|
||||
assert df.loc[0, "c"] == 0.0
|
||||
assert df.loc[7, "c"] == 1.0
|
||||
|
||||
def test_setitem_cache_updating_slices(self):
|
||||
# GH 7084
|
||||
# not updating cache on series setting with slices
|
||||
expected = DataFrame(
|
||||
{"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014")
|
||||
)
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]})
|
||||
|
||||
# loop through df to update out
|
||||
six = Timestamp("5/7/2014")
|
||||
eix = Timestamp("5/9/2014")
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"]
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
# try via a chain indexing
|
||||
# this actually works
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
for ix, row in df.iterrows():
|
||||
v = out[row["C"]][six:eix] + row["D"]
|
||||
out[row["C"]][six:eix] = v
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
|
||||
for ix, row in df.iterrows():
|
||||
out.loc[six:eix, row["C"]] += row["D"]
|
||||
|
||||
tm.assert_frame_equal(out, expected)
|
||||
tm.assert_series_equal(out["A"], expected["A"])
|
||||
|
||||
def test_altering_series_clears_parent_cache(self):
|
||||
# GH #33675
|
||||
df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
|
||||
ser = df["A"]
|
||||
|
||||
assert "A" in df._item_cache
|
||||
|
||||
# Adding a new entry to ser swaps in a new array, so "A" needs to
|
||||
# be removed from df._item_cache
|
||||
ser["c"] = 5
|
||||
assert len(ser) == 3
|
||||
assert "A" not in df._item_cache
|
||||
assert df["A"] is not ser
|
||||
assert len(df["A"]) == 2
|
||||
|
||||
|
||||
class TestChaining:
|
||||
def test_setitem_chained_setfault(self):
|
||||
|
||||
# GH6026
|
||||
data = ["right", "left", "left", "left", "right", "left", "timeout"]
|
||||
mdata = ["right", "left", "left", "left", "right", "left", "none"]
|
||||
|
||||
df = DataFrame({"response": np.array(data)})
|
||||
mask = df.response == "timeout"
|
||||
df.response[mask] = "none"
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
|
||||
|
||||
recarray = np.rec.fromarrays([data], names=["response"])
|
||||
df = DataFrame(recarray)
|
||||
mask = df.response == "timeout"
|
||||
df.response[mask] = "none"
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata}))
|
||||
|
||||
df = DataFrame({"response": data, "response1": data})
|
||||
mask = df.response == "timeout"
|
||||
df.response[mask] = "none"
|
||||
tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))
|
||||
|
||||
# GH 6056
|
||||
expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
|
||||
df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
|
||||
df["A"].iloc[0] = np.nan
|
||||
result = df.head()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
|
||||
df.A.iloc[0] = np.nan
|
||||
result = df.head()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment(self):
|
||||
|
||||
pd.set_option("chained_assignment", "raise")
|
||||
|
||||
# work with the chain
|
||||
expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
|
||||
df = DataFrame(np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64")
|
||||
assert df._is_copy is None
|
||||
|
||||
df["A"][0] = -5
|
||||
df["A"][1] = -6
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_raises(self, using_array_manager):
    """Chained setitem on a mixed-dtype frame raises, except under ArrayManager."""
    # test with the chaining
    frame = DataFrame(
        {
            "A": Series(range(2), dtype="int64"),
            "B": np.array(np.arange(2, 4), dtype=np.float64),
        }
    )
    assert frame._is_copy is None

    if using_array_manager:
        # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
        # a mixed dataframe
        frame["A"][0] = -5
        frame["A"][1] = -6
        expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
        expected["B"] = expected["B"].astype("float64")
        tm.assert_frame_equal(frame, expected)
        return

    with pytest.raises(com.SettingWithCopyError, match=msg):
        frame["A"][0] = -5

    with pytest.raises(com.SettingWithCopyError, match=msg):
        frame["A"][1] = np.nan

    assert frame["A"]._is_copy is None
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_fails(self):
    """Writing into the row returned by ``.loc[0]`` must raise."""
    columns = {
        "A": Series(range(2), dtype="int64"),
        "B": np.array(np.arange(2, 4), dtype=np.float64),
    }
    frame = DataFrame(columns)

    # Using a copy (the chain), fails
    with pytest.raises(com.SettingWithCopyError, match=msg):
        frame.loc[0]["A"] = -5
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_doc_example(self):
    """The documentation example (boolean filter, then column set) must raise."""
    # Doc example
    frame = DataFrame(
        {
            "a": ["one", "one", "two", "three", "two", "one", "six"],
            "c": Series(range(7), dtype="int64"),
        }
    )
    assert frame._is_copy is None

    starts_with_o = frame.a.str.startswith("o")
    with pytest.raises(com.SettingWithCopyError, match=msg):
        frame[starts_with_o]["c"] = 42
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_object_dtype(self, using_array_manager):
    """Chained writes on an object/int frame raise; ``.loc[row, col]`` succeeds."""
    frame = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
    expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})

    with pytest.raises(com.SettingWithCopyError, match=msg):
        frame.loc[0]["A"] = 111

    if using_array_manager:
        # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
        # a mixed dataframe
        frame["A"][0] = 111
    else:
        with pytest.raises(com.SettingWithCopyError, match=msg):
            frame["A"][0] = 111

        frame.loc[0, "A"] = 111

    tm.assert_frame_equal(frame, expected)
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_is_copy_pickle(self):
|
||||
|
||||
# gh-5475: Make sure that is_copy is picked up reconstruction
|
||||
df = DataFrame({"A": [1, 2]})
|
||||
assert df._is_copy is None
|
||||
|
||||
with tm.ensure_clean("__tmp__pickle") as path:
|
||||
df.to_pickle(path)
|
||||
df2 = pd.read_pickle(path)
|
||||
df2["B"] = df2["A"]
|
||||
df2["B"] = df2["A"]
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_setting_entire_column(self):
    """Replacing a whole column of an explicit copy must not raise."""
    # gh-5597: a spurious raise as we are setting the entire column here
    frame = random_text(100000)

    # a positional list take is always flagged as a copy
    for positions in ([0, 1, 2], [0, 1, 2, 4]):
        taken = frame.iloc[positions]
        assert taken._is_copy is not None

    # an explicit copy carries no flag
    long_rows = frame.letters.apply(lambda text: len(text) > 10)
    frame = frame.loc[long_rows].copy()

    assert frame._is_copy is None
    frame["letters"] = frame["letters"].apply(str.lower)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_implicit_take(self):
    """Replacing a whole column of a boolean-filtered frame must not raise."""
    # Implicitly take
    frame = random_text(100000)
    long_rows = frame.letters.apply(lambda text: len(text) > 10)
    frame = frame.loc[long_rows]

    # the filtered frame is flagged as a copy, yet the column set goes through
    assert frame._is_copy is not None
    frame["letters"] = frame["letters"].apply(str.lower)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_implicit_take2(self):
    """Setting a full column via ``.loc`` on a filtered frame clears its copy flag."""
    # Implicitly take 2
    frame = random_text(100000)
    long_rows = frame.letters.apply(lambda text: len(text) > 10)

    frame = frame.loc[long_rows]
    assert frame._is_copy is not None
    frame.loc[:, "letters"] = frame["letters"].apply(str.lower)

    # Should be ok even though it's a copy!
    assert frame._is_copy is None

    frame["letters"] = frame["letters"].apply(str.lower)
    assert frame._is_copy is None
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_str(self):
    """A masked ``.loc`` read-modify-write on one column must not raise."""
    frame = random_text(100000)
    long_rows = frame.letters.apply(lambda text: len(text) > 10)
    lowered = frame.loc[long_rows, "letters"].apply(str.lower)
    frame.loc[long_rows, "letters"] = lowered
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_is_copy(self):
|
||||
|
||||
# an identical take, so no copy
|
||||
df = DataFrame({"a": [1]}).dropna()
|
||||
assert df._is_copy is None
|
||||
df["a"] += 1
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_sorting(self):
|
||||
|
||||
df = DataFrame(np.random.randn(10, 4))
|
||||
ser = df.iloc[:, 0].sort_values()
|
||||
|
||||
tm.assert_series_equal(ser, df.iloc[:, 0].sort_values())
|
||||
tm.assert_series_equal(ser, df[0].sort_values())
|
||||
|
||||
@pytest.mark.arm_slow
|
||||
def test_detect_chained_assignment_false_positives(self):
|
||||
|
||||
# see gh-6025: false positives
|
||||
df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]})
|
||||
str(df)
|
||||
|
||||
df["column1"] = df["column1"] + "b"
|
||||
str(df)
|
||||
|
||||
df = df[df["column2"] != 8]
|
||||
str(df)
|
||||
|
||||
df["column1"] = df["column1"] + "c"
|
||||
str(df)
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_undefined_column(self):
    """Setting a column on an ``iloc`` row slice must raise."""
    # from SO:
    # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
    frame = DataFrame(np.arange(0, 9), columns=["count"])
    frame["group"] = "b"

    with pytest.raises(com.SettingWithCopyError, match=msg):
        frame.iloc[0:5]["group"] = "a"
|
||||
|
||||
@pytest.mark.arm_slow
def test_detect_chained_assignment_changing_dtype(self, using_array_manager):
    """Chained writes on a mixed frame raise whether or not the dtype would change."""
    # Mixed type setting but same dtype & changing dtype
    frame = DataFrame(
        {
            "A": date_range("20130101", periods=5),
            "B": np.random.randn(5),
            "C": np.arange(5, dtype="int64"),
            "D": ["a", "b", "c", "d", "e"],
        }
    )

    # a string into the string column "D", then into the int column "C"
    for column in ("D", "C"):
        with pytest.raises(com.SettingWithCopyError, match=msg):
            frame.loc[2][column] = "foo"

    if using_array_manager:
        # INFO(ArrayManager) for ArrayManager it doesn't matter if it's
        # changing the dtype or not
        frame["C"][2] = "foo"
        assert frame.loc[2, "C"] == "foo"
    else:
        with pytest.raises(com.SettingWithCopyError, match=msg):
            frame["C"][2] = "foo"
|
||||
|
||||
def test_setting_with_copy_bug(self):
    """A masked write into a column-list selection must raise."""
    # operating on a copy
    frame = DataFrame(
        {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
    )
    missing_c = pd.isna(frame.c)

    with pytest.raises(com.SettingWithCopyError, match=msg):
        frame[["c"]][missing_c] = frame[["b"]][missing_c]
|
||||
|
||||
def test_setting_with_copy_bug_no_warning(self):
|
||||
# invalid warning as we are returning a new object
|
||||
# GH 8730
|
||||
df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])})
|
||||
df2 = df1[["x"]]
|
||||
|
||||
# this should not raise
|
||||
df2["y"] = ["g", "h", "i"]
|
||||
|
||||
def test_detect_chained_assignment_warnings_errors(self):
    """The same chained write warns in "warn" mode and raises in "raise" mode."""
    frame = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})

    with option_context("chained_assignment", "warn"), tm.assert_produces_warning(
        com.SettingWithCopyWarning
    ):
        frame.loc[0]["A"] = 111

    with option_context("chained_assignment", "raise"), pytest.raises(
        com.SettingWithCopyError, match=msg
    ):
        frame.loc[0]["A"] = 111
|
||||
|
||||
def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
|
||||
# xref gh-13017.
|
||||
with option_context("chained_assignment", "warn"):
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"])
|
||||
|
||||
with tm.assert_produces_warning(com.SettingWithCopyWarning):
|
||||
df.c.loc[df.c > 0] = None
|
||||
|
||||
expected = DataFrame(
|
||||
[[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"]
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
|
||||
def test_detect_chained_assignment_warning_stacklevel(self, rhs):
|
||||
# GH#42570
|
||||
df = DataFrame(np.arange(25).reshape(5, 5))
|
||||
chained = df.loc[:3]
|
||||
with option_context("chained_assignment", "warn"):
|
||||
with tm.assert_produces_warning(com.SettingWithCopyWarning) as t:
|
||||
chained[2] = rhs
|
||||
assert t[0].filename == __file__
|
||||
|
||||
# TODO(ArrayManager) fast_xs with array-like scalars is not yet working
|
||||
@td.skip_array_manager_not_yet_implemented
|
||||
def test_chained_getitem_with_lists(self):
|
||||
|
||||
# GH6394
|
||||
# Regression in chained getitem indexing with embedded list-like from
|
||||
# 0.12
|
||||
|
||||
df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]})
|
||||
expected = df["A"].iloc[2]
|
||||
result = df.loc[2, "A"]
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
result2 = df.iloc[2]["A"]
|
||||
tm.assert_numpy_array_equal(result2, expected)
|
||||
result3 = df["A"].loc[2]
|
||||
tm.assert_numpy_array_equal(result3, expected)
|
||||
result4 = df["A"].iloc[2]
|
||||
tm.assert_numpy_array_equal(result4, expected)
|
||||
|
||||
def test_cache_updating(self):
|
||||
# GH 4939, make sure to update the cache on setitem
|
||||
|
||||
df = tm.makeDataFrame()
|
||||
df["A"] # cache series
|
||||
df.loc["Hello Friend"] = df.iloc[0]
|
||||
assert "Hello Friend" in df["A"].index
|
||||
assert "Hello Friend" in df["B"].index
|
||||
|
||||
def test_cache_updating2(self):
|
||||
# 10264
|
||||
df = DataFrame(
|
||||
np.zeros((5, 5), dtype="int64"),
|
||||
columns=["a", "b", "c", "d", "e"],
|
||||
index=range(5),
|
||||
)
|
||||
df["f"] = 0
|
||||
df.f.values[3] = 1
|
||||
|
||||
df.f.values[3] = 2
|
||||
expected = DataFrame(
|
||||
np.zeros((5, 6), dtype="int64"),
|
||||
columns=["a", "b", "c", "d", "e", "f"],
|
||||
index=range(5),
|
||||
)
|
||||
expected.at[3, "f"] = 2
|
||||
tm.assert_frame_equal(df, expected)
|
||||
expected = Series([0, 0, 0, 2, 0], name="f")
|
||||
tm.assert_series_equal(df.f, expected)
|
||||
|
||||
def test_iloc_setitem_chained_assignment(self):
|
||||
# GH#3970
|
||||
with option_context("chained_assignment", None):
|
||||
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
|
||||
df["cc"] = 0.0
|
||||
|
||||
ck = [True] * len(df)
|
||||
|
||||
df["bb"].iloc[0] = 0.13
|
||||
|
||||
# GH#3970 this lookup used to break the chained setting to 0.15
|
||||
df.iloc[ck]
|
||||
|
||||
df["bb"].iloc[0] = 0.15
|
||||
assert df["bb"].iloc[0] == 0.15
|
||||
|
||||
def test_getitem_loc_assignment_slice_state(self):
|
||||
# GH 13569
|
||||
df = DataFrame({"a": [10, 20, 30]})
|
||||
df["a"].loc[4] = 40
|
||||
tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]}))
|
||||
tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a"))
|
||||
@@ -0,0 +1,105 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.api.indexers import check_array_indexer
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer, expected",
    [
        # integer
        ([1, 2], np.array([1, 2], dtype=np.intp)),
        (np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)),
        (pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)),
        (pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)),
        # boolean
        ([True, False, True], np.array([True, False, True], dtype=np.bool_)),
        (np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)),
        (
            pd.array([True, False, True], dtype="boolean"),
            np.array([True, False, True], dtype=np.bool_),
        ),
        # other
        ([], np.array([], dtype=np.intp)),
    ],
)
def test_valid_input(indexer, expected):
    """Valid integer, boolean and empty indexers come back as the expected ndarray."""
    target = np.array([1, 2, 3])
    tm.assert_numpy_array_equal(check_array_indexer(target, indexer), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")]
)
def test_boolean_na_returns_indexer(indexer):
    """A missing entry in a boolean indexer comes back as ``False``."""
    # https://github.com/pandas-dev/pandas/issues/31503
    target = np.array([1, 2, 3])
    wanted = np.array([True, False, False], dtype=bool)

    tm.assert_numpy_array_equal(check_array_indexer(target, indexer), wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer",
    [
        [True, False],
        pd.array([True, False], dtype="boolean"),
        np.array([True, False], dtype=np.bool_),
    ],
)
def test_bool_raise_length(indexer):
    """A boolean indexer shorter than the array raises ``IndexError``."""
    target = np.array([1, 2, 3])

    with pytest.raises(IndexError, match="Boolean index has wrong length"):
        check_array_indexer(target, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")]
)
def test_int_raise_missing_values(indexer):
    """An integer indexer containing a missing value raises ``ValueError``."""
    target = np.array([1, 2, 3])
    pattern = "Cannot index with an integer indexer containing NA values"

    with pytest.raises(ValueError, match=pattern):
        check_array_indexer(target, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "indexer",
    [
        [0.0, 1.0],
        np.array([1.0, 2.0], dtype="float64"),
        np.array([True, False], dtype=object),
        pd.Index([True, False], dtype=object),
    ],
)
def test_raise_invalid_array_dtypes(indexer):
    """Float and object-dtype indexers raise ``IndexError``."""
    target = np.array([1, 2, 3])
    pattern = "arrays used as indices must be of integer or boolean type"

    with pytest.raises(IndexError, match=pattern):
        check_array_indexer(target, indexer)
|
||||
|
||||
|
||||
def test_raise_nullable_string_dtype(nullable_string_dtype):
|
||||
indexer = pd.array(["a", "b"], dtype=nullable_string_dtype)
|
||||
arr = np.array([1, 2, 3])
|
||||
|
||||
msg = "arrays used as indices must be of integer or boolean type"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
check_array_indexer(arr, indexer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("indexer", [None, Ellipsis, slice(0, 3), (None,)])
def test_pass_through_non_array_likes(indexer):
    """Indexers that are not array-like are returned as they were given."""
    target = np.array([1, 2, 3])
    returned = check_array_indexer(target, indexer)

    assert returned == indexer
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,154 @@
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDatetimeIndex:
|
||||
def test_indexing_with_datetime_tz(self):
|
||||
|
||||
# GH#8260
|
||||
# support datetime64 with tz
|
||||
|
||||
idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
|
||||
dr = date_range("20130110", periods=3)
|
||||
df = DataFrame({"A": idx, "B": dr})
|
||||
df["C"] = idx
|
||||
df.iloc[1, 1] = pd.NaT
|
||||
df.iloc[1, 2] = pd.NaT
|
||||
|
||||
expected = Series(
|
||||
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT],
|
||||
index=list("ABC"),
|
||||
dtype="object",
|
||||
name=1,
|
||||
)
|
||||
|
||||
# indexing
|
||||
result = df.iloc[1]
|
||||
tm.assert_series_equal(result, expected)
|
||||
result = df.loc[1]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_indexing_fast_xs(self):
|
||||
# indexing - fast_xs
|
||||
df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")})
|
||||
result = df.iloc[5]
|
||||
expected = Series(
|
||||
[Timestamp("2014-01-06 00:00:00+0000", tz="UTC")], index=["a"], name=5
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc[5]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# indexing - boolean
|
||||
result = df[df.a > df.a[3]]
|
||||
expected = df.iloc[4:]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_consistency_with_tz_aware_scalar(self):
|
||||
# xef gh-12938
|
||||
# various ways of indexing the same tz-aware scalar
|
||||
df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame()
|
||||
|
||||
df = pd.concat([df, df]).reset_index(drop=True)
|
||||
expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels")
|
||||
|
||||
result = df[0][0]
|
||||
assert result == expected
|
||||
|
||||
result = df.iloc[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.loc[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.iat[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df.at[0, 0]
|
||||
assert result == expected
|
||||
|
||||
result = df[0].loc[0]
|
||||
assert result == expected
|
||||
|
||||
result = df[0].at[0]
|
||||
assert result == expected
|
||||
|
||||
def test_indexing_with_datetimeindex_tz(self, indexer_sl):
|
||||
|
||||
# GH 12050
|
||||
# indexing on a series with a datetimeindex with tz
|
||||
index = date_range("2015-01-01", periods=2, tz="utc")
|
||||
|
||||
ser = Series(range(2), index=index, dtype="int64")
|
||||
|
||||
# list-like indexing
|
||||
|
||||
for sel in (index, list(index)):
|
||||
# getitem
|
||||
result = indexer_sl(ser)[sel]
|
||||
expected = ser.copy()
|
||||
if sel is not index:
|
||||
expected.index = expected.index._with_freq(None)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# setitem
|
||||
result = ser.copy()
|
||||
indexer_sl(result)[sel] = 1
|
||||
expected = Series(1, index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# single element indexing
|
||||
|
||||
# getitem
|
||||
assert indexer_sl(ser)[index[1]] == 1
|
||||
|
||||
# setitem
|
||||
result = ser.copy()
|
||||
indexer_sl(result)[index[1]] = 5
|
||||
expected = Series([0, 5], index=index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_nanosecond_getitem_setitem_with_tz(self):
|
||||
# GH 11679
|
||||
data = ["2016-06-28 08:30:00.123456789"]
|
||||
index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]")
|
||||
df = DataFrame({"a": [10]}, index=index)
|
||||
result = df.loc[df.index[0]]
|
||||
expected = Series(10, index=["a"], name=df.index[0])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.copy()
|
||||
result.loc[df.index[0], "a"] = -1
|
||||
expected = DataFrame(-1, index=index, columns=["a"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_str_slice_millisecond_resolution(self, frame_or_series):
|
||||
# GH#33589
|
||||
|
||||
keys = [
|
||||
"2017-10-25T16:25:04.151",
|
||||
"2017-10-25T16:25:04.252",
|
||||
"2017-10-25T16:50:05.237",
|
||||
"2017-10-25T16:50:05.238",
|
||||
]
|
||||
obj = frame_or_series(
|
||||
[1, 2, 3, 4],
|
||||
index=[Timestamp(x) for x in keys],
|
||||
)
|
||||
result = obj[keys[1] : keys[2]]
|
||||
expected = frame_or_series(
|
||||
[2, 3],
|
||||
index=[
|
||||
Timestamp(keys[1]),
|
||||
Timestamp(keys[2]),
|
||||
],
|
||||
)
|
||||
tm.assert_equal(result, expected)
|
||||
@@ -0,0 +1,697 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
RangeIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import (
|
||||
Float64Index,
|
||||
Int64Index,
|
||||
)
|
||||
|
||||
|
||||
def gen_obj(klass, index):
    """Build a test object labelled by ``index``.

    Returns a Series of ``0..len(index)-1`` when ``klass`` is ``Series``;
    otherwise a square DataFrame of random normals that uses ``index`` for
    both rows and columns.
    """
    size = len(index)
    if klass is Series:
        return Series(np.arange(size), index=index)
    return DataFrame(np.random.randn(size, size), index=index, columns=index)
|
||||
|
||||
|
||||
class TestFloatIndexers:
|
||||
def check(self, result, original, indexer, getitem):
    """
    Compare ``result`` with a positional lookup on ``original``.

    A Series is always looked up along its only axis. A frame is looked
    up by column when ``getitem`` is true and by row otherwise.
    """
    by_column = getitem and not isinstance(original, Series)
    if by_column:
        expected = original.iloc[:, indexer]
    else:
        expected = original.iloc[indexer]

    tm.assert_almost_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "index_func",
    [
        tm.makeStringIndex,
        tm.makeUnicodeIndex,
        tm.makeCategoricalIndex,
        tm.makeDateIndex,
        tm.makeTimedeltaIndex,
        tm.makePeriodIndex,
    ],
)
def test_scalar_non_numeric(self, index_func, frame_or_series, indexer_sl):
    """A float key on a non-numeric index: lookup raises, membership is false, setting adds a label."""
    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors

    i = index_func(5)
    s = gen_obj(frame_or_series, i)

    # getting
    # the dot is escaped so the pattern matches only the literal key "3.0"
    with pytest.raises(KeyError, match=r"^3\.0$"):
        indexer_sl(s)[3.0]

    # contains
    assert 3.0 not in s

    # setting with the float key
    s2 = s.copy()
    indexer_sl(s2)[3.0] = 10

    if indexer_sl is tm.setitem:
        # plain setitem puts the new label on the last axis
        assert 3.0 in s2.axes[-1]
    elif indexer_sl is tm.loc:
        # .loc puts the new label on the first axis
        assert 3.0 in s2.axes[0]
    else:
        assert 3.0 not in s2.axes[0]
        assert 3.0 not in s2.axes[-1]
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"index_func",
|
||||
[
|
||||
tm.makeStringIndex,
|
||||
tm.makeUnicodeIndex,
|
||||
tm.makeCategoricalIndex,
|
||||
tm.makeDateIndex,
|
||||
tm.makeTimedeltaIndex,
|
||||
tm.makePeriodIndex,
|
||||
],
|
||||
)
|
||||
def test_scalar_non_numeric_series_fallback(self, index_func):
|
||||
# fallsback to position selection, series only
|
||||
i = index_func(5)
|
||||
s = Series(np.arange(len(i)), index=i)
|
||||
s[3]
|
||||
with pytest.raises(KeyError, match="^3.0$"):
|
||||
s[3.0]
|
||||
|
||||
def test_scalar_with_mixed(self, indexer_sl):
|
||||
|
||||
s2 = Series([1, 2, 3], index=["a", "b", "c"])
|
||||
s3 = Series([1, 2, 3], index=["a", "b", 1.5])
|
||||
|
||||
# lookup in a pure string index with an invalid indexer
|
||||
|
||||
with pytest.raises(KeyError, match="^1.0$"):
|
||||
indexer_sl(s2)[1.0]
|
||||
|
||||
with pytest.raises(KeyError, match=r"^1\.0$"):
|
||||
indexer_sl(s2)[1.0]
|
||||
|
||||
result = indexer_sl(s2)["b"]
|
||||
expected = 2
|
||||
assert result == expected
|
||||
|
||||
# mixed index so we have label
|
||||
# indexing
|
||||
with pytest.raises(KeyError, match="^1.0$"):
|
||||
indexer_sl(s3)[1.0]
|
||||
|
||||
if indexer_sl is not tm.loc:
|
||||
# __getitem__ falls back to positional
|
||||
result = s3[1]
|
||||
expected = 2
|
||||
assert result == expected
|
||||
|
||||
with pytest.raises(KeyError, match=r"^1\.0$"):
|
||||
indexer_sl(s3)[1.0]
|
||||
|
||||
result = indexer_sl(s3)[1.5]
|
||||
expected = 3
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
def test_scalar_integer(self, index_func, frame_or_series, indexer_sl):
    """Scalar float keys on an integer index are treated as the equal integer."""
    getitem = indexer_sl is not tm.loc

    # integer index
    labels = index_func(5)
    obj = gen_obj(frame_or_series, labels)

    # coerce to equal int
    fetched = indexer_sl(obj)[3.0]
    self.check(fetched, obj, 3, getitem)

    # pick the comparison and the expected value for the set-then-get round trip
    if isinstance(obj, Series):

        def compare(x, y):
            assert x == y

        expected = 100
    else:
        compare = tm.assert_series_equal
        fill = 100 if getitem else 100.0
        expected = Series(fill, index=range(len(obj)), name=3)

    updated = obj.copy()
    indexer_sl(updated)[3.0] = 100

    # reading back with the float and with the int must agree
    for key in (3.0, 3):
        fetched = indexer_sl(updated)[key]
        compare(fetched, expected)
|
||||
|
||||
@pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
def test_scalar_integer_contains_float(self, index_func, frame_or_series):
    """Membership test with a float key on an integer-indexed object."""
    # contains
    # integer index
    container = gen_obj(frame_or_series, index_func(5))

    # coerce to equal int
    assert 3.0 in container
|
||||
|
||||
def test_scalar_float(self, frame_or_series):
    """Scalar float keys work on a float index through ``loc`` and plain item access."""
    # scalar float indexers work on a float index
    labels = Index(np.arange(5.0))
    obj = gen_obj(frame_or_series, labels)

    # assert all operations except for iloc are ok
    key = labels[3]
    for accessor in [tm.loc, tm.setitem]:
        getitem = accessor is not tm.loc

        # getting
        fetched = accessor(obj)[key]
        self.check(fetched, obj, 3, getitem)

        # setting
        duplicate = obj.copy()

        fetched = accessor(duplicate)[key]
        self.check(fetched, obj, 3, getitem)

        # random float is a KeyError
        with pytest.raises(KeyError, match=r"^3\.5$"):
            accessor(obj)[3.5]

    # contains
    assert 3.0 in obj

    # iloc succeeds with an integer
    positional = obj.iloc[3]
    duplicate = obj.copy()

    duplicate.iloc[3] = positional
    fetched = duplicate.iloc[3]
    self.check(fetched, obj, 3, False)
|
||||
|
||||
@pytest.mark.parametrize(
    "index_func",
    [
        tm.makeStringIndex,
        tm.makeUnicodeIndex,
        tm.makeDateIndex,
        tm.makeTimedeltaIndex,
        tm.makePeriodIndex,
    ],
)
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
def test_slice_non_numeric(self, index_func, idx, frame_or_series, indexer_sli):
    """Slices with float bounds raise ``TypeError`` on non-numeric indexes."""
    # GH 4892
    # float_indexers should raise exceptions
    # on appropriate Index types & accessors

    labels = index_func(5)
    obj = gen_obj(frame_or_series, labels)
    positional = indexer_sli is tm.iloc

    # getitem
    if positional:
        pattern = (
            "cannot do positional indexing "
            rf"on {type(labels).__name__} with these indexers \[(3|4)\.0\] of "
            "type float"
        )
    else:
        pattern = (
            "cannot do slice indexing "
            rf"on {type(labels).__name__} with these indexers "
            r"\[(3|4)(\.0)?\] "
            r"of type (float|int)"
        )
    with pytest.raises(TypeError, match=pattern):
        indexer_sli(obj)[idx]

    # setitem
    if positional:
        # otherwise we keep the same message as above
        pattern = "slice indices must be integers or None or have an __index__ method"
    with pytest.raises(TypeError, match=pattern):
        indexer_sli(obj)[idx] = 0
|
||||
|
||||
def test_slice_integer(self):
    """Float slices via .loc on integer indexes; __getitem__ must raise."""
    # same as above, but for Integer based indexes
    # these coerce to a like integer
    # oob indicates if we are out of bounds
    # of positional indexing
    for index, oob in [
        (Int64Index(range(5)), False),
        (RangeIndex(5), False),
        (Int64Index(range(5)) + 10, True),
    ]:

        # s is an in-range index
        s = Series(range(5), index=index)

        # getitem
        for idx in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:

            result = s.loc[idx]

            # these are all label indexing
            # except getitem which is positional
            # empty
            if oob:
                indexer = slice(0, 0)
            else:
                indexer = slice(3, 5)
            self.check(result, s, indexer, False)

        # getitem out-of-bounds
        for idx in [slice(-6, 6), slice(-6.0, 6.0)]:

            result = s.loc[idx]

            # these are all label indexing
            # except getitem which is positional
            # empty
            if oob:
                indexer = slice(0, 0)
            else:
                indexer = slice(-6, 6)
            self.check(result, s, indexer, False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers \[-6\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[slice(-6.0, 6.0)]

        # getitem odd floats
        for idx, res1 in [
            (slice(2.5, 4), slice(3, 5)),
            (slice(2, 3.5), slice(2, 4)),
            (slice(2.5, 3.5), slice(3, 4)),
        ]:

            result = s.loc[idx]
            if oob:
                res = slice(0, 0)
            else:
                res = res1

            self.check(result, s, res, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                rf"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)])
def test_integer_positional_indexing(self, idx):
    """make sure that we are raising on positional indexing
    w.r.t. an integer index
    """
    s = Series(range(2, 6), index=range(2, 6))

    result = s[2:4]
    expected = s.iloc[2:4]
    tm.assert_series_equal(result, expected)

    klass = RangeIndex
    msg = (
        "cannot do (slice|positional) indexing "
        rf"on {klass.__name__} with these indexers \[(2|4)\.0\] of "
        "type float"
    )
    with pytest.raises(TypeError, match=msg):
        s[idx]
    with pytest.raises(TypeError, match=msg):
        s.iloc[idx]
|
||||
|
||||
@pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
def test_slice_integer_frame_getitem(self, index_func):
    """Float slices on a DataFrame: .loc is accepted, __getitem__ raises."""
    # similar to above, but on the getitem dim (of a DataFrame)
    index = index_func(5)

    s = DataFrame(np.random.randn(5, 2), index=index)

    # getitem
    for idx in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:

        result = s.loc[idx]
        indexer = slice(0, 2)
        self.check(result, s, indexer, False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers \[(0|1)\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[idx]

    # getitem out-of-bounds
    for idx in [slice(-10, 10), slice(-10.0, 10.0)]:

        result = s.loc[idx]
        self.check(result, s, slice(-10, 10), True)

    # positional indexing
    msg = (
        "cannot do slice indexing "
        rf"on {type(index).__name__} with these indexers \[-10\.0\] of "
        "type float"
    )
    with pytest.raises(TypeError, match=msg):
        s[slice(-10.0, 10.0)]

    # getitem odd floats
    for idx, res in [
        (slice(0.5, 1), slice(1, 2)),
        (slice(0, 0.5), slice(0, 1)),
        (slice(0.5, 1.5), slice(1, 2)),
    ]:

        result = s.loc[idx]
        self.check(result, s, res, False)

        # positional indexing
        msg = (
            "cannot do slice indexing "
            rf"on {type(index).__name__} with these indexers \[0\.5\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
@pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex])
def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func):
    """Float slices set through .loc, but raise through DataFrame[]."""
    # similar to above, but on the getitem dim (of a DataFrame)
    index = index_func(5)

    s = DataFrame(np.random.randn(5, 2), index=index)

    # setitem
    sc = s.copy()
    sc.loc[idx] = 0
    result = sc.loc[idx].values.ravel()
    assert (result == 0).all()

    # positional indexing
    msg = (
        "cannot do slice indexing "
        rf"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
        "type float"
    )
    with pytest.raises(TypeError, match=msg):
        s[idx] = 0

    with pytest.raises(TypeError, match=msg):
        s[idx]
|
||||
|
||||
@pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)])
def test_slice_float(self, idx, frame_or_series, indexer_sl):
    """Slice get/set with float-ish bounds on a float index."""
    # same as above, but for floats
    index = Index(np.arange(5.0)) + 0.1
    s = gen_obj(frame_or_series, index)

    expected = s.iloc[3:4]

    # getitem
    result = indexer_sl(s)[idx]
    assert isinstance(result, type(s))
    tm.assert_equal(result, expected)

    # setitem
    s2 = s.copy()
    indexer_sl(s2)[idx] = 0
    result = indexer_sl(s2)[idx].values.ravel()
    assert (result == 0).all()
|
||||
|
||||
def test_floating_index_doc_example(self):
|
||||
|
||||
index = Index([1.5, 2, 3, 4.5, 5])
|
||||
s = Series(range(5), index=index)
|
||||
assert s[3] == 2
|
||||
assert s.loc[3] == 2
|
||||
assert s.iloc[3] == 3
|
||||
|
||||
def test_floating_misc(self, indexer_sl):
    """Scalar, slice and list-like indexing on a float-indexed Series."""
    # related 236
    # scalar/slicing of a float index
    s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)

    # label based slicing
    result = indexer_sl(s)[1.0:3.0]
    expected = Series(1, index=[2.5])
    tm.assert_series_equal(result, expected)

    # exact indexing when found

    result = indexer_sl(s)[5.0]
    assert result == 2

    result = indexer_sl(s)[5]
    assert result == 2

    # value not found (and no fallbacking at all)

    # scalar integers
    with pytest.raises(KeyError, match=r"^4$"):
        indexer_sl(s)[4]

    # fancy floats/integers create the correct entry (as nan)
    # fancy tests
    expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
    for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]:  # float
        tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

    expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
    for fancy_idx in [[5, 0], np.array([5, 0])]:
        tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

    # all should return the same as we are slicing 'the same'
    result1 = indexer_sl(s)[2:5]
    result2 = indexer_sl(s)[2.0:5.0]
    result3 = indexer_sl(s)[2.0:5]
    result4 = indexer_sl(s)[2.1:5]
    tm.assert_series_equal(result1, result2)
    tm.assert_series_equal(result1, result3)
    tm.assert_series_equal(result1, result4)

    expected = Series([1, 2], index=[2.5, 5.0])
    result = indexer_sl(s)[2:5]

    tm.assert_series_equal(result, expected)

    # list selection
    result1 = indexer_sl(s)[[0.0, 5, 10]]
    result2 = s.iloc[[0, 2, 4]]
    tm.assert_series_equal(result1, result2)

    with pytest.raises(KeyError, match="not in index"):
        indexer_sl(s)[[1.6, 5, 10]]

    with pytest.raises(KeyError, match="not in index"):
        indexer_sl(s)[[0, 1, 2]]

    result = indexer_sl(s)[[2.5, 5]]
    tm.assert_series_equal(result, Series([1, 2], index=[2.5, 5.0]))

    result = indexer_sl(s)[[2.5]]
    tm.assert_series_equal(result, Series([1], index=[2.5]))
|
||||
|
||||
def test_float64index_slicing_bug(self):
|
||||
# GH 5557, related to slicing a float index
|
||||
ser = {
|
||||
256: 2321.0,
|
||||
1: 78.0,
|
||||
2: 2716.0,
|
||||
3: 0.0,
|
||||
4: 369.0,
|
||||
5: 0.0,
|
||||
6: 269.0,
|
||||
7: 0.0,
|
||||
8: 0.0,
|
||||
9: 0.0,
|
||||
10: 3536.0,
|
||||
11: 0.0,
|
||||
12: 24.0,
|
||||
13: 0.0,
|
||||
14: 931.0,
|
||||
15: 0.0,
|
||||
16: 101.0,
|
||||
17: 78.0,
|
||||
18: 9643.0,
|
||||
19: 0.0,
|
||||
20: 0.0,
|
||||
21: 0.0,
|
||||
22: 63761.0,
|
||||
23: 0.0,
|
||||
24: 446.0,
|
||||
25: 0.0,
|
||||
26: 34773.0,
|
||||
27: 0.0,
|
||||
28: 729.0,
|
||||
29: 78.0,
|
||||
30: 0.0,
|
||||
31: 0.0,
|
||||
32: 3374.0,
|
||||
33: 0.0,
|
||||
34: 1391.0,
|
||||
35: 0.0,
|
||||
36: 361.0,
|
||||
37: 0.0,
|
||||
38: 61808.0,
|
||||
39: 0.0,
|
||||
40: 0.0,
|
||||
41: 0.0,
|
||||
42: 6677.0,
|
||||
43: 0.0,
|
||||
44: 802.0,
|
||||
45: 0.0,
|
||||
46: 2691.0,
|
||||
47: 0.0,
|
||||
48: 3582.0,
|
||||
49: 0.0,
|
||||
50: 734.0,
|
||||
51: 0.0,
|
||||
52: 627.0,
|
||||
53: 70.0,
|
||||
54: 2584.0,
|
||||
55: 0.0,
|
||||
56: 324.0,
|
||||
57: 0.0,
|
||||
58: 605.0,
|
||||
59: 0.0,
|
||||
60: 0.0,
|
||||
61: 0.0,
|
||||
62: 3989.0,
|
||||
63: 10.0,
|
||||
64: 42.0,
|
||||
65: 0.0,
|
||||
66: 904.0,
|
||||
67: 0.0,
|
||||
68: 88.0,
|
||||
69: 70.0,
|
||||
70: 8172.0,
|
||||
71: 0.0,
|
||||
72: 0.0,
|
||||
73: 0.0,
|
||||
74: 64902.0,
|
||||
75: 0.0,
|
||||
76: 347.0,
|
||||
77: 0.0,
|
||||
78: 36605.0,
|
||||
79: 0.0,
|
||||
80: 379.0,
|
||||
81: 70.0,
|
||||
82: 0.0,
|
||||
83: 0.0,
|
||||
84: 3001.0,
|
||||
85: 0.0,
|
||||
86: 1630.0,
|
||||
87: 7.0,
|
||||
88: 364.0,
|
||||
89: 0.0,
|
||||
90: 67404.0,
|
||||
91: 9.0,
|
||||
92: 0.0,
|
||||
93: 0.0,
|
||||
94: 7685.0,
|
||||
95: 0.0,
|
||||
96: 1017.0,
|
||||
97: 0.0,
|
||||
98: 2831.0,
|
||||
99: 0.0,
|
||||
100: 2963.0,
|
||||
101: 0.0,
|
||||
102: 854.0,
|
||||
103: 0.0,
|
||||
104: 0.0,
|
||||
105: 0.0,
|
||||
106: 0.0,
|
||||
107: 0.0,
|
||||
108: 0.0,
|
||||
109: 0.0,
|
||||
110: 0.0,
|
||||
111: 0.0,
|
||||
112: 0.0,
|
||||
113: 0.0,
|
||||
114: 0.0,
|
||||
115: 0.0,
|
||||
116: 0.0,
|
||||
117: 0.0,
|
||||
118: 0.0,
|
||||
119: 0.0,
|
||||
120: 0.0,
|
||||
121: 0.0,
|
||||
122: 0.0,
|
||||
123: 0.0,
|
||||
124: 0.0,
|
||||
125: 0.0,
|
||||
126: 67744.0,
|
||||
127: 22.0,
|
||||
128: 264.0,
|
||||
129: 0.0,
|
||||
260: 197.0,
|
||||
268: 0.0,
|
||||
265: 0.0,
|
||||
269: 0.0,
|
||||
261: 0.0,
|
||||
266: 1198.0,
|
||||
267: 0.0,
|
||||
262: 2629.0,
|
||||
258: 775.0,
|
||||
257: 0.0,
|
||||
263: 0.0,
|
||||
259: 0.0,
|
||||
264: 163.0,
|
||||
250: 10326.0,
|
||||
251: 0.0,
|
||||
252: 1228.0,
|
||||
253: 0.0,
|
||||
254: 2769.0,
|
||||
255: 0.0,
|
||||
}
|
||||
|
||||
# smoke test for the repr
|
||||
s = Series(ser)
|
||||
result = s.value_counts()
|
||||
str(result)
|
||||
@@ -0,0 +1,48 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
period_range,
|
||||
)
|
||||
|
||||
|
||||
def test_iat(float_frame):
    """Every cell read positionally via .iat equals the label-based .at read."""
    for i, row in enumerate(float_frame.index):
        for j, col in enumerate(float_frame.columns):
            result = float_frame.iat[i, j]
            expected = float_frame.at[row, col]
            assert result == expected
|
||||
|
||||
|
||||
def test_iat_duplicate_columns():
    """.iat must resolve positions even when column labels are duplicated."""
    # https://github.com/pandas-dev/pandas/issues/11754
    df = DataFrame([[1, 2]], columns=["x", "x"])
    assert df.iat[0, 0] == 1
|
||||
|
||||
|
||||
def test_iat_getitem_series_with_period_index():
    """.iat[0] on a period-indexed Series matches lookup by the first label."""
    # GH#4390, iat incorrectly indexing
    index = period_range("1/1/2001", periods=10)
    ser = Series(np.random.randn(10), index=index)
    expected = ser[index[0]]
    result = ser.iat[0]
    assert expected == result
|
||||
|
||||
|
||||
def test_iat_setitem_item_cache_cleared(indexer_ial):
    """A cached column Series must see later scalar writes to the frame."""
    # GH#45684
    data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)}
    df = DataFrame(data).copy()
    # grab the column first so it is held while the writes below happen
    ser = df["y"]

    # previously this iat setting would split the block and fail to clear
    # the item_cache.
    indexer_ial(df)[7, 0] = 9999

    indexer_ial(df)[7, 1] = 1234

    assert df.iat[7, 1] == 1234
    assert ser.iloc[-1] == 1234
    assert df.iloc[-1, -1] == 1234
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,61 @@
|
||||
# Tests aimed at pandas.core.indexers
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.indexers import (
|
||||
is_scalar_indexer,
|
||||
length_of_indexer,
|
||||
validate_indices,
|
||||
)
|
||||
|
||||
|
||||
def test_length_of_indexer():
    """A boolean mask with one True entry has indexer length 1."""
    arr = np.zeros(4, dtype=bool)
    arr[0] = 1
    result = length_of_indexer(arr)
    assert result == 1
|
||||
|
||||
|
||||
def test_is_scalar_indexer():
    """is_scalar_indexer accepts integer (tuples) only, matching ``ndim``."""
    indexer = (0, 1)
    assert is_scalar_indexer(indexer, 2)
    assert not is_scalar_indexer(indexer[0], 2)

    # any array component makes the indexer non-scalar
    indexer = (np.array([2]), 1)
    assert not is_scalar_indexer(indexer, 2)

    indexer = (np.array([2]), np.array([3]))
    assert not is_scalar_indexer(indexer, 2)

    indexer = (np.array([2]), np.array([3, 4]))
    assert not is_scalar_indexer(indexer, 2)

    assert not is_scalar_indexer(slice(None), 1)

    # 1-D case: a bare integer or a 1-tuple both count as scalar
    indexer = 0
    assert is_scalar_indexer(indexer, 1)

    indexer = (0,)
    assert is_scalar_indexer(indexer, 1)
|
||||
|
||||
|
||||
class TestValidateIndices:
    """Checks for validate_indices(indices, n)."""

    def test_validate_indices_ok(self):
        """Valid inputs, including empty and all -1 indices, do not raise."""
        indices = np.asarray([0, 1])
        validate_indices(indices, 2)
        validate_indices(indices[:0], 0)
        validate_indices(np.array([-1, -1]), 0)

    def test_validate_indices_low(self):
        """An index of -2 is rejected with ValueError."""
        indices = np.asarray([0, -2])
        with pytest.raises(ValueError, match="'indices' contains"):
            validate_indices(indices, 2)

    def test_validate_indices_high(self):
        """An index equal to n is rejected with IndexError."""
        indices = np.asarray([0, 1, 2])
        with pytest.raises(IndexError, match="indices are out"):
            validate_indices(indices, 2)

    def test_validate_indices_empty(self):
        """Non-negative indices against n == 0 are rejected with IndexError."""
        with pytest.raises(IndexError, match="indices are out"):
            validate_indices(np.array([0, 1]), 0)
|
||||
@@ -0,0 +1,987 @@
|
||||
""" test fancy indexing & misc """
|
||||
|
||||
from datetime import datetime
|
||||
import re
|
||||
import weakref
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_float_dtype,
|
||||
is_integer_dtype,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
NaT,
|
||||
Series,
|
||||
date_range,
|
||||
offsets,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import Float64Index
|
||||
from pandas.tests.indexing.common import _mklbl
|
||||
from pandas.tests.indexing.test_floats import gen_obj
|
||||
|
||||
# ------------------------------------------------------------------------
|
||||
# Indexing test cases
|
||||
|
||||
|
||||
class TestFancy:
|
||||
"""pure get/set item & fancy indexing"""
|
||||
|
||||
def test_setitem_ndarray_1d(self):
    """Setting via .loc with a 1-D ndarray requires matching lengths (GH5508)."""
    # GH5508

    # len of indexer vs length of the 1d ndarray
    df = DataFrame(index=Index(np.arange(1, 11)))
    df["foo"] = np.zeros(10, dtype=np.float64)
    df["bar"] = np.zeros(10, dtype=complex)

    # invalid
    msg = "Must have equal len keys and value when setting with an iterable"
    with pytest.raises(ValueError, match=msg):
        df.loc[df.index[2:5], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

    # valid
    df.loc[df.index[2:6], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0])

    result = df.loc[df.index[2:6], "bar"]
    expected = Series(
        [2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name="bar"
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_setitem_ndarray_1d_2(self):
    """Row-slice assignment of a 1-D ndarray raises ValueError here (GH5508)."""
    # GH5508

    # dtype getting changed?
    df = DataFrame(index=Index(np.arange(1, 11)))
    df["foo"] = np.zeros(10, dtype=np.float64)
    df["bar"] = np.zeros(10, dtype=complex)

    msg = "Must have equal len keys and value when setting with an iterable"
    with pytest.raises(ValueError, match=msg):
        df[2:5] = np.arange(1, 4) * 1j
|
||||
|
||||
def test_getitem_ndarray_3d(
    self, index, frame_or_series, indexer_sli, using_array_manager
):
    """Indexing with a 3-D ndarray key must raise (GH 25567).

    The expected message depends on the object type, the accessor and the
    index type, so all applicable alternatives are collected and joined
    into a single regex.
    """
    # GH 25567
    obj = gen_obj(frame_or_series, index)
    idxr = indexer_sli(obj)
    nd3 = np.random.randint(5, size=(2, 2, 2))

    msgs = []
    if frame_or_series is Series and indexer_sli in [tm.setitem, tm.iloc]:
        msgs.append(r"Wrong number of dimensions. values.ndim > ndim \[3 > 1\]")
        if using_array_manager:
            msgs.append("Passed array should be 1-dimensional")
    if frame_or_series is Series or indexer_sli is tm.iloc:
        msgs.append(r"Buffer has wrong number of dimensions \(expected 1, got 3\)")
        if using_array_manager:
            msgs.append("indexer should be 1-dimensional")
    if indexer_sli is tm.loc or (
        frame_or_series is Series and indexer_sli is tm.setitem
    ):
        msgs.append("Cannot index with multidimensional key")
    if frame_or_series is DataFrame and indexer_sli is tm.setitem:
        msgs.append("Index data must be 1-dimensional")
    if isinstance(index, pd.IntervalIndex) and indexer_sli is tm.iloc:
        msgs.append("Index data must be 1-dimensional")
    if isinstance(index, (pd.TimedeltaIndex, pd.DatetimeIndex, pd.PeriodIndex)):
        msgs.append("Data must be 1-dimensional")
    if len(index) == 0 or isinstance(index, pd.MultiIndex):
        msgs.append("positional indexers are out-of-bounds")
    if type(index) is Index and not isinstance(index._values, np.ndarray):
        # e.g. Int64
        msgs.append("values must be a 1D array")

        # string[pyarrow]
        msgs.append("only handle 1-dimensional arrays")

    msg = "|".join(msgs)

    potential_errors = (IndexError, ValueError, NotImplementedError)
    with pytest.raises(potential_errors, match=msg):
        idxr[nd3]
|
||||
|
||||
def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli):
    """Setting with a 3-D ndarray key must raise ValueError (GH 25567)."""
    # GH 25567
    obj = gen_obj(frame_or_series, index)
    idxr = indexer_sli(obj)
    nd3 = np.random.randint(5, size=(2, 2, 2))

    # both accessor branches expect the same exception type; only the
    # message differs
    err = ValueError
    if indexer_sli is tm.iloc:
        msg = f"Cannot set values with ndim > {obj.ndim}"
    else:
        msg = "|".join(
            [
                r"Buffer has wrong number of dimensions \(expected 1, got 3\)",
                "Cannot set values with ndim > 1",
                "Index data must be 1-dimensional",
                "Data must be 1-dimensional",
                "Array conditional must be same shape as self",
            ]
        )

    with pytest.raises(err, match=msg):
        idxr[nd3] = 0
|
||||
|
||||
def test_getitem_ndarray_0d(self):
|
||||
# GH#24924
|
||||
key = np.array(0)
|
||||
|
||||
# dataframe __getitem__
|
||||
df = DataFrame([[1, 2], [3, 4]])
|
||||
result = df[key]
|
||||
expected = Series([1, 3], name=0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# series __getitem__
|
||||
ser = Series([1, 2])
|
||||
result = ser[key]
|
||||
assert result == 1
|
||||
|
||||
def test_inf_upcast(self):
    """Using np.inf as a row label yields a float index (GH 16957)."""
    # GH 16957
    # We should be able to use np.inf as a key
    # np.inf should cause an index to convert to float

    # Test with np.inf in rows
    df = DataFrame(columns=[0])
    df.loc[1] = 1
    df.loc[2] = 2
    df.loc[np.inf] = 3

    # make sure we can look up the value
    assert df.loc[np.inf, 0] == 3

    result = df.index
    expected = Float64Index([1, 2, np.inf])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_setitem_dtype_upcast(self):
    """Writing a string into a float64 column via .loc (GH3216)."""
    # GH3216
    df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
    df["c"] = np.nan
    assert df["c"].dtype == np.float64

    df.loc[0, "c"] = "foo"
    expected = DataFrame(
        [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]
    )
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize("val", [3.14, "wxyz"])
def test_setitem_dtype_upcast2(self, val):
    """Upcasting one int64 cell leaves the other columns integer (GH10280)."""
    # GH10280
    df = DataFrame(
        np.arange(6, dtype="int64").reshape(2, 3),
        index=list("ab"),
        columns=["foo", "bar", "baz"],
    )

    left = df.copy()
    left.loc["a", "bar"] = val
    right = DataFrame(
        [[0, val, 2], [3, 4, 5]],
        index=list("ab"),
        columns=["foo", "bar", "baz"],
    )

    tm.assert_frame_equal(left, right)
    assert is_integer_dtype(left["foo"])
    assert is_integer_dtype(left["baz"])
|
||||
|
||||
def test_setitem_dtype_upcast3(self):
    """Writing a string into one float cell leaves the other columns float."""
    left = DataFrame(
        np.arange(6, dtype="int64").reshape(2, 3) / 10.0,
        index=list("ab"),
        columns=["foo", "bar", "baz"],
    )
    left.loc["a", "bar"] = "wxyz"

    right = DataFrame(
        [[0, "wxyz", 0.2], [0.3, 0.4, 0.5]],
        index=list("ab"),
        columns=["foo", "bar", "baz"],
    )

    tm.assert_frame_equal(left, right)
    assert is_float_dtype(left["foo"])
    assert is_float_dtype(left["baz"])
|
||||
|
||||
def test_dups_fancy_indexing(self):
    """List selection with duplicated column labels returns every match (GH 3455)."""
    # GH 3455

    df = tm.makeCustomDataframe(10, 3)
    df.columns = ["a", "a", "b"]
    result = df[["b", "a"]].columns
    expected = Index(["b", "a", "a"])
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_dups_fancy_indexing_across_dtypes(self):
|
||||
|
||||
# across dtypes
|
||||
df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa"))
|
||||
df.head()
|
||||
str(df)
|
||||
result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]])
|
||||
result.columns = list("aaaaaaa") # GH#3468
|
||||
|
||||
# GH#3509 smoke tests for indexing with duplicate columns
|
||||
df.iloc[:, 4]
|
||||
result.iloc[:, 4]
|
||||
|
||||
tm.assert_frame_equal(df, result)
|
||||
|
||||
def test_dups_fancy_indexing_not_in_order(self):
    """.loc with a label list on a duplicated index; missing labels raise."""
    # GH 3561, dups not in selected order
    df = DataFrame(
        {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")},
        index=["A", "A", "B", "C"],
    )
    rows = ["C", "B"]
    expected = DataFrame(
        {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]}, index=rows
    )
    result = df.loc[rows]
    tm.assert_frame_equal(result, expected)

    result = df.loc[Index(rows)]
    tm.assert_frame_equal(result, expected)

    rows = ["C", "B", "E"]
    with pytest.raises(KeyError, match="not in index"):
        df.loc[rows]

    # see GH5553, make sure we use the right indexer
    rows = ["F", "G", "H", "C", "B", "E"]
    with pytest.raises(KeyError, match="not in index"):
        df.loc[rows]
|
||||
|
||||
def test_dups_fancy_indexing_only_missing_label(self):
    """A list holding only a missing label raises KeyError on a duplicated index."""
    # List containing only missing label
    dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
    with pytest.raises(
        KeyError,
        match=re.escape(
            "\"None of [Index(['E'], dtype='object')] are in the [index]\""
        ),
    ):
        dfnu.loc[["E"]]
|
||||
|
||||
@pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")])
def test_dups_fancy_indexing_missing_label(self, vals):
    """A repeated indexer containing a missing label raises KeyError (GH 4619)."""
    # GH 4619; duplicate indexer with missing label
    df = DataFrame({"A": vals})
    with pytest.raises(KeyError, match="not in index"):
        df.loc[[0, 8, 0]]
|
||||
|
||||
def test_dups_fancy_indexing_non_unique(self):
    """Non-unique index with a non-unique selector that has a missing label."""
    # non unique with non unique selector
    df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
    with pytest.raises(KeyError, match="not in index"):
        df.loc[["A", "A", "E"]]
|
||||
|
||||
def test_dups_fancy_indexing2(self):
    """Column selection with duplicated labels plus a missing one raises (GH 5835)."""
    # GH 5835
    # dups on index and missing values
    df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"])

    with pytest.raises(KeyError, match="not in index"):
        df.loc[:, ["A", "B", "C"]]
|
||||
|
||||
def test_dups_fancy_indexing3(self):
|
||||
|
||||
# GH 6504, multi-axis indexing
|
||||
df = DataFrame(
|
||||
np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"]
|
||||
)
|
||||
|
||||
expected = df.iloc[0:6]
|
||||
result = df.loc[[1, 2]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df
|
||||
result = df.loc[:, ["a", "b"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df.iloc[0:6, :]
|
||||
result = df.loc[[1, 2], ["a", "b"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_duplicate_int_indexing(self, indexer_sl):
    """List lookup of a duplicated integer label returns every match (GH 17347)."""
    # GH 17347
    ser = Series(range(3), index=[1, 1, 3])
    expected = Series(range(2), index=[1, 1])
    result = indexer_sl(ser)[[1]]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_indexing_mixed_frame_bug(self):
|
||||
|
||||
# GH3492
|
||||
df = DataFrame(
|
||||
{"a": {1: "aaa", 2: "bbb", 3: "ccc"}, "b": {1: 111, 2: 222, 3: 333}}
|
||||
)
|
||||
|
||||
# this works, new column is created correctly
|
||||
df["test"] = df["a"].apply(lambda x: "_" if x == "aaa" else x)
|
||||
|
||||
# this does not work, ie column test is not changed
|
||||
idx = df["test"] == "_"
|
||||
temp = df.loc[idx, "a"].apply(lambda x: "-----" if x == "aaa" else x)
|
||||
df.loc[idx, "test"] = temp
|
||||
assert df.iloc[0, 2] == "-----"
|
||||
|
||||
def test_multitype_list_index_access(self):
    """Mixed str/int column labels: missing keys raise, present ones resolve."""
    # GH 10610
    df = DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23])

    with pytest.raises(KeyError, match=re.escape("'[26, -8] not in index'")):
        df[[22, 26, -8]]
    assert df[21].shape[0] == df.shape[0]
|
||||
|
||||
def test_set_index_nan(self):
|
||||
|
||||
# GH 3586
|
||||
df = DataFrame(
|
||||
{
|
||||
"PRuid": {
|
||||
17: "nonQC",
|
||||
18: "nonQC",
|
||||
19: "nonQC",
|
||||
20: "10",
|
||||
21: "11",
|
||||
22: "12",
|
||||
23: "13",
|
||||
24: "24",
|
||||
25: "35",
|
||||
26: "46",
|
||||
27: "47",
|
||||
28: "48",
|
||||
29: "59",
|
||||
30: "10",
|
||||
},
|
||||
"QC": {
|
||||
17: 0.0,
|
||||
18: 0.0,
|
||||
19: 0.0,
|
||||
20: np.nan,
|
||||
21: np.nan,
|
||||
22: np.nan,
|
||||
23: np.nan,
|
||||
24: 1.0,
|
||||
25: np.nan,
|
||||
26: np.nan,
|
||||
27: np.nan,
|
||||
28: np.nan,
|
||||
29: np.nan,
|
||||
30: np.nan,
|
||||
},
|
||||
"data": {
|
||||
17: 7.9544899999999998,
|
||||
18: 8.0142609999999994,
|
||||
19: 7.8591520000000008,
|
||||
20: 0.86140349999999999,
|
||||
21: 0.87853110000000001,
|
||||
22: 0.8427041999999999,
|
||||
23: 0.78587700000000005,
|
||||
24: 0.73062459999999996,
|
||||
25: 0.81668560000000001,
|
||||
26: 0.81927080000000008,
|
||||
27: 0.80705009999999999,
|
||||
28: 0.81440240000000008,
|
||||
29: 0.80140849999999997,
|
||||
30: 0.81307740000000006,
|
||||
},
|
||||
"year": {
|
||||
17: 2006,
|
||||
18: 2007,
|
||||
19: 2008,
|
||||
20: 1985,
|
||||
21: 1985,
|
||||
22: 1985,
|
||||
23: 1985,
|
||||
24: 1985,
|
||||
25: 1985,
|
||||
26: 1985,
|
||||
27: 1985,
|
||||
28: 1985,
|
||||
29: 1985,
|
||||
30: 1986,
|
||||
},
|
||||
}
|
||||
).reset_index()
|
||||
|
||||
result = (
|
||||
df.set_index(["year", "PRuid", "QC"])
|
||||
.reset_index()
|
||||
.reindex(columns=df.columns)
|
||||
)
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
def test_multi_assign(self):
    """Assign a sub-frame, then its ndarray values, through a mask (GH 3626)."""
    # GH 3626, an assignment of a sub-df to a df
    df = DataFrame(
        {
            "FC": ["a", "b", "a", "b", "a", "b"],
            "PF": [0, 0, 0, 0, 1, 1],
            "col1": list(range(6)),
            "col2": list(range(6, 12)),
        }
    )
    df.iloc[1, 0] = np.nan
    df2 = df.copy()

    mask = ~df2.FC.isna()
    cols = ["col1", "col2"]

    dft = df2 * 2
    dft.iloc[3, 3] = np.nan

    expected = DataFrame(
        {
            "FC": ["a", np.nan, "a", "b", "a", "b"],
            "PF": [0, 0, 0, 0, 1, 1],
            "col1": Series([0, 1, 4, 6, 8, 10]),
            "col2": [12, 7, 16, np.nan, 20, 22],
        }
    )

    # frame on rhs
    df2.loc[mask, cols] = dft.loc[mask, cols]
    tm.assert_frame_equal(df2, expected)

    # with an ndarray on rhs
    # coerces to float64 because values has float64 dtype
    # GH 14001
    expected = DataFrame(
        {
            "FC": ["a", np.nan, "a", "b", "a", "b"],
            "PF": [0, 0, 0, 0, 1, 1],
            "col1": [0.0, 1.0, 4.0, 6.0, 8.0, 10.0],
            "col2": [12, 7, 16, np.nan, 20, 22],
        }
    )
    df2 = df.copy()
    df2.loc[mask, cols] = dft.loc[mask, cols].values
    tm.assert_frame_equal(df2, expected)
|
||||
|
||||
def test_multi_assign_broadcasting_rhs(self):
    """Assigning one Series to several masked columns matches per-column assignment."""
    # broadcasting on the rhs is required
    df = DataFrame(
        {
            "A": [1, 2, 0, 0, 0],
            "B": [0, 0, 0, 10, 11],
            "C": [0, 0, 0, 10, 11],
            "D": [3, 4, 5, 6, 7],
        }
    )

    # build the expectation one column at a time
    expected = df.copy()
    mask = expected["A"] == 0
    for col in ["A", "B"]:
        expected.loc[mask, col] = df["D"]

    df.loc[df["A"] == 0, ["A", "B"]] = df["D"]
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
# TODO(ArrayManager) setting single item with an iterable doesn't work yet
# in the "split" path
@td.skip_array_manager_not_yet_implemented
def test_setitem_list(self):
    """A list can be stored in a single cell via iloc, and overwritten (GH 6043)."""
    # GH 6043
    # iloc with a list
    df = DataFrame(index=[0, 1], columns=[0])
    df.iloc[1, 0] = [1, 2, 3]
    df.iloc[1, 0] = [1, 2]

    result = DataFrame(index=[0, 1], columns=[0])
    result.iloc[1, 0] = [1, 2]

    tm.assert_frame_equal(result, df)
|
||||
|
||||
def test_string_slice(self):
    """String keys against an object-dtype index of Timestamps raise KeyError."""
    # GH 14424
    # string indexing against datetimelike with object
    # dtype should properly raises KeyError
    df = DataFrame([1], Index([pd.Timestamp("2011-01-01")], dtype=object))
    assert df.index._is_all_dates
    with pytest.raises(KeyError, match="'2011'"):
        df["2011"]

    with pytest.raises(KeyError, match="'2011'"):
        df.loc["2011", 0]
|
||||
|
||||
def test_string_slice_empty(self):
    """String keys on an empty frame raise KeyError (GH 14424)."""
    empty = DataFrame()
    assert not empty.index._is_all_dates

    with pytest.raises(KeyError, match="'2011'"):
        empty["2011"]

    # with .loc the error message names the missing column label instead
    with pytest.raises(KeyError, match="^0$"):
        empty.loc["2011", 0]
|
||||
|
||||
def test_astype_assignment(self):
    """Assigning converted columns back via iloc/loc stores the new values."""
    labels = list("ABCDEFG")
    original = DataFrame([["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=labels)

    # GH4312 (iloc)
    frame = original.copy()
    frame.iloc[:, 0:2] = frame.iloc[:, 0:2].astype(np.int64)
    want = DataFrame([[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=labels)
    tm.assert_frame_equal(frame, want)

    frame = original.copy()
    frame.iloc[:, 0:2] = frame.iloc[:, 0:2]._convert(datetime=True, numeric=True)
    want = DataFrame([[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=labels)
    tm.assert_frame_equal(frame, want)

    # GH5702 (loc)
    frame = original.copy()
    frame.loc[:, "A"] = frame.loc[:, "A"].astype(np.int64)
    want = DataFrame([[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=labels)
    tm.assert_frame_equal(frame, want)

    frame = original.copy()
    frame.loc[:, ["B", "C"]] = frame.loc[:, ["B", "C"]].astype(np.int64)
    want = DataFrame([["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=labels)
    tm.assert_frame_equal(frame, want)
|
||||
|
||||
def test_astype_assignment_full_replacements(self):
    """Replacing a whole NaN-free float column with its int64 cast yields ints."""
    want = DataFrame({"A": [1, 2, 3, 4]})

    # positional full-column replacement
    frame = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
    frame.iloc[:, 0] = frame["A"].astype(np.int64)
    tm.assert_frame_equal(frame, want)

    # label-based full-column replacement
    frame = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
    frame.loc[:, "A"] = frame["A"].astype(np.int64)
    tm.assert_frame_equal(frame, want)
|
||||
|
||||
@pytest.mark.parametrize("indexer", [tm.getitem, tm.loc])
def test_index_type_coercion(self, indexer):
    """Setting with a key of another type coerces the index as needed.

    GH 11836: when the index has a type and it is set with something that
    looks the same to numpy but actually is not (e.g. a float or the
    string '0'), the index has to be coerced.
    """
    # integer indexes
    int_indexed = [Series(range(5)), Series(range(5), index=range(1, 6))]
    for base in int_indexed:
        assert base.index.is_integer()

        # a fractional float key turns the index into a floating one
        grown = base.copy()
        indexer(grown)[0.1] = 0
        assert grown.index.is_floating()
        assert indexer(grown)[0.1] == 0

        # an integral float key: index unchanged if 0 is a label, else 0 is added
        grown = base.copy()
        indexer(grown)[0.0] = 0
        want_index = base.index if 0 in base else Index(base.index.tolist() + [0])
        tm.assert_index_equal(grown.index, want_index)

        # a string key turns the index into an object one
        grown = base.copy()
        indexer(grown)["0"] = 0
        assert grown.index.is_object()

    # float index
    base = Series(range(5), index=np.arange(5.0))
    assert base.index.is_floating()

    grown = base.copy()
    indexer(grown)[0.1] = 0
    assert grown.index.is_floating()
    assert indexer(grown)[0.1] == 0

    grown = base.copy()
    indexer(grown)[0.0] = 0
    tm.assert_index_equal(grown.index, base.index)

    grown = base.copy()
    indexer(grown)["0"] = 0
    assert grown.index.is_object()
|
||||
|
||||
|
||||
class TestMisc:
    """Assorted indexing regression tests."""

    def test_float_index_to_mixed(self):
        """A string column label can be added next to float column labels."""
        frame = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)})
        frame["a"] = 10

        want = DataFrame({0.0: frame[0.0], 1.0: frame[1.0], "a": [10] * 10})
        tm.assert_frame_equal(want, frame)

    def test_float_index_non_scalar_assignment(self):
        """A scalar assigned through part of a float index fills those rows."""
        frame = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0])
        first_two = frame.index[:2]
        frame.loc[first_two] = 1
        want = DataFrame({"a": [1, 1, 3], "b": [1, 1, 5]}, index=frame.index)
        tm.assert_frame_equal(want, frame)

    def test_loc_setitem_fullindex_views(self):
        """Assigning a frame's own rows over its full index leaves it unchanged."""
        frame = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0])
        snapshot = frame.copy()
        frame.loc[frame.index] = frame.loc[frame.index]
        tm.assert_frame_equal(frame, snapshot)

    def test_rhs_alignment(self):
        """Rows and columns of the assigned frame are aligned to the target.

        GH8258; covers both uniform data-type and multi-type cases.
        """

        def run_tests(df, rhs, right_loc, right_iloc):
            # the same 3x2 region addressed by label, position list and slice
            row_labels, row_positions, row_slice = list("bcd"), [1, 2, 3], slice(1, 4)
            col_labels, col_positions, col_slice = ["joe", "jolie"], [1, 2], slice(1, 3)

            target = df.copy()
            target.loc[row_labels, col_labels] = rhs
            tm.assert_frame_equal(target, right_loc)

            positional_keys = [(row_positions, col_positions), (row_slice, col_slice)]
            for rows, columns in positional_keys:
                target = df.copy()
                target.iloc[rows, columns] = rhs
                tm.assert_frame_equal(target, right_iloc)

        xs = np.arange(20).reshape(5, 4)
        cols = ["jim", "joe", "jolie", "joline"]
        df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64")

        # right hand side; permute the indices and multiply by -2
        rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

        # expected `right` result; just multiply by -2
        right_iloc = df.copy()
        right_iloc["joe"] = [1, 14, 10, 6, 17]
        right_iloc["jolie"] = [2, 13, 9, 5, 18]
        right_iloc.iloc[1:4, 1:3] *= -2
        right_loc = df.copy()
        right_loc.iloc[1:4, 1:3] *= -2

        # run tests with uniform dtypes
        run_tests(df, rhs, right_loc, right_iloc)

        # make frames multi-type & re-run tests
        for each in [df, rhs, right_loc, right_iloc]:
            each["joe"] = each["joe"].astype("float64")
            each["jolie"] = each["jolie"].map("@{}".format)
        right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
        right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
        run_tests(df, rhs, right_loc, right_iloc)

    def test_str_label_slicing_with_negative_step(self):
        """Label slices with step -1 select the same as positional slices."""
        SLC = pd.IndexSlice

        candidates = [_mklbl("A", 20), np.arange(20) + 100, np.linspace(100, 150, 20)]
        for labels in candidates:
            idx = Index(labels)
            ser = Series(np.arange(20), index=idx)
            # (label-based slice, positional slice it is compared against)
            pairs = [
                (SLC[idx[9] :: -1], SLC[9::-1]),
                (SLC[: idx[9] : -1], SLC[:8:-1]),
                (SLC[idx[13] : idx[9] : -1], SLC[13:8:-1]),
                (SLC[idx[9] : idx[13] : -1], SLC[:0]),
            ]
            for by_label, by_position in pairs:
                tm.assert_indexing_slices_equivalent(ser, by_label, by_position)

    def test_slice_with_zero_step_raises(self, index, indexer_sl, frame_or_series):
        """A slice whose step is zero is rejected with ValueError."""
        obj = frame_or_series(np.arange(len(index)), index=index)
        with pytest.raises(ValueError, match="slice step cannot be zero"):
            indexer_sl(obj)[::0]

    def test_loc_setitem_indexing_assignment_dict_already_exists(self):
        """A dict assigned to an existing row label sets values by column name."""
        index = Index([-5, 0, 5], name="z")
        new_row = {"x": 9, "y": 99}

        frame = DataFrame({"x": [1, 2, 6], "y": [2, 2, 8]}, index=index)
        want = frame.copy()
        frame.loc[5] = new_row
        want.loc[5] = [9, 99]
        tm.assert_frame_equal(frame, want)

        # GH#38335 same thing, mixed dtypes
        frame = DataFrame({"x": [1, 2, 6], "y": [2.0, 2.0, 8.0]}, index=index)
        frame.loc[5] = new_row
        want = DataFrame({"x": [1, 2, 9], "y": [2.0, 2.0, 99.0]}, index=index)
        tm.assert_frame_equal(frame, want)

    def test_iloc_getitem_indexing_dtypes_on_empty(self):
        """Selecting zero rows with .iloc keeps the column dtypes (GH9983)."""
        frame = DataFrame({"a": [1, 2, 3], "b": ["b", "b2", "b3"]})
        no_rows = frame.iloc[[], :]

        by_label = no_rows.loc[:, "a"]
        assert by_label.dtype == np.int64
        tm.assert_series_equal(no_rows.loc[:, "a"], no_rows.iloc[:, 0])

    @pytest.mark.parametrize("size", [5, 999999, 1000000])
    def test_loc_range_in_series_indexing(self, size):
        """A range object works as a .loc key for setting and getting.

        GH 11652: range can cause an indexing error.
        """
        ser = Series(index=range(size), dtype=np.float64)

        ser.loc[range(1)] = 42
        tm.assert_series_equal(ser.loc[range(1)], Series(42.0, index=[0]))

        ser.loc[range(2)] = 43
        tm.assert_series_equal(ser.loc[range(2)], Series(43.0, index=[0, 1]))

    def test_partial_boolean_frame_indexing(self):
        """A boolean frame covering only part of the target masks the rest.

        GH 17170
        """
        frame = DataFrame(
            np.arange(9.0).reshape(3, 3), index=list("abc"), columns=list("ABC")
        )
        partial = DataFrame(1, index=list("ab"), columns=list("AB"))
        got = frame[partial.notnull()]

        want_values = np.array(
            [[0.0, 1.0, np.nan], [3.0, 4.0, np.nan], [np.nan] * 3]
        )
        want = DataFrame(want_values, index=list("abc"), columns=list("ABC"))
        tm.assert_frame_equal(got, want)

    def test_no_reference_cycle(self):
        """Touching the indexer attributes does not keep the frame alive."""
        frame = DataFrame({"a": [0, 1], "b": [2, 3]})
        for accessor in ("loc", "iloc", "at", "iat"):
            getattr(frame, accessor)
        watcher = weakref.ref(frame)
        del frame
        # the weak reference is dead once the only name for the frame is gone
        assert watcher() is None

    def test_label_indexing_on_nan(self, nulls_fixture):
        """A null value can be used as a label after value_counts (GH 32431)."""
        ser = Series([1, "{1,2}", 1, nulls_fixture])
        counts = ser.value_counts(dropna=False)
        via_loc = counts.loc[nulls_fixture]
        via_getitem = counts[nulls_fixture]

        want = 1
        assert via_loc == want
        assert via_getitem == want
|
||||
|
||||
|
||||
class TestDataframeNoneCoercion:
    """Check what value ends up stored when None is assigned into a frame."""

    # (starting column values, expected values after row 0 is set to None)
    EXPECTED_SINGLE_ROW_RESULTS = [
        # For numeric series, we should coerce to NaN.
        ([1, 2, 3], [np.nan, 2, 3]),
        ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]),
        # For datetime series, we should coerce to NaT.
        (
            [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
            [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)],
        ),
        # For objects, we should preserve the None value.
        (["foo", "bar", "baz"], [None, "bar", "baz"]),
    ]

    @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS)
    def test_coercion_with_loc(self, expected):
        """None assigned through .loc[row, [column]]."""
        before, after = expected

        frame = DataFrame({"foo": before})
        frame.loc[0, ["foo"]] = None

        tm.assert_frame_equal(frame, DataFrame({"foo": after}))

    @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS)
    def test_coercion_with_setitem_and_dataframe(self, expected):
        """None assigned through frame[boolean mask]."""
        before, after = expected

        frame = DataFrame({"foo": before})
        frame[frame["foo"] == frame["foo"][0]] = None

        tm.assert_frame_equal(frame, DataFrame({"foo": after}))

    @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS)
    def test_none_coercion_loc_and_dataframe(self, expected):
        """None assigned through .loc[boolean mask]."""
        before, after = expected

        frame = DataFrame({"foo": before})
        frame.loc[frame["foo"] == frame["foo"][0]] = None

        tm.assert_frame_equal(frame, DataFrame({"foo": after}))

    def test_none_coercion_mixed_dtypes(self):
        """None assigned to a whole row of a frame with four column dtypes."""
        stamps = [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]
        frame = DataFrame(
            {
                "a": [1, 2, 3],
                "b": [1.0, 2.0, 3.0],
                "c": stamps,
                "d": ["a", "b", "c"],
            }
        )
        frame.iloc[0] = None

        want = DataFrame(
            {
                "a": [np.nan, 2, 3],
                "b": [np.nan, 2.0, 3.0],
                "c": [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)],
                "d": [None, "b", "c"],
            }
        )
        tm.assert_frame_equal(frame, want)
|
||||
|
||||
|
||||
class TestDatetimelikeCoercion:
    """Setting strings/offsets into datetime64 and timedelta64 Series."""

    def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli):
        """A date string set into a datetime64 Series keeps dtype and buffer."""
        # dispatching _can_hold_element to underlying DatetimeArray
        tz = tz_naive_fixture

        dti = date_range("2016-01-01", periods=3, tz=tz)
        ser = Series(dti)
        backing = ser._values

        newval = "2018-01-01"
        # the array itself must accept the value before we go through the Series
        backing._validate_setitem_value(newval)

        indexer_sli(ser)[0] = newval

        if tz is not None:
            assert ser._values is backing
        else:
            # TODO(EA2D): we can make this no-copy in tz-naive case too
            assert ser.dtype == dti.dtype
            assert ser._values._data is backing._data

    @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index])
    @pytest.mark.parametrize(
        "key", [[0, 1], slice(0, 2), np.array([True, True, False])]
    )
    def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer_sli, key, box):
        """Boxed date strings set into a datetime64 Series keep dtype and buffer."""
        # dispatching _can_hold_element to underlying DatetimeArray
        tz = tz_naive_fixture

        if indexer_sli is tm.loc and isinstance(key, slice):
            key = slice(0, 1)

        dti = date_range("2016-01-01", periods=3, tz=tz)
        ser = Series(dti)
        backing = ser._values

        newvals = box(["2019-01-01", "2010-01-02"])
        backing._validate_setitem_value(newvals)

        indexer_sli(ser)[key] = newvals

        if tz is not None:
            assert ser._values is backing
        else:
            # TODO(EA2D): we can make this no-copy in tz-naive case too
            assert ser.dtype == dti.dtype
            assert ser._values._data is backing._data

    @pytest.mark.parametrize("scalar", ["3 Days", offsets.Hour(4)])
    def test_setitem_td64_scalar(self, indexer_sli, scalar):
        """A timedelta-like scalar set into a timedelta64 Series keeps its buffer."""
        # dispatching _can_hold_element to underlying TimedeltaArray
        tdi = timedelta_range("1 Day", periods=3)
        ser = Series(tdi)

        backing = ser._values
        backing._validate_setitem_value(scalar)

        indexer_sli(ser)[0] = scalar
        assert ser._values._data is backing._data

    @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index])
    @pytest.mark.parametrize(
        "key", [[0, 1], slice(0, 2), np.array([True, True, False])]
    )
    def test_setitem_td64_string_values(self, indexer_sli, key, box):
        """Boxed timedelta strings set into a timedelta64 Series keep its buffer."""
        # dispatching _can_hold_element to underlying TimedeltaArray
        if indexer_sli is tm.loc and isinstance(key, slice):
            key = slice(0, 1)

        tdi = timedelta_range("1 Day", periods=3)
        ser = Series(tdi)
        backing = ser._values

        newvals = box(["10 Days", "44 hours"])
        backing._validate_setitem_value(newvals)

        indexer_sli(ser)[key] = newvals
        assert ser._values._data is backing._data
|
||||
|
||||
|
||||
def test_extension_array_cross_section():
    """A row taken from a frame of two Int64 columns is an Int64 Series."""
    # A cross-section of a homogeneous EA should be an EA
    frame = DataFrame(
        {
            "A": pd.array([1, 2], dtype="Int64"),
            "B": pd.array([3, 4], dtype="Int64"),
        },
        index=["a", "b"],
    )
    want = Series(pd.array([1, 3], dtype="Int64"), index=["A", "B"], name="a")

    # the first row, fetched by label and by position
    tm.assert_series_equal(frame.loc["a"], want)
    tm.assert_series_equal(frame.iloc[0], want)
|
||||
|
||||
|
||||
def test_extension_array_cross_section_converts():
    """Row dtype when an Int64 column sits next to a non-extension column."""
    # all numeric columns -> numeric series
    numeric = DataFrame(
        {
            "A": pd.array([1, 2], dtype="Int64"),
            "B": np.array([1, 2], dtype="int64"),
        },
        index=["a", "b"],
    )
    want = Series([1, 1], dtype="Int64", index=["A", "B"], name="a")
    tm.assert_series_equal(numeric.loc["a"], want)
    tm.assert_series_equal(numeric.iloc[0], want)

    # mixed columns -> object series
    mixed = DataFrame(
        {"A": pd.array([1, 2], dtype="Int64"), "B": np.array(["a", "b"])},
        index=["a", "b"],
    )
    want = Series([1, "a"], dtype=object, index=["A", "B"], name="a")
    tm.assert_series_equal(mixed.loc["a"], want)
    tm.assert_series_equal(mixed.iloc[0], want)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,75 @@
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, dtype",
    [
        ([], "object"),
        ([1, 2, 3], "int64"),
        ([1.0, 2.0, 3.0], "float64"),
        (["a", "b", "c"], "object"),
        (["a", "b", "c"], "string"),
        ([1, 2, 3], "datetime64[ns]"),
        ([1, 2, 3], "datetime64[ns, CET]"),
        ([1, 2, 3], "timedelta64[ns]"),
        (["2000", "2001", "2002"], "Period[D]"),
        ([1, 0, 3], "Sparse"),
        ([pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(3, 4)], "interval"),
    ],
)
@pytest.mark.parametrize(
    "mask", [[True, False, False], [True, True, True], [False, False, False]]
)
@pytest.mark.parametrize("indexer_class", [list, pd.array, pd.Index, pd.Series])
@pytest.mark.parametrize("frame", [True, False])
def test_series_mask_boolean(values, dtype, mask, indexer_class, frame):
    """Boolean masks in several containers select the same rows everywhere.

    The mask is wrapped in ``indexer_class`` and applied with plain
    ``[]``, ``.iloc`` and ``.loc`` to a Series (or a frame when ``frame``
    is true). ``.iloc`` is expected to raise ValueError for a Series mask.
    """
    # In case len(values) < 3
    index = ["a", "b", "c"][: len(values)]
    mask = mask[: len(values)]

    obj = pd.Series(values, dtype=dtype, index=index)
    if frame:
        if len(values) == 0:
            # Otherwise obj is an empty DataFrame with shape (0, 1)
            obj = pd.DataFrame(dtype=dtype)
        else:
            obj = obj.to_frame()

    if indexer_class is pd.array:
        mask = pd.array(mask, dtype="boolean")
    elif indexer_class is pd.Series:
        mask = pd.Series(mask, index=obj.index, dtype="boolean")
    else:
        mask = indexer_class(mask)

    expected = obj[mask]

    result = obj[mask]
    tm.assert_equal(result, expected)

    if indexer_class is pd.Series:
        msg = "iLocation based boolean indexing cannot use an indexable as a mask"
        # Only the raising call belongs in the context: anything placed
        # after it inside the block could never execute.
        with pytest.raises(ValueError, match=msg):
            obj.iloc[mask]
    else:
        result = obj.iloc[mask]
        tm.assert_equal(result, expected)

    result = obj.loc[mask]
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_na_treated_as_false(frame_or_series, indexer_sli):
|
||||
# https://github.com/pandas-dev/pandas/issues/31503
|
||||
obj = frame_or_series([1, 2, 3])
|
||||
|
||||
mask = pd.array([True, False, None], dtype="boolean")
|
||||
|
||||
result = indexer_sli(obj)[mask]
|
||||
expected = indexer_sli(obj)[mask.fillna(False)]
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
@@ -0,0 +1,668 @@
|
||||
"""
|
||||
test setting *parts* of objects both positionally and label based
|
||||
|
||||
TODO: these should be split among the indexer tests
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Period,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestEmptyFrameSetitemExpansion:
|
||||
def test_empty_frame_setitem_index_name_retained(self):
|
||||
# GH#31368 empty frame has non-None index.name -> retained
|
||||
df = DataFrame({}, index=pd.RangeIndex(0, name="df_index"))
|
||||
series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
|
||||
|
||||
df["series"] = series
|
||||
expected = DataFrame(
|
||||
{"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index")
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_empty_frame_setitem_index_name_inherited(self):
|
||||
# GH#36527 empty frame has None index.name -> not retained
|
||||
df = DataFrame()
|
||||
series = Series(1.23, index=pd.RangeIndex(4, name="series_index"))
|
||||
df["series"] = series
|
||||
expected = DataFrame(
|
||||
{"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index")
|
||||
)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_loc_setitem_zerolen_series_columns_align(self):
|
||||
# columns will align
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[0] = Series(1, index=range(4))
|
||||
expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
# columns will align
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[0] = Series(1, index=["B"])
|
||||
|
||||
exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64")
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_loc_setitem_zerolen_list_length_must_match_columns(self):
|
||||
# list-like must conform
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
|
||||
msg = "cannot set a row with mismatched columns"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[0] = [1, 2, 3]
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[3] = [6, 7] # length matches len(df.columns) --> OK!
|
||||
|
||||
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64)
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_partial_set_empty_frame(self):
|
||||
|
||||
# partially set with an empty object
|
||||
# frame
|
||||
df = DataFrame()
|
||||
|
||||
msg = "cannot set a frame with no defined columns"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[1] = 1
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[1] = Series([1], index=["foo"])
|
||||
|
||||
msg = "cannot set a frame with no defined index and a scalar"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.loc[:, 1] = 1
|
||||
|
||||
def test_partial_set_empty_frame2(self):
|
||||
# these work as they don't really change
|
||||
# anything but the index
|
||||
# GH#5632
|
||||
expected = DataFrame(columns=["foo"], index=Index([], dtype="object"))
|
||||
|
||||
df = DataFrame(index=Index([], dtype="object"))
|
||||
df["foo"] = Series([], dtype="object")
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame()
|
||||
df["foo"] = Series(df.index)
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame()
|
||||
df["foo"] = df.index
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame3(self):
|
||||
expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
|
||||
expected["foo"] = expected["foo"].astype("float64")
|
||||
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = []
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = Series(np.arange(len(df)), dtype="float64")
|
||||
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame4(self):
|
||||
df = DataFrame(index=Index([], dtype="int64"))
|
||||
df["foo"] = range(len(df))
|
||||
|
||||
expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
|
||||
# range is int-dtype-like, so we get int64 dtype
|
||||
expected["foo"] = expected["foo"].astype("int64")
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame5(self):
|
||||
df = DataFrame()
|
||||
tm.assert_index_equal(df.columns, Index([], dtype=object))
|
||||
df2 = DataFrame()
|
||||
df2[1] = Series([1], index=["foo"])
|
||||
df.loc[:, 1] = Series([1], index=["foo"])
|
||||
tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
|
||||
tm.assert_frame_equal(df, df2)
|
||||
|
||||
def test_partial_set_empty_frame_no_index(self):
|
||||
# no index to start
|
||||
expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df[0] = Series(1, index=range(4))
|
||||
df.dtypes
|
||||
str(df)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(columns=["A", "B"])
|
||||
df.loc[:, 0] = Series(1, index=range(4))
|
||||
df.dtypes
|
||||
str(df)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_row(self):
|
||||
# GH#5720, GH#5744
|
||||
# don't create rows when empty
|
||||
expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64"))
|
||||
expected["A"] = expected["A"].astype("int64")
|
||||
expected["B"] = expected["B"].astype("float64")
|
||||
expected["New"] = expected["New"].astype("float64")
|
||||
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
|
||||
y = df[df.A > 5]
|
||||
y["New"] = np.nan
|
||||
tm.assert_frame_equal(y, expected)
|
||||
|
||||
expected = DataFrame(columns=["a", "b", "c c", "d"])
|
||||
expected["d"] = expected["d"].astype("int64")
|
||||
df = DataFrame(columns=["a", "b", "c c"])
|
||||
df["d"] = 3
|
||||
tm.assert_frame_equal(df, expected)
|
||||
tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object))
|
||||
|
||||
# reindex columns is ok
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
|
||||
y = df[df.A > 5]
|
||||
result = y.reindex(columns=["A", "B", "C"])
|
||||
expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64"))
|
||||
expected["A"] = expected["A"].astype("int64")
|
||||
expected["B"] = expected["B"].astype("float64")
|
||||
expected["C"] = expected["C"].astype("float64")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_partial_set_empty_frame_set_series(self):
|
||||
# GH#5756
|
||||
# setting with empty Series
|
||||
df = DataFrame(Series(dtype=object))
|
||||
expected = DataFrame({0: Series(dtype=object)})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(Series(name="foo", dtype=object))
|
||||
expected = DataFrame({"foo": Series(dtype=object)})
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_copy_assignment(self):
|
||||
# GH#5932
|
||||
# copy on empty with assignment fails
|
||||
df = DataFrame(index=[0])
|
||||
df = df.copy()
|
||||
df["a"] = 0
|
||||
expected = DataFrame(0, index=[0], columns=["a"])
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_empty_frame_empty_consistencies(self):
|
||||
# GH#6171
|
||||
# consistency on empty frames
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df["x"] = [1, 2]
|
||||
expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]})
|
||||
tm.assert_frame_equal(df, expected, check_dtype=False)
|
||||
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df["x"] = ["1", "2"]
|
||||
expected = DataFrame({"x": ["1", "2"], "y": [np.nan, np.nan]}, dtype=object)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = DataFrame(columns=["x", "y"])
|
||||
df.loc[0, "x"] = 1
|
||||
expected = DataFrame({"x": [1], "y": [np.nan]})
|
||||
tm.assert_frame_equal(df, expected, check_dtype=False)
|
||||
|
||||
|
||||
class TestPartialSetting:
|
||||
def test_partial_setting(self):
    """Setting a missing label enlarges a Series; iloc/iat refuse to enlarge.

    GH2578, allow ix and friends to partially set
    """
    # series
    s_orig = Series([1, 2, 3])

    # an int keeps the values integral, a float upcasts them
    for new_value in (5, 5.0):
        want = Series([1, 2, 3, new_value], index=[0, 1, 2, 5])

        ser = s_orig.copy()
        ser[5] = new_value
        tm.assert_series_equal(ser, want)

        ser = s_orig.copy()
        ser.loc[5] = new_value
        tm.assert_series_equal(ser, want)

    # iloc/iat raise
    ser = s_orig.copy()

    with pytest.raises(IndexError, match="iloc cannot enlarge its target object"):
        ser.iloc[3] = 5.0

    out_of_bounds = "index 3 is out of bounds for axis 0 with size 3"
    with pytest.raises(IndexError, match=out_of_bounds):
        ser.iat[3] = 5.0
|
||||
|
||||
def test_partial_setting_frame(self):
    """Row/column setting on a frame, including enlargement with .loc."""
    df_orig = DataFrame(
        np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
    )

    # iloc/iat raise
    df = df_orig.copy()

    with pytest.raises(IndexError, match="iloc cannot enlarge its target object"):
        df.iloc[4, 2] = 5.0

    out_of_bounds = "index 2 is out of bounds for axis 0 with size 2"
    with pytest.raises(IndexError, match=out_of_bounds):
        df.iat[4, 2] = 5.0

    # row setting where it exists
    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.iloc[1] = df.iloc[2]
    tm.assert_frame_equal(df, expected)

    expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
    df = df_orig.copy()
    df.loc[1] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # like 2578, partial setting with dtype preservation
    expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
    df = df_orig.copy()
    df.loc[3] = df.loc[2]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
    df = df_orig.copy()
    df.loc[:, "B"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed dtype frame, overwrite
    expected = DataFrame({"A": [0, 2, 4], "B": Series([0, 2, 4])})
    df = df_orig.copy()
    df["B"] = df["B"].astype(np.float64)
    df.loc[:, "B"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # single dtype frame, partial setting
    # (the expected column is taken from the frame of the previous step)
    expected = df_orig.copy()
    expected["C"] = df["A"]
    df = df_orig.copy()
    df.loc[:, "C"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)

    # mixed frame, partial setting
    expected = df_orig.copy()
    expected["C"] = df["A"]
    df = df_orig.copy()
    df.loc[:, "C"] = df.loc[:, "A"]
    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_setting2(self):
|
||||
# GH 8473
|
||||
dates = date_range("1/1/2000", periods=8)
|
||||
df_orig = DataFrame(
|
||||
np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]
|
||||
)
|
||||
|
||||
expected = pd.concat(
|
||||
[df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True
|
||||
)
|
||||
df = df_orig.copy()
|
||||
df.loc[dates[-1] + dates.freq, "A"] = 7
|
||||
tm.assert_frame_equal(df, expected)
|
||||
df = df_orig.copy()
|
||||
df.at[dates[-1] + dates.freq, "A"] = 7
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq)
|
||||
expected = pd.concat([df_orig, exp_other], axis=1)
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc[dates[-1] + dates.freq, 0] = 7
|
||||
tm.assert_frame_equal(df, expected)
|
||||
df = df_orig.copy()
|
||||
df.at[dates[-1] + dates.freq, 0] = 7
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_setting_mixed_dtype(self):
|
||||
|
||||
# in a mixed dtype environment, try to preserve dtypes
|
||||
# by appending
|
||||
df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])
|
||||
|
||||
s = df.loc[1].copy()
|
||||
s.name = 2
|
||||
expected = pd.concat([df, DataFrame(s).T.infer_objects()])
|
||||
|
||||
df.loc[2] = df.loc[1]
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_series_partial_set(self):
    """
    List-like ``loc`` with missing labels raises; ``reindex`` fills with NaN.

    Partial set with new index, regression from GH4825.
    """
    nan = np.nan

    def check_loc_raises_reindex_fills(obj, labels, filled):
        # loc equiv to .reindex, except that loc raises on missing labels
        expected = Series(filled, index=labels)
        with pytest.raises(KeyError, match="not in index"):
            obj.loc[labels]
        result = obj.reindex(labels)
        tm.assert_series_equal(result, expected, check_index_type=True)

    ser = Series([0.1, 0.2], index=[1, 2])

    check_loc_raises_reindex_fills(ser, [3, 2, 3], [nan, 0.2, nan])
    check_loc_raises_reindex_fills(ser, [3, 2, 3, "x"], [nan, 0.2, nan, nan])

    # every requested label is present, so loc succeeds
    expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1])
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    check_loc_raises_reindex_fills(ser, [2, 2, "x", 1], [0.2, 0.2, nan, 0.1])

    # raises as nothing is in the index
    msg = (
        r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are "
        r"in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    check_loc_raises_reindex_fills(ser, [2, 2, 3], [0.2, 0.2, nan])

    # (data, index, requested labels, values expected from reindex)
    cases = [
        ([0.1, 0.2, 0.3], [1, 2, 3], [3, 4, 4], [0.3, nan, nan]),
        ([0.1, 0.2, 0.3, 0.4], [1, 2, 3, 4], [5, 3, 3], [nan, 0.3, 0.3]),
        ([0.1, 0.2, 0.3, 0.4], [1, 2, 3, 4], [5, 4, 4], [nan, 0.4, 0.4]),
        ([0.1, 0.2, 0.3, 0.4], [4, 5, 6, 7], [7, 2, 2], [0.4, nan, nan]),
        ([0.1, 0.2, 0.3, 0.4], [1, 2, 3, 4], [4, 5, 5], [0.4, nan, nan]),
    ]
    for data, index, labels, filled in cases:
        s = Series(data, index=index)
        check_loc_raises_reindex_fills(s, labels, filled)

    # iloc
    expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1])
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
def test_series_partial_set_with_name(self):
    """
    Same checks as ``test_series_partial_set`` on a named Series/Index.

    GH 11497.
    """
    idx = Index([1, 2], dtype="int64", name="idx")
    ser = Series([0.1, 0.2], index=idx, name="s")

    # loc
    with pytest.raises(KeyError, match=r"\[3\] not in index"):
        ser.loc[[3, 2, 3]]

    with pytest.raises(KeyError, match=r"not in index"):
        ser.loc[[3, 2, 3, "x"]]

    # all labels present: the names of the Series and its index are kept
    exp_idx = Index([2, 2, 1], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s")
    result = ser.loc[[2, 2, 1]]
    tm.assert_series_equal(result, expected, check_index_type=True)

    with pytest.raises(KeyError, match=r"\['x'\] not in index"):
        ser.loc[[2, 2, "x", 1]]

    # raises as nothing is in the index
    msg = (
        r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', "
        r"name='idx'\)\] are in the \[index\]\""
    )
    with pytest.raises(KeyError, match=msg):
        ser.loc[[3, 3, 3]]

    with pytest.raises(KeyError, match="not in index"):
        ser.loc[[2, 2, 3]]

    # (index values, data, requested labels) -- each lookup must raise
    cases = [
        ([1, 2, 3], [0.1, 0.2, 0.3], [3, 4, 4]),
        ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4], [5, 3, 3]),
        ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4], [5, 4, 4]),
        ([4, 5, 6, 7], [0.1, 0.2, 0.3, 0.4], [7, 2, 2]),
        ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4], [4, 5, 5]),
    ]
    for index_values, data, labels in cases:
        idx = Index(index_values, dtype="int64", name="idx")
        with pytest.raises(KeyError, match="not in index"):
            Series(data, index=idx, name="s").loc[labels]

    # iloc
    exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx")
    expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s")
    result = ser.iloc[[1, 1, 0, 0]]
    tm.assert_series_equal(result, expected, check_index_type=True)
|
||||
|
||||
@pytest.mark.parametrize("key", [100, 100.0])
def test_setitem_with_expansion_numeric_into_datetimeindex(self, key):
    """
    Insert a numeric row label into a frame built by ``makeTimeDataFrame``.

    GH#4940 inserting non-strings: the expected index is the original labels
    plus ``key``, built with object dtype.
    """
    orig = tm.makeTimeDataFrame()
    df = orig.copy()

    df.loc[key, :] = df.iloc[0]

    expected_labels = list(orig.index) + [key]
    ex_index = Index(expected_labels, dtype=object, name=orig.index.name)
    first_row = df.iloc[[0]].values
    ex_data = np.concatenate([orig.values, first_row], axis=0)
    expected = DataFrame(ex_data, index=ex_index, columns=orig.columns)

    tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_partial_set_invalid(self):
    """
    Append a string-labelled row to a frame built by ``makeTimeDataFrame``.

    GH 4940: allow only setting of 'valid' values; object conversion of the
    index is allowed here.
    """
    orig = tm.makeTimeDataFrame()

    df = orig.copy()
    df.loc["a", :] = df.iloc[0]

    appended = Series(df.iloc[0], name="a")
    appended_frame = DataFrame(appended).T.infer_objects()
    exp = pd.concat([orig, appended_frame])
    tm.assert_frame_equal(df, exp)

    expected_index = Index(orig.index.tolist() + ["a"])
    tm.assert_index_equal(df.index, expected_index)
    assert df.index.dtype == "object"
|
||||
|
||||
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Timestamp(day)
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["4D", "8D", "12D"],
            [pd.Timedelta(span) for span in ("4 day", "8 day", "12 day")],
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes(
    self, idx, labels, expected_idx, frame_or_series
):
    """
    Select rows with a list of strings on period/datetime/timedelta indexes.

    GH 11278: ``loc`` (and plain ``[]`` for Series) must return the rows at
    positions 3, 7 and 11, labelled by ``expected_idx``.
    """
    obj = frame_or_series(range(20), index=idx)
    expected = frame_or_series([3, 7, 11], expected_idx)

    tm.assert_equal(expected, obj.loc[labels])

    # plain __getitem__ is only exercised for Series
    if frame_or_series is not Series:
        return
    tm.assert_series_equal(expected, obj[labels])
|
||||
|
||||
@pytest.mark.parametrize(
    "idx,labels",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
    self, idx, labels
):
    """
    A list of strings containing one label outside the index raises.

    GH 11278: checked for ``Series.loc``, ``Series[]`` and ``DataFrame.loc``.
    """
    ser = Series(range(20), index=idx)
    df = DataFrame(range(20), index=idx)
    msg = r"not in index"

    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: df.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match=msg):
            lookup()
|
||||
|
||||
@pytest.mark.parametrize(
    "idx,labels,msg",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["4D", "8D"],
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["4D", "8D"],
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["2000-01-04", "2000-01-08"],
            (
                r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
                r"dtype='object'\)\] are in the \[index\]"
            ),
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
    self, idx, labels, msg
):
    """
    String labels of the wrong kind for the index raise ``KeyError``.

    GH 11278: timedelta-like strings against period/datetime indexes and
    date-like strings against a timedelta index; checked for ``Series.loc``,
    ``Series[]`` and ``DataFrame.loc``.
    """
    ser = Series(range(20), index=idx)
    df = DataFrame(range(20), index=idx)

    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: df.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match=msg):
            lookup()
|
||||
|
||||
|
||||
class TestStringSlicing:
    """String slicing on a datetime index."""

    def test_slice_irregular_datetime_index_with_nan(self):
        """
        Slice by date strings when the index ends with a missing entry.

        GH36953: the expected frame keeps only the first three rows.
        """
        raw_labels = ["2012-01-01", "2012-01-02", "2012-01-03", None]
        index = pd.to_datetime(raw_labels)
        df = DataFrame(range(len(index)), index=index)

        kept = index[:3]
        expected = DataFrame(range(len(kept)), index=kept)

        result = df["2012-01-01":"2012-01-04"]
        tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,319 @@
|
||||
""" test scalar indexing, including at and iat """
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.indexing.common import Base
|
||||
|
||||
|
||||
class TestScalar(Base):
    """``at``/``iat`` scalar access checks using the helpers from ``Base``."""

    @pytest.mark.parametrize("kind", ["series", "frame"])
    def test_at_and_iat_get(self, kind):
        """Values read through ``iat``/``at`` match ``self.get_value``."""

        def _check(obj, accessor, values=False):
            # a fixture may be None, in which case there is nothing to check
            if obj is None:
                return
            for key in self.generate_indices(obj, values):
                result = getattr(obj, accessor)[key]
                expected = self.get_value(accessor, obj, key, values)
                tm.assert_almost_equal(result, expected)

        d = getattr(self, kind)

        # iat
        for f in [d[typ] for typ in ("ints", "uints")]:
            _check(f, "iat", values=True)

        for f in [d[typ] for typ in ("labels", "ts", "floats")]:
            if f is None:
                continue
            msg = "iAt based indexing can only have integer indexers"
            with pytest.raises(ValueError, match=msg):
                self.check_values(f, "iat")

        # at
        for f in [d[typ] for typ in ("ints", "uints", "labels", "ts", "floats")]:
            _check(f, "at")

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["ints", "uints"])
    def test_iat_set_ints(self, kind, col):
        """Setting 1 through ``iat`` is visible through ``self.get_value``."""
        f = getattr(self, kind)[col]
        if f is None:
            return
        for key in self.generate_indices(f, True):
            f.iat[key] = 1
            expected = self.get_value("iat", f, key, True)
            tm.assert_almost_equal(expected, 1)

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["labels", "ts", "floats"])
    def test_iat_set_other(self, kind, col):
        """Setting through ``iat`` with these fixtures raises ``ValueError``."""
        f = getattr(self, kind)[col]
        if f is None:
            return
        msg = "iAt based indexing can only have integer indexers"
        with pytest.raises(ValueError, match=msg):
            for key in self.generate_indices(f, False):
                f.iat[key] = 1
                expected = self.get_value("iat", f, key, False)
                tm.assert_almost_equal(expected, 1)

    @pytest.mark.parametrize("kind", ["series", "frame"])
    @pytest.mark.parametrize("col", ["ints", "uints", "labels", "ts", "floats"])
    def test_at_set_ints_other(self, kind, col):
        """Setting 1 through ``at`` is visible through ``self.get_value``."""
        f = getattr(self, kind)[col]
        if f is None:
            return
        for key in self.generate_indices(f, False):
            f.at[key] = 1
            expected = self.get_value("at", f, key, False)
            tm.assert_almost_equal(expected, 1)
|
||||
|
||||
|
||||
class TestAtAndiAT:
|
||||
# at and iat tests that don't need Base class
|
||||
|
||||
def test_float_index_at_iat(self):
|
||||
ser = Series([1, 2, 3], index=[0.1, 0.2, 0.3])
|
||||
for el, item in ser.items():
|
||||
assert ser.at[el] == item
|
||||
for i in range(len(ser)):
|
||||
assert ser.iat[i] == i + 1
|
||||
|
||||
def test_at_iat_coercion(self):
|
||||
|
||||
# as timestamp is not a tuple!
|
||||
dates = date_range("1/1/2000", periods=8)
|
||||
df = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"])
|
||||
s = df["A"]
|
||||
|
||||
result = s.at[dates[5]]
|
||||
xp = s.values[5]
|
||||
assert result == xp
|
||||
|
||||
# GH 7729
|
||||
# make sure we are boxing the returns
|
||||
s = Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]")
|
||||
expected = Timestamp("2014-02-02")
|
||||
|
||||
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
|
||||
result = r()
|
||||
assert result == expected
|
||||
|
||||
s = Series(["1 days", "2 days"], dtype="timedelta64[ns]")
|
||||
expected = Timedelta("2 days")
|
||||
|
||||
for r in [lambda: s.iat[1], lambda: s.iloc[1]]:
|
||||
result = r()
|
||||
assert result == expected
|
||||
|
||||
def test_iat_invalid_args(self):
|
||||
pass
|
||||
|
||||
def test_imethods_with_dups(self):
|
||||
|
||||
# GH6493
|
||||
# iat/iloc with dups
|
||||
|
||||
s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64")
|
||||
result = s.iloc[2]
|
||||
assert result == 2
|
||||
result = s.iat[2]
|
||||
assert result == 2
|
||||
|
||||
msg = "index 10 is out of bounds for axis 0 with size 5"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
s.iat[10]
|
||||
msg = "index -10 is out of bounds for axis 0 with size 5"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
s.iat[-10]
|
||||
|
||||
result = s.iloc[[2, 3]]
|
||||
expected = Series([2, 3], [2, 2], dtype="int64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
df = s.to_frame()
|
||||
result = df.iloc[2]
|
||||
expected = Series(2, index=[0], name=2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.iat[2, 0]
|
||||
assert result == 2
|
||||
|
||||
def test_frame_at_with_duplicate_axes(self):
|
||||
# GH#33041
|
||||
arr = np.random.randn(6).reshape(3, 2)
|
||||
df = DataFrame(arr, columns=["A", "A"])
|
||||
|
||||
result = df.at[0, "A"]
|
||||
expected = df.iloc[0]
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.T.at["A", 0]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# setter
|
||||
df.at[1, "A"] = 2
|
||||
expected = Series([2.0, 2.0], index=["A", "A"], name=1)
|
||||
tm.assert_series_equal(df.iloc[1], expected)
|
||||
|
||||
def test_at_getitem_dt64tz_values(self):
|
||||
# gh-15822
|
||||
df = DataFrame(
|
||||
{
|
||||
"name": ["John", "Anderson"],
|
||||
"date": [
|
||||
Timestamp(2017, 3, 13, 13, 32, 56),
|
||||
Timestamp(2017, 2, 16, 12, 10, 3),
|
||||
],
|
||||
}
|
||||
)
|
||||
df["date"] = df["date"].dt.tz_localize("Asia/Shanghai")
|
||||
|
||||
expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai")
|
||||
|
||||
result = df.loc[0, "date"]
|
||||
assert result == expected
|
||||
|
||||
result = df.at[0, "date"]
|
||||
assert result == expected
|
||||
|
||||
def test_mixed_index_at_iat_loc_iloc_series(self):
|
||||
# GH 19860
|
||||
s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2])
|
||||
for el, item in s.items():
|
||||
assert s.at[el] == s.loc[el] == item
|
||||
for i in range(len(s)):
|
||||
assert s.iat[i] == s.iloc[i] == i + 1
|
||||
|
||||
with pytest.raises(KeyError, match="^4$"):
|
||||
s.at[4]
|
||||
with pytest.raises(KeyError, match="^4$"):
|
||||
s.loc[4]
|
||||
|
||||
def test_mixed_index_at_iat_loc_iloc_dataframe(self):
|
||||
# GH 19860
|
||||
df = DataFrame(
|
||||
[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2]
|
||||
)
|
||||
for rowIdx, row in df.iterrows():
|
||||
for el, item in row.items():
|
||||
assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item
|
||||
|
||||
for row in range(2):
|
||||
for i in range(5):
|
||||
assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i
|
||||
|
||||
with pytest.raises(KeyError, match="^3$"):
|
||||
df.at[0, 3]
|
||||
with pytest.raises(KeyError, match="^3$"):
|
||||
df.loc[0, 3]
|
||||
|
||||
def test_iat_setter_incompatible_assignment(self):
|
||||
# GH 23236
|
||||
result = DataFrame({"a": [0, 1], "b": [4, 5]})
|
||||
result.iat[0, 0] = None
|
||||
expected = DataFrame({"a": [None, 1], "b": [4, 5]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_iat_dont_wrap_object_datetimelike():
    """
    Scalar getters return the stored stdlib objects for object-dtype columns.

    GH#32809 .iat calls go through DataFrame._get_value, should not
    call maybe_box_datetimelike.
    """
    dti = date_range("2016-01-01", periods=3)
    tdi = dti - dti
    ser = Series(dti.to_pydatetime(), dtype=object)
    ser2 = Series(tdi.to_pytimedelta(), dtype=object)
    df = DataFrame({"A": ser, "B": ser2})
    assert (df.dtypes == object).all()

    datetime_results = [df.at[0, "A"], df.iat[0, 0], df.loc[0, "A"], df.iloc[0, 0]]
    for value in datetime_results:
        assert value is ser[0]
        assert isinstance(value, datetime)
        assert not isinstance(value, Timestamp)

    timedelta_results = [df.at[1, "B"], df.iat[1, 1], df.loc[1, "B"], df.iloc[1, 1]]
    for value in timedelta_results:
        assert value is ser2[1]
        assert isinstance(value, timedelta)
        assert not isinstance(value, Timedelta)
|
||||
|
||||
|
||||
def test_at_with_tuple_index_get():
    """``at`` getter with a single-level Index whose labels are tuples (GH 26989)."""
    tuple_labels = [(1, 2), (3, 4)]

    # DataFrame.at getter works with Index of tuples
    df = DataFrame({"a": [1, 2]}, index=tuple_labels)
    assert df.index.nlevels == 1
    assert df.at[(1, 2), "a"] == 1

    # Series.at getter works with Index of tuples
    column = df["a"]
    assert column.index.nlevels == 1
    assert column.at[(1, 2)] == 1
|
||||
|
||||
|
||||
def test_at_with_tuple_index_set():
    """``at`` setter with a single-level Index whose labels are tuples (GH 26989)."""
    tuple_labels = [(1, 2), (3, 4)]

    # DataFrame.at setter works with Index of tuples
    df = DataFrame({"a": [1, 2]}, index=tuple_labels)
    assert df.index.nlevels == 1
    df.at[(1, 2), "a"] = 2
    assert df.at[(1, 2), "a"] == 2

    # Series.at setter works with Index of tuples
    column = df["a"]
    assert column.index.nlevels == 1
    column.at[1, 2] = 3
    assert column.at[1, 2] == 3
|
||||
|
||||
|
||||
class TestMultiIndexScalar:
    """Scalar ``at``/``loc`` access on objects indexed by a MultiIndex."""

    def test_multiindex_at_get(self):
        """Getters on a two-level index (GH 26989)."""
        # DataFrame.at and DataFrame.loc getter works with MultiIndex
        df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]])
        assert df.index.nlevels == 2
        for accessor in ("at", "loc"):
            assert getattr(df, accessor)[(1, 3), "a"] == 1

        # Series.at and Series.loc getter works with MultiIndex
        column = df["a"]
        assert column.index.nlevels == 2
        for accessor in ("at", "loc"):
            assert getattr(column, accessor)[1, 3] == 1

    def test_multiindex_at_set(self):
        """Setters on a two-level index (GH 26989)."""
        # DataFrame.at and DataFrame.loc setter works with MultiIndex
        df = DataFrame({"a": [1, 2]}, index=[[1, 2], [3, 4]])
        assert df.index.nlevels == 2
        for accessor, new_value in (("at", 3), ("loc", 4)):
            getattr(df, accessor)[(1, 3), "a"] = new_value
            assert getattr(df, accessor)[(1, 3), "a"] == new_value

        # Series.at and Series.loc setter works with MultiIndex
        column = df["a"]
        assert column.index.nlevels == 2
        for accessor, new_value in (("at", 5), ("loc", 6)):
            getattr(column, accessor)[1, 3] = new_value
            assert getattr(column, accessor)[1, 3] == new_value

    def test_multiindex_at_get_one_level(self):
        """``at`` with a scalar key when the index is built from one list of labels (GH#38053)."""
        s2 = Series((0, 1), index=[[False, True]])
        assert s2.at[False] == 0
|
||||
Reference in New Issue
Block a user