first commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,58 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
concat,
|
||||
)
|
||||
|
||||
|
||||
def _check_mixed_float(df, dtype=None):
|
||||
# float16 are most likely to be upcasted to float32
|
||||
dtypes = {"A": "float32", "B": "float32", "C": "float16", "D": "float64"}
|
||||
if isinstance(dtype, str):
|
||||
dtypes = {k: dtype for k, v in dtypes.items()}
|
||||
elif isinstance(dtype, dict):
|
||||
dtypes.update(dtype)
|
||||
if dtypes.get("A"):
|
||||
assert df.dtypes["A"] == dtypes["A"]
|
||||
if dtypes.get("B"):
|
||||
assert df.dtypes["B"] == dtypes["B"]
|
||||
if dtypes.get("C"):
|
||||
assert df.dtypes["C"] == dtypes["C"]
|
||||
if dtypes.get("D"):
|
||||
assert df.dtypes["D"] == dtypes["D"]
|
||||
|
||||
|
||||
def _check_mixed_int(df, dtype=None):
|
||||
dtypes = {"A": "int32", "B": "uint64", "C": "uint8", "D": "int64"}
|
||||
if isinstance(dtype, str):
|
||||
dtypes = {k: dtype for k, v in dtypes.items()}
|
||||
elif isinstance(dtype, dict):
|
||||
dtypes.update(dtype)
|
||||
if dtypes.get("A"):
|
||||
assert df.dtypes["A"] == dtypes["A"]
|
||||
if dtypes.get("B"):
|
||||
assert df.dtypes["B"] == dtypes["B"]
|
||||
if dtypes.get("C"):
|
||||
assert df.dtypes["C"] == dtypes["C"]
|
||||
if dtypes.get("D"):
|
||||
assert df.dtypes["D"] == dtypes["D"]
|
||||
|
||||
|
||||
def zip_frames(frames: list[DataFrame], axis: int = 1) -> DataFrame:
    """
    Interleave a list of frames, label by label.

    The labels are taken from the first frame; every frame is assumed to
    share the first frame's index/columns.

    Parameters
    ----------
    frames : list of DataFrame
    axis : int, default 1
        With ``axis == 1`` columns are interleaved (for each column of the
        first frame, that column from every frame in turn); any other value
        interleaves rows the same way.

    Returns
    -------
    new_frame : DataFrame
    """
    first = frames[0]

    if axis != 1:
        rows = []
        for label in first.index:
            for frame in frames:
                rows.append(frame.loc[label, :])
        return DataFrame(rows)

    pieces = []
    for name in first.columns:
        pieces.extend(frame.loc[:, name] for frame in frames)
    return concat(pieces, axis=1)
|
||||
@@ -0,0 +1,284 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
NaT,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing:
    rows 5-9 are entirely NaN and rows 15-19 are NaN in the last two columns.

                       A         B         C         D
    ABwBzA0ljw -1.128865 -0.897161  0.046603  0.274997
    DJiRzmbyQF  0.728869  0.233502  0.722431 -0.890872
    soujjZ0A08       NaN       NaN       NaN       NaN
    ...              ...       ...       ...       ...
    sHadBoyVHw -2.223032 -0.326384  0.258931  0.245517

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())

    # set some NAs
    whole_rows = slice(5, 10)
    frame.iloc[whole_rows] = np.nan
    tail_rows, last_two_cols = slice(15, 20), slice(-2, None)
    frame.iloc[tail_rows, last_two_cols] = np.nan
    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def bool_frame_with_na():
    """
    Fixture for DataFrame of booleans with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing:
    rows 5-9 are entirely NaN and rows 15-19 are NaN in the last two columns.
    The values are held as object dtype so that NaN can sit next to booleans.

                    A      B      C      D
    zBZxY2IDGd  False  False  False  False
    IhBWBMWllt  False   True   True   True
    sFFwdIUfz2    NaN    NaN    NaN    NaN
    ...           ...    ...    ...    ...
    kfboQ3VeIK   True  False   True  False

    [30 rows x 4 columns]
    """
    positive = DataFrame(tm.getSeriesData()) > 0
    frame = positive.astype(object)

    # set some NAs
    frame.iloc[5:10] = np.nan
    frame.iloc[15:20, -2:] = np.nan

    # For `any` tests we need to have at least one True before the first NaN
    #  in each column
    for diag in range(4):
        frame.iloc[diag, diag] = True
    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo']; 'foo' holds the constant "bar".

                       A         B         C         D  foo
    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
    ...              ...       ...       ...       ...  ...
    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar

    [30 rows x 5 columns]
    """
    floats = DataFrame(tm.getSeriesData())
    # append the constant string column after the float columns
    return floats.assign(foo="bar")
|
||||
|
||||
|
||||
@pytest.fixture
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes
    float32, float32, float16 and float64 respectively.

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    ...              ...       ...       ...       ...
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    targets = (
        ("A", "float32"),
        ("B", "float32"),
        ("C", "float16"),
        ("D", "float64"),
    )
    # cast one column at a time, in column order
    for column, target in targets:
        frame[column] = frame[column].astype(target)
    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes
    int32, uint64, uint8 and int64 respectively.  'B' is all ones and 'D'
    repeats the values of 'C'.

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ...        .. ..  ...  ...
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    as_ints = {name: col.astype(int) for name, col in tm.getSeriesData().items()}
    frame = DataFrame(as_ints)

    frame["A"] = frame["A"].astype("int32")
    frame["B"] = np.ones(len(frame["B"]), dtype="uint64")
    frame["C"] = frame["C"].astype("uint8")
    # NOTE: 'D' is built from the already-converted 'C' column, not from the
    #  original 'D' data, so both columns hold the same values.
    frame["D"] = frame["C"].astype("int64")
    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def timezone_frame():
    """
    Fixture for DataFrame of date_range Series with different time zones

    Columns are ['A', 'B', 'C']; some entries are missing
    ('A' is tz-naive, 'B' is US/Eastern, 'C' is CET; row 1 of 'B' and 'C'
    is NaT).

               A                         B                         C
    0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
    1 2013-01-02                       NaT                       NaT
    2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
    """
    naive = date_range("20130101", periods=3)
    eastern = date_range("20130101", periods=3, tz="US/Eastern")
    cet = date_range("20130101", periods=3, tz="CET")
    frame = DataFrame({"A": naive, "B": eastern, "C": cet})

    # knock out the middle row of both tz-aware columns
    for col_pos in (1, 2):
        frame.iloc[1, col_pos] = NaT
    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def uint64_frame():
    """
    Fixture for DataFrame with uint64 values

    Columns are ['A', 'B']; 'B' holds values at and above 2**63, i.e.
    beyond the int64 range.
    """
    base = 2**63
    data = {"A": np.arange(3), "B": [base, base + 5, base + 10]}
    return DataFrame(data, dtype=np.uint64)
|
||||
|
||||
|
||||
@pytest.fixture
def simple_frame():
    """
    Fixture for simple 3x3 DataFrame

    Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c'].

       one  two  three
    a  1.0  2.0    3.0
    b  4.0  5.0    6.0
    c  7.0  8.0    9.0
    """
    # 1.0 .. 9.0 laid out row by row
    values = np.arange(1.0, 10.0).reshape(3, 3)
    row_labels = ["a", "b", "c"]
    col_labels = ["one", "two", "three"]
    return DataFrame(values, columns=col_labels, index=row_labels)
|
||||
|
||||
|
||||
@pytest.fixture
def frame_of_index_cols():
    """
    Fixture for DataFrame of columns that can be used for indexing

    Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')];
    'A' & 'B' contain duplicates (but are jointly unique), the rest are unique.

         A      B  C         D         E  (tuple, as, label)
    0  foo    one  a  0.608477 -0.012500           -1.664297
    1  foo    two  b -0.633460  0.249614           -0.364411
    2  foo  three  c  0.615256  2.154968           -0.834666
    3  bar    one  d  0.234246  1.085675            0.718445
    4  bar    two  e  0.533841 -0.005702           -3.533912
    """
    data = {
        "A": ["foo", "foo", "foo", "bar", "bar"],
        "B": ["one", "two", "three", "one", "two"],
        "C": ["a", "b", "c", "d", "e"],
    }
    # three random float columns, drawn in column order
    for label in ("D", "E", ("tuple", "as", "label")):
        data[label] = np.random.randn(5)
    return DataFrame(data)
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        # boolean / counting reductions
        "any", "all", "count",
        # arithmetic and order statistics
        "sum", "prod", "max", "min", "mean", "median",
        # higher moments and dispersion
        "skew", "kurt", "sem", "var", "std", "mad",
    ]
)  # fmt: skip
def reduction_functions(request):
    """
    Parametrized fixture yielding the name of one reduction method per run.
    """
    name = request.param
    return name
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,191 @@
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.construction import create_series_with_explicit_dtype
|
||||
|
||||
|
||||
class TestFromDict:
    # Note: these tests are specific to the from_dict method, not for
    #  passing dictionaries to DataFrame.__init__

    def test_from_dict_scalars_requires_index(self):
        # a mapping of only scalars gives from_dict nothing to build an index from
        scalars = OrderedDict([("b", 8), ("a", 5), ("a", 6)])
        msg = "If using all scalar values, you must pass an index"
        with pytest.raises(ValueError, match=msg):
            DataFrame.from_dict(scalars)

    def test_constructor_list_of_odicts(self):
        # rows with differing key sets, including an empty one
        rows = [
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
            OrderedDict([["a", 1.5], ["d", 6]]),
            OrderedDict(),
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
        ]
        rows_by_position = dict(enumerate(rows))

        result = DataFrame(rows)
        expected = DataFrame.from_dict(rows_by_position, orient="index")
        tm.assert_frame_equal(result, expected.reindex(result.index))

    def test_constructor_single_row(self):
        row = OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]])

        result = DataFrame([row])
        expected = DataFrame.from_dict({0: row}, orient="index")
        expected = expected.reindex(result.index)
        tm.assert_frame_equal(result, expected)

    def test_constructor_list_of_series(self):
        idx = Index(["a", "b", "c"])
        # the two records every "expected" frame below is built from
        records = [
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
            OrderedDict([["a", 1.5], ["b", 3.0], ["c", 6.0]]),
        ]

        # all named
        all_named = [
            Series([1.5, 3, 4], idx, dtype="O", name="x"),
            Series([1.5, 3, 6], idx, name="y"),
        ]
        result = DataFrame(all_named)
        keyed = OrderedDict(zip(["x", "y"], records))
        expected = DataFrame.from_dict(keyed, orient="index")
        tm.assert_frame_equal(result, expected)

        # some unnamed
        some_named = [
            Series([1.5, 3, 4], idx, dtype="O", name="x"),
            Series([1.5, 3, 6], idx),
        ]
        result = DataFrame(some_named)
        keyed = OrderedDict(zip(["x", "Unnamed 0"], records))
        expected = DataFrame.from_dict(keyed, orient="index")
        tm.assert_frame_equal(result, expected)

        # none named
        ragged = [
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4], ["d", 6]]),
            OrderedDict([["a", 1.5], ["b", 3], ["d", 6]]),
            OrderedDict([["a", 1.5], ["d", 6]]),
            OrderedDict(),
            OrderedDict([["a", 1.5], ["b", 3], ["c", 4]]),
            OrderedDict([["b", 3], ["c", 4], ["d", 6]]),
        ]
        ragged_series = [
            create_series_with_explicit_dtype(item, dtype_if_empty=object)
            for item in ragged
        ]

        result = DataFrame(ragged_series)
        keyed = OrderedDict(enumerate(ragged_series))
        expected = DataFrame.from_dict(keyed, orient="index")
        tm.assert_frame_equal(result, expected.reindex(result.index))

        result2 = DataFrame(ragged_series, index=np.arange(6))
        tm.assert_frame_equal(result, result2)

        # a single empty Series gives one row and no columns
        result = DataFrame([Series(dtype=object)])
        expected = DataFrame(index=[0])
        tm.assert_frame_equal(result, expected)

        # unnamed Series end up keyed by position
        unnamed = [
            Series([1.5, 3, 4], idx, dtype="O"),
            Series([1.5, 3, 6], idx),
        ]
        result = DataFrame(unnamed)
        keyed = OrderedDict(enumerate(records))
        expected = DataFrame.from_dict(keyed, orient="index")
        tm.assert_frame_equal(result, expected)

    def test_constructor_orient(self, float_string_frame):
        row_series = float_string_frame.T._series
        recons = DataFrame.from_dict(row_series, orient="index")
        expected = float_string_frame.reindex(index=recons.index)
        tm.assert_frame_equal(recons, expected)

        # dict of sequence
        seqs = {"hi": [32, 3, 3], "there": [3, 5, 3]}
        rs = DataFrame.from_dict(seqs, orient="index")
        xp = DataFrame.from_dict(seqs).T.reindex(list(seqs))
        tm.assert_frame_equal(rs, xp)

    def test_constructor_from_ordered_dict(self):
        # GH#8425
        nested = OrderedDict(
            [
                ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])),
                ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])),
                ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])),
            ]
        )
        # orient="index" must equal the transpose of orient="columns"
        result = DataFrame.from_dict(nested, orient="index")
        expected = DataFrame.from_dict(nested, orient="columns").T
        tm.assert_frame_equal(result, expected)

    def test_from_dict_columns_parameter(self):
        # GH#18529
        # Test new columns parameter for from_dict that was added to make
        #  from_items(..., orient='index', columns=[...]) easier to replicate
        names = ["one", "two"]
        result = DataFrame.from_dict(
            OrderedDict([("A", [1, 2]), ("B", [4, 5])]),
            orient="index",
            columns=names,
        )
        expected = DataFrame([[1, 2], [4, 5]], index=["A", "B"], columns=names)
        tm.assert_frame_equal(result, expected)

        # columns= is rejected for orient="columns", explicit or defaulted
        msg = "cannot use columns parameter with orient='columns'"
        with pytest.raises(ValueError, match=msg):
            DataFrame.from_dict(
                {"A": [1, 2], "B": [4, 5]},
                orient="columns",
                columns=["one", "two"],
            )
        with pytest.raises(ValueError, match=msg):
            DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])

    @pytest.mark.parametrize(
        "data_dict, keys, orient",
        [
            ({}, [], "index"),
            ([{("a",): 1}, {("a",): 2}], [("a",)], "columns"),
            ([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"),
            ([{("a", "b"): 1}], [("a", "b")], "columns"),
        ],
    )
    def test_constructor_from_dict_tuples(self, data_dict, keys, orient):
        # GH#16769
        frame = DataFrame.from_dict(data_dict, orient)

        # tuple keys must survive as plain object labels
        expected = Index(keys, dtype="object", tupleize_cols=False)
        tm.assert_index_equal(frame.columns, expected)

    def test_frame_dict_constructor_empty_series(self):
        values = [1, 2, 3, 4]
        s1 = Series(
            values, index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])
        )
        s2 = Series(
            values, index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])
        )
        s3 = Series(dtype=object)

        # it works!
        DataFrame({"foo": s1, "bar": s2, "baz": s3})
        DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2})
|
||||
@@ -0,0 +1,466 @@
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas.compat import is_platform_little_endian
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
Index,
|
||||
Interval,
|
||||
RangeIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestFromRecords:
|
||||
def test_from_records_with_datetimes(self):
    # Build a frame from a recarray holding a datetime and a null.

    # this may fail on certain platforms because of a numpy issue
    # related GH#6140
    if not is_platform_little_endian():
        pytest.skip("known failure of test on non-little endian")

    # construction with a null in a recarray
    # GH#6140
    expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]})

    arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
    dtypes = [("EXPIRY", "<M8[ns]")]

    # np.rec is the public namespace for fromarrays; np.core is private
    #  API and is deprecated as of NumPy 2.0
    recarray = np.rec.fromarrays(arrdata, dtype=dtypes)

    result = DataFrame.from_records(recarray)
    tm.assert_frame_equal(result, expected)

    # coercion should work too
    arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])]
    dtypes = [("EXPIRY", "<M8[m]")]
    recarray = np.rec.fromarrays(arrdata, dtype=dtypes)
    result = DataFrame.from_records(recarray)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_sequencelike(self):
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": np.array(np.random.randn(6), dtype=np.float64),
|
||||
"A1": np.array(np.random.randn(6), dtype=np.float64),
|
||||
"B": np.array(np.arange(6), dtype=np.int64),
|
||||
"C": ["foo"] * 6,
|
||||
"D": np.array([True, False] * 3, dtype=bool),
|
||||
"E": np.array(np.random.randn(6), dtype=np.float32),
|
||||
"E1": np.array(np.random.randn(6), dtype=np.float32),
|
||||
"F": np.array(np.arange(6), dtype=np.int32),
|
||||
}
|
||||
)
|
||||
|
||||
# this is actually tricky to create the recordlike arrays and
|
||||
# have the dtypes be intact
|
||||
blocks = df._to_dict_of_blocks()
|
||||
tuples = []
|
||||
columns = []
|
||||
dtypes = []
|
||||
for dtype, b in blocks.items():
|
||||
columns.extend(b.columns)
|
||||
dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns])
|
||||
for i in range(len(df.index)):
|
||||
tup = []
|
||||
for _, b in blocks.items():
|
||||
tup.extend(b.iloc[i].values)
|
||||
tuples.append(tuple(tup))
|
||||
|
||||
recarray = np.array(tuples, dtype=dtypes).view(np.recarray)
|
||||
recarray2 = df.to_records()
|
||||
lists = [list(x) for x in tuples]
|
||||
|
||||
# tuples (lose the dtype info)
|
||||
result = DataFrame.from_records(tuples, columns=columns).reindex(
|
||||
columns=df.columns
|
||||
)
|
||||
|
||||
# created recarray and with to_records recarray (have dtype info)
|
||||
result2 = DataFrame.from_records(recarray, columns=columns).reindex(
|
||||
columns=df.columns
|
||||
)
|
||||
result3 = DataFrame.from_records(recarray2, columns=columns).reindex(
|
||||
columns=df.columns
|
||||
)
|
||||
|
||||
# list of tupels (no dtype info)
|
||||
result4 = DataFrame.from_records(lists, columns=columns).reindex(
|
||||
columns=df.columns
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, df, check_dtype=False)
|
||||
tm.assert_frame_equal(result2, df)
|
||||
tm.assert_frame_equal(result3, df)
|
||||
tm.assert_frame_equal(result4, df, check_dtype=False)
|
||||
|
||||
# tuples is in the order of the columns
|
||||
result = DataFrame.from_records(tuples)
|
||||
tm.assert_index_equal(result.columns, RangeIndex(8))
|
||||
|
||||
# test exclude parameter & we are casting the results here (as we don't
|
||||
# have dtype info to recover)
|
||||
columns_to_test = [columns.index("C"), columns.index("E1")]
|
||||
|
||||
exclude = list(set(range(8)) - set(columns_to_test))
|
||||
result = DataFrame.from_records(tuples, exclude=exclude)
|
||||
result.columns = [columns[i] for i in sorted(columns_to_test)]
|
||||
tm.assert_series_equal(result["C"], df["C"])
|
||||
tm.assert_series_equal(result["E1"], df["E1"])
|
||||
|
||||
def test_from_records_sequencelike_empty(self):
|
||||
# empty case
|
||||
result = DataFrame.from_records([], columns=["foo", "bar", "baz"])
|
||||
assert len(result) == 0
|
||||
tm.assert_index_equal(result.columns, Index(["foo", "bar", "baz"]))
|
||||
|
||||
result = DataFrame.from_records([])
|
||||
assert len(result) == 0
|
||||
assert len(result.columns) == 0
|
||||
|
||||
def test_from_records_dictlike(self):
|
||||
|
||||
# test the dict methods
|
||||
df = DataFrame(
|
||||
{
|
||||
"A": np.array(np.random.randn(6), dtype=np.float64),
|
||||
"A1": np.array(np.random.randn(6), dtype=np.float64),
|
||||
"B": np.array(np.arange(6), dtype=np.int64),
|
||||
"C": ["foo"] * 6,
|
||||
"D": np.array([True, False] * 3, dtype=bool),
|
||||
"E": np.array(np.random.randn(6), dtype=np.float32),
|
||||
"E1": np.array(np.random.randn(6), dtype=np.float32),
|
||||
"F": np.array(np.arange(6), dtype=np.int32),
|
||||
}
|
||||
)
|
||||
|
||||
# columns is in a different order here than the actual items iterated
|
||||
# from the dict
|
||||
blocks = df._to_dict_of_blocks()
|
||||
columns = []
|
||||
for b in blocks.values():
|
||||
columns.extend(b.columns)
|
||||
|
||||
asdict = {x: y for x, y in df.items()}
|
||||
asdict2 = {x: y.values for x, y in df.items()}
|
||||
|
||||
# dict of series & dict of ndarrays (have dtype info)
|
||||
results = []
|
||||
results.append(DataFrame.from_records(asdict).reindex(columns=df.columns))
|
||||
results.append(
|
||||
DataFrame.from_records(asdict, columns=columns).reindex(columns=df.columns)
|
||||
)
|
||||
results.append(
|
||||
DataFrame.from_records(asdict2, columns=columns).reindex(columns=df.columns)
|
||||
)
|
||||
|
||||
for r in results:
|
||||
tm.assert_frame_equal(r, df)
|
||||
|
||||
def test_from_records_with_index_data(self):
|
||||
df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])
|
||||
|
||||
data = np.random.randn(10)
|
||||
df1 = DataFrame.from_records(df, index=data)
|
||||
tm.assert_index_equal(df1.index, Index(data))
|
||||
|
||||
def test_from_records_bad_index_column(self):
    source = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])

    # should pass
    for index_arg in (["C"], "C"):
        indexed = DataFrame.from_records(source, index=index_arg)
        tm.assert_index_equal(indexed.index, Index(source.C))

    # should fail
    msg = r"Length of values \(10\) does not match length of index \(1\)"
    with pytest.raises(ValueError, match=msg):
        DataFrame.from_records(source, index=[2])
    with pytest.raises(KeyError, match=r"^2$"):
        DataFrame.from_records(source, index=2)
|
||||
|
||||
def test_from_records_non_tuple(self):
|
||||
class Record:
|
||||
def __init__(self, *args):
|
||||
self.args = args
|
||||
|
||||
def __getitem__(self, i):
|
||||
return self.args[i]
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.args)
|
||||
|
||||
recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)]
|
||||
tups = [tuple(rec) for rec in recs]
|
||||
|
||||
result = DataFrame.from_records(recs)
|
||||
expected = DataFrame.from_records(tups)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_len0_with_columns(self):
|
||||
# GH#2633
|
||||
result = DataFrame.from_records([], index="foo", columns=["foo", "bar"])
|
||||
expected = Index(["bar"])
|
||||
|
||||
assert len(result) == 0
|
||||
assert result.index.name == "foo"
|
||||
tm.assert_index_equal(result.columns, expected)
|
||||
|
||||
def test_from_records_series_list_dict(self):
|
||||
# GH#27358
|
||||
expected = DataFrame([[{"a": 1, "b": 2}, {"a": 3, "b": 4}]]).T
|
||||
data = Series([[{"a": 1, "b": 2}], [{"a": 3, "b": 4}]])
|
||||
result = DataFrame.from_records(data)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_series_categorical_index(self):
|
||||
# GH#32805
|
||||
index = CategoricalIndex(
|
||||
[Interval(-20, -10), Interval(-10, 0), Interval(0, 10)]
|
||||
)
|
||||
series_of_dicts = Series([{"a": 1}, {"a": 2}, {"b": 3}], index=index)
|
||||
frame = DataFrame.from_records(series_of_dicts, index=index)
|
||||
expected = DataFrame(
|
||||
{"a": [1, 2, np.NaN], "b": [np.NaN, np.NaN, 3]}, index=index
|
||||
)
|
||||
tm.assert_frame_equal(frame, expected)
|
||||
|
||||
def test_frame_from_records_utc(self):
    # a tz-aware datetime field must be usable as the index
    begin = datetime(2006, 4, 27, tzinfo=pytz.utc)
    rec = {"datum": 1.5, "begin_time": begin}

    # it works
    DataFrame.from_records([rec], index="begin_time")
|
||||
|
||||
def test_from_records_to_records(self):
    # from numpy documentation
    # "S10" is the 10-byte string field; the equivalent "a10" spelling is
    #  deprecated as of NumPy 2.0
    arr = np.zeros((2,), dtype=("i4,f4,S10"))
    arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]

    # smoke test: plain construction from the structured array must not raise
    DataFrame.from_records(arr)

    index = Index(np.arange(len(arr))[::-1])
    indexed_frame = DataFrame.from_records(arr, index=index)
    tm.assert_index_equal(indexed_frame.index, index)

    # without names, it should go to last ditch
    arr2 = np.zeros((2, 3))
    tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2))

    # wrong length
    msg = r"Length of values \(2\) does not match length of index \(1\)"
    with pytest.raises(ValueError, match=msg):
        DataFrame.from_records(arr, index=index[:-1])

    indexed_frame = DataFrame.from_records(arr, index="f1")

    # what to do?
    # with the index included there are three fields ...
    records = indexed_frame.to_records()
    assert len(records.dtype.names) == 3

    # ... and only the two data fields without it
    records = indexed_frame.to_records(index=False)
    assert len(records.dtype.names) == 2
    assert "index" not in records.dtype.names
|
||||
|
||||
def test_from_records_nones(self):
|
||||
tuples = [(1, 2, None, 3), (1, 2, None, 3), (None, 2, 5, 3)]
|
||||
|
||||
df = DataFrame.from_records(tuples, columns=["a", "b", "c", "d"])
|
||||
assert np.isnan(df["c"][0])
|
||||
|
||||
def test_from_records_iterator(self):
|
||||
arr = np.array(
|
||||
[(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5.0, 5.0, 6, 6), (7.0, 7.0, 8, 8)],
|
||||
dtype=[
|
||||
("x", np.float64),
|
||||
("u", np.float32),
|
||||
("y", np.int64),
|
||||
("z", np.int32),
|
||||
],
|
||||
)
|
||||
df = DataFrame.from_records(iter(arr), nrows=2)
|
||||
xp = DataFrame(
|
||||
{
|
||||
"x": np.array([1.0, 3.0], dtype=np.float64),
|
||||
"u": np.array([1.0, 3.0], dtype=np.float32),
|
||||
"y": np.array([2, 4], dtype=np.int64),
|
||||
"z": np.array([2, 4], dtype=np.int32),
|
||||
}
|
||||
)
|
||||
tm.assert_frame_equal(df.reindex_like(xp), xp)
|
||||
|
||||
# no dtypes specified here, so just compare with the default
|
||||
arr = [(1.0, 2), (3.0, 4), (5.0, 6), (7.0, 8)]
|
||||
df = DataFrame.from_records(iter(arr), columns=["x", "y"], nrows=2)
|
||||
tm.assert_frame_equal(df, xp.reindex(columns=["x", "y"]), check_dtype=False)
|
||||
|
||||
def test_from_records_tuples_generator(self):
|
||||
def tuple_generator(length):
|
||||
for i in range(length):
|
||||
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
yield (i, letters[i % len(letters)], i / length)
|
||||
|
||||
columns_names = ["Integer", "String", "Float"]
|
||||
columns = [
|
||||
[i[j] for i in tuple_generator(10)] for j in range(len(columns_names))
|
||||
]
|
||||
data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}
|
||||
expected = DataFrame(data, columns=columns_names)
|
||||
|
||||
generator = tuple_generator(10)
|
||||
result = DataFrame.from_records(generator, columns=columns_names)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_lists_generator(self):
|
||||
def list_generator(length):
|
||||
for i in range(length):
|
||||
letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
yield [i, letters[i % len(letters)], i / length]
|
||||
|
||||
columns_names = ["Integer", "String", "Float"]
|
||||
columns = [
|
||||
[i[j] for i in list_generator(10)] for j in range(len(columns_names))
|
||||
]
|
||||
data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]}
|
||||
expected = DataFrame(data, columns=columns_names)
|
||||
|
||||
generator = list_generator(10)
|
||||
result = DataFrame.from_records(generator, columns=columns_names)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_columns_not_modified(self):
|
||||
tuples = [(1, 2, 3), (1, 2, 3), (2, 5, 3)]
|
||||
|
||||
columns = ["a", "b", "c"]
|
||||
original_columns = list(columns)
|
||||
|
||||
df = DataFrame.from_records(tuples, columns=columns, index="a") # noqa
|
||||
|
||||
assert columns == original_columns
|
||||
|
||||
def test_from_records_decimal(self):
|
||||
|
||||
tuples = [(Decimal("1.5"),), (Decimal("2.5"),), (None,)]
|
||||
|
||||
df = DataFrame.from_records(tuples, columns=["a"])
|
||||
assert df["a"].dtype == object
|
||||
|
||||
df = DataFrame.from_records(tuples, columns=["a"], coerce_float=True)
|
||||
assert df["a"].dtype == np.float64
|
||||
assert np.isnan(df["a"].values[-1])
|
||||
|
||||
def test_from_records_duplicates(self):
|
||||
result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])
|
||||
|
||||
expected = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"])
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_set_index_name(self):
|
||||
def create_dict(order_id):
|
||||
return {
|
||||
"order_id": order_id,
|
||||
"quantity": np.random.randint(1, 10),
|
||||
"price": np.random.randint(1, 10),
|
||||
}
|
||||
|
||||
documents = [create_dict(i) for i in range(10)]
|
||||
# demo missing data
|
||||
documents.append({"order_id": 10, "quantity": 5})
|
||||
|
||||
result = DataFrame.from_records(documents, index="order_id")
|
||||
assert result.index.name == "order_id"
|
||||
|
||||
# MultiIndex
|
||||
result = DataFrame.from_records(documents, index=["order_id", "quantity"])
|
||||
assert result.index.names == ("order_id", "quantity")
|
||||
|
||||
def test_from_records_misc_brokenness(self):
|
||||
# GH#2179
|
||||
|
||||
data = {1: ["foo"], 2: ["bar"]}
|
||||
|
||||
result = DataFrame.from_records(data, columns=["a", "b"])
|
||||
exp = DataFrame(data, columns=["a", "b"])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
|
||||
# overlap in index/index_names
|
||||
|
||||
data = {"a": [1, 2, 3], "b": [4, 5, 6]}
|
||||
|
||||
result = DataFrame.from_records(data, index=["a", "b", "c"])
|
||||
exp = DataFrame(data, index=["a", "b", "c"])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
|
||||
# GH#2623
|
||||
rows = []
|
||||
rows.append([datetime(2010, 1, 1), 1])
|
||||
rows.append([datetime(2010, 1, 2), "hi"]) # test col upconverts to obj
|
||||
df2_obj = DataFrame.from_records(rows, columns=["date", "test"])
|
||||
result = df2_obj.dtypes
|
||||
expected = Series(
|
||||
[np.dtype("datetime64[ns]"), np.dtype("object")], index=["date", "test"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
rows = []
|
||||
rows.append([datetime(2010, 1, 1), 1])
|
||||
rows.append([datetime(2010, 1, 2), 1])
|
||||
df2_obj = DataFrame.from_records(rows, columns=["date", "test"])
|
||||
result = df2_obj.dtypes
|
||||
expected = Series(
|
||||
[np.dtype("datetime64[ns]"), np.dtype("int64")], index=["date", "test"]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_from_records_empty(self):
|
||||
# GH#3562
|
||||
result = DataFrame.from_records([], columns=["a", "b", "c"])
|
||||
expected = DataFrame(columns=["a", "b", "c"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = DataFrame.from_records([], columns=["a", "b", "b"])
|
||||
expected = DataFrame(columns=["a", "b", "b"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_from_records_empty_with_nonempty_fields_gh3682(self):
|
||||
a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)])
|
||||
df = DataFrame.from_records(a, index="id")
|
||||
|
||||
ex_index = Index([1], name="id")
|
||||
expected = DataFrame({"value": [2]}, index=ex_index, columns=["value"])
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
b = a[:0]
|
||||
df2 = DataFrame.from_records(b, index="id")
|
||||
tm.assert_frame_equal(df2, df.iloc[:0])
|
||||
|
||||
def test_from_records_empty2(self):
|
||||
# GH#42456
|
||||
dtype = [("prop", int)]
|
||||
shape = (0, len(dtype))
|
||||
arr = np.empty(shape, dtype=dtype)
|
||||
|
||||
result = DataFrame.from_records(arr)
|
||||
expected = DataFrame({"prop": np.array([], dtype=int)})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
alt = DataFrame(arr)
|
||||
tm.assert_frame_equal(alt, expected)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,60 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
class TestDataFrameDelItem:
|
||||
def test_delitem(self, float_frame):
|
||||
del float_frame["A"]
|
||||
assert "A" not in float_frame
|
||||
|
||||
def test_delitem_multiindex(self):
|
||||
midx = MultiIndex.from_product([["A", "B"], [1, 2]])
|
||||
df = DataFrame(np.random.randn(4, 4), columns=midx)
|
||||
assert len(df.columns) == 4
|
||||
assert ("A",) in df.columns
|
||||
assert "A" in df.columns
|
||||
|
||||
result = df["A"]
|
||||
assert isinstance(result, DataFrame)
|
||||
del df["A"]
|
||||
|
||||
assert len(df.columns) == 2
|
||||
|
||||
# A still in the levels, BUT get a KeyError if trying
|
||||
# to delete
|
||||
assert ("A",) not in df.columns
|
||||
with pytest.raises(KeyError, match=re.escape("('A',)")):
|
||||
del df[("A",)]
|
||||
|
||||
# behavior of dropped/deleted MultiIndex levels changed from
|
||||
# GH 2770 to GH 19027: MultiIndex no longer '.__contains__'
|
||||
# levels which are dropped/deleted
|
||||
assert "A" not in df.columns
|
||||
with pytest.raises(KeyError, match=re.escape("('A',)")):
|
||||
del df["A"]
|
||||
|
||||
def test_delitem_corner(self, float_frame):
|
||||
f = float_frame.copy()
|
||||
del f["D"]
|
||||
assert len(f.columns) == 3
|
||||
with pytest.raises(KeyError, match=r"^'D'$"):
|
||||
del f["D"]
|
||||
del f["B"]
|
||||
assert len(f.columns) == 2
|
||||
|
||||
def test_delitem_col_still_multiindex(self):
|
||||
arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]]
|
||||
|
||||
tuples = sorted(zip(*arrays))
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
|
||||
df = DataFrame(np.random.randn(3, 4), columns=index)
|
||||
del df[("a", "", "")]
|
||||
assert isinstance(df.columns, MultiIndex)
|
||||
@@ -0,0 +1,27 @@
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGet:
    """Tests for ``DataFrame.get``."""

    def test_get(self, float_frame):
        """``get`` returns the column, or the default when the key is missing."""
        b = float_frame.get("B")
        tm.assert_series_equal(b, float_frame["B"])

        # Missing key: None without a default, otherwise the default itself.
        assert float_frame.get("foo") is None
        tm.assert_series_equal(
            float_frame.get("foo", float_frame["B"]), float_frame["B"]
        )

    @pytest.mark.parametrize(
        "df",
        [
            DataFrame(),
            DataFrame(columns=list("AB")),
            DataFrame(columns=list("AB"), index=range(3)),
        ],
    )
    def test_get_none(self, df):
        """``get(None)`` returns ``None`` for each parametrized frame."""
        # see gh-5652
        assert df.get(None) is None
|
||||
@@ -0,0 +1,22 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
class TestGetValue:
|
||||
def test_get_set_value_no_partial_indexing(self):
|
||||
# partial w/ MultiIndex raise exception
|
||||
index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)])
|
||||
df = DataFrame(index=index, columns=range(4))
|
||||
with pytest.raises(KeyError, match=r"^0$"):
|
||||
df._get_value(0, 1)
|
||||
|
||||
def test_get_value(self, float_frame):
|
||||
for idx in float_frame.index:
|
||||
for col in float_frame.columns:
|
||||
result = float_frame._get_value(idx, col)
|
||||
expected = float_frame[col][idx]
|
||||
assert result == expected
|
||||
@@ -0,0 +1,406 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
concat,
|
||||
get_dummies,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import SparseArray
|
||||
|
||||
|
||||
class TestGetitem:
|
||||
def test_getitem_unused_level_raises(self):
|
||||
# GH#20410
|
||||
mi = MultiIndex(
|
||||
levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
|
||||
codes=[[1, 0], [1, 0]],
|
||||
)
|
||||
df = DataFrame(-1, index=range(3), columns=mi)
|
||||
|
||||
with pytest.raises(KeyError, match="notevenone"):
|
||||
df["notevenone"]
|
||||
|
||||
def test_getitem_periodindex(self):
|
||||
rng = period_range("1/1/2000", periods=5)
|
||||
df = DataFrame(np.random.randn(10, 5), columns=rng)
|
||||
|
||||
ts = df[rng[0]]
|
||||
tm.assert_series_equal(ts, df.iloc[:, 0])
|
||||
|
||||
# GH#1211; smoketest unrelated to the rest of this test
|
||||
repr(df)
|
||||
|
||||
ts = df["1/1/2000"]
|
||||
tm.assert_series_equal(ts, df.iloc[:, 0])
|
||||
|
||||
def test_getitem_list_of_labels_categoricalindex_cols(self):
|
||||
# GH#16115
|
||||
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
|
||||
|
||||
expected = DataFrame(
|
||||
[[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats
|
||||
)
|
||||
dummies = get_dummies(cats)
|
||||
result = dummies[list(dummies.columns)]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_getitem_sparse_column_return_type_and_dtype(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/23559
|
||||
data = SparseArray([0, 1])
|
||||
df = DataFrame({"A": data})
|
||||
expected = Series(data, name="A")
|
||||
result = df["A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Also check iloc and loc while we're here
|
||||
result = df.iloc[:, 0]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.loc[:, "A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitemListLike:
    """Tests for ``DataFrame.__getitem__`` / ``iloc`` with list-like keys."""

    def test_getitem_list_missing_key(self):
        """The ``KeyError`` names the missing label when columns are non-unique."""
        # GH#13822, incorrect error string with non-unique columns when missing
        # column is accessed
        df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
        df.columns = ["x", "x", "z"]

        # Check that we get the correct value in the KeyError
        with pytest.raises(KeyError, match=r"\['y'\] not in index"):
            df[["x", "y", "z"]]

    def test_getitem_list_duplicates(self):
        """List selection keeps ``columns.name`` when other columns are duplicated."""
        # GH#1943
        df = DataFrame(np.random.randn(4, 4), columns=list("AABC"))
        df.columns.name = "foo"

        result = df[["B", "C"]]
        assert result.columns.name == "foo"

        expected = df.iloc[:, 2:]
        tm.assert_frame_equal(result, expected)

    def test_getitem_dupe_cols(self):
        """A list with only an unknown label raises the "None of ..." ``KeyError``."""
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"])
        msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\""
        with pytest.raises(KeyError, match=re.escape(msg)):
            df[["baf"]]

    @pytest.mark.parametrize(
        "idx_type",
        [
            list,
            iter,
            Index,
            set,
            lambda l: dict(zip(l, range(len(l)))),
            lambda l: dict(zip(l, range(len(l)))).keys(),
        ],
        ids=["list", "iter", "Index", "set", "dict", "dict_keys"],
    )
    @pytest.mark.parametrize("levels", [1, 2])
    def test_getitem_listlike(self, idx_type, levels, float_frame):
        """Each list-like key container selects the same columns as ``loc``."""
        # GH#21294

        if levels == 1:
            frame, missing = float_frame, "food"
        else:
            # MultiIndex columns
            frame = DataFrame(
                np.random.randn(8, 3),
                columns=Index(
                    [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")],
                    name=("sth", "sth2"),
                ),
            )
            missing = ("good", "food")

        keys = [frame.columns[1], frame.columns[0]]
        idx = idx_type(keys)
        # Built separately because ``idx`` may be a one-shot iterator.
        idx_check = list(idx_type(keys))

        if isinstance(idx, (set, dict)):
            with tm.assert_produces_warning(FutureWarning):
                result = frame[idx]
        else:
            result = frame[idx]

        expected = frame.loc[:, idx_check]
        expected.columns.names = frame.columns.names

        tm.assert_frame_equal(result, expected)

        idx = idx_type(keys + [missing])
        with pytest.raises(KeyError, match="not in index"):
            with tm.assert_produces_warning(FutureWarning):
                frame[idx]

    def test_getitem_iloc_generator(self):
        """``iloc`` accepts a generator of row positions."""
        # GH#39614
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        result = df.iloc[indexer]
        expected = DataFrame({"a": [2, 3], "b": [5, 6]}, index=[1, 2])
        tm.assert_frame_equal(result, expected)

    def test_getitem_iloc_two_dimensional_generator(self):
        """``iloc`` accepts a generator as the row part of a two-axis key."""
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        result = df.iloc[indexer, 1]
        expected = Series([5, 6], name="b", index=[1, 2])
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitemCallable:
|
||||
def test_getitem_callable(self, float_frame):
|
||||
# GH#12533
|
||||
result = float_frame[lambda x: "A"]
|
||||
expected = float_frame.loc[:, "A"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = float_frame[lambda x: ["A", "B"]]
|
||||
expected = float_frame.loc[:, ["A", "B"]]
|
||||
tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])
|
||||
|
||||
df = float_frame[:3]
|
||||
result = df[lambda x: [True, False, True]]
|
||||
expected = float_frame.iloc[[0, 2], :]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_loc_multiindex_columns_one_level(self):
|
||||
# GH#29749
|
||||
df = DataFrame([[1, 2]], columns=[["a", "b"]])
|
||||
expected = DataFrame([1], columns=[["a"]])
|
||||
|
||||
result = df["a"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.loc[:, "a"]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitemBooleanMask:
    """Tests for ``DataFrame.__getitem__`` with boolean masks."""

    def test_getitem_bool_mask_categorical_index(self):
        """Masks built from comparisons on ordered and unordered CategoricalIndex."""

        df3 = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
            },
            index=CategoricalIndex(
                [1, 1, 2, 1, 3, 2],
                dtype=CategoricalDtype([3, 2, 1], ordered=True),
                name="B",
            ),
        )
        df4 = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
            },
            index=CategoricalIndex(
                [1, 1, 2, 1, 3, 2],
                dtype=CategoricalDtype([3, 2, 1], ordered=False),
                name="B",
            ),
        )

        # Equality against a value that is not a category matches nothing.
        result = df3[df3.index == "a"]
        expected = df3.iloc[[]]
        tm.assert_frame_equal(result, expected)

        result = df4[df4.index == "a"]
        expected = df4.iloc[[]]
        tm.assert_frame_equal(result, expected)

        result = df3[df3.index == 1]
        expected = df3.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        result = df4[df4.index == 1]
        expected = df4.iloc[[0, 1, 3]]
        tm.assert_frame_equal(result, expected)

        # since we have an ordered categorical

        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #         categories=[3, 2, 1],
        #         ordered=True,
        #         name='B')
        result = df3[df3.index < 2]
        expected = df3.iloc[[4]]
        tm.assert_frame_equal(result, expected)

        result = df3[df3.index > 1]
        expected = df3.iloc[[]]
        tm.assert_frame_equal(result, expected)

        # unordered
        # cannot be compared

        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #         categories=[3, 2, 1],
        #         ordered=False,
        #         name='B')
        msg = "Unordered Categoricals can only compare equality or not"
        with pytest.raises(TypeError, match=msg):
            df4[df4.index < 2]
        with pytest.raises(TypeError, match=msg):
            df4[df4.index > 1]

    @pytest.mark.parametrize(
        "data1,data2,expected_data",
        (
            (
                [[1, 2], [3, 4]],
                [[0.5, 6], [7, 8]],
                [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]],
            ),
            (
                [[1, 2], [3, 4]],
                [[5, 6], [7, 8]],
                [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]],
            ),
        ),
    )
    def test_getitem_bool_mask_duplicate_columns_mixed_dtypes(
        self,
        data1,
        data2,
        expected_data,
    ):
        """Frame mask on a concatenated frame with duplicate column labels."""
        # GH#31954

        df1 = DataFrame(np.array(data1))
        df2 = DataFrame(np.array(data2))
        df = concat([df1, df2], axis=1)

        result = df[df > 2]

        # Build with unique keys 0..3, then rename 2 and 3 to recreate the
        # duplicate labels 0, 1, 0, 1 that concat produced.
        exdict = {i: np.array(col) for i, col in enumerate(expected_data)}
        expected = DataFrame(exdict).rename(columns={2: 0, 3: 1})
        tm.assert_frame_equal(result, expected)

    @pytest.fixture
    def df_dup_cols(self):
        """Return a 3x4 float64 frame whose first two columns are both ``"A"``."""
        dups = ["A", "A", "C", "D"]
        df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
        return df

    def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols):
        """An unaligned boolean frame key on duplicate columns raises ``ValueError``."""
        # `df.A > 6` is a DataFrame with a different shape from df

        # boolean with the duplicate raises
        df = df_dup_cols
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match="non-unique"):
                df[df.A > 6]

    def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols):
        """A boolean Series key filters rows when column labels are duplicated."""
        # boolean indexing
        # GH#4879
        df = DataFrame(
            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
        )
        expected = df[df.C > 6]
        expected.columns = df_dup_cols.columns

        df = df_dup_cols
        result = df[df.C > 6]

        tm.assert_frame_equal(result, expected)
        # Smoke checks: these only need to run without raising.
        result.dtypes
        str(result)

    def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols):
        """An aligned boolean frame key masks values on duplicate columns."""

        # where
        df = DataFrame(
            np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
        )
        # `df > 6` is a DataFrame with the same shape+alignment as df
        expected = df[df > 6]
        expected.columns = df_dup_cols.columns

        df = df_dup_cols
        result = df[df > 6]

        tm.assert_frame_equal(result, expected)
        # Smoke checks: these only need to run without raising.
        result.dtypes
        str(result)

    def test_getitem_empty_frame_with_boolean(self):
        """Masking an empty frame with its own comparison returns an equal frame."""
        # Test for issue GH#11859

        df = DataFrame()
        df2 = df[df > 0]
        tm.assert_frame_equal(df, df2)
|
||||
|
||||
|
||||
class TestGetitemSlice:
|
||||
def test_getitem_slice_float64(self, frame_or_series):
|
||||
values = np.arange(10.0, 50.0, 2)
|
||||
index = Index(values)
|
||||
|
||||
start, end = values[[5, 15]]
|
||||
|
||||
data = np.random.randn(20, 3)
|
||||
if frame_or_series is not DataFrame:
|
||||
data = data[:, 0]
|
||||
|
||||
obj = frame_or_series(data, index=index)
|
||||
|
||||
result = obj[start:end]
|
||||
expected = obj.iloc[5:16]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
result = obj.loc[start:end]
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
def test_getitem_datetime_slice(self):
|
||||
# GH#43223
|
||||
df = DataFrame(
|
||||
{"a": 0},
|
||||
index=DatetimeIndex(
|
||||
[
|
||||
"11.01.2011 22:00",
|
||||
"11.01.2011 23:00",
|
||||
"12.01.2011 00:00",
|
||||
"2011-01-13 00:00",
|
||||
]
|
||||
),
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = df["2011-01-01":"2011-11-01"]
|
||||
expected = DataFrame(
|
||||
{"a": 0},
|
||||
index=DatetimeIndex(
|
||||
["11.01.2011 22:00", "11.01.2011 23:00", "2011-01-13 00:00"]
|
||||
),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
class TestGetitemDeprecatedIndexers:
    """Tests for key types whose use in ``__getitem__`` is deprecated."""

    @pytest.mark.parametrize("key", [{"a", "b"}, {"a": "a"}])
    def test_getitem_dict_and_set_deprecated(self, key):
        """Indexing with a set or a dict emits a ``FutureWarning``."""
        # GH#42825
        df = DataFrame(
            [[1, 2], [3, 4]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)])
        )
        with tm.assert_produces_warning(FutureWarning):
            df[key]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,106 @@
|
||||
"""
|
||||
test_insert is specifically for the DataFrame.insert method; not to be
|
||||
confused with tests with "insert" in their names that are really testing
|
||||
__setitem__.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import PerformanceWarning
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameInsert:
|
||||
def test_insert(self):
|
||||
df = DataFrame(
|
||||
np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]
|
||||
)
|
||||
|
||||
df.insert(0, "foo", df["a"])
|
||||
tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"]))
|
||||
tm.assert_series_equal(df["a"], df["foo"], check_names=False)
|
||||
|
||||
df.insert(2, "bar", df["c"])
|
||||
tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"]))
|
||||
tm.assert_almost_equal(df["c"], df["bar"], check_names=False)
|
||||
|
||||
with pytest.raises(ValueError, match="already exists"):
|
||||
df.insert(1, "a", df["b"])
|
||||
|
||||
msg = "cannot insert c, already exists"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.insert(1, "c", df["b"])
|
||||
|
||||
df.columns.name = "some_name"
|
||||
# preserve columns name field
|
||||
df.insert(0, "baz", df["c"])
|
||||
assert df.columns.name == "some_name"
|
||||
|
||||
def test_insert_column_bug_4032(self):
|
||||
|
||||
# GH#4032, inserting a column and renaming causing errors
|
||||
df = DataFrame({"b": [1.1, 2.2]})
|
||||
|
||||
df = df.rename(columns={})
|
||||
df.insert(0, "a", [1, 2])
|
||||
result = df.rename(columns={})
|
||||
|
||||
str(result)
|
||||
expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
df.insert(0, "c", [1.3, 2.3])
|
||||
result = df.rename(columns={})
|
||||
|
||||
str(result)
|
||||
expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_insert_with_columns_dups(self):
|
||||
# GH#14291
|
||||
df = DataFrame()
|
||||
df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
|
||||
df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
|
||||
df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
|
||||
exp = DataFrame(
|
||||
[["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
|
||||
)
|
||||
tm.assert_frame_equal(df, exp)
|
||||
|
||||
def test_insert_item_cache(self, using_array_manager):
|
||||
df = DataFrame(np.random.randn(4, 3))
|
||||
ser = df[0]
|
||||
|
||||
if using_array_manager:
|
||||
expected_warning = None
|
||||
else:
|
||||
# with BlockManager warn about high fragmentation of single dtype
|
||||
expected_warning = PerformanceWarning
|
||||
|
||||
with tm.assert_produces_warning(expected_warning):
|
||||
for n in range(100):
|
||||
df[n + 3] = df[1] * n
|
||||
|
||||
ser.values[0] = 99
|
||||
|
||||
assert df.iloc[0, 0] == df[0][0]
|
||||
|
||||
def test_insert_EA_no_warning(self):
|
||||
# PerformanceWarning about fragmented frame should not be raised when
|
||||
# using EAs (https://github.com/pandas-dev/pandas/issues/44098)
|
||||
df = DataFrame(np.random.randint(0, 100, size=(3, 100)), dtype="Int64")
|
||||
with tm.assert_produces_warning(None):
|
||||
df["a"] = np.array([1, 2, 3])
|
||||
|
||||
def test_insert_frame(self):
|
||||
# GH#42403
|
||||
df = DataFrame({"col1": [1, 2], "col2": [3, 4]})
|
||||
|
||||
msg = r"Expected a 1D array, got an array with shape \(2, 2\)"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.insert(1, "newcol", df)
|
||||
@@ -0,0 +1,94 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestLookup:
|
||||
def test_lookup_float(self, float_frame):
|
||||
df = float_frame
|
||||
rows = list(df.index) * len(df.columns)
|
||||
cols = list(df.columns) * len(df.index)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = df.lookup(rows, cols)
|
||||
|
||||
expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)])
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_lookup_mixed(self, float_string_frame):
|
||||
df = float_string_frame
|
||||
rows = list(df.index) * len(df.columns)
|
||||
cols = list(df.columns) * len(df.index)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
result = df.lookup(rows, cols)
|
||||
|
||||
expected = np.array(
|
||||
[df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_
|
||||
)
|
||||
tm.assert_almost_equal(result, expected)
|
||||
|
||||
def test_lookup_bool(self):
|
||||
df = DataFrame(
|
||||
{
|
||||
"label": ["a", "b", "a", "c"],
|
||||
"mask_a": [True, True, False, True],
|
||||
"mask_b": [True, False, False, False],
|
||||
"mask_c": [False, True, False, True],
|
||||
}
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df["mask"] = df.lookup(df.index, "mask_" + df["label"])
|
||||
|
||||
exp_mask = np.array(
|
||||
[df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])]
|
||||
)
|
||||
|
||||
tm.assert_series_equal(df["mask"], Series(exp_mask, name="mask"))
|
||||
assert df["mask"].dtype == np.bool_
|
||||
|
||||
def test_lookup_raises(self, float_frame):
|
||||
with pytest.raises(KeyError, match="'One or more row labels was not found'"):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
float_frame.lookup(["xyz"], ["A"])
|
||||
|
||||
with pytest.raises(KeyError, match="'One or more column labels was not found'"):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
float_frame.lookup([float_frame.index[0]], ["xyz"])
|
||||
|
||||
with pytest.raises(ValueError, match="same size"):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
float_frame.lookup(["a", "b", "c"], ["a"])
|
||||
|
||||
def test_lookup_requires_unique_axes(self):
|
||||
# GH#33041 raise with a helpful error message
|
||||
df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "A"])
|
||||
|
||||
rows = [0, 1]
|
||||
cols = ["A", "A"]
|
||||
|
||||
# homogeneous-dtype case
|
||||
with pytest.raises(ValueError, match="requires unique index and columns"):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df.lookup(rows, cols)
|
||||
with pytest.raises(ValueError, match="requires unique index and columns"):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df.T.lookup(cols, rows)
|
||||
|
||||
# heterogeneous dtype
|
||||
df["B"] = 0
|
||||
with pytest.raises(ValueError, match="requires unique index and columns"):
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df.lookup(rows, cols)
|
||||
|
||||
|
||||
def test_lookup_deprecated():
|
||||
# GH#18262
|
||||
df = DataFrame(
|
||||
{"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]}
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
df.lookup(df.index, df["col"])
|
||||
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
Tests for DataFrame.mask; tests DataFrame.where as a side-effect.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
DataFrame,
|
||||
Series,
|
||||
StringDtype,
|
||||
Timedelta,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameMask:
|
||||
def test_mask(self):
|
||||
df = DataFrame(np.random.randn(5, 3))
|
||||
cond = df > 0
|
||||
|
||||
rs = df.where(cond, np.nan)
|
||||
tm.assert_frame_equal(rs, df.mask(df <= 0))
|
||||
tm.assert_frame_equal(rs, df.mask(~cond))
|
||||
|
||||
other = DataFrame(np.random.randn(5, 3))
|
||||
rs = df.where(cond, other)
|
||||
tm.assert_frame_equal(rs, df.mask(df <= 0, other))
|
||||
tm.assert_frame_equal(rs, df.mask(~cond, other))
|
||||
|
||||
def test_mask2(self):
|
||||
# see GH#21891
|
||||
df = DataFrame([1, 2])
|
||||
res = df.mask([[True], [False]])
|
||||
|
||||
exp = DataFrame([np.nan, 2])
|
||||
tm.assert_frame_equal(res, exp)
|
||||
|
||||
def test_mask_inplace(self):
|
||||
# GH#8801
|
||||
df = DataFrame(np.random.randn(5, 3))
|
||||
cond = df > 0
|
||||
|
||||
rdf = df.copy()
|
||||
|
||||
return_value = rdf.where(cond, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_frame_equal(rdf, df.where(cond))
|
||||
tm.assert_frame_equal(rdf, df.mask(~cond))
|
||||
|
||||
rdf = df.copy()
|
||||
return_value = rdf.where(cond, -df, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_frame_equal(rdf, df.where(cond, -df))
|
||||
tm.assert_frame_equal(rdf, df.mask(~cond, -df))
|
||||
|
||||
def test_mask_edge_case_1xN_frame(self):
|
||||
# GH#4071
|
||||
df = DataFrame([[1, 2]])
|
||||
res = df.mask(DataFrame([[True, False]]))
|
||||
expec = DataFrame([[np.nan, 2]])
|
||||
tm.assert_frame_equal(res, expec)
|
||||
|
||||
def test_mask_callable(self):
|
||||
# GH#12533
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
|
||||
result = df.mask(lambda x: x > 4, lambda x: x + 1)
|
||||
exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
tm.assert_frame_equal(result, df.mask(df > 4, df + 1))
|
||||
|
||||
# return ndarray and scalar
|
||||
result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
|
||||
exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))
|
||||
|
||||
# chain
|
||||
result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
|
||||
exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))
|
||||
|
||||
def test_mask_dtype_bool_conversion(self):
|
||||
# GH#3733
|
||||
df = DataFrame(data=np.random.randn(100, 50))
|
||||
df = df.where(df > 0) # create nans
|
||||
bools = df > 0
|
||||
mask = isna(df)
|
||||
expected = bools.astype(object).mask(mask)
|
||||
result = bools.mask(mask)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_mask_pos_args_deprecation(self, frame_or_series):
|
||||
# https://github.com/pandas-dev/pandas/issues/41485
|
||||
obj = DataFrame({"a": range(5)})
|
||||
expected = DataFrame({"a": [-1, 1, -1, 3, -1]})
|
||||
obj = tm.get_obj(obj, frame_or_series)
|
||||
expected = tm.get_obj(expected, frame_or_series)
|
||||
|
||||
cond = obj % 2 == 0
|
||||
msg = (
|
||||
r"In a future version of pandas all arguments of "
|
||||
f"{frame_or_series.__name__}.mask except for "
|
||||
r"the arguments 'cond' and 'other' will be keyword-only"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = obj.mask(cond, -1, False)
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_mask_try_cast_deprecated(frame_or_series):
|
||||
|
||||
obj = DataFrame(np.random.randn(4, 3))
|
||||
if frame_or_series is not DataFrame:
|
||||
obj = obj[0]
|
||||
|
||||
mask = obj > 0
|
||||
|
||||
with tm.assert_produces_warning(FutureWarning):
|
||||
# try_cast keyword deprecated
|
||||
obj.mask(mask, -1, try_cast=True)
|
||||
|
||||
|
||||
def test_mask_stringdtype(frame_or_series):
|
||||
# GH 40824
|
||||
obj = DataFrame(
|
||||
{"A": ["foo", "bar", "baz", NA]},
|
||||
index=["id1", "id2", "id3", "id4"],
|
||||
dtype=StringDtype(),
|
||||
)
|
||||
filtered_obj = DataFrame(
|
||||
{"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype()
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"A": [NA, "this", "that", NA]},
|
||||
index=["id1", "id2", "id3", "id4"],
|
||||
dtype=StringDtype(),
|
||||
)
|
||||
if frame_or_series is Series:
|
||||
obj = obj["A"]
|
||||
filtered_obj = filtered_obj["A"]
|
||||
expected = expected["A"]
|
||||
|
||||
filter_ser = Series([False, True, True, False])
|
||||
result = obj.mask(filter_ser, filtered_obj)
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_mask_where_dtype_timedelta():
    """Check ``mask`` and ``where`` results on a frame of ``Timedelta`` values."""
    # https://github.com/pandas-dev/pandas/issues/39548
    df = DataFrame([Timedelta(i, unit="d") for i in range(5)])

    # Masking every non-null value is compared to an all-NaN timedelta64[ns] frame.
    expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]"))
    tm.assert_frame_equal(df.mask(df.notna()), expected)

    # Only the values strictly greater than two days survive ``where``.
    expected = DataFrame(
        [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")]
    )
    tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected)
|
||||
@@ -0,0 +1,68 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas.core.dtypes.common import is_float_dtype
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
isna,
|
||||
)
|
||||
|
||||
|
||||
class TestSetValue:
|
||||
def test_set_value(self, float_frame):
|
||||
for idx in float_frame.index:
|
||||
for col in float_frame.columns:
|
||||
float_frame._set_value(idx, col, 1)
|
||||
assert float_frame[col][idx] == 1
|
||||
|
||||
def test_set_value_resize(self, float_frame):
|
||||
|
||||
res = float_frame._set_value("foobar", "B", 0)
|
||||
assert res is None
|
||||
assert float_frame.index[-1] == "foobar"
|
||||
assert float_frame._get_value("foobar", "B") == 0
|
||||
|
||||
float_frame.loc["foobar", "qux"] = 0
|
||||
assert float_frame._get_value("foobar", "qux") == 0
|
||||
|
||||
res = float_frame.copy()
|
||||
res._set_value("foobar", "baz", "sam")
|
||||
assert res["baz"].dtype == np.object_
|
||||
|
||||
res = float_frame.copy()
|
||||
res._set_value("foobar", "baz", True)
|
||||
assert res["baz"].dtype == np.object_
|
||||
|
||||
res = float_frame.copy()
|
||||
res._set_value("foobar", "baz", 5)
|
||||
assert is_float_dtype(res["baz"])
|
||||
assert isna(res["baz"].drop(["foobar"])).all()
|
||||
|
||||
res._set_value("foobar", "baz", "sam")
|
||||
assert res.loc["foobar", "baz"] == "sam"
|
||||
|
||||
def test_set_value_with_index_dtype_change(self):
|
||||
df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC"))
|
||||
|
||||
# this is actually ambiguous as the 2 is interpreted as a positional
|
||||
# so column is not created
|
||||
df = df_orig.copy()
|
||||
df._set_value("C", 2, 1.0)
|
||||
assert list(df.index) == list(df_orig.index) + ["C"]
|
||||
# assert list(df.columns) == list(df_orig.columns) + [2]
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc["C", 2] = 1.0
|
||||
assert list(df.index) == list(df_orig.index) + ["C"]
|
||||
# assert list(df.columns) == list(df_orig.columns) + [2]
|
||||
|
||||
# create both new
|
||||
df = df_orig.copy()
|
||||
df._set_value("C", "D", 1.0)
|
||||
assert list(df.index) == list(df_orig.index) + ["C"]
|
||||
assert list(df.columns) == list(df_orig.columns) + ["D"]
|
||||
|
||||
df = df_orig.copy()
|
||||
df.loc["C", "D"] = 1.0
|
||||
assert list(df.index) == list(df_orig.index) + ["C"]
|
||||
assert list(df.columns) == list(df_orig.columns) + ["D"]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,88 @@
|
||||
import pytest
|
||||
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDataFrameTake:
|
||||
def test_take(self, float_frame):
|
||||
# homogeneous
|
||||
order = [3, 1, 2, 0]
|
||||
for df in [float_frame]:
|
||||
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["D", "B", "C", "A"]]
|
||||
tm.assert_frame_equal(result, expected, check_names=False)
|
||||
|
||||
# negative indices
|
||||
order = [2, 1, -1]
|
||||
for df in [float_frame]:
|
||||
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.take(order, axis=0)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["C", "B", "D"]]
|
||||
tm.assert_frame_equal(result, expected, check_names=False)
|
||||
|
||||
# illegal indices
|
||||
msg = "indices are out-of-bounds"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.take([3, 1, 2, 30], axis=0)
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.take([3, 1, 2, -31], axis=0)
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.take([3, 1, 2, 5], axis=1)
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
df.take([3, 1, 2, -5], axis=1)
|
||||
|
||||
def test_take_mixed_type(self, float_string_frame):
|
||||
|
||||
# mixed-dtype
|
||||
order = [4, 1, 2, 0, 3]
|
||||
for df in [float_string_frame]:
|
||||
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["foo", "B", "C", "A", "D"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# negative indices
|
||||
order = [4, 1, -2]
|
||||
for df in [float_string_frame]:
|
||||
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["foo", "B", "D"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame):
|
||||
# by dtype
|
||||
order = [1, 2, 0, 3]
|
||||
for df in [mixed_float_frame, mixed_int_frame]:
|
||||
|
||||
result = df.take(order, axis=0)
|
||||
expected = df.reindex(df.index.take(order))
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# axis = 1
|
||||
result = df.take(order, axis=1)
|
||||
expected = df.loc[:, ["B", "C", "A", "D"]]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,929 @@
|
||||
from datetime import datetime
|
||||
|
||||
from hypothesis import given
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat import np_version_under1p19
|
||||
|
||||
from pandas.core.dtypes.common import is_scalar
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
DatetimeIndex,
|
||||
Series,
|
||||
StringDtype,
|
||||
Timestamp,
|
||||
date_range,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas._testing._hypothesis import OPTIONAL_ONE_OF_ALL
|
||||
|
||||
|
||||
@pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"])
def where_frame(request, float_string_frame, mixed_float_frame, mixed_int_frame):
    """
    Parametrized fixture returning the frame named by ``request.param``.
    """
    kind = request.param
    if kind == "default":
        # Built on demand so random numbers are only drawn for this param.
        return DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])

    shared_frames = {
        "float_string": float_string_frame,
        "mixed_float": mixed_float_frame,
        "mixed_int": mixed_int_frame,
    }
    # An unknown name falls through to None.
    return shared_frames.get(kind)
|
||||
|
||||
|
||||
def _safe_add(df):
|
||||
# only add to the numeric items
|
||||
def is_ok(s):
|
||||
return (
|
||||
issubclass(s.dtype.type, (np.integer, np.floating)) and s.dtype != "uint8"
|
||||
)
|
||||
|
||||
return DataFrame(dict((c, s + 1) if is_ok(s) else (c, s) for c, s in df.items()))
|
||||
|
||||
|
||||
class TestDataFrameIndexingWhere:
|
||||
def test_where_get(self, where_frame, float_string_frame):
    """``where`` with a frame ``other`` matches a column-wise ``np.where``."""
    df = where_frame
    if df is float_string_frame:
        # No boolean condition can be built for the frame holding strings.
        msg = "'>' not supported between instances of 'str' and 'int'"
        with pytest.raises(TypeError, match=msg):
            df > 0
        return

    def _check_get(frame, cond, check_dtypes=True):
        replacement = _safe_add(frame)
        from_frame_cond = frame.where(cond, replacement)
        from_array_cond = frame.where(cond.values, replacement)
        for label, column in from_frame_cond.items():
            manual = np.where(cond[label], frame[label], replacement[label])
            exp = Series(manual, index=column.index)
            tm.assert_series_equal(column, exp, check_names=False)
        # A frame condition and its raw ndarray give the same result.
        tm.assert_frame_equal(from_frame_cond, from_array_cond)

        # dtypes
        if check_dtypes:
            assert (from_frame_cond.dtypes == frame.dtypes).all()

    # check getting
    _check_get(df, df > 0)
|
||||
|
||||
def test_where_upcasting(self):
|
||||
# upcasting case (GH # 2794)
|
||||
df = DataFrame(
|
||||
{
|
||||
c: Series([1] * 3, dtype=c)
|
||||
for c in ["float32", "float64", "int32", "int64"]
|
||||
}
|
||||
)
|
||||
df.iloc[1, :] = 0
|
||||
result = df.dtypes
|
||||
expected = Series(
|
||||
[
|
||||
np.dtype("float32"),
|
||||
np.dtype("float64"),
|
||||
np.dtype("int32"),
|
||||
np.dtype("int64"),
|
||||
],
|
||||
index=["float32", "float64", "int32", "int64"],
|
||||
)
|
||||
|
||||
# when we don't preserve boolean casts
|
||||
#
|
||||
# expected = Series({ 'float32' : 1, 'float64' : 3 })
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_where_alignment(self, where_frame, float_string_frame, mixed_int_frame):
    """
    ``where`` with a frame, ndarray or scalar ``other`` is compared column by
    column with a manual ``np.where`` on the aligned condition.
    """

    # aligning
    def _check_align(frame, cond, other, check_dtypes=True):
        replaced = frame.where(cond, other)
        for pos, label in enumerate(replaced.columns):
            result = replaced[label]
            data = frame[label].values
            # Rows absent from the condition count as False.
            keep = cond[label].reindex(frame[label].index).fillna(False).values

            if is_scalar(other):
                fill = other
            elif isinstance(other, np.ndarray):
                fill = Series(other[:, pos], index=result.index).values
            else:
                fill = other[label].values

            new_values = data if keep.all() else np.where(keep, data, fill)
            expected = Series(new_values, index=result.index, name=label)

            # since we can't always have the correct numpy dtype
            # as numpy doesn't know how to downcast, don't check
            tm.assert_series_equal(result, expected, check_dtype=False)

        # dtypes
        # can't check dtype when other is an ndarray
        if check_dtypes and not isinstance(other, np.ndarray):
            assert (replaced.dtypes == frame.dtypes).all()

    df = where_frame
    if df is float_string_frame:
        msg = "'>' not supported between instances of 'str' and 'int'"
        with pytest.raises(TypeError, match=msg):
            df > 0
        return

    # other is a frame
    _check_align(df, (df > 0)[1:], _safe_add(df))

    # check other is ndarray
    cond = df > 0
    warn = FutureWarning if df is mixed_int_frame else None
    with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
        _check_align(df, cond, _safe_add(df).values)

    # integers are upcast, so don't check the dtypes
    has_integer_column = any(issubclass(dt.type, np.integer) for dt in df.dtypes)
    _check_align(df, cond, np.nan, check_dtypes=not has_integer_column)
|
||||
|
||||
def test_where_invalid(self):
    """Mis-shaped ``other`` or condition arguments raise ``ValueError``."""
    # invalid conditions
    df = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"])
    cond = df > 0

    # ndarray ``other`` with fewer rows than the frame
    short_other = (df + 1).values[0:2, :]
    with pytest.raises(
        ValueError, match="other must be the same shape as self when an ndarray"
    ):
        df.where(cond, short_other)

    # ndarray condition with fewer rows than the frame
    short_cond = cond.iloc[:2, :].values
    other1 = _safe_add(df)
    msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=msg):
        df.where(short_cond, other1)

    # scalar conditions are rejected with the same message
    for scalar_cond in (True, 0):
        with pytest.raises(ValueError, match=msg):
            df.mask(scalar_cond)
|
||||
|
||||
def test_where_set(self, where_frame, float_string_frame):
    """In-place ``where`` agrees with ``mask`` on the inverted condition."""
    df = where_frame
    if df is float_string_frame:
        msg = "'>' not supported between instances of 'str' and 'int'"
        with pytest.raises(TypeError, match=msg):
            df > 0
        return

    # where inplace
    def _check_set(frame, cond, check_dtypes=True):
        target = frame.copy()
        # Rows absent from the condition count as True here.
        full_cond = cond.reindex_like(frame).fillna(True)
        expected = target.mask(~full_cond)

        assert target.where(cond, np.nan, inplace=True) is None
        tm.assert_frame_equal(target, expected)

        if not check_dtypes:
            return
        # dtypes (and confirm upcasts)
        for label, dtype in frame.dtypes.items():
            if issubclass(dtype.type, np.integer) and not cond[label].all():
                dtype = np.dtype("float64")
            assert target[label].dtype == dtype

    _check_set(df, df > 0)
    _check_set(df, df >= 0)

    # aligning
    _check_set(df, (df >= 0)[1:])
|
||||
|
||||
def test_where_series_slicing(self):
|
||||
# GH 10218
|
||||
# test DataFrame.where with Series slicing
|
||||
df = DataFrame({"a": range(3), "b": range(4, 7)})
|
||||
result = df.where(df["a"] == 1)
|
||||
expected = df[df["a"] == 1].reindex(df.index)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("klass", [list, tuple, np.array])
def test_where_array_like(self, klass):
    """Conditions given as list, tuple or ndarray are accepted by ``where``."""
    # see gh-15414
    df = DataFrame({"a": [1, 2, 3]})
    expected = DataFrame({"a": [np.nan, 2, 3]})
    one_column_cond = klass([[False], [True], [True]])
    tm.assert_frame_equal(df.where(one_column_cond), expected)

    # Same check after adding a second column.
    df["b"] = 2
    expected["b"] = [2, np.nan, 2]
    two_column_cond = klass([[False, True], [True, False], [True, True]])
    tm.assert_frame_equal(df.where(two_column_cond), expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "cond",
    [
        [[1], [0], [1]],
        Series([[2], [5], [7]]),
        DataFrame({"a": [2, 5, 7]}),
        [["True"], ["False"], ["True"]],
        [[Timestamp("2017-01-01")], [pd.NaT], [Timestamp("2017-01-02")]],
    ],
)
def test_where_invalid_input_single(self, cond):
    """Non-boolean conditions are rejected for a one-column frame."""
    # see gh-15414: only boolean arrays accepted
    one_column = DataFrame({"a": [1, 2, 3]})
    with pytest.raises(ValueError, match="Boolean array expected for the condition"):
        one_column.where(cond)
|
||||
|
||||
@pytest.mark.parametrize(
    "cond",
    [
        [[0, 1], [1, 0], [1, 1]],
        Series([[0, 2], [5, 0], [4, 7]]),
        [["False", "True"], ["True", "False"], ["True", "True"]],
        DataFrame({"a": [2, 5, 7], "b": [4, 8, 9]}),
        [
            [pd.NaT, Timestamp("2017-01-01")],
            [Timestamp("2017-01-02"), pd.NaT],
            [Timestamp("2017-01-03"), Timestamp("2017-01-03")],
        ],
    ],
)
def test_where_invalid_input_multiple(self, cond):
    """Non-boolean conditions are rejected for a two-column frame."""
    # see gh-15414: only boolean arrays accepted
    two_columns = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]})
    with pytest.raises(ValueError, match="Boolean array expected for the condition"):
        two_columns.where(cond)
|
||||
|
||||
def test_where_dataframe_col_match(self):
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6]])
|
||||
cond = DataFrame([[True, False, True], [False, False, True]])
|
||||
|
||||
result = df.where(cond)
|
||||
expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]])
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# this *does* align, though has no matching columns
|
||||
cond.columns = ["a", "b", "c"]
|
||||
result = df.where(cond)
|
||||
expected = DataFrame(np.nan, index=df.index, columns=df.columns)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_where_ndframe_align(self):
    """
    A wrongly sized list/ndarray condition raises, while the same values
    wrapped in a Series are accepted.
    """
    msg = "Array conditional must be same shape as self"
    df = DataFrame([[1, 2, 3], [4, 5, 6]])

    cases = [
        ([True], DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]])),
        (
            np.array([False, True, False, True]),
            DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]),
        ),
    ]
    for cond, expected in cases:
        with pytest.raises(ValueError, match=msg):
            df.where(cond)

        out = df.where(Series(cond))
        tm.assert_frame_equal(out, expected)
|
||||
|
||||
def test_where_bug(self):
|
||||
# see gh-2793
|
||||
df = DataFrame(
|
||||
{"a": [1.0, 2.0, 3.0, 4.0], "b": [4.0, 3.0, 2.0, 1.0]}, dtype="float64"
|
||||
)
|
||||
expected = DataFrame(
|
||||
{"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]},
|
||||
dtype="float64",
|
||||
)
|
||||
result = df.where(df > 2, np.nan)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.copy()
|
||||
return_value = result.where(result > 2, np.nan, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_where_bug_mixed(self, any_signed_int_numpy_dtype):
|
||||
# see gh-2793
|
||||
df = DataFrame(
|
||||
{
|
||||
"a": np.array([1, 2, 3, 4], dtype=any_signed_int_numpy_dtype),
|
||||
"b": np.array([4.0, 3.0, 2.0, 1.0], dtype="float64"),
|
||||
}
|
||||
)
|
||||
|
||||
expected = DataFrame(
|
||||
{"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]},
|
||||
dtype="float64",
|
||||
)
|
||||
|
||||
result = df.where(df > 2, np.nan)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
result = df.copy()
|
||||
return_value = result.where(result > 2, np.nan, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_where_bug_transposition(self):
    """``where`` with a frame ``other`` matches boolean-mask assignment."""
    # see gh-7506
    frame_pairs = [
        (
            DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]}),
            DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]}),
        ),
        (
            DataFrame({0: [4, 6], 1: [1, 0]}),
            DataFrame({0: [np.nan, 3], 1: [3, np.nan]}),
        ),
    ]
    for a, b in frame_pairs:
        do_not_replace = b.isna() | (a > b)

        expected = a.copy()
        expected[~do_not_replace] = b

        tm.assert_frame_equal(a.where(do_not_replace, b), expected)
|
||||
|
||||
def test_where_datetime(self):
    """Boolean-frame indexing built from the two datetime columns only."""
    # GH 3311
    df = DataFrame(
        {
            "A": date_range("20130102", periods=5),
            "B": date_range("20130104", periods=5),
            "C": np.random.randn(5),
        }
    )
    stamp = datetime(2013, 1, 3)

    # Comparing the whole frame fails because of the float column.
    msg = "'>' not supported between instances of 'float' and 'datetime.datetime'"
    with pytest.raises(TypeError, match=msg):
        df > stamp

    datetime_columns = df.iloc[:, :-1]
    result = df[datetime_columns > stamp]

    expected = df.copy()
    expected.loc[[0, 1], "A"] = np.nan
    expected.loc[:, "C"] = np.nan
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_where_none(self):
    """``None`` as replacement: float column, then mixed-type in-place."""
    # GH 4667
    # setting with None changes dtype
    df = DataFrame({"series": Series(range(10))}).astype(float)
    df[df > 7] = None
    values = [0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan]
    tm.assert_frame_equal(df, DataFrame({"series": Series(values)}))

    # GH 7656
    records = [
        {"A": 1, "B": np.nan, "C": "Test"},
        {"A": np.nan, "B": "Test", "C": np.nan},
    ]
    df = DataFrame(records)

    with pytest.raises(TypeError, match="boolean setting on mixed-type"):
        df.where(~isna(df), None, inplace=True)
|
||||
|
||||
def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self):
|
||||
# see gh-21947
|
||||
df = DataFrame(columns=["a"])
|
||||
cond = df
|
||||
assert (cond.dtypes == object).all()
|
||||
|
||||
result = df.where(cond)
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
def test_where_align(self):
|
||||
def create():
|
||||
df = DataFrame(np.random.randn(10, 3))
|
||||
df.iloc[3:5, 0] = np.nan
|
||||
df.iloc[4:6, 1] = np.nan
|
||||
df.iloc[5:8, 2] = np.nan
|
||||
return df
|
||||
|
||||
# series
|
||||
df = create()
|
||||
expected = df.fillna(df.mean())
|
||||
result = df.where(pd.notna(df), df.mean(), axis="columns")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
return_value = df.where(pd.notna(df), df.mean(), inplace=True, axis="columns")
|
||||
assert return_value is None
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = create().fillna(0)
|
||||
expected = df.apply(lambda x, y: x.where(x > 0, y), y=df[0])
|
||||
result = df.where(df > 0, df[0], axis="index")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
result = df.where(df > 0, df[0], axis="rows")
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# frame
|
||||
df = create()
|
||||
expected = df.fillna(1)
|
||||
result = df.where(
|
||||
pd.notna(df), DataFrame(1, index=df.index, columns=df.columns)
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_where_complex(self):
|
||||
# GH 6345
|
||||
expected = DataFrame([[1 + 1j, 2], [np.nan, 4 + 1j]], columns=["a", "b"])
|
||||
df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=["a", "b"])
|
||||
df[df.abs() >= 5] = np.nan
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
def test_where_axis(self, using_array_manager):
    """A Series ``other`` is applied along the axis named by ``axis``."""
    # GH 9736
    df = DataFrame(np.random.randn(2, 2))
    mask = DataFrame([[False, False], [False, False]])
    s = Series([0, 1])

    cases = (("index", [[0, 0], [1, 1]]), ("columns", [[0, 1], [0, 1]]))
    for axis, values in cases:
        expected = DataFrame(values, dtype="float64")
        tm.assert_frame_equal(df.where(mask, s, axis=axis), expected)

        in_place = df.copy()
        assert in_place.where(mask, s, axis=axis, inplace=True) is None
        tm.assert_frame_equal(in_place, expected)

    # Upcast needed
    df = DataFrame([[1, 2], [3, 4]], dtype="int64")
    mask = DataFrame([[False, False], [False, False]])
    s = Series([0, np.nan])

    expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype="float64")
    tm.assert_frame_equal(df.where(mask, s, axis="index"), expected)

    in_place = df.copy()
    assert in_place.where(mask, s, axis="index", inplace=True) is None
    tm.assert_frame_equal(in_place, expected)

    warn = FutureWarning if using_array_manager else None
    expected = DataFrame([[0, np.nan], [0, np.nan]])
    with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
        result = df.where(mask, s, axis="columns")
    tm.assert_frame_equal(result, expected)

    # The in-place call is expected to give int64 and float64 columns.
    expected = DataFrame(
        {
            0: np.array([0, 0], dtype="int64"),
            1: np.array([np.nan, np.nan], dtype="float64"),
        }
    )
    in_place = df.copy()
    assert in_place.where(mask, s, axis="columns", inplace=True) is None
    tm.assert_frame_equal(in_place, expected)
|
||||
|
||||
def test_where_axis_multiple_dtypes(self):
    """
    ``where`` on a frame with float and int64 columns, replacing every value
    from a Series (either axis) or from a frame lacking one row or column.
    """
    # Multiple dtypes (=> multiple Blocks)
    float_part = DataFrame(np.random.randn(10, 2))
    int_part = DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype="int64")
    df = pd.concat([float_part, int_part], ignore_index=True, axis=1)
    mask = DataFrame(False, columns=df.columns, index=df.index)

    def check_where(other, expected, **kwargs):
        # Same expectation for the copying call and the in-place call.
        tm.assert_frame_equal(df.where(mask, other, **kwargs), expected)
        in_place = df.copy()
        assert in_place.where(mask, other, inplace=True, **kwargs) is None
        tm.assert_frame_equal(in_place, expected)

    def constant_frame(value):
        # Frame filled with ``value``; columns 2 and 3 are cast to int64.
        frame = DataFrame(value, columns=df.columns, index=df.index)
        for col in (2, 3):
            frame[col] = frame[col].astype("int64")
        return frame

    check_where(Series(1, index=df.columns), constant_frame(1.0), axis="columns")
    check_where(Series(2, index=df.index), constant_frame(2.0), axis="index")

    # DataFrame vs DataFrame
    d1 = df.copy().drop(1, axis=0)
    expected = df.copy()
    expected.loc[1, :] = np.nan
    check_where(d1, expected)
    check_where(d1, expected, axis="index")

    d2 = df.copy().drop(1, axis=1)
    expected = df.copy()
    expected.loc[:, 1] = np.nan
    check_where(d2, expected)
    check_where(d2, expected, axis="columns")
|
||||
|
||||
def test_where_callable(self):
|
||||
# GH 12533
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
|
||||
result = df.where(lambda x: x > 4, lambda x: x + 1)
|
||||
exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
tm.assert_frame_equal(result, df.where(df > 4, df + 1))
|
||||
|
||||
# return ndarray and scalar
|
||||
result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99)
|
||||
exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
tm.assert_frame_equal(result, df.where(df % 2 == 0, 99))
|
||||
|
||||
# chain
|
||||
result = (df + 2).where(lambda x: x > 8, lambda x: x + 10)
|
||||
exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]])
|
||||
tm.assert_frame_equal(result, exp)
|
||||
tm.assert_frame_equal(result, (df + 2).where((df + 2) > 8, (df + 2) + 10))
|
||||
|
||||
def test_where_tz_values(self, tz_naive_fixture, frame_or_series):
|
||||
obj1 = DataFrame(
|
||||
DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
|
||||
columns=["date"],
|
||||
)
|
||||
obj2 = DataFrame(
|
||||
DatetimeIndex(["20150103", "20150104", "20150105"], tz=tz_naive_fixture),
|
||||
columns=["date"],
|
||||
)
|
||||
mask = DataFrame([True, True, False], columns=["date"])
|
||||
exp = DataFrame(
|
||||
DatetimeIndex(["20150101", "20150102", "20150105"], tz=tz_naive_fixture),
|
||||
columns=["date"],
|
||||
)
|
||||
if frame_or_series is Series:
|
||||
obj1 = obj1["date"]
|
||||
obj2 = obj2["date"]
|
||||
mask = mask["date"]
|
||||
exp = exp["date"]
|
||||
|
||||
result = obj1.where(mask, obj2)
|
||||
tm.assert_equal(exp, result)
|
||||
|
||||
def test_df_where_change_dtype(self):
|
||||
# GH#16979
|
||||
df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
|
||||
mask = np.array([[True, False, False], [False, False, True]])
|
||||
|
||||
result = df.where(mask)
|
||||
expected = DataFrame(
|
||||
[[0, np.nan, np.nan], [np.nan, np.nan, 5]], columns=list("ABC")
|
||||
)
|
||||
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("kwargs", [{}, {"other": None}])
def test_df_where_with_category(self, kwargs):
    """``where`` on categorical columns keeps each column's categories."""
    # GH#16979
    df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC"))
    mask = np.array([[True, False, False], [False, False, True]])

    # change type to category
    for label in "ABC":
        df[label] = df[label].astype("category")

    result = df.where(mask, **kwargs)
    A = pd.Categorical([0, np.nan], categories=[0, 3])
    B = pd.Categorical([np.nan, np.nan], categories=[1, 4])
    C = pd.Categorical([np.nan, 5], categories=[2, 5])
    tm.assert_frame_equal(result, DataFrame({"A": A, "B": B, "C": C}))

    # Check Series.where while we're here
    first_column = df.A.where(mask[:, 0], **kwargs)
    tm.assert_series_equal(first_column, Series(A, name="A"))
|
||||
|
||||
def test_where_categorical_filtering(self):
    """Row filtering with ``where`` on a frame holding a categorical column."""
    # GH#22609 Verify filtering operations on DataFrames with categorical Series
    df = DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"])
    df["b"] = df["b"].astype("category")

    positive_a = df["a"] > 0
    result = df.where(positive_a)

    # Row 0 fails the filter and is expected to be missing entirely.
    expected = df.copy()
    expected.loc[0, :] = np.nan

    tm.assert_equal(result, expected)
|
||||
|
||||
def test_where_ea_other(self):
    """``other`` is a Series built from ``pd.array`` values containing NA."""
    # GH#38729/GH#38742
    df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    arr = pd.array([7, pd.NA, 9])
    row_aligned = Series(arr)

    # Replace only the middle row.
    mask = np.ones(df.shape, dtype=bool)
    mask[1, :] = False

    # TODO: ideally we would get Int64 instead of object
    result = df.where(mask, row_aligned, axis=0)
    expected = DataFrame({"A": [1, pd.NA, 3], "B": [4, pd.NA, 6]}).astype(object)
    tm.assert_frame_equal(result, expected)

    column_aligned = Series(arr[:2], index=["A", "B"])
    expected = DataFrame({"A": [1, 7, 3], "B": [4, pd.NA, 6]})
    expected["B"] = expected["B"].astype(object)
    result = df.where(mask, column_aligned, axis=1)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_where_interval_noop(self):
|
||||
# GH#44181
|
||||
df = DataFrame([pd.Interval(0, 0)])
|
||||
res = df.where(df.notna())
|
||||
tm.assert_frame_equal(res, df)
|
||||
|
||||
ser = df[0]
|
||||
res = ser.where(ser.notna())
|
||||
tm.assert_series_equal(res, ser)
|
||||
|
||||
@pytest.mark.parametrize(
    "dtype",
    [
        "timedelta64[ns]",
        "datetime64[ns]",
        "datetime64[ns, Asia/Tokyo]",
        "Period[D]",
    ],
)
def test_where_datetimelike_noop(self, dtype):
    """``where``/``mask`` calls that replace nothing return the input."""
    # GH#45135, analogue to GH#44181 for Period don't raise on no-op
    # For td64/dt64/dt64tz we already don't raise, but also are
    #  checking that we don't unnecessarily upcast to object.
    ser = Series(np.arange(3) * 10**9, dtype=np.int64).view(dtype)
    df = ser.to_frame()
    never = np.array([False, False, False])
    never_2d = never.reshape(-1, 1)

    tm.assert_series_equal(ser.where(~never, "foo"), ser)
    tm.assert_frame_equal(df.where(~never_2d, "foo"), df)
    tm.assert_series_equal(ser.mask(never, "foo"), ser)
    tm.assert_frame_equal(df.mask(never_2d, "foo"), df)
|
||||
|
||||
|
||||
def test_where_try_cast_deprecated(frame_or_series):
    """
    Passing the deprecated ``try_cast`` keyword to ``where`` emits a
    ``FutureWarning``.
    """
    obj = DataFrame(np.random.randn(4, 3))
    obj = tm.get_obj(obj, frame_or_series)

    mask = obj > 0

    # Match on the keyword name so that an unrelated FutureWarning cannot
    # satisfy the check.
    with tm.assert_produces_warning(FutureWarning, match="try_cast"):
        # try_cast keyword deprecated
        obj.where(mask, -1, try_cast=False)
|
||||
|
||||
|
||||
def test_where_int_downcasting_deprecated(using_array_manager):
    """Replacing int16 values with ``2**17`` is expected to give int32."""
    # GH#44597
    arr = np.arange(6).astype(np.int16).reshape(3, 2)
    df = DataFrame(arr)

    # Keep the first column, replace the second.
    keep_first_column = np.zeros(arr.shape, dtype=bool)
    keep_first_column[:, 0] = True

    warn = None if using_array_manager else FutureWarning
    with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"):
        res = df.where(keep_first_column, 2**17)

    replaced = np.array([2**17] * 3, dtype=np.int32)
    tm.assert_frame_equal(res, DataFrame({0: arr[:, 0], 1: replaced}))
|
||||
|
||||
|
||||
def test_where_copies_with_noop(frame_or_series):
|
||||
# GH-39595
|
||||
result = frame_or_series([1, 2, 3, 4])
|
||||
expected = result.copy()
|
||||
col = result[0] if frame_or_series is DataFrame else result
|
||||
|
||||
where_res = result.where(col < 5)
|
||||
where_res *= 2
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
where_res = result.where(col > 5, [1, 2, 3, 4])
|
||||
where_res *= 2
|
||||
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_string_dtype(frame_or_series):
    """
    Check ``where`` on ``StringDtype`` data with a shorter ``other`` object.

    The expected result keeps ``StringDtype`` and holds ``pd.NA`` for the
    labels "id1" and "id4", which are absent from ``filtered_obj``.
    """
    # GH40824
    obj = frame_or_series(
        ["a", "b", "c", "d"], index=["id1", "id2", "id3", "id4"], dtype=StringDtype()
    )
    filtered_obj = frame_or_series(
        ["b", "c"], index=["id2", "id3"], dtype=StringDtype()
    )
    filter_ser = Series([False, True, True, False])

    result = obj.where(filter_ser, filtered_obj)
    expected = frame_or_series(
        [pd.NA, "b", "c", pd.NA],
        index=["id1", "id2", "id3", "id4"],
        dtype=StringDtype(),
    )
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_bool_comparison():
    """
    Check ``where`` on a boolean frame using ``df == False`` as the condition.

    Positions holding ``True`` are expected to become NaN; the all-False
    column is expected to be returned unchanged.
    """
    # GH 10336
    df_mask = DataFrame(
        {"AAA": [True] * 4, "BBB": [False] * 4, "CCC": [True, False, True, False]}
    )
    result = df_mask.where(df_mask == False)  # noqa:E712
    expected = DataFrame(
        {
            "AAA": np.array([np.nan] * 4, dtype=object),
            "BBB": [False] * 4,
            "CCC": [np.nan, False, np.nan, False],
        }
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_none_nan_coerce():
    """
    Check that ``where(notnull, None)`` returns a frame equal to the input.

    The frame holds a datetime column containing ``NaT`` and a numeric column
    containing NaN; filling the missing positions with ``None`` must compare
    equal to the original frame.
    """
    # GH 15613
    expected = DataFrame(
        {
            "A": [Timestamp("20130101"), pd.NaT, Timestamp("20130103")],
            "B": [1, 2, np.nan],
        }
    )
    result = expected.where(expected.notnull(), None)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_where_non_keyword_deprecation(frame_or_series):
    """
    Check the FutureWarning for a third positional argument to ``where``.

    The call must still produce the replaced values while warning that
    arguments other than 'cond' and 'other' will become keyword-only.
    """
    # GH 41485
    obj = frame_or_series(range(5))
    msg = (
        "In a future version of pandas all arguments of "
        f"{frame_or_series.__name__}.where except for the arguments 'cond' "
        "and 'other' will be keyword-only"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = obj.where(obj > 1, 10, False)
    expected = frame_or_series([10, 10, 2, 3, 4])
    tm.assert_equal(expected, result)
|
||||
|
||||
|
||||
def test_where_columns_casting():
    """Check that ``where(notnull, None)`` on float columns keeps their dtypes."""
    # GH 42295

    df = DataFrame({"a": [1.0, 2.0], "b": [3, np.nan]})
    expected = df.copy()
    result = df.where(pd.notnull(df), None)
    # make sure dtypes don't change
    tm.assert_frame_equal(expected, result)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("as_cat", [True, False])
def test_where_period_invalid_na(frame_or_series, as_cat, request):
    """
    Check that a timedelta64 NaT is rejected as a fill value for Period data.

    ``where``, ``mask`` and in-place ``mask`` are each expected to raise
    TypeError, with a message that depends on whether the periods were first
    cast to category dtype.
    """
    # GH#44697
    idx = pd.period_range("2016-01-01", periods=3, freq="D")
    if as_cat:
        idx = idx.astype("category")
    obj = frame_or_series(idx)

    # NA value that we should *not* cast to Period dtype
    tdnat = pd.NaT.to_numpy("m8[ns]")

    mask = np.array([True, True, False], ndmin=obj.ndim).T

    if as_cat:
        msg = (
            r"Cannot setitem on a Categorical with a new category \(NaT\), "
            "set the categories first"
        )
        if np_version_under1p19:
            mark = pytest.mark.xfail(
                reason="When evaluating the f-string to generate the exception "
                "message, numpy somehow ends up trying to cast None to int, so "
                "ends up raising TypeError but with an unrelated message."
            )
            request.node.add_marker(mark)
    else:
        msg = "value should be a 'Period'"

    with pytest.raises(TypeError, match=msg):
        obj.where(mask, tdnat)

    with pytest.raises(TypeError, match=msg):
        obj.mask(mask, tdnat)

    with pytest.raises(TypeError, match=msg):
        obj.mask(mask, tdnat, inplace=True)
|
||||
|
||||
|
||||
def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype):
    """
    Check that NaT-like values are rejected as fill values for nullable numerics.

    For every object in ``tm.NP_NAT_OBJECTS`` plus ``pd.NaT``, both ``where``
    and ``mask`` are expected to raise TypeError with one of the messages
    joined into ``msg``.
    """
    # GH#44697
    arr = pd.array([1, 2, 3], dtype=any_numeric_ea_dtype)
    obj = frame_or_series(arr)

    mask = np.array([True, True, False], ndmin=obj.ndim).T

    # any one of these alternatives is accepted as the error message
    msg = "|".join(
        [
            r"datetime64\[.{1,2}\] cannot be converted to an? (Integer|Floating)Dtype",
            r"timedelta64\[.{1,2}\] cannot be converted to an? (Integer|Floating)Dtype",
            r"int\(\) argument must be a string, a bytes-like object or a number, "
            "not 'NaTType'",
            "object cannot be converted to a FloatingDtype",
            "'values' contains non-numeric NA",
        ]
    )

    for null in tm.NP_NAT_OBJECTS + [pd.NaT]:
        # NaT is an NA value that we should *not* cast to pd.NA dtype
        with pytest.raises(TypeError, match=msg):
            obj.where(mask, null)

        with pytest.raises(TypeError, match=msg):
            obj.mask(mask, null)
|
||||
|
||||
|
||||
@given(data=OPTIONAL_ONE_OF_ALL)
def test_where_inplace_casting(data):
    """Check that in-place ``where`` matches the non-inplace result for ``data``."""
    # GH 22051
    df = DataFrame({"a": data})
    df_copy = df.where(pd.notnull(df), None).copy()
    df.where(pd.notnull(df), None, inplace=True)
    tm.assert_equal(df, df_copy)
|
||||
@@ -0,0 +1,392 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
IndexSlice,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
import pandas.core.common as com
|
||||
|
||||
from pandas.tseries.offsets import BDay
|
||||
|
||||
|
||||
@pytest.fixture
def four_level_index_dataframe():
    """
    Build a 3x5 float DataFrame indexed by a four-level MultiIndex.

    The index levels are named "one", "two", "three" and "four"; the columns
    are labelled "A" through "E".
    """
    arr = np.array(
        [
            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
            [-0.6662, -0.5243, -0.358, 0.89145, 2.5838],
        ]
    )
    index = MultiIndex(
        levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]],
        codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]],
        names=["one", "two", "three", "four"],
    )
    return DataFrame(arr, index=index, columns=list("ABCDE"))
|
||||
|
||||
|
||||
class TestXS:
    """Tests for ``DataFrame.xs`` on frames without a MultiIndex."""

    def test_xs(self, float_frame, datetime_frame):
        """Check row and column cross-sections, including a missing-key error."""
        idx = float_frame.index[5]
        xs = float_frame.xs(idx)
        for item, value in xs.items():
            if np.isnan(value):
                assert np.isnan(float_frame[item][idx])
            else:
                assert value == float_frame[item][idx]

        # mixed-type xs
        test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}}
        frame = DataFrame(test_data)
        xs = frame.xs("1")
        assert xs.dtype == np.object_
        assert xs["A"] == 1
        assert xs["B"] == "1"

        with pytest.raises(
            KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')")
        ):
            datetime_frame.xs(datetime_frame.index[0] - BDay())

        # xs get column
        series = float_frame.xs("A", axis=1)
        expected = float_frame["A"]
        tm.assert_series_equal(series, expected)

        # view is returned if possible
        series = float_frame.xs("A", axis=1)
        series[:] = 5
        assert (expected == 5).all()

    def test_xs_corner(self):
        """Check ``xs`` on an interleaved mixed-dtype row and on a column-less frame."""
        # pathological mixed-type reordering case
        df = DataFrame(index=[0])
        df["A"] = 1.0
        df["B"] = "foo"
        df["C"] = 2.0
        df["D"] = "bar"
        df["E"] = 3.0

        xs = df.xs(0)
        exp = Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0)
        tm.assert_series_equal(xs, exp)

        # no columns but Index(dtype=object)
        df = DataFrame(index=["a", "b", "c"])
        result = df.xs("a")
        expected = Series([], name="a", index=Index([]), dtype=np.float64)
        tm.assert_series_equal(result, expected)

    def test_xs_duplicates(self):
        """Check ``xs`` of a unique label in an index that also has duplicates."""
        df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"])

        cross = df.xs("c")
        exp = df.iloc[2]
        tm.assert_series_equal(cross, exp)

    def test_xs_keep_level(self):
        """Check ``drop_level=False`` for a scalar key and for a list key."""
        df = DataFrame(
            {
                "day": {0: "sat", 1: "sun"},
                "flavour": {0: "strawberry", 1: "strawberry"},
                "sales": {0: 10, 1: 12},
                "year": {0: 2008, 1: 2008},
            }
        ).set_index(["year", "flavour", "day"])
        result = df.xs("sat", level="day", drop_level=False)
        expected = df[:1]
        tm.assert_frame_equal(result, expected)

        # the list-key form is expected to emit a FutureWarning
        with tm.assert_produces_warning(FutureWarning):
            result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False)
        tm.assert_frame_equal(result, expected)

    def test_xs_view(self, using_array_manager):
        """Check whether assigning through ``xs`` of a row reaches the parent frame."""
        # in 0.14 this will return a view if possible a copy otherwise, but
        # this is numpy dependent

        dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))

        if using_array_manager:
            # INFO(ArrayManager) with ArrayManager getting a row as a view is
            # not possible
            msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
            with pytest.raises(com.SettingWithCopyError, match=msg):
                dm.xs(2)[:] = 20
            assert not (dm.xs(2) == 20).any()
        else:
            dm.xs(2)[:] = 20
            assert (dm.xs(2) == 20).all()
|
||||
|
||||
|
||||
class TestXSWithMultiIndex:
    """Tests for ``xs`` on objects whose index or columns are a MultiIndex."""

    def test_xs_doc_example(self):
        """Check selecting one column by a tuple key with the levels given in reverse."""
        # TODO: more descriptive name
        # based on example in advanced.rst
        arrays = [
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
        ]
        tuples = list(zip(*arrays))

        index = MultiIndex.from_tuples(tuples, names=["first", "second"])
        df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index)

        result = df.xs(("one", "bar"), level=("second", "first"), axis=1)

        expected = df.iloc[:, [0]]
        tm.assert_frame_equal(result, expected)

    def test_xs_integer_key(self):
        """Check that an integer key on a named level matches ``loc``."""
        # see GH#2107
        dates = range(20111201, 20111205)
        ids = list("abcde")
        index = MultiIndex.from_product([dates, ids], names=["date", "secid"])
        df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"])

        result = df.xs(20111201, level="date")
        expected = df.loc[20111201, :]
        tm.assert_frame_equal(result, expected)

    def test_xs_level(self, multiindex_dataframe_random_data):
        """Check selecting on the "second" level by name."""
        df = multiindex_dataframe_random_data
        result = df.xs("two", level="second")
        expected = df[df.index.get_level_values(1) == "two"]
        expected.index = Index(["foo", "bar", "baz", "qux"], name="first")
        tm.assert_frame_equal(result, expected)

    def test_xs_level_eq_2(self):
        """Check selecting on the third level given by position (``level=2``)."""
        arr = np.random.randn(3, 5)
        index = MultiIndex(
            levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]],
            codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]],
        )
        df = DataFrame(arr, index=index)
        expected = DataFrame(arr[1:2], index=[["a"], ["b"]])
        result = df.xs("c", level=2)
        tm.assert_frame_equal(result, expected)

    def test_xs_setting_with_copy_error(self, multiindex_dataframe_random_data):
        """Check that assigning into a single-level ``xs`` result raises."""
        # this is a copy in 0.14
        df = multiindex_dataframe_random_data
        result = df.xs("two", level="second")

        # setting this will give a SettingWithCopyError
        # as we are trying to write a view
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(com.SettingWithCopyError, match=msg):
            result[:] = 10

    def test_xs_setting_with_copy_error_multiple(self, four_level_index_dataframe):
        """Check that assigning into a multi-level ``xs`` result raises."""
        # this is a copy in 0.14
        df = four_level_index_dataframe
        result = df.xs(("a", 4), level=["one", "four"])

        # setting this will give a SettingWithCopyError
        # as we are trying to write a view
        msg = "A value is trying to be set on a copy of a slice from a DataFrame"
        with pytest.raises(com.SettingWithCopyError, match=msg):
            result[:] = 10

    @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])])
    def test_xs_with_duplicates(self, key, level, multiindex_dataframe_random_data):
        """Check ``xs`` on a non-unique MultiIndex for scalar and list keys."""
        # see GH#13719
        frame = multiindex_dataframe_random_data
        df = concat([frame] * 2)
        assert df.index.is_unique is False
        expected = concat([frame.xs("one", level="second")] * 2)

        if isinstance(key, list):
            # the list-key form is expected to emit a FutureWarning
            with tm.assert_produces_warning(FutureWarning):
                result = df.xs(key, level=level)
        else:
            result = df.xs(key, level=level)
        tm.assert_frame_equal(result, expected)

    def test_xs_missing_values_in_index(self):
        """Check that a missing label in the remaining level survives ``xs``."""
        # see GH#6574
        # missing values in returned index should be preserved
        acc = [
            ("a", "abcde", 1),
            ("b", "bbcde", 2),
            ("y", "yzcde", 25),
            ("z", "xbcde", 24),
            ("z", None, 26),
            ("z", "zbcde", 25),
            ("z", "ybcde", 26),
        ]
        df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"])
        expected = DataFrame(
            {"cnt": [24, 26, 25, 26]},
            index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"),
        )

        result = df.xs("z", level="a1")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "key, level, exp_arr, exp_index",
        [
            ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")),
            ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")),
        ],
    )
    def test_xs_named_levels_axis_eq_1(self, key, level, exp_arr, exp_index):
        """Check ``xs`` along ``axis=1`` using named column levels."""
        # see GH#2903
        arr = np.random.randn(4, 4)
        index = MultiIndex(
            levels=[["a", "b"], ["bar", "foo", "hello", "world"]],
            codes=[[0, 0, 1, 1], [0, 1, 2, 3]],
            names=["lvl0", "lvl1"],
        )
        df = DataFrame(arr, columns=index)
        result = df.xs(key, level=level, axis=1)
        expected = DataFrame(exp_arr(arr), columns=exp_index)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer",
        [
            lambda df: df.xs(("a", 4), level=["one", "four"]),
            lambda df: df.xs("a").xs(4, level="four"),
        ],
    )
    def test_xs_level_multiple(self, indexer, four_level_index_dataframe):
        """Check that one two-level ``xs`` and two chained ``xs`` calls agree."""
        df = four_level_index_dataframe
        expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]]
        expected_index = MultiIndex(
            levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"]
        )
        expected = DataFrame(
            expected_values, index=expected_index, columns=list("ABCDE")
        )
        result = indexer(df)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")]
    )
    def test_xs_level0(self, indexer, four_level_index_dataframe):
        """Check ``xs`` on the first level with and without an explicit ``level=0``."""
        df = four_level_index_dataframe
        expected_values = [
            [-0.5109, -2.3358, -0.4645, 0.05076, 0.364],
            [0.4473, 1.4152, 0.2834, 1.00661, 0.1744],
        ]
        expected_index = MultiIndex(
            levels=[["b", "q"], [10.0032, 20.0], [4, 5]],
            codes=[[0, 1], [0, 1], [1, 0]],
            names=["two", "three", "four"],
        )
        expected = DataFrame(
            expected_values, index=expected_index, columns=list("ABCDE")
        )

        result = indexer(df)
        tm.assert_frame_equal(result, expected)

    def test_xs_values(self, multiindex_dataframe_random_data):
        """Check the values of a full-tuple ``xs`` against the matching row."""
        df = multiindex_dataframe_random_data
        result = df.xs(("bar", "two")).values
        expected = df.values[4]
        tm.assert_almost_equal(result, expected)

    def test_xs_loc_equality(self, multiindex_dataframe_random_data):
        """Check that a full-tuple ``xs`` equals ``loc`` with the same tuple."""
        df = multiindex_dataframe_random_data
        result = df.xs(("bar", "two"))
        expected = df.loc[("bar", "two")]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("klass", [DataFrame, Series])
    def test_xs_IndexSlice_argument_not_implemented(self, klass):
        """Check ``xs`` and ``loc`` with an ``IndexSlice`` key whose first item is a tuple label."""
        # GH#35301

        index = MultiIndex(
            levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
            codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        )

        obj = DataFrame(np.random.randn(6, 4), index=index)
        if klass is Series:
            obj = obj[0]

        expected = obj.iloc[-2:].droplevel(0)

        result = obj.xs(IndexSlice[("foo", "qux", 0), :])
        tm.assert_equal(result, expected)

        result = obj.loc[IndexSlice[("foo", "qux", 0), :]]
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize("klass", [DataFrame, Series])
    def test_xs_levels_raises(self, klass):
        """Check that passing ``level`` without a MultiIndex raises TypeError."""
        obj = DataFrame({"A": [1, 2, 3]})
        if klass is Series:
            obj = obj["A"]

        msg = "Index must be a MultiIndex"
        with pytest.raises(TypeError, match=msg):
            obj.xs(0, level="as")

    def test_xs_multiindex_droplevel_false(self):
        """Check ``drop_level=False`` on MultiIndex columns keeps both levels."""
        # GH#19056
        mi = MultiIndex.from_tuples(
            [("a", "x"), ("a", "y"), ("b", "x")], names=["level1", "level2"]
        )
        df = DataFrame([[1, 2, 3]], columns=mi)
        result = df.xs("a", axis=1, drop_level=False)
        expected = DataFrame(
            [[1, 2]],
            columns=MultiIndex.from_tuples(
                [("a", "x"), ("a", "y")], names=["level1", "level2"]
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_xs_droplevel_false(self):
        """Check ``drop_level=False`` on flat columns returns a one-column frame."""
        # GH#19056
        df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        expected = DataFrame({"a": [1]})
        tm.assert_frame_equal(result, expected)

    def test_xs_droplevel_false_view(self, using_array_manager):
        """Check whether ``xs(..., drop_level=False)`` shares data with its parent."""
        # GH#37832
        df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        # check that result still views the same data as df
        assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)
        # modifying original df also modifies result when having a single block
        df.iloc[0, 0] = 2
        expected = DataFrame({"a": [2]})
        tm.assert_frame_equal(result, expected)

        # with mixed dataframe, modifying the parent doesn't modify result
        # TODO the "split" path behaves differently here as with single block
        df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
        result = df.xs("a", axis=1, drop_level=False)
        df.iloc[0, 0] = 2
        if using_array_manager:
            # Here the behavior is consistent
            expected = DataFrame({"a": [2]})
        else:
            # FIXME: iloc does not update the array inplace using
            # "split" path
            expected = DataFrame({"a": [1]})
        tm.assert_frame_equal(result, expected)

    def test_xs_list_indexer_droplevel_false(self):
        """Check that a list key with ``drop_level=False`` warns and raises KeyError."""
        # GH#41760
        mi = MultiIndex.from_tuples([("x", "m", "a"), ("x", "n", "b"), ("y", "o", "c")])
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi)
        with tm.assert_produces_warning(FutureWarning):
            with pytest.raises(KeyError, match="y"):
                df.xs(["x", "y"], drop_level=False, axis=1)
|
||||
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
Test files dedicated to individual (stand-alone) DataFrame methods
|
||||
|
||||
Ideally these files/tests should correspond 1-to-1 with tests.series.methods
|
||||
|
||||
These may also present opportunities for sharing/de-duplicating test code.
|
||||
"""
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user