first commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,328 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import api
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class Base:
    """Shared helper for tests that pin the set of names a namespace exposes."""

    def check(self, namespace, expected, ignored=None):
        """
        Assert that ``namespace`` exposes exactly the names in ``expected``.

        Parameters
        ----------
        namespace : object
            Object (typically a module) whose ``dir()`` listing is inspected.
        expected : iterable of str
            Names that must be present; order does not matter.
        ignored : iterable of str, optional
            Names removed from the listing before the comparison.

        Raises
        ------
        AssertionError
            Propagated from ``tm.assert_almost_equal`` when the two sorted
            listings differ.
        """
        # see which names are in the namespace, minus optional
        # ignored ones
        # compare vs the expected

        # names starting with a double underscore are never compared
        result = sorted(f for f in dir(namespace) if not f.startswith("__"))
        if ignored is not None:
            result = sorted(set(result) - set(ignored))

        expected = sorted(expected)
        tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
class TestPDApi(Base):
    """Pin the names exposed by the top-level ``pandas`` namespace."""

    # these are optionally imported based on testing
    # & need to be ignored
    ignored = ["tests", "locale", "conftest"]

    # top-level sub-packages
    public_lib = [
        "api",
        "arrays",
        "options",
        "test",
        "testing",
        "errors",
        "plotting",
        "io",
        "tseries",
    ]
    private_lib = ["compat", "core", "pandas", "util"]

    # these are already deprecated; awaiting removal
    deprecated_modules: list[str] = ["np", "datetime"]

    # misc
    misc = ["IndexSlice", "NaT", "NA"]

    # top-level classes
    classes = [
        "Categorical",
        "CategoricalIndex",
        "DataFrame",
        "DateOffset",
        "DatetimeIndex",
        "ExcelFile",
        "ExcelWriter",
        "Float64Index",
        "Flags",
        "Grouper",
        "HDFStore",
        "Index",
        "Int64Index",
        "MultiIndex",
        "Period",
        "PeriodIndex",
        "RangeIndex",
        "UInt64Index",
        "Series",
        "SparseDtype",
        "StringDtype",
        "Timedelta",
        "TimedeltaIndex",
        "Timestamp",
        "Interval",
        "IntervalIndex",
        "CategoricalDtype",
        "PeriodDtype",
        "IntervalDtype",
        "DatetimeTZDtype",
        "BooleanDtype",
        "Int8Dtype",
        "Int16Dtype",
        "Int32Dtype",
        "Int64Dtype",
        "UInt8Dtype",
        "UInt16Dtype",
        "UInt32Dtype",
        "UInt64Dtype",
        "Float32Dtype",
        "Float64Dtype",
        "NamedAgg",
    ]

    # these are already deprecated; awaiting removal
    deprecated_classes: list[str] = ["Float64Index", "Int64Index", "UInt64Index"]

    # these should be deprecated in the future
    deprecated_classes_in_future: list[str] = ["SparseArray"]

    # external modules exposed in pandas namespace
    modules: list[str] = []

    # top-level functions
    funcs = [
        "array",
        "bdate_range",
        "concat",
        "crosstab",
        "cut",
        "date_range",
        "interval_range",
        "eval",
        "factorize",
        "get_dummies",
        "infer_freq",
        "isna",
        "isnull",
        "lreshape",
        "melt",
        "notna",
        "notnull",
        "offsets",
        "merge",
        "merge_ordered",
        "merge_asof",
        "period_range",
        "pivot",
        "pivot_table",
        "qcut",
        "show_versions",
        "timedelta_range",
        "unique",
        "value_counts",
        "wide_to_long",
    ]

    # top-level option funcs
    funcs_option = [
        "reset_option",
        "describe_option",
        "get_option",
        "option_context",
        "set_option",
        "set_eng_float_format",
    ]

    # top-level read_* funcs
    funcs_read = [
        "read_clipboard",
        "read_csv",
        "read_excel",
        "read_fwf",
        "read_gbq",
        "read_hdf",
        "read_html",
        "read_xml",
        "read_json",
        "read_pickle",
        "read_sas",
        "read_sql",
        "read_sql_query",
        "read_sql_table",
        "read_stata",
        "read_table",
        "read_feather",
        "read_parquet",
        "read_orc",
        "read_spss",
    ]

    # top-level json funcs
    funcs_json = ["json_normalize"]

    # top-level to_* funcs
    funcs_to = ["to_datetime", "to_numeric", "to_pickle", "to_timedelta"]

    # top-level to deprecate in the future
    deprecated_funcs_in_future: list[str] = []

    # these are already deprecated; awaiting removal
    deprecated_funcs: list[str] = []

    # private modules in pandas namespace
    private_modules = [
        "_config",
        "_libs",
        "_is_numpy_dev",
        "_testing",
        "_typing",
        "_version",
    ]

    def test_api(self):
        """``dir(pd)`` minus ``ignored`` must equal the union of the lists above."""
        checkthese = (
            self.public_lib
            + self.private_lib
            + self.misc
            + self.modules
            + self.classes
            + self.funcs
            + self.funcs_option
            + self.funcs_read
            + self.funcs_json
            + self.funcs_to
            + self.private_modules
        )
        self.check(namespace=pd, expected=checkthese, ignored=self.ignored)

    def test_api_all(self):
        """``pd.__all__`` must equal the public names minus deprecated classes."""
        expected = set(
            self.public_lib
            + self.misc
            + self.modules
            + self.classes
            + self.funcs
            + self.funcs_option
            + self.funcs_read
            + self.funcs_json
            + self.funcs_to
        ) - set(self.deprecated_classes)
        actual = set(pd.__all__)

        # checked separately so a failure says which direction is off
        extraneous = actual - expected
        assert not extraneous

        missing = expected - actual
        assert not missing

    def test_depr(self):
        """Accessing any name listed as deprecated must emit a FutureWarning."""
        deprecated_list = (
            self.deprecated_modules
            + self.deprecated_classes
            + self.deprecated_classes_in_future
            + self.deprecated_funcs
            + self.deprecated_funcs_in_future
        )
        for depr in deprecated_list:
            with tm.assert_produces_warning(FutureWarning):
                _ = getattr(pd, depr)
|
||||
|
||||
|
||||
def test_datetime():
    """Check equality and type of a ``datetime`` under suppressed FutureWarning."""
    from datetime import datetime
    import warnings

    # NOTE(review): both assertions compare the stdlib ``datetime`` with
    # itself and never touch pandas -- presumably a leftover from a
    # ``pd.datetime`` check; confirm the intent before relying on this test.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        assert datetime(2015, 1, 2, 0, 0) == datetime(2015, 1, 2, 0, 0)

        assert isinstance(datetime(2015, 1, 2, 0, 0), datetime)
|
||||
|
||||
|
||||
def test_sparsearray():
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", FutureWarning)
|
||||
assert isinstance(pd.array([1, 2, 3], dtype="Sparse"), pd.SparseArray)
|
||||
|
||||
|
||||
def test_np():
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", FutureWarning)
|
||||
assert (pd.np.arange(0, 10) == np.arange(0, 10)).all()
|
||||
|
||||
|
||||
class TestApi(Base):
    """Pin the names exposed by ``pandas.api``."""

    allowed = ["types", "extensions", "indexers"]

    def test_api(self):
        """``pandas.api`` must expose exactly the names in ``allowed``."""
        self.check(api, self.allowed)
|
||||
|
||||
|
||||
class TestTesting(Base):
    """Pin ``pandas.testing`` and the deprecation of ``pandas.util.testing``."""

    funcs = [
        "assert_frame_equal",
        "assert_series_equal",
        "assert_index_equal",
        "assert_extension_array_equal",
    ]

    def test_testing(self):
        """``pandas.testing`` must expose exactly the names in ``funcs``."""
        from pandas import testing  # noqa: PDF015

        self.check(testing, self.funcs)

    def test_util_testing_deprecated(self):
        """Importing ``pandas.util.testing`` emits the deprecation FutureWarning."""
        # avoid cache state affecting the test
        sys.modules.pop("pandas.util.testing", None)

        with tm.assert_produces_warning(FutureWarning) as m:
            import pandas.util.testing  # noqa: F401

        assert "pandas.util.testing is deprecated" in str(m[0].message)
        assert "pandas.testing instead" in str(m[0].message)

    def test_util_testing_deprecated_direct(self):
        """``from pandas.util.testing import ...`` emits the same FutureWarning."""
        # avoid cache state affecting the test
        sys.modules.pop("pandas.util.testing", None)
        with tm.assert_produces_warning(FutureWarning) as m:
            from pandas.util.testing import assert_series_equal  # noqa: F401

        assert "pandas.util.testing is deprecated" in str(m[0].message)
        assert "pandas.testing instead" in str(m[0].message)

    def test_util_in_top_level(self):
        """``pandas.util.testing`` warns on attribute access; unknown names raise."""
        # in a subprocess to avoid import caching issues
        out = subprocess.check_output(
            [
                sys.executable,
                "-c",
                "import pandas; pandas.util.testing.assert_series_equal",
            ],
            # stderr is merged into the captured output that is searched below
            stderr=subprocess.STDOUT,
        ).decode()
        assert "pandas.util.testing is deprecated" in out

        with pytest.raises(AttributeError, match="foo"):
            pd.util.foo
|
||||
@@ -0,0 +1,63 @@
|
||||
import pandas._testing as tm
|
||||
from pandas.api import types
|
||||
from pandas.tests.api.test_api import Base
|
||||
|
||||
|
||||
class TestTypes(Base):
    """Pin the names exposed by ``pandas.api.types``."""

    allowed = [
        "is_bool",
        "is_bool_dtype",
        "is_categorical",
        "is_categorical_dtype",
        "is_complex",
        "is_complex_dtype",
        "is_datetime64_any_dtype",
        "is_datetime64_dtype",
        "is_datetime64_ns_dtype",
        "is_datetime64tz_dtype",
        "is_dtype_equal",
        "is_float",
        "is_float_dtype",
        "is_int64_dtype",
        "is_integer",
        "is_integer_dtype",
        "is_number",
        "is_numeric_dtype",
        "is_object_dtype",
        "is_scalar",
        "is_sparse",
        "is_string_dtype",
        "is_signed_integer_dtype",
        "is_timedelta64_dtype",
        "is_timedelta64_ns_dtype",
        "is_unsigned_integer_dtype",
        "is_period_dtype",
        "is_interval",
        "is_interval_dtype",
        "is_re",
        "is_re_compilable",
        "is_dict_like",
        "is_iterator",
        "is_file_like",
        "is_list_like",
        "is_hashable",
        "is_array_like",
        "is_named_tuple",
        "pandas_dtype",
        "union_categoricals",
        "infer_dtype",
        "is_extension_array_dtype",
    ]
    deprecated = ["is_extension_type"]
    dtypes = ["CategoricalDtype", "DatetimeTZDtype", "PeriodDtype", "IntervalDtype"]

    def test_types(self):
        """``pandas.api.types`` exposes exactly allowed + dtypes + deprecated."""
        self.check(types, self.allowed + self.dtypes + self.deprecated)

    def test_deprecated_from_api_types(self):
        """Calling each deprecated function with ``1`` emits a FutureWarning."""
        for t in self.deprecated:
            with tm.assert_produces_warning(FutureWarning):
                getattr(types, t)(1)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
from pandas.core.groupby.base import transformation_kernels
|
||||
|
||||
# tshift only works on time index and is deprecated
# There is no Series.cumcount or DataFrame.cumcount
_excluded_kernels = frozenset({"tshift", "cumcount"})

# Sorted so the resulting order does not depend on the iteration order of
# ``transformation_kernels``.
series_transform_kernels = [
    x for x in sorted(transformation_kernels) if x not in _excluded_kernels
]
# Same contents, but a separate list object so mutating one cannot affect
# the other.
frame_transform_kernels = list(series_transform_kernels)
|
||||
@@ -0,0 +1,18 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
|
||||
@pytest.fixture
def int_frame_const_col():
    """
    Fixture for DataFrame of ints which are constant per column

    Columns are ['A', 'B', 'C'], with values (per column): [1, 2, 3]
    """
    # tile [0, 1, 2] six times, fold into 6 rows, then shift to 1-based values
    df = DataFrame(
        np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1,
        columns=["A", "B", "C"],
    )
    return df
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,97 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_agg_relabel():
    """``DataFrame.agg`` with ``name=(column, func)`` keywords labels rows by name."""
    # GH 26513
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    # simplest case with one column, one func
    result = df.agg(foo=("B", "sum"))
    expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"]))
    tm.assert_frame_equal(result, expected)

    # test on same column with different methods
    result = df.agg(foo=("B", "sum"), bar=("B", "min"))
    expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_relabel_multi_columns_multi_methods():
    """Relabelled ``agg`` over several columns leaves NaN where a label has no column."""
    # GH 26513, test on multiple columns with multiple methods
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
    result = df.agg(
        foo=("A", "sum"),
        bar=("B", "mean"),
        cat=("A", "min"),
        dat=("B", "max"),
        f=("A", "max"),
        g=("C", "min"),
    )
    # one row per label; only the column named by that label holds a value
    expected = pd.DataFrame(
        {
            "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan],
            "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan],
            "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0],
        },
        index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_relabel_partial_functions():
    """Relabelled ``agg`` accepts numpy functions, builtins and lambdas as funcs."""
    # GH 26513, test on partial, functools or more complex cases
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
    result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
    expected = pd.DataFrame(
        {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
    )
    tm.assert_frame_equal(result, expected)

    result = df.agg(
        foo=("A", min),
        bar=("A", np.min),
        cat=("B", max),
        dat=("C", "min"),
        f=("B", np.sum),
        kk=("B", lambda x: min(x)),
    )
    expected = pd.DataFrame(
        {
            "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
            "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
            "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
        },
        index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_agg_namedtuple():
    """Relabelled ``agg`` accepts ``pd.NamedAgg`` built positionally or by keyword."""
    # GH 26513
    df = pd.DataFrame({"A": [0, 1], "B": [1, 2]})
    result = df.agg(
        foo=pd.NamedAgg("B", "sum"),
        bar=pd.NamedAgg("B", min),
        cat=pd.NamedAgg(column="B", aggfunc="count"),
        fft=pd.NamedAgg("B", aggfunc="max"),
    )

    expected = pd.DataFrame(
        {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
    )
    tm.assert_frame_equal(result, expected)

    result = df.agg(
        foo=pd.NamedAgg("A", "min"),
        bar=pd.NamedAgg(column="B", aggfunc="max"),
        cat=pd.NamedAgg(column="A", aggfunc="max"),
    )
    expected = pd.DataFrame(
        {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
        index=pd.Index(["foo", "bar", "cat"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,249 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import frame_transform_kernels
|
||||
from pandas.tests.frame.common import zip_frames
|
||||
|
||||
|
||||
def unpack_obj(obj, klass, axis):
    """
    Helper to ensure we have the right type of object for a test parametrized
    over frame_or_series.

    Parameters
    ----------
    obj : DataFrame
        Frame to use; must have a column ``"A"`` when ``klass`` is not DataFrame.
    klass : type
        Constructor the test is parametrized with.
    axis : int or str
        Axis the test is parametrized with.

    Returns
    -------
    ``obj`` unchanged when ``klass`` is DataFrame, otherwise ``obj["A"]``.
    The test is skipped when ``klass`` is not DataFrame and ``axis != 0``.
    """
    if klass is not DataFrame:
        obj = obj["A"]
        # only axis 0 is exercised for the non-DataFrame case
        if axis != 0:
            pytest.skip(f"Test is only for DataFrame with axis={axis}")
    return obj
|
||||
|
||||
|
||||
def test_transform_ufunc(axis, float_frame, frame_or_series):
    """``transform(np.sqrt)`` must equal calling ``np.sqrt`` on the object."""
    # GH 35964
    obj = unpack_obj(float_frame, frame_or_series, axis)

    # numpy floating-point warnings are silenced while the expectation is built
    with np.errstate(all="ignore"):
        f_sqrt = np.sqrt(obj)

    # ufunc
    result = obj.transform(np.sqrt, axis=axis)
    expected = f_sqrt
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(axis, float_frame, ops, names):
    """``transform`` with a list-like of ufuncs labels results with a MultiIndex."""
    # GH 35964
    other_axis = 1 if axis in {0, "index"} else 0
    with np.errstate(all="ignore"):
        expected = zip_frames([op(float_frame) for op in ops], axis=other_axis)
    # the (original label, function name) product labels the non-transformed axis
    if axis in {0, "index"}:
        expected.columns = MultiIndex.from_product([float_frame.columns, names])
    else:
        expected.index = MultiIndex.from_product([float_frame.index, names])
    result = float_frame.transform(ops, axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops, frame_or_series):
    """``transform`` with an empty list-like of functions raises ValueError."""
    obj = unpack_obj(float_frame, frame_or_series, 0)

    with pytest.raises(ValueError, match="No transform functions were provided"):
        obj.transform(ops)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
    """``transform`` with a dict-like selects only the label that is keyed."""
    # GH 35964
    # same axis spelling check as test_transform_listlike
    if axis in {0, "index"}:
        e = float_frame.columns[0]
        expected = float_frame[[e]].transform(np.abs)
    else:
        e = float_frame.index[0]
        expected = float_frame.iloc[[0]].transform(np.abs)
    result = float_frame.transform(box({e: np.abs}), axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_dictlike_mixed():
    """``transform`` accepts a dict whose values mix lists and single functions."""
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})
    result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"})
    # codes spell out the columns ("b", "sqrt"), ("b", "abs"), ("c", "sqrt")
    expected = DataFrame(
        [[1.0, 1, 1.0], [2.0, 4, 2.0]],
        columns=MultiIndex(
            levels=[("b", "c"), ("sqrt", "abs")],
            codes=[(0, 0, 1), (0, 1, 0)],
        ),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {},
        {"A": []},
        {"A": [], "B": "cumsum"},
        {"A": "cumsum", "B": []},
        {"A": [], "B": ["cumsum"]},
        {"A": ["cumsum"], "B": []},
    ],
)
def test_transform_empty_dictlike(float_frame, ops, frame_or_series):
    """``transform`` raises when a dict is empty or has an empty list value."""
    obj = unpack_obj(float_frame, frame_or_series, 0)

    with pytest.raises(ValueError, match="No transform functions were provided"):
        obj.transform(ops)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply, frame_or_series):
    """``transform`` with a user function gives ``obj + 1`` on either code path."""
    # GH 35964
    obj = unpack_obj(float_frame, frame_or_series, axis)

    # transform uses UDF either via apply or passing the entire DataFrame
    def func(x):
        # transform is using apply iff x is not a DataFrame
        if use_apply == isinstance(x, frame_or_series):
            # Force transform to fallback
            raise ValueError
        return x + 1

    result = obj.transform(func, axis=axis)
    expected = obj + 1
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
# Kernels left out of the "must raise" parametrizations below.
wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]
# Remaining kernels, in the order of frame_transform_kernels.
frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
    """``transform`` on object-dtype data raises TypeError for each call form."""
    # GH 35964
    if op == "rank":
        request.node.add_marker(
            pytest.mark.xfail(
                raises=ValueError, reason="GH 40418: rank does not raise a TypeError"
            )
        )

    obj = DataFrame({"A": 3 * [object]})  # DataFrame that will fail on most transforms
    obj = tm.get_obj(obj, frame_or_series)

    # tshift is deprecated
    warn = None if op != "tshift" else FutureWarning
    with tm.assert_produces_warning(warn):
        with pytest.raises(TypeError, match="unsupported operand|not supported"):
            obj.transform(op)
        with pytest.raises(TypeError, match="Transform function failed"):
            obj.transform([op])
        with pytest.raises(TypeError, match="Transform function failed"):
            obj.transform({"A": op})
        with pytest.raises(TypeError, match="Transform function failed"):
            obj.transform({"A": [op]})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_partial_failure_typeerror(op):
    """Failing parts of a transform are dropped with a FutureWarning."""
    # GH 35964

    # Using object makes most transform kernels fail
    df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})

    # the first three cases all report column 'A' as the failing part
    match = r"\['A'\] did not transform successfully"

    expected = df[["B"]].transform([op])
    with tm.assert_produces_warning(FutureWarning, match=match):
        result = df.transform([op])
    tm.assert_equal(result, expected)

    expected = df[["B"]].transform({"B": op})
    with tm.assert_produces_warning(FutureWarning, match=match):
        result = df.transform({"A": op, "B": op})
    tm.assert_equal(result, expected)

    expected = df[["B"]].transform({"B": [op]})
    with tm.assert_produces_warning(FutureWarning, match=match):
        result = df.transform({"A": [op], "B": [op]})
    tm.assert_equal(result, expected)

    # here the warning names the failing op instead of the column
    expected = df.transform({"A": ["shift"], "B": [op]})
    match = rf"\['{op}'\] did not transform successfully"
    with tm.assert_produces_warning(FutureWarning, match=match):
        result = df.transform({"A": [op, "shift"], "B": [op]})
    tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_partial_failure_valueerror():
|
||||
# GH 40211
|
||||
match = ".*did not transform successfully"
|
||||
|
||||
def op(x):
|
||||
if np.sum(np.sum(x)) < 10:
|
||||
raise ValueError
|
||||
return x
|
||||
|
||||
df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
|
||||
|
||||
expected = df[["B"]].transform([op])
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = df.transform([op])
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
expected = df[["B"]].transform({"B": op})
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = df.transform({"A": op, "B": op})
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
expected = df[["B"]].transform({"B": [op]})
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = df.transform({"A": [op], "B": [op]})
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
expected = df.transform({"A": ["shift"], "B": [op]})
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = df.transform({"A": [op, "shift"], "B": [op]})
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
    """Positional and keyword arguments given to ``transform`` reach the UDF."""
    # GH 35964
    # transform uses UDF either via apply or passing the entire DataFrame
    expected_args = [1, 2]
    expected_kwargs = {"c": 3}

    def f(x, a, b, c):
        # transform is using apply iff x is not a DataFrame
        if use_apply == isinstance(x, frame_or_series):
            # Force transform to fallback
            raise ValueError
        assert [a, b] == expected_args
        assert c == expected_kwargs["c"]
        return x

    # the second positional argument (0) is the axis
    frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs)
|
||||
|
||||
|
||||
def test_transform_empty_dataframe():
    """``transform`` on an empty frame or column returns an equal empty object."""
    # https://github.com/pandas-dev/pandas/issues/39636
    df = DataFrame([], columns=["col1", "col2"])
    result = df.transform(lambda x: x + 10)
    tm.assert_frame_equal(result, df)

    result = df["col1"].transform(lambda x: x + 10)
    tm.assert_series_equal(result, df["col1"])
|
||||
@@ -0,0 +1,359 @@
|
||||
# Tests specifically aimed at detecting bad arguments.
|
||||
# This file is organized by reason for exception.
|
||||
# 1. always invalid argument values
|
||||
# 2. missing column(s)
|
||||
# 3. incompatible ops/dtype/args/kwargs
|
||||
# 4. invalid result shape/type
|
||||
# If your test does not fit into one of these categories, add to this list.
|
||||
|
||||
from itertools import chain
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Series,
|
||||
date_range,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.base import SpecificationError
|
||||
|
||||
|
||||
@pytest.mark.parametrize("result_type", ["foo", 1])
def test_result_type_error(result_type, int_frame_const_col):
    """``DataFrame.apply`` rejects a ``result_type`` outside the allowed set."""
    # allowed result_type
    df = int_frame_const_col

    # The message contains braces and is used as a regular expression by
    # ``pytest.raises``; escape it so it is matched literally.
    msg = re.escape(
        "invalid value for result_type, must be one of "
        "{None, 'reduce', 'broadcast', 'expand'}"
    )
    with pytest.raises(ValueError, match=msg):
        df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type)
|
||||
|
||||
|
||||
def test_apply_invalid_axis_value():
    """``DataFrame.apply`` with ``axis=2`` raises ValueError."""
    df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])
    msg = "No axis named 2 for object type DataFrame"
    with pytest.raises(ValueError, match=msg):
        df.apply(lambda x: x, 2)
|
||||
|
||||
|
||||
def test_applymap_invalid_na_action(float_frame):
    """``DataFrame.applymap`` rejects ``na_action="abc"``."""
    # GH 23803
    with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"):
        float_frame.applymap(lambda x: len(str(x)), na_action="abc")
|
||||
|
||||
|
||||
def test_agg_raises():
    """``DataFrame.agg`` called without arguments raises TypeError."""
    # GH 26513
    df = DataFrame({"A": [0, 1], "B": [1, 2]})
    msg = "Must provide"

    with pytest.raises(TypeError, match=msg):
        df.agg()
|
||||
|
||||
|
||||
def test_map_with_invalid_na_action_raises():
    """``Series.map`` rejects a ``na_action`` other than 'ignore' or None."""
    # https://github.com/pandas-dev/pandas/issues/32815
    s = Series([1, 2, 3])
    msg = "na_action must either be 'ignore' or None"
    with pytest.raises(ValueError, match=msg):
        s.map(lambda x: x, na_action="____")
|
||||
|
||||
|
||||
def test_map_categorical_na_action():
    """``Series.map`` with ``na_action="ignore"`` on categorical data is unimplemented."""
    values = Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    s = Series(values, name="XX", index=list("abcdefg"))
    with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
        s.map(lambda x: x, na_action="ignore")
|
||||
|
||||
|
||||
def test_map_datetimetz_na_action():
    """``Series.map`` with ``na_action="ignore"`` on tz-aware data is unimplemented."""
    values = date_range("2011-01-01", "2011-01-02", freq="H").tz_localize("Asia/Tokyo")
    s = Series(values, name="XX")
    with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
        s.map(lambda x: x, na_action="ignore")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [DataFrame, Series])
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
def test_nested_renamer(box, method, func):
    """A dict-of-dicts spec raises SpecificationError for every method and box."""
    # GH 35964
    obj = box({"A": [1]})
    match = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=match):
        getattr(obj, method)(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "renamer",
    [{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}],
)
def test_series_nested_renamer(renamer):
    """``Series.agg`` with a dict of lists raises SpecificationError."""
    s = Series(range(6), dtype="int64", name="series")
    msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        s.agg(renamer)
|
||||
|
||||
|
||||
def test_apply_dict_depr():
    """``agg`` with a dict of lists on a frame column raises SpecificationError."""
    # the random values are irrelevant; only the raised error is checked
    tsdf = DataFrame(
        np.random.randn(10, 3),
        columns=["A", "B", "C"],
        index=date_range("1/1/2000", periods=10),
    )
    msg = "nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        tsdf.A.agg({"foo": ["sum", "mean"]})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["agg", "transform"])
def test_dict_nested_renaming_depr(method):
    """A dict-of-dicts spec on a DataFrame raises SpecificationError."""
    df = DataFrame({"A": range(5), "B": 5})

    # nested renaming
    msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        getattr(df, method)({"A": {"foo": "min"}, "B": {"bar": "max"}})
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
def test_missing_column(method, func):
    """A dict spec keyed by a column that does not exist raises KeyError."""
    # GH 40004
    obj = DataFrame({"A": [1]})
    # escaped because the message contains parentheses and brackets
    match = re.escape("Column(s) ['B'] do not exist")
    with pytest.raises(KeyError, match=match):
        getattr(obj, method)(func)
|
||||
|
||||
|
||||
def test_transform_mixed_column_name_dtypes():
    """Missing keys of mixed types are all listed in the KeyError message."""
    # GH39025
    df = DataFrame({"a": ["1"]})
    msg = r"Column\(s\) \[1, 'b'\] do not exist"
    with pytest.raises(KeyError, match=msg):
        df.transform({"a": int, 1: str, "b": int})
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    # ``args`` is always a tuple: the original ("tail", 1) case passed a bare int
    "how, args",
    [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", (1,))],
)
def test_apply_str_axis_1_raises(how, args):
    """``DataFrame.apply`` with these method names raises for ``axis=1``."""
    # GH 39211 - some ops don't support axis=1
    df = DataFrame({"a": [1, 2], "b": [3, 4]})
    msg = f"Operation {how} does not support axis=1"
    with pytest.raises(ValueError, match=msg):
        df.apply(how, axis=1, args=args)
|
||||
|
||||
|
||||
def test_transform_axis_1_raises():
    """``Series.transform`` with ``axis=1`` raises ValueError."""
    # GH 35964
    msg = "No axis named 1 for object type Series"
    with pytest.raises(ValueError, match=msg):
        Series([1]).transform("sum", axis=1)
|
||||
|
||||
|
||||
def test_apply_modify_traceback():
    """An AttributeError raised inside a row-wise ``apply`` reaches the caller."""
    data = DataFrame(
        {
            "A": [
                "foo",
                "foo",
                "foo",
                "foo",
                "bar",
                "bar",
                "bar",
                "bar",
                "foo",
                "foo",
                "foo",
            ],
            "B": [
                "one",
                "one",
                "one",
                "two",
                "one",
                "one",
                "one",
                "two",
                "two",
                "two",
                "one",
            ],
            "C": [
                "dull",
                "dull",
                "shiny",
                "dull",
                "dull",
                "shiny",
                "shiny",
                "dull",
                "shiny",
                "shiny",
                "shiny",
            ],
            "D": np.random.randn(11),
            "E": np.random.randn(11),
            "F": np.random.randn(11),
        }
    )

    # a float NaN in the string column is what makes ``transform`` fail
    data.loc[4, "C"] = np.nan

    def transform(row):
        if row["C"].startswith("shin") and row["A"] == "foo":
            row["D"] = 7
        return row

    # NOTE(review): transform2 is never called in this test; it is kept
    # because it is the only visible user of the module-level ``notna`` import.
    def transform2(row):
        if notna(row["C"]) and row["C"].startswith("shin") and row["A"] == "foo":
            row["D"] = 7
        return row

    msg = "'float' object has no attribute 'startswith'"
    with pytest.raises(AttributeError, match=msg):
        data.apply(transform, axis=1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    tm.get_cython_table_params(
        DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]]
    ),
)
def test_agg_cython_table_raises_frame(df, func, expected, axis):
    """``DataFrame.agg`` with cumprod on string data raises the expected error."""
    # GH 21224
    msg = "can't multiply sequence by non-int of type 'str'"
    with pytest.raises(expected, match=msg):
        df.agg(func, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                (kernel, TypeError)
                for kernel in ("mean", "prod", "std", "var", "median", "cumprod")
            ],
        )
    ),
)
def test_agg_cython_table_raises_series(series, func, expected):
    """Series.agg on a string Series raises the parametrized error (GH21224)."""
    expected_msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"
    # e.g. Series('a b'.split()).cumprod() will raise
    with pytest.raises(expected, match=expected_msg):
        series.agg(func)
|
||||
|
||||
|
||||
def test_agg_none_to_type():
    """Aggregating a None-valued column with ``int`` raises TypeError (GH 40543)."""
    frame = DataFrame({"a": [None]})
    expected_msg = re.escape("int() argument must be a string")
    with pytest.raises(TypeError, match=expected_msg):
        frame.agg({"a": int})
|
||||
|
||||
|
||||
def test_transform_none_to_type():
    """Transforming a None-valued column with ``int`` raises TypeError (GH#34377)."""
    frame = DataFrame({"a": [None]})
    expected_msg = "Transform function failed"
    with pytest.raises(TypeError, match=expected_msg):
        frame.transform({"a": int})
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func",
    [
        lambda x: np.array([1, 2]).reshape(-1, 2),
        lambda x: [1, 2],
        lambda x: Series([1, 2]),
    ],
)
def test_apply_broadcast_error(int_frame_const_col, func):
    """Row-wise apply with ``result_type="broadcast"`` rejects these results."""
    frame = int_frame_const_col

    # > 1 ndim
    expected_msg = "too many dims to broadcast|cannot broadcast result"
    with pytest.raises(ValueError, match=expected_msg):
        frame.apply(func, axis=1, result_type="broadcast")
|
||||
|
||||
|
||||
def test_transform_and_agg_err_agg(axis, float_frame):
    """Mixing a reducer and a transformer in DataFrame.agg raises ValueError."""
    # cannot both transform and agg
    expected_msg = "cannot combine transform and aggregation operations"
    with pytest.raises(ValueError, match=expected_msg), np.errstate(all="ignore"):
        float_frame.agg(["max", "sqrt"], axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, msg",
    [
        (["sqrt", "max"], "cannot combine transform and aggregation"),
        (
            {"foo": np.sqrt, "bar": "sum"},
            "cannot perform both aggregation and transformation",
        ),
    ],
)
def test_transform_and_agg_err_series(string_series, func, msg):
    """Series.agg with a transformer mixed into reducers raises ValueError."""
    # we are trying to transform with an aggregator
    with pytest.raises(ValueError, match=msg), np.errstate(all="ignore"):
        string_series.agg(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]])
def test_transform_wont_agg_frame(axis, float_frame, func):
    """DataFrame.transform refuses a function list containing reducers (GH 35964)."""
    # cannot both transform and agg
    expected_msg = "Function did not transform"
    with pytest.raises(ValueError, match=expected_msg):
        float_frame.transform(func, axis=axis)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]])
def test_transform_wont_agg_series(string_series, func):
    """Series.transform refuses a function list containing reducers (GH 35964)."""
    # we are trying to transform with an aggregator
    expected_msg = "Function did not transform"

    # A RuntimeWarning is only expected when "sqrt" is the first function.
    if func[0] == "sqrt":
        expected_warning = RuntimeWarning
    else:
        expected_warning = None
    warning_pattern = "invalid value encountered in sqrt"

    with pytest.raises(ValueError, match=expected_msg), tm.assert_produces_warning(
        expected_warning, match=warning_pattern
    ):
        string_series.transform(func)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
)
@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
    """transform() with a reduction, however it is wrapped, raises (GH 35964)."""
    wrapped_op = op_wrapper(all_reductions)

    frame = DataFrame({"A": [1, 2, 3]})
    target = tm.get_obj(frame, frame_or_series)

    expected_msg = "Function did not transform"
    with pytest.raises(ValueError, match=expected_msg):
        target.transform(wrapped_op)
|
||||
@@ -0,0 +1,889 @@
|
||||
from collections import (
|
||||
Counter,
|
||||
defaultdict,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
isna,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.apply.common import series_transform_kernels
|
||||
|
||||
|
||||
def test_series_map_box_timedelta():
    """Smoke test (GH#11349): element-wise calls on timedelta data do not raise.

    The mapped function calls ``total_seconds()`` on each element; results are
    not inspected, the calls only need to complete.
    """
    ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))

    def to_seconds(value):
        return value.total_seconds()

    ser.map(to_seconds)
    ser.apply(to_seconds)
    DataFrame(ser).applymap(to_seconds)
|
||||
|
||||
|
||||
def test_apply(datetime_series):
    """Series.apply with a ufunc, a scalar function, and on empty/all-NaN data."""
    import math

    # ufunc passed to apply gives the same result as calling it directly
    with np.errstate(all="ignore"):
        tm.assert_series_equal(datetime_series.apply(np.sqrt), np.sqrt(datetime_series))

    # element-wise apply
    tm.assert_series_equal(datetime_series.apply(math.exp), np.exp(datetime_series))

    # empty series
    empty = Series(dtype=object, name="foo", index=Index([], name="bar"))
    applied = empty.apply(lambda x: x)
    tm.assert_series_equal(empty, applied)

    # check all metadata (GH 9322)
    assert empty is not applied
    assert empty.index is applied.index
    assert empty.dtype == applied.dtype
    assert empty.name == applied.name

    # index but no data
    no_data = Series(index=[1, 2, 3], dtype=np.float64)
    applied = no_data.apply(lambda x: x)
    tm.assert_series_equal(no_data, applied)
|
||||
|
||||
|
||||
def test_apply_same_length_inference_bug():
    """apply and map agree when the function returns a 2-tuple per element.

    Checked for a Series of length 2 (same as the tuple length) and length 3.
    """

    def pair_with_next(x):
        return (x, x + 1)

    for values in ([1, 2], [1, 2, 3]):
        ser = Series(values)
        via_apply = ser.apply(pair_with_next)
        via_map = ser.map(pair_with_next)
        tm.assert_series_equal(via_apply, via_map)
|
||||
|
||||
|
||||
def test_apply_dont_convert_dtype():
    """With ``convert_dtype=False`` the applied result keeps object dtype."""
    ser = Series(np.random.randn(10))

    def positive_or_nan(value):
        if value > 0:
            return value
        return np.nan

    outcome = ser.apply(positive_or_nan, convert_dtype=False)
    assert outcome.dtype == object
|
||||
|
||||
|
||||
def test_apply_args():
    """Positional ``args`` are forwarded to the applied function."""
    ser = Series(["foo,bar"])

    pieces = ser.apply(str.split, args=(",",))
    first = pieces[0]
    assert first == ["foo", "bar"]
    assert isinstance(first, list)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "args, kwargs, increment",
    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
    """Extra positional and keyword arguments reach the function in Series.agg."""
    # GH 43357
    def weighted_shift(x, a=0, b=0, c=0):
        return x + a + 10 * b + 100 * c

    ser = Series([1, 2])
    outcome = ser.agg(weighted_shift, 0, *args, **kwargs)
    tm.assert_series_equal(outcome, ser + increment)
|
||||
|
||||
|
||||
def test_series_map_box_timestamps():
    """Smoke test (GH#2689, GH#2627): datetime elements expose hour/day/month."""
    ser = Series(pd.date_range("1/1/2000", periods=10))

    def hour_day_month(stamp):
        return (stamp.hour, stamp.day, stamp.month)

    # it works!
    ser.map(hour_day_month)
    ser.apply(hour_day_month)
|
||||
|
||||
|
||||
def test_series_map_stringdtype(any_string_dtype):
    """Mapping one string Series through another keeps the dtype (GH#40823).

    A key that is absent from the lookup Series maps to ``pd.NA``.
    """
    lookup = Series(
        data=["cat", "dog", "rabbit"],
        index=["id1", "id2", "id3"],
        dtype=any_string_dtype,
    )
    keys = Series(data=["id3", "id2", "id1", "id7000"], dtype=any_string_dtype)

    mapped = keys.map(lookup)
    wanted = Series(data=["rabbit", "dog", "cat", pd.NA], dtype=any_string_dtype)

    tm.assert_series_equal(mapped, wanted)
|
||||
|
||||
|
||||
def test_apply_box():
    """Series.apply hands each element to the function as its boxed scalar type.

    Checked for naive and tz-aware datetimes, timedeltas and periods by
    formatting the element's type name together with one of its attributes.
    """
    # ufunc will not be boxed. Same test cases as the test_map_box

    # naive datetime: boxed value must be Timestamp instance
    naive = Series([pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")])
    assert naive.dtype == "datetime64[ns]"
    got = naive.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
    want = Series(["Timestamp_1_None", "Timestamp_2_None"])
    tm.assert_series_equal(got, want)

    # tz-aware datetime
    aware = Series(
        [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ]
    )
    assert aware.dtype == "datetime64[ns, US/Eastern]"
    got = aware.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
    want = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
    tm.assert_series_equal(got, want)

    # timedelta
    deltas = Series([pd.Timedelta("1 days"), pd.Timedelta("2 days")])
    assert deltas.dtype == "timedelta64[ns]"
    got = deltas.apply(lambda x: f"{type(x).__name__}_{x.days}")
    want = Series(["Timedelta_1", "Timedelta_2"])
    tm.assert_series_equal(got, want)

    # period
    periods = Series(
        [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
    )
    assert periods.dtype == "Period[M]"
    got = periods.apply(lambda x: f"{type(x).__name__}_{x.freqstr}")
    want = Series(["Period_M", "Period_M"])
    tm.assert_series_equal(got, want)
|
||||
|
||||
|
||||
def test_apply_datetimetz():
|
||||
values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
|
||||
"Asia/Tokyo"
|
||||
)
|
||||
s = Series(values, name="XX")
|
||||
|
||||
result = s.apply(lambda x: x + pd.offsets.Day())
|
||||
exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
|
||||
"Asia/Tokyo"
|
||||
)
|
||||
exp = Series(exp_values, name="XX")
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# change dtype
|
||||
# GH 14506 : Returned dtype changed from int32 to int64
|
||||
result = s.apply(lambda x: x.hour)
|
||||
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# not vectorized
|
||||
def f(x):
|
||||
if not isinstance(x, pd.Timestamp):
|
||||
raise ValueError
|
||||
return str(x.tz)
|
||||
|
||||
result = s.map(f)
|
||||
exp = Series(["Asia/Tokyo"] * 25, name="XX")
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
|
||||
def test_apply_categorical():
    """Series.apply on ordered categorical data.

    Lower-casing every value is expected to yield a categorical result with
    lower-cased categories; mapping everything to "A" is expected to yield
    object dtype.
    """
    labels = list("abcdefg")
    source = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    ser = Series(source, name="XX", index=labels)

    lowered = ser.apply(lambda x: x.lower())

    # should be categorical dtype when the number of categories are
    # the same
    lowered_cat = pd.Categorical(
        list("abbabcd"), categories=list("dcba"), ordered=True
    )
    want = Series(lowered_cat, name="XX", index=labels)
    tm.assert_series_equal(lowered, want)
    tm.assert_categorical_equal(lowered.values, want.values)

    constant = ser.apply(lambda x: "A")
    want = Series(["A"] * 7, name="XX", index=labels)
    tm.assert_series_equal(constant, want)
    assert constant.dtype == object
|
||||
|
||||
|
||||
@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
def test_apply_categorical_with_nan_values(series):
    """Series.apply on categorical data with a missing value.

    The string values are split on "-" and the first part kept. Both the
    result and the expected Series are cast to object before comparison.
    Uses ``np.nan`` (as elsewhere in this file) rather than the ``np.NaN``
    alias, which NumPy 2.0 removed.
    """
    # GH 20714 bug fixed in: GH 24275
    s = Series(series, dtype="category")
    result = s.apply(lambda x: x.split("-")[0])
    result = result.astype(object)
    expected = Series(["1", "1", np.nan], dtype="category")
    expected = expected.astype(object)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_empty_integer_series_with_datetime_index():
    """Identity apply on an empty int Series with a datetime index (GH 21245)."""
    empty_index = pd.date_range(start="2018-01-01", periods=0)
    ser = Series([], index=empty_index, dtype=int)
    applied = ser.apply(lambda x: x)
    tm.assert_series_equal(applied, ser)
|
||||
|
||||
|
||||
def test_transform(string_series):
    """Series.apply with transforming functions given as ufunc, list and dict."""
    # transforming functions

    with np.errstate(all="ignore"):

        sqrt_direct = np.sqrt(string_series)
        abs_direct = np.abs(string_series)

        # ufunc
        got = string_series.apply(np.sqrt)
        tm.assert_series_equal(got, sqrt_direct.copy())

        # list-like
        want_frame = sqrt_direct.to_frame().copy()
        want_frame.columns = ["sqrt"]
        got = string_series.apply([np.sqrt])
        tm.assert_frame_equal(got, want_frame)

        got = string_series.apply(["sqrt"])
        tm.assert_frame_equal(got, want_frame)

        # multiple items in list
        # these are in the order as if we are applying both functions per
        # series and then concatting
        want_frame = concat([sqrt_direct, abs_direct], axis=1)
        want_frame.columns = ["sqrt", "absolute"]
        got = string_series.apply([np.sqrt, np.abs])
        tm.assert_frame_equal(got, want_frame)

        # dict, provide renaming
        want_frame = concat([sqrt_direct, abs_direct], axis=1)
        want_frame.columns = ["foo", "bar"]
        want_series = want_frame.unstack().rename("series")

        got = string_series.apply({"foo": np.sqrt, "bar": np.abs})
        tm.assert_series_equal(got.reindex_like(want_series), want_series)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
    """Failing kernels are dropped from Series.transform with a FutureWarning.

    For list, dict and dict-of-list inputs, the result must equal a transform
    that uses only "shift", and a FutureWarning naming the failed entry must
    be emitted (GH 35964).
    """
    if op in ("ffill", "bfill", "pad", "backfill", "shift"):
        xfail_marker = pytest.mark.xfail(
            raises=AssertionError, reason=f"{op} is successful on any dtype"
        )
        request.node.add_marker(xfail_marker)
    if op in ("rank", "fillna"):
        pytest.skip(f"{op} doesn't raise TypeError on object")

    # Using object makes most transform kernels fail
    ser = Series(3 * [object])

    op_failed = rf"\['{op}'\] did not transform successfully"
    key_a_failed = r"\['A'\] did not transform successfully"
    key_b_failed = r"\['B'\] did not transform successfully"

    # list of functions
    wanted = ser.transform(["shift"])
    with tm.assert_produces_warning(FutureWarning, match=op_failed):
        got = ser.transform([op, "shift"])
    tm.assert_equal(got, wanted)

    # dict of functions
    wanted = ser.transform({"B": "shift"})
    with tm.assert_produces_warning(FutureWarning, match=key_a_failed):
        got = ser.transform({"A": op, "B": "shift"})
    tm.assert_equal(got, wanted)

    # dict of single-element lists
    wanted = ser.transform({"B": ["shift"]})
    with tm.assert_produces_warning(FutureWarning, match=key_a_failed):
        got = ser.transform({"A": [op], "B": ["shift"]})
    tm.assert_equal(got, wanted)

    # dict of lists where both keys contain the failing kernel
    with tm.assert_produces_warning(FutureWarning, match=key_b_failed):
        wanted = ser.transform({"A": ["shift"], "B": [op]})
    with tm.assert_produces_warning(FutureWarning, match=op_failed):
        got = ser.transform({"A": [op, "shift"], "B": [op]})
    tm.assert_equal(got, wanted)
|
||||
|
||||
|
||||
def test_transform_partial_failure_valueerror():
|
||||
# GH 40211
|
||||
match = ".*did not transform successfully"
|
||||
|
||||
def noop(x):
|
||||
return x
|
||||
|
||||
def raising_op(_):
|
||||
raise ValueError
|
||||
|
||||
ser = Series(3 * [object])
|
||||
|
||||
expected = ser.transform([noop])
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = ser.transform([noop, raising_op])
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
expected = ser.transform({"B": noop})
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = ser.transform({"A": raising_op, "B": noop})
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
expected = ser.transform({"B": [noop]})
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = ser.transform({"A": [raising_op], "B": [noop]})
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
expected = ser.transform({"A": [noop], "B": [noop]})
|
||||
with tm.assert_produces_warning(FutureWarning, match=match):
|
||||
result = ser.transform({"A": [noop, raising_op], "B": [noop]})
|
||||
tm.assert_equal(result, expected)
|
||||
|
||||
|
||||
def test_demo():
    """Demonstration: Series.agg with a list of names and with a renaming dict."""
    ser = Series(range(6), dtype="int64", name="series")

    # list of reducer names -> result indexed by those names
    extremes = ser.agg(["min", "max"])
    tm.assert_series_equal(
        extremes, Series([0, 5], index=["min", "max"], name="series")
    )

    # dict -> result indexed by the dict key
    renamed = ser.agg({"foo": "min"})
    tm.assert_series_equal(renamed, Series([0], index=["foo"], name="series"))
|
||||
|
||||
|
||||
def test_agg_apply_evaluate_lambdas_the_same(string_series):
    """apply and agg give equal results for ``str`` and a lambda wrapping it."""
    # test that we are evaluating row-by-row first
    # before vectorized evaluation
    via_apply = string_series.apply(lambda x: str(x))
    via_agg = string_series.agg(lambda x: str(x))
    tm.assert_series_equal(via_apply, via_agg)

    via_apply = string_series.apply(str)
    via_agg = string_series.agg(str)
    tm.assert_series_equal(via_apply, via_agg)
|
||||
|
||||
|
||||
def test_with_nested_series(datetime_series):
    """A function returning a Series per element expands to a DataFrame (GH 2316).

    The same expected frame is used for both ``apply`` and ``agg``.
    """
    # .agg with a reducer and a transform, what to do

    def value_and_square(x):
        return Series([x, x**2], index=["x", "x^2"])

    wanted = DataFrame({"x": datetime_series, "x^2": datetime_series**2})

    tm.assert_frame_equal(datetime_series.apply(value_and_square), wanted)
    tm.assert_frame_equal(datetime_series.agg(value_and_square), wanted)
|
||||
|
||||
|
||||
def test_replicate_describe(string_series):
    """A dict of reducers passed to apply reproduces ``Series.describe()``."""
    # this also tests a result set that is all scalars
    reducers = {
        "count": "count",
        "mean": "mean",
        "std": "std",
        "min": "min",
        "25%": lambda x: x.quantile(0.25),
        "50%": "median",
        "75%": lambda x: x.quantile(0.75),
        "max": "max",
    }
    wanted = string_series.describe()
    got = string_series.apply(reducers)
    tm.assert_series_equal(got, wanted)
|
||||
|
||||
|
||||
def test_reduce(string_series):
    """Series.agg with reducer names returns a Series indexed by those names."""
    # reductions with named functions
    names = ["sum", "mean"]
    got = string_series.agg(["sum", "mean"])
    wanted = Series(
        [string_series.sum(), string_series.mean()],
        names,
        name=string_series.name,
    )
    tm.assert_series_equal(got, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_non_callable_aggregates(how):
    """agg/apply accept the name of a non-callable Series attribute ("size")."""
    # test agg using non-callable series attributes
    # GH 39116 - expand to apply
    ser = Series([1, 2, None])
    method = getattr(ser, how)

    # Calling agg w/ just a string arg same as calling s.arg
    assert method("size") == ser.size

    # test when mixed w/ callable reducers
    mixed = method(["size", "count", "mean"])
    wanted = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
    tm.assert_series_equal(mixed, wanted)
|
||||
|
||||
|
||||
def test_series_apply_no_suffix_index():
    """Duplicate lambda labels in the result index get no suffix (GH36189)."""
    ser = Series([4] * 3)
    reducers = ["sum", lambda x: x.sum(), lambda x: x.sum()]

    got = ser.apply(reducers)
    wanted = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])

    tm.assert_series_equal(got, wanted)
|
||||
|
||||
|
||||
def test_map(datetime_series):
    """Series.map with a Series, a dict, a function and categorical mappers."""
    _, data = tm.getMixedTypeDict()

    source = Series(data["B"], index=data["C"])
    target = Series(data["C"][:4], index=data["D"][:4])

    # mapper is a Series
    merged = target.map(source)
    for label, mapped_value in merged.items():
        assert mapped_value == source[target[label]]

    # input could be a dict
    merged = target.map(source.to_dict())
    for label, mapped_value in merged.items():
        assert mapped_value == source[target[label]]

    # function
    doubled = datetime_series.map(lambda x: x * 2)
    tm.assert_series_equal(doubled, datetime_series * 2)

    # GH 10324
    # integer keys, categorical vs plain string values in the mapper
    keys = Series([1, 2, 3, 4])
    cat_mapper = Series(["even", "odd", "even", "odd"], dtype="category")
    obj_mapper = Series(["even", "odd", "even", "odd"])

    want = Series(["odd", "even", "odd", np.nan], dtype="category")
    tm.assert_series_equal(keys.map(cat_mapper), want)
    want = Series(["odd", "even", "odd", np.nan])
    tm.assert_series_equal(keys.map(obj_mapper), want)

    # string keys, mapper indexed by CategoricalIndex vs plain Index
    keys = Series(["a", "b", "c", "d"])
    cat_mapper = Series(
        [1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"])
    )
    obj_mapper = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))

    want = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(keys.map(cat_mapper), want)
    want = Series([np.nan, 1, 2, 3])
    tm.assert_series_equal(keys.map(obj_mapper), want)

    # string keys, categorical values on a CategoricalIndex vs plain objects
    keys = Series(["a", "b", "c", "d"])
    cat_mapper = Series(
        ["B", "C", "D", "E"],
        dtype="category",
        index=pd.CategoricalIndex(["b", "c", "d", "e"]),
    )
    obj_mapper = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))

    want = Series(
        pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
    )
    tm.assert_series_equal(keys.map(cat_mapper), want)
    want = Series([np.nan, "B", "C", "D"])
    tm.assert_series_equal(keys.map(obj_mapper), want)
|
||||
|
||||
|
||||
def test_map_empty(index):
    """Mapping through an empty dict yields NaN for every element."""
    if isinstance(index, MultiIndex):
        pytest.skip("Initializing a Series from a MultiIndex is not supported")

    ser = Series(index)
    mapped = ser.map({})

    all_nan = Series(np.nan, index=ser.index)
    tm.assert_series_equal(mapped, all_nan)
|
||||
|
||||
|
||||
def test_map_compat():
    """Boolean values map through a dict keyed by True/False (related GH 8024)."""
    labels = [1, 2, 3]
    flags = Series([True, True, False], index=labels)
    mapped = flags.map({True: "foo", False: "bar"})
    wanted = Series(["foo", "foo", "bar"], index=labels)
    tm.assert_series_equal(mapped, wanted)
|
||||
|
||||
|
||||
def test_map_int():
    """Mapping floats through an integer Series gives float64 with NaN for misses.

    ``left`` holds 1.0-4.0 and ``right`` only has keys 1-3, so "d" maps to
    NaN while "c" maps to a real value. The dtype checks use ``np.float64``
    because the ``np.float_`` alias was removed in NumPy 2.0.
    """
    left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
    right = Series({1: 11, 2: 22, 3: 33})

    assert left.dtype == np.float64
    assert issubclass(right.dtype.type, np.integer)

    merged = left.map(right)
    assert merged.dtype == np.float64
    assert isna(merged["d"])
    assert not isna(merged["c"])
|
||||
|
||||
|
||||
def test_map_type_inference():
    """A function returning ``np.where`` output maps to an integer dtype."""
    ser = Series(range(3))
    mapped = ser.map(lambda x: np.where(x == 0, 0, 1))
    result_type = mapped.dtype.type
    assert issubclass(result_type, np.integer)
|
||||
|
||||
|
||||
def test_map_decimal(string_series):
    """Mapping to ``Decimal`` gives an object Series holding Decimal instances."""
    from decimal import Decimal

    def to_decimal(value):
        return Decimal(str(value))

    mapped = string_series.map(to_decimal)
    assert mapped.dtype == np.object_
    assert isinstance(mapped[0], Decimal)
|
||||
|
||||
|
||||
def test_map_na_exclusion():
    """With ``na_action="ignore"`` the result equals plain ``s * 2``."""
    ser = Series([1.5, np.nan, 3, np.nan, 5])

    doubled = ser.map(lambda x: x * 2, na_action="ignore")
    tm.assert_series_equal(doubled, ser * 2)
|
||||
|
||||
|
||||
def test_map_dict_with_tuple_keys():
    """
    Dicts with tuple keys must map tuple values element by element.

    With the MultiIndex-ing behaviour introduced in v0.14.0, such dicts
    were converted to a multi-index when passed to map, which prevented
    tuple values from being mapped properly.
    """
    # GH 18496
    tuples = [(1,), (2,), (3, 4), (5, 6)]
    label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}
    df = DataFrame({"a": tuples})

    df["labels"] = df["a"].map(label_mappings)
    df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index)
    # All labels should be filled now
    tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)
|
||||
|
||||
|
||||
def test_map_counter():
    """Mapping through a Counter gives 0 for keys the Counter does not hold."""
    labels = [1, 2, 3]
    letters = Series(["a", "b", "c"], index=labels)
    tally = Counter({"b": 5, "c": 1})

    mapped = letters.map(tally)
    wanted = Series([0, 5, 1], index=labels)
    tm.assert_series_equal(mapped, wanted)
|
||||
|
||||
|
||||
def test_map_defaultdict():
    """Mapping through a defaultdict uses its factory value for missing keys."""
    labels = ["a", "b", "c"]
    numbers = Series([1, 2, 3], index=labels)
    lookup = defaultdict(lambda: "blank", {1: "stuff"})

    mapped = numbers.map(lookup)
    wanted = Series(["stuff", "blank", "blank"], index=labels)
    tm.assert_series_equal(mapped, wanted)
|
||||
|
||||
|
||||
def test_map_dict_na_key():
    """A ``np.nan`` dict key is used to map NaN elements."""
    # https://github.com/pandas-dev/pandas/issues/17648
    # Checks that np.nan key is appropriately mapped
    ser = Series([1, 2, np.nan])
    mapped = ser.map({1: "a", 2: "b", np.nan: "c"})
    tm.assert_series_equal(mapped, Series(["a", "b", "c"]))
|
||||
|
||||
|
||||
def test_map_dict_subclass_with_missing():
    """
    Series.map honours ``__missing__`` on a dict subclass (GH #15999).

    Keys absent from the mapping receive the value ``__missing__`` returns.
    """

    class DictWithMissing(dict):
        def __missing__(self, key):
            return "missing"

    ser = Series([1, 2, 3])
    mapper = DictWithMissing({3: "three"})

    mapped = ser.map(mapper)
    wanted = Series(["missing", "missing", "three"])
    tm.assert_series_equal(mapped, wanted)
|
||||
|
||||
|
||||
def test_map_dict_subclass_without_missing():
    """A plain dict subclass maps absent keys to NaN."""

    class DictWithoutMissing(dict):
        pass

    ser = Series([1, 2, 3])
    mapper = DictWithoutMissing({3: "three"})

    mapped = ser.map(mapper)
    wanted = Series([np.nan, np.nan, "three"])
    tm.assert_series_equal(mapped, wanted)
|
||||
|
||||
|
||||
def test_map_abc_mapping(non_dict_mapping_subclass):
    """Series.map accepts a non-dict Mapping as the mapper."""
    # https://github.com/pandas-dev/pandas/issues/29733
    # Check collections.abc.Mapping support as mapper for Series.map
    ser = Series([1, 2, 3])
    mapper = non_dict_mapping_subclass({3: "three"})

    mapped = ser.map(mapper)
    wanted = Series([np.nan, np.nan, "three"])
    tm.assert_series_equal(mapped, wanted)
|
||||
|
||||
|
||||
def test_map_abc_mapping_with_missing(non_dict_mapping_subclass):
    """``__missing__`` on a non-dict Mapping does not affect Series.map.

    The mapper subclass defines ``__missing__``, yet keys absent from it are
    still expected to map to NaN.
    """
    # https://github.com/pandas-dev/pandas/issues/29733
    # Check collections.abc.Mapping support as mapper for Series.map
    class NonDictMappingWithMissing(non_dict_mapping_subclass):
        # Declared with ``self`` so it has the regular ``__missing__``
        # signature; the original omitted it.
        def __missing__(self, key):
            return "missing"

    s = Series([1, 2, 3])
    not_a_dictionary = NonDictMappingWithMissing({3: "three"})
    result = s.map(not_a_dictionary)
    # __missing__ is a dict concept, not a Mapping concept,
    # so it should not change the result!
    expected = Series([np.nan, np.nan, "three"])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_map_box():
    """Series.map hands each element to the function as its boxed scalar type.

    Checked for naive and tz-aware datetimes, timedeltas and periods by
    formatting the element's type name together with one of its attributes.
    Every case calls ``Series.map``; the original called ``Series.apply``,
    which left ``map`` untested and duplicated ``test_apply_box``.
    """
    vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
    s = Series(vals)
    assert s.dtype == "datetime64[ns]"
    # boxed value must be Timestamp instance
    res = s.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
    exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
    tm.assert_series_equal(res, exp)

    vals = [
        pd.Timestamp("2011-01-01", tz="US/Eastern"),
        pd.Timestamp("2011-01-02", tz="US/Eastern"),
    ]
    s = Series(vals)
    assert s.dtype == "datetime64[ns, US/Eastern]"
    res = s.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
    exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
    tm.assert_series_equal(res, exp)

    # timedelta
    vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
    s = Series(vals)
    assert s.dtype == "timedelta64[ns]"
    res = s.map(lambda x: f"{type(x).__name__}_{x.days}")
    exp = Series(["Timedelta_1", "Timedelta_2"])
    tm.assert_series_equal(res, exp)

    # period
    vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
    s = Series(vals)
    assert s.dtype == "Period[M]"
    res = s.map(lambda x: f"{type(x).__name__}_{x.freqstr}")
    exp = Series(["Period_M", "Period_M"])
    tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
def test_map_categorical():
    """Series.map on ordered categorical data.

    Lower-casing every value is expected to yield a categorical result with
    lower-cased categories; mapping everything to "A" is expected to yield
    object dtype.
    """
    labels = list("abcdefg")
    source = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    ser = Series(source, name="XX", index=labels)

    lowered = ser.map(lambda x: x.lower())
    lowered_cat = pd.Categorical(
        list("abbabcd"), categories=list("dcba"), ordered=True
    )
    want = Series(lowered_cat, name="XX", index=labels)
    tm.assert_series_equal(lowered, want)
    tm.assert_categorical_equal(lowered.values, lowered_cat)

    constant = ser.map(lambda x: "A")
    want = Series(["A"] * 7, name="XX", index=labels)
    tm.assert_series_equal(constant, want)
    assert constant.dtype == object
|
||||
|
||||
|
||||
def test_map_datetimetz():
|
||||
values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
|
||||
"Asia/Tokyo"
|
||||
)
|
||||
s = Series(values, name="XX")
|
||||
|
||||
# keep tz
|
||||
result = s.map(lambda x: x + pd.offsets.Day())
|
||||
exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
|
||||
"Asia/Tokyo"
|
||||
)
|
||||
exp = Series(exp_values, name="XX")
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# change dtype
|
||||
# GH 14506 : Returned dtype changed from int32 to int64
|
||||
result = s.map(lambda x: x.hour)
|
||||
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
# not vectorized
|
||||
def f(x):
|
||||
if not isinstance(x, pd.Timestamp):
|
||||
raise ValueError
|
||||
return str(x.tz)
|
||||
|
||||
result = s.map(f)
|
||||
exp = Series(["Asia/Tokyo"] * 25, name="XX")
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "vals,mapping,exp",
    [
        (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
        (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
        (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
    ],
)
def test_map_missing_mixed(vals, mapping, exp):
    """Dict mapping over values with a trailing NaN appended (GH20495)."""
    with_nan = Series(vals + [np.nan])
    mapped = with_nan.map(mapping)

    tm.assert_series_equal(mapped, Series(exp))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "dti,exp",
    [
        (
            Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
            DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
        ),
        (
            tm.makeTimeSeries(nper=30),
            DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
        ),
    ],
)
@pytest.mark.parametrize("aware", [True, False])
def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
    """apply returning a Series per datetime element builds a frame (GH 25959)."""
    # Calling apply on a localized time series should not cause an error
    source = dti.tz_localize("UTC") if aware else dti
    stamps = Series(source.index)
    got = stamps.apply(lambda x: Series([1, 2]))
    tm.assert_frame_equal(got, exp)
|
||||
|
||||
|
||||
def test_apply_scalar_on_date_time_index_aware_series():
    """apply returning a scalar per tz-aware datetime gives int64 (GH 25959)."""
    # Calling apply on a localized time series should not cause an error
    localized = tm.makeTimeSeries(nper=30).tz_localize("UTC")
    stamps = Series(localized.index)
    got = stamps.apply(lambda x: 1)
    want = Series(np.ones(30), dtype="int64")
    tm.assert_series_equal(got, want)
|
||||
|
||||
|
||||
def test_map_float_to_string_precision():
    """Mapping a float through ``str`` keeps full precision (GH 13228)."""
    third = Series(1 / 3)
    as_text = third.map(lambda val: str(val))
    assert as_text.to_dict() == {0: "0.3333333333333333"}
|
||||
|
||||
|
||||
def test_apply_to_timedelta():
|
||||
list_of_valid_strings = ["00:00:01", "00:00:02"]
|
||||
a = pd.to_timedelta(list_of_valid_strings)
|
||||
b = Series(list_of_valid_strings).apply(pd.to_timedelta)
|
||||
tm.assert_series_equal(Series(a), b)
|
||||
|
||||
list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]
|
||||
|
||||
a = pd.to_timedelta(list_of_strings)
|
||||
with tm.assert_produces_warning(FutureWarning, match="Inferring timedelta64"):
|
||||
ser = Series(list_of_strings)
|
||||
b = ser.apply(pd.to_timedelta)
|
||||
tm.assert_series_equal(Series(a), b)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sum], ["sum"]),
        ([np.sum, np.mean], ["sum", "mean"]),
        (np.array([np.sum]), ["sum"]),
        (np.array([np.sum, np.mean]), ["sum", "mean"]),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_listlike_reducer(string_series, ops, names, how):
    # GH 39140
    # A list-like of reducers yields one entry per reducer, labelled by name.
    expected = Series({name: op(string_series) for name, op in zip(names, ops)})
    # Take the name from the fixture instead of hard-coding it, as the
    # dict-like reducer test does.
    expected.name = string_series.name
    result = getattr(string_series, how)(ops)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sum},
        {"A": np.sum, "B": np.mean},
        Series({"A": np.sum}),
        Series({"A": np.sum, "B": np.mean}),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_dictlike_reducer(string_series, ops, how):
    # GH 39140
    # A dict-like of reducers yields one entry per key, keeping the
    # original Series name.
    expected = Series({name: op(string_series) for name, op in ops.items()})
    expected.name = string_series.name
    result = getattr(string_series, how)(ops)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_apply_listlike_transformer(string_series, ops, names):
    # GH 39140
    # Floating-point errors are ignored for both the expected and the
    # actual computation.
    with np.errstate(all="ignore"):
        expected = concat([op(string_series) for op in ops], axis=1)
        expected.columns = names
        result = string_series.apply(ops)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sqrt},
        {"A": np.sqrt, "B": np.exp},
        Series({"A": np.sqrt}),
        Series({"A": np.sqrt, "B": np.exp}),
    ],
)
def test_apply_dictlike_transformer(string_series, ops):
    # GH 39140
    # Floating-point errors are ignored for both the expected and the
    # actual computation.
    with np.errstate(all="ignore"):
        # Concatenating a dict stacks the per-key results under their keys.
        expected = concat({name: op(string_series) for name, op in ops.items()})
        expected.name = string_series.name
        result = string_series.apply(ops)
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_retains_column_name():
    # GH 16380
    # The index name ("y") of the Series returned by the applied function
    # must survive as the name of the resulting columns.
    df = DataFrame({"x": range(3)}, Index(range(3), name="x"))
    result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y")))
    expected = DataFrame(
        [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]],
        columns=Index(range(3), name="y"),
        index=Index(range(3), name="x"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,33 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_relabel_no_duplicated_method():
    # this is to test there is no duplicated method used in agg
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})

    # Keyword relabelling must match the equivalent dict spelling.
    result = df["A"].agg(foo="sum")
    expected = df["A"].agg({"foo": "sum"})
    tm.assert_series_equal(result, expected)

    result = df["B"].agg(foo="min", bar="max")
    expected = df["B"].agg({"foo": "min", "bar": "max"})
    tm.assert_series_equal(result, expected)

    # Mix of builtin callables and string method names.
    result = df["B"].agg(foo=sum, bar=min, cat="max")
    expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"})
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_relabel_duplicated_method():
    # this is to test with nested renaming, duplicated method can be used
    # if they are assigned with different new names
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})

    result = df["A"].agg(foo="sum", bar="sum")
    expected = pd.Series([6, 6], index=["foo", "bar"], name="A")
    tm.assert_series_equal(result, expected)

    # Same reducer given once as a builtin and once by name.
    result = df["B"].agg(foo=min, bar="min")
    expected = pd.Series([1, 1], index=["foo", "bar"], name="B")
    tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,49 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
MultiIndex,
|
||||
Series,
|
||||
concat,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(string_series, ops, names):
    # GH 35964
    # Floating-point errors are ignored for both the expected and the
    # actual computation.
    with np.errstate(all="ignore"):
        expected = concat([op(string_series) for op in ops], axis=1)
        expected.columns = names
        result = string_series.transform(ops)
        tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(string_series, box):
    # GH 35964
    # ``box`` wraps the mapping so both dict and Series inputs are covered.
    with np.errstate(all="ignore"):
        expected = concat([np.sqrt(string_series), np.abs(string_series)], axis=1)
    expected.columns = ["foo", "bar"]
    result = string_series.transform(box({"foo": np.sqrt, "bar": np.abs}))
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform_dictlike_mixed():
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    ser = Series([1, 4])
    result = ser.transform({"b": ["sqrt", "abs"], "c": "sqrt"})
    # Expected columns are (b, sqrt), (b, abs), (c, sqrt), spelled out as
    # MultiIndex levels plus codes.
    expected = DataFrame(
        [[1.0, 1, 1.0], [2.0, 4, 2.0]],
        columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,302 @@
|
||||
from itertools import chain
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_number
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.base import maybe_normalize_deprecated_kernels
|
||||
from pandas.tests.apply.common import (
|
||||
frame_transform_kernels,
|
||||
series_transform_kernels,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
@pytest.mark.parametrize(
    "args,kwds",
    [
        pytest.param([], {}, id="no_args_or_kwds"),
        pytest.param([1], {}, id="axis_from_args"),
        pytest.param([], {"axis": 1}, id="axis_from_kwds"),
        pytest.param([], {"numeric_only": True}, id="optional_kwds"),
        pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
    # Passing a method name to agg/apply must match calling that method
    # directly with the same arguments.
    if len(args) > 1 and how == "agg":
        # Known mismatch: mark as an expected TypeError, not a failure.
        request.node.add_marker(
            pytest.mark.xfail(
                raises=TypeError,
                reason="agg/apply signature mismatch - agg passes 2nd "
                "argument to func",
            )
        )
    result = getattr(float_frame, how)(func, *args, **kwds)
    expected = getattr(float_frame, func)(*args, **kwds)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_with_string_args(datetime_series):
    # Applying a reducer by name must equal calling the method of the
    # same name on the Series.
    for arg in ["sum", "mean", "min", "max", "std"]:
        result = datetime_series.apply(arg)
        expected = getattr(datetime_series, arg)()
        assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_np_reducer(float_frame, op, how):
    # GH 39116
    # The original code rebound ``float_frame`` to this small frame; a
    # separate local makes it clear the fixture value is never used.
    frame = DataFrame({"a": [1, 2], "b": [3, 4]})
    result = getattr(frame, how)(op)
    # pandas ddof defaults to 1, numpy to 0
    kwargs = {"ddof": 1} if op in ("std", "var") else {}
    expected = Series(getattr(np, op)(frame, axis=0, **kwargs), index=frame.columns)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
)
@pytest.mark.parametrize("how", ["transform", "apply"])
def test_apply_np_transformer(float_frame, op, how):
    # GH 39116

    # float_frame will _usually_ have negative values, which will
    # trigger the warning here, but let's put one in just to be sure
    float_frame.iloc[0, 0] = -1.0
    warn = None
    if op in ["log", "sqrt"]:
        warn = RuntimeWarning

    # Both computations run inside the context so the numpy call's
    # warning is captured as well.
    with tm.assert_produces_warning(warn):
        result = getattr(float_frame, how)(op)
        expected = getattr(np, op)(float_frame)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("sum", 0),
                ("max", np.nan),
                ("min", np.nan),
                ("all", True),
                ("any", False),
                ("mean", np.nan),
                ("prod", 1),
                ("std", np.nan),
                ("var", np.nan),
                ("median", np.nan),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("sum", 6),
                ("max", 3),
                ("min", 1),
                ("all", True),
                ("any", True),
                ("mean", 2),
                ("prod", 6),
                ("std", 1),
                ("var", 1),
                ("median", 2),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("sum", "abc"),
                ("max", "c"),
                ("min", "a"),
                ("all", True),
                ("any", True),
            ],
        ),
    ),
)
def test_agg_cython_table_series(series, func, expected):
    # GH21224
    # test reducing functions in
    # pandas.core.base.SelectionMixin._cython_table
    result = series.agg(func)
    if is_number(expected):
        # Numeric results use a tolerance and treat NaN as equal to NaN.
        assert np.isclose(result, expected, equal_nan=True)
    else:
        assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("cumprod", Series([], Index([]), dtype=np.float64)),
                ("cumsum", Series([], Index([]), dtype=np.float64)),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("cumprod", Series([np.nan, 1, 2, 6])),
                ("cumsum", Series([np.nan, 1, 3, 6])),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
        ),
    ),
)
def test_agg_cython_table_transform_series(series, func, expected):
    # GH21224
    # test transforming functions in
    # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
    result = series.agg(func)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        tm.get_cython_table_params(
            DataFrame(),
            [
                ("sum", Series(dtype="float64")),
                ("max", Series(dtype="float64")),
                ("min", Series(dtype="float64")),
                ("all", Series(dtype=bool)),
                ("any", Series(dtype=bool)),
                ("mean", Series(dtype="float64")),
                ("prod", Series(dtype="float64")),
                ("std", Series(dtype="float64")),
                ("var", Series(dtype="float64")),
                ("median", Series(dtype="float64")),
            ],
        ),
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("sum", Series([1.0, 3])),
                ("max", Series([1.0, 2])),
                ("min", Series([1.0, 1])),
                ("all", Series([True, True])),
                ("any", Series([True, True])),
                ("mean", Series([1, 1.5])),
                ("prod", Series([1.0, 2])),
                ("std", Series([np.nan, 0.707107])),
                ("var", Series([np.nan, 0.5])),
                ("median", Series([1, 1.5])),
            ],
        ),
    ),
)
def test_agg_cython_table_frame(df, func, expected, axis):
    # GH 21224
    # test reducing functions in
    # pandas.core.base.SelectionMixin._cython_table
    result = df.agg(func, axis=axis)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        tm.get_cython_table_params(
            DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())]
        ),
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
                ("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
            ],
        ),
    ),
)
def test_agg_cython_table_transform_frame(df, func, expected, axis):
    # GH 21224
    # test transforming functions in
    # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
    if axis in ("columns", 1):
        # operating blockwise doesn't let us preserve dtypes
        expected = expected.astype("float64")

    result = df.agg(func, axis=axis)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_groupby_kernel_series(string_series, op):
    # GH 35964
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)
    # "fillna" is the only kernel here that is given an argument.
    args = [0.0] if op == "fillna" else []
    # Grouping by a constant key puts every row in a single group.
    ones = np.ones(string_series.shape[0])
    expected = string_series.groupby(ones).transform(op, *args)
    result = string_series.transform(op, 0, *args)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel_frame(
    axis, float_frame, op, using_array_manager, request
):
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)
    # GH 35964
    if using_array_manager and op == "pct_change" and axis in (1, "columns"):
        # TODO(ArrayManager) shift with axis=1
        request.node.add_marker(
            pytest.mark.xfail(
                reason="shift axis=1 not yet implemented for ArrayManager"
            )
        )

    # "fillna" is the only kernel here that is given an argument.
    args = [0.0] if op == "fillna" else []
    # Constant grouping key sized to the axis being grouped.
    if axis == 0 or axis == "index":
        ones = np.ones(float_frame.shape[0])
    else:
        ones = np.ones(float_frame.shape[1])
    expected = float_frame.groupby(ones, axis=axis).transform(op, *args)
    result = float_frame.transform(op, axis, *args)
    tm.assert_frame_equal(result, expected)

    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
    assert len(float_frame._mgr.arrays) > 1

    # Recompute the key: the frame just gained a column.
    if axis == 0 or axis == "index":
        ones = np.ones(float_frame.shape[0])
    else:
        ones = np.ones(float_frame.shape[1])
    expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args)
    result2 = float_frame.transform(op, axis, *args)
    tm.assert_frame_equal(result2, expected2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
def test_transform_method_name(method):
    # GH 19760
    # Transforming by method name must equal calling that method directly.
    df = DataFrame({"A": [-1, 2]})
    result = df.transform(method)
    expected = operator.methodcaller(method)(df)
    tm.assert_frame_equal(result, expected)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,155 @@
|
||||
"""
|
||||
Assertion helpers for arithmetic tests.
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
BooleanArray,
|
||||
PandasArray,
|
||||
)
|
||||
|
||||
|
||||
def assert_cannot_add(left, right, msg="cannot add"):
    """
    Helper to assert that left and right cannot be added.

    Both operand orders are checked, and each must raise TypeError.

    Parameters
    ----------
    left : object
    right : object
    msg : str, default "cannot add"
        Pattern the TypeError message is matched against.
    """
    with pytest.raises(TypeError, match=msg):
        left + right
    with pytest.raises(TypeError, match=msg):
        right + left
|
||||
|
||||
|
||||
def assert_invalid_addsub_type(left, right, msg=None):
    """
    Helper to assert that left and right can be neither added nor subtracted.

    Addition and subtraction are each checked in both operand orders, and
    every one of the four operations must raise TypeError.

    Parameters
    ----------
    left : object
    right : object
    msg : str or None, default None
        Pattern the TypeError message is matched against.
    """
    with pytest.raises(TypeError, match=msg):
        left + right
    with pytest.raises(TypeError, match=msg):
        right + left
    with pytest.raises(TypeError, match=msg):
        left - right
    with pytest.raises(TypeError, match=msg):
        right - left
|
||||
|
||||
|
||||
def get_upcast_box(left, right, is_cmp: bool = False):
    """
    Get the box to use for 'expected' in an arithmetic or comparison operation.

    Parameters
    ----------
    left : Any
    right : Any
    is_cmp : bool, default False
        Whether the operation is a comparison method.

    Returns
    -------
    callable
        One of DataFrame, Series, Index, np.array or tm.to_array.
    """
    # Precedence: DataFrame > Series > Index > plain array.
    if isinstance(left, DataFrame) or isinstance(right, DataFrame):
        return DataFrame
    if isinstance(left, Series) or isinstance(right, Series):
        if is_cmp and isinstance(left, Index):
            # Index does not defer for comparisons
            return np.array
        return Series
    if isinstance(left, Index) or isinstance(right, Index):
        if is_cmp:
            return np.array
        return Index
    return tm.to_array
|
||||
|
||||
|
||||
def assert_invalid_comparison(left, right, box):
    """
    Assert that comparison operations with mismatched types behave correctly.

    ``==`` must be all-False and ``!=`` all-True in both operand orders,
    while every ordering comparison must raise TypeError.

    Parameters
    ----------
    left : np.ndarray, ExtensionArray, Index, or Series
    right : object
    box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
    """
    # Not for tznaive-tzaware comparison

    # Note: not quite the same as how we do this for tm.box_expected
    xbox = box if box not in [Index, array] else np.array

    def xbox2(x):
        # Eventually we'd like this to be tighter, but for now we'll
        # just exclude PandasArray[bool]
        if isinstance(x, PandasArray):
            return x._ndarray
        if isinstance(x, BooleanArray):
            # NB: we are assuming no pd.NAs for now
            return x.astype(bool)
        return x

    # rev_box: box to use for reversed comparisons
    rev_box = xbox
    if isinstance(right, Index) and isinstance(left, Series):
        rev_box = np.array

    result = xbox2(left == right)
    expected = xbox(np.zeros(result.shape, dtype=np.bool_))

    tm.assert_equal(result, expected)

    result = xbox2(right == left)
    tm.assert_equal(result, rev_box(expected))

    result = xbox2(left != right)
    tm.assert_equal(result, ~expected)

    result = xbox2(right != left)
    tm.assert_equal(result, rev_box(~expected))

    # Any of these messages is accepted for the ordering comparisons.
    msg = "|".join(
        [
            "Invalid comparison between",
            "Cannot compare type",
            "not supported between",
            "invalid type promotion",
            (
                # GH#36706 npdev 1.20.0 2020-09-28
                r"The DTypes <class 'numpy.dtype\[datetime64\]'> and "
                r"<class 'numpy.dtype\[int64\]'> do not have a common DType. "
                "For example they cannot be stored in a single array unless the "
                "dtype is `object`."
            ),
        ]
    )
    with pytest.raises(TypeError, match=msg):
        left < right
    with pytest.raises(TypeError, match=msg):
        left <= right
    with pytest.raises(TypeError, match=msg):
        left > right
    with pytest.raises(TypeError, match=msg):
        left >= right
    with pytest.raises(TypeError, match=msg):
        right < left
    with pytest.raises(TypeError, match=msg):
        right <= left
    with pytest.raises(TypeError, match=msg):
        right > left
    with pytest.raises(TypeError, match=msg):
        right >= left
|
||||
@@ -0,0 +1,232 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import RangeIndex
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import (
|
||||
Float64Index,
|
||||
Int64Index,
|
||||
UInt64Index,
|
||||
)
|
||||
from pandas.core.computation import expressions as expr
|
||||
|
||||
|
||||
@pytest.fixture(
    autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"]
)
def switch_numexpr_min_elements(request):
    """
    Set ``expr._MIN_ELEMENTS`` to the parametrized value and restore the
    previous value after the yield.
    """
    _MIN_ELEMENTS = expr._MIN_ELEMENTS
    expr._MIN_ELEMENTS = request.param
    yield request.param
    # Teardown: put back the value saved above.
    expr._MIN_ELEMENTS = _MIN_ELEMENTS
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# doctest with +SKIP for one fixture fails during setup with
|
||||
# 'DoctestItem' object has no attribute 'callspec'
|
||||
# due to switch_numexpr_min_elements fixture
|
||||
@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    """
    Several variants of integer value 1. The zero-dim integer array
    behaves like an integer.

    This fixture can be used to check that datetimelike indexes handle
    addition and subtraction of integers and zero-dimensional arrays
    of integers.

    Examples
    --------
    dti = pd.date_range('2016-01-01', periods=2, freq='H')
    dti
    DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'],
    dtype='datetime64[ns]', freq='H')
    dti + one
    DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'],
    dtype='datetime64[ns]', freq='H')
    """
    return request.param
|
||||
|
||||
|
||||
# Length-5 vectors of zeros for each box class / dtype combination.
zeros = [
    box_cls([0] * 5, dtype=dtype)
    for box_cls in [pd.Index, np.array, pd.array]
    for dtype in [np.int64, np.uint64, np.float64]
]
# Length-5 vectors of negative zero (float only).
zeros.extend(
    [box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [pd.Index, np.array]]
)
# Zero-dimensional numpy arrays.
zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]])
zeros.extend([np.array(-0.0, dtype=np.float64)])
# Plain Python scalars.
zeros.extend([0, 0.0, -0.0])
|
||||
|
||||
|
||||
# doctest with +SKIP for zero fixture fails during setup with
|
||||
# 'DoctestItem' object has no attribute 'callspec'
|
||||
# due to switch_numexpr_min_elements fixture
|
||||
@pytest.fixture(params=zeros)
def zero(request):
    """
    Several types of scalar zeros and length 5 vectors of zeros.

    This fixture can be used to check that numeric-dtype indexes handle
    division by any zero numeric-dtype.

    Uses vector of length 5 for broadcasting with `numeric_idx` fixture,
    which creates numeric-dtype vectors also of length 5.

    Examples
    --------
    arr = RangeIndex(5)
    arr / zero
    Float64Index([nan, inf, inf, inf, inf], dtype='float64')
    """
    return request.param
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Vector Fixtures
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        Float64Index(np.arange(5, dtype="float64")),
        Int64Index(np.arange(5, dtype="int64")),
        UInt64Index(np.arange(5, dtype="uint64")),
        RangeIndex(5),
    ],
    ids=lambda x: type(x).__name__,
)
def numeric_idx(request):
    """
    Several types of numeric-dtypes Index objects
    """
    return request.param
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Scalar Fixtures
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pd.Timedelta("10m7s").to_pytimedelta(),
        pd.Timedelta("10m7s"),
        pd.Timedelta("10m7s").to_timedelta64(),
    ],
    ids=lambda x: type(x).__name__,
)
def scalar_td(request):
    """
    Several variants of Timedelta scalars representing 10 minutes and 7 seconds.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pd.offsets.Day(3),
        pd.offsets.Hour(72),
        pd.Timedelta(days=3).to_pytimedelta(),
        pd.Timedelta("72:00:00"),
        np.timedelta64(3, "D"),
        np.timedelta64(72, "h"),
    ],
    ids=lambda x: type(x).__name__,
)
def three_days(request):
    """
    Several timedelta-like and DateOffset objects that each represent
    a 3-day timedelta
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pd.offsets.Hour(2),
        pd.offsets.Minute(120),
        pd.Timedelta(hours=2).to_pytimedelta(),
        pd.Timedelta(seconds=2 * 3600),
        np.timedelta64(2, "h"),
        np.timedelta64(120, "m"),
    ],
    ids=lambda x: type(x).__name__,
)
def two_hours(request):
    """
    Several timedelta-like and DateOffset objects that each represent
    a 2-hour timedelta
    """
    return request.param
|
||||
|
||||
|
||||
# Offsets appended to the params of the not_hourly, not_daily and
# mismatched_freq fixtures.
_common_mismatch = [
    pd.offsets.YearBegin(2),
    pd.offsets.MonthBegin(1),
    pd.offsets.Minute(),
]
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        pd.Timedelta(minutes=30).to_pytimedelta(),
        np.timedelta64(30, "s"),
        pd.Timedelta(seconds=30),
    ]
    + _common_mismatch
)
def not_hourly(request):
    """
    Several timedelta-like and DateOffset instances that are _not_
    compatible with Hourly frequencies.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        np.timedelta64(4, "h"),
        pd.Timedelta(hours=23).to_pytimedelta(),
        pd.Timedelta("23:00:00"),
    ]
    + _common_mismatch
)
def not_daily(request):
    """
    Several timedelta-like and DateOffset instances that are _not_
    compatible with Daily frequencies.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        np.timedelta64(365, "D"),
        pd.Timedelta(days=365).to_pytimedelta(),
        pd.Timedelta(days=365),
    ]
    + _common_mismatch
)
def mismatched_freq(request):
    """
    Several timedelta-like and DateOffset instances that are _not_
    compatible with Monthly or Annual frequencies.
    """
    return request.param
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__
)
def box_1d_array(request):
    """
    Fixture to test behavior for Index, Series, tm.to_array, numpy Array and list
    classes
    """
    return request.param
|
||||
@@ -0,0 +1,39 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._testing as tm
|
||||
from pandas.core.ops.array_ops import (
|
||||
comparison_op,
|
||||
na_logical_op,
|
||||
)
|
||||
|
||||
|
||||
def test_na_logical_op_2d():
    left = np.arange(8).reshape(4, 2)
    right = left.astype(object)
    right[0, 0] = np.nan

    # Check that we fall back to the vec_binop branch
    with pytest.raises(TypeError, match="unsupported operand type"):
        operator.or_(left, right)

    # The helper must succeed where the plain operator raised above.
    result = na_logical_op(left, right, operator.or_)
    expected = right
    tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_object_comparison_2d():
    left = np.arange(9).reshape(3, 3).astype(object)
    right = left.T

    # A matrix equals its transpose only on the diagonal.
    result = comparison_op(left, right, operator.eq)
    expected = np.eye(3).astype(bool)
    tm.assert_numpy_array_equal(result, expected)

    # Ensure that cython doesn't raise on non-writeable arg, which
    # we can get from np.broadcast_to
    right.flags.writeable = False
    result = comparison_op(left, right, operator.ne)
    tm.assert_numpy_array_equal(result, ~expected)
|
||||
@@ -0,0 +1,25 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalComparisons:
    def test_categorical_nan_equality(self):
        # A missing category value must not compare equal to itself.
        cat = Series(Categorical(["a", "b", "c", np.nan]))
        expected = Series([True, True, True, False])
        result = cat == cat
        tm.assert_series_equal(result, expected)

    def test_categorical_tuple_equality(self):
        # GH 18050
        # Comparing against a tuple must treat it as one scalar value,
        # both for the plain Series and its categorical cast.
        ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)])
        expected = Series([True, False, True, False, False])
        result = ser == (0, 0)
        tm.assert_series_equal(result, expected)

        result = ser.astype("category") == (0, 0)
        tm.assert_series_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,316 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_list_like
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Index,
|
||||
Interval,
|
||||
IntervalIndex,
|
||||
Period,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import (
|
||||
BooleanArray,
|
||||
IntervalArray,
|
||||
)
|
||||
from pandas.tests.arithmetic.common import get_upcast_box
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])),
        (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])),
        (
            timedelta_range("0 days", periods=3).insert(3, pd.NaT),
            timedelta_range("1 day", periods=3).insert(3, pd.NaT),
        ),
        (
            date_range("20170101", periods=3).insert(3, pd.NaT),
            date_range("20170102", periods=3).insert(3, pd.NaT),
        ),
        (
            date_range("20170101", periods=3, tz="US/Eastern").insert(3, pd.NaT),
            date_range("20170102", periods=3, tz="US/Eastern").insert(3, pd.NaT),
        ),
    ],
    ids=lambda x: str(x[0].dtype),
)
def left_right_dtypes(request):
    """
    Fixture for building an IntervalArray from various dtypes
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture
def interval_array(left_right_dtypes):
    """
    Fixture to generate an IntervalArray of various dtypes containing NA if possible
    """
    left, right = left_right_dtypes
    return IntervalArray.from_arrays(left, right)
|
||||
|
||||
|
||||
def create_categorical_intervals(left, right, closed="right"):
    """Return a Categorical wrapping an IntervalIndex built from the bounds."""
    return Categorical(IntervalIndex.from_arrays(left, right, closed=closed))
|
||||
|
||||
|
||||
def create_series_intervals(left, right, closed="right"):
    """Return a Series wrapping an IntervalArray built from the bounds."""
    return Series(IntervalArray.from_arrays(left, right, closed=closed))
|
||||
|
||||
|
||||
def create_series_categorical_intervals(left, right, closed="right"):
    """Return a Series of a Categorical over an IntervalIndex built from the bounds."""
    return Series(Categorical(IntervalIndex.from_arrays(left, right, closed=closed)))
|
||||
|
||||
|
||||
class TestComparison:
    """Equality and inequality comparisons involving IntervalArray."""

    @pytest.fixture(params=[operator.eq, operator.ne])
    def op(self, request):
        """Comparison operator under test: ``operator.eq`` or ``operator.ne``."""
        return request.param

    @pytest.fixture(
        params=[
            IntervalArray.from_arrays,
            IntervalIndex.from_arrays,
            create_categorical_intervals,
            create_series_intervals,
            create_series_categorical_intervals,
        ],
        ids=[
            "IntervalArray",
            "IntervalIndex",
            "Categorical[Interval]",
            "Series[Interval]",
            "Series[Categorical[Interval]]",
        ],
    )
    def interval_constructor(self, request):
        """
        Fixture for all pandas native interval constructors.
        To be used as the LHS of IntervalArray comparisons.
        """
        return request.param

    def elementwise_comparison(self, op, interval_array, other):
        """
        Build the expected result by applying ``op`` pairwise to
        ``interval_array`` and ``other``.

        A non-list-like ``other`` is repeated to the length of
        ``interval_array``.  The result is an ndarray, or a Series sharing
        ``other``'s index when ``other`` is a Series.
        """
        if not is_list_like(other):
            other = [other] * len(interval_array)
        pairwise = [op(lhs, rhs) for lhs, rhs in zip(interval_array, other)]
        expected = np.array(pairwise)
        if isinstance(other, Series):
            return Series(expected, index=other.index)
        return expected

    def test_compare_scalar_interval(self, op, interval_array):
        # matches first interval
        scalar = interval_array[0]
        result = op(interval_array, scalar)
        expected = self.elementwise_comparison(op, interval_array, scalar)
        tm.assert_numpy_array_equal(result, expected)

        # matches on a single endpoint but not both
        scalar = Interval(interval_array.left[0], interval_array.right[1])
        result = op(interval_array, scalar)
        expected = self.elementwise_comparison(op, interval_array, scalar)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed):
        interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        scalar = Interval(0, 1, closed=other_closed)

        expected = self.elementwise_comparison(op, interval_array, scalar)
        result = op(interval_array, scalar)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_na(
        self, op, interval_array, nulls_fixture, box_with_array, request
    ):
        box = box_with_array

        if box is pd.DataFrame and interval_array.dtype.subtype.kind not in "iuf":
            request.node.add_marker(
                pytest.mark.xfail(
                    reason="raises on DataFrame.transpose (would be fixed by EA2D)"
                )
            )

        obj = tm.box_expected(interval_array, box)
        result = op(obj, nulls_fixture)

        if nulls_fixture is pd.NA:
            # GH#31882
            all_true = np.ones(interval_array.shape, dtype=bool)
            expected = BooleanArray(all_true, all_true)
        else:
            expected = self.elementwise_comparison(op, interval_array, nulls_fixture)

        if box is not Index or nulls_fixture is not pd.NA:
            # don't cast expected from BooleanArray to ndarray[object]
            xbox = get_upcast_box(obj, nulls_fixture, True)
            expected = tm.box_expected(expected, xbox)

        tm.assert_equal(result, expected)

        # reversed operands must give the same answer
        reversed_result = op(nulls_fixture, obj)
        tm.assert_equal(reversed_result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            0,
            1.0,
            True,
            "foo",
            Timestamp("2017-01-01"),
            Timestamp("2017-01-01", tz="US/Eastern"),
            Timedelta("0 days"),
            Period("2017-01-01", "D"),
        ],
    )
    def test_compare_scalar_other(self, op, interval_array, other):
        expected = self.elementwise_comparison(op, interval_array, other)
        result = op(interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_interval(self, op, interval_array, interval_constructor):
        endpoint_pairs = [
            # same endpoints
            (interval_array.left, interval_array.right),
            # different endpoints
            (interval_array.left[::-1], interval_array.right[::-1]),
            # all nan endpoints
            ([np.nan] * 4, [np.nan] * 4),
        ]
        for left, right in endpoint_pairs:
            other = interval_constructor(left, right)
            result = op(interval_array, other)
            expected = self.elementwise_comparison(op, interval_array, other)
            tm.assert_equal(result, expected)

    def test_compare_list_like_interval_mixed_closed(
        self, op, interval_constructor, closed, other_closed
    ):
        interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = interval_constructor(range(2), range(1, 3), closed=other_closed)

        expected = self.elementwise_comparison(op, interval_array, other)
        result = op(interval_array, other)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            (
                Interval(0, 1),
                Interval(Timedelta("1 day"), Timedelta("2 days")),
                Interval(4, 5, "both"),
                Interval(10, 20, "neither"),
            ),
            (0, 1.5, Timestamp("20170103"), np.nan),
            (
                Timestamp("20170102", tz="US/Eastern"),
                Timedelta("2 days"),
                "baz",
                pd.NaT,
            ),
        ],
    )
    def test_compare_list_like_object(self, op, interval_array, other):
        expected = self.elementwise_comparison(op, interval_array, other)
        result = op(interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_nan(self, op, interval_array, nulls_fixture):
        nulls = [nulls_fixture] * 4
        result = op(interval_array, nulls)
        expected = self.elementwise_comparison(op, interval_array, nulls)

        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            np.arange(4, dtype="int64"),
            np.arange(4, dtype="float64"),
            date_range("2017-01-01", periods=4),
            date_range("2017-01-01", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
            period_range("2017-01-01", periods=4, freq="D"),
            Categorical(list("abab")),
            Categorical(date_range("2017-01-01", periods=4)),
            pd.array(list("abcd")),
            pd.array(["foo", 3.14, None, object()], dtype=object),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_compare_list_like_other(self, op, interval_array, other):
        expected = self.elementwise_comparison(op, interval_array, other)
        result = op(interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("length", [1, 3, 5])
    @pytest.mark.parametrize("other_constructor", [IntervalArray, list])
    def test_compare_length_mismatch_errors(self, op, other_constructor, length):
        interval_array = IntervalArray.from_arrays(range(4), range(1, 5))
        mismatched = other_constructor([Interval(0, 1)] * length)
        with pytest.raises(ValueError, match="Lengths must match to compare"):
            op(interval_array, mismatched)

    @pytest.mark.parametrize(
        "constructor, expected_type, assert_func",
        [
            (IntervalIndex, np.array, tm.assert_numpy_array_equal),
            (Series, Series, tm.assert_series_equal),
        ],
    )
    def test_index_series_compat(self, op, constructor, expected_type, assert_func):
        # IntervalIndex/Series that rely on IntervalArray for comparisons
        breaks = range(4)
        index = constructor(IntervalIndex.from_breaks(breaks))

        others = [
            # scalar comparisons
            index[0],
            breaks[0],
            # list-like comparisons
            IntervalArray.from_breaks(breaks),
            [index[0], breaks[0], "foo"],
        ]
        for other in others:
            result = op(index, other)
            expected = expected_type(self.elementwise_comparison(op, index, other))
            assert_func(result, expected)

    @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None])
    def test_comparison_operations(self, scalars):
        # GH #28981
        ser = Series([Interval(0, 1), Interval(1, 2)], dtype="interval")
        expected = Series([False, False])
        result = ser == scalars
        tm.assert_series_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,379 @@
|
||||
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
|
||||
# behave identically.
|
||||
# Specifically for object dtype
|
||||
import datetime
|
||||
from decimal import Decimal
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core import ops
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Comparisons
|
||||
|
||||
|
||||
class TestObjectComparisons:
    """Comparison operators applied to object-dtype Series."""

    def test_comparison_object_numeric_nas(self, comparison_op):
        """Object-dtype floats compare the same way as their float64 casts."""
        ser = Series(np.random.randn(10), dtype=object)
        # shifting introduces missing values at the start of the series
        shifted = ser.shift(2)

        func = comparison_op

        result = func(ser, shifted)
        expected = func(ser.astype(float), shifted.astype(float))
        tm.assert_series_equal(result, expected)

    def test_object_comparisons(self):
        """``==``, ``<`` and ``!=`` against a string scalar, with a NaN entry."""
        ser = Series(["a", "b", np.nan, "c", "a"])

        result = ser == "a"
        expected = Series([True, False, False, False, True])
        tm.assert_series_equal(result, expected)

        result = ser < "a"
        expected = Series([False, False, False, False, False])
        tm.assert_series_equal(result, expected)

        result = ser != "a"
        # ``!=`` must be the logical inverse of ``==``; use ``~`` rather than
        # unary minus, which is not a meaningful operation on booleans.
        expected = ~(ser == "a")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", [None, object])
    def test_more_na_comparisons(self, dtype):
        """NaN never compares equal, elementwise or against a NaN scalar."""
        left = Series(["a", np.nan, "c"], dtype=dtype)
        right = Series(["a", np.nan, "d"], dtype=dtype)

        result = left == right
        expected = Series([True, False, False])
        tm.assert_series_equal(result, expected)

        result = left != right
        expected = Series([False, True, True])
        tm.assert_series_equal(result, expected)

        result = left == np.nan
        expected = Series([False, False, False])
        tm.assert_series_equal(result, expected)

        result = left != np.nan
        expected = Series([True, True, True])
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Arithmetic
|
||||
|
||||
|
||||
class TestArithmetic:
    """Arithmetic operators on object-dtype Series and Index objects."""

    # TODO: parametrize
    def test_pow_ops_object(self):
        # GH#22922
        # pow is weird with masking & 1, so testing here
        first = Series([1, np.nan, 1, np.nan], dtype=object)
        second = Series([1, np.nan, np.nan, 1], dtype=object)

        for base, exponent in ((first, second), (second, first)):
            result = base**exponent
            expected = Series(base.values**exponent.values, dtype=object)
            tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("op", [operator.add, ops.radd])
    @pytest.mark.parametrize("other", ["category", "Int64"])
    def test_add_extension_scalar(self, other, box_with_array, op):
        # GH#22378
        # Check that scalars satisfying is_extension_array_dtype(obj)
        # do not incorrectly try to dispatch to an ExtensionArray operation
        strings = Series(["a", "b", "c"])
        expected = Series([op(item, other) for item in strings])

        boxed = tm.box_expected(strings, box_with_array)
        expected = tm.box_expected(expected, box_with_array)

        result = op(boxed, other)
        tm.assert_equal(result, expected)

    def test_objarr_add_str(self, box_with_array):
        boxed = tm.box_expected(Series(["x", np.nan, "x"]), box_with_array)
        expected = tm.box_expected(Series(["xa", np.nan, "xa"]), box_with_array)

        result = boxed + "a"
        tm.assert_equal(result, expected)

    def test_objarr_radd_str(self, box_with_array):
        boxed = tm.box_expected(Series(["x", np.nan, "x"]), box_with_array)
        expected = tm.box_expected(Series(["ax", np.nan, "ax"]), box_with_array)

        result = "a" + boxed
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            [1, 2, 3],
            [1.1, 2.2, 3.3],
            [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
            ["x", "y", 1],
        ],
    )
    @pytest.mark.parametrize("dtype", [None, object])
    def test_objarr_radd_str_invalid(self, dtype, data, box_with_array):
        boxed = tm.box_expected(Series(data, dtype=dtype), box_with_array)

        acceptable_errors = [
            "can only concatenate str",
            "did not contain a loop with signature matching types",
            "unsupported operand type",
            "must be str",
        ]
        msg = "|".join(acceptable_errors)
        with pytest.raises(TypeError, match=msg):
            "foo_" + boxed

    @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
    def test_objarr_add_invalid(self, op, box_with_array):
        # invalid ops
        box = box_with_array

        obj_ser = tm.makeObjectSeries()
        obj_ser.name = "objects"
        obj_ser = tm.box_expected(obj_ser, box)

        msg = "|".join(
            ["can only concatenate str", "unsupported operand type", "must be str"]
        )
        # a Python int and a zero-dimensional int64 array must both be rejected
        for operand in (1, np.array(1, dtype=np.int64)):
            with pytest.raises(Exception, match=msg):
                op(obj_ser, operand)

    # TODO: Moved from tests.series.test_operators; needs cleanup
    def test_operators_na_handling(self):
        ser = Series(["foo", "bar", "baz", np.nan])

        prefixed = "prefix_" + ser
        expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
        tm.assert_series_equal(prefixed, expected)

        suffixed = ser + "_suffix"
        expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
        tm.assert_series_equal(suffixed, expected)

    # TODO: parametrize over box
    @pytest.mark.parametrize("dtype", [None, object])
    def test_series_with_dtype_radd_timedelta(self, dtype):
        # note this test is _not_ aimed at timedelta64-dtyped Series
        values = [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")]
        ser = Series(values, dtype=dtype)
        expected = Series(
            [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")]
        )

        tm.assert_series_equal(pd.Timedelta("3 days") + ser, expected)
        tm.assert_series_equal(ser + pd.Timedelta("3 days"), expected)

    # TODO: cleanup & parametrize over box
    def test_mixed_timezone_series_ops_object(self):
        # GH#13043
        mixed_tz = Series(
            [
                Timestamp("2015-01-01", tz="US/Eastern"),
                Timestamp("2015-01-01", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        assert mixed_tz.dtype == object

        next_day = Series(
            [
                Timestamp("2015-01-02", tz="US/Eastern"),
                Timestamp("2015-01-02", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        tm.assert_series_equal(mixed_tz + pd.Timedelta("1 days"), next_day)
        tm.assert_series_equal(pd.Timedelta("1 days") + mixed_tz, next_day)

        # object series & object series
        later = Series(
            [
                Timestamp("2015-01-03", tz="US/Eastern"),
                Timestamp("2015-01-05", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        assert later.dtype == object
        gap = Series([pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx")
        tm.assert_series_equal(later - mixed_tz, gap)
        tm.assert_series_equal(mixed_tz - later, -gap)

        durations = Series(
            [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
            name="xxx",
            dtype=object,
        )
        assert durations.dtype == object

        half_hour_later = Series(
            [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx"
        )
        tm.assert_series_equal(durations + pd.Timedelta("00:30:00"), half_hour_later)
        tm.assert_series_equal(pd.Timedelta("00:30:00") + durations, half_hour_later)

    # TODO: cleanup & parametrize over box
    def test_iadd_preserves_name(self):
        # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
        ser = Series([1, 2, 3])
        ser.index.name = "foo"

        ser.index += 1
        assert ser.index.name == "foo"

        ser.index -= 1
        assert ser.index.name == "foo"

    def test_add_string(self):
        # from bug report
        original = pd.Index(["a", "b", "c"])
        suffixed = original + "foo"

        assert "a" not in suffixed
        assert "afoo" in suffixed

    def test_iadd_string(self):
        index = pd.Index(["a", "b", "c"])
        # doesn't fail test unless there is a check before `+=`
        assert "a" in index

        index += "_x"
        assert "a_x" in index

    def test_add(self):
        index = tm.makeStringIndex(100)
        doubled = pd.Index(index.values * 2)
        tm.assert_index_equal(index + index, doubled)
        tm.assert_index_equal(index + index.tolist(), doubled)
        tm.assert_index_equal(index.tolist() + index, doubled)

        # test add and radd
        letters = pd.Index(list("abc"))
        tm.assert_index_equal(letters + "1", pd.Index(["a1", "b1", "c1"]))
        tm.assert_index_equal("1" + letters, pd.Index(["1a", "1b", "1c"]))

    def test_sub_fail(self):
        index = tm.makeStringIndex(100)

        msg = "unsupported operand type|Cannot broadcast"
        invalid_subtractions = [
            lambda: index - "a",
            lambda: index - index,
            lambda: index - index.tolist(),
            lambda: index.tolist() - index,
        ]
        for subtract in invalid_subtractions:
            with pytest.raises(TypeError, match=msg):
                subtract()

    def test_sub_object(self):
        # GH#19369
        index = pd.Index([Decimal(1), Decimal(2)])
        expected = pd.Index([Decimal(0), Decimal(1)])

        tm.assert_index_equal(index - Decimal(1), expected)
        tm.assert_index_equal(index - pd.Index([Decimal(1), Decimal(1)]), expected)

        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            index - "foo"

        with pytest.raises(TypeError, match=msg):
            index - np.array([2, "foo"], dtype=object)

    def test_rsub_object(self, fixed_now_ts):
        # GH#19369
        index = pd.Index([Decimal(1), Decimal(2)])
        expected = pd.Index([Decimal(1), Decimal(0)])

        tm.assert_index_equal(Decimal(2) - index, expected)
        tm.assert_index_equal(np.array([Decimal(2), Decimal(2)]) - index, expected)

        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            "foo" - index

        with pytest.raises(TypeError, match=msg):
            np.array([True, fixed_now_ts]) - index
|
||||
|
||||
|
||||
class MyIndex(pd.Index):
    """Minimal Index subclass that counts how often its addition hook runs."""

    # number of times ``__add__`` has been invoked on this instance
    _calls: int

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None):
        """Create an instance around ``values`` with a zeroed call counter."""
        obj = object.__new__(cls)
        obj._calls = 0
        obj._data = values
        obj._name = name
        obj._reset_identity()

        return obj

    def __add__(self, other):
        # record the call, then hand back a fresh instance over the same data
        self._calls += 1
        return self._simple_new(self._data)

    def __radd__(self, other):
        # reflected addition is counted exactly like forward addition
        return self.__add__(other)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "other",
    [
        [datetime.timedelta(1), datetime.timedelta(2)],
        [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)],
        [pd.Period("2000"), pd.Period("2001")],
        ["a", "b"],
    ],
    ids=["timedelta", "datetime", "period", "object"],
)
def test_index_ops_defer_to_unknown_subclasses(other):
    """``Index + MyIndex`` must be handled by the subclass' reflected add."""
    # https://github.com/pandas-dev/pandas/issues/31109
    dates = [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]
    subclass_index = MyIndex._simple_new(np.array(dates, dtype=object))

    result = pd.Index(other) + subclass_index

    assert isinstance(result, MyIndex)
    assert subclass_index._calls == 1
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,121 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import FloatingArray
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Boolean array of alternating True/False values with two missing entries."""
    pair = [True, False]
    values = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
@pytest.fixture
def left_array():
    """Boolean array: three True, then three False, then three missing values."""
    values = [True] * 3 + [False] * 3 + [None] * 3
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
@pytest.fixture
def right_array():
    """Boolean array repeating the pattern True, False, missing three times."""
    values = [True, False, None] * 3
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
# Basic test for the arithmetic array ops
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "opname, exp",
    [
        ("add", [True, True, None, True, False, None, None, None, None]),
        ("mul", [True, False, None, False, False, None, None, None, None]),
    ],
    ids=["add", "mul"],
)
def test_add_mul(left_array, right_array, opname, exp):
    """Adding or multiplying two boolean arrays yields the expected boolean array."""
    binary_op = getattr(operator, opname)
    expected = pd.array(exp, dtype="boolean")
    result = binary_op(left_array, right_array)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
def test_sub(left_array, right_array):
    """Subtracting boolean arrays raises TypeError with NumPy's message."""
    expected_message = (
        r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), "
        r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\."
    )
    with pytest.raises(TypeError, match=expected_message):
        left_array - right_array
|
||||
|
||||
|
||||
def test_div(left_array, right_array):
    """True division of boolean arrays produces a masked floating array."""
    quotients = np.array(
        [1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan],
        dtype="float64",
    )
    missing = np.array([False, False, True, False, False, True, True, True, True])
    expected = FloatingArray(quotients, missing)

    result = left_array / right_array
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "opname",
    [
        "floordiv",
        "mod",
        pytest.param(
            "pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour? GH34686")
        ),
    ],
)
def test_op_int8(left_array, right_array, opname):
    """The operation on boolean arrays matches the same operation on Int8 casts."""
    binary_op = getattr(operator, opname)
    result = binary_op(left_array, right_array)

    left_int8 = left_array.astype("Int8")
    right_int8 = right_array.astype("Int8")
    expected = binary_op(left_int8, right_int8)
    tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
# Test generic characteristics / errors
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_error_invalid_values(data, all_arithmetic_operators):
    """Arithmetic between a boolean Series and unsupported operands raises TypeError."""
    # invalid ops

    op_name = all_arithmetic_operators
    ser = pd.Series(data)
    bound_op = getattr(ser, op_name)

    # invalid scalars
    string_scalar_msg = (
        "did not contain a loop with signature matching types|"
        "BooleanArray cannot perform the operation|"
        "not supported for the input types, and the inputs could not be safely coerced "
        "to any supported types according to the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=string_scalar_msg):
        bound_op("foo")

    timestamp_msg = (
        r"unsupported operand type\(s\) for|"
        "Concatenation operation is not implemented for NumPy arrays"
    )
    with pytest.raises(TypeError, match=timestamp_msg):
        bound_op(pd.Timestamp("20180101"))

    # invalid array-likes
    if op_name in ("__mul__", "__rmul__"):
        # TODO(extension) numpy's mul with object array sees booleans as numbers
        return

    string_series_msg = (
        r"unsupported operand type\(s\) for|can only concatenate str|"
        "not all arguments converted during string formatting"
    )
    with pytest.raises(TypeError, match=string_series_msg):
        bound_op(pd.Series("foo", index=ser.index))
|
||||
@@ -0,0 +1,53 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_astype():
    """Casting a boolean array to NumPy dtypes, with and without missing values."""
    # with missing values
    with_na = pd.array([True, False, None], dtype="boolean")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        with_na.astype("int64")

    with pytest.raises(ValueError, match="cannot convert float NaN to"):
        with_na.astype("bool")

    as_float = with_na.astype("float64")
    tm.assert_numpy_array_equal(as_float, np.array([1, 0, np.nan], dtype="float64"))

    as_str = with_na.astype("str")
    tm.assert_numpy_array_equal(
        as_str, np.array(["True", "False", "<NA>"], dtype="<U5")
    )

    # no missing values
    without_na = pd.array([True, False, True], dtype="boolean")

    as_int = without_na.astype("int64")
    tm.assert_numpy_array_equal(as_int, np.array([1, 0, 1], dtype="int64"))

    as_bool = without_na.astype("bool")
    tm.assert_numpy_array_equal(as_bool, np.array([True, False, True], dtype="bool"))
|
||||
|
||||
|
||||
def test_astype_to_boolean_array():
    """Casting to the boolean dtype, by name or by instance, round-trips."""
    # astype to BooleanArray
    arr = pd.array([True, False, None], dtype="boolean")

    for target in ("boolean", pd.BooleanDtype()):
        result = arr.astype(target)
        tm.assert_extension_array_equal(result, arr)
|
||||
|
||||
|
||||
def test_astype_to_integer_array():
    """Casting to ``Int64`` maps True/False to 1/0 and keeps the missing value."""
    # astype to IntegerArray
    booleans = pd.array([True, False, None], dtype="boolean")
    expected = pd.array([1, 0, None], dtype="Int64")

    result = booleans.astype("Int64")
    tm.assert_extension_array_equal(result, expected)
|
||||
@@ -0,0 +1,58 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.tests.arrays.masked_shared import ComparisonOps
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Boolean array of alternating True/False values with two missing entries."""
    pair = [True, False]
    values = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
@pytest.fixture
def dtype():
    """The nullable boolean extension dtype."""
    boolean_dtype = pd.BooleanDtype()
    return boolean_dtype
|
||||
|
||||
|
||||
class TestComparisonOps(ComparisonOps):
    """Comparison operator tests for the nullable boolean array."""

    def test_compare_scalar(self, data, comparison_op):
        self._compare_other(data, comparison_op, True)

    def test_compare_array(self, data, comparison_op):
        size = len(data)
        # extension array, ndarray and Series operands, in that order
        others = (
            pd.array([True] * size, dtype="boolean"),
            np.array([True] * size),
            pd.Series([True] * size),
        )
        for other in others:
            self._compare_other(data, comparison_op, other)

    @pytest.mark.parametrize("other", [True, False, pd.NA])
    def test_scalar(self, other, comparison_op, dtype):
        # re-parametrize the shared test with boolean-appropriate scalars
        super().test_scalar(other, comparison_op, dtype)

    def test_array(self, comparison_op):
        op = comparison_op
        left_values = [True] * 3 + [False] * 3 + [None] * 3
        right_values = [True, False, None] * 3
        a = pd.array(left_values, dtype="boolean")
        b = pd.array(right_values, dtype="boolean")

        result = op(a, b)

        # expected: compare the raw data, mask wherever either side is missing
        expected = BooleanArray(op(a._data, b._data), a._mask | b._mask)
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = None
        tm.assert_extension_array_equal(a, pd.array(left_values, dtype="boolean"))
        tm.assert_extension_array_equal(b, pd.array(right_values, dtype="boolean"))
|
||||
@@ -0,0 +1,323 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.core.arrays.boolean import coerce_to_array
|
||||
|
||||
|
||||
def test_boolean_array_constructor():
    """BooleanArray accepts matching boolean ndarrays and rejects anything else."""
    data = np.array([True, False, True, False], dtype="bool")
    missing = np.array([False, False, False, True], dtype="bool")

    result = BooleanArray(data, missing)
    expected = pd.array([True, False, True, None], dtype="boolean")
    tm.assert_extension_array_equal(result, expected)

    bad_values_msg = "values should be boolean numpy array"
    bad_mask_msg = "mask should be boolean numpy array"
    bad_shape_msg = "values.shape must match mask.shape"

    # values given as a plain list
    with pytest.raises(TypeError, match=bad_values_msg):
        BooleanArray(data.tolist(), missing)

    # mask given as a plain list
    with pytest.raises(TypeError, match=bad_mask_msg):
        BooleanArray(data, missing.tolist())

    # values of integer dtype
    with pytest.raises(TypeError, match=bad_values_msg):
        BooleanArray(data.astype(int), missing)

    # mask omitted
    with pytest.raises(TypeError, match=bad_mask_msg):
        BooleanArray(data, None)

    # two-dimensional values against a one-dimensional mask
    with pytest.raises(ValueError, match=bad_shape_msg):
        BooleanArray(data.reshape(1, -1), missing)

    # one-dimensional values against a two-dimensional mask
    with pytest.raises(ValueError, match=bad_shape_msg):
        BooleanArray(data, missing.reshape(1, -1))
|
||||
|
||||
|
||||
def test_boolean_array_constructor_copy():
    """Inputs are kept by reference by default and copied with ``copy=True``."""
    data = np.array([True, False, True, False], dtype="bool")
    missing = np.array([False, False, False, True], dtype="bool")

    shared = BooleanArray(data, missing)
    assert shared._data is data
    assert shared._mask is missing

    copied = BooleanArray(data, missing, copy=True)
    assert copied._data is not data
    assert copied._mask is not missing
|
||||
|
||||
|
||||
def test_to_boolean_array():
    """``pd.array(..., dtype="boolean")`` gives the same array for list and ndarray input."""
    without_na = BooleanArray(
        np.array([True, False, True]), np.array([False, False, False])
    )
    complete_inputs = (
        [True, False, True],
        np.array([True, False, True]),
        np.array([True, False, True], dtype=object),
    )
    for raw in complete_inputs:
        tm.assert_extension_array_equal(pd.array(raw, dtype="boolean"), without_na)

    # with missing values
    with_na = BooleanArray(
        np.array([True, False, True]), np.array([False, False, True])
    )
    missing_inputs = (
        [True, False, None],
        np.array([True, False, None], dtype=object),
    )
    for raw in missing_inputs:
        tm.assert_extension_array_equal(pd.array(raw, dtype="boolean"), with_na)
|
||||
|
||||
|
||||
def test_to_boolean_array_all_none():
    """An all-``None`` input converts to a fully masked boolean array."""
    all_true = np.array([True, True, True])
    fully_masked = BooleanArray(all_true, np.array([True, True, True]))

    for raw in ([None, None, None], np.array([None, None, None], dtype=object)):
        tm.assert_extension_array_equal(pd.array(raw, dtype="boolean"), fully_masked)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "a, b",
    [
        ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
        ([True, np.nan], [True, None]),
        ([True, pd.NA], [True, None]),
        ([np.nan, np.nan], [None, None]),
        (np.array([np.nan, np.nan], dtype=float), [None, None]),
    ],
)
def test_to_boolean_array_missing_indicators(a, b):
    """Converting ``a`` must equal converting ``b``, which spells every missing value as None."""
    tm.assert_extension_array_equal(
        pd.array(a, dtype="boolean"),
        pd.array(b, dtype="boolean"),
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        ["1", "2"],
        # "foo",
        [1, 2],
        [1.0, 2.0],
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        np.array([1, 2]),
        np.array([1.0, 2.0]),
        [np.nan, {"a": 1}],
    ],
)
def test_to_boolean_array_error(values):
    """Each parametrized input must be rejected with a TypeError when converted."""
    # error in converting existing arrays to BooleanArray
    with pytest.raises(TypeError, match="Need to pass bool-like value"):
        pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
def test_to_boolean_array_from_integer_array():
    """ndarrays of 0/1 integers (optionally with None) convert to the boolean dtype."""
    converted = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
    tm.assert_extension_array_equal(
        converted, pd.array([True, False, True, False], dtype="boolean")
    )

    # with missing values
    converted = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
    tm.assert_extension_array_equal(
        converted, pd.array([True, False, True, None], dtype="boolean")
    )
|
||||
|
||||
|
||||
def test_to_boolean_array_from_float_array():
    """ndarrays of 0.0/1.0 floats (optionally with NaN) convert to the boolean dtype."""
    converted = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
    tm.assert_extension_array_equal(
        converted, pd.array([True, False, True, False], dtype="boolean")
    )

    # with missing values
    converted = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
    tm.assert_extension_array_equal(
        converted, pd.array([True, False, True, None], dtype="boolean")
    )
|
||||
|
||||
|
||||
def test_to_boolean_array_integer_like():
    """Plain Python lists of 0/1 integers (optionally with None) convert to boolean."""
    # integers of 0's and 1's
    converted = pd.array([1, 0, 1, 0], dtype="boolean")
    tm.assert_extension_array_equal(
        converted, pd.array([True, False, True, False], dtype="boolean")
    )

    # with missing values
    converted = pd.array([1, 0, 1, None], dtype="boolean")
    tm.assert_extension_array_equal(
        converted, pd.array([True, False, True, None], dtype="boolean")
    )
|
||||
|
||||
|
||||
def test_coerce_to_array():
    """Exercise ``coerce_to_array``: buffer reuse, copying, mask merging, shape errors."""
    # TODO this is currently not public API
    data = np.array([True, False, True, False], dtype="bool")
    na_mask = np.array([False, False, False, True], dtype="bool")

    # default: the returned buffers are the very objects passed in
    reused = BooleanArray(*coerce_to_array(data, mask=na_mask))
    tm.assert_extension_array_equal(reused, BooleanArray(data, na_mask))
    assert reused._data is data
    assert reused._mask is na_mask

    # copy=True: equal content, distinct buffers
    duplicated = BooleanArray(*coerce_to_array(data, mask=na_mask, copy=True))
    tm.assert_extension_array_equal(duplicated, BooleanArray(data, na_mask))
    assert duplicated._data is not data
    assert duplicated._mask is not na_mask

    # mixed missing from values and mask
    listed = [True, False, None, False]
    na_mask = np.array([False, False, False, True], dtype="bool")
    merged = BooleanArray(
        np.array([True, False, True, True]), np.array([False, False, True, True])
    )
    tm.assert_extension_array_equal(
        BooleanArray(*coerce_to_array(listed, mask=na_mask)), merged
    )
    tm.assert_extension_array_equal(
        BooleanArray(*coerce_to_array(np.array(listed, dtype=object), mask=na_mask)),
        merged,
    )
    tm.assert_extension_array_equal(
        BooleanArray(*coerce_to_array(listed, mask=na_mask.tolist())), merged
    )

    # raise errors for wrong dimension
    data = np.array([True, False, True, False], dtype="bool")
    na_mask = np.array([False, False, False, True], dtype="bool")
    shape_msg = "values.shape and mask.shape must match"

    with pytest.raises(ValueError, match=shape_msg):
        coerce_to_array(data.reshape(1, -1))

    with pytest.raises(ValueError, match=shape_msg):
        coerce_to_array(data, mask=na_mask.reshape(1, -1))
|
||||
|
||||
|
||||
def test_coerce_to_array_from_boolean_array():
    """``coerce_to_array`` passes a BooleanArray's buffers through and refuses an extra mask."""
    # passing BooleanArray to coerce_to_array
    data = np.array([True, False, True, False], dtype="bool")
    na_mask = np.array([False, False, False, True], dtype="bool")
    source = BooleanArray(data, na_mask)

    # no copy
    passthrough = BooleanArray(*coerce_to_array(source))
    tm.assert_extension_array_equal(passthrough, source)
    assert passthrough._data is source._data
    assert passthrough._mask is source._mask

    # here the copy is made by the BooleanArray constructor
    duplicated = BooleanArray(*coerce_to_array(source), copy=True)
    tm.assert_extension_array_equal(duplicated, source)
    assert duplicated._data is not source._data
    assert duplicated._mask is not source._mask

    with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
        coerce_to_array(source, mask=na_mask)
|
||||
|
||||
|
||||
def test_coerce_to_numpy_array():
    """``np.array(arr)`` gives object dtype; bool dtype is only allowed without missing values."""
    # with missing values -> object dtype
    with_na = pd.array([True, False, None], dtype="boolean")
    tm.assert_numpy_array_equal(
        np.array(with_na), np.array([True, False, pd.NA], dtype="object")
    )

    # also with no missing values -> object dtype
    complete = pd.array([True, False, True], dtype="boolean")
    tm.assert_numpy_array_equal(
        np.array(complete), np.array([True, False, True], dtype="object")
    )

    # force bool dtype
    tm.assert_numpy_array_equal(
        np.array(complete, dtype="bool"),
        np.array([True, False, True], dtype="bool"),
    )

    # with missing values will raise error
    with_na = pd.array([True, False, None], dtype="boolean")
    error_text = (
        "cannot convert to 'bool'-dtype NumPy array with missing values. "
        "Specify an appropriate 'na_value' for this dtype."
    )
    with pytest.raises(ValueError, match=error_text):
        np.array(with_na, dtype="bool")
|
||||
|
||||
|
||||
def test_to_boolean_array_from_strings():
    """String spellings of true/false and 1/0 are parsed; NaN becomes a masked entry."""
    raw = np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object)
    parsed = BooleanArray._from_sequence_of_strings(raw)

    wanted_data = np.array([True, False, True, True, False, False, False])
    wanted_mask = np.array([False, False, False, False, False, False, True])

    tm.assert_extension_array_equal(parsed, BooleanArray(wanted_data, wanted_mask))
|
||||
|
||||
|
||||
def test_to_boolean_array_from_strings_invalid_string():
    """A string that is not a recognised boolean spelling raises ValueError."""
    unparseable = ["donkey"]
    with pytest.raises(ValueError, match="cannot be cast"):
        BooleanArray._from_sequence_of_strings(unparseable)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy(box):
    """Check ``to_numpy`` dtype and ``na_value`` handling.

    Runs once with the data boxed in a Series and once as a bare array
    (``box`` selects the constructor).
    """
    con = pd.Series if box else pd.array
    # default (with or without missing values) -> object dtype
    arr = con([True, False, True], dtype="boolean")
    result = arr.to_numpy()
    expected = np.array([True, False, True], dtype="object")
    tm.assert_numpy_array_equal(result, expected)

    arr = con([True, False, None], dtype="boolean")
    result = arr.to_numpy()
    expected = np.array([True, False, pd.NA], dtype="object")
    tm.assert_numpy_array_equal(result, expected)

    arr = con([True, False, None], dtype="boolean")
    result = arr.to_numpy(dtype="str")
    # "U5" (native byte order) rather than "<U5": the dtype comparison must
    # also hold on big-endian platforms.
    expected = np.array([True, False, pd.NA], dtype="U5")
    tm.assert_numpy_array_equal(result, expected)

    # no missing values -> can convert to bool, otherwise raises
    arr = con([True, False, True], dtype="boolean")
    result = arr.to_numpy(dtype="bool")
    expected = np.array([True, False, True], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)

    arr = con([True, False, None], dtype="boolean")
    with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
        arr.to_numpy(dtype="bool")

    # specify dtype and na_value
    arr = con([True, False, None], dtype="boolean")
    result = arr.to_numpy(dtype=object, na_value=None)
    expected = np.array([True, False, None], dtype="object")
    tm.assert_numpy_array_equal(result, expected)

    result = arr.to_numpy(dtype=bool, na_value=False)
    expected = np.array([True, False, False], dtype="bool")
    tm.assert_numpy_array_equal(result, expected)

    result = arr.to_numpy(dtype="int64", na_value=-99)
    expected = np.array([1, 0, -99], dtype="int64")
    tm.assert_numpy_array_equal(result, expected)

    result = arr.to_numpy(dtype="float64", na_value=np.nan)
    expected = np.array([1, 0, np.nan], dtype="float64")
    tm.assert_numpy_array_equal(result, expected)

    # converting to int or float without specifying na_value raises
    with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
        arr.to_numpy(dtype="int64")
    with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
        arr.to_numpy(dtype="float64")
|
||||
|
||||
|
||||
def test_to_numpy_copy():
    """Writes to the ``to_numpy`` result reach the array unless ``copy=True`` is passed."""
    # to_numpy can be zero-copy if no missing values
    source = pd.array([True, False, True], dtype="boolean")
    view = source.to_numpy(dtype=bool)
    view[0] = False
    mutated = pd.array([False, False, True], dtype="boolean")
    tm.assert_extension_array_equal(source, mutated)

    source = pd.array([True, False, True], dtype="boolean")
    detached = source.to_numpy(dtype=bool, copy=True)
    detached[0] = False
    untouched = pd.array([True, False, True], dtype="boolean")
    tm.assert_extension_array_equal(source, untouched)
|
||||
@@ -0,0 +1,126 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor]
)
def test_ufuncs_binary(ufunc):
    """Binary ufuncs on a BooleanArray equal the ufunc on its raw data with the mask re-applied."""
    a = pd.array([True, False, None], dtype="boolean")

    def remasked(raw, box=pd.array):
        # Box the raw ufunc output as "boolean" and blank out a's masked slots.
        boxed = box(raw, dtype="boolean")
        boxed[a._mask] = np.nan
        return boxed

    # two BooleanArrays
    result = ufunc(a, a)
    tm.assert_extension_array_equal(result, remasked(ufunc(a._data, a._data)))

    s = pd.Series(a)
    result = ufunc(s, a)
    tm.assert_series_equal(result, remasked(ufunc(a._data, a._data), box=pd.Series))

    # Boolean with numpy array
    arr = np.array([True, True, False])
    result = ufunc(a, arr)
    tm.assert_extension_array_equal(result, remasked(ufunc(a._data, arr)))

    result = ufunc(arr, a)
    tm.assert_extension_array_equal(result, remasked(ufunc(arr, a._data)))

    # BooleanArray with scalar
    result = ufunc(a, True)
    tm.assert_extension_array_equal(result, remasked(ufunc(a._data, True)))

    result = ufunc(True, a)
    tm.assert_extension_array_equal(result, remasked(ufunc(True, a._data)))

    # not handled types
    unhandled = r"operand type\(s\) all returned NotImplemented from __array_ufunc__"
    with pytest.raises(TypeError, match=unhandled):
        ufunc(a, "test")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ufunc", [np.logical_not])
def test_ufuncs_unary(ufunc):
    """A unary ufunc on a BooleanArray or Series equals the ufunc on raw data, re-masked."""
    a = pd.array([True, False, None], dtype="boolean")

    array_result = ufunc(a)
    array_expected = pd.array(ufunc(a._data), dtype="boolean")
    array_expected[a._mask] = np.nan
    tm.assert_extension_array_equal(array_result, array_expected)

    ser = pd.Series(a)
    series_result = ufunc(ser)
    series_expected = pd.Series(ufunc(a._data), dtype="boolean")
    series_expected[a._mask] = np.nan
    tm.assert_series_equal(series_result, series_expected)
|
||||
|
||||
|
||||
def test_ufunc_numeric():
    """``np.sqrt`` on a boolean array is expected to return a Float32 masked array."""
    # np.sqrt on np.bool returns float16, which we upcast to Float32
    # bc we do not have Float16
    booleans = pd.array([True, False, None], dtype="boolean")

    roots = np.sqrt(booleans)

    tm.assert_extension_array_equal(roots, pd.array([1, 0, None], dtype="Float32"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[True, False], [True, None]])
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` gives ``pd.NA`` when the last element is missing, else the data sum.

    Despite the name, nothing is expected to raise here.
    """
    arr = pd.array(values, dtype="boolean")

    res = np.add.reduce(arr)
    expected = pd.NA if arr[-1] is pd.NA else arr._data.sum()
    tm.assert_almost_equal(res, expected)
|
||||
|
||||
|
||||
def test_value_counts_na():
    """``value_counts`` returns Int64 counts indexed by the boolean values, with or without NA."""
    arr = pd.array([True, False, pd.NA], dtype="boolean")

    # dropna=False keeps the NA entry in the index; dropna=True removes it.
    for dropna, index in ((False, arr), (True, arr[:-1])):
        result = arr.value_counts(dropna=dropna)
        expected = pd.Series([1] * len(index), index=index, dtype="Int64")
        assert expected.index.dtype == arr.dtype
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_value_counts_with_normalize():
    """Normalized ``value_counts`` drops NA and returns Float64 fractions on a boolean index."""
    ser = pd.Series([True, False, pd.NA], dtype="boolean")
    fractions = ser.value_counts(normalize=True)

    ones = pd.Series([1, 1], index=ser[:-1], dtype="Float64")
    expected = ones / 2
    assert expected.index.dtype == "boolean"
    tm.assert_series_equal(fractions, expected)
|
||||
|
||||
|
||||
def test_diff():
    """First-order ``diff`` of a boolean array, via the algorithm and via ``Series.diff``."""
    source = [True, True, False, False, True, None, True, None, False]
    a = pd.array(source, dtype="boolean")

    wanted = [None, False, True, False, True, None, None, None, None]
    expected_array = pd.array(wanted, dtype="boolean")

    tm.assert_extension_array_equal(pd.core.algorithms.diff(a, 1), expected_array)

    ser = pd.Series(a)
    tm.assert_series_equal(ser.diff(), pd.Series(expected_array))
|
||||
@@ -0,0 +1,13 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na", [None, np.nan, pd.NA])
def test_setitem_missing_values(na):
    """Assigning None, NaN or pd.NA to an element marks that element as missing."""
    target = pd.array([True, False, None], dtype="boolean")
    target[1] = na
    tm.assert_extension_array_equal(
        target, pd.array([True, None, None], dtype="boolean")
    )
|
||||
@@ -0,0 +1,254 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.arrays import BooleanArray
|
||||
from pandas.core.ops.mask_ops import (
|
||||
kleene_and,
|
||||
kleene_or,
|
||||
kleene_xor,
|
||||
)
|
||||
from pandas.tests.extension.base import BaseOpsUtil
|
||||
|
||||
|
||||
class TestLogicalOps(BaseOpsUtil):
    """Tests for ``|``, ``&`` and ``^`` on BooleanArray, including missing values."""

    def test_numpy_scalars_ok(self, all_logical_operators):
        """Python bools and ``np.bool_`` scalars give identical results."""
        a = pd.array([True, False, None], dtype="boolean")
        op = getattr(a, all_logical_operators)

        tm.assert_extension_array_equal(op(True), op(np.bool_(True)))
        tm.assert_extension_array_equal(op(False), op(np.bool_(False)))

    def get_op_from_name(self, op_name):
        """Map a dunder name such as ``__and__`` / ``__rxor__`` to a two-argument callable."""
        short_opname = op_name.strip("_")
        # operator exposes ``and_`` / ``or_`` (trailing underscore) but plain ``xor``
        short_opname = short_opname if "xor" in short_opname else short_opname + "_"
        try:
            op = getattr(operator, short_opname)
        except AttributeError:
            # Assume it is the reverse operator
            rop = getattr(operator, short_opname[1:])

            def op(x, y):
                return rop(y, x)

        return op

    def test_empty_ok(self, all_logical_operators):
        """Logical ops between an empty array and a scalar return an empty array."""
        a = pd.array([], dtype="boolean")
        op_name = all_logical_operators
        result = getattr(a, op_name)(True)
        tm.assert_extension_array_equal(a, result)

        result = getattr(a, op_name)(False)
        tm.assert_extension_array_equal(a, result)

        result = getattr(a, op_name)(pd.NA)
        tm.assert_extension_array_equal(a, result)

    @pytest.mark.parametrize(
        "other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)]
    )
    def test_eq_mismatched_type(self, other):
        """``==`` / ``!=`` against an unrelated scalar are all-False / all-True."""
        # GH-44499
        arr = pd.array([True, False])
        result = arr == other
        expected = pd.array([False, False])
        tm.assert_extension_array_equal(result, expected)

        result = arr != other
        expected = pd.array([True, True])
        tm.assert_extension_array_equal(result, expected)

    def test_logical_length_mismatch_raises(self, all_logical_operators):
        """Operands of a different length raise ValueError."""
        op_name = all_logical_operators
        a = pd.array([True, False, None], dtype="boolean")
        msg = "Lengths must match to compare"

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)([True, False])

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)(np.array([True, False]))

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)(pd.array([True, False], dtype="boolean"))

    def test_logical_nan_raises(self, all_logical_operators):
        """A float NaN scalar operand raises TypeError."""
        op_name = all_logical_operators
        a = pd.array([True, False, None], dtype="boolean")
        msg = "Got float instead"

        with pytest.raises(TypeError, match=msg):
            getattr(a, op_name)(np.nan)

    @pytest.mark.parametrize("other", ["a", 1])
    def test_non_bool_or_na_other_raises(self, other, all_logical_operators):
        """Scalars that are neither bool nor NA raise a TypeError naming their type."""
        a = pd.array([True, False], dtype="boolean")
        with pytest.raises(TypeError, match=str(type(other).__name__)):
            getattr(a, all_logical_operators)(other)

    def test_kleene_or(self):
        """Full 3x3 truth table for ``|`` between two arrays, in both operand orders."""
        # A clear test of behavior.
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a | b
        expected = pd.array(
            [True, True, True, True, False, None, True, None, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b | a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [True, None, None]),
            (True, [True, True, True]),
            (np.bool_(True), [True, True, True]),
            (False, [True, False, None]),
            (np.bool_(False), [True, False, None]),
        ],
    )
    def test_kleene_or_scalar(self, other, expected):
        """``|`` between an array and a scalar, in both operand orders."""
        a = pd.array([True, False, None], dtype="boolean")
        result = a | other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other | a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    def test_kleene_and(self):
        """Full 3x3 truth table for ``&`` between two arrays, in both operand orders."""
        # A clear test of behavior.
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a & b
        expected = pd.array(
            [True, False, None, False, False, False, None, False, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b & a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [None, False, None]),
            (True, [True, False, None]),
            (False, [False, False, False]),
            (np.bool_(True), [True, False, None]),
            (np.bool_(False), [False, False, False]),
        ],
    )
    def test_kleene_and_scalar(self, other, expected):
        """``&`` between an array and a scalar, in both operand orders."""
        a = pd.array([True, False, None], dtype="boolean")
        result = a & other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other & a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    def test_kleene_xor(self):
        """Full 3x3 truth table for ``^`` between two arrays, in both operand orders."""
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a ^ b
        expected = pd.array(
            [False, True, None, True, False, None, None, None, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b ^ a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [None, None, None]),
            (True, [False, True, None]),
            (np.bool_(True), [False, True, None]),
            # plain ``False`` added for parity with the or/and scalar tests
            (False, [True, False, None]),
            (np.bool_(False), [True, False, None]),
        ],
    )
    def test_kleene_xor_scalar(self, other, expected):
        """``^`` between an array and a scalar, in both operand orders."""
        a = pd.array([True, False, None], dtype="boolean")
        result = a ^ other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other ^ a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    @pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3])
    def test_no_masked_assumptions(self, other, all_logical_operators):
        """Results must not depend on the raw data stored underneath masked slots."""
        # The logical operations should not assume that masked values are False!
        a = pd.arrays.BooleanArray(
            np.array([True, True, True, False, False, False, True, False, True]),
            np.array([False] * 6 + [True, True, True]),
        )
        b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        if isinstance(other, list):
            other = pd.array(other, dtype="boolean")

        result = getattr(a, all_logical_operators)(other)
        expected = getattr(b, all_logical_operators)(other)
        tm.assert_extension_array_equal(result, expected)

        # Overwrite the data hidden under the masks and check nothing changes.
        if isinstance(other, BooleanArray):
            other._data[other._mask] = True
            a._data[a._mask] = False

            result = getattr(a, all_logical_operators)(other)
            expected = getattr(b, all_logical_operators)(other)
            tm.assert_extension_array_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
def test_error_both_scalar(operation):
    """Calling a kleene_* helper with two scalar operands raises TypeError."""
    # masks need to be non-None, otherwise it ends up in an infinite recursion
    left_mask = np.zeros(1)
    right_mask = np.zeros(1)
    expected_msg = r"Either `left` or `right` need to be a np\.ndarray."
    with pytest.raises(TypeError, match=expected_msg):
        operation(True, True, left_mask, right_mask)
|
||||
@@ -0,0 +1,27 @@
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestUnaryOps:
    """Unary operators (``~`` and ``abs``) on boolean arrays."""

    def test_invert(self):
        """``~`` flips True/False and keeps NA, for array, Series and DataFrame."""
        labels = ["a", "b", "c"]
        a = pd.array([True, False, None], dtype="boolean")
        flipped = pd.array([False, True, None], dtype="boolean")
        tm.assert_extension_array_equal(~a, flipped)

        expected_ser = pd.Series(flipped, index=labels, name="name")
        inverted_ser = ~pd.Series(a, index=labels, name="name")
        tm.assert_series_equal(inverted_ser, expected_ser)

        df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=labels)
        inverted_df = ~df
        expected_df = pd.DataFrame(
            {"A": expected_ser, "B": [False, True, True]}, index=labels
        )
        tm.assert_frame_equal(inverted_df, expected_df)

    def test_abs(self):
        """``abs`` leaves a boolean array unchanged."""
        # matching numpy behavior, abs is the identity function
        arr = pd.array([True, False, None], dtype="boolean")
        tm.assert_extension_array_equal(abs(arr), arr)
|
||||
@@ -0,0 +1,60 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Boolean array of alternating True/False values with two missing entries."""
    pair = [True, False]
    values = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(values, dtype="boolean")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
    [
        ([True, pd.NA], True, True, True, pd.NA),
        ([False, pd.NA], False, False, pd.NA, False),
        ([pd.NA], False, True, pd.NA, pd.NA),
        ([], False, True, False, True),
        # GH-33253: all True / all False values buggy with skipna=False
        ([True, True], True, True, True, True),
        ([False, False], False, False, False, False),
    ],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
    """``any`` / ``all`` with and without ``skipna`` return the expected scalar or pd.NA."""

    def as_numpy_scalar(flag):
        # the methods return numpy scalars
        return pd.NA if flag is pd.NA else np.bool_(flag)

    exp_any = as_numpy_scalar(exp_any)
    exp_all = as_numpy_scalar(exp_all)
    exp_any_noskip = as_numpy_scalar(exp_any_noskip)
    exp_all_noskip = as_numpy_scalar(exp_all_noskip)

    for con in (pd.array, pd.Series):
        a = con(values, dtype="boolean")
        assert a.any() is exp_any
        assert a.all() is exp_all
        assert a.any(skipna=False) is exp_any_noskip
        assert a.all(skipna=False) is exp_all_noskip

        assert np.any(a.any()) is exp_any
        assert np.all(a.all()) is exp_all
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dropna", [True, False])
def test_reductions_return_types(dropna, data, all_numeric_reductions):
    """Each numeric reduction of a boolean Series returns the expected NumPy scalar type."""
    op = all_numeric_reductions
    s = pd.Series(data)
    if dropna:
        s = s.dropna()

    if op in ("sum", "prod"):
        scalar_type = np.int_
    elif op in ("min", "max"):
        scalar_type = np.bool_
    else:
        # "mean", "std", "var", "median", "kurt", "skew"
        scalar_type = np.float64

    assert isinstance(getattr(s, op)(), scalar_type)
|
||||
@@ -0,0 +1,13 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def test_repr():
    """Check the repr of a boolean column as a DataFrame, a Series and a bare array.

    The DataFrame and Series reprs right-align values in a fixed-width
    column, so the padding spaces in the expected strings are significant.
    """
    df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})
    expected = "       A\n0   True\n1  False\n2   <NA>"
    assert repr(df) == expected

    expected = "0     True\n1    False\n2     <NA>\nName: A, dtype: boolean"
    assert repr(df.A) == expected

    expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
    assert repr(df.A.array) == expected
|
||||
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user