first commit

This commit is contained in:
Carla Floricel
2022-08-02 09:52:52 -04:00
parent 417ea8660b
commit 05e52aa52b
10444 changed files with 2300232 additions and 0 deletions

View File

@@ -0,0 +1,328 @@
from __future__ import annotations
import subprocess
import sys
import pytest
import pandas as pd
from pandas import api
import pandas._testing as tm
class Base:
    """Mixin providing a namespace-contents assertion for API surface tests."""

    def check(self, namespace, expected, ignored=None):
        """
        Assert that the non-dunder names of ``namespace`` match ``expected``.

        Names starting with a double underscore are skipped, anything listed
        in ``ignored`` is discarded, and both sides are sorted before they are
        compared.
        """
        public_names = {name for name in dir(namespace) if not name.startswith("__")}
        if ignored is not None:
            public_names -= set(ignored)

        tm.assert_almost_equal(sorted(public_names), sorted(expected))
class TestPDApi(Base):
    """Pin the set of names exposed by the top-level ``pandas`` namespace."""

    # optionally imported depending on how the tests are run; always ignored
    ignored = ["tests", "locale", "conftest"]

    # top-level sub-packages
    public_lib = [
        "api",
        "arrays",
        "options",
        "test",
        "testing",
        "errors",
        "plotting",
        "io",
        "tseries",
    ]
    private_lib = ["compat", "core", "pandas", "util"]

    # already deprecated; awaiting removal
    deprecated_modules: list[str] = ["np", "datetime"]

    # misc
    misc = ["IndexSlice", "NaT", "NA"]

    # top-level classes
    classes = [
        "Categorical",
        "CategoricalIndex",
        "DataFrame",
        "DateOffset",
        "DatetimeIndex",
        "ExcelFile",
        "ExcelWriter",
        "Float64Index",
        "Flags",
        "Grouper",
        "HDFStore",
        "Index",
        "Int64Index",
        "MultiIndex",
        "Period",
        "PeriodIndex",
        "RangeIndex",
        "UInt64Index",
        "Series",
        "SparseDtype",
        "StringDtype",
        "Timedelta",
        "TimedeltaIndex",
        "Timestamp",
        "Interval",
        "IntervalIndex",
        "CategoricalDtype",
        "PeriodDtype",
        "IntervalDtype",
        "DatetimeTZDtype",
        "BooleanDtype",
        "Int8Dtype",
        "Int16Dtype",
        "Int32Dtype",
        "Int64Dtype",
        "UInt8Dtype",
        "UInt16Dtype",
        "UInt32Dtype",
        "UInt64Dtype",
        "Float32Dtype",
        "Float64Dtype",
        "NamedAgg",
    ]

    # already deprecated; awaiting removal
    deprecated_classes: list[str] = ["Float64Index", "Int64Index", "UInt64Index"]

    # should be deprecated in the future
    deprecated_classes_in_future: list[str] = ["SparseArray"]

    # external modules exposed in the pandas namespace
    modules: list[str] = []

    # top-level functions
    funcs = [
        "array",
        "bdate_range",
        "concat",
        "crosstab",
        "cut",
        "date_range",
        "interval_range",
        "eval",
        "factorize",
        "get_dummies",
        "infer_freq",
        "isna",
        "isnull",
        "lreshape",
        "melt",
        "notna",
        "notnull",
        "offsets",
        "merge",
        "merge_ordered",
        "merge_asof",
        "period_range",
        "pivot",
        "pivot_table",
        "qcut",
        "show_versions",
        "timedelta_range",
        "unique",
        "value_counts",
        "wide_to_long",
    ]

    # top-level option funcs
    funcs_option = [
        "reset_option",
        "describe_option",
        "get_option",
        "option_context",
        "set_option",
        "set_eng_float_format",
    ]

    # top-level read_* funcs
    funcs_read = [
        "read_clipboard",
        "read_csv",
        "read_excel",
        "read_fwf",
        "read_gbq",
        "read_hdf",
        "read_html",
        "read_xml",
        "read_json",
        "read_pickle",
        "read_sas",
        "read_sql",
        "read_sql_query",
        "read_sql_table",
        "read_stata",
        "read_table",
        "read_feather",
        "read_parquet",
        "read_orc",
        "read_spss",
    ]

    # top-level json funcs
    funcs_json = ["json_normalize"]

    # top-level to_* funcs
    funcs_to = ["to_datetime", "to_numeric", "to_pickle", "to_timedelta"]

    # top-level names to deprecate in the future
    deprecated_funcs_in_future: list[str] = []

    # already deprecated; awaiting removal
    deprecated_funcs: list[str] = []

    # private modules in the pandas namespace
    private_modules = [
        "_config",
        "_libs",
        "_is_numpy_dev",
        "_testing",
        "_typing",
        "_version",
    ]

    def test_api(self):
        """``dir(pd)`` (minus ``ignored``) must equal the union of the lists above."""
        expected_names = [
            *self.public_lib,
            *self.private_lib,
            *self.misc,
            *self.modules,
            *self.classes,
            *self.funcs,
            *self.funcs_option,
            *self.funcs_read,
            *self.funcs_json,
            *self.funcs_to,
            *self.private_modules,
        ]
        self.check(namespace=pd, expected=expected_names, ignored=self.ignored)

    def test_api_all(self):
        """``pd.__all__`` must hold exactly the public, non-deprecated names."""
        documented = {
            *self.public_lib,
            *self.misc,
            *self.modules,
            *self.classes,
            *self.funcs,
            *self.funcs_option,
            *self.funcs_read,
            *self.funcs_json,
            *self.funcs_to,
        } - set(self.deprecated_classes)
        exported = set(pd.__all__)

        extraneous = exported - documented
        assert not extraneous

        missing = documented - exported
        assert not missing

    def test_depr(self):
        """Accessing any deprecated top-level name must emit a FutureWarning."""
        deprecated_names = [
            *self.deprecated_modules,
            *self.deprecated_classes,
            *self.deprecated_classes_in_future,
            *self.deprecated_funcs,
            *self.deprecated_funcs_in_future,
        ]
        for name in deprecated_names:
            with tm.assert_produces_warning(FutureWarning):
                _ = getattr(pd, name)
def test_datetime():
    """Check equality and type of a stdlib ``datetime`` with FutureWarning muted."""
    import warnings
    from datetime import datetime

    # NOTE(review): both sides use the stdlib ``datetime``; nothing from pandas
    # is exercised here. Presumably this once checked ``pd.datetime`` - confirm.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        stamp = datetime(2015, 1, 2, 0, 0)
        assert stamp == datetime(2015, 1, 2, 0, 0)

        assert isinstance(stamp, datetime)
def test_sparsearray():
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
assert isinstance(pd.array([1, 2, 3], dtype="Sparse"), pd.SparseArray)
def test_np():
import warnings
import numpy as np
with warnings.catch_warnings():
warnings.simplefilter("ignore", FutureWarning)
assert (pd.np.arange(0, 10) == np.arange(0, 10)).all()
class TestApi(Base):
    """Pin the sub-modules exposed by ``pandas.api``."""

    allowed = ["types", "extensions", "indexers"]

    def test_api(self):
        """``pandas.api`` must expose exactly the names in ``allowed``."""
        self.check(namespace=api, expected=self.allowed)
class TestTesting(Base):
    """Pin ``pandas.testing`` and the deprecation of ``pandas.util.testing``."""

    funcs = [
        "assert_frame_equal",
        "assert_series_equal",
        "assert_index_equal",
        "assert_extension_array_equal",
    ]

    def test_testing(self):
        """``pandas.testing`` must expose exactly the names in ``funcs``."""
        from pandas import testing  # noqa: PDF015

        self.check(namespace=testing, expected=self.funcs)

    def test_util_testing_deprecated(self):
        """Importing ``pandas.util.testing`` warns and points to the replacement."""
        # drop any cached module so the import below really executes
        sys.modules.pop("pandas.util.testing", None)

        with tm.assert_produces_warning(FutureWarning) as recorded:
            import pandas.util.testing  # noqa: F401

        message = str(recorded[0].message)
        assert "pandas.util.testing is deprecated" in message
        assert "pandas.testing instead" in message

    def test_util_testing_deprecated_direct(self):
        """A ``from pandas.util.testing import ...`` emits the same warning."""
        # drop any cached module so the import below really executes
        sys.modules.pop("pandas.util.testing", None)

        with tm.assert_produces_warning(FutureWarning) as recorded:
            from pandas.util.testing import assert_series_equal  # noqa: F401

        message = str(recorded[0].message)
        assert "pandas.util.testing is deprecated" in message
        assert "pandas.testing instead" in message

    def test_util_in_top_level(self):
        """``pandas.util.testing`` is reachable by attribute; unknown names raise."""
        # run in a subprocess to avoid import caching issues
        snippet = "import pandas; pandas.util.testing.assert_series_equal"
        command = [sys.executable, "-c", snippet]
        output = subprocess.check_output(command, stderr=subprocess.STDOUT).decode()
        assert "pandas.util.testing is deprecated" in output

        with pytest.raises(AttributeError, match="foo"):
            pd.util.foo

View File

@@ -0,0 +1,63 @@
import pandas._testing as tm
from pandas.api import types
from pandas.tests.api.test_api import Base
class TestTypes(Base):
    """Pin the names exposed by ``pandas.api.types``."""

    allowed = [
        "is_bool",
        "is_bool_dtype",
        "is_categorical",
        "is_categorical_dtype",
        "is_complex",
        "is_complex_dtype",
        "is_datetime64_any_dtype",
        "is_datetime64_dtype",
        "is_datetime64_ns_dtype",
        "is_datetime64tz_dtype",
        "is_dtype_equal",
        "is_float",
        "is_float_dtype",
        "is_int64_dtype",
        "is_integer",
        "is_integer_dtype",
        "is_number",
        "is_numeric_dtype",
        "is_object_dtype",
        "is_scalar",
        "is_sparse",
        "is_string_dtype",
        "is_signed_integer_dtype",
        "is_timedelta64_dtype",
        "is_timedelta64_ns_dtype",
        "is_unsigned_integer_dtype",
        "is_period_dtype",
        "is_interval",
        "is_interval_dtype",
        "is_re",
        "is_re_compilable",
        "is_dict_like",
        "is_iterator",
        "is_file_like",
        "is_list_like",
        "is_hashable",
        "is_array_like",
        "is_named_tuple",
        "pandas_dtype",
        "union_categoricals",
        "infer_dtype",
        "is_extension_array_dtype",
    ]
    deprecated = ["is_extension_type"]
    dtypes = ["CategoricalDtype", "DatetimeTZDtype", "PeriodDtype", "IntervalDtype"]

    def test_types(self):
        """The namespace holds the allowed, dtype and deprecated names only."""
        expected_names = [*self.allowed, *self.dtypes, *self.deprecated]
        self.check(namespace=types, expected=expected_names)

    def test_deprecated_from_api_types(self):
        """Calling a deprecated helper emits a FutureWarning."""
        for name in self.deprecated:
            deprecated_func = getattr(types, name)
            with tm.assert_produces_warning(FutureWarning):
                deprecated_func(1)

View File

@@ -0,0 +1,10 @@
from pandas.core.groupby.base import transformation_kernels
# Both lists hold the sorted transformation kernels minus two names:
# - "tshift" only works on a time index and is deprecated
# - "cumcount" does not exist on Series or DataFrame
series_transform_kernels = sorted(
    kernel for kernel in transformation_kernels if kernel not in {"tshift", "cumcount"}
)
frame_transform_kernels = sorted(
    kernel for kernel in transformation_kernels if kernel not in {"tshift", "cumcount"}
)

View File

@@ -0,0 +1,18 @@
import numpy as np
import pytest
from pandas import DataFrame
@pytest.fixture
def int_frame_const_col():
    """
    Fixture for a 6-row int64 DataFrame whose values are constant per column

    Columns are ['A', 'B', 'C'], holding 1, 2 and 3 respectively.
    """
    row = np.arange(1, 4, dtype="int64")
    values = np.tile(row, (6, 1))
    return DataFrame(values, columns=["A", "B", "C"])

View File

@@ -0,0 +1,97 @@
import numpy as np
import pandas as pd
import pandas._testing as tm
def test_agg_relabel():
    """Keyword (column, func) pairs in ``DataFrame.agg`` label the result rows."""
    # GH 26513
    frame = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    # simplest case: one column, one function
    single = frame.agg(foo=("B", "sum"))
    tm.assert_frame_equal(single, pd.DataFrame({"B": [10]}, index=pd.Index(["foo"])))

    # same column reduced with two different methods
    double = frame.agg(foo=("B", "sum"), bar=("B", "min"))
    tm.assert_frame_equal(
        double, pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))
    )
def test_agg_relabel_multi_columns_multi_methods():
    """Relabelled aggregations over several columns leave NaN in unused cells."""
    # GH 26513, test on multiple columns with multiple methods
    frame = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
    named_aggs = {
        "foo": ("A", "sum"),
        "bar": ("B", "mean"),
        "cat": ("A", "min"),
        "dat": ("B", "max"),
        "f": ("A", "max"),
        "g": ("C", "min"),
    }
    result = frame.agg(**named_aggs)

    nan = np.nan
    expected = pd.DataFrame(
        {
            "A": [6.0, nan, 1.0, nan, 2.0, nan],
            "B": [nan, 2.5, nan, 4.0, nan, nan],
            "C": [nan, nan, nan, nan, nan, 3.0],
        },
        index=pd.Index(list(named_aggs)),
    )
    tm.assert_frame_equal(result, expected)
def test_agg_relabel_partial_functions():
    """Relabelling accepts numpy functions, builtins, strings and lambdas."""
    # GH 26513, test on partial, functools or more complex cases
    frame = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})

    # one column, three flavours of callable
    one_column = frame.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
    tm.assert_frame_equal(
        one_column,
        pd.DataFrame({"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])),
    )

    # several columns, mixing every flavour
    many_columns = frame.agg(
        foo=("A", min),
        bar=("A", np.min),
        cat=("B", max),
        dat=("C", "min"),
        f=("B", np.sum),
        kk=("B", lambda x: min(x)),
    )
    nan = np.nan
    expected = pd.DataFrame(
        {
            "A": [1.0, 1.0, nan, nan, nan, nan],
            "B": [nan, nan, 4.0, nan, 10.0, 1.0],
            "C": [nan, nan, nan, 3.0, nan, nan],
        },
        index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
    )
    tm.assert_frame_equal(many_columns, expected)
def test_agg_namedtuple():
    """``pd.NamedAgg`` works as the value of a relabelling keyword."""
    # GH 26513
    frame = pd.DataFrame({"A": [0, 1], "B": [1, 2]})

    # every aggregation targets column "B"
    same_column = frame.agg(
        foo=pd.NamedAgg("B", "sum"),
        bar=pd.NamedAgg("B", min),
        cat=pd.NamedAgg(column="B", aggfunc="count"),
        fft=pd.NamedAgg("B", aggfunc="max"),
    )
    tm.assert_frame_equal(
        same_column,
        pd.DataFrame({"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])),
    )

    # aggregations spread over both columns
    both_columns = frame.agg(
        foo=pd.NamedAgg("A", "min"),
        bar=pd.NamedAgg(column="B", aggfunc="max"),
        cat=pd.NamedAgg(column="A", aggfunc="max"),
    )
    expected = pd.DataFrame(
        {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
        index=pd.Index(["foo", "bar", "cat"]),
    )
    tm.assert_frame_equal(both_columns, expected)

View File

@@ -0,0 +1,249 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
Series,
)
import pandas._testing as tm
from pandas.tests.apply.common import frame_transform_kernels
from pandas.tests.frame.common import zip_frames
def unpack_obj(obj, klass, axis):
    """
    Return the object a test parametrized over frame_or_series should use.

    When ``klass`` is DataFrame, ``obj`` is returned untouched. Otherwise
    column "A" is selected, and the test is skipped unless ``axis`` is 0.
    """
    if klass is DataFrame:
        return obj

    column = obj["A"]
    if axis != 0:
        pytest.skip(f"Test is only for DataFrame with axis={axis}")
    return column
def test_transform_ufunc(axis, float_frame, frame_or_series):
    """``transform`` with a ufunc matches applying the ufunc directly."""
    # GH 35964
    target = unpack_obj(float_frame, frame_or_series, axis)

    with np.errstate(all="ignore"):
        expected = np.sqrt(target)

    # ufunc
    result = target.transform(np.sqrt, axis=axis)
    tm.assert_equal(result, expected)
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(axis, float_frame, ops, names):
    """A list-like of functions yields a MultiIndex on the non-transform axis."""
    # GH 35964
    along_index = axis in {0, "index"}
    other_axis = 1 if along_index else 0

    with np.errstate(all="ignore"):
        pieces = [op(float_frame) for op in ops]
        expected = zip_frames(pieces, axis=other_axis)

    if along_index:
        expected.columns = MultiIndex.from_product([float_frame.columns, names])
    else:
        expected.index = MultiIndex.from_product([float_frame.index, names])

    result = float_frame.transform(ops, axis=axis)
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops, frame_or_series):
    """An empty list-like of functions is rejected with a ValueError."""
    target = unpack_obj(float_frame, frame_or_series, 0)

    expected_msg = "No transform functions were provided"
    with pytest.raises(ValueError, match=expected_msg):
        target.transform(ops)
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
    """A dict-like of functions transforms only the labels it names."""
    # GH 35964
    if axis in (0, "index"):
        label = float_frame.columns[0]
        expected = float_frame[[label]].transform(np.abs)
    else:
        label = float_frame.index[0]
        expected = float_frame.iloc[[0]].transform(np.abs)

    funcs = box({label: np.abs})
    result = float_frame.transform(funcs, axis=axis)
    tm.assert_frame_equal(result, expected)
def test_transform_dictlike_mixed():
    """Dict values may mix lists of functions with single functions."""
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    frame = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})
    result = frame.transform({"b": ["sqrt", "abs"], "c": "sqrt"})

    expected_columns = MultiIndex(
        [("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]
    )
    expected = DataFrame(
        [[1.0, 1, 1.0], [2.0, 4, 2.0]],
        columns=expected_columns,
    )
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "ops",
    [
        {},
        {"A": []},
        {"A": [], "B": "cumsum"},
        {"A": "cumsum", "B": []},
        {"A": [], "B": ["cumsum"]},
        {"A": ["cumsum"], "B": []},
    ],
)
def test_transform_empty_dictlike(float_frame, ops, frame_or_series):
    """A dict that is empty or holds an empty function list is rejected."""
    target = unpack_obj(float_frame, frame_or_series, 0)

    expected_msg = "No transform functions were provided"
    with pytest.raises(ValueError, match=expected_msg):
        target.transform(ops)
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply, frame_or_series):
    """A UDF gives ``obj + 1`` whichever of the two call styles it accepts."""
    # GH 35964
    target = unpack_obj(float_frame, frame_or_series, axis)

    # transform uses UDF either via apply or passing the entire DataFrame
    def add_one(x):
        # transform is using apply iff x is not a DataFrame
        received_whole_obj = isinstance(x, frame_or_series)
        if use_apply == received_whole_obj:
            # Force transform to fallback
            raise ValueError
        return x + 1

    result = target.transform(add_one, axis=axis)
    tm.assert_equal(result, target + 1)
# kernels excluded from the "raises" parametrizations below
wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]
frame_kernels_raise = [
    kernel for kernel in frame_transform_kernels if kernel not in wont_fail
]
@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
    """Transforming object-dtype data raises TypeError for every call form."""
    # GH 35964
    if op == "rank":
        xfail_rank = pytest.mark.xfail(
            raises=ValueError, reason="GH 40418: rank does not raise a TypeError"
        )
        request.node.add_marker(xfail_rank)

    # DataFrame that will fail on most transforms
    frame = DataFrame({"A": 3 * [object]})
    target = tm.get_obj(frame, frame_or_series)

    # tshift is deprecated
    expected_warning = None if op != "tshift" else FutureWarning
    with tm.assert_produces_warning(expected_warning):
        with pytest.raises(TypeError, match="unsupported operand|not supported"):
            target.transform(op)

    # list and dict wrappers report the failure with a common message
    for wrapped in ([op], {"A": op}, {"A": [op]}):
        with pytest.raises(TypeError, match="Transform function failed"):
            target.transform(wrapped)
@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_partial_failure_typeerror(op):
    """Failing columns/functions are dropped with a FutureWarning."""
    # GH 35964
    # Using object makes most transform kernels fail
    df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})

    def assert_partial(funcs, expected, pattern):
        # the transform must warn with ``pattern`` and still equal ``expected``
        with tm.assert_produces_warning(FutureWarning, match=pattern):
            result = df.transform(funcs)
        tm.assert_equal(result, expected)

    column_failed = r"\['A'\] did not transform successfully"
    assert_partial([op], df[["B"]].transform([op]), column_failed)
    assert_partial({"A": op, "B": op}, df[["B"]].transform({"B": op}), column_failed)
    assert_partial(
        {"A": [op], "B": [op]}, df[["B"]].transform({"B": [op]}), column_failed
    )

    # only ``op`` fails on column A; "shift" survives
    op_failed = rf"\['{op}'\] did not transform successfully"
    assert_partial(
        {"A": [op, "shift"], "B": [op]},
        df.transform({"A": ["shift"], "B": [op]}),
        op_failed,
    )
def test_transform_partial_failure_valueerror():
# GH 40211
match = ".*did not transform successfully"
def op(x):
if np.sum(np.sum(x)) < 10:
raise ValueError
return x
df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
expected = df[["B"]].transform([op])
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform([op])
tm.assert_equal(result, expected)
expected = df[["B"]].transform({"B": op})
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": op, "B": op})
tm.assert_equal(result, expected)
expected = df[["B"]].transform({"B": [op]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": [op], "B": [op]})
tm.assert_equal(result, expected)
expected = df.transform({"A": ["shift"], "B": [op]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = df.transform({"A": [op, "shift"], "B": [op]})
tm.assert_equal(result, expected)
@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
    """Extra positional and keyword arguments reach the UDF unchanged."""
    # GH 35964
    # transform uses UDF either via apply or passing the entire DataFrame
    expected_args = [1, 2]
    expected_kwargs = {"c": 3}

    def f(x, a, b, c):
        # transform is using apply iff x is not a DataFrame
        received_whole_obj = isinstance(x, frame_or_series)
        if use_apply == received_whole_obj:
            # Force transform to fallback
            raise ValueError
        assert [a, b] == expected_args
        assert c == expected_kwargs["c"]
        return x

    target = frame_or_series([1])
    target.transform(f, 0, *expected_args, **expected_kwargs)
def test_transform_empty_dataframe():
    """Transforming an empty frame, or one of its columns, returns it unchanged."""
    # https://github.com/pandas-dev/pandas/issues/39636
    empty = DataFrame([], columns=["col1", "col2"])

    frame_result = empty.transform(lambda x: x + 10)
    tm.assert_frame_equal(frame_result, empty)

    column_result = empty["col1"].transform(lambda x: x + 10)
    tm.assert_series_equal(column_result, empty["col1"])

View File

@@ -0,0 +1,359 @@
# Tests specifically aimed at detecting bad arguments.
# This file is organized by reason for exception.
# 1. always invalid argument values
# 2. missing column(s)
# 3. incompatible ops/dtype/args/kwargs
# 4. invalid result shape/type
# If your test does not fit into one of these categories, add to this list.
from itertools import chain
import re
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Series,
date_range,
notna,
)
import pandas._testing as tm
from pandas.core.base import SpecificationError
@pytest.mark.parametrize("result_type", ["foo", 1])
def test_result_type_error(result_type, int_frame_const_col):
    """``DataFrame.apply`` rejects a ``result_type`` outside the allowed set."""
    # allowed result_type
    df = int_frame_const_col

    # ``match`` is applied as a regular expression and the message contains
    # braces, so escape it to match the text literally (as the other tests in
    # this file do for messages with special characters).
    msg = re.escape(
        "invalid value for result_type, must be one of "
        "{None, 'reduce', 'broadcast', 'expand'}"
    )
    with pytest.raises(ValueError, match=msg):
        df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type)
def test_apply_invalid_axis_value():
    """``DataFrame.apply`` with a non-existent axis number raises ValueError."""
    frame = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])

    with pytest.raises(ValueError, match="No axis named 2 for object type DataFrame"):
        frame.apply(lambda x: x, 2)
def test_applymap_invalid_na_action(float_frame):
    """``applymap`` rejects an unknown ``na_action`` value."""
    # GH 23803
    expected_msg = "na_action must be .*Got 'abc'"
    with pytest.raises(ValueError, match=expected_msg):
        float_frame.applymap(lambda x: len(str(x)), na_action="abc")
def test_agg_raises():
    """``DataFrame.agg`` without any function raises TypeError."""
    # GH 26513
    frame = DataFrame({"A": [0, 1], "B": [1, 2]})

    with pytest.raises(TypeError, match="Must provide"):
        frame.agg()
def test_map_with_invalid_na_action_raises():
    """``Series.map`` rejects an unknown ``na_action`` value."""
    # https://github.com/pandas-dev/pandas/issues/32815
    ser = Series([1, 2, 3])

    with pytest.raises(ValueError, match="na_action must either be 'ignore' or None"):
        ser.map(lambda x: x, na_action="____")
def test_map_categorical_na_action():
    """``na_action="ignore"`` raises NotImplementedError for categorical data."""
    categories = Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    ser = Series(categories, name="XX", index=list("abcdefg"))

    with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
        ser.map(lambda x: x, na_action="ignore")
def test_map_datetimetz_na_action():
    """``na_action="ignore"`` raises NotImplementedError for tz-aware data."""
    hourly = date_range("2011-01-01", "2011-01-02", freq="H")
    ser = Series(hourly.tz_localize("Asia/Tokyo"), name="XX")

    with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
        ser.map(lambda x: x, na_action="ignore")
@pytest.mark.parametrize("box", [DataFrame, Series])
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"A": {"B": "sum"}}, {"A": {"B": ["sum"]}}])
def test_nested_renamer(box, method, func):
    """A dict-of-dicts spec raises SpecificationError for every method."""
    # GH 35964
    target = box({"A": [1]})
    bound_method = getattr(target, method)

    with pytest.raises(SpecificationError, match="nested renamer is not supported"):
        bound_method(func)
@pytest.mark.parametrize(
    "renamer",
    [{"foo": ["min", "max"]}, {"foo": ["min", "max"], "bar": ["sum", "mean"]}],
)
def test_series_nested_renamer(renamer):
    """``Series.agg`` with a dict of lists raises SpecificationError."""
    ser = Series(range(6), dtype="int64", name="series")

    with pytest.raises(SpecificationError, match="nested renamer is not supported"):
        ser.agg(renamer)
def test_apply_dict_depr():
    """Aggregating a frame column with a dict of lists raises SpecificationError."""
    values = np.random.randn(10, 3)
    frame = DataFrame(
        values,
        columns=["A", "B", "C"],
        index=date_range("1/1/2000", periods=10),
    )

    with pytest.raises(SpecificationError, match="nested renamer is not supported"):
        frame.A.agg({"foo": ["sum", "mean"]})
@pytest.mark.parametrize("method", ["agg", "transform"])
def test_dict_nested_renaming_depr(method):
    """Nested renaming dicts raise SpecificationError on a DataFrame."""
    frame = DataFrame({"A": range(5), "B": 5})
    bound_method = getattr(frame, method)

    # nested renaming
    with pytest.raises(SpecificationError, match=r"nested renamer is not supported"):
        bound_method({"A": {"foo": "min"}, "B": {"bar": "max"}})
@pytest.mark.parametrize("method", ["apply", "agg", "transform"])
@pytest.mark.parametrize("func", [{"B": "sum"}, {"B": ["sum"]}])
def test_missing_column(method, func):
    """A dict spec naming an absent column raises KeyError."""
    # GH 40004
    frame = DataFrame({"A": [1]})
    bound_method = getattr(frame, method)

    expected_msg = re.escape("Column(s) ['B'] do not exist")
    with pytest.raises(KeyError, match=expected_msg):
        bound_method(func)
def test_transform_mixed_column_name_dtypes():
    """Missing keys of mixed types are all listed in the KeyError message."""
    # GH39025
    frame = DataFrame({"a": ["1"]})
    funcs = {"a": int, 1: str, "b": int}

    with pytest.raises(KeyError, match=r"Column\(s\) \[1, 'b'\] do not exist"):
        frame.transform(funcs)
@pytest.mark.parametrize(
    # ``args`` is always a tuple; the "tail" case previously passed a bare int
    "how, args",
    [("pct_change", ()), ("nsmallest", (1, ["a", "b"])), ("tail", (1,))],
)
def test_apply_str_axis_1_raises(how, args):
    """String ops that do not support ``axis=1`` raise ValueError in ``apply``."""
    # GH 39211 - some ops don't support axis=1
    df = DataFrame({"a": [1, 2], "b": [3, 4]})
    msg = f"Operation {how} does not support axis=1"
    with pytest.raises(ValueError, match=msg):
        df.apply(how, axis=1, args=args)
def test_transform_axis_1_raises():
    """``Series.transform`` with ``axis=1`` raises ValueError."""
    # GH 35964
    ser = Series([1])

    with pytest.raises(ValueError, match="No axis named 1 for object type Series"):
        ser.transform("sum", axis=1)
def test_apply_modify_traceback():
    """An AttributeError raised inside a row-wise UDF surfaces from ``apply``."""
    labels_a = ["foo"] * 4 + ["bar"] * 4 + ["foo"] * 3
    labels_b = "one one one two one one one two two two one".split()
    labels_c = "dull dull shiny dull dull shiny shiny dull shiny shiny shiny".split()
    data = DataFrame(
        {
            "A": labels_a,
            "B": labels_b,
            "C": labels_c,
            "D": np.random.randn(11),
            "E": np.random.randn(11),
            "F": np.random.randn(11),
        }
    )

    # a float NaN in a string column makes ``.startswith`` fail below
    data.loc[4, "C"] = np.nan

    def transform(row):
        if row["C"].startswith("shin") and row["A"] == "foo":
            row["D"] = 7
        return row

    # NOTE(review): this NaN-guarded variant is defined but never called here.
    def transform2(row):
        if notna(row["C"]) and row["C"].startswith("shin") and row["A"] == "foo":
            row["D"] = 7
        return row

    expected_msg = "'float' object has no attribute 'startswith'"
    with pytest.raises(AttributeError, match=expected_msg):
        data.apply(transform, axis=1)
@pytest.mark.parametrize(
    "df, func, expected",
    tm.get_cython_table_params(
        DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]]
    ),
)
def test_agg_cython_table_raises_frame(df, func, expected, axis):
    """``cumprod`` on a string frame raises the parametrized exception."""
    # GH 21224
    expected_msg = "can't multiply sequence by non-int of type 'str'"
    with pytest.raises(expected, match=expected_msg):
        df.agg(func, axis=axis)
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("mean", TypeError),  # mean raises TypeError
                ("prod", TypeError),
                ("std", TypeError),
                ("var", TypeError),
                ("median", TypeError),
                ("cumprod", TypeError),
            ],
        )
    ),
)
def test_agg_cython_table_raises_series(series, func, expected):
    """Numeric reductions on a string Series raise the parametrized exception."""
    # GH21224
    expected_msg = r"[Cc]ould not convert|can't multiply sequence by non-int of type"
    with pytest.raises(expected, match=expected_msg):
        # e.g. Series('a b'.split()).cumprod() will raise
        series.agg(func)
def test_agg_none_to_type():
    """Aggregating a ``None`` value with ``int`` raises the builtin's TypeError."""
    # GH 40543
    frame = DataFrame({"a": [None]})

    expected_msg = re.escape("int() argument must be a string")
    with pytest.raises(TypeError, match=expected_msg):
        frame.agg({"a": int})
def test_transform_none_to_type():
    """Transforming a ``None`` value with ``int`` raises a TypeError."""
    # GH#34377
    frame = DataFrame({"a": [None]})

    with pytest.raises(TypeError, match="Transform function failed"):
        frame.transform({"a": int})
@pytest.mark.parametrize(
    "func",
    [
        lambda x: np.array([1, 2]).reshape(-1, 2),
        lambda x: [1, 2],
        lambda x: Series([1, 2]),
    ],
)
def test_apply_broadcast_error(int_frame_const_col, func):
    """Results that cannot be broadcast back raise ValueError."""
    frame = int_frame_const_col

    # > 1 ndim
    expected_msg = "too many dims to broadcast|cannot broadcast result"
    with pytest.raises(ValueError, match=expected_msg):
        frame.apply(func, axis=1, result_type="broadcast")
def test_transform_and_agg_err_agg(axis, float_frame):
    """``DataFrame.agg`` cannot mix a reducer with a transformer."""
    # cannot both transform and agg
    expected_msg = "cannot combine transform and aggregation operations"
    mixed_funcs = ["max", "sqrt"]

    with pytest.raises(ValueError, match=expected_msg):
        with np.errstate(all="ignore"):
            float_frame.agg(mixed_funcs, axis=axis)
@pytest.mark.parametrize(
    "func, msg",
    [
        (["sqrt", "max"], "cannot combine transform and aggregation"),
        (
            {"foo": np.sqrt, "bar": "sum"},
            "cannot perform both aggregation and transformation",
        ),
    ],
)
def test_transform_and_agg_err_series(string_series, func, msg):
    """``Series.agg`` cannot mix a reducer with a transformer."""
    # we are trying to transform with an aggregator
    with pytest.raises(ValueError, match=msg), np.errstate(all="ignore"):
        string_series.agg(func)
@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]])
def test_transform_wont_agg_frame(axis, float_frame, func):
    """``DataFrame.transform`` rejects a list that contains a reducer."""
    # GH 35964
    # cannot both transform and agg
    with pytest.raises(ValueError, match="Function did not transform"):
        float_frame.transform(func, axis=axis)
@pytest.mark.parametrize("func", [["min", "max"], ["sqrt", "max"]])
def test_transform_wont_agg_series(string_series, func):
    """``Series.transform`` rejects a list that contains a reducer."""
    # GH 35964
    # we are trying to transform with an aggregator
    # a RuntimeWarning is expected only when "sqrt" is the first function
    expected_warning = RuntimeWarning if func[0] == "sqrt" else None

    with pytest.raises(ValueError, match="Function did not transform"):
        with tm.assert_produces_warning(
            expected_warning, match="invalid value encountered in sqrt"
        ):
            string_series.transform(func)
@pytest.mark.parametrize(
    "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
)
@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
    """``transform`` rejects a reduction in bare, list or dict form."""
    # GH 35964
    wrapped_op = op_wrapper(all_reductions)
    target = tm.get_obj(DataFrame({"A": [1, 2, 3]}), frame_or_series)

    with pytest.raises(ValueError, match="Function did not transform"):
        target.transform(wrapped_op)

View File

@@ -0,0 +1,889 @@
from collections import (
Counter,
defaultdict,
)
import numpy as np
import pytest
import pandas as pd
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
concat,
isna,
timedelta_range,
)
import pandas._testing as tm
from pandas.tests.apply.common import series_transform_kernels
def test_series_map_box_timedelta():
    """map/apply/applymap can call ``total_seconds`` on each timedelta element."""
    # GH#11349
    deltas = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))

    def to_seconds(x):
        return x.total_seconds()

    # smoke test: none of these may raise
    deltas.map(to_seconds)
    deltas.apply(to_seconds)
    DataFrame(deltas).applymap(to_seconds)
def test_apply(datetime_series):
    """``Series.apply`` agrees with ufuncs and keeps metadata on empty input."""
    import math

    with np.errstate(all="ignore"):
        sqrt_applied = datetime_series.apply(np.sqrt)
        tm.assert_series_equal(sqrt_applied, np.sqrt(datetime_series))

        # element-wise apply
        exp_applied = datetime_series.apply(math.exp)
        tm.assert_series_equal(exp_applied, np.exp(datetime_series))

    # empty series
    empty = Series(dtype=object, name="foo", index=Index([], name="bar"))
    applied = empty.apply(lambda x: x)
    tm.assert_series_equal(empty, applied)

    # check all metadata (GH 9322)
    assert empty is not applied
    assert empty.index is applied.index
    assert empty.dtype == applied.dtype
    assert empty.name == applied.name

    # index but no data
    no_data = Series(index=[1, 2, 3], dtype=np.float64)
    applied = no_data.apply(lambda x: x)
    tm.assert_series_equal(no_data, applied)
def test_apply_same_length_inference_bug():
    """``apply`` and ``map`` agree for a UDF that returns 2-tuples."""

    def pair_with_next(x):
        return (x, x + 1)

    # a 2-element series matches the tuple length; a 3-element one does not
    for values in ([1, 2], [1, 2, 3]):
        ser = Series(values)
        result = ser.apply(pair_with_next)
        expected = ser.map(pair_with_next)
        tm.assert_series_equal(result, expected)
def test_apply_dont_convert_dtype():
    """``convert_dtype=False`` leaves the result of ``apply`` as object dtype."""
    ser = Series(np.random.randn(10))

    def positive_or_nan(x):
        if x > 0:
            return x
        return np.nan

    result = ser.apply(positive_or_nan, convert_dtype=False)
    assert result.dtype == object
def test_apply_args():
    """Positional ``args`` are forwarded to the function given to ``apply``."""
    ser = Series(["foo,bar"])

    first = ser.apply(str.split, args=(",",))[0]
    assert first == ["foo", "bar"]
    assert isinstance(first, list)
@pytest.mark.parametrize(
    "args, kwargs, increment",
    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
)
def test_agg_args(args, kwargs, increment):
    """Extra positional and keyword arguments reach the UDF in ``Series.agg``."""
    # GH 43357
    def weighted_shift(x, a=0, b=0, c=0):
        return x + a + 10 * b + 100 * c

    ser = Series([1, 2])
    result = ser.agg(weighted_shift, 0, *args, **kwargs)
    tm.assert_series_equal(result, ser + increment)
def test_series_map_box_timestamps():
    """map/apply can read ``hour``/``day``/``month`` from each datetime element."""
    # GH#2689, GH#2627
    stamps = Series(pd.date_range("1/1/2000", periods=10))

    def hour_day_month(x):
        return (x.hour, x.day, x.month)

    # it works!
    stamps.map(hour_day_month)
    stamps.apply(hour_day_month)
def test_series_map_stringdtype(any_string_dtype):
    """Mapping a string Series through another gives ``pd.NA`` for unknown keys."""
    # map test on StringDType, GH#40823
    lookup = Series(
        data=["cat", "dog", "rabbit"],
        index=["id1", "id2", "id3"],
        dtype=any_string_dtype,
    )
    keys = Series(data=["id3", "id2", "id1", "id7000"], dtype=any_string_dtype)

    result = keys.map(lookup)
    expected = Series(data=["rabbit", "dog", "cat", pd.NA], dtype=any_string_dtype)
    tm.assert_series_equal(result, expected)
def test_apply_box():
    """``apply`` passes each element as a Timestamp, Timedelta or Period."""
    # ufunc will not be boxed. Same test cases as the test_map_box

    def describe_stamp(x):
        return f"{type(x).__name__}_{x.day}_{x.tz}"

    # naive datetimes: boxed value must be Timestamp instance
    naive = Series([pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")])
    assert naive.dtype == "datetime64[ns]"
    tm.assert_series_equal(
        naive.apply(describe_stamp),
        Series(["Timestamp_1_None", "Timestamp_2_None"]),
    )

    # tz-aware datetimes
    aware = Series(
        [
            pd.Timestamp("2011-01-01", tz="US/Eastern"),
            pd.Timestamp("2011-01-02", tz="US/Eastern"),
        ]
    )
    assert aware.dtype == "datetime64[ns, US/Eastern]"
    tm.assert_series_equal(
        aware.apply(describe_stamp),
        Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]),
    )

    # timedelta
    deltas = Series([pd.Timedelta("1 days"), pd.Timedelta("2 days")])
    assert deltas.dtype == "timedelta64[ns]"
    tm.assert_series_equal(
        deltas.apply(lambda x: f"{type(x).__name__}_{x.days}"),
        Series(["Timedelta_1", "Timedelta_2"]),
    )

    # period
    periods = Series(
        [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
    )
    assert periods.dtype == "Period[M]"
    tm.assert_series_equal(
        periods.apply(lambda x: f"{type(x).__name__}_{x.freqstr}"),
        Series(["Period_M", "Period_M"]),
    )
def test_apply_datetimetz():
values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
"Asia/Tokyo"
)
s = Series(values, name="XX")
result = s.apply(lambda x: x + pd.offsets.Day())
exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
"Asia/Tokyo"
)
exp = Series(exp_values, name="XX")
tm.assert_series_equal(result, exp)
# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.apply(lambda x: x.hour)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
tm.assert_series_equal(result, exp)
# not vectorized
def f(x):
if not isinstance(x, pd.Timestamp):
raise ValueError
return str(x.tz)
result = s.map(f)
exp = Series(["Asia/Tokyo"] * 25, name="XX")
tm.assert_series_equal(result, exp)
def test_apply_categorical():
    """Check the result dtype of ``Series.apply`` on an ordered categorical."""
    labels = list("abcdefg")
    upper = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    ser = Series(upper, name="XX", index=labels)

    # should be categorical dtype when the number of categories are
    # the same
    result = ser.apply(lambda x: x.lower())
    lower = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
    exp = Series(lower, name="XX", index=labels)
    tm.assert_series_equal(result, exp)
    tm.assert_categorical_equal(result.values, exp.values)

    # mapping every value to one constant is expected to give object dtype
    result = ser.apply(lambda x: "A")
    exp = Series(["A"] * 7, name="XX", index=labels)
    tm.assert_series_equal(result, exp)
    assert result.dtype == object
@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
def test_apply_categorical_with_nan_values(series):
    """Check ``Series.apply`` on categorical data that contains NaN.

    Both sides are cast to object before comparing, so only the values are
    checked, not the categorical dtype.
    """
    # GH 20714 bug fixed in: GH 24275
    # np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
    s = Series(series, dtype="category")
    result = s.apply(lambda x: x.split("-")[0])
    result = result.astype(object)
    expected = Series(["1", "1", np.nan], dtype="category")
    expected = expected.astype(object)
    tm.assert_series_equal(result, expected)
def test_apply_empty_integer_series_with_datetime_index():
    """An identity ``apply`` on an empty int Series with a date index is a no-op."""
    # GH 21245
    empty_dates = pd.date_range(start="2018-01-01", periods=0)
    s = Series([], index=empty_dates, dtype=int)
    tm.assert_series_equal(s.apply(lambda x: x), s)
def test_transform(string_series):
# transforming functions
with np.errstate(all="ignore"):
f_sqrt = np.sqrt(string_series)
f_abs = np.abs(string_series)
# ufunc
result = string_series.apply(np.sqrt)
expected = f_sqrt.copy()
tm.assert_series_equal(result, expected)
# list-like
result = string_series.apply([np.sqrt])
expected = f_sqrt.to_frame().copy()
expected.columns = ["sqrt"]
tm.assert_frame_equal(result, expected)
result = string_series.apply(["sqrt"])
tm.assert_frame_equal(result, expected)
# multiple items in list
# these are in the order as if we are applying both functions per
# series and then concatting
expected = concat([f_sqrt, f_abs], axis=1)
expected.columns = ["sqrt", "absolute"]
result = string_series.apply([np.sqrt, np.abs])
tm.assert_frame_equal(result, expected)
# dict, provide renaming
expected = concat([f_sqrt, f_abs], axis=1)
expected.columns = ["foo", "bar"]
expected = expected.unstack().rename("series")
result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
tm.assert_series_equal(result.reindex_like(expected), expected)
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_partial_failure(op, request):
    """Check that a failing kernel in ``Series.transform`` warns and is dropped.

    For each list/dict form, the call that includes ``op`` must emit a
    FutureWarning naming the failed entry and match the expected result
    built without that entry.
    """
    # GH 35964
    if op in ("ffill", "bfill", "pad", "backfill", "shift"):
        request.node.add_marker(
            pytest.mark.xfail(
                raises=AssertionError, reason=f"{op} is successful on any dtype"
            )
        )
    if op in ("rank", "fillna"):
        pytest.skip(f"{op} doesn't raise TypeError on object")

    # Using object makes most transform kernels fail
    ser = Series(3 * [object])
    op_failed = rf"\['{op}'\] did not transform successfully"
    a_failed = r"\['A'\] did not transform successfully"
    b_failed = r"\['B'\] did not transform successfully"

    # list of kernels
    expected = ser.transform(["shift"])
    with tm.assert_produces_warning(FutureWarning, match=op_failed):
        result = ser.transform([op, "shift"])
    tm.assert_equal(result, expected)

    # dict of kernels
    expected = ser.transform({"B": "shift"})
    with tm.assert_produces_warning(FutureWarning, match=a_failed):
        result = ser.transform({"A": op, "B": "shift"})
    tm.assert_equal(result, expected)

    # dict of single-item lists
    expected = ser.transform({"B": ["shift"]})
    with tm.assert_produces_warning(FutureWarning, match=a_failed):
        result = ser.transform({"A": [op], "B": ["shift"]})
    tm.assert_equal(result, expected)

    # dict of lists where the expected value itself drops key "B"
    with tm.assert_produces_warning(FutureWarning, match=b_failed):
        expected = ser.transform({"A": ["shift"], "B": [op]})
    with tm.assert_produces_warning(FutureWarning, match=op_failed):
        result = ser.transform({"A": [op, "shift"], "B": [op]})
    tm.assert_equal(result, expected)
def test_transform_partial_failure_valueerror():
# GH 40211
match = ".*did not transform successfully"
def noop(x):
return x
def raising_op(_):
raise ValueError
ser = Series(3 * [object])
expected = ser.transform([noop])
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform([noop, raising_op])
tm.assert_equal(result, expected)
expected = ser.transform({"B": noop})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": raising_op, "B": noop})
tm.assert_equal(result, expected)
expected = ser.transform({"B": [noop]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [raising_op], "B": [noop]})
tm.assert_equal(result, expected)
expected = ser.transform({"A": [noop], "B": [noop]})
with tm.assert_produces_warning(FutureWarning, match=match):
result = ser.transform({"A": [noop, raising_op], "B": [noop]})
tm.assert_equal(result, expected)
def test_demo():
    """Demonstrate ``Series.agg`` with a list of names and with a renaming dict."""
    # demonstration tests
    s = Series(range(6), dtype="int64", name="series")

    by_list = s.agg(["min", "max"])
    tm.assert_series_equal(
        by_list, Series([0, 5], index=["min", "max"], name="series")
    )

    by_dict = s.agg({"foo": "min"})
    tm.assert_series_equal(by_dict, Series([0], index=["foo"], name="series"))
def test_agg_apply_evaluate_lambdas_the_same(string_series):
# test that we are evaluating row-by-row first
# before vectorized evaluation
result = string_series.apply(lambda x: str(x))
expected = string_series.agg(lambda x: str(x))
tm.assert_series_equal(result, expected)
result = string_series.apply(str)
expected = string_series.agg(str)
tm.assert_series_equal(result, expected)
def test_with_nested_series(datetime_series):
# GH 2316
# .agg with a reducer and a transform, what to do
result = datetime_series.apply(lambda x: Series([x, x**2], index=["x", "x^2"]))
expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
tm.assert_frame_equal(result, expected)
result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"]))
tm.assert_frame_equal(result, expected)
def test_replicate_describe(string_series):
# this also tests a result set that is all scalars
expected = string_series.describe()
result = string_series.apply(
{
"count": "count",
"mean": "mean",
"std": "std",
"min": "min",
"25%": lambda x: x.quantile(0.25),
"50%": "median",
"75%": lambda x: x.quantile(0.75),
"max": "max",
}
)
tm.assert_series_equal(result, expected)
def test_reduce(string_series):
# reductions with named functions
result = string_series.agg(["sum", "mean"])
expected = Series(
[string_series.sum(), string_series.mean()],
["sum", "mean"],
name=string_series.name,
)
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_non_callable_aggregates(how):
    """Check agg/apply with the name of a non-callable attribute (``size``)."""
    # test agg using non-callable series attributes
    # GH 39116 - expand to apply
    s = Series([1, 2, None])
    method = getattr(s, how)

    # Calling agg w/ just a string arg same as calling s.arg
    assert method("size") == s.size

    # test when mixed w/ callable reducers
    result = method(["size", "count", "mean"])
    expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
    tm.assert_series_equal(result, expected)
def test_series_apply_no_suffix_index():
    """Duplicate lambdas in a list keep the plain ``<lambda>`` label."""
    # GH36189
    s = Series([4] * 3)
    funcs = ["sum", lambda x: x.sum(), lambda x: x.sum()]
    expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])
    tm.assert_series_equal(s.apply(funcs), expected)
def test_map(datetime_series):
index, data = tm.getMixedTypeDict()
source = Series(data["B"], index=data["C"])
target = Series(data["C"][:4], index=data["D"][:4])
merged = target.map(source)
for k, v in merged.items():
assert v == source[target[k]]
# input could be a dict
merged = target.map(source.to_dict())
for k, v in merged.items():
assert v == source[target[k]]
# function
result = datetime_series.map(lambda x: x * 2)
tm.assert_series_equal(result, datetime_series * 2)
# GH 10324
a = Series([1, 2, 3, 4])
b = Series(["even", "odd", "even", "odd"], dtype="category")
c = Series(["even", "odd", "even", "odd"])
exp = Series(["odd", "even", "odd", np.nan], dtype="category")
tm.assert_series_equal(a.map(b), exp)
exp = Series(["odd", "even", "odd", np.nan])
tm.assert_series_equal(a.map(c), exp)
a = Series(["a", "b", "c", "d"])
b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))
exp = Series([np.nan, 1, 2, 3])
tm.assert_series_equal(a.map(b), exp)
exp = Series([np.nan, 1, 2, 3])
tm.assert_series_equal(a.map(c), exp)
a = Series(["a", "b", "c", "d"])
b = Series(
["B", "C", "D", "E"],
dtype="category",
index=pd.CategoricalIndex(["b", "c", "d", "e"]),
)
c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))
exp = Series(
pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
)
tm.assert_series_equal(a.map(b), exp)
exp = Series([np.nan, "B", "C", "D"])
tm.assert_series_equal(a.map(c), exp)
def test_map_empty(index):
if isinstance(index, MultiIndex):
pytest.skip("Initializing a Series from a MultiIndex is not supported")
s = Series(index)
result = s.map({})
expected = Series(np.nan, index=s.index)
tm.assert_series_equal(result, expected)
def test_map_compat():
    """Map a boolean Series through a dict keyed by ``True``/``False``."""
    # related GH 8024
    labels = [1, 2, 3]
    s = Series([True, True, False], index=labels)
    expected = Series(["foo", "foo", "bar"], index=labels)
    tm.assert_series_equal(s.map({True: "foo", False: "bar"}), expected)
def test_map_int():
    """Map a float Series through an integer Series with a missing key.

    The key ``4`` is absent from ``right``, so the mapped result must be
    float with NaN at ``"d"`` and a real value at ``"c"``.
    """
    left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
    right = Series({1: 11, 2: 22, 3: 33})

    # np.float64 is used instead of the np.float_ alias, which NumPy 2.0 removed.
    assert left.dtype == np.float64
    assert issubclass(right.dtype.type, np.integer)

    merged = left.map(right)
    assert merged.dtype == np.float64
    assert isna(merged["d"])
    assert not isna(merged["c"])
def test_map_type_inference():
    """A callable returning ``np.where`` scalars still infers an integer dtype."""
    mapped = Series(range(3)).map(lambda x: np.where(x == 0, 0, 1))
    assert issubclass(mapped.dtype.type, np.integer)
def test_map_decimal(string_series):
from decimal import Decimal
result = string_series.map(lambda x: Decimal(str(x)))
assert result.dtype == np.object_
assert isinstance(result[0], Decimal)
def test_map_na_exclusion():
    """``na_action="ignore"`` leaves NaN entries as NaN."""
    s = Series([1.5, np.nan, 3, np.nan, 5])
    doubled = s.map(lambda x: x * 2, na_action="ignore")
    tm.assert_series_equal(doubled, s * 2)
def test_map_dict_with_tuple_keys():
    """
    Due to new MultiIndex-ing behaviour in v0.14.0,
    dicts with tuple keys passed to map were being
    converted to a multi-index, preventing tuple values
    from being mapped properly.
    """
    # GH 18496
    label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}
    # the column holds exactly the dict's tuple keys, in insertion order
    df = DataFrame({"a": list(label_mappings)})

    df["labels"] = df["a"].map(label_mappings)
    df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index)

    # All labels should be filled now
    tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)
def test_map_counter():
    """Mapping through a ``Counter`` gives 0 for keys it does not hold."""
    labels = [1, 2, 3]
    s = Series(["a", "b", "c"], index=labels)
    counter = Counter({"b": 5, "c": 1})

    expected = Series([0, 5, 1], index=labels)
    tm.assert_series_equal(s.map(counter), expected)
def test_map_defaultdict():
    """Mapping through a ``defaultdict`` uses its default for missing keys."""
    labels = ["a", "b", "c"]
    s = Series([1, 2, 3], index=labels)
    default_dict = defaultdict(lambda: "blank", {1: "stuff"})

    expected = Series(["stuff", "blank", "blank"], index=labels)
    tm.assert_series_equal(s.map(default_dict), expected)
def test_map_dict_na_key():
    """A dict mapper with an ``np.nan`` key maps NaN values to its entry."""
    # https://github.com/pandas-dev/pandas/issues/17648
    # Checks that np.nan key is appropriately mapped
    mapper = {1: "a", 2: "b", np.nan: "c"}
    result = Series([1, 2, np.nan]).map(mapper)
    tm.assert_series_equal(result, Series(["a", "b", "c"]))
def test_map_dict_subclass_with_missing():
    """
    Test Series.map with a dictionary subclass that defines __missing__,
    i.e. sets a default value (GH #15999).
    """

    class DictWithMissing(dict):
        def __missing__(self, key):
            return "missing"

    mapper = DictWithMissing({3: "three"})
    result = Series([1, 2, 3]).map(mapper)
    tm.assert_series_equal(result, Series(["missing", "missing", "three"]))
def test_map_dict_subclass_without_missing():
    """A dict subclass without ``__missing__`` maps absent keys to NaN."""

    class DictWithoutMissing(dict):
        pass

    mapper = DictWithoutMissing({3: "three"})
    result = Series([1, 2, 3]).map(mapper)
    tm.assert_series_equal(result, Series([np.nan, np.nan, "three"]))
def test_map_abc_mapping(non_dict_mapping_subclass):
# https://github.com/pandas-dev/pandas/issues/29733
# Check collections.abc.Mapping support as mapper for Series.map
s = Series([1, 2, 3])
not_a_dictionary = non_dict_mapping_subclass({3: "three"})
result = s.map(not_a_dictionary)
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)
def test_map_abc_mapping_with_missing(non_dict_mapping_subclass):
# https://github.com/pandas-dev/pandas/issues/29733
# Check collections.abc.Mapping support as mapper for Series.map
class NonDictMappingWithMissing(non_dict_mapping_subclass):
def __missing__(key):
return "missing"
s = Series([1, 2, 3])
not_a_dictionary = NonDictMappingWithMissing({3: "three"})
result = s.map(not_a_dictionary)
# __missing__ is a dict concept, not a Mapping concept,
# so it should not change the result!
expected = Series([np.nan, np.nan, "three"])
tm.assert_series_equal(result, expected)
def test_map_box():
    """Check that ``Series.map`` hands boxed scalars to the callable.

    Covers tz-naive and tz-aware datetimes, timedeltas and periods. Every
    case calls ``map`` (not ``apply``), so this test covers the map code
    path rather than repeating ``test_apply_box``.
    """
    vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
    s = Series(vals)
    assert s.dtype == "datetime64[ns]"
    # boxed value must be Timestamp instance
    res = s.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
    exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
    tm.assert_series_equal(res, exp)

    vals = [
        pd.Timestamp("2011-01-01", tz="US/Eastern"),
        pd.Timestamp("2011-01-02", tz="US/Eastern"),
    ]
    s = Series(vals)
    assert s.dtype == "datetime64[ns, US/Eastern]"
    res = s.map(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
    exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
    tm.assert_series_equal(res, exp)

    # timedelta
    vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
    s = Series(vals)
    assert s.dtype == "timedelta64[ns]"
    res = s.map(lambda x: f"{type(x).__name__}_{x.days}")
    exp = Series(["Timedelta_1", "Timedelta_2"])
    tm.assert_series_equal(res, exp)

    # period
    vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
    s = Series(vals)
    assert s.dtype == "Period[M]"
    res = s.map(lambda x: f"{type(x).__name__}_{x.freqstr}")
    exp = Series(["Period_M", "Period_M"])
    tm.assert_series_equal(res, exp)
def test_map_categorical():
    """Check the result dtype of ``Series.map`` on an ordered categorical."""
    labels = list("abcdefg")
    upper = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    s = Series(upper, name="XX", index=labels)

    # lower-casing every value is expected to stay categorical
    result = s.map(lambda x: x.lower())
    exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
    tm.assert_series_equal(result, Series(exp_values, name="XX", index=labels))
    tm.assert_categorical_equal(result.values, exp_values)

    # mapping every value to one constant is expected to give object dtype
    result = s.map(lambda x: "A")
    tm.assert_series_equal(result, Series(["A"] * 7, name="XX", index=labels))
    assert result.dtype == object
def test_map_datetimetz():
values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
"Asia/Tokyo"
)
s = Series(values, name="XX")
# keep tz
result = s.map(lambda x: x + pd.offsets.Day())
exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
"Asia/Tokyo"
)
exp = Series(exp_values, name="XX")
tm.assert_series_equal(result, exp)
# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.map(lambda x: x.hour)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
tm.assert_series_equal(result, exp)
# not vectorized
def f(x):
if not isinstance(x, pd.Timestamp):
raise ValueError
return str(x.tz)
result = s.map(f)
exp = Series(["Asia/Tokyo"] * 25, name="XX")
tm.assert_series_equal(result, exp)
@pytest.mark.parametrize(
    "vals,mapping,exp",
    [
        (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
        (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
        (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
    ],
)
def test_map_missing_mixed(vals, mapping, exp):
    """Map a Series ending in NaN through dicts with and without a NaN key."""
    # GH20495
    with_nan = Series(vals + [np.nan])
    tm.assert_series_equal(with_nan.map(mapping), Series(exp))
@pytest.mark.parametrize(
    "dti,exp",
    [
        (
            Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
            DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
        ),
        (
            tm.makeTimeSeries(nper=30),
            DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
        ),
    ],
)
@pytest.mark.parametrize("aware", [True, False])
def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
    """Apply a Series-returning callable to a naive or UTC datetime index."""
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    source = dti.tz_localize("UTC") if aware else dti
    result = Series(source.index).apply(lambda x: Series([1, 2]))
    tm.assert_frame_equal(result, exp)
def test_apply_scalar_on_date_time_index_aware_series():
    """Apply a constant-returning callable to a UTC-localized datetime index."""
    # GH 25959
    # Calling apply on a localized time series should not cause an error
    localized = tm.makeTimeSeries(nper=30).tz_localize("UTC")
    ones = Series(localized.index).apply(lambda x: 1)
    tm.assert_series_equal(ones, Series(np.ones(30), dtype="int64"))
def test_map_float_to_string_precision():
    """Mapping a float through ``str`` keeps the full repr precision."""
    # GH 13228
    as_text = Series(1 / 3).map(lambda val: str(val))
    assert as_text.to_dict() == {0: "0.3333333333333333"}
def test_apply_to_timedelta():
list_of_valid_strings = ["00:00:01", "00:00:02"]
a = pd.to_timedelta(list_of_valid_strings)
b = Series(list_of_valid_strings).apply(pd.to_timedelta)
tm.assert_series_equal(Series(a), b)
list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]
a = pd.to_timedelta(list_of_strings)
with tm.assert_produces_warning(FutureWarning, match="Inferring timedelta64"):
ser = Series(list_of_strings)
b = ser.apply(pd.to_timedelta)
tm.assert_series_equal(Series(a), b)
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sum], ["sum"]),
        ([np.sum, np.mean], ["sum", "mean"]),
        (np.array([np.sum]), ["sum"]),
        (np.array([np.sum, np.mean]), ["sum", "mean"]),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_listlike_reducer(string_series, ops, names, how):
    """agg/apply with a list-like of reducers gives one labelled value per op."""
    # GH 39140
    reduced = {name: op(string_series) for name, op in zip(names, ops)}
    expected = Series(reduced)
    expected.name = "series"
    method = getattr(string_series, how)
    tm.assert_series_equal(method(ops), expected)
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sum},
        {"A": np.sum, "B": np.mean},
        Series({"A": np.sum}),
        Series({"A": np.sum, "B": np.mean}),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_dictlike_reducer(string_series, ops, how):
    """agg/apply with a dict-like of reducers gives one value per key."""
    # GH 39140
    reduced = {name: op(string_series) for name, op in ops.items()}
    expected = Series(reduced)
    expected.name = string_series.name
    method = getattr(string_series, how)
    tm.assert_series_equal(method(ops), expected)
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_apply_listlike_transformer(string_series, ops, names):
    """``apply`` with a list-like of transformers gives one column per op."""
    # GH 39140
    with np.errstate(all="ignore"):
        columns = [op(string_series) for op in ops]
        expected = concat(columns, axis=1)
        expected.columns = names
        result = string_series.apply(ops)
        tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "ops",
    [
        {"A": np.sqrt},
        {"A": np.sqrt, "B": np.exp},
        Series({"A": np.sqrt}),
        Series({"A": np.sqrt, "B": np.exp}),
    ],
)
def test_apply_dictlike_transformer(string_series, ops):
    """``apply`` with a dict-like of transformers concatenates results by key."""
    # GH 39140
    with np.errstate(all="ignore"):
        pieces = {name: op(string_series) for name, op in ops.items()}
        expected = concat(pieces)
        expected.name = string_series.name
        result = string_series.apply(ops)
        tm.assert_series_equal(result, expected)
def test_apply_retains_column_name():
    """The index name of returned Series becomes the result's columns name."""
    # GH 16380
    df = DataFrame({"x": range(3)}, Index(range(3), name="x"))

    def count_up(x):
        return Series(range(x + 1), Index(range(x + 1), name="y"))

    result = df.x.apply(count_up)
    expected = DataFrame(
        [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]],
        columns=Index(range(3), name="y"),
        index=Index(range(3), name="x"),
    )
    tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,33 @@
import pandas as pd
import pandas._testing as tm
def test_relabel_no_duplicated_method():
    """Keyword relabeling in ``Series.agg`` must match the equivalent dict form."""
    # this is to test there is no duplicated method used in agg
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})

    # (column, relabeling spec) pairs, checked as kwargs versus dict
    cases = [
        ("A", {"foo": "sum"}),
        ("B", {"foo": "min", "bar": "max"}),
        ("B", {"foo": sum, "bar": min, "cat": "max"}),
    ]
    for column, spec in cases:
        result = df[column].agg(**spec)
        expected = df[column].agg(spec)
        tm.assert_series_equal(result, expected)
def test_relabel_duplicated_method():
    """The same reducer may be used twice when given different new names."""
    # this is to test with nested renaming, duplicated method can be used
    # if they are assigned with different new names
    df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4]})
    new_names = ["foo", "bar"]

    sums = df["A"].agg(foo="sum", bar="sum")
    tm.assert_series_equal(sums, pd.Series([6, 6], index=new_names, name="A"))

    minima = df["B"].agg(foo=min, bar="min")
    tm.assert_series_equal(minima, pd.Series([1, 1], index=new_names, name="B"))

View File

@@ -0,0 +1,49 @@
import numpy as np
import pytest
from pandas import (
DataFrame,
MultiIndex,
Series,
concat,
)
import pandas._testing as tm
@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(string_series, ops, names):
    """``transform`` with a list-like of functions gives one column per op."""
    # GH 35964
    with np.errstate(all="ignore"):
        columns = [op(string_series) for op in ops]
        expected = concat(columns, axis=1)
        expected.columns = names
        result = string_series.transform(ops)
        tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(string_series, box):
    """``transform`` with a dict or Series of functions gives one column per key."""
    # GH 35964
    with np.errstate(all="ignore"):
        sqrt_col = np.sqrt(string_series)
        abs_col = np.abs(string_series)
        expected = concat([sqrt_col, abs_col], axis=1)
        expected.columns = ["foo", "bar"]
        funcs = box({"foo": np.sqrt, "bar": np.abs})
        result = string_series.transform(funcs)
        tm.assert_frame_equal(result, expected)
def test_transform_dictlike_mixed():
    """A dict mixing list and scalar values gives MultiIndex columns."""
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    ser = Series([1, 4])
    result = ser.transform({"b": ["sqrt", "abs"], "c": "sqrt"})

    # columns are ("b", "sqrt"), ("b", "abs"), ("c", "sqrt")
    columns = MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)])
    expected = DataFrame([[1.0, 1, 1.0], [2.0, 4, 2.0]], columns=columns)
    tm.assert_frame_equal(result, expected)

View File

@@ -0,0 +1,302 @@
from itertools import chain
import operator
import numpy as np
import pytest
from pandas.core.dtypes.common import is_number
from pandas import (
DataFrame,
Index,
Series,
)
import pandas._testing as tm
from pandas.core.groupby.base import maybe_normalize_deprecated_kernels
from pandas.tests.apply.common import (
frame_transform_kernels,
series_transform_kernels,
)
@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
@pytest.mark.parametrize(
    "args,kwds",
    [
        pytest.param([], {}, id="no_args_or_kwds"),
        pytest.param([1], {}, id="axis_from_args"),
        pytest.param([], {"axis": 1}, id="axis_from_kwds"),
        pytest.param([], {"numeric_only": True}, id="optional_kwds"),
        pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
    ],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
    """agg/apply with a method name must match calling that method directly."""
    agg_with_extra_positional = how == "agg" and len(args) > 1
    if agg_with_extra_positional:
        request.node.add_marker(
            pytest.mark.xfail(
                raises=TypeError,
                reason="agg/apply signature mismatch - agg passes 2nd "
                "argument to func",
            )
        )
    dispatch = getattr(float_frame, how)
    direct = getattr(float_frame, func)
    result = dispatch(func, *args, **kwds)
    expected = direct(*args, **kwds)
    tm.assert_series_equal(result, expected)
def test_with_string_args(datetime_series):
for arg in ["sum", "mean", "min", "max", "std"]:
result = datetime_series.apply(arg)
expected = getattr(datetime_series, arg)()
assert result == expected
@pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_np_reducer(float_frame, op, how):
    """agg/apply with a reducer name must match the NumPy function of that name.

    The ``float_frame`` argument is not used; a small fixed frame is built
    instead.
    """
    # GH 39116
    frame = DataFrame({"a": [1, 2], "b": [3, 4]})
    result = getattr(frame, how)(op)

    # pandas ddof defaults to 1, numpy to 0
    kwargs = {"ddof": 1} if op in ("std", "var") else {}
    np_func = getattr(np, op)
    expected = Series(np_func(frame, axis=0, **kwargs), index=frame.columns)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
)
@pytest.mark.parametrize("how", ["transform", "apply"])
def test_apply_np_transformer(float_frame, op, how):
    """transform/apply with a name must match the NumPy function of that name."""
    # GH 39116

    # float_frame will _usually_ have negative values, which will
    # trigger the warning here, but let's put one in just to be sure
    float_frame.iloc[0, 0] = -1.0
    warn = RuntimeWarning if op in ["log", "sqrt"] else None

    with tm.assert_produces_warning(warn):
        result = getattr(float_frame, how)(op)
        np_func = getattr(np, op)
        expected = np_func(float_frame)
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("sum", 0),
                ("max", np.nan),
                ("min", np.nan),
                ("all", True),
                ("any", False),
                ("mean", np.nan),
                ("prod", 1),
                ("std", np.nan),
                ("var", np.nan),
                ("median", np.nan),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("sum", 6),
                ("max", 3),
                ("min", 1),
                ("all", True),
                ("any", True),
                ("mean", 2),
                ("prod", 6),
                ("std", 1),
                ("var", 1),
                ("median", 2),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()),
            [
                ("sum", "abc"),
                ("max", "c"),
                ("min", "a"),
                ("all", True),
                ("any", True),
            ],
        ),
    ),
)
def test_agg_cython_table_series(series, func, expected):
    """Check scalar results of ``Series.agg`` for the parametrized reducers."""
    # GH21224
    # test reducing functions in
    # pandas.core.base.SelectionMixin._cython_table
    result = series.agg(func)
    if not is_number(expected):
        assert result == expected
        return
    # numeric results are compared with tolerance; NaN counts as equal to NaN
    assert np.isclose(result, expected, equal_nan=True)
@pytest.mark.parametrize(
    "series, func, expected",
    chain(
        tm.get_cython_table_params(
            Series(dtype=np.float64),
            [
                ("cumprod", Series([], Index([]), dtype=np.float64)),
                ("cumsum", Series([], Index([]), dtype=np.float64)),
            ],
        ),
        tm.get_cython_table_params(
            Series([np.nan, 1, 2, 3]),
            [
                ("cumprod", Series([np.nan, 1, 2, 6])),
                ("cumsum", Series([np.nan, 1, 3, 6])),
            ],
        ),
        tm.get_cython_table_params(
            Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))]
        ),
    ),
)
def test_agg_cython_table_transform_series(series, func, expected):
    """Check Series results of ``Series.agg`` for cumulative functions."""
    # GH21224
    # test transforming functions in
    # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
    tm.assert_series_equal(series.agg(func), expected)
@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        tm.get_cython_table_params(
            DataFrame(),
            [
                ("sum", Series(dtype="float64")),
                ("max", Series(dtype="float64")),
                ("min", Series(dtype="float64")),
                ("all", Series(dtype=bool)),
                ("any", Series(dtype=bool)),
                ("mean", Series(dtype="float64")),
                ("prod", Series(dtype="float64")),
                ("std", Series(dtype="float64")),
                ("var", Series(dtype="float64")),
                ("median", Series(dtype="float64")),
            ],
        ),
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("sum", Series([1.0, 3])),
                ("max", Series([1.0, 2])),
                ("min", Series([1.0, 1])),
                ("all", Series([True, True])),
                ("any", Series([True, True])),
                ("mean", Series([1, 1.5])),
                ("prod", Series([1.0, 2])),
                ("std", Series([np.nan, 0.707107])),
                ("var", Series([np.nan, 0.5])),
                ("median", Series([1, 1.5])),
            ],
        ),
    ),
)
def test_agg_cython_table_frame(df, func, expected, axis):
    """Check Series results of ``DataFrame.agg`` for the parametrized reducers."""
    # GH 21224
    # test reducing functions in
    # pandas.core.base.SelectionMixin._cython_table
    reduced = df.agg(func, axis=axis)
    tm.assert_series_equal(reduced, expected)
@pytest.mark.parametrize(
    "df, func, expected",
    chain(
        tm.get_cython_table_params(
            DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())]
        ),
        tm.get_cython_table_params(
            DataFrame([[np.nan, 1], [1, 2]]),
            [
                ("cumprod", DataFrame([[np.nan, 1], [1, 2]])),
                ("cumsum", DataFrame([[np.nan, 1], [1, 3]])),
            ],
        ),
    ),
)
def test_agg_cython_table_transform_frame(df, func, expected, axis):
    """Check DataFrame results of ``DataFrame.agg`` for cumulative functions."""
    # GH 21224
    # test transforming functions in
    # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
    if axis in ("columns", 1):
        # operating blockwise doesn't let us preserve dtypes
        expected = expected.astype("float64")

    tm.assert_frame_equal(df.agg(func, axis=axis), expected)
@pytest.mark.parametrize("op", series_transform_kernels)
def test_transform_groupby_kernel_series(string_series, op):
    """``Series.transform(op)`` must match a single-group groupby transform."""
    # GH 35964
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)
    # fillna needs a fill value; the other kernels take no extra argument
    extra = [0.0] if op == "fillna" else []

    # an all-ones key puts every row into one group
    single_group = np.ones(len(string_series))
    expected = string_series.groupby(single_group).transform(op, *extra)
    result = string_series.transform(op, 0, *extra)
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel_frame(
    axis, float_frame, op, using_array_manager, request
):
    """``DataFrame.transform(op)`` must match a single-group groupby transform.

    The comparison is run twice: on the frame as given, and again after a
    column is added so that the frame has more than one internal array.
    """
    # TODO(2.0) Remove after pad/backfill deprecation enforced
    op = maybe_normalize_deprecated_kernels(op)
    # GH 35964
    if using_array_manager and op == "pct_change" and axis in (1, "columns"):
        # TODO(ArrayManager) shift with axis=1
        request.node.add_marker(
            pytest.mark.xfail(
                reason="shift axis=1 not yet implemented for ArrayManager"
            )
        )

    args = [0.0] if op == "fillna" else []
    # group along the rows for axis 0 / "index", otherwise along the columns
    dim = 0 if axis in (0, "index") else 1

    ones = np.ones(float_frame.shape[dim])
    expected = float_frame.groupby(ones, axis=axis).transform(op, *args)
    result = float_frame.transform(op, axis, *args)
    tm.assert_frame_equal(result, expected)

    # same thing, but ensuring we have multiple blocks
    assert "E" not in float_frame.columns
    float_frame["E"] = float_frame["A"].copy()
    assert len(float_frame._mgr.arrays) > 1

    ones = np.ones(float_frame.shape[dim])
    expected2 = float_frame.groupby(ones, axis=axis).transform(op, *args)
    result2 = float_frame.transform(op, axis, *args)
    tm.assert_frame_equal(result2, expected2)
@pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"])
def test_transform_method_name(method):
    """``DataFrame.transform`` with a method name must equal calling the method."""
    # GH 19760
    df = DataFrame({"A": [-1, 2]})
    expected = getattr(df, method)()
    tm.assert_frame_equal(df.transform(method), expected)

View File

@@ -0,0 +1,155 @@
"""
Assertion helpers for arithmetic tests.
"""
import numpy as np
import pytest
from pandas import (
DataFrame,
Index,
Series,
array,
)
import pandas._testing as tm
from pandas.core.arrays import (
BooleanArray,
PandasArray,
)
def assert_cannot_add(left, right, msg="cannot add"):
    """
    Helper to assert that left and right cannot be added.

    Addition is attempted in both operand orders; each must raise a
    TypeError whose message matches ``msg``.

    Parameters
    ----------
    left : object
    right : object
    msg : str, default "cannot add"
    """
    for lhs, rhs in ((left, right), (right, left)):
        with pytest.raises(TypeError, match=msg):
            lhs + rhs
def assert_invalid_addsub_type(left, right, msg=None):
    """
    Helper to assert that left and right can be neither added nor subtracted.

    Addition and then subtraction are each attempted in both operand
    orders; every attempt must raise a TypeError matching ``msg``.

    Parameters
    ----------
    left : object
    right : object
    msg : str or None, default None
    """
    operand_orders = ((left, right), (right, left))

    for lhs, rhs in operand_orders:
        with pytest.raises(TypeError, match=msg):
            lhs + rhs

    for lhs, rhs in operand_orders:
        with pytest.raises(TypeError, match=msg):
            lhs - rhs
def get_upcast_box(left, right, is_cmp: bool = False):
    """
    Get the box to use for 'expected' in an arithmetic or comparison operation.

    Parameters
    ----------
    left : Any
    right : Any
    is_cmp : bool, default False
        Whether the operation is a comparison method.

    Returns
    -------
    callable
        ``DataFrame``, ``Series``, ``Index``, ``np.array`` or ``tm.to_array``.
    """
    operands = (left, right)

    if any(isinstance(obj, DataFrame) for obj in operands):
        return DataFrame

    if any(isinstance(obj, Series) for obj in operands):
        # Index does not defer for comparisons
        return np.array if is_cmp and isinstance(left, Index) else Series

    if any(isinstance(obj, Index) for obj in operands):
        return np.array if is_cmp else Index

    return tm.to_array
def assert_invalid_comparison(left, right, box):
    """
    Check how mismatched-type operands compare: ``==`` is all-False, ``!=`` is
    all-True, and every ordering comparison raises TypeError.

    Parameters
    ----------
    left : np.ndarray, ExtensionArray, Index, or Series
    right : object
    box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
    """
    # Not for tznaive-tzaware comparison

    # Note: not quite the same as how we do this for tm.box_expected
    xbox = np.array if box in [Index, array] else box

    def to_comparable(res):
        # Eventually we'd like this to be tighter, but for now we'll
        #  just exclude PandasArray[bool]
        if isinstance(res, PandasArray):
            return res._ndarray
        if isinstance(res, BooleanArray):
            # NB: we are assuming no pd.NAs for now
            return res.astype(bool)
        return res

    # rev_box: box to use for reversed comparisons
    series_vs_index = isinstance(right, Index) and isinstance(left, Series)
    rev_box = np.array if series_vs_index else xbox

    eq_result = to_comparable(left == right)
    all_false = xbox(np.zeros(eq_result.shape, dtype=np.bool_))
    tm.assert_equal(eq_result, all_false)

    tm.assert_equal(to_comparable(right == left), rev_box(all_false))

    tm.assert_equal(to_comparable(left != right), ~all_false)

    tm.assert_equal(to_comparable(right != left), rev_box(~all_false))

    msg = "|".join(
        [
            "Invalid comparison between",
            "Cannot compare type",
            "not supported between",
            "invalid type promotion",
            (
                # GH#36706 npdev 1.20.0 2020-09-28
                r"The DTypes <class 'numpy.dtype\[datetime64\]'> and "
                r"<class 'numpy.dtype\[int64\]'> do not have a common DType. "
                "For example they cannot be stored in a single array unless the "
                "dtype is `object`."
            ),
        ]
    )

    orderings = (
        lambda a, b: a < b,
        lambda a, b: a <= b,
        lambda a, b: a > b,
        lambda a, b: a >= b,
    )
    # all four orderings with ``left`` first, then all four with ``right`` first
    for first, second in ((left, right), (right, left)):
        for compare in orderings:
            with pytest.raises(TypeError, match=msg):
                compare(first, second)

View File

@@ -0,0 +1,232 @@
import numpy as np
import pytest
import pandas as pd
from pandas import RangeIndex
import pandas._testing as tm
from pandas.core.api import (
Float64Index,
Int64Index,
UInt64Index,
)
from pandas.core.computation import expressions as expr
@pytest.fixture(
    autouse=True, scope="module", params=[0, 1000000], ids=["numexpr", "python"]
)
def switch_numexpr_min_elements(request):
    """
    Temporarily set ``expr._MIN_ELEMENTS`` to the fixture parameter.

    The parameter is yielded to the test; the value that was in place before
    the override is written back afterwards.
    """
    threshold = request.param
    previous = expr._MIN_ELEMENTS
    expr._MIN_ELEMENTS = threshold
    yield threshold
    expr._MIN_ELEMENTS = previous
# ------------------------------------------------------------------
# A doctest in the `one` fixture's docstring fails during setup, even when
# marked +SKIP, with "'DoctestItem' object has no attribute 'callspec'".
# The failure is caused by the switch_numexpr_min_elements fixture.
@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    """
    Several variants of integer value 1. The zero-dim integer array
    behaves like an integer.

    This fixture can be used to check that datetimelike indexes handle
    addition and subtraction of integers and zero-dimensional arrays
    of integers.

    Returns
    -------
    int or np.ndarray
        Either the Python int ``1`` or a zero-dimensional int64 array
        holding ``1``.

    Examples
    --------
    dti = pd.date_range('2016-01-01', periods=2, freq='H')
    dti
    DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'],
    dtype='datetime64[ns]', freq='H')
    dti + one
    DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'],
    dtype='datetime64[ns]', freq='H')
    """
    return request.param
# Parameters for the ``zero`` fixture; element order is significant because
# it determines the order of the generated test cases.
zeros = [
    # length-5 vectors of zeros for every box / dtype combination
    *(
        box_cls([0] * 5, dtype=dtype)
        for box_cls in [pd.Index, np.array, pd.array]
        for dtype in [np.int64, np.uint64, np.float64]
    ),
    # length-5 float vectors of negative zero
    *(box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [pd.Index, np.array]),
    # zero-dimensional arrays
    *(np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]),
    np.array(-0.0, dtype=np.float64),
    # plain Python scalars
    0,
    0.0,
    -0.0,
]
# A doctest in the `zero` fixture's docstring fails during setup, even when
# marked +SKIP, with "'DoctestItem' object has no attribute 'callspec'".
# The failure is caused by the switch_numexpr_min_elements fixture.
@pytest.fixture(params=zeros)
def zero(request):
    """
    Several types of scalar zeros and length 5 vectors of zeros.

    This fixture can be used to check that numeric-dtype indexes handle
    division by any zero numeric-dtype.

    Uses vector of length 5 for broadcasting with `numeric_idx` fixture,
    which creates numeric-dtype vectors also of length 5.

    Returns
    -------
    object
        One element of the module-level ``zeros`` list.

    Examples
    --------
    arr = RangeIndex(5)
    arr / zero
    Float64Index([nan, inf, inf, inf, inf], dtype='float64')
    """
    return request.param
# ------------------------------------------------------------------
# Vector Fixtures
@pytest.fixture(
    params=[
        Float64Index(np.arange(5, dtype="float64")),
        Int64Index(np.arange(5, dtype="int64")),
        UInt64Index(np.arange(5, dtype="uint64")),
        RangeIndex(5),
    ],
    # test ids are the class name of each index
    ids=lambda x: type(x).__name__,
)
def numeric_idx(request):
    """
    Several types of numeric-dtypes Index objects

    Each parameter is a length-5 index holding the values 0 through 4.
    """
    return request.param
# ------------------------------------------------------------------
# Scalar Fixtures
@pytest.fixture(
    params=[
        pd.Timedelta("10m7s").to_pytimedelta(),
        pd.Timedelta("10m7s"),
        pd.Timedelta("10m7s").to_timedelta64(),
    ],
    # test ids are the class name of each scalar
    ids=lambda x: type(x).__name__,
)
def scalar_td(request):
    """
    Several variants of Timedelta scalars representing 10 minutes and 7 seconds.

    The same ``pd.Timedelta("10m7s")`` is supplied as-is and as converted by
    ``to_pytimedelta()`` and ``to_timedelta64()``.
    """
    return request.param
@pytest.fixture(
    params=[
        pd.offsets.Day(3),
        pd.offsets.Hour(72),
        pd.Timedelta(days=3).to_pytimedelta(),
        pd.Timedelta("72:00:00"),
        np.timedelta64(3, "D"),
        np.timedelta64(72, "h"),
    ],
    # test ids are the class name of each object
    ids=lambda x: type(x).__name__,
)
def three_days(request):
    """
    Several timedelta-like and DateOffset objects that each represent
    a 3-day timedelta

    The span is expressed both in days (3) and in hours (72).
    """
    return request.param
@pytest.fixture(
    params=[
        pd.offsets.Hour(2),
        pd.offsets.Minute(120),
        pd.Timedelta(hours=2).to_pytimedelta(),
        pd.Timedelta(seconds=2 * 3600),
        np.timedelta64(2, "h"),
        np.timedelta64(120, "m"),
    ],
    # test ids are the class name of each object
    ids=lambda x: type(x).__name__,
)
def two_hours(request):
    """
    Several timedelta-like and DateOffset objects that each represent
    a 2-hour timedelta

    The span is expressed in hours (2), minutes (120) and seconds (2 * 3600).
    """
    return request.param
# DateOffset instances appended to the parameter lists of the ``not_hourly``,
# ``not_daily`` and ``mismatched_freq`` fixtures.
_common_mismatch = [
    pd.offsets.YearBegin(2),
    pd.offsets.MonthBegin(1),
    pd.offsets.Minute(),
]
@pytest.fixture(
    params=[
        pd.Timedelta(minutes=30).to_pytimedelta(),
        np.timedelta64(30, "s"),
        pd.Timedelta(seconds=30),
    ]
    # plus the offsets shared by all the mismatch fixtures
    + _common_mismatch
)
def not_hourly(request):
    """
    Several timedelta-like and DateOffset instances that are _not_
    compatible with Hourly frequencies.

    The timedelta-like entries are a 30-minute and two 30-second spans.
    """
    return request.param
@pytest.fixture(
    params=[
        np.timedelta64(4, "h"),
        pd.Timedelta(hours=23).to_pytimedelta(),
        pd.Timedelta("23:00:00"),
    ]
    # plus the offsets shared by all the mismatch fixtures
    + _common_mismatch
)
def not_daily(request):
    """
    Several timedelta-like and DateOffset instances that are _not_
    compatible with Daily frequencies.

    The timedelta-like entries are a 4-hour and two 23-hour spans.
    """
    return request.param
@pytest.fixture(
    params=[
        np.timedelta64(365, "D"),
        pd.Timedelta(days=365).to_pytimedelta(),
        pd.Timedelta(days=365),
    ]
    # plus the offsets shared by all the mismatch fixtures
    + _common_mismatch
)
def mismatched_freq(request):
    """
    Several timedelta-like and DateOffset instances that are _not_
    compatible with Monthly or Annual frequencies.

    The timedelta-like entries are all 365-day spans.
    """
    return request.param
# ------------------------------------------------------------------
@pytest.fixture(
    # test ids are the ``__name__`` of each constructor
    params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__
)
def box_1d_array(request):
    """
    Fixture to test behavior for Index, Series, tm.to_array, numpy Array and list
    classes

    Returns the constructor itself, not an instance.
    """
    return request.param

View File

@@ -0,0 +1,39 @@
import operator
import numpy as np
import pytest
import pandas._testing as tm
from pandas.core.ops.array_ops import (
comparison_op,
na_logical_op,
)
def test_na_logical_op_2d():
    """``na_logical_op`` handles a 2D int / object pair that plain ``|`` rejects."""
    int_block = np.arange(8).reshape(4, 2)
    obj_block = int_block.astype(object)
    obj_block[0, 0] = np.nan

    # Check that we fall back to the vec_binop branch
    with pytest.raises(TypeError, match="unsupported operand type"):
        int_block | obj_block

    tm.assert_numpy_array_equal(
        na_logical_op(int_block, obj_block, operator.or_), obj_block
    )
def test_object_comparison_2d():
    """``comparison_op`` compares 2D object arrays elementwise."""
    grid = np.arange(9).reshape(3, 3).astype(object)
    transposed = grid.T

    # the grid equals its transpose only on the diagonal
    on_diagonal = np.eye(3).astype(bool)
    tm.assert_numpy_array_equal(
        comparison_op(grid, transposed, operator.eq), on_diagonal
    )

    # Ensure that cython doesn't raise on non-writeable arg, which
    #  we can get from np.broadcast_to
    transposed.flags.writeable = False
    tm.assert_numpy_array_equal(
        comparison_op(grid, transposed, operator.ne), ~on_diagonal
    )

View File

@@ -0,0 +1,25 @@
import numpy as np
from pandas import (
Categorical,
Series,
)
import pandas._testing as tm
class TestCategoricalComparisons:
    """Equality comparisons involving categorical Series."""

    def test_categorical_nan_equality(self):
        """A categorical Series equals itself everywhere except at NaN."""
        cat = Series(Categorical(["a", "b", "c", np.nan]))
        tm.assert_series_equal(cat == cat, Series([True, True, True, False]))

    def test_categorical_tuple_equality(self):
        """Comparing against a tuple treats the tuple as a single scalar."""
        # GH 18050
        ser = Series([(0, 0), (0, 1), (0, 0), (1, 0), (1, 1)])
        expected = Series([True, False, True, False, False])

        # same answer for the plain Series and its categorical version
        for obj in (ser, ser.astype("category")):
            tm.assert_series_equal(obj == (0, 0), expected)

View File

@@ -0,0 +1,316 @@
import operator
import numpy as np
import pytest
from pandas.core.dtypes.common import is_list_like
import pandas as pd
from pandas import (
Categorical,
Index,
Interval,
IntervalIndex,
Period,
Series,
Timedelta,
Timestamp,
date_range,
period_range,
timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import (
BooleanArray,
IntervalArray,
)
from pandas.tests.arithmetic.common import get_upcast_box
@pytest.fixture(
    params=[
        # integer endpoints (no missing value)
        (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])),
        # float endpoints, NaN in the last position
        (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])),
        # timedelta endpoints, NaT inserted at position 3
        (
            timedelta_range("0 days", periods=3).insert(3, pd.NaT),
            timedelta_range("1 day", periods=3).insert(3, pd.NaT),
        ),
        # tz-naive datetime endpoints, NaT inserted at position 3
        (
            date_range("20170101", periods=3).insert(3, pd.NaT),
            date_range("20170102", periods=3).insert(3, pd.NaT),
        ),
        # tz-aware datetime endpoints, NaT inserted at position 3
        (
            date_range("20170101", periods=3, tz="US/Eastern").insert(3, pd.NaT),
            date_range("20170102", periods=3, tz="US/Eastern").insert(3, pd.NaT),
        ),
    ],
    # test ids are the dtype of the left endpoints
    ids=lambda x: str(x[0].dtype),
)
def left_right_dtypes(request):
    """
    Fixture for building an IntervalArray from various dtypes

    Each parameter is a ``(left, right)`` pair of endpoint indexes.
    """
    return request.param
@pytest.fixture
def interval_array(left_right_dtypes):
    """
    IntervalArray built from the ``left_right_dtypes`` endpoint pair, so it
    covers several dtypes and contains NA where the endpoints do.
    """
    lower, upper = left_right_dtypes
    result = IntervalArray.from_arrays(lower, upper)
    return result
def create_categorical_intervals(left, right, closed="right"):
    """Return a Categorical of the intervals bounded by ``left`` and ``right``."""
    intervals = IntervalIndex.from_arrays(left, right, closed)
    return Categorical(intervals)
def create_series_intervals(left, right, closed="right"):
    """Return a Series of the intervals bounded by ``left`` and ``right``."""
    intervals = IntervalArray.from_arrays(left, right, closed)
    return Series(intervals)
def create_series_categorical_intervals(left, right, closed="right"):
    """Return a Series holding a Categorical of the requested intervals."""
    intervals = IntervalIndex.from_arrays(left, right, closed)
    categorical = Categorical(intervals)
    return Series(categorical)
class TestComparison:
    """
    Tests for ``==`` / ``!=`` between interval data and scalars or list-likes.

    Most tests compute the expected result by applying the operator element
    by element (see ``elementwise_comparison``).
    """

    @pytest.fixture(params=[operator.eq, operator.ne])
    def op(self, request):
        # the comparison operator under test: equality or inequality
        return request.param

    @pytest.fixture(
        params=[
            IntervalArray.from_arrays,
            IntervalIndex.from_arrays,
            create_categorical_intervals,
            create_series_intervals,
            create_series_categorical_intervals,
        ],
        ids=[
            "IntervalArray",
            "IntervalIndex",
            "Categorical[Interval]",
            "Series[Interval]",
            "Series[Categorical[Interval]]",
        ],
    )
    def interval_constructor(self, request):
        """
        Fixture for all pandas native interval constructors.
        In this class it builds the ``other`` operand that an IntervalArray
        is compared against.
        """
        return request.param

    def elementwise_comparison(self, op, interval_array, other):
        """
        Helper that performs elementwise comparisons between `interval_array`
        and `other`

        A non-list-like `other` is repeated to the length of `interval_array`.
        Returns a Series (indexed like `other`) when `other` is a Series,
        otherwise a numpy array.
        """
        other = other if is_list_like(other) else [other] * len(interval_array)
        expected = np.array([op(x, y) for x, y in zip(interval_array, other)])
        if isinstance(other, Series):
            return Series(expected, index=other.index)
        return expected

    def test_compare_scalar_interval(self, op, interval_array):
        # matches first interval
        other = interval_array[0]
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

        # matches on a single endpoint but not both
        other = Interval(interval_array.left[0], interval_array.right[1])
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed):
        # array and scalar share endpoints but may differ in ``closed``
        interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = Interval(0, 1, closed=other_closed)

        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_scalar_na(
        self, op, interval_array, nulls_fixture, box_with_array, request
    ):
        box = box_with_array

        if box is pd.DataFrame:
            # only integer / unsigned / float subtypes are expected to pass here
            if interval_array.dtype.subtype.kind not in "iuf":
                mark = pytest.mark.xfail(
                    reason="raises on DataFrame.transpose (would be fixed by EA2D)"
                )
                request.node.add_marker(mark)

        obj = tm.box_expected(interval_array, box)
        result = op(obj, nulls_fixture)

        if nulls_fixture is pd.NA:
            # GH#31882
            # comparing with pd.NA gives a fully-masked BooleanArray
            exp = np.ones(interval_array.shape, dtype=bool)
            expected = BooleanArray(exp, exp)
        else:
            expected = self.elementwise_comparison(op, interval_array, nulls_fixture)

        if not (box is Index and nulls_fixture is pd.NA):
            # don't cast expected from BooleanArray to ndarray[object]
            xbox = get_upcast_box(obj, nulls_fixture, True)
            expected = tm.box_expected(expected, xbox)

        tm.assert_equal(result, expected)

        # the reflected comparison must give the same answer
        rev = op(nulls_fixture, obj)
        tm.assert_equal(rev, expected)

    @pytest.mark.parametrize(
        "other",
        [
            0,
            1.0,
            True,
            "foo",
            Timestamp("2017-01-01"),
            Timestamp("2017-01-01", tz="US/Eastern"),
            Timedelta("0 days"),
            Period("2017-01-01", "D"),
        ],
    )
    def test_compare_scalar_other(self, op, interval_array, other):
        # scalars that are not Interval objects
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_interval(self, op, interval_array, interval_constructor):
        # same endpoints
        other = interval_constructor(interval_array.left, interval_array.right)
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

        # different endpoints
        other = interval_constructor(
            interval_array.left[::-1], interval_array.right[::-1]
        )
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

        # all nan endpoints
        other = interval_constructor([np.nan] * 4, [np.nan] * 4)
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

    def test_compare_list_like_interval_mixed_closed(
        self, op, interval_constructor, closed, other_closed
    ):
        # both operands share endpoints but may differ in ``closed``
        interval_array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed)
        other = interval_constructor(range(2), range(1, 3), closed=other_closed)

        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            (
                Interval(0, 1),
                Interval(Timedelta("1 day"), Timedelta("2 days")),
                Interval(4, 5, "both"),
                Interval(10, 20, "neither"),
            ),
            (0, 1.5, Timestamp("20170103"), np.nan),
            (
                Timestamp("20170102", tz="US/Eastern"),
                Timedelta("2 days"),
                "baz",
                pd.NaT,
            ),
        ],
    )
    def test_compare_list_like_object(self, op, interval_array, other):
        # ``other`` is a length-4 tuple of mixed-type objects
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    def test_compare_list_like_nan(self, op, interval_array, nulls_fixture):
        # a list holding the same null value four times
        other = [nulls_fixture] * 4
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)

        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            np.arange(4, dtype="int64"),
            np.arange(4, dtype="float64"),
            date_range("2017-01-01", periods=4),
            date_range("2017-01-01", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
            period_range("2017-01-01", periods=4, freq="D"),
            Categorical(list("abab")),
            Categorical(date_range("2017-01-01", periods=4)),
            pd.array(list("abcd")),
            pd.array(["foo", 3.14, None, object()], dtype=object),
        ],
        # test ids are the dtype of each list-like
        ids=lambda x: str(x.dtype),
    )
    def test_compare_list_like_other(self, op, interval_array, other):
        # length-4 list-likes whose dtype is not interval
        result = op(interval_array, other)
        expected = self.elementwise_comparison(op, interval_array, other)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("length", [1, 3, 5])
    @pytest.mark.parametrize("other_constructor", [IntervalArray, list])
    def test_compare_length_mismatch_errors(self, op, other_constructor, length):
        # the array has 4 elements; ``other`` has 1, 3 or 5
        interval_array = IntervalArray.from_arrays(range(4), range(1, 5))
        other = other_constructor([Interval(0, 1)] * length)
        with pytest.raises(ValueError, match="Lengths must match to compare"):
            op(interval_array, other)

    @pytest.mark.parametrize(
        "constructor, expected_type, assert_func",
        [
            (IntervalIndex, np.array, tm.assert_numpy_array_equal),
            (Series, Series, tm.assert_series_equal),
        ],
    )
    def test_index_series_compat(self, op, constructor, expected_type, assert_func):
        # IntervalIndex/Series that rely on IntervalArray for comparisons
        breaks = range(4)
        index = constructor(IntervalIndex.from_breaks(breaks))

        # scalar comparisons
        other = index[0]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        other = breaks[0]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        # list-like comparisons
        other = IntervalArray.from_breaks(breaks)
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

        other = [index[0], breaks[0], "foo"]
        result = op(index, other)
        expected = expected_type(self.elementwise_comparison(op, index, other))
        assert_func(result, expected)

    @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None])
    def test_comparison_operations(self, scalars):
        # GH #28981
        # an interval Series compared with a non-interval scalar is all False
        expected = Series([False, False])
        s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval")
        result = s == scalars
        tm.assert_series_equal(result, expected)

View File

@@ -0,0 +1,379 @@
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
# behave identically.
# Specifically for object dtype
import datetime
from decimal import Decimal
import operator
import numpy as np
import pytest
import pandas as pd
from pandas import (
Series,
Timestamp,
)
import pandas._testing as tm
from pandas.core import ops
# ------------------------------------------------------------------
# Comparisons
class TestObjectComparisons:
    """Comparison-operator tests for object-dtype Series."""

    def test_comparison_object_numeric_nas(self, comparison_op):
        """Object-dtype numbers compare the same way as their float versions."""
        obj_ser = Series(np.random.randn(10), dtype=object)
        obj_shifted = obj_ser.shift(2)

        result = comparison_op(obj_ser, obj_shifted)
        expected = comparison_op(obj_ser.astype(float), obj_shifted.astype(float))
        tm.assert_series_equal(result, expected)

    def test_object_comparisons(self):
        """Compare a string Series holding a NaN against the scalar "a"."""
        ser = Series(["a", "b", np.nan, "c", "a"])

        tm.assert_series_equal(ser == "a", Series([True, False, False, False, True]))

        tm.assert_series_equal(ser < "a", Series([False] * 5))

        tm.assert_series_equal(ser != "a", -(ser == "a"))

    @pytest.mark.parametrize("dtype", [None, object])
    def test_more_na_comparisons(self, dtype):
        """NaN never compares equal, neither to another NaN nor to ``np.nan``."""
        left = Series(["a", np.nan, "c"], dtype=dtype)
        right = Series(["a", np.nan, "d"], dtype=dtype)

        # (deferred comparison, expected values), checked in this order
        cases = [
            (lambda: left == right, [True, False, False]),
            (lambda: left != right, [False, True, True]),
            (lambda: left == np.nan, [False, False, False]),
            (lambda: left != np.nan, [True, True, True]),
        ]
        for compare, values in cases:
            tm.assert_series_equal(compare(), Series(values))
# ------------------------------------------------------------------
# Arithmetic
class TestArithmetic:
    """Arithmetic-operator tests for object-dtype Series and Index objects."""

    # TODO: parametrize
    def test_pow_ops_object(self):
        # GH#22922
        # pow is weird with masking & 1, so testing here
        a = Series([1, np.nan, 1, np.nan], dtype=object)
        b = Series([1, np.nan, np.nan, 1], dtype=object)
        result = a**b
        # expected comes from the same operation on the underlying arrays
        expected = Series(a.values**b.values, dtype=object)
        tm.assert_series_equal(result, expected)

        result = b**a
        expected = Series(b.values**a.values, dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("op", [operator.add, ops.radd])
    @pytest.mark.parametrize("other", ["category", "Int64"])
    def test_add_extension_scalar(self, other, box_with_array, op):
        # GH#22378
        # Check that scalars satisfying is_extension_array_dtype(obj)
        # do not incorrectly try to dispatch to an ExtensionArray operation

        arr = Series(["a", "b", "c"])
        # expected is the plain elementwise application of the operator
        expected = Series([op(x, other) for x in arr])

        arr = tm.box_expected(arr, box_with_array)
        expected = tm.box_expected(expected, box_with_array)

        result = op(arr, other)
        tm.assert_equal(result, expected)

    def test_objarr_add_str(self, box_with_array):
        # appending a string leaves the NaN entry as NaN
        ser = Series(["x", np.nan, "x"])
        expected = Series(["xa", np.nan, "xa"])

        ser = tm.box_expected(ser, box_with_array)
        expected = tm.box_expected(expected, box_with_array)

        result = ser + "a"
        tm.assert_equal(result, expected)

    def test_objarr_radd_str(self, box_with_array):
        # prepending a string leaves the NaN entry as NaN
        ser = Series(["x", np.nan, "x"])
        expected = Series(["ax", np.nan, "ax"])

        ser = tm.box_expected(ser, box_with_array)
        expected = tm.box_expected(expected, box_with_array)

        result = "a" + ser
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            [1, 2, 3],
            [1.1, 2.2, 3.3],
            [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
            ["x", "y", 1],
        ],
    )
    @pytest.mark.parametrize("dtype", [None, object])
    def test_objarr_radd_str_invalid(self, dtype, data, box_with_array):
        ser = Series(data, dtype=dtype)

        ser = tm.box_expected(ser, box_with_array)
        # any one of these messages is accepted
        msg = "|".join(
            [
                "can only concatenate str",
                "did not contain a loop with signature matching types",
                "unsupported operand type",
                "must be str",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            "foo_" + ser

    @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
    def test_objarr_add_invalid(self, op, box_with_array):
        # invalid ops
        box = box_with_array

        obj_ser = tm.makeObjectSeries()
        obj_ser.name = "objects"

        obj_ser = tm.box_expected(obj_ser, box)
        msg = "|".join(
            ["can only concatenate str", "unsupported operand type", "must be str"]
        )
        # NOTE(review): only the message is pinned here, not the exception type
        with pytest.raises(Exception, match=msg):
            op(obj_ser, 1)
        with pytest.raises(Exception, match=msg):
            op(obj_ser, np.array(1, dtype=np.int64))

    # TODO: Moved from tests.series.test_operators; needs cleanup
    def test_operators_na_handling(self):
        # NaN stays NaN when a string is added on either side
        ser = Series(["foo", "bar", "baz", np.nan])
        result = "prefix_" + ser
        expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
        tm.assert_series_equal(result, expected)

        result = ser + "_suffix"
        expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
        tm.assert_series_equal(result, expected)

    # TODO: parametrize over box
    @pytest.mark.parametrize("dtype", [None, object])
    def test_series_with_dtype_radd_timedelta(self, dtype):
        # note this test is _not_ aimed at timedelta64-dtyped Series
        ser = Series(
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
            dtype=dtype,
        )
        expected = Series(
            [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")]
        )

        result = pd.Timedelta("3 days") + ser
        tm.assert_series_equal(result, expected)

        result = ser + pd.Timedelta("3 days")
        tm.assert_series_equal(result, expected)

    # TODO: cleanup & parametrize over box
    def test_mixed_timezone_series_ops_object(self):
        # GH#13043
        ser = Series(
            [
                Timestamp("2015-01-01", tz="US/Eastern"),
                Timestamp("2015-01-01", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        assert ser.dtype == object

        exp = Series(
            [
                Timestamp("2015-01-02", tz="US/Eastern"),
                Timestamp("2015-01-02", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp)
        tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp)

        # object series & object series
        ser2 = Series(
            [
                Timestamp("2015-01-03", tz="US/Eastern"),
                Timestamp("2015-01-05", tz="Asia/Tokyo"),
            ],
            name="xxx",
        )
        assert ser2.dtype == object
        exp = Series([pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx")
        tm.assert_series_equal(ser2 - ser, exp)
        tm.assert_series_equal(ser - ser2, -exp)

        # object-dtype Series of Timedelta values
        ser = Series(
            [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
            name="xxx",
            dtype=object,
        )
        assert ser.dtype == object

        exp = Series([pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx")
        tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp)
        tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp)

    # TODO: cleanup & parametrize over box
    def test_iadd_preserves_name(self):
        # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
        ser = Series([1, 2, 3])
        ser.index.name = "foo"

        ser.index += 1
        assert ser.index.name == "foo"

        ser.index -= 1
        assert ser.index.name == "foo"

    def test_add_string(self):
        # from bug report
        index = pd.Index(["a", "b", "c"])
        index2 = index + "foo"

        assert "a" not in index2
        assert "afoo" in index2

    def test_iadd_string(self):
        index = pd.Index(["a", "b", "c"])
        # doesn't fail test unless there is a check before `+=`
        assert "a" in index

        index += "_x"
        assert "a_x" in index

    def test_add(self):
        index = tm.makeStringIndex(100)
        # adding the index to itself concatenates each string with itself
        expected = pd.Index(index.values * 2)
        tm.assert_index_equal(index + index, expected)
        tm.assert_index_equal(index + index.tolist(), expected)
        tm.assert_index_equal(index.tolist() + index, expected)

        # test add and radd
        index = pd.Index(list("abc"))
        expected = pd.Index(["a1", "b1", "c1"])
        tm.assert_index_equal(index + "1", expected)
        expected = pd.Index(["1a", "1b", "1c"])
        tm.assert_index_equal("1" + index, expected)

    def test_sub_fail(self):
        index = tm.makeStringIndex(100)

        # subtraction must raise for a scalar, an Index and a list operand
        msg = "unsupported operand type|Cannot broadcast"
        with pytest.raises(TypeError, match=msg):
            index - "a"
        with pytest.raises(TypeError, match=msg):
            index - index
        with pytest.raises(TypeError, match=msg):
            index - index.tolist()
        with pytest.raises(TypeError, match=msg):
            index.tolist() - index

    def test_sub_object(self):
        # GH#19369
        index = pd.Index([Decimal(1), Decimal(2)])
        expected = pd.Index([Decimal(0), Decimal(1)])

        result = index - Decimal(1)
        tm.assert_index_equal(result, expected)

        result = index - pd.Index([Decimal(1), Decimal(1)])
        tm.assert_index_equal(result, expected)

        # operands that cannot be subtracted from a Decimal must raise
        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            index - "foo"

        with pytest.raises(TypeError, match=msg):
            index - np.array([2, "foo"], dtype=object)

    def test_rsub_object(self, fixed_now_ts):
        # GH#19369
        index = pd.Index([Decimal(1), Decimal(2)])
        expected = pd.Index([Decimal(1), Decimal(0)])

        result = Decimal(2) - index
        tm.assert_index_equal(result, expected)

        result = np.array([Decimal(2), Decimal(2)]) - index
        tm.assert_index_equal(result, expected)

        # the reflected subtraction with incompatible operands must raise
        msg = "unsupported operand type"
        with pytest.raises(TypeError, match=msg):
            "foo" - index

        with pytest.raises(TypeError, match=msg):
            np.array([True, fixed_now_ts]) - index
class MyIndex(pd.Index):
    """Minimal Index subclass that counts how often ``__add__`` is invoked."""

    # number of ``__add__`` calls made on this instance (``__radd__`` included)
    _calls: int

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None):
        """Build an instance around ``values`` with a zeroed call counter."""
        instance = object.__new__(cls)
        instance._calls = 0
        instance._data, instance._name = values, name
        instance._reset_identity()
        return instance

    def __add__(self, other):
        """Record the call and return a fresh instance over the same data."""
        self._calls = self._calls + 1
        fresh = self._simple_new(self._data)
        return fresh

    def __radd__(self, other):
        """Reflected addition; handled exactly like ``__add__``."""
        return self.__add__(other)
@pytest.mark.parametrize(
    "other",
    [
        [datetime.timedelta(1), datetime.timedelta(2)],
        [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)],
        [pd.Period("2000"), pd.Period("2001")],
        ["a", "b"],
    ],
    ids=["timedelta", "datetime", "period", "object"],
)
def test_index_ops_defer_to_unknown_subclasses(other):
    """Adding a ``MyIndex`` to a regular Index must call ``MyIndex``'s own op."""
    # https://github.com/pandas-dev/pandas/issues/31109
    dates = [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]
    tracked = MyIndex._simple_new(np.array(dates, dtype=object))

    summed = pd.Index(other) + tracked

    assert isinstance(summed, MyIndex)
    # the subclass op ran exactly once
    assert tracked._calls == 1

View File

@@ -0,0 +1,121 @@
import operator
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.arrays import FloatingArray
@pytest.fixture
def data():
    """Boolean array of alternating True/False values with two missing entries."""
    pair = [True, False]
    values = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(values, dtype="boolean")
@pytest.fixture
def left_array():
    """Boolean array: three True, then three False, then three missing values."""
    values = [True] * 3 + [False] * 3 + [None] * 3
    return pd.array(values, dtype="boolean")
@pytest.fixture
def right_array():
    """Boolean array cycling through True, False, missing three times."""
    values = [True, False, None] * 3
    return pd.array(values, dtype="boolean")
# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------
@pytest.mark.parametrize(
    "opname, exp",
    [
        ("add", [True, True, None, True, False, None, None, None, None]),
        ("mul", [True, False, None, False, False, None, None, None, None]),
    ],
    ids=["add", "mul"],
)
def test_add_mul(left_array, right_array, opname, exp):
    """Adding / multiplying boolean arrays gives the listed boolean result."""
    result = getattr(operator, opname)(left_array, right_array)
    tm.assert_extension_array_equal(result, pd.array(exp, dtype="boolean"))
def test_sub(left_array, right_array):
    """Subtracting boolean arrays raises TypeError with numpy's message."""
    pattern = (
        r"numpy boolean subtract, the `-` operator, is (?:deprecated|not supported), "
        r"use the bitwise_xor, the `\^` operator, or the logical_xor function instead\."
    )
    with pytest.raises(TypeError, match=pattern):
        left_array - right_array
def test_div(left_array, right_array):
result = left_array / right_array
expected = FloatingArray(
np.array(
[1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan],
dtype="float64",
),
np.array([False, False, True, False, False, True, True, True, True]),
)
tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize(
    "opname",
    [
        "floordiv",
        "mod",
        pytest.param(
            "pow", marks=pytest.mark.xfail(reason="TODO follow int8 behaviour? GH34686")
        ),
    ],
)
def test_op_int8(left_array, right_array, opname):
    """The op on boolean arrays matches the same op on their Int8 versions."""
    func = getattr(operator, opname)
    result = func(left_array, right_array)

    left_int8 = left_array.astype("Int8")
    right_int8 = right_array.astype("Int8")
    tm.assert_extension_array_equal(result, func(left_int8, right_int8))
# Test generic characteristics / errors
# -----------------------------------------------------------------------------
def test_error_invalid_values(data, all_arithmetic_operators):
    """Arithmetic between a boolean Series and incompatible operands raises."""
    # invalid ops
    op = all_arithmetic_operators
    ser = pd.Series(data)
    bound_op = getattr(ser, op)

    # invalid scalars
    msg = (
        "did not contain a loop with signature matching types|"
        "BooleanArray cannot perform the operation|"
        "not supported for the input types, and the inputs could not be safely coerced "
        "to any supported types according to the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=msg):
        bound_op("foo")

    msg = (
        r"unsupported operand type\(s\) for|"
        "Concatenation operation is not implemented for NumPy arrays"
    )
    with pytest.raises(TypeError, match=msg):
        bound_op(pd.Timestamp("20180101"))

    # invalid array-likes
    if op in ("__mul__", "__rmul__"):
        # TODO(extension) numpy's mul with object array sees booleans as numbers
        return

    msg = (
        r"unsupported operand type\(s\) for|can only concatenate str|"
        "not all arguments converted during string formatting"
    )
    with pytest.raises(TypeError, match=msg):
        bound_op(pd.Series("foo", index=ser.index))

View File

@@ -0,0 +1,53 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
def test_astype():
    """``astype`` on a boolean array, with and without missing values."""
    # with missing values
    with_na = pd.array([True, False, None], dtype="boolean")

    with pytest.raises(ValueError, match="cannot convert NA to integer"):
        with_na.astype("int64")

    with pytest.raises(ValueError, match="cannot convert float NaN to"):
        with_na.astype("bool")

    as_float = with_na.astype("float64")
    tm.assert_numpy_array_equal(as_float, np.array([1, 0, np.nan], dtype="float64"))

    as_str = with_na.astype("str")
    tm.assert_numpy_array_equal(
        as_str, np.array(["True", "False", "<NA>"], dtype="<U5")
    )

    # no missing values
    no_na = pd.array([True, False, True], dtype="boolean")

    as_int = no_na.astype("int64")
    tm.assert_numpy_array_equal(as_int, np.array([1, 0, 1], dtype="int64"))

    as_bool = no_na.astype("bool")
    tm.assert_numpy_array_equal(as_bool, np.array([True, False, True], dtype="bool"))
def test_astype_to_boolean_array():
    """``astype`` to the boolean extension dtype yields an equal array."""
    # astype to BooleanArray
    source = pd.array([True, False, None], dtype="boolean")

    # The target dtype is given once as a string alias and once as an instance.
    for target in ("boolean", pd.BooleanDtype()):
        converted = source.astype(target)
        tm.assert_extension_array_equal(converted, source)
def test_astype_to_integer_array():
    """``astype("Int64")`` maps True/False/NA to 1/0/NA."""
    # astype to IntegerArray
    source = pd.array([True, False, None], dtype="boolean")
    converted = source.astype("Int64")
    tm.assert_extension_array_equal(
        converted, pd.array([1, 0, None], dtype="Int64")
    )

View File

@@ -0,0 +1,58 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.tests.arrays.masked_shared import ComparisonOps
@pytest.fixture
def data():
    """Return a 100-element boolean array containing two missing values."""
    pair = [True, False]
    raw = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(raw, dtype="boolean")
@pytest.fixture
def dtype():
    """Return a fresh ``BooleanDtype`` instance."""
    boolean_dtype = pd.BooleanDtype()
    return boolean_dtype
class TestComparisonOps(ComparisonOps):
    """Comparison-operator tests for the boolean extension array."""

    def test_compare_scalar(self, data, comparison_op):
        """Compare the fixture array against the scalar ``True``."""
        self._compare_other(data, comparison_op, True)

    def test_compare_array(self, data, comparison_op):
        """Compare against an all-True boolean array, ndarray and Series."""
        all_true = [True] * len(data)
        others = (
            pd.array(all_true, dtype="boolean"),
            np.array(all_true),
            pd.Series(all_true),
        )
        for other in others:
            self._compare_other(data, comparison_op, other)

    @pytest.mark.parametrize("other", [True, False, pd.NA])
    def test_scalar(self, other, comparison_op, dtype):
        """Run the shared scalar test with boolean-specific scalars."""
        ComparisonOps.test_scalar(self, other, comparison_op, dtype)

    def test_array(self, comparison_op):
        """Array-vs-array comparison combines the masks and leaves inputs alone."""
        left_values = [True] * 3 + [False] * 3 + [None] * 3
        right_values = [True, False, None] * 3
        left = pd.array(left_values, dtype="boolean")
        right = pd.array(right_values, dtype="boolean")

        result = comparison_op(left, right)

        expected = BooleanArray(
            comparison_op(left._data, right._data), left._mask | right._mask
        )
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = None
        tm.assert_extension_array_equal(
            left, pd.array(left_values, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            right, pd.array(right_values, dtype="boolean")
        )

View File

@@ -0,0 +1,323 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.core.arrays.boolean import coerce_to_array
def test_boolean_array_constructor():
    """``BooleanArray(values, mask)`` builds the array and validates its inputs."""
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    built = BooleanArray(values, mask)
    tm.assert_extension_array_equal(
        built, pd.array([True, False, True, None], dtype="boolean")
    )

    # Each row: expected exception, message pattern, then the two arguments.
    invalid_calls = [
        (TypeError, "values should be boolean numpy array", values.tolist(), mask),
        (TypeError, "mask should be boolean numpy array", values, mask.tolist()),
        (TypeError, "values should be boolean numpy array", values.astype(int), mask),
        (TypeError, "mask should be boolean numpy array", values, None),
        (ValueError, "values.shape must match mask.shape", values.reshape(1, -1), mask),
        (ValueError, "values.shape must match mask.shape", values, mask.reshape(1, -1)),
    ]
    for exc_type, pattern, bad_values, bad_mask in invalid_calls:
        with pytest.raises(exc_type, match=pattern):
            BooleanArray(bad_values, bad_mask)
def test_boolean_array_constructor_copy():
    """The constructor shares its inputs by default and copies with ``copy=True``."""
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    shared = BooleanArray(values, mask)
    assert shared._data is values
    assert shared._mask is mask

    copied = BooleanArray(values, mask, copy=True)
    assert copied._data is not values
    assert copied._mask is not mask
def test_to_boolean_array():
    """``pd.array(..., dtype="boolean")`` accepts lists and bool/object ndarrays."""
    no_na_expected = BooleanArray(
        np.array([True, False, True]), np.array([False, False, False])
    )
    no_na_inputs = (
        [True, False, True],
        np.array([True, False, True]),
        np.array([True, False, True], dtype=object),
    )
    for raw in no_na_inputs:
        tm.assert_extension_array_equal(
            pd.array(raw, dtype="boolean"), no_na_expected
        )

    # with missing values
    na_expected = BooleanArray(
        np.array([True, False, True]), np.array([False, False, True])
    )
    na_inputs = (
        [True, False, None],
        np.array([True, False, None], dtype=object),
    )
    for raw in na_inputs:
        tm.assert_extension_array_equal(pd.array(raw, dtype="boolean"), na_expected)
def test_to_boolean_array_all_none():
    """An all-``None`` input converts to a fully masked boolean array."""
    all_true = np.array([True, True, True])
    fully_masked = BooleanArray(all_true, np.array([True, True, True]))

    from_list = pd.array([None, None, None], dtype="boolean")
    tm.assert_extension_array_equal(from_list, fully_masked)

    from_object_ndarray = pd.array(
        np.array([None, None, None], dtype=object), dtype="boolean"
    )
    tm.assert_extension_array_equal(from_object_ndarray, fully_masked)
@pytest.mark.parametrize(
    "a, b",
    [
        ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
        ([True, np.nan], [True, None]),
        ([True, pd.NA], [True, None]),
        ([np.nan, np.nan], [None, None]),
        (np.array([np.nan, np.nan], dtype=float), [None, None]),
    ],
)
def test_to_boolean_array_missing_indicators(a, b):
    """``None``, ``np.nan`` and ``pd.NA`` all convert to the same missing value."""
    converted = pd.array(a, dtype="boolean")
    reference = pd.array(b, dtype="boolean")
    tm.assert_extension_array_equal(converted, reference)
@pytest.mark.parametrize(
    "values",
    [
        ["foo", "bar"],
        ["1", "2"],
        # "foo",
        [1, 2],
        [1.0, 2.0],
        pd.date_range("20130101", periods=2),
        np.array(["foo"]),
        np.array([1, 2]),
        np.array([1.0, 2.0]),
        [np.nan, {"a": 1}],
    ],
)
def test_to_boolean_array_error(values):
    """Inputs that are not bool-like raise ``TypeError`` on conversion."""
    # error in converting existing arrays to BooleanArray
    with pytest.raises(TypeError, match="Need to pass bool-like value"):
        pd.array(values, dtype="boolean")
def test_to_boolean_array_from_integer_array():
    """NumPy arrays of 0/1 integers convert to boolean, with ``None`` as missing."""
    # Each pair: raw ndarray input, then the equivalent boolean values.
    cases = (
        (np.array([1, 0, 1, 0]), [True, False, True, False]),
        # with missing values
        (np.array([1, 0, 1, None]), [True, False, True, None]),
    )
    for raw, as_bools in cases:
        converted = pd.array(raw, dtype="boolean")
        tm.assert_extension_array_equal(
            converted, pd.array(as_bools, dtype="boolean")
        )
def test_to_boolean_array_from_float_array():
    """NumPy arrays of 0.0/1.0 floats convert to boolean, with NaN as missing."""
    # Each pair: raw ndarray input, then the equivalent boolean values.
    cases = (
        (np.array([1.0, 0.0, 1.0, 0.0]), [True, False, True, False]),
        # with missing values
        (np.array([1.0, 0.0, 1.0, np.nan]), [True, False, True, None]),
    )
    for raw, as_bools in cases:
        converted = pd.array(raw, dtype="boolean")
        tm.assert_extension_array_equal(
            converted, pd.array(as_bools, dtype="boolean")
        )
def test_to_boolean_array_integer_like():
    """Plain lists of 0/1 integers convert to boolean, with ``None`` as missing."""
    # integers of 0's and 1's
    from_ints = pd.array([1, 0, 1, 0], dtype="boolean")
    tm.assert_extension_array_equal(
        from_ints, pd.array([True, False, True, False], dtype="boolean")
    )

    # with missing values
    from_ints_with_na = pd.array([1, 0, 1, None], dtype="boolean")
    tm.assert_extension_array_equal(
        from_ints_with_na, pd.array([True, False, True, None], dtype="boolean")
    )
def test_coerce_to_array():
# TODO this is currently not public API
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(*coerce_to_array(values, mask=mask))
expected = BooleanArray(values, mask)
tm.assert_extension_array_equal(result, expected)
assert result._data is values
assert result._mask is mask
result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
expected = BooleanArray(values, mask)
tm.assert_extension_array_equal(result, expected)
assert result._data is not values
assert result._mask is not mask
# mixed missing from values and mask
values = [True, False, None, False]
mask = np.array([False, False, False, True], dtype="bool")
result = BooleanArray(*coerce_to_array(values, mask=mask))
expected = BooleanArray(
np.array([True, False, True, True]), np.array([False, False, True, True])
)
tm.assert_extension_array_equal(result, expected)
result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask))
tm.assert_extension_array_equal(result, expected)
result = BooleanArray(*coerce_to_array(values, mask=mask.tolist()))
tm.assert_extension_array_equal(result, expected)
# raise errors for wrong dimension
values = np.array([True, False, True, False], dtype="bool")
mask = np.array([False, False, False, True], dtype="bool")
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
coerce_to_array(values.reshape(1, -1))
with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
coerce_to_array(values, mask=mask.reshape(1, -1))
def test_coerce_to_array_from_boolean_array():
    """``coerce_to_array`` accepts a ``BooleanArray`` but rejects an extra mask."""
    # passing BooleanArray to coerce_to_array
    raw_values = np.array([True, False, True, False], dtype="bool")
    raw_mask = np.array([False, False, False, True], dtype="bool")
    source = BooleanArray(raw_values, raw_mask)

    roundtrip = BooleanArray(*coerce_to_array(source))
    tm.assert_extension_array_equal(roundtrip, source)
    # no copy
    assert roundtrip._data is source._data
    assert roundtrip._mask is source._mask

    copied = BooleanArray(*coerce_to_array(source), copy=True)
    tm.assert_extension_array_equal(copied, source)
    assert copied._data is not source._data
    assert copied._mask is not source._mask

    with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
        coerce_to_array(source, mask=raw_mask)
def test_coerce_to_numpy_array():
    """``np.array(boolean_array)`` gives object dtype unless ``bool`` is forced."""
    # with missing values -> object dtype
    with_na = pd.array([True, False, None], dtype="boolean")
    tm.assert_numpy_array_equal(
        np.array(with_na), np.array([True, False, pd.NA], dtype="object")
    )

    # also with no missing values -> object dtype
    no_na = pd.array([True, False, True], dtype="boolean")
    tm.assert_numpy_array_equal(
        np.array(no_na), np.array([True, False, True], dtype="object")
    )

    # force bool dtype
    tm.assert_numpy_array_equal(
        np.array(no_na, dtype="bool"), np.array([True, False, True], dtype="bool")
    )

    # with missing values will raise error
    with_na = pd.array([True, False, None], dtype="boolean")
    error_msg = (
        "cannot convert to 'bool'-dtype NumPy array with missing values. "
        "Specify an appropriate 'na_value' for this dtype."
    )
    with pytest.raises(ValueError, match=error_msg):
        np.array(with_na, dtype="bool")
def test_to_boolean_array_from_strings():
    """``_from_sequence_of_strings`` parses boolean-like strings and NaN."""
    strings = np.array(
        ["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object
    )
    parsed = BooleanArray._from_sequence_of_strings(strings)

    expected_values = np.array([True, False, True, True, False, False, False])
    expected_mask = np.array([False, False, False, False, False, False, True])
    tm.assert_extension_array_equal(
        parsed, BooleanArray(expected_values, expected_mask)
    )
def test_to_boolean_array_from_strings_invalid_string():
    """A string that is not boolean-like raises ``ValueError``."""
    unparseable = ["donkey"]
    with pytest.raises(ValueError, match="cannot be cast"):
        BooleanArray._from_sequence_of_strings(unparseable)
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
def test_to_numpy(box):
    """Check ``to_numpy`` dtype and ``na_value`` handling for Series and arrays."""
    make = pd.Series if box else pd.array

    def no_na():
        # Fresh container without missing values.
        return make([True, False, True], dtype="boolean")

    def with_na():
        # Fresh container whose last element is missing.
        return make([True, False, None], dtype="boolean")

    # default (with or without missing values) -> object dtype
    tm.assert_numpy_array_equal(
        no_na().to_numpy(), np.array([True, False, True], dtype="object")
    )
    tm.assert_numpy_array_equal(
        with_na().to_numpy(), np.array([True, False, pd.NA], dtype="object")
    )
    tm.assert_numpy_array_equal(
        with_na().to_numpy(dtype="str"),
        np.array([True, False, pd.NA], dtype="<U5"),
    )

    # no missing values -> can convert to bool, otherwise raises
    tm.assert_numpy_array_equal(
        no_na().to_numpy(dtype="bool"), np.array([True, False, True], dtype="bool")
    )
    with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
        with_na().to_numpy(dtype="bool")

    # specify dtype and na_value
    container = with_na()
    tm.assert_numpy_array_equal(
        container.to_numpy(dtype=object, na_value=None),
        np.array([True, False, None], dtype="object"),
    )
    tm.assert_numpy_array_equal(
        container.to_numpy(dtype=bool, na_value=False),
        np.array([True, False, False], dtype="bool"),
    )
    tm.assert_numpy_array_equal(
        container.to_numpy(dtype="int64", na_value=-99),
        np.array([1, 0, -99], dtype="int64"),
    )
    tm.assert_numpy_array_equal(
        container.to_numpy(dtype="float64", na_value=np.nan),
        np.array([1, 0, np.nan], dtype="float64"),
    )

    # converting to int or float without specifying na_value raises
    for numeric_dtype in ("int64", "float64"):
        pattern = f"cannot convert to '{numeric_dtype}'-dtype"
        with pytest.raises(ValueError, match=pattern):
            container.to_numpy(dtype=numeric_dtype)
def test_to_numpy_copy():
    """``to_numpy`` shares memory unless ``copy=True`` is requested."""
    # to_numpy can be zero-copy if no missing values
    source = pd.array([True, False, True], dtype="boolean")
    view = source.to_numpy(dtype=bool)
    view[0] = False
    # Writing through the result is visible in the source array.
    tm.assert_extension_array_equal(
        source, pd.array([False, False, True], dtype="boolean")
    )

    source = pd.array([True, False, True], dtype="boolean")
    detached = source.to_numpy(dtype=bool, copy=True)
    detached[0] = False
    # With copy=True the source array is left untouched.
    tm.assert_extension_array_equal(
        source, pd.array([True, False, True], dtype="boolean")
    )

View File

@@ -0,0 +1,126 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize(
    "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor]
)
def test_ufuncs_binary(ufunc):
    """Binary ufuncs on boolean arrays keep the input mask in the result."""
    masked = pd.array([True, False, None], dtype="boolean")

    def expected_array(raw):
        # Wrap the raw ufunc output and re-apply the mask of ``masked``.
        wrapped = pd.array(raw, dtype="boolean")
        wrapped[masked._mask] = np.nan
        return wrapped

    # two BooleanArrays
    tm.assert_extension_array_equal(
        ufunc(masked, masked), expected_array(ufunc(masked._data, masked._data))
    )

    ser = pd.Series(masked)
    series_result = ufunc(ser, masked)
    series_expected = pd.Series(ufunc(masked._data, masked._data), dtype="boolean")
    series_expected[masked._mask] = np.nan
    tm.assert_series_equal(series_result, series_expected)

    # Boolean with numpy array
    plain = np.array([True, True, False])
    tm.assert_extension_array_equal(
        ufunc(masked, plain), expected_array(ufunc(masked._data, plain))
    )
    tm.assert_extension_array_equal(
        ufunc(plain, masked), expected_array(ufunc(plain, masked._data))
    )

    # BooleanArray with scalar
    tm.assert_extension_array_equal(
        ufunc(masked, True), expected_array(ufunc(masked._data, True))
    )
    tm.assert_extension_array_equal(
        ufunc(True, masked), expected_array(ufunc(True, masked._data))
    )

    # not handled types
    not_handled_msg = (
        r"operand type\(s\) all returned NotImplemented from __array_ufunc__"
    )
    with pytest.raises(TypeError, match=not_handled_msg):
        ufunc(masked, "test")
@pytest.mark.parametrize("ufunc", [np.logical_not])
def test_ufuncs_unary(ufunc):
    """Unary ufuncs on boolean arrays and Series keep the input mask."""
    masked = pd.array([True, False, None], dtype="boolean")

    array_result = ufunc(masked)
    array_expected = pd.array(ufunc(masked._data), dtype="boolean")
    array_expected[masked._mask] = np.nan
    tm.assert_extension_array_equal(array_result, array_expected)

    series_result = ufunc(pd.Series(masked))
    series_expected = pd.Series(ufunc(masked._data), dtype="boolean")
    series_expected[masked._mask] = np.nan
    tm.assert_series_equal(series_result, series_expected)
def test_ufunc_numeric():
    """``np.sqrt`` on a boolean array is expected to return a ``Float32`` array."""
    # np.sqrt on np.bool returns float16, which we upcast to Float32
    #  bc we do not have Float16
    boolean_input = pd.array([True, False, None], dtype="boolean")

    roots = np.sqrt(boolean_input)

    tm.assert_extension_array_equal(
        roots, pd.array([1, 0, None], dtype="Float32")
    )
@pytest.mark.parametrize("values", [[True, False], [True, None]])
def test_ufunc_reduce_raises(values):
    """``np.add.reduce`` gives the sum, or ``pd.NA`` if the last value is missing."""
    arr = pd.array(values, dtype="boolean")

    reduced = np.add.reduce(arr)

    expected = pd.NA if arr[-1] is pd.NA else arr._data.sum()
    tm.assert_almost_equal(reduced, expected)
def test_value_counts_na():
arr = pd.array([True, False, pd.NA], dtype="boolean")
result = arr.value_counts(dropna=False)
expected = pd.Series([1, 1, 1], index=arr, dtype="Int64")
assert expected.index.dtype == arr.dtype
tm.assert_series_equal(result, expected)
result = arr.value_counts(dropna=True)
expected = pd.Series([1, 1], index=arr[:-1], dtype="Int64")
assert expected.index.dtype == arr.dtype
tm.assert_series_equal(result, expected)
def test_value_counts_with_normalize():
ser = pd.Series([True, False, pd.NA], dtype="boolean")
result = ser.value_counts(normalize=True)
expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64") / 2
assert expected.index.dtype == "boolean"
tm.assert_series_equal(result, expected)
def test_diff():
    """``diff`` on boolean data gives boolean output with NA next to missing values."""
    source = pd.array(
        [True, True, False, False, True, None, True, None, False], dtype="boolean"
    )
    expected_array = pd.array(
        [None, False, True, False, True, None, None, None, None], dtype="boolean"
    )

    array_diff = pd.core.algorithms.diff(source, 1)
    tm.assert_extension_array_equal(array_diff, expected_array)

    series_diff = pd.Series(source).diff()
    tm.assert_series_equal(series_diff, pd.Series(expected_array))

View File

@@ -0,0 +1,13 @@
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
@pytest.mark.parametrize("na", [None, np.nan, pd.NA])
def test_setitem_missing_values(na):
    """Assigning ``None``, ``np.nan`` or ``pd.NA`` marks the element as missing."""
    target = pd.array([True, False, None], dtype="boolean")

    target[1] = na

    tm.assert_extension_array_equal(
        target, pd.array([True, None, None], dtype="boolean")
    )

View File

@@ -0,0 +1,254 @@
import operator
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.arrays import BooleanArray
from pandas.core.ops.mask_ops import (
kleene_and,
kleene_or,
kleene_xor,
)
from pandas.tests.extension.base import BaseOpsUtil
class TestLogicalOps(BaseOpsUtil):
    """Tests for the logical operators (and/or/xor) on boolean arrays.

    ``all_logical_operators`` is a pytest fixture; it is used here as the name
    of the method to look up on the array with ``getattr``.
    """

    def test_numpy_scalars_ok(self, all_logical_operators):
        """``np.bool_`` scalars give the same result as Python bools."""
        a = pd.array([True, False, None], dtype="boolean")
        op = getattr(a, all_logical_operators)

        tm.assert_extension_array_equal(op(True), op(np.bool_(True)))
        tm.assert_extension_array_equal(op(False), op(np.bool_(False)))

    def get_op_from_name(self, op_name):
        """Map a dunder method name to a callable from the ``operator`` module.

        Names not containing ``"xor"`` get a trailing underscore (``and_``,
        ``or_``). If the lookup fails, the name is assumed to be a reflected
        operator: the leading ``r`` is dropped and the operands are swapped.
        """
        short_opname = op_name.strip("_")
        short_opname = short_opname if "xor" in short_opname else short_opname + "_"
        try:
            op = getattr(operator, short_opname)
        except AttributeError:
            # Assume it is the reverse operator
            rop = getattr(operator, short_opname[1:])
            op = lambda x, y: rop(y, x)

        return op

    def test_empty_ok(self, all_logical_operators):
        """Logical ops on an empty array return an empty array for any scalar."""
        a = pd.array([], dtype="boolean")
        op_name = all_logical_operators
        result = getattr(a, op_name)(True)
        tm.assert_extension_array_equal(a, result)

        result = getattr(a, op_name)(False)
        tm.assert_extension_array_equal(a, result)

        result = getattr(a, op_name)(pd.NA)
        tm.assert_extension_array_equal(a, result)

    @pytest.mark.parametrize(
        "other", ["a", pd.Timestamp(2017, 1, 1, 12), np.timedelta64(4)]
    )
    def test_eq_mismatched_type(self, other):
        """``==`` / ``!=`` against an unrelated scalar are all-False / all-True."""
        # GH-44499
        arr = pd.array([True, False])
        result = arr == other
        expected = pd.array([False, False])
        tm.assert_extension_array_equal(result, expected)

        result = arr != other
        expected = pd.array([True, True])
        tm.assert_extension_array_equal(result, expected)

    def test_logical_length_mismatch_raises(self, all_logical_operators):
        """Operands of a different length raise ``ValueError``."""
        op_name = all_logical_operators
        a = pd.array([True, False, None], dtype="boolean")
        msg = "Lengths must match to compare"

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)([True, False])

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)(np.array([True, False]))

        with pytest.raises(ValueError, match=msg):
            getattr(a, op_name)(pd.array([True, False], dtype="boolean"))

    def test_logical_nan_raises(self, all_logical_operators):
        """A float NaN scalar is rejected with ``TypeError``."""
        op_name = all_logical_operators
        a = pd.array([True, False, None], dtype="boolean")
        msg = "Got float instead"

        with pytest.raises(TypeError, match=msg):
            getattr(a, op_name)(np.nan)

    @pytest.mark.parametrize("other", ["a", 1])
    def test_non_bool_or_na_other_raises(self, other, all_logical_operators):
        """Non-bool scalars raise ``TypeError`` whose message names their type."""
        a = pd.array([True, False], dtype="boolean")
        with pytest.raises(TypeError, match=str(type(other).__name__)):
            getattr(a, all_logical_operators)(other)

    def test_kleene_or(self):
        """Array ``|`` array over all nine True/False/NA combinations."""
        # A clear test of behavior.
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a | b
        expected = pd.array(
            [True, True, True, True, False, None, True, None, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b | a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [True, None, None]),
            (True, [True, True, True]),
            (np.bool_(True), [True, True, True]),
            (False, [True, False, None]),
            (np.bool_(False), [True, False, None]),
        ],
    )
    def test_kleene_or_scalar(self, other, expected):
        """Array ``|`` scalar, in both operand orders."""
        a = pd.array([True, False, None], dtype="boolean")
        result = a | other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other | a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    def test_kleene_and(self):
        """Array ``&`` array over all nine True/False/NA combinations."""
        # A clear test of behavior.
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a & b
        expected = pd.array(
            [True, False, None, False, False, False, None, False, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b & a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [None, False, None]),
            (True, [True, False, None]),
            (False, [False, False, False]),
            (np.bool_(True), [True, False, None]),
            (np.bool_(False), [False, False, False]),
        ],
    )
    def test_kleene_and_scalar(self, other, expected):
        """Array ``&`` scalar, in both operand orders."""
        a = pd.array([True, False, None], dtype="boolean")
        result = a & other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other & a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    def test_kleene_xor(self):
        """Array ``^`` array over all nine True/False/NA combinations."""
        a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        b = pd.array([True, False, None] * 3, dtype="boolean")
        result = a ^ b
        expected = pd.array(
            [False, True, None, True, False, None, None, None, None], dtype="boolean"
        )
        tm.assert_extension_array_equal(result, expected)

        result = b ^ a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        )
        tm.assert_extension_array_equal(
            b, pd.array([True, False, None] * 3, dtype="boolean")
        )

    @pytest.mark.parametrize(
        "other, expected",
        [
            (pd.NA, [None, None, None]),
            (True, [False, True, None]),
            (np.bool_(True), [False, True, None]),
            # The Python ``False`` scalar is covered alongside ``np.bool_(False)``,
            # matching the or/and scalar tests.
            (False, [True, False, None]),
            (np.bool_(False), [True, False, None]),
        ],
    )
    def test_kleene_xor_scalar(self, other, expected):
        """Array ``^`` scalar, in both operand orders."""
        a = pd.array([True, False, None], dtype="boolean")
        result = a ^ other
        expected = pd.array(expected, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        result = other ^ a
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        tm.assert_extension_array_equal(
            a, pd.array([True, False, None], dtype="boolean")
        )

    @pytest.mark.parametrize("other", [True, False, pd.NA, [True, False, None] * 3])
    def test_no_masked_assumptions(self, other, all_logical_operators):
        """Results must not depend on the data stored under masked positions."""
        # The logical operations should not assume that masked values are False!
        a = pd.arrays.BooleanArray(
            np.array([True, True, True, False, False, False, True, False, True]),
            np.array([False] * 6 + [True, True, True]),
        )
        b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
        if isinstance(other, list):
            other = pd.array(other, dtype="boolean")

        result = getattr(a, all_logical_operators)(other)
        expected = getattr(b, all_logical_operators)(other)
        tm.assert_extension_array_equal(result, expected)

        if isinstance(other, BooleanArray):
            # Overwrite the data hidden behind the masks and compare again.
            other._data[other._mask] = True
            a._data[a._mask] = False

            result = getattr(a, all_logical_operators)(other)
            expected = getattr(b, all_logical_operators)(other)
            tm.assert_extension_array_equal(result, expected)
@pytest.mark.parametrize("operation", [kleene_or, kleene_xor, kleene_and])
def test_error_both_scalar(operation):
    """The Kleene helpers raise ``TypeError`` when both operands are scalars."""
    expected_msg = r"Either `left` or `right` need to be a np\.ndarray."
    # masks need to be non-None, otherwise it ends up in an infinite recursion
    left_mask = np.zeros(1)
    right_mask = np.zeros(1)
    with pytest.raises(TypeError, match=expected_msg):
        operation(True, True, left_mask, right_mask)

View File

@@ -0,0 +1,27 @@
import pandas as pd
import pandas._testing as tm
class TestUnaryOps:
    """Unary-operator tests for boolean arrays."""

    def test_invert(self):
        """``~`` flips True/False and keeps NA, for arrays, Series and frames."""
        labels = ["a", "b", "c"]
        source = pd.array([True, False, None], dtype="boolean")
        flipped = pd.array([False, True, None], dtype="boolean")

        tm.assert_extension_array_equal(~source, flipped)

        series_expected = pd.Series(flipped, index=labels, name="name")
        series_result = ~pd.Series(source, index=labels, name="name")
        tm.assert_series_equal(series_result, series_expected)

        frame = pd.DataFrame({"A": source, "B": [True, False, False]}, index=labels)
        frame_result = ~frame
        frame_expected = pd.DataFrame(
            {"A": series_expected, "B": [False, True, True]}, index=labels
        )
        tm.assert_frame_equal(frame_result, frame_expected)

    def test_abs(self):
        """``abs`` returns an array equal to its input."""
        # matching numpy behavior, abs is the identity function
        source = pd.array([True, False, None], dtype="boolean")
        tm.assert_extension_array_equal(abs(source), source)

View File

@@ -0,0 +1,60 @@
import numpy as np
import pytest
import pandas as pd
@pytest.fixture
def data():
    """Return a 100-element boolean array containing two missing values."""
    pair = [True, False]
    raw = pair * 4 + [np.nan] + pair * 44 + [np.nan] + pair
    return pd.array(raw, dtype="boolean")
@pytest.mark.parametrize(
    "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip",
    [
        ([True, pd.NA], True, True, True, pd.NA),
        ([False, pd.NA], False, False, pd.NA, False),
        ([pd.NA], False, True, pd.NA, pd.NA),
        ([], False, True, False, True),
        # GH-33253: all True / all False values buggy with skipna=False
        ([True, True], True, True, True, True),
        ([False, False], False, False, False, False),
    ],
)
def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
    """Check ``any``/``all`` with and without ``skipna`` for arrays and Series."""

    def as_numpy_scalar(value):
        # the methods return numpy scalars
        return pd.NA if value is pd.NA else np.bool_(value)

    exp_any, exp_all, exp_any_noskip, exp_all_noskip = map(
        as_numpy_scalar, (exp_any, exp_all, exp_any_noskip, exp_all_noskip)
    )

    for constructor in (pd.array, pd.Series):
        container = constructor(values, dtype="boolean")
        assert container.any() is exp_any
        assert container.all() is exp_all
        assert container.any(skipna=False) is exp_any_noskip
        assert container.all(skipna=False) is exp_all_noskip

        assert np.any(container.any()) is exp_any
        assert np.all(container.all()) is exp_all
@pytest.mark.parametrize("dropna", [True, False])
def test_reductions_return_types(dropna, data, all_numeric_reductions):
    """Each numeric reduction on a boolean Series returns the expected scalar type."""
    op = all_numeric_reductions
    ser = pd.Series(data)
    if dropna:
        ser = ser.dropna()

    if op in ("sum", "prod"):
        expected_type = np.int_
    elif op in ("min", "max"):
        expected_type = np.bool_
    else:
        # "mean", "std", "var", "median", "kurt", "skew"
        expected_type = np.float64

    outcome = getattr(ser, op)()
    assert isinstance(outcome, expected_type)

View File

@@ -0,0 +1,13 @@
import pandas as pd
def test_repr():
    """Check the repr of a boolean column as a DataFrame, a Series and an array.

    pandas right-aligns values in DataFrame and Series reprs, so the expected
    strings must carry the padding spaces exactly.
    """
    df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")})

    # DataFrame: one-character index, two-space separator, five-wide column.
    expected = "       A\n0   True\n1  False\n2   <NA>"
    assert repr(df) == expected

    # Series: four-space separator between index and values.
    expected = "0     True\n1    False\n2     <NA>\nName: A, dtype: boolean"
    assert repr(df.A) == expected

    expected = "<BooleanArray>\n[True, False, <NA>]\nLength: 3, dtype: boolean"
    assert repr(df.A.array) == expected

Some files were not shown because too many files have changed in this diff Show More