first commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,42 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIndexConstructor:
    """Constructor checks for inputs where ``Index`` must not return a subclass."""

    @pytest.mark.parametrize("value", [1, np.int64(1)])
    def test_constructor_corner(self, value):
        """A bare scalar is rejected with a ``TypeError`` naming the value."""
        prefix = r"Index\(\.\.\.\) must be called with a collection of some "
        pattern = prefix + f"kind, {value} was passed"
        with pytest.raises(TypeError, match=pattern):
            Index(value)

    @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
    def test_construction_list_mixed_tuples(self, index_vals):
        """A list mixing tuples and scalars gives a plain ``Index`` in either order."""
        # gh-10697: the outcome must not depend on where the tuple sits.
        built = Index(index_vals)
        assert isinstance(built, Index)
        assert not isinstance(built, MultiIndex)

    def test_constructor_wrong_kwargs(self):
        """Unknown keyword arguments raise ``TypeError`` (GH #19348)."""
        expect_error = pytest.raises(
            TypeError, match="Unexpected keyword arguments {'foo'}"
        )
        with expect_error, tm.assert_produces_warning(FutureWarning):
            Index([], foo="bar")

    def test_constructor_cast(self):
        """Non-numeric strings cannot be cast to a float dtype."""
        with pytest.raises(ValueError, match="could not convert string to float"):
            Index(["a", "b", "c"], dtype=float)
|
||||
@@ -0,0 +1,148 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas._config.config as cf
|
||||
|
||||
from pandas import Index
|
||||
|
||||
|
||||
class TestIndexRendering:
    """Checks on the ``repr``, ``format`` and ``_summary`` text of ``Index``."""

    # NOTE: in the multi-line expectations below, continuation rows carry
    # seven leading spaces (the width of "Index([") and the trailing dtype
    # row carries six (the width of "Index("). That padding is part of the
    # expected repr and must not be collapsed.
    @pytest.mark.parametrize(
        "index,expected",
        [
            # ASCII
            # short
            (
                Index(["a", "bb", "ccc"]),
                """Index(['a', 'bb', 'ccc'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["a", "bb", "ccc"] * 10),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object')",
            ),
            # truncated
            (
                Index(["a", "bb", "ccc"] * 100),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n"
                "       ...\n"
                "       'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object', length=300)",
            ),
            # Non-ASCII
            # short
            (
                Index(["あ", "いい", "ううう"]),
                """Index(['あ', 'いい', 'ううう'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう'],\n"
                    "      dtype='object')"
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr(self, index, expected):
        """``repr`` of a string Index matches the expected text exactly."""
        result = repr(index)
        assert result == expected

    @pytest.mark.parametrize(
        "index,expected",
        [
            # short
            (
                Index(["あ", "いい", "ううう"]),
                ("Index(['あ', 'いい', 'ううう'], dtype='object')"),
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう'],\n"
                    "      dtype='object')"
                    ""
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', "
                    "'いい', 'ううう', 'あ', 'いい',\n"
                    "       'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr_with_unicode_option(self, index, expected):
        """Same check with ``display.unicode.east_asian_width`` switched on."""
        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            result = repr(index)
            assert result == expected

    def test_repr_summary(self):
        """With ``display.max_seq_items`` at 10 a long repr is elided."""
        with cf.option_context("display.max_seq_items", 10):
            result = repr(Index(np.arange(1000)))
            assert len(result) < 200
            assert "..." in result

    def test_summary_bug(self):
        """Labels that look like format templates appear verbatim in the summary."""
        # GH#3869
        ind = Index(["{other}%s", "~:{range}:0"], name="A")
        result = ind._summary()
        # shouldn't be formatted accidentally.
        assert "~:{range}:0" in result
        assert "{other}%s" in result

    def test_index_repr_bool_nan(self):
        """Object Index of bools plus NaN renders each element as written."""
        # GH32146
        arr = Index([True, False, np.nan], dtype=object)

        result = arr.format()
        expected = ["True", "False", "NaN"]
        assert result == expected

        result = repr(arr)
        expected = "Index([True, False, nan], dtype='object')"
        assert result == expected

    def test_format_different_scalar_lengths(self):
        """``format`` leaves labels of different lengths unpadded."""
        # GH#35439
        idx = Index(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        assert idx.format() == expected
|
||||
@@ -0,0 +1,78 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
NaT,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGetSliceBounds:
    """Exercises ``Index.get_slice_bound`` on object-dtype indexes."""

    @pytest.mark.parametrize("kind", ["getitem", "loc", None])
    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_within(self, kind, side, expected):
        """A label present in the index maps to the expected bound per side."""
        letters = Index(list("abcdef"))
        kind_warning = tm.assert_produces_warning(
            FutureWarning, match="'kind' argument"
        )
        with kind_warning:
            found = letters.get_slice_bound("e", kind=kind, side=side)
        assert found == expected

    @pytest.mark.parametrize("kind", ["getitem", "loc", None])
    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize(
        "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)]
    )
    def test_get_slice_bounds_outside(self, kind, side, expected, data, bound):
        """A label outside the index maps to the expected end position."""
        kind_warning = tm.assert_produces_warning(
            FutureWarning, match="'kind' argument"
        )
        with kind_warning:
            found = Index(data).get_slice_bound(bound, kind=kind, side=side)
        assert found == expected

    def test_get_slice_bounds_invalid_side(self):
        """An unrecognised ``side`` value raises ``ValueError``."""
        empty = Index([])
        with pytest.raises(ValueError, match="Invalid value for side kwarg"):
            empty.get_slice_bound("a", side="middle")
|
||||
|
||||
|
||||
class TestGetIndexerNonUnique:
    """Exercises ``Index.get_indexer_non_unique`` on object-dtype indexes."""

    def test_get_indexer_non_unique_dtype_mismatch(self):
        """An integer target against string labels is reported as missing."""
        # GH#25459
        labels = Index(["A", "B"])
        target = Index([0])
        positions, not_found = labels.get_indexer_non_unique(target)

        want_positions = np.array([-1], dtype=np.intp)
        want_missing = np.array([0], dtype=np.intp)
        tm.assert_numpy_array_equal(want_positions, positions)
        tm.assert_numpy_array_equal(want_missing, not_found)
|
||||
|
||||
|
||||
class TestGetLoc:
    """Exercises ``Index.get_loc`` on object-dtype indexes."""

    @pytest.mark.slow  # to_flat_index takes a while
    def test_get_loc_tuple_monotonic_above_size_cutoff(self):
        """Looking up a tuple from a large flat index returns its position."""
        # Go through the libindex path for which using
        # _bin_search vs ndarray.searchsorted makes a difference
        levels = [
            list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
            range(10**3),
            pd.date_range("2016-01-01", periods=100),
        ]
        flat = pd.MultiIndex.from_product(levels).to_flat_index()

        midpoint = len(flat) // 2
        key = flat[midpoint]

        assert flat.get_loc(key) == midpoint

    def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
        """NaN, None and NaT are looked up as distinct keys."""
        # case that goes through _maybe_get_bool_indexer
        idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)

        # np.nan is found rather than raising KeyError
        nan_pos = idx.get_loc(np.nan)
        assert nan_pos == 1

        # None matches only the None entries, not np.nan
        none_mask = idx.get_loc(None)
        want_mask = np.array([False, False, True, False, False, True])
        tm.assert_numpy_array_equal(none_mask, want_mask)

        # a different NA value matches nothing
        with pytest.raises(KeyError, match="NaT"):
            idx.get_loc(NaT)
|
||||
@@ -0,0 +1,11 @@
|
||||
from pandas import Index
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_pickle_preserves_object_dtype():
    """An object-dtype Index of ints stays object dtype after a pickle round trip."""
    # GH#43188, GH#43155 don't infer numeric dtype
    original = Index([1, 2, 3], dtype=object)
    restored = tm.round_trip_pickle(original)

    assert restored.dtype == object
    tm.assert_index_equal(original, restored)
|
||||
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
Tests for ndarray-like method on the base Index class
|
||||
"""
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Index
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestReshape:
    """Tests for the ndarray-like reshaping methods of the base ``Index``."""

    def test_repeat(self):
        """``repeat(2)`` duplicates each element in place."""
        repeats = 2
        index = Index([1, 2, 3])
        expected = Index([1, 1, 2, 2, 3, 3])

        result = index.repeat(repeats)
        tm.assert_index_equal(result, expected)

    def test_insert(self):
        """``insert`` accepts positive and negative positions like a list."""
        # GH 7256
        # validate neg/pos inserts
        result = Index(["b", "c", "d"])

        # test 0th element
        tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))

        # test Nth element that follows Python list behavior
        tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))

        # test loc +/- neq (0, -1)
        tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))

        # test empty
        null_index = Index([])
        tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))

    def test_insert_missing(self, nulls_fixture):
        """Inserting an NA value keeps that exact NA value."""
        # GH#22295
        # test there is no mangling of NA values
        expected = Index(["a", nulls_fixture, "b", "c"])
        result = Index(list("abc")).insert(1, nulls_fixture)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")]
    )
    @pytest.mark.parametrize("loc", [-1, 2])
    def test_insert_datetime_into_object(self, loc, val):
        """Inserted tuples and datetime-like scalars keep their own type."""
        # GH#44509
        idx = Index(["1", "2", "3"])
        result = idx.insert(loc, val)
        expected = Index(["1", "2", val, "3"])
        tm.assert_index_equal(result, expected)
        assert type(expected[2]) is type(val)
        # Check the value under test too, not only the hand-built expectation;
        # both loc=-1 and loc=2 place ``val`` at position 2.
        assert type(result[2]) is type(val)

    @pytest.mark.parametrize(
        "pos,expected",
        [
            (0, Index(["b", "c", "d"], name="index")),
            (-1, Index(["a", "b", "c"], name="index")),
        ],
    )
    def test_delete(self, pos, expected):
        """``delete`` drops the element at ``pos`` and keeps the name."""
        index = Index(["a", "b", "c", "d"], name="index")
        result = index.delete(pos)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    def test_delete_raises(self):
        """Deleting an out-of-range position raises ``IndexError``."""
        index = Index(["a", "b", "c", "d"], name="index")
        msg = "index 5 is out of bounds for axis 0 with size 4"
        with pytest.raises(IndexError, match=msg):
            index.delete(5)

    def test_append_multiple(self):
        """Appending a list of slices rebuilds the original index."""
        index = Index(["a", "b", "c", "d", "e", "f"])

        foos = [index[:2], index[2:4], index[4:]]
        result = foos[0].append(foos[1:])
        tm.assert_index_equal(result, index)

        # empty
        result = index.append([])
        tm.assert_index_equal(result, index)
|
||||
@@ -0,0 +1,261 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.algorithms import safe_sort
|
||||
|
||||
|
||||
class TestIndexSetOps:
    """Set-operation tests (union, intersection, differences) for ``Index``."""

    @pytest.mark.parametrize(
        "method", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_setops_disallow_true(self, method):
        """``sort=True`` is rejected by every set operation."""
        idx1 = Index(["a", "b"])
        idx2 = Index(["b", "c"])

        with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
            getattr(idx1, method)(idx2, sort=True)

    def test_setops_preserve_object_dtype(self):
        """Set operations on an object-dtype Index of ints stay object dtype."""
        idx = Index([1, 2, 3], dtype=object)
        result = idx.intersection(idx[1:])
        expected = idx[1:]
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, intersection goes through
        # a different route
        result = idx.intersection(idx[1:][::-1])
        tm.assert_index_equal(result, expected)

        result = idx._union(idx[1:], sort=None)
        expected = idx
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:], sort=None)
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, _union goes through
        # a different route
        result = idx._union(idx[1:][::-1], sort=None)
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:][::-1], sort=None)
        tm.assert_index_equal(result, expected)

    def test_union_base(self):
        """Union of two overlapping slices of a mixed index comes back sorted."""
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(second)

        expected = Index([0, 1, 2, "a", "b", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_union_different_type_base(self, klass):
        """Union accepts array, Series and list operands."""
        # GH 10149
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(klass(second.values))

        assert tm.equalContents(result, index)

    def test_union_sort_other_incomparable(self):
        """Union of incomparable values warns when sorting and keeps order."""
        # https://github.com/pandas-dev/pandas/issues/24959
        idx = Index([1, pd.Timestamp("2000")])
        # default (sort=None)
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1])

        tm.assert_index_equal(result, idx)

        # sort=None
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1], sort=None)
        tm.assert_index_equal(result, idx)

        # sort=False
        result = idx.union(idx[:1], sort=False)
        tm.assert_index_equal(result, idx)

    @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior")
    def test_union_sort_other_incomparable_true(self):
        """Placeholder for ``sort=True`` with incomparable values (xfail)."""
        # TODO(GH#25151): decide on True behaviour
        # sort=True
        idx = Index([1, pd.Timestamp("2000")])
        with pytest.raises(TypeError, match=".*"):
            idx.union(idx[:1], sort=True)

    @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior")
    def test_intersection_equal_sort_true(self):
        """Placeholder for ``sort=True`` on self-intersection (xfail)."""
        # TODO(GH#25151): decide on True behaviour
        idx = Index(["c", "a", "b"])
        sorted_ = Index(["a", "b", "c"])
        tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)

    def test_intersection_base(self, sort):
        """Intersection of two slices; ordering depends on ``sort``."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
        result = first.intersection(second, sort=sort)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_intersection_different_type_base(self, klass, sort):
        """Intersection accepts array, Series and list operands."""
        # GH 10149
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        result = first.intersection(klass(second.values), sort=sort)
        assert tm.equalContents(result, second)

    def test_intersection_nosort(self):
        """Intersection with a list keeps the order of the calling index."""
        result = Index(["c", "b", "a"]).intersection(["b", "a"])
        expected = Index(["b", "a"])
        tm.assert_index_equal(result, expected)

    def test_intersection_equal_sort(self):
        """Self-intersection returns the index unchanged for both sort values."""
        idx = Index(["c", "a", "b"])
        tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
        tm.assert_index_equal(idx.intersection(idx, sort=None), idx)

    def test_intersection_str_dates(self, sort):
        """Strings and datetimes in object indexes share no elements."""
        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

        i1 = Index(dt_dates, dtype=object)
        i2 = Index(["aa"], dtype=object)
        result = i2.intersection(i1, sort=sort)

        assert len(result) == 0

    @pytest.mark.parametrize(
        "index2,expected_arr",
        [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
    )
    def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
        """Intersection of a non-monotonic, non-unique index."""
        # non-monotonic non-unique
        index1 = Index(["A", "B", "A", "C"])
        expected = Index(expected_arr, dtype="object")
        result = index1.intersection(index2, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

    def test_difference_base(self, sort):
        """Difference of two overlapping slices of a mixed index."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        result = first.difference(second, sort)
        expected = Index([0, "a", 1])
        if sort is None:
            expected = Index(safe_sort(expected))
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self):
        """Symmetric difference of two overlapping slices of a mixed index."""
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        result = first.symmetric_difference(second)
        expected = Index([0, 1, 2, "a", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "method,expected,sort",
        [
            (
                "intersection",
                np.array(
                    [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                    dtype=[("num", int), ("let", "a1")],
                ),
                False,
            ),
            (
                "intersection",
                np.array(
                    [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
                    dtype=[("num", int), ("let", "a1")],
                ),
                None,
            ),
            (
                "union",
                np.array(
                    [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
                    dtype=[("num", int), ("let", "a1")],
                ),
                None,
            ),
        ],
    )
    def test_tuple_union_bug(self, method, expected, sort):
        """Set operations on indexes built from record arrays stay 1-D."""
        index1 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                dtype=[("num", int), ("let", "a1")],
            )
        )
        index2 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
                dtype=[("num", int), ("let", "a1")],
            )
        )

        result = getattr(index1, method)(index2, sort=sort)
        assert result.ndim == 1

        expected = Index(expected)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("first_list", [["b", "a"], []])
    @pytest.mark.parametrize("second_list", [["a", "b"], []])
    @pytest.mark.parametrize(
        "first_name, second_name, expected_name",
        [("A", "B", None), (None, "B", None), ("A", None, None)],
    )
    def test_union_name_preservation(
        self, first_list, second_list, first_name, second_name, expected_name, sort
    ):
        """Union of indexes with differing names yields the expected values."""
        first = Index(first_list, name=first_name)
        second = Index(second_list, name=second_name)
        union = first.union(second, sort=sort)

        vals = set(first_list).union(second_list)

        if sort is None and len(first_list) > 0 and len(second_list) > 0:
            expected = Index(sorted(vals), name=expected_name)
            tm.assert_index_equal(union, expected)
        else:
            expected = Index(vals, name=expected_name)
            # equalContents only returns a bool; without ``assert`` this
            # branch would pass regardless of what the union contains.
            assert tm.equalContents(union, expected)

    @pytest.mark.parametrize(
        "diff_type, expected",
        [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
    )
    def test_difference_object_type(self, diff_type, expected):
        """Differences of mixed int/str object indexes."""
        # GH 13432
        idx1 = Index([0, 1, "A", "B"])
        idx2 = Index([0, 2, "A", "C"])
        result = getattr(idx1, diff_type)(idx2)
        expected = Index(expected)
        tm.assert_index_equal(result, expected)
|
||||
@@ -0,0 +1,13 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import Index
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestWhere:
    """Exercises ``Index.where`` on the base ``Index`` class."""

    def test_where_intlike_str_doesnt_cast_ints(self):
        """Filling with the string "2" keeps it a string next to the ints."""
        keep = np.array([True, False, True])
        replaced = Index(range(3)).where(keep, "2")
        tm.assert_index_equal(replaced, Index([0, "2", 2]))
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,62 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestAppend:
    """Exercises ``append`` where at least one operand is a ``CategoricalIndex``."""

    @pytest.fixture
    def ci(self):
        """Unordered ``CategoricalIndex`` of "aabbca" with categories c, a, b."""
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_append(self, ci):
        """Appending pieces with the same categories rebuilds the original."""
        head, tail = ci[:3], ci[3:]
        tm.assert_index_equal(head.append(tail), ci, exact=True)

        first, *rest = [ci[:1], ci[1:3], ci[3:]]
        tm.assert_index_equal(first.append(rest), ci, exact=True)

    def test_append_empty(self, ci):
        """Appending an empty list returns an equal index."""
        unchanged = ci.append([])
        tm.assert_index_equal(unchanged, ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        """Appending a raw Categorical (not an Index) raises ``TypeError``."""
        # appending with different categories or reordered is not ok
        msg = "all inputs must be Index"
        variants = (("set_categories", "abcd"), ("reorder_categories", "abc"))
        for method_name, cats in variants:
            with pytest.raises(TypeError, match=msg):
                ci.append(getattr(ci.values, method_name)(list(cats)))

    def test_append_category_objects(self, ci):
        """Object values that are all known categories keep the result categorical."""
        appended = ci.append(Index(["c", "a"]))
        want = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(appended, want, exact=True)

    def test_append_non_categories(self, ci):
        """A value outside the categories casts the result to a plain Index."""
        # invalid objects -> cast to object via concat_compat
        appended = ci.append(Index(["a", "d"]))
        want = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(appended, want, exact=True)

    def test_append_object(self, ci):
        """A non-categorical base gives a plain Index result (GH#14298)."""
        base = Index(["c", "a"])
        appended = base.append(ci)
        tm.assert_index_equal(appended, Index(list("caaabbca")), exact=True)

    def test_append_to_another(self):
        """Appending a ``CategoricalIndex`` to a plain Index gives a plain Index."""
        # hits Index._concat
        plain = Index(["a", "b"])
        categorical = CategoricalIndex(["d", "e"])
        combined = plain.append(categorical)
        tm.assert_index_equal(combined, Index(["a", "b", "d", "e"]))
|
||||
@@ -0,0 +1,87 @@
|
||||
from datetime import date
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestAstype:
    """Exercises ``CategoricalIndex.astype`` and category round trips."""

    def test_astype(self):
        """Casting to object and to interval yields the matching Index types."""
        cat_idx = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=False
        )

        as_object = cat_idx.astype(object)
        tm.assert_index_equal(as_object, Index(np.array(cat_idx)))

        # this IS equal, but not the same class
        assert as_object.equals(cat_idx)
        assert isinstance(as_object, Index)
        assert not isinstance(as_object, CategoricalIndex)

        # interval
        intervals = IntervalIndex.from_arrays(
            left=[-0.001, 2.0], right=[2, 4], closed="right"
        )
        coded = Categorical.from_codes([0, 1, -1], categories=intervals, ordered=True)
        cat_idx = CategoricalIndex(coded)

        as_interval = cat_idx.astype("interval")
        want = intervals.take([0, 1, -1], allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(as_interval, want)

        rebuilt = IntervalIndex(as_interval.values)
        tm.assert_index_equal(rebuilt, want)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        """Casting to a ``CategoricalDtype`` applies its categories and order."""
        # GH#18630
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name:
            index = index.rename(name)

        # standard categories
        target = CategoricalDtype(ordered=dtype_ordered)
        converted = index.astype(target)
        want = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(converted, want)

        # non-standard categories
        trimmed_categories = index.unique().tolist()[:-1]
        target = CategoricalDtype(trimmed_categories, dtype_ordered)
        converted = index.astype(target)
        want = CategoricalIndex(index.tolist(), name=name, dtype=target)
        tm.assert_index_equal(converted, want)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            converted = index.astype("category")
            tm.assert_index_equal(converted, index)

    def test_categorical_date_roundtrip(self):
        """``date`` objects survive a cast to category and back to object."""
        today = date.today()

        dates = Index([today, today])
        assert dates.dtype == object

        back = dates.astype("category").astype(object)
        assert back.dtype == object
        assert type(back[0]) is date
|
||||
@@ -0,0 +1,390 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
from pandas._libs.arrays import NDArrayBacked
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.api import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
)
|
||||
from pandas.tests.indexes.common import Base
|
||||
|
||||
|
||||
class TestCategoricalIndex(Base):
|
||||
_index_cls = CategoricalIndex
|
||||
|
||||
@pytest.fixture
|
||||
def simple_index(self) -> CategoricalIndex:
|
||||
return self._index_cls(list("aabbca"), categories=list("cab"), ordered=False)
|
||||
|
||||
@pytest.fixture
|
||||
def index(self, request):
|
||||
return tm.makeCategoricalIndex(100)
|
||||
|
||||
def create_index(self, *, categories=None, ordered=False):
|
||||
if categories is None:
|
||||
categories = list("cab")
|
||||
return CategoricalIndex(list("aabbca"), categories=categories, ordered=ordered)
|
||||
|
||||
def test_can_hold_identifiers(self):
|
||||
idx = self.create_index(categories=list("abcd"))
|
||||
key = idx[0]
|
||||
assert idx._can_hold_identifiers_and_holds_name(key) is True
|
||||
|
||||
def test_pickle_compat_construction(self):
|
||||
# Once the deprecation is enforced, we can use the parent class's test
|
||||
with tm.assert_produces_warning(FutureWarning, match="without passing data"):
|
||||
self._index_cls()
|
||||
|
||||
def test_insert(self, simple_index):
|
||||
|
||||
ci = simple_index
|
||||
categories = ci.categories
|
||||
|
||||
# test 0th element
|
||||
result = ci.insert(0, "a")
|
||||
expected = CategoricalIndex(list("aaabbca"), categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# test Nth element that follows Python list behavior
|
||||
result = ci.insert(-1, "a")
|
||||
expected = CategoricalIndex(list("aabbcaa"), categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# test empty
|
||||
result = CategoricalIndex([], categories=categories).insert(0, "a")
|
||||
expected = CategoricalIndex(["a"], categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# invalid -> cast to object
|
||||
expected = ci.astype(object).insert(0, "d")
|
||||
result = ci.insert(0, "d")
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
# GH 18295 (test missing)
|
||||
expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
|
||||
for na in (np.nan, pd.NaT, None):
|
||||
result = CategoricalIndex(list("aabcb")).insert(1, na)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_insert_na_mismatched_dtype(self):
|
||||
ci = CategoricalIndex([0, 1, 1])
|
||||
result = ci.insert(0, pd.NaT)
|
||||
expected = Index([pd.NaT, 0, 1, 1], dtype=object)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_delete(self, simple_index):
|
||||
|
||||
ci = simple_index
|
||||
categories = ci.categories
|
||||
|
||||
result = ci.delete(0)
|
||||
expected = CategoricalIndex(list("abbca"), categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
result = ci.delete(-1)
|
||||
expected = CategoricalIndex(list("aabbc"), categories=categories)
|
||||
tm.assert_index_equal(result, expected, exact=True)
|
||||
|
||||
with tm.external_error_raised((IndexError, ValueError)):
|
||||
# Either depending on NumPy version
|
||||
ci.delete(10)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, non_lexsorted_data",
|
||||
[[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
|
||||
)
|
||||
def test_is_monotonic(self, data, non_lexsorted_data):
|
||||
c = CategoricalIndex(data)
|
||||
assert c.is_monotonic_increasing is True
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
c = CategoricalIndex(data, ordered=True)
|
||||
assert c.is_monotonic_increasing is True
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
c = CategoricalIndex(data, categories=reversed(data))
|
||||
assert c.is_monotonic_increasing is False
|
||||
assert c.is_monotonic_decreasing is True
|
||||
|
||||
c = CategoricalIndex(data, categories=reversed(data), ordered=True)
|
||||
assert c.is_monotonic_increasing is False
|
||||
assert c.is_monotonic_decreasing is True
|
||||
|
||||
# test when data is neither monotonic increasing nor decreasing
|
||||
reordered_data = [data[0], data[2], data[1]]
|
||||
c = CategoricalIndex(reordered_data, categories=reversed(data))
|
||||
assert c.is_monotonic_increasing is False
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
# non lexsorted categories
|
||||
categories = non_lexsorted_data
|
||||
|
||||
c = CategoricalIndex(categories[:2], categories=categories)
|
||||
assert c.is_monotonic_increasing is True
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
c = CategoricalIndex(categories[1:3], categories=categories)
|
||||
assert c.is_monotonic_increasing is True
|
||||
assert c.is_monotonic_decreasing is False
|
||||
|
||||
def test_has_duplicates(self):
|
||||
idx = CategoricalIndex([0, 0, 0], name="foo")
|
||||
assert idx.is_unique is False
|
||||
assert idx.has_duplicates is True
|
||||
|
||||
idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
|
||||
assert idx.is_unique is False
|
||||
assert idx.has_duplicates is True
|
||||
|
||||
idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
|
||||
assert idx.is_unique is True
|
||||
assert idx.has_duplicates is False
|
||||
|
||||
@pytest.mark.parametrize(
    "data, categories, expected",
    [
        (
            [1, 1, 1],
            [1, 2, 3],
            {
                "first": np.array([False, True, True]),
                "last": np.array([True, True, False]),
                False: np.array([True, True, True]),
            },
        ),
        (
            [1, 1, 1],
            list("abc"),
            {
                "first": np.array([False, True, True]),
                "last": np.array([True, True, False]),
                False: np.array([True, True, True]),
            },
        ),
        (
            [2, "a", "b"],
            list("abc"),
            {
                "first": np.zeros(shape=(3), dtype=np.bool_),
                "last": np.zeros(shape=(3), dtype=np.bool_),
                False: np.zeros(shape=(3), dtype=np.bool_),
            },
        ),
        (
            list("abb"),
            list("abc"),
            {
                "first": np.array([False, False, True]),
                "last": np.array([False, True, False]),
                False: np.array([False, True, True]),
            },
        ),
    ],
)
def test_drop_duplicates(self, data, categories, expected):
    """``duplicated`` and ``drop_duplicates`` agree for every ``keep`` option.

    ``expected`` maps each ``keep`` value to the boolean mask that
    ``duplicated`` should return for it.
    """
    index = CategoricalIndex(data, categories=categories, name="foo")
    for keep, dup_mask in expected.items():
        tm.assert_numpy_array_equal(index.duplicated(keep=keep), dup_mask)
        # dropping duplicates must leave exactly the non-flagged entries
        survivors = index[~dup_mask]
        tm.assert_index_equal(index.drop_duplicates(keep=keep), survivors)
|
||||
|
||||
@pytest.mark.parametrize(
    "data, categories, expected_data",
    [
        ([1, 1, 1], [1, 2, 3], [1]),
        ([1, 1, 1], list("abc"), [np.nan]),
        ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
        ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
    ],
)
def test_unique(self, data, categories, expected_data, ordered):
    """``unique`` returns the distinct entries with the dtype unchanged."""
    cat_dtype = CategoricalDtype(categories, ordered=ordered)

    expected = CategoricalIndex(expected_data, dtype=cat_dtype)
    result = CategoricalIndex(data, dtype=cat_dtype).unique()
    tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_repr_roundtrip(self):
|
||||
|
||||
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
|
||||
str(ci)
|
||||
tm.assert_index_equal(eval(repr(ci)), ci, exact=True)
|
||||
|
||||
# formatting
|
||||
str(ci)
|
||||
|
||||
# long format
|
||||
# this is not reprable
|
||||
ci = CategoricalIndex(np.random.randint(0, 5, size=100))
|
||||
str(ci)
|
||||
|
||||
def test_isin(self):
|
||||
|
||||
ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
|
||||
tm.assert_numpy_array_equal(
|
||||
ci.isin(["c"]), np.array([False, False, False, True, False, False])
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
|
||||
)
|
||||
|
||||
# mismatched categorical -> coerced to ndarray so doesn't matter
|
||||
result = ci.isin(ci.set_categories(list("abcdefghi")))
|
||||
expected = np.array([True] * 6)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = ci.isin(ci.set_categories(list("defghi")))
|
||||
expected = np.array([False] * 5 + [True])
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_identical(self):
|
||||
|
||||
ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
|
||||
ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
|
||||
assert ci1.identical(ci1)
|
||||
assert ci1.identical(ci1.copy())
|
||||
assert not ci1.identical(ci2)
|
||||
|
||||
def test_ensure_copied_data(self, index):
|
||||
# gh-12309: Check the "copy" argument of each
|
||||
# Index.__new__ is honored.
|
||||
#
|
||||
# Must be tested separately from other indexes because
|
||||
# self.values is not an ndarray.
|
||||
|
||||
result = CategoricalIndex(index.values, copy=True)
|
||||
tm.assert_index_equal(index, result)
|
||||
assert not np.shares_memory(result._data._codes, index._data._codes)
|
||||
|
||||
result = CategoricalIndex(index.values, copy=False)
|
||||
assert result._data._codes is index._data._codes
|
||||
|
||||
def test_frame_repr(self):
|
||||
df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"]))
|
||||
result = repr(df)
|
||||
expected = " A\na 1\nb 2\nc 3"
|
||||
assert result == expected
|
||||
|
||||
def test_reindex_base(self):
|
||||
# See test_reindex.py
|
||||
pass
|
||||
|
||||
def test_map_str(self):
|
||||
# See test_map.py
|
||||
pass
|
||||
|
||||
|
||||
class TestCategoricalIndex2:
    # Tests that are not overriding a test in Base

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        """The index engine class follows the integer width of the codes."""
        if dtype == np.int64:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            widened_codes = ci.values._ndarray.astype("int64")
            NDArrayBacked.__init__(ci._data, widened_codes, ci.dtype)
        else:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            uniques_needed = {np.int8: 1, np.int16: 128, np.int32: 32768}
            ci = CategoricalIndex(range(uniques_needed[dtype]))
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        """``+`` and ``-`` on a categorical index raise TypeError."""
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        # any one of these messages is acceptable
        alternatives = (
            f"cannot perform {op_name} with this index type: CategoricalIndex",
            "can only concatenate list",
            rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
        )
        msg = "|".join(alternatives)
        with pytest.raises(TypeError, match=msg):
            func(idx)

    def test_method_delegation(self):
        """Categorical methods called on the index return a new index."""
        wide = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        expected = CategoricalIndex(list("aabbca"), categories=list("cab"))
        tm.assert_index_equal(wide.set_categories(list("cab")), expected)

        narrow = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(list("ffggef"), categories=list("efg"))
        tm.assert_index_equal(narrow.rename_categories(list("efg")), expected)

        # GH18862 (let rename_categories take callables)
        expected = CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        tm.assert_index_equal(
            narrow.rename_categories(lambda x: x.upper()), expected
        )

        narrow = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(list("aabbca"), categories=list("cabd"))
        tm.assert_index_equal(narrow.add_categories(["d"]), expected)

        narrow = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(
            list("aabb") + [np.nan] + ["a"], categories=list("ab")
        )
        tm.assert_index_equal(narrow.remove_categories(["c"]), expected)

        wide = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        tm.assert_index_equal(wide.as_unordered(), wide)

        wide = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        expected = CategoricalIndex(
            list("aabbca"), categories=list("cabdef"), ordered=True
        )
        tm.assert_index_equal(wide.as_ordered(), expected)

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            wide.set_categories(list("cab"), inplace=True)
|
||||
@@ -0,0 +1,159 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCategoricalIndexConstructors:
    """Construction of CategoricalIndex from lists, arrays, and dtypes."""

    def test_construction_without_data_deprecated(self):
        """Omitting ``data`` emits a FutureWarning."""
        # Once the deprecation is enforced, we can add this case to
        # test_construction_disallows_scalar
        msg = "without passing data"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction_disallows_scalar(self):
        """A scalar ``data`` argument raises TypeError."""
        msg = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)

    def test_construction(self):
        """Round-trips through Index, Categorical and CategoricalIndex."""
        # codes of "aabbca" when "a", "b", "c" are the first three categories
        abc_codes = np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        # same values when only "a" and "b" are categories ("c" is missing)
        ab_codes = np.array([0, 0, 1, 1, -1, 0], dtype="int8")

        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        categories = ci.categories

        from_index = Index(ci)
        tm.assert_index_equal(from_index, ci, exact=True)
        assert not from_index.ordered

        from_values = Index(ci.values)
        tm.assert_index_equal(from_values, ci, exact=True)
        assert not from_values.ordered

        # empty
        empty = CategoricalIndex([], categories=categories)
        tm.assert_index_equal(empty.categories, Index(categories))
        tm.assert_numpy_array_equal(empty.codes, np.array([], dtype="int8"))
        assert not empty.ordered

        # passing categories
        with_cats = CategoricalIndex(list("aabbca"), categories=categories)
        tm.assert_index_equal(with_cats.categories, Index(categories))
        tm.assert_numpy_array_equal(with_cats.codes, abc_codes)

        c = Categorical(list("aabbca"))
        from_cat = CategoricalIndex(c)
        tm.assert_index_equal(from_cat.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(from_cat.codes, abc_codes)
        assert not from_cat.ordered

        from_cat_with_cats = CategoricalIndex(c, categories=categories)
        tm.assert_index_equal(from_cat_with_cats.categories, Index(categories))
        tm.assert_numpy_array_equal(from_cat_with_cats.codes, abc_codes)
        assert not from_cat_with_cats.ordered

        ci = CategoricalIndex(c, categories=list("abcd"))
        from_ci = CategoricalIndex(ci)
        tm.assert_index_equal(from_ci.categories, Index(categories))
        tm.assert_numpy_array_equal(from_ci.codes, abc_codes)
        assert not from_ci.ordered

        narrowed = CategoricalIndex(ci, categories=list("ab"))
        tm.assert_index_equal(narrowed.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(narrowed.codes, ab_codes)
        assert not narrowed.ordered

        narrowed_ordered = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        tm.assert_index_equal(narrowed_ordered.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(narrowed_ordered.codes, ab_codes)
        assert narrowed_ordered.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        expected = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        plain = Index(np.array(ci))
        assert isinstance(plain, Index)
        assert not isinstance(plain, CategoricalIndex)

    def test_construction_with_dtype(self):
        """``dtype="category"`` on Index yields a CategoricalIndex."""
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)
        as_array = np.array(ci)

        tm.assert_index_equal(Index(as_array, dtype="category"), ci, exact=True)
        tm.assert_index_equal(
            Index(as_array.tolist(), dtype="category"), ci, exact=True
        )

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        reordered = Index(np.array(ci), dtype="category").reorder_categories(
            ci.categories
        )
        tm.assert_index_equal(reordered, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        result = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(result, expected, exact=True)

    def test_construction_empty_with_bool_categories(self):
        """Boolean categories are kept on an empty index."""
        # see GH#22702
        cat = CategoricalIndex([], categories=[True, False])
        assert sorted(cat.categories.tolist()) == [False, True]

    def test_construction_with_categorical_dtype(self):
        """A CategoricalDtype can replace the categories/ordered kwargs."""
        # construction with CategoricalDtype
        # GH#18109
        data = "a a b b".split()
        cats = "c b a".split()
        ordered = True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        expected = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(
            CategoricalIndex(data, dtype=dtype), expected, exact=True
        )

        # GH#19032
        tm.assert_index_equal(Index(data, dtype=dtype), expected, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            with tm.assert_produces_warning(FutureWarning):
                # passing subclass-specific kwargs to pd.Index
                Index(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            with tm.assert_produces_warning(FutureWarning):
                # passing subclass-specific kwargs to pd.Index
                Index(data, ordered=ordered, dtype=dtype)
|
||||
@@ -0,0 +1,90 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
)
|
||||
|
||||
|
||||
class TestEquals:
    """``CategoricalIndex.equals`` and elementwise comparisons."""

    def test_equals_categorical(self):
        """``equals`` and the comparison operators on ordered indexes."""
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
        as_object = ci1.astype(object)

        assert ci1.equals(ci1)
        assert not ci1.equals(ci2)
        assert ci1.equals(as_object)
        assert as_object.equals(ci1)

        assert (ci1 == ci1).all()
        assert not (ci1 != ci1).all()
        assert not (ci1 > ci1).all()
        assert not (ci1 < ci1).all()
        assert (ci1 <= ci1).all()
        assert (ci1 >= ci1).all()

        assert not (ci1 == 1).all()
        assert (ci1 == Index(["a", "b"])).all()
        assert (ci1 == ci1.values).all()

        # invalid comparisons
        with pytest.raises(ValueError, match="Lengths must match"):
            ci1 == Index(["a", "b", "c"])

        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            ci1 == ci2
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, ordered=False)
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, categories=list("abc"))

        # tests
        # make sure that we are testing for category inclusion properly
        letters = list("aabca")
        ci = CategoricalIndex(letters, categories=["c", "a", "b"])
        assert not ci.equals(letters)
        # Same categories, but different order
        # Unordered
        assert ci.equals(CategoricalIndex(letters))
        # Ordered
        assert not ci.equals(CategoricalIndex(letters, ordered=True))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(letters + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(letters)
        assert not ci.equals(CategoricalIndex(letters))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(letters + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(letters + [np.nan])
        assert ci.equals(CategoricalIndex(letters + [np.nan]))
        assert not ci.equals(CategoricalIndex(letters + [np.nan], ordered=True))
        assert ci.equals(ci.copy())

    def test_equals_categorical_unordered(self):
        """Category order is irrelevant for unordered indexes."""
        # https://github.com/pandas-dev/pandas/issues/16603
        ab = CategoricalIndex(["A"], categories=["A", "B"])
        ba = CategoricalIndex(["A"], categories=["B", "A"])
        not_a_category = CategoricalIndex(["C"], categories=["B", "A"])
        assert ab.equals(ba)
        assert not ab.equals(not_a_category)
        assert not ba.equals(not_a_category)

    def test_equals_non_category(self):
        """A value outside the categories makes ``equals`` return False."""
        # GH#37667 Case where other contains a value not among ci's
        # categories ("D") and also contains np.nan
        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
        other = Index(["A", "B", "D", np.nan])

        assert not ci.equals(other)

    def test_equals_multiindex(self):
        """Comparing with a MultiIndex returns False instead of raising."""
        # dont raise NotImplementedError when calling is_dtype_compat

        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
        flat = mi.to_flat_index().astype("category")

        assert not flat.equals(mi)
|
||||
@@ -0,0 +1,54 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import CategoricalIndex
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestFillNA:
    """``fillna`` on CategoricalIndex versus on its backing Categorical."""

    def test_fillna_categorical(self):
        """Fill with a value that is a category and with one that is not."""
        # GH#11343
        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
        # fill by value in categories
        filled = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
        tm.assert_index_equal(idx.fillna(1.0), filled)

        backing = idx._data

        # fill by value not in categories raises TypeError on EA, casts on CI
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            backing.fillna(2.0)

        expected = idx.astype(object).fillna(2.0)
        tm.assert_index_equal(idx.fillna(2.0), expected)

    def test_fillna_copies_with_no_nas(self):
        """With nothing to fill, the index may share memory; the array may not."""
        # Nothing to fill, should still get a copy for the Categorical method,
        # but OK to get a view on CategoricalIndex method
        ci = CategoricalIndex([0, 1, 1])
        from_index = ci.fillna(0)
        assert from_index is not ci
        assert tm.shares_memory(from_index, ci)

        # But at the EA level we always get a copy.
        cat = ci._data
        from_array = cat.fillna(0)
        assert from_array._ndarray is not cat._ndarray
        assert from_array._ndarray.base is None
        assert not tm.shares_memory(from_array, cat)

    def test_fillna_validates_with_no_nas(self):
        """The Categorical validates the fill value even when nothing is filled."""
        # We validate the fill value even if fillna is a no-op
        ci = CategoricalIndex([2, 3, 3])
        cat = ci._data

        msg = "Cannot setitem on a Categorical with a new category"
        # nothing to fill, so we dont cast
        tm.assert_index_equal(ci.fillna(False), ci)

        # Same check directly on the Categorical
        with pytest.raises(TypeError, match=msg):
            cat.fillna(False)
|
||||
@@ -0,0 +1,114 @@
|
||||
"""
|
||||
Tests for CategoricalIndex.__repr__ and related methods.
|
||||
"""
|
||||
import pandas._config.config as cf
|
||||
|
||||
from pandas import CategoricalIndex
|
||||
|
||||
|
||||
class TestCategoricalIndexRepr:
|
||||
def test_format_different_scalar_lengths(self):
|
||||
# GH#35439
|
||||
idx = CategoricalIndex(["aaaaaaaaa", "b"])
|
||||
expected = ["aaaaaaaaa", "b"]
|
||||
assert idx.format() == expected
|
||||
|
||||
def test_string_categorical_index_repr(self):
|
||||
# short
|
||||
idx = CategoricalIndex(["a", "bb", "ccc"])
|
||||
expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa:E501
|
||||
assert repr(idx) == expected
|
||||
|
||||
# multiple lines
|
||||
idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
|
||||
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
|
||||
'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
|
||||
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
|
||||
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# truncated
|
||||
idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
|
||||
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
|
||||
...
|
||||
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
|
||||
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa:E501
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# larger categories
|
||||
idx = CategoricalIndex(list("abcdefghijklmmo"))
|
||||
expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
|
||||
'm', 'm', 'o'],
|
||||
categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa:E501
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# short
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"])
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa:E501
|
||||
assert repr(idx) == expected
|
||||
|
||||
# multiple lines
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
|
||||
'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
||||
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
|
||||
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# truncated
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
|
||||
...
|
||||
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
|
||||
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa:E501
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# larger categories
|
||||
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
|
||||
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
|
||||
'す', 'せ', 'そ'],
|
||||
categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa:E501
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# Enable Unicode option -----------------------------------------
|
||||
with cf.option_context("display.unicode.east_asian_width", True):
|
||||
|
||||
# short
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"])
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa:E501
|
||||
assert repr(idx) == expected
|
||||
|
||||
# multiple lines
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
||||
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
|
||||
'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
||||
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
|
||||
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# truncated
|
||||
idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
|
||||
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
|
||||
'ううう', 'あ',
|
||||
...
|
||||
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
|
||||
'あ', 'いい', 'ううう'],
|
||||
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa:E501
|
||||
|
||||
assert repr(idx) == expected
|
||||
|
||||
# larger categories
|
||||
idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
|
||||
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
|
||||
'さ', 'し', 'す', 'せ', 'そ'],
|
||||
categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa:E501
|
||||
|
||||
assert repr(idx) == expected
|
||||
@@ -0,0 +1,414 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.errors import InvalidIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestTake:
    """``CategoricalIndex.take`` with and without ``fill_value``."""

    def test_take_fill_value(self):
        """``-1`` is a position unless filling is active, then it is missing."""
        # GH 12631
        positions = np.array([1, 0, -1])

        # numeric category
        idx = CategoricalIndex([1, 2, 3], name="xxx")
        taken = idx.take(positions)
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(taken, expected)
        tm.assert_categorical_equal(taken.values, expected.values)

        # fill_value
        taken = idx.take(positions, fill_value=True)
        expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
        tm.assert_index_equal(taken, expected)
        tm.assert_categorical_equal(taken.values, expected.values)

        # allow_fill=False
        taken = idx.take(positions, allow_fill=False, fill_value=True)
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(taken, expected)
        tm.assert_categorical_equal(taken.values, expected.values)

        # object category
        idx = CategoricalIndex(
            list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        taken = idx.take(positions)
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(taken, expected)
        tm.assert_categorical_equal(taken.values, expected.values)

        # fill_value
        taken = idx.take(positions, fill_value=True)
        expected = CategoricalIndex(
            ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(taken, expected)
        tm.assert_categorical_equal(taken.values, expected.values)

        # allow_fill=False
        taken = idx.take(positions, allow_fill=False, fill_value=True)
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(taken, expected)
        tm.assert_categorical_equal(taken.values, expected.values)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_datetime(self):
        """Same checks as above with datetime categories; the fill is NaT."""
        positions = np.array([1, 0, -1])

        # datetime category
        dates = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        idx = CategoricalIndex(dates)
        taken = idx.take(positions)
        expected = CategoricalIndex(
            pd.DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
        )
        tm.assert_index_equal(taken, expected)

        # fill_value
        taken = idx.take(positions, fill_value=True)
        with_nat = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
        expected = CategoricalIndex(with_nat, categories=exp_cats)
        tm.assert_index_equal(taken, expected)

        # allow_fill=False
        taken = idx.take(positions, allow_fill=False, fill_value=True)
        expected = CategoricalIndex(
            pd.DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
        )
        tm.assert_index_equal(taken, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for too_negative in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, too_negative]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_invalid_kwargs(self):
        """Unknown keywords and the numpy-only ``out``/``mode`` are rejected."""
        idx = CategoricalIndex([1, 2, 3], name="foo")
        indices = [1, 0, -1]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            idx.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, mode="clip")
|
||||
|
||||
|
||||
class TestGetLoc:
    """``CategoricalIndex.get_loc`` compared with a plain ``Index``."""

    def test_get_loc(self):
        """Scalar, boolean-mask and slice results match the plain Index."""
        # GH 12531
        cat_unique = CategoricalIndex(list("abcde"), categories=list("edabc"))
        obj_unique = Index(list("abcde"))
        for label in ("a", "e"):
            assert cat_unique.get_loc(label) == obj_unique.get_loc(label)

        for index in (cat_unique, obj_unique):
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                index.get_loc("NOT-EXIST")

        # non-unique
        cat_dupes = CategoricalIndex(list("aacded"), categories=list("edabc"))
        obj_dupes = Index(list("aacded"))

        # results in bool array
        mask = cat_dupes.get_loc("d")
        tm.assert_numpy_array_equal(mask, obj_dupes.get_loc("d"))
        tm.assert_numpy_array_equal(
            mask, np.array([False, False, False, True, False, True])
        )
        # unique element results in scalar
        position = cat_dupes.get_loc("e")
        assert position == obj_dupes.get_loc("e")
        assert position == 4

        for index in (cat_dupes, obj_dupes):
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                index.get_loc("NOT-EXIST")

        # non-unique, sliceable
        cat_runs = CategoricalIndex(list("aabbb"), categories=list("abc"))
        obj_runs = Index(list("aabbb"))

        # results in slice
        span = cat_runs.get_loc("a")
        assert span == obj_runs.get_loc("a")
        assert span == slice(0, 2, None)

        span = cat_runs.get_loc("b")
        assert span == obj_runs.get_loc("b")
        assert span == slice(2, 5, None)

        for index in (cat_runs, obj_runs):
            with pytest.raises(KeyError, match="'c'"):
                index.get_loc("c")

    def test_get_loc_unique(self):
        """A unique index gives an integer position."""
        cidx = CategoricalIndex(list("abc"))
        assert cidx.get_loc("b") == 1

    def test_get_loc_monotonic_nonunique(self):
        """Adjacent duplicates give a slice."""
        cidx = CategoricalIndex(list("abbc"))
        assert cidx.get_loc("b") == slice(1, 3, None)

    def test_get_loc_nonmonotonic_nonunique(self):
        """Scattered duplicates give a boolean mask."""
        cidx = CategoricalIndex(list("abcb"))
        mask = cidx.get_loc("b")
        tm.assert_numpy_array_equal(
            mask, np.array([False, True, False, True], dtype=bool)
        )

    def test_get_loc_nan(self):
        """NaN can be looked up like any other label."""
        # GH#41933
        ci = CategoricalIndex(["A", "B", np.nan])
        assert ci.get_loc(np.nan) == 2
|
||||
|
||||
|
||||
class TestGetIndexer:
    """Checks of ``get_indexer`` / ``get_indexer_non_unique`` on CategoricalIndex."""

    def test_get_indexer_base(self):
        """Self-lookup yields 0..n-1; an unknown ``method`` is rejected."""
        # Determined by cat ordering.
        ci = CategoricalIndex(list("cab"), categories=list("cab"))
        positions = np.arange(len(ci), dtype=np.intp)

        tm.assert_numpy_array_equal(positions, ci.get_indexer(ci))

        with pytest.raises(ValueError, match="Invalid fill method"):
            ci.get_indexer(ci, method="invalid")

    def test_get_indexer_requires_unique(self):
        """``get_indexer`` on an index with duplicates raises InvalidIndexError."""
        np.random.seed(123456789)

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        plain = Index(np.array(ci))

        msg = "Reindexing only valid with uniquely valued Index objects"

        for size in (1, 2, 5, len(ci)):
            target = plain[np.random.randint(0, len(ci), size=size)]

            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(target)

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for target in (list("aabbca"), list("aababca")):
            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(target)

    def test_get_indexer_non_unique(self):
        """``get_indexer_non_unique`` works where ``get_indexer`` refuses."""
        source = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        cat_target = CategoricalIndex(list("abf"))

        for target in (cat_target, list("abf"), Index(list("abf"))):
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                source.get_indexer(target)

            located, _ = source.get_indexer_non_unique(target)
            wanted = np.array([0, 1, 2, -1], dtype=np.intp)
            tm.assert_almost_equal(located, wanted)

    def test_get_indexer_method(self):
        """The pad / backfill / nearest fill methods raise NotImplementedError."""
        target = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        source = CategoricalIndex(list("abf"))

        cases = (
            ("pad", "method pad not yet implemented for CategoricalIndex"),
            ("backfill", "method backfill not yet implemented for CategoricalIndex"),
            ("nearest", "method nearest not yet implemented for CategoricalIndex"),
        )
        for how, msg in cases:
            with pytest.raises(NotImplementedError, match=msg):
                source.get_indexer(target, method=how)

    def test_get_indexer_array(self):
        """An object ndarray of Timestamps is accepted as the target."""
        target = np.array(
            [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
            dtype=object,
        )
        cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
        ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
        located = ci.get_indexer(target)
        wanted = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(located, wanted)

    def test_get_indexer_same_categories_same_order(self):
        """Target with identical categories in identical order."""
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        target = CategoricalIndex(["b", "b"], categories=["a", "b"])
        located = ci.get_indexer(target)
        tm.assert_numpy_array_equal(located, np.array([1, 1], dtype="intp"))

    def test_get_indexer_same_categories_different_order(self):
        """Target with identical categories listed in a different order."""
        # https://github.com/pandas-dev/pandas/issues/19551
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        target = CategoricalIndex(["b", "b"], categories=["b", "a"])
        located = ci.get_indexer(target)
        tm.assert_numpy_array_equal(located, np.array([1, 1], dtype="intp"))
|
||||
|
||||
|
||||
class TestWhere:
|
||||
def test_where(self, listlike_box):
|
||||
klass = listlike_box
|
||||
|
||||
i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
|
||||
cond = [True] * len(i)
|
||||
expected = i
|
||||
result = i.where(klass(cond))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
cond = [False] + [True] * (len(i) - 1)
|
||||
expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
|
||||
result = i.where(klass(cond))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_where_non_categories(self):
|
||||
ci = CategoricalIndex(["a", "b", "c", "d"])
|
||||
mask = np.array([True, False, True, False])
|
||||
|
||||
result = ci.where(mask, 2)
|
||||
expected = Index(["a", 2, "c", 2], dtype=object)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
msg = "Cannot setitem on a Categorical with a new category"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
# Test the Categorical method directly
|
||||
ci._data._where(mask, 2)
|
||||
|
||||
|
||||
class TestContains:
    """Checks of ``in`` (``__contains__``) on CategoricalIndex and its data."""

    def test_contains(self):
        """Only values actually present are contained; codes and unused categories are not."""
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)

        assert "a" in ci
        for absent in ("z", "e", np.nan):
            assert absent not in ci

        # assert codes NOT in index
        for code in (0, 1):
            assert code not in ci

    def test_contains_nan(self):
        """NaN is contained once it is one of the values."""
        ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
        assert np.nan in ci

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        """Which NA scalars are contained for datetime, timedelta and period categories."""
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]

        # datetime categories
        dt_obj = CategoricalIndex(dti)
        if unwrap:
            dt_obj = dt_obj._data

        assert np.nan in dt_obj
        assert None in dt_obj
        assert pd.NaT in dt_obj
        assert np.datetime64("NaT") in dt_obj
        assert np.timedelta64("NaT") not in dt_obj

        # timedelta categories
        td_obj = CategoricalIndex(tdi)
        if unwrap:
            td_obj = td_obj._data

        assert np.nan in td_obj
        assert None in td_obj
        assert pd.NaT in td_obj
        assert np.datetime64("NaT") not in td_obj
        assert np.timedelta64("NaT") in td_obj

        # period categories
        per_obj = CategoricalIndex(pi)
        if unwrap:
            per_obj = per_obj._data

        assert np.nan in per_obj
        assert None in per_obj
        assert pd.NaT in per_obj
        assert np.datetime64("NaT") not in per_obj
        assert np.timedelta64("NaT") not in per_obj

    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        """Membership for interval categories returns exactly ``expected``."""
        # GH 23705
        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        outcome = item in ci
        assert outcome is expected

    def test_contains_list(self):
        """Testing a list for membership raises TypeError (unhashable)."""
        # GH#21729
        ci = CategoricalIndex([1, 2, 3])

        assert "a" not in ci

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in ci

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in ci
|
||||
@@ -0,0 +1,115 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMap:
|
||||
@pytest.mark.parametrize(
|
||||
"data, categories",
|
||||
[
|
||||
(list("abcbca"), list("cab")),
|
||||
(pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
|
||||
],
|
||||
ids=["string", "interval"],
|
||||
)
|
||||
def test_map_str(self, data, categories, ordered):
|
||||
# GH 31202 - override base class since we want to maintain categorical/ordered
|
||||
index = CategoricalIndex(data, categories=categories, ordered=ordered)
|
||||
result = index.map(str)
|
||||
expected = CategoricalIndex(
|
||||
map(str, data), categories=map(str, categories), ordered=ordered
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_map(self):
|
||||
ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
|
||||
result = ci.map(lambda x: x.lower())
|
||||
exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
ci = CategoricalIndex(
|
||||
list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
|
||||
)
|
||||
result = ci.map(lambda x: x.lower())
|
||||
exp = CategoricalIndex(
|
||||
list("ababc"), categories=list("bac"), ordered=False, name="XXX"
|
||||
)
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
# GH 12766: Return an index not an array
|
||||
tm.assert_index_equal(
|
||||
ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
|
||||
)
|
||||
|
||||
# change categories dtype
|
||||
ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)
|
||||
|
||||
def f(x):
|
||||
return {"A": 10, "B": 20, "C": 30}.get(x)
|
||||
|
||||
result = ci.map(f)
|
||||
exp = CategoricalIndex(
|
||||
[10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False
|
||||
)
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
result = ci.map(Series([10, 20, 30], index=["A", "B", "C"]))
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
result = ci.map({"A": 10, "B": 20, "C": 30})
|
||||
tm.assert_index_equal(result, exp)
|
||||
|
||||
def test_map_with_categorical_series(self):
|
||||
# GH 12756
|
||||
a = Index([1, 2, 3, 4])
|
||||
b = Series(["even", "odd", "even", "odd"], dtype="category")
|
||||
c = Series(["even", "odd", "even", "odd"])
|
||||
|
||||
exp = CategoricalIndex(["odd", "even", "odd", np.nan])
|
||||
tm.assert_index_equal(a.map(b), exp)
|
||||
exp = Index(["odd", "even", "odd", np.nan])
|
||||
tm.assert_index_equal(a.map(c), exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
("data", "f"),
|
||||
(
|
||||
([1, 1, np.nan], pd.isna),
|
||||
([1, 2, np.nan], pd.isna),
|
||||
([1, 1, np.nan], {1: False}),
|
||||
([1, 2, np.nan], {1: False, 2: False}),
|
||||
([1, 1, np.nan], Series([False, False])),
|
||||
([1, 2, np.nan], Series([False, False, False])),
|
||||
),
|
||||
)
|
||||
def test_map_with_nan(self, data, f): # GH 24241
|
||||
values = pd.Categorical(data)
|
||||
result = values.map(f)
|
||||
if data[1] == 1:
|
||||
expected = pd.Categorical([False, False, np.nan])
|
||||
tm.assert_categorical_equal(result, expected)
|
||||
else:
|
||||
expected = Index([False, False, np.nan])
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_map_with_dict_or_series(self):
|
||||
orig_values = ["a", "B", 1, "a"]
|
||||
new_values = ["one", 2, 3.0, "one"]
|
||||
cur_index = CategoricalIndex(orig_values, name="XXX")
|
||||
expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])
|
||||
|
||||
mapper = Series(new_values[:-1], index=orig_values[:-1])
|
||||
result = cur_index.map(mapper)
|
||||
# Order of categories in result can be different
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
mapper = {o: n for o, n in zip(orig_values[:-1], new_values[:-1])}
|
||||
result = cur_index.map(mapper)
|
||||
# Order of categories in result can be different
|
||||
tm.assert_index_equal(result, expected)
|
||||
@@ -0,0 +1,86 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Interval,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestReindex:
|
||||
def test_reindex_list_non_unique(self):
|
||||
# GH#11586
|
||||
ci = CategoricalIndex(["a", "b", "c", "a"])
|
||||
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
|
||||
res, indexer = ci.reindex(["a", "c"])
|
||||
|
||||
tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
|
||||
|
||||
def test_reindex_categorcal_non_unique(self):
|
||||
ci = CategoricalIndex(["a", "b", "c", "a"])
|
||||
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
|
||||
res, indexer = ci.reindex(Categorical(["a", "c"]))
|
||||
|
||||
exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
|
||||
tm.assert_index_equal(res, exp, exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
|
||||
|
||||
def test_reindex_list_non_unique_unused_category(self):
|
||||
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
|
||||
res, indexer = ci.reindex(["a", "c"])
|
||||
exp = Index(["a", "a", "c"], dtype="object")
|
||||
tm.assert_index_equal(res, exp, exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
|
||||
|
||||
def test_reindex_categorical_non_unique_unused_category(self):
|
||||
ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
|
||||
with tm.assert_produces_warning(FutureWarning, match="non-unique"):
|
||||
res, indexer = ci.reindex(Categorical(["a", "c"]))
|
||||
exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
|
||||
tm.assert_index_equal(res, exp, exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
|
||||
|
||||
def test_reindex_duplicate_target(self):
|
||||
# See GH25459
|
||||
cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
|
||||
res, indexer = cat.reindex(["a", "c", "c"])
|
||||
exp = Index(["a", "c", "c"], dtype="object")
|
||||
tm.assert_index_equal(res, exp, exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
|
||||
|
||||
res, indexer = cat.reindex(
|
||||
CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
|
||||
)
|
||||
exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
|
||||
tm.assert_index_equal(res, exp, exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
|
||||
|
||||
def test_reindex_empty_index(self):
|
||||
# See GH16770
|
||||
c = CategoricalIndex([])
|
||||
res, indexer = c.reindex(["a", "b"])
|
||||
tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
|
||||
tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
|
||||
|
||||
def test_reindex_categorical_added_category(self):
|
||||
# GH 42424
|
||||
ci = CategoricalIndex(
|
||||
[Interval(0, 1, closed="right"), Interval(1, 2, closed="right")],
|
||||
ordered=True,
|
||||
)
|
||||
ci_add = CategoricalIndex(
|
||||
[
|
||||
Interval(0, 1, closed="right"),
|
||||
Interval(1, 2, closed="right"),
|
||||
Interval(2, 3, closed="right"),
|
||||
Interval(3, 4, closed="right"),
|
||||
],
|
||||
ordered=True,
|
||||
)
|
||||
result, _ = ci.reindex(ci_add)
|
||||
expected = ci_add
|
||||
tm.assert_index_equal(expected, result)
|
||||
@@ -0,0 +1,897 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
import gc
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import Timestamp
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_datetime64tz_dtype,
|
||||
is_integer_dtype,
|
||||
)
|
||||
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
PeriodIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import ( # noqa:F401
|
||||
Float64Index,
|
||||
Int64Index,
|
||||
NumericIndex,
|
||||
UInt64Index,
|
||||
)
|
||||
from pandas.core.arrays import BaseMaskedArray
|
||||
|
||||
|
||||
class Base:
|
||||
"""
|
||||
Base class for index sub-class tests.
|
||||
"""
|
||||
|
||||
_index_cls: type[Index]
|
||||
|
||||
@pytest.fixture
def simple_index(self):
    """Fixture placeholder; this base implementation always raises."""
    message = "Method not implemented"
    raise NotImplementedError(message)
|
||||
|
||||
def create_index(self) -> Index:
    """Placeholder; this base implementation always raises."""
    message = "Method not implemented"
    raise NotImplementedError(message)
|
||||
|
||||
def test_pickle_compat_construction(self):
|
||||
# need an object to create with
|
||||
msg = "|".join(
|
||||
[
|
||||
r"Index\(\.\.\.\) must be called with a collection of some "
|
||||
r"kind, None was passed",
|
||||
r"DatetimeIndex\(\) must be called with a collection of some "
|
||||
r"kind, None was passed",
|
||||
r"TimedeltaIndex\(\) must be called with a collection of some "
|
||||
r"kind, None was passed",
|
||||
r"__new__\(\) missing 1 required positional argument: 'data'",
|
||||
r"__new__\(\) takes at least 2 arguments \(1 given\)",
|
||||
]
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
self._index_cls()
|
||||
|
||||
def test_shift(self, simple_index):
|
||||
|
||||
# GH8083 test the base class for shift
|
||||
idx = simple_index
|
||||
msg = (
|
||||
f"This method is only implemented for DatetimeIndex, PeriodIndex and "
|
||||
f"TimedeltaIndex; Got type {type(idx).__name__}"
|
||||
)
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.shift(1)
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.shift(1, 2)
|
||||
|
||||
def test_constructor_name_unhashable(self, simple_index):
|
||||
# GH#29069 check that name is hashable
|
||||
# See also same-named test in tests.series.test_constructors
|
||||
idx = simple_index
|
||||
with pytest.raises(TypeError, match="Index.name must be a hashable type"):
|
||||
type(idx)(idx, name=[])
|
||||
|
||||
def test_create_index_existing_name(self, simple_index):
|
||||
|
||||
# GH11193, when an existing index is passed, and a new name is not
|
||||
# specified, the new index should inherit the previous object name
|
||||
expected = simple_index
|
||||
if not isinstance(expected, MultiIndex):
|
||||
expected.name = "foo"
|
||||
result = Index(expected)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = Index(expected, name="bar")
|
||||
expected.name = "bar"
|
||||
tm.assert_index_equal(result, expected)
|
||||
else:
|
||||
expected.names = ["foo", "bar"]
|
||||
result = Index(expected)
|
||||
tm.assert_index_equal(
|
||||
result,
|
||||
Index(
|
||||
Index(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
],
|
||||
dtype="object",
|
||||
),
|
||||
names=["foo", "bar"],
|
||||
),
|
||||
)
|
||||
|
||||
result = Index(expected, names=["A", "B"])
|
||||
tm.assert_index_equal(
|
||||
result,
|
||||
Index(
|
||||
Index(
|
||||
[
|
||||
("foo", "one"),
|
||||
("foo", "two"),
|
||||
("bar", "one"),
|
||||
("baz", "two"),
|
||||
("qux", "one"),
|
||||
("qux", "two"),
|
||||
],
|
||||
dtype="object",
|
||||
),
|
||||
names=["A", "B"],
|
||||
),
|
||||
)
|
||||
|
||||
def test_numeric_compat(self, simple_index):
|
||||
|
||||
idx = simple_index
|
||||
# Check that this doesn't cover MultiIndex case, if/when it does,
|
||||
# we can remove multi.test_compat.test_numeric_compat
|
||||
assert not isinstance(idx, MultiIndex)
|
||||
if type(idx) is Index:
|
||||
return
|
||||
|
||||
typ = type(idx._data).__name__
|
||||
cls = type(idx).__name__
|
||||
lmsg = "|".join(
|
||||
[
|
||||
rf"unsupported operand type\(s\) for \*: '{typ}' and 'int'",
|
||||
"cannot perform (__mul__|__truediv__|__floordiv__) with "
|
||||
f"this index type: ({cls}|{typ})",
|
||||
]
|
||||
)
|
||||
with pytest.raises(TypeError, match=lmsg):
|
||||
idx * 1
|
||||
rmsg = "|".join(
|
||||
[
|
||||
rf"unsupported operand type\(s\) for \*: 'int' and '{typ}'",
|
||||
"cannot perform (__rmul__|__rtruediv__|__rfloordiv__) with "
|
||||
f"this index type: ({cls}|{typ})",
|
||||
]
|
||||
)
|
||||
with pytest.raises(TypeError, match=rmsg):
|
||||
1 * idx
|
||||
|
||||
div_err = lmsg.replace("*", "/")
|
||||
with pytest.raises(TypeError, match=div_err):
|
||||
idx / 1
|
||||
div_err = rmsg.replace("*", "/")
|
||||
with pytest.raises(TypeError, match=div_err):
|
||||
1 / idx
|
||||
|
||||
floordiv_err = lmsg.replace("*", "//")
|
||||
with pytest.raises(TypeError, match=floordiv_err):
|
||||
idx // 1
|
||||
floordiv_err = rmsg.replace("*", "//")
|
||||
with pytest.raises(TypeError, match=floordiv_err):
|
||||
1 // idx
|
||||
|
||||
def test_logical_compat(self, simple_index):
|
||||
idx = simple_index
|
||||
with pytest.raises(TypeError, match="cannot perform all"):
|
||||
idx.all()
|
||||
with pytest.raises(TypeError, match="cannot perform any"):
|
||||
idx.any()
|
||||
|
||||
def test_repr_roundtrip(self, simple_index):
|
||||
|
||||
idx = simple_index
|
||||
tm.assert_index_equal(eval(repr(idx)), idx)
|
||||
|
||||
def test_repr_max_seq_item_setting(self, simple_index):
|
||||
# GH10182
|
||||
idx = simple_index
|
||||
idx = idx.repeat(50)
|
||||
with pd.option_context("display.max_seq_items", None):
|
||||
repr(idx)
|
||||
assert "..." not in str(idx)
|
||||
|
||||
def test_ensure_copied_data(self, index):
|
||||
# Check the "copy" argument of each Index.__new__ is honoured
|
||||
# GH12309
|
||||
init_kwargs = {}
|
||||
if isinstance(index, PeriodIndex):
|
||||
# Needs "freq" specification:
|
||||
init_kwargs["freq"] = index.freq
|
||||
elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
|
||||
# RangeIndex cannot be initialized from data
|
||||
# MultiIndex and CategoricalIndex are tested separately
|
||||
return
|
||||
|
||||
index_type = type(index)
|
||||
result = index_type(index.values, copy=True, **init_kwargs)
|
||||
if is_datetime64tz_dtype(index.dtype):
|
||||
result = result.tz_localize("UTC").tz_convert(index.tz)
|
||||
if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
|
||||
index = index._with_freq(None)
|
||||
|
||||
tm.assert_index_equal(index, result)
|
||||
|
||||
if isinstance(index, PeriodIndex):
|
||||
# .values an object array of Period, thus copied
|
||||
result = index_type(ordinal=index.asi8, copy=False, **init_kwargs)
|
||||
tm.assert_numpy_array_equal(index.asi8, result.asi8, check_same="same")
|
||||
elif isinstance(index, IntervalIndex):
|
||||
# checked in test_interval.py
|
||||
pass
|
||||
elif type(index) is Index and not isinstance(index.dtype, np.dtype):
|
||||
result = index_type(index.values, copy=False, **init_kwargs)
|
||||
tm.assert_index_equal(result, index)
|
||||
|
||||
if isinstance(index._values, BaseMaskedArray):
|
||||
assert np.shares_memory(index._values._data, result._values._data)
|
||||
tm.assert_numpy_array_equal(
|
||||
index._values._data, result._values._data, check_same="same"
|
||||
)
|
||||
assert np.shares_memory(index._values._mask, result._values._mask)
|
||||
tm.assert_numpy_array_equal(
|
||||
index._values._mask, result._values._mask, check_same="same"
|
||||
)
|
||||
elif index.dtype == "string[python]":
|
||||
assert np.shares_memory(index._values._ndarray, result._values._ndarray)
|
||||
tm.assert_numpy_array_equal(
|
||||
index._values._ndarray, result._values._ndarray, check_same="same"
|
||||
)
|
||||
elif index.dtype == "string[pyarrow]":
|
||||
assert tm.shares_memory(result._values, index._values)
|
||||
else:
|
||||
raise NotImplementedError(index.dtype)
|
||||
else:
|
||||
result = index_type(index.values, copy=False, **init_kwargs)
|
||||
tm.assert_numpy_array_equal(index.values, result.values, check_same="same")
|
||||
|
||||
def test_memory_usage(self, index):
|
||||
index._engine.clear_mapping()
|
||||
result = index.memory_usage()
|
||||
if index.empty:
|
||||
# we report 0 for no-length
|
||||
assert result == 0
|
||||
return
|
||||
|
||||
# non-zero length
|
||||
index.get_loc(index[0])
|
||||
result2 = index.memory_usage()
|
||||
result3 = index.memory_usage(deep=True)
|
||||
|
||||
# RangeIndex, IntervalIndex
|
||||
# don't have engines
|
||||
# Index[EA] has engine but it does not have a Hashtable .mapping
|
||||
if not isinstance(index, (RangeIndex, IntervalIndex)) and not (
|
||||
type(index) is Index and not isinstance(index.dtype, np.dtype)
|
||||
):
|
||||
assert result2 > result
|
||||
|
||||
if index.inferred_type == "object":
|
||||
assert result3 > result2
|
||||
|
||||
def test_argsort(self, request, index):
|
||||
# separately tested
|
||||
if isinstance(index, CategoricalIndex):
|
||||
return
|
||||
|
||||
result = index.argsort()
|
||||
expected = np.array(index).argsort()
|
||||
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
||||
|
||||
def test_numpy_argsort(self, index):
|
||||
result = np.argsort(index)
|
||||
expected = index.argsort()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = np.argsort(index, kind="mergesort")
|
||||
expected = index.argsort(kind="mergesort")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# these are the only two types that perform
|
||||
# pandas compatibility input validation - the
|
||||
# rest already perform separate (or no) such
|
||||
# validation via their 'values' attribute as
|
||||
# defined in pandas.core.indexes/base.py - they
|
||||
# cannot be changed at the moment due to
|
||||
# backwards compatibility concerns
|
||||
if isinstance(index, (CategoricalIndex, RangeIndex)):
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(index, axis=1)
|
||||
|
||||
msg = "the 'order' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.argsort(index, order=("a", "b"))
|
||||
|
||||
def test_repeat(self, simple_index):
|
||||
rep = 2
|
||||
idx = simple_index.copy()
|
||||
new_index_cls = Int64Index if isinstance(idx, RangeIndex) else idx._constructor
|
||||
expected = new_index_cls(idx.values.repeat(rep), name=idx.name)
|
||||
tm.assert_index_equal(idx.repeat(rep), expected)
|
||||
|
||||
idx = simple_index
|
||||
rep = np.arange(len(idx))
|
||||
expected = new_index_cls(idx.values.repeat(rep), name=idx.name)
|
||||
tm.assert_index_equal(idx.repeat(rep), expected)
|
||||
|
||||
def test_numpy_repeat(self, simple_index):
|
||||
rep = 2
|
||||
idx = simple_index
|
||||
expected = idx.repeat(rep)
|
||||
tm.assert_index_equal(np.repeat(idx, rep), expected)
|
||||
|
||||
msg = "the 'axis' parameter is not supported"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
np.repeat(idx, rep, axis=0)
|
||||
|
||||
def test_where(self, listlike_box, simple_index):
|
||||
klass = listlike_box
|
||||
|
||||
idx = simple_index
|
||||
if isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
|
||||
# where does not preserve freq
|
||||
idx = idx._with_freq(None)
|
||||
|
||||
cond = [True] * len(idx)
|
||||
result = idx.where(klass(cond))
|
||||
expected = idx
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
cond = [False] + [True] * len(idx[1:])
|
||||
expected = Index([idx._na_value] + idx[1:].tolist(), dtype=idx.dtype)
|
||||
result = idx.where(klass(cond))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_insert_base(self, index):
|
||||
result = index[1:4]
|
||||
|
||||
if not len(index):
|
||||
return
|
||||
|
||||
# test 0th element
|
||||
assert index[0:4].equals(result.insert(0, index[0]))
|
||||
|
||||
def test_insert_out_of_bounds(self, index):
|
||||
# TypeError/IndexError matches what np.insert raises in these cases
|
||||
|
||||
if len(index) > 0:
|
||||
err = TypeError
|
||||
else:
|
||||
err = IndexError
|
||||
if len(index) == 0:
|
||||
# 0 vs 0.5 in error message varies with numpy version
|
||||
msg = "index (0|0.5) is out of bounds for axis 0 with size 0"
|
||||
else:
|
||||
msg = "slice indices must be integers or None or have an __index__ method"
|
||||
with pytest.raises(err, match=msg):
|
||||
index.insert(0.5, "foo")
|
||||
|
||||
msg = "|".join(
|
||||
[
|
||||
r"index -?\d+ is out of bounds for axis 0 with size \d+",
|
||||
"loc must be an integer between",
|
||||
]
|
||||
)
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
index.insert(len(index) + 1, 1)
|
||||
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
index.insert(-len(index) - 1, 1)
|
||||
|
||||
def test_delete_base(self, index):
|
||||
if not len(index):
|
||||
return
|
||||
|
||||
if isinstance(index, RangeIndex):
|
||||
# tested in class
|
||||
return
|
||||
|
||||
expected = index[1:]
|
||||
result = index.delete(0)
|
||||
assert result.equals(expected)
|
||||
assert result.name == expected.name
|
||||
|
||||
expected = index[:-1]
|
||||
result = index.delete(-1)
|
||||
assert result.equals(expected)
|
||||
assert result.name == expected.name
|
||||
|
||||
length = len(index)
|
||||
msg = f"index {length} is out of bounds for axis 0 with size {length}"
|
||||
with pytest.raises(IndexError, match=msg):
|
||||
index.delete(length)
|
||||
|
||||
def test_equals(self, index):
|
||||
if isinstance(index, IntervalIndex):
|
||||
# IntervalIndex tested separately, the index.equals(index.astype(object))
|
||||
# fails for IntervalIndex
|
||||
return
|
||||
|
||||
is_ea_idx = type(index) is Index and not isinstance(index.dtype, np.dtype)
|
||||
|
||||
assert index.equals(index)
|
||||
assert index.equals(index.copy())
|
||||
if not is_ea_idx:
|
||||
# doesn't hold for e.g. IntegerDtype
|
||||
assert index.equals(index.astype(object))
|
||||
|
||||
assert not index.equals(list(index))
|
||||
assert not index.equals(np.array(index))
|
||||
|
||||
# Cannot pass in non-int64 dtype to RangeIndex
|
||||
if not isinstance(index, RangeIndex) and not is_ea_idx:
|
||||
same_values = Index(index, dtype=object)
|
||||
assert index.equals(same_values)
|
||||
assert same_values.equals(index)
|
||||
|
||||
if index.nlevels == 1:
|
||||
# do not test MultiIndex
|
||||
assert not index.equals(Series(index))
|
||||
|
||||
def test_equals_op(self, simple_index):
|
||||
# GH9947, GH10637
|
||||
index_a = simple_index
|
||||
|
||||
n = len(index_a)
|
||||
index_b = index_a[0:-1]
|
||||
index_c = index_a[0:-1].append(index_a[-2:-1])
|
||||
index_d = index_a[0:1]
|
||||
|
||||
msg = "Lengths must match|could not be broadcast"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
index_a == index_b
|
||||
expected1 = np.array([True] * n)
|
||||
expected2 = np.array([True] * (n - 1) + [False])
|
||||
tm.assert_numpy_array_equal(index_a == index_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == index_c, expected2)
|
||||
|
||||
# test comparisons with numpy arrays
|
||||
array_a = np.array(index_a)
|
||||
array_b = np.array(index_a[0:-1])
|
||||
array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
|
||||
array_d = np.array(index_a[0:1])
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
index_a == array_b
|
||||
tm.assert_numpy_array_equal(index_a == array_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == array_c, expected2)
|
||||
|
||||
# test comparisons with Series
|
||||
series_a = Series(array_a)
|
||||
series_b = Series(array_b)
|
||||
series_c = Series(array_c)
|
||||
series_d = Series(array_d)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
index_a == series_b
|
||||
|
||||
tm.assert_numpy_array_equal(index_a == series_a, expected1)
|
||||
tm.assert_numpy_array_equal(index_a == series_c, expected2)
|
||||
|
||||
# cases where length is 1 for one of them
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == index_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == series_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
index_a == array_d
|
||||
msg = "Can only compare identically-labeled Series objects"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
series_a == series_d
|
||||
with pytest.raises(ValueError, match="Lengths must match"):
|
||||
series_a == array_d
|
||||
|
||||
# comparing with a scalar should broadcast; note that we are excluding
|
||||
# MultiIndex because in this case each item in the index is a tuple of
|
||||
# length 2, and therefore is considered an array of length 2 in the
|
||||
# comparison instead of a scalar
|
||||
if not isinstance(index_a, MultiIndex):
|
||||
expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
|
||||
# assuming the 2nd to last item is unique in the data
|
||||
item = index_a[-2]
|
||||
tm.assert_numpy_array_equal(index_a == item, expected3)
|
||||
# For RangeIndex we can convert to Int64Index
|
||||
tm.assert_series_equal(series_a == item, Series(expected3))
|
||||
|
||||
def test_format(self, simple_index):
|
||||
# GH35439
|
||||
idx = simple_index
|
||||
expected = [str(x) for x in idx]
|
||||
assert idx.format() == expected
|
||||
|
||||
def test_format_empty(self):
|
||||
# GH35712
|
||||
empty_idx = self._index_cls([])
|
||||
assert empty_idx.format() == []
|
||||
assert empty_idx.format(name=True) == [""]
|
||||
|
||||
def test_fillna(self, index):
    """
    Check ``fillna`` for the supplied index (GH 11343).

    Zero-length indexes and integer-dtype ``NumericIndex`` objects are
    skipped, a ``MultiIndex`` is expected to raise, and every other index
    is checked both without and with a missing value present.
    """
    if len(index) == 0:
        return
    if isinstance(index, NumericIndex) and is_integer_dtype(index.dtype):
        return

    if isinstance(index, MultiIndex):
        mi = index.copy(deep=True)
        msg = "isna is not defined for MultiIndex"
        with pytest.raises(NotImplementedError, match=msg):
            mi.fillna(mi[0])
        return

    # With the untouched copy the fill must give an equal index, but as a
    # distinct object.
    clean = index.copy(deep=True)
    filled = clean.fillna(clean[0])
    tm.assert_index_equal(filled, clean)
    assert filled is not clean

    # A list-like fill value is rejected.
    msg = "'value' must be a scalar, passed: "
    with pytest.raises(TypeError, match=msg):
        clean.fillna([clean[0]])

    # Rebuild the index from a fresh copy of the values with slot 1 set
    # to NaN.
    raw = index.copy(deep=True)._values
    raw[1] = np.nan
    with_na = type(index)(raw)

    msg = "does not support 'downcast'"
    with pytest.raises(NotImplementedError, match=msg):
        # For now at least, we only raise if there are NAs present
        with_na.fillna(with_na[0], downcast="infer")

    na_mask = np.zeros(len(with_na), dtype=bool)
    na_mask[1] = True
    tm.assert_numpy_array_equal(with_na._isnan, na_mask)
    assert with_na.hasnans is True
|
||||
|
||||
def test_nulls(self, index):
|
||||
# this is really a smoke test for the methods
|
||||
# as these are adequately tested for function elsewhere
|
||||
if len(index) == 0:
|
||||
tm.assert_numpy_array_equal(index.isna(), np.array([], dtype=bool))
|
||||
elif isinstance(index, MultiIndex):
|
||||
idx = index.copy()
|
||||
msg = "isna is not defined for MultiIndex"
|
||||
with pytest.raises(NotImplementedError, match=msg):
|
||||
idx.isna()
|
||||
elif not index.hasnans:
|
||||
tm.assert_numpy_array_equal(index.isna(), np.zeros(len(index), dtype=bool))
|
||||
tm.assert_numpy_array_equal(index.notna(), np.ones(len(index), dtype=bool))
|
||||
else:
|
||||
result = isna(index)
|
||||
tm.assert_numpy_array_equal(index.isna(), result)
|
||||
tm.assert_numpy_array_equal(index.notna(), ~result)
|
||||
|
||||
def test_empty(self, simple_index):
|
||||
# GH 15270
|
||||
idx = simple_index
|
||||
assert not idx.empty
|
||||
assert idx[:0].empty
|
||||
|
||||
def test_join_self_unique(self, join_type, simple_index):
|
||||
idx = simple_index
|
||||
if idx.is_unique:
|
||||
joined = idx.join(idx, how=join_type)
|
||||
assert (idx == joined).all()
|
||||
|
||||
def test_map(self, simple_index):
|
||||
# callable
|
||||
idx = simple_index
|
||||
|
||||
result = idx.map(lambda x: x)
|
||||
# For RangeIndex we convert to Int64Index
|
||||
tm.assert_index_equal(result, idx, exact="equiv")
|
||||
|
||||
@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, index: dict(zip(index, values)),
        lambda values, index: Series(values, index),
    ],
)
def test_map_dictlike(self, mapper, simple_index):
    """
    Map the index through a dict or a Series built by ``mapper``.

    First an identity mapping (index -> own values) is checked, then a
    mapping whose values are all NaN.
    """
    if isinstance(simple_index, CategoricalIndex):
        # TODO(2.0): see if we can avoid skipping once
        #  CategoricalIndex.reindex is removed.
        pytest.skip(f"skipping tests for {type(simple_index)}")

    identity = mapper(simple_index.values, simple_index)
    mapped = simple_index.map(identity)
    # For RangeIndex we convert to Int64Index
    tm.assert_index_equal(mapped, simple_index, exact="equiv")

    # empty mappable: pick the index class (and dtype) expected for the
    # all-NaN result
    if simple_index._is_backward_compat_public_numeric_index:
        nan_index_cls = NumericIndex
        nan_dtype = simple_index.dtype if simple_index.dtype.kind == "f" else None
    else:
        nan_index_cls = Float64Index
        nan_dtype = None

    all_nan = nan_index_cls([np.nan] * len(simple_index), dtype=nan_dtype)
    mapped = simple_index.map(mapper(all_nan, simple_index))
    tm.assert_index_equal(mapped, all_nan)
|
||||
|
||||
def test_map_str(self, simple_index):
|
||||
# GH 31202
|
||||
idx = simple_index
|
||||
result = idx.map(str)
|
||||
expected = Index([str(x) for x in idx], dtype=object)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("copy", [True, False])
@pytest.mark.parametrize("name", [None, "foo"])
@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(self, copy, name, ordered, simple_index):
    """
    ``astype`` to a categorical dtype must match building a
    ``CategoricalIndex`` directly (GH 18630).
    """
    source = simple_index.rename(name) if name else simple_index

    # standard categories
    cat_dtype = CategoricalDtype(ordered=ordered)
    converted = source.astype(cat_dtype, copy=copy)
    built = CategoricalIndex(source, name=name, ordered=ordered)
    tm.assert_index_equal(converted, built, exact=True)

    # non-standard categories: every unique value except the last one
    partial_categories = source.unique().tolist()[:-1]
    cat_dtype = CategoricalDtype(partial_categories, ordered)
    converted = source.astype(cat_dtype, copy=copy)
    built = CategoricalIndex(source, name=name, dtype=cat_dtype)
    tm.assert_index_equal(converted, built, exact=True)

    if ordered is not False:
        return
    # dtype='category' defaults to ordered=False, so only test once
    converted = source.astype("category", copy=copy)
    built = CategoricalIndex(source, name=name)
    tm.assert_index_equal(converted, built, exact=True)
|
||||
|
||||
def test_is_unique(self, simple_index):
|
||||
# initialize a unique index
|
||||
index = simple_index.drop_duplicates()
|
||||
assert index.is_unique is True
|
||||
|
||||
# empty index should be unique
|
||||
index_empty = index[:0]
|
||||
assert index_empty.is_unique is True
|
||||
|
||||
# test basic dupes
|
||||
index_dup = index.insert(0, index[0])
|
||||
assert index_dup.is_unique is False
|
||||
|
||||
# single NA should be unique
|
||||
index_na = index.insert(0, np.nan)
|
||||
assert index_na.is_unique is True
|
||||
|
||||
# multiple NA should not be unique
|
||||
index_na_dup = index_na.insert(0, np.nan)
|
||||
assert index_na_dup.is_unique is False
|
||||
|
||||
@pytest.mark.arm_slow
def test_engine_reference_cycle(self, simple_index):
    """Accessing ``_engine`` must not change the number of referrers (GH27585)."""
    referrers_before = len(gc.get_referrers(simple_index))
    # Attribute access only; the value itself is not needed.
    simple_index._engine
    referrers_after = len(gc.get_referrers(simple_index))
    assert referrers_after == referrers_before
|
||||
|
||||
def test_getitem_2d_deprecated(self, simple_index):
    """
    Multi-dimensional indexing warns and returns an ndarray
    (GH#30588, GH#31479).
    """
    msg = "Support for multi-dimensional indexing"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        expanded = simple_index[:, None]

    assert isinstance(expanded, np.ndarray), type(expanded)

    if isinstance(simple_index, RangeIndex):
        # GH#44051 RangeIndex already raises
        msg = "only integers, slices"
        for flag in (True, False):
            with pytest.raises(IndexError, match=msg):
                simple_index[flag]
    else:
        # A bare boolean key goes down the same deprecated path.
        for flag in (True, False):
            with tm.assert_produces_warning(FutureWarning, match=msg):
                expanded = simple_index[flag]
            assert isinstance(expanded, np.ndarray), type(expanded)
|
||||
|
||||
def test_copy_shares_cache(self, simple_index):
|
||||
# GH32898, GH36840
|
||||
idx = simple_index
|
||||
idx.get_loc(idx[0]) # populates the _cache.
|
||||
copy = idx.copy()
|
||||
|
||||
assert copy._cache is idx._cache
|
||||
|
||||
def test_shallow_copy_shares_cache(self, simple_index):
|
||||
# GH32669, GH36840
|
||||
idx = simple_index
|
||||
idx.get_loc(idx[0]) # populates the _cache.
|
||||
shallow_copy = idx._view()
|
||||
|
||||
assert shallow_copy._cache is idx._cache
|
||||
|
||||
shallow_copy = idx._shallow_copy(idx._data)
|
||||
assert shallow_copy._cache is not idx._cache
|
||||
assert shallow_copy._cache == {}
|
||||
|
||||
def test_index_groupby(self, simple_index):
|
||||
idx = simple_index[:5]
|
||||
to_groupby = np.array([1, 2, np.nan, 2, 1])
|
||||
tm.assert_dict_equal(
|
||||
idx.groupby(to_groupby), {1.0: idx[[0, 4]], 2.0: idx[[1, 3]]}
|
||||
)
|
||||
|
||||
to_groupby = DatetimeIndex(
|
||||
[
|
||||
datetime(2011, 11, 1),
|
||||
datetime(2011, 12, 1),
|
||||
pd.NaT,
|
||||
datetime(2011, 12, 1),
|
||||
datetime(2011, 11, 1),
|
||||
],
|
||||
tz="UTC",
|
||||
).values
|
||||
|
||||
ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")]
|
||||
expected = {ex_keys[0]: idx[[0, 4]], ex_keys[1]: idx[[1, 3]]}
|
||||
tm.assert_dict_equal(idx.groupby(to_groupby), expected)
|
||||
|
||||
def test_append_preserves_dtype(self, simple_index):
|
||||
# In particular NumericIndex with dtype float32
|
||||
index = simple_index
|
||||
N = len(index)
|
||||
|
||||
result = index.append(index)
|
||||
assert result.dtype == index.dtype
|
||||
tm.assert_index_equal(result[:N], index, check_exact=True)
|
||||
tm.assert_index_equal(result[N:], index, check_exact=True)
|
||||
|
||||
alt = index.take(list(range(N)) * 2)
|
||||
tm.assert_index_equal(result, alt, check_exact=True)
|
||||
|
||||
def test_inv(self, simple_index):
|
||||
idx = simple_index
|
||||
|
||||
if idx.dtype.kind in ["i", "u"]:
|
||||
res = ~idx
|
||||
expected = Index(~idx.values, name=idx.name)
|
||||
tm.assert_index_equal(res, expected)
|
||||
|
||||
# check that we are matching Series behavior
|
||||
res2 = ~Series(idx)
|
||||
# TODO(2.0): once we preserve dtype, check_dtype can be True
|
||||
tm.assert_series_equal(res2, Series(expected), check_dtype=False)
|
||||
else:
|
||||
if idx.dtype.kind == "f":
|
||||
msg = "ufunc 'invert' not supported for the input types"
|
||||
else:
|
||||
msg = "bad operand"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
~idx
|
||||
|
||||
# check that we get the same behavior with Series
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
~Series(idx)
|
||||
|
||||
|
||||
class NumericBase(Base):
    """
    Shared tests for the numeric index sub-classes (incl. RangeIndex).
    """

    def test_constructor_unwraps_index(self, dtype):
        """Building from an existing Index stores a plain ndarray in ``_data``."""
        wrapped = Index([1, 2], dtype=dtype)
        rebuilt = self._index_cls(wrapped)
        raw = np.array([1, 2], dtype=wrapped.dtype)
        tm.assert_numpy_array_equal(rebuilt._data, raw)

    def test_where(self):
        # Tested in numeric.test_indexing
        pass

    def test_can_hold_identifiers(self, simple_index):
        """A key taken from the index itself is reported as not an identifier."""
        first_key = simple_index[0]
        assert simple_index._can_hold_identifiers_and_holds_name(first_key) is False

    def test_format(self, simple_index):
        """``format()`` left-justifies every entry to the widest one (GH35439)."""
        as_text = [str(item) for item in simple_index]
        widest = max(len(text) for text in as_text)
        padded = [text.ljust(widest) for text in as_text]
        assert simple_index.format() == padded

    def test_numeric_compat(self):
        pass  # override Base method

    def test_insert_non_na(self, simple_index):
        """
        Inserting a value already in the index keeps dtype and type
        (GH#43921); for RangeIndex the expected class is Int64Index.
        """
        first = simple_index[0]
        inserted = simple_index.insert(0, first)

        expected_cls = type(simple_index)
        if expected_cls is RangeIndex:
            expected_cls = Int64Index

        expected = expected_cls([first] + list(simple_index), dtype=simple_index.dtype)
        tm.assert_index_equal(inserted, expected, exact=True)

    def test_insert_na(self, nulls_fixture, simple_index):
        """Insert each NA flavour at position 1 (GH 18295, test missing)."""
        first = simple_index[0]
        tail = list(simple_index[1:])

        if nulls_fixture is pd.NaT:
            expected = Index([first, pd.NaT] + tail, dtype=object)
        else:
            expected = Float64Index([first, np.nan] + tail)

            if simple_index._is_backward_compat_public_numeric_index:
                # GH#43921 we preserve NumericIndex
                if simple_index.dtype.kind == "f":
                    expected = NumericIndex(expected, dtype=simple_index.dtype)
                else:
                    expected = NumericIndex(expected)

        inserted = simple_index.insert(1, nulls_fixture)
        tm.assert_index_equal(inserted, expected, exact=True)

    def test_arithmetic_explicit_conversions(self):
        """
        Arithmetic with floats and float ndarrays gives a Float64Index
        (GH 8608).

        add/sub are overridden explicitly for Float/Int Index.
        """
        index_cls = self._index_cls
        if index_cls is RangeIndex:
            ints = RangeIndex(5)
        else:
            ints = index_cls(np.arange(5, dtype="int64"))

        # float conversions, with the scalar on either side
        scaled_values = np.arange(5, dtype="int64") * 3.2
        scaled_index = Float64Index(scaled_values)
        floats = ints * 3.2
        tm.assert_index_equal(floats, scaled_index)
        floats = 3.2 * ints
        tm.assert_index_equal(floats, scaled_index)

        # interops with numpy arrays
        zeros = np.zeros(5, dtype="float64")
        tm.assert_index_equal(floats - zeros, Float64Index(scaled_values))

        zeros = np.zeros(5, dtype="float64")
        tm.assert_index_equal(zeros - floats, Float64Index(-scaled_values))

    def test_invalid_dtype(self, invalid_dtype):
        """An unsupported ``dtype`` makes the constructor raise ValueError (GH 29539)."""
        dtype = invalid_dtype
        msg = rf"Incorrect `dtype` passed: expected \w+(?: \w+)?, received {dtype}"
        with pytest.raises(ValueError, match=msg):
            self._index_cls([1, 2, 3], dtype=dtype)
|
||||
@@ -0,0 +1,41 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(params=[None, False])
def sort(request):
    """
    Supply each value of the 'sort' parameter accepted by the Index
    setops methods (intersection, union, etc.).

    Caution:
        This is not the "sort" fixture used for DataFrame.append or
        concat; that one is parametrized with [True, False].

        The two cannot be merged because sort=True is not permitted
        in the Index setops methods.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"])
def freq_sample(request):
    """
    Supply each sample value of the 'freq' parameter used to build a
    date_range or a timedelta_range.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.fixture(params=[list, tuple, np.array, array, Series])
def listlike_box(request):
    """
    Supply each container type that may wrap the indexer passed to
    searchsorted.
    """
    return request.param
|
||||
@@ -0,0 +1,139 @@
|
||||
""" generic datetimelike tests """
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.tests.indexes.common import Base
|
||||
|
||||
|
||||
class DatetimeLike(Base):
    """Generic tests for the datetime-like index sub-classes."""

    def test_isin(self, simple_index):
        """``isin`` against itself, its list form, and a partly matching list."""
        index = simple_index[:4]
        result = index.isin(index)
        assert result.all()

        result = index.isin(list(index))
        assert result.all()

        # Only position 2 is present in the lookup values; 5 matches nothing.
        result = index.isin([index[2], 5])
        expected = np.array([False, False, True, False])
        tm.assert_numpy_array_equal(result, expected)

    def test_argsort_matches_array(self, simple_index):
        """``argsort`` on the index agrees with ``argsort`` on its ``_data``, NaT included."""
        idx = simple_index.insert(1, pd.NaT)

        result = idx.argsort()
        expected = idx._data.argsort()
        tm.assert_numpy_array_equal(result, expected)

    def test_can_hold_identifiers(self, simple_index):
        """A key taken from the index itself is reported as not an identifier."""
        idx = simple_index
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is False

    def test_shift_identity(self, simple_index):
        """Shifting by zero periods returns an equal index."""
        idx = simple_index
        tm.assert_index_equal(idx, idx.shift(0))

    def test_shift_empty(self, simple_index):
        """Shifting an empty index returns an equal (empty) index (GH#14811)."""
        idx = simple_index[:0]
        tm.assert_index_equal(idx, idx.shift(1))

    def test_str(self, simple_index):
        """The string repr shows name, class, tz (if set) and freq, but no length."""
        idx = simple_index
        idx.name = "foo"
        assert f"length={len(idx)}" not in str(idx)
        assert "'foo'" in str(idx)
        assert type(idx).__name__ in str(idx)

        if hasattr(idx, "tz") and idx.tz is not None:
            # ``idx.tz`` is a tzinfo object, and ``in`` on a str requires a
            # str on the left, so compare against its string form.
            assert str(idx.tz) in str(idx)
        if isinstance(idx, pd.PeriodIndex):
            assert f"dtype='period[{idx.freqstr}]'" in str(idx)
        else:
            assert f"freq='{idx.freqstr}'" in str(idx)

    def test_view(self, simple_index):
        """Viewing as i8 or as the index class must work; rebuilt indexes compare equal."""
        idx = simple_index

        # Smoke check only: the i8 view itself is not inspected.
        idx.view("i8")
        result = self._index_cls(idx)
        tm.assert_index_equal(result, idx)

        idx_view = idx.view(self._index_cls)
        result = self._index_cls(idx)
        tm.assert_index_equal(result, idx_view)

    def test_map_callable(self, simple_index):
        """``map`` with a callable: adding the freq, and mapping one element to NaT."""
        index = simple_index
        expected = index + index.freq
        result = index.map(lambda x: x + x.freq)
        tm.assert_index_equal(result, expected)

        # map to NaT
        result = index.map(lambda x: pd.NaT if x == index[0] else x)
        expected = pd.Index([pd.NaT] + index[1:].tolist())
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "mapper",
        [
            lambda values, index: {i: e for e, i in zip(values, index)},
            lambda values, index: pd.Series(values, index, dtype=object),
        ],
    )
    def test_map_dictlike(self, mapper, simple_index):
        """``map`` with a dict or Series built by ``mapper`` (index -> values)."""
        index = simple_index
        expected = index + index.freq

        # don't compare the freqs
        if isinstance(expected, (pd.DatetimeIndex, pd.TimedeltaIndex)):
            expected = expected._with_freq(None)

        result = index.map(mapper(expected, index))
        tm.assert_index_equal(result, expected)

        expected = pd.Index([pd.NaT] + index[1:].tolist())
        result = index.map(mapper(expected, index))
        tm.assert_index_equal(result, expected)

        # empty map; these map to np.nan because we cannot know
        # to re-infer things
        expected = pd.Index([np.nan] * len(index))
        result = index.map(mapper([], []))
        tm.assert_index_equal(result, expected)

    def test_getitem_preserves_freq(self, simple_index):
        """A full slice keeps the freq of the source index."""
        index = simple_index
        assert index.freq is not None

        result = index[:]
        assert result.freq == index.freq

    def test_where_cast_str(self, simple_index):
        """
        ``where`` with a string replacement: a string form of an element
        acts like the element itself, while an arbitrary string gives the
        same result as going through object dtype.
        """
        index = simple_index

        # Keep everything except the last position.
        mask = np.ones(len(index), dtype=bool)
        mask[-1] = False

        result = index.where(mask, str(index[0]))
        expected = index.where(mask, index[0])
        tm.assert_index_equal(result, expected)

        result = index.where(mask, [str(index[0])])
        tm.assert_index_equal(result, expected)

        expected = index.astype(object).where(mask, "foo")
        result = index.where(mask, "foo")
        tm.assert_index_equal(result, expected)

        result = index.where(mask, ["foo"])
        tm.assert_index_equal(result, expected)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,80 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
PeriodIndex,
|
||||
Series,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class DropDuplicates:
    """``drop_duplicates`` / ``duplicated`` tests run against an ``idx`` fixture."""

    def test_drop_duplicates_metadata(self, idx):
        """Check the result and its freq with and without duplicates (GH#10115)."""
        deduped = idx.drop_duplicates()
        tm.assert_index_equal(idx, deduped)
        assert idx.freq == deduped.freq

        doubled = idx.append(idx)
        deduped = doubled.drop_duplicates()

        if isinstance(idx, PeriodIndex):
            expected = idx
            assert deduped.freq == expected.freq
        else:
            # freq is reset except for PeriodIndex
            assert doubled.freq is None
            assert deduped.freq is None
            expected = idx._with_freq(None)

        tm.assert_index_equal(deduped, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        """
        ``duplicated`` and ``drop_duplicates`` agree with the parametrized
        mask, for the index and for a Series wrapping it.
        """
        # to check Index/Series compat
        extended = idx.append(idx[:5])

        tm.assert_numpy_array_equal(extended.duplicated(keep=keep), expected)
        survivors = extended[~expected]

        tm.assert_index_equal(extended.drop_duplicates(keep=keep), survivors)

        from_series = Series(extended).drop_duplicates(keep=keep)
        tm.assert_series_equal(from_series, Series(survivors, index=index))
|
||||
|
||||
|
||||
class TestDropDuplicatesPeriodIndex(DropDuplicates):
    """Runs the DropDuplicates tests on a ten-element ``period_range``."""

    @pytest.fixture(params=["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
    def freq(self, request):
        """Each frequency string the period index is built with."""
        return request.param

    @pytest.fixture
    def idx(self, freq):
        """A PeriodIndex named "idx" for the given ``freq``."""
        return period_range("2011-01-01", periods=10, freq=freq, name="idx")
|
||||
|
||||
|
||||
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    """Runs the DropDuplicates tests on a ten-element ``date_range``."""

    @pytest.fixture
    def idx(self, freq_sample):
        """A date_range result named "idx" for the given ``freq_sample``."""
        return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
|
||||
|
||||
|
||||
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    """Runs the DropDuplicates tests on a ten-element ``timedelta_range``."""

    @pytest.fixture
    def idx(self, freq_sample):
        """A timedelta_range result named "idx" for the given ``freq_sample``."""
        return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
|
||||
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex
|
||||
"""
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
PeriodIndex,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class EqualsTests:
    """``equals`` checks run against an ``index`` fixture."""

    def test_not_equals_numeric(self, index):
        """The index never equals numeric indexes built from its ``asi8`` values."""
        as_int = index.asi8
        numeric_lookalikes = (
            Index(as_int),
            Index(as_int.astype("u8")),
            Index(as_int).astype("f8"),
        )
        for other in numeric_lookalikes:
            assert not index.equals(other)

    def test_equals(self, index):
        """The index equals itself, its object cast, and categoricals of either."""
        as_object = index.astype(object)
        for other in (
            index,
            as_object,
            CategoricalIndex(index),
            CategoricalIndex(as_object),
        ):
            assert index.equals(other)

    def test_not_equals_non_arraylike(self, index):
        """A plain list of the elements does not compare equal."""
        as_list = list(index)
        assert not index.equals(as_list)

    def test_not_equals_strings(self, index):
        """The string forms of the elements do not compare equal."""
        as_strings = Index([str(item) for item in index], dtype=object)
        assert not index.equals(as_strings)
        assert not index.equals(CategoricalIndex(as_strings))

    def test_not_equals_misc_strs(self, index):
        """An index of unrelated strings does not compare equal."""
        letters = Index(list("abc"))
        assert not index.equals(letters)
|
||||
|
||||
|
||||
class TestPeriodIndexEquals(EqualsTests):
    """EqualsTests applied to PeriodIndex, plus PeriodIndex-specific checks."""

    @pytest.fixture
    def index(self):
        """A five-element daily period_range."""
        return period_range("2013-01-01", periods=5, freq="D")

    # TODO: de-duplicate with other test_equals2 methods
    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals2(self, freq):
        """``equals`` with object casts, containers and other freqs (GH#13107)."""
        labels = ["2011-01-01", "2011-01-02", "NaT"]
        idx = PeriodIndex(labels, freq=freq)
        as_object = idx.astype(object)

        for same in (idx, idx.copy(), as_object):
            assert idx.equals(same)
        assert as_object.equals(idx)
        assert as_object.equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same labels, hourly freq
        hourly = PeriodIndex(labels, freq="H")
        for other in (hourly, hourly.copy(), hourly.astype(object)):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(hourly)
        assert not idx.equals(list(hourly))
        assert not idx.equals(pd.Series(hourly))

        # same internal i8 values, different freq
        same_i8 = PeriodIndex._simple_new(
            idx._values._simple_new(idx._values.asi8, freq="H")
        )
        tm.assert_numpy_array_equal(idx.asi8, same_i8.asi8)
        for other in (same_i8, same_i8.copy(), same_i8.astype(object)):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(same_i8)
        assert not idx.equals(list(same_i8))
        assert not idx.equals(pd.Series(same_i8))
|
||||
|
||||
|
||||
class TestDatetimeIndexEquals(EqualsTests):
    """EqualsTests applied to DatetimeIndex, plus DatetimeIndex-specific checks."""

    @pytest.fixture
    def index(self):
        """A five-element date_range."""
        return date_range("2013-01-01", periods=5)

    def test_equals2(self):
        """``equals`` with object casts, containers, other tz and out-of-bounds values (GH#13107)."""
        labels = ["2011-01-01", "2011-01-02", "NaT"]
        idx = DatetimeIndex(labels)
        as_object = idx.astype(object)

        for same in (idx, idx.copy(), as_object):
            assert idx.equals(same)
        assert as_object.equals(idx)
        assert as_object.equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # same labels, tz-aware
        idx2 = DatetimeIndex(labels, tz="US/Pacific")
        for other in (idx2, idx2.copy(), idx2.astype(object)):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        for other in (idx3, idx3.copy(), idx3.astype(object)):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        for candidate in (idx, idx2, idx3):
            assert not candidate.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        for candidate in (idx, idx2, idx3):
            assert not candidate.equals(oob2)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_not_equals_bday(self, freq):
        """A date_range built with ``freq`` does not equal its own list form."""
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        as_list = list(rng)
        assert not rng.equals(as_list)
|
||||
|
||||
|
||||
class TestTimedeltaIndexEquals(EqualsTests):
    """EqualsTests applied to TimedeltaIndex, plus TimedeltaIndex-specific checks."""

    @pytest.fixture
    def index(self):
        """A ten-element index from ``tm.makeTimedeltaIndex``."""
        return tm.makeTimedeltaIndex(10)

    def test_equals2(self):
        """``equals`` with object casts, containers, reordering and out-of-bounds values (GH#13107)."""
        idx = TimedeltaIndex(["1 days", "2 days", "NaT"])
        as_object = idx.astype(object)

        for same in (idx, idx.copy(), as_object):
            assert idx.equals(same)
        assert as_object.equals(idx)
        assert as_object.equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        # first two elements swapped
        idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
        for other in (idx2, idx2.copy(), idx2.astype(object)):
            assert not idx.equals(other)
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # Check that we dont raise OverflowError on comparisons outside the
        #  implementation range GH#28532
        oob = Index([timedelta(days=10**6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        # The same out-of-range values as np.timedelta64, built two ways.
        oob2 = Index([np.timedelta64(item) for item in oob], dtype=object)
        assert (oob == oob2).all()
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

        oob3 = oob.map(np.timedelta64)
        assert (oob3 == oob).all()
        assert not idx.equals(oob3)
        assert not idx2.equals(oob3)
|
||||
@@ -0,0 +1,46 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# The datetime-like dtypes paired against each other below.  The period
# dtype must stay last: the test recognises it by position.
dtlike_dtypes = [
    np.dtype("timedelta64[ns]"),
    np.dtype("datetime64[ns]"),
    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
    pd.PeriodDtype("ns"),
]


@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
    """
    ``get_indexer_non_unique`` matches only when both sides have the very
    same dtype; any other pairing reports every target as missing.
    """
    # Three hourly steps in nanoseconds, repeated twice.
    vals = np.tile(3600 * 10**9 * np.arange(3), 2)

    def construct(dtype):
        if dtype is not dtlike_dtypes[-1]:
            return Index(vals, dtype=dtype)
        # PeriodArray will try to cast ints to strings
        return DatetimeIndex(vals).astype(dtype)

    left = construct(ldtype)
    right = construct(rdtype)

    indexer, missing = left.get_indexer_non_unique(right)

    if ldtype is not rdtype:
        tm.assert_numpy_array_equal(indexer, np.array([-1] * 6, dtype=np.intp))
        tm.assert_numpy_array_equal(missing, np.arange(6, dtype=np.intp))
    else:
        expected_indexer = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
        tm.assert_numpy_array_equal(missing, np.array([], dtype=np.intp))
|
||||
@@ -0,0 +1,46 @@
|
||||
from pandas import (
|
||||
Index,
|
||||
NaT,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
def test_is_monotonic_with_nat():
|
||||
# GH#31437
|
||||
# PeriodIndex.is_monotonic should behave analogously to DatetimeIndex,
|
||||
# in particular never be monotonic when we have NaT
|
||||
dti = date_range("2016-01-01", periods=3)
|
||||
pi = dti.to_period("D")
|
||||
tdi = Index(dti.view("timedelta64[ns]"))
|
||||
|
||||
for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
|
||||
if isinstance(obj, Index):
|
||||
# i.e. not Engines
|
||||
assert obj.is_monotonic
|
||||
assert obj.is_monotonic_increasing
|
||||
assert not obj.is_monotonic_decreasing
|
||||
assert obj.is_unique
|
||||
|
||||
dti1 = dti.insert(0, NaT)
|
||||
pi1 = dti1.to_period("D")
|
||||
tdi1 = Index(dti1.view("timedelta64[ns]"))
|
||||
|
||||
for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
|
||||
if isinstance(obj, Index):
|
||||
# i.e. not Engines
|
||||
assert not obj.is_monotonic
|
||||
assert not obj.is_monotonic_increasing
|
||||
assert not obj.is_monotonic_decreasing
|
||||
assert obj.is_unique
|
||||
|
||||
dti2 = dti.insert(3, NaT)
|
||||
pi2 = dti2.to_period("H")
|
||||
tdi2 = Index(dti2.view("timedelta64[ns]"))
|
||||
|
||||
for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
|
||||
if isinstance(obj, Index):
|
||||
# i.e. not Engines
|
||||
assert not obj.is_monotonic
|
||||
assert not obj.is_monotonic_increasing
|
||||
assert not obj.is_monotonic_decreasing
|
||||
assert obj.is_unique
|
||||
@@ -0,0 +1,53 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
TimedeltaIndex,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class NATests:
    """NaT handling checks run against a two-element ``index_without_na`` fixture."""

    def test_nat(self, index_without_na):
        """Check ``_na_value``, ``_can_hold_na``, ``_isnan`` and ``hasnans``."""
        empty_index = index_without_na[:0]

        # Deep copy first, then overwrite the second slot of the copy.
        index_with_na = index_without_na.copy(deep=True)
        index_with_na._data[1] = NaT

        for candidate in (empty_index, index_with_na, index_without_na):
            assert candidate._na_value is NaT

        assert index_without_na._can_hold_na
        no_na_mask = np.array([False, False])
        tm.assert_numpy_array_equal(index_without_na._isnan, no_na_mask)
        assert index_without_na.hasnans is False

        assert index_with_na._can_hold_na
        one_na_mask = np.array([False, True])
        tm.assert_numpy_array_equal(index_with_na._isnan, one_na_mask)
        assert index_with_na.hasnans is True
|
||||
|
||||
|
||||
class TestDatetimeIndexNA(NATests):
    """Run the inherited NaT checks against a two-element DatetimeIndex."""

    @pytest.fixture
    def index_without_na(self, tz_naive_fixture):
        # ``tz_naive_fixture`` is not defined in this file; its value is
        # passed straight through as the index's ``tz``.
        dates = ["2011-01-01", "2011-01-02"]
        return DatetimeIndex(dates, tz=tz_naive_fixture)
|
||||
|
||||
|
||||
class TestTimedeltaIndexNA(NATests):
    """Run the inherited NaT checks against a two-element TimedeltaIndex."""

    @pytest.fixture
    def index_without_na(self):
        durations = ["1 days", "2 days"]
        return TimedeltaIndex(durations)
|
||||
|
||||
|
||||
class TestPeriodIndexNA(NATests):
    """Run the inherited NaT checks against a two-element daily PeriodIndex."""

    @pytest.fixture
    def index_without_na(self):
        days = ["2011-01-01", "2011-01-02"]
        return PeriodIndex(days, freq="D")
|
||||
@@ -0,0 +1,317 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
TimedeltaIndex,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def check_freq_ascending(ordered, orig, ascending):
    """
    Assert the freq of a sorted PeriodIndex/DatetimeIndex/TimedeltaIndex
    whose original index is generated (or generate-able) with
    period_range/date_range/timedelta_range.

    A PeriodIndex must keep the original freq. For DatetimeIndex and
    TimedeltaIndex only the freq multiple ``n`` is compared: unchanged when
    ``ascending`` is truthy, negated otherwise. Any other type is not checked.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
        return

    if not isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        return

    sign = 1 if ascending else -1
    assert ordered.freq.n == sign * orig.freq.n
|
||||
|
||||
|
||||
def check_freq_nonmonotonic(ordered, orig):
    """
    Assert the freq of a sorted PeriodIndex/DatetimeIndex/TimedeltaIndex
    whose original index is _not_ generated (or generate-able) with
    period_range/date_range/timedelta_range.

    A PeriodIndex must keep the original freq; a DatetimeIndex or
    TimedeltaIndex must have no freq. Any other type is not checked.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
        return

    if isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        assert ordered.freq is None
|
||||
|
||||
|
||||
class TestSortValues:
    """
    ``sort_values`` (plus ``argmin``/``argmax``) checks for DatetimeIndex,
    TimedeltaIndex and PeriodIndex inputs.

    Frequency expectations are delegated to the module-level helpers
    ``check_freq_ascending`` and ``check_freq_nonmonotonic``.
    """

    @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
    def non_monotonic_idx(self, request):
        # Three unsorted elements: the largest sits at position 0 and the
        # smallest at position 1 for every index class.
        index_type = request.param
        if index_type is not DatetimeIndex and index_type is not PeriodIndex:
            return TimedeltaIndex(
                ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
            )

        unsorted_dates = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
        if index_type is PeriodIndex:
            return unsorted_dates.to_period("D")
        return unsorted_dates

    def test_argmin_argmax(self, non_monotonic_idx):
        smallest_at = non_monotonic_idx.argmin()
        assert smallest_at == 1
        largest_at = non_monotonic_idx.argmax()
        assert largest_at == 0

    def test_sort_values(self, non_monotonic_idx):
        unsorted = non_monotonic_idx

        assert unsorted.sort_values().is_monotonic
        assert unsorted.sort_values(ascending=False)[::-1].is_monotonic

        increasing, positions = unsorted.sort_values(return_indexer=True)
        assert increasing.is_monotonic
        tm.assert_numpy_array_equal(positions, np.array([1, 2, 0], dtype=np.intp))

        decreasing, positions = unsorted.sort_values(
            return_indexer=True, ascending=False
        )
        assert decreasing[::-1].is_monotonic
        tm.assert_numpy_array_equal(positions, np.array([0, 2, 1], dtype=np.intp))

    def check_sort_values_with_freq(self, idx):
        # ``idx`` must be an already-sorted three-element index.
        # Each case: (extra sort_values kwargs, expected result,
        #             ascending flag for the freq check, expected indexer)
        cases = (
            ({}, idx, True, [0, 1, 2]),
            ({"ascending": False}, idx[::-1], False, [2, 1, 0]),
        )

        # First without the indexer ...
        for sort_kwargs, expected, ascending, _ in cases:
            ordered = idx.sort_values(**sort_kwargs)
            tm.assert_index_equal(ordered, expected)
            check_freq_ascending(ordered, idx, ascending)

        # ... then the same calls again with return_indexer=True.
        for sort_kwargs, expected, ascending, positions in cases:
            ordered, indexer = idx.sort_values(return_indexer=True, **sort_kwargs)
            tm.assert_index_equal(ordered, expected)
            tm.assert_numpy_array_equal(indexer, np.array(positions, dtype=np.intp))
            check_freq_ascending(ordered, idx, ascending)

    @pytest.mark.parametrize("freq", ["D", "H"])
    def test_sort_values_with_freq_timedeltaindex(self, freq):
        # GH#10295
        sorted_idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq)
        sorted_idx = sorted_idx.rename("idx")

        self.check_sort_values_with_freq(sorted_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_sort_values_with_freq_datetimeindex(self, idx):
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize("freq", ["D", "2D", "4D"])
    def test_sort_values_with_freq_periodindex(self, freq):
        # here with_freq refers to being period_range-like
        days = ["2011-01-01", "2011-01-02", "2011-01-03"]
        sorted_idx = PeriodIndex(days, freq=freq, name="idx")
        self.check_sort_values_with_freq(sorted_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A"),
            Index([2011, 2012, 2013], name="idx"),  # for compatibility check
        ],
    )
    def test_sort_values_with_freq_periodindex2(self, idx):
        # here with_freq indicates this is period_range-like
        self.check_sort_values_with_freq(idx)

    def check_sort_values_without_freq(self, idx, expected):
        # ``idx`` is an unsorted five-element index and ``expected`` is its
        # ascending sort with any missing values placed first.
        # The default-na_position calls are only made when nothing is missing.
        has_missing = idx.isna().any()
        descending_expected = expected[::-1]

        ascending_positions = [0, 4, 3, 1, 2]
        descending_positions = [2, 1, 3, 0, 4]

        # Each case: (extra sort_values kwargs, expected result, expected indexer)
        cases = [({"na_position": "first"}, expected, ascending_positions)]
        if not has_missing:
            cases.append(({}, expected, ascending_positions))
        cases.append(({"ascending": False}, descending_expected, descending_positions))

        # First without the indexer ...
        for sort_kwargs, wanted, _ in cases:
            ordered = idx.sort_values(**sort_kwargs)
            tm.assert_index_equal(ordered, wanted)
            check_freq_nonmonotonic(ordered, idx)

        # ... then the same calls again with return_indexer=True.
        for sort_kwargs, wanted, positions in cases:
            ordered, indexer = idx.sort_values(return_indexer=True, **sort_kwargs)
            tm.assert_index_equal(ordered, wanted)

            tm.assert_numpy_array_equal(indexer, np.array(positions, dtype=np.intp))
            check_freq_nonmonotonic(ordered, idx)

    def test_sort_values_without_freq_timedeltaindex(self):
        # GH#10295

        unsorted = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        in_order = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )
        self.check_sort_values_without_freq(unsorted, in_order)

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_sort_values_without_freq_datetimeindex(
        self, index_dates, expected_dates, tz_naive_fixture
    ):
        # without freq; the same timezone is applied to input and expectation
        unsorted = DatetimeIndex(index_dates, tz=tz_naive_fixture, name="idx")
        in_order = DatetimeIndex(expected_dates, tz=tz_naive_fixture, name="idx")

        self.check_sort_values_without_freq(unsorted, in_order)

    @pytest.mark.parametrize(
        "idx,expected",
        [
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx1",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx1",
                ),
            ),
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx2",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx2",
                ),
            ),
            (
                PeriodIndex(
                    [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                    freq="D",
                    name="idx3",
                ),
                PeriodIndex(
                    [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
                    freq="D",
                    name="idx3",
                ),
            ),
            (
                PeriodIndex(
                    ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A"
                ),
                PeriodIndex(
                    ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A"
                ),
            ),
            (
                # For compatibility check
                Index([2011, 2013, 2015, 2012, 2011], name="idx"),
                Index([2011, 2011, 2012, 2013, 2015], name="idx"),
            ),
        ],
    )
    def test_sort_values_without_freq_periodindex(self, idx, expected):
        # here without_freq means not generateable by period_range
        self.check_sort_values_without_freq(idx, expected)

    def test_sort_values_without_freq_periodindex_nat(self):
        # doesn't quite fit into check_sort_values_without_freq
        unsorted = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
        nat_first = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")

        result = unsorted.sort_values(na_position="first")
        tm.assert_index_equal(result, nat_first)
        check_freq_nonmonotonic(result, unsorted)

        result = unsorted.sort_values(ascending=False)
        tm.assert_index_equal(result, nat_first[::-1])
        check_freq_nonmonotonic(result, unsorted)
|
||||
|
||||
|
||||
def test_order_stability_compat():
    """
    A descending sort must yield the same indexer for a PeriodIndex and for
    a plain integer Index holding the same years (one value is duplicated).
    """
    # GH#35922. sort_values is stable both for normal and datetime-like Index
    years = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A")
    integers = Index([2011, 2013, 2015, 2012, 2011], name="idx")

    # Only the indexers are compared; the sorted indexes are discarded.
    _, period_positions = years.sort_values(return_indexer=True, ascending=False)
    _, integer_positions = integers.sort_values(return_indexer=True, ascending=False)

    tm.assert_numpy_array_equal(period_positions, integer_positions)
|
||||
@@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
period_range,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestValueCounts:
|
||||
# GH#7735
|
||||
|
||||
def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
|
||||
tz = tz_naive_fixture
|
||||
orig = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
|
||||
self._check_value_counts_with_repeats(orig)
|
||||
|
||||
def test_value_counts_unique_timedeltaindex(self):
|
||||
orig = timedelta_range("1 days 09:00:00", freq="H", periods=10)
|
||||
self._check_value_counts_with_repeats(orig)
|
||||
|
||||
def test_value_counts_unique_periodindex(self):
|
||||
orig = period_range("2011-01-01 09:00", freq="H", periods=10)
|
||||
self._check_value_counts_with_repeats(orig)
|
||||
|
||||
def _check_value_counts_with_repeats(self, orig):
|
||||
# create repeated values, 'n'th element is repeated by n+1 times
|
||||
idx = type(orig)(
|
||||
np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype
|
||||
)
|
||||
|
||||
exp_idx = orig[::-1]
|
||||
if not isinstance(exp_idx, PeriodIndex):
|
||||
exp_idx = exp_idx._with_freq(None)
|
||||
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(), expected)
|
||||
|
||||
tm.assert_index_equal(idx.unique(), orig)
|
||||
|
||||
def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
|
||||
tz = tz_naive_fixture
|
||||
idx = DatetimeIndex(
|
||||
[
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 08:00",
|
||||
"2013-01-01 08:00",
|
||||
NaT,
|
||||
],
|
||||
tz=tz,
|
||||
)
|
||||
self._check_value_counts_dropna(idx)
|
||||
|
||||
def test_value_counts_unique_timedeltaindex2(self):
|
||||
idx = TimedeltaIndex(
|
||||
[
|
||||
"1 days 09:00:00",
|
||||
"1 days 09:00:00",
|
||||
"1 days 09:00:00",
|
||||
"1 days 08:00:00",
|
||||
"1 days 08:00:00",
|
||||
NaT,
|
||||
]
|
||||
)
|
||||
self._check_value_counts_dropna(idx)
|
||||
|
||||
def test_value_counts_unique_periodindex2(self):
|
||||
idx = PeriodIndex(
|
||||
[
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 09:00",
|
||||
"2013-01-01 08:00",
|
||||
"2013-01-01 08:00",
|
||||
NaT,
|
||||
],
|
||||
freq="H",
|
||||
)
|
||||
self._check_value_counts_dropna(idx)
|
||||
|
||||
def _check_value_counts_dropna(self, idx):
|
||||
exp_idx = idx[[2, 3]]
|
||||
expected = Series([3, 2], index=exp_idx)
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(), expected)
|
||||
|
||||
exp_idx = idx[[2, 3, -1]]
|
||||
expected = Series([3, 2, 1], index=exp_idx)
|
||||
|
||||
for obj in [idx, Series(idx)]:
|
||||
tm.assert_series_equal(obj.value_counts(dropna=False), expected)
|
||||
|
||||
tm.assert_index_equal(idx.unique(), exp_idx)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user