first commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,171 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import period_range
|
||||
|
||||
# Names of the resampler methods exercised by the fixtures below,
# kept in three separate lists and then combined into one.
downsample_methods = [
    "min",
    "max",
    "first",
    "last",
    "sum",
    "mean",
    "sem",
    "median",
    "prod",
    "var",
    "std",
    "ohlc",
    "quantile",
]
upsample_methods = ["count", "size"]
series_methods = ["nunique"]
# Concatenation order: downsample, then upsample, then series-only names.
resample_methods = [*downsample_methods, *upsample_methods, *series_methods]
|
||||
|
||||
|
||||
@pytest.fixture(params=downsample_methods)
def downsample_method(request):
    """Parametrized fixture: one run per name in ``downsample_methods``."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture(params=resample_methods)
def resample_method(request):
    """Parametrized fixture: one run per name in ``resample_methods``."""
    method_name = request.param
    return method_name
|
||||
|
||||
|
||||
@pytest.fixture
def simple_date_range_series():
    """
    Factory fixture for Series of random values on a ``date_range`` index.

    The returned callable takes ``start``, ``end`` and an optional ``freq``
    (default ``"D"``).
    """

    def _simple_date_range_series(start, end, freq="D"):
        dates = date_range(start, end, freq=freq)
        values = np.random.randn(len(dates))
        return Series(values, index=dates)

    return _simple_date_range_series
|
||||
|
||||
|
||||
@pytest.fixture
def simple_period_range_series():
    """
    Factory fixture for Series of random values on a ``period_range`` index.

    The returned callable takes ``start``, ``end`` and an optional ``freq``
    (default ``"D"``).
    """

    def _simple_period_range_series(start, end, freq="D"):
        periods = period_range(start, end, freq=freq)
        values = np.random.randn(len(periods))
        return Series(values, index=periods)

    return _simple_period_range_series
|
||||
|
||||
|
||||
@pytest.fixture
def _index_start():
    """Default start value handed to the ``index`` fixture: 2005-01-01."""
    start = datetime(2005, 1, 1)
    return start
|
||||
|
||||
|
||||
@pytest.fixture
def _index_end():
    """Default end value handed to the ``index`` fixture: 2005-01-10."""
    end = datetime(2005, 1, 10)
    return end
|
||||
|
||||
|
||||
@pytest.fixture
def _index_freq():
    """Default ``freq`` handed to the ``index`` fixture: ``"D"``."""
    freq = "D"
    return freq
|
||||
|
||||
|
||||
@pytest.fixture
def _index_name():
    """Default ``name`` handed to the ``index`` fixture: ``None``."""
    name = None
    return name
|
||||
|
||||
|
||||
@pytest.fixture
def index(_index_factory, _index_start, _index_end, _index_freq, _index_name):
    """
    Index built by calling ``_index_factory`` with the start/end/freq/name
    fixtures; used for date_range, period_range and timedelta_range indexes.
    """
    options = {"freq": _index_freq, "name": _index_name}
    return _index_factory(_index_start, _index_end, **options)
|
||||
|
||||
|
||||
@pytest.fixture
def _static_values(index):
    """
    Values for the ``series`` and ``frame`` fixtures: ``np.arange`` over the
    length of ``index``.
    """
    size = len(index)
    return np.arange(size)
|
||||
|
||||
|
||||
@pytest.fixture
def _series_name():
    """
    Default ``name`` of the Series built by the ``series`` fixture: ``None``.
    """
    name = None
    return name
|
||||
|
||||
|
||||
@pytest.fixture
def series(index, _series_name, _static_values):
    """
    Series holding ``_static_values`` on ``index``, named ``_series_name``;
    used with date_range, period_range and timedelta_range indexes.
    """
    return Series(data=_static_values, index=index, name=_series_name)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_series_dti(series):
    """
    Zero-length slice of the ``series`` fixture (date_range, period_range or
    timedelta_range index).
    """
    empty = series[:0]
    return empty
|
||||
|
||||
|
||||
@pytest.fixture
def frame(index, _series_name, _static_values):
    """
    DataFrame with a single ``"value"`` column of ``_static_values`` on
    ``index`` (date_range, period_range or timedelta_range).
    """
    # _series_name is requested on purpose even though it is not used here
    data = {"value": _static_values}
    return DataFrame(data, index=index)
|
||||
|
||||
|
||||
@pytest.fixture
def empty_frame_dti(series):
    """
    Column-less DataFrame on a zero-length slice of the ``series`` fixture's
    index (date_range, period_range or timedelta_range).
    """
    empty_index = series.index[:0]
    return DataFrame(index=empty_index)
|
||||
|
||||
|
||||
@pytest.fixture(params=[Series, DataFrame])
def series_and_frame(request, series, frame):
    """
    Parametrized fixture yielding the ``series`` fixture for the ``Series``
    param and the ``frame`` fixture for the ``DataFrame`` param.
    """
    by_type = {Series: series, DataFrame: frame}
    # .get keeps the original fall-through result (None) for any other param
    return by_type.get(request.param)
|
||||
@@ -0,0 +1,256 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
NaT,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.groupby import DataError
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import period_range
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
from pandas.core.resample import _asfreq_compat
|
||||
|
||||
# pytest lets a test's direct parametrization override a fixture value, even
# when the test does not name that fixture in its own signature.
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa:E501
# This module uses that mechanism to override fixtures from conftest.py.
# Each tuple is ordered as '_index_factory,_series_name,_index_start,_index_end'.
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10))
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10))
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")

# Decorator running a test once per index flavour.
all_ts = pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end",
    [
        DATE_RANGE,
        PERIOD_RANGE,
        TIMEDELTA_RANGE,
    ],
)
|
||||
|
||||
|
||||
@pytest.fixture
def create_index(_index_factory):
    """Fixture returning a callable that forwards to ``_index_factory``."""

    def _create_index(*args, **kwargs):
        """return the _index_factory created using the args, kwargs"""
        built = _index_factory(*args, **kwargs)
        return built

    return _create_index
|
||||
|
||||
|
||||
@pytest.mark.parametrize("freq", ["2D", "1H"])
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
    """``resample(freq).asfreq()`` is compared with a reindex onto a new index."""
    obj = series_and_frame
    first, last = obj.index[0], obj.index[-1]

    result = obj.resample(freq).asfreq()
    expected = obj.reindex(create_index(first, last, freq=freq))

    tm.assert_almost_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
    # test for fill value during resampling, issue 3715
    rule = "1H"
    ser = series

    # Series: asfreq() without fill_value is compared with a plain reindex
    result = ser.resample(rule).asfreq()
    hourly = create_index(ser.index[0], ser.index[-1], freq=rule)
    tm.assert_series_equal(result, ser.reindex(hourly))

    # DataFrame: asfreq(fill_value=...) is compared with reindex(fill_value=...)
    frame = ser.to_frame("value")
    frame.iloc[1] = None
    result = frame.resample(rule).asfreq(fill_value=4.0)
    hourly = create_index(frame.index[0], frame.index[-1], freq=rule)
    tm.assert_frame_equal(result, frame.reindex(hourly, fill_value=4.0))
|
||||
|
||||
|
||||
@all_ts
def test_resample_interpolate(frame):
    # GH 12925
    df = frame
    via_asfreq = df.resample("1T").asfreq().interpolate()
    direct = df.resample("1T").interpolate()
    tm.assert_frame_equal(via_asfreq, direct)
|
||||
|
||||
|
||||
def test_raises_on_non_datetimelike_index():
    """Resampling a default-constructed DataFrame must raise ``TypeError``."""
    # this is a non datetimelike index
    plain_frame = DataFrame()
    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
        "but got an instance of 'Index'"
    )
    with pytest.raises(TypeError, match=msg):
        plain_frame.resample("A").mean()
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_series(freq, empty_series_dti, resample_method):
    # GH12771 & GH12868
    if resample_method == "ohlc":
        pytest.skip("need to test for ohlc from GH13083")

    ser = empty_series_dti
    resampler = ser.resample(freq)
    result = getattr(resampler, resample_method)()

    # expected: the same empty data on an index converted to ``freq``
    expected = ser.copy()
    expected.index = _asfreq_compat(ser.index, freq)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    tm.assert_series_equal(result, expected, check_dtype=False)
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_nat_index_series(request, freq, series, resample_method):
    # GH39227
    if freq == "M":
        request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))

    # replace every index entry with NaT
    ser = series.copy()
    ser.index = PeriodIndex([NaT] * len(ser), freq=freq)
    result = getattr(ser.resample(freq), resample_method)()

    if resample_method == "ohlc":
        expected = DataFrame(
            [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
        )
        assert_equal = tm.assert_frame_equal
    else:
        expected = ser[:0].copy()
        assert_equal = tm.assert_series_equal

    assert_equal(result, expected, check_dtype=False)
    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
    # GH28427
    ser = empty_series_dti
    result = getattr(ser.resample(freq), resample_method)()

    expected_index = _asfreq_compat(ser.index, freq)
    expected = Series([], dtype="int64", index=expected_index, name=ser.name)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method):
    # GH13212
    df = empty_frame_dti
    # count retains dimensions too
    result = getattr(df.resample(freq), resample_method)()

    if resample_method == "size":
        # GH14962
        expected = Series([], dtype=object)
    else:
        expected = df.copy()
    expected.index = _asfreq_compat(df.index, freq)

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    tm.assert_almost_equal(result, expected, check_dtype=False)

    # test size for GH13212 (currently stays as df)
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_count_empty_dataframe(freq, empty_frame_dti):
    # GH28427
    df = empty_frame_dti
    # add an empty column "a" to the fixture frame
    df["a"] = []

    result = df.resample(freq).count()

    expected_index = _asfreq_compat(df.index, freq)
    expected = DataFrame({"a": []}, dtype="int64", index=expected_index)

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_size_empty_dataframe(freq, empty_frame_dti):
    # GH28427
    df = empty_frame_dti
    # add an empty column "a" to the fixture frame
    df["a"] = []

    result = df.resample(freq).size()

    expected_index = _asfreq_compat(df.index, freq)
    expected = Series([], dtype="int64", index=expected_index)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
    # Empty series were sometimes causing a segfault (for the functions
    # with Cython bounds-checking disabled) or an IndexError. We just run
    # them to ensure they no longer do. (GH #10228)
    empty_series_dti = Series([], index=index, dtype=dtype)
    try:
        resampler = empty_series_dti.resample("d")
        getattr(resampler, resample_method)()
    except DataError:
        # Ignore these since some combinations are invalid
        # (ex: doing mean with dtype of np.object_)
        pass
|
||||
|
||||
|
||||
@all_ts
def test_apply_to_empty_series(empty_series_dti):
    # GH 14313
    ser = empty_series_dti
    for freq in ("M", "D", "H"):
        # two different callables are applied and their results compared
        constant = ser.resample(freq).apply(lambda x: 1)
        summed = ser.resample(freq).apply(np.sum)

        tm.assert_series_equal(constant, summed, check_dtype=False)
|
||||
|
||||
|
||||
@all_ts
def test_resampler_is_iterable(series):
    # GH 15314
    freq = "H"
    grouper = Grouper(freq=freq, convention="start")
    grouped = series.groupby(grouper)
    resampled = series.resample(freq)

    # iterating the resampler is compared pairwise with iterating the groupby
    for resample_item, group_item in zip(resampled, grouped):
        resample_key, resample_values = resample_item
        group_key, group_values = group_item
        assert resample_key == group_key
        tm.assert_series_equal(resample_values, group_values)
|
||||
|
||||
|
||||
@all_ts
def test_resample_quantile(series):
    # GH 15023
    ser = series
    q = 0.75
    freq = "H"

    result = ser.resample(freq).quantile(q)

    # reference: per-group Series.quantile via agg, renamed to the series name
    per_group = ser.resample(freq).agg(lambda x: x.quantile(q))
    expected = per_group.rename(ser.name)

    tm.assert_series_equal(result, expected)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,316 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import (
|
||||
PeriodIndex,
|
||||
period_range,
|
||||
)
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
|
||||
from pandas.tseries.offsets import (
|
||||
BDay,
|
||||
Minute,
|
||||
)
|
||||
|
||||
# Each tuple is ordered as '_index_factory,_series_name,_index_start,_index_end'.
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10))
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10))
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")

# Decorator running a test once per index flavour.
all_ts = pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end",
    [
        DATE_RANGE,
        PERIOD_RANGE,
        TIMEDELTA_RANGE,
    ],
)
|
||||
|
||||
|
||||
@pytest.fixture()
def _index_factory():
    """Index factory fixture for this module: ``period_range``."""
    factory = period_range
    return factory
|
||||
|
||||
|
||||
@pytest.fixture
def create_index(_index_factory):
    """Fixture returning a callable that forwards to ``_index_factory``."""

    def _create_index(*args, **kwargs):
        """return the _index_factory created using the args, kwargs"""
        built = _index_factory(*args, **kwargs)
        return built

    return _create_index
|
||||
|
||||
|
||||
# new test to check that all FutureWarning are triggered
def test_deprecating_on_loffset_and_base():
    # GH 31809
    idx = date_range("2001-01-01", periods=4, freq="T")
    df = DataFrame(data=4 * [range(2)], index=idx, columns=["a", "b"])

    # the two deprecated keywords, checked in this order at every entry point
    deprecated_kwargs = ({"base": 0}, {"loffset": "0s"})

    for kwargs in deprecated_kwargs:
        with tm.assert_produces_warning(FutureWarning):
            pd.Grouper(freq="10s", **kwargs)

    # not checking the stacklevel for .groupby().resample() because it's complicated to
    # reconcile it with the stacklevel for Series.resample() and DataFrame.resample();
    # see GH #37603
    for kwargs in deprecated_kwargs:
        with tm.assert_produces_warning(FutureWarning):
            df.groupby("a").resample("3T", **kwargs).sum()

    msg = "'offset' and 'base' cannot be present at the same time"
    with tm.assert_produces_warning(FutureWarning), pytest.raises(
        ValueError, match=msg
    ):
        df.groupby("a").resample("3T", base=0, offset=0).sum()

    for kwargs in deprecated_kwargs:
        with tm.assert_produces_warning(FutureWarning):
            df.resample("3T", **kwargs).sum()
|
||||
|
||||
|
||||
@all_ts
@pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]])
def test_resample_loffset_arg_type(frame, create_index, arg):
    """
    Aggregate a ``"2D"`` resample with ``loffset="2H"`` using a string, dict
    or list ``arg`` and compare against hand-built pairwise means.
    """
    # GH 13218, 15002
    df = frame
    expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)]
    # ``periods`` is an integer count: use floor division rather than ``/``,
    # which would pass a float and rely on implicit coercion.
    expected_index = create_index(df.index[0], periods=len(df.index) // 2, freq="2D")

    # loffset coerces PeriodIndex to DateTimeIndex
    if isinstance(expected_index, PeriodIndex):
        expected_index = expected_index.to_timestamp()

    expected_index += timedelta(hours=2)
    expected = DataFrame({"value": expected_means}, index=expected_index)

    with tm.assert_produces_warning(FutureWarning):
        result_agg = df.resample("2D", loffset="2H").agg(arg)

    # a list aggregation is expected to yield MultiIndex columns
    if isinstance(arg, list):
        expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])

    tm.assert_frame_equal(result_agg, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "loffset", [timedelta(minutes=1), "1min", Minute(1), np.timedelta64(1, "m")]
)
def test_resample_loffset(loffset):
    """
    Resample with the deprecated ``loffset`` keyword, given in several
    equivalent forms, and compare against a manually shifted expected result.
    """
    # GH 7687
    rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=rng)

    with tm.assert_produces_warning(FutureWarning):
        result = s.resample(
            "5min", closed="right", label="right", loffset=loffset
        ).mean()
    idx = date_range("1/1/2000", periods=4, freq="5min")
    # Explicit positional access via .iloc: integer keys on a datetime-indexed
    # Series otherwise depend on the positional fallback of Series.__getitem__.
    expected = Series(
        [s.iloc[0], s.iloc[1:6].mean(), s.iloc[6:11].mean(), s.iloc[11:].mean()],
        index=idx + timedelta(minutes=1),
    )
    tm.assert_series_equal(result, expected)
    assert result.index.freq == Minute(5)

    # from daily
    dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D")
    ser = Series(np.random.rand(len(dti)), dti)

    # to weekly
    result = ser.resample("w-sun").last()
    business_day_offset = BDay()
    with tm.assert_produces_warning(FutureWarning):
        expected = ser.resample("w-sun", loffset=-business_day_offset).last()
    # the first label with the negative loffset sits one business day earlier
    assert result.index[0] - business_day_offset == expected.index[0]
|
||||
|
||||
|
||||
def test_resample_loffset_upsample():
    """
    Resample with ``loffset`` followed by ``ffill`` and compare against
    values picked by position from the source series.
    """
    # GH 20744
    rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min")
    s = Series(np.random.randn(14), index=rng)

    with tm.assert_produces_warning(FutureWarning):
        result = s.resample(
            "5min", closed="right", label="right", loffset=timedelta(minutes=1)
        ).ffill()
    idx = date_range("1/1/2000", periods=4, freq="5min")
    # Explicit positional access via .iloc: integer keys on a datetime-indexed
    # Series otherwise depend on the positional fallback of Series.__getitem__.
    picked = [s.iloc[pos] for pos in (0, 5, 10, -1)]
    expected = Series(picked, index=idx + timedelta(minutes=1))

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_loffset_count():
    # GH 12725
    start_time = "1/1/2000 00:00:00"
    rng = date_range(start_time, periods=100, freq="S")
    ts = Series(np.random.randn(len(rng)), index=rng)

    shifted = date_range(start_time, periods=10, freq="10S") + timedelta(seconds=1)
    expected = Series(10, index=shifted)

    # Same issue should apply to .size() since it goes through
    # same code path
    for method in ("count", "size"):
        with tm.assert_produces_warning(FutureWarning):
            result = getattr(ts.resample("10S", loffset="1s"), method)()

        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_base():
    """Check the result index of a ``"5min"`` resample with ``base=2``."""
    stamps = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s")
    ts = Series(np.random.randn(len(stamps)), index=stamps)

    with tm.assert_produces_warning(FutureWarning):
        resampled = ts.resample("5min", base=2).mean()

    expected_index = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min")
    tm.assert_index_equal(resampled.index, expected_index)
|
||||
|
||||
|
||||
def test_resample_float_base():
    # GH25161
    stamps = pd.to_datetime(
        ["2018-11-26 16:17:43.51", "2018-11-26 16:17:44.51", "2018-11-26 16:17:45.51"]
    )
    ser = Series(np.arange(3), index=stamps)

    # non-integer base value
    base = 17 + 43.51 / 60
    with tm.assert_produces_warning(FutureWarning):
        result = ser.resample("3min", base=base).size()

    expected_index = pd.DatetimeIndex(["2018-11-26 16:17:43.51"], freq="3min")
    expected = Series(3, index=expected_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
@pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]])
def test_loffset_returns_datetimeindex(frame, kind, agg_arg):
    """
    Resample with ``loffset`` for every ``kind`` and aggregation form and
    compare against an expected frame on a timestamp index.
    """
    # make sure passing loffset returns DatetimeIndex in all cases
    # basic method taken from Base.test_resample_loffset_arg_type()
    df = frame
    expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)]
    # ``periods`` is an integer count: use floor division rather than ``/``,
    # which would pass a float and rely on implicit coercion.
    expected_index = period_range(df.index[0], periods=len(df.index) // 2, freq="2D")

    # loffset coerces PeriodIndex to DateTimeIndex
    expected_index = expected_index.to_timestamp()
    expected_index += timedelta(hours=2)
    expected = DataFrame({"value": expected_means}, index=expected_index)

    with tm.assert_produces_warning(FutureWarning):
        result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg)
    # a list aggregation is expected to yield MultiIndex columns
    if isinstance(agg_arg, list):
        expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])
    tm.assert_frame_equal(result_agg, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "start,end,start_freq,end_freq,base,offset",
    [
        ("19910905", "19910909 03:00", "H", "24H", 10, "10H"),
        ("19910905", "19910909 12:00", "H", "24H", 10, "10H"),
        ("19910905", "19910909 23:00", "H", "24H", 10, "10H"),
        ("19910905 10:00", "19910909", "H", "24H", 10, "10H"),
        ("19910905 10:00", "19910909 10:00", "H", "24H", 10, "10H"),
        ("19910905", "19910909 10:00", "H", "24H", 10, "10H"),
        ("19910905 12:00", "19910909", "H", "24H", 10, "10H"),
        ("19910905 12:00", "19910909 03:00", "H", "24H", 10, "10H"),
        ("19910905 12:00", "19910909 12:00", "H", "24H", 10, "10H"),
        ("19910905 12:00", "19910909 12:00", "H", "24H", 34, "34H"),
        ("19910905 12:00", "19910909 12:00", "H", "17H", 10, "10H"),
        ("19910905 12:00", "19910909 12:00", "H", "17H", 3, "3H"),
        ("19910905 12:00", "19910909 1:00", "H", "M", 3, "3H"),
        ("19910905", "19910913 06:00", "2H", "24H", 10, "10H"),
        ("19910905", "19910905 01:39", "Min", "5Min", 3, "3Min"),
        ("19910905", "19910905 03:18", "2Min", "5Min", 3, "3Min"),
    ],
)
def test_resample_with_non_zero_base(start, end, start_freq, end_freq, base, offset):
    # GH 23882
    ser = Series(0, index=period_range(start, end, freq=start_freq))
    ser = ser + np.arange(len(ser))

    with tm.assert_produces_warning(FutureWarning):
        with_base = ser.resample(end_freq, base=base).mean()
    result = with_base.to_timestamp(end_freq)

    # test that the replacement argument 'offset' works
    with_offset = ser.resample(end_freq, offset=offset).mean()
    result_offset = with_offset.to_timestamp(end_freq)
    tm.assert_series_equal(result, result_offset)

    # to_timestamp casts 24H -> D
    if end_freq == "24H":
        result = result.asfreq(end_freq)

    # reference: the same deprecated resample on the timestamp-converted series
    with tm.assert_produces_warning(FutureWarning):
        expected = ser.to_timestamp().resample(end_freq, base=base).mean()
    if end_freq == "M":
        # TODO: is non-tick the relevant characteristic? (GH 33815)
        expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_base_with_timedeltaindex():
    # GH 10530
    deltas = timedelta_range(start="0s", periods=25, freq="s")
    ts = Series(np.random.randn(len(deltas)), index=deltas)

    with tm.assert_produces_warning(FutureWarning):
        with_base = ts.resample("2s", base=5).mean()
    without_base = ts.resample("2s").mean()

    expected_plain = timedelta_range(start="0s", end="25s", freq="2s")
    expected_based = timedelta_range(start="5s", end="29s", freq="2s")

    tm.assert_index_equal(without_base.index, expected_plain)
    tm.assert_index_equal(with_base.index, expected_based)
|
||||
|
||||
|
||||
def test_interpolate_posargs_deprecation():
    # GH 41485
    source_index = pd.to_datetime(["1992-08-27 07:46:48", "1992-08-27 07:46:59"])
    ser = Series([1, 4], index=source_index)

    msg = (
        r"In a future version of pandas all arguments of Resampler\.interpolate "
        r"except for the argument 'method' will be keyword-only"
    )

    # passing a second positional argument is what should trigger the warning
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = ser.resample("3s").interpolate("linear", 0)

    expected_index = pd.to_datetime(
        [
            "1992-08-27 07:46:48",
            "1992-08-27 07:46:51",
            "1992-08-27 07:46:54",
            "1992-08-27 07:46:57",
        ]
    )
    expected = Series([1.0, 1.0, 1.0, 1.0], index=expected_index)

    # set the freq on the expected index before comparing
    expected.index._data.freq = "3s"
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_pad_backfill_deprecation():
    # GH 33396
    hourly = date_range("20180101", periods=3, freq="h")
    ser = Series([1, 2, 3], index=hourly)

    # each call must emit a FutureWarning whose message matches the method name
    with tm.assert_produces_warning(FutureWarning, match="backfill"):
        ser.resample("30min").backfill()
    with tm.assert_produces_warning(FutureWarning, match="pad"):
        ser.resample("30min").pad()
|
||||
@@ -0,0 +1,878 @@
|
||||
from datetime import datetime
|
||||
|
||||
import dateutil
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs.ccalendar import (
|
||||
DAYS,
|
||||
MONTHS,
|
||||
)
|
||||
from pandas._libs.tslibs.period import IncompatibleFrequency
|
||||
from pandas.errors import InvalidIndexError
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
from pandas.core.indexes.period import (
|
||||
Period,
|
||||
PeriodIndex,
|
||||
period_range,
|
||||
)
|
||||
from pandas.core.resample import _get_period_range_edges
|
||||
|
||||
import pandas.tseries.offsets as offsets
|
||||
|
||||
|
||||
@pytest.fixture()
def _index_factory():
    """Index factory fixture for this module: ``period_range``."""
    factory = period_range
    return factory
|
||||
|
||||
|
||||
@pytest.fixture
def _series_name():
    """Series name fixture for this module: ``"pi"``."""
    name = "pi"
    return name
|
||||
|
||||
|
||||
class TestPeriodIndex:
|
||||
@pytest.mark.parametrize("freq", ["2D", "1H", "2H"])
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
def test_asfreq(self, series_and_frame, freq, kind):
    # GH 12884, 15944
    # make sure .asfreq() returns PeriodIndex (except kind='timestamp')
    obj = series_and_frame
    stamped = obj.to_timestamp()

    if kind != "timestamp":
        # range from the first period's start up to, but excluding, the start
        # of the period after the last one
        start = obj.index[0].to_timestamp(how="start")
        end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start")
        new_index = date_range(start=start, end=end, freq=freq, inclusive="left")
        expected = stamped.reindex(new_index).to_period(freq)
    else:
        expected = stamped.resample(freq).asfreq()

    result = obj.resample(freq, kind=kind).asfreq()
    tm.assert_almost_equal(result, expected)
|
||||
|
||||
def test_asfreq_fill_value(self, series):
    # test for fill value during resampling, issue 3715
    ser = series

    # Series: compare with a timestamp reindex using fill_value=4.0
    first = ser.index[0].to_timestamp(how="start")
    last = (ser.index[-1]).to_timestamp(how="start")
    hourly = date_range(first, last, freq="1H")
    expected = ser.to_timestamp().reindex(hourly, fill_value=4.0)
    result = ser.resample("1H", kind="timestamp").asfreq(fill_value=4.0)
    tm.assert_series_equal(result, expected)

    # DataFrame: same comparison with fill_value=3.0
    frame = ser.to_frame("value")
    first = frame.index[0].to_timestamp(how="start")
    last = (frame.index[-1]).to_timestamp(how="start")
    hourly = date_range(first, last, freq="1H")
    expected = frame.to_timestamp().reindex(hourly, fill_value=3.0)
    result = frame.resample("1H", kind="timestamp").asfreq(fill_value=3.0)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("freq", ["H", "12H", "2D", "W"])
@pytest.mark.parametrize("kind", [None, "period", "timestamp"])
@pytest.mark.parametrize("kwargs", [{"on": "date"}, {"level": "d"}])
def test_selection(self, index, freq, kind, kwargs):
    # This is a bug, these should be implemented
    # GH 14008
    positions = np.arange(len(index), dtype=np.int64)
    # the period index appears both as column "date" and as index level "d"
    multi = pd.MultiIndex.from_arrays([positions, index], names=["v", "d"])
    df = DataFrame({"date": index, "a": positions}, index=multi)

    msg = (
        "Resampling from level= or on= selection with a PeriodIndex is "
        r"not currently supported, use \.set_index\(\.\.\.\) to "
        "explicitly set index"
    )
    with pytest.raises(NotImplementedError, match=msg):
        df.resample(freq, kind=kind, **kwargs)
|
||||
|
||||
@pytest.mark.parametrize("month", MONTHS)
@pytest.mark.parametrize("meth", ["ffill", "bfill"])
@pytest.mark.parametrize("conv", ["start", "end"])
@pytest.mark.parametrize("targ", ["D", "B", "M"])
def test_annual_upsample_cases(
    self, targ, conv, meth, month, simple_period_range_series
):
    annual = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"A-{month}")

    resampler = annual.resample(targ, convention=conv)
    result = getattr(resampler, meth)()

    # reference: round-trip the result through timestamps and asfreq
    stamped = result.to_timestamp(targ, how=conv)
    expected = stamped.asfreq(targ, meth).to_period()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_basic_downsample(self, simple_period_range_series):
    """Monthly-to-annual mean is compared with a groupby-by-year mean."""
    monthly = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
    result = monthly.resample("a-dec").mean()

    expected = monthly.groupby(monthly.index.year).mean()
    expected.index = period_range("1/1/1990", "6/30/1995", freq="a-dec")
    tm.assert_series_equal(result, expected)

    # this is ok
    for rule in ("a-dec", "a"):
        tm.assert_series_equal(monthly.resample(rule).mean(), result)
|
||||
|
||||
@pytest.mark.parametrize(
    "rule,expected_error_msg",
    [
        ("a-dec", "<YearEnd: month=12>"),
        ("q-mar", "<QuarterEnd: startingMonth=3>"),
        ("M", "<MonthEnd>"),
        ("w-thu", "<Week: weekday=3>"),
    ],
)
def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg):
    """Resampling weekly (W-WED) periods to an incompatible rule raises."""
    weekly = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed")
    pattern = (
        "Frequency <Week: weekday=2> cannot be resampled to "
        f"{expected_error_msg}, as they are not sub or super periods"
    )
    with pytest.raises(IncompatibleFrequency, match=pattern):
        weekly.resample(rule).mean()
|
||||
|
||||
@pytest.mark.parametrize("freq", ["D", "2D"])
def test_basic_upsample(self, freq, simple_period_range_series):
    """Annual means forward-filled to ``freq`` match the timestamp route."""
    monthly = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
    annual = monthly.resample("a-dec").mean()

    resampled = annual.resample(freq, convention="end").ffill()

    stamped = annual.to_timestamp(freq, how="end")
    expected = stamped.asfreq(freq, "ffill").to_period(freq)
    tm.assert_series_equal(resampled, expected)
|
||||
|
||||
def test_upsample_with_limit(self):
    """``ffill(limit=2)`` while upsampling agrees with a limited reindex."""
    years = period_range("1/1/2000", periods=5, freq="A")
    annual = Series(np.random.randn(len(years)), years)

    result = annual.resample("M", convention="end").ffill(limit=2)

    monthly = annual.asfreq("M")
    expected = monthly.reindex(result.index, method="ffill", limit=2)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_annual_upsample(self, simple_period_range_series):
    """Annual -> daily/monthly forward fill for frames and series."""
    annual = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")
    frame = DataFrame({"a": annual})

    # Resampling the frame and resampling its single column must agree.
    frame_filled = frame.resample("D").ffill()
    column_filled = frame["a"].resample("D").ffill()
    tm.assert_series_equal(frame_filled["a"], column_filled)

    years = period_range("2000", "2003", freq="A-DEC")
    small = Series([1, 2, 3, 4], index=years)

    result = small.resample("M").ffill()

    months = period_range("2000-01", "2003-12", freq="M")
    expected = small.asfreq("M", how="start").reindex(months, method="ffill")
    tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("month", MONTHS)
@pytest.mark.parametrize("target", ["D", "B", "M"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_quarterly_upsample(
    self, month, target, convention, simple_period_range_series
):
    """Quarterly periods forward-filled to ``target`` match the timestamp route."""
    quarterly = simple_period_range_series(
        "1/1/1990", "12/31/1995", freq=f"Q-{month}"
    )
    result = quarterly.resample(target, convention=convention).ffill()

    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, "ffill").to_period()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("target", ["D", "B"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_monthly_upsample(self, target, convention, simple_period_range_series):
    """Monthly periods forward-filled to ``target`` match the timestamp route."""
    monthly = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
    result = monthly.resample(target, convention=convention).ffill()

    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, "ffill").to_period()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_basic(self):
    """GH3609: per-minute mean with ``kind="period"`` skips the NaN stretch."""
    seconds = date_range("20130101", freq="s", periods=100, name="idx")
    s = Series(range(100), index=seconds, dtype="float")
    s[10:30] = np.nan

    minute_periods = [
        Period("2013-01-01 00:00", "T"),
        Period("2013-01-01 00:01", "T"),
    ]
    expected = Series([34.5, 79.5], index=PeriodIndex(minute_periods, name="idx"))

    # Starting from a period-indexed series ...
    from_periods = s.to_period().resample("T", kind="period").mean()
    tm.assert_series_equal(from_periods, expected)

    # ... or from the original timestamp-indexed series gives the same result.
    from_stamps = s.resample("T", kind="period").mean()
    tm.assert_series_equal(from_stamps, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])]
)
def test_resample_count(self, freq, expected_vals):
    """GH12774: ``count`` on a period-indexed series of 100 entries."""
    daily = Series(1, index=period_range(start="2000", periods=100))

    result = daily.resample(freq).count()

    bins = period_range(start="2000", freq=freq, periods=len(expected_vals))
    expected = Series(expected_vals, index=bins)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_same_freq(self, resample_method):
|
||||
|
||||
# GH12770
|
||||
series = Series(range(3), index=period_range(start="2000", periods=3, freq="M"))
|
||||
expected = series
|
||||
|
||||
result = getattr(series.resample("M"), resample_method)()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_incompat_freq(self):
    """Monthly periods cannot be resampled to weekly periods."""
    expected_message = (
        "Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>, "
        "as they are not sub or super periods"
    )
    monthly_index = period_range(start="2000", periods=3, freq="M")
    with pytest.raises(IncompatibleFrequency, match=expected_message):
        Series(range(3), index=monthly_index).resample("W").mean()
|
||||
|
||||
def test_with_local_timezone_pytz(self):
    """gh-5430: daily period resample of a series converted to a pytz zone."""
    pacific = pytz.timezone("America/Los_Angeles")

    # One full day of hourly UTC stamps (both end points included).
    start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc)
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc)
    hourly = date_range(start, end, freq="H")

    localized = Series(1, index=hourly).tz_convert(pacific)
    result = localized.resample("D", kind="period").mean()

    # Converting from UTC to Pacific moves the index back by one day.
    shifted_days = period_range(start=start, end=end, freq="D") - offsets.Day()
    expected = Series(1.0, index=shifted_days)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_with_pytz(self):
    """GH 13238: daily mean keeps the tz-aware index and its timezone."""
    hourly = date_range("2017-01-01", periods=48, freq="H", tz="US/Eastern")
    s = Series(2, index=hourly)

    result = s.resample("D").mean()

    daily = pd.DatetimeIndex(
        ["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D"
    )
    expected = Series(2.0, index=daily)
    tm.assert_series_equal(result, expected)

    # The resulting tz must compare equal to the plain pytz zone object.
    assert result.index.tz == pytz.timezone("US/Eastern")
|
||||
|
||||
def test_with_local_timezone_dateutil(self):
    """gh-5430: daily period resample of a series converted to a dateutil zone."""
    pacific = "dateutil/America/Los_Angeles"
    utc = dateutil.tz.tzutc()

    # One full day of hourly UTC stamps (both end points included).
    start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=utc)
    end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=utc)
    hourly = date_range(start, end, freq="H", name="idx")

    localized = Series(1, index=hourly).tz_convert(pacific)
    result = localized.resample("D", kind="period").mean()

    # Converting from UTC to Pacific moves the index back by one day.
    days = period_range(start=start, end=end, freq="D", name="idx")
    expected = Series(1.0, index=days - offsets.Day())
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_nonexistent_time_bin_edge(self):
    """Bin edges falling on nonexistent local times (GH 19375, GH 23742)."""
    # GH 19375
    quarter_hours = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15T")
    naive = Series(np.zeros(len(quarter_hours)), index=quarter_hours)
    expected = naive.tz_localize("US/Pacific")
    expected.index = pd.DatetimeIndex(expected.index, freq="900S")
    result = expected.resample("900S").mean()
    tm.assert_series_equal(result, expected)

    # GH 23742
    hourly = date_range(start="2017-10-10", end="2017-10-20", freq="1H")
    hourly = hourly.tz_localize("UTC").tz_convert("America/Sao_Paulo")
    df = DataFrame(data=list(range(len(hourly))), index=hourly)
    daily_counts = df.groupby(pd.Grouper(freq="1D")).count()
    expected_index = date_range(
        start="2017-10-09",
        end="2017-10-20",
        freq="D",
        tz="America/Sao_Paulo",
        nonexistent="shift_forward",
        inclusive="left",
    )
    tm.assert_index_equal(daily_counts.index, expected_index)
|
||||
|
||||
def test_resample_ambiguous_time_bin_edge(self):
    """GH 10117: same-frequency resample across an ambiguous London time."""
    half_hours = date_range(
        "2014-10-25 22:00:00", "2014-10-26 00:30:00", freq="30T", tz="Europe/London"
    )
    expected = Series(np.zeros(len(half_hours)), index=half_hours)

    result = expected.resample("30T").mean()

    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_fill_method_and_how_upsample(self):
    """GH2073: ffill-upsample, then ``last`` at the same frequency, is a no-op."""
    quarter_ends = date_range("2010-01-01", periods=9, freq="Q")
    s = Series(np.arange(9, dtype="int64"), index=quarter_ends)

    filled = s.resample("M").ffill()
    refilled = s.resample("M").ffill().resample("M").last().astype("int64")

    tm.assert_series_equal(filled, refilled)
|
||||
|
||||
@pytest.mark.parametrize("day", DAYS)
@pytest.mark.parametrize("target", ["D", "B"])
@pytest.mark.parametrize("convention", ["start", "end"])
def test_weekly_upsample(self, day, target, convention, simple_period_range_series):
    """Weekly periods forward-filled to ``target`` match the timestamp route."""
    weekly = simple_period_range_series(
        "1/1/1990", "12/31/1995", freq=f"W-{day}"
    )
    result = weekly.resample(target, convention=convention).ffill()

    stamped = result.to_timestamp(target, how=convention)
    expected = stamped.asfreq(target, "ffill").to_period()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_to_timestamps(self, simple_period_range_series):
    """``kind="timestamp"`` equals converting to timestamps first."""
    monthly = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")

    result = monthly.resample("A-DEC", kind="timestamp").mean()

    stamped = monthly.to_timestamp(how="start")
    expected = stamped.resample("A-DEC").mean()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_to_quarterly(self, simple_period_range_series):
    """Annual -> quarterly forward fill, for matching and differing anchors."""
    for month in MONTHS:
        quarterly_rule = f"Q-{month}"
        annual = simple_period_range_series("1990", "1992", freq=f"A-{month}")
        quar_ts = annual.resample(quarterly_rule).ffill()

        stamps = annual.to_timestamp("D", how="start")
        first_day = annual.index[0].asfreq("D", "start")
        last_day = annual.index[-1].asfreq("D", "end")
        qdates = period_range(first_day, last_day, freq=quarterly_rule)

        expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill")
        expected.index = qdates

        tm.assert_series_equal(quar_ts, expected)

    # conforms, but different month
    annual_jun = simple_period_range_series("1990", "1992", freq="A-JUN")

    for how in ["start", "end"]:
        result = annual_jun.resample("Q-MAR", convention=how).ffill()
        as_quarters = annual_jun.asfreq("Q-MAR", how=how)
        expected = as_quarters.reindex(result.index, method="ffill")

        tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_fill_missing(self):
    """Forward fill across gaps in an annual PeriodIndex."""
    gappy_years = PeriodIndex([2000, 2005, 2007, 2009], freq="A")
    s = Series(np.random.randn(4), index=gappy_years)

    stamps = s.to_timestamp()
    filled = s.resample("A").ffill()

    expected = stamps.resample("A").ffill().to_period("A")
    tm.assert_series_equal(filled, expected)
|
||||
|
||||
def test_cant_fill_missing_dups(self):
    """Forward fill on a PeriodIndex with duplicate entries raises."""
    duplicated_years = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A")
    s = Series(np.random.randn(5), index=duplicated_years)

    expected_message = "Reindexing only valid with uniquely valued Index objects"
    with pytest.raises(InvalidIndexError, match=expected_message):
        s.resample("A").ffill()
|
||||
|
||||
@pytest.mark.parametrize("freq", ["5min"])
@pytest.mark.parametrize("kind", ["period", None, "timestamp"])
def test_resample_5minute(self, freq, kind):
    """Minute periods -> 5-minute mean, for every ``kind`` option."""
    minutes = period_range("1/1/2000", "1/5/2000", freq="T")
    ts = Series(np.random.randn(len(minutes)), index=minutes)

    expected = ts.to_timestamp().resample(freq).mean()
    # Only kind="timestamp" leaves the result on a timestamp index.
    if kind != "timestamp":
        expected = expected.to_period(freq)

    result = ts.resample(freq, kind=kind).mean()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_upsample_daily_business_daily(self, simple_period_range_series):
    """Business-day -> daily and daily -> hourly upsampling via ``asfreq``."""
    business = simple_period_range_series("1/1/2000", "2/1/2000", freq="B")

    result = business.resample("D").asfreq()
    all_days = period_range("1/3/2000", "2/1/2000")
    expected = business.asfreq("D").reindex(all_days)
    tm.assert_series_equal(result, expected)

    daily = simple_period_range_series("1/1/2000", "2/1/2000")
    result = daily.resample("H", convention="s").asfreq()
    all_hours = period_range("1/1/2000", "2/1/2000 23:00", freq="H")
    expected = daily.asfreq("H", how="s").reindex(all_hours)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_irregular_sparse(self):
    """Resampling a leading slice matches the same bins of the full series."""
    five_minutes = date_range(start="1/1/2012", freq="5min", periods=1000)
    s = Series(np.array(100), index=five_minutes)
    # Keep only the rows up to the given timestamp.
    head = s[:"2012-01-04 06:55"]

    result = head.resample("10min").apply(len)

    full = s.resample("10min").apply(len)
    expected = full.loc[result.index]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_weekly_all_na(self):
    """W-WED data viewed on W-THU bins is all-NaN until forward filled."""
    wednesdays = date_range("1/1/2000", periods=10, freq="W-WED")
    ts = Series(np.random.randn(len(wednesdays)), index=wednesdays)

    on_thursdays = ts.resample("W-THU").asfreq()
    assert on_thursdays.isna().all()

    # The resampled series has one trailing bin more than plain asfreq.
    result = ts.resample("W-THU").asfreq().ffill()[:-1]
    expected = ts.asfreq("W-THU").ffill()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_tz_localized(self):
    """Resampling tz-aware data: weekly, daily, period kind, and dict ``agg``.

    Covers several historical reports in one test (#2245, GH 6397).
    """
    dr = date_range(start="2012-4-13", end="2012-5-1")
    ts = Series(range(len(dr)), index=dr)

    ts_utc = ts.tz_localize("UTC")
    ts_local = ts_utc.tz_convert("America/Los_Angeles")

    result = ts_local.resample("W").mean()

    # Build the expectation by resampling the wall-clock (naive) times and
    # re-attaching the timezone afterwards.
    ts_local_naive = ts_local.copy()
    ts_local_naive.index = [
        x.replace(tzinfo=None) for x in ts_local_naive.index.to_pydatetime()
    ]

    exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles")
    exp.index = pd.DatetimeIndex(exp.index, freq="W")

    tm.assert_series_equal(result, exp)

    # it works (smoke test only, the value is not inspected)
    result = ts_local.resample("D").mean()

    # #2245
    idx = date_range(
        "2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney"
    )
    s = Series([1, 2], index=idx)

    result = s.resample("D", closed="right", label="right").mean()
    ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney")
    expected = Series([1.5], index=ex_index)

    tm.assert_series_equal(result, expected)

    # for good measure
    result = s.resample("D", kind="period").mean()
    ex_index = period_range("2001-09-20", periods=1, freq="D")
    expected = Series([1.5], index=ex_index)
    tm.assert_series_equal(result, expected)

    # GH 6397
    # comparing an offset that doesn't propagate tz's
    rng = date_range("1/1/2011", periods=20000, freq="H")
    rng = rng.tz_localize("EST")
    ts = DataFrame(index=rng)
    ts["first"] = np.random.randn(len(rng))
    ts["second"] = np.cumsum(np.random.randn(len(rng)))
    expected = DataFrame(
        {
            "first": ts.resample("A").sum()["first"],
            "second": ts.resample("A").mean()["second"],
        },
        columns=["first", "second"],
    )
    # Aggregations are named by string rather than passed as NumPy callables:
    # the result is the same and it avoids the deprecation warning newer
    # pandas emits for ``np.sum`` / ``np.mean`` in ``agg``.
    result = (
        ts.resample("A")
        .agg({"first": "sum", "second": "mean"})
        .reindex(columns=["first", "second"])
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_closed_left_corner(self):
|
||||
# #1465
|
||||
s = Series(
|
||||
np.random.randn(21),
|
||||
index=date_range(start="1/1/2012 9:30", freq="1min", periods=21),
|
||||
)
|
||||
s[0] = np.nan
|
||||
|
||||
result = s.resample("10min", closed="left", label="right").mean()
|
||||
exp = s[1:].resample("10min", closed="left", label="right").mean()
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
result = s.resample("10min", closed="left", label="left").mean()
|
||||
exp = s[1:].resample("10min", closed="left", label="left").mean()
|
||||
|
||||
ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3)
|
||||
|
||||
tm.assert_index_equal(result.index, ex_index)
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
def test_quarterly_resampling(self):
    """Quarterly -> annual mean equals the timestamp-based computation."""
    quarters = period_range("2000Q1", periods=10, freq="Q-DEC")
    ts = Series(np.arange(10), index=quarters)

    result = ts.resample("A").mean()

    stamped = ts.to_timestamp()
    expected = stamped.resample("A").mean().to_period()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_resample_weekly_bug_1726(self):
|
||||
# 8/6/12 is a Monday
|
||||
ind = date_range(start="8/6/2012", end="8/26/2012", freq="D")
|
||||
n = len(ind)
|
||||
data = [[x] * 5 for x in range(n)]
|
||||
df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind)
|
||||
|
||||
# it works!
|
||||
df.resample("W-MON", closed="left", label="left").first()
|
||||
|
||||
def test_resample_with_dst_time_change(self):
|
||||
# GH 15549
|
||||
index = (
|
||||
pd.DatetimeIndex([1457537600000000000, 1458059600000000000])
|
||||
.tz_localize("UTC")
|
||||
.tz_convert("America/Chicago")
|
||||
)
|
||||
df = DataFrame([1, 2], index=index)
|
||||
result = df.resample("12h", closed="right", label="right").last().ffill()
|
||||
|
||||
expected_index_values = [
|
||||
"2016-03-09 12:00:00-06:00",
|
||||
"2016-03-10 00:00:00-06:00",
|
||||
"2016-03-10 12:00:00-06:00",
|
||||
"2016-03-11 00:00:00-06:00",
|
||||
"2016-03-11 12:00:00-06:00",
|
||||
"2016-03-12 00:00:00-06:00",
|
||||
"2016-03-12 12:00:00-06:00",
|
||||
"2016-03-13 00:00:00-06:00",
|
||||
"2016-03-13 13:00:00-05:00",
|
||||
"2016-03-14 01:00:00-05:00",
|
||||
"2016-03-14 13:00:00-05:00",
|
||||
"2016-03-15 01:00:00-05:00",
|
||||
"2016-03-15 13:00:00-05:00",
|
||||
]
|
||||
index = pd.to_datetime(expected_index_values, utc=True).tz_convert(
|
||||
"America/Chicago"
|
||||
)
|
||||
index = pd.DatetimeIndex(index, freq="12h")
|
||||
expected = DataFrame(
|
||||
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0],
|
||||
index=index,
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_resample_bms_2752(self):
|
||||
# GH2753
|
||||
foo = Series(index=pd.bdate_range("20000101", "20000201"), dtype=np.float64)
|
||||
res1 = foo.resample("BMS").mean()
|
||||
res2 = foo.resample("BMS").mean().resample("B").mean()
|
||||
assert res1.index[0] == Timestamp("20000103")
|
||||
assert res1.index[0] == res2.index[0]
|
||||
|
||||
# def test_monthly_convention_span(self):
|
||||
# rng = period_range('2000-01', periods=3, freq='M')
|
||||
# ts = Series(np.arange(3), index=rng)
|
||||
|
||||
# # hacky way to get same thing
|
||||
# exp_index = period_range('2000-01-01', '2000-03-31', freq='D')
|
||||
# expected = ts.asfreq('D', how='end').reindex(exp_index)
|
||||
# expected = expected.fillna(method='bfill')
|
||||
|
||||
# result = ts.resample('D', convention='span').mean()
|
||||
|
||||
# tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_default_right_closed_label(self):
    """Period-end target rules default to ``closed="right", label="right"``."""
    # (source frequency, target rule) pairs
    cases = [("D", "M"), ("Q", "A"), ("M", "Q"), ("D", "W")]

    for from_freq, to_freq in cases:
        idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
        df = DataFrame(np.random.randn(len(idx), 2), idx)

        with_defaults = df.resample(to_freq).mean()
        explicit = df.resample(to_freq, closed="right", label="right").mean()
        tm.assert_frame_equal(with_defaults, explicit)
|
||||
|
||||
def test_default_left_closed_label(self):
    """Start-anchored and tick target rules default to left/left."""
    # (source frequency, target rule) pairs
    cases = [("D", "MS"), ("Q", "AS"), ("M", "QS"), ("H", "D"), ("T", "H")]

    for from_freq, to_freq in cases:
        idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
        df = DataFrame(np.random.randn(len(idx), 2), idx)

        with_defaults = df.resample(to_freq).mean()
        explicit = df.resample(to_freq, closed="left", label="left").mean()
        tm.assert_frame_equal(with_defaults, explicit)
|
||||
|
||||
def test_all_values_single_bin(self):
    """2070: twelve monthly values collapse into one annual bin.

    The single annual mean must equal the mean of the whole series.
    """
    index = period_range(start="2012-01-01", end="2012-12-31", freq="M")
    s = Series(np.random.randn(len(index)), index=index)

    result = s.resample("A").mean()
    # Positional access is spelled out with ``.iloc``; a bare ``result[0]``
    # relies on the deprecated integer-key fallback for a PeriodIndex.
    tm.assert_almost_equal(result.iloc[0], s.mean())
|
||||
|
||||
def test_evenly_divisible_with_no_extra_bins(self):
|
||||
# 4076
|
||||
# when the frequency is evenly divisible, sometimes extra bins
|
||||
|
||||
df = DataFrame(np.random.randn(9, 3), index=date_range("2000-1-1", periods=9))
|
||||
result = df.resample("5D").mean()
|
||||
expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
|
||||
expected.index = pd.DatetimeIndex(
|
||||
[Timestamp("2000-1-1"), Timestamp("2000-1-6")], freq="5D"
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
index = date_range(start="2001-5-4", periods=28)
|
||||
df = DataFrame(
|
||||
[
|
||||
{
|
||||
"REST_KEY": 1,
|
||||
"DLY_TRN_QT": 80,
|
||||
"DLY_SLS_AMT": 90,
|
||||
"COOP_DLY_TRN_QT": 30,
|
||||
"COOP_DLY_SLS_AMT": 20,
|
||||
}
|
||||
]
|
||||
* 28
|
||||
+ [
|
||||
{
|
||||
"REST_KEY": 2,
|
||||
"DLY_TRN_QT": 70,
|
||||
"DLY_SLS_AMT": 10,
|
||||
"COOP_DLY_TRN_QT": 50,
|
||||
"COOP_DLY_SLS_AMT": 20,
|
||||
}
|
||||
]
|
||||
* 28,
|
||||
index=index.append(index),
|
||||
).sort_index()
|
||||
|
||||
index = date_range("2001-5-4", periods=4, freq="7D")
|
||||
expected = DataFrame(
|
||||
[
|
||||
{
|
||||
"REST_KEY": 14,
|
||||
"DLY_TRN_QT": 14,
|
||||
"DLY_SLS_AMT": 14,
|
||||
"COOP_DLY_TRN_QT": 14,
|
||||
"COOP_DLY_SLS_AMT": 14,
|
||||
}
|
||||
]
|
||||
* 4,
|
||||
index=index,
|
||||
)
|
||||
result = df.resample("7D").count()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = DataFrame(
|
||||
[
|
||||
{
|
||||
"REST_KEY": 21,
|
||||
"DLY_TRN_QT": 1050,
|
||||
"DLY_SLS_AMT": 700,
|
||||
"COOP_DLY_TRN_QT": 560,
|
||||
"COOP_DLY_SLS_AMT": 280,
|
||||
}
|
||||
]
|
||||
* 4,
|
||||
index=index,
|
||||
)
|
||||
result = df.resample("7D").sum()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("freq, period_mult", [("H", 24), ("12H", 2)])
@pytest.mark.parametrize("kind", [None, "period"])
def test_upsampling_ohlc(self, freq, period_mult, kind):
    """GH 13083: ``ohlc`` when upsampling a daily PeriodIndex series."""
    days = period_range(start="2000", freq="D", periods=10)
    s = Series(range(len(days)), index=days)
    expected = s.to_timestamp().resample(freq).ohlc().to_period(freq)

    # timestamp-based resampling doesn't include all sub-periods
    # of the last original period, so extend accordingly:
    full_index = period_range(
        start="2000", freq=freq, periods=period_mult * len(days)
    )
    expected = expected.reindex(full_index)

    result = s.resample(freq, kind=kind).ohlc()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "periods, values",
    [
        (
            [
                pd.NaT,
                "1970-01-01 00:00:00",
                pd.NaT,
                "1970-01-01 00:00:02",
                "1970-01-01 00:00:03",
            ],
            [2, 3, 5, 7, 11],
        ),
        (
            [
                pd.NaT,
                pd.NaT,
                "1970-01-01 00:00:00",
                pd.NaT,
                pd.NaT,
                pd.NaT,
                "1970-01-01 00:00:02",
                "1970-01-01 00:00:03",
                pd.NaT,
                pd.NaT,
            ],
            [1, 2, 3, 5, 6, 8, 7, 11, 12, 13],
        ),
    ],
)
@pytest.mark.parametrize(
    "freq, expected_values",
    [
        # ``np.nan`` (lowercase): the ``np.NaN`` alias was removed in NumPy 2.0.
        ("1s", [3, np.nan, 7, 11]),
        ("2s", [3, (7 + 11) / 2]),
        ("3s", [(3 + 7) / 2, 11]),
    ],
)
def test_resample_with_nat(self, periods, values, freq, expected_values):
    """GH 13224: rows whose period is NaT are ignored when resampling.

    Both parametrized inputs carry the values 3, 7 and 11 at seconds 0, 2
    and 3; every other row sits on a NaT label and must not contribute.
    """
    index = PeriodIndex(periods, freq="S")
    frame = DataFrame(values, index=index)

    expected_index = period_range(
        "1970-01-01 00:00:00", periods=len(expected_values), freq=freq
    )
    expected = DataFrame(expected_values, index=expected_index)
    result = frame.resample(freq).mean()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
def test_resample_with_only_nat(self):
    """GH 13224: an all-NaT PeriodIndex resamples to an empty frame."""
    all_nat = PeriodIndex([pd.NaT] * 3, freq="S")
    frame = DataFrame([2, 3, 5], index=all_nat, columns=["a"])

    result = frame.resample("1s").mean()

    empty_index = PeriodIndex(data=[], freq=all_nat.freq)
    expected = DataFrame(index=empty_index, columns=["a"], dtype="float64")
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "start,end,start_freq,end_freq,offset",
    [
        ("19910905", "19910909 03:00", "H", "24H", "10H"),
        ("19910905", "19910909 12:00", "H", "24H", "10H"),
        ("19910905", "19910909 23:00", "H", "24H", "10H"),
        ("19910905 10:00", "19910909", "H", "24H", "10H"),
        ("19910905 10:00", "19910909 10:00", "H", "24H", "10H"),
        ("19910905", "19910909 10:00", "H", "24H", "10H"),
        ("19910905 12:00", "19910909", "H", "24H", "10H"),
        ("19910905 12:00", "19910909 03:00", "H", "24H", "10H"),
        ("19910905 12:00", "19910909 12:00", "H", "24H", "10H"),
        ("19910905 12:00", "19910909 12:00", "H", "24H", "34H"),
        ("19910905 12:00", "19910909 12:00", "H", "17H", "10H"),
        ("19910905 12:00", "19910909 12:00", "H", "17H", "3H"),
        ("19910905 12:00", "19910909 1:00", "H", "M", "3H"),
        ("19910905", "19910913 06:00", "2H", "24H", "10H"),
        ("19910905", "19910905 01:39", "Min", "5Min", "3Min"),
        ("19910905", "19910905 03:18", "2Min", "5Min", "3Min"),
    ],
)
def test_resample_with_offset(self, start, end, start_freq, end_freq, offset):
    """GH 23882 & 31809: ``offset=`` on periods matches the timestamp result."""
    periods = period_range(start, end, freq=start_freq)
    s = Series(0, index=periods)
    s = s + np.arange(len(s))

    from_periods = s.resample(end_freq, offset=offset).mean()
    result = from_periods.to_timestamp(end_freq)

    expected = s.to_timestamp().resample(end_freq, offset=offset).mean()
    if end_freq == "M":
        # TODO: is non-tick the relevant characteristic? (GH 33815)
        expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "first,last,freq,exp_first,exp_last",
    [
        ("19910905", "19920406", "D", "19910905", "19920406"),
        ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920406"),
        (
            "19910905 06:00",
            "19920406 06:00",
            "H",
            "19910905 06:00",
            "19920406 06:00",
        ),
        ("19910906", "19920406", "M", "1991-09", "1992-04"),
        ("19910831", "19920430", "M", "1991-08", "1992-04"),
        ("1991-08", "1992-04", "M", "1991-08", "1992-04"),
    ],
)
def test_get_period_range_edges(self, first, last, freq, exp_first, exp_last):
    """``_get_period_range_edges`` returns the expected (first, last) pair."""
    first_period = Period(first)
    last_period = Period(last)
    expected = (Period(exp_first, freq=freq), Period(exp_last, freq=freq))

    offset = pd.tseries.frequencies.to_offset(freq)
    result = _get_period_range_edges(first_period, last_period, offset)

    assert result == expected
|
||||
|
||||
def test_sum_min_count(self):
    """GH 19974: ``sum(min_count=1)`` yields NaN for an all-NaN quarter."""
    month_ends = date_range(start="2018", freq="M", periods=6)
    values = np.ones(6)
    values[3:6] = np.nan  # the second quarter has no valid observations
    s = Series(values, month_ends).to_period()

    result = s.resample("Q").sum(min_count=1)

    quarters = PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC")
    expected = Series([3.0, np.nan], index=quarters)
    tm.assert_series_equal(result, expected)
|
||||
@@ -0,0 +1,752 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
NamedAgg,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")
|
||||
|
||||
test_series = Series(np.random.rand(len(dti)), dti)
|
||||
_test_frame = DataFrame({"A": test_series, "B": test_series, "C": np.arange(len(dti))})
|
||||
|
||||
|
||||
@pytest.fixture
def test_frame():
    """Return a fresh copy of the module-level frame for each test."""
    frame_copy = _test_frame.copy()
    return frame_copy
|
||||
|
||||
|
||||
def test_str():
    """``str()`` of a resampler shows its configuration, including origin."""
    default_repr = str(test_series.resample("H"))
    assert (
        "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
        "label=left, convention=start, origin=start_day]" in default_repr
    )

    custom_origin_repr = str(test_series.resample("H", origin="2000-01-01"))
    assert (
        "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
        "label=left, convention=start, origin=2000-01-01 00:00:00]"
        in custom_origin_repr
    )
|
||||
|
||||
|
||||
def test_api():
    """Hourly mean returns the input's container type with 217 rows."""
    series_mean = test_series.resample("H").mean()
    assert isinstance(series_mean, Series)
    assert len(series_mean) == 217

    frame_mean = test_series.to_frame().resample("H").mean()
    assert isinstance(frame_mean, DataFrame)
    assert len(frame_mean) == 217
|
||||
|
||||
|
||||
def test_groupby_resample_api():
    """GH 12448: resample inside ``groupby(...).apply`` upsamples per group."""
    weekly = DataFrame(
        {
            "date": date_range(start="2016-01-01", periods=4, freq="W"),
            "group": [1, 1, 2, 2],
            "val": [5, 6, 7, 8],
        }
    )
    df = weekly.set_index("date")

    # replication step: eight consecutive days for each of the two groups
    group_one_days = date_range("2016-01-03", periods=8).tolist()
    group_two_days = date_range("2016-01-17", periods=8).tolist()
    index = pd.MultiIndex.from_arrays(
        [[1] * 8 + [2] * 8, group_one_days + group_two_days],
        names=["group", "date"],
    )
    expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index)

    upsampled = df.groupby("group").apply(lambda x: x.resample("1D").ffill())
    result = upsampled[["val"]]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api():
    """GH 15021: ``groupby(...).resample(on=...)`` equals setting the index first."""
    df = DataFrame(
        {
            "key": ["A", "B"] * 5,
            "dates": date_range("2016-01-01", periods=10),
            "values": np.random.randn(10),
        }
    )

    indexed = df.set_index("dates")
    expected = indexed.groupby("key").resample("D").mean()

    result = df.groupby("key").resample("D", on="dates").mean()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_pipe(test_frame):
    """GH17905: ``Resampler.pipe`` forwards the resampler to the callable."""

    def spread(resampler):
        # Same computation as the original inline lambda.
        return resampler.max() - resampler.mean()

    # series
    series_resampler = test_series.resample("H")
    expected = series_resampler.max() - series_resampler.mean()
    result = series_resampler.pipe(spread)
    tm.assert_series_equal(result, expected)

    # dataframe
    frame_resampler = test_frame.resample("H")
    expected = frame_resampler.max() - frame_resampler.mean()
    result = frame_resampler.pipe(spread)
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem(test_frame):
    """Column selection on a resampler narrows ``_selected_obj``."""
    all_columns = test_frame.columns

    unselected = test_frame.resample("H")
    tm.assert_index_equal(unselected._selected_obj.columns, all_columns)

    single = test_frame.resample("H")["B"]
    assert single._selected_obj.name == all_columns[1]

    # technically this is allowed
    pair = test_frame.resample("H")["A", "B"]
    tm.assert_index_equal(pair._selected_obj.columns, all_columns[[0, 1]])

    # The original test performs the same tuple selection a second time.
    pair_again = test_frame.resample("H")["A", "B"]
    tm.assert_index_equal(pair_again._selected_obj.columns, all_columns[[0, 1]])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("key", [["D"], ["A", "D"]])
def test_select_bad_cols(key, test_frame):
    """Selecting a missing column raises ``KeyError`` that names only 'D'."""
    hourly = test_frame.resample("H")
    # The pattern is anchored on both ends so that 'A' cannot show up as a
    # bad column; the regex has to be revisited if the message ever changes.
    expected_message = r"^\"Columns not found: 'D'\"$"
    with pytest.raises(KeyError, match=expected_message):
        hourly[key]
|
||||
|
||||
|
||||
def test_attribute_access(test_frame):
    """Attribute-style and item-style column access aggregate identically."""
    hourly = test_frame.resample("H")
    by_attribute = hourly.A.sum()
    by_item = hourly["A"].sum()
    tm.assert_series_equal(by_attribute, by_item)
|
||||
|
||||
|
||||
def test_api_compat_before_use():
    """Read ``groups``/``ngroups``/``indices`` before and after an aggregation."""

    def new_resampler():
        # a fresh, never-used resampler for every attribute
        stamps = date_range("1/1/2012", periods=100, freq="S")
        series = Series(np.arange(len(stamps)), index=stamps)
        return series.resample("30s")

    # make sure that we are setting the binner on these attributes
    for name in ("groups", "ngroups", "indices"):
        resampler = new_resampler()

        # before use
        getattr(resampler, name)

        # after the grouper is initialized it must still be fine
        resampler.mean()
        getattr(resampler, name)
|
||||
|
||||
|
||||
def tests_skip_nuisance(test_frame):
    """Summing with an extra string column 'D' matches summing A, B and C."""
    test_frame["D"] = "foo"
    hourly = test_frame.resample("H")

    # an explicit two-column selection equals the per-column sums side by side
    selected = hourly[["A", "B"]].sum()
    side_by_side = pd.concat([hourly.A.sum(), hourly.B.sum()], axis=1)
    tm.assert_frame_equal(selected, side_by_side)

    # summing without a selection equals summing A, B and C only
    abc_only = hourly[["A", "B", "C"]].sum()
    unselected = hourly.sum()
    tm.assert_frame_equal(unselected, abc_only)
|
||||
|
||||
|
||||
def test_downsample_but_actually_upsampling():
    """``resample("20s").asfreq()`` keeps the values sitting on the 20s grid."""
    # this is reindex / asfreq
    seconds = date_range("1/1/2012", periods=100, freq="S")
    counter = Series(np.arange(len(seconds), dtype="int64"), index=seconds)

    every_20s = date_range("2012-01-01 00:00:00", freq="20s", periods=5)
    expected = Series([0, 20, 40, 60, 80], index=every_20s)

    result = counter.resample("20s").asfreq()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_combined_up_downsampling_of_irregular():
    """Resample an irregular series to 2s means, then forward-fill the gaps."""
    # since we are really doing an operation like this
    # ts2.resample('2s').mean().ffill()
    # preserve these semantics
    seconds = date_range("1/1/2012", periods=100, freq="S")
    regular = Series(np.arange(len(seconds)), index=seconds)
    irregular = regular.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]]

    # one label every two seconds, from 00:00:00 through 00:00:30
    labels = [f"2012-01-01 00:00:{second:02d}" for second in range(0, 32, 2)]
    values = [0.5, 2.5, 5.0, 7.0, 7.0, 11.0, 11.0, 15.0]
    values += [16.0, 16.0, 16.0, 16.0, 25.0, 25.0, 25.0, 30.0]
    expected = Series(
        values,
        index=pd.DatetimeIndex(labels, dtype="datetime64[ns]", freq="2S"),
    )

    result = irregular.resample("2s").mean().ffill()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_transform():
    """``Resampler.transform`` matches ``groupby(pd.Grouper(...)).transform``."""
    bucket = "20min"
    resampler = test_series.resample(bucket)
    expected = test_series.groupby(pd.Grouper(freq=bucket)).transform("mean")
    tm.assert_series_equal(resampler.transform("mean"), expected)
|
||||
|
||||
|
||||
def test_fillna():
    """``fillna(method=...)`` mirrors ``ffill``/``bfill``; a scalar fill raises."""
    # need to upsample here
    coarse = date_range("1/1/2012", periods=10, freq="2S")
    series = Series(np.arange(len(coarse), dtype="int64"), index=coarse)
    upsampled = series.resample("s")

    for method in ("ffill", "bfill"):
        expected = getattr(upsampled, method)()
        result = upsampled.fillna(method=method)
        tm.assert_series_equal(result, expected)

    msg = (
        r"Invalid fill method\. Expecting pad \(ffill\), backfill "
        r"\(bfill\) or nearest\. Got 0"
    )
    with pytest.raises(ValueError, match=msg):
        upsampled.fillna(0)
|
||||
|
||||
|
||||
def test_apply_without_aggregation():
    """An identity ``apply`` returns the input for both resample and groupby."""
    # both resample and groupby should work w/o aggregation
    resampled = test_series.resample("20min")
    grouped = test_series.groupby(pd.Grouper(freq="20min"))

    for target in (grouped, resampled):
        unchanged = target.apply(lambda x: x)
        tm.assert_series_equal(unchanged, test_series)
|
||||
|
||||
|
||||
def test_agg_consistency():
    """A dict ``agg`` keyed by non-existent column names raises ``KeyError``."""
    # make sure that we are consistent across
    # similar aggregations with and w/o selection list
    n_rows = 1000
    frame = DataFrame(
        np.random.randn(n_rows, 3),
        index=date_range("1/1/2012", freq="S", periods=n_rows),
        columns=["A", "B", "C"],
    )
    resampler = frame.resample("3T")

    spec = {"r1": "mean", "r2": "sum"}
    with pytest.raises(KeyError, match=r"Column\(s\) \['r1', 'r2'\] do not exist"):
        resampler.agg(spec)
|
||||
|
||||
|
||||
def test_agg_consistency_int_str_column_mix():
    """Missing int and str keys are both listed in the ``KeyError`` (GH#39025)."""
    n_rows = 1000
    frame = DataFrame(
        np.random.randn(n_rows, 2),
        index=date_range("1/1/2012", freq="S", periods=n_rows),
        columns=[1, "a"],
    )
    resampler = frame.resample("3T")

    spec = {2: "mean", "b": "sum"}
    with pytest.raises(KeyError, match=r"Column\(s\) \[2, 'b'\] do not exist"):
        resampler.agg(spec)
|
||||
|
||||
|
||||
# TODO(GH#14008): once GH 14008 is fixed, move these tests into
|
||||
# `Base` test class
|
||||
|
||||
|
||||
def test_agg():
    """Run list, dict and named aggregations on every resampling entry point.

    The same checks are applied to a plain ``resample``, ``resample(on=...)``,
    ``resample(level=...)`` and ``groupby(pd.Grouper(...))``.  Expected frames
    are assembled from single-column aggregations of the plain resampler.
    """
    # test with all three Resampler apis and TimeGrouper

    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    index.name = "date"
    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays(
        [range(10), df.index], names=["index", "date"]
    )
    r = df.resample("2D")
    cases = [
        r,
        df_col.resample("2D", on="date"),
        df_mult.resample("2D", level="date"),
        df.groupby(pd.Grouper(freq="2D")),
    ]

    # building blocks for the expected frames
    a_mean = r["A"].mean()
    a_std = r["A"].std()
    a_sum = r["A"].sum()
    b_mean = r["B"].mean()
    b_std = r["B"].std()

    # list of functions -> (column, function) MultiIndex columns
    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
    for t in cases:
        # only the on=/level= cases still carry 'date' as a regular column
        warn = FutureWarning if t in cases[1:3] else None
        with tm.assert_produces_warning(
            warn,
            match=r"\['date'\] did not aggregate successfully",
        ):
            # aggregating the dt64 'date' column raises and it is dropped
            result = t.aggregate([np.mean, np.std])
        tm.assert_frame_equal(result, expected)

    # one function per column: dict, tuple-style and NamedAgg spellings
    expected = pd.concat([a_mean, b_std], axis=1)
    for t in cases:
        result = t.aggregate({"A": np.mean, "B": np.std})
        tm.assert_frame_equal(result, expected, check_like=True)

        result = t.aggregate(A=("A", np.mean), B=("B", np.std))
        tm.assert_frame_equal(result, expected, check_like=True)

        result = t.aggregate(A=NamedAgg("A", np.mean), B=NamedAgg("B", np.std))
        tm.assert_frame_equal(result, expected, check_like=True)

    # several functions for a single column
    expected = pd.concat([a_mean, a_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
    for t in cases:
        result = t.aggregate({"A": ["mean", "std"]})
        tm.assert_frame_equal(result, expected)

    # several functions after selecting one column
    expected = pd.concat([a_mean, a_sum], axis=1)
    expected.columns = ["mean", "sum"]
    for t in cases:
        result = t["A"].aggregate(["mean", "sum"])
        tm.assert_frame_equal(result, expected)

        result = t["A"].aggregate(mean="mean", sum="sum")
        tm.assert_frame_equal(result, expected)

    # nested renaming dicts are rejected, for one column or for several
    msg = "nested renamer is not supported"
    for t in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t.aggregate({"A": {"mean": "mean", "sum": "sum"}})

    for t in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t.aggregate(
                {
                    "A": {"mean": "mean", "sum": "sum"},
                    "B": {"mean2": "mean", "sum2": "sum"},
                }
            )

    # several functions for several columns
    expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
    expected.columns = pd.MultiIndex.from_tuples(
        [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
    )
    for t in cases:
        result = t.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
        tm.assert_frame_equal(result, expected, check_like=True)
|
||||
|
||||
|
||||
def test_agg_misc():
    """Check lambdas, mixed hows and invalid specs on every resampling entry point.

    The same checks are applied to a plain ``resample``, ``resample(on=...)``,
    ``resample(level=...)`` and ``groupby(pd.Grouper(...))``.
    """
    # test with all three Resampler apis and TimeGrouper

    np.random.seed(1234)
    index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    index.name = "date"
    df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
    df_col = df.reset_index()
    df_mult = df_col.copy()
    df_mult.index = pd.MultiIndex.from_arrays(
        [range(10), df.index], names=["index", "date"]
    )

    r = df.resample("2D")
    cases = [
        r,
        df_col.resample("2D", on="date"),
        df_mult.resample("2D", level="date"),
        df.groupby(pd.Grouper(freq="2D")),
    ]

    # passed lambda
    for t in cases:
        result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
        rcustom = t["B"].apply(lambda x: np.std(x, ddof=1))
        expected = pd.concat([r["A"].sum(), rcustom], axis=1)
        tm.assert_frame_equal(result, expected, check_like=True)

        result = t.agg(A=("A", np.sum), B=("B", lambda x: np.std(x, ddof=1)))
        tm.assert_frame_equal(result, expected, check_like=True)

        result = t.agg(
            A=NamedAgg("A", np.sum), B=NamedAgg("B", lambda x: np.std(x, ddof=1))
        )
        tm.assert_frame_equal(result, expected, check_like=True)

    # agg with renamers: keys that are not columns are rejected
    msg = r"Column\(s\) \['result1', 'result2'\] do not exist"
    for t in cases:
        with pytest.raises(KeyError, match=msg):
            t[["A", "B"]].agg({"result1": np.sum, "result2": np.mean})

        with pytest.raises(KeyError, match=msg):
            t[["A", "B"]].agg(A=("result1", np.sum), B=("result2", np.mean))

        with pytest.raises(KeyError, match=msg):
            t[["A", "B"]].agg(
                A=NamedAgg("result1", np.sum), B=NamedAgg("result2", np.mean)
            )

    # agg with different hows
    # The reference frame is built from the last entry of ``cases``; this used
    # to happen implicitly through the loop variable left over from above.
    reference = cases[-1]
    expected = pd.concat(
        [
            reference["A"].sum(),
            reference["A"].std(),
            reference["B"].mean(),
            reference["B"].std(),
        ],
        axis=1,
    )
    expected.columns = pd.MultiIndex.from_tuples(
        [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
    )
    for t in cases:
        result = t.agg({"A": ["sum", "std"], "B": ["mean", "std"]})
        tm.assert_frame_equal(result, expected, check_like=True)

    # equivalent of using a selection list / or not
    for t in cases:
        result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
        tm.assert_frame_equal(result, expected, check_like=True)

    msg = "nested renamer is not supported"

    # series like aggs
    for t in cases:
        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t["A"].agg({"A": ["sum", "std"]})

        with pytest.raises(pd.core.base.SpecificationError, match=msg):
            t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]})

    # errors
    # invalid names in the agg specification
    msg = r"Column\(s\) \['B'\] do not exist"
    for t in cases:
        with pytest.raises(KeyError, match=msg):
            t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func", [["min"], ["mean", "max"], {"A": "sum"}, {"A": "prod", "B": "median"}]
)
def test_multi_agg_axis_1_raises(func):
    """List and dict ``agg`` specs raise on an ``axis=1`` resampler (GH#46904)."""
    np.random.seed(1234)
    days = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    days.name = "date"
    tall = DataFrame(np.random.rand(10, 2), columns=["A", "B"], index=days)

    # transpose so the dates run along the columns
    wide = tall.T
    resampler = wide.resample("M", axis=1)

    message = "axis other than 0 is not supported"
    with pytest.raises(NotImplementedError, match=message):
        resampler.agg(func)
|
||||
|
||||
|
||||
def test_agg_nested_dicts():
    """Nested-dict (renaming) agg specs raise ``SpecificationError`` everywhere."""
    np.random.seed(1234)
    days = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    days.name = "date"
    frame = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=days)
    as_column = frame.reset_index()
    as_level = as_column.copy()
    as_level.index = pd.MultiIndex.from_arrays(
        [range(10), frame.index], names=["index", "date"]
    )

    targets = [
        frame.resample("2D"),
        as_column.resample("2D", on="date"),
        as_level.resample("2D", level="date"),
        frame.groupby(pd.Grouper(freq="2D")),
    ]
    nested_error = pd.core.base.SpecificationError
    msg = "nested renamer is not supported"

    # outer keys are new names, inner keys are columns
    for target in targets:
        with pytest.raises(nested_error, match=msg):
            target.aggregate(
                {"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}
            )

    # outer keys are columns, inner keys are new names
    for target in targets:
        with pytest.raises(nested_error, match=msg):
            target[["A", "B"]].agg(
                {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
            )

        with pytest.raises(nested_error, match=msg):
            target.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
|
||||
|
||||
|
||||
def test_try_aggregate_non_existing_column():
    """Aggregating a column that is not in the frame raises ``KeyError`` (GH 16766)."""
    hourly_rows = [(0, 1.0, 2.0), (1, 2.0, 2.0), (2, 3.0, 1.5)]
    records = [
        {"dt": datetime(2017, 6, 1, hour), "x": x, "y": y}
        for hour, x, y in hourly_rows
    ]
    frame = DataFrame(records).set_index("dt")

    # Error as we don't have 'z' column
    spec = {"x": ["mean"], "y": ["median"], "z": ["sum"]}
    with pytest.raises(KeyError, match=r"Column\(s\) \['z'\] do not exist"):
        frame.resample("30T").agg(spec)
|
||||
|
||||
|
||||
def test_selection_api_validation():
    """Validate the ``on=`` / ``level=`` selection arguments of ``resample`` (GH 13500)."""
    days = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
    counter = np.arange(len(days), dtype=np.int64)
    frame = DataFrame(
        {"date": days, "a": counter},
        index=pd.MultiIndex.from_arrays([counter, days], names=["v", "d"]),
    )
    plain = DataFrame({"a": counter}, index=days)

    # (exception, message pattern, selection keywords) for rejected selections
    invalid_selections = [
        # non DatetimeIndex
        (
            TypeError,
            "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
            "but got an instance of 'Int64Index'",
            {"level": "v"},
        ),
        (
            ValueError,
            "The Grouper cannot specify both a key and a level!",
            {"on": "date", "level": "d"},
        ),
        (TypeError, "unhashable type: 'list'", {"on": ["a", "date"]}),
        (
            KeyError,
            r"\"Level \['a', 'date'\] not found\"",
            {"level": ["a", "date"]},
        ),
    ]
    for error, msg, selection in invalid_selections:
        with pytest.raises(error, match=msg):
            frame.resample("2D", **selection)

    # upsampling not allowed
    msg = (
        "Upsampling from level= or on= selection is not supported, use "
        r"\.set_index\(\.\.\.\) to explicitly set index to datetime-like"
    )
    for selection in ({"level": "d"}, {"on": "date"}):
        with pytest.raises(ValueError, match=msg):
            frame.resample("2D", **selection).asfreq()

    # downsampling through on=/level= matches resampling the plain frame,
    # apart from the name carried by the resulting index
    exp = plain.resample("2D").sum()
    for index_name, selection in (("date", {"on": "date"}), ("d", {"level": "d"})):
        exp.index.name = index_name
        tm.assert_frame_equal(exp, frame.resample("2D", **selection).sum())
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"]
)
def test_agg_with_datetime_index_list_agg_func(col_name):
    """A list agg works for column names that look like dates (GH 22660)."""
    # The parametrized column names would get converted to dates by our
    # date parser. Some would result in OutOfBoundsError (ValueError) while
    # others would result in OverflowError when passed into Timestamp.
    # We catch these errors and move on to the correct branch.
    quarter_hours = date_range(
        start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin"
    )
    frame = DataFrame(list(range(200)), index=quarter_hours, columns=[col_name])

    days = date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin")
    columns = pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]])
    expected = DataFrame([47.5, 143.5, 195.5], index=days, columns=columns)

    result = frame.resample("1d").aggregate(["mean"])
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_agg_readonly():
    """first/last/min/max aggregate a read-only array (GH#31710)."""
    # GH#31710 cython needs to allow readonly data
    hours = date_range("2020-01-01", "2020-01-02", freq="1h")
    values = np.zeros_like(hours)
    values.setflags(write=False)

    daily = Series(values, index=hours).resample("1D")

    # every value is the zero timestamp, so each aggregation returns it
    expected = Series([pd.Timestamp(0), pd.Timestamp(0)], index=hours[::24])

    for how in ("last", "first", "max", "min"):
        result = daily.agg(how)
        tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "start,end,freq,data,resample_freq,origin,closed,exp_data,exp_end,exp_periods",
    [
        (
            "2000-10-01 23:30:00",
            "2000-10-02 00:26:00",
            "7min",
            [0, 3, 6, 9, 12, 15, 18, 21, 24],
            "17min",
            "end",
            None,
            [0, 18, 27, 63],
            "20001002 00:26:00",
            4,
        ),
        (
            "20200101 8:26:35",
            "20200101 9:31:58",
            "77s",
            [1] * 51,
            "7min",
            "end",
            "right",
            [1, 6, 5, 6, 5, 6, 5, 6, 5, 6],
            "2020-01-01 09:30:45",
            10,
        ),
        (
            "2000-10-01 23:30:00",
            "2000-10-02 00:26:00",
            "7min",
            [0, 3, 6, 9, 12, 15, 18, 21, 24],
            "17min",
            "end",
            "left",
            [0, 18, 27, 39, 24],
            "20001002 00:43:00",
            5,
        ),
        (
            "2000-10-01 23:30:00",
            "2000-10-02 00:26:00",
            "7min",
            [0, 3, 6, 9, 12, 15, 18, 21, 24],
            "17min",
            "end_day",
            None,
            [3, 15, 45, 45],
            "2000-10-02 00:29:00",
            4,
        ),
    ],
)
def test_end_and_end_day_origin(
    start,
    end,
    freq,
    data,
    resample_freq,
    origin,
    closed,
    exp_data,
    exp_end,
    exp_periods,
):
    """Sum-resample with ``origin`` "end"/"end_day" and compare bin labels and sums."""
    source = Series(data, index=date_range(start, end, freq=freq))

    # the expected bins are described by their last label and their count
    bins = date_range(end=exp_end, freq=resample_freq, periods=exp_periods)
    expected = Series(exp_data, index=bins)

    resampler = source.resample(resample_freq, origin=origin, closed=closed)
    tm.assert_series_equal(resampler.sum(), expected)
|
||||
@@ -0,0 +1,464 @@
|
||||
from textwrap import dedent
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
from pandas.util._test_decorators import async_mark
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.api import Int64Index
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
# Shared input for the tests below: 40 rows on a one-second index, with a
# grouping column "A" (20 ones, 12 twos, 8 threes) and a running counter "B".
_group_labels = [1] * 20 + [2] * 12 + [3] * 8
test_frame = DataFrame(
    {"A": _group_labels, "B": np.arange(40)},
    index=date_range("1/1/2000", freq="s", periods=40),
)
|
||||
|
||||
|
||||
@async_mark()
@td.check_file_leaks
async def test_tab_complete_ipython6_warning(ip):
    """Tab-completing on a Resampler in IPython must not emit a warning."""
    from IPython.core.completer import provisionalcompleter

    setup_code = dedent(
        """\
        import pandas._testing as tm
        s = tm.makeTimeSeries()
        rs = s.resample("D")
        """
    )
    await ip.run_code(setup_code)

    # GH 31324 newer jedi version raises Deprecation warning;
    #  appears resolved 2021-02-02
    with tm.assert_produces_warning(None), provisionalcompleter("ignore"):
        list(ip.Completer.completions("rs.", 1))
|
||||
|
||||
|
||||
def test_deferred_with_groupby():
    """Deferred resample ops on a groupby match a per-group ``apply`` (GH 12486)."""
    # support deferred resample ops with groupby
    scores_a = [
        ["2010-01-01", "A", 2],
        ["2010-01-02", "A", 3],
        ["2010-01-05", "A", 8],
        ["2010-01-10", "A", 7],
        ["2010-01-13", "A", 3],
    ]
    scores_b = [
        ["2010-01-01", "B", 5],
        ["2010-01-03", "B", 2],
        ["2010-01-04", "B", 1],
        ["2010-01-11", "B", 7],
        ["2010-01-14", "B", 3],
    ]
    scores = DataFrame(scores_a + scores_b, columns=["date", "id", "score"])
    scores.date = pd.to_datetime(scores.date)

    def daily_asfreq(group):
        return group.set_index("date").resample("D").asfreq()

    expected = scores.groupby("id").apply(daily_asfreq)
    result = scores.set_index("date").groupby("id").resample("D").asfreq()
    tm.assert_frame_equal(result, expected)

    # same idea with weekly data that is forward-filled to daily
    weekly = DataFrame(
        {
            "date": date_range(start="2016-01-01", periods=4, freq="W"),
            "group": [1, 1, 2, 2],
            "val": [5, 6, 7, 8],
        }
    ).set_index("date")

    def daily_ffill(group):
        return group.resample("1D").ffill()

    expected = weekly.groupby("group").apply(daily_ffill)
    result = weekly.groupby("group").resample("1D").ffill()
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem():
    """Column "B" can be selected before, during or after the groupby resample."""
    grouped = test_frame.groupby("A")

    expected = grouped.B.apply(lambda x: x.resample("2s").mean())

    # select on the resampler
    result = grouped.resample("2s").B.mean()
    tm.assert_series_equal(result, expected)

    # select on the groupby, then resample
    result = grouped.B.resample("2s").mean()
    tm.assert_series_equal(result, expected)

    # select on the aggregated frame
    result = grouped.resample("2s").mean().B
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_getitem_multiple():
    """Selecting the same column twice gives the same result both times (GH 13174)."""
    # multiple calls after selection causing an issue with aliasing
    records = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
    frame = DataFrame(records, index=date_range("2016-01-01", periods=2))
    resampler = frame.groupby("id").resample("1D")

    expected_index = pd.MultiIndex.from_tuples(
        [(1, Timestamp("2016-01-01")), (2, Timestamp("2016-01-02"))],
        names=["id", None],
    )
    expected = Series([1, 1], index=expected_index, name="buyer")

    # first selection
    result = resampler["buyer"].count()
    tm.assert_series_equal(result, expected)

    # selecting again on the same resampler
    result = resampler["buyer"].count()
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_on_api_with_getitem():
    """``resample(on=...)`` followed by a column selection works (GH 17813)."""
    frame = DataFrame(
        {
            "id": list("aabbb"),
            "date": date_range("1-1-2016", periods=5),
            "data": 1,
        }
    )

    by_index = frame.set_index("date").groupby("id").resample("2D")
    exp = by_index["data"].sum()

    by_column = frame.groupby("id").resample("2D", on="date")
    result = by_column["data"].sum()

    tm.assert_series_equal(result, exp)
|
||||
|
||||
|
||||
def test_groupby_with_origin():
    """A fixed ``origin`` makes Grouper bins independent of the data start (GH 31809)."""
    freq = "1399min"  # prime number that is smaller than 24h
    start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
    middle = "1/15/2000 00:00:00"

    stamps = date_range(start, end, freq="1231min")  # prime number
    full = Series(np.random.randn(len(stamps)), index=stamps)
    second_half = full[middle:end]

    def counts(series, grouper):
        return series.groupby(grouper).agg("count")

    # proves that grouper without a fixed origin does not work
    # when dealing with unusual frequencies
    simple_grouper = pd.Grouper(freq=freq)
    count_full = counts(full, simple_grouper)[middle:end]
    count_half = counts(second_half, simple_grouper)
    with pytest.raises(AssertionError, match="Index are different"):
        tm.assert_index_equal(count_full.index, count_half.index)

    # test origin on 1970-01-01 00:00:00
    epoch_grouper = pd.Grouper(freq=freq, origin=Timestamp(0))
    epoch_count_full = counts(full, epoch_grouper)[middle:end]
    epoch_count_half = counts(second_half, epoch_grouper)
    tm.assert_series_equal(epoch_count_full, epoch_count_half)

    # test origin on 2049-10-18 20:00:00
    origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
    future_grouper = pd.Grouper(freq=freq, origin=origin_future)
    future_count_full = counts(full, future_grouper)[middle:end]
    future_count_half = counts(second_half, future_grouper)
    tm.assert_series_equal(future_count_full, future_count_half)

    # both grouper use an adjusted timestamp that is a multiple of 1399 min
    # they should be equals even if the adjusted_timestamp is in the future
    tm.assert_series_equal(epoch_count_full, future_count_half)
|
||||
|
||||
|
||||
def test_nearest():
    """Upsample minute data to 20s using the nearest observation (GH 17496)."""
    # Resample nearest
    minutes = date_range("1/1/2000", periods=3, freq="T")
    result = Series(range(3), index=minutes).resample("20s").nearest()

    labels = [
        "2000-01-01 00:00:00",
        "2000-01-01 00:00:20",
        "2000-01-01 00:00:40",
        "2000-01-01 00:01:00",
        "2000-01-01 00:01:20",
        "2000-01-01 00:01:40",
        "2000-01-01 00:02:00",
    ]
    expected_index = pd.DatetimeIndex(labels, dtype="datetime64[ns]", freq="20S")
    expected = Series([0, 0, 1, 1, 1, 2, 2], index=expected_index)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_methods():
    """Each groupby-resample method matches resampling every group via ``apply``."""
    grouped = test_frame.groupby("A")
    resampler = grouped.resample("2s")

    def check(names, assert_equal, **kwargs):
        # compare the direct method call with the per-group equivalent
        for name in names:
            result = getattr(resampler, name)(**kwargs)
            expected = grouped.apply(
                lambda x: getattr(x.resample("2s"), name)(**kwargs)
            )
            assert_equal(result, expected)

    reductions = ["first", "last", "median", "sem", "sum", "mean", "min", "max"]
    check(reductions, tm.assert_frame_equal)
    check(["size"], tm.assert_series_equal)
    check(["count"], tm.assert_frame_equal)

    # series only
    for name in ["nunique"]:
        result = getattr(resampler.B, name)()
        expected = grouped.B.apply(lambda x: getattr(x.resample("2s"), name)())
        tm.assert_series_equal(result, expected)

    check(["nearest", "bfill", "ffill", "asfreq"], tm.assert_frame_equal)
    check(["ohlc"], tm.assert_frame_equal)
    check(["std", "var"], tm.assert_frame_equal, ddof=1)
|
||||
|
||||
|
||||
def test_apply():
    """``apply`` on the groupby resampler and on the groupby both match ``sum``."""
    grouped = test_frame.groupby("A")
    resampler = grouped.resample("2s")

    # reduction
    expected = grouped.resample("2s").sum()

    def resample_sum(group):
        return group.resample("2s").sum()

    result = resampler.apply(resample_sum)
    tm.assert_frame_equal(result, expected)

    def resample_apply_sum(group):
        return group.resample("2s").apply(lambda y: y.sum())

    result = grouped.apply(resample_apply_sum)
    # y.sum() results in int64 instead of int32 on 32-bit architectures
    tm.assert_frame_equal(result, expected.astype("int64"))
|
||||
|
||||
|
||||
def test_apply_with_mutated_index():
|
||||
# GH 15169
|
||||
index = date_range("1-1-2015", "12-31-15", freq="D")
|
||||
df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index)
|
||||
|
||||
def f(x):
|
||||
s = Series([1, 2], index=["a", "b"])
|
||||
return s
|
||||
|
||||
expected = df.groupby(pd.Grouper(freq="M")).apply(f)
|
||||
|
||||
result = df.resample("M").apply(f)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# A case for series
|
||||
expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f)
|
||||
result = df["col1"].resample("M").apply(f)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_columns_multilevel():
    """``resample.apply`` passes MultiIndex column names through ``x.name`` (GH 16231)."""
    column_tuples = [("A", "a", "", "one"), ("B", "b", "i", "two")]
    cols = pd.MultiIndex.from_tuples(column_tuples)
    quarter_hours = date_range(start="2017-01-01", freq="15Min", periods=8)
    frame = DataFrame(
        np.array([0] * 16).reshape(8, 2), index=quarter_hours, columns=cols
    )

    # pick the reducer from the last level of each column label
    agg_dict = {}
    for col in frame.columns:
        agg_dict[col] = np.sum if col[3] == "one" else np.mean

    result = frame.resample("H").apply(lambda x: agg_dict[x.name](x))

    expected = DataFrame(
        2 * [[0, 0.0]],
        index=date_range(start="2017-01-01", freq="1H", periods=2),
        columns=pd.MultiIndex.from_tuples(
            [("A", "a", "", "one"), ("B", "b", "i", "two")]
        ),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_with_label():
    """Weekly groupby resample with ``label="left"`` uses left bin labels (GH 13235)."""
    every_other_day = date_range("2000-01-01", freq="2D", periods=5)
    frame = DataFrame(
        index=every_other_day,
        data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]},
    )
    result = frame.groupby("col0").resample("1W", label="left").sum()

    group_keys = np.array([0, 0, 1, 2])
    week_labels = pd.to_datetime(
        np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"])
    )
    mindex = pd.MultiIndex.from_arrays(
        [group_keys, week_labels], names=["col0", None]
    )
    expected = DataFrame(
        data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
    )

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_consistency_with_window():
    """Groupby resample and groupby rolling both return a two-level index keyed by "A"."""
    # consistent return values with window
    group_keys = Int64Index([1, 2, 3], name="A")
    grouped_results = [
        test_frame.groupby("A").resample("2s").mean(),
        test_frame.groupby("A").rolling(20).mean(),
    ]
    for result in grouped_results:
        assert result.index.nlevels == 2
        tm.assert_index_equal(result.index.levels[0], group_keys)
|
||||
|
||||
|
||||
def test_median_duplicate_columns():
    """``median`` on duplicate column labels matches the unique-label result (GH 14233)."""
    duplicated = DataFrame(
        np.random.randn(20, 3),
        columns=list("aaa"),
        index=date_range("2012-01-01", periods=20, freq="s"),
    )
    # same data, but with distinct labels as the reference
    unique = duplicated.copy()
    unique.columns = ["a", "b", "c"]

    expected = unique.resample("5s").median()
    result = duplicated.resample("5s").median()

    # only the values are compared; align the labels first
    expected.columns = result.columns
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_to_one_column_of_df():
    """``Resampler.apply`` accepts a function that reads a single column (GH 36951).

    The column is accessed both as an attribute and by key; both must give
    the same hourly sums.
    """
    df = DataFrame(
        {"col": range(10), "col1": range(10, 20)},
        index=date_range("2012-01-01", periods=10, freq="20min"),
    )

    # access "col" via getattr -> make sure we handle AttributeError
    result = df.resample("H").apply(lambda group: group.col.sum())
    expected = Series(
        [3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="H")
    )
    tm.assert_series_equal(result, expected)

    # access "col" via __getitem__ -> make sure we handle KeyError
    result = df.resample("H").apply(lambda group: group["col"].sum())
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_agg():
    """``agg`` with a dict on a groupby-resample matches the plain reduction.

    Regression test for GH 33548.
    """
    df = DataFrame(
        {
            "cat": [
                "cat_1",
                "cat_1",
                "cat_2",
                "cat_1",
                "cat_2",
                "cat_1",
                "cat_2",
                "cat_1",
            ],
            "num": [5, 20, 22, 3, 4, 30, 10, 50],
            "date": [
                "2019-2-1",
                "2018-02-03",
                "2020-3-11",
                "2019-2-2",
                "2019-2-2",
                "2018-12-4",
                "2020-3-11",
                "2020-12-12",
            ],
        }
    )
    df["date"] = pd.to_datetime(df["date"])

    resampled = df.groupby("cat").resample("Y", on="date")

    # Select the aggregated column explicitly so the comparison does not
    # depend on how ``sum()`` treats the non-numeric columns of the frame.
    expected = resampled.sum()[["num"]]
    result = resampled.agg({"num": "sum"})

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_groupby_agg_listlike():
    """A list of aggregations on a groupby-resample column gives one column each.

    Regression test for GH 42905.
    """
    ts = Timestamp("2021-02-28 00:00:00")
    df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))

    resampled = df.groupby("class").resample("M")["value"]
    result = resampled.agg(["sum", "size"])

    expected = DataFrame(
        [[69, 1]],
        index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
        columns=["sum", "size"],
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
def test_empty(keys):
    """Groupby-resample on an empty frame returns an empty frame (GH 26411).

    Parameters
    ----------
    keys : list of str
        Column labels to group by (one or two keys).
    """
    df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))
    result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()

    expected = DataFrame(columns=["a", "b"]).set_index(keys, drop=False)
    if len(keys) == 1:
        # With a single key the expected index carries that key's name.
        expected.index.name = keys[0]

    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("consolidate", [True, False])
def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
    """``min`` on groupby-resample with an object column missing for one group.

    See https://github.com/pandas-dev/pandas/issues/39329

    Parameters
    ----------
    consolidate : bool
        Whether to call ``_consolidate()`` on the concatenated frame first.
    """
    dates = date_range("2020-01-01", periods=15, freq="D")
    df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
    # df2 has no "col_object", so after concat that column is NaN for key "B".
    df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
    df = pd.concat([df1, df2], ignore_index=True)
    if consolidate:
        df = df._consolidate()

    result = df.groupby(["key"]).resample("W", on="date").min()

    idx = pd.MultiIndex.from_arrays(
        [
            ["A"] * 3 + ["B"] * 3,
            pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2),
        ],
        names=["key", "date"],
    )
    expected = DataFrame(
        {
            "key": ["A"] * 3 + ["B"] * 3,
            "date": pd.to_datetime(["2020-01-01", "2020-01-06", "2020-01-13"] * 2),
            "col1": [0, 5, 12] * 2,
            "col_object": ["val"] * 3 + [np.nan] * 3,
        },
        index=idx,
    )
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,357 @@
|
||||
from datetime import datetime
|
||||
from operator import methodcaller
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.groupby.grouper import Grouper
|
||||
from pandas.core.indexes.datetimes import date_range
|
||||
|
||||
# Module-level series of 1000 random values indexed by a date_range starting
# 1/1/2000. NOTE: this object is shared by the tests below that reference it.
test_series = Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000))
|
||||
|
||||
|
||||
def test_apply():
    """``apply`` on a time ``Grouper`` matches ``apply`` on a by-year groupby."""
    grouper = Grouper(freq="A", label="right", closed="right")

    grouped = test_series.groupby(grouper)

    def f(x):
        # Three largest values of the group.
        return x.sort_values()[-3:]

    applied = grouped.apply(f)
    expected = test_series.groupby(lambda x: x.year).apply(f)

    # The outer (group-key) levels differ between the two groupings; drop
    # them so only the applied values and their original labels are compared.
    applied.index = applied.index.droplevel(0)
    expected.index = expected.index.droplevel(0)
    tm.assert_series_equal(applied, expected)
|
||||
|
||||
|
||||
def test_count():
    """``count`` via a time ``Grouper`` and via ``resample`` match a by-year count."""
    # Work on a private copy: ``test_series`` is module-level state, and
    # writing NaNs into it in place would leak into every test that runs later.
    series = test_series.copy()
    series[::3] = np.nan

    expected = series.groupby(lambda x: x.year).count()

    grouper = Grouper(freq="A", label="right", closed="right")
    result = series.groupby(grouper).count()
    # Align labels (year ints vs. timestamps) so only the counts are compared.
    expected.index = result.index
    tm.assert_series_equal(result, expected)

    result = series.resample("A").count()
    expected.index = result.index
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_numpy_reduction():
    """Annual ``resample(...).prod()`` matches a by-year groupby with ``np.prod``."""
    result = test_series.resample("A", closed="right").prod()

    expected = test_series.groupby(lambda x: x.year).agg(np.prod)
    # Align labels (year ints vs. timestamps) so only the values are compared.
    expected.index = result.index

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_apply_iteration():
|
||||
# #2300
|
||||
N = 1000
|
||||
ind = date_range(start="2000-01-01", freq="D", periods=N)
|
||||
df = DataFrame({"open": 1, "close": 2}, index=ind)
|
||||
tg = Grouper(freq="M")
|
||||
|
||||
_, grouper, _ = tg._get_grouper(df)
|
||||
|
||||
# Errors
|
||||
grouped = df.groupby(grouper, group_keys=False)
|
||||
|
||||
def f(df):
|
||||
return df["close"] / df["open"]
|
||||
|
||||
# it works!
|
||||
result = grouped.apply(f)
|
||||
tm.assert_index_equal(result.index, df.index)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "name, func",
    [
        ("Int64Index", tm.makeIntIndex),
        ("Index", tm.makeUnicodeIndex),
        ("Float64Index", tm.makeFloatIndex),
        ("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)),
    ],
)
def test_fails_on_no_datetime_index(name, func):
    """Grouping by a frequency ``Grouper`` raises ``TypeError`` on other indexes.

    Parameters
    ----------
    name : str
        Index class name expected in the error message.
    func : callable
        Factory that builds an index of the requested length.
    """
    n = 2
    index = func(n)
    df = DataFrame({"a": np.random.randn(n)}, index=index)

    msg = (
        "Only valid with DatetimeIndex, TimedeltaIndex "
        f"or PeriodIndex, but got an instance of '{name}'"
    )
    with pytest.raises(TypeError, match=msg):
        df.groupby(Grouper(freq="D"))
|
||||
|
||||
|
||||
def test_aaa_group_order():
    """Rows inside each daily group keep their original order (GH 12840).

    Checks that TimeGrouper performs stable sorts.
    """
    n = 20
    data = np.random.randn(n, 4)
    df = DataFrame(data, columns=["A", "B", "C", "D"])

    # Five consecutive days repeated four times: day ``i`` owns rows i, i+5, ...
    days = [datetime(2013, 1, day) for day in range(1, 6)]
    df["key"] = days * 4
    grouped = df.groupby(Grouper(key="key", freq="D"))

    for position, day in enumerate(days):
        tm.assert_frame_equal(grouped.get_group(day), df[position::5])
|
||||
|
||||
|
||||
def test_aggregate_normal(resample_method):
    """Check TimeGrouper's aggregation is identical as normal groupby.

    Parameters
    ----------
    resample_method : str
        Name of the groupby method to call on both groupings.
    """
    data = np.random.randn(20, 4)
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, 3, 4, 5] * 4

    # Same values, but keyed by five consecutive days instead of 1..5.
    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        datetime(2013, 1, 3),
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    expected = getattr(normal_grouped, resample_method)()
    dt_result = getattr(dt_grouped, resample_method)()
    # Relabel the integer-keyed result with the matching daily timestamps.
    expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    tm.assert_equal(expected, dt_result)

    # if TimeGrouper is used included, 'nth' doesn't work yet, so the
    # equivalent comparison for ``nth(3)`` is not exercised here.
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method, method_args, unit",
    [
        ("sum", {}, 0),
        ("sum", {"min_count": 0}, 0),
        ("sum", {"min_count": 1}, np.nan),
        ("prod", {}, 1),
        ("prod", {"min_count": 0}, 1),
        ("prod", {"min_count": 1}, np.nan),
    ],
)
def test_resample_entirely_nat_window(method, method_args, unit):
    """Reduce a resample bin whose values are all NaN.

    Parameters
    ----------
    method : str
        Resampler method to call (``"sum"`` or ``"prod"``).
    method_args : dict
        Keyword arguments forwarded to the method.
    unit : scalar
        Expected value for the all-NaN bin.
    """
    # Two-day bins: the first holds two zeros, the second two NaNs.
    s = Series([0] * 2 + [np.nan] * 2, index=date_range("2017", periods=4))
    result = methodcaller(method, **method_args)(s.resample("2d"))
    expected = Series(
        [0.0, unit], index=pd.DatetimeIndex(["2017-01-01", "2017-01-03"], freq="2D")
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "func, fill_value",
    [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)],
)
def test_aggregate_with_nat(func, fill_value):
    """Check TimeGrouper's aggregation is identical as normal groupby with NaT keys.

    if NaT is included, 'var', 'std', 'mean', 'first','last'
    and 'nth' doesn't work yet

    Parameters
    ----------
    func : str
        Name of the groupby method to call on both groupings.
    fill_value : scalar
        Value expected in the row for the day that has no observations.
    """
    n = 20
    data = np.random.randn(n, 4).astype("int64")
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    # Same values keyed by days, with NaT where the plain key is NaN.
    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = getattr(normal_grouped, func)()
    dt_result = getattr(dt_grouped, func)()

    # The plain groupby has no row for key 3; add one holding ``fill_value``
    # so it lines up with the five daily bins of the time grouping.
    pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"])
    expected = pd.concat([normal_result, pad])
    expected = expected.sort_index()
    dti = date_range(start="2013-01-01", freq="D", periods=5, name="key")
    expected.index = dti._with_freq(None)  # TODO: is this desired?
    tm.assert_frame_equal(expected, dt_result)
    assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_aggregate_with_nat_size():
    """``size`` via a daily ``Grouper`` with NaT keys matches a plain groupby.

    Regression test for GH 9925.
    """
    n = 20
    data = np.random.randn(n, 4).astype("int64")
    normal_df = DataFrame(data, columns=["A", "B", "C", "D"])
    normal_df["key"] = [1, 2, np.nan, 4, 5] * 4

    # Same values keyed by days, with NaT where the plain key is NaN.
    dt_df = DataFrame(data, columns=["A", "B", "C", "D"])
    dt_df["key"] = [
        datetime(2013, 1, 1),
        datetime(2013, 1, 2),
        pd.NaT,
        datetime(2013, 1, 4),
        datetime(2013, 1, 5),
    ] * 4

    normal_grouped = normal_df.groupby("key")
    dt_grouped = dt_df.groupby(Grouper(key="key", freq="D"))

    normal_result = normal_grouped.size()
    dt_result = dt_grouped.size()

    # The plain groupby has no entry for key 3; add a zero-size entry so it
    # lines up with the five daily bins of the time grouping.
    pad = Series([0], index=[3])
    expected = pd.concat([normal_result, pad])
    expected = expected.sort_index()
    expected.index = date_range(
        start="2013-01-01", freq="D", periods=5, name="key"
    )._with_freq(None)
    tm.assert_series_equal(expected, dt_result)
    assert dt_result.index.name == "key"
|
||||
|
||||
|
||||
def test_repr():
|
||||
# GH18203
|
||||
result = repr(Grouper(key="A", freq="H"))
|
||||
expected = (
|
||||
"TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
|
||||
"closed='left', label='left', how='mean', "
|
||||
"convention='e', origin='start_day')"
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
result = repr(Grouper(key="A", freq="H", origin="2000-01-01"))
|
||||
expected = (
|
||||
"TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
|
||||
"closed='left', label='left', how='mean', "
|
||||
"convention='e', origin=Timestamp('2000-01-01 00:00:00'))"
|
||||
)
|
||||
assert result == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method, method_args, expected_values",
    [
        ("sum", {}, [1, 0, 1]),
        ("sum", {"min_count": 0}, [1, 0, 1]),
        ("sum", {"min_count": 1}, [1, np.nan, 1]),
        ("sum", {"min_count": 2}, [np.nan, np.nan, np.nan]),
        ("prod", {}, [1, 1, 1]),
        ("prod", {"min_count": 0}, [1, 1, 1]),
        ("prod", {"min_count": 1}, [1, np.nan, 1]),
        ("prod", {"min_count": 2}, [np.nan, np.nan, np.nan]),
    ],
)
def test_upsample_sum(method, method_args, expected_values):
    """Reduce an hourly series upsampled to 30-minute bins.

    Parameters
    ----------
    method : str
        Resampler method to call (``"sum"`` or ``"prod"``).
    method_args : dict
        Keyword arguments forwarded to the method.
    expected_values : list
        Expected values for the three 30-minute bins.
    """
    s = Series(1, index=date_range("2017", periods=2, freq="H"))
    resampled = s.resample("30T")
    index = pd.DatetimeIndex(
        ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"],
        freq="30T",
    )
    result = methodcaller(method, **method_args)(resampled)
    expected = Series(expected_values, index=index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_groupby_resample_interpolate():
    """Linear interpolation after a daily groupby-resample (GH 35325)."""
    d = {"price": [10, 11, 9], "volume": [50, 60, 50]}

    df = DataFrame(d)

    df["week_starting"] = date_range("01/01/2018", periods=3, freq="W")

    result = (
        df.set_index("week_starting")
        .groupby("volume")
        .resample("1D")
        .interpolate(method="linear")
    )

    # Build the expected index from Timestamps only. Mixing a date string with
    # Timestamps made the index construction itself emit a deprecation
    # warning, which is unrelated to the interpolation being tested.
    first_day = Timestamp("2018-01-07")
    expected_tuples = [(50, first_day + pd.Timedelta(days=i)) for i in range(15)]
    expected_tuples.append((60, Timestamp("2018-01-14")))
    expected_ind = pd.MultiIndex.from_tuples(
        expected_tuples, names=["volume", "week_starting"]
    )

    expected = DataFrame(
        data={
            "price": [
                10.0,
                9.928571428571429,
                9.857142857142858,
                9.785714285714286,
                9.714285714285714,
                9.642857142857142,
                9.571428571428571,
                9.5,
                9.428571428571429,
                9.357142857142858,
                9.285714285714286,
                9.214285714285714,
                9.142857142857142,
                9.071428571428571,
                9.0,
                11.0,
            ],
            "volume": [50.0] * 15 + [60],
        },
        index=expected_ind,
    )
    tm.assert_frame_equal(result, expected)
|
||||
@@ -0,0 +1,193 @@
|
||||
from datetime import timedelta
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.timedeltas import timedelta_range
|
||||
|
||||
|
||||
def test_asfreq_bug():
    """``asfreq`` on a timedelta-indexed frame fills the missing minutes with NaN."""
    df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
    result = df.resample("1T").asfreq()
    expected = DataFrame(
        data=[1, np.nan, np.nan, 3],
        index=timedelta_range("0 day", periods=4, freq="1T"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_nat():
    """Resampling a timedelta index that contains NaT (GH 13223)."""
    index = pd.to_timedelta(["0s", pd.NaT, "2s"])
    result = DataFrame({"value": [2, 3, 5]}, index).resample("1s").mean()
    expected = DataFrame(
        {"value": [2.5, np.nan, 5.0]},
        index=timedelta_range("0 day", periods=3, freq="1S"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_as_freq_with_subperiod():
    """``asfreq`` from a 5-minute to a 2-minute timedelta grid (GH 13022).

    Only the labels present on both grids (0 and 10 minutes) keep a value.
    """
    index = timedelta_range("00:00:00", "00:10:00", freq="5T")
    df = DataFrame(data={"value": [1, 5, 10]}, index=index)
    result = df.resample("2T").asfreq()
    expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]}
    expected = DataFrame(
        data=expected_data, index=timedelta_range("00:00:00", "00:10:00", freq="2T")
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_with_timedeltas():
    """30-minute sums over a minute-spaced timedelta index, frame and series."""
    # Reference: sum consecutive blocks of 30 rows by integer position.
    expected = DataFrame({"A": np.arange(1480)})
    expected = expected.groupby(expected.index // 30).sum()
    expected.index = timedelta_range("0 days", freq="30T", periods=50)

    df = DataFrame(
        {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="T")
    )
    result = df.resample("30T").sum()

    tm.assert_frame_equal(result, expected)

    # The same must hold for a single column resampled as a Series.
    s = df["A"]
    result = s.resample("30T").sum()
    tm.assert_series_equal(result, expected["A"])
|
||||
|
||||
|
||||
def test_resample_single_period_timedelta():
    """2-second sums over five 1-second points; the last bin holds one point."""
    s = Series(list(range(5)), index=timedelta_range("1 day", freq="s", periods=5))
    result = s.resample("2s").sum()
    expected = Series([1, 5, 4], index=timedelta_range("1 day", freq="2s", periods=3))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_idempotency():
    """Resampling at the index's own frequency returns the same values (GH 12072)."""
    index = timedelta_range("0", periods=9, freq="10L")
    series = Series(range(9), index=index)
    result = series.resample("10L").mean()
    # ``mean`` returns floats, so compare against the float-cast input.
    expected = series.astype(float)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_offset_with_timedeltaindex():
    """``offset`` shifts the resample bins of a timedelta index (GH 10530 & 31809)."""
    rng = timedelta_range(start="0s", periods=25, freq="s")
    ts = Series(np.random.randn(len(rng)), index=rng)

    with_base = ts.resample("2s", offset="5s").mean()
    without_base = ts.resample("2s").mean()

    exp_without_base = timedelta_range(start="0s", end="25s", freq="2s")
    exp_with_base = timedelta_range(start="5s", end="29s", freq="2s")

    # Only the bin labels are checked; the values are random.
    tm.assert_index_equal(without_base.index, exp_without_base)
    tm.assert_index_equal(with_base.index, exp_with_base)
|
||||
|
||||
|
||||
def test_resample_categorical_data_with_timedeltaindex():
    """Aggregate object and categorical columns on a timedelta index (GH #12169)."""
    df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s"))
    df["Group"] = df["Group_obj"].astype("category")

    # Per 10-second bin, take the first label of ``value_counts()``.
    result = df.resample("10s").agg(lambda x: (x.value_counts().index[0]))

    expected = DataFrame(
        {"Group_obj": ["A", "A"], "Group": ["A", "A"]},
        index=pd.TimedeltaIndex([0, 10], unit="s", freq="10s"),
    )
    expected = expected.reindex(["Group_obj", "Group"], axis=1)
    expected["Group"] = expected["Group_obj"]
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_timedelta_values():
    """Timedelta dtype survives NaT values introduced by resampling (GH 13119)."""
    times = timedelta_range("1 day", "6 day", freq="4D")
    df = DataFrame({"time": times}, index=times)

    # Expected: a 2-day grid whose middle slot has no source row, hence NaT.
    times2 = timedelta_range("1 day", "6 day", freq="2D")
    exp = Series(times2, index=times2, name="time")
    exp.iloc[1] = pd.NaT

    # Same check through the frame and through the column.
    res = df.resample("2D").first()["time"]
    tm.assert_series_equal(res, exp)
    res = df["time"].resample("2D").first()
    tm.assert_series_equal(res, exp)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "start, end, freq, resample_freq",
    [
        ("8H", "21h59min50s", "10S", "3H"),  # GH 30353 example
        ("3H", "22H", "1H", "5H"),
        ("527D", "5006D", "3D", "10D"),
        ("1D", "10D", "1D", "2D"),  # GH 13022 example
        # tests that worked before GH 33498:
        ("8H", "21h59min50s", "10S", "2H"),
        ("0H", "21h59min50s", "10S", "3H"),
        ("10D", "85D", "D", "2D"),
    ],
)
def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
    """Timedelta resample bins do not contain an extra trailing bin (GH 33498).

    Parameters
    ----------
    start, end : str
        Bounds of the source timedelta range.
    freq : str
        Frequency of the source range.
    resample_freq : str
        Frequency to resample to.
    """
    idx = timedelta_range(start=start, end=end, freq=freq)
    s = Series(np.arange(len(idx)), index=idx)
    result = s.resample(resample_freq).min()
    expected_index = timedelta_range(freq=resample_freq, start=start, end=end)
    tm.assert_index_equal(result.index, expected_index)
    assert result.index.freq == expected_index.freq
    # Positional access must be explicit: the index holds timedeltas, so
    # ``result[-1]`` would rely on integer keys being treated as positions.
    assert not np.isnan(result.iloc[-1])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("duplicates", [True, False])
def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
    """Every 3-second bin passed to ``apply`` is non-empty (GH 10603).

    Parameters
    ----------
    duplicates : bool
        Whether to give the frame non-unique column labels.
    """
    df = DataFrame(
        np.random.normal(size=(10000, 4)),
        index=timedelta_range(start="0s", periods=10000, freq="3906250n"),
    )
    if duplicates:
        # case with non-unique columns
        df.columns = ["A", "B", "A", "C"]

    result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))

    # 3906250 ns spacing is 256 rows per second: 768 rows per full 3 s bin,
    # and 528 rows left over for the last bin.
    expected = DataFrame(
        [[768] * 4] * 12 + [[528] * 4],
        index=timedelta_range(start="1s", periods=13, freq="3s"),
    )
    expected.columns = df.columns
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_resample_quantile_timedelta():
    """``quantile`` on resampled timedelta values returns timedeltas (GH: 29485)."""
    df = DataFrame(
        {"value": pd.to_timedelta(np.arange(4), unit="s")},
        index=pd.date_range("20200101", periods=4, tz="UTC"),
    )
    result = df.resample("2D").quantile(0.99)
    # Each 2-day bin holds two values one second apart, so the 0.99 quantile
    # lies 0.99 s above the lower value.
    expected = DataFrame(
        {
            "value": [
                pd.Timedelta("0 days 00:00:00.990000"),
                pd.Timedelta("0 days 00:00:02.990000"),
            ]
        },
        index=pd.date_range("20200101", periods=2, tz="UTC", freq="2D"),
    )
    tm.assert_frame_equal(result, expected)
|
||||
Reference in New Issue
Block a user