From 9b375be5aa3610e8a21ef0b5b81e4db04270f3d3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 27 Jul 2024 17:14:00 +0200 Subject: [PATCH] TST (string dtype): clean-up xpasssing tests with future string dtype (#59323) --- pandas/tests/arithmetic/test_object.py | 3 --- pandas/tests/base/test_unique.py | 5 +---- pandas/tests/frame/constructors/test_from_dict.py | 3 ++- pandas/tests/frame/constructors/test_from_records.py | 4 +--- pandas/tests/frame/methods/test_fillna.py | 2 ++ pandas/tests/frame/methods/test_info.py | 11 ++++++----- pandas/tests/frame/methods/test_interpolate.py | 1 + pandas/tests/frame/test_arithmetic.py | 3 --- pandas/tests/indexes/interval/test_formats.py | 7 ++----- pandas/tests/indexing/test_coercion.py | 4 ---- pandas/tests/indexing/test_indexing.py | 4 ---- pandas/tests/series/methods/test_reindex.py | 3 --- pandas/tests/series/methods/test_replace.py | 1 - pandas/tests/series/test_formats.py | 2 +- 14 files changed, 16 insertions(+), 37 deletions(-) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 884e6e002800e..4b5156d0007bb 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas.util._test_decorators as td import pandas as pd @@ -303,7 +301,6 @@ def test_iadd_string(self): index += "_x" assert "a_x" in index - @pytest.mark.xfail(using_string_dtype(), reason="add doesn't work") def test_add(self): index = pd.Index([str(i) for i in range(10)]) expected = pd.Index(index.values * 2) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 42730519b32fd..7f094db6ea524 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd import pandas._testing as tm from pandas.tests.base.common import allow_na_ops @@ -100,12 +98,11 @@ def test_nunique_null(null_obj, index_or_series_obj): @pytest.mark.single_cpu -@pytest.mark.xfail(using_string_dtype(), reason="decoding fails") def test_unique_bad_unicode(index_or_series): # regression test for #34550 uval = "\ud83d" # smiley emoji - obj = index_or_series([uval] * 2) + obj = index_or_series([uval] * 2, dtype=object) result = obj.unique() if isinstance(obj, pd.Index): diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 4237e796e052e..fc7c03dc25839 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -44,7 +44,7 @@ def test_constructor_single_row(self): ) tm.assert_frame_equal(result, expected) - @pytest.mark.skipif(using_string_dtype(), reason="columns inferring logic broken") + @pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken") def test_constructor_list_of_series(self): data = [ OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]), @@ -108,6 +108,7 @@ def test_constructor_list_of_series(self): expected = DataFrame.from_dict(sdict, orient="index") tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken") def test_constructor_orient(self, float_string_frame): data_dict = float_string_frame.T._series recons = DataFrame.from_dict(data_dict, orient="index") diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index ed3f9ac611405..abc3aab1c1492 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -57,9 +57,7 @@ def test_from_records_with_datetimes(self): expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]") tm.assert_frame_equal(result, expected) - @pytest.mark.skipif( - using_string_dtype(), reason="dtype checking logic doesn't work" - ) + @pytest.mark.xfail(using_string_dtype(), reason="dtype checking logic doesn't work") def test_from_records_sequencelike(self): df = DataFrame( { diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 5d18b5ed1f7cd..b72cac6f3f9a1 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -65,6 +65,7 @@ def test_fillna_datetime(self, datetime_frame): with pytest.raises(TypeError, match=msg): datetime_frame.fillna() + # TODO(infer_string) test as actual error instead of xfail @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string") def test_fillna_mixed_type(self, float_string_frame): mf = float_string_frame @@ -537,6 +538,7 @@ def test_fillna_col_reordering(self): filled = df.ffill() assert df.columns.tolist() == filled.columns.tolist() + # TODO(infer_string) test as actual error instead of xfail @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string") def test_fill_corner(self, float_frame, float_string_frame): mf = float_string_frame diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index 4e3726f4dc51d..17cb989626e70 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -15,6 +15,7 @@ from pandas import ( CategoricalIndex, DataFrame, + Index, MultiIndex, Series, date_range, @@ -360,7 +361,7 @@ def test_info_memory_usage(): df = DataFrame(data) df.columns = dtypes - df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object)) df_with_object_index.info(buf=buf, memory_usage=True) res = buf.getvalue().splitlines() assert re.match(r"memory usage: [^+]+\+", res[-1]) @@ -398,25 +399,25 @@ def test_info_memory_usage(): @pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result") def test_info_memory_usage_deep_not_pypy(): - df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object)) assert ( df_with_object_index.memory_usage(index=True, deep=True).sum() > df_with_object_index.memory_usage(index=True).sum() ) - df_object = DataFrame({"a": ["a"]}) + df_object = DataFrame({"a": Series(["a"], dtype=object)}) assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum() @pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result") def test_info_memory_usage_deep_pypy(): - df_with_object_index = DataFrame({"a": [1]}, index=["foo"]) + df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object)) assert ( df_with_object_index.memory_usage(index=True, deep=True).sum() == df_with_object_index.memory_usage(index=True).sum() ) - df_object = DataFrame({"a": ["a"]}) + df_object = DataFrame({"a": Series(["a"], dtype=object)}) assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum() diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 7b206cc67d40d..b8a34d5eaa226 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -64,6 +64,7 @@ def test_interpolate_inplace(self, frame_or_series, request): assert np.shares_memory(orig, obj.values) assert orig.squeeze()[1] == 1.5 + # TODO(infer_string) raise proper TypeError in case of string dtype @pytest.mark.xfail( using_string_dtype(), reason="interpolate doesn't work for string" ) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d42d1d0316892..3971e58e8235e 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -11,8 +11,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -251,7 +249,6 @@ def test_timestamp_compare(self, left, right): with pytest.raises(TypeError, match=msg): right_f(pd.Timestamp("nat"), df) - @pytest.mark.xfail(using_string_dtype(), reason="can't compare string and int") def test_mixed_comparison(self): # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, # not raise TypeError diff --git a/pandas/tests/indexes/interval/test_formats.py b/pandas/tests/indexes/interval/test_formats.py index d20611a61b154..f858ae137ca4e 100644 --- a/pandas/tests/indexes/interval/test_formats.py +++ b/pandas/tests/indexes/interval/test_formats.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( DataFrame, DatetimeIndex, @@ -42,12 +40,11 @@ def test_repr_missing(self, constructor, expected, using_infer_string, request): result = repr(obj) assert result == expected - @pytest.mark.xfail(using_string_dtype(), reason="repr different") def test_repr_floats(self): # GH 32553 markers = Series( - ["foo", "bar"], + [1, 2], index=IntervalIndex( [ Interval(left, right) @@ -59,7 +56,7 @@ def test_repr_floats(self): ), ) result = str(markers) - expected = "(329.973, 345.137] foo\n(345.137, 360.191] bar\ndtype: object" + expected = "(329.973, 345.137] 1\n(345.137, 360.191] 2\ndtype: int64" assert result == expected @pytest.mark.parametrize( diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index f889fb0686f1d..d5002a47c3447 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -9,8 +9,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.compat import ( IS64, is_platform_windows, @@ -825,8 +823,6 @@ def replacer(self, how, from_key, to_key): raise ValueError return replacer - # Expected needs adjustment for the infer string option, seems to work as expecetd - @pytest.mark.skipif(using_string_dtype(), reason="TODO: test is to complex") def test_replace_series(self, how, to_key, from_key, replacer): index = pd.Index([3, 4], name="xxx") obj = pd.Series(self.rep[from_key], index=index, name="yyy") diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 67b3445e413f0..e8d16f8240db6 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas.errors import IndexingError from pandas.core.dtypes.common import ( @@ -426,7 +424,6 @@ def test_set_index_nan(self): ) tm.assert_frame_equal(result, df) - @pytest.mark.xfail(using_string_dtype(), reason="can't multiply arrow strings") def test_multi_assign(self): # GH 3626, an assignment of a sub-df to a df # set float64 to avoid upcast when setting nan @@ -652,7 +649,6 @@ def test_loc_setitem_fullindex_views(self): df.loc[df.index] = df.loc[df.index] tm.assert_frame_equal(df, df2) - @pytest.mark.xfail(using_string_dtype(), reason="can't set int into string") def test_rhs_alignment(self): # GH8258, tests that both rows & columns are aligned to what is # assigned to. covers both uniform data-type & multi-type cases diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 8901330108cb1..068446a5e216b 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( NA, Categorical, @@ -22,7 +20,6 @@ import pandas._testing as tm -@pytest.mark.xfail(using_string_dtype(), reason="share memory doesn't work for arrow") def test_reindex(datetime_series, string_series): identity = string_series.reindex(string_series.index) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 0df5b5a5d0108..97151784eb94c 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -359,7 +359,6 @@ def test_replace_mixed_types_with_string(self): expected = pd.Series([1, np.nan, 3, np.nan, 4, 5], dtype=object) tm.assert_series_equal(expected, result) - @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string") @pytest.mark.parametrize( "categorical, numeric", [ diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py index 78be4843f7a4d..1d95fbf8dccb8 100644 --- a/pandas/tests/series/test_formats.py +++ b/pandas/tests/series/test_formats.py @@ -144,7 +144,7 @@ def test_tidy_repr_name_0(self, arg): assert "Name: 0" in rep_str @pytest.mark.xfail( - using_string_dtype(), reason="TODO: investigate why this is failing" + using_string_dtype(), reason="TODO(infer_string): investigate failure" ) def test_newline(self): ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])