From 9b375be5aa3610e8a21ef0b5b81e4db04270f3d3 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 27 Jul 2024 17:14:00 +0200
Subject: [PATCH] TST (string dtype): clean-up xpasssing tests with future
 string dtype (#59323)

---
 pandas/tests/arithmetic/test_object.py               |  3 ---
 pandas/tests/base/test_unique.py                     |  5 +----
 pandas/tests/frame/constructors/test_from_dict.py    |  3 ++-
 pandas/tests/frame/constructors/test_from_records.py |  4 +---
 pandas/tests/frame/methods/test_fillna.py            |  2 ++
 pandas/tests/frame/methods/test_info.py              | 11 ++++++-----
 pandas/tests/frame/methods/test_interpolate.py       |  1 +
 pandas/tests/frame/test_arithmetic.py                |  3 ---
 pandas/tests/indexes/interval/test_formats.py        |  7 ++-----
 pandas/tests/indexing/test_coercion.py               |  4 ----
 pandas/tests/indexing/test_indexing.py               |  4 ----
 pandas/tests/series/methods/test_reindex.py          |  3 ---
 pandas/tests/series/methods/test_replace.py          |  1 -
 pandas/tests/series/test_formats.py                  |  2 +-
 14 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py
index 884e6e002800e..4b5156d0007bb 100644
--- a/pandas/tests/arithmetic/test_object.py
+++ b/pandas/tests/arithmetic/test_object.py
@@ -8,8 +8,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -303,7 +301,6 @@ def test_iadd_string(self):
         index += "_x"
         assert "a_x" in index
 
-    @pytest.mark.xfail(using_string_dtype(), reason="add doesn't work")
     def test_add(self):
         index = pd.Index([str(i) for i in range(10)])
         expected = pd.Index(index.values * 2)
diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py
index 42730519b32fd..7f094db6ea524 100644
--- a/pandas/tests/base/test_unique.py
+++ b/pandas/tests/base/test_unique.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 import pandas._testing as tm
 from pandas.tests.base.common import allow_na_ops
@@ -100,12 +98,11 @@ def test_nunique_null(null_obj, index_or_series_obj):
 
 
 @pytest.mark.single_cpu
-@pytest.mark.xfail(using_string_dtype(), reason="decoding fails")
 def test_unique_bad_unicode(index_or_series):
     # regression test for #34550
     uval = "\ud83d"  # smiley emoji
 
-    obj = index_or_series([uval] * 2)
+    obj = index_or_series([uval] * 2, dtype=object)
     result = obj.unique()
 
     if isinstance(obj, pd.Index):
diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py
index 4237e796e052e..fc7c03dc25839 100644
--- a/pandas/tests/frame/constructors/test_from_dict.py
+++ b/pandas/tests/frame/constructors/test_from_dict.py
@@ -44,7 +44,7 @@ def test_constructor_single_row(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.skipif(using_string_dtype(), reason="columns inferring logic broken")
+    @pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken")
     def test_constructor_list_of_series(self):
         data = [
             OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
@@ -108,6 +108,7 @@ def test_constructor_list_of_series(self):
         expected = DataFrame.from_dict(sdict, orient="index")
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="columns inferring logic broken")
     def test_constructor_orient(self, float_string_frame):
         data_dict = float_string_frame.T._series
         recons = DataFrame.from_dict(data_dict, orient="index")
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index ed3f9ac611405..abc3aab1c1492 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -57,9 +57,7 @@ def test_from_records_with_datetimes(self):
         expected["EXPIRY"] = expected["EXPIRY"].astype("M8[s]")
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.skipif(
-        using_string_dtype(), reason="dtype checking logic doesn't work"
-    )
+    @pytest.mark.xfail(using_string_dtype(), reason="dtype checking logic doesn't work")
     def test_from_records_sequencelike(self):
         df = DataFrame(
             {
diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py
index 5d18b5ed1f7cd..b72cac6f3f9a1 100644
--- a/pandas/tests/frame/methods/test_fillna.py
+++ b/pandas/tests/frame/methods/test_fillna.py
@@ -65,6 +65,7 @@ def test_fillna_datetime(self, datetime_frame):
         with pytest.raises(TypeError, match=msg):
             datetime_frame.fillna()
 
+    # TODO(infer_string) test as actual error instead of xfail
     @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
     def test_fillna_mixed_type(self, float_string_frame):
         mf = float_string_frame
@@ -537,6 +538,7 @@ def test_fillna_col_reordering(self):
         filled = df.ffill()
         assert df.columns.tolist() == filled.columns.tolist()
 
+    # TODO(infer_string) test as actual error instead of xfail
     @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
     def test_fill_corner(self, float_frame, float_string_frame):
         mf = float_string_frame
diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py
index 4e3726f4dc51d..17cb989626e70 100644
--- a/pandas/tests/frame/methods/test_info.py
+++ b/pandas/tests/frame/methods/test_info.py
@@ -15,6 +15,7 @@
 from pandas import (
     CategoricalIndex,
     DataFrame,
+    Index,
     MultiIndex,
     Series,
     date_range,
@@ -360,7 +361,7 @@ def test_info_memory_usage():
     df = DataFrame(data)
     df.columns = dtypes
 
-    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
+    df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
     df_with_object_index.info(buf=buf, memory_usage=True)
     res = buf.getvalue().splitlines()
     assert re.match(r"memory usage: [^+]+\+", res[-1])
@@ -398,25 +399,25 @@ def test_info_memory_usage():
 
 @pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
 def test_info_memory_usage_deep_not_pypy():
-    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
+    df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
     assert (
         df_with_object_index.memory_usage(index=True, deep=True).sum()
         > df_with_object_index.memory_usage(index=True).sum()
     )
 
-    df_object = DataFrame({"a": ["a"]})
+    df_object = DataFrame({"a": Series(["a"], dtype=object)})
     assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()
 
 
 @pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
 def test_info_memory_usage_deep_pypy():
-    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
+    df_with_object_index = DataFrame({"a": [1]}, index=Index(["foo"], dtype=object))
     assert (
         df_with_object_index.memory_usage(index=True, deep=True).sum()
         == df_with_object_index.memory_usage(index=True).sum()
     )
 
-    df_object = DataFrame({"a": ["a"]})
+    df_object = DataFrame({"a": Series(["a"], dtype=object)})
     assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()
 
 
diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py
index 7b206cc67d40d..b8a34d5eaa226 100644
--- a/pandas/tests/frame/methods/test_interpolate.py
+++ b/pandas/tests/frame/methods/test_interpolate.py
@@ -64,6 +64,7 @@ def test_interpolate_inplace(self, frame_or_series, request):
         assert np.shares_memory(orig, obj.values)
         assert orig.squeeze()[1] == 1.5
 
+    # TODO(infer_string) raise proper TypeError in case of string dtype
     @pytest.mark.xfail(
         using_string_dtype(), reason="interpolate doesn't work for string"
     )
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index d42d1d0316892..3971e58e8235e 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -11,8 +11,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -251,7 +249,6 @@ def test_timestamp_compare(self, left, right):
             with pytest.raises(TypeError, match=msg):
                 right_f(pd.Timestamp("nat"), df)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="can't compare string and int")
     def test_mixed_comparison(self):
         # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
         # not raise TypeError
diff --git a/pandas/tests/indexes/interval/test_formats.py b/pandas/tests/indexes/interval/test_formats.py
index d20611a61b154..f858ae137ca4e 100644
--- a/pandas/tests/indexes/interval/test_formats.py
+++ b/pandas/tests/indexes/interval/test_formats.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -42,12 +40,11 @@ def test_repr_missing(self, constructor, expected, using_infer_string, request):
         result = repr(obj)
         assert result == expected
 
-    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
     def test_repr_floats(self):
         # GH 32553
 
         markers = Series(
-            ["foo", "bar"],
+            [1, 2],
             index=IntervalIndex(
                 [
                     Interval(left, right)
@@ -59,7 +56,7 @@ def test_repr_floats(self):
             ),
         )
         result = str(markers)
-        expected = "(329.973, 345.137]    foo\n(345.137, 360.191]    bar\ndtype: object"
+        expected = "(329.973, 345.137]    1\n(345.137, 360.191]    2\ndtype: int64"
         assert result == expected
 
     @pytest.mark.parametrize(
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
index f889fb0686f1d..d5002a47c3447 100644
--- a/pandas/tests/indexing/test_coercion.py
+++ b/pandas/tests/indexing/test_coercion.py
@@ -9,8 +9,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.compat import (
     IS64,
     is_platform_windows,
@@ -825,8 +823,6 @@ def replacer(self, how, from_key, to_key):
             raise ValueError
         return replacer
 
-    # Expected needs adjustment for the infer string option, seems to work as expecetd
-    @pytest.mark.skipif(using_string_dtype(), reason="TODO: test is to complex")
     def test_replace_series(self, how, to_key, from_key, replacer):
         index = pd.Index([3, 4], name="xxx")
         obj = pd.Series(self.rep[from_key], index=index, name="yyy")
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index 67b3445e413f0..e8d16f8240db6 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -8,8 +8,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas.errors import IndexingError
 
 from pandas.core.dtypes.common import (
@@ -426,7 +424,6 @@ def test_set_index_nan(self):
         )
         tm.assert_frame_equal(result, df)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="can't multiply arrow strings")
     def test_multi_assign(self):
         # GH 3626, an assignment of a sub-df to a df
         # set float64 to avoid upcast when setting nan
@@ -652,7 +649,6 @@ def test_loc_setitem_fullindex_views(self):
         df.loc[df.index] = df.loc[df.index]
         tm.assert_frame_equal(df, df2)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="can't set int into string")
     def test_rhs_alignment(self):
         # GH8258, tests that both rows & columns are aligned to what is
         # assigned to. covers both uniform data-type & multi-type cases
diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py
index 8901330108cb1..068446a5e216b 100644
--- a/pandas/tests/series/methods/test_reindex.py
+++ b/pandas/tests/series/methods/test_reindex.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas import (
     NA,
     Categorical,
@@ -22,7 +20,6 @@
 import pandas._testing as tm
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="share memory doesn't work for arrow")
 def test_reindex(datetime_series, string_series):
     identity = string_series.reindex(string_series.index)
 
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 0df5b5a5d0108..97151784eb94c 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -359,7 +359,6 @@ def test_replace_mixed_types_with_string(self):
         expected = pd.Series([1, np.nan, 3, np.nan, 4, 5], dtype=object)
         tm.assert_series_equal(expected, result)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
     @pytest.mark.parametrize(
         "categorical, numeric",
         [
diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py
index 78be4843f7a4d..1d95fbf8dccb8 100644
--- a/pandas/tests/series/test_formats.py
+++ b/pandas/tests/series/test_formats.py
@@ -144,7 +144,7 @@ def test_tidy_repr_name_0(self, arg):
         assert "Name: 0" in rep_str
 
     @pytest.mark.xfail(
-        using_string_dtype(), reason="TODO: investigate why this is failing"
+        using_string_dtype(), reason="TODO(infer_string): investigate failure"
     )
     def test_newline(self):
         ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])