From 513343b3243e19baade71031025b3d9f6d838576 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Tue, 10 Mar 2026 00:24:51 +0000 Subject: [PATCH 1/2] chore: fix google3 import colabsqlviz tests --- bigframes/testing/__init__.py | 7 +- tests/system/small/bigquery/test_datetime.py | 12 +- tests/system/small/bigquery/test_geo.py | 24 +- tests/system/small/bigquery/test_sql.py | 32 +- tests/system/small/bigquery/test_struct.py | 2 +- tests/system/small/core/test_reshape.py | 2 +- tests/system/small/ml/test_metrics.py | 60 +-- tests/system/small/ml/test_utils.py | 8 +- tests/system/small/operations/test_dates.py | 10 +- .../small/operations/test_timedeltas.py | 46 +-- tests/system/small/test_dataframe.py | 340 +++++++++-------- tests/system/small/test_dataframe_io.py | 8 +- tests/system/small/test_groupby.py | 86 ++--- tests/system/small/test_multiindex.py | 134 +++---- tests/system/small/test_numpy.py | 18 +- tests/system/small/test_pandas.py | 38 +- tests/system/small/test_series.py | 344 ++++++++++-------- tests/system/small/test_session.py | 114 +++--- tests/system/small/test_window.py | 34 +- tests/unit/core/test_groupby.py | 24 +- 20 files changed, 720 insertions(+), 623 deletions(-) diff --git a/bigframes/testing/__init__.py b/bigframes/testing/__init__.py index 9c1fb7c283..098a67bddf 100644 --- a/bigframes/testing/__init__.py +++ b/bigframes/testing/__init__.py @@ -17,10 +17,5 @@ These modules are provided for testing the BigQuery DataFrames package. The interface is not considered stable. """ -from bigframes.testing.utils import ( - assert_frame_equal, - assert_index_equal, - assert_series_equal, -) -__all__ = ["assert_frame_equal", "assert_series_equal", "assert_index_equal"] +# Do not import modules that contain pytest. 
(b/490160312) diff --git a/tests/system/small/bigquery/test_datetime.py b/tests/system/small/bigquery/test_datetime.py index 789ae47ae2..8db75cf366 100644 --- a/tests/system/small/bigquery/test_datetime.py +++ b/tests/system/small/bigquery/test_datetime.py @@ -41,7 +41,7 @@ def test_unix_seconds(scalars_dfs): .apply(lambda ts: _to_unix_epoch(ts, "s")) .astype("Int64") ) - bigframes.testing.assert_series_equal(actual_res, expected_res) + bigframes.testing.utils.assert_series_equal(actual_res, expected_res) def test_unix_seconds_after_type_casting(int_series): @@ -54,7 +54,7 @@ def test_unix_seconds_after_type_casting(int_series): .apply(lambda ts: _to_unix_epoch(ts, "s")) .astype("Int64") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_res, expected_res, check_index_type=False ) @@ -76,7 +76,7 @@ def test_unix_millis(scalars_dfs): .apply(lambda ts: _to_unix_epoch(ts, "ms")) .astype("Int64") ) - bigframes.testing.assert_series_equal(actual_res, expected_res) + bigframes.testing.utils.assert_series_equal(actual_res, expected_res) def test_unix_millis_after_type_casting(int_series): @@ -89,7 +89,7 @@ def test_unix_millis_after_type_casting(int_series): .apply(lambda ts: _to_unix_epoch(ts, "ms")) .astype("Int64") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_res, expected_res, check_index_type=False ) @@ -111,7 +111,7 @@ def test_unix_micros(scalars_dfs): .apply(lambda ts: _to_unix_epoch(ts, "us")) .astype("Int64") ) - bigframes.testing.assert_series_equal(actual_res, expected_res) + bigframes.testing.utils.assert_series_equal(actual_res, expected_res) def test_unix_micros_after_type_casting(int_series): @@ -124,7 +124,7 @@ def test_unix_micros_after_type_casting(int_series): .apply(lambda ts: _to_unix_epoch(ts, "us")) .astype("Int64") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_res, expected_res, check_index_type=False 
) diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 24ecb7f639..66328ae9ad 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -57,7 +57,7 @@ def test_geo_st_area(session: bigframes.session.Session): geobf_s_result = bbq.st_area(geobf_s).to_pandas().round(-3) assert geobf_s_result.iloc[0] >= 1000 - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, geopd_s_result, check_dtype=False, @@ -110,7 +110,7 @@ def test_st_length_various_geometries(session): # Test default use_spheroid result_default = st_length(geoseries).to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( result_default, expected_lengths, rtol=1e-3, @@ -119,7 +119,7 @@ def test_st_length_various_geometries(session): # Test explicit use_spheroid=False result_explicit_false = st_length(geoseries, use_spheroid=False).to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( result_explicit_false, expected_lengths, rtol=1e-3, @@ -153,7 +153,7 @@ def test_geo_st_difference_with_geometry_objects(session: bigframes.session.Sess index=[0, 1, 2], dtype=geopandas.array.GeometryDtype(), ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, expected, check_index_type=False, @@ -192,7 +192,7 @@ def test_geo_st_difference_with_single_geometry_object( index=[0, 1, 2], dtype=geopandas.array.GeometryDtype(), ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, expected, check_index_type=False, @@ -218,7 +218,7 @@ def test_geo_st_difference_with_similar_geometry_objects( index=[0, 1, 2], dtype=geopandas.array.GeometryDtype(), ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, expected, check_index_type=False, @@ -274,7 +274,7 @@ 
def test_geo_st_distance_with_geometry_objects(session: bigframes.session.Sessio index=[0, 1, 2, 3], dtype="Float64", ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, expected, check_index_type=False, @@ -321,7 +321,7 @@ def test_geo_st_distance_with_single_geometry_object( ], dtype="Float64", ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, expected, check_index_type=False, @@ -356,7 +356,7 @@ def test_geo_st_intersection_with_geometry_objects(session: bigframes.session.Se index=[0, 1, 2], dtype=geopandas.array.GeometryDtype(), ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, expected, check_index_type=False, @@ -395,7 +395,7 @@ def test_geo_st_intersection_with_single_geometry_object( index=[0, 1, 2], dtype=geopandas.array.GeometryDtype(), ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, expected, check_index_type=False, @@ -425,7 +425,7 @@ def test_geo_st_intersection_with_similar_geometry_objects( index=[0, 1, 2], dtype=geopandas.array.GeometryDtype(), ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( geobf_s_result, expected, check_index_type=False, @@ -466,7 +466,7 @@ def test_geo_st_isclosed(session: bigframes.session.Session): ] expected_series = pd.Series(data=expected_data, dtype="boolean") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, expected_series, # We default to Int64 (nullable) dtype, but pandas defaults to int64 index. 
diff --git a/tests/system/small/bigquery/test_sql.py b/tests/system/small/bigquery/test_sql.py index fa43c24965..6bfcb75857 100644 --- a/tests/system/small/bigquery/test_sql.py +++ b/tests/system/small/bigquery/test_sql.py @@ -60,7 +60,9 @@ def test_sql_scalar_for_bool_series(scalars_df_index): result = bbq.sql_scalar("CAST({0} AS INT64)", [series]) expected = series.astype(dtypes.INT_DTYPE) expected.name = result.name - bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.utils.assert_series_equal( + result.to_pandas(), expected.to_pandas() + ) @pytest.mark.parametrize( @@ -84,7 +86,9 @@ def test_sql_scalar_outputs_all_scalar_types(scalars_df_index, column_name): result = bbq.sql_scalar("{0}", [series]) expected = series expected.name = result.name - bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.utils.assert_series_equal( + result.to_pandas(), expected.to_pandas() + ) def test_sql_scalar_for_array_series(repeated_df): @@ -114,14 +118,18 @@ def test_sql_scalar_for_array_series(repeated_df): + repeated_df["numeric_list_col"].list.len() + repeated_df["string_list_col"].list.len() ) - bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.utils.assert_series_equal( + result.to_pandas(), expected.to_pandas() + ) def test_sql_scalar_outputs_array_series(repeated_df): result = bbq.sql_scalar("{0}", [repeated_df["int_list_col"]]) expected = repeated_df["int_list_col"] expected.name = result.name - bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.utils.assert_series_equal( + result.to_pandas(), expected.to_pandas() + ) def test_sql_scalar_for_struct_series(nested_structs_df): @@ -132,14 +140,18 @@ def test_sql_scalar_for_struct_series(nested_structs_df): expected = nested_structs_df["person"].struct.field( "name" ).str.len() + nested_structs_df["person"].struct.field("age") - 
bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.utils.assert_series_equal( + result.to_pandas(), expected.to_pandas() + ) def test_sql_scalar_outputs_struct_series(nested_structs_df): result = bbq.sql_scalar("{0}", [nested_structs_df["person"]]) expected = nested_structs_df["person"] expected.name = result.name - bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.utils.assert_series_equal( + result.to_pandas(), expected.to_pandas() + ) def test_sql_scalar_for_json_series(json_df): @@ -151,11 +163,15 @@ def test_sql_scalar_for_json_series(json_df): ) expected = bbq.json_value(json_df["json_col"], "$.int_value") expected.name = result.name - bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.utils.assert_series_equal( + result.to_pandas(), expected.to_pandas() + ) def test_sql_scalar_outputs_json_series(json_df): result = bbq.sql_scalar("{0}", [json_df["json_col"]]) expected = json_df["json_col"] expected.name = result.name - bigframes.testing.assert_series_equal(result.to_pandas(), expected.to_pandas()) + bigframes.testing.utils.assert_series_equal( + result.to_pandas(), expected.to_pandas() + ) diff --git a/tests/system/small/bigquery/test_struct.py b/tests/system/small/bigquery/test_struct.py index 5e51a5fce0..5bcd208025 100644 --- a/tests/system/small/bigquery/test_struct.py +++ b/tests/system/small/bigquery/test_struct.py @@ -53,7 +53,7 @@ def test_struct_from_dataframe(columns_arg): srs = series.Series( columns_arg, ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( srs.to_pandas(), bbq.struct(srs.struct.explode()).to_pandas(), check_index_type=False, diff --git a/tests/system/small/core/test_reshape.py b/tests/system/small/core/test_reshape.py index 4d20ce887a..36ab79d5c4 100644 --- a/tests/system/small/core/test_reshape.py +++ b/tests/system/small/core/test_reshape.py @@ 
-56,7 +56,7 @@ def test_join_with_index( how=how, ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) diff --git a/tests/system/small/ml/test_metrics.py b/tests/system/small/ml/test_metrics.py index 5c59589816..46c3cf0a0e 100644 --- a/tests/system/small/ml/test_metrics.py +++ b/tests/system/small/ml/test_metrics.py @@ -162,7 +162,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session): pd_tpr = tpr.to_pandas() pd_thresholds = thresholds.to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( # skip testing the first value, as it is redundant and inconsistent across sklearn versions pd_thresholds[1:], pd.Series( @@ -172,7 +172,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session): ), check_index=False, ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_fpr, pd.Series( [0.0, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0], @@ -181,7 +181,7 @@ def test_roc_curve_binary_classification_prediction_returns_expected(session): ), check_index_type=False, ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_tpr, pd.Series( [ @@ -262,7 +262,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session): pd_tpr = tpr.to_pandas() pd_thresholds = thresholds.to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( # skip testing the first value, as it is redundant and inconsistent across sklearn versions pd_thresholds[1:], pd.Series( @@ -272,7 +272,7 @@ def test_roc_curve_binary_classification_decision_returns_expected(session): ), check_index=False, ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_fpr, pd.Series( [0.0, 0.0, 1.0], @@ -281,7 +281,7 @@ def 
test_roc_curve_binary_classification_decision_returns_expected(session): ), check_index_type=False, ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_tpr, pd.Series( [ @@ -354,7 +354,7 @@ def test_roc_curve_binary_classification_prediction_series(session): pd_tpr = tpr.to_pandas() pd_thresholds = thresholds.to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( # skip testing the first value, as it is redundant and inconsistent across sklearn versions pd_thresholds[1:], pd.Series( @@ -364,7 +364,7 @@ def test_roc_curve_binary_classification_prediction_series(session): ), check_index=False, ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_fpr, pd.Series( [0.0, 0.0, 0.0, 0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0], @@ -373,7 +373,7 @@ def test_roc_curve_binary_classification_prediction_series(session): ), check_index_type=False, ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_tpr, pd.Series( [ @@ -506,7 +506,7 @@ def test_confusion_matrix(session): 2: [0, 1, 2], } ).astype("int64") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -524,7 +524,7 @@ def test_confusion_matrix_column_index(session): {1: [1, 0, 1, 0], 2: [0, 0, 2, 0], 3: [0, 0, 0, 0], 4: [0, 1, 0, 1]}, index=[1, 2, 3, 4], ).astype("int64") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -543,7 +543,7 @@ def test_confusion_matrix_matches_sklearn(session): pd_df[["y_true"]], pd_df[["y_pred"]] ) expected_pd_df = pd.DataFrame(expected_confusion_matrix) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -565,7 +565,7 @@ def 
test_confusion_matrix_str_matches_sklearn(session): expected_confusion_matrix, index=["ant", "bird", "cat"] ) expected_pd_df.columns = pd.Index(["ant", "bird", "cat"]) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -586,7 +586,7 @@ def test_confusion_matrix_series(session): 2: [0, 1, 2], } ).astype("int64") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( confusion_matrix, expected_pd_df, check_index_type=False ) @@ -606,7 +606,7 @@ def test_recall_score(session): expected_index = [0, 1, 2] expected_recall = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( recall, expected_recall, check_index_type=False ) @@ -626,7 +626,7 @@ def test_recall_score_matches_sklearn(session): ) expected_index = [0, 1, 2] expected_recall = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( recall, expected_recall, check_index_type=False ) @@ -646,7 +646,7 @@ def test_recall_score_str_matches_sklearn(session): ) expected_index = ["ant", "bird", "cat"] expected_recall = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( recall, expected_recall, check_index_type=False ) @@ -664,7 +664,7 @@ def test_recall_score_series(session): expected_index = [0, 1, 2] expected_recall = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( recall, expected_recall, check_index_type=False ) @@ -684,7 +684,7 @@ def test_precision_score(session): expected_index = [0, 1, 2] expected_precision = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( 
precision_score, expected_precision, check_index_type=False ) @@ -707,7 +707,7 @@ def test_precision_score_matches_sklearn(session): ) expected_index = [0, 1, 2] expected_precision = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( precision_score, expected_precision, check_index_type=False ) @@ -729,7 +729,7 @@ def test_precision_score_str_matches_sklearn(session): ) expected_index = ["ant", "bird", "cat"] expected_precision = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( precision_score, expected_precision, check_index_type=False ) @@ -747,7 +747,7 @@ def test_precision_score_series(session): expected_index = [0, 1, 2] expected_precision = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( precision_score, expected_precision, check_index_type=False ) @@ -831,7 +831,9 @@ def test_f1_score(session): expected_index = [0, 1, 2] expected_f1 = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + f1_score, expected_f1, check_index_type=False + ) def test_f1_score_matches_sklearn(session): @@ -849,7 +851,9 @@ def test_f1_score_matches_sklearn(session): ) expected_index = [0, 1, 2] expected_f1 = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + f1_score, expected_f1, check_index_type=False + ) def test_f1_score_str_matches_sklearn(session): @@ -867,7 +871,9 @@ def test_f1_score_str_matches_sklearn(session): ) expected_index = ["ant", "bird", "cat"] expected_f1 = pd.Series(expected_values, index=expected_index) - 
bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + f1_score, expected_f1, check_index_type=False + ) def test_f1_score_series(session): @@ -883,7 +889,9 @@ def test_f1_score_series(session): expected_index = [0, 1, 2] expected_f1 = pd.Series(expected_values, index=expected_index) - bigframes.testing.assert_series_equal(f1_score, expected_f1, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + f1_score, expected_f1, check_index_type=False + ) def test_mean_squared_error(session: bigframes.Session): diff --git a/tests/system/small/ml/test_utils.py b/tests/system/small/ml/test_utils.py index 8d75754900..4d48569032 100644 --- a/tests/system/small/ml/test_utils.py +++ b/tests/system/small/ml/test_utils.py @@ -31,7 +31,7 @@ def test_convert_to_dataframe(session, data): (actual_result,) = utils.batch_convert_to_dataframe(bf_data) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result.to_pandas(), _DATA_FRAME, check_index_type=False, @@ -46,7 +46,7 @@ def test_convert_to_dataframe(session, data): def test_convert_pandas_to_dataframe(data, session): (actual_result,) = utils.batch_convert_to_dataframe(data, session=session) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result.to_pandas(), _DATA_FRAME, check_index_type=False, @@ -63,7 +63,7 @@ def test_convert_to_series(session, data): (actual_result,) = utils.batch_convert_to_series(bf_data) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result.to_pandas(), _SERIES, check_index_type=False, check_dtype=False ) @@ -75,6 +75,6 @@ def test_convert_to_series(session, data): def test_convert_pandas_to_series(data, session): (actual_result,) = utils.batch_convert_to_series(data, session=session) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( 
actual_result.to_pandas(), _SERIES, check_index_type=False, check_dtype=False ) diff --git a/tests/system/small/operations/test_dates.py b/tests/system/small/operations/test_dates.py index e9f5f07d28..826ee869f3 100644 --- a/tests/system/small/operations/test_dates.py +++ b/tests/system/small/operations/test_dates.py @@ -35,7 +35,7 @@ def test_date_diff_between_series(session): actual_result = (bf_df["col_1"] - bf_df["col_2"]).to_pandas() expected_result = (pd_df["col_1"] - pd_df["col_2"]).astype(dtypes.TIMEDELTA_DTYPE) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -47,7 +47,7 @@ def test_date_diff_literal_sub_series(scalars_dfs): actual_result = (literal - bf_df["date_col"]).to_pandas() expected_result = (literal - pd_df["date_col"]).astype(dtypes.TIMEDELTA_DTYPE) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -59,7 +59,7 @@ def test_date_diff_series_sub_literal(scalars_dfs): actual_result = (bf_df["date_col"] - literal).to_pandas() expected_result = (pd_df["date_col"] - literal).astype(dtypes.TIMEDELTA_DTYPE) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -70,7 +70,7 @@ def test_date_series_diff_agg(scalars_dfs): actual_result = bf_df["date_col"].diff().to_pandas() expected_result = pd_df["date_col"].diff().astype(dtypes.TIMEDELTA_DTYPE) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -86,6 +86,6 @@ def test_date_can_cast_after_accessor(scalars_dfs): pd.to_datetime(pd_df["date_col"]).dt.isocalendar().week.astype("Int64") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_dtype=False, 
check_index_type=False ) diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index 0329aece05..39ee5ca25e 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -87,7 +87,7 @@ def temporal_dfs(session): def _assert_series_equal(actual: pd.Series, expected: pd.Series): """Helper function specifically for timedelta testing. Don't use it outside of this module.""" - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual, expected, check_index_type=False, @@ -212,7 +212,7 @@ def test_timestamp_add__ts_series_plus_td_series(temporal_dfs, column, pd_dtype) actual_result = (bf_df[column] + bf_df["timedelta_col_1"]).to_pandas() expected_result = pd_df[column] + pd_df["timedelta_col_1"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -241,7 +241,7 @@ def test_timestamp_add__ts_series_plus_td_literal(temporal_dfs, literal): actual_result = (bf_df["timestamp_col"] + literal).to_pandas() expected_result = pd_df["timestamp_col"] + literal - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -259,7 +259,7 @@ def test_timestamp_add__td_series_plus_ts_series(temporal_dfs, column, pd_dtype) actual_result = (bf_df["timedelta_col_1"] + bf_df[column]).to_pandas() expected_result = pd_df["timedelta_col_1"] + pd_df[column] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -271,7 +271,7 @@ def test_timestamp_add__td_literal_plus_ts_series(temporal_dfs): actual_result = (timedelta + bf_df["datetime_col"]).to_pandas() expected_result = timedelta + pd_df["datetime_col"] - bigframes.testing.assert_series_equal( + 
bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -283,7 +283,7 @@ def test_timestamp_add__ts_literal_plus_td_series(temporal_dfs): actual_result = (timestamp + bf_df["timedelta_col_1"]).to_pandas() expected_result = timestamp + pd_df["timedelta_col_1"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -301,7 +301,7 @@ def test_timestamp_add_with_numpy_op(temporal_dfs, column, pd_dtype): actual_result = np.add(bf_df[column], bf_df["timedelta_col_1"]).to_pandas() expected_result = np.add(pd_df[column], pd_df["timedelta_col_1"]) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -316,7 +316,7 @@ def test_timestamp_add_dataframes(temporal_dfs): actual_result["timestamp_col"] = actual_result["timestamp_col"] expected_result = pd_df[columns] + timedelta - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result, expected_result, check_index_type=False ) @@ -337,7 +337,7 @@ def test_timestamp_sub__ts_series_minus_td_series( actual_result = (bf_df[column] - bf_df["timedelta_col_1"]).to_pandas() expected_result = pd_df[column] - pd_df["timedelta_col_1"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -360,7 +360,7 @@ def test_timestamp_sub__ts_series_minus_td_literal( # pandas type behavior changes per pandas version expected_result = (pd_df[column] - literal).astype(actual_result.dtype) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -374,7 +374,7 @@ def test_timestamp_sub__ts_literal_minus_td_series(temporal_dfs): ).to_pandas() # .astype(" pd.Timedelta(1, "h")] - 
bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -584,7 +584,7 @@ def test_timedelta_ordering(session): actual_result = (bf_df["col_2"] - bf_df["col_1"]).sort_values().to_pandas() expected_result = (pd_df["col_2"] - pd_df["col_1"]).sort_values() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -651,6 +651,6 @@ def test_timestamp_diff_after_type_casting(temporal_dfs): expected_result = pd_df["timestamp_col"] - pd_df["positive_int_col"].astype( "datetime64[us, UTC]" ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False, check_dtype=False ) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 8caeabb98b..9683a8bc52 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -134,7 +134,7 @@ def test_df_construct_structs(session): ] ).to_frame() bf_series = session.read_pandas(pd_frame) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_series.to_pandas(), pd_frame, check_index_type=False, check_dtype=False ) @@ -144,7 +144,7 @@ def test_df_construct_local_concat_pd(scalars_pandas_df_index, session): bf_df = session.read_pandas(pd_df) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_df.to_pandas(), pd_df, check_index_type=False, check_dtype=False ) @@ -319,7 +319,7 @@ def test_df_nlargest(scalars_df_index, scalars_pandas_df_index, keep): 3, ["bool_col", "int64_too"], keep=keep ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -337,7 +337,7 @@ def test_df_nsmallest(scalars_df_index, scalars_pandas_df_index, keep): bf_result = scalars_df_index.nsmallest(6, 
["bool_col"], keep=keep) pd_result = scalars_pandas_df_index.nsmallest(6, ["bool_col"], keep=keep) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -356,7 +356,7 @@ def test_get_columns(scalars_dfs): col_names = ["bool_col", "float64_col", "int64_col"] df_subset = scalars_df.get(col_names) df_pandas = df_subset.to_pandas() - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( df_pandas.columns, scalars_pandas_df[col_names].columns ) @@ -403,7 +403,9 @@ def test_insert(scalars_dfs, loc, column, value, allow_duplicates): bf_df.insert(loc, column, value, allow_duplicates) pd_df.insert(loc, column, value, allow_duplicates) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df, check_dtype=False) + bigframes.testing.utils.assert_frame_equal( + bf_df.to_pandas(), pd_df, check_dtype=False + ) def test_mask_series_cond(scalars_df_index, scalars_pandas_df_index): @@ -597,7 +599,7 @@ def test_drop_column(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name = "int64_col" df_pandas = scalars_df.drop(columns=col_name).to_pandas() - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( df_pandas.columns, scalars_pandas_df.drop(columns=col_name).columns ) @@ -606,7 +608,7 @@ def test_drop_columns(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_names = ["int64_col", "geography_col", "time_col"] df_pandas = scalars_df.drop(columns=col_names).to_pandas() - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( df_pandas.columns, scalars_pandas_df.drop(columns=col_names).columns ) @@ -618,7 +620,7 @@ def test_drop_labels_axis_1(scalars_dfs): pd_result = scalars_pandas_df.drop(labels=labels, axis=1) bf_result = scalars_df.drop(labels=labels, axis=1).to_pandas() - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, 
bf_result) def test_drop_with_custom_column_labels(scalars_dfs): @@ -645,7 +647,7 @@ def test_df_memory_usage(scalars_dfs): pd_result = scalars_pandas_df.memory_usage() bf_result = scalars_df.memory_usage() - bigframes.testing.assert_series_equal(pd_result, bf_result, rtol=1.5) + bigframes.testing.utils.assert_series_equal(pd_result, bf_result, rtol=1.5) def test_df_info(scalars_dfs): @@ -744,7 +746,7 @@ def test_select_dtypes(scalars_dfs, include, exclude): pd_result = scalars_pandas_df.select_dtypes(include=include, exclude=exclude) bf_result = scalars_df.select_dtypes(include=include, exclude=exclude).to_pandas() - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result) def test_drop_index(scalars_dfs): @@ -753,7 +755,7 @@ def test_drop_index(scalars_dfs): pd_result = scalars_pandas_df.drop(index=[4, 1, 2]) bf_result = scalars_df.drop(index=[4, 1, 2]).to_pandas() - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result) def test_drop_pandas_index(scalars_dfs): @@ -763,7 +765,7 @@ def test_drop_pandas_index(scalars_dfs): pd_result = scalars_pandas_df.drop(index=drop_index) bf_result = scalars_df.drop(index=drop_index).to_pandas() - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result) def test_drop_bigframes_index(scalars_dfs): @@ -774,7 +776,7 @@ def test_drop_bigframes_index(scalars_dfs): pd_result = scalars_pandas_df.drop(index=drop_pandas_index) bf_result = scalars_df.drop(index=drop_index).to_pandas() - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result) def test_drop_bigframes_index_with_na(scalars_dfs): @@ -791,7 +793,7 @@ def test_drop_bigframes_index_with_na(scalars_dfs): pd_result = scalars_pandas_df.drop(index=drop_pandas_index) # drop_pandas_index) bf_result = 
scalars_df.drop(index=drop_index).to_pandas() - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result) def test_drop_bigframes_multiindex(scalars_dfs): @@ -812,7 +814,7 @@ def test_drop_bigframes_multiindex(scalars_dfs): bf_result = scalars_df.drop(index=drop_index).to_pandas() pd_result = scalars_pandas_df.drop(index=drop_pandas_index) - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result) def test_drop_labels_axis_0(scalars_dfs): @@ -821,7 +823,7 @@ def test_drop_labels_axis_0(scalars_dfs): pd_result = scalars_pandas_df.drop(labels=[4, 1, 2], axis=0) bf_result = scalars_df.drop(labels=[4, 1, 2], axis=0).to_pandas() - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result) def test_drop_index_and_columns(scalars_dfs): @@ -830,14 +832,14 @@ def test_drop_index_and_columns(scalars_dfs): pd_result = scalars_pandas_df.drop(index=[4, 1, 2], columns="int64_col") bf_result = scalars_df.drop(index=[4, 1, 2], columns="int64_col").to_pandas() - bigframes.testing.assert_frame_equal(pd_result, bf_result) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result) def test_rename(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name_dict = {"bool_col": 1.2345} df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas() - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns ) @@ -847,7 +849,9 @@ def test_df_peek(scalars_dfs_maybe_ordered): peek_result = scalars_df.peek(n=3, force=False, allow_large_results=True) - bigframes.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns) + bigframes.testing.utils.assert_index_equal( + scalars_pandas_df.columns, peek_result.columns + ) assert len(peek_result) == 3 @@ -856,14 
+860,18 @@ def test_df_peek_with_large_results_not_allowed(scalars_dfs_maybe_ordered): peek_result = scalars_df.peek(n=3, force=False, allow_large_results=False) - bigframes.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns) + bigframes.testing.utils.assert_index_equal( + scalars_pandas_df.columns, peek_result.columns + ) assert len(peek_result) == 3 def test_df_peek_filtered(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3, force=False) - bigframes.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns) + bigframes.testing.utils.assert_index_equal( + scalars_pandas_df.columns, peek_result.columns + ) assert len(peek_result) == 3 @@ -878,7 +886,7 @@ def test_df_peek_exception(scalars_dfs): def test_df_peek_force_default(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3) - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns ) assert len(peek_result) == 3 @@ -889,7 +897,7 @@ def test_df_peek_reset_index(scalars_dfs): peek_result = ( scalars_df[["int64_col", "int64_too"]].reset_index(drop=True).peek(n=3) ) - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns ) assert len(peek_result) == 3 @@ -989,7 +997,7 @@ def test_df_column_name_with_space(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs col_name_dict = {"bool_col": "bool col"} df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas() - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns ) @@ -998,7 +1006,7 @@ def test_df_column_name_duplicate(scalars_dfs): scalars_df, scalars_pandas_df = 
scalars_dfs col_name_dict = {"int64_too": "int64_col"} df_pandas = scalars_df.rename(columns=col_name_dict).to_pandas() - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( df_pandas.columns, scalars_pandas_df.rename(columns=col_name_dict).columns ) @@ -1009,7 +1017,7 @@ def test_get_df_column_name_duplicate(scalars_dfs): bf_result = scalars_df.rename(columns=col_name_dict)["int64_col"].to_pandas() pd_result = scalars_pandas_df.rename(columns=col_name_dict)["int64_col"] - bigframes.testing.assert_index_equal(bf_result.columns, pd_result.columns) + bigframes.testing.utils.assert_index_equal(bf_result.columns, pd_result.columns) @pytest.mark.parametrize( @@ -1126,7 +1134,7 @@ def test_assign_new_column_w_loc(scalars_dfs): # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. pd_result["new_col"] = pd_result["new_col"].astype("Int64") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -1148,7 +1156,7 @@ def test_assign_new_column_w_setitem(scalars_dfs, scalar): # Convert default pandas dtypes `float64` to match BigQuery DataFrames dtypes. pd_result["new_col"] = pd_result["new_col"].astype("Float64") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_assign_new_column_w_setitem_dataframe(scalars_dfs): @@ -1161,7 +1169,7 @@ def test_assign_new_column_w_setitem_dataframe(scalars_dfs): # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. 
pd_df["int64_col"] = pd_df["int64_col"].astype("Int64") - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df) def test_assign_new_column_w_setitem_dataframe_error(scalars_dfs): @@ -1187,7 +1195,7 @@ def test_assign_new_column_w_setitem_list(scalars_dfs): # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. pd_result["new_col"] = pd_result["new_col"].astype("Int64") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_assign_new_column_w_setitem_list_repeated(scalars_dfs): @@ -1205,7 +1213,7 @@ def test_assign_new_column_w_setitem_list_repeated(scalars_dfs): pd_result["new_col"] = pd_result["new_col"].astype("Int64") pd_result["new_col_2"] = pd_result["new_col_2"].astype("Int64") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_assign_new_column_w_setitem_list_custom_index(scalars_dfs): @@ -1225,7 +1233,7 @@ def test_assign_new_column_w_setitem_list_custom_index(scalars_dfs): # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. 
pd_result["new_col"] = pd_result["new_col"].astype("Int64") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_assign_new_column_w_setitem_list_error(scalars_dfs): @@ -1267,7 +1275,7 @@ def test_setitem_multicolumn_with_literals(scalars_dfs, key, value): bf_result[key] = value pd_result[key] = value - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result.to_pandas(), check_dtype=False ) @@ -1288,7 +1296,7 @@ def test_setitem_multicolumn_with_dataframes(scalars_dfs): bf_result[["int64_col", "int64_too"]] = bf_result[["int64_too", "int64_col"]] / 2 pd_result[["int64_col", "int64_too"]] = pd_result[["int64_too", "int64_col"]] / 2 - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result.to_pandas(), check_dtype=False ) @@ -1445,7 +1453,7 @@ def test_assign_different_df_w_loc( # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. pd_result["int64_col"] = pd_result["int64_col"].astype("Int64") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_assign_different_df_w_setitem( @@ -1464,7 +1472,7 @@ def test_assign_different_df_w_setitem( # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. pd_result["int64_col"] = pd_result["int64_col"].astype("Int64") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_assign_callable_lambda(scalars_dfs): @@ -1534,7 +1542,7 @@ def test_df_dropna_by_thresh(scalars_dfs, axis, ignore_index, subset, thresh): bf_result = df_result.to_pandas() # Pandas uses int64 instead of Int64 (nullable) dtype. 
pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_df_dropna_range_columns(scalars_dfs): @@ -1582,7 +1590,7 @@ def test_df_fillna(scalars_dfs, col, fill_value): bf_result = scalars_df[col].fillna(fill_value).to_pandas() pd_result = scalars_pandas_df[col].fillna(fill_value) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) def test_df_replace_scalar_scalar(scalars_dfs): @@ -1591,7 +1599,7 @@ def test_df_replace_scalar_scalar(scalars_dfs): pd_result = scalars_pandas_df.replace(555.555, 3) # pandas has narrower result types as they are determined dynamically - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result, check_dtype=False) def test_df_replace_regex_scalar(scalars_dfs): @@ -1599,7 +1607,7 @@ def test_df_replace_regex_scalar(scalars_dfs): bf_result = scalars_df.replace("^H.l", "Howdy, Planet!", regex=True).to_pandas() pd_result = scalars_pandas_df.replace("^H.l", "Howdy, Planet!", regex=True) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, ) @@ -1611,7 +1619,7 @@ def test_df_replace_list_scalar(scalars_dfs): pd_result = scalars_pandas_df.replace([555.555, 3.2], 3) # pandas has narrower result types as they are determined dynamically - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, check_dtype=False, @@ -1623,7 +1631,7 @@ def test_df_replace_value_dict(scalars_dfs): bf_result = scalars_df.replace(1, {"int64_col": 100, "int64_too": 200}).to_pandas() pd_result = scalars_pandas_df.replace(1, {"int64_col": 100, "int64_too": 200}) - bigframes.testing.assert_frame_equal( + 
bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, ) @@ -1840,7 +1848,9 @@ def test_df_cross_merge(scalars_dfs): ), "cross", ) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_frame_equal( + bf_result, pd_result, check_index_type=False + ) @pytest.mark.parametrize( @@ -1993,7 +2003,9 @@ def test_self_merge_self_w_on_args(): bf_result = bf_df1.merge( bf_df2, left_on=["A", "C"], right_on=["B", "C"], how="inner" ).to_pandas() - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_frame_equal( + bf_result, pd_result, check_index_type=False + ) @pytest.mark.parametrize( @@ -2034,7 +2046,7 @@ def test_get_dtypes(scalars_df_default_index): "timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")), "duration_col": pd.ArrowDtype(pa.duration("us")), } - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( dtypes, pd.Series(dtypes_dict), ) @@ -2050,7 +2062,7 @@ def test_get_dtypes_array_struct_query(session): ) dtypes = df.dtypes - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( dtypes, pd.Series( { @@ -2070,7 +2082,7 @@ def test_get_dtypes_array_struct_query(session): def test_get_dtypes_array_struct_table(nested_df): dtypes = nested_df.dtypes - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( dtypes, pd.Series( { @@ -2608,7 +2620,7 @@ def test_combine( ) # Some dtype inconsistency for all-NULL columns - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) @pytest.mark.parametrize( @@ -2646,7 +2658,7 @@ def test_df_update(overwrite, filter_func): bf_df1.update(bf_df2, overwrite=overwrite, filter_func=filter_func) pd_df1.update(pd_df2, overwrite=overwrite, filter_func=filter_func) - 
bigframes.testing.assert_frame_equal(bf_df1.to_pandas(), pd_df1) + bigframes.testing.utils.assert_frame_equal(bf_df1.to_pandas(), pd_df1) def test_df_idxmin(): @@ -2658,7 +2670,7 @@ def test_df_idxmin(): bf_result = bf_df.idxmin().to_pandas() pd_result = pd_df.idxmin() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, check_dtype=False ) @@ -2672,7 +2684,7 @@ def test_df_idxmax(): bf_result = bf_df.idxmax().to_pandas() pd_result = pd_df.idxmax() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, check_dtype=False ) @@ -2712,10 +2724,10 @@ def test_df_align(join, axis): assert isinstance(bf_result1, dataframe.DataFrame) and isinstance( bf_result2, dataframe.DataFrame ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result1.to_pandas(), pd_result1, check_dtype=False ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result2.to_pandas(), pd_result2, check_dtype=False ) @@ -2742,7 +2754,7 @@ def test_combine_first( pd_result = pd_df_a.combine_first(pd_df_b) # Some dtype inconsistency for all-NULL columns - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) @pytest.mark.parametrize( @@ -2769,9 +2781,9 @@ def test_df_corr_w_numeric_only(scalars_dfs_maybe_ordered, columns, numeric_only # BigFrames and Pandas differ in their data type handling: # - Column types: BigFrames uses Float64, Pandas uses float64. # - Index types: BigFrames uses strign, Pandas uses object. - bigframes.testing.assert_index_equal(bf_result.columns, pd_result.columns) + bigframes.testing.utils.assert_index_equal(bf_result.columns, pd_result.columns) # Only check row order in ordered mode. 
- bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, @@ -2813,9 +2825,9 @@ def test_cov_w_numeric_only(scalars_dfs_maybe_ordered, columns, numeric_only): # BigFrames and Pandas differ in their data type handling: # - Column types: BigFrames uses Float64, Pandas uses float64. # - Index types: BigFrames uses strign, Pandas uses object. - bigframes.testing.assert_index_equal(bf_result.columns, pd_result.columns) + bigframes.testing.utils.assert_index_equal(bf_result.columns, pd_result.columns) # Only check row order in ordered mode. - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, @@ -2836,7 +2848,7 @@ def test_df_corrwith_df(scalars_dfs_maybe_ordered): # BigFrames and Pandas differ in their data type handling: # - Column types: BigFrames uses Float64, Pandas uses float64. # - Index types: BigFrames uses strign, Pandas uses object. - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -2857,7 +2869,7 @@ def test_df_corrwith_df_numeric_only(scalars_dfs): # BigFrames and Pandas differ in their data type handling: # - Column types: BigFrames uses Float64, Pandas uses float64. # - Index types: BigFrames uses strign, Pandas uses object. - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -2886,7 +2898,7 @@ def test_df_corrwith_series(scalars_dfs_maybe_ordered): # BigFrames and Pandas differ in their data type handling: # - Column types: BigFrames uses Float64, Pandas uses float64. # - Index types: BigFrames uses strign, Pandas uses object. 
- bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -3157,7 +3169,7 @@ def test_binop_df_df_binary_op( pd_result = pd_df_a - pd_df_b # Some dtype inconsistency for all-NULL columns - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) # Differnt table will only work for explicit index, since default index orders are arbitrary. @@ -3267,9 +3279,11 @@ def test_join_different_table_with_duplicate_column_name( pd_result = pd_df_a.join(pd_df_b, how=how, lsuffix="_l", rsuffix="_r") # Ensure no inplace changes - bigframes.testing.assert_index_equal(bf_df_a.columns, pd_df_a.columns) - bigframes.testing.assert_index_equal(bf_df_b.index.to_pandas(), pd_df_b.index) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_index_equal(bf_df_a.columns, pd_df_a.columns) + bigframes.testing.utils.assert_index_equal(bf_df_b.index.to_pandas(), pd_df_b.index) + bigframes.testing.utils.assert_frame_equal( + bf_result, pd_result, check_index_type=False + ) @all_joins @@ -3297,14 +3311,14 @@ def test_join_param_on_with_duplicate_column_name_not_on_col( pd_result = pd_df_a.join( pd_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r" ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.sort_index(), pd_result.sort_index(), check_like=True, check_index_type=False, check_names=False, ) - bigframes.testing.assert_index_equal(bf_result.columns, pd_result.columns) + bigframes.testing.utils.assert_index_equal(bf_result.columns, pd_result.columns) @pytest.mark.skipif( @@ -3335,14 +3349,14 @@ def test_join_param_on_with_duplicate_column_name_on_col( pd_result = pd_df_a.join( pd_df_b, on="int64_too", how=how, lsuffix="_l", rsuffix="_r" ) - 
bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.sort_index(), pd_result.sort_index(), check_like=True, check_index_type=False, check_names=False, ) - bigframes.testing.assert_index_equal(bf_result.columns, pd_result.columns) + bigframes.testing.utils.assert_index_equal(bf_result.columns, pd_result.columns) @all_joins @@ -3487,7 +3501,7 @@ def test_dataframe_numeric_analytic_op( bf_series = operator(scalars_df_index[columns]) pd_series = operator(scalars_pandas_df_index[columns]) bf_result = bf_series.to_pandas() - bigframes.testing.assert_frame_equal(pd_series, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_series, bf_result, check_dtype=False) @pytest.mark.parametrize( @@ -3512,7 +3526,7 @@ def test_dataframe_general_analytic_op( bf_series = operator(scalars_df_index[col_names]) pd_series = operator(scalars_pandas_df_index[col_names]) bf_result = bf_series.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_series, bf_result, ) @@ -3530,7 +3544,7 @@ def test_dataframe_diff(scalars_df_index, scalars_pandas_df_index, periods): col_names = ["int64_too", "float64_col", "int64_col"] bf_result = scalars_df_index[col_names].diff(periods=periods).to_pandas() pd_result = scalars_pandas_df_index[col_names].diff(periods=periods) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, ) @@ -3549,7 +3563,7 @@ def test_dataframe_pct_change(scalars_df_index, scalars_pandas_df_index, periods bf_result = scalars_df_index[col_names].pct_change(periods=periods).to_pandas() # pandas 3.0 does not automatically ffill anymore pd_result = scalars_pandas_df_index[col_names].ffill().pct_change(periods=periods) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, ) @@ -3563,7 +3577,7 @@ def test_dataframe_agg_single_string(scalars_dfs): pd_result = 
scalars_pandas_df[numeric_cols].agg("sum") assert bf_result.dtype == "Float64" - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -3583,7 +3597,7 @@ def test_dataframe_agg_int_single_string(scalars_dfs, agg): pd_result = scalars_pandas_df[numeric_cols].agg(agg) assert bf_result.dtype == "Int64" - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -3638,7 +3652,7 @@ def test_dataframe_agg_int_multi_string(scalars_dfs): # Pandas may produce narrower numeric types # Pandas has object index type - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -3681,7 +3695,7 @@ def test_df_transpose_repeated_uses_cache(): bf_df = bf_df.transpose() + i pd_df = pd_df.transpose() + i - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_df, bf_df.to_pandas(), check_dtype=False, check_index_type=False ) @@ -3724,7 +3738,7 @@ def test_df_melt_default(scalars_dfs): pd_result = scalars_pandas_df[columns].melt() # Pandas produces int64 index, Bigframes produces Int64 (nullable) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, @@ -3753,7 +3767,7 @@ def test_df_melt_parameterized(scalars_dfs): ) # Pandas produces int64 index, Bigframes produces Int64 (nullable) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, check_dtype=False ) @@ -3806,7 +3820,7 @@ def test_df_pivot(scalars_dfs, values, index, columns): # Pandas produces NaN, where bq dataframes produces pd.NA bf_result = bf_result.fillna(float("nan")) pd_result = pd_result.fillna(float("nan")) - 
bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) @pytest.mark.parametrize( @@ -3827,7 +3841,7 @@ def test_df_pivot_hockey(hockey_df, hockey_pandas_df, values, index, columns): ) # Pandas produces NaN, where bq dataframes produces pd.NA - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) @pytest.mark.parametrize( @@ -3868,7 +3882,7 @@ def test_df_pivot_table( aggfunc=aggfunc, fill_value=fill_value, ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_column_type=False ) @@ -3948,7 +3962,7 @@ def test__dir__with_rename(scalars_dfs): def test_loc_select_columns_w_repeats(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[["int64_col", "int64_col", "int64_too"]].to_pandas() pd_result = scalars_pandas_df_index[["int64_col", "int64_col", "int64_too"]] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -3972,7 +3986,7 @@ def test_loc_select_columns_w_repeats(scalars_df_index, scalars_pandas_df_index) def test_iloc_slice(scalars_df_index, scalars_pandas_df_index, start, stop, step): bf_result = scalars_df_index.iloc[start:stop:step].to_pandas() pd_result = scalars_pandas_df_index.iloc[start:stop:step] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -3990,7 +4004,7 @@ def test_iloc_slice_after_cache( scalars_df_index.cache() bf_result = scalars_df_index.iloc[start:stop:step].to_pandas() pd_result = scalars_pandas_df_index.iloc[start:stop:step] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4023,7 +4037,7 @@ def test_iloc_single_integer(scalars_df_index, 
scalars_pandas_df_index, index): bf_result = scalars_df_index.iloc[index] pd_result = scalars_pandas_df_index.iloc[index] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -4048,14 +4062,14 @@ def test_iloc_tuple_multi_columns(scalars_df_index, scalars_pandas_df_index, ind bf_result = scalars_df_index.iloc[index].to_pandas() pd_result = scalars_pandas_df_index.iloc[index] - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_iloc_tuple_multi_columns_single_row(scalars_df_index, scalars_pandas_df_index): index = (2, [2, 1, 3, -4]) bf_result = scalars_df_index.iloc[index] pd_result = scalars_pandas_df_index.iloc[index] - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -4109,7 +4123,7 @@ def test_loc_bool_series(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.loc[scalars_df_index.bool_col].to_pandas() pd_result = scalars_pandas_df_index.loc[scalars_pandas_df_index.bool_col] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4120,7 +4134,7 @@ def test_loc_list_select_rows_and_columns(scalars_df_index, scalars_pandas_df_in bf_result = scalars_df_index.loc[idx_list, ["bool_col", "int64_col"]].to_pandas() pd_result = scalars_pandas_df_index.loc[idx_list, ["bool_col", "int64_col"]] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4129,7 +4143,7 @@ def test_loc_list_select_rows_and_columns(scalars_df_index, scalars_pandas_df_in def test_loc_select_column(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.loc[:, "int64_col"].to_pandas() pd_result = scalars_pandas_df_index.loc[:, "int64_col"] - bigframes.testing.assert_series_equal( + 
bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -4140,7 +4154,7 @@ def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_in pd_result = scalars_pandas_df_index.loc[ :, scalars_pandas_df_index.dtypes == "Int64" ] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4163,7 +4177,7 @@ def test_loc_select_with_column_condition_bf_series( pd_result = scalars_pandas_df_index.loc[ :, scalars_pandas_df_index.nunique() > size_half ] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4177,7 +4191,7 @@ def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_ind index = "Hello, World!" bf_result = scalars_df_index.loc[index] pd_result = scalars_pandas_df_index.loc[index] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -4189,7 +4203,7 @@ def test_loc_single_index_no_duplicate(scalars_df_index, scalars_pandas_df_index index = -2345 bf_result = scalars_df_index.loc[index] pd_result = scalars_pandas_df_index.loc[index] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -4203,7 +4217,7 @@ def test_at_with_duplicate(scalars_df_index, scalars_pandas_df_index): index = "Hello, World!" 
bf_result = scalars_df_index.at[index, "int64_too"] pd_result = scalars_pandas_df_index.at[index, "int64_too"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4228,7 +4242,7 @@ def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): # pandas uses float64 instead pd_df["new_col"] = pd_df["new_col"].astype("Float64") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_df.to_pandas(), pd_df, ) @@ -4252,7 +4266,7 @@ def test_loc_setitem_bool_series_scalar_existing_col(scalars_dfs, col, value): bf_df.loc[bf_df["int64_too"] == 1, col] = value pd_df.loc[pd_df["int64_too"] == 1, col] = value - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_df.to_pandas(), pd_df, ) @@ -4405,7 +4419,9 @@ def test_dataframe_aggregates_quantile_mono(scalars_df_index, scalars_pandas_df_ # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_result = pd_result.astype("Float64") - bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, check_index_type=False + ) def test_dataframe_aggregates_quantile_multi(scalars_df_index, scalars_pandas_df_index): @@ -4418,7 +4434,7 @@ def test_dataframe_aggregates_quantile_multi(scalars_df_index, scalars_pandas_df pd_result = pd_result.astype("Float64") pd_result.index = pd_result.index.astype("Float64") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -4444,7 +4460,9 @@ def test_dataframe_bool_aggregates(scalars_df_index, scalars_pandas_df_index, op bf_result = bf_series.to_pandas() pd_series.index = pd_series.index.astype(bf_result.index.dtype) - bigframes.testing.assert_series_equal(pd_series, bf_result, check_index_type=False) + 
bigframes.testing.utils.assert_series_equal( + pd_series, bf_result, check_index_type=False + ) def test_dataframe_prod(scalars_df_index, scalars_pandas_df_index): @@ -4456,7 +4474,9 @@ def test_dataframe_prod(scalars_df_index, scalars_pandas_df_index): # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_series = pd_series.astype("Float64") # Pandas has object index type - bigframes.testing.assert_series_equal(pd_series, bf_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + pd_series, bf_result, check_index_type=False + ) def test_df_skew_too_few_values(scalars_dfs): @@ -4468,7 +4488,9 @@ def test_df_skew_too_few_values(scalars_dfs): # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_result = pd_result.astype("Float64") - bigframes.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + pd_result, bf_result, check_index_type=False + ) @pytest.mark.parametrize( @@ -4501,7 +4523,9 @@ def test_df_kurt_too_few_values(scalars_dfs): # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_result = pd_result.astype("Float64") - bigframes.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + pd_result, bf_result, check_index_type=False + ) def test_df_kurt(scalars_dfs): @@ -4513,7 +4537,9 @@ def test_df_kurt(scalars_dfs): # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_result = pd_result.astype("Float64") - bigframes.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + pd_result, bf_result, check_index_type=False + ) @pytest.mark.parametrize( @@ -4597,7 +4623,7 @@ def test_df_add_prefix(scalars_df_index, scalars_pandas_df_index, axis): pd_result = scalars_pandas_df_index.add_prefix("prefix_", axis) - 
bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, @@ -4618,7 +4644,7 @@ def test_df_add_suffix(scalars_df_index, scalars_pandas_df_index, axis): pd_result = scalars_pandas_df_index.add_suffix("_suffix", axis) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, @@ -4638,7 +4664,7 @@ def test_df_columns_filter_items(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.filter(items=["string_col", "int64_col"]) # Ignore column ordering as pandas order differently depending on version - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.sort_index(axis=1), pd_result.sort_index(axis=1), ) @@ -4649,7 +4675,7 @@ def test_df_columns_filter_like(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.filter(like="64_col") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4660,7 +4686,7 @@ def test_df_columns_filter_regex(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.filter(regex="^[^_]+$") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4692,7 +4718,7 @@ def test_df_rows_filter_like(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.filter(like="ello", axis=0) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4706,7 +4732,7 @@ def test_df_rows_filter_regex(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.filter(regex="^[GH].*", axis=0) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4737,7 +4763,7 @@ def test_df_reindex_rows_index(scalars_df_index, scalars_pandas_df_index): # 
Pandas uses int64 instead of Int64 (nullable) dtype. pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4762,7 +4788,7 @@ def test_df_reindex_columns(scalars_df_index, scalars_pandas_df_index): # Pandas uses float64 as default for newly created empty column, bf uses Float64 pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype()) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4777,7 +4803,7 @@ def test_df_reindex_columns_with_same_order(scalars_df_index, scalars_pandas_df_ bf_result = bf.reindex(columns=columns).to_pandas() pd_result = pd_df.reindex(columns=columns) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4867,7 +4893,7 @@ def test_df_reindex_like(scalars_df_index, scalars_pandas_df_index): pd_result.index = pd_result.index.astype(pd.Int64Dtype()) # Pandas uses float64 as default for newly created empty column, bf uses Float64 pd_result.not_a_col = pd_result.not_a_col.astype(pandas.Float64Dtype()) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -4878,7 +4904,7 @@ def test_df_values(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.values # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False ) @@ -4888,7 +4914,7 @@ def test_df_to_numpy(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.to_numpy() # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd.DataFrame(bf_result), 
pd.DataFrame(pd_result), check_dtype=False ) @@ -4898,7 +4924,7 @@ def test_df___array__(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.__array__() # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd.DataFrame(bf_result), pd.DataFrame(pd_result), check_dtype=False ) @@ -4990,7 +5016,7 @@ def test_loc_list_string_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.loc[index_list].to_pandas() pd_result = scalars_pandas_df_index.loc[index_list] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -5002,7 +5028,7 @@ def test_loc_list_integer_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.loc[index_list] pd_result = scalars_pandas_df_index.loc[index_list] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5037,7 +5063,7 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index, index_list): bf_result = scalars_df_index.iloc[index_list] pd_result = scalars_pandas_df_index.iloc[index_list] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5057,7 +5083,7 @@ def test_iloc_list_partial_ordering( bf_result = scalars_df_partial_ordering.iloc[index_list] pd_result = scalars_pandas_df_index.iloc[index_list] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5075,7 +5101,7 @@ def test_iloc_list_multiindex(scalars_dfs): bf_result = scalars_df.iloc[index_list] pd_result = scalars_pandas_df.iloc[index_list] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5095,7 +5121,7 @@ def test_rename_axis(scalars_df_index, 
scalars_pandas_df_index): bf_result = scalars_df_index.rename_axis("newindexname") pd_result = scalars_pandas_df_index.rename_axis("newindexname") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5105,7 +5131,7 @@ def test_rename_axis_nonstring(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.rename_axis((4,)) pd_result = scalars_pandas_df_index.rename_axis((4,)) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5121,7 +5147,7 @@ def test_loc_bf_series_string_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.loc[bf_string_series] pd_result = scalars_pandas_df_index.loc[pd_string_series] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5139,7 +5165,7 @@ def test_loc_bf_series_multiindex(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_multiindex.loc[bf_string_series] pd_result = scalars_pandas_df_multiindex.loc[pd_string_series] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5152,7 +5178,7 @@ def test_loc_bf_index_integer_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.loc[bf_index] pd_result = scalars_pandas_df_index.loc[pd_index] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5172,7 +5198,7 @@ def test_loc_bf_index_integer_index_renamed_col( bf_result = scalars_df_index.loc[bf_index] pd_result = scalars_pandas_df_index.loc[pd_index] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.to_pandas(), pd_result, ) @@ -5198,7 +5224,7 @@ def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, sub columns = ["bool_col", 
"int64_too", "int64_col"] bf_df = scalars_df_index[columns].drop_duplicates(subset, keep=keep).to_pandas() pd_df = scalars_pandas_df_index[columns].drop_duplicates(subset, keep=keep) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_df, bf_df, ) @@ -5225,7 +5251,7 @@ def test_df_drop_duplicates_w_json(json_df, keep): pd_df = json_pandas_df.drop_duplicates(keep=keep) pd_df["json_col"] = pd_df["json_col"].astype(dtypes.JSON_DTYPE) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_df, bf_df, ) @@ -5250,7 +5276,7 @@ def test_df_duplicated(scalars_df_index, scalars_pandas_df_index, keep, subset): columns = ["bool_col", "int64_too", "int64_col"] bf_series = scalars_df_index[columns].duplicated(subset, keep=keep).to_pandas() pd_series = scalars_pandas_df_index[columns].duplicated(subset, keep=keep) - bigframes.testing.assert_series_equal(pd_series, bf_series, check_dtype=False) + bigframes.testing.utils.assert_series_equal(pd_series, bf_series, check_dtype=False) def test_df_from_dict_columns_orient(): @@ -5479,7 +5505,7 @@ def test_df_eval(scalars_dfs, expr): bf_result = scalars_df.eval(expr).to_pandas() pd_result = scalars_pandas_df.eval(expr) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -5500,7 +5526,7 @@ def test_df_query(scalars_dfs, expr): bf_result = scalars_df.query(expr).to_pandas() pd_result = scalars_pandas_df.query(expr) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -5525,7 +5551,7 @@ def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna): subset, normalize=normalize, ascending=ascending, dropna=dropna ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, @@ -5577,7 
+5603,7 @@ def test_df_rank_with_nulls( .astype(pd.Float64Dtype()) ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -5674,7 +5700,7 @@ def test_df_dot_inline(session): pd_result[name] = pd_result[name].astype(pd.Int64Dtype()) pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -5691,7 +5717,7 @@ def test_df_dot( for name in pd_result.columns: pd_result[name] = pd_result[name].astype(pd.Int64Dtype()) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -5708,7 +5734,7 @@ def test_df_dot_operator( for name in pd_result.columns: pd_result[name] = pd_result[name].astype(pd.Int64Dtype()) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -5731,7 +5757,7 @@ def test_df_dot_series_inline(): pd_result = pd_result.astype(pd.Int64Dtype()) pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -5747,7 +5773,7 @@ def test_df_dot_series( # Pandas result is object instead of Int64 (nullable) dtype. pd_result = pd_result.astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -5763,7 +5789,7 @@ def test_df_dot_operator_series( # Pandas result is object instead of Int64 (nullable) dtype. 
pd_result = pd_result.astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -5905,7 +5931,7 @@ def test_dataframe_explode(col_names, ignore_index, session): bf_materialized = bf_result.to_pandas() execs_post = metrics.execution_count - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_materialized, pd_result, check_index_type=False, @@ -5936,7 +5962,7 @@ def test_dataframe_explode_reserve_order(ignore_index, ordered): pd_res = pd_df.explode(["a", "b"], ignore_index=ignore_index).astype( pd.Int64Dtype() ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( res if ordered else res.sort_index(), pd_res, check_index_type=False, @@ -5990,7 +6016,7 @@ def test_resample_with_column( ].max() # TODO: (b/484364312) pd_result.index.names = bf_result.index.names - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -6106,7 +6132,7 @@ def test_resample_start_time(rule, origin, data): # TODO: (b/484364312) pd_result.index.names = bf_result.index.names - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -6131,7 +6157,9 @@ def test_df_astype(scalars_dfs, dtype): bf_result = bf_df.astype(dtype).to_pandas() pd_result = pd_df.astype(dtype) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_frame_equal( + bf_result, pd_result, check_index_type=False + ) def test_df_astype_python_types(scalars_dfs): @@ -6145,7 +6173,9 @@ def test_df_astype_python_types(scalars_dfs): {"bool_col": "string[pyarrow]", "int64_col": pd.Float64Dtype()} ) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_frame_equal( + 
bf_result, pd_result, check_index_type=False + ) def test_astype_invalid_type_fail(scalars_dfs): @@ -6165,7 +6195,7 @@ def test_agg_with_dict_lists_strings(scalars_dfs): bf_result = bf_df.agg(agg_funcs).to_pandas() pd_result = pd_df.agg(agg_funcs) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -6185,7 +6215,7 @@ def test_agg_with_dict_lists_callables(scalars_dfs): bf_result = bf_df.agg(agg_funcs).to_pandas() pd_result = pd_df.agg(agg_funcs) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -6200,7 +6230,7 @@ def test_agg_with_dict_list_and_str(scalars_dfs): bf_result = bf_df.agg(agg_funcs).to_pandas() pd_result = pd_df.agg(agg_funcs) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -6217,7 +6247,7 @@ def test_agg_with_dict_strs(scalars_dfs): pd_result = pd_df.agg(agg_funcs) pd_result.index = pd_result.index.astype("string[pyarrow]") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -6239,7 +6269,7 @@ def test_df_agg_with_builtins(scalars_dfs): .agg({"int64_col": [len, sum, min, max, list], "bool_col": [all, any, max]}) ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index cce230ae17..fece679d06 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -63,7 +63,7 @@ def test_sql_executes(scalars_df_default_index, bigquery_client): .reset_index(drop=True) ) bq_result["bytes_col"] = 
bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE) - bigframes.testing.assert_frame_equal(bf_result, bq_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, bq_result, check_dtype=False) def test_sql_executes_and_includes_named_index( @@ -95,7 +95,7 @@ def test_sql_executes_and_includes_named_index( .sort_values("rowindex") ) bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, bq_result, check_dtype=False, check_index_type=False ) @@ -129,7 +129,7 @@ def test_sql_executes_and_includes_named_multiindex( .sort_values("rowindex") ) bq_result["bytes_col"] = bq_result["bytes_col"].astype(dtypes.BYTES_DTYPE) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, bq_result, check_dtype=False, check_index_type=False ) @@ -367,7 +367,7 @@ def test_to_pandas_batches_w_empty_dataframe(session): assert len(results) == 1 assert list(results[0].index.names) == ["idx1", "idx2"] assert list(results[0].columns) == ["col1", "col2"] - bigframes.testing.assert_series_equal(results[0].dtypes, empty.dtypes) + bigframes.testing.utils.assert_series_equal(results[0].dtypes, empty.dtypes) @pytest.mark.skipif( diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index b6c8709191..aa914993b9 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -51,7 +51,7 @@ def test_dataframe_groupby_numeric_aggregate( pd_result = operator(scalars_pandas_df_index[col_names].groupby("string_col")) bf_result_computed = bf_result.to_pandas() # Pandas std function produces float64, not matching Float64 from bigframes - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -60,7 +60,7 @@ def test_dataframe_groupby_head(scalars_df_index, scalars_pandas_df_index): col_names = 
["int64_too", "float64_col", "int64_col", "bool_col", "string_col"] bf_result = scalars_df_index[col_names].groupby("bool_col").head(2).to_pandas() pd_result = scalars_pandas_df_index[col_names].groupby("bool_col").head(2) - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result, check_dtype=False) def test_dataframe_groupby_len(scalars_df_index, scalars_pandas_df_index): @@ -101,7 +101,7 @@ def test_dataframe_groupby_quantile(scalars_df_index, scalars_pandas_df_index, q scalars_df_index[col_names].groupby("string_col").quantile(q) ).to_pandas() pd_result = scalars_pandas_df_index[col_names].groupby("string_col").quantile(q) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -141,7 +141,7 @@ def test_dataframe_groupby_rank( .astype("float64") .astype("Float64") ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -169,7 +169,7 @@ def test_dataframe_groupby_aggregate( pd_result = operator(scalars_pandas_df_index[col_names].groupby("string_col")) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -179,7 +179,7 @@ def test_dataframe_groupby_corr(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_names].groupby("bool_col").corr().to_pandas() pd_result = scalars_pandas_df_index[col_names].groupby("bool_col").corr() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -189,7 +189,7 @@ def test_dataframe_groupby_cov(scalars_df_index, scalars_pandas_df_index): bf_result = 
scalars_df_index[col_names].groupby("bool_col").cov().to_pandas() pd_result = scalars_pandas_df_index[col_names].groupby("bool_col").cov() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -209,7 +209,7 @@ def test_dataframe_groupby_agg_string( pd_result = scalars_pandas_df_index[col_names].groupby("string_col").agg("count") bf_result_computed = bf_result.to_pandas(ordered=ordered) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False, ignore_order=not ordered ) @@ -219,7 +219,7 @@ def test_dataframe_groupby_agg_size_string(scalars_df_index, scalars_pandas_df_i bf_result = scalars_df_index[col_names].groupby("string_col").agg("size") pd_result = scalars_pandas_df_index[col_names].groupby("string_col").agg("size") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result.to_pandas(), check_dtype=False ) @@ -239,7 +239,7 @@ def test_dataframe_groupby_agg_list(scalars_df_index, scalars_pandas_df_index): # some inconsistency between versions, so normalize to bigframes behavior pd_result = pd_result.rename({"amin": "min"}, axis="columns") bf_result_computed = bf_result_computed.rename({"amin": "min"}, axis="columns") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False, check_index_type=False ) @@ -258,7 +258,7 @@ def test_dataframe_groupby_agg_list_w_column_multi_index( pd_result = pd_df.groupby(level=0).agg(["count", np.min, "size"]) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -290,7 +290,7 @@ def test_dataframe_groupby_agg_dict_with_list( ) bf_result_computed = bf_result.to_pandas() - 
bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False, check_index_type=False ) @@ -309,7 +309,7 @@ def test_dataframe_groupby_agg_dict_no_lists(scalars_df_index, scalars_pandas_df ) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -334,7 +334,7 @@ def test_dataframe_groupby_agg_named(scalars_df_index, scalars_pandas_df_index): ) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -356,7 +356,7 @@ def test_dataframe_groupby_agg_kw_tuples(scalars_df_index, scalars_pandas_df_ind ) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -403,7 +403,7 @@ def test_dataframe_groupby_multi_sum( # BigQuery DataFrames default indices use nullable Int64 always pd_series.index = pd_series.index.astype("Int64") - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_series, bf_result, ) @@ -442,7 +442,7 @@ def test_dataframe_groupby_analytic( ) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -465,7 +465,7 @@ def test_dataframe_groupby_cumcount( ) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -477,7 +477,7 @@ def test_dataframe_groupby_size_as_index_false( bf_result_computed = bf_result.to_pandas() pd_result = scalars_pandas_df_index.groupby("string_col", as_index=False).size() - 
bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False, check_index_type=False ) @@ -489,7 +489,7 @@ def test_dataframe_groupby_size_as_index_true( pd_result = scalars_pandas_df_index.groupby("string_col", as_index=True).size() bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -499,7 +499,7 @@ def test_dataframe_groupby_skew(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_names].groupby("bool_col").skew().to_pandas() pd_result = scalars_pandas_df_index[col_names].groupby("bool_col").skew() - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.skipif( @@ -512,7 +512,7 @@ def test_dataframe_groupby_kurt(scalars_df_index, scalars_pandas_df_index): # Pandas doesn't have groupby.kurt yet: https://github.com/pandas-dev/pandas/issues/40139 pd_result = scalars_pandas_df_index[col_names].groupby("bool_col").kurt() - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.parametrize( @@ -528,7 +528,7 @@ def test_dataframe_groupby_diff(scalars_df_index, scalars_pandas_df_index, order pd_result = scalars_pandas_df_index[col_names].groupby("string_col").diff(-1) bf_result_computed = bf_result.to_pandas(ordered=ordered) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False, ignore_order=not ordered ) @@ -545,7 +545,7 @@ def test_dataframe_groupby_getitem( scalars_pandas_df_index[col_names].groupby("string_col")["int64_col"].min() ) - bigframes.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + 
bigframes.testing.utils.assert_series_equal(pd_result, bf_result, check_dtype=False) def test_dataframe_groupby_getitem_error( @@ -576,7 +576,7 @@ def test_dataframe_groupby_getitem_list( scalars_pandas_df_index[col_names].groupby("string_col")[col_names].min() ) - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result, check_dtype=False) def test_dataframe_groupby_getitem_list_error( @@ -609,7 +609,7 @@ def test_dataframe_groupby_nonnumeric_with_mean(): bf_result = bpd.DataFrame(df).groupby(["key1", "key2"]).mean().to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, check_index_type=False, check_dtype=False ) @@ -654,10 +654,14 @@ def test_dataframe_groupby_value_counts( ) if as_index: - bigframes.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_series_equal( + pd_result, bf_result, check_dtype=False + ) else: pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal( + pd_result, bf_result, check_dtype=False + ) @pytest.mark.parametrize( @@ -683,7 +687,7 @@ def test_dataframe_groupby_first( .groupby(scalars_pandas_df_index.int64_col % 2) .first(numeric_only=numeric_only, min_count=min_count) ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, ) @@ -707,7 +711,7 @@ def test_dataframe_groupby_last( pd_result = scalars_pandas_df_index.groupby( scalars_pandas_df_index.int64_col % 2 ).last(numeric_only=numeric_only, min_count=min_count) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result, ) @@ -743,7 +747,7 @@ def test_series_groupby_agg_string(scalars_df_index, scalars_pandas_df_index, ag ) bf_result_computed = bf_result.to_pandas() - 
bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result_computed, check_dtype=False, check_names=False ) @@ -761,7 +765,7 @@ def test_series_groupby_agg_list(scalars_df_index, scalars_pandas_df_index): ) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result, bf_result_computed, check_dtype=False, check_names=False ) @@ -816,7 +820,7 @@ def test_series_groupby_rank( .astype("float64") .astype("Float64") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -831,7 +835,7 @@ def test_series_groupby_head(scalars_df_index, scalars_pandas_df_index, dropna): pd_result = scalars_pandas_df_index.groupby("bool_col", dropna=dropna)[ "int64_too" ].head(1) - bigframes.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_series_equal(pd_result, bf_result, check_dtype=False) def test_series_groupby_kurt(scalars_df_index, scalars_pandas_df_index): @@ -846,7 +850,7 @@ def test_series_groupby_kurt(scalars_df_index, scalars_pandas_df_index): pd.Series.kurt ) - bigframes.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_series_equal(pd_result, bf_result, check_dtype=False) def test_series_groupby_size(scalars_df_index, scalars_pandas_df_index): @@ -860,7 +864,7 @@ def test_series_groupby_size(scalars_df_index, scalars_pandas_df_index): ) bf_result_computed = bf_result.to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result_computed, check_dtype=False ) @@ -878,7 +882,7 @@ def test_series_groupby_skew(scalars_df_index, scalars_pandas_df_index): .skew() ) - bigframes.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + 
bigframes.testing.utils.assert_series_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.parametrize( @@ -893,7 +897,7 @@ def test_series_groupby_quantile(scalars_df_index, scalars_pandas_df_index, q): scalars_df_index.groupby("string_col")["int64_col"].quantile(q) ).to_pandas() pd_result = scalars_pandas_df_index.groupby("string_col")["int64_col"].quantile(q) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) @@ -934,7 +938,7 @@ def test_series_groupby_value_counts( pd_result = scalars_pandas_df_index.groupby("bool_col")["string_col"].value_counts( normalize=normalize, ascending=ascending, dropna=dropna ) - bigframes.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_series_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.parametrize( @@ -955,7 +959,7 @@ def test_series_groupby_first( pd_result = scalars_pandas_df_index.groupby("string_col")["int64_col"].first( numeric_only=numeric_only, min_count=min_count ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -979,4 +983,4 @@ def test_series_groupby_last( pd_result = scalars_pandas_df_index.groupby("string_col")["int64_col"].last( numeric_only=numeric_only, min_count=min_count ) - bigframes.testing.assert_series_equal(pd_result, bf_result) + bigframes.testing.utils.assert_series_equal(pd_result, bf_result) diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index ed901f9562..e086ef6130 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ -58,7 +58,7 @@ def test_multi_index_from_arrays(): names=[" 1index 1", "_1index 2"], ) assert bf_idx.names == pd_idx.names - bigframes.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx) + bigframes.testing.utils.assert_index_equal(bf_idx.to_pandas(), pd_idx) 
def test_read_pandas_multi_index_axes(): @@ -90,7 +90,7 @@ def test_read_pandas_multi_index_axes(): bf_df = bpd.DataFrame(pandas_df) bf_df_computed = bf_df.to_pandas() - bigframes.testing.assert_frame_equal(bf_df_computed, pandas_df) + bigframes.testing.utils.assert_frame_equal(bf_df_computed, pandas_df) # Row Multi-index tests @@ -98,7 +98,7 @@ def test_set_multi_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.set_index(["bool_col", "int64_too"]).to_pandas() pd_result = scalars_pandas_df_index.set_index(["bool_col", "int64_too"]) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -127,7 +127,7 @@ def test_df_reset_multi_index(scalars_df_index, scalars_pandas_df_index, level, if pd_result.index.dtype != bf_result.index.dtype: pd_result.index = pd_result.index.astype(bf_result.index.dtype) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -160,9 +160,9 @@ def test_series_reset_multi_index( pd_result.index = pd_result.index.astype(pandas.Int64Dtype()) if drop: - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) else: - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_series_multi_index_idxmin(scalars_df_index, scalars_pandas_df_index): @@ -187,7 +187,7 @@ def test_binop_series_series_matching_multi_indices( bf_result = bf_left["int64_col"] + bf_right["int64_too"] pd_result = pd_left["int64_col"] + pd_right["int64_too"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.sort_index().to_pandas(), pd_result.sort_index() ) @@ -203,7 +203,7 @@ def test_binop_df_series_matching_multi_indices( bf_result = bf_left[["int64_col", 
"int64_too"]].add(bf_right["int64_too"], axis=0) pd_result = pd_left[["int64_col", "int64_too"]].add(pd_right["int64_too"], axis=0) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.sort_index().to_pandas(), pd_result.sort_index() ) @@ -217,7 +217,7 @@ def test_binop_multi_index_mono_index(scalars_df_index, scalars_pandas_df_index) bf_result = bf_left["int64_col"] + bf_right["int64_too"] pd_result = pd_left["int64_col"] + pd_right["int64_too"] - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) def test_binop_overlapping_multi_indices(scalars_df_index, scalars_pandas_df_index): @@ -229,7 +229,7 @@ def test_binop_overlapping_multi_indices(scalars_df_index, scalars_pandas_df_ind bf_result = bf_left["int64_col"] + bf_right["int64_too"] pd_result = pd_left["int64_col"] + pd_right["int64_too"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.sort_index().to_pandas(), pd_result.sort_index() ) @@ -245,7 +245,7 @@ def test_concat_compatible_multi_indices(scalars_df_index, scalars_pandas_df_ind bf_result = bpd.concat([bf_left, bf_right]) pd_result = pandas.concat([pd_left, pd_right]) - bigframes.testing.assert_frame_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result.to_pandas(), pd_result) def test_concat_multi_indices_ignore_index(scalars_df_index, scalars_pandas_df_index): @@ -260,7 +260,7 @@ def test_concat_multi_indices_ignore_index(scalars_df_index, scalars_pandas_df_i # Pandas uses int64 instead of Int64 (nullable) dtype. 
pd_result.index = pd_result.index.astype(pandas.Int64Dtype()) - bigframes.testing.assert_frame_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -277,7 +277,7 @@ def test_multi_index_loc_multi_row(scalars_df_index, scalars_pandas_df_index, ke ) pd_result = scalars_pandas_df_index.set_index(["int64_too", "string_col"]).loc[key] - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_multi_index_loc_single_row(scalars_df_index, scalars_pandas_df_index): @@ -288,7 +288,7 @@ def test_multi_index_loc_single_row(scalars_df_index, scalars_pandas_df_index): (2, "capitalize, This ") ] - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_multi_index_getitem_bool(scalars_df_index, scalars_pandas_df_index): @@ -298,7 +298,7 @@ def test_multi_index_getitem_bool(scalars_df_index, scalars_pandas_df_index): bf_result = bf_frame[bf_frame["int64_col"] > 0].to_pandas() pd_result = pd_frame[pd_frame["int64_col"] > 0] - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -318,7 +318,7 @@ def test_df_multi_index_droplevel(scalars_df_index, scalars_pandas_df_index, lev bf_result = bf_frame.droplevel(level).to_pandas() pd_result = pd_frame.droplevel(level) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -338,7 +338,7 @@ def test_series_multi_index_droplevel(scalars_df_index, scalars_pandas_df_index, bf_result = bf_frame["string_col"].droplevel(level).to_pandas() pd_result = pd_frame["string_col"].droplevel(level) - bigframes.testing.assert_series_equal(bf_result, pd_result) + 
bigframes.testing.utils.assert_series_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -357,7 +357,7 @@ def test_multi_index_drop(scalars_df_index, scalars_pandas_df_index, labels, lev bf_result = bf_frame.drop(labels=labels, axis="index", level=level).to_pandas() pd_result = pd_frame.drop(labels=labels, axis="index", level=level) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -382,7 +382,7 @@ def test_df_multi_index_reorder_levels( bf_result = bf_frame.reorder_levels(order).to_pandas() pd_result = pd_frame.reorder_levels(order) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -407,7 +407,7 @@ def test_series_multi_index_reorder_levels( bf_result = bf_frame["string_col"].reorder_levels(order).to_pandas() pd_result = pd_frame["string_col"].reorder_levels(order) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_df_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index): @@ -417,7 +417,7 @@ def test_df_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index): bf_result = bf_frame.swaplevel().to_pandas() pd_result = pd_frame.swaplevel() - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_series_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index): @@ -427,7 +427,7 @@ def test_series_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index) bf_result = bf_frame["string_col"].swaplevel(0, 2).to_pandas() pd_result = pd_frame["string_col"].swaplevel(0, 2) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_multi_index_series_groupby(scalars_df_index, 
scalars_pandas_df_index): @@ -443,7 +443,7 @@ def test_multi_index_series_groupby(scalars_df_index, scalars_pandas_df_index): pd_frame["float64_col"].groupby([pd_frame.int64_col % 2, "bool_col"]).mean() ) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -470,7 +470,7 @@ def test_multi_index_series_groupby_level( .mean() ) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_multi_index_dataframe_groupby(scalars_df_index, scalars_pandas_df_index): @@ -485,7 +485,7 @@ def test_multi_index_dataframe_groupby(scalars_df_index, scalars_pandas_df_index numeric_only=True ) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -521,7 +521,9 @@ def test_multi_index_dataframe_groupby_level_aggregate( bf_result = bf_result.drop(col, axis=1) # Pandas will have int64 index, while bigquery will have Int64 when resetting - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_frame_equal( + bf_result, pd_result, check_index_type=False + ) @pytest.mark.parametrize( @@ -554,7 +556,7 @@ def test_multi_index_dataframe_groupby_level_analytic( .cumsum(numeric_only=True) ) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) all_joins = pytest.mark.parametrize( @@ -584,7 +586,7 @@ def test_multi_index_dataframe_join(scalars_dfs, how): (["bool_col", "rowindex_2"]) )[["float64_col"]] pd_result = pd_df_a.join(pd_df_b, how=how) - bigframes.testing.assert_frame_equal(bf_result, pd_result, ignore_order=True) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, ignore_order=True) @all_joins @@ -605,7 +607,7 @@ def 
test_multi_index_dataframe_join_on(scalars_dfs, how): pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2) pd_df_b = pd_df[["float64_col"]] pd_result = pd_df_a.join(pd_df_b, on="rowindex_2", how=how) - bigframes.testing.assert_frame_equal(bf_result, pd_result, ignore_order=True) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, ignore_order=True) def test_multi_index_dataframe_where_series_cond_none_other( @@ -633,7 +635,7 @@ def test_multi_index_dataframe_where_series_cond_none_other( bf_result = dataframe_bf.where(series_cond_bf).to_pandas() pd_result = dataframe_pd.where(series_cond_pd) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, @@ -669,7 +671,7 @@ def test_multi_index_dataframe_where_series_cond_dataframe_other( bf_result = dataframe_bf.where(series_cond_bf, dataframe_other_bf).to_pandas() pd_result = dataframe_pd.where(series_cond_pd, dataframe_other_pd) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, @@ -701,7 +703,7 @@ def test_multi_index_dataframe_where_dataframe_cond_constant_other( bf_result = dataframe_bf.where(dataframe_cond_bf, other).to_pandas() pd_result = dataframe_pd.where(dataframe_cond_pd, other) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, @@ -734,7 +736,7 @@ def test_multi_index_dataframe_where_dataframe_cond_dataframe_other( bf_result = dataframe_bf.where(dataframe_cond_bf, dataframe_other_bf).to_pandas() pd_result = dataframe_pd.where(dataframe_cond_pd, dataframe_other_pd) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, @@ -766,7 +768,7 @@ def test_multi_index_series_groupby_level_aggregate( .mean() ) - bigframes.testing.assert_series_equal(bf_result, pd_result, 
check_dtype=False) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result, check_dtype=False) @pytest.mark.parametrize( @@ -793,7 +795,7 @@ def test_multi_index_series_groupby_level_analytic( .cumsum() ) - bigframes.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result, check_dtype=False) def test_multi_index_series_rename_dict_same_type( @@ -808,7 +810,7 @@ def test_multi_index_series_rename_dict_same_type( "string_col" ].rename({1: 100, 2: 200}) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -826,7 +828,7 @@ def test_multi_index_df_reindex(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index.set_index(["rowindex_2", "string_col"]).reindex( index=new_index ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -844,15 +846,15 @@ def test_column_multi_index_getitem(scalars_df_index, scalars_pandas_df_index): bf_a = bf_df["a"].to_pandas() pd_a = pd_df["a"] - bigframes.testing.assert_frame_equal(bf_a, pd_a) + bigframes.testing.utils.assert_frame_equal(bf_a, pd_a) bf_b = bf_df["b"].to_pandas() pd_b = pd_df["b"] - bigframes.testing.assert_frame_equal(bf_b, pd_b) + bigframes.testing.utils.assert_frame_equal(bf_b, pd_b) bf_fullkey = bf_df[("a", "int64_too")].to_pandas() pd_fullkey = pd_df[("a", "int64_too")] - bigframes.testing.assert_series_equal(bf_fullkey, pd_fullkey) + bigframes.testing.utils.assert_series_equal(bf_fullkey, pd_fullkey) def test_column_multi_index_concat(scalars_df_index, scalars_pandas_df_index): @@ -877,7 +879,7 @@ def test_column_multi_index_concat(scalars_df_index, scalars_pandas_df_index): bf_result = bpd.concat([bf_df1, bf_df2, bf_df1]).to_pandas() pd_result = pandas.concat([pd_df1, pd_df2, pd_df1]) - 
bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_column_multi_index_drop(scalars_df_index, scalars_pandas_df_index): @@ -890,7 +892,7 @@ def test_column_multi_index_drop(scalars_df_index, scalars_pandas_df_index): bf_a = bf_df.drop(("a", "int64_too"), axis=1).to_pandas() pd_a = pd_df.drop(("a", "int64_too"), axis=1) - bigframes.testing.assert_frame_equal(bf_a, pd_a) + bigframes.testing.utils.assert_frame_equal(bf_a, pd_a) @pytest.mark.parametrize( @@ -914,7 +916,7 @@ def test_column_multi_index_assign(scalars_df_index, scalars_pandas_df_index, ke pd_result = pd_df.assign(**kwargs) # Pandas assign results in non-nullable dtype - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) def test_column_multi_index_rename(scalars_df_index, scalars_pandas_df_index): @@ -928,7 +930,7 @@ def test_column_multi_index_rename(scalars_df_index, scalars_pandas_df_index): bf_result = bf_df.rename(columns={"b": "c"}).to_pandas() pd_result = pd_df.rename(columns={"b": "c"}) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -958,7 +960,7 @@ def test_column_multi_index_reset_index( # Pandas uses int64 instead of Int64 (nullable) dtype. 
pd_result.index = pd_result.index.astype(pandas.Int64Dtype()) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_column_multi_index_binary_op(scalars_df_index, scalars_pandas_df_index): @@ -972,7 +974,7 @@ def test_column_multi_index_binary_op(scalars_df_index, scalars_pandas_df_index) bf_result = (bf_df[("a", "a")] + 3).to_pandas() pd_result = pd_df[("a", "a")] + 3 - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_column_multi_index_any(): @@ -989,7 +991,7 @@ def test_column_multi_index_any(): pd_result = pd_df.isna().any() bf_result = bf_df.isna().any().to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result.reset_index(drop=False), pd_result.reset_index(drop=False), check_dtype=False, @@ -1009,7 +1011,9 @@ def test_column_multi_index_agg(scalars_df_index, scalars_pandas_df_index): # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_result = pd_result.astype("Float64") - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_frame_equal( + bf_result, pd_result, check_index_type=False + ) def test_column_multi_index_prefix_suffix(scalars_df_index, scalars_pandas_df_index): @@ -1023,7 +1027,7 @@ def test_column_multi_index_prefix_suffix(scalars_df_index, scalars_pandas_df_in bf_result = bf_df.add_prefix("prefixed_").add_suffix("_suffixed").to_pandas() pd_result = pd_df.add_prefix("prefixed_").add_suffix("_suffixed") - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_column_multi_index_cumsum(scalars_df_index, scalars_pandas_df_index): @@ -1039,7 +1043,7 @@ def test_column_multi_index_cumsum(scalars_df_index, scalars_pandas_df_index): bf_result = 
bf_df.cumsum().to_pandas() pd_result = pd_df.cumsum() - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) @pytest.mark.parametrize( @@ -1072,7 +1076,7 @@ def test_column_multi_index_stack(level): # Pandas produces NaN, where bq dataframes produces pd.NA # Column ordering seems to depend on pandas version assert isinstance(pd_result, pandas.DataFrame) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -1100,7 +1104,7 @@ def test_column_multi_index_melt(): pd_result = pd_df.melt() # BigFrames uses different string and int types, but values are identical - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_index_type=False, check_dtype=False ) @@ -1122,7 +1126,7 @@ def test_column_multi_index_unstack(scalars_df_index, scalars_pandas_df_index): # Pandas produces NaN, where bq dataframes produces pd.NA # Column ordering seems to depend on pandas version - bigframes.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result, check_dtype=False) def test_corr_w_multi_index(scalars_df_index, scalars_pandas_df_index): @@ -1143,7 +1147,7 @@ def test_corr_w_multi_index(scalars_df_index, scalars_pandas_df_index): # BigFrames and Pandas differ in their data type handling: # - Column types: BigFrames uses Float64, Pandas uses float64. # - Index types: BigFrames uses strign, Pandas uses object. 
- bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -1166,7 +1170,7 @@ def test_cov_w_multi_index(scalars_df_index, scalars_pandas_df_index): # BigFrames and Pandas differ in their data type handling: # - Column types: BigFrames uses Float64, Pandas uses float64. # - Index types: BigFrames uses string, Pandas uses object. - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -1245,7 +1249,7 @@ def test_column_multi_index_droplevel(scalars_df_index, scalars_pandas_df_index) bf_result = bf_df.droplevel(1, axis=1).to_pandas() pd_result = pd_df.droplevel(1, axis=1) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_df_column_multi_index_reindex(scalars_df_index, scalars_pandas_df_index): @@ -1267,7 +1271,7 @@ def test_df_column_multi_index_reindex(scalars_df_index, scalars_pandas_df_index # Pandas uses float64 as default for newly created empty column, bf uses Float64 pd_result[("z", "a")] = pd_result[("z", "a")].astype(pandas.Float64Dtype()) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, ) @@ -1286,7 +1290,7 @@ def test_column_multi_index_reorder_levels(scalars_df_index, scalars_pandas_df_i bf_result = bf_df.reorder_levels([-2, -1, 0], axis=1).to_pandas() pd_result = pd_df.reorder_levels([-2, -1, 0], axis=1) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -1303,7 +1307,7 @@ def test_df_multi_index_unstack(hockey_df, hockey_pandas_df, level): ["team_name", "position"], append=True ).unstack(level=level) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + 
bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) @pytest.mark.parametrize( @@ -1320,7 +1324,7 @@ def test_series_multi_index_unstack(hockey_df, hockey_pandas_df, level): "number" ].unstack(level=level) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) def test_column_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index): @@ -1336,7 +1340,7 @@ def test_column_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index) bf_result = bf_df.swaplevel(-3, -1, axis=1).to_pandas() pd_result = pd_df.swaplevel(-3, -1, axis=1) - bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_df_multi_index_dot_not_supported(): @@ -1410,7 +1414,7 @@ def test_explode_w_column_multi_index(): assert isinstance(pd_df, pandas.DataFrame) assert isinstance(pd_df["col0"], pandas.DataFrame) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( df["col0"].explode("col00").to_pandas(), pd_df["col0"].explode("col00"), check_dtype=False, @@ -1428,7 +1432,7 @@ def test_explode_w_multi_index(): df = bpd.DataFrame(data, index=multi_index, columns=columns) pd_df = df.to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( df.explode("col00").to_pandas(), pd_df.explode("col00"), check_dtype=False, @@ -1452,7 +1456,7 @@ def test_column_multi_index_w_na_stack(scalars_df_index, scalars_pandas_df_index # Pandas produces pd.NA, where bq dataframes produces NaN pd_result["c"] = pd_result["c"].replace(pandas.NA, np.nan) - bigframes.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, check_dtype=False) @pytest.mark.parametrize( @@ -1483,6 +1487,6 @@ def test_multiindex_eq_const(scalars_df_index, 
scalars_pandas_df_index): bf_result = scalars_df_index.set_index(col_name).index == (2, False) pd_result = scalars_pandas_df_index.set_index(col_name).index == (2, False) - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( pandas.Index(pd_result, dtype="boolean"), bf_result.to_pandas() ) diff --git a/tests/system/small/test_numpy.py b/tests/system/small/test_numpy.py index d04fb81a0a..9f2d02bbbc 100644 --- a/tests/system/small/test_numpy.py +++ b/tests/system/small/test_numpy.py @@ -47,7 +47,9 @@ def test_series_ufuncs(floats_pd, floats_bf, opname): bf_result = getattr(np, opname)(floats_bf).to_pandas() pd_result = getattr(np, opname)(floats_pd) - bigframes.testing.assert_series_equal(bf_result, pd_result, nulls_are_nan=True) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, nulls_are_nan=True + ) @pytest.mark.parametrize( @@ -81,7 +83,7 @@ def test_df_ufuncs(scalars_dfs, opname): ): pd_result["int64_col"] = pd_result["int64_col"].astype(pd.Float64Dtype()) - bigframes.testing.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) @pytest.mark.parametrize( @@ -101,7 +103,7 @@ def test_df_binary_ufuncs(scalars_dfs, opname): bf_result = op(scalars_df[["float64_col", "int64_col"]], 5.1).to_pandas() pd_result = op(scalars_pandas_df[["float64_col", "int64_col"]], 5.1) - bigframes.testing.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) # Operations tested here don't work on full dataframe in numpy+pandas @@ -133,7 +135,9 @@ def test_series_binary_ufuncs(scalars_dfs, x, y, opname): bf_result = op(scalars_df[x], scalars_df[y]).to_pandas() pd_result = op(scalars_pandas_df[x], scalars_pandas_df[y]) - bigframes.testing.assert_series_equal(bf_result, pd_result, nulls_are_nan=True) + bigframes.testing.utils.assert_series_equal( + 
bf_result, pd_result, nulls_are_nan=True + ) def test_series_binary_ufuncs_reverse(scalars_dfs): @@ -143,7 +147,9 @@ def test_series_binary_ufuncs_reverse(scalars_dfs): bf_result = np.subtract(5.1, scalars_df["int64_col"]).to_pandas() pd_result = np.subtract(5.1, scalars_pandas_df["int64_col"]) - bigframes.testing.assert_series_equal(bf_result, pd_result, nulls_are_nan=True) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, nulls_are_nan=True + ) def test_df_binary_ufuncs_reverse(scalars_dfs): @@ -156,4 +162,4 @@ def test_df_binary_ufuncs_reverse(scalars_dfs): scalars_pandas_df[["float64_col", "int64_col"]], ) - bigframes.testing.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result, nulls_are_nan=True) diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index d83955ecde..33c7364b5e 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -65,7 +65,7 @@ def test_concat_series(scalars_dfs): ] ) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -602,7 +602,7 @@ def test_cut_for_array(): bf_result = bpd.cut(sc, x) pd_result = _convert_pandas_category(pd_result) - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -621,7 +621,7 @@ def test_cut_by_int_bins(scalars_dfs, labels, right): bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=labels, right=right) pd_result = _convert_pandas_category(pd_result) - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) def test_cut_by_int_bins_w_labels(scalars_dfs): @@ -632,7 +632,7 @@ def test_cut_by_int_bins_w_labels(scalars_dfs): 
bf_result = bpd.cut(scalars_df["float64_col"], 5, labels=labels) pd_result = _convert_pandas_category(pd_result) - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -675,7 +675,7 @@ def test_cut_by_numeric_breaks(scalars_dfs, breaks, right, labels): ).to_pandas() pd_result_converted = _convert_pandas_category(pd_result) - bigframes.testing.assert_series_equal(bf_result, pd_result_converted) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result_converted) def test_cut_by_numeric_breaks_w_labels(scalars_dfs): @@ -687,7 +687,7 @@ def test_cut_by_numeric_breaks_w_labels(scalars_dfs): bf_result = bpd.cut(scalars_df["float64_col"], bins, labels=labels) pd_result = _convert_pandas_category(pd_result) - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -727,7 +727,7 @@ def test_cut_by_interval_bins(scalars_dfs, bins, right, labels): pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=labels, right=right) pd_result_converted = _convert_pandas_category(pd_result) - bigframes.testing.assert_series_equal(bf_result, pd_result_converted) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result_converted) def test_cut_by_interval_bins_w_labels(scalars_dfs): @@ -739,7 +739,7 @@ def test_cut_by_interval_bins_w_labels(scalars_dfs): bf_result = bpd.cut(scalars_df["float64_col"], bins, labels=labels) pd_result = _convert_pandas_category(pd_result) - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -756,7 +756,7 @@ def test_cut_by_edge_cases_bins(scalars_dfs, bins, labels): pd_result = pd.cut(scalars_pandas_df["int64_too"], bins, labels=labels) pd_result_converted 
= _convert_pandas_category(pd_result) - bigframes.testing.assert_series_equal(bf_result, pd_result_converted) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result_converted) def test_cut_empty_array_raises_error(): @@ -785,7 +785,7 @@ def test_qcut(scalars_dfs, q): bf_result = bpd.qcut(scalars_df["float64_col"], q, labels=False, duplicates="drop") pd_result = pd_result.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -832,7 +832,7 @@ def test_to_datetime_iterable(arg, utc, unit, format): .dt.floor("us") .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -846,7 +846,7 @@ def test_to_datetime_series(scalars_dfs): pd_result = pd.Series(pd.to_datetime(scalars_pandas_df[col], unit="s")).astype( "datetime64[s]" ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -872,7 +872,7 @@ def test_to_datetime_unit_param(arg, unit): .dt.floor("us") .astype("datetime64[ns]") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -897,7 +897,7 @@ def test_to_datetime_format_param(arg, utc, format): .dt.floor("us") .astype("datetime64[ns, UTC]" if utc else "datetime64[ns]") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -955,7 +955,7 @@ def test_to_datetime_string_inputs(arg, utc, output_in_utc, format): .astype(normalized_type) ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, 
check_index_type=False, check_names=False ) @@ -999,7 +999,7 @@ def test_to_datetime_timestamp_inputs(arg, utc, output_in_utc): pd.Series(pd.to_datetime(arg, utc=utc)).dt.floor("us").astype(normalized_type) ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, check_names=False ) @@ -1064,7 +1064,7 @@ def test_to_timedelta_with_bf_float_series_value_rounded_down(session): expected_result = pd.Series([pd.Timedelta(1, "us"), pd.Timedelta(2, "us")]).astype( "timedelta64[ns]" ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -1085,7 +1085,7 @@ def test_to_timedelta_with_list_like_input(session, input): ) expected_result = pd.Series(pd.to_timedelta(input, "s")).astype("timedelta64[ns]") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) @@ -1116,6 +1116,6 @@ def test_to_timedelta_on_timedelta_series__should_be_no_op(scalars_dfs): ) expected_result = pd.to_timedelta(pd_series, unit="s").astype("timedelta64[ns]") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_index_type=False ) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 51d0cc61f0..90d3b9f819 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -48,7 +48,7 @@ def test_series_construct_copy(scalars_dfs): pd_result = pd.Series( scalars_pandas_df["int64_col"], name="test_series", dtype="Float64" ) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_nullable_ints(): @@ -63,7 +63,7 @@ def test_series_construct_nullable_ints(): ) expected = pd.Series([1, 3, pd.NA], dtype=pd.Int64Dtype(), 
index=expected_index) - bigframes.testing.assert_series_equal(bf_result, expected) + bigframes.testing.utils.assert_series_equal(bf_result, expected) def test_series_construct_timestamps(): @@ -75,7 +75,9 @@ def test_series_construct_timestamps(): bf_result = series.Series(datetimes).to_pandas() pd_result = pd.Series(datetimes, dtype=pd.ArrowDtype(pa.timestamp("us"))) - bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, check_index_type=False + ) def test_series_construct_copy_with_index(scalars_dfs): @@ -92,7 +94,7 @@ def test_series_construct_copy_with_index(scalars_dfs): dtype="Float64", index=scalars_pandas_df["int64_too"], ) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_copy_index(scalars_dfs): @@ -109,7 +111,7 @@ def test_series_construct_copy_index(scalars_dfs): dtype="Float64", index=scalars_pandas_df["int64_too"], ) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_pandas(scalars_dfs): @@ -121,7 +123,7 @@ def test_series_construct_pandas(scalars_dfs): scalars_pandas_df["int64_col"], name="test_series", dtype="Float64" ) assert bf_result.shape == pd_result.shape - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) def test_series_construct_from_list(): @@ -131,7 +133,7 @@ def test_series_construct_from_list(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_reindex(): @@ -142,7 +144,7 @@ def test_series_construct_reindex(): # 
BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_from_list_w_index(): @@ -156,7 +158,7 @@ def test_series_construct_from_list_w_index(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_empty(session: bigframes.Session): @@ -177,7 +179,7 @@ def test_series_construct_scalar_no_index(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_scalar_w_index(): @@ -189,7 +191,7 @@ def test_series_construct_scalar_w_index(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_nan(): @@ -199,7 +201,7 @@ def test_series_construct_nan(): pd_result.index = pd_result.index.astype("Int64") pd_result = pd_result.astype("Float64") - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_scalar_w_bf_index(): @@ -210,7 +212,7 @@ def test_series_construct_scalar_w_bf_index(): pd_result = pd_result.astype("string[pyarrow]") - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_construct_from_list_escaped_strings(): @@ -226,7 +228,7 @@ def 
test_series_construct_from_list_escaped_strings(): # BigQuery DataFrame default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) def test_series_construct_geodata(): @@ -241,7 +243,7 @@ def test_series_construct_geodata(): series = bigframes.pandas.Series(pd_series) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_series, series.to_pandas(), check_index_type=False ) @@ -259,7 +261,7 @@ def test_series_construct_w_dtype(dtype): expected = pd.Series(data, dtype=dtype) expected.index = expected.index.astype("Int64") series = bigframes.pandas.Series(data, dtype=dtype) - bigframes.testing.assert_series_equal(series.to_pandas(), expected) + bigframes.testing.utils.assert_series_equal(series.to_pandas(), expected) def test_series_construct_w_dtype_for_struct(): @@ -276,7 +278,7 @@ def test_series_construct_w_dtype_for_struct(): series = bigframes.pandas.Series(data, dtype=dtype) expected = pd.Series(data, dtype=dtype) expected.index = expected.index.astype("Int64") - bigframes.testing.assert_series_equal(series.to_pandas(), expected) + bigframes.testing.utils.assert_series_equal(series.to_pandas(), expected) def test_series_construct_w_dtype_for_array_string(): @@ -294,7 +296,7 @@ def test_series_construct_w_dtype_for_array_string(): else: check_dtype = False - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( series.to_pandas(), expected, check_dtype=check_dtype ) @@ -314,7 +316,7 @@ def test_series_construct_w_dtype_for_array_struct(): else: check_dtype = False - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( series.to_pandas(), expected, check_dtype=check_dtype ) @@ -324,7 +326,7 @@ def test_series_construct_local_unordered_has_sequential_index(unordered_session 
["Sun", "Mon", "Tues", "Wed", "Thurs", "Fri", "Sat"], session=unordered_session ) expected: pd.Index = pd.Index([0, 1, 2, 3, 4, 5, 6], dtype=pd.Int64Dtype()) - bigframes.testing.assert_index_equal(series.index.to_pandas(), expected) + bigframes.testing.utils.assert_index_equal(series.index.to_pandas(), expected) @pytest.mark.parametrize( @@ -386,14 +388,14 @@ def test_series_construct_w_nested_json_dtype(): ), ) - bigframes.testing.assert_series_equal(s.to_pandas(), s2.to_pandas()) + bigframes.testing.utils.assert_series_equal(s.to_pandas(), s2.to_pandas()) def test_series_keys(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df["int64_col"].keys().to_pandas() pd_result = scalars_pandas_df["int64_col"].keys() - bigframes.testing.assert_index_equal(bf_result, pd_result) + bigframes.testing.utils.assert_index_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -538,7 +540,7 @@ def test_series___getitem__(scalars_dfs, index_col, key): scalars_pandas_df = scalars_pandas_df.set_index(index_col, drop=False) bf_result = scalars_df[col_name][key] pd_result = scalars_pandas_df[col_name][key] - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -592,7 +594,7 @@ def test_series___setitem__(scalars_dfs, index_col, key, value): bf_series[key] = value pd_series[key] = value - bigframes.testing.assert_series_equal(bf_series.to_pandas(), pd_series) + bigframes.testing.utils.assert_series_equal(bf_series.to_pandas(), pd_series) @pytest.mark.parametrize( @@ -617,7 +619,7 @@ def test_series___setitem___with_int_key_numeric(scalars_dfs, key, value): bf_series[key] = value pd_series[key] = value - bigframes.testing.assert_series_equal(bf_series.to_pandas(), pd_series) + bigframes.testing.utils.assert_series_equal(bf_series.to_pandas(), pd_series) def test_series___setitem___with_default_index(scalars_dfs): @@ -714,7 +716,7 
@@ def test_series_replace_scalar_scalar(scalars_dfs): ) pd_result = scalars_pandas_df[col_name].replace("Hello, World!", "Howdy, Planet!") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -730,7 +732,7 @@ def test_series_replace_regex_scalar(scalars_dfs): "^H.l", "Howdy, Planet!", regex=True ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -748,7 +750,7 @@ def test_series_replace_list_scalar(scalars_dfs): ["Hello, World!", "T"], "Howdy, Planet!" ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -760,7 +762,7 @@ def test_series_replace_nans_with_pd_na(scalars_dfs): bf_result = scalars_df[col_name].replace({pd.NA: "UNKNOWN"}).to_pandas() pd_result = scalars_pandas_df[col_name].replace({pd.NA: "UNKNOWN"}) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -785,7 +787,7 @@ def test_series_replace_dict(scalars_dfs, replacement_dict): bf_result = scalars_df[col_name].replace(replacement_dict).to_pandas() pd_result = scalars_pandas_df[col_name].replace(replacement_dict) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -841,7 +843,9 @@ def test_series_dropna(scalars_dfs, ignore_index): col_name = "string_col" bf_result = scalars_df[col_name].dropna(ignore_index=ignore_index).to_pandas() pd_result = scalars_pandas_df[col_name].dropna(ignore_index=ignore_index) - bigframes.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + pd_result, bf_result, check_index_type=False + ) @pytest.mark.parametrize( @@ -877,7 +881,9 @@ def test_series_agg_multi_string(scalars_dfs): # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_result = 
pd_result.astype("Float64") - bigframes.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + pd_result, bf_result, check_index_type=False + ) @pytest.mark.parametrize( @@ -994,7 +1000,7 @@ def test_mode_stat(scalars_df_index, scalars_pandas_df_index, col_name): ## Mode implicitly resets index, and bigframes default indices use nullable Int64 pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -1162,7 +1168,7 @@ def test_mods(scalars_dfs, col_x, col_y, method): else: bf_result = bf_series.astype("Float64").to_pandas() pd_result = getattr(scalars_pandas_df[col_x], method)(scalars_pandas_df[col_y]) - bigframes.testing.assert_series_equal(pd_result, bf_result) + bigframes.testing.utils.assert_series_equal(pd_result, bf_result) # We work around a pandas bug that doesn't handle correlating nullable dtypes by doing this @@ -1227,16 +1233,20 @@ def test_divmods_series(scalars_dfs, col_x, col_y, method): ) # BigQuery's mod functions return NUMERIC values for non-INT64 inputs. 
if bf_div_result.dtype == pd.Int64Dtype(): - bigframes.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas()) + bigframes.testing.utils.assert_series_equal( + pd_div_result, bf_div_result.to_pandas() + ) else: - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_div_result, bf_div_result.astype("Float64").to_pandas() ) if bf_mod_result.dtype == pd.Int64Dtype(): - bigframes.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas()) + bigframes.testing.utils.assert_series_equal( + pd_mod_result, bf_mod_result.to_pandas() + ) else: - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_mod_result, bf_mod_result.astype("Float64").to_pandas() ) @@ -1268,16 +1278,20 @@ def test_divmods_scalars(scalars_dfs, col_x, other, method): pd_div_result, pd_mod_result = getattr(scalars_pandas_df[col_x], method)(other) # BigQuery's mod functions return NUMERIC values for non-INT64 inputs. if bf_div_result.dtype == pd.Int64Dtype(): - bigframes.testing.assert_series_equal(pd_div_result, bf_div_result.to_pandas()) + bigframes.testing.utils.assert_series_equal( + pd_div_result, bf_div_result.to_pandas() + ) else: - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_div_result, bf_div_result.astype("Float64").to_pandas() ) if bf_mod_result.dtype == pd.Int64Dtype(): - bigframes.testing.assert_series_equal(pd_mod_result, bf_mod_result.to_pandas()) + bigframes.testing.utils.assert_series_equal( + pd_mod_result, bf_mod_result.to_pandas() + ) else: - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_mod_result, bf_mod_result.astype("Float64").to_pandas() ) @@ -1350,7 +1364,7 @@ def test_series_add_different_table_default_index( + scalars_df_2_default_index["float64_col"].to_pandas() ) # TODO(swast): Can remove sort_index() when there's default ordering. 
- bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.sort_index(), pd_result.sort_index() ) @@ -1363,7 +1377,7 @@ def test_series_add_different_table_with_index( # When index values are unique, we can emulate with values from the same # DataFrame. pd_result = scalars_pandas_df["float64_col"] + scalars_pandas_df["int64_col"] - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) def test_reset_index_drop(scalars_df_index, scalars_pandas_df_index): @@ -1382,7 +1396,7 @@ def test_reset_index_drop(scalars_df_index, scalars_pandas_df_index): # BigQuery DataFrames default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) def test_series_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df_index): @@ -1401,7 +1415,7 @@ def test_series_reset_index_allow_duplicates(scalars_df_index, scalars_pandas_df pd_result.index = pd_result.index.astype(pd.Int64Dtype()) # reset_index should maintain the original ordering. 
- bigframes.testing.assert_frame_equal(bf_result, pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result, pd_result) def test_series_reset_index_duplicates_error(scalars_df_index): @@ -1420,7 +1434,7 @@ def test_series_reset_index_inplace(scalars_df_index, scalars_pandas_df_index): # BigQuery DataFrames default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) @pytest.mark.parametrize( @@ -1447,7 +1461,7 @@ def test_reset_index_no_drop(scalars_df_index, scalars_pandas_df_index, name): # BigQuery DataFrames default indices use nullable Int64 always pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_frame_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_frame_equal(bf_result.to_pandas(), pd_result) def test_copy(scalars_df_index, scalars_pandas_df_index): @@ -1464,7 +1478,7 @@ def test_copy(scalars_df_index, scalars_pandas_df_index): pd_series.loc[0] = 3.4 assert bf_copy.to_pandas().loc[0] != bf_series.to_pandas().loc[0] - bigframes.testing.assert_series_equal(bf_copy.to_pandas(), pd_copy) + bigframes.testing.utils.assert_series_equal(bf_copy.to_pandas(), pd_copy) def test_isin_raise_error(scalars_df_index, scalars_pandas_df_index): @@ -1505,7 +1519,7 @@ def test_isin(scalars_dfs, col_name, test_set): scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df[col_name].isin(test_set).to_pandas() pd_result = scalars_pandas_df[col_name].isin(test_set).astype("boolean") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -1545,7 +1559,7 @@ def test_isin_bigframes_values(scalars_dfs, col_name, test_set, session): scalars_df[col_name].isin(series.Series(test_set, session=session)).to_pandas() ) pd_result = 
scalars_pandas_df[col_name].isin(test_set).astype("boolean") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -1563,7 +1577,7 @@ def test_isin_bigframes_index(scalars_dfs, session): .isin(pd.Index(["Hello, World!", "Hi", "こんにちは"])) .astype("boolean") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result, ) @@ -1608,7 +1622,7 @@ def test_isin_bigframes_values_as_predicate( pd_predicate = scalars_pandas_df[col_name].isin(test_set) pd_result = scalars_pandas_df[pd_predicate] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( pd_result.reset_index(), bf_result.reset_index(), ) @@ -1709,10 +1723,10 @@ def test_loc_setitem_cell(scalars_df_index, scalars_pandas_df_index): pd_series.loc[2] = "This value isn't in the test data." bf_result = bf_series.to_pandas() pd_result = pd_series - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) # Per Copy-on-Write semantics, other references to the original DataFrame # should remain unchanged. 
- bigframes.testing.assert_series_equal(bf_original.to_pandas(), pd_original) + bigframes.testing.utils.assert_series_equal(bf_original.to_pandas(), pd_original) def test_at_setitem_row_label_scalar(scalars_dfs): @@ -1723,7 +1737,7 @@ def test_at_setitem_row_label_scalar(scalars_dfs): pd_series.at[1] = 1000 bf_result = bf_series.to_pandas() pd_result = pd_series.astype("Int64") - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_ne_obj_series(scalars_dfs): @@ -2007,7 +2021,7 @@ def test_series_quantile(scalars_dfs): pd_result = pd_series.quantile([0.0, 0.4, 0.6, 1.0]) bf_result = bf_series.quantile([0.0, 0.4, 0.6, 1.0]) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result.to_pandas(), check_dtype=False, check_index_type=False ) @@ -2056,7 +2070,7 @@ def test_cumprod(scalars_dfs): col_name = "float64_col" bf_result = scalars_df[col_name].cumprod() pd_result = scalars_pandas_df[col_name].cumprod() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_result, bf_result.to_pandas(), ) @@ -2157,7 +2171,7 @@ def test_groupby_level_sum(scalars_dfs): bf_series = scalars_df[col_name].groupby(level=0).sum() pd_series = scalars_pandas_df[col_name].groupby(level=0).sum() # TODO(swast): Update groupby to use index based on group by key(s). - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_series.sort_index(), bf_series.to_pandas().sort_index(), ) @@ -2171,7 +2185,7 @@ def test_groupby_level_list_sum(scalars_dfs): bf_series = scalars_df[col_name].groupby(level=["rowindex"]).sum() pd_series = scalars_pandas_df[col_name].groupby(level=["rowindex"]).sum() # TODO(swast): Update groupby to use index based on group by key(s). 
- bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_series.sort_index(), bf_series.to_pandas().sort_index(), ) @@ -2288,7 +2302,7 @@ def test_groupby_window_ops(scalars_df_index, scalars_pandas_df_index, operator) scalars_pandas_df_index[col_name].groupby(scalars_pandas_df_index[group_key]) ).astype(bf_series.dtype) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_series, bf_series, ) @@ -2304,7 +2318,7 @@ def test_groupby_window_ops(scalars_df_index, scalars_pandas_df_index, operator) def test_drop_label(scalars_df_index, scalars_pandas_df_index, label, col_name): bf_series = scalars_df_index[col_name].drop(label).to_pandas() pd_series = scalars_pandas_df_index[col_name].drop(label) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_series, bf_series, ) @@ -2314,7 +2328,7 @@ def test_drop_label_list(scalars_df_index, scalars_pandas_df_index): col_name = "int64_col" bf_series = scalars_df_index[col_name].drop([1, 3]).to_pandas() pd_series = scalars_pandas_df_index[col_name].drop([1, 3]) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_series, bf_series, ) @@ -2338,7 +2352,7 @@ def test_drop_label_list(scalars_df_index, scalars_pandas_df_index): def test_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, col_name): bf_series = scalars_df_index[col_name].drop_duplicates(keep=keep).to_pandas() pd_series = scalars_pandas_df_index[col_name].drop_duplicates(keep=keep) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd_series, bf_series, ) @@ -2375,7 +2389,7 @@ def test_unique(scalars_df_index, scalars_pandas_df_index, col_name): def test_duplicated(scalars_df_index, scalars_pandas_df_index, keep, col_name): bf_series = scalars_df_index[col_name].duplicated(keep=keep).to_pandas() pd_series = scalars_pandas_df_index[col_name].duplicated(keep=keep) - 
bigframes.testing.assert_series_equal(pd_series, bf_series, check_dtype=False) + bigframes.testing.utils.assert_series_equal(pd_series, bf_series, check_dtype=False) def test_shape(scalars_dfs): @@ -2509,7 +2523,7 @@ def test_head_then_scalar_operation(scalars_dfs): bf_result = (scalars_df["float64_col"].head(1) + 4).to_pandas() pd_result = scalars_pandas_df["float64_col"].head(1) + 4 - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2525,7 +2539,7 @@ def test_head_then_series_operation(scalars_dfs): "float64_col" ].head(2) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2536,7 +2550,7 @@ def test_series_peek(scalars_dfs): peek_result = scalars_df["float64_col"].peek(n=3, force=False) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( peek_result, scalars_pandas_df["float64_col"].reindex_like(peek_result), ) @@ -2555,7 +2569,7 @@ def test_series_peek_with_large_results_not_allowed(scalars_dfs): # The metrics won't be fully updated when we call query_and_wait. 
print(session.slot_millis_sum - slot_millis_sum) assert session.slot_millis_sum - slot_millis_sum < 500 - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( peek_result, scalars_pandas_df["float64_col"].reindex_like(peek_result), ) @@ -2569,7 +2583,7 @@ def test_series_peek_multi_index(scalars_dfs): pd_series = scalars_pandas_df.set_index(["string_col", "bool_col"])["float64_col"] pd_series.name = ("2-part", "name") peek_result = bf_series.peek(n=3, force=False) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( peek_result, pd_series.reindex_like(peek_result), ) @@ -2581,7 +2595,7 @@ def test_series_peek_filtered(scalars_dfs): n=3, force=False ) pd_result = scalars_pandas_df[scalars_pandas_df.int64_col > 0]["float64_col"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( peek_result, pd_result.reindex_like(peek_result), ) @@ -2597,7 +2611,7 @@ def test_series_peek_force(scalars_dfs): peek_result = df_filtered.peek(n=3, force=True) pd_cumsum_df = scalars_pandas_df[["int64_col", "int64_too"]].cumsum() pd_result = pd_cumsum_df[pd_cumsum_df.int64_col > 0]["int64_too"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( peek_result, pd_result.reindex_like(peek_result), ) @@ -2613,7 +2627,7 @@ def test_series_peek_force_float(scalars_dfs): peek_result = df_filtered.peek(n=3, force=True) pd_cumsum_df = scalars_pandas_df[["int64_col", "float64_col"]].cumsum() pd_result = pd_cumsum_df[pd_cumsum_df.float64_col > 0]["float64_col"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( peek_result, pd_result.reindex_like(peek_result), ) @@ -2625,7 +2639,7 @@ def test_shift(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable ints in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].shift().astype(pd.Int64Dtype()) - 
bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2636,7 +2650,7 @@ def test_series_ffill(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_name].ffill(limit=1).to_pandas() pd_result = scalars_pandas_df_index[col_name].ffill(limit=1) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2647,7 +2661,7 @@ def test_series_bfill(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_name].bfill(limit=2).to_pandas() pd_result = scalars_pandas_df_index[col_name].bfill(limit=2) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2662,7 +2676,7 @@ def test_cumsum_int(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable ints in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].cumsum().astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2683,7 +2697,7 @@ def test_cumsum_int_ordered(scalars_df_index, scalars_pandas_df_index): .astype(pd.Int64Dtype()) ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2702,7 +2716,7 @@ def test_series_nlargest(scalars_df_index, scalars_pandas_df_index, keep): bf_result = scalars_df_index[col_name].nlargest(4, keep=keep).to_pandas() pd_result = scalars_pandas_df_index[col_name].nlargest(4, keep=keep) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2725,7 +2739,7 @@ def test_diff(scalars_df_index, scalars_pandas_df_index, periods): .astype(pd.Int64Dtype()) ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2744,7 +2758,7 @@ def 
test_series_pct_change(scalars_df_index, scalars_pandas_df_index, periods): # cumsum does not behave well on nullable ints in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index["int64_col"].ffill().pct_change(periods=periods) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2763,7 +2777,7 @@ def test_series_nsmallest(scalars_df_index, scalars_pandas_df_index, keep): bf_result = scalars_df_index[col_name].nsmallest(2, keep=keep).to_pandas() pd_result = scalars_pandas_df_index[col_name].nsmallest(2, keep=keep) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2812,7 +2826,7 @@ def test_series_rank( .astype(pd.Float64Dtype()) ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2824,7 +2838,7 @@ def test_cast_float_to_int(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable floats in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2836,7 +2850,7 @@ def test_cast_float_to_bool(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable floats in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].astype(pd.BooleanDtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2854,7 +2868,7 @@ def test_cumsum_nested(scalars_df_index, scalars_pandas_df_index): .astype(pd.Float64Dtype()) ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2883,7 +2897,7 @@ def test_nested_analytic_ops_align(scalars_df_index, 
scalars_pandas_df_index): + pd_series.expanding().max() ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2899,7 +2913,7 @@ def test_cumsum_int_filtered(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable ints in pandas, produces object type and never ignores NA pd_result = pd_col[pd_col > -2].cumsum().astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2911,7 +2925,7 @@ def test_cumsum_float(scalars_df_index, scalars_pandas_df_index): # cumsum does not behave well on nullable floats in pandas, produces object type and never ignores NA pd_result = scalars_pandas_df_index[col_name].cumsum().astype(pd.Float64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2922,7 +2936,7 @@ def test_cummin_int(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_name].cummin().to_pandas() pd_result = scalars_pandas_df_index[col_name].cummin() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2933,7 +2947,7 @@ def test_cummax_int(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index[col_name].cummax().to_pandas() pd_result = scalars_pandas_df_index[col_name].cummax() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -2966,7 +2980,7 @@ def test_value_counts(scalars_dfs, kwargs): bf_result = s.value_counts(**kwargs).to_pandas() pd_result = pd_s.value_counts(**kwargs) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3004,7 +3018,7 @@ def test_value_counts_w_cut(scalars_dfs): pd_result = pd_cut.value_counts() pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - 
bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result.astype(pd.Int64Dtype()), ) @@ -3014,7 +3028,7 @@ def test_iloc_nested(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index["string_col"].iloc[1:].iloc[1:].to_pandas() pd_result = scalars_pandas_df_index["string_col"].iloc[1:].iloc[1:] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3043,7 +3057,7 @@ def test_iloc_nested(scalars_df_index, scalars_pandas_df_index): def test_series_iloc(scalars_df_index, scalars_pandas_df_index, start, stop, step): bf_result = scalars_df_index["string_col"].iloc[start:stop:step].to_pandas() pd_result = scalars_pandas_df_index["string_col"].iloc[start:stop:step] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3079,7 +3093,7 @@ def test_series_add_prefix(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["int64_too"].add_prefix("prefix_") # Index will be object type in pandas, string type in bigframes, but same values - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, @@ -3092,7 +3106,7 @@ def test_series_add_suffix(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["int64_too"].add_suffix("_suffix") # Index will be object type in pandas, string type in bigframes, but same values - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_index_type=False, @@ -3120,7 +3134,7 @@ def test_series_filter_like(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["float64_col"].filter(like="ello") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3134,7 +3148,7 @@ def 
test_series_filter_regex(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["float64_col"].filter(regex="^[GH].*") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3149,7 +3163,7 @@ def test_series_reindex(scalars_df_index, scalars_pandas_df_index): # Pandas uses int64 instead of Int64 (nullable) dtype. pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3176,7 +3190,7 @@ def test_series_reindex_like(scalars_df_index, scalars_pandas_df_index): # Pandas uses int64 instead of Int64 (nullable) dtype. pd_result.index = pd_result.index.astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3192,7 +3206,7 @@ def test_where_with_series(scalars_df_index, scalars_pandas_df_index): scalars_pandas_df_index["bool_col"], scalars_pandas_df_index["int64_too"] ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3217,7 +3231,7 @@ def test_where_with_different_indices(scalars_df_index, scalars_pandas_df_index) ) ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3231,7 +3245,7 @@ def test_where_with_default(scalars_df_index, scalars_pandas_df_index): scalars_pandas_df_index["bool_col"] ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3251,7 +3265,7 @@ def _is_positive(x): cond=_is_positive, other=lambda x: x * 10 ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3300,7 +3314,7 @@ def test_clip_filtered_two_sided(scalars_df_index, scalars_pandas_df_index): upper_pd = scalars_pandas_df_index["int64_too"].iloc[:5] + 1 
pd_result = col_pd.clip(lower_pd, upper_pd) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3315,7 +3329,7 @@ def test_clip_filtered_one_sided(scalars_df_index, scalars_pandas_df_index): lower_pd = scalars_pandas_df_index["int64_too"].iloc[2:] - 1 pd_result = col_pd.clip(lower_pd, None) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3345,7 +3359,7 @@ def test_between(scalars_df_index, scalars_pandas_df_index, left, right, inclusi ) pd_result = scalars_pandas_df_index["int64_col"].between(left, right, inclusive) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result.astype(pd.BooleanDtype()), ) @@ -3383,7 +3397,7 @@ def test_series_case_when(scalars_dfs_maybe_ordered): bf_result = bf_series.case_when(bf_conditions).to_pandas() pd_result = pd_series.case_when(pd_conditions) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result.astype(pd.Int64Dtype()), ) @@ -3419,7 +3433,7 @@ def test_series_case_when_change_type(scalars_dfs_maybe_ordered): bf_result = bf_series.case_when(bf_conditions).to_pandas() pd_result = pd_series.case_when(pd_conditions) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result.astype("string[pyarrow]"), ) @@ -3448,7 +3462,7 @@ def test_to_json(gcs_folder, scalars_df_index, scalars_pandas_df_index): scalars_df_index["int64_col"].to_json(path, lines=True, orient="records") gcs_df = pd.read_json(get_first_file_from_wildcard(path), lines=True) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( gcs_df["int64_col"].astype(pd.Int64Dtype()), scalars_pandas_df_index["int64_col"], check_dtype=False, @@ -3461,7 +3475,7 @@ def test_to_csv(gcs_folder, scalars_df_index, scalars_pandas_df_index): 
scalars_df_index["int64_col"].to_csv(path) gcs_df = pd.read_csv(get_first_file_from_wildcard(path)) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( gcs_df["int64_col"].astype(pd.Int64Dtype()), scalars_pandas_df_index["int64_col"], check_dtype=False, @@ -3590,7 +3604,7 @@ def test_series_values(scalars_df_index, scalars_pandas_df_index): pd_result = scalars_pandas_df_index["int64_too"].values # Numpy isn't equipped to compare non-numeric objects, so convert back to dataframe - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( pd.Series(bf_result), pd.Series(pd_result), check_dtype=False ) @@ -3623,7 +3637,7 @@ def test_sort_values(scalars_df_index, scalars_pandas_df_index, ascending, na_po ascending=ascending, na_position=na_position ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3636,7 +3650,7 @@ def test_series_sort_values_inplace(scalars_df_index, scalars_pandas_df_index): bf_result = bf_series.to_pandas() pd_result = scalars_pandas_df_index["int64_col"].sort_values(ascending=False) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3655,7 +3669,7 @@ def test_sort_index(scalars_df_index, scalars_pandas_df_index, ascending): ) pd_result = scalars_pandas_df_index["int64_too"].sort_index(ascending=ascending) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3667,7 +3681,7 @@ def test_series_sort_index_inplace(scalars_df_index, scalars_pandas_df_index): bf_result = bf_series.to_pandas() pd_result = scalars_pandas_df_index["int64_too"].sort_index(ascending=False) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3719,7 +3733,7 @@ def _ten_times(x): cond=lambda x: x > 0, other=_ten_times ) - bigframes.testing.assert_series_equal( + 
bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -3828,7 +3842,7 @@ def test_astype(scalars_df_index, scalars_pandas_df_index, column, to_type, erro pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_index[column].astype(to_type, errors=errors).to_pandas() pd_result = scalars_pandas_df_index[column].astype(to_type) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_astype_python(session): @@ -3839,7 +3853,7 @@ def test_series_astype_python(session): index=pd.Index([0, 1, 2, 3], dtype="Int64"), ) result = session.read_pandas(input).astype(float, errors="null").to_pandas() - bigframes.testing.assert_series_equal(result, exepcted) + bigframes.testing.utils.assert_series_equal(result, exepcted) def test_astype_safe(session): @@ -3850,7 +3864,7 @@ def test_astype_safe(session): index=pd.Index([0, 1, 2, 3], dtype="Int64"), ) result = session.read_pandas(input).astype("Float64", errors="null").to_pandas() - bigframes.testing.assert_series_equal(result, exepcted) + bigframes.testing.utils.assert_series_equal(result, exepcted) def test_series_astype_w_invalid_error(session): @@ -3871,7 +3885,7 @@ def test_astype_numeric_to_int(scalars_df_index, scalars_pandas_df_index): .apply(lambda x: None if pd.isna(x) else math.trunc(x)) .astype(to_type) ) - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -3889,7 +3903,7 @@ def test_date_time_astype_int( pytest.importorskip("pandas", minversion="2.0.0") bf_result = scalars_df_index[column].astype(to_type).to_pandas() pd_result = scalars_pandas_df_index[column].astype(to_type) - bigframes.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result, check_dtype=False) assert bf_result.dtype == "Int64" @@ -3900,7 +3914,9 
@@ def test_string_astype_int(session): pd_result = pd_series.astype("Int64") bf_result = bf_series.astype("Int64").to_pandas() - bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, check_index_type=False + ) def test_string_astype_float(session): @@ -3913,7 +3929,9 @@ def test_string_astype_float(session): pd_result = pd_series.astype("Float64") bf_result = bf_series.astype("Float64").to_pandas() - bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, check_index_type=False + ) def test_string_astype_date(session): @@ -3933,7 +3951,9 @@ def test_string_astype_date(session): pd_result = pd_series.astype("date32[day][pyarrow]") # type: ignore bf_result = bf_series.astype("date32[day][pyarrow]").to_pandas() - bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, check_index_type=False + ) def test_string_astype_datetime(session): @@ -3946,7 +3966,9 @@ def test_string_astype_datetime(session): pd_result = pd_series.astype(pd.ArrowDtype(pa.timestamp("us"))) bf_result = bf_series.astype(pd.ArrowDtype(pa.timestamp("us"))).to_pandas() - bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, check_index_type=False + ) def test_string_astype_timestamp(session): @@ -3965,7 +3987,9 @@ def test_string_astype_timestamp(session): pd.ArrowDtype(pa.timestamp("us", tz="UTC")) ).to_pandas() - bigframes.testing.assert_series_equal(bf_result, pd_result, check_index_type=False) + bigframes.testing.utils.assert_series_equal( + bf_result, pd_result, check_index_type=False + ) def test_timestamp_astype_string(session): @@ -3987,7 +4011,7 @@ def test_timestamp_astype_string(session): ) 
bf_result = bf_series.astype(pa.string()).to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, expected_result, check_index_type=False, check_dtype=False ) assert bf_result.dtype == "string[pyarrow]" @@ -4003,7 +4027,7 @@ def test_float_astype_json(errors, session): expected_result = pd.Series(data, dtype=dtypes.JSON_DTYPE) expected_result.index = expected_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), expected_result) def test_float_astype_json_str(session): @@ -4015,7 +4039,7 @@ def test_float_astype_json_str(session): expected_result = pd.Series(data, dtype=dtypes.JSON_DTYPE) expected_result.index = expected_result.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), expected_result) @pytest.mark.parametrize("errors", ["raise", "null"]) @@ -4032,7 +4056,7 @@ def test_string_astype_json(errors, session): assert bf_result.dtype == dtypes.JSON_DTYPE pd_result = bf_series.to_pandas().astype(dtypes.JSON_DTYPE) - bigframes.testing.assert_series_equal(bf_result.to_pandas(), pd_result) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), pd_result) def test_string_astype_json_in_safe_mode(session): @@ -4043,7 +4067,7 @@ def test_string_astype_json_in_safe_mode(session): expected = pd.Series([None], dtype=dtypes.JSON_DTYPE) expected.index = expected.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), expected) def test_string_astype_json_raise_error(session): @@ -4081,7 +4105,7 @@ def test_json_astype_others(data, to_type, errors, session): load_data = [json.loads(item) if item is not None else None for item in data] expected = 
pd.Series(load_data, dtype=to_type) expected.index = expected.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), expected) @pytest.mark.parametrize( @@ -4115,7 +4139,7 @@ def test_json_astype_others_in_safe_mode(data, to_type, session): expected = pd.Series([None, None], dtype=to_type) expected.index = expected.index.astype("Int64") - bigframes.testing.assert_series_equal(bf_result.to_pandas(), expected) + bigframes.testing.utils.assert_series_equal(bf_result.to_pandas(), expected) @pytest.mark.parametrize( @@ -4138,7 +4162,7 @@ def test_loc_bool_series_explicit_index(scalars_df_index, scalars_pandas_df_inde bf_result = scalars_df_index.string_col.loc[scalars_df_index.bool_col].to_pandas() pd_result = scalars_pandas_df_index.string_col.loc[scalars_pandas_df_index.bool_col] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, ) @@ -4199,7 +4223,7 @@ def test_rename(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.rename("newname") pd_result = scalars_pandas_df_index.string_col.rename("newname") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4209,7 +4233,7 @@ def test_rename_nonstring(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.rename((4, 2)) pd_result = scalars_pandas_df_index.string_col.rename((4, 2)) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4221,7 +4245,7 @@ def test_rename_dict_same_type(scalars_df_index, scalars_pandas_df_index): pd_result.index = pd_result.index.astype("Int64") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4231,7 +4255,7 @@ def 
test_rename_axis(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.rename_axis("newindexname") pd_result = scalars_pandas_df_index.string_col.rename_axis("newindexname") - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4248,7 +4272,7 @@ def test_loc_list_string_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.loc[index_list] pd_result = scalars_pandas_df_index.string_col.loc[index_list] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4260,7 +4284,7 @@ def test_loc_list_integer_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.bool_col.loc[index_list] pd_result = scalars_pandas_df_index.bool_col.loc[index_list] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4276,7 +4300,7 @@ def test_loc_list_multiindex(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_multiindex.int64_too.loc[index_list] pd_result = scalars_pandas_df_multiindex.int64_too.loc[index_list] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4288,7 +4312,7 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.string_col.iloc[index_list] pd_result = scalars_pandas_df_index.string_col.iloc[index_list] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4302,7 +4326,7 @@ def test_iloc_list_nameless(scalars_df_index, scalars_pandas_df_index): pd_series = scalars_pandas_df_index.string_col.rename(None) pd_result = pd_series.iloc[index_list] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) 
@@ -4317,7 +4341,7 @@ def test_loc_list_nameless(scalars_df_index, scalars_pandas_df_index): pd_series = scalars_pandas_df_index.string_col.rename(None) pd_result = pd_series.loc[index_list] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4333,7 +4357,7 @@ def test_loc_bf_series_string_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.date_col.loc[bf_string_series] pd_result = scalars_pandas_df_index.date_col.loc[pd_string_series] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4351,7 +4375,7 @@ def test_loc_bf_series_multiindex(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_multiindex.int64_too.loc[bf_string_series] pd_result = scalars_pandas_df_multiindex.int64_too.loc[pd_string_series] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4364,7 +4388,7 @@ def test_loc_bf_index_integer_index(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index.date_col.loc[bf_index] pd_result = scalars_pandas_df_index.date_col.loc[pd_index] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4378,7 +4402,7 @@ def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_ind index = "Hello, World!" 
bf_result = scalars_df_index.date_col.loc[index] pd_result = scalars_pandas_df_index.date_col.loc[index] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4463,7 +4487,7 @@ def test_map_dict_input(scalars_dfs): pd_result = pd_result.astype("Int64") # pandas type differences bf_result = scalars_df.string_col.map(local_map) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4482,7 +4506,7 @@ def test_map_series_input(scalars_dfs): pd_result = scalars_pandas_df.int64_too.map(pd_map_series) bf_result = scalars_df.int64_too.map(bf_map_series) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result.to_pandas(), pd_result, ) @@ -4743,7 +4767,7 @@ def foo(x: int, y: int, df): def test_series_explode(data): s = bigframes.pandas.Series(data) pd_s = s.to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( s.explode().to_pandas(), pd_s.explode(), check_index_type=False, @@ -4789,7 +4813,7 @@ def test_series_explode_w_index(index, ignore_index): s = bigframes.pandas.Series(data, index=index) pd_s = pd.Series(data, index=index) # TODO(b/340885567): fix type error - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( s.explode(ignore_index=ignore_index).to_pandas(), # type: ignore pd_s.explode(ignore_index=ignore_index).astype(pd.Float64Dtype()), # type: ignore check_index_type=False, @@ -4814,7 +4838,7 @@ def test_series_explode_reserve_order(ignore_index, ordered): # TODO(b/340885567): fix type error pd_res = pd_s.explode(ignore_index=ignore_index).astype(pd.Int64Dtype()) # type: ignore pd_res.index = pd_res.index.astype(pd.Int64Dtype()) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( res if ordered else res.sort_index(), pd_res, ) @@ -4836,7 +4860,7 @@ def 
test_series_construct_empty_array(): dtype=pd.ArrowDtype(pa.list_(pa.float64())), index=pd.Index([0], dtype=pd.Int64Dtype()), ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( expected, s.to_pandas(), ) @@ -4853,7 +4877,7 @@ def test_series_construct_empty_array(): ) def test_series_explode_null(data): s = bigframes.pandas.Series(data) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( s.explode().to_pandas(), s.to_pandas().explode(), check_dtype=False, @@ -4880,7 +4904,7 @@ def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min() # TODO: (b/484364312) pd_result.index.names = bf_result.index.names - bigframes.testing.assert_series_equal(bf_result, pd_result) + bigframes.testing.utils.assert_series_equal(bf_result, pd_result) def test_series_struct_get_field_by_attribute( @@ -4892,13 +4916,13 @@ def test_series_struct_get_field_by_attribute( bf_series = nested_structs_df["person"] df_series = nested_structs_pandas_df["person"] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_series.address.city.to_pandas(), df_series.struct.field("address").struct.field("city"), check_dtype=False, check_index=False, ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_series.address.country.to_pandas(), df_series.struct.field("address").struct.field("country"), check_dtype=False, diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 2fa633a62b..e8e601cc76 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -327,7 +327,7 @@ def test_read_gbq_w_anonymous_query_results_table(session: bigframes.Session): df = session.read_gbq(destination, index_col="name") result = df.to_pandas() expected.index = expected.index.astype(result.index.dtype) - 
bigframes.testing.assert_frame_equal(result, expected, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(result, expected, check_dtype=False) def test_read_gbq_w_primary_keys_table( @@ -350,7 +350,7 @@ def test_read_gbq_w_primary_keys_table( # Verify that the DataFrame is already sorted by primary keys. sorted_result = result.sort_values(primary_keys) - bigframes.testing.assert_frame_equal(result, sorted_result) + bigframes.testing.utils.assert_frame_equal(result, sorted_result) # Verify that we're working from a snapshot rather than a copy of the table. assert "FOR SYSTEM_TIME AS OF" in df.sql @@ -389,7 +389,7 @@ def test_read_gbq_w_primary_keys_table_and_filters( # Verify that the DataFrame is already sorted by primary keys. sorted_result = result.sort_values(primary_keys) - bigframes.testing.assert_frame_equal(result, sorted_result) + bigframes.testing.utils.assert_frame_equal(result, sorted_result) @pytest.mark.parametrize( @@ -534,7 +534,7 @@ def test_read_gbq_w_ambigous_name( .to_pandas() ) pd_df = pd.DataFrame({"x": [2, 1], "ambiguous_name": [20, 10]}) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( df, pd_df, check_dtype=False, check_index_type=False ) @@ -771,8 +771,10 @@ def test_read_gbq_w_json_and_compare_w_pandas_json(session): dtype=pd.ArrowDtype(db_dtypes.JSONArrowType()), ) pd_df.index = pd_df.index.astype("Int64") - bigframes.testing.assert_series_equal(df.dtypes, pd_df.dtypes) - bigframes.testing.assert_series_equal(df["json_col"].to_pandas(), pd_df["json_col"]) + bigframes.testing.utils.assert_series_equal(df.dtypes, pd_df.dtypes) + bigframes.testing.utils.assert_series_equal( + df["json_col"].to_pandas(), pd_df["json_col"] + ) def test_read_gbq_w_json_in_struct(session): @@ -870,7 +872,7 @@ def test_read_pandas(session, scalars_dfs): result = df.to_pandas() expected = scalars_pandas_df - bigframes.testing.assert_frame_equal(result, expected) + bigframes.testing.utils.assert_frame_equal(result, 
expected) def test_read_pandas_series(session): @@ -878,14 +880,14 @@ def test_read_pandas_series(session): pd_series = pd.Series([3, 1, 4, 1, 5], dtype=pd.Int64Dtype(), index=idx) bf_series = session.read_pandas(pd_series) - bigframes.testing.assert_series_equal(bf_series.to_pandas(), pd_series) + bigframes.testing.utils.assert_series_equal(bf_series.to_pandas(), pd_series) def test_read_pandas_index(session): pd_idx: pd.Index = pd.Index([2, 7, 1, 2, 8], dtype=pd.Int64Dtype()) bf_idx = session.read_pandas(pd_idx) - bigframes.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx) + bigframes.testing.utils.assert_index_equal(bf_idx.to_pandas(), pd_idx) def test_read_pandas_w_unsupported_mixed_dtype(session): @@ -915,7 +917,7 @@ def test_read_pandas_col_label_w_space(session: bigframes.Session): ) result = session.read_pandas(expected).to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( result, expected, check_index_type=False, check_dtype=False ) @@ -923,7 +925,7 @@ def test_read_pandas_col_label_w_space(session: bigframes.Session): def test_read_pandas_multi_index(session, scalars_pandas_df_multi_index): df = session.read_pandas(scalars_pandas_df_multi_index) result = df.to_pandas() - bigframes.testing.assert_frame_equal(result, scalars_pandas_df_multi_index) + bigframes.testing.utils.assert_frame_equal(result, scalars_pandas_df_multi_index) def test_read_pandas_rowid_exists_adds_suffix(session, scalars_pandas_df_default_index): @@ -931,7 +933,9 @@ def test_read_pandas_rowid_exists_adds_suffix(session, scalars_pandas_df_default pandas_df["rowid"] = np.arange(pandas_df.shape[0]) df_roundtrip = session.read_pandas(pandas_df).to_pandas() - bigframes.testing.assert_frame_equal(df_roundtrip, pandas_df, check_dtype=False) + bigframes.testing.utils.assert_frame_equal( + df_roundtrip, pandas_df, check_dtype=False + ) def test_read_pandas_tokyo( @@ -970,7 +974,7 @@ def test_read_pandas_timedelta_dataframes(session, 
write_engine): expected_result = pandas_df.astype(bigframes.dtypes.TIMEDELTA_DTYPE) expected_result.index = expected_result.index.astype(bigframes.dtypes.INT_DTYPE) - bigframes.testing.assert_frame_equal(actual_result, expected_result) + bigframes.testing.utils.assert_frame_equal(actual_result, expected_result) @all_write_engines @@ -985,7 +989,7 @@ def test_read_pandas_timedelta_series(session, write_engine): .astype("timedelta64[ns]") ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_series, check_index_type=False ) @@ -1002,7 +1006,7 @@ def test_read_pandas_timedelta_index(session, write_engine): .astype("timedelta64[ns]") ) - bigframes.testing.assert_index_equal(actual_result, expected_index) + bigframes.testing.utils.assert_index_equal(actual_result, expected_index) @all_write_engines @@ -1021,7 +1025,7 @@ def test_read_pandas_json_dataframes(session, write_engine): expected_df, write_engine=write_engine ).to_pandas() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result, expected_df, check_index_type=False ) @@ -1039,7 +1043,7 @@ def test_read_pandas_json_series(session, write_engine): actual_result = session.read_pandas( expected_series, write_engine=write_engine ).to_pandas() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_series, check_index_type=False ) @@ -1067,7 +1071,7 @@ def test_read_pandas_json_index(session, write_engine): actual_result = session.read_pandas( expected_index, write_engine=write_engine ).to_pandas() - bigframes.testing.assert_index_equal(actual_result, expected_index) + bigframes.testing.utils.assert_index_equal(actual_result, expected_index) @pytest.mark.parametrize( @@ -1128,7 +1132,7 @@ def test_read_pandas_w_nested_json(session, write_engine): .to_pandas() .reset_index(drop=True) ) - bigframes.testing.assert_series_equal(bq_s, pd_s) + 
bigframes.testing.utils.assert_series_equal(bq_s, pd_s) @pytest.mark.parametrize( @@ -1212,7 +1216,7 @@ def test_read_pandas_w_nested_json_index(session, write_engine): ), ) bq_idx = session.read_pandas(pd_idx, write_engine=write_engine).to_pandas() - bigframes.testing.assert_index_equal(bq_idx, pd_idx) + bigframes.testing.utils.assert_index_equal(bq_idx, pd_idx) @all_write_engines @@ -1226,13 +1230,15 @@ def test_read_csv_for_gcs_file_w_write_engine(session, df_and_gcs_csv, write_eng write_engine=write_engine, dtype=scalars_df.dtypes.to_dict(), ) - bigframes.testing.assert_frame_equal(pd_df.to_pandas(), scalars_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal( + pd_df.to_pandas(), scalars_df.to_pandas() + ) if write_engine in ("default", "bigquery_load"): bf_df = session.read_csv( path, engine="bigquery", index_col="rowindex", write_engine=write_engine ) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1260,8 +1266,10 @@ def test_read_csv_for_local_file_w_sep(session, df_and_local_csv, sep): pd_df = session.read_csv( buffer, index_col="rowindex", sep=sep, dtype=scalars_df.dtypes.to_dict() ) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal( + bf_df.to_pandas(), scalars_df.to_pandas() + ) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1293,7 +1301,7 @@ def test_read_csv_for_index_col_w_false(session, df_and_local_csv, index_col): # (b/280889935) or guarantee row ordering. 
bf_df = bf_df.set_index("rowindex").sort_index() pd_df = pd_df.set_index("rowindex") - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1316,7 +1324,7 @@ def test_read_csv_for_index_col(session, df_and_gcs_csv, index_col): ) assert bf_df.shape == pd_df.shape - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1369,7 +1377,7 @@ def test_read_csv_for_gcs_wildcard_path(session, df_and_gcs_csv): assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names(session, df_and_gcs_csv_for_two_columns): @@ -1388,7 +1396,7 @@ def test_read_csv_for_names(session, df_and_gcs_csv_for_two_columns): # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index(names[0]).sort_index() pd_df = pd_df.set_index(names[0]) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_more_than_columns_can_raise_error( @@ -1417,7 +1425,7 @@ def test_read_csv_for_names_less_than_columns(session, df_and_gcs_csv_for_two_co # Pandas's index name is None, while BigFrames's index name is "rowindex". 
pd_df.index.name = "rowindex" - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_less_than_columns_raise_error_when_index_col_set( @@ -1455,7 +1463,7 @@ def test_read_csv_for_names_and_index_col( assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_df.to_pandas(), pd_df.to_pandas(), check_index_type=False ) @@ -1487,7 +1495,7 @@ def test_read_csv_for_names_and_usecols( # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index(names[0]).sort_index() pd_df = pd_df.set_index(names[0]) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_and_invalid_usecols( @@ -1534,7 +1542,7 @@ def test_read_csv_for_names_and_usecols_and_indexcol( assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_less_than_columns_and_same_usecols( @@ -1557,7 +1565,7 @@ def test_read_csv_for_names_less_than_columns_and_same_usecols( # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index(names[0]).sort_index() pd_df = pd_df.set_index(names[0]) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_names_less_than_columns_and_mismatched_usecols( @@ -1602,7 +1610,7 @@ def test_read_csv_for_dtype(session, df_and_gcs_csv_for_two_columns): # (b/280889935) or guarantee row ordering. 
bf_df = bf_df.set_index("rowindex").sort_index() pd_df = pd_df.set_index("rowindex") - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_for_dtype_w_names(session, df_and_gcs_csv_for_two_columns): @@ -1622,7 +1630,7 @@ def test_read_csv_for_dtype_w_names(session, df_and_gcs_csv_for_two_columns): # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index("a").sort_index() pd_df = pd_df.set_index("a") - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) @pytest.mark.parametrize( @@ -1689,8 +1697,10 @@ def test_read_csv_for_gcs_file_w_header(session, df_and_gcs_csv, header): # (b/280889935) or guarantee row ordering. bf_df = bf_df.set_index("rowindex").sort_index() pd_df = pd_df.set_index("rowindex") - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), scalars_df.to_pandas()) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal( + bf_df.to_pandas(), scalars_df.to_pandas() + ) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_w_usecols(session, df_and_local_csv): @@ -1718,7 +1728,7 @@ def test_read_csv_w_usecols(session, df_and_local_csv): # (b/280889935) or guarantee row ordering. 
bf_df = bf_df.set_index("rowindex").sort_index() pd_df = pd_df.set_index("rowindex") - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_w_usecols_and_indexcol(session, df_and_local_csv): @@ -1744,7 +1754,7 @@ def test_read_csv_w_usecols_and_indexcol(session, df_and_local_csv): assert bf_df.shape == pd_df.shape assert bf_df.columns.tolist() == pd_df.columns.tolist() - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_csv_w_indexcol_not_in_usecols(session, df_and_local_csv): @@ -1799,10 +1809,10 @@ def test_read_csv_local_w_encoding(session, penguins_pandas_df_default_index): bf_df = session.read_csv( path, engine="bigquery", index_col="rowindex", encoding="ISO-8859-1" ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_df.to_pandas(), penguins_pandas_df_default_index ) - bigframes.testing.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) + bigframes.testing.utils.assert_frame_equal(bf_df.to_pandas(), pd_df.to_pandas()) def test_read_pickle_local(session, penguins_pandas_df_default_index, tmp_path): @@ -1811,7 +1821,7 @@ def test_read_pickle_local(session, penguins_pandas_df_default_index, tmp_path): penguins_pandas_df_default_index.to_pickle(path) df = session.read_pickle(path) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( penguins_pandas_df_default_index, df.to_pandas() ) @@ -1822,7 +1832,7 @@ def test_read_pickle_buffer(session, penguins_pandas_df_default_index): buffer.seek(0) df = session.read_pickle(buffer) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( penguins_pandas_df_default_index, df.to_pandas() ) @@ -1843,7 +1853,7 @@ def test_read_pickle_gcs(session, penguins_pandas_df_default_index, 
gcs_folder): penguins_pandas_df_default_index.to_pickle(path) df = session.read_pickle(path) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( penguins_pandas_df_default_index, df.to_pandas() ) @@ -1918,7 +1928,7 @@ def test_read_parquet_gcs( assert df_out.size != 0 pd_df_in = df_in.to_pandas() pd_df_out = df_out.to_pandas() - bigframes.testing.assert_frame_equal(pd_df_in, pd_df_out) + bigframes.testing.utils.assert_frame_equal(pd_df_in, pd_df_out) @pytest.mark.parametrize( @@ -1968,7 +1978,7 @@ def test_read_parquet_gcs_compressed( assert df_out.size != 0 pd_df_in = df_in.to_pandas() pd_df_out = df_out.to_pandas() - bigframes.testing.assert_frame_equal(pd_df_in, pd_df_out) + bigframes.testing.utils.assert_frame_equal(pd_df_in, pd_df_out) @pytest.mark.parametrize( @@ -2013,7 +2023,7 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder): df = session.read_json(read_path, lines=True, orient="records", engine="bigquery") # The auto detects of BigQuery load job does not preserve any ordering of columns for json. - bigframes.testing.assert_index_equal( + bigframes.testing.utils.assert_index_equal( df.columns.sort_values(), scalars_df.columns.sort_values() ) @@ -2038,7 +2048,7 @@ def test_read_json_gcs_bq_engine(session, scalars_dfs, gcs_folder): ] ) assert df.shape[0] == scalars_df.shape[0] - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( df.dtypes.sort_index(), scalars_df.dtypes.sort_index() ) @@ -2064,7 +2074,7 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder): orient="records", ) - bigframes.testing.assert_index_equal(df.columns, scalars_df.columns) + bigframes.testing.utils.assert_index_equal(df.columns, scalars_df.columns) # The auto detects of BigQuery load job have restrictions to detect the bytes, # numeric and geometry types, so they're skipped here. 
@@ -2078,7 +2088,7 @@ def test_read_json_gcs_default_engine(session, scalars_dfs, gcs_folder): scalars_df = scalars_df.drop(columns=["date_col", "datetime_col", "time_col"]) assert df.shape[0] == scalars_df.shape[0] - bigframes.testing.assert_series_equal(df.dtypes, scalars_df.dtypes) + bigframes.testing.utils.assert_series_equal(df.dtypes, scalars_df.dtypes) @pytest.mark.parametrize( @@ -2226,7 +2236,7 @@ def _assert_query_dry_run_stats_are_valid(result: pd.Series): ] ) - bigframes.testing.assert_index_equal(result.index, expected_index) + bigframes.testing.utils.assert_index_equal(result.index, expected_index) assert result["columnCount"] + result["indexLevel"] > 0 @@ -2246,5 +2256,5 @@ def _assert_table_dry_run_stats_are_valid(result: pd.Series): ] ) - bigframes.testing.assert_index_equal(result.index, expected_index) + bigframes.testing.utils.assert_index_equal(result.index, expected_index) assert result["columnCount"] == len(result["columnDtypes"]) diff --git a/tests/system/small/test_window.py b/tests/system/small/test_window.py index 843ac2a581..187c76ad1f 100644 --- a/tests/system/small/test_window.py +++ b/tests/system/small/test_window.py @@ -62,7 +62,7 @@ def test_dataframe_rolling_closed_param(rows_rolling_dfs, closed): actual_result = bf_df.rolling(window=3, closed=closed).sum().to_pandas() expected_result = pd_df.rolling(window=3, closed=closed).sum() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result, expected_result, check_dtype=False ) @@ -83,7 +83,7 @@ def test_dataframe_groupby_rolling_closed_param(rows_rolling_dfs, closed): expected_result = ( pd_df.groupby(pd_df["int64_too"] % 2).rolling(window=3, closed=closed).sum() ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result[check_columns], expected_result, check_dtype=False ) @@ -94,7 +94,7 @@ def test_dataframe_rolling_on(rows_rolling_dfs): actual_result = bf_df.rolling(window=3, 
on="int64_too").sum().to_pandas() expected_result = pd_df.rolling(window=3, on="int64_too").sum() - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result, expected_result, check_dtype=False ) @@ -121,7 +121,7 @@ def test_dataframe_groupby_rolling_on(rows_rolling_dfs): expected_result = ( pd_df.groupby(pd_df["int64_too"] % 2).rolling(window=3, on="float64_col").sum() ) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result[check_columns], expected_result, check_dtype=False ) @@ -140,7 +140,7 @@ def test_series_rolling_closed_param(rows_rolling_series, closed): actual_result = bf_series.rolling(window=3, closed=closed).sum().to_pandas() expected_result = df_series.rolling(window=3, closed=closed).sum() - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_dtype=False ) @@ -159,7 +159,7 @@ def test_series_groupby_rolling_closed_param(rows_rolling_series, closed): expected_result = ( df_series.groupby(df_series % 2).rolling(window=3, closed=closed).sum() ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_dtype=False ) @@ -195,7 +195,7 @@ def test_series_window_agg_ops(rows_rolling_series, windowing, agg_op): actual_result = agg_op(windowing(bf_series)).to_pandas() expected_result = agg_op(windowing(pd_series)) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( expected_result, actual_result, check_dtype=False ) @@ -236,7 +236,7 @@ def test_dataframe_window_agg_ops(scalars_dfs, windowing, agg_op): bf_result = agg_op(windowing(bf_df)).to_pandas() pd_result = agg_op(windowing(pd_df)) - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.parametrize( @@ -283,7 +283,7 @@ def 
test_dataframe_window_agg_func(scalars_dfs, windowing, func): pd_result = windowing(pd_df).agg(func) - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result, check_dtype=False) def test_series_window_agg_single_func(scalars_dfs): @@ -296,7 +296,7 @@ def test_series_window_agg_single_func(scalars_dfs): pd_result = pd_series.expanding().agg("sum") - bigframes.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_series_equal(pd_result, bf_result, check_dtype=False) def test_series_window_agg_multi_func(scalars_dfs): @@ -309,7 +309,7 @@ def test_series_window_agg_multi_func(scalars_dfs): pd_result = pd_series.expanding().agg(["sum", np.mean]) - bigframes.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) + bigframes.testing.utils.assert_frame_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) @@ -335,7 +335,7 @@ def test_series_range_rolling(range_rolling_dfs, window, closed, ascending): .rolling(window=window, closed=closed) .min() ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_dtype=False, check_index=False ) @@ -356,7 +356,7 @@ def test_series_groupby_range_rolling(range_rolling_dfs): expected_result = ( pd_series.sort_index().groupby(pd_series % 2 == 0).rolling(window="3s").min() ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_dtype=False, check_index=False ) @@ -387,7 +387,7 @@ def test_dataframe_range_rolling(range_rolling_dfs, window, closed, ascending): # Need to cast Pandas index type. 
Otherwise it uses DatetimeIndex that # does not exist in BigFrame expected_result.index = expected_result.index.astype(dtypes.TIMESTAMP_DTYPE) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result, expected_result, check_dtype=False, @@ -404,7 +404,7 @@ def test_dataframe_range_rolling_on(range_rolling_dfs): # Need to specify the column order because Pandas (seemingly) # re-arranges columns alphabetically cols = ["ts_col", "int_col", "float_col"] - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result[cols], expected_result[cols], check_dtype=False, @@ -428,7 +428,7 @@ def test_dataframe_groupby_range_rolling(range_rolling_dfs): pd_df.sort_values(on).groupby("int_col").rolling(window="3s", on=on).min() ) expected_result.index = expected_result.index.set_names("index", level=1) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( actual_result, expected_result, check_dtype=False, @@ -455,7 +455,7 @@ def test_range_rolling_order_info_lookup(range_rolling_dfs): .rolling(window="3s") .count() ) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( actual_result, expected_result, check_dtype=False, check_index=False ) diff --git a/tests/unit/core/test_groupby.py b/tests/unit/core/test_groupby.py index faee007c3d..9e2eb5415e 100644 --- a/tests/unit/core/test_groupby.py +++ b/tests/unit/core/test_groupby.py @@ -33,7 +33,7 @@ def test_groupby_df_iter_by_key_singular(polars_session): bf_result = bf_group_df.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -47,7 +47,7 @@ def test_groupby_df_iter_by_key_list(polars_session): bf_result = bf_group_df.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_frame_equal( + 
bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -69,7 +69,7 @@ def test_groupby_df_iter_by_key_list_multiple(polars_session): bf_result = bf_group_df.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -85,7 +85,7 @@ def test_groupby_df_iter_by_level_singular(polars_session): bf_result = bf_group_df.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -109,7 +109,7 @@ def test_groupby_df_iter_by_level_list_one_item(polars_session): assert bf_key == tuple(pd_key) else: assert bf_key == (pd_key,) - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -131,7 +131,7 @@ def test_groupby_df_iter_by_level_list_multiple(polars_session): bf_result = bf_group_df.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_frame_equal( + bigframes.testing.utils.assert_frame_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -149,7 +149,7 @@ def test_groupby_series_iter_by_level_singular(polars_session): bf_result = bf_group_series.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -175,7 +175,7 @@ def test_groupby_series_iter_by_level_list_one_item(polars_session): assert bf_key == tuple(pd_key) else: assert bf_key == (pd_key,) - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, 
check_index_type=False ) @@ -199,7 +199,7 @@ def test_groupby_series_iter_by_level_list_multiple(polars_session): bf_result = bf_group_df.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -218,7 +218,7 @@ def test_groupby_series_iter_by_series(polars_session): bf_result = bf_group_series.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -237,7 +237,7 @@ def test_groupby_series_iter_by_series_list_one_item(polars_session): bf_result = bf_group_series.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) @@ -259,6 +259,6 @@ def test_groupby_series_iter_by_series_list_multiple(polars_session): bf_result = bf_group_series.to_pandas() pd_key, pd_result = pd_group assert bf_key == pd_key - bigframes.testing.assert_series_equal( + bigframes.testing.utils.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) From ca3ca55982f5119c39ee98d1ad413d02db712947 Mon Sep 17 00:00:00 2001 From: Garrett Wu Date: Tue, 10 Mar 2026 00:44:35 +0000 Subject: [PATCH 2/2] fix: mypy --- tests/system/small/bigquery/test_datetime.py | 2 +- tests/system/small/bigquery/test_geo.py | 2 +- tests/system/small/bigquery/test_sql.py | 2 +- tests/system/small/bigquery/test_struct.py | 2 +- tests/system/small/core/test_reshape.py | 2 +- tests/system/small/ml/test_metrics.py | 2 +- tests/system/small/ml/test_utils.py | 2 +- tests/system/small/operations/test_dates.py | 2 +- tests/system/small/operations/test_timedeltas.py | 2 +- tests/system/small/test_groupby.py | 2 +- 
tests/system/small/test_multiindex.py | 2 +- tests/system/small/test_numpy.py | 2 +- tests/system/small/test_window.py | 2 +- tests/unit/core/test_groupby.py | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/system/small/bigquery/test_datetime.py b/tests/system/small/bigquery/test_datetime.py index 8db75cf366..ff9bcb38a0 100644 --- a/tests/system/small/bigquery/test_datetime.py +++ b/tests/system/small/bigquery/test_datetime.py @@ -19,7 +19,7 @@ import pytest from bigframes import bigquery -import bigframes.testing +import bigframes.testing.utils _TIMESTAMP_DTYPE = pd.ArrowDtype(pa.timestamp("us", tz="UTC")) diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 66328ae9ad..021947eb9e 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -32,7 +32,7 @@ import bigframes.bigquery as bbq import bigframes.geopandas import bigframes.session -import bigframes.testing +import bigframes.testing.utils def test_geo_st_area(session: bigframes.session.Session): diff --git a/tests/system/small/bigquery/test_sql.py b/tests/system/small/bigquery/test_sql.py index 6bfcb75857..c0f7eed938 100644 --- a/tests/system/small/bigquery/test_sql.py +++ b/tests/system/small/bigquery/test_sql.py @@ -17,7 +17,7 @@ import bigframes.bigquery as bbq import bigframes.dtypes as dtypes import bigframes.pandas as bpd -import bigframes.testing +import bigframes.testing.utils def test_sql_scalar_for_all_scalar_types(scalars_df_null_index): diff --git a/tests/system/small/bigquery/test_struct.py b/tests/system/small/bigquery/test_struct.py index 5bcd208025..8540496960 100644 --- a/tests/system/small/bigquery/test_struct.py +++ b/tests/system/small/bigquery/test_struct.py @@ -16,7 +16,7 @@ import bigframes.bigquery as bbq import bigframes.series as series -import bigframes.testing +import bigframes.testing.utils @pytest.mark.parametrize( diff --git 
a/tests/system/small/core/test_reshape.py b/tests/system/small/core/test_reshape.py index 36ab79d5c4..519ed91fd3 100644 --- a/tests/system/small/core/test_reshape.py +++ b/tests/system/small/core/test_reshape.py @@ -17,7 +17,7 @@ from bigframes import session from bigframes.core.reshape import merge -import bigframes.testing +import bigframes.testing.utils @pytest.mark.parametrize( diff --git a/tests/system/small/ml/test_metrics.py b/tests/system/small/ml/test_metrics.py index 46c3cf0a0e..6675745ee6 100644 --- a/tests/system/small/ml/test_metrics.py +++ b/tests/system/small/ml/test_metrics.py @@ -20,7 +20,7 @@ import bigframes from bigframes.ml import metrics -import bigframes.testing +import bigframes.testing.utils def test_r2_score_perfect_fit(session): diff --git a/tests/system/small/ml/test_utils.py b/tests/system/small/ml/test_utils.py index 4d48569032..ec3bd315b1 100644 --- a/tests/system/small/ml/test_utils.py +++ b/tests/system/small/ml/test_utils.py @@ -16,7 +16,7 @@ import pytest import bigframes.ml.utils as utils -import bigframes.testing +import bigframes.testing.utils _DATA_FRAME = pd.DataFrame({"column": [1, 2, 3]}) _SERIES = pd.Series([1, 2, 3], name="column") diff --git a/tests/system/small/operations/test_dates.py b/tests/system/small/operations/test_dates.py index 826ee869f3..bce638d537 100644 --- a/tests/system/small/operations/test_dates.py +++ b/tests/system/small/operations/test_dates.py @@ -20,7 +20,7 @@ import pytest from bigframes import dtypes -import bigframes.testing +import bigframes.testing.utils def test_date_diff_between_series(session): diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py index 39ee5ca25e..429c813220 100644 --- a/tests/system/small/operations/test_timedeltas.py +++ b/tests/system/small/operations/test_timedeltas.py @@ -23,7 +23,7 @@ import pytest from bigframes import dtypes -import bigframes.testing +import bigframes.testing.utils # Some methods/features 
used by this test don't exist in pandas 1.x pytest.importorskip("pandas", minversion="2.0.0") diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index aa914993b9..d488b0a5ad 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -17,7 +17,7 @@ import pytest import bigframes.pandas as bpd -import bigframes.testing +import bigframes.testing.utils # ================= # DataFrame.groupby diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index e086ef6130..522e8db9e4 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ -17,7 +17,7 @@ import pytest import bigframes.pandas as bpd -import bigframes.testing +import bigframes.testing.utils # Sample MultiIndex for testing DataFrames where() method. _MULTI_INDEX = pandas.MultiIndex.from_tuples( diff --git a/tests/system/small/test_numpy.py b/tests/system/small/test_numpy.py index 9f2d02bbbc..774f72bef4 100644 --- a/tests/system/small/test_numpy.py +++ b/tests/system/small/test_numpy.py @@ -16,7 +16,7 @@ import pandas as pd import pytest -import bigframes.testing +import bigframes.testing.utils @pytest.mark.parametrize( diff --git a/tests/system/small/test_window.py b/tests/system/small/test_window.py index 187c76ad1f..61e1cac096 100644 --- a/tests/system/small/test_window.py +++ b/tests/system/small/test_window.py @@ -19,7 +19,7 @@ import pytest from bigframes import dtypes -import bigframes.testing +import bigframes.testing.utils @pytest.fixture(scope="module") diff --git a/tests/unit/core/test_groupby.py b/tests/unit/core/test_groupby.py index 9e2eb5415e..b23199da33 100644 --- a/tests/unit/core/test_groupby.py +++ b/tests/unit/core/test_groupby.py @@ -18,7 +18,7 @@ import bigframes.core.utils as utils import bigframes.pandas as bpd -import bigframes.testing +import bigframes.testing.utils pytest.importorskip("polars") pytest.importorskip("pandas", minversion="2.0.0")