diff --git a/.kokoro/presubmit/presubmit-doctest-bigframes.cfg b/.kokoro/presubmit/presubmit-doctest-bigframes.cfg index 7f514f762f3a..c8298623ff9d 100644 --- a/.kokoro/presubmit/presubmit-doctest-bigframes.cfg +++ b/.kokoro/presubmit/presubmit-doctest-bigframes.cfg @@ -9,4 +9,10 @@ env_vars: { env_vars: { key: "GOOGLE_CLOUD_PROJECT" value: "bigframes-testing" +} + +# Override the build file to only run for bigframes. +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/google-cloud-python/packages/bigframes/scripts/run_doctest.sh" } \ No newline at end of file diff --git a/packages/bigframes/bigframes/bigquery/_operations/struct.py b/packages/bigframes/bigframes/bigquery/_operations/struct.py index 580d93b977c0..d26dd53a6f03 100644 --- a/packages/bigframes/bigframes/bigquery/_operations/struct.py +++ b/packages/bigframes/bigframes/bigquery/_operations/struct.py @@ -41,6 +41,7 @@ def struct(value: dataframe.DataFrame) -> series.Series: >>> srs = series.Series([{"version": 1, "project": "pandas"}, {"version": 2, "project": "numpy"},]) >>> df = srs.struct.explode() + >>> df = df[["project", "version"]] # set the column order to ensure stable output for doctest >>> bbq.struct(df) 0 {'project': 'pandas', 'version': 1} 1 {'project': 'numpy', 'version': 2} diff --git a/packages/bigframes/bigframes/session/loader.py b/packages/bigframes/bigframes/session/loader.py index 960208063105..a07b6fd71ca7 100644 --- a/packages/bigframes/bigframes/session/loader.py +++ b/packages/bigframes/bigframes/session/loader.py @@ -54,6 +54,8 @@ from google.cloud import bigquery_storage_v1 from google.cloud.bigquery_storage_v1 import ( types as bq_storage_types, +) +from google.cloud.bigquery_storage_v1 import ( writer as bq_storage_writer, ) diff --git a/packages/bigframes/noxfile.py b/packages/bigframes/noxfile.py index 537d417e9145..3b636399fd0c 100644 --- a/packages/bigframes/noxfile.py +++ b/packages/bigframes/noxfile.py @@ -392,9 +392,6 @@ def system_noextras(session: nox.sessions.Session): @nox.session(python="3.12") def doctest(session: nox.sessions.Session): """Run the system test suite.""" - session.skip( - "Temporary skip to enable a PR merge. Remove skip as part of closing https://github.com/googleapis/google-cloud-python/issues/16489" - ) run_system( session=session, diff --git a/packages/bigframes/scripts/manage_cloud_functions.py b/packages/bigframes/scripts/manage_cloud_functions.py index ccf588bde7c2..c92be4ebadbf 100644 --- a/packages/bigframes/scripts/manage_cloud_functions.py +++ b/packages/bigframes/scripts/manage_cloud_functions.py @@ -67,6 +67,7 @@ def get_bigframes_functions(project, region): functions = GCF_CLIENT.list_functions( functions_v2.ListFunctionsRequest(parent=parent) ) + # Filter bigframes created functions functions = [ function diff --git a/packages/bigframes/scripts/run_doctest.sh b/packages/bigframes/scripts/run_doctest.sh new file mode 100755 index 000000000000..d5fd7256ece2 --- /dev/null +++ b/packages/bigframes/scripts/run_doctest.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -eo pipefail + +# Disable buffering, so that the logs stream through. +export PYTHONUNBUFFERED=1 + +# Assume we are running from the repo root or we need to find it. +# If this script is in packages/bigframes/scripts/run_doctest.sh, +# then repo root is 3 levels up. +export PROJECT_ROOT=$(realpath "$(dirname "${BASH_SOURCE[0]}")/../../..") +cd "$PROJECT_ROOT" + +git config --global --add safe.directory "$(realpath .)" + +package_name="bigframes" +package_path="packages/${package_name}" +files_to_check="${package_path}" + +# Use the IF block to handle the case where KOKORO vars are missing +# (e.g. local testing) +if [[ -n "${KOKORO_GITHUB_PULL_REQUEST_TARGET_BRANCH}" && -n "${KOKORO_GITHUB_PULL_REQUEST_COMMIT}" ]]; then + echo "checking changes with 'git diff ${KOKORO_GITHUB_PULL_REQUEST_TARGET_BRANCH}...${KOKORO_GITHUB_PULL_REQUEST_COMMIT} -- ${files_to_check}'" + + package_modified=$(git diff "${KOKORO_GITHUB_PULL_REQUEST_TARGET_BRANCH}...${KOKORO_GITHUB_PULL_REQUEST_COMMIT}" -- "${files_to_check}" | wc -l) +else + # If not a PR (like a local run or a different CI trigger), + # we treat it as 0 so it falls through to the "continuous" check. + package_modified=0 +fi + +# Check if modified OR if it's a continuous build +if [[ "${package_modified}" -gt 0 || "$KOKORO_BUILD_ARTIFACTS_SUBDIR" == *"continuous"* ]]; then + echo "------------------------------------------------------------" + echo "Running doctest for: ${package_name}" + echo "------------------------------------------------------------" + + # Ensure credentials are set for system tests in Kokoro + if [[ -z "${GOOGLE_APPLICATION_CREDENTIALS}" && -f "${KOKORO_GFILE_DIR}/service-account.json" ]]; then + export GOOGLE_APPLICATION_CREDENTIALS="${KOKORO_GFILE_DIR}/service-account.json" + fi + + export GOOGLE_CLOUD_PROJECT="bigframes-testing" + NOX_SESSION=("cleanup" "doctest") + + cd "${package_path}" + python3 -m nox -s "${NOX_SESSION[@]}" +else + echo "No changes in ${package_name} and not a continuous build, skipping." +fi \ No newline at end of file diff --git a/packages/bigframes/tests/system/small/bigquery/test_json.py b/packages/bigframes/tests/system/small/bigquery/test_json.py index d2ebb73972a6..8e73440a787c 100644 --- a/packages/bigframes/tests/system/small/bigquery/test_json.py +++ b/packages/bigframes/tests/system/small/bigquery/test_json.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json + import geopandas as gpd # type: ignore import pandas as pd import pyarrow as pa @@ -396,8 +398,8 @@ def test_to_json_from_int(): def test_to_json_from_struct(): s = bpd.Series( [ - {"version": 1, "project": "pandas"}, - {"version": 2, "project": "numpy"}, + {"project": "pandas", "version": 1}, + {"project": "numpy", "version": 2}, ] ) assert dtypes.is_struct_like(s.dtype) @@ -408,7 +410,9 @@ def test_to_json_from_struct(): dtype=dtypes.JSON_DTYPE, ) - pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) + actual_json = [json.loads(x) for x in actual.to_pandas()] + expected_json = [json.loads(x) for x in expected.to_pandas()] + assert actual_json == expected_json def test_to_json_string_from_int(): @@ -421,8 +425,8 @@ def test_to_json_string_from_int(): def test_to_json_string_from_struct(): s = bpd.Series( [ - {"version": 1, "project": "pandas"}, - {"version": 2, "project": "numpy"}, + {"project": "pandas", "version": 1}, + {"project": "numpy", "version": 2}, ] ) assert dtypes.is_struct_like(s.dtype) @@ -433,7 +437,9 @@ def test_to_json_string_from_struct(): dtype=dtypes.STRING_DTYPE, ) - pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas()) + actual_json = [json.loads(x) for x in actual.to_pandas()] + expected_json = [json.loads(x) for x in expected.to_pandas()] + assert actual_json == expected_json def test_json_keys(): diff --git a/packages/bigframes/tests/system/small/test_series.py b/packages/bigframes/tests/system/small/test_series.py index 5de7ce4256f9..0ac8f1eb61e9 100644 --- a/packages/bigframes/tests/system/small/test_series.py +++ b/packages/bigframes/tests/system/small/test_series.py @@ -1234,7 +1234,7 @@ def test_divmods_series(scalars_dfs, col_x, col_y, method): # BigQuery's mod functions return NUMERIC values for non-INT64 inputs. if bf_div_result.dtype == pd.Int64Dtype(): bigframes.testing.utils.assert_series_equal( - pd_div_result, bf_div_result.to_pandas() + pd_div_result, bf_div_result.to_pandas(), check_dtype=False ) else: bigframes.testing.utils.assert_series_equal( @@ -1279,7 +1279,7 @@ def test_divmods_scalars(scalars_dfs, col_x, other, method): # BigQuery's mod functions return NUMERIC values for non-INT64 inputs. if bf_div_result.dtype == pd.Int64Dtype(): bigframes.testing.utils.assert_series_equal( - pd_div_result, bf_div_result.to_pandas() + pd_div_result, bf_div_result.to_pandas(), check_dtype=False ) else: bigframes.testing.utils.assert_series_equal(