diff --git a/.asf.yaml b/.asf.yaml index 16e358f62f..677adaeb0d 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -22,9 +22,12 @@ github: - java enabled_merge_buttons: squash: true + squash_commit_message: PR_TITLE merge: false rebase: false - autolink_jira: RATIS + autolink_jira: + - HDDS + - RATIS notifications: commits: commits@ratis.apache.org diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..4ada4d9821 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: 2 + +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # 'daily' only runs on weekdays + interval: "cron" + cronjob: "15 9 * * *" + cooldown: + default-days: 7 + - package-ecosystem: "maven" + directory: "/" + schedule: + # 'daily' only runs on weekdays + interval: "cron" + cronjob: "15 10 * * *" + cooldown: + default-days: 7 + ignore: + # requires Java 11 + - dependency-name: "com.github.spotbugs:spotbugs" + versions: [">=4.9.0"] + - dependency-name: "com.github.spotbugs:spotbugs-maven-plugin" + versions: [">=4.9.0.0"] + - dependency-name: "org.mockito:mockito-core" + versions: [">=5.0.0"] + # requires Java 17 + - dependency-name: "org.apache.hadoop:*" + versions: [">=3.5.0"] + - dependency-name: "org.junit:junit-bom" + versions: [">=6.0.0"] diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml new file mode 100644 index 0000000000..a10c02f3d5 --- /dev/null +++ b/.github/workflows/check.yaml @@ -0,0 +1,212 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This reusable workflow executes a single check from `dev-support/checks/`. +# Before and after the check, it performs various steps based on workflow inputs. 
+ +name: ci-check + +on: + workflow_call: + inputs: + # REQUIRED + script: + type: string + description: "Test script to run from dev-support/checks, without .sh extension" + required: true + + # OPTIONAL (ordered alphabetically) + java-version: + type: string + description: "Java version to set up (default: 17)" + default: '17' + required: false + + needs-binary-tarball: + type: boolean + description: "Whether to download Ratis binary tarball created by build (default: no)" + default: false + required: false + + needs-maven-repo: + type: boolean + description: "Whether to download Ratis jars created by build (default: no)" + default: false + required: false + + needs-source-tarball: + type: boolean + description: "Whether to download Ratis source tarball created by build (default: no)" + default: false + required: false + + runner: + type: string + description: "GitHub Actions runner to use" + default: 'ubuntu-24.04' + required: false + + script-args: + type: string + description: "Arguments for the test script" + default: '' + required: false + + split: + type: string + description: "Name of split for matrix jobs, only used in display name" + default: '' + required: false + + timeout-minutes: + type: number + description: "Job timeout in minutes (default: 30)" + default: 30 + required: false + + secrets: + DEVELOCITY_ACCESS_KEY: + description: 'Token for submitting build scan to Develocity' + required: false + +env: + MAVEN_ARGS: --batch-mode --show-version + MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3 + SCRIPT: ${{ inputs.script }} + WITH_COVERAGE: ${{ github.event_name == 'push' }} + +jobs: + check: + name: ${{ (inputs.split && format('{0} ({1})', inputs.script, inputs.split)) || inputs.script }} + runs-on: ${{ inputs.runner }} + timeout-minutes: ${{ inputs.timeout-minutes }} + steps: + - name: Checkout project + if: ${{ !inputs.needs-source-tarball }} + 
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Download source tarball + if: ${{ inputs.needs-source-tarball }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ratis-src + + - name: Extract source tarball + if: ${{ inputs.needs-source-tarball }} + run: | + tar --strip-components 1 -xzvf ratis*-src.tar.gz + + - name: Create cache for Maven dependencies + if: ${{ inputs.script == 'build' }} + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/ratis + key: maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-repo- + + - name: Restore cache for Maven dependencies + if: ${{ inputs.script != 'build' }} + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.m2/repository/*/*/* + !~/.m2/repository/org/apache/ratis + key: maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-repo- + + - name: Download Maven repo + id: download-maven-repo + if: ${{ inputs.needs-maven-repo }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: maven-repo + path: | + ~/.m2/repository/org/apache/ratis + + - name: Download binary tarball + if: ${{ inputs.needs-binary-tarball }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ratis-bin + + - name: Extract binary tarball + if: ${{ inputs.needs-binary-tarball }} + run: | + mkdir -p ratis-assembly/target + tar xzvf ratis-*-bin.tar.gz -C ratis-assembly/target + + - name: Setup java ${{ inputs.java-version }} + if: ${{ inputs.java-version }} + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + distribution: 'temurin' + java-version: ${{ inputs.java-version }} + + - name: Execute tests + run: | + $COMMAND + env: + COMMAND: 
dev-support/checks/${{ inputs.script }}.sh ${{ inputs.script-args }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + + - name: Summary of failures + if: ${{ failure() }} + run: | + if [[ -s "target/$SCRIPT/summary.txt" ]]; then + cat target/$SCRIPT/summary.txt + fi + + - name: Archive build results + if: ${{ !cancelled() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: ${{ (inputs.split && format('{0}-{1}', inputs.script, inputs.split)) || inputs.script }} + path: target/${{ inputs.script }} + continue-on-error: true + + # The following steps are hard-coded to be run only for 'build' check, + # to avoid the need for 3 more inputs. + - name: Store binaries for tests + if: ${{ inputs.script == 'build' && !cancelled() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: ratis-bin + path: | + ratis-assembly/target/ratis-assembly-*-bin.tar.gz + retention-days: 1 + + - name: Store source tarball for compilation + if: ${{ inputs.script == 'build' && !cancelled() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: ratis-src + path: | + ratis-assembly/target/ratis-assembly-*-src.tar.gz + retention-days: 1 + + - name: Store Maven repo for tests + if: ${{ inputs.script == 'build' && !cancelled() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: maven-repo + path: | + ~/.m2/repository/org/apache/ratis + retention-days: 1 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000000..a00a07955f --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,166 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: CI + +on: + workflow_call: + inputs: + ref: + type: string + description: Ratis git ref (branch, tag or commit hash) + default: '' + required: false + secrets: + DEVELOCITY_ACCESS_KEY: + description: 'Token for submitting build scan to Develocity' + required: false + SONARCLOUD_TOKEN: + description: 'Token for submitting coverage data to SonarCloud' + required: false + +permissions: { } + +jobs: + build: + uses: ./.github/workflows/check.yaml + with: + script: build + script-args: -Prelease + timeout-minutes: 30 + secrets: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + + compile: + needs: + - build + strategy: + matrix: + java: [ 8, 11, 17, 21 ] + fail-fast: false + uses: ./.github/workflows/check.yaml + with: + java-version: ${{ matrix.java }} + needs-source-tarball: true + script: compile + script-args: -Dmaven.compiler.release=${{ matrix.java }} + split: ${{ matrix.java }} + timeout-minutes: 30 + secrets: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + + release: + uses: ./.github/workflows/check.yaml + with: + script: release + timeout-minutes: 30 + secrets: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + + repro: + needs: + - build + uses: ./.github/workflows/check.yaml + with: + needs-maven-repo: true + script: repro + script-args: -Prelease + timeout-minutes: 30 + secrets: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + 
+  basic:
+    strategy:
+      matrix:
+        check:
+          - author
+          - checkstyle
+          - findbugs
+          - rat
+      fail-fast: false
+    uses: ./.github/workflows/check.yaml
+    with:
+      script: ${{ matrix.check }}
+      timeout-minutes: 30
+    secrets:
+      DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }}
+
+  unit:
+    strategy:
+      matrix:
+        profile:
+          - grpc
+          - server
+          - misc
+          - flaky
+      fail-fast: false
+    uses: ./.github/workflows/check.yaml
+    with:
+      script: unit
+      script-args: -P${{ matrix.profile }}-tests
+      split: ${{ matrix.profile }}
+      timeout-minutes: 60
+    secrets:
+      DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }}
+
+  coverage:
+    needs:
+      - build
+      - unit
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    if: github.event_name != 'pull_request'
+    steps:
+      - name: Checkout project
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+      - name: Cache for maven dependencies
+        uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
+        with:
+          path: | # must list the same paths as check.yaml to hit the cache it saves
+            ~/.m2/repository/*/*/*
+            !~/.m2/repository/org/apache/ratis
+          key: maven-repo-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            maven-repo-
+      - name: Setup java 17
+        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
+        with:
+          distribution: 'temurin'
+          java-version: 17
+      - name: Download artifacts
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+        with:
+          path: target/artifacts
+      - name: Untar binaries
+        run: |
+          mkdir -p ratis-assembly/target
+          tar xzvf target/artifacts/ratis-bin/ratis-assembly-*.tar.gz -C ratis-assembly/target
+      - name: Calculate combined coverage
+        run: ./dev-support/checks/coverage.sh
+      - name: Upload coverage to Sonar
+        if: github.repository == 'apache/ratis'
+        run: ./dev-support/checks/sonar.sh
+        env:
+          SONAR_TOKEN: ${{ secrets.SONARCLOUD_TOKEN }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Archive build results
+        uses:
actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + if: always() + with: + name: ${{ github.job }} + path: target/${{ github.job }} diff --git a/.github/workflows/close-stale-pr.yaml b/.github/workflows/close-stale-pr.yaml new file mode 100644 index 0000000000..010f0c955d --- /dev/null +++ b/.github/workflows/close-stale-pr.yaml @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: close-stale-prs + +on: + schedule: + # We can run this only once a week on Sundays so contributors get sufficient time to follow up. + - cron: '0 0 * * 0' + +jobs: + close-stale-prs: + permissions: + pull-requests: write + runs-on: ubuntu-slim + steps: + - name: Close Stale PRs + uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0 + with: + stale-pr-label: 'stale' + exempt-draft-pr: false + days-before-issue-stale: -1 + days-before-pr-stale: 60 + days-before-pr-close: 30 + remove-pr-stale-when-updated: true + operations-per-run: 500 + stale-pr-message: 'This PR has been marked as stale due to 60 days of inactivity. Please comment or remove the stale label to keep it open. Otherwise, it will be automatically closed in ~30 days.' + close-pr-message: 'Thank you for your contribution. 
This PR is being closed due to inactivity. Please contact a maintainer if you would like to reopen it.' diff --git a/.github/workflows/post-commit.yaml b/.github/workflows/post-commit.yaml new file mode 100644 index 0000000000..4a946f8621 --- /dev/null +++ b/.github/workflows/post-commit.yaml @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: build-branch + +on: + push: + branches-ignore: + - 'dependabot/**' + tags: + - '**' + pull_request: + +concurrency: + group: ci-${{ github.event.pull_request.number || case(github.repository == 'apache/ratis', github.sha, github.ref_name) }} + cancel-in-progress: ${{ github.event_name == 'pull_request' || github.repository != 'apache/ratis' }} + +permissions: { } + +jobs: + CI: + if: github.event_name == 'pull_request' + || github.repository == 'apache/ratis' + || github.ref_name != 'master' + uses: ./.github/workflows/ci.yaml + secrets: + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + SONARCLOUD_TOKEN: ${{ secrets.SONARCLOUD_TOKEN }} diff --git a/.github/workflows/post-commit.yml b/.github/workflows/post-commit.yml deleted file mode 100644 index 2d3258ccc5..0000000000 --- a/.github/workflows/post-commit.yml +++ /dev/null @@ -1,265 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-name: build-branch -on: - - push - - pull_request -env: - WITH_COVERAGE: true -jobs: - build: - runs-on: ubuntu-20.04 - steps: - - name: Checkout project - uses: actions/checkout@v4 - - name: Cache for maven dependencies - uses: actions/cache@v4 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/ratis - key: maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - maven-repo- - - name: Setup java - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: 8 - - name: Run a full build - run: ./dev-support/checks/build.sh -Prelease assembly:single - - name: Store binaries for tests - uses: actions/upload-artifact@v4 - with: - name: ratis-bin - path: | - ratis-assembly/target/apache-ratis-*.tar.gz - !ratis-assembly/target/apache-ratis-*-src.tar.gz - retention-days: 1 - - name: Store source tarball for compilation - uses: actions/upload-artifact@v4 - with: - name: ratis-src - path: ratis-assembly/target/apache-ratis-*-src.tar.gz - retention-days: 1 - compile: - needs: - - build - runs-on: ubuntu-20.04 - strategy: - matrix: - java: [ 11 ] - fail-fast: false - steps: - - name: Download source tarball - uses: actions/download-artifact@v4 - with: - name: ratis-src - - name: Untar sources - run: | - tar --strip-components 1 -xzvf apache-ratis-*-src.tar.gz - - name: Cache for maven dependencies - uses: actions/cache/restore@v4 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/ratis - key: maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - maven-repo- - - name: Setup java - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: ${{ matrix.java }} - - name: Run a full build - run: ./dev-support/checks/build.sh - rat: - name: rat - runs-on: ubuntu-20.04 - steps: - - name: Checkout project - uses: actions/checkout@v4 - - name: Cache for maven dependencies - uses: actions/cache/restore@v4 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/ratis - key: maven-repo-${{ 
hashFiles('**/pom.xml') }} - restore-keys: | - maven-repo- - - name: Run tests - run: ./dev-support/checks/rat.sh - - name: Upload results - uses: actions/upload-artifact@v4 - if: always() - with: - name: rat - path: target/rat - author: - name: author - runs-on: ubuntu-20.04 - steps: - - name: Checkout project - uses: actions/checkout@v4 - - name: Run tests - run: ./dev-support/checks/author.sh - - name: Upload results - uses: actions/upload-artifact@v4 - if: always() - with: - name: author - path: target/author - unit: - name: unit - runs-on: ubuntu-20.04 - strategy: - matrix: - profile: - - grpc - - server - - misc - fail-fast: false - steps: - # TEMPORARY WHILE GITHUB FIXES https://github.com/actions/virtual-environments/issues/3185 - - name: Add the current IP address, long hostname and short hostname record to /etc/hosts file - run: | - echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - # REMOVE CODE ABOVE WHEN ISSUE IS ADDRESSED! 
- - name: Checkout project - uses: actions/checkout@v4 - - name: Cache for maven dependencies - uses: actions/cache/restore@v4 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/ratis - key: maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - maven-repo- - - name: Setup java - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: 8 - - name: Run tests - run: ./dev-support/checks/unit.sh -P${{ matrix.profile }}-tests - - name: Summary of failures - run: cat target/${{ github.job }}/summary.txt - if: ${{ !cancelled() }} - - name: Upload results - uses: actions/upload-artifact@v4 - if: ${{ !cancelled() }} - with: - name: unit-${{ matrix.profile }} - path: target/unit - checkstyle: - name: checkstyle - runs-on: ubuntu-20.04 - steps: - - name: Checkout project - uses: actions/checkout@v4 - - name: Cache for maven dependencies - uses: actions/cache/restore@v4 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/ratis - key: maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - maven-repo- - - name: Run tests - run: ./dev-support/checks/checkstyle.sh - - name: Upload results - uses: actions/upload-artifact@v4 - if: always() - with: - name: checkstyle - path: target/checkstyle - findbugs: - name: findbugs - runs-on: ubuntu-20.04 - steps: - - name: Setup java - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: 8 - - name: Checkout project - uses: actions/checkout@v4 - - name: Cache for maven dependencies - uses: actions/cache/restore@v4 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/ratis - key: maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - maven-repo- - - name: Run tests - run: ./dev-support/checks/findbugs.sh - - name: Upload results - uses: actions/upload-artifact@v4 - if: always() - with: - name: findbugs - path: target/findbugs - coverage: - needs: - - build - - unit - runs-on: ubuntu-20.04 - if: (github.repository == 'apache/ratis' 
|| github.repository == 'apache/incubator-ratis') && github.event_name != 'pull_request' - steps: - - name: Checkout project - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Cache for maven dependencies - uses: actions/cache/restore@v4 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/ratis - key: maven-repo-${{ hashFiles('**/pom.xml') }} - restore-keys: | - maven-repo- - - name: Setup java 17 - uses: actions/setup-java@v4 - with: - distribution: 'temurin' - java-version: 17 - - name: Download artifacts - uses: actions/download-artifact@v4 - with: - path: target/artifacts - - name: Untar binaries - run: | - mkdir -p ratis-assembly/target - tar xzvf target/artifacts/ratis-bin/apache-ratis*.tar.gz -C ratis-assembly/target - - name: Calculate combined coverage - run: ./dev-support/checks/coverage.sh - - name: Upload coverage to Sonar - run: ./dev-support/checks/sonar.sh - env: - SONAR_TOKEN: ${{ secrets.SONARCLOUD_TOKEN }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Archive build results - uses: actions/upload-artifact@v4 - if: always() - with: - name: ${{ github.job }} - path: target/${{ github.job }} diff --git a/.github/workflows/repeat-test.yml b/.github/workflows/repeat-test.yaml similarity index 80% rename from .github/workflows/repeat-test.yml rename to .github/workflows/repeat-test.yaml index e3c05bec6a..4bfeebe7f1 100644 --- a/.github/workflows/repeat-test.yml +++ b/.github/workflows/repeat-test.yaml @@ -47,19 +47,24 @@ env: TEST_METHOD: ${{ github.event.inputs.test-method }} ITERATIONS: ${{ github.event.inputs.iterations }} FAIL_FAST: ${{ github.event.inputs.fail-fast }} + SPLITS: ${{ github.event.inputs.splits }} run-name: ${{ github.event_name == 'workflow_dispatch' && format('{0}#{1}[{2}]-{3}x{4}', inputs.test-class, inputs.test-method, inputs.ref, inputs.splits, inputs.iterations) || '' }} + +permissions: { } + jobs: prepare: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 outputs: matrix: ${{ 
steps.generate.outputs.matrix }} test-spec: ${{ steps.test-spec.outputs.test-spec }} + ref: ${{ steps.ref.outputs.ref }} steps: - id: generate name: Generate test matrix run: | splits=() - for ((i = 1; i <= ${{ github.event.inputs.splits }}; i++)); do + for ((i = 1; i <= $SPLITS; i++)); do splits+=("$i") done printf -v x "%s," "${splits[@]}" @@ -75,11 +80,17 @@ jobs: fi echo "Test to be run: $test_spec" echo "test-spec=$test_spec" >> $GITHUB_OUTPUT + - name: Define checkout ref + id: ref + run: | + echo "ref=$REF" >> $GITHUB_OUTPUT + env: + REF: ${{ github.event.inputs.ref }} test: if: ${{ always() }} needs: - prepare - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 env: TEST_SPEC: ${{ needs.prepare.outputs.test-spec }} strategy: @@ -87,11 +98,12 @@ jobs: split: ${{ fromJson(needs.prepare.outputs.matrix) }} fail-fast: ${{ fromJson(github.event.inputs.fail-fast) }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - ref: ${{ github.event.inputs.ref }} + persist-credentials: false + ref: ${{ needs.prepare.outputs.ref }} - name: Cache for maven dependencies - uses: actions/cache@v4 + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: path: | ~/.m2/repository @@ -100,7 +112,7 @@ jobs: restore-keys: | maven-repo- - name: Setup java - uses: actions/setup-java@v4 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 with: distribution: 'temurin' java-version: 8 @@ -113,18 +125,18 @@ jobs: run: dev-support/checks/_summary.sh target/unit/summary.txt if: ${{ !cancelled() }} - name: Archive build results - uses: actions/upload-artifact@v4 - if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + if: ${{ failure() }} with: - name: result-${{ env.TEST_CLASS }}-split-${{ matrix.split }} + name: result-${{ github.run_number }}-${{ github.run_id }}-split-${{ matrix.split }} path: target/unit count-failures: - if: ${{ always() 
}}
+    if: ${{ failure() }}
     needs: test
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     steps:
       - name: Download build results
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
       - name: Count failures
         run: |
           failures=$(find . -name 'summary.txt' | grep -v 'iteration' | xargs grep -v 'exit code: 0' | wc -l)
diff --git a/.github/workflows/vulnerability-check.yaml b/.github/workflows/vulnerability-check.yaml
new file mode 100644
index 0000000000..307ef97185
--- /dev/null
+++ b/.github/workflows/vulnerability-check.yaml
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: vulnerability-check
+
+on:
+  schedule:
+    # Run at 16:00 UTC every Sunday (Monday 00:00 CST)
+    - cron: "0 16 * * 0"
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
+  MAVEN_ARGS: --batch-mode --no-transfer-progress
+
+permissions: { }
+
+jobs:
+  dependency-check:
+    if: ${{ github.event_name == 'workflow_dispatch' || github.repository == 'apache/ratis' }}
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          persist-credentials: false
+      - name: Set up JDK 11
+        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0
+        with:
+          distribution: corretto
+          java-version: 11
+
+      - name: Do Maven install
+        shell: bash
+        run: mvn $MAVEN_ARGS clean install -DskipTests
+
+      - name: Do the dependency-check:aggregate
+        shell: bash
+        # Secrets are passed through env and expanded by the shell, not templated
+        # into the script, so special characters cannot alter the command line.
+        run: mvn $MAVEN_ARGS org.owasp:dependency-check-maven:aggregate -DossIndexUsername="$OSS_INDEX_USER" -DossIndexPassword="$OSS_INDEX_TOKEN" -DnvdApiKey="$NVD_API_KEY"
+        env:
+          OSS_INDEX_USER: ${{ secrets.OSS_INDEX_USER }}
+          OSS_INDEX_TOKEN: ${{ secrets.OSS_INDEX_TOKEN }}
+          NVD_API_KEY: ${{ secrets.NVD_API_KEY }}
+
+      - name: Generate report date for artifact name
+        run: |
+          target_time=$(TZ=Asia/Shanghai date -d "$utc_time" +"%Y-%m-%d")
+          echo "REPORT_DATE=$target_time" >> $GITHUB_ENV
+        env:
+          utc_time: ${{ github.run_started_at }}
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
+        with:
+          name: vulnerability-check-result-${{ env.REPORT_DATE }}
+          path: target/dependency-check-report.html
+          retention-days: 15
diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml
new file mode 100644
index 0000000000..6b7263f91d
--- /dev/null
+++ b/.github/workflows/zizmor.yml
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license
agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: zizmor + +on: + push: + pull_request: + +permissions: { } + +jobs: + zizmor: + runs-on: ubuntu-latest + permissions: + security-events: write + steps: + - name: Checkout project + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run zizmor + uses: zizmorcore/zizmor-action@b1d7e1fb5de872772f31590499237e7cce841e8e # v0.5.3 diff --git a/.gitignore b/.gitignore index 9379453102..cf5943b0df 100644 --- a/.gitignore +++ b/.gitignore @@ -8,9 +8,11 @@ *.sdf *.suo *.vcxproj.user +.dev-tools/ .hugo_build.lock .idea .classpath +.mvn/.develocity/ .project .settings target diff --git a/.mvn/develocity.xml b/.mvn/develocity.xml new file mode 100644 index 0000000000..3bef395946 --- /dev/null +++ b/.mvn/develocity.xml @@ -0,0 +1,53 @@ + + + + ratis + + https://develocity.apache.org + false + + + + true + true + true + false + + #{isFalse(env['GITHUB_ACTIONS'])} + + + + + #{{'0.0.0.0'}} + + + + + #{isFalse(env['GITHUB_ACTIONS'])} + + + false + + + diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml new file mode 100644 index 0000000000..597996803a --- /dev/null +++ b/.mvn/extensions.xml @@ -0,0 +1,34 @@ + + + + + com.gradle + develocity-maven-extension + 2.4.0 + + + com.gradle + 
common-custom-user-data-maven-extension + 2.2.0 + + diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties index 08ea486aa5..d58dfb70ba 100644 --- a/.mvn/wrapper/maven-wrapper.properties +++ b/.mvn/wrapper/maven-wrapper.properties @@ -14,5 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.0/apache-maven-3.9.0-bin.zip -wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar +wrapperVersion=3.3.2 +distributionType=only-script +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip diff --git a/dev-support/checks/_lib.sh b/dev-support/checks/_lib.sh new file mode 100644 index 0000000000..fd30f756fb --- /dev/null +++ b/dev-support/checks/_lib.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +check_name="$(basename "${BASH_SOURCE[1]}")" +check_name="${check_name%.sh}" + +: ${TOOLS_DIR:=$(pwd)/.dev-tools} # directory for tools +: ${RATIS_PREFER_LOCAL_TOOL:=true} # skip install if tools are already available (eg. via package manager) + +## @description Install a dependency. Only first argument is mandatory. +## @param name of the tool +## @param the directory for binaries, relative to the tool directory; added to PATH. +## @param the directory for the tool, relative to TOOLS_DIR +## @param name of the executable, for testing if it is already installed +## @param name of the function that performs actual installation steps +_install_tool() { + local tool bindir dir bin func + + tool="$1" + bindir="${2:-}" + dir="${TOOLS_DIR}"/"${3:-"${tool}"}" + bin="${4:-"${tool}"}" + func="${5:-"_install_${tool}"}" + + if [[ "${RATIS_PREFER_LOCAL_TOOL}" == "true" ]] && which "${bin}" >& /dev/null; then + echo "Skip installing $bin, as it's already available on PATH." + return + fi + + if [[ ! -d "${dir}" ]]; then + mkdir -pv "${dir}" + _do_install "${tool}" "${dir}" "${func}" + fi + + if [[ -n "${bindir}" ]]; then + _add_to_path "${dir}"/"${bindir}" + + if ! 
which "${bin}" >& /dev/null; then + _do_install "${tool}" "${dir}" "${func}" + _add_to_path "${dir}"/"${bindir}" + fi + fi +} + +_do_install() { + local tool="$1" + local dir="$2" + local func="$3" + + pushd "${dir}" + if eval "${func}"; then + echo "Installed ${tool} in ${dir}" + popd + else + popd + msg="Failed to install ${tool}" + echo "${msg}" >&2 + if [[ -n "${REPORT_FILE}" ]]; then + echo "${msg}" >> "${REPORT_FILE}" + fi + exit 1 + fi +} + +_add_to_path() { + local bindir="$1" + + if [[ -d "${bindir}" ]]; then + if [[ "${RATIS_PREFER_LOCAL_TOOL}" == "true" ]]; then + export PATH="${PATH}:${bindir}" + else + export PATH="${bindir}:${PATH}" + fi + fi +} diff --git a/dev-support/checks/build.sh b/dev-support/checks/build.sh index 6add1ae605..ee8d3f3d7e 100755 --- a/dev-support/checks/build.sh +++ b/dev-support/checks/build.sh @@ -20,7 +20,7 @@ source "${DIR}/../find_maven.sh" : ${WITH_COVERAGE:="false"} -MAVEN_OPTIONS='-V -B -Dmaven.javadoc.skip=true -DskipTests --no-transfer-progress' +MAVEN_OPTIONS='-V -B -Dmaven.javadoc.skip=true -DskipTests' if [[ "${WITH_COVERAGE}" != "true" ]]; then MAVEN_OPTIONS="${MAVEN_OPTIONS} -Djacoco.skip" diff --git a/dev-support/checks/checkstyle.sh b/dev-support/checks/checkstyle.sh index a2ee427380..473035bf11 100755 --- a/dev-support/checks/checkstyle.sh +++ b/dev-support/checks/checkstyle.sh @@ -23,19 +23,20 @@ REPORT_DIR=${OUTPUT_DIR:-"$DIR/../../target/checkstyle"} mkdir -p "$REPORT_DIR" REPORT_FILE="$REPORT_DIR/summary.txt" -MAVEN_OPTIONS='-B -fae --no-transfer-progress -Dcheckstyle.failOnViolation=false' +MAVEN_OPTIONS='-B -fae -Dcheckstyle.failOnViolation=false' declare -i rc -${MVN} ${MAVEN_OPTIONS} checkstyle:check | tee "${REPORT_DIR}/output.log" +${MVN} ${MAVEN_OPTIONS} checkstyle:check > "${REPORT_DIR}/output.log" rc=$? if [[ ${rc} -ne 0 ]]; then - ${MVN} ${MAVEN_OPTIONS} clean test-compile checkstyle:check + ${MVN} ${MAVEN_OPTIONS} clean test-compile checkstyle:check > output.log rc=$? 
mkdir -p "$REPORT_DIR" # removed by mvn clean -else - cat "${REPORT_DIR}/output.log" + mv output.log "${REPORT_DIR}"/ fi +cat "${REPORT_DIR}/output.log" + #Print out the exact violations with parsing XML results with sed find "." -name checkstyle-result.xml -print0 \ | xargs -0 sed '$!N; //dev/null 2>&1 && pwd )" cd "$DIR/../.." || exit 1 -REPORT_DIR=${OUTPUT_DIR:-"$DIR/../../target/shellcheck"} -mkdir -p "$REPORT_DIR" -REPORT_FILE="$REPORT_DIR/summary.txt" +source "${DIR}/../find_maven.sh" -echo "" > "$OUTPUT_FILE" -if [[ "$(uname -s)" = "Darwin" ]]; then - find . -type f -perm '-500' -else - find . -type f -executable -fi \ - | grep -v -e target/ -e node_modules/ -e '\.\(ico\|py\|yml\)$' \ - | xargs -n1 shellcheck \ - | tee "$REPORT_FILE" +: ${WITH_COVERAGE:="false"} -wc -l "$REPORT_FILE" | awk '{print $1}'> "$REPORT_DIR/failures" +MAVEN_OPTIONS='-V -B -Dmaven.javadoc.skip=true -DskipTests' -if [[ -s "${REPORT_FILE}" ]]; then - exit 1 +if [[ "${WITH_COVERAGE}" != "true" ]]; then + MAVEN_OPTIONS="${MAVEN_OPTIONS} -Djacoco.skip" fi + +export MAVEN_OPTS="-Xmx4096m" +${MVN} ${MAVEN_OPTIONS} clean verify "$@" +exit $? diff --git a/dev-support/checks/coverage.sh b/dev-support/checks/coverage.sh index a2fab9b32a..ff0aef1a48 100755 --- a/dev-support/checks/coverage.sh +++ b/dev-support/checks/coverage.sh @@ -29,7 +29,7 @@ mkdir -p "$REPORT_DIR" JACOCO_VERSION=$(${MVN} help:evaluate -Dexpression=jacoco.version -q -DforceStdout) #Install jacoco cli -${MVN} --non-recursive --no-transfer-progress \ +${MVN} --non-recursive \ org.apache.maven.plugins:maven-dependency-plugin:3.6.1:copy \ -Dartifact=org.jacoco:org.jacoco.cli:${JACOCO_VERSION}:jar:nodeps diff --git a/dev-support/checks/findbugs.sh b/dev-support/checks/findbugs.sh index 17c669b8d5..93d3ef936f 100755 --- a/dev-support/checks/findbugs.sh +++ b/dev-support/checks/findbugs.sh @@ -13,6 +13,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + +set -u -o pipefail + DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" cd "$DIR/../.." || exit 1 @@ -20,28 +23,27 @@ source "${DIR}/../find_maven.sh" : ${WITH_COVERAGE:="false"} -MAVEN_OPTIONS='-B -fae --no-transfer-progress' +REPORT_DIR=${OUTPUT_DIR:-"$DIR/../../target/findbugs"} +mkdir -p "$REPORT_DIR" +REPORT_FILE="$REPORT_DIR/summary.txt" + +source "${DIR}/_lib.sh" +source "${DIR}/install/spotbugs.sh" -if ! type unionBugs >/dev/null 2>&1 || ! type convertXmlToText >/dev/null 2>&1; then - #shellcheck disable=SC2086 - ${MVN} ${MAVEN_OPTIONS} test-compile spotbugs:check - exit $? -fi +MAVEN_OPTIONS='-B -fae' if [[ "${WITH_COVERAGE}" != "true" ]]; then MAVEN_OPTIONS="${MAVEN_OPTIONS} -Djacoco.skip" fi #shellcheck disable=SC2086 -${MVN} ${MAVEN_OPTIONS} test-compile spotbugs:spotbugs +${MVN} ${MAVEN_OPTIONS} test-compile spotbugs:spotbugs "$@" | tee "${REPORT_DIR}/output.log" rc=$? -REPORT_DIR=${OUTPUT_DIR:-"$DIR/../../target/findbugs"} -mkdir -p "$REPORT_DIR" -REPORT_FILE="$REPORT_DIR/summary.txt" +touch "$REPORT_FILE" find ratis* -name spotbugsXml.xml -print0 | xargs -0 unionBugs -output "${REPORT_DIR}"/summary.xml -convertXmlToText "${REPORT_DIR}"/summary.xml | tee "${REPORT_FILE}" +convertXmlToText "${REPORT_DIR}"/summary.xml | tee -a "${REPORT_FILE}" convertXmlToText -html:fancy-hist.xsl "${REPORT_DIR}"/summary.xml "${REPORT_DIR}"/summary.html wc -l "$REPORT_FILE" | awk '{print $1}'> "$REPORT_DIR/failures" diff --git a/dev-support/checks/install/spotbugs.sh b/dev-support/checks/install/spotbugs.sh new file mode 100644 index 0000000000..337ba2b94d --- /dev/null +++ b/dev-support/checks/install/spotbugs.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script installs SpotBugs. +# Requires _install_tool from _lib.sh. Use `source` for both scripts, because it modifies $PATH. + +_get_spotbugs_version() { + MAVEN_ARGS='' ${MVN} -q -DforceStdout -Dscan=false help:evaluate -Dexpression=spotbugs.version 2>/dev/null || echo '4.8.6' +} + +if [[ -z "${SPOTBUGS_VERSION:-}" ]]; then + SPOTBUGS_VERSION="$(_get_spotbugs_version)" +fi + +_install_spotbugs() { + echo "https://repo.maven.apache.org/maven2/com/github/spotbugs/spotbugs/${SPOTBUGS_VERSION}/spotbugs-${SPOTBUGS_VERSION}.tgz" + curl -LSs "https://repo.maven.apache.org/maven2/com/github/spotbugs/spotbugs/${SPOTBUGS_VERSION}/spotbugs-${SPOTBUGS_VERSION}.tgz" | tar -xz -f - || exit 1 + find "spotbugs-${SPOTBUGS_VERSION}"/bin -type f -print0 | xargs -0 --no-run-if-empty chmod +x +} + +_install_tool spotbugs "spotbugs-${SPOTBUGS_VERSION}/bin" diff --git a/dev-support/checks/rat.sh b/dev-support/checks/rat.sh index 34d8a25854..9b55878eff 100755 --- a/dev-support/checks/rat.sh +++ b/dev-support/checks/rat.sh @@ -23,7 +23,7 @@ mkdir -p "$REPORT_DIR" REPORT_FILE="$REPORT_DIR/summary.txt" -${MVN} -B -fn --no-transfer-progress org.apache.rat:apache-rat-plugin:0.13:check +${MVN} -B -fn org.apache.rat:apache-rat-plugin:0.13:check cd "$DIR/../.." 
|| exit 1 diff --git a/dev-support/checks/release.sh b/dev-support/checks/release.sh new file mode 100755 index 0000000000..1297b36b0c --- /dev/null +++ b/dev-support/checks/release.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e -u -o pipefail + +# This script tests the local part of the release process. It does not publish anything. + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cd "$DIR/../.." 
|| exit 1 + +: "${RATISVERSION:="0.0.1"}" +: "${RC:="-ci-test"}" +: "${STAGING_REPO_DIR:="/tmp/ratis.staging-repo"}" +: "${SVNDISTDIR:="/tmp/ratis.svn"}" +: "${USERID:="ratis-ci-not-for-release"}" + +MVN_REPO_DIR="${HOME}/.m2/repository" + +mkdir -p "${SVNDISTDIR}" + +if [[ -z "${CODESIGNINGKEY:-}" ]]; then + gpg --batch --passphrase '' --pinentry-mode loopback --quick-generate-key "${USERID}" rsa4096 default 1d + CODESIGNINGKEY=$(gpg --list-keys --with-colons "${USERID}" | grep '^pub:' | cut -f5 -d:) +fi + +git config user.email || git config user.email 'test@example.com' +git config user.name || git config user.name 'Test User' + +export CODESIGNINGKEY MVN_REPO_DIR RATISVERSION RC SVNDISTDIR + +export MAVEN_ARGS="--batch-mode" + +dev-support/make_rc.sh 1-prepare-src +dev-support/make_rc.sh 2-verify-bin +dev-support/make_rc.sh 3-publish-mvn -DaltDeploymentRepository="local::default::file://${STAGING_REPO_DIR}" +dev-support/make_rc.sh 4-assembly diff --git a/dev-support/checks/repro.sh b/dev-support/checks/repro.sh new file mode 100755 index 0000000000..88941bc286 --- /dev/null +++ b/dev-support/checks/repro.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +cd "$DIR/../.." || exit 1 + +source "${DIR}/../find_maven.sh" + +: ${WITH_COVERAGE:="false"} + +MAVEN_OPTIONS='-V -B -Dmaven.javadoc.skip=true -DskipTests' + +if [[ "${WITH_COVERAGE}" != "true" ]]; then + MAVEN_OPTIONS="${MAVEN_OPTIONS} -Djacoco.skip" +fi + +export MAVEN_OPTS="-Xmx4096m" +${MVN} ${MAVEN_OPTIONS} clean verify artifact:compare "$@" +exit $? diff --git a/dev-support/checks/sonar.sh b/dev-support/checks/sonar.sh index 55a46cfec7..55edbdaaea 100755 --- a/dev-support/checks/sonar.sh +++ b/dev-support/checks/sonar.sh @@ -23,7 +23,7 @@ if [ ! "$SONAR_TOKEN" ]; then exit 1 fi -${MVN} -B verify -DskipShade -DskipTests --no-transfer-progress \ - org.sonarsource.scanner.maven:sonar-maven-plugin:3.6.0.1398:sonar \ +${MVN} -B verify -DskipShade -DskipTests \ + sonar:sonar \ -Dsonar.coverage.jacoco.xmlReportPaths="$(pwd)/target/coverage/all.xml" \ -Dsonar.host.url=https://sonarcloud.io -Dsonar.organization=apache -Dsonar.projectKey=apache-ratis diff --git a/dev-support/checks/unit.sh b/dev-support/checks/unit.sh index f7a4f3017e..c0369898e7 100755 --- a/dev-support/checks/unit.sh +++ b/dev-support/checks/unit.sh @@ -34,7 +34,11 @@ REPORT_DIR=${OUTPUT_DIR:-"$DIR/../../target/unit"} mkdir -p "$REPORT_DIR" export MAVEN_OPTS="-Xmx4096m" -MAVEN_OPTIONS='-V -B --no-transfer-progress' +MAVEN_OPTIONS='-V -B' + +if [[ "$@" =~ "-Pflaky-tests" ]]; then + MAVEN_OPTIONS="${MAVEN_OPTIONS} -Dsurefire.rerunFailingTestsCount=5 -Dsurefire.timeout=1200" +fi if [[ "${FAIL_FAST}" == "true" ]]; then MAVEN_OPTIONS="${MAVEN_OPTIONS} --fail-fast -Dsurefire.skipAfterFailureCount=1" @@ -65,6 +69,12 @@ for i in $(seq 1 ${ITERATIONS}); do fi if [[ ${ITERATIONS} -gt 1 ]]; then + if ! 
grep -q "Running .*Test" "${REPORT_DIR}/output.log"; then + echo "No tests were run" >> "${REPORT_DIR}/summary.txt" + irc=1 + FAIL_FAST=true + fi + if [[ ${irc} == 0 ]]; then rm -fr "${REPORT_DIR}" fi diff --git a/dev-support/checkstyle.xml b/dev-support/checkstyle.xml index 6f8ac9f96f..db4954fb49 100644 --- a/dev-support/checkstyle.xml +++ b/dev-support/checkstyle.xml @@ -55,6 +55,10 @@ + + + + diff --git a/dev-support/find_maven.sh b/dev-support/find_maven.sh index 20b6462b1e..2067ff5152 100644 --- a/dev-support/find_maven.sh +++ b/dev-support/find_maven.sh @@ -17,7 +17,7 @@ # limitations under the License. function find_maven() { - if [ "$MAVEN" != "" ]; then + if [[ -n "${MAVEN:-}" ]]; then echo "${MAVEN}" else local DIR diff --git a/dev-support/make_rc.sh b/dev-support/make_rc.sh index b5bec51dde..7317729acb 100755 --- a/dev-support/make_rc.sh +++ b/dev-support/make_rc.sh @@ -36,7 +36,7 @@ fi mvnGet() { ${MVN} -q -Dexec.executable="echo" -Dexec.args="\${${1}}" --non-recursive \ - org.codehaus.mojo:exec-maven-plugin:1.6.0:exec 2>/dev/null + org.codehaus.mojo:exec-maven-plugin:exec 2>/dev/null } @@ -91,12 +91,12 @@ mvnFun() { MAVEN_OPTS="${mvnopts}" ${MVN} -Dmaven.repo.local="${repodir}" "$@" } -prepare-src() { +1-prepare-src() { cd "$projectdir" git reset --hard git clean -fdx - mvnFun versions:set -DnewVersion="$RATISVERSION" - git commit -a -m "Change version for the version $RATISVERSION $RC" + mvnFun versions:set -DnewVersion="$RATISVERSION" -DprocessAllModules + git commit --allow-empty -a -m "Change version for the version $RATISVERSION $RC" git config user.signingkey "${CODESIGNINGKEY}" git tag -s -m "Release $RATISVERSION $RC" ratis-"${RATISVERSION}${RC}" @@ -106,69 +106,73 @@ prepare-src() { #grep -r SNAPSHOT --include=pom.xml - mvnFun clean install assembly:single -DskipTests=true -Prelease -Papache-release -Dgpg.keyname="${CODESIGNINGKEY}" + mvnFun clean install -DskipTests=true -Prelease -Papache-release -Dgpg.keyname="${CODESIGNINGKEY}" } 
-prepare-bin() { +2-verify-bin() { echo "Cleaning up workingdir $WORKINGDIR" rm -rf "$WORKINGDIR" mkdir -p "$WORKINGDIR" cd "$WORKINGDIR" - tar zvxf "$projectdir/ratis-assembly/target/apache-ratis-${RATISVERSION}-src.tar.gz" + tar zvxf "$projectdir/ratis-assembly/target/ratis-assembly-${RATISVERSION}-src.tar.gz" mv "apache-ratis-${RATISVERSION}-src" "apache-ratis-${RATISVERSION}" cd "apache-ratis-${RATISVERSION}" - mvnFun clean install assembly:single -DskipTests=true -Prelease -Papache-release -Dgpg.keyname="${CODESIGNINGKEY}" + mvnFun clean verify -DskipTests=true -Prelease -Papache-release -Dgpg.keyname="${CODESIGNINGKEY}" "$@" } -assembly() { +3-publish-mvn() { + cd "$projectdir" + mvnFun verify artifact:compare deploy:deploy -DdeployAtEnd=true -DskipTests=true -Prelease -Papache-release -Dgpg.keyname="${CODESIGNINGKEY}" "$@" +} + +4-assembly() { cd "$SVNDISTDIR" RCDIR="$SVNDISTDIR/${RATISVERSION}/${RC#-}" mkdir -p "$RCDIR" cd "$RCDIR" - cp "$WORKINGDIR/apache-ratis-${RATISVERSION}/ratis-assembly/target/apache-ratis-${RATISVERSION}-bin.tar.gz" "apache-ratis-${RATISVERSION}-bin.tar.gz" - cp "$projectdir/ratis-assembly/target/apache-ratis-${RATISVERSION}-src.tar.gz" "apache-ratis-${RATISVERSION}-src.tar.gz" + cp "$projectdir/ratis-assembly/target/ratis-assembly-${RATISVERSION}-bin.tar.gz" "apache-ratis-${RATISVERSION}-bin.tar.gz" + cp "$projectdir/ratis-assembly/target/ratis-assembly-${RATISVERSION}-src.tar.gz" "apache-ratis-${RATISVERSION}-src.tar.gz" for i in *.tar.gz; do gpg -u "${CODESIGNINGKEY}" --armor --output "${i}.asc" --detach-sig "${i}"; done for i in *.tar.gz; do gpg --print-md SHA512 "${i}" > "${i}.sha512"; done for i in *.tar.gz; do gpg --print-mds "${i}" > "${i}.mds"; done cd "$SVNDISTDIR" - svn add "${RATISVERSION}" || svn add "${RATISVERSION}/${RC#-}" + # skip svn add in CI + if [[ -z "${CI:-}" ]]; then + svn add "${RATISVERSION}" || svn add "${RATISVERSION}/${RC#-}" + fi } -publish-git(){ +5-publish-git(){ cd "$projectdir" + git push apache 
HEAD:"release-${RATISVERSION}" git push apache "ratis-${RATISVERSION}${RC}" } -publish-svn() { +6-publish-svn() { cd "${SVNDISTDIR}" svn commit -m "Publish proposed version of the next Ratis release ${RATISVERSION}${RC}" } -publish-mvn(){ - cd "$projectdir" - mvnFun -X clean deploy assembly:single -DskipTests=true -Prelease -Papache-release -Dgpg.keyname="${CODESIGNINGKEY}" -} - -if [ "$#" -ne 1 ]; then +if [ "$#" -lt 1 ]; then cat << EOF -Please choose from available phases (eg. make_rc.sh prepare-src): +Please choose from available phases (eg. make_rc.sh 1-prepare-src): - 1. prepare-src: This is the first step. It modifies the mvn version, creates the git tag and + 1-prepare-src: This is the first step. It modifies the mvn version, creates the git tag and builds the project to create the source artifacts. IT INCLUDES A GIT RESET + CLEAN. ALL THE LOCAL CHANGES WILL BE LOST! - 2. prepare-bin: The source artifact is copied to the $WORKINGDIR and the binary artifact is created from the source. + 2-verify-bin: The source artifact is copied to the $WORKINGDIR and the binary artifact is created from the source. This is an additional check as the the released source artifact should be enough to build the whole project. - 3. assembly : This step copies all the required artifacts to the svn directory and ($SVNDISTDIR) creates the signatures/checksum files. + 3-publish-mvn: Performs the final build, and uploads the artifacts to the maven staging repository - 4. publish-git: The first remote step, only do it if everything is fine. It pushes the rc tag to the repository. + 4-assembly: This step copies all the required artifacts to the svn directory and ($SVNDISTDIR) creates the signatures/checksum files. - 5. publish-svn: Uploads the artifacts to the apache dev staging area to start the vote. + 5-publish-git: Only do it if everything is fine. It pushes the rc tag and release branch to the repository. - 6. 
publish-mvn: Uploads the artifacts to the maven staging repository + 6-publish-svn: Uploads the artifacts to the apache dev staging area to start the vote. The next steps of the release process are not scripted: @@ -189,5 +193,7 @@ The next steps of the release process are not scripted: EOF else set -x - eval "$1" + func="$1" + shift + eval "$func" "$@" fi diff --git a/mvnw b/mvnw index 8d937f4c14..19529ddf8c 100755 --- a/mvnw +++ b/mvnw @@ -19,290 +19,241 @@ # ---------------------------------------------------------------------------- # ---------------------------------------------------------------------------- -# Apache Maven Wrapper startup batch script, version 3.2.0 -# -# Required ENV vars: -# ------------------ -# JAVA_HOME - location of a JDK home dir +# Apache Maven Wrapper startup batch script, version 3.3.2 # # Optional ENV vars # ----------------- -# MAVEN_OPTS - parameters passed to the Java VM when running Maven -# e.g. to debug Maven itself, use -# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 -# MAVEN_SKIP_RC - flag to disable loading of mavenrc files +# JAVA_HOME - location of a JDK home dir, required when download maven via java source +# MVNW_REPOURL - repo url base for downloading maven distribution +# MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +# MVNW_VERBOSE - true: enable verbose log; debug: trace the mvnw script; others: silence the output # ---------------------------------------------------------------------------- -if [ -z "$MAVEN_SKIP_RC" ] ; then - - if [ -f /usr/local/etc/mavenrc ] ; then - . /usr/local/etc/mavenrc - fi - - if [ -f /etc/mavenrc ] ; then - . /etc/mavenrc - fi - - if [ -f "$HOME/.mavenrc" ] ; then - . "$HOME/.mavenrc" - fi - -fi +set -euf +[ "${MVNW_VERBOSE-}" != debug ] || set -x -# OS specific support. $var _must_ be set to either true or false. -cygwin=false; -darwin=false; -mingw=false +# OS specific support. 
+native_path() { printf %s\\n "$1"; } case "$(uname)" in - CYGWIN*) cygwin=true ;; - MINGW*) mingw=true;; - Darwin*) darwin=true - # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home - # See https://developer.apple.com/library/mac/qa/qa1170/_index.html - if [ -z "$JAVA_HOME" ]; then - if [ -x "/usr/libexec/java_home" ]; then - JAVA_HOME="$(/usr/libexec/java_home)"; export JAVA_HOME - else - JAVA_HOME="/Library/Java/Home"; export JAVA_HOME - fi - fi - ;; +CYGWIN* | MINGW*) + [ -z "${JAVA_HOME-}" ] || JAVA_HOME="$(cygpath --unix "$JAVA_HOME")" + native_path() { cygpath --path --windows "$1"; } + ;; esac -if [ -z "$JAVA_HOME" ] ; then - if [ -r /etc/gentoo-release ] ; then - JAVA_HOME=$(java-config --jre-home) - fi -fi - -# For Cygwin, ensure paths are in UNIX format before anything is touched -if $cygwin ; then - [ -n "$JAVA_HOME" ] && - JAVA_HOME=$(cygpath --unix "$JAVA_HOME") - [ -n "$CLASSPATH" ] && - CLASSPATH=$(cygpath --path --unix "$CLASSPATH") -fi - -# For Mingw, ensure paths are in UNIX format before anything is touched -if $mingw ; then - [ -n "$JAVA_HOME" ] && [ -d "$JAVA_HOME" ] && - JAVA_HOME="$(cd "$JAVA_HOME" || (echo "cannot cd into $JAVA_HOME."; exit 1); pwd)" -fi - -if [ -z "$JAVA_HOME" ]; then - javaExecutable="$(which javac)" - if [ -n "$javaExecutable" ] && ! [ "$(expr "\"$javaExecutable\"" : '\([^ ]*\)')" = "no" ]; then - # readlink(1) is not available as standard on Solaris 10. - readLink=$(which readlink) - if [ ! 
"$(expr "$readLink" : '\([^ ]*\)')" = "no" ]; then - if $darwin ; then - javaHome="$(dirname "\"$javaExecutable\"")" - javaExecutable="$(cd "\"$javaHome\"" && pwd -P)/javac" - else - javaExecutable="$(readlink -f "\"$javaExecutable\"")" - fi - javaHome="$(dirname "\"$javaExecutable\"")" - javaHome=$(expr "$javaHome" : '\(.*\)/bin') - JAVA_HOME="$javaHome" - export JAVA_HOME - fi - fi -fi - -if [ -z "$JAVACMD" ] ; then - if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then +# set JAVACMD and JAVACCMD +set_java_home() { + # For Cygwin and MinGW, ensure paths are in Unix format before anything is touched + if [ -n "${JAVA_HOME-}" ]; then + if [ -x "$JAVA_HOME/jre/sh/java" ]; then # IBM's JDK on AIX uses strange locations for the executables JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACCMD="$JAVA_HOME/jre/sh/javac" else JAVACMD="$JAVA_HOME/bin/java" + JAVACCMD="$JAVA_HOME/bin/javac" + + if [ ! -x "$JAVACMD" ] || [ ! -x "$JAVACCMD" ]; then + echo "The JAVA_HOME environment variable is not defined correctly, so mvnw cannot run." >&2 + echo "JAVA_HOME is set to \"$JAVA_HOME\", but \"\$JAVA_HOME/bin/java\" or \"\$JAVA_HOME/bin/javac\" does not exist." >&2 + return 1 + fi fi else - JAVACMD="$(\unset -f command 2>/dev/null; \command -v java)" - fi -fi - -if [ ! -x "$JAVACMD" ] ; then - echo "Error: JAVA_HOME is not defined correctly." >&2 - echo " We cannot execute $JAVACMD" >&2 - exit 1 -fi + JAVACMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v java + )" || : + JAVACCMD="$( + 'set' +e + 'unset' -f command 2>/dev/null + 'command' -v javac + )" || : -if [ -z "$JAVA_HOME" ] ; then - echo "Warning: JAVA_HOME environment variable is not set." -fi - -# traverses directory structure from process work directory to filesystem root -# first directory with .mvn subdirectory is considered project base directory -find_maven_basedir() { - if [ -z "$1" ] - then - echo "Path not specified to find_maven_basedir" - return 1 + if [ ! -x "${JAVACMD-}" ] || [ ! 
-x "${JAVACCMD-}" ]; then + echo "The java/javac command does not exist in PATH nor is JAVA_HOME set, so mvnw cannot run." >&2 + return 1 + fi fi +} - basedir="$1" - wdir="$1" - while [ "$wdir" != '/' ] ; do - if [ -d "$wdir"/.mvn ] ; then - basedir=$wdir - break - fi - # workaround for JBEAP-8937 (on Solaris 10/Sparc) - if [ -d "${wdir}" ]; then - wdir=$(cd "$wdir/.." || exit 1; pwd) - fi - # end of workaround +# hash string like Java String::hashCode +hash_string() { + str="${1:-}" h=0 + while [ -n "$str" ]; do + char="${str%"${str#?}"}" + h=$(((h * 31 + $(LC_CTYPE=C printf %d "'$char")) % 4294967296)) + str="${str#?}" done - printf '%s' "$(cd "$basedir" || exit 1; pwd)" + printf %x\\n $h } -# concatenates all lines of a file -concat_lines() { - if [ -f "$1" ]; then - # Remove \r in case we run on Windows within Git Bash - # and check out the repository with auto CRLF management - # enabled. Otherwise, we may read lines that are delimited with - # \r\n and produce $'-Xarg\r' rather than -Xarg due to word - # splitting rules. - tr -s '\r\n' ' ' < "$1" - fi +verbose() { :; } +[ "${MVNW_VERBOSE-}" != true ] || verbose() { printf %s\\n "${1-}"; } + +die() { + printf %s\\n "$1" >&2 + exit 1 } -log() { - if [ "$MVNW_VERBOSE" = true ]; then - printf '%s\n' "$1" - fi +trim() { + # MWRAPPER-139: + # Trims trailing and leading whitespace, carriage returns, tabs, and linefeeds. + # Needed for removing poorly interpreted newline sequences when running in more + # exotic environments such as mingw bash on Windows. 
+ printf "%s" "${1}" | tr -d '[:space:]' +} + +# parse distributionUrl and optional distributionSha256Sum, requires .mvn/wrapper/maven-wrapper.properties +while IFS="=" read -r key value; do + case "${key-}" in + distributionUrl) distributionUrl=$(trim "${value-}") ;; + distributionSha256Sum) distributionSha256Sum=$(trim "${value-}") ;; + esac +done <"${0%/*}/.mvn/wrapper/maven-wrapper.properties" +[ -n "${distributionUrl-}" ] || die "cannot read distributionUrl property in ${0%/*}/.mvn/wrapper/maven-wrapper.properties" + +case "${distributionUrl##*/}" in +maven-mvnd-*bin.*) + MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ + case "${PROCESSOR_ARCHITECTURE-}${PROCESSOR_ARCHITEW6432-}:$(uname -a)" in + *AMD64:CYGWIN* | *AMD64:MINGW*) distributionPlatform=windows-amd64 ;; + :Darwin*x86_64) distributionPlatform=darwin-amd64 ;; + :Darwin*arm64) distributionPlatform=darwin-aarch64 ;; + :Linux*x86_64*) distributionPlatform=linux-amd64 ;; + *) + echo "Cannot detect native platform for mvnd on $(uname)-$(uname -m), use pure java version" >&2 + distributionPlatform=linux-amd64 + ;; + esac + distributionUrl="${distributionUrl%-bin.*}-$distributionPlatform.zip" + ;; +maven-mvnd-*) MVN_CMD=mvnd.sh _MVNW_REPO_PATTERN=/maven/mvnd/ ;; +*) MVN_CMD="mvn${0##*/mvnw}" _MVNW_REPO_PATTERN=/org/apache/maven/ ;; +esac + +# apply MVNW_REPOURL and calculate MAVEN_HOME +# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-,maven-mvnd--}/ +[ -z "${MVNW_REPOURL-}" ] || distributionUrl="$MVNW_REPOURL$_MVNW_REPO_PATTERN${distributionUrl#*"$_MVNW_REPO_PATTERN"}" +distributionUrlName="${distributionUrl##*/}" +distributionUrlNameMain="${distributionUrlName%.*}" +distributionUrlNameMain="${distributionUrlNameMain%-bin}" +MAVEN_USER_HOME="${MAVEN_USER_HOME:-${HOME}/.m2}" +MAVEN_HOME="${MAVEN_USER_HOME}/wrapper/dists/${distributionUrlNameMain-}/$(hash_string "$distributionUrl")" + +exec_maven() { + unset MVNW_VERBOSE MVNW_USERNAME MVNW_PASSWORD MVNW_REPOURL || : + exec "$MAVEN_HOME/bin/$MVN_CMD" 
"$@" || die "cannot exec $MAVEN_HOME/bin/$MVN_CMD" } -BASE_DIR=$(find_maven_basedir "$(dirname "$0")") -if [ -z "$BASE_DIR" ]; then - exit 1; +if [ -d "$MAVEN_HOME" ]; then + verbose "found existing MAVEN_HOME at $MAVEN_HOME" + exec_maven "$@" fi -MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}; export MAVEN_PROJECTBASEDIR -log "$MAVEN_PROJECTBASEDIR" +case "${distributionUrl-}" in +*?-bin.zip | *?maven-mvnd-?*-?*.zip) ;; +*) die "distributionUrl is not valid, must match *-bin.zip or maven-mvnd-*.zip, but found '${distributionUrl-}'" ;; +esac -########################################################################################## -# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central -# This allows using the maven wrapper in projects that prohibit checking in binary data. -########################################################################################## -wrapperJarPath="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" -if [ -r "$wrapperJarPath" ]; then - log "Found $wrapperJarPath" +# prepare tmp dir +if TMP_DOWNLOAD_DIR="$(mktemp -d)" && [ -d "$TMP_DOWNLOAD_DIR" ]; then + clean() { rm -rf -- "$TMP_DOWNLOAD_DIR"; } + trap clean HUP INT TERM EXIT else - log "Couldn't find $wrapperJarPath, downloading it ..." 
+ die "cannot create temp dir" +fi - if [ -n "$MVNW_REPOURL" ]; then - wrapperUrl="$MVNW_REPOURL/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" - else - wrapperUrl="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" - fi - while IFS="=" read -r key value; do - # Remove '\r' from value to allow usage on windows as IFS does not consider '\r' as a separator ( considers space, tab, new line ('\n'), and custom '=' ) - safeValue=$(echo "$value" | tr -d '\r') - case "$key" in (wrapperUrl) wrapperUrl="$safeValue"; break ;; - esac - done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" - log "Downloading from: $wrapperUrl" +mkdir -p -- "${MAVEN_HOME%/*}" - if $cygwin; then - wrapperJarPath=$(cygpath --path --windows "$wrapperJarPath") - fi +# Download and Install Apache Maven +verbose "Couldn't find MAVEN_HOME, downloading and installing it ..." +verbose "Downloading from: $distributionUrl" +verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" - if command -v wget > /dev/null; then - log "Found wget ... using wget" - [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--quiet" - if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then - wget $QUIET "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" - else - wget $QUIET --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" - fi - elif command -v curl > /dev/null; then - log "Found curl ... 
using curl" - [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--silent" - if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then - curl $QUIET -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" - else - curl $QUIET --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" - fi - else - log "Falling back to using Java to download" - javaSource="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.java" - javaClass="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.class" - # For Cygwin, switch paths to Windows format before running javac - if $cygwin; then - javaSource=$(cygpath --path --windows "$javaSource") - javaClass=$(cygpath --path --windows "$javaClass") - fi - if [ -e "$javaSource" ]; then - if [ ! -e "$javaClass" ]; then - log " - Compiling MavenWrapperDownloader.java ..." - ("$JAVA_HOME/bin/javac" "$javaSource") - fi - if [ -e "$javaClass" ]; then - log " - Running MavenWrapperDownloader.java ..." - ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$wrapperUrl" "$wrapperJarPath") || rm -f "$wrapperJarPath" - fi - fi - fi +# select .zip or .tar.gz +if ! 
command -v unzip >/dev/null; then + distributionUrl="${distributionUrl%.zip}.tar.gz" + distributionUrlName="${distributionUrl##*/}" fi -########################################################################################## -# End of extension -########################################################################################## -# If specified, validate the SHA-256 sum of the Maven wrapper jar file -wrapperSha256Sum="" -while IFS="=" read -r key value; do - case "$key" in (wrapperSha256Sum) wrapperSha256Sum=$value; break ;; - esac -done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" -if [ -n "$wrapperSha256Sum" ]; then - wrapperSha256Result=false - if command -v sha256sum > /dev/null; then - if echo "$wrapperSha256Sum $wrapperJarPath" | sha256sum -c > /dev/null 2>&1; then - wrapperSha256Result=true +# verbose opt +__MVNW_QUIET_WGET=--quiet __MVNW_QUIET_CURL=--silent __MVNW_QUIET_UNZIP=-q __MVNW_QUIET_TAR='' +[ "${MVNW_VERBOSE-}" != true ] || __MVNW_QUIET_WGET='' __MVNW_QUIET_CURL='' __MVNW_QUIET_UNZIP='' __MVNW_QUIET_TAR=v + +# normalize http auth +case "${MVNW_PASSWORD:+has-password}" in +'') MVNW_USERNAME='' MVNW_PASSWORD='' ;; +has-password) [ -n "${MVNW_USERNAME-}" ] || MVNW_USERNAME='' MVNW_PASSWORD='' ;; +esac + +if [ -z "${MVNW_USERNAME-}" ] && command -v wget >/dev/null; then + verbose "Found wget ... using wget" + wget ${__MVNW_QUIET_WGET:+"$__MVNW_QUIET_WGET"} "$distributionUrl" -O "$TMP_DOWNLOAD_DIR/$distributionUrlName" || die "wget: Failed to fetch $distributionUrl" +elif [ -z "${MVNW_USERNAME-}" ] && command -v curl >/dev/null; then + verbose "Found curl ... 
using curl" + curl ${__MVNW_QUIET_CURL:+"$__MVNW_QUIET_CURL"} -f -L -o "$TMP_DOWNLOAD_DIR/$distributionUrlName" "$distributionUrl" || die "curl: Failed to fetch $distributionUrl" +elif set_java_home; then + verbose "Falling back to use Java to download" + javaSource="$TMP_DOWNLOAD_DIR/Downloader.java" + targetZip="$TMP_DOWNLOAD_DIR/$distributionUrlName" + cat >"$javaSource" <<-END + public class Downloader extends java.net.Authenticator + { + protected java.net.PasswordAuthentication getPasswordAuthentication() + { + return new java.net.PasswordAuthentication( System.getenv( "MVNW_USERNAME" ), System.getenv( "MVNW_PASSWORD" ).toCharArray() ); + } + public static void main( String[] args ) throws Exception + { + setDefault( new Downloader() ); + java.nio.file.Files.copy( java.net.URI.create( args[0] ).toURL().openStream(), java.nio.file.Paths.get( args[1] ).toAbsolutePath().normalize() ); + } + } + END + # For Cygwin/MinGW, switch paths to Windows format before running javac and java + verbose " - Compiling Downloader.java ..." + "$(native_path "$JAVACCMD")" "$(native_path "$javaSource")" || die "Failed to compile Downloader.java" + verbose " - Running Downloader.java ..." + "$(native_path "$JAVACMD")" -cp "$(native_path "$TMP_DOWNLOAD_DIR")" Downloader "$distributionUrl" "$(native_path "$targetZip")" +fi + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +if [ -n "${distributionSha256Sum-}" ]; then + distributionSha256Result=false + if [ "$MVN_CMD" = mvnd.sh ]; then + echo "Checksum validation is not supported for maven-mvnd." >&2 + echo "Please disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." 
>&2 + exit 1 + elif command -v sha256sum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | sha256sum -c >/dev/null 2>&1; then + distributionSha256Result=true fi - elif command -v shasum > /dev/null; then - if echo "$wrapperSha256Sum $wrapperJarPath" | shasum -a 256 -c > /dev/null 2>&1; then - wrapperSha256Result=true + elif command -v shasum >/dev/null; then + if echo "$distributionSha256Sum $TMP_DOWNLOAD_DIR/$distributionUrlName" | shasum -a 256 -c >/dev/null 2>&1; then + distributionSha256Result=true fi else - echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." - echo "Please install either command, or disable validation by removing 'wrapperSha256Sum' from your maven-wrapper.properties." + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2 + echo "Please install either command, or disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." >&2 exit 1 fi - if [ $wrapperSha256Result = false ]; then - echo "Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised." >&2 - echo "Investigate or delete $wrapperJarPath to attempt a clean download." >&2 - echo "If you updated your Maven version, you need to update the specified wrapperSha256Sum property." >&2 + if [ $distributionSha256Result = false ]; then + echo "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised." >&2 + echo "If you updated your Maven version, you need to update the specified distributionSha256Sum property." 
>&2 exit 1 fi fi -MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" - -# For Cygwin, switch paths to Windows format before running java -if $cygwin; then - [ -n "$JAVA_HOME" ] && - JAVA_HOME=$(cygpath --path --windows "$JAVA_HOME") - [ -n "$CLASSPATH" ] && - CLASSPATH=$(cygpath --path --windows "$CLASSPATH") - [ -n "$MAVEN_PROJECTBASEDIR" ] && - MAVEN_PROJECTBASEDIR=$(cygpath --path --windows "$MAVEN_PROJECTBASEDIR") +# unzip and move +if command -v unzip >/dev/null; then + unzip ${__MVNW_QUIET_UNZIP:+"$__MVNW_QUIET_UNZIP"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -d "$TMP_DOWNLOAD_DIR" || die "failed to unzip" +else + tar xzf${__MVNW_QUIET_TAR:+"$__MVNW_QUIET_TAR"} "$TMP_DOWNLOAD_DIR/$distributionUrlName" -C "$TMP_DOWNLOAD_DIR" || die "failed to untar" fi +printf %s\\n "$distributionUrl" >"$TMP_DOWNLOAD_DIR/$distributionUrlNameMain/mvnw.url" +mv -- "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" "$MAVEN_HOME" || [ -d "$MAVEN_HOME" ] || die "fail to move MAVEN_HOME" -# Provide a "standardized" way to retrieve the CLI args that will -# work with both Windows and non-Windows executions. -MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $*" -export MAVEN_CMD_LINE_ARGS - -WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain - -# shellcheck disable=SC2086 # safe args -exec "$JAVACMD" \ - $MAVEN_OPTS \ - $MAVEN_DEBUG_OPTS \ - -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ - "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ - ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" +clean || : +exec_maven "$@" diff --git a/mvnw.cmd b/mvnw.cmd index f80fbad3e7..b150b91ed5 100644 --- a/mvnw.cmd +++ b/mvnw.cmd @@ -1,3 +1,4 @@ +<# : batch portion @REM ---------------------------------------------------------------------------- @REM Licensed to the Apache Software Foundation (ASF) under one @REM or more contributor license agreements. 
See the NOTICE file @@ -18,188 +19,131 @@ @REM ---------------------------------------------------------------------------- @REM ---------------------------------------------------------------------------- -@REM Apache Maven Wrapper startup batch script, version 3.2.0 -@REM -@REM Required ENV vars: -@REM JAVA_HOME - location of a JDK home dir +@REM Apache Maven Wrapper startup batch script, version 3.3.2 @REM @REM Optional ENV vars -@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands -@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending -@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven -@REM e.g. to debug Maven itself, use -@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 -@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files +@REM MVNW_REPOURL - repo url base for downloading maven distribution +@REM MVNW_USERNAME/MVNW_PASSWORD - user and password for downloading maven +@REM MVNW_VERBOSE - true: enable verbose log; others: silence the output @REM ---------------------------------------------------------------------------- -@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' -@echo off -@REM set title of command window -title %0 -@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on' -@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% - -@REM set %HOME% to equivalent of $HOME -if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") - -@REM Execute a user defined script before this one -if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre -@REM check for pre script, once with legacy .bat ending and once with .cmd ending -if exist "%USERPROFILE%\mavenrc_pre.bat" call "%USERPROFILE%\mavenrc_pre.bat" %* -if exist "%USERPROFILE%\mavenrc_pre.cmd" call "%USERPROFILE%\mavenrc_pre.cmd" %* -:skipRcPre - -@setlocal - -set ERROR_CODE=0 - -@REM To isolate internal variables from possible post scripts, we use another setlocal -@setlocal - -@REM ==== 
START VALIDATION ==== -if not "%JAVA_HOME%" == "" goto OkJHome - -echo. -echo Error: JAVA_HOME not found in your environment. >&2 -echo Please set the JAVA_HOME variable in your environment to match the >&2 -echo location of your Java installation. >&2 -echo. -goto error - -:OkJHome -if exist "%JAVA_HOME%\bin\java.exe" goto init - -echo. -echo Error: JAVA_HOME is set to an invalid directory. >&2 -echo JAVA_HOME = "%JAVA_HOME%" >&2 -echo Please set the JAVA_HOME variable in your environment to match the >&2 -echo location of your Java installation. >&2 -echo. -goto error - -@REM ==== END VALIDATION ==== - -:init - -@REM Find the project base dir, i.e. the directory that contains the folder ".mvn". -@REM Fallback to current working directory if not found. - -set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% -IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir - -set EXEC_DIR=%CD% -set WDIR=%EXEC_DIR% -:findBaseDir -IF EXIST "%WDIR%"\.mvn goto baseDirFound -cd .. -IF "%WDIR%"=="%CD%" goto baseDirNotFound -set WDIR=%CD% -goto findBaseDir - -:baseDirFound -set MAVEN_PROJECTBASEDIR=%WDIR% -cd "%EXEC_DIR%" -goto endDetectBaseDir - -:baseDirNotFound -set MAVEN_PROJECTBASEDIR=%EXEC_DIR% -cd "%EXEC_DIR%" - -:endDetectBaseDir - -IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig - -@setlocal EnableExtensions EnableDelayedExpansion -for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! 
%%a -@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% - -:endReadAdditionalConfig - -SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" -set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" -set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain - -set WRAPPER_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" - -FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( - IF "%%A"=="wrapperUrl" SET WRAPPER_URL=%%B -) - -@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central -@REM This allows using the maven wrapper in projects that prohibit checking in binary data. -if exist %WRAPPER_JAR% ( - if "%MVNW_VERBOSE%" == "true" ( - echo Found %WRAPPER_JAR% - ) -) else ( - if not "%MVNW_REPOURL%" == "" ( - SET WRAPPER_URL="%MVNW_REPOURL%/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" - ) - if "%MVNW_VERBOSE%" == "true" ( - echo Couldn't find %WRAPPER_JAR%, downloading it ... 
- echo Downloading from: %WRAPPER_URL% - ) - - powershell -Command "&{"^ - "$webclient = new-object System.Net.WebClient;"^ - "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ - "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ - "}"^ - "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%WRAPPER_URL%', '%WRAPPER_JAR%')"^ - "}" - if "%MVNW_VERBOSE%" == "true" ( - echo Finished downloading %WRAPPER_JAR% - ) -) -@REM End of extension - -@REM If specified, validate the SHA-256 sum of the Maven wrapper jar file -SET WRAPPER_SHA_256_SUM="" -FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( - IF "%%A"=="wrapperSha256Sum" SET WRAPPER_SHA_256_SUM=%%B +@IF "%__MVNW_ARG0_NAME__%"=="" (SET __MVNW_ARG0_NAME__=%~nx0) +@SET __MVNW_CMD__= +@SET __MVNW_ERROR__= +@SET __MVNW_PSMODULEP_SAVE=%PSModulePath% +@SET PSModulePath= +@FOR /F "usebackq tokens=1* delims==" %%A IN (`powershell -noprofile "& {$scriptDir='%~dp0'; $script='%__MVNW_ARG0_NAME__%'; icm -ScriptBlock ([Scriptblock]::Create((Get-Content -Raw '%~f0'))) -NoNewScope}"`) DO @( + IF "%%A"=="MVN_CMD" (set __MVNW_CMD__=%%B) ELSE IF "%%B"=="" (echo %%A) ELSE (echo %%A=%%B) ) -IF NOT %WRAPPER_SHA_256_SUM%=="" ( - powershell -Command "&{"^ - "$hash = (Get-FileHash \"%WRAPPER_JAR%\" -Algorithm SHA256).Hash.ToLower();"^ - "If('%WRAPPER_SHA_256_SUM%' -ne $hash){"^ - " Write-Output 'Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised.';"^ - " Write-Output 'Investigate or delete %WRAPPER_JAR% to attempt a clean download.';"^ - " Write-Output 'If you updated your Maven version, you need to update the specified wrapperSha256Sum property.';"^ - " exit 1;"^ - "}"^ - "}" - if ERRORLEVEL 1 goto error -) - -@REM Provide a "standardized" way to retrieve the CLI args that 
will -@REM work with both Windows and non-Windows executions. -set MAVEN_CMD_LINE_ARGS=%* - -%MAVEN_JAVA_EXE% ^ - %JVM_CONFIG_MAVEN_PROPS% ^ - %MAVEN_OPTS% ^ - %MAVEN_DEBUG_OPTS% ^ - -classpath %WRAPPER_JAR% ^ - "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" ^ - %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* -if ERRORLEVEL 1 goto error -goto end - -:error -set ERROR_CODE=1 - -:end -@endlocal & set ERROR_CODE=%ERROR_CODE% - -if not "%MAVEN_SKIP_RC%"=="" goto skipRcPost -@REM check for post script, once with legacy .bat ending and once with .cmd ending -if exist "%USERPROFILE%\mavenrc_post.bat" call "%USERPROFILE%\mavenrc_post.bat" -if exist "%USERPROFILE%\mavenrc_post.cmd" call "%USERPROFILE%\mavenrc_post.cmd" -:skipRcPost - -@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' -if "%MAVEN_BATCH_PAUSE%"=="on" pause - -if "%MAVEN_TERMINATE_CMD%"=="on" exit %ERROR_CODE% - -cmd /C exit /B %ERROR_CODE% +@SET PSModulePath=%__MVNW_PSMODULEP_SAVE% +@SET __MVNW_PSMODULEP_SAVE= +@SET __MVNW_ARG0_NAME__= +@SET MVNW_USERNAME= +@SET MVNW_PASSWORD= +@IF NOT "%__MVNW_CMD__%"=="" (%__MVNW_CMD__% %*) +@echo Cannot start maven from wrapper >&2 && exit /b 1 +@GOTO :EOF +: end batch / begin powershell #> + +$ErrorActionPreference = "Stop" +if ($env:MVNW_VERBOSE -eq "true") { + $VerbosePreference = "Continue" +} + +# calculate distributionUrl, requires .mvn/wrapper/maven-wrapper.properties +$distributionUrl = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionUrl +if (!$distributionUrl) { + Write-Error "cannot read distributionUrl property in $scriptDir/.mvn/wrapper/maven-wrapper.properties" +} + +switch -wildcard -casesensitive ( $($distributionUrl -replace '^.*/','') ) { + "maven-mvnd-*" { + $USE_MVND = $true + $distributionUrl = $distributionUrl -replace '-bin\.[^.]*$',"-windows-amd64.zip" + $MVN_CMD = "mvnd.cmd" + break + } + default { + $USE_MVND = $false + $MVN_CMD = $script -replace '^mvnw','mvn' + break + } +} + 
+# apply MVNW_REPOURL (mirror base + repo sub-path + original URL tail) and calculate MAVEN_HOME
+# maven home pattern: ~/.m2/wrapper/dists/{apache-maven-<version>,maven-mvnd-<version>-<platform>}/<hash>
+if ($env:MVNW_REPOURL) {
+  $MVNW_REPO_PATTERN = if ($USE_MVND) { "/maven/mvnd/" } else { "/org/apache/maven/" } # mvnd and plain Maven live under different repo sub-paths
+  $distributionUrl = "$env:MVNW_REPOURL$MVNW_REPO_PATTERN" + ($distributionUrl -replace "^.*$MVNW_REPO_PATTERN",'') # parenthesized: ',' binds tighter than '+', so the regex must be built before -replace sees it
+}
+$distributionUrlName = $distributionUrl -replace '^.*/',''
+$distributionUrlNameMain = $distributionUrlName -replace '\.[^.]*$','' -replace '-bin$',''
+$MAVEN_HOME_PARENT = "$HOME/.m2/wrapper/dists/$distributionUrlNameMain"
+if ($env:MAVEN_USER_HOME) {
+  $MAVEN_HOME_PARENT = "$env:MAVEN_USER_HOME/wrapper/dists/$distributionUrlNameMain"
+}
+$MAVEN_HOME_NAME = ([System.Security.Cryptography.MD5]::Create().ComputeHash([byte[]][char[]]$distributionUrl) | ForEach-Object {$_.ToString("x2")}) -join ''
+$MAVEN_HOME = "$MAVEN_HOME_PARENT/$MAVEN_HOME_NAME"
+
+if (Test-Path -Path "$MAVEN_HOME" -PathType Container) {
+  Write-Verbose "found existing MAVEN_HOME at $MAVEN_HOME"
+  Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD"
+  exit $?
+}
+
+if (! $distributionUrlNameMain -or ($distributionUrlName -eq $distributionUrlNameMain)) {
+  Write-Error "distributionUrl is not valid, must end with *-bin.zip, but found $distributionUrl"
+}
+
+# prepare tmp dir (New-TemporaryFile only reserves a unique name; the directory is "<name>.dir" and the placeholder file is deleted)
+$TMP_DOWNLOAD_DIR_HOLDER = New-TemporaryFile
+$TMP_DOWNLOAD_DIR = New-Item -Itemtype Directory -Path "$TMP_DOWNLOAD_DIR_HOLDER.dir"
+$TMP_DOWNLOAD_DIR_HOLDER.Delete() | Out-Null
+trap {
+  if ($TMP_DOWNLOAD_DIR.Exists) {
+    try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null }
+    catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" }
+  }
+}
+
+New-Item -Itemtype Directory -Path "$MAVEN_HOME_PARENT" -Force | Out-Null
+
+# Download and Install Apache Maven
+Write-Verbose "Couldn't find MAVEN_HOME, downloading and installing it ..."
+Write-Verbose "Downloading from: $distributionUrl" +Write-Verbose "Downloading to: $TMP_DOWNLOAD_DIR/$distributionUrlName" + +$webclient = New-Object System.Net.WebClient +if ($env:MVNW_USERNAME -and $env:MVNW_PASSWORD) { + $webclient.Credentials = New-Object System.Net.NetworkCredential($env:MVNW_USERNAME, $env:MVNW_PASSWORD) +} +[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12 +$webclient.DownloadFile($distributionUrl, "$TMP_DOWNLOAD_DIR/$distributionUrlName") | Out-Null + +# If specified, validate the SHA-256 sum of the Maven distribution zip file +$distributionSha256Sum = (Get-Content -Raw "$scriptDir/.mvn/wrapper/maven-wrapper.properties" | ConvertFrom-StringData).distributionSha256Sum +if ($distributionSha256Sum) { + if ($USE_MVND) { + Write-Error "Checksum validation is not supported for maven-mvnd. `nPlease disable validation by removing 'distributionSha256Sum' from your maven-wrapper.properties." + } + Import-Module $PSHOME\Modules\Microsoft.PowerShell.Utility -Function Get-FileHash + if ((Get-FileHash "$TMP_DOWNLOAD_DIR/$distributionUrlName" -Algorithm SHA256).Hash.ToLower() -ne $distributionSha256Sum) { + Write-Error "Error: Failed to validate Maven distribution SHA-256, your Maven distribution might be compromised. If you updated your Maven version, you need to update the specified distributionSha256Sum property." + } +} + +# unzip and move +Expand-Archive "$TMP_DOWNLOAD_DIR/$distributionUrlName" -DestinationPath "$TMP_DOWNLOAD_DIR" | Out-Null +Rename-Item -Path "$TMP_DOWNLOAD_DIR/$distributionUrlNameMain" -NewName $MAVEN_HOME_NAME | Out-Null +try { + Move-Item -Path "$TMP_DOWNLOAD_DIR/$MAVEN_HOME_NAME" -Destination $MAVEN_HOME_PARENT | Out-Null +} catch { + if (! 
(Test-Path -Path "$MAVEN_HOME" -PathType Container)) { + Write-Error "fail to move MAVEN_HOME" + } +} finally { + try { Remove-Item $TMP_DOWNLOAD_DIR -Recurse -Force | Out-Null } + catch { Write-Warning "Cannot remove $TMP_DOWNLOAD_DIR" } +} + +Write-Output "MVN_CMD=$MAVEN_HOME/bin/$MVN_CMD" diff --git a/pom.xml b/pom.xml index 0b3eedee12..4ffed3fdc2 100644 --- a/pom.xml +++ b/pom.xml @@ -18,13 +18,13 @@ org.apache apache - 25 + 37 ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT Apache Ratis pom @@ -46,21 +46,6 @@ - - - ${distMgmtSnapshotsId} - ${distMgmtSnapshotsName} - ${distMgmtSnapshotsUrl} - - - repository.jboss.org - https://repository.jboss.org/nexus/content/groups/public/ - - false - - - - Apache License, Version 2.0 @@ -82,22 +67,15 @@ ratis-test ratis-examples - ratis-replicated-map ratis-metrics-api ratis-metrics-default ratis-metrics-dropwizard3 ratis-tools ratis-shell ratis-assembly + ratis-bom - - - apache.snapshots - https://repository.apache.org/snapshots/ - - - scm:git:git://git.apache.org/ratis.git scm:git:https://git-wip-us.apache.org/repos/asf/ratis.git @@ -162,9 +140,6 @@ - 2023-11-19T14:23:30Z - UTF-8 - UTF-8 ${project.build.directory}/maven-shared-archive-resources/META-INF/LICENSE @@ -172,57 +147,54 @@ false - 5.1.8 - 3.3.0 - 4.0.6 - 1.6.1 - 3.0.0 + 6.0.2 3.5.3 - + 3.4.3 + 5.5.0.6356 - 3.3.0 - 3.1.0 - 1.6.1 - 2.4.0 - 2.2.0 + 3.6.1 + 3.6.3 + 1.12.0 + 3.0.0 + 2.7.1 0.6.1 - 1.0 + 2.9.1 - 4.2.1 - 4.2.0 + 4.8.6 + 4.8.6.8 - apache.snapshots.https - Apache Development Snapshot Repository - https://repository.apache.org/content/repositories/snapshots - apache.staging.https - Apache Release Distribution Repository - https://repository.apache.org/service/local/staging/deploy/maven2 + ${distMgmtReleasesId} + ${distMgmtReleasesName} + ${distMgmtReleasesUrl} bash - - 1.8 - ${javac.version} - 3.3.9 + + ${javaVersion} - - 1.0.5 + + 1.0.11 - 3.24.4 - 1.58.0 + 3.25.8 + 1.77.1 + + 1.60.1 + 1.40.0 true _ 4 - 2.0.7 - 5.10.1 - 0.8.11 + 2.0.17 + 5.14.3 + 
4.11.0 + 0.8.14 + flaky | org.apache.ratis.test.tag.FlakyTest @@ -348,19 +320,6 @@ ${project.version} - - ratis-replicated-map - org.apache.ratis - ${project.version} - - - ratis-replicated-map - org.apache.ratis - ${project.version} - test-jar - test - - ratis-server-api org.apache.ratis @@ -416,31 +375,47 @@ test ${slf4j.version} - - - junit - junit - 4.13.2 - org.junit junit-bom - ${junit.jupiter.version} + ${junit-bom.version} pom import org.mockito mockito-core - 4.3.1 + ${mockito.version} + + + + + io.opentelemetry + opentelemetry-api + ${opentelemetry.version} + + + io.opentelemetry + opentelemetry-sdk + ${opentelemetry.version} + + + io.opentelemetry + opentelemetry-sdk-testing + ${opentelemetry.version} + + + io.opentelemetry.semconv + opentelemetry-semconv + ${opentelemetry-semconv.version} - org.apache.tomcat - annotations-api - 6.0.53 - provided + io.opentelemetry + opentelemetry-context + ${opentelemetry.version} + @@ -448,12 +423,17 @@ kr.motd.maven os-maven-plugin - 1.5.0.Final + 1.7.1 + + org.apache.hadoop + hadoop-maven-plugins + ${hadoop-maven-plugins.version} + org.xolstice.maven.plugins protobuf-maven-plugin @@ -469,22 +449,15 @@ license-maven-plugin ${license-maven-plugin.version} - - com.coderplus.maven.plugins - copy-rename-maven-plugin - ${copy-rename-maven-plugin.version} - - - org.apache.maven.plugins - maven-install-plugin - org.apache.maven.plugins maven-javadoc-plugin 8 + -J-Duser.language=en + -J-Duser.country=US -Xmaxwarns 10000 -Xdoclint:-missing @@ -512,30 +485,15 @@ exec-maven-plugin ${exec-maven-plugin.version} - - org.apache.maven.plugins - maven-pdf-plugin - ${maven-pdf-plugin.version} - org.apache.maven.plugins maven-enforcer-plugin - - - - [${maven.min.version},) - - - [${java.min.version},) - - - de.skuzzle.enforcer restrict-imports-enforcer-rule - ${restrict-imports-enforcer-rules.version} + ${restrict-imports-enforcer-rule.version} @@ -573,8 +531,6 @@ org.apache.maven.plugins maven-compiler-plugin - ${javac.version} - 
${javac.version} true 512m 2048m @@ -585,21 +541,6 @@ - - - org.apache.maven.plugins - maven-source-plugin - - - attach-sources - prepare-package - - jar-no-fork - - - - @@ -628,15 +569,15 @@ org.apache.maven.plugins maven-surefire-plugin - ${maven-surefire-plugin.version} false false false false + all 600 - -Xmx2048m -XX:+HeapDumpOnOutOfMemoryError @{argLine} + -Xmx2g -XX:+HeapDumpOnOutOfMemoryError @{argLine} ${project.build.directory}/log ${project.build.directory}/tmp @@ -650,12 +591,6 @@ **/Test*$*.java ${test.exclude.pattern} - - - listener - org.apache.ratis.JUnitRunListener - - @@ -676,10 +611,6 @@ - - org.apache.maven.plugins - maven-deploy-plugin - org.apache.rat apache-rat-plugin @@ -692,10 +623,6 @@ - - org.apache.maven.plugins - maven-antrun-plugin - org.apache.maven.plugins maven-site-plugin @@ -707,11 +634,6 @@ - - com.atlassian.maven.plugins - maven-clover2-plugin - ${maven-clover2-plugin.version} - org.apache.felix maven-bundle-plugin @@ -723,6 +645,16 @@ jacoco-maven-plugin ${jacoco.version} + + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx.version} + + + org.sonarsource.scanner.maven + sonar-maven-plugin + ${sonar-maven-plugin.version} + @@ -732,91 +664,44 @@ spotbugs-maven-plugin - org.apache.maven.plugins - maven-enforcer-plugin - false + org.apache.felix + maven-bundle-plugin + true + true + + + org.apache.hadoop + hadoop-maven-plugins - clean - - enforce - - pre-clean - - - default + version-info + generate-resources - enforce + version-info - validate - - - - [${maven.min.version},) - Maven is out of date. - Ratis requires at least version ${maven.min.version} of Maven to properly build from source. - You appear to be using an older version. You can use either "mvn -version" or - "mvn enforcer:display-info" to verify what version is active. - - - - - [${java.min.version},) - Java is out of date. - Ratis requires at least version ${java.min.version} of the JDK to properly build from source. - You appear to be using an older version. 
You can use either "mvn -version" or - "mvn enforcer:display-info" to verify what version is active. - - - + + ${project.basedir} + + */src/main/java/**/*.java + */src/main/proto/*.proto + + - - org.apache.felix - maven-bundle-plugin - true - true - org.apache.maven.plugins maven-checkstyle-plugin - ${maven-checkstyle-plugin.version} + + dev-support/checkstyle.xml true false - - org.apache.maven.plugins - maven-pdf-plugin - - ${project.reporting.outputDirectory} - - false - - - - org.codehaus.mojo - buildnumber-maven-plugin - 1.4 - - - generate-resources - - create-metadata - - - target/classes - ratis-version.properties - Unknown - - - - org.codehaus.mojo build-helper-maven-plugin @@ -861,7 +746,25 @@ + + org.owasp + dependency-check-maven + 12.2.0 + + + + ${project.basedir}/src/main/resources + false + + + ${project.basedir}/../src/main/resources + + ratis-version.properties + + true + + @@ -931,9 +834,6 @@ jar - - ${project.build.directory} - @@ -943,7 +843,7 @@ - ratis-java-sources + attach-sources package jar-no-fork @@ -990,13 +890,11 @@ - ${java.min.version} + ${maven.compiler.release} Ratis has unsupported dependencies. - Ratis requires that all dependencies be compiled with version ${java.min.version} or earlier + Ratis requires that all dependencies be compiled with version ${maven.compiler.release} or earlier of the JDK to properly build from source. You appear to be using a newer dependency. You can use either "mvn -version" or "mvn enforcer:display-info" to verify what version is active. - Non-release builds can temporarily build with a newer JDK version by setting the - 'javac.source' property (eg. mvn -Djavac.source=1.8 clean package). 
@@ -1052,6 +950,18 @@ + + org.cyclonedx + cyclonedx-maven-plugin + + + package + + makeBom + + + + @@ -1067,6 +977,7 @@ org.apache.ratis.grpc.** + ${flaky-test-groups} @@ -1084,6 +995,7 @@ org.apache.ratis.datastream.** org.apache.ratis.server.** + ${flaky-test-groups} @@ -1102,6 +1014,21 @@ org.apache.ratis.grpc.** org.apache.ratis.server.** + ${flaky-test-groups} + + + + + + + flaky-tests + + + + org.apache.maven.plugins + maven-surefire-plugin + + ${flaky-test-groups} diff --git a/ratis-assembly/pom.xml b/ratis-assembly/pom.xml index 98826e142a..0558693039 100644 --- a/ratis-assembly/pom.xml +++ b/ratis-assembly/pom.xml @@ -17,7 +17,7 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-assembly @@ -33,6 +33,8 @@ ${project.build.directory}/test-classes ${project.build.directory}/test-classes true + + true @@ -120,8 +122,6 @@ org.apache.maven.plugins maven-assembly-plugin - - apache-ratis-${project.version} false true gnu @@ -137,25 +137,18 @@ src/main/assembly/src.xml - apache-ratis-${project.version}-src - false - default-cli + bin package single - src/main/assembly/examples-bin.xml - src/main/assembly/shell-bin.xml src/main/assembly/bin.xml - src/main/assembly/bin-pkg.xml - apache-ratis-${project.version}-bin - false @@ -280,16 +273,6 @@ test-jar - - ratis-replicated-map - org.apache.ratis - - - ratis-replicated-map - org.apache.ratis - test-jar - - org.apache.ratis ratis-metrics-api diff --git a/ratis-assembly/src/main/assembly/bin-pkg.xml b/ratis-assembly/src/main/assembly/bin-pkg.xml deleted file mode 100644 index 4d89869578..0000000000 --- a/ratis-assembly/src/main/assembly/bin-pkg.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - - bin-pkg - - tar.gz - - - - ${project.basedir}/target/apache-ratis-${project.version}-bin/apache-ratis-${project.version}-bin - .. - - bin/ratis - libexec/*.sh - examples/bin/*.sh - - - - ${project.basedir}/target/apache-ratis-${project.version}-bin/apache-ratis-${project.version}-bin - .. 
- - bin/ratis - libexec/*.sh - examples/bin/*.sh - - 0755 - - - diff --git a/ratis-assembly/src/main/assembly/bin.xml b/ratis-assembly/src/main/assembly/bin.xml index 7be3d01d61..12161b00a8 100644 --- a/ratis-assembly/src/main/assembly/bin.xml +++ b/ratis-assembly/src/main/assembly/bin.xml @@ -22,9 +22,18 @@ */ --> bin + apache-ratis-${project.version}-bin - dir + tar.gz + + + + org.apache.ratis:ratis-examples + + examples/lib + + true @@ -35,13 +44,13 @@ org.apache.ratis:ratis-grpc org.apache.ratis:ratis-netty org.apache.ratis:ratis-proto - org.apache.ratis:ratis-replicated-map org.apache.ratis:ratis-server-api org.apache.ratis:ratis-server org.apache.ratis:ratis-test org.apache.ratis:ratis-metrics-api org.apache.ratis:ratis-metrics-default org.apache.ratis:ratis-metrics-dropwizard3 + org.apache.ratis:ratis-shell org.apache.ratis:ratis-tools org.apache.ratis:ratis-resource-bundle @@ -63,6 +72,16 @@ 0644 + + + ${project.basedir}/../target + . + + bom.json + bom.xml + + 0644 + ${project.basedir}/../ratis-docs/target/classes/docs @@ -70,5 +89,50 @@ 0644 0755 + + ${project.basedir}/../ratis-shell/src/main/bin + bin + 0755 + + + ${project.basedir}/../ratis-shell/src/main/libexec + libexec + 0755 + 0755 + + + ${project.basedir}/../ratis-shell/src/main/conf + conf + 644 + + + ${project.basedir}/../ratis-shell/target/lib/ + jars + + + ${project.basedir}/../ratis-examples + examples + + README.md + + 0644 + + + ${project.basedir}/../ratis-examples/src/main/bin + examples/bin + + *.* + + 0755 + + + ${project.basedir}/../ratis-examples/src/main/resources + examples/conf + + conf.properties + log4j.properties + + 644 + diff --git a/ratis-assembly/src/main/assembly/examples-bin.xml b/ratis-assembly/src/main/assembly/examples-bin.xml deleted file mode 100644 index 21cc7eced2..0000000000 --- a/ratis-assembly/src/main/assembly/examples-bin.xml +++ /dev/null @@ -1,73 +0,0 @@ - - - - examples-bin - - dir - - - - - org.apache.ratis:ratis-examples - - examples/lib - - - - - 
${project.basedir}/src/main/resources - . - - README.md - LICENSE - NOTICE - - 0644 - - - ${project.basedir}/../ratis-examples - examples - - README.md - - 0644 - - - ${project.basedir}/../ratis-examples/src/main/bin - examples/bin - - *.* - - 0755 - - - ${project.basedir}/../ratis-examples/src/main/resources - examples/conf - - conf.properties - log4j.properties - - 644 - - - diff --git a/ratis-assembly/src/main/assembly/shell-bin.xml b/ratis-assembly/src/main/assembly/shell-bin.xml deleted file mode 100644 index 470870f41c..0000000000 --- a/ratis-assembly/src/main/assembly/shell-bin.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - shell - - dir - - - - ${project.basedir}/../ratis-shell/target/ - jars - - ratis-shell-${project.version}.jar - - - - ${project.basedir}/../ratis-shell/target/lib/ - jars - - - ${project.basedir}/src/main/resources - . - - README.md - LICENSE - NOTICE - - 0644 - - - ${project.basedir}/../ratis-shell/src/main/bin - bin - 0755 - - - ${project.basedir}/../ratis-shell/src/main/libexec - libexec - 0755 - 0755 - - - ${project.basedir}/../ratis-shell/src/main/conf - conf - 644 - - - diff --git a/ratis-assembly/src/main/assembly/src.xml b/ratis-assembly/src/main/assembly/src.xml index 98e06c5739..e20770d54d 100644 --- a/ratis-assembly/src/main/assembly/src.xml +++ b/ratis-assembly/src/main/assembly/src.xml @@ -22,6 +22,7 @@ */ --> src + apache-ratis-${project.version}-src tar.gz @@ -30,6 +31,7 @@ true org.apache.ratis:ratis-assembly + org.apache.ratis:ratis-bom org.apache.ratis:ratis-client org.apache.ratis:ratis-common org.apache.ratis:ratis-examples @@ -37,7 +39,6 @@ org.apache.ratis:ratis-netty org.apache.ratis:ratis-proto org.apache.ratis:ratis-docs - org.apache.ratis:ratis-replicated-map org.apache.ratis:ratis-server-api org.apache.ratis:ratis-server org.apache.ratis:ratis-shell @@ -102,6 +103,7 @@ README.md mvnw.cmd pom.xml + src/** start-build-env.sh 0644 diff --git a/ratis-bom/pom.xml b/ratis-bom/pom.xml new file mode 100644 index 
0000000000..3a046d9ff3 --- /dev/null +++ b/ratis-bom/pom.xml @@ -0,0 +1,153 @@ + + + + 4.0.0 + + + org.apache + apache + 37 + + + + org.apache.ratis + ratis-bom + 3.3.0-SNAPSHOT + Apache Ratis BOM + Apache Ratis Bill of Materials (BOM) + + pom + + + + ${distMgmtStagingId} + ${distMgmtStagingName} + ${distMgmtStagingUrl} + + + ${distMgmtSnapshotsId} + ${distMgmtSnapshotsName} + ${distMgmtSnapshotsUrl} + + + + + ${distMgmtReleasesId} + ${distMgmtReleasesName} + ${distMgmtReleasesUrl} + + 1.0.11 + + true + + + + + + org.apache.ratis + ratis-client + ${project.version} + + + org.apache.ratis + ratis-common + ${project.version} + + + org.apache.ratis + ratis-docs + ${project.version} + + + org.apache.ratis + ratis-examples + ${project.version} + + + org.apache.ratis + ratis-experiments + ${project.version} + + + org.apache.ratis + ratis-grpc + ${project.version} + + + org.apache.ratis + ratis-metrics-api + ${project.version} + + + org.apache.ratis + ratis-metrics-default + ${project.version} + + + org.apache.ratis + ratis-metrics-dropwizard3 + ${project.version} + + + org.apache.ratis + ratis-netty + ${project.version} + + + org.apache.ratis + ratis-proto + ${project.version} + + + org.apache.ratis + ratis-server + ${project.version} + + + org.apache.ratis + ratis-server-api + ${project.version} + + + org.apache.ratis + ratis-shell + ${project.version} + + + org.apache.ratis + ratis-thirdparty-misc + ${ratis.thirdparty.version} + + + org.apache.ratis + ratis-tools + ${project.version} + + + + + + + + com.github.spotbugs + spotbugs-maven-plugin + 4.8.6.8 + + true + + + + + diff --git a/ratis-client/dev-support/findbugsExcludeFile.xml b/ratis-client/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..3a808c4486 --- /dev/null +++ b/ratis-client/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ratis-client/pom.xml b/ratis-client/pom.xml index 
d1b08d5a6a..26b2034983 100644 --- a/ratis-client/pom.xml +++ b/ratis-client/pom.xml @@ -17,12 +17,17 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-client Apache Ratis Client + + + true + + org.apache.ratis @@ -42,5 +47,27 @@ org.slf4j slf4j-api + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test + + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-client/src/main/java/org/apache/ratis/client/RaftClientConfigKeys.java b/ratis-client/src/main/java/org/apache/ratis/client/RaftClientConfigKeys.java index 7360a9cadb..925324c21c 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/RaftClientConfigKeys.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/RaftClientConfigKeys.java @@ -42,7 +42,7 @@ interface Rpc { String PREFIX = RaftClientConfigKeys.PREFIX + ".rpc"; String REQUEST_TIMEOUT_KEY = PREFIX + ".request.timeout"; - TimeDuration REQUEST_TIMEOUT_DEFAULT = TimeDuration.valueOf(3000, TimeUnit.MILLISECONDS); + TimeDuration REQUEST_TIMEOUT_DEFAULT = TimeDuration.valueOf(3, TimeUnit.SECONDS); static TimeDuration requestTimeout(RaftProperties properties) { return getTimeDuration(properties.getTimeDuration(REQUEST_TIMEOUT_DEFAULT.getUnit()), REQUEST_TIMEOUT_KEY, REQUEST_TIMEOUT_DEFAULT, getDefaultLog()); @@ -52,8 +52,7 @@ static void setRequestTimeout(RaftProperties properties, TimeDuration timeoutDur } String WATCH_REQUEST_TIMEOUT_KEY = PREFIX + ".watch.request.timeout"; - TimeDuration WATCH_REQUEST_TIMEOUT_DEFAULT = - TimeDuration.valueOf(10000, TimeUnit.MILLISECONDS); + TimeDuration WATCH_REQUEST_TIMEOUT_DEFAULT = TimeDuration.valueOf(10, TimeUnit.SECONDS); static TimeDuration watchRequestTimeout(RaftProperties properties) { return getTimeDuration(properties.getTimeDuration(WATCH_REQUEST_TIMEOUT_DEFAULT.getUnit()), WATCH_REQUEST_TIMEOUT_KEY, WATCH_REQUEST_TIMEOUT_DEFAULT, getDefaultLog()); @@ 
-125,7 +124,7 @@ static void setFlushRequestBytesMin(RaftProperties properties, SizeInBytes flush } String REQUEST_TIMEOUT_KEY = PREFIX + ".request.timeout"; - TimeDuration REQUEST_TIMEOUT_DEFAULT = TimeDuration.valueOf(10000, TimeUnit.MILLISECONDS); + TimeDuration REQUEST_TIMEOUT_DEFAULT = TimeDuration.valueOf(10, TimeUnit.SECONDS); static TimeDuration requestTimeout(RaftProperties properties) { return getTimeDuration(properties.getTimeDuration(REQUEST_TIMEOUT_DEFAULT.getUnit()), REQUEST_TIMEOUT_KEY, REQUEST_TIMEOUT_DEFAULT, getDefaultLog()); diff --git a/ratis-client/src/main/java/org/apache/ratis/client/api/SnapshotManagementApi.java b/ratis-client/src/main/java/org/apache/ratis/client/api/SnapshotManagementApi.java index edd0475442..359763fe90 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/api/SnapshotManagementApi.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/api/SnapshotManagementApi.java @@ -27,6 +27,24 @@ */ public interface SnapshotManagementApi { - /** trigger create snapshot file. */ - RaftClientReply create(long timeoutMs) throws IOException; + /** The same as create(0, timeoutMs). */ + default RaftClientReply create(long timeoutMs) throws IOException { + return create(0, timeoutMs); + } + + /** The same as create(force? 1 : 0, timeoutMs). */ + default RaftClientReply create(boolean force, long timeoutMs) throws IOException { + return create(force? 1 : 0, timeoutMs); + } + + /** + * Trigger to create a snapshot. + * + * @param creationGap When (creationGap > 0) and (lastAppliedIndex - lastSnapshotIndex < creationGap), + * return lastSnapshotIndex; otherwise, take a new snapshot and then return its index. + * When creationGap == 0, use the server configured value as the creationGap. + * @return a reply. When {@link RaftClientReply#isSuccess()} is true, + * {@link RaftClientReply#getLogIndex()} is the snapshot index fulfilling the operation. 
+ */ + RaftClientReply create(long creationGap, long timeoutMs) throws IOException; } diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/AsyncImpl.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/AsyncImpl.java index 8547ce2665..01329fa7d6 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/AsyncImpl.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/AsyncImpl.java @@ -27,6 +27,7 @@ import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.trace.TraceClient; /** Async api implementations. */ class AsyncImpl implements AsyncRpcApi { @@ -38,7 +39,8 @@ class AsyncImpl implements AsyncRpcApi { CompletableFuture send( RaftClientRequest.Type type, Message message, RaftPeerId server) { - return client.getOrderedAsync().send(type, message, server); + return TraceClient.asyncSend( + () -> client.getOrderedAsync(server).send(type, message, server), type, server); } @Override diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java index db19831955..d2146a521f 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java @@ -128,6 +128,7 @@ static RaftRpcRequestProto.Builder toRaftRpcRequestProtoBuilder(RaftClientReques Optional.ofNullable(request.getSlidingWindowEntry()).ifPresent(b::setSlidingWindowEntry); Optional.ofNullable(request.getRoutingTable()).map(RoutingTable::toProto).ifPresent(b::setRoutingTable); + Optional.ofNullable(request.getSpanContext()).ifPresent(b::setSpanContext); return b.setCallId(request.getCallId()) .setToLeader(request.isToLeader()) @@ -179,15 +180,18 @@ static RaftClientRequest toRaftClientRequest(RaftClientRequestProto p) { final 
RaftClientRequest.Builder b = RaftClientRequest.newBuilder(); - final RaftPeerId perrId = RaftPeerId.valueOf(request.getReplyId()); + final RaftPeerId peerId = RaftPeerId.valueOf(request.getReplyId()); if (request.getToLeader()) { - b.setLeaderId(perrId); + b.setLeaderId(peerId); } else { - b.setServerId(perrId); + b.setServerId(peerId); } if (request.hasSlidingWindowEntry()) { b.setSlidingWindowEntry(request.getSlidingWindowEntry()); } + if (request.hasSpanContext()) { + b.setSpanContext(request.getSpanContext()); + } return b.setClientId(ClientId.valueOf(request.getRequestorId())) .setGroupId(ProtoUtils.toRaftGroupId(request.getRaftGroupId())) .setCallId(request.getCallId()) @@ -204,9 +208,13 @@ static ByteBuffer toRaftClientRequestProtoByteBuffer(RaftClientRequest request) } static RaftClientRequestProto toRaftClientRequestProto(RaftClientRequest request) { + return toRaftClientRequestProto(request, true); + } + + static RaftClientRequestProto toRaftClientRequestProto(RaftClientRequest request, boolean withMsg) { final RaftClientRequestProto.Builder b = RaftClientRequestProto.newBuilder() .setRpcRequest(toRaftRpcRequestProtoBuilder(request)); - if (request.getMessage() != null) { + if (withMsg && request.getMessage() != null) { b.setMessage(toClientMessageEntryProtoBuilder(request.getMessage())); } @@ -364,6 +372,7 @@ static GroupInfoReplyProto toGroupInfoReplyProto(GroupInfoReply reply) { b.setIsRaftStorageHealthy(reply.isRaftStorageHealthy()); b.setRole(reply.getRoleInfoProto()); b.addAllCommitInfos(reply.getCommitInfos()); + b.setLogInfo(reply.getLogInfoProto()); } } return b.build(); @@ -397,7 +406,8 @@ static RaftClientReply toRaftClientReply(RaftClientReplyProto replyProto) { e = new NotLeaderException(serverMemberId, suggestedLeader, peers); } else if (replyProto.getExceptionDetailsCase() == NOTREPLICATEDEXCEPTION) { final NotReplicatedExceptionProto nre = replyProto.getNotReplicatedException(); - e = new NotReplicatedException(nre.getCallId(), 
nre.getReplication(), nre.getLogIndex()); + e = new NotReplicatedException(nre.getCallId(), nre.getReplication(), nre.getLogIndex(), + replyProto.getCommitInfosList()); } else if (replyProto.getExceptionDetailsCase().equals(STATEMACHINEEXCEPTION)) { e = toStateMachineException(serverMemberId, replyProto.getStateMachineException()); } else if (replyProto.getExceptionDetailsCase().equals(DATASTREAMEXCEPTION)) { @@ -506,7 +516,8 @@ static GroupInfoReply toGroupInfoReply(GroupInfoReplyProto replyProto) { ProtoUtils.toRaftGroup(replyProto.getGroup()), replyProto.getRole(), replyProto.getIsRaftStorageHealthy(), - replyProto.hasConf()? replyProto.getConf(): null); + replyProto.hasConf()? replyProto.getConf(): null, + replyProto.getLogInfo()); } static Message toMessage(final ClientMessageEntryProto p) { @@ -657,7 +668,8 @@ static SnapshotManagementRequest toSnapshotManagementRequest(SnapshotManagementR switch(p.getOpCase()) { case CREATE: return SnapshotManagementRequest.newCreate(clientId, serverId, - ProtoUtils.toRaftGroupId(m.getRaftGroupId()), m.getCallId(), m.getTimeoutMs()); + ProtoUtils.toRaftGroupId(m.getRaftGroupId()), m.getCallId(), m.getTimeoutMs(), + p.getCreate().getCreationGap()); default: throw new IllegalArgumentException("Unexpected op " + p.getOpCase() + " in " + p); } @@ -669,7 +681,7 @@ static SnapshotManagementRequestProto toSnapshotManagementRequestProto( .setRpcRequest(toRaftRpcRequestProtoBuilder(request)); final SnapshotManagementRequest.Create create = request.getCreate(); if (create != null) { - b.setCreate(SnapshotCreateRequestProto.newBuilder().build()); + b.setCreate(SnapshotCreateRequestProto.newBuilder().setCreationGap(create.getCreationGap()).build()); } return b.build(); } diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java index 26d01c356f..313131cbda 100644 --- 
a/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java @@ -40,6 +40,7 @@ import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.exceptions.AlreadyClosedException; import org.apache.ratis.rpc.CallId; +import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.util.IOUtils; import org.apache.ratis.protocol.*; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; @@ -152,6 +153,9 @@ private CompletableFuture combineHeader(CompletableFuture writeAsyncImpl(Object data, long length, Iterable options) { if (isClosed()) { + if (data instanceof ByteBuf) { + ((ByteBuf) data).release(); + } return JavaUtils.completeExceptionally(new AlreadyClosedException( clientId + ": stream already closed, request=" + header)); } @@ -169,6 +173,10 @@ private CompletableFuture writeAsyncImpl(Object data, long leng return f; } + public CompletableFuture writeAsync(ByteBuf src, Iterable options) { + return writeAsyncImpl(src, src.readableBytes(), options); + } + @Override public CompletableFuture writeAsync(ByteBuffer src, Iterable options) { return writeAsyncImpl(src, src.remaining(), options); @@ -235,7 +243,7 @@ public DataStreamClientRpc getClientRpc() { } @Override - public DataStreamOutputRpc stream(RaftClientRequest request) { + public DataStreamOutputImpl stream(RaftClientRequest request) { return new DataStreamOutputImpl(request); } diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/OrderedAsync.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/OrderedAsync.java index 09c6cd4ac9..50e8093a13 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/OrderedAsync.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/OrderedAsync.java @@ -59,6 +59,8 @@ public final class OrderedAsync { public static final Logger LOG = 
LoggerFactory.getLogger(OrderedAsync.class); + public static final Message DUMMY = Message.valueOf("DUMMY"); + private enum BatchLogKey implements BatchLogger.Key { SEND_REQUEST_EXCEPTION } @@ -117,12 +119,12 @@ public String toString() { } } - static OrderedAsync newInstance(RaftClientImpl client, RaftProperties properties) { + static OrderedAsync newInstance(RaftClientImpl client, RaftPeerId server, RaftProperties properties) { final OrderedAsync ordered = new OrderedAsync(client, properties); // send a dummy watch request to establish the connection // TODO: this is a work around, it is better to fix the underlying RPC implementation if (RaftClientConfigKeys.Async.Experimental.sendDummyRequest(properties)) { - ordered.send(RaftClientRequest.watchRequestType(), null, null); + ordered.send(RaftClientRequest.watchRequestType(), DUMMY, server); } return ordered; } @@ -176,9 +178,9 @@ CompletableFuture send(RaftClientRequest.Type type, Message mes ).whenComplete((r, e) -> { if (e != null) { if (e.getCause() instanceof AlreadyClosedException) { - LOG.error("Failed to send request, message=" + message + " due to " + e); + LOG.error("Failed to send request, message={} due to {}", message, e.toString()); } else { - LOG.error("Failed to send request, message=" + message, e); + LOG.error("Failed to send request, message={}", message, e); } } requestSemaphore.release(); @@ -199,7 +201,7 @@ private void sendRequestWithRetry(PendingOrderedRequest pending) { return; } - if (getSlidingWindow((RaftPeerId) null).isFirst(pending.getSeqNum())) { + if (getSlidingWindow(request).isFirst(pending.getSeqNum())) { pending.setFirstRequest(); } LOG.debug("{}: send* {}", client.getId(), request); @@ -213,12 +215,31 @@ private void sendRequestWithRetry(PendingOrderedRequest pending) { final Throwable exception = e; final String key = client.getId() + "-" + request.getCallId() + "-" + exception; final Consumer op = suffix -> LOG.error("{} {}: Failed* {}", suffix, client.getId(), request, 
exception); - BatchLogger.warn(BatchLogKey.SEND_REQUEST_EXCEPTION, key, op); + BatchLogger.print(BatchLogKey.SEND_REQUEST_EXCEPTION, key, op); handleException(pending, request, e); return null; }); } + private void logError(String prefix, RaftClientRequest request, Throwable e) { + final Class[] knownExceptionClasses = {AlreadyClosedException.class, NotLeaderException.class}; + for(Class known : knownExceptionClasses) { + if (logError(prefix, request, e, known)) { + return; + } + } + LOG.error("{} {}: Failed* {}", prefix, client.getId(), request, e); + } + + private boolean logError(String prefix, RaftClientRequest request, Throwable e, Class cause) { + if (JavaUtils.unwrapCompletionException(e).getClass().isAssignableFrom(cause)) { + LOG.error("{} {}: Failed* {} due to {} caused by {}", + prefix, client.getId(), request, e, cause.getSimpleName()); + return true; + } + return false; + } + private void handleException(PendingOrderedRequest pending, RaftClientRequest request, Throwable e) { final RetryPolicy retryPolicy = client.getRetryPolicy(); if (client.isClosed()) { diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/OrderedStreamAsync.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/OrderedStreamAsync.java index 989c00cbbc..275755514f 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/OrderedStreamAsync.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/OrderedStreamAsync.java @@ -21,12 +21,14 @@ import org.apache.ratis.client.RaftClientConfigKeys; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamPacketByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; import org.apache.ratis.datastream.impl.DataStreamRequestFilePositionCount; import org.apache.ratis.io.FilePositionCount; import org.apache.ratis.protocol.DataStreamReply; import 
org.apache.ratis.protocol.DataStreamRequest; import org.apache.ratis.protocol.DataStreamRequestHeader; +import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.util.IOUtils; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.SlidingWindow; @@ -56,6 +58,8 @@ static class DataStreamWindowRequest implements SlidingWindow.ClientSideRequest< DataStreamRequest getDataStreamRequest() { if (header.getDataLength() == 0) { return new DataStreamRequestByteBuffer(header, DataStreamPacketByteBuffer.EMPTY_BYTE_BUFFER); + } else if (data instanceof ByteBuf) { + return new DataStreamRequestByteBuf(header, (ByteBuf)data); } else if (data instanceof ByteBuffer) { return new DataStreamRequestByteBuffer(header, (ByteBuffer)data); } else if (data instanceof FilePositionCount) { diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/RaftClientImpl.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/RaftClientImpl.java index 1b82709daf..f24360f62b 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/RaftClientImpl.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/RaftClientImpl.java @@ -43,9 +43,11 @@ import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.thirdparty.com.google.common.cache.Cache; import org.apache.ratis.thirdparty.com.google.common.cache.CacheBuilder; +import org.apache.ratis.trace.TraceUtils; import org.apache.ratis.util.CollectionUtils; import org.apache.ratis.util.IOUtils; import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.MemoizedFunction; import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.TimeDuration; @@ -172,13 +174,14 @@ private synchronized Set getAndReset() { private final RaftGroupId groupId; private final RetryPolicy retryPolicy; + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile RaftPeerId leaderId; /** The callIds of 
the replied requests. */ private final RepliedCallIds repliedCallIds; private final TimeoutExecutor scheduler = TimeoutExecutor.getInstance(); - private final Supplier orderedAsync; + private final MemoizedFunction orderedAsync; private final Supplier asyncApi; private final Supplier blockingApi; private final Supplier messageStreamApi; @@ -207,7 +210,7 @@ private synchronized Set getAndReset() { clientRpc.addRaftPeers(group.getPeers()); this.clientRpc = clientRpc; - this.orderedAsync = JavaUtils.memoize(() -> OrderedAsync.newInstance(this, properties)); + this.orderedAsync = MemoizedFunction.valueOf(server -> OrderedAsync.newInstance(this, server, properties)); this.messageStreamApi = JavaUtils.memoize(() -> MessageStreamImpl.newInstance(this, properties)); this.asyncApi = JavaUtils.memoize(() -> new AsyncImpl(this)); this.blockingApi = JavaUtils.memoize(() -> new BlockingImpl(this)); @@ -217,6 +220,7 @@ private synchronized Set getAndReset() { .setParameters(parameters) .build()); this.adminApi = JavaUtils.memoize(() -> new AdminImpl(this)); + TraceUtils.setTracerWhenEnabled(properties); } @Override @@ -274,8 +278,8 @@ TimeoutExecutor getScheduler() { return scheduler; } - OrderedAsync getOrderedAsync() { - return orderedAsync.get(); + OrderedAsync getOrderedAsync(RaftPeerId server) { + return orderedAsync.apply(server); } RaftClientRequest newRaftClientRequest( diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/SnapshotManagementImpl.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/SnapshotManagementImpl.java index 1762dc0e49..65c54d0f21 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/SnapshotManagementImpl.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/SnapshotManagementImpl.java @@ -37,9 +37,10 @@ class SnapshotManagementImpl implements SnapshotManagementApi { } @Override - public RaftClientReply create(long timeoutMs) throws IOException { + public RaftClientReply create(long creationGap, 
long timeoutMs) throws IOException { final long callId = CallId.getAndIncrement(); return client.io().sendRequestWithRetry(() -> SnapshotManagementRequest.newCreate(client.getId(), - Optional.ofNullable(server).orElseGet(client::getLeaderId), client.getGroupId(), callId, timeoutMs)); + Optional.ofNullable(server).orElseGet(client::getLeaderId), + client.getGroupId(), callId, timeoutMs, creationGap)); } } diff --git a/ratis-common/dev-support/findbugsExcludeFile.xml b/ratis-common/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..882f08b7fa --- /dev/null +++ b/ratis-common/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ratis-common/pom.xml b/ratis-common/pom.xml index 9205e81c2c..ba19c73e33 100644 --- a/ratis-common/pom.xml +++ b/ratis-common/pom.xml @@ -17,7 +17,7 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-common @@ -39,9 +39,24 @@ - junit - junit - test + io.opentelemetry + opentelemetry-api + + + io.opentelemetry + opentelemetry-context + + + io.opentelemetry + opentelemetry-sdk + + + io.opentelemetry + opentelemetry-sdk-testing + + + io.opentelemetry.semconv + opentelemetry-semconv @@ -54,11 +69,6 @@ junit-jupiter-engine test - - org.junit.vintage - junit-vintage-engine - test - org.junit.platform junit-platform-launcher @@ -70,4 +80,15 @@ test + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-common/src/main/java/org/apache/ratis/conf/ConfUtils.java b/ratis-common/src/main/java/org/apache/ratis/conf/ConfUtils.java index c1fb9268c9..43706faabc 100644 --- a/ratis-common/src/main/java/org/apache/ratis/conf/ConfUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/conf/ConfUtils.java @@ 
-18,7 +18,6 @@ package org.apache.ratis.conf; import org.apache.ratis.security.TlsConf; -import org.apache.ratis.thirdparty.com.google.common.base.Objects; import org.apache.ratis.util.NetUtils; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; @@ -33,6 +32,9 @@ import java.net.InetSocketAddress; import java.util.Arrays; import java.util.List; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Consumer; @@ -41,10 +43,24 @@ public interface ConfUtils { Logger LOG = LoggerFactory.getLogger(ConfUtils.class); + class Utils { + private static final ConcurrentMap CACHE = new ConcurrentHashMap<>(); + + private static boolean isNew(String key, T value) { + if (value == null) { + final Object previous = CACHE.remove(key); + return previous != null; + } else { + final Object previous = CACHE.put(key, value); + return !value.equals(previous); + } + } + } + static void logGet(String key, T value, T defaultValue, Consumer logger) { - if (logger != null) { + if (logger != null && Utils.isNew(key, value)) { logger.accept(String.format("%s = %s (%s)", key, value, - Objects.equal(value, defaultValue)? "default": "custom")); + Objects.equals(value, defaultValue)? 
"default": "custom")); } } diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamRequestByteBuf.java b/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamRequestByteBuf.java similarity index 96% rename from ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamRequestByteBuf.java rename to ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamRequestByteBuf.java index 2542b1ec6f..1873bec9b4 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamRequestByteBuf.java +++ b/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamRequestByteBuf.java @@ -16,9 +16,8 @@ * limitations under the License. */ -package org.apache.ratis.netty.server; +package org.apache.ratis.datastream.impl; -import org.apache.ratis.datastream.impl.DataStreamPacketImpl; import org.apache.ratis.io.WriteOption; import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; import org.apache.ratis.protocol.ClientId; diff --git a/ratis-common/src/main/java/org/apache/ratis/io/MD5Hash.java b/ratis-common/src/main/java/org/apache/ratis/io/MD5Hash.java index e60bef9652..71fd39f34b 100644 --- a/ratis-common/src/main/java/org/apache/ratis/io/MD5Hash.java +++ b/ratis-common/src/main/java/org/apache/ratis/io/MD5Hash.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -18,155 +18,73 @@ package org.apache.ratis.io; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.io.InputStream; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.Arrays; - -public class MD5Hash { - public static final int MD5_LEN = 16; - - private static final ThreadLocal DIGESTER_FACTORY = - ThreadLocal.withInitial(() -> { - try { - return MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } - }); - - private byte[] digest; - - /** Constructs an MD5Hash. */ - public MD5Hash() { - this.digest = new byte[MD5_LEN]; - } - - /** Constructs an MD5Hash from a hex string. */ - public MD5Hash(String hex) { - setDigest(hex); - } - - /** Constructs an MD5Hash with a specified value. */ - public MD5Hash(byte[] digest) { - if (digest.length != MD5_LEN) { - throw new IllegalArgumentException("Wrong length: " + digest.length); - } - this.digest = digest.clone(); - } - - public void readFields(DataInput in) throws IOException { - in.readFully(digest); - } - - /** Constructs, reads and returns an instance. */ - public static MD5Hash read(DataInput in) throws IOException { - MD5Hash result = new MD5Hash(); - result.readFields(in); - return result; - } - - public void write(DataOutput out) throws IOException { - out.write(digest); - } - - /** Copy the contents of another instance into this instance. */ - public void set(MD5Hash that) { - System.arraycopy(that.digest, 0, this.digest, 0, MD5_LEN); - } - - /** Returns the digest bytes. */ - public byte[] getDigest() { - return digest.clone(); - } - - /** Construct a hash value for a byte array. 
*/ - public static MD5Hash digest(byte[] data) { - return digest(data, 0, data.length); - } +import org.apache.ratis.util.MemoizedSupplier; +import org.apache.ratis.util.Preconditions; - /** - * Create a thread local MD5 digester - */ - public static MessageDigest getDigester() { - MessageDigest digester = DIGESTER_FACTORY.get(); - digester.reset(); - return digester; - } - - /** Construct a hash value for the content from the InputStream. */ - public static MD5Hash digest(InputStream in) throws IOException { - final byte[] buffer = new byte[4*1024]; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Objects; +import java.util.function.Supplier; - final MessageDigest digester = getDigester(); - for(int n; (n = in.read(buffer)) != -1; ) { - digester.update(buffer, 0, n); +/** + * A MD5 hash value. + *

+ * This is a value-based class. + */ +public final class MD5Hash { + public static final int MD5_LENGTH = 16; + + /** @return an instance with the given digest in a (case-insensitive) hexadecimals. */ + public static MD5Hash newInstance(String digestHexadecimals) { + Objects.requireNonNull(digestHexadecimals, "digestHexadecimals == null"); + Preconditions.assertSame(2 * MD5_LENGTH, digestHexadecimals.length(), "digestHexadecimals"); + + final byte[] digest = new byte[MD5_LENGTH]; + for (int i = 0; i < MD5_LENGTH; i++) { + final int j = i << 1; + digest[i] = (byte) (charToNibble(digestHexadecimals, j) << 4 | + charToNibble(digestHexadecimals, j + 1)); } - - return new MD5Hash(digester.digest()); - } - - /** Construct a hash value for a byte array. */ - public static MD5Hash digest(byte[] data, int start, int len) { - byte[] digest; - MessageDigest digester = getDigester(); - digester.update(data, start, len); - digest = digester.digest(); return new MD5Hash(digest); } - /** Construct a hash value for an array of byte array. */ - public static MD5Hash digest(byte[][] dataArr, int start, int len) { - byte[] digest; - MessageDigest digester = getDigester(); - for (byte[] data : dataArr) { - digester.update(data, start, len); - } - digest = digester.digest(); - return new MD5Hash(digest); + /** @return an instance with the given digest. */ + public static MD5Hash newInstance(byte[] digest) { + Objects.requireNonNull(digest, "digest == null"); + Preconditions.assertSame(MD5_LENGTH, digest.length, "digest"); + return new MD5Hash(digest.clone()); } - /** Construct a half-sized version of this MD5. 
Fits in a long **/ - public long halfDigest() { - long value = 0; - for (int i = 0; i < 8; i++) { - value |= ((digest[i] & 0xffL) << (8*(7-i))); - } - return value; + private final byte[] digest; + private final Supplier digestString; + + private MD5Hash(byte[] digest) { + this.digest = digest; + this.digestString = MemoizedSupplier.valueOf(() -> digestToString(digest)); } - /** - * Return a 32-bit digest of the MD5. - * @return the first 4 bytes of the md5 - */ - public int quarterDigest() { - int value = 0; - for (int i = 0; i < 4; i++) { - value |= ((digest[i] & 0xff) << (8*(3-i))); - } - return value; + /** @return the digest wrapped by a read-only {@link ByteBuffer}. */ + public ByteBuffer getDigest() { + return ByteBuffer.wrap(digest).asReadOnlyBuffer(); } - /** Returns true iff o is an MD5Hash whose digest contains the - * same values. */ @Override - public boolean equals(Object o) { - if (!(o instanceof MD5Hash)) { + public boolean equals(Object object) { + if (this == object) { + return true; + } else if (!(object instanceof MD5Hash)) { return false; } - MD5Hash other = (MD5Hash)o; - return Arrays.equals(this.digest, other.digest); + final MD5Hash that = (MD5Hash) object; + return Arrays.equals(this.digest, that.digest); } - /** Returns a hash code value for this object. - * Only uses the first 4 bytes, since md5s are evenly distributed. - */ @Override public int hashCode() { - return quarterDigest(); + return ((digest[0] & 0xFF) << 24) + | ((digest[1] & 0xFF) << 16) + | ((digest[2] & 0xFF) << 8) + | (digest[3] & 0xFF); } private static final char[] HEX_DIGITS = @@ -175,8 +93,12 @@ public int hashCode() { /** Returns a string representation of this object. 
*/ @Override public String toString() { - StringBuilder buf = new StringBuilder(MD5_LEN*2); - for (int i = 0; i < MD5_LEN; i++) { + return digestString.get(); + } + + static String digestToString(byte[] digest) { + StringBuilder buf = new StringBuilder(MD5_LENGTH *2); + for (int i = 0; i < MD5_LENGTH; i++) { int b = digest[i]; buf.append(HEX_DIGITS[(b >> 4) & 0xf]); buf.append(HEX_DIGITS[b & 0xf]); @@ -184,20 +106,8 @@ public String toString() { return buf.toString(); } - /** Sets the digest value from a hex string. */ - public void setDigest(String hex) { - if (hex.length() != MD5_LEN*2) { - throw new IllegalArgumentException("Wrong length: " + hex.length()); - } - this.digest = new byte[MD5_LEN]; - for (int i = 0; i < MD5_LEN; i++) { - int j = i << 1; - this.digest[i] = (byte)(charToNibble(hex.charAt(j)) << 4 | - charToNibble(hex.charAt(j+1))); - } - } - - private static int charToNibble(char c) { + private static int charToNibble(String hexadecimals, int i) { + final char c = hexadecimals.charAt(i); if (c >= '0' && c <= '9') { return c - '0'; } else if (c >= 'a' && c <= 'f') { @@ -205,7 +115,8 @@ private static int charToNibble(char c) { } else if (c >= 'A' && c <= 'F') { return 0xA + (c - 'A'); } else { - throw new RuntimeException("Not a hex character: " + c); + throw new IllegalArgumentException( + "Found a non-hexadecimal character '" + c + "' at index " + i + " in \"" + hexadecimals + "\""); } } } diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/ClientId.java b/ratis-common/src/main/java/org/apache/ratis/protocol/ClientId.java index 4de615730c..09b77e6e81 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/ClientId.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/ClientId.java @@ -18,6 +18,7 @@ package org.apache.ratis.protocol; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.util.WeakValueCache; import java.util.UUID; @@ -26,13 +27,17 @@ * to correctly identify retry 
requests from the same client. */ public final class ClientId extends RaftId { - private static final Factory FACTORY = new Factory() { + private static final Factory FACTORY = new Factory(ClientId.class) { @Override ClientId newInstance(UUID uuid) { return new ClientId(uuid); } }; + static WeakValueCache getCache() { + return FACTORY.getCache(); + } + public static ClientId emptyClientId() { return FACTORY.emptyId(); } diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/GroupInfoReply.java b/ratis-common/src/main/java/org/apache/ratis/protocol/GroupInfoReply.java index 632fa65293..bfac81a2b0 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/GroupInfoReply.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/GroupInfoReply.java @@ -19,6 +19,7 @@ import org.apache.ratis.proto.RaftProtos.RaftConfigurationProto; import org.apache.ratis.proto.RaftProtos.CommitInfoProto; +import org.apache.ratis.proto.RaftProtos.LogInfoProto; import org.apache.ratis.proto.RaftProtos.RoleInfoProto; import java.util.Collection; @@ -33,25 +34,27 @@ public class GroupInfoReply extends RaftClientReply { private final RoleInfoProto roleInfoProto; private final boolean isRaftStorageHealthy; private final RaftConfigurationProto conf; + private final LogInfoProto logInfoProto; public GroupInfoReply(RaftClientRequest request, Collection commitInfos, RaftGroup group, RoleInfoProto roleInfoProto, boolean isRaftStorageHealthy, - RaftConfigurationProto conf) { + RaftConfigurationProto conf, LogInfoProto logInfoProto) { this(request.getClientId(), request.getServerId(), request.getRaftGroupId(), request.getCallId(), commitInfos, - group, roleInfoProto, isRaftStorageHealthy, conf); + group, roleInfoProto, isRaftStorageHealthy, conf, logInfoProto); } @SuppressWarnings("parameternumber") public GroupInfoReply(ClientId clientId, RaftPeerId serverId, RaftGroupId groupId, long callId, Collection commitInfos, RaftGroup group, RoleInfoProto roleInfoProto, boolean 
isRaftStorageHealthy, - RaftConfigurationProto conf) { + RaftConfigurationProto conf, LogInfoProto logInfoProto) { super(clientId, serverId, groupId, callId, true, null, null, 0L, commitInfos); this.group = group; this.roleInfoProto = roleInfoProto; this.isRaftStorageHealthy = isRaftStorageHealthy; this.conf = conf; + this.logInfoProto = logInfoProto; } public RaftGroup getGroup() { @@ -69,4 +72,8 @@ public boolean isRaftStorageHealthy() { public Optional getConf() { return Optional.ofNullable(conf); } + + public LogInfoProto getLogInfoProto() { + return logInfoProto; + } } diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/Message.java b/ratis-common/src/main/java/org/apache/ratis/protocol/Message.java index e7ea97ca4e..55fcd064d2 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/Message.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/Message.java @@ -19,6 +19,7 @@ import org.apache.ratis.thirdparty.com.google.protobuf.AbstractMessage; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat; import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.StringUtils; @@ -47,11 +48,11 @@ public String toString() { } static Message valueOf(AbstractMessage abstractMessage) { - return valueOf(abstractMessage.toByteString(), abstractMessage::toString); + return valueOf(abstractMessage.toByteString(), () -> TextFormat.shortDebugString(abstractMessage)); } static Message valueOf(ByteString bytes) { - return valueOf(bytes, () -> "Message:" + StringUtils.bytes2HexShortString(bytes)); + return valueOf(bytes, () -> "Message:" + StringUtils.bytes2ShortString(bytes)); } static Message valueOf(String string) { diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientAsynchronousProtocol.java b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientAsynchronousProtocol.java index 1985bbe667..428bdaf18d 100644 --- 
a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientAsynchronousProtocol.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientAsynchronousProtocol.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -46,11 +46,11 @@ default CompletableFuture submitClientRequestAsync( ReferenceCountedObject requestRef) { try { // for backward compatibility - return submitClientRequestAsync(requestRef.retain()) - .whenComplete((r, e) -> requestRef.release()); + return submitClientRequestAsync(requestRef.retain()); } catch (Exception e) { - requestRef.release(); return JavaUtils.completeExceptionally(e); + } finally { + requestRef.release(); } } } \ No newline at end of file diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientMessage.java b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientMessage.java index 8d3104a73d..92ae77ce21 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientMessage.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientMessage.java @@ -18,7 +18,8 @@ package org.apache.ratis.protocol; import org.apache.ratis.util.JavaUtils; -import org.apache.ratis.util.Preconditions; + +import java.util.Objects; public abstract class RaftClientMessage implements RaftRpcMessage { private final ClientId clientId; @@ -27,9 +28,9 @@ public abstract class RaftClientMessage implements RaftRpcMessage { private final long callId; RaftClientMessage(ClientId clientId, RaftPeerId serverId, RaftGroupId groupId, long callId) { - this.clientId = Preconditions.assertNotNull(clientId, "clientId"); - this.serverId = Preconditions.assertNotNull(serverId, "serverId"); - this.groupId = Preconditions.assertNotNull(groupId, "groupId"); + this.clientId = Objects.requireNonNull(clientId, "clientId == null"); + this.serverId = 
Objects.requireNonNull(serverId, "serverId == null"); + this.groupId = Objects.requireNonNull(groupId, "groupId == null"); this.callId = callId; } diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientRequest.java b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientRequest.java index ed41f1ea2c..85ede62e8c 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientRequest.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftClientRequest.java @@ -24,6 +24,7 @@ import org.apache.ratis.proto.RaftProtos.ReadRequestTypeProto; import org.apache.ratis.proto.RaftProtos.ReplicationLevel; import org.apache.ratis.proto.RaftProtos.SlidingWindowEntry; +import org.apache.ratis.proto.RaftProtos.SpanContextProto; import org.apache.ratis.proto.RaftProtos.StaleReadRequestTypeProto; import org.apache.ratis.proto.RaftProtos.WatchRequestTypeProto; import org.apache.ratis.proto.RaftProtos.WriteRequestTypeProto; @@ -305,6 +306,7 @@ public static class Builder { private SlidingWindowEntry slidingWindowEntry; private RoutingTable routingTable; private long timeoutMs; + private SpanContextProto spanContext; public RaftClientRequest build() { return new RaftClientRequest(this); @@ -366,6 +368,11 @@ public Builder setTimeoutMs(long timeoutMs) { this.timeoutMs = timeoutMs; return this; } + + public Builder setSpanContext(SpanContextProto spanContext) { + this.spanContext = spanContext; + return this; + } } public static Builder newBuilder() { @@ -397,6 +404,8 @@ public static RaftClientRequest toWriteRequest(RaftClientRequest r, Message mess private final boolean toLeader; + private final SpanContextProto spanContext; + /** Construct a request for sending to the given server. 
*/ protected RaftClientRequest(ClientId clientId, RaftPeerId serverId, RaftGroupId groupId, long callId, Type type) { this(newBuilder() @@ -429,6 +438,7 @@ private RaftClientRequest(Builder b) { this.slidingWindowEntry = b.slidingWindowEntry; this.routingTable = b.routingTable; this.timeoutMs = b.timeoutMs; + this.spanContext = b.spanContext; } @Override @@ -472,9 +482,19 @@ public long getTimeoutMs() { return timeoutMs; } + public SpanContextProto getSpanContext() { + return spanContext; + } + @Override public String toString() { - return super.toString() + ", seq=" + ProtoUtils.toString(slidingWindowEntry) + ", " - + type + ", " + getMessage(); + return toStringShort() + ", " + getMessage(); + } + + /** + * @return a short string which does not include {@link #message}. + */ + public String toStringShort() { + return super.toString() + ", seq=" + ProtoUtils.toString(slidingWindowEntry) + ", " + type; } } diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftGroup.java b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftGroup.java index 0612a16f9d..5cf970afc3 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftGroup.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftGroup.java @@ -19,7 +19,13 @@ import org.apache.ratis.util.Preconditions; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + /** * Description of a raft group, which has a unique {@link RaftGroupId} and a collection of {@link RaftPeer}. 
diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftGroupId.java b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftGroupId.java index 9caedf7574..af40746918 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftGroupId.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftGroupId.java @@ -18,6 +18,7 @@ package org.apache.ratis.protocol; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.util.WeakValueCache; import java.util.UUID; @@ -27,13 +28,17 @@ * This is a value-based class. */ public final class RaftGroupId extends RaftId { - private static final Factory FACTORY = new Factory() { + private static final Factory FACTORY = new Factory(RaftGroupId.class) { @Override RaftGroupId newInstance(UUID uuid) { return new RaftGroupId(uuid); } }; + static WeakValueCache getCache() { + return FACTORY.getCache(); + } + public static RaftGroupId emptyGroupId() { return FACTORY.emptyId(); } diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftId.java b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftId.java index 9c2a83ffa3..d089c7d3cb 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/RaftId.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/RaftId.java @@ -17,17 +17,15 @@ */ package org.apache.ratis.protocol; -import org.apache.ratis.thirdparty.com.google.common.cache.Cache; -import org.apache.ratis.thirdparty.com.google.common.cache.CacheBuilder; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.WeakValueCache; import java.nio.ByteBuffer; import java.util.Objects; import java.util.UUID; -import java.util.concurrent.ExecutionException; import java.util.function.Supplier; /** Unique identifier implemented using {@link 
UUID}. */ @@ -53,18 +51,20 @@ static ByteString toByteString(UUID uuid) { } abstract static class Factory { - private final Cache cache = CacheBuilder.newBuilder() - .weakValues() - .build(); + private final WeakValueCache cache; + + Factory(Class clazz) { + this.cache = new WeakValueCache<>(clazz.getSimpleName() + "_UUID", this::newInstance); + } abstract ID newInstance(UUID uuid); + WeakValueCache getCache() { + return cache; + } + final ID valueOf(UUID uuid) { - try { - return cache.get(uuid, () -> newInstance(uuid)); - } catch (ExecutionException e) { - throw new IllegalStateException("Failed to valueOf(" + uuid + ")", e); - } + return cache.getOrCreate(uuid); } final ID valueOf(ByteString bytes) { @@ -85,7 +85,7 @@ ID randomId() { private final Supplier uuidString; RaftId(UUID uuid) { - this.uuid = Preconditions.assertNotNull(uuid, "uuid"); + this.uuid = Objects.requireNonNull(uuid, "uuid == null"); this.uuidBytes = JavaUtils.memoize(() -> toByteString(uuid)); this.uuidString = JavaUtils.memoize(() -> createUuidString(uuid)); Preconditions.assertTrue(ZERO_UUID == uuid || !uuid.equals(ZERO_UUID), diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/SnapshotManagementRequest.java b/ratis-common/src/main/java/org/apache/ratis/protocol/SnapshotManagementRequest.java index 2ea2059b51..269fdfc591 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/SnapshotManagementRequest.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/SnapshotManagementRequest.java @@ -24,7 +24,16 @@ public final class SnapshotManagementRequest extends RaftClientRequest { public abstract static class Op { } - public static class Create extends Op { + + public static final class Create extends Op { + private final long creationGap; + private Create(long creationGap) { + this.creationGap = creationGap; + } + + public long getCreationGap() { + return creationGap; + } @Override public String toString() { @@ -35,8 +44,13 @@ public String toString() { public 
static SnapshotManagementRequest newCreate(ClientId clientId, RaftPeerId serverId, RaftGroupId groupId, long callId, long timeoutMs) { + return newCreate(clientId, serverId, groupId, callId, timeoutMs, 0); + } + + public static SnapshotManagementRequest newCreate(ClientId clientId, + RaftPeerId serverId, RaftGroupId groupId, long callId, long timeoutMs, long creationGap) { return new SnapshotManagementRequest(clientId, - serverId, groupId, callId, timeoutMs,new SnapshotManagementRequest.Create()); + serverId, groupId, callId, timeoutMs, new SnapshotManagementRequest.Create(creationGap)); } private final Op op; diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/NotLeaderException.java b/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/NotLeaderException.java index 8d5c2cb4e9..c7dc6a3961 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/NotLeaderException.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/NotLeaderException.java @@ -30,7 +30,8 @@ public class NotLeaderException extends RaftException { private final Collection peers; public NotLeaderException(RaftGroupMemberId memberId, RaftPeer suggestedLeader, Collection peers) { - super("Server " + memberId + " is not the leader" + (suggestedLeader != null? " " + suggestedLeader: "")); + super("Server " + memberId + " is not the leader" + + (suggestedLeader != null ? ", suggested leader is: " + suggestedLeader : "")); this.suggestedLeader = suggestedLeader; this.peers = peers != null? 
Collections.unmodifiableCollection(peers): Collections.emptyList(); Preconditions.assertUnique(this.peers); diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/NotReplicatedException.java b/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/NotReplicatedException.java index 5f48654eec..37ff816245 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/NotReplicatedException.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/exceptions/NotReplicatedException.java @@ -17,12 +17,17 @@ */ package org.apache.ratis.protocol.exceptions; +import org.apache.ratis.proto.RaftProtos.CommitInfoProto; import org.apache.ratis.proto.RaftProtos.ReplicationLevel; +import java.util.Collection; + public class NotReplicatedException extends RaftException { private final long callId; private final ReplicationLevel requiredReplication; private final long logIndex; + /** This is only populated on client-side since RaftClientReply already has commitInfos */ + private Collection commitInfos; public NotReplicatedException(long callId, ReplicationLevel requiredReplication, long logIndex) { super("Request with call Id " + callId + " and log index " + logIndex @@ -32,6 +37,12 @@ public NotReplicatedException(long callId, ReplicationLevel requiredReplication, this.logIndex = logIndex; } + public NotReplicatedException(long callId, ReplicationLevel requiredReplication, long logIndex, + Collection commitInfos) { + this(callId, requiredReplication, logIndex); + this.commitInfos = commitInfos; + } + public long getCallId() { return callId; } @@ -43,4 +54,8 @@ public ReplicationLevel getRequiredReplication() { public long getLogIndex() { return logIndex; } + + public Collection getCommitInfos() { + return commitInfos; + } } diff --git a/ratis-common/src/main/java/org/apache/ratis/retry/ExponentialBackoffRetry.java b/ratis-common/src/main/java/org/apache/ratis/retry/ExponentialBackoffRetry.java index bb2f50e43a..90c7efbf8a 100644 
--- a/ratis-common/src/main/java/org/apache/ratis/retry/ExponentialBackoffRetry.java +++ b/ratis-common/src/main/java/org/apache/ratis/retry/ExponentialBackoffRetry.java @@ -20,6 +20,7 @@ import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.TimeDuration; +import java.util.Objects; import java.util.concurrent.ThreadLocalRandom; /** @@ -31,7 +32,6 @@ * in the range [s*0.5, s*1.5). */ public final class ExponentialBackoffRetry implements RetryPolicy { - public static final class Builder { private Builder() {} @@ -56,7 +56,7 @@ public Builder setMaxSleepTime(TimeDuration maxSleepTime) { } public ExponentialBackoffRetry build() { - Preconditions.assertNotNull(baseSleepTime, "baseSleepTime"); + Objects.requireNonNull(baseSleepTime, "baseSleepTime == null"); return new ExponentialBackoffRetry(baseSleepTime, maxSleepTime, maxAttempts); } @@ -67,6 +67,14 @@ public ExponentialBackoffRetry build() { private final int maxAttempts; private ExponentialBackoffRetry(TimeDuration baseSleepTime, TimeDuration maxSleepTime, int maxAttempts) { + Objects.requireNonNull(baseSleepTime, "baseSleepTime == null"); + Preconditions.assertTrue(baseSleepTime.isPositive(), () -> "baseSleepTime = " + baseSleepTime + " <= 0"); + if (maxSleepTime != null) { + Preconditions.assertTrue(maxSleepTime.compareTo(baseSleepTime) >= 0, + () -> "maxSleepTime = " + maxSleepTime + " < baseSleepTime = " + baseSleepTime); + } + Preconditions.assertTrue(maxAttempts >= 0, () -> "maxAttempts = " + maxAttempts + " < 0"); + this.baseSleepTime = baseSleepTime; this.maxSleepTime = maxSleepTime; this.maxAttempts = maxAttempts; diff --git a/ratis-common/src/main/java/org/apache/ratis/retry/MultipleLinearRandomRetry.java b/ratis-common/src/main/java/org/apache/ratis/retry/MultipleLinearRandomRetry.java index bc453f5bea..9cceb6bc5b 100644 --- a/ratis-common/src/main/java/org/apache/ratis/retry/MultipleLinearRandomRetry.java +++ 
b/ratis-common/src/main/java/org/apache/ratis/retry/MultipleLinearRandomRetry.java @@ -19,8 +19,6 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.TimeDuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Collections; @@ -34,28 +32,19 @@ * Given pairs of number of retries and sleep time (n0, t0), (n1, t1), ..., * the first n0 retries sleep t0 milliseconds on average, * the following n1 retries sleep t1 milliseconds on average, and so on. - * + *

* For all the sleep, the actual sleep time is randomly uniform distributed * in the close interval [0.5t, 1.5t], where t is the sleep time specified. - * + *

* The objects of this class are immutable. */ public final class MultipleLinearRandomRetry implements RetryPolicy { - static final Logger LOG = LoggerFactory.getLogger(MultipleLinearRandomRetry.class); - /** Pairs of numRetries and sleepSeconds */ - private static class Pair { + private static final class Pair { private final int numRetries; private final TimeDuration sleepTime; Pair(int numRetries, TimeDuration sleepTime) { - if (numRetries < 0) { - throw new IllegalArgumentException("numRetries = " + numRetries+" < 0"); - } - if (sleepTime.isNegative()) { - throw new IllegalArgumentException("sleepTime = " + sleepTime + " < 0"); - } - this.numRetries = numRetries; this.sleepTime = sleepTime; } @@ -76,9 +65,6 @@ public String toString() { private final Supplier myString; private MultipleLinearRandomRetry(List pairs) { - if (pairs == null || pairs.isEmpty()) { - throw new IllegalArgumentException("pairs must be neither null nor empty."); - } this.pairs = Collections.unmodifiableList(pairs); this.myString = JavaUtils.memoize(() -> JavaUtils.getClassSimpleName(getClass()) + pairs); } @@ -131,30 +117,22 @@ public String toString() { * @return the parsed object, or null if the parsing fails. 
*/ public static MultipleLinearRandomRetry parseCommaSeparated(String input) { - final String[] elements = input.split(","); - if (elements.length == 0) { - LOG.warn("Illegal value: there is no element in \"{}\".", input); - return null; + input = input.trim(); + if (input.isEmpty()) { + throw new IllegalArgumentException("Failed to parse \"" + input + "\": no elements found"); } + final String[] elements = input.split(","); if (elements.length % 2 != 0) { - LOG.warn("Illegal value: the number of elements in \"{}\" is {} but an even number of elements is expected.", - input, elements.length); - return null; + throw new IllegalArgumentException("Failed to parse \"" + input + + "\": number of elements (" + elements.length + ") is odd"); } final List pairs = new ArrayList<>(); for(int i = 0; i < elements.length; ) { - //parse the i-th sleep-time - final TimeDuration sleep = parseElement(elements, i++, input, MultipleLinearRandomRetry::parsePositiveTime); - if (sleep == null) { - return null; //parse fails - } - //parse the i-th number-of-retries - final Integer retries = parseElement(elements, i++, input, MultipleLinearRandomRetry::parsePositiveInt); - if (retries == null) { - return null; //parse fails - } - + final TimeDuration sleep = parseElement("sleep-time", elements, i++, input, + MultipleLinearRandomRetry::parsePositiveTime); + final Integer retries = parseElement("retries", elements, i++, input, + MultipleLinearRandomRetry::parsePositiveInt); pairs.add(new Pair(retries, sleep)); } return new MultipleLinearRandomRetry(pairs); @@ -176,13 +154,13 @@ private static int parsePositiveInt(String trimmed) { return n; } - private static E parseElement(String[] elements, int i, String input, Function parser) { + private static E parseElement(String name, String[] elements, int i, String input, Function parser) { final String s = elements[i].trim().replace("_", ""); try { return parser.apply(s); - } catch(Exception t) { - LOG.warn("Failed to parse \"{}\", which is the
index {} element in \"{}\"", s, i, input, t); - return null; + } catch (Exception e) { + throw new IllegalArgumentException( + "Failed to parse \"" + s + "\" as " + name + " (element " + i + " in \"" + input + "\")", e); } } } diff --git a/ratis-common/src/main/java/org/apache/ratis/retry/RetryPolicy.java b/ratis-common/src/main/java/org/apache/ratis/retry/RetryPolicy.java index 1de07f19e1..0885e0a44a 100644 --- a/ratis-common/src/main/java/org/apache/ratis/retry/RetryPolicy.java +++ b/ratis-common/src/main/java/org/apache/ratis/retry/RetryPolicy.java @@ -19,6 +19,10 @@ import org.apache.ratis.util.TimeDuration; +import java.util.Arrays; +import java.util.Objects; +import java.util.concurrent.TimeUnit; + /** * Policy abstract for retrying. */ @@ -72,4 +76,57 @@ default Throwable getCause() { * @return the action it should take. */ Action handleAttemptFailure(Event event); + + static RetryPolicy parse(String commaSeparated, String name) { + try { + return parse(commaSeparated); + } catch (Exception e) { + throw new IllegalArgumentException("Failed to parse " + name + ": \"" + commaSeparated + "\"", e); + } + } + + static RetryPolicy parse(String commaSeparated) { + Objects.requireNonNull(commaSeparated, "commaSeparated == null"); + final String[] args = commaSeparated.split(","); + if (args.length < 1) { + throw new IllegalArgumentException("Failed to parse RetryPolicy: empty comma separated string"); + } + final String classname = args[0].trim(); + if (classname.equals(ExponentialBackoffRetry.class.getSimpleName())) { + if (args.length != 4) { + throw new IllegalArgumentException("Failed to parse ExponentialBackoffRetry: args.length = " + + args.length + " != 4 for " + commaSeparated); + } + return ExponentialBackoffRetry.newBuilder() + .setBaseSleepTime(TimeDuration.valueOf(args[1], TimeUnit.MILLISECONDS)) + .setMaxSleepTime(TimeDuration.valueOf(args[2], TimeUnit.MILLISECONDS)) + .setMaxAttempts(Integer.parseInt(args[3].trim())) + .build(); + } + if 
(classname.equals(MultipleLinearRandomRetry.class.getSimpleName())) { + if (args.length == 1) { + throw new IllegalArgumentException( + "Failed to parse MultipleLinearRandomRetry: the parameter list is empty for " + commaSeparated); + } + final String params = String.join(",", Arrays.copyOfRange(args, 1, args.length)); + return MultipleLinearRandomRetry.parseCommaSeparated(params); + } + // Backward compatibility: legacy config omits class name and starts with a duration (e.g. "1ms"). + if (isLegacyMultipleLinearRandomRetryParams(classname)) { + return MultipleLinearRandomRetry.parseCommaSeparated(commaSeparated); + } + // If a class name is present but unknown, fail fast to surface config errors. + throw new IllegalArgumentException("Failed to parse RetryPolicy: unknown class " + + classname + " for " + commaSeparated); + } + + static boolean isLegacyMultipleLinearRandomRetryParams(String firstElement) { + // The legacy format starts with a duration token, not a class name. + try { + final TimeDuration t = TimeDuration.valueOf(firstElement, TimeUnit.MILLISECONDS); + return t.isPositive(); + } catch (RuntimeException e) { + return false; + } + } } diff --git a/ratis-common/src/main/java/org/apache/ratis/trace/RatisAttributes.java b/ratis-common/src/main/java/org/apache/ratis/trace/RatisAttributes.java new file mode 100644 index 0000000000..3c3be83e79 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/trace/RatisAttributes.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.trace; + +import io.opentelemetry.api.common.AttributeKey; + +/** + * The constants in this class correspond with the guidance outlined by the OpenTelemetry Semantic + * Conventions. + */ +public final class RatisAttributes { + public static final AttributeKey CLIENT_ID = AttributeKey.stringKey("raft.client.id"); + public static final AttributeKey MEMBER_ID = AttributeKey.stringKey("raft.member.id"); + public static final AttributeKey CALL_ID = AttributeKey.stringKey("raft.call.id"); + + public static final AttributeKey PEER_ID = AttributeKey.stringKey("raft.peer.id"); + public static final AttributeKey OPERATION_NAME = AttributeKey.stringKey("raft.operation.name"); + public static final AttributeKey OPERATION_TYPE = AttributeKey.stringKey("raft.operation.type"); + + + private RatisAttributes() { + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/trace/TraceClient.java b/ratis-common/src/main/java/org/apache/ratis/trace/TraceClient.java new file mode 100644 index 0000000000..0ab34e689e --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/trace/TraceClient.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.trace; + +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanKind; +import org.apache.ratis.protocol.RaftClientRequest; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.function.CheckedSupplier; + +import java.util.concurrent.CompletableFuture; + +/** Client-side OpenTelemetry helpers. */ +public final class TraceClient { + private static final String LEADER = "LEADER"; + + private TraceClient() { + } + + /** + * Traces an asynchronous client send ({@code Async::send}) when tracing is enabled. + */ + public static CompletableFuture asyncSend( + CheckedSupplier, THROWABLE> action, + RaftClientRequest.Type type, RaftPeerId server) throws THROWABLE { + if (!TraceUtils.isEnabled()) { + return action.get(); + } + return TraceUtils.traceAsyncMethod(action, + () -> createClientOperationSpan(type, server, "Async::send")); + } + + private static Span createClientOperationSpan(RaftClientRequest.Type type, RaftPeerId server, + String spanName) { + Preconditions.assertNotNull(spanName, () -> "Span name cannot be null"); + Preconditions.assertTrue(!spanName.isEmpty(), "Span name should not be empty"); + String peerId = server == null ? 
LEADER : String.valueOf(server); + final Span span = TraceUtils.getGlobalTracer() + .spanBuilder(spanName) + .setSpanKind(SpanKind.CLIENT) + .startSpan(); + span.setAttribute(RatisAttributes.PEER_ID, peerId); + span.setAttribute(RatisAttributes.OPERATION_NAME, spanName); + span.setAttribute(RatisAttributes.OPERATION_TYPE, String.valueOf(type)); + return span; + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/trace/TraceConfigKeys.java b/ratis-common/src/main/java/org/apache/ratis/trace/TraceConfigKeys.java new file mode 100644 index 0000000000..b0a1cbd9b1 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/trace/TraceConfigKeys.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.trace; + +import org.apache.ratis.conf.RaftProperties; + +import java.util.function.Consumer; + +import static org.apache.ratis.conf.ConfUtils.getBoolean; +import static org.apache.ratis.conf.ConfUtils.setBoolean; + +public interface TraceConfigKeys { + String PREFIX = "raft.otel.tracing"; + + String ENABLED_KEY = PREFIX + ".enabled"; + boolean ENABLED_DEFAULT = false; + + static boolean enabled(RaftProperties properties, Consumer logger) { + return getBoolean(properties::getBoolean, ENABLED_KEY, ENABLED_DEFAULT, logger); + } + + static boolean enabled(RaftProperties properties) { + return enabled(properties, null); + } + + static void setEnabled(RaftProperties properties, boolean enabled) { + setBoolean(properties::setBoolean, ENABLED_KEY, enabled); + } +} + diff --git a/ratis-common/src/main/java/org/apache/ratis/trace/TraceServer.java b/ratis-common/src/main/java/org/apache/ratis/trace/TraceServer.java new file mode 100644 index 0000000000..9670f0d763 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/trace/TraceServer.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.trace; + +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.context.Context; +import org.apache.ratis.protocol.RaftClientRequest; +import org.apache.ratis.util.function.CheckedSupplier; + +import java.util.concurrent.CompletableFuture; + +/** Server-side OpenTelemetry helpers. */ +public final class TraceServer { + private TraceServer() { + } + + /** + * Traces {@code submitClientRequestAsync} when tracing is enabled. + */ + public static CompletableFuture traceAsyncMethod( + CheckedSupplier, THROWABLE> action, + RaftClientRequest request, String memberId, String spanName) throws THROWABLE { + if (!TraceUtils.isEnabled()) { + return action.get(); + } + return TraceUtils.traceAsyncMethod(action, + () -> createServerSpanFromClientRequest(request, memberId, spanName)); + } + + private static Span createServerSpanFromClientRequest(RaftClientRequest request, String memberId, + String spanName) { + final Context remoteContext = TraceUtils.extractContextFromProto(request.getSpanContext()); + final Span span = TraceUtils.getGlobalTracer() + .spanBuilder(spanName) + .setParent(remoteContext) + .setSpanKind(SpanKind.SERVER) + .startSpan(); + span.setAttribute(RatisAttributes.CLIENT_ID, String.valueOf(request.getClientId())); + span.setAttribute(RatisAttributes.CALL_ID, String.valueOf(request.getCallId())); + span.setAttribute(RatisAttributes.MEMBER_ID, memberId); + return span; + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/trace/TraceUtils.java b/ratis-common/src/main/java/org/apache/ratis/trace/TraceUtils.java new file mode 100644 index 0000000000..f350ca8884 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/trace/TraceUtils.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.trace; + +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.context.Context; +import io.opentelemetry.context.Scope; +import io.opentelemetry.context.propagation.TextMapPropagator; +import io.opentelemetry.context.propagation.TextMapGetter; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.proto.RaftProtos.SpanContextProto; +import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.function.CheckedSupplier; +import org.apache.ratis.util.VersionInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiConsumer; +import java.util.function.Supplier; + +/** Common OpenTelemetry utilities shared by {@link TraceClient} and {@link TraceServer}. 
*/ +public final class TraceUtils { + + private static final AtomicReference TRACER = new AtomicReference<>(); + private static final Logger LOG = LoggerFactory.getLogger(TraceUtils.class); + + private TraceUtils() { + } + + public static Tracer getGlobalTracer() { + return TRACER.get(); + } + + /** + * Initializes the global tracer from configuration when tracing is enabled, or clears it when + * disabled. Call from {@link org.apache.ratis.server.RaftServer} and + * {@link org.apache.ratis.client.RaftClient} construction so tracing follows + * {@link TraceConfigKeys}. + * + * @param properties raft configuration; tracing is on when {@link TraceConfigKeys#enabled} is true + */ + public static void setTracerWhenEnabled(RaftProperties properties) { + setTracerWhenEnabled(TraceConfigKeys.enabled(properties)); + } + + /** + * Enables or disables the tracer without reading {@link RaftProperties}. Intended for tests and + * simple toggles; production code should prefer {@link #setTracerWhenEnabled(RaftProperties)}. + * + * @param enabled when true, lazily obtains the OpenTelemetry tracer; when false, clears it + */ + public static void setTracerWhenEnabled(boolean enabled) { + if (enabled) { + TRACER.updateAndGet(previous -> previous != null ? previous + : GlobalOpenTelemetry.getTracer("org.apache.ratis", VersionInfo.getSoftwareInfoVersion())); + } else { + TRACER.set(null); + } + } + + static boolean isEnabled() { + return TRACER.get() != null; + } + + /** + * Traces an asynchronous operation represented by a {@link CompletableFuture}. The returned future + * completes with the same outcome as the supplied future; the span is ended when that future + * completes. 
+ */ + static CompletableFuture traceAsyncMethod( + CheckedSupplier, THROWABLE> action, Supplier spanSupplier) throws THROWABLE { + final Span span = spanSupplier.get(); + try (Scope ignored = span.makeCurrent()) { + final CompletableFuture future; + try { + future = action.get(); + } catch (RuntimeException | Error e) { + setError(span, e); + span.end(); + throw e; + } catch (Throwable t) { + setError(span, t); + span.end(); + throw JavaUtils.cast(t); + } + endSpan(future, span); + return future; + } + } + + private static void endSpan(CompletableFuture future, Span span) { + if (span == null) { + LOG.debug("Span is null, cannot trace the future {}", future); + return; + } + addListener(future, (resp, error) -> { + try { + if (error != null) { + setError(span, error); + } else { + span.setStatus(StatusCode.OK); + } + } catch (Throwable t) { + LOG.error("Error setting span status, ending span anyway", t); + } finally { + span.end(); + } + }); + } + + public static void setError(Span span, Throwable error) { + span.recordException(error); + span.setStatus(StatusCode.ERROR); + } + + /** + * This method is used when you just want to add a listener to the given future. We will call + * {@link CompletableFuture#whenComplete(BiConsumer)} to register the {@code action} to the + * {@code future}. Ignoring the return value of a Future is considered as a bad practice as it may + * suppress exceptions thrown from the code that completes the future, and this method will catch + * all the exceptions thrown from the {@code action} to catch possible code bugs. + *

+ * And the error-prone check will always report FutureReturnValueIgnored because every method in + * the {@link CompletableFuture} class will return a new {@link CompletableFuture}, so you always + * have one future that has not been checked. So we introduce this method and add a suppression + * warnings annotation here. + */ + @SuppressWarnings("FutureReturnValueIgnored") + private static void addListener(CompletableFuture future, + BiConsumer action) { + future.whenComplete((resp, error) -> { + try { + // https://s.apache.org/completionexception — unwrap CompletionException for callers + action.accept(resp, error == null ? null : JavaUtils.unwrapCompletionException(error)); + } catch (Throwable t) { + LOG.error("Unexpected error caught when processing CompletableFuture", t); + } + }); + } + + private static final TextMapPropagator PROPAGATOR = + GlobalOpenTelemetry.getPropagators().getTextMapPropagator(); + + public static SpanContextProto injectContextToProto(Context context) { + Map carrier = new TreeMap<>(); + PROPAGATOR.inject(context, carrier, (map, key, value) -> map.put(key, value)); + return SpanContextProto.newBuilder().putAllContext(carrier).build(); + } + + public static Context extractContextFromProto(SpanContextProto proto) { + if (proto == null || proto.getContextMap().isEmpty()) { + return Context.current(); + } + final TextMapGetter getter = SpanContextGetter.INSTANCE; + return PROPAGATOR.extract(Context.current(), proto, getter); + } +} + +class SpanContextGetter implements TextMapGetter { + static final SpanContextGetter INSTANCE = new SpanContextGetter(); + + @Override + public Iterable keys(SpanContextProto carrier) { + return carrier.getContextMap().keySet(); + } + + @Override + public String get(SpanContextProto carrier, String key) { + return Optional.ofNullable(carrier).map(SpanContextProto::getContextMap) + .map(map -> map.get(key)).orElse(null); + } + +} diff --git 
a/ratis-common/src/main/java/org/apache/ratis/util/AtomicFileOutputStream.java b/ratis-common/src/main/java/org/apache/ratis/util/AtomicFileOutputStream.java index 530eb383c7..ec0eda94f5 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/AtomicFileOutputStream.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/AtomicFileOutputStream.java @@ -17,6 +17,10 @@ */ package org.apache.ratis.util; +import static java.nio.file.StandardOpenOption.CREATE; +import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING; +import static java.nio.file.StandardOpenOption.WRITE; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -24,7 +28,6 @@ import java.io.FilterOutputStream; import java.io.IOException; import java.nio.file.StandardCopyOption; -import java.nio.file.StandardOpenOption; import java.util.concurrent.atomic.AtomicBoolean; /** @@ -42,7 +45,7 @@ * NOTE that on Windows platforms, the output file, if it exists, is deleted * before the temporary file is moved. */ -public class AtomicFileOutputStream extends FilterOutputStream { +public final class AtomicFileOutputStream extends FilterOutputStream { static final Logger LOG = LoggerFactory.getLogger(AtomicFileOutputStream.class); public static final String TMP_EXTENSION = ".tmp"; @@ -60,11 +63,16 @@ public AtomicFileOutputStream(File outFile) throws IOException { } public AtomicFileOutputStream(File outFile, File tmpFile) throws IOException { - super(FileUtils.newOutputStreamForceAtClose(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.WRITE)); + super(FileUtils.newOutputStreamForceAtClose(tmpFile, CREATE, TRUNCATE_EXISTING, WRITE)); this.outFile = outFile.getAbsoluteFile(); this.tmpFile = tmpFile.getAbsoluteFile(); } + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + } + public boolean isClosed() { return isClosed.get(); } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/AutoCloseableLock.java 
b/ratis-common/src/main/java/org/apache/ratis/util/AutoCloseableLock.java index 8a5409bafe..9581e925a5 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/AutoCloseableLock.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/AutoCloseableLock.java @@ -17,6 +17,7 @@ */ package org.apache.ratis.util; +import java.util.Objects; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Lock; @@ -45,6 +46,13 @@ public static AutoCloseableLock acquire(final Lock lock, Runnable preUnlock) { return new AutoCloseableLock(lock, preUnlock); } + public static AutoCloseableLock tryAcquire(final Lock lock, Runnable preUnlock, TimeDuration timeout) + throws InterruptedException { + Objects.requireNonNull(timeout, "timeout == null"); + final boolean locked = lock.tryLock(timeout.getDuration(), timeout.getUnit()); + return locked? new AutoCloseableLock(lock, preUnlock): null; + } + private final Lock underlying; private final AtomicBoolean closed = new AtomicBoolean(false); private final Runnable preUnlock; diff --git a/ratis-common/src/main/java/org/apache/ratis/util/BatchLogger.java b/ratis-common/src/main/java/org/apache/ratis/util/BatchLogger.java index 38dad5c499..b57bed704c 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/BatchLogger.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/BatchLogger.java @@ -45,9 +45,9 @@ default TimeDuration getBatchDuration() { private static final class UniqueId { private final Key key; - private final String name; + private final Object name; - private UniqueId(Key key, String name) { + private UniqueId(Key key, Object name) { this.key = Objects.requireNonNull(key, "key == null"); this.name = name; } @@ -99,15 +99,15 @@ private synchronized boolean tryStartBatch(Consumer op) { private static final TimeoutExecutor SCHEDULER = TimeoutExecutor.getInstance(); private static final ConcurrentMap LOG_CACHE = new ConcurrentHashMap<>(); - public static void warn(Key key, String name, 
Consumer op) { - warn(key, name, op, key.getBatchDuration(), true); + public static void print(Key key, Object name, Consumer op) { + print(key, name, op, key.getBatchDuration(), true); } - public static void warn(Key key, String name, Consumer op, TimeDuration batchDuration) { - warn(key, name, op, batchDuration, true); + public static void print(Key key, Object name, Consumer op, TimeDuration batchDuration) { + print(key, name, op, batchDuration, true); } - public static void warn(Key key, String name, Consumer op, TimeDuration batchDuration, boolean shouldBatch) { + public static void print(Key key, Object name, Consumer op, TimeDuration batchDuration, boolean shouldBatch) { if (!shouldBatch || batchDuration.isNonPositive()) { op.accept(""); return; diff --git a/ratis-common/src/main/java/org/apache/ratis/util/BiWeakValueCache.java b/ratis-common/src/main/java/org/apache/ratis/util/BiWeakValueCache.java new file mode 100644 index 0000000000..52940ed883 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/util/BiWeakValueCache.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.util; + +import org.apache.ratis.thirdparty.com.google.common.collect.MapMaker; + +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiFunction; +import java.util.function.Consumer; + +/** + * Weak Value Cache: ({@link OUTER}, {@link INNER}) -> {@link T}. + *

+ * Note that the cached values are weakly referenced. + * A cached value could be garbage-collected (i.e. evicted from the cache) + * when there are no external (strong) references. + * + * @param the type of the outer keys. + * @param the type of the inner keys. + * @param the type to be cached. + */ +public final class BiWeakValueCache { + static ConcurrentMap newMap() { + return new MapMaker().weakValues().makeMap(); + } + + private final String outerName; + private final String innerName; + private final String name; + + /** For constructing {@link T} values from ({@link OUTER}, {@link INNER}) keys. */ + private final BiFunction constructor; + /** Count the number of {@link T} values constructed. */ + private final AtomicInteger valueCount = new AtomicInteger(0); + + /** + * Actual map {@link OUTER} -> ({@link INNER} -> {@link T}) + * for the logical view ({@link OUTER}, {@link INNER}) -> {@link T}. + */ + private final ConcurrentMap> map = new ConcurrentHashMap<>(); + + /** + * Create a cache for mapping ({@link OUTER}, {@link INNER}) keys to {@link T} values. + * + * @param outerName the name of the outer key. + * @param innerName the name of the inner key. + * @param constructor for constructing {@link T} values. + */ + public BiWeakValueCache(String outerName, String innerName, BiFunction constructor) { + this.outerName = outerName; + this.innerName = innerName; + this.name = "(" + outerName + ", " + innerName + ")-cache"; + this.constructor = constructor; + } + + private T construct(OUTER outer, INNER inner) { + final T constructed = constructor.apply(outer, inner); + Objects.requireNonNull(constructed, "constructed == null"); + valueCount.incrementAndGet(); + return constructed; + } + + /** + * If the key ({@link OUTER}, {@link INNER}) is in the cache, return the cached value. + * Otherwise, create a new value and then return it. 
+ */ + public T getOrCreate(OUTER outer, INNER inner) { + Objects.requireNonNull(outer, () -> outerName + " (outer) == null"); + Objects.requireNonNull(inner, () -> innerName + " (inner) == null"); + final ConcurrentMap innerMap = map.computeIfAbsent(outer, k -> newMap()); + final T computed = innerMap.computeIfAbsent(inner, i -> construct(outer, i)); + if ((valueCount.get() & 0xFFF) == 0) { + cleanupEmptyInnerMaps(); // cleanup empty maps once in a while + } + return computed; + } + + /** @return the value count for the given outer key. */ + int count(OUTER outer) { + final ConcurrentMap innerMap = map.get(outer); + if (innerMap == null) { + return 0; + } + + // size() may return incorrect result; see Guava MapMaker javadoc + int n = 0; + for (INNER ignored : innerMap.keySet()) { + n++; + } + return n; + } + + void cleanupEmptyInnerMaps() { + // isEmpty() may return incorrect result; see Guava MapMaker javadoc + map.values().removeIf(e -> !e.entrySet().iterator().hasNext()); + } + + @Override + public String toString() { + return name; + } + + /** The cache content for debugging. 
*/ + int dump(Consumer out) { + out.accept(name + ":\n"); + int emptyCount = 0; + for (Map.Entry> entry : map.entrySet()) { + final OUTER outer = entry.getKey(); + final ConcurrentMap innerMap = entry.getValue(); + final int count = count(outer); + if (count == 0) { + emptyCount++; + } + + out.accept(" " + outerName + ":" + outer); + out.accept(", " + innerName + ":" + innerMap.keySet()); + out.accept(", count=" + count); + out.accept(", size=" + innerMap.size()); + out.accept("\n"); + } + out.accept(" emptyCount=" + emptyCount); + out.accept("\n"); + return emptyCount; + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/util/CodeInjectionForTesting.java b/ratis-common/src/main/java/org/apache/ratis/util/CodeInjectionForTesting.java index a7d36ac0eb..112f6bd250 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/CodeInjectionForTesting.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/CodeInjectionForTesting.java @@ -68,4 +68,9 @@ public static boolean execute(String injectionPoint, Object localId, } return code.execute(localId, remoteId, args); } + + /** Remove an injection point. 
*/ + public static void remove(String injectionPoint) { + INJECTION_POINTS.remove(injectionPoint); + } } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/CollectionUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/CollectionUtils.java index 11f484608a..2615c2659c 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/CollectionUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/CollectionUtils.java @@ -17,7 +17,14 @@ */ package org.apache.ratis.util; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ThreadLocalRandom; import java.util.function.BiFunction; diff --git a/ratis-common/src/main/java/org/apache/ratis/util/DataBlockingQueue.java b/ratis-common/src/main/java/org/apache/ratis/util/DataBlockingQueue.java index 842b8f1549..fb0f0715c5 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/DataBlockingQueue.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/DataBlockingQueue.java @@ -29,6 +29,7 @@ import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Consumer; import java.util.function.ToLongFunction; /** @@ -46,6 +47,8 @@ public class DataBlockingQueue extends DataQueue { private final Condition notFull = lock.newCondition(); private final Condition notEmpty = lock.newCondition(); + private boolean closed = false; + public DataBlockingQueue(Object name, SizeInBytes byteLimit, int elementLimit, ToLongFunction getNumBytes) { super(name, byteLimit, elementLimit, getNumBytes); } @@ -72,10 +75,34 @@ public void clear() { } } + /** Apply the given handler to each element and then {@link #clear()}. 
*/ + public void clear(Consumer handler) { + try(AutoCloseableLock auto = AutoCloseableLock.acquire(lock)) { + for(E e : this) { + handler.accept(e); + } + super.clear(); + } + } + + /** + * Close this queue to stop accepting new elements, i.e. the offer(…) methods always return false. + * Note that closing the queue will not clear the existing elements. + * The existing elements can be peeked, polled or cleared after close. + */ + public void close() { + try(AutoCloseableLock ignored = AutoCloseableLock.acquire(lock)) { + closed = true; + } + } + @Override public boolean offer(E element) { Objects.requireNonNull(element, "element == null"); try(AutoCloseableLock auto = AutoCloseableLock.acquire(lock)) { + if (closed) { + return false; + } if (super.offer(element)) { notEmpty.signal(); return true; @@ -95,6 +122,9 @@ public boolean offer(E element, TimeDuration timeout) throws InterruptedExceptio long nanos = timeout.toLong(TimeUnit.NANOSECONDS); try(AutoCloseableLock auto = AutoCloseableLock.acquire(lock)) { for(;;) { + if (closed) { + return false; + } if (super.offer(element)) { notEmpty.signal(); return true; @@ -162,4 +192,11 @@ public List pollList(long timeoutM return results; } } + + @Override + public E peek() { + try(AutoCloseableLock auto = AutoCloseableLock.acquire(lock)) { + return super.peek(); + } + } } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/DataQueue.java b/ratis-common/src/main/java/org/apache/ratis/util/DataQueue.java index 3db06f56e6..38762caa17 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/DataQueue.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/DataQueue.java @@ -154,6 +154,11 @@ public E poll() { return polled; } + /** Peek the head element from this queue. */ + public E peek() { + return q.peek(); + } + /** The same as {@link java.util.Collection#remove(Object)}. 
*/ public boolean remove(E e) { final boolean removed = q.remove(e); diff --git a/ratis-common/src/main/java/org/apache/ratis/util/FileUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/FileUtils.java index d5141e9171..79c00b5a35 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/FileUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/FileUtils.java @@ -27,12 +27,27 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.nio.file.*; +import java.nio.file.AtomicMoveNotSupportedException; +import java.nio.file.CopyOption; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.LinkOption; +import java.nio.file.NotDirectoryException; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; import java.nio.file.attribute.BasicFileAttributes; import java.util.Arrays; import java.util.List; import java.util.Objects; +import java.util.function.BiConsumer; +import java.util.function.Consumer; import java.util.function.Supplier; +import java.util.stream.Stream; public interface FileUtils { Logger LOG = LoggerFactory.getLogger(FileUtils.class); @@ -49,6 +64,39 @@ static T attempt(CheckedSupplier op, Supplier name) throw } } + /** @return true iff the given dir is an ancestor of the given sub path. 
*/ + static boolean isAncestor(File dir, File sub) throws IOException { + Objects.requireNonNull(dir, "dir == null"); + Objects.requireNonNull(sub, "sub == null"); + + String dirPath = dir.getCanonicalPath(); + final String subPath = sub.getCanonicalPath(); + if (dirPath.equals(subPath)) { + return true; + } else if (!dirPath.endsWith(File.separator)) { + dirPath += File.separator; + } + LOG.debug("dirPath: {}", dirPath); + LOG.debug("subPath: {}", subPath); + return subPath.startsWith(dirPath); + } + + /** + * Resolve the full path from the given dir and sub, + * where dir is supposed to be an ancestor of the resolved path. + * + * @return the full path + * @throws IOException if the dir is not an ancestor of the resolved path. + */ + static File resolveFullPath(File dir, String sub) throws IOException { + final File full = new File(dir, sub); + if (!isAncestor(dir, full)) { + throw new IOException("The dir is not an ancestor of the full path: dir=" + dir + + ", sub=" + sub + ", full=" + full); + } + return full; + } + static void truncateFile(File f, long target) throws IOException { final long original = f.length(); LogUtils.runAndLog(LOG, @@ -201,8 +249,10 @@ static File move(File src, String suffix) throws IOException { } /** The same as passing f.toPath() to {@link #delete(Path)}. 
*/ - static void deleteFile(File f) throws IOException { - delete(f.toPath()); + static Path deleteFile(File f) throws IOException { + final Path path = f.toPath(); + delete(path); + return path; } /** @@ -336,4 +386,26 @@ public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOExce } }); } + + static void listDir(File dir, Consumer out, BiConsumer err) { + listDir(dir.toPath(), out, err); + } + + static void listDir(Path dir, Consumer out, BiConsumer err) { + try { + listDir(dir, out); + } catch (IOException e) { + err.accept("Failed to listDir: " + dir, e); + } + } + + static void listDir(Path dir, Consumer out) throws IOException { + if (!Files.isDirectory(dir, LinkOption.NOFOLLOW_LINKS)) { + throw new NotDirectoryException( "Failed to listDir: " + dir + " is not a directory."); + } + + try(Stream s = Files.list(dir)) { + s.forEach(out); + } + } } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/IOUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/IOUtils.java index f1fe6c35c8..8e91b3fb05 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/IOUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/IOUtils.java @@ -91,9 +91,14 @@ static T getFromFuture(CompletableFuture future, Supplier name, T } static boolean shouldReconnect(Throwable e) { - return ReflectionUtils.isInstance(e, - SocketException.class, SocketTimeoutException.class, ClosedChannelException.class, EOFException.class, - AlreadyClosedException.class); + for (; e != null; e = e.getCause()) { + if (ReflectionUtils.isInstance(e, + SocketException.class, SocketTimeoutException.class, ClosedChannelException.class, EOFException.class, + AlreadyClosedException.class, TimeoutIOException.class)) { + return true; + } + } + return false; } static void readFully(InputStream in, int buffSize) throws IOException { diff --git a/ratis-common/src/main/java/org/apache/ratis/util/JavaUtils.java 
b/ratis-common/src/main/java/org/apache/ratis/util/JavaUtils.java index f689006db2..c7f8f76298 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/JavaUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/JavaUtils.java @@ -41,6 +41,7 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; +import java.util.function.BooleanSupplier; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; @@ -133,7 +134,7 @@ static T doPrivileged(Supplier action, Function exc static T doPrivileged(Supplier action, Supplier name) { return doPrivileged(action, e -> { - LOG.warn("Failed to " + name.get(), e); + LOG.warn("Failed to {}", name.get(), e); return null; }); } @@ -147,7 +148,7 @@ static T doPrivileged(Supplier action, Supplier name) { * otherwise, return system property value. */ static String getSystemProperty(final String key) { - Preconditions.assertNotNull(key, "key"); + Objects.requireNonNull(key, "key == null"); Preconditions.assertTrue(!key.isEmpty(), "key is empty."); return doPrivileged(() -> System.getProperty(key), () -> "get system property " + key); } @@ -165,9 +166,9 @@ static String getEnv(String variable) { * When there is a {@link SecurityException}, this becomes a NOOP. 
*/ static void setSystemProperty(String key, String value) { - Preconditions.assertNotNull(key, "key"); + Objects.requireNonNull(key, "key == null"); Preconditions.assertTrue(!key.isEmpty(), "key is empty."); - Preconditions.assertNotNull(value, "value"); + Objects.requireNonNull(value, "value == null"); doPrivileged(() -> System.setProperty(key, value), () -> "set system property " + key + " to " + value); } @@ -227,8 +228,8 @@ static RETURN attempt( throw t; } if (log != null && log.isWarnEnabled()) { - log.warn("FAILED \"" + name.get() + "\", attempt #" + i + "/" + numAttempts - + ", sleep " + sleepTime + " and then retry: " + t); + log.warn("FAILED \"{}\", attempt #{}/{}, sleep {} and then retry: {}", + name.get(), i, numAttempts, sleepTime, t.toString()); } } @@ -244,6 +245,18 @@ static void attempt( attemptRepeatedly(CheckedRunnable.asCheckedSupplier(runnable), numAttempts, sleepTime, name, log); } + /** Attempt to wait the given condition to return true multiple times. */ + static void attemptUntilTrue( + BooleanSupplier condition, int numAttempts, TimeDuration sleepTime, String name, Logger log) + throws InterruptedException { + Objects.requireNonNull(condition, "condition == null"); + attempt(() -> { + if (!condition.getAsBoolean()) { + throw new IllegalStateException("Condition " + name + " is false."); + } + }, numAttempts, sleepTime, name, log); + } + static Timer runRepeatedly(Runnable runnable, long delay, long period, TimeUnit unit) { final Timer timer = new Timer(true); timer.schedule(new TimerTask() { @@ -278,6 +291,15 @@ static Throwable unwrapCompletionException(Throwable t) { return t instanceof CompletionException && t.getCause() != null? 
t.getCause(): t; } + static boolean isCausedBy(Throwable t, Class cause) { + for(; t != null; t = t.getCause()) { + if (cause.isInstance(t)) { + return true; + } + } + return false; + } + static CompletableFuture allOf(Collection> futures) { if (futures == null || futures.isEmpty()) { return CompletableFuture.completedFuture(null); diff --git a/ratis-common/src/main/java/org/apache/ratis/util/LeakDetector.java b/ratis-common/src/main/java/org/apache/ratis/util/LeakDetector.java new file mode 100644 index 0000000000..6f12205795 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/util/LeakDetector.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.ref.ReferenceQueue; +import java.lang.ref.WeakReference; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Consumer; +import java.util.function.Supplier; + +/** + * Simple general resource leak detector using {@link ReferenceQueue} and {@link java.lang.ref.WeakReference} to + * observe resource object life-cycle and assert proper resource closure before they are GCed. + * + *

+ * Example usage: + * + *

 {@code
+ * class MyResource implements AutoCloseable {
+ *   static final LeakDetector LEAK_DETECTOR = new LeakDetector("MyResource");
+ *
+ *   private final Runnable leakTracker = LEAK_DETECTOR.track(this, () -> {
+ *      // report leaks, don't refer to the original object (MyResource) here.
+ *      return "MyResource is not closed before being discarded.";
+ *   });
+ *
+ *   @Override
+ *   public void close() {
+ *     // proper resources cleanup...
+ *     // inform tracker that this object is closed properly.
+ *     leakTracker.run();
+ *   }
+ * }
+ *
+ * }
+ */ +public class LeakDetector { + private static final Logger LOG = LoggerFactory.getLogger(LeakDetector.class); + + private static class LeakTrackerSet { + private final Set set = Collections.newSetFromMap(new HashMap<>()); + + synchronized boolean remove(LeakTracker tracker) { + return set.remove(tracker); + } + + synchronized void removeExisting(LeakTracker tracker) { + final boolean removed = set.remove(tracker); + Preconditions.assertTrue(removed, () -> "Failed to remove existing " + tracker); + } + + synchronized LeakTracker add(Object referent, ReferenceQueue queue, Supplier leakReporter) { + final LeakTracker tracker = new LeakTracker(referent, queue, this::removeExisting, leakReporter); + final boolean added = set.add(tracker); + Preconditions.assertTrue(added, () -> "Failed to add " + tracker + " for " + referent); + return tracker; + } + + synchronized int getNumLeaks(boolean throwException) { + if (set.isEmpty()) { + return 0; + } + + int n = 0; + for (LeakTracker tracker : set) { + if (tracker.reportLeak() != null) { + n++; + } + } + if (throwException) { + assertNoLeaks(n); + } + return n; + } + + synchronized void assertNoLeaks(int leaks) { + Preconditions.assertTrue(leaks == 0, () -> { + final int size = set.size(); + return "#leaks = " + leaks + " > 0, #leaks " + (leaks == size? "==" : "!=") + " set.size = " + size; + }); + } + } + + private static final AtomicLong COUNTER = new AtomicLong(); + + private final ReferenceQueue queue = new ReferenceQueue<>(); + /** All the {@link LeakTracker}s. */ + private final LeakTrackerSet trackers = new LeakTrackerSet(); + /** When a leak is discovered, a message is printed and added to this list. 
*/ + private final List leakMessages = Collections.synchronizedList(new ArrayList<>()); + private final String name; + + LeakDetector(String name) { + this.name = name + COUNTER.getAndIncrement(); + } + + LeakDetector start() { + Thread t = new Thread(this::run); + t.setName(LeakDetector.class.getSimpleName() + "-" + name); + t.setDaemon(true); + LOG.info("Starting leak detector thread {}.", name); + t.start(); + return this; + } + + private void run() { + while (true) { + try { + LeakTracker tracker = (LeakTracker) queue.remove(); + // Original resource already been GCed, if tracker is not closed yet, + // report a leak. + if (trackers.remove(tracker)) { + final String leak = tracker.reportLeak(); + if (leak != null) { + leakMessages.add(leak); + } + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("Thread interrupted, exiting.", e); + break; + } + } + + LOG.warn("Exiting leak detector {}.", name); + } + + Runnable track(Object leakable, Supplier reportLeak) { + // TODO: A rate filter can be put here to only track a subset of all objects, e.g. 5%, 10%, + // if we have proofs that leak tracking impacts performance, or a single LeakDetector + // thread can't keep up with the pace of object allocation. + // For now, it looks effective enough and let keep it simple. + return trackers.add(leakable, queue, reportLeak)::remove; + } + + public int getLeakCount() { + return trackers.getNumLeaks(false); + } + + public void assertNoLeaks(int maxRetries, TimeDuration retrySleep) throws InterruptedException { + synchronized (leakMessages) { + // leakMessages are all the leaks discovered so far. 
+ Preconditions.assertTrue(leakMessages.isEmpty(), + () -> "#leaks = " + leakMessages.size() + "\n" + leakMessages); + } + + for(int i = 0; i < maxRetries; i++) { + final int numLeaks = trackers.getNumLeaks(false); + if (numLeaks == 0) { + return; + } + LOG.warn("{}/{}) numLeaks == {} > 0, will wait and retry ...", i, maxRetries, numLeaks); + retrySleep.sleep(); + } + trackers.getNumLeaks(true); + } + + private static final class LeakTracker extends WeakReference { + private final Consumer removeMethod; + private final Supplier getLeakMessage; + + LeakTracker(Object referent, ReferenceQueue referenceQueue, + Consumer removeMethod, Supplier getLeakMessage) { + super(referent, referenceQueue); + this.removeMethod = removeMethod; + this.getLeakMessage = getLeakMessage; + } + + /** Called by the tracked resource when the object is completely released. */ + void remove() { + removeMethod.accept(this); + } + + /** @return the leak message if there is a leak; return null if there is no leak. */ + String reportLeak() { + return getLeakMessage.get(); + } + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/util/LifeCycle.java b/ratis-common/src/main/java/org/apache/ratis/util/LifeCycle.java index 9870fe3719..e96ba88a5c 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/LifeCycle.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/LifeCycle.java @@ -117,6 +117,9 @@ static void validate(Object name, State from, State to) { if (LOG.isTraceEnabled()) { LOG.trace("TRACE", new Throwable()); } + if (to == EXCEPTION) { + LOG.error("{} has failed ({} -> {})", name, from, to, new Throwable("TRACE")); + } Preconditions.assertTrue(isValid(from, to), "ILLEGAL TRANSITION: In %s, %s -> %s", name, from, to); diff --git a/ratis-common/src/main/java/org/apache/ratis/util/LogUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/LogUtils.java index d29f1e56ec..63bb08accf 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/LogUtils.java +++ 
b/ratis-common/src/main/java/org/apache/ratis/util/LogUtils.java @@ -118,7 +118,13 @@ static void warn(Logger log, Supplier message, Throwable t, Class... if (log.isWarnEnabled()) { if (ReflectionUtils.isInstance(t, exceptionClasses)) { // do not print stack trace for known exceptions. - log.warn(message.get() + ": " + t); + final StringBuilder b = new StringBuilder() + .append(message.get()) + .append(": ").append(t); + for(Throwable cause = t.getCause(); cause != null; cause = cause.getCause()) { + b.append("\n Caused by: ").append(cause); + } + log.warn(b.toString()); } else { log.warn(message.get(), t); } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/MD5FileUtil.java b/ratis-common/src/main/java/org/apache/ratis/util/MD5FileUtil.java index 8a38f45e67..2c217b27d4 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/MD5FileUtil.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/MD5FileUtil.java @@ -18,8 +18,6 @@ package org.apache.ratis.util; import org.apache.ratis.io.MD5Hash; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.File; @@ -29,16 +27,21 @@ import java.nio.charset.StandardCharsets; import java.nio.file.StandardOpenOption; import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; -public abstract class MD5FileUtil { - public static final Logger LOG = LoggerFactory.getLogger(MD5FileUtil.class); +public final class MD5FileUtil { + private MD5FileUtil() {} - // TODO: we should provide something like Hadoop's checksum fs for the local filesystem - // so that individual state machines do not have to deal with checksumming/corruption prevention. - // Keep the checksum and data in the same block format instead of individual files. 
+ public static MessageDigest newMD5() { + try { + return MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new IllegalStateException("Failed to create MessageDigest for MD5", e); + } + } public static final String MD5_SUFFIX = ".md5"; private static final String LINE_REGEX = "([0-9a-f]{32}) [ *](.+)"; @@ -105,7 +108,7 @@ public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException { referencedFile.getName() + " but we expected it to reference " + dataFile); } - return new MD5Hash(storedHash); + return MD5Hash.newInstance(storedHash); } /** @@ -113,7 +116,7 @@ public static MD5Hash readStoredMd5ForFile(File dataFile) throws IOException { */ public static MD5Hash computeMd5ForFile(File dataFile) throws IOException { final int bufferSize = SizeInBytes.ONE_MB.getSizeInt(); - final MessageDigest digester = MD5Hash.getDigester(); + final MessageDigest digester = newMD5(); try (FileChannel in = FileUtils.newFileChannel(dataFile, StandardOpenOption.READ)) { final long fileSize = in.size(); for (int offset = 0; offset < fileSize; ) { @@ -122,7 +125,7 @@ public static MD5Hash computeMd5ForFile(File dataFile) throws IOException { offset += readSize; } } - return new MD5Hash(digester.digest()); + return MD5Hash.newInstance(digester.digest()); } public static MD5Hash computeAndSaveMd5ForFile(File dataFile) { @@ -147,7 +150,7 @@ public static MD5Hash computeAndSaveMd5ForFile(File dataFile) { */ public static void saveMD5File(File dataFile, MD5Hash digest) throws IOException { - final String digestString = StringUtils.bytes2HexString(digest.getDigest()); + final String digestString = digest.toString(); saveMD5File(dataFile, digestString); } @@ -162,10 +165,6 @@ private static void saveMD5File(File dataFile, String digestString) try (AtomicFileOutputStream afos = new AtomicFileOutputStream(md5File)) { afos.write(md5Line.getBytes(StandardCharsets.UTF_8)); } - - if (LOG.isDebugEnabled()) { - LOG.debug("Saved MD5 " + digestString + " 
to " + md5File); - } } /** diff --git a/ratis-common/src/main/java/org/apache/ratis/util/MemoizedBase.java b/ratis-common/src/main/java/org/apache/ratis/util/MemoizedBase.java new file mode 100644 index 0000000000..1c78c05cfc --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/util/MemoizedBase.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.util; + +import org.apache.ratis.util.function.CheckedSupplier; + +import java.util.Objects; + +/** + * This is the base class for the memoized subclass such as + * {@link MemoizedSupplier}, {@link MemoizedFunction}, {@link MemoizedCheckedSupplier}, etc, + * The subclasses provide its own method to retrieve the value, + * such as {@link MemoizedSupplier#get()} and {@link MemoizedFunction#apply(Object)}. + * The subclass method returns a value by invoking its initializer once at the first call + * and then keeps returning the same value for the subsequent calls. + *

+ * All the subclasses are thread safe. + * + * @param The value type. + * @param The throwable type of the initializer. + */ +abstract class MemoizedBase { + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type + private volatile RETURN value = null; + + final RETURN init(CheckedSupplier initializer) throws THROW { + final RETURN initialized = value; + if (initialized != null) { + return initialized; + } + + synchronized (this) { + if (value == null) { + value = initializer.get(); + Objects.requireNonNull(value, "initializer.get() returns null"); + } + return value; + } + } + + /** @return is the object initialized? */ + public final boolean isInitialized() { + return value != null; + } + + /** + * @return the value, which must be already initialized. + * @throws NullPointerException if the value is uninitialized. + */ + public RETURN getInitializedValue() { + return Objects.requireNonNull(value, "Uninitialized: value == null"); + } + + @Override + public String toString() { + return value != null ? "Memoized:" + value : "Uninitialized"; + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/util/MemoizedCheckedSupplier.java b/ratis-common/src/main/java/org/apache/ratis/util/MemoizedCheckedSupplier.java index cf2d060239..8d4cf9cb83 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/MemoizedCheckedSupplier.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/MemoizedCheckedSupplier.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -25,13 +25,14 @@ * A memoized supplier is a {@link CheckedSupplier} * which gets a value by invoking its initializer once. * and then keeps returning the same value as its supplied results. - * + *

* This class is thread safe. * * @param The return type of the supplier. * @param The throwable type of the supplier. */ public final class MemoizedCheckedSupplier + extends MemoizedBase implements CheckedSupplier { /** * @param supplier to supply at most one non-null value. @@ -44,7 +45,6 @@ public static MemoizedCheckedSupplier initializer; - private volatile RETURN value = null; /** * Create a memoized supplier. @@ -58,16 +58,7 @@ private MemoizedCheckedSupplier(CheckedSupplier initializer) { /** @return the lazily initialized object. */ @Override public RETURN get() throws THROW { - RETURN v = value; - if (v == null) { - synchronized (this) { - v = value; - if (v == null) { - v = value = Objects.requireNonNull(initializer.get(), "initializer.get() returns null"); - } - } - } - return v; + return init(initializer); } /** @@ -75,16 +66,6 @@ public RETURN get() throws THROW { * @throws NullPointerException if the object is uninitialized. */ public RETURN getUnchecked() { - return Objects.requireNonNull(value, "value == null"); - } - - /** @return is the object initialized? */ - public boolean isInitialized() { - return value != null; - } - - @Override - public String toString() { - return isInitialized()? "Memoized:" + value: "UNINITIALIZED"; + return getInitializedValue(); } } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/MemoizedFunction.java b/ratis-common/src/main/java/org/apache/ratis/util/MemoizedFunction.java new file mode 100644 index 0000000000..13272f0f65 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/util/MemoizedFunction.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.util; + +import java.util.Objects; +import java.util.function.Function; + +/** + * A memoized function is a {@link Function} + * which returns a value by invoking its initializer once + * and then keeps returning the same value as its result. + *

+ * This class is similar to {@link MemoizedSupplier} except that + * the initializer takes a parameter. + *

+ * This class is thread safe. + * + * @param The function result type. + */ +public final class MemoizedFunction + extends MemoizedBase + implements Function { + /** + * @param function to supply at most one non-null value. + * @return a {@link MemoizedFunction} with the given function. + */ + public static MemoizedFunction valueOf(Function function) { + return function instanceof MemoizedFunction ? + (MemoizedFunction) function : new MemoizedFunction<>(function); + } + + private final Function initializer; + + /** + * Create a memoized function. + * @param initializer to supply at most one non-null value. + */ + private MemoizedFunction(Function initializer) { + Objects.requireNonNull(initializer, "initializer == null"); + this.initializer = initializer; + } + + /** + * @param parameter for passing to the initializer. + * Since the returned function is memoized, the parameter is only used at the first call. + * The parameter is ignored in the subsequent calls. + * + * @return the lazily initialized object. + */ + @Override + public RETURN apply(PARAMETER parameter) { + return init(() -> initializer.apply(parameter)); + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/util/MemoizedSupplier.java b/ratis-common/src/main/java/org/apache/ratis/util/MemoizedSupplier.java index f179d2dcd0..588f7ff796 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/MemoizedSupplier.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/MemoizedSupplier.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -24,12 +24,14 @@ * A memoized supplier is a {@link Supplier} * which gets a value by invoking its initializer once * and then keeps returning the same value as its supplied results. - * + *

* This class is thread safe. * * @param The supplier result type. */ -public final class MemoizedSupplier implements Supplier { +public final class MemoizedSupplier + extends MemoizedBase + implements Supplier { /** * @param supplier to supply at most one non-null value. * @return a {@link MemoizedSupplier} with the given supplier. @@ -40,7 +42,6 @@ public static MemoizedSupplier valueOf(Supplier supplier) { } private final Supplier initializer; - private volatile T value = null; /** * Create a memoized supplier. @@ -54,26 +55,6 @@ private MemoizedSupplier(Supplier initializer) { /** @return the lazily initialized object. */ @Override public T get() { - T v = value; - if (v == null) { - synchronized (this) { - v = value; - if (v == null) { - v = value = Objects.requireNonNull(initializer.get(), - "initializer.get() returns null"); - } - } - } - return v; - } - - /** @return is the object initialized? */ - public boolean isInitialized() { - return value != null; - } - - @Override - public String toString() { - return isInitialized()? 
"Memoized:" + get(): "UNINITIALIZED"; + return init(initializer::get); } } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/NetUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/NetUtils.java index a6ce5af792..c5f22c2893 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/NetUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/NetUtils.java @@ -17,11 +17,19 @@ */ package org.apache.ratis.util; +import org.apache.ratis.thirdparty.com.google.common.net.InetAddresses; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.net.*; +import java.net.Inet6Address; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.UnknownHostException; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -145,8 +153,12 @@ static String address2String(InetSocketAddress address) { if (address == null) { return null; } - final StringBuilder b = new StringBuilder(address.getHostName()); - if (address.getAddress() instanceof Inet6Address) { + String hostName = address.getHostName(); + final StringBuilder b = new StringBuilder(hostName); + // Surround with '[', ']' only if it is a IPv6 ip - not for a IPv6 host + if (address.getAddress() instanceof Inet6Address && + InetAddresses.isInetAddress(hostName) && + InetAddresses.forString(hostName).getAddress().length == 16) { b.insert(0, '[').append(']'); } return b.append(':').append(address.getPort()).toString(); diff --git a/ratis-common/src/main/java/org/apache/ratis/util/PeerProxyMap.java b/ratis-common/src/main/java/org/apache/ratis/util/PeerProxyMap.java index 0ce0595fa9..868b65cc37 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/PeerProxyMap.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/PeerProxyMap.java @@ -45,6 +45,7 @@ public class PeerProxyMap implements RaftPeer.Add, Clos /** 
Peer and its proxy. */ private class PeerAndProxy { private final RaftPeer peer; + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile PROXY proxy = null; private final LifeCycle lifeCycle; @@ -58,7 +59,10 @@ RaftPeer getPeer() { } PROXY getProxy() throws IOException { - if (proxy == null) { + final PROXY p = proxy; + if (p != null) { + return p; + } else { synchronized (this) { if (proxy == null) { final LifeCycle.State current = lifeCycle.getCurrentState(); @@ -68,9 +72,9 @@ PROXY getProxy() throws IOException { lifeCycle.startAndTransition( () -> proxy = createProxyImpl(peer), IOException.class); } + return Objects.requireNonNull(proxy, "proxy"); } } - return proxy; } Optional setNullProxyAndClose() { diff --git a/ratis-common/src/main/java/org/apache/ratis/util/Preconditions.java b/ratis-common/src/main/java/org/apache/ratis/util/Preconditions.java index c757de2990..f37b250f53 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/Preconditions.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/Preconditions.java @@ -78,6 +78,11 @@ static void assertTrue(boolean value, Supplier message) { } } + static void assertSame(int expected, int computed, String name) { + assertTrue(expected == computed, + () -> name + ": expected == " + expected + " but computed == " + computed); + } + static void assertSame(long expected, long computed, String name) { assertTrue(expected == computed, () -> name + ": expected == " + expected + " but computed == " + computed); diff --git a/ratis-common/src/main/java/org/apache/ratis/util/ProtoUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/ProtoUtils.java index cd22ebe6b4..161715bf80 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/ProtoUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/ProtoUtils.java @@ -35,6 +35,8 @@ import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; import 
org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.thirdparty.com.google.protobuf.MessageOrBuilder; +import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat; import java.io.IOException; import java.io.ObjectOutputStream; @@ -250,4 +252,8 @@ static String toString(RaftRpcReplyProto proto) { + "#" + proto.getCallId() + ":" + (proto.getSuccess()? "OK": "FAIL"); } + + static String shortDebugString(MESSAGE message) { + return message != null ? TextFormat.shortDebugString(message) : "null"; + } } diff --git a/ratis-metrics-dropwizard3/src/main/java/org/apache/ratis/metrics/dropwizard3/RefCountingMap.java b/ratis-common/src/main/java/org/apache/ratis/util/RefCountingMap.java similarity index 88% rename from ratis-metrics-dropwizard3/src/main/java/org/apache/ratis/metrics/dropwizard3/RefCountingMap.java rename to ratis-common/src/main/java/org/apache/ratis/util/RefCountingMap.java index 6c3ad6f8cd..a6a9eb81ab 100644 --- a/ratis-metrics-dropwizard3/src/main/java/org/apache/ratis/metrics/dropwizard3/RefCountingMap.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/RefCountingMap.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.ratis.metrics.dropwizard3; +package org.apache.ratis.util; import java.util.Collection; import java.util.Set; @@ -31,7 +31,7 @@ * call will increment the ref count, and each remove() will decrement it. The values are removed * from the map iff ref count == 0. */ -class RefCountingMap { +public final class RefCountingMap { private static class Payload { private final V value; private final AtomicInteger refCount = new AtomicInteger(); @@ -55,15 +55,15 @@ Payload decrement() { private final ConcurrentMap> map = new ConcurrentHashMap<>(); - V put(K k, Supplier supplier) { + public V put(K k, Supplier supplier) { return map.compute(k, (k1, old) -> old != null? 
old: new Payload<>(supplier.get())).increment(); } - static V get(Payload p) { + public static V get(Payload p) { return p == null ? null : p.get(); } - V get(K k) { + public V get(K k) { return get(map.get(k)); } @@ -72,23 +72,23 @@ V get(K k) { * @param k the key to remove * @return the value associated with the specified key or null if key is removed from map. */ - V remove(K k) { + public V remove(K k) { return get(map.computeIfPresent(k, (k1, v) -> v.decrement())); } - void clear() { + public void clear() { map.clear(); } - Set keySet() { + public Set keySet() { return map.keySet(); } - Collection values() { + public Collection values() { return map.values().stream().map(Payload::get).collect(Collectors.toList()); } - int size() { + public int size() { return map.size(); } } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/ReferenceCountedLeakDetector.java b/ratis-common/src/main/java/org/apache/ratis/util/ReferenceCountedLeakDetector.java new file mode 100644 index 0000000000..330060a90f --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/util/ReferenceCountedLeakDetector.java @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Consumer; +import java.util.function.Supplier; + +/** + * A utility to detect leaks from @{@link ReferenceCountedObject}. + */ +public final class ReferenceCountedLeakDetector { + private static final Logger LOG = LoggerFactory.getLogger(ReferenceCountedLeakDetector.class); + // Leak detection is turned off by default. + + private static final AtomicReference FACTORY = new AtomicReference<>(Mode.NONE); + private static final Supplier SUPPLIER + = MemoizedSupplier.valueOf(() -> new LeakDetector(FACTORY.get().name()).start()); + + static Factory getFactory() { + return FACTORY.get(); + } + + public static LeakDetector getLeakDetector() { + return SUPPLIER.get(); + } + + private ReferenceCountedLeakDetector() { + } + + public static synchronized void enable(boolean advanced) { + FACTORY.set(advanced ? Mode.ADVANCED : Mode.SIMPLE); + } + + interface Factory { + ReferenceCountedObject create(V value, Runnable retainMethod, Consumer releaseMethod); + } + + private enum Mode implements Factory { + /** Leak detector is not enable in production to avoid performance impacts. */ + NONE { + @Override + public ReferenceCountedObject create(V value, Runnable retainMethod, Consumer releaseMethod) { + return new Impl<>(value, retainMethod, releaseMethod); + } + }, + /** Leak detector is enabled to detect leaks. This is intended to use in every tests. 
*/ + SIMPLE { + @Override + public ReferenceCountedObject create(V value, Runnable retainMethod, Consumer releaseMethod) { + return new SimpleTracing<>(value, retainMethod, releaseMethod, getLeakDetector()); + } + }, + /** + * Leak detector is enabled to detect leaks and report object creation stacktrace as well as every retain and + * release stacktraces. This has severe impact in performance and only used to debug specific test cases. + */ + ADVANCED { + @Override + public ReferenceCountedObject create(V value, Runnable retainMethod, Consumer releaseMethod) { + return new AdvancedTracing<>(value, retainMethod, releaseMethod, getLeakDetector()); + } + } + } + + private static class Impl implements ReferenceCountedObject { + private final AtomicInteger count; + private final V value; + private final Runnable retainMethod; + private final Consumer releaseMethod; + + Impl(V value, Runnable retainMethod, Consumer releaseMethod) { + this.value = value; + this.retainMethod = retainMethod; + this.releaseMethod = releaseMethod; + count = new AtomicInteger(); + } + + @Override + public V get() { + final int previous = count.get(); + if (previous < 0) { + throw new IllegalStateException("Failed to get: object has already been completely released."); + } else if (previous == 0) { + throw new IllegalStateException("Failed to get: object has not yet been retained."); + } + return value; + } + + final int getCount() { + return count.get(); + } + + @Override + public V retain() { + // n < 0: exception + // n >= 0: n++ + if (count.getAndUpdate(n -> n < 0? n : n + 1) < 0) { + throw new IllegalStateException("Failed to retain: object has already been completely released."); + } + LOG.trace("retain value : {}, count : {}.", value, count); + retainMethod.run(); + return value; + } + + @Override + public boolean release() { + // n <= 0: exception + // n > 1: n-- + // n == 1: n = -1 + final int previous = count.getAndUpdate(n -> n <= 1? 
-1: n - 1); + if (previous < 0) { + throw new IllegalStateException("Failed to release: object has already been completely released."); + } else if (previous == 0) { + throw new IllegalStateException("Failed to release: object has not yet been retained."); + } + LOG.trace("release value : {}, count : {}.", value, count); + final boolean completedReleased = previous == 1; + releaseMethod.accept(completedReleased); + return completedReleased; + } + } + + private static class SimpleTracing extends Impl { + private final LeakDetector leakDetector; + private final Class valueClass; + private String valueString = null; + private Runnable removeMethod = null; + + SimpleTracing(T value, Runnable retainMethod, Consumer releaseMethod, LeakDetector leakDetector) { + super(value, retainMethod, releaseMethod); + this.valueClass = value.getClass(); + this.leakDetector = leakDetector; + } + + String getTraceString(int count) { + return "(" + valueClass + ", count=" + count + ", value=" + valueString + ")"; + } + + /** @return the leak message if there is a leak; return null if there is no leak. 
*/ + String logLeakMessage() { + final int count = getCount(); + if (count == 0) { // never retain + return null; + } + final String message = "LEAK: " + getTraceString(count); + LOG.warn(message); + return message; + } + + @Override + public synchronized T get() { + try { + return super.get(); + } catch (Exception e) { + throw new IllegalStateException("Failed to get: " + getTraceString(getCount()), e); + } + } + + @Override + public synchronized T retain() { + final T value; + try { + value = super.retain(); + } catch (Exception e) { + throw new IllegalStateException("Failed to retain: " + getTraceString(getCount()), e); + } + if (getCount() == 1) { // this is the first retain + this.removeMethod = leakDetector.track(this, this::logLeakMessage); + this.valueString = value.toString(); + } + return value; + } + + @Override + public synchronized boolean release() { + final boolean released; + try { + released = super.release(); + } catch (Exception e) { + throw new IllegalStateException("Failed to release: " + getTraceString(getCount()), e); + } + + if (released) { + Objects.requireNonNull(removeMethod, () -> "Not yet retained (removeMethod == null): " + valueClass); + removeMethod.run(); + } + return released; + } + } + + private static class AdvancedTracing extends SimpleTracing { + enum Op {CREATION, RETAIN, RELEASE, CURRENT} + + static class Counts { + private final int refCount; + private final int retainCount; + private final int releaseCount; + + Counts() { + this.refCount = 0; + this.retainCount = 0; + this.releaseCount = 0; + } + + Counts(Op op, Counts previous) { + if (op == Op.RETAIN) { + this.refCount = previous.refCount + 1; + this.retainCount = previous.retainCount + 1; + this.releaseCount = previous.releaseCount; + } else if (op == Op.RELEASE) { + this.refCount = previous.refCount - 1; + this.retainCount = previous.retainCount; + this.releaseCount = previous.releaseCount + 1; + } else { + throw new IllegalStateException("Unexpected op: " + op); + } + 
} + + @Override + public String toString() { + return "refCount=" + refCount + + ", retainCount=" + retainCount + + ", releaseCount=" + releaseCount; + } + } + + static class TraceInfo { + private final int id; + private final Op op; + private final int previousRefCount; + private final Counts counts; + + private final String threadInfo; + private final StackTraceElement[] stackTraces; + private final int newTraceElementIndex; + + TraceInfo(int id, Op op, TraceInfo previous, int previousRefCount) { + this.id = id; + this.op = op; + this.previousRefCount = previousRefCount; + this.counts = previous == null? new Counts() + : op == Op.CURRENT ? previous.counts + : new Counts(op, previous.counts); + + final Thread thread = Thread.currentThread(); + this.threadInfo = "Thread_" + thread.getId() + ":" + thread.getName(); + this.stackTraces = thread.getStackTrace(); + this.newTraceElementIndex = previous == null? stackTraces.length - 1 + : findFirstUnequalFromTail(this.stackTraces, previous.stackTraces); + } + + static int findFirstUnequalFromTail(T[] current, T[] previous) { + int c = current.length == 0 ? 
0 : current.length - 1; + for(int p = previous.length - 1; p >= 0; p--, c--) { + if (!previous[p].equals(current[c])) { + return c; + } + } + return -1; + } + + private StringBuilder appendTo(StringBuilder b) { + b.append(op).append("_").append(id) + .append(": previousRefCount=").append(previousRefCount) + .append(", ").append(counts) + .append(", ").append(threadInfo).append("\n"); + final int n = newTraceElementIndex + 1; + int line = 3; + for (; line <= n && line < stackTraces.length; line++) { + b.append(" ").append(stackTraces[line]).append("\n"); + } + if (line < stackTraces.length) { + b.append(" ...\n"); + } + return b; + } + + @Override + public String toString() { + return appendTo(new StringBuilder()).toString(); + } + } + + private final List traceInfos = new ArrayList<>(); + private TraceInfo previous; + + AdvancedTracing(T value, Runnable retainMethod, Consumer releaseMethod, LeakDetector leakDetector) { + super(value, retainMethod, releaseMethod, leakDetector); + addTraceInfo(Op.CREATION, -1); + } + + private synchronized TraceInfo addTraceInfo(Op op, int previousRefCount) { + final TraceInfo current = new TraceInfo(traceInfos.size(), op, previous, previousRefCount); + traceInfos.add(current); + previous = current; + return current; + } + + + @Override + public synchronized T retain() { + final int previousRefCount = getCount(); + final T retained = super.retain(); + final TraceInfo info = addTraceInfo(Op.RETAIN, previousRefCount); + Preconditions.assertSame(getCount(), info.counts.refCount, "refCount"); + return retained; + } + + @Override + public synchronized boolean release() { + final int previousRefCount = getCount(); + final boolean released = super.release(); + final TraceInfo info = addTraceInfo(Op.RELEASE, previousRefCount); + final int count = getCount(); + final int expected = count == -1? 
0 : count; + Preconditions.assertSame(expected, info.counts.refCount, "refCount"); + return released; + } + + @Override + synchronized String getTraceString(int count) { + return super.getTraceString(count) + getTraceInfosString(); + } + + private String getTraceInfosString() { + final int n = traceInfos.size(); + final StringBuilder b = new StringBuilder(n << 10).append(" #TraceInfos=").append(n); + TraceInfo last = null; + for (TraceInfo info : traceInfos) { + info.appendTo(b.append("\n")); + last = info; + } + + // append current track info + final TraceInfo current = new TraceInfo(n, Op.CURRENT, last, getCount()); + current.appendTo(b.append("\n")); + + return b.toString(); + } + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/util/ReferenceCountedObject.java b/ratis-common/src/main/java/org/apache/ratis/util/ReferenceCountedObject.java index 3f72f5ffe8..1fc72c3445 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/ReferenceCountedObject.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/ReferenceCountedObject.java @@ -19,10 +19,11 @@ import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; +import java.util.Collection; import java.util.Objects; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; +import java.util.function.Function; /** * A reference-counted object can be retained for later use @@ -44,6 +45,7 @@ * @param The object type. */ public interface ReferenceCountedObject { + /** @return the object. 
*/ T get(); @@ -101,6 +103,32 @@ static ReferenceCountedObject wrap(V value) { return wrap(value, () -> {}, ignored -> {}); } + static ReferenceCountedObject delegateFrom(Collection> fromRefs, V value) { + return new ReferenceCountedObject() { + @Override + public V get() { + return value; + } + + @Override + public V retain() { + fromRefs.forEach(ReferenceCountedObject::retain); + return value; + } + + @Override + public boolean release() { + boolean allReleased = true; + for (ReferenceCountedObject ref : fromRefs) { + if (!ref.release()) { + allReleased = false; + } + } + return allReleased; + } + }; + } + /** * @return a {@link ReferenceCountedObject} of the given value by delegating to this object. */ @@ -125,6 +153,13 @@ public boolean release() { }; } + /** + * @return a {@link ReferenceCountedObject} by apply the given function to this object. + */ + default ReferenceCountedObject apply(Function function) { + return delegate(function.apply(get())); + } + /** * Wrap the given value as a {@link ReferenceCountedObject}. * @@ -140,48 +175,7 @@ static ReferenceCountedObject wrap(V value, Runnable retainMethod, Consum Objects.requireNonNull(retainMethod, "retainMethod == null"); Objects.requireNonNull(releaseMethod, "releaseMethod == null"); - return new ReferenceCountedObject() { - private final AtomicInteger count = new AtomicInteger(); - - @Override - public V get() { - final int previous = count.get(); - if (previous < 0) { - throw new IllegalStateException("Failed to get: object has already been completely released."); - } else if (previous == 0) { - throw new IllegalStateException("Failed to get: object has not yet been retained."); - } - return value; - } - - @Override - public V retain() { - // n < 0: exception - // n >= 0: n++ - if (count.getAndUpdate(n -> n < 0? 
n : n + 1) < 0) { - throw new IllegalStateException("Failed to retain: object has already been completely released."); - } - - retainMethod.run(); - return value; - } - - @Override - public boolean release() { - // n <= 0: exception - // n > 1: n-- - // n == 1: n = -1 - final int previous = count.getAndUpdate(n -> n <= 1? -1: n - 1); - if (previous < 0) { - throw new IllegalStateException("Failed to release: object has already been completely released."); - } else if (previous == 0) { - throw new IllegalStateException("Failed to release: object has not yet been retained."); - } - final boolean completedReleased = previous == 1; - releaseMethod.accept(completedReleased); - return completedReleased; - } - }; + return ReferenceCountedLeakDetector.getFactory().create(value, retainMethod, releaseMethod); } /** The same as wrap(value, retainMethod, ignored -> releaseMethod.run()). */ diff --git a/ratis-common/src/main/java/org/apache/ratis/util/ReflectionUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/ReflectionUtils.java index 6f05c9c530..a8c217b6a1 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/ReflectionUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/ReflectionUtils.java @@ -20,11 +20,17 @@ package org.apache.ratis.util; - import java.lang.ref.WeakReference; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.WeakHashMap; import java.util.concurrent.ConcurrentHashMap; /** diff --git a/ratis-common/src/main/java/org/apache/ratis/util/SizeInBytes.java b/ratis-common/src/main/java/org/apache/ratis/util/SizeInBytes.java index 683f0da628..a2a80697f8 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/SizeInBytes.java +++ 
b/ratis-common/src/main/java/org/apache/ratis/util/SizeInBytes.java @@ -33,14 +33,14 @@ public static SizeInBytes valueOf(long size) { } public static SizeInBytes valueOf(long n, TraditionalBinaryPrefix prefix) { - final long size = Objects.requireNonNull(prefix, "prefix = null").toLong(n); + final long size = Objects.requireNonNull(prefix, "prefix == null").toLong(n); final String input = n + " " + prefix.getSymbol(); final String description = input + " (=" + size + ")"; return new SizeInBytes(size, input, description); } public static SizeInBytes valueOf(String input) { - input = Objects.requireNonNull(input, "input = null").trim(); + input = Objects.requireNonNull(input, "input == null").trim(); final int last = input.length() - 1; final String s = "b".equalsIgnoreCase(input.substring(last))? @@ -83,4 +83,14 @@ public String getInput() { public String toString() { return description; } + + @Override + public boolean equals(Object obj) { + return obj instanceof SizeInBytes && size == ((SizeInBytes)obj).size; + } + + @Override + public int hashCode() { + return Long.hashCode(size); + } } diff --git a/ratis-common/src/main/java/org/apache/ratis/util/SlidingWindow.java b/ratis-common/src/main/java/org/apache/ratis/util/SlidingWindow.java index 732e3d890e..a41b4b136a 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/SlidingWindow.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/SlidingWindow.java @@ -28,7 +28,6 @@ import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.TimeUnit; import java.util.function.Consumer; import java.util.function.LongFunction; @@ -52,6 +51,9 @@ interface Request { boolean hasReply(); void fail(Throwable e); + + default void release() { + } } interface ClientSideRequest extends Request { @@ -70,9 +72,6 @@ class RequestMap, REPLY> implements Iterable replyMetho + " will NEVER be processed; request = " + r); r.fail(e); replyMethod.accept(r); + 
r.release(); } tail.clear(); putNewRequest(end); } - void clear() { + void clear(long nextToProcess) { LOG.debug("close {}", this); + final SortedMap tail = requests.tailMap(nextToProcess); + for (REQUEST r : tail.values()) { + r.release(); + } requests.clear(); } @@ -448,19 +452,26 @@ public synchronized String toString() { /** A request (or a retry) arrives (may be out-of-order except for the first request). */ public synchronized void receivedRequest(REQUEST request, Consumer processingMethod) { final long seqNum = request.getSeqNum(); + final boolean accepted; if (nextToProcess == -1 && (request.isFirstRequest() || seqNum == 0)) { nextToProcess = seqNum; requests.putNewRequest(request); LOG.debug("Received seq={} (first request), {}", seqNum, this); + accepted = true; + } else if (request.getSeqNum() < nextToProcess) { + LOG.debug("Received seq={} < nextToProcess {}, {}", seqNum, nextToProcess, this); + accepted = false; } else { final boolean isRetry = requests.putIfAbsent(request); LOG.debug("Received seq={}, isRetry? 
{}, {}", seqNum, isRetry, this); - if (isRetry) { - return; - } + accepted = !isRetry; } - processRequestsFromHead(processingMethod); + if (accepted) { + processRequestsFromHead(processingMethod); + } else { + request.release(); + } } private void processRequestsFromHead(Consumer processingMethod) { @@ -469,6 +480,7 @@ private void processRequestsFromHead(Consumer processingMethod) { return; } else if (r.getSeqNum() == nextToProcess) { processingMethod.accept(r); + r.release(); nextToProcess++; } } @@ -514,7 +526,7 @@ public synchronized boolean endOfRequests(Consumer replyMethod) { @Override public void close() { - requests.clear(); + requests.clear(nextToProcess); } } } \ No newline at end of file diff --git a/ratis-common/src/main/java/org/apache/ratis/util/StringUtils.java b/ratis-common/src/main/java/org/apache/ratis/util/StringUtils.java index 0f3266e657..50a256e5f8 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/StringUtils.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/StringUtils.java @@ -81,6 +81,23 @@ public static String format(final String format, final Object... objects) { return String.format(Locale.ENGLISH, format, objects); } + public static String bytes2ShortString(ByteString bytes) { + return bytes.isValidUtf8() ? 
bytes2ShortUtf8String(bytes) : StringUtils.bytes2HexShortString(bytes); + } + + public static String bytes2ShortUtf8String(ByteString bytes) { + final String utf8 = bytes.toStringUtf8(); + if (utf8.isEmpty()) { + return ""; + } + final int length = utf8.length(); + if (length <= 10) { + return utf8; + } + // return only the first 10 characters + return utf8.substring(0, 10) + "...(length=" + length + ")"; + } + public static String bytes2HexShortString(ByteString bytes) { final int size = bytes.size(); if (size == 0) { diff --git a/ratis-common/src/main/java/org/apache/ratis/util/TimeDuration.java b/ratis-common/src/main/java/org/apache/ratis/util/TimeDuration.java index 2abdfdfaca..2a520083e0 100644 --- a/ratis-common/src/main/java/org/apache/ratis/util/TimeDuration.java +++ b/ratis-common/src/main/java/org/apache/ratis/util/TimeDuration.java @@ -144,7 +144,7 @@ public static long parse(String timeString, TimeUnit targetUnit) { * @return a {@link TimeDuration} in the target unit. */ public static TimeDuration valueOf(String timeString, TimeUnit defaultUnit) { - Objects.requireNonNull(timeString, "timeString = null"); + Objects.requireNonNull(timeString, "timeString == null"); final String lower = timeString.trim().replace("_", "").toLowerCase(); for(Abbreviation a : Abbreviation.values()) { for(String s : a.getSymbols()) { @@ -172,7 +172,7 @@ public static TimeDuration valueOf(long duration, TimeUnit unit) { private TimeDuration(long duration, TimeUnit unit) { this.duration = duration; - this.unit = Objects.requireNonNull(unit, "unit = null"); + this.unit = Objects.requireNonNull(unit, "unit == null"); } /** @return the duration value. 
*/ diff --git a/ratis-common/src/main/java/org/apache/ratis/util/VersionInfo.java b/ratis-common/src/main/java/org/apache/ratis/util/VersionInfo.java new file mode 100644 index 0000000000..976b2ad6e1 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/util/VersionInfo.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.EnumMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Properties; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.function.BiConsumer; +import java.util.function.Consumer; + +/** + * This class is for the resource generated by hadoop-maven-plugins:version-info. + *

+ * This class is immutable. + */ +public final class VersionInfo { + static final Logger LOG = LoggerFactory.getLogger(VersionInfo.class); + + private static final String RATIS_VERSION_PROPERTIES = "ratis-version.properties"; + private static final String UNKNOWN = ""; + private static final String FORMAT = " %20s: %s"; + + private enum SoftwareInfo { + // the ordering is the output ordering + NAME, VERSION, REVISION; + + static SoftwareInfo parse(String key) { + for (SoftwareInfo info : SoftwareInfo.values()) { + if (info.name().toLowerCase().equals(key)) { + return info; + } + } + return null; + } + } + + private enum RuntimeInfo { + // the ordering is the output ordering + JAVA, USER, MEMORY; + + static final InfoMap MAP; + + static { + final EnumMap map = new EnumMap<>(RuntimeInfo.class); + final Properties properties = System.getProperties(); + map.put(JAVA, properties.getProperty("java.vm.name") + " " + properties.getProperty("java.runtime.version")); + final Runtime r = Runtime.getRuntime(); + map.put(MEMORY, "max: " + TraditionalBinaryPrefix.long2String(r.maxMemory()) + + ", total: " + TraditionalBinaryPrefix.long2String(r.totalMemory()) + + ", free: " + TraditionalBinaryPrefix.long2String(r.freeMemory())); + map.put(USER, properties.getProperty("user.name")); + MAP = new InfoMap<>(map); + } + } + + private static class InfoMap> { + private final Map map; + + InfoMap(EnumMap map) { + this.map = Collections.unmodifiableMap(map); + } + + String getOrDefault(INFO info) { + return map.getOrDefault(info, UNKNOWN); + } + + String format(INFO info) { + return String.format(FORMAT, info.name().toLowerCase(), getOrDefault(info)); + } + } + + public static VersionInfo load(Class clazz) { + final Properties properties = new Properties(); + + try (InputStream in = clazz.getClassLoader().getResourceAsStream(RATIS_VERSION_PROPERTIES)) { + if (in != null) { + properties.load(in); + } else { + LOG.warn("Resource '{}' not found for {}", RATIS_VERSION_PROPERTIES, clazz); + 
} + } catch (IOException e) { + LOG.warn("Failed to load resource '{}' for {}", RATIS_VERSION_PROPERTIES, clazz, e); + } + return new VersionInfo(clazz, properties); + } + + private final Class clazz; + private final InfoMap runtimeInfos = RuntimeInfo.MAP; + private final InfoMap softwareInfos; + private final Map otherInfos; + + private VersionInfo(Class clazz, Properties properties) { + this.clazz = Objects.requireNonNull(clazz, "clazz == null"); + + final EnumMap softwareInfoMap = new EnumMap<>(SoftwareInfo.class); + final Map others = new LinkedHashMap<>(); // preserve insertion order + for (Map.Entry e : properties.entrySet()) { + final String key = e.getKey().toString(); + final String value = e.getValue().toString(); + final SoftwareInfo k = SoftwareInfo.parse(key); + if (k != null) { + softwareInfoMap.put(k, value); + } else { + others.put(key, value); + } + } + + this.softwareInfos = new InfoMap<>(softwareInfoMap); + this.otherInfos = Collections.unmodifiableMap(others); + } + + public void printStartupMessages(Object name, Consumer log) { + Objects.requireNonNull(name, "name == null"); + log.accept(String.format("Starting %s -- %s %s", + softwareInfos.getOrDefault(SoftwareInfo.NAME), clazz.getSimpleName(), name)); + final SoftwareInfo[] softwareInfoValues = SoftwareInfo.values(); + for(int i = 1; i < softwareInfoValues.length; i++) { + log.accept(softwareInfos.format(softwareInfoValues[i])); + } + for(RuntimeInfo runtimeInfo : RuntimeInfo.values()) { + log.accept(runtimeInfos.format(runtimeInfo)); + } + for (Map.Entry e : otherInfos.entrySet()) { + log.accept(String.format(FORMAT, e.getKey(), e.getValue())); + } + } + + static void printSystemProperties(BiConsumer out) { + final SortedMap sortedMap = new TreeMap<>(); + for(Map.Entry e : System.getProperties().entrySet()) { + sortedMap.put(e.getKey().toString(), e.getValue()); + } + sortedMap.forEach(out); + } + + /** + * Get the current ratis version. + * @return the current ratis version string. 
+ */ + public static String getSoftwareInfoVersion() { + return VersionInfo.load(VersionInfo.class).softwareInfos.getOrDefault(SoftwareInfo.VERSION); + } + + public static void main(String[] args) { + printSystemProperties((key, value) -> System.out.printf("%-40s = %s%n", key, value)); + + VersionInfo.load(VersionInfo.class).printStartupMessages(":", System.out::println); + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/util/WeakValueCache.java b/ratis-common/src/main/java/org/apache/ratis/util/WeakValueCache.java new file mode 100644 index 0000000000..5c6fcd3683 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/util/WeakValueCache.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; + +import static org.apache.ratis.util.BiWeakValueCache.newMap; + +/** + * Weak Value Cache: {@link K} -> {@link V}. + *

+ * Note that the cached values are weakly referenced. + * A cached value could be garbage-collected (i.e. evicted from the cache) + * when there are no external (strong) references. + *

+ * For key types with two components, use {@link BiWeakValueCache}. + * + * @param <K> the type of the keys. + * @param <V> the type of the cached values. + */ +public final class WeakValueCache { + private final String keyName; + private final String name; + + /** For constructing a value from a key. */ + private final Function constructor; + /** Count the number of values constructed. */ + private final AtomicInteger constructionCount = new AtomicInteger(0); + + /** Map: {@link K} -> {@link V}. */ + private final ConcurrentMap map = newMap(); + + /** + * Create a cache for mapping {@link K} keys to {@link V} values. + * + * @param keyName the name of the key. + * @param constructor for constructing {@link V} values. + */ + public WeakValueCache(String keyName, Function constructor) { + this.keyName = keyName; + this.name = keyName + "-cache"; + this.constructor = constructor; + } + + private V construct(K key) { + final V constructed = constructor.apply(key); + Objects.requireNonNull(constructed, "constructed == null"); + constructionCount.incrementAndGet(); + return constructed; + } + + /** + * If the given key is in the cache, return its cached value. + * Otherwise, create a new value, put it in the cache and then return it. 
+ */ + public V getOrCreate(K key) { + Objects.requireNonNull(key, () -> keyName + " (key) == null"); + return map.computeIfAbsent(key, this::construct); + } + + List getValues() { + return new ArrayList<>(map.values()); + } + + @Override + public String toString() { + return name; + } +} diff --git a/ratis-common/src/test/java/org/apache/ratis/BaseTest.java b/ratis-common/src/test/java/org/apache/ratis/BaseTest.java index fb34d64f1e..a8509c239a 100644 --- a/ratis-common/src/test/java/org/apache/ratis/BaseTest.java +++ b/ratis-common/src/test/java/org/apache/ratis/BaseTest.java @@ -18,30 +18,27 @@ package org.apache.ratis; import org.apache.ratis.conf.ConfUtils; -import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.util.ExitUtils; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.ReferenceCountedLeakDetector; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.StringUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedRunnable; -import org.junit.After; -import org.junit.Rule; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.api.Timeout; -import org.junit.rules.TestName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.event.Level; import java.io.File; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import java.lang.reflect.Method; import java.util.Objects; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; @@ -73,34 +70,29 @@ public void setFirstException(Throwable e) { } } - public List getPeersWithPriority(List peers, RaftPeer suggestedLeader) { - List peersWithPriority = new ArrayList<>(); - for (int i = 0; i < peers.size(); i++) { - RaftPeer peer = peers.get(i); - 
final int priority = peer.equals(suggestedLeader)? 2: 1; - peersWithPriority.add( - RaftPeer.newBuilder(peer).setPriority(priority).build()); - } - return peersWithPriority; - } - - - /* - * Junit 4 reference will be removed and the code will be refactored once - * all the unit tests are migrated to Junit 5. - */ - private String testCaseName; @BeforeEach public void setup(TestInfo testInfo) { - testCaseName = testInfo.getTestMethod() - .orElseThrow(() -> new RuntimeException("Exception while getting test name.")) - .getName(); + checkAssumptions(); + + final Method method = testInfo.getTestMethod().orElse(null); + testCaseName = testInfo.getTestClass().orElse(getClass()).getSimpleName() + + "." + (method == null? null : method.getName()); + } + + @BeforeEach + public void checkAssumptions() { + final int leaks = ReferenceCountedLeakDetector.getLeakDetector().getLeakCount(); + Assumptions.assumeFalse(0 < leaks, () -> "numLeaks " + leaks + " > 0"); + + final Throwable first = firstException.get(); + Assumptions.assumeTrue(first == null, () -> "Already failed with " + first); + + final Throwable exited = ExitUtils.getFirstExitException(); + Assumptions.assumeTrue(exited == null, () -> "Already exited with " + exited); } - // @After annotation is retained to support junit 4 tests. - @After @AfterEach public void assertNoFailures() { final Throwable e = firstException.get(); @@ -111,20 +103,7 @@ public void assertNoFailures() { ExitUtils.assertNotTerminated(); } - // Retained to support junit 4 tests. - @Rule - public final org.junit.rules.Timeout globalTimeout = new org.junit.rules.Timeout( - getGlobalTimeoutSeconds(), TimeUnit.SECONDS ); - - // Retained to support junit 4 tests. 
- @Rule - public final TestName testName = new TestName(); - - public int getGlobalTimeoutSeconds() { - return 100; - } - - private static final Supplier rootTestDir = JavaUtils.memoize( + private static final Supplier ROOT_TEST_DIR = JavaUtils.memoize( () -> JavaUtils.callAsUnchecked(() -> { final File dir = new File(System.getProperty("test.build.data", "target/test/data"), Integer.toHexString(ThreadLocalRandom.current().nextInt())); @@ -138,7 +117,7 @@ public int getGlobalTimeoutSeconds() { public static File getRootTestDir() { - return rootTestDir.get(); + return ROOT_TEST_DIR.get(); } public File getClassTestDir() { @@ -146,9 +125,7 @@ public File getClassTestDir() { } public File getTestDir() { - // This will work for both junit 4 and 5. - final String name = testCaseName != null ? testCaseName : testName.getMethodName(); - return new File(getClassTestDir(), name); + return new File(getClassTestDir(), testCaseName); } @SafeVarargs diff --git a/ratis-common/src/test/java/org/apache/ratis/JUnit5TestExecutionListener.java b/ratis-common/src/test/java/org/apache/ratis/JUnit5TestExecutionListener.java new file mode 100644 index 0000000000..535d8968cb --- /dev/null +++ b/ratis-common/src/test/java/org/apache/ratis/JUnit5TestExecutionListener.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis; + +import org.apache.ratis.util.JavaUtils; +import org.junit.platform.engine.TestExecutionResult; +import org.junit.platform.launcher.TestExecutionListener; +import org.junit.platform.launcher.TestIdentifier; + +import java.io.PrintStream; +import java.util.concurrent.TimeoutException; + +/** + * A {@link TestExecutionListener} to dump all threads after a test timeout failure. + */ +public class JUnit5TestExecutionListener implements TestExecutionListener { + private final PrintStream out = System.out; + + @Override + public void executionFinished(TestIdentifier id, TestExecutionResult result) { + final Throwable timeoutException = getTimeoutException(result); + if (timeoutException != null) { + out.format("%n%s %s failed%n", JavaUtils.date(), id.getDisplayName()); + timeoutException.printStackTrace(out); + JavaUtils.dumpAllThreads(out::println); + } + } + + private static Throwable getTimeoutException(TestExecutionResult result) { + if (result == null) { + return null; + } + final Throwable throwable = result.getThrowable().orElse(null); + return throwable instanceof TimeoutException? throwable : null; + } +} diff --git a/ratis-common/src/test/java/org/apache/ratis/JUnitRunListener.java b/ratis-common/src/test/java/org/apache/ratis/JUnitRunListener.java deleted file mode 100644 index 144c8069c6..0000000000 --- a/ratis-common/src/test/java/org/apache/ratis/JUnitRunListener.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.ratis; - -import org.apache.ratis.util.JavaUtils; -import org.junit.internal.runners.statements.FailOnTimeout; -import org.junit.runner.notification.Failure; -import org.junit.runner.notification.RunListener; -import org.junit.runners.model.Statement; -import org.junit.runners.model.TestTimedOutException; - -import java.io.PrintStream; -import java.util.concurrent.TimeUnit; - -/** - * A {@link RunListener} to dump all threads after a test timeout failure. - */ -public class JUnitRunListener extends RunListener { - private static final Throwable TIMEOUT_EXCEPTION = getTimeoutException(); - private static final String TIMEOUT_EXCEPTION_PREFIX; - - private static Throwable getTimeoutException() { - final FailOnTimeout f = FailOnTimeout.builder().withTimeout(1, TimeUnit.NANOSECONDS).build(new Statement() { - @Override - public void evaluate() throws InterruptedException { - Thread.sleep(1000); - } - }); - try { - f.evaluate(); - } catch(Throwable throwable) { - return throwable; - } - throw new IllegalStateException("Failed to getTimeoutException"); - } - - static { - final String message = JUnitRunListener.TIMEOUT_EXCEPTION.getMessage(); - TIMEOUT_EXCEPTION_PREFIX = message.substring(0, message.indexOf('1')); - } - - private final PrintStream out = System.out; - - @Override - public void testFailure(Failure failure) { - final Throwable timeoutException = getTimeoutException(failure); - if (timeoutException != null) { - out.format("%n%s ", JavaUtils.date()); - timeoutException.printStackTrace(out); - 
JavaUtils.dumpAllThreads(out::println); - } - } - - private static Throwable getTimeoutException(Failure failure) { - if (failure == null) { - return null; - } - final Throwable throwable = failure.getException(); - if (throwable.getClass() != TIMEOUT_EXCEPTION.getClass()) { - return null; - } - final String message = throwable.getMessage(); - if (message == null || !message.startsWith(TIMEOUT_EXCEPTION_PREFIX)) { - return null; - } - return throwable; - } - - public static void main(String[] args) { - final JUnitRunListener listener = new JUnitRunListener(); - listener.out.println("TIMEOUT_EXCEPTION_PREFIX = '" + TIMEOUT_EXCEPTION_PREFIX + "'"); - TIMEOUT_EXCEPTION.printStackTrace(listener.out); - - listener.testFailure(new Failure(null, new TestTimedOutException(999, TimeUnit.MILLISECONDS))); - } -} diff --git a/ratis-common/src/test/java/org/apache/ratis/io/TestMD5.java b/ratis-common/src/test/java/org/apache/ratis/io/TestMD5.java new file mode 100644 index 0000000000..7e7c29b642 --- /dev/null +++ b/ratis-common/src/test/java/org/apache/ratis/io/TestMD5.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.io; + +import org.apache.ratis.util.StringUtils; +import org.junit.jupiter.api.Test; + +import java.nio.ByteBuffer; +import java.util.concurrent.ThreadLocalRandom; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class TestMD5 { + + @Test + void testMD5Hash() { + final byte[] digest = new byte[MD5Hash.MD5_LENGTH]; + final ThreadLocalRandom random = ThreadLocalRandom.current(); + + for(int i = 0; i < 1000; i++) { + random.nextBytes(digest); + final MD5Hash md5 = MD5Hash.newInstance(digest); + + // test hashCode + final int expectedHashCode = oldQuarterDigest(digest); + assertEquals(expectedHashCode, md5.hashCode()); + + // test toString + final String expectedString = StringUtils.bytes2HexString(digest); + assertEquals(expectedString, md5.toString()); + assertEquals(expectedString, MD5Hash.digestToString(digest)); + + // test newInstance(String) + assertEquals(md5, MD5Hash.newInstance(expectedString.toLowerCase())); + assertEquals(md5, MD5Hash.newInstance(expectedString.toUpperCase())); + + // test getDigest + final ByteBuffer expectedByteBuffer = ByteBuffer.wrap(digest); + assertEquals(expectedByteBuffer, md5.getDigest()); + } + } + + /** + * Return a 32-bit digest of the MD5. + * @return the first 4 bytes of the md5 + */ + private static int oldQuarterDigest(byte[] digest) { + int value = 0; + for (int i = 0; i < 4; i++) { + value |= ((digest[i] & 0xff) << (8*(3-i))); + } + return value; + } +} \ No newline at end of file diff --git a/ratis-common/src/test/java/org/apache/ratis/retry/TestRetryPolicyParse.java b/ratis-common/src/test/java/org/apache/ratis/retry/TestRetryPolicyParse.java new file mode 100644 index 0000000000..17edb38682 --- /dev/null +++ b/ratis-common/src/test/java/org/apache/ratis/retry/TestRetryPolicyParse.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.retry; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class TestRetryPolicyParse { + @Test + void testParseExponentialBackoffRetry() { + final RetryPolicy policy = RetryPolicy.parse("ExponentialBackoffRetry,100ms,5s,100"); + assertInstanceOf(ExponentialBackoffRetry.class, policy); + } + + @Test + void testParseMultipleLinearRandomRetryWithClassname() { + final MultipleLinearRandomRetry expected = + MultipleLinearRandomRetry.parseCommaSeparated("1ms,10,1s,20,5s,1000"); + final RetryPolicy actual = + RetryPolicy.parse("MultipleLinearRandomRetry,1ms,10,1s,20,5s,1000"); + assertEquals(expected, actual); + } + + @Test + void testParseMultipleLinearRandomRetryWithoutClassname() { + final MultipleLinearRandomRetry expected = + MultipleLinearRandomRetry.parseCommaSeparated("1ms,10,1s,20,5s,1000"); + final RetryPolicy actual = RetryPolicy.parse("1ms,10,1s,20,5s,1000"); + assertEquals(expected, actual); + } + + @Test + void testParseUnknownClassnameThrows() { + assertThrows(IllegalArgumentException.class, + () -> RetryPolicy.parse("UnknownRetry,1ms,10")); + } + + @Test + void 
testParseMultipleLinearRandomRetryMissingParamsThrows() { + assertThrows(IllegalArgumentException.class, + () -> RetryPolicy.parse("MultipleLinearRandomRetry")); + } + + @Test + void testParseNonLegacyUnknownFirstTokenThrows() { + assertThrows(IllegalArgumentException.class, + () -> RetryPolicy.parse("not_a_duration,1ms,10")); + } +} diff --git a/ratis-common/src/test/java/org/apache/ratis/test/tag/Flaky.java b/ratis-common/src/test/java/org/apache/ratis/test/tag/Flaky.java new file mode 100644 index 0000000000..2d8c63030a --- /dev/null +++ b/ratis-common/src/test/java/org/apache/ratis/test/tag/Flaky.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.test.tag; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +import org.junit.jupiter.api.Tag; + +/** + * Annotation to mark JUnit5 test classes or methods that exhibit intermittent + * issues. These are run separately from the normal tests in CI. In case of + * failure they may be repeated a few times. + * Usage: @Flaky("RATIS-123") + */ +@Target({ ElementType.TYPE, ElementType.METHOD }) +@Retention(RetentionPolicy.RUNTIME) +@Tag("flaky") +public @interface Flaky { + /** + * The issue(s) tracking the flaky test. + */ + String[] value(); +} diff --git a/ratis-common/src/test/java/org/apache/ratis/test/tag/FlakyTest.java b/ratis-common/src/test/java/org/apache/ratis/test/tag/FlakyTest.java new file mode 100644 index 0000000000..f43bd16985 --- /dev/null +++ b/ratis-common/src/test/java/org/apache/ratis/test/tag/FlakyTest.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.test.tag; + +/** + * Interface to mark JUnit4 test classes or methods that exhibit intermittent + * issues. These are run separately from the normal tests in CI. In case of + * failure they may be repeated a few times. + * Usage: @Category(FlakyTest.class) @Flaky("RATIS-123") + */ +public interface FlakyTest { + // category marker +} diff --git a/ratis-common/src/test/java/org/apache/ratis/trace/TestTraceUtils.java b/ratis-common/src/test/java/org/apache/ratis/trace/TestTraceUtils.java new file mode 100644 index 0000000000..71f0f25643 --- /dev/null +++ b/ratis-common/src/test/java/org/apache/ratis/trace/TestTraceUtils.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.trace; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.sdk.testing.junit5.OpenTelemetryExtension; +import io.opentelemetry.sdk.trace.data.SpanData; +import org.apache.ratis.protocol.RaftPeerId; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import java.util.List; +import java.util.concurrent.CompletableFuture; + +public class TestTraceUtils { + + @RegisterExtension + private static final OpenTelemetryExtension openTelemetryExtension = + OpenTelemetryExtension.create(); + + private void runTraceAsyncAndAssertClientSpan(boolean tracingEnabled, boolean expectClientSpan) + throws Exception { + TraceUtils.setTracerWhenEnabled(tracingEnabled); + TraceClient.asyncSend( + () -> CompletableFuture.completedFuture("ok"), + null, + RaftPeerId.valueOf("s0") + ).get(); + + List spans = openTelemetryExtension.getSpans(); + boolean hasClientSpan = spans.stream().anyMatch(s -> s.getKind() == SpanKind.CLIENT); + if (expectClientSpan) { + assertTrue(hasClientSpan, "Expected CLIENT span from traceAsyncRpcSend, got: " + spans); + } else { + assertFalse(hasClientSpan, "Expected no CLIENT span when tracing disabled, got: " + spans); + } + } + + @Test + public void testTraceAsyncRpcSendCreatesClientSpan() throws Exception { + runTraceAsyncAndAssertClientSpan(true, true); + } + + @Test + public void testTraceAsyncRpcSendCreatesClientSpanDisabled() throws Exception { + runTraceAsyncAndAssertClientSpan(false, false); + } +} diff --git a/ratis-common/src/test/java/org/apache/ratis/util/TestFileUtils.java b/ratis-common/src/test/java/org/apache/ratis/util/TestFileUtils.java index 3171756b87..8f64ff294c 100644 --- a/ratis-common/src/test/java/org/apache/ratis/util/TestFileUtils.java +++ b/ratis-common/src/test/java/org/apache/ratis/util/TestFileUtils.java @@ -26,6 
+26,32 @@ /** Test methods of {@link FileUtils}. */ public class TestFileUtils extends BaseTest { + @Test + public void testIsAncestor() throws IOException { + runTestIsAncestor(true, "/a", "/a/b"); + runTestIsAncestor(true, "/a", "/a/"); + runTestIsAncestor(true, "/a", "/a"); + runTestIsAncestor(true, "a", "a/b"); + runTestIsAncestor(true, "a", "a/"); + runTestIsAncestor(true, "a", "a"); + + runTestIsAncestor(false, "/a", "/c"); + runTestIsAncestor(false, "/a", "/abc"); + runTestIsAncestor(false, "/a", "/a/../c"); + runTestIsAncestor(false, "a", "a/../c"); + runTestIsAncestor(false, "a", "/c"); + } + + static void runTestIsAncestor(boolean expected, String ancestor, String path) throws IOException { + final boolean computed = isAncestor(ancestor, path); + System.out.printf("isAncestor(%2s, %-9s)? %s, expected? %s%n", + ancestor, path, computed, expected); + Assertions.assertSame(expected, computed); + } + + static boolean isAncestor(String ancestor, String path) throws IOException { + return FileUtils.isAncestor(new File(ancestor), new File(path)); + } @Test public void testRenameToCorrupt() throws IOException { diff --git a/ratis-metrics-default/src/test/java/org/apache/ratis/metrics/impl/TestRefCountingMap.java b/ratis-common/src/test/java/org/apache/ratis/util/TestRefCountingMap.java similarity index 94% rename from ratis-metrics-default/src/test/java/org/apache/ratis/metrics/impl/TestRefCountingMap.java rename to ratis-common/src/test/java/org/apache/ratis/util/TestRefCountingMap.java index be6eb3037c..3c3cbdc6ee 100644 --- a/ratis-metrics-default/src/test/java/org/apache/ratis/metrics/impl/TestRefCountingMap.java +++ b/ratis-common/src/test/java/org/apache/ratis/util/TestRefCountingMap.java @@ -15,13 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.ratis.metrics.impl; +package org.apache.ratis.util; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.Arrays; import java.util.Collection; import java.util.Set; @@ -128,7 +129,7 @@ public void testKeySet() { Set keys = map.keySet(); assertEquals(3, keys.size()); - Lists.newArrayList("foo", "bar", "baz").forEach(v -> assertTrue(keys.contains(v))); + Arrays.asList("foo", "bar", "baz").forEach(v -> assertTrue(keys.contains(v))); } @Test @@ -141,7 +142,7 @@ public void testValues() { Collection values = map.values(); assertEquals(3, values.size()); - Lists.newArrayList("foovalue", "foovalue3", "foovalue4") + Arrays.asList("foovalue", "foovalue3", "foovalue4") .forEach(v -> assertTrue(values.contains(v))); } } diff --git a/ratis-common/src/test/resources/META-INF/services/org.junit.platform.launcher.TestExecutionListener b/ratis-common/src/test/resources/META-INF/services/org.junit.platform.launcher.TestExecutionListener new file mode 100644 index 0000000000..71636f29a8 --- /dev/null +++ b/ratis-common/src/test/resources/META-INF/services/org.junit.platform.launcher.TestExecutionListener @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +org.apache.ratis.JUnit5TestExecutionListener \ No newline at end of file diff --git a/ratis-docs/pom.xml b/ratis-docs/pom.xml index 47f1175a46..894b6a52e4 100644 --- a/ratis-docs/pom.xml +++ b/ratis-docs/pom.xml @@ -20,10 +20,29 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-docs Apache Ratis Documentation jar + + + + true + + + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test + + + diff --git a/ratis-docs/src/site/markdown/cli.md b/ratis-docs/src/site/markdown/cli.md index 60958fc7ed..ab9f89982f 100644 --- a/ratis-docs/src/site/markdown/cli.md +++ b/ratis-docs/src/site/markdown/cli.md @@ -182,5 +182,5 @@ It has the following subcommands: ### local raftMetaConf Generate a new raft-meta.conf file based on original raft-meta.conf and new peers, which is used to move a raft node to a new node. 
``` -$ ratis sh local raftMetaConf -peers -path +$ ratis sh local raftMetaConf -peers <[P0_ID|]P0_HOST:P0_PORT,[P1_ID|]P1_HOST:P1_PORT,[P2_ID|]P2_HOST:P2_PORT> -path ``` diff --git a/ratis-docs/src/site/markdown/concept/advanced.md b/ratis-docs/src/site/markdown/concept/advanced.md new file mode 100644 index 0000000000..58b4f8e3a5 --- /dev/null +++ b/ratis-docs/src/site/markdown/concept/advanced.md @@ -0,0 +1,85 @@ + +# Introduction to Apache Ratis + +Previous: [Operations and Management](operations.md) | Top: [Overview of Raft and Ratis](index.md) + +## Section 5: Advanced Topics + +* [Scaling with Multi-Raft Groups](#scaling-with-multi-raft-groups) + +### Scaling with Multi-Raft Groups + +As your application grows, you may find that a single Raft group becomes a bottleneck. This is +where Ratis's multi-group capability becomes valuable. + +#### Understanding Multi-Raft + +Multi-Raft is an implementation pattern that Ratis supports for scaling beyond the limits of a +single Raft group. In a multi-Raft setup, you run multiple independent Raft groups, each +handling a subset of your application's operations. Each group operates independently with its +own leader election, consensus, log, and state machine. + +#### What is a Raft Group in Ratis? + +In Ratis terminology, a "Raft Group" is a collection of servers that participate in a single +Raft cluster. Each group has a unique RaftGroupId (a UUID) that distinguishes it from other groups. +Each group consists of a set of RaftPeer objects representing the servers that participate in that +group's consensus. 
+ +#### When to Use Multiple Groups + +Consider using multiple Raft groups when a single group cannot handle the required throughput, +when you can logically partition your data or operations (such as having one group per geographic +region, per customer tenant, or per data type), when you need better fault isolation (if one +group fails, other groups can continue operating), or when you need different operational +characteristics for different parts of your system. + +#### Implementation Considerations + +A single RaftServer instance can participate in multiple groups simultaneously. Each group gets +its own "Division" within the server, with its own state machine and storage. Since groups don't +coordinate at the Raft level, your application must handle any cross-group consistency +requirements through distributed transactions, saga patterns, or eventual consistency approaches. + +To use multi-Raft effectively, you need to partition your application state. Horizontal +partitioning involves partitioning data across groups based on some key (e.g., user ID hash, +geographic region). Functional partitioning assigns different groups to handle different types +of operations or services. Hierarchical partitioning uses a tree-like structure where +higher-level groups coordinate lower-level groups. + +Clients need to know which group to send requests to through client-side routing logic, a proxy +layer that routes requests, or consistent hashing schemes. + +#### Trade-offs and Limitations + +Multi-group setups are significantly more complex than single-group setups. Maintaining +consistency across groups requires application-level coordination, which can be complex and +error-prone. More groups mean more leaders to monitor, more logs to manage, and more complex +failure scenarios. Each group consumes resources, so there's a practical limit to the number of +groups per server. 
+ +#### Best Practices + +Begin with a single group and only move to multiple groups when you have a clear scalability +need. Design your data model and operations to be partition-friendly from the start if you +anticipate needing multiple groups. Implement comprehensive monitoring for all groups, including +leader stability, replication lag, and resource usage. + +Multi-Raft groups are a powerful scaling tool, but they should be used judiciously. The added +complexity is only worthwhile when you have clear scalability requirements that cannot be met +with a single Raft cluster. diff --git a/ratis-docs/src/site/markdown/concept/core-concepts.md b/ratis-docs/src/site/markdown/concept/core-concepts.md new file mode 100644 index 0000000000..a97e6e91ca --- /dev/null +++ b/ratis-docs/src/site/markdown/concept/core-concepts.md @@ -0,0 +1,200 @@ + +# Introduction to Apache Ratis + +Previous: [Overview of Raft and Ratis](index.md) | Top:[Overview of Raft and Ratis](index.md) + +## Section 2: Core Concepts + +* [The Raft Log](#the-raft-log---foundation-of-consensus) +* [The State Machine](#the-state-machine---your-applications-heart) +* [Consistency Models and Read Patterns](#consistency-models-and-read-patterns) + +### The Raft Log - Foundation of Consensus + +The Raft log is the central data structure that makes distributed consensus possible. Each server +in a Raft group maintains its own copy of this append-only ledger that records every operation +in the exact order they should be applied to the state machine. + +Each entry in the log contains three key pieces of information: the operation itself (what should +be done), a log index (a sequential number indicating the entry's position), and a term number +(the period during which a leader created this entry). Terms represent periods of leadership and +increase each time a new leader is elected, preventing old leaders from overwriting newer entries. 
+The combination of the term and log index is referred to as a term-index (`TermIndex`) and +establishes the ordering of entries in the log. + +The log serves as both the mechanism for replication (leaders send log entries to followers) and +the source of truth for recovery (servers can rebuild their state by replaying the log). When we +talk about "committing" an operation, we mean that a majority of servers have acknowledged +storing that log entry, making it safe to apply to the state machine. + +### The State Machine - Your Application's Heart + +In Ratis, the state machine is your application's primary integration point. Your business logic +or data storage operations are implemented by the state machine. + +The state machine is a deterministic computation engine that processes a sequence of operations +and maintains some internal state. The state machine must be deterministic: given the same +sequence of operations, it must always produce the same results and end up in the same final state. +Operations are processed sequentially, one at a time, in the order they appear in the Raft log. + +#### State Machine Responsibilities + +Your state machine has three primary responsibilities. First, it processes Raft transactions by +validating incoming requests before they're replicated and applying committed operations to your +application state. Second, it maintains your application's actual data, which might be an +in-memory data structure, a local database, files on disk, or any combination of these. Third, +it creates point-in-time representations of its state (snapshots) and can restore its state from +snapshots during recovery. + +#### The State Machine Lifecycle + +The state machine operates at two different lifecycle levels: an overall peer lifecycle and a +per-transaction processing lifecycle. + +##### Peer Lifecycle + +During initialization, when a peer starts up, the state machine loads any existing snapshots and +prepares its internal data structures. 
The Raft layer then replays any log entries that occurred +after the snapshot, bringing the peer up to the current state of the group. + +During normal operation, the state machine continuously processes transactions as they're +committed by the Raft group, handles read-only queries, and may respond to changes in the node's +status as a leader or follower. For read-only operations, the state machine can answer queries +directly without going through the Raft log, providing better performance for reads but with +[consistency trade-offs](#consistency-models-and-read-patterns). + +Periodically, the state machine creates snapshots of its current state. This happens either +automatically based on configuration (like log size thresholds) or manually through +administrative commands. + +##### Transaction Processing Lifecycle + +For each individual transaction, the state machine follows a multistep processing sequence. In +the validation phase, the leader's state machine examines incoming requests through the +`startTransaction` method. This is where you validate that the operation is properly structured +and valid in the current context. + +In the pre-append phase, just before the operation is written to the log, the state machine can +perform any final preparations through the `preAppendTransaction` method. After the operation is +committed by the Raft group, the state machine is notified via `applyTransactionSerial` and can +handle any order-sensitive logic that must happen before the main application logic is invoked. + +Finally, in the application phase, the operation is applied to the actual application state +through the `applyTransaction` method. This is where your business logic executes and where the +operation's effects become visible to future queries. + +#### Designing Your State Machine + +When designing your state machine, ensure your operations are deterministic and can be efficiently +serialized for replication. 
Operations are not required to be idempotent because the Raft protocol +ensures that each operation is applied exactly once on each peer, however idempotent operations may +make it easier to reason about your application. + +Plan how you'll represent your application's state for both runtime efficiency and snapshot +serialization. If your state machine maintains state in external systems (databases, files), +ensure your snapshot process captures this external state consistently. + +Robust error handling is crucial. Server-side errors require distinguishing between recoverable +errors (like validation failures) and fatal errors (like storage failures). Errors in +`startTransaction` prevent operations from being committed and replicated. Errors in +`applyTransaction` are considered fatal since they indicate the state machine cannot process +already-committed operations. + +### Consistency Models and Read Patterns + +In a distributed system, consistency refers to the guarantees you have about seeing the effects +of write operations when you read data. For write operations, Raft and Ratis provide strong +consistency: once a write operation is acknowledged as committed, all subsequent reads will see +the effects of that write. Read operations are more complex because Ratis offers several +different approaches with different consistency and performance characteristics. + +#### Write Consistency + +Write operations in Ratis follow a straightforward path that provides strong consistency. Clients +send write requests to the leader, which validates the operation through the state machine's +`startTransaction` method, then replicates it to a majority of followers. Once a majority +acknowledges, the operation is committed. The leader applies the operation to its state machine +and returns the result to the client, while followers eventually apply the same operation in the +same order. 
+ +#### Read Consistency Options + +Ratis provides several read patterns with different consistency and performance characteristics. + +Read requests query the state machine of a server directly without going through the Raft consensus +protocol. The `sendReadOnly()` API sends a read request to the leader. If a non-leader server +receives such a request, it throws a `NotLeaderException` and then the client will retry other +servers. In contrast, the `sendReadOnly(message, serverId)` API sends the request to a particular +server, which may be a leader or a follower. + +The server's `raft.server.read.option` configuration affects read consistency behavior: + +* **DEFAULT (default setting)**: `sendReadOnly()` performs leader reads for efficiency. It provides +strong consistency under normal conditions. However, in case an old leader has been +partitioned from the majority and a new leader has been elected, reading from the old leader can +return stale data since the old leader does not have the new transactions committed by the new +leader (referred to as the "split-brain problem"). +* **LINEARIZABLE**: both `sendReadOnly()` and `sendReadOnly(message, serverId)` use the ReadIndex +protocol to provide linearizable consistency, ensuring you always read the most up-to-date committed +data and won't read stale data as described in the "split-brain problem" above. + * Non-linearizable API: Clients may use `sendReadOnlyNonLinearizable()` to read from the leader's + state machine directly without a linearizable guarantee. + +Server-side configuration allows operators to choose between performance (leader reads) and strong +consistency guarantees (linearizable reads) for their entire cluster. + +Stale reads with minimum index let you specify a minimum log index that the peer must have +applied before serving the read. Call `sendStaleRead()`: if the peer hasn't caught up to your +minimum index, it will throw a `StaleReadException`.
+ +In summary: +* **Leader reads** query the current leader's state machine directly without going through the Raft +consensus protocol. Call `sendReadOnly()` for the strongest consistency supported by the server. +* Use `sendReadOnlyNonLinearizable()` for leader reads without a linearizable guarantee. +* Use `sendReadOnly(message, serverId)` with a specific follower's server ID for **follower reads**, +which offer better performance but may return stale data. +* Use `sendStaleRead()` to specify the minimum log index that the server must have applied. +* Use `sendReadAfterWrite()` to ensure the read reflects the latest successful write by the +same client, for **read-after-write consistency**. + +Note that all of these operations may be performed as blocking or async operations. See +[Client API Patterns](integration.md#client-api-patterns) for more information. + +#### The Query Method and Read-Only Operations + +The state machine's `query` method enables you to handle read-only operations without going +through the Raft protocol. This provides significant performance benefits but requires careful +consideration of consistency requirements. Your state machine's `query` method will be called +for explicit read-only requests from clients, queries that need to read state without modifying +it, and health checks or monitoring queries. + +#### Choosing the Right Read Pattern + +Use **linearizable reads** when correctness is more important than performance, you need to read +your own writes immediately, or the application cannot tolerate any stale data. Use **leader +reads** when you need strong consistency but can tolerate very brief staleness during network +partitions, or when building interactive applications where users expect to see their recent +changes.
+ +Use **follower reads** when you can tolerate stale data in exchange for better performance and +availability, you're implementing read replicas for scaling read-heavy workloads, or the data +being read doesn't change frequently. Use **stale reads** when you need fine-grained control +over the consistency/performance trade-off. + +--- +Next: [Integration](integration.md) \ No newline at end of file diff --git a/ratis-docs/src/site/markdown/concept/index.md b/ratis-docs/src/site/markdown/concept/index.md index 279338a5fd..3843221a69 100644 --- a/ratis-docs/src/site/markdown/concept/index.md +++ b/ratis-docs/src/site/markdown/concept/index.md @@ -14,25 +14,95 @@ See the License for the specific language governing permissions and limitations under the License. --> -# Concepts -TODO: complete this section -## RaftServer +# Introduction to Apache Ratis -## RaftPeer and RaftPeerId +## Sections +1. [Overview](index.md#section-1) +2. [Core Concepts](core-concepts.md) +3. [Integration Guide](integration.md) +4. [Operations and Management](operations.md) +5. [Advanced Topics](advanced.md) -## RaftGroup + -## Transport (gRPC,Netty ...) +## Section 1: Overview of Raft and Apache Ratis -## Raft Log +* [Introduction to Raft and Apache Ratis](#raft-and-apache-ratis) +* [Raft Cluster Topology](#raft-cluster-topology) -## StateMachine -A StateMachine is the abstraction point for user-code +### Raft and Apache Ratis -## Snapshot +The Raft consensus algorithm solves a fundamental problem in distributed systems: how do you get +multiple computers to agree on a sequence of operations, even when some might fail or become +unreachable? This problem, known as distributed consensus, is at the heart of building reliable +distributed systems. -## TermIndex +Raft ensures that a cluster of servers maintains an identical, ordered log of operations. Each +server applies these operations to its local state machine in the same order, guaranteeing that +all servers end up with identical state. 
This approach, called state machine replication, +provides both consistency and fault tolerance. -## Transaction (as in applyTransaction or startTransaction in StateMachine) +You should consider using Raft when your system needs strong consistency guarantees across +multiple servers. This typically applies to systems where correctness is more important than +absolute performance, such as distributed databases, configuration management systems, or any +application where split-brain scenarios would be unacceptable. -## StateMachineStorage +Apache Ratis is a Java library that implements the Raft consensus protocol. The key word here +is "library" - Ratis is not a standalone service that you communicate with over the network. +Instead, you embed Ratis directly into your Java application, and it becomes part of your +application's runtime. + +This embedded approach creates tight integration between your application and the consensus +mechanism. Your application and Ratis run in the same JVM, sharing memory and computational +resources. Your application provides the business logic (the "state machine" in Raft terminology), +while Ratis handles the distributed consensus mechanics needed to keep multiple instances of your +application synchronized. + +### Raft Cluster Topology + +Understanding the basic building blocks of a Raft deployment affects both the correctness and +performance of your system. + +#### Servers, Clusters, and Groups + +A Raft server (also known as a "peer" or "member") is a single running instance of your application +with Ratis embedded. Each server runs your state machine and participates in the consensus +protocol. + +A Raft cluster is a physical collection of servers that can participate in consensus. A Raft +group is a logical consensus domain that runs across a specific subset of peers in the cluster. +One of the peers in a group acts as the "leader" while the others are "followers" or "listeners". 
+The leader handles all write requests and replicates operations to other peers in the group. Both +leaders and followers can service read requests, with different consistency guarantees. A single +cluster can host multiple independent Raft groups, each with its own leader election, consistency +and state replication. + +#### Majority-Based Decision-Making + +Raft's safety guarantees depend on majority agreement within each group. The leader replicates +each operation to the followers in its group, and operations are committed when at least +$\lfloor N/2 + 1 \rfloor$ peers in that group acknowledge them. This means a group of 3 peers can +tolerate 1 failure, a group of five peers can tolerate 2 failures, and so on. Since a group of +$N$ peers for an even $N$ can tolerate the same number of failures as a group of $(N-1)$ peers, +groups typically consist of an odd number of peers (3, 5, or 7 are common) to ensure clear +majority decisions. + +This majority requirement affects both availability and performance. A group remains available as +long as a majority of its peers are reachable and functioning. However, every transaction must +wait for majority acknowledgment, so the slowest server in the majority determines your write +latency. + +#### Server Placement and Network Considerations + +The physical and network placement of your servers impacts both availability and performance. +Placing all servers in the same rack or data center provides the lowest latency but risks +creating a single point of failure. Distributing servers across multiple availability zones or +data centers improves fault tolerance but can increase latency. + +A common approach is to place servers across multiple availability zones within a single region +for a balance of fault tolerance and performance. For applications requiring geographic +distribution, you might place servers in different regions, accepting higher latency in exchange +for better disaster recovery capabilities. 
+ +--- +Next: [Core Concepts](core-concepts.md) \ No newline at end of file diff --git a/ratis-docs/src/site/markdown/concept/integration.md b/ratis-docs/src/site/markdown/concept/integration.md new file mode 100644 index 0000000000..352f1308df --- /dev/null +++ b/ratis-docs/src/site/markdown/concept/integration.md @@ -0,0 +1,107 @@ + +# Introduction to Apache Ratis + +Previous: [Core Concepts](core-concepts.md) | Top:[Overview of Raft and Ratis](index.md) + +## Section 3: Integration + +* [Logical Organization of Ratis](#logical-organization-of-ratis) +* [Server Configuration and Lifecycle](#server-configuration-and-lifecycle) + +### Logical Organization of Ratis + +Rather than focusing on package structure, let's examine the logical components and their +relationships, understanding how they work together to provide the Raft consensus functionality. + +#### Primary Integration Points + +When integrating with Ratis, you'll work with a small set of key classes and interfaces. + +`StateMachine` Interface - This is where you'll spend most of your development time. Your +application implements this interface to define what operations mean and how they affect your +data. Key methods include `startTransaction()` to validate requests, `applyTransaction()` to +process committed operations, `query()` to handle reads, and `takeSnapshot()` to checkpoint your +application state. + +`RaftClient` - Your application uses this to send requests to the Raft cluster. It handles +leader discovery, retries, and connection management automatically. You'll primarily use `send()` +for writes, `sendReadOnly()` for consistent reads, and `sendStaleRead()` for performance- +optimized reads. + +`RaftServer` - This hosts your `StateMachine` and handles the Raft protocol. You'll configure +and start it, but most interaction happens through your `StateMachine` implementation. One +server can participate in multiple Raft groups simultaneously. 
+ +`RaftGroup` and `RaftPeer` - These define your cluster topology. `RaftGroup` represents a +consensus domain (which peers participate in a group), while `RaftPeer` represents individual +servers (their IDs and network addresses). + +Configuration Classes - `RaftProperties` and related classes control behavior like timeouts, +storage locations, and transport settings. + +Message and Request Types - Your operations flow through the system as `Message` objects. These +are serializable containers that carry your application's operations from clients to the +`StateMachine`. The `Message` interface is simple but designing your message types thoughtfully +affects both performance and maintainability. + +#### Client API Patterns + +`RaftClient` provides several API styles to match different application patterns. The +`BlockingApi`, accessed through `RaftClient.io()`, offers traditional synchronous operations: +simple to use and understand, ideal when simplicity matters more than maximum throughput. The +`AsyncApi`, accessed through `RaftClient.async()` provides non-blocking operations that return +`CompletableFuture` objects, allowing your application to send multiple requests concurrently. + +For applications that need to transfer large amounts of data, the `DataStreamApi` provides +efficient streaming that bypasses the normal Raft log for the data payload itself. Instead of +sending large payloads through the consensus mechanism, you stream data directly to peers while +still maintaining ordering and consistency guarantees through the Raft protocol. + +The `AdminApi` handles cluster management operations like adding or removing peers, triggering +snapshots, and querying cluster status. + +#### Request Flow Through the System + +When your application calls `RaftClient.send(message)`, the `RaftClient` first determines which +server to contact, handling leader discovery automatically. 
If the contacted server isn't the +current leader, it returns a `NotLeaderException` with information about the actual leader. + +Once the message reaches the leader, your `StateMachine`'s `startTransaction(message)` method +validates the request. If validation succeeds, the leader replicates the operation through the +Raft protocol to a majority of followers. After the operation is committed, the leader calls +your `StateMachine`'s `applyTransaction(message)` method to execute the business logic. + +The result flows back to the client, while followers eventually receive and apply the same +operation through their own `applyTransaction` calls. Read-only operations can bypass this flow +by going directly to the `query` method, trading consistency guarantees for better performance. + + +### Server Configuration and Lifecycle + +`RaftServer` is the main server-side entry point, but it requires several configuration decisions +before startup. You'll need to choose a transport implementation (gRPC works well for most +deployments, while Netty provides more control), storage configuration including directories for +logs and snapshots, and key configuration like timeout and retry policies, snapshot policies, +and security settings. + +A single `RaftServer` instance can participate in [multiple Raft groups](advanced.md) +simultaneously through Ratis's `Division` concept. Each group gets its own state machine instance +and storage within the server. 
+ +--- +Next: [Operations and Management](operations.md) \ No newline at end of file diff --git a/ratis-docs/src/site/markdown/concept/operations.md b/ratis-docs/src/site/markdown/concept/operations.md new file mode 100644 index 0000000000..11e774d715 --- /dev/null +++ b/ratis-docs/src/site/markdown/concept/operations.md @@ -0,0 +1,137 @@ + +# Introduction to Apache Ratis + +Previous: [Integration](integration.md) | Top:[Overview of Raft and Ratis](index.md) + +## Section 4: Operations and Management + +* [Snapshots](#snapshots---managing-growth-and-recovery) +* [Leadership and Fault Tolerance](#leadership-and-fault-tolerance) + +### Snapshots - Managing Growth and Recovery + +Snapshots are a point-in-time representation of your state machine's complete state, along with +metadata about which log entries are included in that state. They prevent the log from growing +without bound and enable efficient recovery and catch-up for peers that have fallen behind. + +The snapshot includes the actual application state, the term-index of the last log entry that +contributed to this state, and the Raft group configuration at the time the snapshot was taken. + +Without snapshots, the Raft log would grow indefinitely, eventually consuming all available +storage. Crashed peers would need to replay potentially millions of log entries to catch up, +dramatically slowing recovery. New peers joining an established group would need to process the +entire history of the group, which could take hours or days for active systems. + +#### Creating Snapshots + +Snapshots can be created automatically when the log grows beyond a certain size, manually +triggered through the admin API, or sent by the leader to peers that are far behind instead of +replaying thousands of log entries. + +When your state machine's `takeSnapshot` method is called, it needs to create a consistent view +of your application state. 
This might involve pausing writes, creating a database transaction, +or using copy-on-write data structures. The method must serialize state by writing it to durable +storage in a format that can be read back later, record which term-index the snapshot represents, +and return the log index so Ratis can safely discard older log entries. + +Different applications will have different strategies for snapshot creation. A stop-the-world +approach pauses all operations while creating the snapshot: simple but impacts availability. +Copy-on-write uses data structures that support efficient point-in-time copies. Database +transactions can create consistent snapshots if your state is in a database. Some storage +engines support checkpointing to leverage native snapshot capabilities. + +#### Snapshot Installation and Recovery + +When a peer needs to catch up using a snapshot, it receives the snapshot data from the leader or +loads it from local storage. The state machine is paused to prevent conflicts during restoration, +the snapshot data is loaded replacing any existing state, and the state machine resumes normal +operation by replaying any log entries that occurred after the snapshot. + +Your state machine's `reinitialize` method is responsible for loading snapshots during startup by +loading the latest snapshot if available, with the Raft layer replaying any log entries after +the snapshot. + +#### Designing Snapshot-Friendly State Machines + +When designing your state machine, ensure your state can be efficiently serialized and +deserialized, avoiding complex object graphs that are difficult to serialize. For very large +state machines, consider whether you can implement incremental snapshots that only capture +changes since the last snapshot. + +If your state machine maintains state in external systems, ensure your snapshot process captures +this external state consistently. 
Regularly test your snapshot and recovery process to ensure it +works correctly under various failure scenarios. + +### Leadership and Fault Tolerance + +Ratis handles the mechanics of leader election and failover automatically. If your application does +not care about whether a specific server is a leader or follower, then it does not need to do +anything when leadership changes. Otherwise, your application can optionally observe leadership +changes and react accordingly: see [State Machine Leadership Events](#state-machine-leadership-events). + +#### Leadership and Automatic Election + +In Raft, the leader is the only server that can accept write requests and decide the order of +operations in the log. This centralized decision-making enables Raft to provide strong +consistency guarantees. Leadership is temporary and can change at any time due to failures, +network partitions, or normal operational events. + +When a Raft group starts up, or when the current leader fails, the remaining servers +automatically elect a new leader through a voting process. This process uses randomized timeouts +to prevent split votes and ensures that only servers with up-to-date logs can become leaders. +This happens entirely within Ratis without any intervention from your application code. + +#### Leadership and Client Behavior + +From a client perspective, leadership changes are largely transparent. Clients can send requests +to any server in the group, and if that server is not the leader, it returns a +`NotLeaderException` with information about the current leader. If the leader fails while +processing a request, the client's retry logic will eventually find the new leader and retry. + +Leadership changes can cause temporary performance degradation as the new leader establishes +itself and catches up any lagging followers. Applications should be designed to handle these +temporary slowdowns gracefully.
+ +#### State Machine Leadership Events + +Your `StateMachine` can observe and react to leadership changes through several event +notification methods exposed through the `StateMachine.EventApi` interface. The +`notifyLeaderChanged` method is called whenever leadership changes. The `notifyLeaderReady` +method is called when this server becomes leader and is ready to serve requests: the +appropriate place to start any leader-specific background tasks. The `notifyNotLeader` method +is called when this server is no longer the leader: where you should clean up any leader-specific +resources. + +#### Handling Network Partitions + +When a network partition occurs, the Raft group may split into multiple subgroups that cannot +communicate with each other. Raft's majority-based approach ensures that at most one subgroup (that +contains a majority of servers) can continue processing writes. Any minority subgroup will be +unable to elect a leader and will reject write requests. + +This behavior prevents split-brain scenarios where different parts of the system make conflicting +decisions. However, it also means that your application may become unavailable for writes if no +subgroup has a majority of servers. + +Consider the implications of different partition scenarios when designing your Raft deployment. +If you're deploying across multiple data centers, consider how network partitions between data +centers might affect availability. You may need to choose between consistency and availability +based on your application's requirements.
+ +--- +Next: [Advanced Topics](advanced.md) \ No newline at end of file diff --git a/ratis-docs/src/site/markdown/configurations.md b/ratis-docs/src/site/markdown/configurations.md index 7c8fb001fb..f5189ed862 100644 --- a/ratis-docs/src/site/markdown/configurations.md +++ b/ratis-docs/src/site/markdown/configurations.md @@ -104,6 +104,16 @@ When bootstrapping a new peer, If the gap between the match index of the peer and the leader's latest committed index is less than this gap, we treat the peer as caught-up. Increase this number when write throughput is high. +--------------------------------------------------------------------------------- +| **Property** | `raft.server.staging.timeout` | +|:----------------|:-----------------------------------------| +| **Description** | timeout of bootstrapping a new peer | +| **Type** | TimeDuration | +| **Default** | 3 times of `raft.server.rpc.timeout.max` | + +During the initialization of a new peer, the leader will classify the bootstrap process as "NO PROGRESS" +if it fails to receive any RPC responses from this peer within this specified timeout period. + --------------------------------------------------------------------------------- ### ThreadPool - Configurations related to server thread pools. @@ -111,7 +121,7 @@ treat the peer as caught-up. Increase this number when write throughput is high. | **Property** | `raft.server.threadpool.proxy.cached` | |:----------------|:--------------------------------------------------------| -| **Description** | use CachedThreadPool, otherwise, uee newFixedThreadPool | +| **Description** | use CachedThreadPool, otherwise, use newFixedThreadPool | | **Type** | boolean | | **Default** | true | @@ -128,7 +138,7 @@ treat the peer as caught-up. Increase this number when write throughput is high. 
| **Property** | `raft.server.threadpool.server.cached` | |:----------------|:--------------------------------------------------------| -| **Description** | use CachedThreadPool, otherwise, uee newFixedThreadPool | +| **Description** | use CachedThreadPool, otherwise, use newFixedThreadPool | | **Type** | boolean | | **Default** | true | @@ -145,7 +155,7 @@ treat the peer as caught-up. Increase this number when write throughput is high. | **Property** | `raft.server.threadpool.client.cached` | |:----------------|:--------------------------------------------------------| -| **Description** | use CachedThreadPool, otherwise, uee newFixedThreadPool | +| **Description** | use CachedThreadPool, otherwise, use newFixedThreadPool | | **Type** | boolean | | **Default** | true | @@ -208,7 +218,62 @@ treat the peer as caught-up. Increase this number when write throughput is high. | **Type** | TimeDuration | | **Default** | 60s | +### Read Index - Configurations related to ReadIndex used in linearizable read +| **Property** | `raft.server.read.read-index.type` | +|:----------------|:-----------------------------------------------------------------------------| +| **Description** | type of read index returned | +| **Type** | enum `Read.ReadIndex.Type` [`COMMIT_INDEX`, `APPLIED_INDEX`, `REPLIED_INDEX`] | +| **Default** | `Read.ReadIndex.Type.COMMIT_INDEX` | + +* `Read.ReadIndex.Type.COMMIT_INDEX` - Use leader's CommitIndex (see the Raft dissertation, section 6.4) + * The safest type as it is specified in the Raft dissertation + * This ReadIndex type can be chosen if the baseline performance of linearizable reads from followers already meets expectations. + +* `Read.ReadIndex.Type.APPLIED_INDEX` - Use leader's AppliedIndex + * Allow leader to return AppliedIndex (instead of CommitIndex) as the ReadIndex + * This reduces the time a follower spends applying logs up to the ReadIndex since AppliedIndex ≤ CommitIndex + * This ReadIndex type can be chosen if the `Read.ReadIndex.Type.COMMIT_INDEX` read latency is too high.
+ +* `Read.ReadIndex.Type.REPLIED_INDEX` - Use leader's RepliedIndex + * RepliedIndex is defined as the last AppliedIndex of the leader when returning the last batch. + * The leader delays replying to write requests and only replies to them at every write batch boundary, which is configurable by `raft.server.read.read-index.replied-index.batch-interval`. + * This allows the ReadIndex to advance in coarser, less frequent steps, so followers are more likely to have already applied past the ReadIndex when a read arrives. + * This is most effective on read-heavy, follower-read workloads which prioritize overall read throughput without sacrificing consistency. + * There is a trade-off in increased write latency (up to one `raft.server.read.read-index.replied-index.batch-interval`) per write. + * RepliedIndex still guarantees linearizability (no stale read) since by definition each ReadIndex returns the index of the last replied request. + * If the RepliedIndex is set to 0, the behavior is identical to `Read.ReadIndex.Type.APPLIED_INDEX` + +Note that theoretically all the ReadIndex types still guarantee linearizability, +but there are tradeoffs (e.g. Write and Read performance) between different types. + +| **Property** | `raft.server.read.read-index.replied-index.batch-interval` | +|:----------------|:--------------------------------------------------------------------------------------------------------------------------------------------| +| **Description** | if `Read.ReadIndex.Type` is `REPLIED_INDEX`, the interval at which held write replies are flushed to clients and `repliedIndex` is advanced | +| **Type** | TimeDuration | +| **Default** | 10ms | + +| **Property** | `raft.server.read.leader.heartbeat-check.enabled` | +|:----------------|:--------------------------------------------------| +| **Description** | whether to check heartbeat for read index.
| +| **Type** | boolean | +| **Default** | true | + +Note that the original read index algorithm requires heartbeat check +in order to guarantee linearizable read. +By setting this property to false, +it reduces the RTT by eliminating the heartbeat check. +However, it might cause the reads not to be linearizable in a split-brain case. +Without the heartbeat check, a leader might not be the latest leader +and, as a result, it might serve stale reads. +When there is a split brain, there might be a small period of time +that the (old) leader has lost majority heartbeats but has not yet detected it. +At the same time, a new leader is elected by a majority of peers. +Then, the old leader might serve stale data +since it does not have the transactions committed by the new leader. +Since such a split-brain case is supposed to be rare, +it might be an acceptable tradeoff for applications that +seek to improve the linearizable read performance. ### Write - Configurations related to write requests. @@ -471,11 +536,27 @@ The follower's statemachine is responsible for fetching and installing snapshot | **Type** | string | | **Default** | 1ms,10, 1s,20, 5s,1000 | -"1ms,10, 1s,20, 5s,1000" means -The min wait time as 1ms (0 is not allowed) for first 10, -(5 iteration with 2 times grpc client retry), -next wait 1sec for next 20 retry (10 iteration with 2 times grpc client) -further wait for 5sec for max times ((5sec*980)/2 times ~= 40min) +Format: +`<retry-policy>,<parameters>` +If `<retry-policy>` is omitted, it defaults to `MultipleLinearRandomRetry` for backward compatibility. + +Examples: +- `MultipleLinearRandomRetry,1ms,10,1s,20,5s,1000` +- `1ms,10,1s,20,5s,1000` (same as above) +- `ExponentialBackoffRetry,100ms,5s,100` + +For `MultipleLinearRandomRetry`, the parameter "1ms,10, 1s,20, 5s,1000" means +that the wait time is 1ms on average for the first 10 retries. +Then, it becomes 1s on average for the next 20 retries +and 5s on average for the last 1000 retries.
+ +For `ExponentialBackoffRetry`, the parameter "100ms,5s,100" means +that the base wait time is 100ms, the maximum wait time is 5s +and the number of attempts is 100. +The wait time is $\min(2^{n-1} \times 100\text{ms}, 5\text{s})$ on average for the n-th retry. +In other words, the wait times are on average 100ms, 200ms, 400ms, 800ms, 1.6s, 3.2s, 5s, 5s and so on. + +Note that the actual wait time is randomized by a multiplier in the range [0.5, 1.5) for all retry policies. -------------------------------------------------------------------------------- @@ -494,6 +575,11 @@ further wait for 5sec for max times ((5sec*980)/2 times ~= 40min) | **Type** | boolean | | **Default** | true | +| **Property** | `raft.server.snapshot.trigger-when-remove.enabled` | +|:----------------|:--------------------------------------------------------| +| **Description** | whether to trigger snapshot when raft server is removed | +| **Type** | boolean | +| **Default** | true | | **Property** | `raft.server.snapshot.creation.gap` | |:----------------|:-----------------------------------------------------| @@ -519,7 +605,7 @@ further wait for 5sec for max times ((5sec*980)/2 times ~= 40min) | **Property** | `raft.server.data-stream.async.request.thread.pool.cached` | |:----------------|:-----------------------------------------------------------| -| **Description** | use CachedThreadPool, otherwise, uee newFixedThreadPool | +| **Description** | use CachedThreadPool, otherwise, use newFixedThreadPool | | **Type** | boolean | | **Default** | false | @@ -532,7 +618,7 @@ further wait for 5sec for max times ((5sec*980)/2 times ~= 40min) | **Property** | `raft.server.data-stream.async.write.thread.pool.cached` | |:----------------|:---------------------------------------------------------| -| **Description** | use CachedThreadPool, otherwise, uee newFixedThreadPool | +| **Description** | use CachedThreadPool, otherwise, use newFixedThreadPool | | **Type** | boolean | | **Default** | false | 
@@ -670,4 +756,65 @@ For examples, 2. However, setConf to a 3-member group by removing 2 of members and adding 2 new members is a majority-add. - Note also that adding 1 new member to an 1-member group is always allowed, - although it is a majority-add. \ No newline at end of file + although it is a majority-add. + + +## Client Configurations + +Client configurations are located at `RaftClientConfigKeys`. + +### RPC - Configurations related to Client RPC timeout. + +| **Property** | `raft.client.rpc.request.timeout` | +|:----------------|:------------------------------------------| +| **Description** | client side timeout for sending a request | +| **Type** | TimeDuration | +| **Default** | 3s | + +| **Property** | `raft.client.rpc.watch.request.timeout` | +|:----------------|:------------------------------------------------| +| **Description** | client side timeout for sending a watch request | +| **Type** | TimeDuration | +| **Default** | 10s | + +### Async - Configurations related to async requests. + +| **Property** | `raft.client.async.outstanding-requests.max` | +|:----------------|:---------------------------------------------| +| **Description** | maximum number of outstanding async requests | +| **Type** | int | +| **Default** | 100 | + +### DataStream - Configurations related to DataStream Api. 
+ +| **Property** | `raft.client.data-stream.outstanding-requests.max` | +|:----------------|:---------------------------------------------------| +| **Description** | maximum number of outstanding data stream requests | +| **Type** | int | +| **Default** | 100 | + +| **Property** | `raft.client.data-stream.flush.request.count.min` | +|:----------------|:-----------------------------------------------------------------| +| **Description** | minimum number of requests before data stream flush would happen | +| **Type** | int | +| **Default** | 0 | + +| **Property** | `raft.client.data-stream.flush.request.bytes.min` | +|:----------------|:--------------------------------------------------------------| +| **Description** | minimum number of bytes before data stream flush would happen | +| **Type** | SizeInBytes | +| **Default** | 1MB | + +| **Property** | `raft.client.data-stream.request.timeout` | +|:----------------|:------------------------------------------| +| **Description** | timeout for data stream request | +| **Type** | TimeDuration | +| **Default** | 10s | + +### MessageStream - Configurations related to MessageStream Api. + +| **Property** | `raft.client.message-stream.submessage-size` | +|:----------------|:---------------------------------------------| +| **Description** | maximum size of a sub message | +| **Type** | SizeInBytes | +| **Default** | 1MB | diff --git a/ratis-docs/src/site/markdown/membership-change.md b/ratis-docs/src/site/markdown/membership-change.md index 81ae5495cc..3a4bc46b6f 100644 --- a/ratis-docs/src/site/markdown/membership-change.md +++ b/ratis-docs/src/site/markdown/membership-change.md @@ -39,11 +39,11 @@ Be careful to keep both separate majorities online! To add a new node (e.g., `N3`) to an existing group (e.g., `N0`, `N1`, `N2`), follow these steps: -1. Start the new peer `N3` with **EMPTY** group. +1. Start the new peer `N3` with the existing groupId and provide an empty list of peers. 
```java RaftServer N3 = RaftServer.newBuilder() - .setGroup(RaftGroup.emptygroup()) + .setGroup(RaftGroup.valueOf(groupId, Collections.emptyList())) .setProperties(properties) .setServerId(n3id) .setStateMachine(userStateMachine) @@ -52,8 +52,8 @@ To add a new node (e.g., `N3`) to an existing group (e.g., `N0`, `N1`, `N2`), fo ``` 2. Invoke a `setConfiguration` method in the [AdminApi]( -../../../../ratis-client/src/main/java/org/apache/ratis/client/api/AdminApi.java#L44) -with the new group as the parameter. +../../../../ratis-client/src/main/java/org/apache/ratis/client/api/AdminApi.java#L44) + on the existing cluster with the new list of peers as the parameter. It will wait for the new peer to catch up before returning the reply. ```java reply = client.admin().setConfiguration(List.of(N0, N1, N2, N3)) diff --git a/ratis-docs/src/site/markdown/security.md b/ratis-docs/src/site/markdown/security.md new file mode 100644 index 0000000000..eb233bf932 --- /dev/null +++ b/ratis-docs/src/site/markdown/security.md @@ -0,0 +1,46 @@ + + +# Security + +Raft is a consensus algorithm designed for fault tolerance. +A basic assumption of Raft is that + +- the servers can trust each other. + +In contrast, Raft is not designed to solve the Byzantine Generals Problem, +which assumes that some of the servers may be malicious. + +Apache Ratis is a Raft library +which supports a pluggable transport. +Applications using Ratis usually run a transport over a network. +In such case, +the applications must either provide secure communications between the servers +or provide a safe network environment such as running the servers in a private network. +Applications must not accept requests from any untrusted servers. + +Below are the TLS Configuration Parameters. +Applications may use them to build `RaftServer`/`RaftClient` objects for establishing secure connections. 
+ +| **Property** | **Description** | +|:----------------------------------------|:----------------------------------| +| `raft.grpc.tls.conf` | gRPC default TLS conf | +| `raft.grpc.server.tls.conf` | gRPC server TLS conf | +| `raft.grpc.client.tls.conf` | gRPC client TLS conf | +| `raft.grpc.admin.tls.conf` | gRPC admin TLS conf | +| `raft.netty.dataStream.server.tls.conf` | Netty data stream server TLS conf | +| `raft.netty.dataStream.client.tls.conf` | Netty data stream client TLS conf | diff --git a/ratis-docs/src/site/markdown/snapshot.md b/ratis-docs/src/site/markdown/snapshot.md index f20dc19d71..a9f06511fc 100644 --- a/ratis-docs/src/site/markdown/snapshot.md +++ b/ratis-docs/src/site/markdown/snapshot.md @@ -75,6 +75,8 @@ Note that Ratis imposes a minimal creation gap between two subsequent snapshot c ```java // SnapshotManagementApi RaftClientReply create(long timeoutMs) throws IOException; + RaftClientReply create(boolean force, long timeoutMs) throws IOException; + RaftClientReply create(long creationGap, long timeoutMs) throws IOException; ``` ```java diff --git a/ratis-examples/dev-support/findbugsExcludeFile.xml b/ratis-examples/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..26260f03ea --- /dev/null +++ b/ratis-examples/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/ratis-examples/pom.xml b/ratis-examples/pom.xml index 7454e928eb..c52019334f 100644 --- a/ratis-examples/pom.xml +++ b/ratis-examples/pom.xml @@ -17,7 +17,7 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-examples @@ -117,11 +117,6 @@ runtime - - junit - junit - test - org.junit.jupiter junit-jupiter-engine @@ -150,8 +145,8 @@ shade - ${project.build.directory}/dependency-reduced-pom.xml - true + false + false org.apache.ratis.examples.common.Runner @@ -185,21 +180,11 @@ - com.coderplus.maven.plugins - copy-rename-maven-plugin - - - copy-file - package - - copy - - - 
target/${project.artifactId}-${project.version}-shaded.jar - target/${project.artifactId}-${project.version}.jar - - - + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + diff --git a/ratis-examples/src/main/java/org/apache/ratis/examples/arithmetic/ArithmeticStateMachine.java b/ratis-examples/src/main/java/org/apache/ratis/examples/arithmetic/ArithmeticStateMachine.java index 28e3fb1c7d..c4adff5987 100644 --- a/ratis-examples/src/main/java/org/apache/ratis/examples/arithmetic/ArithmeticStateMachine.java +++ b/ratis-examples/src/main/java/org/apache/ratis/examples/arithmetic/ArithmeticStateMachine.java @@ -81,7 +81,7 @@ public void initialize(RaftServer server, RaftGroupId groupId, @Override public void reinitialize() throws IOException { close(); - loadSnapshot(storage.getLatestSnapshot()); + loadSnapshot(storage.loadLatestSnapshot()); } @Override @@ -164,7 +164,7 @@ public void close() { @Override public CompletableFuture applyTransaction(TransactionContext trx) { - final LogEntryProto entry = trx.getLogEntry(); + final LogEntryProto entry = trx.getLogEntryUnsafe(); final AssignmentMessage assignment = new AssignmentMessage(entry.getStateMachineLogEntry().getLogData()); final long index = entry.getIndex(); diff --git a/ratis-examples/src/main/java/org/apache/ratis/examples/counter/server/CounterStateMachine.java b/ratis-examples/src/main/java/org/apache/ratis/examples/counter/server/CounterStateMachine.java index b88a763e0f..e8b09c77ae 100644 --- a/ratis-examples/src/main/java/org/apache/ratis/examples/counter/server/CounterStateMachine.java +++ b/ratis-examples/src/main/java/org/apache/ratis/examples/counter/server/CounterStateMachine.java @@ -78,6 +78,11 @@ TermIndex getApplied() { int getCounter() { return counter; } + + @Override + public String toString() { + return counter + "@" + applied; + } } private final SimpleStateMachineStorage storage = new SimpleStateMachineStorage(); @@ -94,11 +99,11 @@ public 
CounterStateMachine() { } /** @return the current state. */ - private synchronized CounterState getState() { + synchronized CounterState getState() { return new CounterState(getLastAppliedTermIndex(), counter.get()); } - private synchronized void updateState(TermIndex applied, int counterValue) { + synchronized void updateState(TermIndex applied, int counterValue) { updateLastAppliedTermIndex(applied); counter.set(counterValue); } @@ -138,7 +143,12 @@ public void initialize(RaftServer server, RaftGroupId groupId, RaftStorage raftS */ @Override public void reinitialize() throws IOException { - load(storage.getLatestSnapshot()); + load(storage.loadLatestSnapshot()); + } + + @Override + public SimpleStateMachineStorage getStateMachineStorage() { + return storage; } /** @@ -147,30 +157,34 @@ public void reinitialize() throws IOException { * @return the index of the snapshot */ @Override - public long takeSnapshot() { + public long takeSnapshot() throws IOException { //get the current state final CounterState state = getState(); final long index = state.getApplied().getIndex(); //create a file with a proper name to store the snapshot final File snapshotFile = storage.getSnapshotFile(state.getApplied().getTerm(), index); + try { + saveSnapshot(state, snapshotFile); + } catch (Exception e) { + throw new IOException("Failed to save snapshot (" + state + ") to file " + snapshotFile, e); + } + //return the index of the stored snapshot (which is the last applied one) + return index; + } + + void saveSnapshot(CounterState state, File snapshotFile) throws IOException { //write the counter value into the snapshot file try (ObjectOutputStream out = new ObjectOutputStream(new BufferedOutputStream( Files.newOutputStream(snapshotFile.toPath())))) { out.writeInt(state.getCounter()); - } catch (IOException ioe) { - LOG.warn("Failed to write snapshot file \"" + snapshotFile - + "\", last applied index=" + state.getApplied()); } // update storage final MD5Hash md5 = 
MD5FileUtil.computeAndSaveMd5ForFile(snapshotFile); final FileInfo info = new FileInfo(snapshotFile.toPath(), md5); storage.updateLatestSnapshot(new SingleFileSnapshotInfo(info, state.getApplied())); - - //return the index of the stored snapshot (which is the last applied one) - return index; } /** @@ -247,7 +261,7 @@ public TransactionContext startTransaction(RaftClientRequest request) throws IOE */ @Override public CompletableFuture applyTransaction(TransactionContext trx) { - final LogEntryProto entry = trx.getLogEntry(); + final LogEntryProto entry = trx.getLogEntryUnsafe(); //increment the counter and update term-index final TermIndex termIndex = TermIndex.valueOf(entry); final int incremented = incrementCounter(termIndex); diff --git a/ratis-examples/src/main/java/org/apache/ratis/examples/debug/server/Server.java b/ratis-examples/src/main/java/org/apache/ratis/examples/debug/server/Server.java index 4377a1420f..07f7b20f44 100644 --- a/ratis-examples/src/main/java/org/apache/ratis/examples/debug/server/Server.java +++ b/ratis-examples/src/main/java/org/apache/ratis/examples/debug/server/Server.java @@ -34,6 +34,7 @@ public final class Server { private Server(){ } + @SuppressWarnings({"squid:S2095"}) // Suppress closeable warning public static void main(String[] args) throws IOException { if (args.length < 1) { System.err.println("The arguments should be "); diff --git a/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/FileInfo.java b/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/FileInfo.java index c7d8cb7cd1..bba001002a 100644 --- a/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/FileInfo.java +++ b/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/FileInfo.java @@ -19,6 +19,7 @@ import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations; import 
org.apache.ratis.util.CollectionUtils; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LogUtils; @@ -84,7 +85,7 @@ ByteString read(CheckedFunction resolver, long offset, final ByteBuffer buffer = ByteBuffer.allocateDirect(FileStoreCommon.getChunkSize(length)); in.position(offset).read(buffer); buffer.flip(); - return ByteString.copyFrom(buffer); + return UnsafeByteOperations.unsafeWrap(buffer); } } diff --git a/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/FileStoreStateMachine.java b/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/FileStoreStateMachine.java index 858e300ec9..345831dfb2 100644 --- a/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/FileStoreStateMachine.java +++ b/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/FileStoreStateMachine.java @@ -32,7 +32,6 @@ import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.server.RaftServer; -import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.statemachine.StateMachineStorage; import org.apache.ratis.statemachine.TransactionContext; @@ -41,7 +40,7 @@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.ratis.util.FileUtils; -import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.ReferenceCountedObject; import java.io.IOException; import java.nio.file.Path; @@ -114,16 +113,18 @@ public TransactionContext startTransaction(RaftClientRequest request) throws IOE @Override public TransactionContext startTransaction(LogEntryProto entry, RaftProtos.RaftPeerRole role) { + ByteString copied = ByteString.copyFrom(entry.getStateMachineLogEntry().getLogData().asReadOnlyByteBuffer()); return TransactionContext.newBuilder() .setStateMachine(this) .setLogEntry(entry) 
.setServerRole(role) - .setStateMachineContext(getProto(entry)) + .setStateMachineContext(getProto(copied)) .build(); } @Override - public CompletableFuture write(LogEntryProto entry, TransactionContext context) { + public CompletableFuture write(ReferenceCountedObject entryRef, TransactionContext context) { + LogEntryProto entry = entryRef.retain(); final FileStoreRequestProto proto = getProto(context, entry); if (proto.getRequestCase() != FileStoreRequestProto.RequestCase.WRITEHEADER) { return null; @@ -132,9 +133,10 @@ public CompletableFuture write(LogEntryProto entry, TransactionContext final WriteRequestHeaderProto h = proto.getWriteHeader(); final CompletableFuture f = files.write(entry.getIndex(), h.getPath().toStringUtf8(), h.getClose(), h.getSync(), h.getOffset(), - entry.getStateMachineLogEntry().getStateMachineEntry().getStateMachineData()); + entry.getStateMachineLogEntry().getStateMachineEntry().getStateMachineData() + ).whenComplete((r, e) -> entryRef.release()); // sync only if closing the file - return h.getClose()? f: null; + return h.getClose() ? 
f: null; } static FileStoreRequestProto getProto(TransactionContext context, LogEntryProto entry) { @@ -144,14 +146,14 @@ static FileStoreRequestProto getProto(TransactionContext context, LogEntryProto return proto; } } - return getProto(entry); + return getProto(entry.getStateMachineLogEntry().getLogData()); } - static FileStoreRequestProto getProto(LogEntryProto entry) { + static FileStoreRequestProto getProto(ByteString bytes) { try { - return FileStoreRequestProto.parseFrom(entry.getStateMachineLogEntry().getLogData()); + return FileStoreRequestProto.parseFrom(bytes); } catch (InvalidProtocolBufferException e) { - throw new IllegalArgumentException("Failed to parse data, entry=" + entry, e); + throw new IllegalArgumentException("Failed to parse data", e); } } @@ -170,11 +172,9 @@ public CompletableFuture read(LogEntryProto entry, TransactionContex } static class LocalStream implements DataStream { - private final String name; private final DataChannel dataChannel; - LocalStream(String name, DataChannel dataChannel) { - this.name = JavaUtils.getClassSimpleName(getClass()) + "[" + name + "]"; + LocalStream(DataChannel dataChannel) { this.dataChannel = dataChannel; } @@ -194,11 +194,6 @@ public CompletableFuture cleanUp() { } }); } - - @Override - public String toString() { - return name; - } } @Override @@ -211,20 +206,19 @@ public CompletableFuture stream(RaftClientRequest request) { return FileStoreCommon.completeExceptionally( "Failed to parse stream header", e); } - final String file = proto.getStream().getPath().toStringUtf8(); - return files.createDataChannel(file) - .thenApply(channel -> new LocalStream(file, channel)); + return files.createDataChannel(proto.getStream().getPath().toStringUtf8()) + .thenApply(LocalStream::new); } @Override public CompletableFuture link(DataStream stream, LogEntryProto entry) { - LOG.info("linking {} to {}", stream, LogProtoUtils.toLogEntryString(entry)); + LOG.info("linking {}", stream); return files.streamLink(stream); } 
@Override public CompletableFuture applyTransaction(TransactionContext trx) { - final LogEntryProto entry = trx.getLogEntry(); + final LogEntryProto entry = trx.getLogEntryUnsafe(); final long index = entry.getIndex(); updateLastAppliedTermIndex(entry.getTerm(), index); diff --git a/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/cli/Client.java b/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/cli/Client.java index 1856fc9b5d..caf2aa59b2 100644 --- a/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/cli/Client.java +++ b/ratis-examples/src/main/java/org/apache/ratis/examples/filestore/cli/Client.java @@ -159,6 +159,9 @@ protected void dropCache() { Process pro = Runtime.getRuntime().exec(cmds); pro.waitFor(); } catch (Throwable t) { + if (t instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } System.err.println("Failed to run command:" + Arrays.toString(cmds) + ":" + t.getMessage()); } } diff --git a/ratis-examples/src/main/java/org/apache/ratis/examples/membership/server/CServer.java b/ratis-examples/src/main/java/org/apache/ratis/examples/membership/server/CServer.java index a846cd1e77..2145412e04 100644 --- a/ratis-examples/src/main/java/org/apache/ratis/examples/membership/server/CServer.java +++ b/ratis-examples/src/main/java/org/apache/ratis/examples/membership/server/CServer.java @@ -32,7 +32,6 @@ import org.apache.ratis.thirdparty.com.google.common.base.MoreObjects; import org.apache.ratis.util.FileUtils; -import java.io.Closeable; import java.io.File; import java.io.IOException; import java.util.Collections; @@ -40,7 +39,7 @@ /** * A simple raft server using {@link CounterStateMachine}. 
*/ -public class CServer implements Closeable { +public class CServer { public static final RaftGroupId GROUP_ID = RaftGroupId.randomId(); public static final String LOCAL_ADDR = "0.0.0.0"; @@ -78,7 +77,6 @@ public RaftPeer getPeer() { return server.getPeer(); } - @Override public void close() throws IOException { server.close(); FileUtils.deleteFully(storageDir); diff --git a/ratis-examples/src/main/resources/log4j.properties b/ratis-examples/src/main/resources/log4j.properties index f5b4baf67c..eedefe287e 100644 --- a/ratis-examples/src/main/resources/log4j.properties +++ b/ratis-examples/src/main/resources/log4j.properties @@ -16,10 +16,8 @@ log4j.rootLogger=INFO, stdout -log4j.logger.org.apache.ratis.server.impl.RatisServerImpl=DEBUG -log4j.logger.org.apache.ratis.client.RaftClient=DEBUG log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.Target=System.out log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n \ No newline at end of file +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n diff --git a/ratis-examples/src/test/java/org/apache/ratis/TestMultiRaftGroup.java b/ratis-examples/src/test/java/org/apache/ratis/TestMultiRaftGroup.java index 190f758589..5ce96da39a 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/TestMultiRaftGroup.java +++ b/ratis-examples/src/test/java/org/apache/ratis/TestMultiRaftGroup.java @@ -22,25 +22,22 @@ import org.apache.ratis.examples.arithmetic.ArithmeticStateMachine; import org.apache.ratis.examples.arithmetic.TestArithmetic; import org.apache.ratis.protocol.RaftGroup; -import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.impl.GroupManagementBaseTest; import org.apache.ratis.server.impl.MiniRaftCluster; -import org.apache.ratis.util.Slf4jUtils; +import org.apache.ratis.test.tag.Flaky; import 
org.apache.ratis.util.function.CheckedBiConsumer; +import org.junit.jupiter.api.Timeout; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; -import org.slf4j.event.Level; import java.io.IOException; import java.util.Collection; import java.util.concurrent.atomic.AtomicInteger; +@Flaky("RATIS-2218") +@Timeout(value = 300) public class TestMultiRaftGroup extends BaseTest { - static { - Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG); - } - - public static Collection data() throws IOException { + public static Collection data() { return ParameterizedBaseTest.getMiniRaftClusters(ArithmeticStateMachine.class, 0); } diff --git a/ratis-examples/src/test/java/org/apache/ratis/examples/ParameterizedBaseTest.java b/ratis-examples/src/test/java/org/apache/ratis/examples/ParameterizedBaseTest.java index df2fce13ee..12445d1d2c 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/ParameterizedBaseTest.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/ParameterizedBaseTest.java @@ -35,7 +35,12 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; @@ -48,11 +53,11 @@ public static Collection data() { } /** For {@link ParameterizedTest} so that a cluster can be shared by multiple {@link Test} */ - private static final AtomicReference currentCluster = new AtomicReference<>(); + private static final AtomicReference CURRENT_CLUSTER = new AtomicReference<>(); - /** Set {@link #currentCluster} to the given cluster and start it if {@link #currentCluster} is changed. */ + /** Set {@link #CURRENT_CLUSTER} to the given cluster and start it if {@link #CURRENT_CLUSTER} is changed. 
*/ public static void setAndStart(MiniRaftCluster cluster) throws InterruptedException, IOException { - final MiniRaftCluster previous = currentCluster.getAndSet(cluster); + final MiniRaftCluster previous = CURRENT_CLUSTER.getAndSet(cluster); if (previous != cluster) { if (previous != null) { previous.shutdown(); @@ -65,7 +70,7 @@ public static void setAndStart(MiniRaftCluster cluster) throws InterruptedExcept @AfterAll public static void shutdownCurrentCluster() { - final MiniRaftCluster cluster = currentCluster.getAndSet(null); + final MiniRaftCluster cluster = CURRENT_CLUSTER.getAndSet(null); if (cluster != null) { cluster.shutdown(); } diff --git a/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/TestArithmetic.java b/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/TestArithmetic.java index 0c56898f0c..3d9d89a7db 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/TestArithmetic.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/TestArithmetic.java @@ -107,7 +107,8 @@ void runGaussLegendre(RaftClient client) throws IOException { final Variable p0 = new Variable("p" + i_1); final Variable a1 = defineVariable(client, "a"+i, DIV.apply(ADD.apply(a0, b0), 2)); final Variable b1 = defineVariable(client, "b"+i, SQRT.apply(MULT.apply(a0, b0))); - final Variable t1 = defineVariable(client, "t"+i, SUBTRACT.apply(t0, MULT.apply(p0, SQUARE.apply(SUBTRACT.apply(a0, a1))))); + final Variable t1 = defineVariable(client, "t"+i, + SUBTRACT.apply(t0, MULT.apply(p0, SQUARE.apply(SUBTRACT.apply(a0, a1))))); final Variable p1 = defineVariable(client, "p"+i, MULT.apply(2, p0)); final Variable pi_i = new Variable("pi_"+i); diff --git a/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/TestArithmeticLogDump.java b/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/TestArithmeticLogDump.java index c39335308f..f17ebe2604 100644 --- 
a/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/TestArithmeticLogDump.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/TestArithmeticLogDump.java @@ -47,17 +47,17 @@ public class TestArithmeticLogDump extends BaseTest { public static final int NUM_SERVERS = 1; - protected static final RaftProperties properties = new RaftProperties(); + protected static final RaftProperties PROPERTIES = new RaftProperties(); private final MiniRaftClusterWithSimulatedRpc cluster = MiniRaftClusterWithSimulatedRpc .FACTORY.newCluster(NUM_SERVERS, getProperties()); public RaftProperties getProperties() { RaftServerConfigKeys.Rpc - .setSlownessTimeout(properties, TimeDuration.valueOf(1, TimeUnit.SECONDS)); - properties.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, + .setSlownessTimeout(PROPERTIES, TimeDuration.valueOf(1, TimeUnit.SECONDS)); + PROPERTIES.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, SimpleStateMachine4Testing.class, StateMachine.class); - return properties; + return PROPERTIES; } @BeforeEach diff --git a/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/expression/TestExpression.java b/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/expression/TestExpression.java index 4cc81c6ed1..b512c617f4 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/expression/TestExpression.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/arithmetic/expression/TestExpression.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -21,16 +21,13 @@ import org.apache.ratis.BaseTest; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; +@Timeout(value = 1) public class TestExpression extends BaseTest { - @Override - public int getGlobalTimeoutSeconds() { - return 1; - } - @Test public void testArithmeticUtils() throws Exception { final Random ran = ThreadLocalRandom.current(); diff --git a/ratis-examples/src/test/java/org/apache/ratis/examples/common/TestSubCommand.java b/ratis-examples/src/test/java/org/apache/ratis/examples/common/TestSubCommand.java index 5ef034809a..c47dbb82a1 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/common/TestSubCommand.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/common/TestSubCommand.java @@ -21,12 +21,9 @@ import java.util.Collection; import java.util.Collections; -import org.junit.Test; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; public class TestSubCommand { diff --git a/ratis-examples/src/test/java/org/apache/ratis/examples/counter/server/TestManualRestoreSnapshot.java b/ratis-examples/src/test/java/org/apache/ratis/examples/counter/server/TestManualRestoreSnapshot.java new file mode 100644 index 0000000000..0480a5364a --- /dev/null +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/counter/server/TestManualRestoreSnapshot.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.examples.counter.server; + +import org.apache.ratis.BaseTest; +import org.apache.ratis.RaftTestUtil; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.examples.counter.CounterCommand; +import org.apache.ratis.grpc.MiniRaftClusterWithGrpc; +import org.apache.ratis.protocol.Message; +import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.server.RaftServer; +import org.apache.ratis.server.impl.MiniRaftCluster; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.statemachine.StateMachine; +import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; +import org.apache.ratis.util.FileUtils; +import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.TimeDuration; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test manually restoring a snapshot. + * Due to hardware failures or software bugs, + * the state of a state machine can become corrupted. 
+ * In such case, we may manually copy a snapshot from the leader + * and then install it to the corrupted state machine. + */ +public class TestManualRestoreSnapshot extends BaseTest implements MiniRaftClusterWithGrpc.FactoryGet { + public static final int NUM_SERVERS = 3; + + { + getProperties().setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, CounterStateMachine.class, StateMachine.class); + } + + @Test + public void testManualRestoreSnapshot() throws Exception { + runWithNewCluster(NUM_SERVERS, this::run); + } + + void run(MiniRaftCluster cluster) throws Exception { + final RaftGroup group = cluster.getGroup(); + + // send some messages + final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster); + LOG.info("Leader: {}", leader); + sendMessages(cluster, 5); + + // kill a follower + final RaftServer.Division toBeKilled = cluster.getFollowers().get(0); + LOG.info("Follower to be killed: {}", toBeKilled.getId()); + final SimpleStateMachineStorage smStorage = ((CounterStateMachine) toBeKilled.getStateMachine()) + .getStateMachineStorage(); + final File raftLogCurrentDir = toBeKilled.getRaftStorage().getStorageDir().getCurrentDir(); + cluster.killServer(toBeKilled.getId()); + + // send more messages + sendMessages(cluster, 3); + + // get a snapshot from the leader + final CounterStateMachine leaderStateMachine = (CounterStateMachine) leader.getStateMachine(); + final CounterStateMachine.CounterState snapshot = leaderStateMachine.getState(); + LOG.info("{}: Leader {}", leader.getId(), snapshot); + + // remove raft log from the killed follower + FileUtils.listDir(raftLogCurrentDir, s -> LOG.info("{}", s), LOG::error); + final String[] logFiles = raftLogCurrentDir.list((dir, name) -> name.startsWith("log")); + assertNotNull(logFiles); + for (String logFile : logFiles) { + FileUtils.deleteFile(new File(raftLogCurrentDir, logFile)); + } + + // remove the killed follower + final RaftPeerId followerId = toBeKilled.getId(); + cluster.removeServer(followerId); + 
+ // save the leader snapshot to the killed follower + final TermIndex applied = snapshot.getApplied(); + final File snapshotFile = smStorage.getSnapshotFile(applied.getTerm(), applied.getIndex()); + final RaftServer toSaveSnapshot = cluster.putNewServer(followerId, group, false); + ((CounterStateMachine) toSaveSnapshot.getDivision(group.getGroupId()).getStateMachine()) + .saveSnapshot(snapshot, snapshotFile); + + // start follower and verify last applied + LOG.info("Restarting {}", followerId); + final RaftServer.Division restartedFollower = cluster.restartServer(followerId, group, false); + final StateMachine stateMachine = restartedFollower.getStateMachine(); + final SnapshotInfo info = stateMachine.getLatestSnapshot(); + LOG.info("{} restarted snapshot info {} from {}", followerId, info, stateMachine); + + JavaUtils.attemptUntilTrue(() -> { + System.out.println(cluster.printServers()); + final TermIndex leaderLastApplied = leaderStateMachine.getLastAppliedTermIndex(); + LOG.info("Leader {} last applied {}", leader.getId(), leaderLastApplied); + final TermIndex followerLastApplied = stateMachine.getLastAppliedTermIndex(); + LOG.info("Follower {} last applied {}", followerId, followerLastApplied); + return followerLastApplied.equals(leaderLastApplied); + }, 10, TimeDuration.ONE_SECOND, "followerLastApplied", LOG); + + sendMessages(cluster, 7); + } + + static void sendMessages(MiniRaftCluster cluster, int numMessages) throws Exception { + final List messages = getUpdateRequests(numMessages); + try(final RaftClient client = cluster.createClient()) { + for (Message message : messages) { + final RaftClientReply reply = client.io().send(message); + assertTrue(reply.isSuccess()); + } + } + } + + static List getUpdateRequests(int numMessages) { + final List messages = new ArrayList<>(); + for(int i = 0; i < numMessages; i++) { + messages.add(CounterCommand.INCREMENT.getMessage()); + } + return messages; + } + +} diff --git 
a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreAsyncBaseTest.java b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreAsyncBaseTest.java index eb51e643b0..ae3d7fc7b0 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreAsyncBaseTest.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreAsyncBaseTest.java @@ -25,14 +25,18 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.LogUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.*; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; public abstract class FileStoreAsyncBaseTest extends BaseTest diff --git a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreBaseTest.java b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreBaseTest.java index 5bcc500391..07668e0f2d 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreBaseTest.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreBaseTest.java @@ -28,8 +28,8 @@ import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedSupplier; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -96,22 +96,22 @@ void runTestWatch(int n, CLUSTER cluster) throws Exception { }); firstList.add(f); final CompletableFuture s = client.watchAsync(pathSecond + 
i).whenComplete((reply, e) -> { - Assert.assertNotNull(reply); - Assert.assertNull(e); - Assert.assertTrue(isStarted.get()); + Assertions.assertNotNull(reply); + Assertions.assertNull(e); + Assertions.assertTrue(isStarted.get()); completionOrder.add(index); }); watchSecond.add(s); - Assert.assertFalse(f.isDone()); - Assert.assertFalse(s.isDone()); - Assert.assertFalse(isStarted.get()); + Assertions.assertFalse(f.isDone()); + Assertions.assertFalse(s.isDone()); + Assertions.assertFalse(isStarted.get()); } TimeDuration.valueOf(ThreadLocalRandom.current().nextLong(500) + 100, TimeUnit.MILLISECONDS) .sleep(s -> LOG.info("{}", s)); - firstList.stream().map(CompletableFuture::isDone).forEach(Assert::assertFalse); - watchSecond.stream().map(CompletableFuture::isDone).forEach(Assert::assertFalse); - Assert.assertFalse(isStarted.get()); + firstList.stream().map(CompletableFuture::isDone).forEach(Assertions::assertFalse); + watchSecond.stream().map(CompletableFuture::isDone).forEach(Assertions::assertFalse); + Assertions.assertFalse(isStarted.get()); isStarted.set(true); for (int i : randomIndices) { @@ -121,12 +121,12 @@ void runTestWatch(int n, CLUSTER cluster) throws Exception { for (int i = 0; i < n; i++) { final ReadReplyProto reply = watchSecond.get(i).get(100, TimeUnit.MILLISECONDS); LOG.info("reply {}: {}", i, reply); - Assert.assertNotNull(reply); - Assert.assertEquals(pathSecond + i, reply.getResolvedPath().toStringUtf8()); + Assertions.assertNotNull(reply); + Assertions.assertEquals(pathSecond + i, reply.getResolvedPath().toStringUtf8()); } LOG.info("completionOrder {}", completionOrder); - Assert.assertEquals(randomIndices, completionOrder); - firstList.stream().map(CompletableFuture::isDone).forEach(Assert::assertFalse); + Assertions.assertEquals(randomIndices, completionOrder); + firstList.stream().map(CompletableFuture::isDone).forEach(Assertions::assertFalse); } } diff --git 
a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreStreamingBaseTest.java b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreStreamingBaseTest.java index 92147de4e9..cdcee0ef0e 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreStreamingBaseTest.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreStreamingBaseTest.java @@ -30,8 +30,8 @@ import org.apache.ratis.util.LogUtils; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.function.CheckedSupplier; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,6 +56,7 @@ public abstract class FileStoreStreamingBaseTest peers = raftGroup.getPeers(); - Assert.assertEquals(NUM_PEERS, peers.size()); + Assertions.assertEquals(NUM_PEERS, peers.size()); RaftPeer primary = peers.iterator().next(); final CheckedSupplier newClient = @@ -90,7 +91,7 @@ public void testFileStoreStreamMultipleFiles() throws Exception { final RaftGroup raftGroup = cluster.getGroup(); final Collection peers = raftGroup.getPeers(); - Assert.assertEquals(NUM_PEERS, peers.size()); + Assertions.assertEquals(NUM_PEERS, peers.size()); RaftPeer primary = peers.iterator().next(); final CheckedSupplier newClient = @@ -113,7 +114,7 @@ private void testSingleFile( .setFileSize(fileLength) .setBufferSize(bufferSize) .setFileStoreClientSupplier(newClient) - .build().streamWriteAndVerify(routingTable); + .build().streamWriteAndVerify(routingTable).close(); } private void testMultipleFiles(String pathBase, int numFile, SizeInBytes fileLength, @@ -135,7 +136,7 @@ private void testMultipleFiles(String pathBase, int numFile, SizeInBytes fileLen writerFutures.add(executor.submit(callable)); } for (Future future : writerFutures) { - future.get(); + future.get().close(); } } } diff --git 
a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreWriter.java b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreWriter.java index dbffab8a7d..c0f7d08097 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreWriter.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/FileStoreWriter.java @@ -29,7 +29,7 @@ import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.StringUtils; import org.apache.ratis.util.function.CheckedSupplier; -import org.junit.Assert; +import org.junit.jupiter.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,7 +44,7 @@ import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicInteger; -class FileStoreWriter implements Closeable { +final class FileStoreWriter implements Closeable { public static final Logger LOG = LoggerFactory.getLogger(FileStoreWriter.class); final long seed = ThreadLocalRandom.current().nextLong(); @@ -129,7 +129,7 @@ FileStoreWriter write(boolean sync) throws IOException { LOG.trace("write {}, offset={}, length={}, close? {}", fileName, offset, length, close); final long written = client.write(fileName, offset, close, b, sync); - Assert.assertEquals(length, written); + Assertions.assertEquals(length, written); offset += length; } return this; @@ -156,15 +156,15 @@ public FileStoreWriter streamWriteAndVerify(RoutingTable routingTable) { } DataStreamReply reply = dataStreamOutput.closeAsync().join(); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); // TODO: handle when any of the writeAsync has failed. 
// check writeAsync requests for (int i = 0; i < futures.size(); i++) { reply = futures.get(i).join(); - Assert.assertTrue(reply.isSuccess()); - Assert.assertEquals(sizes.get(i).longValue(), reply.getBytesWritten()); - Assert.assertEquals(reply.getType(), RaftProtos.DataStreamPacketHeaderProto.Type.STREAM_DATA); + Assertions.assertTrue(reply.isSuccess()); + Assertions.assertEquals(sizes.get(i).longValue(), reply.getBytesWritten()); + Assertions.assertEquals(reply.getType(), RaftProtos.DataStreamPacketHeaderProto.Type.STREAM_DATA); } return this; @@ -192,7 +192,7 @@ CompletableFuture writeAsync(boolean sync) { LOG.trace("writeAsync {}, offset={}, length={}, close? {}", fileName, offset, length, close); client.writeAsync(fileName, offset, close, b, sync) - .thenAcceptAsync(written -> Assert.assertEquals(length, (long)written), asyncExecutor) + .thenAcceptAsync(written -> Assertions.assertEquals(length, (long)written), asyncExecutor) .thenRun(() -> { final int count = callCount.decrementAndGet(); LOG.trace("writeAsync {}, offset={}, length={}, close? 
{}: n={}, callCount={}", @@ -256,12 +256,12 @@ CompletableFuture verifyAsync() { return null; }); } - Assert.assertEquals(size, n.get()); + Assertions.assertEquals(size, n.get()); return returnFuture; } void verify(ByteString read, int offset, int length, ByteBuffer expected) { - Assert.assertEquals(length, read.size()); + Assertions.assertEquals(length, read.size()); assertBuffers(offset, length, expected, read.asReadOnlyByteBuffer()); } @@ -282,7 +282,7 @@ public void close() throws IOException { static void assertBuffers(int offset, int length, ByteBuffer expected, ByteBuffer computed) { try { - Assert.assertEquals(expected, computed); + Assertions.assertEquals(expected, computed); } catch(AssertionError e) { LOG.error("Buffer mismatched at offset=" + offset + ", length=" + length + "\n expected = " + StringUtils.bytes2HexString(expected) diff --git a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/TestFileStoreStreamingWithGrpcCluster.java b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/TestFileStoreStreamingWithGrpcCluster.java index a6f112760a..a5106444c3 100644 --- a/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/TestFileStoreStreamingWithGrpcCluster.java +++ b/ratis-examples/src/test/java/org/apache/ratis/examples/filestore/TestFileStoreStreamingWithGrpcCluster.java @@ -19,7 +19,8 @@ import org.apache.ratis.datastream.MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty; -public class TestFileStoreStreamingWithGrpcCluster extends FileStoreStreamingBaseTest +public class TestFileStoreStreamingWithGrpcCluster + extends FileStoreStreamingBaseTest implements MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.FactoryGet { } diff --git a/ratis-examples/src/test/java/org/apache/ratis/server/impl/TestReadAfterWrite.java b/ratis-examples/src/test/java/org/apache/ratis/server/impl/TestReadAfterWrite.java index f515628c97..025c7ed0db 100644 --- 
a/ratis-examples/src/test/java/org/apache/ratis/server/impl/TestReadAfterWrite.java +++ b/ratis-examples/src/test/java/org/apache/ratis/server/impl/TestReadAfterWrite.java @@ -33,9 +33,9 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.CodeInjectionForTesting; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.util.concurrent.CompletableFuture; @@ -48,7 +48,7 @@ public class TestReadAfterWrite extends BaseTest implements MiniRaftClusterWithGrpc.FactoryGet { - @Before + @BeforeEach public void setup() { Slf4jUtils.setLogLevel(ArithmeticStateMachine.LOG, Level.DEBUG); Slf4jUtils.setLogLevel(CodeInjectionForTesting.LOG, Level.DEBUG); @@ -108,7 +108,7 @@ void runTestReadAfterWrite(RaftClient client) throws Exception { final AsyncApi async = client.async(); final int initialValue = 10; final RaftClientReply assign = async.send(a.assign(new DoubleValue(initialValue))).join(); - Assert.assertTrue(assign.isSuccess()); + Assertions.assertTrue(assign.isSuccess()); final Message query = Expression.Utils.toMessage(a); assertReply(async.sendReadOnly(query), initialValue); @@ -131,15 +131,15 @@ void runTestReadAfterWrite(RaftClient client) throws Exception { final RaftClientReply reply = readAfterWrite.get(100, TimeUnit.MILLISECONDS); final DoubleValue result = (DoubleValue) Expression.Utils.bytes2Expression( reply.getMessage().getContent().toByteArray(), 0); - Assert.fail("result=" + result + ", reply=" + reply); + Assertions.fail("result=" + result + ", reply=" + reply); } catch (TimeoutException e) { LOG.info("Good", e); } // plus2 should still be blocked. - Assert.assertFalse(plus2.isDone()); + Assertions.assertFalse(plus2.isDone()); // readAfterWrite should still be blocked. 
- Assert.assertFalse(readAfterWrite.isDone()); + Assertions.assertFalse(readAfterWrite.isDone()); // unblock plus2 blockingCode.complete(); @@ -151,10 +151,10 @@ void runTestReadAfterWrite(RaftClient client) throws Exception { void assertReply(CompletableFuture future, int expected) { LOG.info("assertReply, expected {}", expected); final RaftClientReply reply = future.join(); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); LOG.info("reply {}", reply); final DoubleValue result = (DoubleValue) Expression.Utils.bytes2Expression( reply.getMessage().getContent().toByteArray(), 0); - Assert.assertEquals(expected, (int) (double) result.evaluate(null)); + Assertions.assertEquals(expected, (int) (double) result.evaluate(null)); } } diff --git a/ratis-experiments/pom.xml b/ratis-experiments/pom.xml index 2099142ea3..e4a8451db2 100644 --- a/ratis-experiments/pom.xml +++ b/ratis-experiments/pom.xml @@ -17,12 +17,17 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-experiments Apache Ratis Experiments + + + true + + org.apache.ratis @@ -49,7 +54,6 @@ org.codehaus.mojo exec-maven-plugin - 1.6.0 compile-flatbufs @@ -120,8 +124,8 @@ shade - ${project.build.directory}/dependency-reduced-pom.xml - true + false + false *:* @@ -140,23 +144,6 @@ - - com.coderplus.maven.plugins - copy-rename-maven-plugin - - - copy-file - package - - copy - - - target/${project.artifactId}-${project.version}-shaded.jar - target/${project.artifactId}-${project.version}.jar - - - - diff --git a/ratis-grpc/dev-support/findbugsExcludeFile.xml b/ratis-grpc/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..c13c34ade0 --- /dev/null +++ b/ratis-grpc/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/ratis-grpc/pom.xml b/ratis-grpc/pom.xml index 71377b5efc..360131d55b 100644 --- a/ratis-grpc/pom.xml +++ b/ratis-grpc/pom.xml @@ -17,7 +17,7 @@ ratis org.apache.ratis - 
3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-grpc @@ -37,12 +37,6 @@ ratis-common org.apache.ratis - - ratis-common - org.apache.ratis - test - test-jar - ratis-client org.apache.ratis @@ -53,6 +47,10 @@ test test-jar + + org.apache.ratis + ratis-server-api + ratis-server org.apache.ratis @@ -71,14 +69,20 @@ - junit - junit - test - - - org.mockito - mockito-core + org.junit.jupiter + junit-jupiter-api test + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcConfigKeys.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcConfigKeys.java index 8caacfeeb5..2fcb9b6b0a 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcConfigKeys.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcConfigKeys.java @@ -19,6 +19,7 @@ import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.grpc.server.GrpcServices; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; @@ -230,15 +231,6 @@ static void setAsyncRequestThreadPoolSize(RaftProperties properties, int port) { setInt(properties::setInt, ASYNC_REQUEST_THREAD_POOL_SIZE_KEY, port); } - String TLS_CONF_PARAMETER = PREFIX + ".tls.conf"; - Class TLS_CONF_CLASS = TLS.CONF_CLASS; - static GrpcTlsConfig tlsConf(Parameters parameters) { - return parameters != null ? 
parameters.get(TLS_CONF_PARAMETER, TLS_CONF_CLASS): null; - } - static void setTlsConf(Parameters parameters, GrpcTlsConfig conf) { - parameters.put(TLS_CONF_PARAMETER, conf, TLS_CONF_CLASS); - } - String LEADER_OUTSTANDING_APPENDS_MAX_KEY = PREFIX + ".leader.outstanding.appends.max"; int LEADER_OUTSTANDING_APPENDS_MAX_DEFAULT = 8; static int leaderOutstandingAppendsMax(RaftProperties properties) { @@ -292,6 +284,34 @@ static void setLogMessageBatchDuration(RaftProperties properties, setTimeDuration(properties::setTimeDuration, LOG_MESSAGE_BATCH_DURATION_KEY, logMessageBatchDuration); } + + String ZERO_COPY_ENABLED_KEY = PREFIX + ".zerocopy.enabled"; + boolean ZERO_COPY_ENABLED_DEFAULT = false; + static boolean zeroCopyEnabled(RaftProperties properties) { + return getBoolean(properties::getBoolean, ZERO_COPY_ENABLED_KEY, ZERO_COPY_ENABLED_DEFAULT, getDefaultLog()); + } + static void setZeroCopyEnabled(RaftProperties properties, boolean enabled) { + setBoolean(properties::setBoolean, ZERO_COPY_ENABLED_KEY, enabled); + } + + String SERVICES_CUSTOMIZER_PARAMETER = PREFIX + ".services.customizer"; + Class SERVICES_CUSTOMIZER_CLASS = GrpcServices.Customizer.class; + static GrpcServices.Customizer servicesCustomizer(Parameters parameters) { + return parameters == null ? null + : parameters.get(SERVICES_CUSTOMIZER_PARAMETER, SERVICES_CUSTOMIZER_CLASS); + } + static void setServicesCustomizer(Parameters parameters, GrpcServices.Customizer customizer) { + parameters.put(SERVICES_CUSTOMIZER_PARAMETER, customizer, SERVICES_CUSTOMIZER_CLASS); + } + + String TLS_CONF_PARAMETER = PREFIX + ".tls.conf"; + Class TLS_CONF_CLASS = TLS.CONF_CLASS; + static GrpcTlsConfig tlsConf(Parameters parameters) { + return parameters != null ? 
parameters.get(TLS_CONF_PARAMETER, TLS_CONF_CLASS): null; + } + static void setTlsConf(Parameters parameters, GrpcTlsConfig conf) { + parameters.put(TLS_CONF_PARAMETER, conf, TLS_CONF_CLASS); + } } String MESSAGE_SIZE_MAX_KEY = PREFIX + ".message.size.max"; diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcFactory.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcFactory.java index 75eb34a2d1..331d1a8585 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcFactory.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcFactory.java @@ -22,7 +22,8 @@ import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.grpc.client.GrpcClientRpc; import org.apache.ratis.grpc.server.GrpcLogAppender; -import org.apache.ratis.grpc.server.GrpcService; +import org.apache.ratis.grpc.server.GrpcServices; +import org.apache.ratis.grpc.server.GrpcServicesImpl; import org.apache.ratis.protocol.ClientId; import org.apache.ratis.rpc.SupportedRpcType; import org.apache.ratis.server.RaftServer; @@ -64,6 +65,8 @@ static boolean checkPooledByteBufAllocatorUseCacheForAllThreads(Consumer return value; } + private final GrpcServices.Customizer servicesCustomizer; + private final GrpcTlsConfig tlsConfig; private final GrpcTlsConfig adminTlsConfig; private final GrpcTlsConfig clientTlsConfig; @@ -76,7 +79,7 @@ public static Parameters newRaftParameters(GrpcTlsConfig conf) { } public GrpcFactory(Parameters parameters) { - this( + this(GrpcConfigKeys.Server.servicesCustomizer(parameters), GrpcConfigKeys.TLS.conf(parameters), GrpcConfigKeys.Admin.tlsConf(parameters), GrpcConfigKeys.Client.tlsConf(parameters), @@ -85,11 +88,14 @@ public GrpcFactory(Parameters parameters) { } public GrpcFactory(GrpcTlsConfig tlsConfig) { - this(tlsConfig, null, null, null); + this(null, tlsConfig, null, null, null); } - private GrpcFactory(GrpcTlsConfig tlsConfig, GrpcTlsConfig adminTlsConfig, + private GrpcFactory(GrpcServices.Customizer servicesCustomizer, + 
GrpcTlsConfig tlsConfig, GrpcTlsConfig adminTlsConfig, GrpcTlsConfig clientTlsConfig, GrpcTlsConfig serverTlsConfig) { + this.servicesCustomizer = servicesCustomizer; + this.tlsConfig = tlsConfig; this.adminTlsConfig = adminTlsConfig; this.clientTlsConfig = clientTlsConfig; @@ -123,10 +129,11 @@ public LogAppender newLogAppender(RaftServer.Division server, LeaderState state, } @Override - public GrpcService newRaftServerRpc(RaftServer server) { + public GrpcServices newRaftServerRpc(RaftServer server) { checkPooledByteBufAllocatorUseCacheForAllThreads(LOG::info); - return GrpcService.newBuilder() + return GrpcServicesImpl.newBuilder() .setServer(server) + .setCustomizer(servicesCustomizer) .setAdminTlsConfig(getAdminTlsConfig()) .setServerTlsConfig(getServerTlsConfig()) .setClientTlsConfig(getClientTlsConfig()) diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcUtil.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcUtil.java index 22653b6efb..311bcb8778 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcUtil.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/GrpcUtil.java @@ -24,8 +24,12 @@ import org.apache.ratis.security.TlsConf.CertificatesConf; import org.apache.ratis.security.TlsConf.PrivateKeyConf; import org.apache.ratis.security.TlsConf.KeyManagerConf; +import org.apache.ratis.thirdparty.com.google.protobuf.MessageLite; import org.apache.ratis.thirdparty.io.grpc.ManagedChannel; import org.apache.ratis.thirdparty.io.grpc.Metadata; +import org.apache.ratis.thirdparty.io.grpc.MethodDescriptor; +import org.apache.ratis.thirdparty.io.grpc.ServerCallHandler; +import org.apache.ratis.thirdparty.io.grpc.ServerServiceDefinition; import org.apache.ratis.thirdparty.io.grpc.Status; import org.apache.ratis.thirdparty.io.grpc.StatusRuntimeException; import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver; @@ -59,14 +63,8 @@ public interface GrpcUtil { Metadata.Key.of("heartbeat", Metadata.ASCII_STRING_MARSHALLER); static 
StatusRuntimeException wrapException(Throwable t) { - return wrapException(t, -1); - } - - static StatusRuntimeException wrapException(Throwable t, long callId) { t = JavaUtils.unwrapCompletionException(t); - Metadata trailers = new StatusRuntimeExceptionMetadataBuilder(t) - .addCallId(callId) - .build(); + Metadata trailers = new StatusRuntimeExceptionMetadataBuilder(t).build(); return wrapException(t, trailers); } @@ -78,6 +76,10 @@ static StatusRuntimeException wrapException(Throwable t, long callId, boolean is .build(); return wrapException(t, trailers); } + static StatusRuntimeException wrapException(Throwable t, long callId) { + return wrapException(t, callId, false); + } + static StatusRuntimeException wrapException(Throwable t, Metadata trailers) { return new StatusRuntimeException( @@ -163,13 +165,6 @@ static IOException unwrapIOException(Throwable t) { return e; } - static void asyncCall( - StreamObserver responseObserver, - CheckedSupplier, IOException> supplier, - Function toProto) { - asyncCall(responseObserver, supplier, toProto, throwable -> {}); - } - static void asyncCall( StreamObserver responseObserver, CheckedSupplier, IOException> supplier, @@ -304,4 +299,26 @@ static void setKeyManager(SslContextBuilder b, KeyManagerConf keyManagerConfig) b.keyManager(privateKey.get(), certificates.get()); } } + + /** + * Used to add a method to Service definition with a custom request marshaller. + * + * @param orig original service definition. + * @param newServiceBuilder builder of the new service definition. + * @param origMethod the original method definition. + * @param customMarshaller custom marshaller to be set for the method. 
+ * @param + * @param + */ + static void addMethodWithCustomMarshaller( + ServerServiceDefinition orig, ServerServiceDefinition.Builder newServiceBuilder, + MethodDescriptor origMethod, MethodDescriptor.PrototypeMarshaller customMarshaller) { + MethodDescriptor newMethod = origMethod.toBuilder() + .setRequestMarshaller(customMarshaller) + .build(); + @SuppressWarnings("unchecked") + ServerCallHandler serverCallHandler = + (ServerCallHandler) orig.getMethod(newMethod.getFullMethodName()).getServerCallHandler(); + newServiceBuilder.addMethod(newMethod, serverCallHandler); + } } diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/client/GrpcClientProtocolClient.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/client/GrpcClientProtocolClient.java index 08bacdb73b..3b9d512683 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/client/GrpcClientProtocolClient.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/client/GrpcClientProtocolClient.java @@ -128,8 +128,11 @@ private ManagedChannel buildChannel(String address, GrpcTlsConfig tlsConf, SizeInBytes flowControlWindow, SizeInBytes maxMessageSize) { NettyChannelBuilder channelBuilder = NettyChannelBuilder.forTarget(address); + // ignore any http proxy for grpc + channelBuilder.proxyDetector(uri -> null); if (tlsConf != null) { + LOG.debug("Setting TLS for {}", address); SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient(); GrpcUtil.setTrustManager(sslContextBuilder, tlsConf.getTrustManager()); if (tlsConf.getMtlsEnabled()) { diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/metrics/ZeroCopyMetrics.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/metrics/ZeroCopyMetrics.java new file mode 100644 index 0000000000..1fcc317f9d --- /dev/null +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/metrics/ZeroCopyMetrics.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.grpc.metrics; + +import org.apache.ratis.metrics.LongCounter; +import org.apache.ratis.metrics.MetricRegistryInfo; +import org.apache.ratis.metrics.RatisMetricRegistry; +import org.apache.ratis.metrics.RatisMetrics; +import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.ratis.thirdparty.com.google.protobuf.AbstractMessage; + +import java.util.function.Supplier; + +public class ZeroCopyMetrics extends RatisMetrics { + private static final String RATIS_GRPC_METRICS_APP_NAME = "ratis_grpc"; + private static final String RATIS_GRPC_METRICS_COMP_NAME = "zero_copy"; + private static final String RATIS_GRPC_METRICS_DESC = "Metrics for Ratis Grpc Zero copy"; + + private final LongCounter zeroCopyMessages = getRegistry().counter("num_zero_copy_messages"); + private final LongCounter nonZeroCopyMessages = getRegistry().counter("num_non_zero_copy_messages"); + private final LongCounter releasedMessages = getRegistry().counter("num_released_messages"); + + public ZeroCopyMetrics() { + super(createRegistry()); + } + + private static RatisMetricRegistry createRegistry() { + return create(new MetricRegistryInfo("", + RATIS_GRPC_METRICS_APP_NAME, + RATIS_GRPC_METRICS_COMP_NAME, RATIS_GRPC_METRICS_DESC)); + } + + public void addUnreleased(String name, Supplier unreleased) { + getRegistry().gauge(name + "_num_unreleased_messages", () -> unreleased); + } + + + public void onZeroCopyMessage(AbstractMessage ignored) { + zeroCopyMessages.inc(); + } + + public void onNonZeroCopyMessage(AbstractMessage ignored) { + nonZeroCopyMessages.inc(); + } + + public void 
onReleasedMessage(AbstractMessage ignored) { + releasedMessages.inc(); + } + + @VisibleForTesting + public long zeroCopyMessages() { + return zeroCopyMessages.getCount(); + } + + @VisibleForTesting + public long nonZeroCopyMessages() { + return nonZeroCopyMessages.getCount(); + } + + @VisibleForTesting + public long releasedMessages() { + return releasedMessages.getCount(); + } +} \ No newline at end of file diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcAdminProtocolService.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcAdminProtocolService.java index feb780302e..f6a6cfba6a 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcAdminProtocolService.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcAdminProtocolService.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -32,8 +32,12 @@ import org.apache.ratis.proto.RaftProtos.RaftClientReplyProto; import org.apache.ratis.proto.RaftProtos.GroupManagementRequestProto; import org.apache.ratis.proto.grpc.AdminProtocolServiceGrpc.AdminProtocolServiceImplBase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class GrpcAdminProtocolService extends AdminProtocolServiceImplBase { + static final Logger LOG = LoggerFactory.getLogger(GrpcAdminProtocolService.class); + private final AdminAsynchronousProtocol protocol; public GrpcAdminProtocolService(AdminAsynchronousProtocol protocol) { @@ -45,7 +49,8 @@ public void groupManagement(GroupManagementRequestProto proto, StreamObserver responseObserver) { final GroupManagementRequest request = ClientProtoUtils.toGroupManagementRequest(proto); GrpcUtil.asyncCall(responseObserver, () -> protocol.groupManagementAsync(request), - ClientProtoUtils::toRaftClientReplyProto); + ClientProtoUtils::toRaftClientReplyProto, + t -> 
LOG.warn("Failed groupManagement: {}, {}", proto.getOpCase(), request, t)); } @Override @@ -53,14 +58,16 @@ public void groupList(GroupListRequestProto proto, StreamObserver responseObserver) { final GroupListRequest request = ClientProtoUtils.toGroupListRequest(proto); GrpcUtil.asyncCall(responseObserver, () -> protocol.getGroupListAsync(request), - ClientProtoUtils::toGroupListReplyProto); + ClientProtoUtils::toGroupListReplyProto, + t -> LOG.warn("Failed to groupList: {}", request, t)); } @Override public void groupInfo(GroupInfoRequestProto proto, StreamObserver responseObserver) { final GroupInfoRequest request = ClientProtoUtils.toGroupInfoRequest(proto); GrpcUtil.asyncCall(responseObserver, () -> protocol.getGroupInfoAsync(request), - ClientProtoUtils::toGroupInfoReplyProto); + ClientProtoUtils::toGroupInfoReplyProto, + t -> LOG.warn("Failed to groupInfo: {}", request, t)); } @Override @@ -68,7 +75,8 @@ public void setConfiguration(SetConfigurationRequestProto proto, StreamObserver responseObserver) { final SetConfigurationRequest request = ClientProtoUtils.toSetConfigurationRequest(proto); GrpcUtil.asyncCall(responseObserver, () -> protocol.setConfigurationAsync(request), - ClientProtoUtils::toRaftClientReplyProto); + ClientProtoUtils::toRaftClientReplyProto, + t -> LOG.warn("Failed to setConfiguration: {}", request, t)); } @Override @@ -76,7 +84,8 @@ public void transferLeadership(TransferLeadershipRequestProto proto, StreamObserver responseObserver) { final TransferLeadershipRequest request = ClientProtoUtils.toTransferLeadershipRequest(proto); GrpcUtil.asyncCall(responseObserver, () -> protocol.transferLeadershipAsync(request), - ClientProtoUtils::toRaftClientReplyProto); + ClientProtoUtils::toRaftClientReplyProto, + t -> LOG.warn("Failed to transferLeadership: {}", request, t)); } @Override @@ -84,7 +93,8 @@ public void snapshotManagement(SnapshotManagementRequestProto proto, StreamObserver responseObserver) { final SnapshotManagementRequest request = 
ClientProtoUtils.toSnapshotManagementRequest(proto); GrpcUtil.asyncCall(responseObserver, () -> protocol.snapshotManagementAsync(request), - ClientProtoUtils::toRaftClientReplyProto); + ClientProtoUtils::toRaftClientReplyProto, + t -> LOG.warn("Failed snapshotManagement: {}, {}", proto.getOpCase(), request, t)); } @Override @@ -92,6 +102,7 @@ public void leaderElectionManagement(LeaderElectionManagementRequestProto proto, StreamObserver responseObserver) { final LeaderElectionManagementRequest request = ClientProtoUtils.toLeaderElectionManagementRequest(proto); GrpcUtil.asyncCall(responseObserver, () -> protocol.leaderElectionManagementAsync(request), - ClientProtoUtils::toRaftClientReplyProto); + ClientProtoUtils::toRaftClientReplyProto, + t -> LOG.warn("Failed leaderElectionManagement: {}, {}", proto.getOpCase(), request, t)); } } diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcClientProtocolService.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcClientProtocolService.java index 9c19684677..b7548780cd 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcClientProtocolService.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcClientProtocolService.java @@ -19,10 +19,13 @@ import org.apache.ratis.client.impl.ClientProtoUtils; import org.apache.ratis.grpc.GrpcUtil; +import org.apache.ratis.grpc.metrics.ZeroCopyMetrics; +import org.apache.ratis.grpc.util.ZeroCopyMessageMarshaller; import org.apache.ratis.protocol.*; import org.apache.ratis.protocol.exceptions.AlreadyClosedException; import org.apache.ratis.protocol.exceptions.GroupMismatchException; import org.apache.ratis.protocol.exceptions.RaftException; +import org.apache.ratis.thirdparty.io.grpc.ServerServiceDefinition; import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver; import org.apache.ratis.proto.RaftProtos.RaftClientReplyProto; import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto; @@ -30,16 +33,15 @@ import 
org.apache.ratis.util.CollectionUtils; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.SlidingWindow; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicBoolean; @@ -48,15 +50,21 @@ import java.util.function.Consumer; import java.util.function.Supplier; +import static org.apache.ratis.grpc.GrpcUtil.addMethodWithCustomMarshaller; +import static org.apache.ratis.proto.grpc.RaftClientProtocolServiceGrpc.getOrderedMethod; +import static org.apache.ratis.proto.grpc.RaftClientProtocolServiceGrpc.getUnorderedMethod; + class GrpcClientProtocolService extends RaftClientProtocolServiceImplBase { private static final Logger LOG = LoggerFactory.getLogger(GrpcClientProtocolService.class); private static class PendingOrderedRequest implements SlidingWindow.ServerSideRequest { + private final ReferenceCountedObject requestRef; private final RaftClientRequest request; private final AtomicReference reply = new AtomicReference<>(); - PendingOrderedRequest(RaftClientRequest request) { - this.request = request; + PendingOrderedRequest(ReferenceCountedObject requestRef) { + this.requestRef = requestRef; + this.request = requestRef != null ? 
requestRef.retain() : null; } @Override @@ -76,15 +84,23 @@ public boolean hasReply() { @Override public void setReply(RaftClientReply r) { final boolean set = reply.compareAndSet(null, r); - Preconditions.assertTrue(set, () -> "Reply is already set: request=" + request + ", reply=" + reply); + Preconditions.assertTrue(set, () -> "Reply is already set: request=" + + request.toStringShort() + ", reply=" + reply); } RaftClientReply getReply() { return reply.get(); } - RaftClientRequest getRequest() { - return request; + ReferenceCountedObject getRequestRef() { + return requestRef; + } + + @Override + public void release() { + if (requestRef != null) { + requestRef.release(); + } } @Override @@ -135,18 +151,38 @@ void closeAllExisting(RaftGroupId groupId) { private final ExecutorService executor; private final OrderedStreamObservers orderedStreamObservers = new OrderedStreamObservers(); + private final boolean zeroCopyEnabled; + private final ZeroCopyMessageMarshaller zeroCopyRequestMarshaller; GrpcClientProtocolService(Supplier idSupplier, RaftClientAsynchronousProtocol protocol, - ExecutorService executor) { + ExecutorService executor, boolean zeroCopyEnabled, ZeroCopyMetrics zeroCopyMetrics) { this.idSupplier = idSupplier; this.protocol = protocol; this.executor = executor; + this.zeroCopyEnabled = zeroCopyEnabled; + this.zeroCopyRequestMarshaller = new ZeroCopyMessageMarshaller<>(RaftClientRequestProto.getDefaultInstance(), + zeroCopyMetrics::onZeroCopyMessage, zeroCopyMetrics::onNonZeroCopyMessage, zeroCopyMetrics::onReleasedMessage); + zeroCopyMetrics.addUnreleased("client_protocol", zeroCopyRequestMarshaller::getUnclosedCount); } RaftPeerId getId() { return idSupplier.get(); } + ServerServiceDefinition bindServiceWithZeroCopy() { + ServerServiceDefinition orig = super.bindService(); + if (!zeroCopyEnabled) { + LOG.info("{}: Zero copy is disabled.", getId()); + return orig; + } + ServerServiceDefinition.Builder builder = 
ServerServiceDefinition.builder(orig.getServiceDescriptor().getName()); + + addMethodWithCustomMarshaller(orig, builder, getOrderedMethod(), zeroCopyRequestMarshaller); + addMethodWithCustomMarshaller(orig, builder, getUnorderedMethod(), zeroCopyRequestMarshaller); + + return builder.build(); + } + @Override public StreamObserver ordered(StreamObserver responseObserver) { final OrderedRequestStreamObserver so = new OrderedRequestStreamObserver(responseObserver); @@ -220,31 +256,38 @@ boolean isClosed() { return isClosed.get(); } - CompletableFuture processClientRequest(RaftClientRequest request, Consumer replyHandler) { - try { - final String errMsg = LOG.isDebugEnabled() ? "processClientRequest for " + request : ""; - return protocol.submitClientRequestAsync(request - ).thenAcceptAsync(replyHandler, executor - ).exceptionally(exception -> { - // TODO: the exception may be from either raft or state machine. - // Currently we skip all the following responses when getting an - // exception from the state machine. - responseError(exception, () -> errMsg); - return null; - }); - } catch (IOException e) { - throw new CompletionException("Failed processClientRequest for " + request + " in " + name, e); - } + CompletableFuture processClientRequest(ReferenceCountedObject requestRef, + Consumer replyHandler) { + final String errMsg = LOG.isDebugEnabled() ? "processClientRequest for " + requestRef.get() : ""; + return protocol.submitClientRequestAsync(requestRef + ).thenAcceptAsync(replyHandler, executor + ).exceptionally(exception -> { + // TODO: the exception may be from either raft or state machine. + // Currently we skip all the following responses when getting an + // exception from the state machine. 
+ responseError(exception, () -> errMsg); + return null; + }); } - abstract void processClientRequest(RaftClientRequest request); + abstract void processClientRequest(ReferenceCountedObject requestRef); @Override public void onNext(RaftClientRequestProto request) { + ReferenceCountedObject requestRef = null; try { final RaftClientRequest r = ClientProtoUtils.toRaftClientRequest(request); - processClientRequest(r); + requestRef = ReferenceCountedObject.wrap(r, () -> {}, released -> { + if (released) { + zeroCopyRequestMarshaller.release(request); + } + }); + + processClientRequest(requestRef); } catch (Exception e) { + if (requestRef == null) { + zeroCopyRequestMarshaller.release(request); + } responseError(e, () -> "onNext for " + ClientProtoUtils.toString(request) + " in " + name); } } @@ -278,15 +321,21 @@ private class UnorderedRequestStreamObserver extends RequestStreamObserver { } @Override - void processClientRequest(RaftClientRequest request) { - final CompletableFuture f = processClientRequest(request, reply -> { - if (!reply.isSuccess()) { - LOG.info("Failed " + request + ", reply=" + reply); - } - final RaftClientReplyProto proto = ClientProtoUtils.toRaftClientReplyProto(reply); - responseNext(proto); - }); - final long callId = request.getCallId(); + void processClientRequest(ReferenceCountedObject requestRef) { + final long callId = requestRef.retain().getCallId(); + final CompletableFuture f; + try { + f = processClientRequest(requestRef, reply -> { + if (!reply.isSuccess()) { + LOG.info("Failed request cid={}, reply={}", callId, reply); + } + final RaftClientReplyProto proto = ClientProtoUtils.toRaftClientReplyProto(reply); + responseNext(proto); + }); + } finally { + requestRef.release(); + } + put(callId, f); f.thenAccept(dummy -> remove(callId)); } @@ -329,31 +378,40 @@ RaftGroupId getGroupId() { void processClientRequest(PendingOrderedRequest pending) { final long seq = pending.getSeqNum(); - processClientRequest(pending.getRequest(), + 
processClientRequest(pending.getRequestRef(), reply -> slidingWindow.receiveReply(seq, reply, this::sendReply)); } @Override - void processClientRequest(RaftClientRequest r) { - if (isClosed()) { - final AlreadyClosedException exception = new AlreadyClosedException(getName() + ": the stream is closed"); - responseError(exception, () -> "processClientRequest (stream already closed) for " + r); - } + void processClientRequest(ReferenceCountedObject requestRef) { + final RaftClientRequest request = requestRef.retain(); + try { + if (isClosed()) { + final AlreadyClosedException exception = new AlreadyClosedException(getName() + ": the stream is closed"); + responseError(exception, () -> "processClientRequest (stream already closed) for " + request); + } - final RaftGroupId requestGroupId = r.getRaftGroupId(); - // use the group id in the first request as the group id of this observer - final RaftGroupId updated = groupId.updateAndGet(g -> g != null ? g: requestGroupId); - final PendingOrderedRequest pending = new PendingOrderedRequest(r); - - if (!requestGroupId.equals(updated)) { - final GroupMismatchException exception = new GroupMismatchException(getId() - + ": The group (" + requestGroupId + ") of " + r.getClientId() - + " does not match the group (" + updated + ") of the " + JavaUtils.getClassSimpleName(getClass())); - responseError(exception, () -> "processClientRequest (Group mismatched) for " + r); - return; - } + final RaftGroupId requestGroupId = request.getRaftGroupId(); + // use the group id in the first request as the group id of this observer + final RaftGroupId updated = groupId.updateAndGet(g -> g != null ? 
g : requestGroupId); - slidingWindow.receivedRequest(pending, this::processClientRequest); + if (!requestGroupId.equals(updated)) { + final GroupMismatchException exception = new GroupMismatchException(getId() + + ": The group (" + requestGroupId + ") of " + request.getClientId() + + " does not match the group (" + updated + ") of the " + JavaUtils.getClassSimpleName(getClass())); + responseError(exception, () -> "processClientRequest (Group mismatched) for " + request); + return; + } + final PendingOrderedRequest pending = new PendingOrderedRequest(requestRef); + try { + slidingWindow.receivedRequest(pending, this::processClientRequest); + } catch (Exception e) { + pending.release(); + throw e; + } + } finally { + requestRef.release(); + } } private void sendReply(PendingOrderedRequest ready) { diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java index ec44d8c485..c016dabfe4 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcLogAppender.java @@ -51,11 +51,17 @@ import java.io.IOException; import java.io.InterruptedIOException; -import java.util.*; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Queue; +import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; /** @@ -66,6 +72,7 @@ public class GrpcLogAppender extends LogAppenderBase { private enum BatchLogKey implements BatchLogger.Key { RESET_CLIENT, + INCONSISTENCY_REPLY, APPEND_LOG_RESPONSE_HANDLER_ON_ERROR } @@ -156,7 +163,7 @@ synchronized int 
process(Event event) { private final TimeDuration logMessageBatchDuration; private final int maxOutstandingInstallSnapshots; private final TimeoutExecutor scheduler = TimeoutExecutor.getInstance(); - + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile StreamObservers appendLogRequestObserver; private final boolean useSeparateHBChannel; @@ -170,7 +177,7 @@ synchronized int process(Event event) { public GrpcLogAppender(RaftServer.Division server, LeaderState leaderState, FollowerInfo f) { super(server, leaderState, f); - Preconditions.assertNotNull(getServerRpc(), "getServerRpc()"); + Objects.requireNonNull(getServerRpc(), "getServerRpc() == null"); final RaftProperties properties = server.getRaftServer().getProperties(); this.maxPendingRequestsNum = GrpcConfigKeys.Server.leaderOutstandingAppendsMax(properties); @@ -192,8 +199,8 @@ public GrpcLogAppender(RaftServer.Division server, LeaderState leaderState, Foll } @Override - public GrpcService getServerRpc() { - return (GrpcService)super.getServerRpc(); + public GrpcServicesImpl getServerRpc() { + return (GrpcServicesImpl)super.getServerRpc(); } private GrpcServerProtocolClient getClient() throws IOException { @@ -217,7 +224,7 @@ private void resetClient(AppendEntriesRequest request, Event event) { .orElseGet(f::getMatchIndex); if (event.isError() && request == null) { final long followerNextIndex = f.getNextIndex(); - BatchLogger.warn(BatchLogKey.RESET_CLIENT, f.getId() + "-" + followerNextIndex, suffix -> + BatchLogger.print(BatchLogKey.RESET_CLIENT, f.getId() + "-" + followerNextIndex, suffix -> LOG.warn("{}: Follower failed (request=null, errorCount={}); keep nextIndex ({}) unchanged and retry.{}", this, errorCount, followerNextIndex, suffix), logMessageBatchDuration); return; @@ -302,8 +309,14 @@ private long errorWaitTimeMs() { @Override public CompletableFuture stopAsync() { - grpcServerMetrics.unregister(); - return super.stopAsync(); + try (AutoCloseableLock ignored = 
lock.writeLock(caller, LOG::trace)) { + if (appendLogRequestObserver != null) { + appendLogRequestObserver.stop(); + appendLogRequestObserver = null; + } + grpcServerMetrics.unregister(); + return super.stopAsync(); + } } @Override @@ -379,30 +392,42 @@ public Comparator getCallIdComparator() { } private void appendLog(boolean heartbeat) throws IOException { - final AppendEntriesRequestProto pending; + final ReferenceCountedObject pending; final AppendEntriesRequest request; try (AutoCloseableLock writeLock = lock.writeLock(caller, LOG::trace)) { + if (!isRunning()) { + return; + } // Prepare and send the append request. // Note changes on follower's nextIndex and ops on pendingRequests should always be done under the write-lock - pending = newAppendEntriesRequest(callId.getAndIncrement(), heartbeat); + pending = nextAppendEntriesRequest(callId.getAndIncrement(), heartbeat); if (pending == null) { return; } - request = new AppendEntriesRequest(pending, getFollowerId(), grpcServerMetrics); - pendingRequests.put(request); - increaseNextIndex(pending); - if (appendLogRequestObserver == null) { - appendLogRequestObserver = new StreamObservers( - getClient(), new AppendLogResponseHandler(), useSeparateHBChannel, getWaitTimeMin()); + try { + request = new AppendEntriesRequest(pending.get(), getFollowerId(), grpcServerMetrics); + pendingRequests.put(request); + increaseNextIndex(pending.get()); + if (appendLogRequestObserver == null) { + appendLogRequestObserver = new StreamObservers( + getClient(), new AppendLogResponseHandler(), useSeparateHBChannel, getWaitTimeMin()); + } + } catch (Exception e) { + pending.release(); + throw e; } } - final TimeDuration remaining = getRemainingWaitTime(); - if (remaining.isPositive()) { - sleep(remaining, heartbeat); - } - if (isRunning()) { - sendRequest(request, pending); + try { + final TimeDuration remaining = getRemainingWaitTime(); + if (remaining.isPositive()) { + sleep(remaining, heartbeat); + } + if (isRunning()) { + 
sendRequest(request, pending.get()); + } + } finally { + pending.release(); } } @@ -419,7 +444,7 @@ private static void sleep(TimeDuration waitTime, boolean heartbeat) private void sendRequest(AppendEntriesRequest request, AppendEntriesRequestProto proto) throws InterruptedIOException { - CodeInjectionForTesting.execute(GrpcService.GRPC_SEND_SERVER_REQUEST, + CodeInjectionForTesting.execute(GrpcServicesImpl.GRPC_SEND_SERVER_REQUEST, getServer().getId(), null, proto); resetHeartbeatTrigger(); @@ -435,7 +460,7 @@ private void sendRequest(AppendEntriesRequest request, } private void timeoutAppendRequest(long cid, boolean heartbeat) { - final AppendEntriesRequest pending = pendingRequests.handleTimeout(cid, heartbeat); + final AppendEntriesRequest pending = pendingRequests.remove(cid, heartbeat); if (pending != null) { final int errorCount = replyState.process(Event.TIMEOUT); LOG.warn("{}: Timed out {}appendEntries, errorCount={}, request={}", @@ -516,8 +541,9 @@ private void onNextImpl(AppendEntriesRequest request, AppendEntriesReplyProto re break; case INCONSISTENCY: grpcServerMetrics.onRequestInconsistency(getFollowerId().toString()); - LOG.warn("{}: received {} reply with nextIndex {}, errorCount={}, request={}", - this, reply.getResult(), reply.getNextIndex(), errorCount, request); + BatchLogger.print(BatchLogKey.INCONSISTENCY_REPLY, getFollower().getName() + "_" + reply.getNextIndex(), + suffix -> LOG.warn("{}: received {} reply with nextIndex {}, errorCount={}, request={} {}", + this, reply.getResult(), reply.getNextIndex(), errorCount, request, suffix)); final long requestFirstIndex = request != null? 
request.getFirstIndex(): RaftLog.INVALID_LOG_INDEX; updateNextIndex(getNextIndexForInconsistency(requestFirstIndex, reply.getNextIndex())); break; @@ -537,7 +563,7 @@ public void onError(Throwable t) { LOG.info("{} is already stopped", GrpcLogAppender.this); return; } - BatchLogger.warn(BatchLogKey.APPEND_LOG_RESPONSE_HANDLER_ON_ERROR, AppendLogResponseHandler.this.name, + BatchLogger.print(BatchLogKey.APPEND_LOG_RESPONSE_HANDLER_ON_ERROR, AppendLogResponseHandler.this.name, suffix -> GrpcUtil.warn(LOG, () -> this + ": Failed appendEntries" + suffix, t), logMessageBatchDuration, t instanceof StatusRuntimeException); grpcServerMetrics.onRequestRetry(); // Update try counter @@ -567,7 +593,7 @@ private void updateNextIndex(long replyNextIndex) { private class InstallSnapshotResponseHandler implements StreamObserver { private final String name = getFollower().getName() + "-" + JavaUtils.getClassSimpleName(getClass()); private final Queue pending; - private final AtomicBoolean done = new AtomicBoolean(false); + private final CompletableFuture done = new CompletableFuture<>(); private final boolean isNotificationOnly; InstallSnapshotResponseHandler() { @@ -628,12 +654,18 @@ void notifyInstallSnapshotFinished(InstallSnapshotResult result, long snapshotIn getServer().getStateMachine().event().notifySnapshotInstalled(result, snapshotIndex, getFollower().getPeer()); } - boolean isDone() { - return done.get(); + void waitForResponse() { + try { + done.get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } catch (ExecutionException e) { + throw new IllegalStateException("Failed to complete " + name, e); + } } void close() { - done.set(true); + done.complete(null); notifyLogAppender(); } @@ -702,6 +734,8 @@ public void onNext(InstallSnapshotReplyProto reply) { LOG.error("Unrecognized the reply result {}: Leader is {}, follower is {}", reply.getResult(), getServer().getId(), getFollowerId()); break; + case SNAPSHOT_EXPIRED: + LOG.warn("{}: 
Follower could not install snapshot as it is expired.", this); default: break; } @@ -767,14 +801,7 @@ private void installSnapshot(SnapshotInfo snapshot) { } return; } - - while (isRunning() && !responseHandler.isDone()) { - try { - getEventAwaitForSignal().await(); - } catch (InterruptedException ignored) { - Thread.currentThread().interrupt(); - } - } + responseHandler.waitForResponse(); if (responseHandler.hasAllResponse()) { getFollower().setSnapshotIndex(snapshot.getTermIndex().getIndex()); @@ -812,14 +839,7 @@ private void notifyInstallSnapshot(TermIndex firstAvailable) { } return; } - - while (isRunning() && !responseHandler.isDone()) { - try { - getEventAwaitForSignal().await(); - } catch (InterruptedException ignored) { - Thread.currentThread().interrupt(); - } - } + responseHandler.waitForResponse(); } /** @@ -863,6 +883,7 @@ private TermIndex shouldNotifyToInstallSnapshot() { static class AppendEntriesRequest { private final Timekeeper timer; + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile Timekeeper.Context timerContext; private final long callId; @@ -871,7 +892,7 @@ static class AppendEntriesRequest { private final TermIndex firstEntry; private final TermIndex lastEntry; - + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile Timestamp sendTime; AppendEntriesRequest(AppendEntriesRequestProto proto, RaftPeerId followerId, GrpcServerMetrics grpcServerMetrics) { @@ -954,9 +975,5 @@ AppendEntriesRequest remove(AppendEntriesReplyProto reply) { AppendEntriesRequest remove(long cid, boolean isHeartbeat) { return isHeartbeat ? heartbeats.remove(cid): logRequests.remove(cid); } - - public AppendEntriesRequest handleTimeout(long callId, boolean heartbeat) { - return heartbeat ? 
heartbeats.remove(callId) : logRequests.get(callId); - } } } diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolClient.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolClient.java index 970134d02a..4a280ab335 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolClient.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolClient.java @@ -78,6 +78,8 @@ private ManagedChannel buildChannel(RaftPeer target, int flowControlWindow, GrpcTlsConfig tlsConfig) { NettyChannelBuilder channelBuilder = NettyChannelBuilder.forTarget(target.getAddress()); + // ignore any http proxy for grpc + channelBuilder.proxyDetector(uri -> null); if (tlsConfig!= null) { SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient(); diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolService.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolService.java index 766e14321a..7e17cb3cf4 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolService.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServerProtocolService.java @@ -20,16 +20,21 @@ import java.util.function.Consumer; import java.util.function.Function; import org.apache.ratis.grpc.GrpcUtil; +import org.apache.ratis.grpc.metrics.ZeroCopyMetrics; +import org.apache.ratis.grpc.util.ZeroCopyMessageMarshaller; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.protocol.RaftServerProtocol; import org.apache.ratis.server.util.ServerStringUtils; +import org.apache.ratis.thirdparty.io.grpc.ServerServiceDefinition; import org.apache.ratis.thirdparty.io.grpc.Status; -import org.apache.ratis.thirdparty.io.grpc.StatusRuntimeException; import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver; import org.apache.ratis.proto.RaftProtos.*; import 
org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc.RaftServerProtocolServiceImplBase; +import org.apache.ratis.util.BatchLogger; +import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.ProtoUtils; +import org.apache.ratis.util.ReferenceCountedObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,28 +45,45 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; +import static org.apache.ratis.grpc.GrpcUtil.addMethodWithCustomMarshaller; +import static org.apache.ratis.proto.grpc.RaftServerProtocolServiceGrpc.getAppendEntriesMethod; + class GrpcServerProtocolService extends RaftServerProtocolServiceImplBase { public static final Logger LOG = LoggerFactory.getLogger(GrpcServerProtocolService.class); + private enum BatchLogKey implements BatchLogger.Key { + COMPLETED_REQUEST, + COMPLETED_REPLY + } + static class PendingServerRequest { - private final REQUEST request; + private final AtomicReference> requestRef; private final CompletableFuture future = new CompletableFuture<>(); - PendingServerRequest(REQUEST request) { - this.request = request; + PendingServerRequest(ReferenceCountedObject requestRef) { + requestRef.retain(); + this.requestRef = new AtomicReference<>(requestRef); } REQUEST getRequest() { - return request; + return Optional.ofNullable(requestRef.get()) + .map(ReferenceCountedObject::get) + .orElse(null); } CompletableFuture getFuture() { return future; } + + void release() { + Optional.ofNullable(requestRef.getAndSet(null)) + .ifPresent(ReferenceCountedObject::release); + } } abstract class ServerRequestStreamObserver implements StreamObserver { private final RaftServer.Op op; + private final Supplier nameSupplier; private final StreamObserver responseObserver; /** For ordered {@link #onNext(Object)} requests. 
*/ private final AtomicReference> previousOnNext = new AtomicReference<>(); @@ -72,9 +94,14 @@ abstract class ServerRequestStreamObserver implements StreamObse ServerRequestStreamObserver(RaftServer.Op op, StreamObserver responseObserver) { this.op = op; + this.nameSupplier = MemoizedSupplier.valueOf(() -> getId() + "_" + op); this.responseObserver = responseObserver; } + String getName() { + return nameSupplier.get(); + } + private String getPreviousRequestString() { return Optional.ofNullable(previousOnNext.get()) .map(PendingServerRequest::getRequest) @@ -82,24 +109,39 @@ private String getPreviousRequestString() { .orElse(null); } - abstract CompletableFuture process(REQUEST request) throws IOException; + CompletableFuture process(REQUEST request) throws IOException { + throw new UnsupportedOperationException("This method is not supported."); + } + + CompletableFuture process(ReferenceCountedObject requestRef) + throws IOException { + try { + return process(requestRef.retain()); + } finally { + requestRef.release(); + } + } + + void release(REQUEST req) { + } abstract long getCallId(REQUEST request); + boolean isHeartbeat(REQUEST request) { + return false; + } + abstract String requestToString(REQUEST request); abstract String replyToString(REPLY reply); abstract boolean replyInOrder(REQUEST request); - StatusRuntimeException wrapException(Throwable e, REQUEST request) { - return GrpcUtil.wrapException(e, getCallId(request)); - } - - private void handleError(Throwable e, REQUEST request) { - GrpcUtil.warn(LOG, () -> getId() + ": Failed " + op + " request " + requestToString(request), e); + private synchronized void handleError(Throwable e, long callId, boolean isHeartbeat) { + GrpcUtil.warn(LOG, () -> getId() + ": Failed " + op + " request cid=" + callId + ", isHeartbeat? 
" + + isHeartbeat, e); if (isClosed.compareAndSet(false, true)) { - responseObserver.onError(wrapException(e, request)); + responseObserver.onError(GrpcUtil.wrapException(e, callId, isHeartbeat)); } } @@ -119,24 +161,32 @@ void composeRequest(CompletableFuture current) { @Override public void onNext(REQUEST request) { + ReferenceCountedObject requestRef = ReferenceCountedObject.wrap(request, () -> {}, released -> { + if (released) { + release(request); + } + }); + if (!replyInOrder(request)) { try { - composeRequest(process(request).thenApply(this::handleReply)); + composeRequest(process(requestRef).thenApply(this::handleReply)); } catch (Exception e) { - handleError(e, request); + handleError(e, getCallId(request), isHeartbeat(request)); + release(request); } return; } - final PendingServerRequest current = new PendingServerRequest<>(request); - final PendingServerRequest previous = previousOnNext.getAndSet(current); - final CompletableFuture previousFuture = Optional.ofNullable(previous) - .map(PendingServerRequest::getFuture) + final PendingServerRequest current = new PendingServerRequest<>(requestRef); + final long callId = getCallId(current.getRequest()); + final boolean isHeartbeat = isHeartbeat(current.getRequest()); + final Optional> previous = Optional.ofNullable(previousOnNext.getAndSet(current)); + final CompletableFuture previousFuture = previous.map(PendingServerRequest::getFuture) .orElse(CompletableFuture.completedFuture(null)); try { - final CompletableFuture f = process(request).exceptionally(e -> { + final CompletableFuture f = process(requestRef).exceptionally(e -> { // Handle cases, such as RaftServer is paused - handleError(e, request); + handleError(e, callId, isHeartbeat); current.getFuture().completeExceptionally(e); return null; }).thenCombine(previousFuture, (reply, v) -> { @@ -146,21 +196,32 @@ public void onNext(REQUEST request) { }); composeRequest(f); } catch (Exception e) { - handleError(e, request); + handleError(e, callId, 
isHeartbeat); current.getFuture().completeExceptionally(e); + } finally { + previous.ifPresent(PendingServerRequest::release); + if (isClosed.get()) { + // Some requests may come after onCompleted or onError, ensure they're released. + releaseLast(); + } } } @Override public void onCompleted() { if (isClosed.compareAndSet(false, true)) { - LOG.info("{}: Completed {}, lastRequest: {}", getId(), op, getPreviousRequestString()); + BatchLogger.print(BatchLogKey.COMPLETED_REQUEST, getName(), + suffix -> LOG.info("{}: Completed {}, lastRequest: {} {}", + getId(), op, getPreviousRequestString(), suffix)); requestFuture.get().thenAccept(reply -> { - LOG.info("{}: Completed {}, lastReply: {}", getId(), op, reply); + BatchLogger.print(BatchLogKey.COMPLETED_REPLY, getName(), + suffix -> LOG.info("{}: Completed {}, lastReply: {} {}", getId(), op, reply, suffix)); responseObserver.onCompleted(); }); + releaseLast(); } } + @Override public void onError(Throwable t) { GrpcUtil.warn(LOG, () -> getId() + ": "+ op + " onError, lastRequest: " + getPreviousRequestString(), t); @@ -169,22 +230,54 @@ public void onError(Throwable t) { if (status != null && status.getCode() != Status.Code.CANCELLED) { responseObserver.onCompleted(); } + releaseLast(); } } + + private void releaseLast() { + Optional.ofNullable(previousOnNext.get()).ifPresent(PendingServerRequest::release); + } } private final Supplier idSupplier; private final RaftServer server; + private final boolean zeroCopyEnabled; + private final ZeroCopyMessageMarshaller zeroCopyRequestMarshaller; - GrpcServerProtocolService(Supplier idSupplier, RaftServer server) { + GrpcServerProtocolService(Supplier idSupplier, RaftServer server, boolean zeroCopyEnabled, + ZeroCopyMetrics zeroCopyMetrics) { this.idSupplier = idSupplier; this.server = server; + this.zeroCopyEnabled = zeroCopyEnabled; + this.zeroCopyRequestMarshaller = new ZeroCopyMessageMarshaller<>(AppendEntriesRequestProto.getDefaultInstance(), + 
zeroCopyMetrics::onZeroCopyMessage, zeroCopyMetrics::onNonZeroCopyMessage, zeroCopyMetrics::onReleasedMessage); + zeroCopyMetrics.addUnreleased("server_protocol", zeroCopyRequestMarshaller::getUnclosedCount); } RaftPeerId getId() { return idSupplier.get(); } + ServerServiceDefinition bindServiceWithZeroCopy() { + ServerServiceDefinition orig = super.bindService(); + if (!zeroCopyEnabled) { + LOG.info("{}: Zero copy is disabled.", getId()); + return orig; + } + ServerServiceDefinition.Builder builder = ServerServiceDefinition.builder(orig.getServiceDescriptor().getName()); + + // Add appendEntries with zero copy marshaller. + addMethodWithCustomMarshaller(orig, builder, getAppendEntriesMethod(), zeroCopyRequestMarshaller); + // Add remaining methods as is. + orig.getMethods().stream().filter( + x -> !x.getMethodDescriptor().getFullMethodName().equals(getAppendEntriesMethod().getFullMethodName()) + ).forEach( + builder::addMethod + ); + + return builder.build(); + } + @Override public void requestVote(RequestVoteRequestProto request, StreamObserver responseObserver) { @@ -225,8 +318,14 @@ public StreamObserver appendEntries( return new ServerRequestStreamObserver( RaftServerProtocol.Op.APPEND_ENTRIES, responseObserver) { @Override - CompletableFuture process(AppendEntriesRequestProto request) throws IOException { - return server.appendEntriesAsync(request); + CompletableFuture process(ReferenceCountedObject requestRef) + throws IOException { + return server.appendEntriesAsync(requestRef); + } + + @Override + void release(AppendEntriesRequestProto req) { + zeroCopyRequestMarshaller.release(req); } @Override @@ -234,9 +333,14 @@ long getCallId(AppendEntriesRequestProto request) { return request.getServerRequest().getCallId(); } + @Override + boolean isHeartbeat(AppendEntriesRequestProto request) { + return request.getEntriesCount() == 0; + } + @Override String requestToString(AppendEntriesRequestProto request) { - return 
ServerStringUtils.toAppendEntriesRequestString(request); + return ServerStringUtils.toAppendEntriesRequestString(request, null); } @Override @@ -248,11 +352,6 @@ String replyToString(AppendEntriesReplyProto reply) { boolean replyInOrder(AppendEntriesRequestProto request) { return request.getEntriesCount() != 0; } - - @Override - StatusRuntimeException wrapException(Throwable e, AppendEntriesRequestProto request) { - return GrpcUtil.wrapException(e, getCallId(request), request.getEntriesCount() == 0); - } }; } diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcService.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcService.java deleted file mode 100644 index 097900a0fb..0000000000 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcService.java +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.ratis.grpc.server; - -import org.apache.ratis.conf.RaftProperties; -import org.apache.ratis.grpc.GrpcConfigKeys; -import org.apache.ratis.grpc.GrpcTlsConfig; -import org.apache.ratis.grpc.GrpcUtil; -import org.apache.ratis.grpc.metrics.intercept.server.MetricServerInterceptor; -import org.apache.ratis.protocol.RaftGroupId; -import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.rpc.SupportedRpcType; -import org.apache.ratis.server.RaftServer; -import org.apache.ratis.server.RaftServerConfigKeys; -import org.apache.ratis.server.RaftServerRpcWithProxy; -import org.apache.ratis.server.protocol.RaftServerAsynchronousProtocol; -import org.apache.ratis.thirdparty.io.grpc.ServerInterceptors; -import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts; -import org.apache.ratis.thirdparty.io.grpc.netty.NettyServerBuilder; -import org.apache.ratis.thirdparty.io.grpc.Server; -import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver; -import org.apache.ratis.thirdparty.io.netty.channel.ChannelOption; -import org.apache.ratis.thirdparty.io.netty.handler.ssl.ClientAuth; -import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContextBuilder; - -import org.apache.ratis.proto.RaftProtos.*; -import org.apache.ratis.util.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.InetSocketAddress; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.function.Supplier; - -import static org.apache.ratis.thirdparty.io.netty.handler.ssl.SslProvider.OPENSSL; - -/** A grpc implementation of {@link org.apache.ratis.server.RaftServerRpc}. 
*/ -public final class GrpcService extends RaftServerRpcWithProxy> { - static final Logger LOG = LoggerFactory.getLogger(GrpcService.class); - public static final String GRPC_SEND_SERVER_REQUEST = - JavaUtils.getClassSimpleName(GrpcService.class) + ".sendRequest"; - - class AsyncService implements RaftServerAsynchronousProtocol { - - @Override - public CompletableFuture appendEntriesAsync(AppendEntriesRequestProto request) - throws IOException { - throw new UnsupportedOperationException("This method is not supported"); - } - - @Override - public CompletableFuture readIndexAsync(ReadIndexRequestProto request) throws IOException { - CodeInjectionForTesting.execute(GRPC_SEND_SERVER_REQUEST, getId(), null, request); - - final CompletableFuture f = new CompletableFuture<>(); - final StreamObserver s = new StreamObserver() { - @Override - public void onNext(ReadIndexReplyProto reply) { - f.complete(reply); - } - - @Override - public void onError(Throwable throwable) { - f.completeExceptionally(throwable); - } - - @Override - public void onCompleted() { - } - }; - - final RaftPeerId target = RaftPeerId.valueOf(request.getServerRequest().getReplyId()); - getProxies().getProxy(target).readIndex(request, s); - return f; - } - } - - public static final class Builder { - private RaftServer server; - private GrpcTlsConfig tlsConfig; - private GrpcTlsConfig adminTlsConfig; - private GrpcTlsConfig clientTlsConfig; - private GrpcTlsConfig serverTlsConfig; - - private Builder() {} - - public Builder setServer(RaftServer raftServer) { - this.server = raftServer; - return this; - } - - public GrpcService build() { - return new GrpcService(server, adminTlsConfig, clientTlsConfig, serverTlsConfig); - } - - public Builder setTlsConfig(GrpcTlsConfig tlsConfig) { - this.tlsConfig = tlsConfig; - return this; - } - - public Builder setAdminTlsConfig(GrpcTlsConfig config) { - this.adminTlsConfig = config; - return this; - } - - public Builder setClientTlsConfig(GrpcTlsConfig config) { - 
this.clientTlsConfig = config; - return this; - } - - public Builder setServerTlsConfig(GrpcTlsConfig config) { - this.serverTlsConfig = config; - return this; - } - - public GrpcTlsConfig getTlsConfig() { - return tlsConfig; - } - } - - public static Builder newBuilder() { - return new Builder(); - } - - private final Map servers = new HashMap<>(); - private final Supplier addressSupplier; - private final Supplier clientServerAddressSupplier; - private final Supplier adminServerAddressSupplier; - - private final AsyncService asyncService = new AsyncService(); - - private final ExecutorService executor; - private final GrpcClientProtocolService clientProtocolService; - - private final MetricServerInterceptor serverInterceptor; - - public MetricServerInterceptor getServerInterceptor() { - return serverInterceptor; - } - - private GrpcService(RaftServer server, - GrpcTlsConfig adminTlsConfig, GrpcTlsConfig clientTlsConfig, GrpcTlsConfig serverTlsConfig) { - this(server, server::getId, - GrpcConfigKeys.Admin.host(server.getProperties()), - GrpcConfigKeys.Admin.port(server.getProperties()), - adminTlsConfig, - GrpcConfigKeys.Client.host(server.getProperties()), - GrpcConfigKeys.Client.port(server.getProperties()), - clientTlsConfig, - GrpcConfigKeys.Server.host(server.getProperties()), - GrpcConfigKeys.Server.port(server.getProperties()), - serverTlsConfig, - GrpcConfigKeys.messageSizeMax(server.getProperties(), LOG::info), - RaftServerConfigKeys.Log.Appender.bufferByteLimit(server.getProperties()), - GrpcConfigKeys.flowControlWindow(server.getProperties(), LOG::info), - RaftServerConfigKeys.Rpc.requestTimeout(server.getProperties()), - GrpcConfigKeys.Server.heartbeatChannel(server.getProperties())); - } - - @SuppressWarnings("checkstyle:ParameterNumber") // private constructor - private GrpcService(RaftServer raftServer, Supplier idSupplier, - String adminHost, int adminPort, GrpcTlsConfig adminTlsConfig, - String clientHost, int clientPort, GrpcTlsConfig 
clientTlsConfig, - String serverHost, int serverPort, GrpcTlsConfig serverTlsConfig, - SizeInBytes grpcMessageSizeMax, SizeInBytes appenderBufferSize, - SizeInBytes flowControlWindow,TimeDuration requestTimeoutDuration, - boolean useSeparateHBChannel) { - super(idSupplier, id -> new PeerProxyMap<>(id.toString(), - p -> new GrpcServerProtocolClient(p, flowControlWindow.getSizeInt(), - requestTimeoutDuration, serverTlsConfig, useSeparateHBChannel))); - if (appenderBufferSize.getSize() > grpcMessageSizeMax.getSize()) { - throw new IllegalArgumentException("Illegal configuration: " - + RaftServerConfigKeys.Log.Appender.BUFFER_BYTE_LIMIT_KEY + " = " + appenderBufferSize - + " > " + GrpcConfigKeys.MESSAGE_SIZE_MAX_KEY + " = " + grpcMessageSizeMax); - } - - final RaftProperties properties = raftServer.getProperties(); - this.executor = ConcurrentUtils.newThreadPoolWithMax( - GrpcConfigKeys.Server.asyncRequestThreadPoolCached(properties), - GrpcConfigKeys.Server.asyncRequestThreadPoolSize(properties), - getId() + "-request-"); - this.clientProtocolService = new GrpcClientProtocolService(idSupplier, raftServer, executor); - - this.serverInterceptor = new MetricServerInterceptor( - idSupplier, - JavaUtils.getClassSimpleName(getClass()) + "_" + serverPort - ); - - final boolean separateAdminServer = adminPort != serverPort && adminPort > 0; - final boolean separateClientServer = clientPort != serverPort && clientPort > 0; - - final NettyServerBuilder serverBuilder = - startBuildingNettyServer(serverHost, serverPort, serverTlsConfig, grpcMessageSizeMax, flowControlWindow); - serverBuilder.addService(ServerInterceptors.intercept( - new GrpcServerProtocolService(idSupplier, raftServer), serverInterceptor)); - if (!separateAdminServer) { - addAdminService(raftServer, serverBuilder); - } - if (!separateClientServer) { - addClientService(serverBuilder); - } - - final Server server = serverBuilder.build(); - servers.put(GrpcServerProtocolService.class.getSimpleName(), server); - 
addressSupplier = newAddressSupplier(serverPort, server); - - if (separateAdminServer) { - final NettyServerBuilder builder = - startBuildingNettyServer(adminHost, adminPort, adminTlsConfig, grpcMessageSizeMax, flowControlWindow); - addAdminService(raftServer, builder); - final Server adminServer = builder.build(); - servers.put(GrpcAdminProtocolService.class.getName(), adminServer); - adminServerAddressSupplier = newAddressSupplier(adminPort, adminServer); - } else { - adminServerAddressSupplier = addressSupplier; - } - - if (separateClientServer) { - final NettyServerBuilder builder = - startBuildingNettyServer(clientHost, clientPort, clientTlsConfig, grpcMessageSizeMax, flowControlWindow); - addClientService(builder); - final Server clientServer = builder.build(); - servers.put(GrpcClientProtocolService.class.getName(), clientServer); - clientServerAddressSupplier = newAddressSupplier(clientPort, clientServer); - } else { - clientServerAddressSupplier = addressSupplier; - } - } - - private MemoizedSupplier newAddressSupplier(int port, Server server) { - return JavaUtils.memoize(() -> new InetSocketAddress(port != 0 ? port : server.getPort())); - } - - private void addClientService(NettyServerBuilder builder) { - builder.addService(ServerInterceptors.intercept(clientProtocolService, serverInterceptor)); - } - - private void addAdminService(RaftServer raftServer, NettyServerBuilder nettyServerBuilder) { - nettyServerBuilder.addService(ServerInterceptors.intercept( - new GrpcAdminProtocolService(raftServer), - serverInterceptor)); - } - - private static NettyServerBuilder startBuildingNettyServer(String hostname, int port, GrpcTlsConfig tlsConfig, - SizeInBytes grpcMessageSizeMax, SizeInBytes flowControlWindow) { - InetSocketAddress address = hostname == null || hostname.isEmpty() ? 
- new InetSocketAddress(port) : new InetSocketAddress(hostname, port); - NettyServerBuilder nettyServerBuilder = NettyServerBuilder.forAddress(address) - .withChildOption(ChannelOption.SO_REUSEADDR, true) - .maxInboundMessageSize(grpcMessageSizeMax.getSizeInt()) - .flowControlWindow(flowControlWindow.getSizeInt()); - - if (tlsConfig != null) { - SslContextBuilder sslContextBuilder = GrpcUtil.initSslContextBuilderForServer(tlsConfig.getKeyManager()); - if (tlsConfig.getMtlsEnabled()) { - sslContextBuilder.clientAuth(ClientAuth.REQUIRE); - GrpcUtil.setTrustManager(sslContextBuilder, tlsConfig.getTrustManager()); - } - sslContextBuilder = GrpcSslContexts.configure(sslContextBuilder, OPENSSL); - try { - nettyServerBuilder.sslContext(sslContextBuilder.build()); - } catch (Exception ex) { - throw new IllegalArgumentException("Failed to build SslContext, tlsConfig=" + tlsConfig, ex); - } - } - return nettyServerBuilder; - } - - @Override - public SupportedRpcType getRpcType() { - return SupportedRpcType.GRPC; - } - - @Override - public void startImpl() { - for (Server server : servers.values()) { - try { - server.start(); - } catch (IOException e) { - ExitUtils.terminate(1, "Failed to start Grpc server", e, LOG); - } - LOG.info("{}: {} started, listening on {}", - getId(), JavaUtils.getClassSimpleName(getClass()), server.getPort()); - } - } - - @Override - public void closeImpl() throws IOException { - for (Map.Entry server : servers.entrySet()) { - final String name = getId() + ": shutdown server " + server.getKey(); - LOG.info("{} now", name); - final Server s = server.getValue().shutdownNow(); - super.closeImpl(); - try { - s.awaitTermination(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw IOUtils.toInterruptedIOException(name + " failed", e); - } - LOG.info("{} successfully", name); - } - - serverInterceptor.close(); - ConcurrentUtils.shutdownAndWait(executor); - } - - @Override - public void notifyNotLeader(RaftGroupId groupId) { 
- clientProtocolService.closeAllOrderedRequestStreamObservers(groupId); - } - - @Override - public InetSocketAddress getInetSocketAddress() { - return addressSupplier.get(); - } - - @Override - public InetSocketAddress getClientServerAddress() { - return clientServerAddressSupplier.get(); - } - - @Override - public InetSocketAddress getAdminServerAddress() { - return adminServerAddressSupplier.get(); - } - - @Override - public RaftServerAsynchronousProtocol async() { - return asyncService; - } - - @Override - public AppendEntriesReplyProto appendEntries(AppendEntriesRequestProto request) { - throw new UnsupportedOperationException( - "Blocking " + JavaUtils.getCurrentStackTraceElement().getMethodName() + " call is not supported"); - } - - @Override - public InstallSnapshotReplyProto installSnapshot(InstallSnapshotRequestProto request) { - throw new UnsupportedOperationException( - "Blocking " + JavaUtils.getCurrentStackTraceElement().getMethodName() + " call is not supported"); - } - - @Override - public RequestVoteReplyProto requestVote(RequestVoteRequestProto request) - throws IOException { - CodeInjectionForTesting.execute(GRPC_SEND_SERVER_REQUEST, getId(), - null, request); - - final RaftPeerId target = RaftPeerId.valueOf(request.getServerRequest().getReplyId()); - return getProxies().getProxy(target).requestVote(request); - } - - @Override - public StartLeaderElectionReplyProto startLeaderElection(StartLeaderElectionRequestProto request) throws IOException { - CodeInjectionForTesting.execute(GRPC_SEND_SERVER_REQUEST, getId(), null, request); - - final RaftPeerId target = RaftPeerId.valueOf(request.getServerRequest().getReplyId()); - return getProxies().getProxy(target).startLeaderElection(request); - } - -} diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServices.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServices.java new file mode 100644 index 0000000000..663fd6d743 --- /dev/null +++ 
b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServices.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.grpc.server; + +import org.apache.ratis.server.RaftServerRpc; +import org.apache.ratis.thirdparty.io.grpc.netty.NettyServerBuilder; + +import java.util.EnumSet; + +/** The gRPC services extending {@link RaftServerRpc}. */ +public interface GrpcServices extends RaftServerRpc { + /** The type of the services. */ + enum Type {ADMIN, CLIENT, SERVER} + + /** + * To customize the services. + * For example, add a custom service. + */ + interface Customizer { + /** The default NOOP {@link Customizer}. */ + class Default implements Customizer { + private static final Default INSTANCE = new Default(); + + @Override + public NettyServerBuilder customize(NettyServerBuilder builder, EnumSet types) { + return builder; + } + } + + static Customizer getDefaultInstance() { + return Default.INSTANCE; + } + + /** + * Customize the given builder for the given types. + * + * @return the customized builder. 
+ */ + NettyServerBuilder customize(NettyServerBuilder builder, EnumSet types); + } +} diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServicesImpl.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServicesImpl.java new file mode 100644 index 0000000000..d6f6a0c866 --- /dev/null +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcServicesImpl.java @@ -0,0 +1,443 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.grpc.server; + +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.grpc.GrpcConfigKeys; +import org.apache.ratis.grpc.GrpcTlsConfig; +import org.apache.ratis.grpc.GrpcUtil; +import org.apache.ratis.grpc.metrics.MessageMetrics; +import org.apache.ratis.grpc.metrics.ZeroCopyMetrics; +import org.apache.ratis.grpc.metrics.intercept.server.MetricServerInterceptor; +import org.apache.ratis.protocol.AdminAsynchronousProtocol; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.rpc.SupportedRpcType; +import org.apache.ratis.server.RaftServer; +import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.server.RaftServerRpcWithProxy; +import org.apache.ratis.server.protocol.RaftServerAsynchronousProtocol; +import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; +import org.apache.ratis.thirdparty.io.grpc.ServerInterceptor; +import org.apache.ratis.thirdparty.io.grpc.ServerInterceptors; +import org.apache.ratis.thirdparty.io.grpc.ServerServiceDefinition; +import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts; +import org.apache.ratis.thirdparty.io.grpc.netty.NettyServerBuilder; +import org.apache.ratis.thirdparty.io.grpc.Server; +import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelOption; +import org.apache.ratis.thirdparty.io.netty.handler.ssl.ClientAuth; +import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContextBuilder; + +import org.apache.ratis.proto.RaftProtos.*; +import org.apache.ratis.util.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import 
java.util.concurrent.ExecutorService; +import java.util.function.Supplier; + +import static org.apache.ratis.thirdparty.io.netty.handler.ssl.SslProvider.OPENSSL; + +/** A grpc implementation of {@link org.apache.ratis.server.RaftServerRpc}. */ +public final class GrpcServicesImpl + extends RaftServerRpcWithProxy> + implements GrpcServices { + static final Logger LOG = LoggerFactory.getLogger(GrpcServicesImpl.class); + public static final String GRPC_SEND_SERVER_REQUEST = + JavaUtils.getClassSimpleName(GrpcServicesImpl.class) + ".sendRequest"; + + class AsyncService implements RaftServerAsynchronousProtocol { + + @Override + public CompletableFuture appendEntriesAsync(AppendEntriesRequestProto request) { + throw new UnsupportedOperationException("This method is not supported"); + } + + @Override + public CompletableFuture readIndexAsync(ReadIndexRequestProto request) throws IOException { + CodeInjectionForTesting.execute(GRPC_SEND_SERVER_REQUEST, getId(), null, request); + + final CompletableFuture f = new CompletableFuture<>(); + final StreamObserver s = new StreamObserver() { + @Override + public void onNext(ReadIndexReplyProto reply) { + f.complete(reply); + } + + @Override + public void onError(Throwable throwable) { + f.completeExceptionally(throwable); + } + + @Override + public void onCompleted() { + } + }; + + final RaftPeerId target = RaftPeerId.valueOf(request.getServerRequest().getReplyId()); + getProxies().getProxy(target).readIndex(request, s); + return f; + } + } + + public static final class Builder { + private RaftServer server; + private Customizer customizer; + + private String adminHost; + private int adminPort; + private GrpcTlsConfig adminTlsConfig; + private String clientHost; + private int clientPort; + private GrpcTlsConfig clientTlsConfig; + private String serverHost; + private int serverPort; + private GrpcTlsConfig serverTlsConfig; + + private SizeInBytes messageSizeMax; + private SizeInBytes flowControlWindow; + private TimeDuration 
requestTimeoutDuration; + private boolean separateHeartbeatChannel; + private boolean zeroCopyEnabled; + + private Builder() {} + + public Builder setServer(RaftServer raftServer) { + this.server = raftServer; + + final RaftProperties properties = server.getProperties(); + this.adminHost = GrpcConfigKeys.Admin.host(properties); + this.adminPort = GrpcConfigKeys.Admin.port(properties); + this.clientHost = GrpcConfigKeys.Client.host(properties); + this.clientPort = GrpcConfigKeys.Client.port(properties); + this.serverHost = GrpcConfigKeys.Server.host(properties); + this.serverPort = GrpcConfigKeys.Server.port(properties); + this.messageSizeMax = GrpcConfigKeys.messageSizeMax(properties, LOG::info); + this.flowControlWindow = GrpcConfigKeys.flowControlWindow(properties, LOG::info); + this.requestTimeoutDuration = RaftServerConfigKeys.Rpc.requestTimeout(properties); + this.separateHeartbeatChannel = GrpcConfigKeys.Server.heartbeatChannel(properties); + this.zeroCopyEnabled = GrpcConfigKeys.Server.zeroCopyEnabled(properties); + + final SizeInBytes appenderBufferSize = RaftServerConfigKeys.Log.Appender.bufferByteLimit(properties); + final SizeInBytes gap = SizeInBytes.ONE_MB; + final long diff = messageSizeMax.getSize() - appenderBufferSize.getSize(); + if (diff < gap.getSize()) { + throw new IllegalArgumentException("Illegal configuration: " + + GrpcConfigKeys.MESSAGE_SIZE_MAX_KEY + "(= " + messageSizeMax + + ") must be " + gap + " larger than " + + RaftServerConfigKeys.Log.Appender.BUFFER_BYTE_LIMIT_KEY + "(= " + appenderBufferSize + ")."); + } + + return this; + } + + public Builder setCustomizer(Customizer customizer) { + this.customizer = customizer != null? 
customizer : Customizer.getDefaultInstance(); + return this; + } + + private GrpcServerProtocolClient newGrpcServerProtocolClient(RaftPeer target) { + return new GrpcServerProtocolClient(target, flowControlWindow.getSizeInt(), + requestTimeoutDuration, serverTlsConfig, separateHeartbeatChannel); + } + + private ExecutorService newExecutor() { + final RaftProperties properties = server.getProperties(); + return ConcurrentUtils.newThreadPoolWithMax( + GrpcConfigKeys.Server.asyncRequestThreadPoolCached(properties), + GrpcConfigKeys.Server.asyncRequestThreadPoolSize(properties), + server.getId() + "-request-"); + } + + private GrpcClientProtocolService newGrpcClientProtocolService( + ExecutorService executor, ZeroCopyMetrics zeroCopyMetrics) { + return new GrpcClientProtocolService(server::getId, server, executor, zeroCopyEnabled, zeroCopyMetrics); + } + + private GrpcServerProtocolService newGrpcServerProtocolService(ZeroCopyMetrics zeroCopyMetrics) { + return new GrpcServerProtocolService(server::getId, server, zeroCopyEnabled, zeroCopyMetrics); + } + + private MetricServerInterceptor newMetricServerInterceptor() { + return new MetricServerInterceptor(server::getId, + JavaUtils.getClassSimpleName(getClass()) + "_" + serverPort); + } + + Server buildServer(NettyServerBuilder builder, EnumSet types) { + return customizer.customize(builder, types).build(); + } + + private NettyServerBuilder newNettyServerBuilderForServer() { + return newNettyServerBuilder(serverHost, serverPort, serverTlsConfig); + } + + private NettyServerBuilder newNettyServerBuilderForAdmin() { + return newNettyServerBuilder(adminHost, adminPort, adminTlsConfig); + } + + private NettyServerBuilder newNettyServerBuilderForClient() { + return newNettyServerBuilder(clientHost, clientPort, clientTlsConfig); + } + + private NettyServerBuilder newNettyServerBuilder(String hostname, int port, GrpcTlsConfig tlsConfig) { + final InetSocketAddress address = hostname == null || hostname.isEmpty() ? 
+ new InetSocketAddress(port) : new InetSocketAddress(hostname, port); + final NettyServerBuilder nettyServerBuilder = NettyServerBuilder.forAddress(address) + .withChildOption(ChannelOption.SO_REUSEADDR, true) + .maxInboundMessageSize(messageSizeMax.getSizeInt()) + .flowControlWindow(flowControlWindow.getSizeInt()); + + if (tlsConfig != null) { + LOG.info("Setting TLS for {}", address); + SslContextBuilder sslContextBuilder = GrpcUtil.initSslContextBuilderForServer(tlsConfig.getKeyManager()); + if (tlsConfig.getMtlsEnabled()) { + sslContextBuilder.clientAuth(ClientAuth.REQUIRE); + GrpcUtil.setTrustManager(sslContextBuilder, tlsConfig.getTrustManager()); + } + sslContextBuilder = GrpcSslContexts.configure(sslContextBuilder, OPENSSL); + try { + nettyServerBuilder.sslContext(sslContextBuilder.build()); + } catch (Exception ex) { + throw new IllegalArgumentException("Failed to build SslContext, tlsConfig=" + tlsConfig, ex); + } + } + return nettyServerBuilder; + } + + private boolean separateAdminServer() { + return adminPort > 0 && adminPort != serverPort; + } + + private boolean separateClientServer() { + return clientPort > 0 && clientPort != serverPort; + } + + Server newServer(GrpcClientProtocolService client, ZeroCopyMetrics zeroCopyMetrics, ServerInterceptor interceptor) { + final EnumSet types = EnumSet.of(GrpcServices.Type.SERVER); + final NettyServerBuilder serverBuilder = newNettyServerBuilderForServer(); + final ServerServiceDefinition service = newGrpcServerProtocolService(zeroCopyMetrics).bindServiceWithZeroCopy(); + serverBuilder.addService(ServerInterceptors.intercept(service, interceptor)); + + if (!separateAdminServer()) { + types.add(GrpcServices.Type.ADMIN); + addAdminService(serverBuilder, server, interceptor); + } + if (!separateClientServer()) { + types.add(GrpcServices.Type.CLIENT); + addClientService(serverBuilder, client, interceptor); + } + return buildServer(serverBuilder, types); + } + + public GrpcServicesImpl build() { + return new 
GrpcServicesImpl(this); + } + + public Builder setAdminTlsConfig(GrpcTlsConfig config) { + this.adminTlsConfig = config; + return this; + } + + public Builder setClientTlsConfig(GrpcTlsConfig config) { + this.clientTlsConfig = config; + return this; + } + + public Builder setServerTlsConfig(GrpcTlsConfig config) { + this.serverTlsConfig = config; + return this; + } + } + + public static Builder newBuilder() { + return new Builder(); + } + + private final Map servers = new HashMap<>(); + private final Supplier addressSupplier; + private final Supplier clientServerAddressSupplier; + private final Supplier adminServerAddressSupplier; + + private final AsyncService asyncService = new AsyncService(); + + private final ExecutorService executor; + private final GrpcClientProtocolService clientProtocolService; + + private final MetricServerInterceptor serverInterceptor; + private final ZeroCopyMetrics zeroCopyMetrics = new ZeroCopyMetrics(); + + private GrpcServicesImpl(Builder b) { + super(b.server::getId, id -> new PeerProxyMap<>(id.toString(), b::newGrpcServerProtocolClient)); + + this.executor = b.newExecutor(); + this.clientProtocolService = b.newGrpcClientProtocolService(executor, zeroCopyMetrics); + this.serverInterceptor = b.newMetricServerInterceptor(); + final Server server = b.newServer(clientProtocolService, zeroCopyMetrics, serverInterceptor); + + servers.put(GrpcServerProtocolService.class.getSimpleName(), server); + addressSupplier = newAddressSupplier(b.serverPort, server); + + if (b.separateAdminServer()) { + final NettyServerBuilder builder = b.newNettyServerBuilderForAdmin(); + addAdminService(builder, b.server, serverInterceptor); + final Server adminServer = b.buildServer(builder, EnumSet.of(GrpcServices.Type.ADMIN)); + servers.put(GrpcAdminProtocolService.class.getName(), adminServer); + adminServerAddressSupplier = newAddressSupplier(b.adminPort, adminServer); + } else { + adminServerAddressSupplier = addressSupplier; + } + + if 
(b.separateClientServer()) { + final NettyServerBuilder builder = b.newNettyServerBuilderForClient(); + addClientService(builder, clientProtocolService, serverInterceptor); + final Server clientServer = b.buildServer(builder, EnumSet.of(GrpcServices.Type.CLIENT)); + servers.put(GrpcClientProtocolService.class.getName(), clientServer); + clientServerAddressSupplier = newAddressSupplier(b.clientPort, clientServer); + } else { + clientServerAddressSupplier = addressSupplier; + } + } + + static MemoizedSupplier newAddressSupplier(int port, Server server) { + return JavaUtils.memoize(() -> new InetSocketAddress(port != 0 ? port : server.getPort())); + } + + static void addClientService(NettyServerBuilder builder, GrpcClientProtocolService client, + ServerInterceptor interceptor) { + final ServerServiceDefinition service = client.bindServiceWithZeroCopy(); + builder.addService(ServerInterceptors.intercept(service, interceptor)); + } + + static void addAdminService(NettyServerBuilder builder, AdminAsynchronousProtocol admin, + ServerInterceptor interceptor) { + final GrpcAdminProtocolService service = new GrpcAdminProtocolService(admin); + builder.addService(ServerInterceptors.intercept(service, interceptor)); + } + + @Override + public SupportedRpcType getRpcType() { + return SupportedRpcType.GRPC; + } + + @Override + public void startImpl() { + for (Server server : servers.values()) { + try { + server.start(); + } catch (IOException e) { + ExitUtils.terminate(1, "Failed to start Grpc server", e, LOG); + } + LOG.info("{}: {} started, listening on {}", + getId(), JavaUtils.getClassSimpleName(getClass()), server.getPort()); + } + } + + @Override + public void closeImpl() throws IOException { + for (Map.Entry server : servers.entrySet()) { + final String name = getId() + ": shutdown server " + server.getKey(); + LOG.info("{} now", name); + final Server s = server.getValue().shutdownNow(); + super.closeImpl(); + try { + s.awaitTermination(); + } catch (InterruptedException 
e) { + Thread.currentThread().interrupt(); + throw IOUtils.toInterruptedIOException(name + " failed", e); + } + LOG.info("{} successfully", name); + } + + serverInterceptor.close(); + ConcurrentUtils.shutdownAndWait(executor); + zeroCopyMetrics.unregister(); + } + + @Override + public void notifyNotLeader(RaftGroupId groupId) { + clientProtocolService.closeAllOrderedRequestStreamObservers(groupId); + } + + @Override + public InetSocketAddress getInetSocketAddress() { + return addressSupplier.get(); + } + + @Override + public InetSocketAddress getClientServerAddress() { + return clientServerAddressSupplier.get(); + } + + @Override + public InetSocketAddress getAdminServerAddress() { + return adminServerAddressSupplier.get(); + } + + @Override + public RaftServerAsynchronousProtocol async() { + return asyncService; + } + + @Override + public AppendEntriesReplyProto appendEntries(AppendEntriesRequestProto request) { + throw new UnsupportedOperationException( + "Blocking " + JavaUtils.getCurrentStackTraceElement().getMethodName() + " call is not supported"); + } + + @Override + public InstallSnapshotReplyProto installSnapshot(InstallSnapshotRequestProto request) { + throw new UnsupportedOperationException( + "Blocking " + JavaUtils.getCurrentStackTraceElement().getMethodName() + " call is not supported"); + } + + @Override + public RequestVoteReplyProto requestVote(RequestVoteRequestProto request) + throws IOException { + CodeInjectionForTesting.execute(GRPC_SEND_SERVER_REQUEST, getId(), + null, request); + + final RaftPeerId target = RaftPeerId.valueOf(request.getServerRequest().getReplyId()); + return getProxies().getProxy(target).requestVote(request); + } + + @Override + public StartLeaderElectionReplyProto startLeaderElection(StartLeaderElectionRequestProto request) throws IOException { + CodeInjectionForTesting.execute(GRPC_SEND_SERVER_REQUEST, getId(), null, request); + + final RaftPeerId target = RaftPeerId.valueOf(request.getServerRequest().getReplyId()); + 
return getProxies().getProxy(target).startLeaderElection(request); + } + + @VisibleForTesting + MessageMetrics getMessageMetrics() { + return serverInterceptor.getMetrics(); + } + + @VisibleForTesting + public ZeroCopyMetrics getZeroCopyMetrics() { + return zeroCopyMetrics; + } +} diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcStubPool.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcStubPool.java new file mode 100644 index 0000000000..9667661d07 --- /dev/null +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/server/GrpcStubPool.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.grpc.server; + +import org.apache.ratis.grpc.GrpcUtil; +import org.apache.ratis.thirdparty.io.grpc.ManagedChannel; +import org.apache.ratis.thirdparty.io.grpc.netty.NegotiationType; +import org.apache.ratis.thirdparty.io.grpc.netty.NettyChannelBuilder; +import org.apache.ratis.thirdparty.io.grpc.stub.AbstractStub; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelOption; +import org.apache.ratis.thirdparty.io.netty.channel.WriteBufferWaterMark; +import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContext; +import org.apache.ratis.util.MemoizedSupplier; +import org.apache.ratis.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Semaphore; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; + +final class GrpcStubPool> { + public static final Logger LOG = LoggerFactory.getLogger(GrpcStubPool.class); + + static ManagedChannel buildManagedChannel(String address, SslContext sslContext) { + NettyChannelBuilder channelBuilder = NettyChannelBuilder.forTarget(address) + .keepAliveTime(10, TimeUnit.MINUTES) + .keepAliveWithoutCalls(false) + .idleTimeout(30, TimeUnit.MINUTES) + .withOption(ChannelOption.WRITE_BUFFER_WATER_MARK, new WriteBufferWaterMark(64 << 10, 128 << 10)); + if (sslContext != null) { + LOG.debug("Setting TLS for {}", address); + channelBuilder.useTransportSecurity().sslContext(sslContext); + } else { + channelBuilder.negotiationType(NegotiationType.PLAINTEXT); + } + ManagedChannel ch = channelBuilder.build(); + ch.getState(true); + return ch; + } + + static final class Stub> { + private final ManagedChannel ch; + private final S stub; + private final Semaphore permits; + + Stub(String address, SslContext sslContext, Function stubFactory, int maxInflight) { + this.ch = buildManagedChannel(address, 
sslContext); + this.stub = stubFactory.apply(ch); + this.permits = new Semaphore(maxInflight); + } + + S getStub() { + return stub; + } + + void release() { + permits.release(); + } + + void shutdown() { + GrpcUtil.shutdownManagedChannel(ch); + } + } + + private final List>> pool; + + GrpcStubPool(int connections, String address, SslContext sslContext, Function stubFactory, + int maxInflightPerConn) { + Preconditions.assertTrue(connections > 1, "connections must be > 1"); + final List>> tmpPool = new ArrayList<>(connections); + for (int i = 0; i < connections; i++) { + tmpPool.add(MemoizedSupplier.valueOf(() -> new Stub<>(address, sslContext, stubFactory, maxInflightPerConn))); + } + this.pool = Collections.unmodifiableList(tmpPool); + } + + Stub getStub(int i) { + return pool.get(i).get(); + } + + Stub acquire() throws InterruptedException { + final int size = pool.size(); + final int start = ThreadLocalRandom.current().nextInt(size); + for (int k = 0; k < size; k++) { + Stub p = getStub((start + k) % size); + if (p.permits.tryAcquire()) { + return p; + } + } + final Stub p = getStub(start); + p.permits.acquire(); + return p; + } + + public void close() { + for (MemoizedSupplier> p : pool) { + if (p.isInitialized()) { + p.get().shutdown(); + } + } + } +} diff --git a/ratis-grpc/src/main/java/org/apache/ratis/grpc/util/ZeroCopyMessageMarshaller.java b/ratis-grpc/src/main/java/org/apache/ratis/grpc/util/ZeroCopyMessageMarshaller.java index bb8183a247..eddf2495e4 100644 --- a/ratis-grpc/src/main/java/org/apache/ratis/grpc/util/ZeroCopyMessageMarshaller.java +++ b/ratis-grpc/src/main/java/org/apache/ratis/grpc/util/ZeroCopyMessageMarshaller.java @@ -62,12 +62,14 @@ public class ZeroCopyMessageMarshaller implements Prototy private final Consumer zeroCopyCount; private final Consumer nonZeroCopyCount; + private final Consumer releasedCount; public ZeroCopyMessageMarshaller(T defaultInstance) { - this(defaultInstance, m -> {}, m -> {}); + this(defaultInstance, m -> {}, m 
-> {}, m -> {}); } - public ZeroCopyMessageMarshaller(T defaultInstance, Consumer zeroCopyCount, Consumer nonZeroCopyCount) { + public ZeroCopyMessageMarshaller(T defaultInstance, Consumer zeroCopyCount, Consumer nonZeroCopyCount, + Consumer releasedCount) { this.name = JavaUtils.getClassSimpleName(defaultInstance.getClass()) + "-Marshaller"; @SuppressWarnings("unchecked") final Parser p = (Parser) defaultInstance.getParserForType(); @@ -76,6 +78,7 @@ public ZeroCopyMessageMarshaller(T defaultInstance, Consumer zeroCopyCount, C this.zeroCopyCount = zeroCopyCount; this.nonZeroCopyCount = nonZeroCopyCount; + this.releasedCount = releasedCount; } @Override @@ -124,6 +127,7 @@ public void release(T message) { } try { stream.close(); + releasedCount.accept(message); } catch (IOException e) { LOG.error(name + ": Failed to close stream.", e); } @@ -222,4 +226,31 @@ private T parseFrom(CodedInputStream stream) throws InvalidProtocolBufferExcepti public InputStream popStream(T message) { return unclosedStreams.remove(message); } + + public int getUnclosedCount() { + return unclosedStreams.size(); + } + + void assertNoUnclosedStreams() { + // Intended for tests/teardown to fail fast if callers forgot to release streams. + final int size = unclosedStreams.size(); + Preconditions.assertTrue(size == 0, () -> name + ": " + size + " unclosed stream(s)"); + } + + public void close() { + // Cleanup helper for tests/teardown; do not call while streams may still be in use. 
+ synchronized (unclosedStreams) { + if (unclosedStreams.isEmpty()) { + return; + } + for (InputStream stream : unclosedStreams.values()) { + try { + stream.close(); + } catch (IOException e) { + LOG.warn("{}: Failed to close leaked stream.", name, e); + } + } + unclosedStreams.clear(); + } + } } diff --git a/ratis-grpc/src/test/java/org/apache/ratis/grpc/MiniRaftClusterWithGrpc.java b/ratis-grpc/src/test/java/org/apache/ratis/grpc/MiniRaftClusterWithGrpc.java index 18c65c5be4..bd1c72b241 100644 --- a/ratis-grpc/src/test/java/org/apache/ratis/grpc/MiniRaftClusterWithGrpc.java +++ b/ratis-grpc/src/test/java/org/apache/ratis/grpc/MiniRaftClusterWithGrpc.java @@ -21,14 +21,19 @@ import org.apache.ratis.RaftTestUtil; import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; -import org.apache.ratis.grpc.server.GrpcService; +import org.apache.ratis.grpc.metrics.ZeroCopyMetrics; +import org.apache.ratis.grpc.server.GrpcServicesImpl; import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.rpc.SupportedRpcType; +import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.impl.DelayLocalExecutionInjection; import org.apache.ratis.server.impl.MiniRaftCluster; +import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.util.NetUtils; +import org.apache.ratis.util.ReferenceCountedLeakDetector; +import org.junit.jupiter.api.Assertions; import java.util.Optional; @@ -45,6 +50,11 @@ public MiniRaftClusterWithGrpc newCluster(String[] ids, String[] listenerIds, Ra } }; + static { + // TODO move it to MiniRaftCluster for detecting non-gRPC cases + ReferenceCountedLeakDetector.enable(false); + } + public interface FactoryGet extends Factory.Get { @Override default Factory getFactory() { @@ -52,10 +62,14 @@ default Factory getFactory() { } } - public static final DelayLocalExecutionInjection sendServerRequestInjection = - 
new DelayLocalExecutionInjection(GrpcService.GRPC_SEND_SERVER_REQUEST); + public static final DelayLocalExecutionInjection SEND_SERVER_REQUEST_INJECTION = + new DelayLocalExecutionInjection(GrpcServicesImpl.GRPC_SEND_SERVER_REQUEST); - protected MiniRaftClusterWithGrpc(String[] ids, String[] listenerIds, RaftProperties properties, Parameters parameters) { + public MiniRaftClusterWithGrpc(String[] ids, RaftProperties properties, Parameters parameters) { + this(ids, new String[0], properties, parameters); + } + + public MiniRaftClusterWithGrpc(String[] ids, String[] listenerIds, RaftProperties properties, Parameters parameters) { super(ids, listenerIds, properties, parameters); } @@ -66,13 +80,33 @@ protected Parameters setPropertiesAndInitParameters(RaftPeerId id, RaftGroup gro GrpcConfigKeys.Client.setPort(properties, NetUtils.createSocketAddr(address).getPort())); Optional.ofNullable(getAddress(id, group, RaftPeer::getAdminAddress)).ifPresent(address -> GrpcConfigKeys.Admin.setPort(properties, NetUtils.createSocketAddr(address).getPort())); + // Always run grpc integration tests with zero-copy enabled because the path of nonzero-copy is not risky. 
+ GrpcConfigKeys.Server.setZeroCopyEnabled(properties, true); return parameters; } @Override protected void blockQueueAndSetDelay(String leaderId, int delayMs) throws InterruptedException { - RaftTestUtil.blockQueueAndSetDelay(getServers(), sendServerRequestInjection, + RaftTestUtil.blockQueueAndSetDelay(getServers(), SEND_SERVER_REQUEST_INJECTION, leaderId, delayMs, getTimeoutMax()); } + + @Override + public void shutdown() { + super.shutdown(); + assertZeroCopyMetrics(); + } + + public void assertZeroCopyMetrics() { + getServers().forEach(server -> server.getGroupIds().forEach(id -> { + LOG.info("Checking {}-{}", server.getId(), id); + RaftServer.Division division = RaftServerTestUtil.getDivision(server, id); + final GrpcServicesImpl service = (GrpcServicesImpl) RaftServerTestUtil.getServerRpc(division); + ZeroCopyMetrics zeroCopyMetrics = service.getZeroCopyMetrics(); + Assertions.assertEquals(0, zeroCopyMetrics.nonZeroCopyMessages()); + Assertions.assertEquals(zeroCopyMetrics.zeroCopyMessages(), zeroCopyMetrics.releasedMessages(), + "Unreleased zero copy messages: please check logs to find the leaks. 
"); + })); + } } diff --git a/ratis-metrics-api/dev-support/findbugsExcludeFile.xml b/ratis-metrics-api/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..2b3c140b14 --- /dev/null +++ b/ratis-metrics-api/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,22 @@ + + + + + + + \ No newline at end of file diff --git a/ratis-metrics-api/pom.xml b/ratis-metrics-api/pom.xml index e51a9f9632..db368269e2 100644 --- a/ratis-metrics-api/pom.xml +++ b/ratis-metrics-api/pom.xml @@ -18,12 +18,17 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-metrics-api Apache Ratis Metrics API + + + true + + ratis-common @@ -34,5 +39,27 @@ org.slf4j slf4j-api + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test + + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-metrics-api/src/main/java/org/apache/ratis/metrics/MetricRegistriesLoader.java b/ratis-metrics-api/src/main/java/org/apache/ratis/metrics/MetricRegistriesLoader.java index 0837b0bdd1..8baac7a465 100644 --- a/ratis-metrics-api/src/main/java/org/apache/ratis/metrics/MetricRegistriesLoader.java +++ b/ratis-metrics-api/src/main/java/org/apache/ratis/metrics/MetricRegistriesLoader.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.List; import java.util.ServiceLoader; +import java.util.stream.Collectors; import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.ratis.util.ReflectionUtils; @@ -30,10 +31,9 @@ public final class MetricRegistriesLoader { - private static final Logger LOG = LoggerFactory.getLogger(MetricRegistries.class); + private static final Logger LOG = LoggerFactory.getLogger(MetricRegistriesLoader.class); - private static final String DEFAULT_CLASS - = "org.apache.ratis.metrics.impl.MetricRegistriesImpl"; + static final String DEFAULT_CLASS = 
"org.apache.ratis.metrics.impl.MetricRegistriesImpl"; private MetricRegistriesLoader() { } @@ -56,32 +56,25 @@ public static MetricRegistries load() { * @return A {@link MetricRegistries} implementation. */ @VisibleForTesting - static MetricRegistries load(List availableImplementations) { - - if (availableImplementations.size() == 1) { - // One and only one instance -- what we want/expect - MetricRegistries impl = availableImplementations.get(0); - LOG.info("Loaded MetricRegistries " + impl.getClass()); - return impl; - } else if (availableImplementations.isEmpty()) { + static MetricRegistries load(List registries) { + if (registries.isEmpty()) { try { return ReflectionUtils.newInstance(Class.forName(DEFAULT_CLASS).asSubclass(MetricRegistries.class)); } catch (ClassNotFoundException e) { - throw new RuntimeException(e); + throw new IllegalStateException("Failed to load default MetricRegistries " + DEFAULT_CLASS, e); } + } + + final MetricRegistries first = registries.get(0); + if (registries.size() == 1) { + // One and only one instance -- what we want/expect + LOG.debug("Loaded {}", first.getClass()); } else { // Tell the user they're doing something wrong, and choose the first impl. - StringBuilder sb = new StringBuilder(); - for (MetricRegistries factory : availableImplementations) { - if (sb.length() > 0) { - sb.append(", "); - } - sb.append(factory.getClass()); - } - LOG.warn("Found multiple MetricRegistries implementations: " + sb - + ". Using first found implementation: " + availableImplementations.get(0)); - return availableImplementations.get(0); + final List> classes = registries.stream().map(Object::getClass).collect(Collectors.toList()); + LOG.warn("Found multiple MetricRegistries: {}. 
Using the first: {}", classes, first.getClass()); } + return first; } private static List getDefinedImplementations() { diff --git a/ratis-metrics-default/dev-support/findbugsExcludeFile.xml b/ratis-metrics-default/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..3b4b52c664 --- /dev/null +++ b/ratis-metrics-default/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,22 @@ + + + + + + + \ No newline at end of file diff --git a/ratis-metrics-default/pom.xml b/ratis-metrics-default/pom.xml index 4fe8073922..b561cbe924 100644 --- a/ratis-metrics-default/pom.xml +++ b/ratis-metrics-default/pom.xml @@ -18,7 +18,7 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-metrics-default @@ -29,31 +29,16 @@ ratis-metrics-api org.apache.ratis - - ratis-proto - org.apache.ratis - ratis-common org.apache.ratis - - ratis-common - org.apache.ratis - test - test-jar - org.slf4j slf4j-api - - junit - junit - test - org.junit.jupiter junit-jupiter-engine @@ -71,4 +56,15 @@ test + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-metrics-default/src/main/java/org/apache/ratis/metrics/impl/MetricRegistriesImpl.java b/ratis-metrics-default/src/main/java/org/apache/ratis/metrics/impl/MetricRegistriesImpl.java index 17968ae9f8..088508fab9 100644 --- a/ratis-metrics-default/src/main/java/org/apache/ratis/metrics/impl/MetricRegistriesImpl.java +++ b/ratis-metrics-default/src/main/java/org/apache/ratis/metrics/impl/MetricRegistriesImpl.java @@ -28,6 +28,7 @@ import org.apache.ratis.metrics.MetricRegistries; import org.apache.ratis.metrics.MetricRegistryInfo; import org.apache.ratis.metrics.RatisMetricRegistry; +import org.apache.ratis.util.RefCountingMap; import org.apache.ratis.util.TimeDuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/ratis-metrics-default/src/main/java/org/apache/ratis/metrics/impl/RefCountingMap.java 
b/ratis-metrics-default/src/main/java/org/apache/ratis/metrics/impl/RefCountingMap.java deleted file mode 100644 index 49759781f8..0000000000 --- a/ratis-metrics-default/src/main/java/org/apache/ratis/metrics/impl/RefCountingMap.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.ratis.metrics.impl; - -import java.util.Collection; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Supplier; -import java.util.stream.Collectors; - -/** - * A map of K to V, but does ref counting for added and removed values. The values are - * not added directly, but instead requested from the given Supplier if ref count == 0. Each put() - * call will increment the ref count, and each remove() will decrement it. The values are removed - * from the map iff ref count == 0. 
- */ -class RefCountingMap { - private static class Payload { - private final V value; - private final AtomicInteger refCount = new AtomicInteger(); - - Payload(V v) { - this.value = v; - } - - V get() { - return value; - } - - V increment() { - return refCount.incrementAndGet() > 0? value: null; - } - - Payload decrement() { - return refCount.decrementAndGet() > 0? this: null; - } - } - - private final ConcurrentMap> map = new ConcurrentHashMap<>(); - - V put(K k, Supplier supplier) { - return map.compute(k, (k1, old) -> old != null? old: new Payload<>(supplier.get())).increment(); - } - - static V get(Payload p) { - return p == null ? null : p.get(); - } - - V get(K k) { - return get(map.get(k)); - } - - /** - * Decrements the ref count of k, and removes from map if ref count == 0. - * @param k the key to remove - * @return the value associated with the specified key or null if key is removed from map. - */ - V remove(K k) { - return get(map.computeIfPresent(k, (k1, v) -> v.decrement())); - } - - void clear() { - map.clear(); - } - - Set keySet() { - return map.keySet(); - } - - Collection values() { - return map.values().stream().map(Payload::get).collect(Collectors.toList()); - } - - int size() { - return map.size(); - } -} diff --git a/ratis-metrics-default/src/test/java/org/apache/ratis/metrics/TestMetricRegistriesLoader.java b/ratis-metrics-default/src/test/java/org/apache/ratis/metrics/TestMetricRegistriesLoader.java index c7f26740d6..9816cc99c5 100644 --- a/ratis-metrics-default/src/test/java/org/apache/ratis/metrics/TestMetricRegistriesLoader.java +++ b/ratis-metrics-default/src/test/java/org/apache/ratis/metrics/TestMetricRegistriesLoader.java @@ -34,6 +34,12 @@ * Test class for {@link MetricRegistriesLoader}. 
*/ public class TestMetricRegistriesLoader { + @Test + public void testLoadEmptyInstance() { + MetricRegistries instance = MetricRegistriesLoader.load(Collections.emptyList()); + assertEquals(MetricRegistriesLoader.DEFAULT_CLASS, instance.getClass().getName()); + } + @Test public void testLoadSingleInstance() { MetricRegistries loader = mock(MetricRegistries.class); @@ -67,7 +73,7 @@ public void testAddRemoveReporter() { Consumer reporter = v-> cntr.incrementAndGet(); Consumer stopReporter = v-> cntr.incrementAndGet(); r.addReporterRegistration(reporter, stopReporter); - + // check if add and remove of metric do reporting counter increase MetricRegistryInfo info = new MetricRegistryInfo("t1", "t1", "t1", "t1"); r.create(info); diff --git a/ratis-metrics-dropwizard3/dev-support/findbugsExcludeFile.xml b/ratis-metrics-dropwizard3/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..c413237ff8 --- /dev/null +++ b/ratis-metrics-dropwizard3/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,22 @@ + + + + + + + \ No newline at end of file diff --git a/ratis-metrics-dropwizard3/pom.xml b/ratis-metrics-dropwizard3/pom.xml index 42fff0445b..2dd329858e 100644 --- a/ratis-metrics-dropwizard3/pom.xml +++ b/ratis-metrics-dropwizard3/pom.xml @@ -18,14 +18,14 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-metrics-dropwizard3 Apache Ratis Metrics Dropwizard 3 Implementation - 3.2.5 + 3.2.6 @@ -53,11 +53,6 @@ slf4j-api - - junit - junit - test - org.junit.jupiter junit-jupiter-engine @@ -99,4 +94,15 @@ + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-metrics-dropwizard3/src/main/java/org/apache/ratis/metrics/dropwizard3/Dm3MetricRegistriesImpl.java b/ratis-metrics-dropwizard3/src/main/java/org/apache/ratis/metrics/dropwizard3/Dm3MetricRegistriesImpl.java index b26f2e27a4..a90c5a0cee 100644 --- 
a/ratis-metrics-dropwizard3/src/main/java/org/apache/ratis/metrics/dropwizard3/Dm3MetricRegistriesImpl.java +++ b/ratis-metrics-dropwizard3/src/main/java/org/apache/ratis/metrics/dropwizard3/Dm3MetricRegistriesImpl.java @@ -28,6 +28,7 @@ import org.apache.ratis.metrics.MetricRegistries; import org.apache.ratis.metrics.MetricRegistryInfo; import org.apache.ratis.metrics.RatisMetricRegistry; +import org.apache.ratis.util.RefCountingMap; import org.apache.ratis.util.TimeDuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/ratis-metrics-dropwizard3/src/test/java/org/apache/ratis/metrics/dropwizard3/TestRefCountingMap.java b/ratis-metrics-dropwizard3/src/test/java/org/apache/ratis/metrics/dropwizard3/TestRefCountingMap.java deleted file mode 100644 index 87b8bf0123..0000000000 --- a/ratis-metrics-dropwizard3/src/test/java/org/apache/ratis/metrics/dropwizard3/TestRefCountingMap.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.ratis.metrics.dropwizard3; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.Collection; -import java.util.Set; - -import org.apache.ratis.thirdparty.com.google.common.collect.Lists; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestRefCountingMap { - - private RefCountingMap map; - - @BeforeEach - public void setUp() { - map = new RefCountingMap<>(); - } - - @Test - public void testPutGet() { - map.put("foo", () -> "foovalue"); - - String v = map.get("foo"); - assertNotNull(v); - assertEquals("foovalue", v); - } - - @Test - public void testPutMulti() { - String v1 = map.put("foo", () -> "foovalue"); - String v2 = map.put("foo", () -> "foovalue2"); - String v3 = map.put("foo", () -> "foovalue3"); - - String v = map.get("foo"); - assertEquals("foovalue", v); - assertEquals(v, v1); - assertEquals(v, v2); - assertEquals(v, v3); - } - - @Test - public void testPutRemove() { - map.put("foo", () -> "foovalue"); - String v = map.remove("foo"); - assertNull(v); - v = map.get("foo"); - assertNull(v); - } - - @Test - public void testPutRemoveMulti() { - map.put("foo", () -> "foovalue"); - map.put("foo", () -> "foovalue2"); - map.put("foo", () -> "foovalue3"); - - // remove 1 - String v = map.remove("foo"); - assertEquals("foovalue", v); - - // remove 2 - v = map.remove("foo"); - assertEquals("foovalue", v); - - // remove 3 - v = map.remove("foo"); - assertNull(v); - v = map.get("foo"); - assertNull(v); - } - - @Test - public void testSize() { - assertEquals(0, map.size()); - - // put a key - map.put("foo", () -> "foovalue"); - assertEquals(1, map.size()); - - // put a different key - map.put("bar", () -> "foovalue2"); - assertEquals(2, map.size()); - - // put the same key again - 
map.put("bar", () -> "foovalue3"); - assertEquals(2, map.size()); // map should be same size - } - - @Test - public void testClear() { - map.put("foo", () -> "foovalue"); - map.put("bar", () -> "foovalue2"); - map.put("baz", () -> "foovalue3"); - - map.clear(); - - assertEquals(0, map.size()); - } - - - @Test - public void testKeySet() { - map.put("foo", () -> "foovalue"); - map.put("bar", () -> "foovalue2"); - map.put("baz", () -> "foovalue3"); - - Set keys = map.keySet(); - assertEquals(3, keys.size()); - - Lists.newArrayList("foo", "bar", "baz").forEach(v -> assertTrue(keys.contains(v))); - } - - @Test - public void testValues() { - map.put("foo", () -> "foovalue"); - map.put("foo", () -> "foovalue2"); - map.put("bar", () -> "foovalue3"); - map.put("baz", () -> "foovalue4"); - - Collection values = map.values(); - assertEquals(3, values.size()); - - Lists.newArrayList("foovalue", "foovalue3", "foovalue4") - .forEach(v -> assertTrue(values.contains(v))); - } -} diff --git a/ratis-netty/dev-support/findbugsExcludeFile.xml b/ratis-netty/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..0e1646df77 --- /dev/null +++ b/ratis-netty/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ratis-netty/pom.xml b/ratis-netty/pom.xml index 1966db8ac4..5688a1fa4a 100644 --- a/ratis-netty/pom.xml +++ b/ratis-netty/pom.xml @@ -17,12 +17,17 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-netty Apache Ratis Netty Support + + + true + + org.apache.ratis @@ -73,5 +78,28 @@ slf4j-api + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test + + + + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyClient.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyClient.java index 
0cf4bd3833..56ca6b030c 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyClient.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyClient.java @@ -17,14 +17,13 @@ */ package org.apache.ratis.netty; +import org.apache.ratis.protocol.exceptions.AlreadyClosedException; import org.apache.ratis.thirdparty.io.netty.bootstrap.Bootstrap; import org.apache.ratis.thirdparty.io.netty.channel.Channel; import org.apache.ratis.thirdparty.io.netty.channel.ChannelFuture; import org.apache.ratis.thirdparty.io.netty.channel.ChannelInitializer; import org.apache.ratis.thirdparty.io.netty.channel.EventLoopGroup; import org.apache.ratis.thirdparty.io.netty.channel.socket.SocketChannel; -import org.apache.ratis.thirdparty.io.netty.handler.logging.LogLevel; -import org.apache.ratis.thirdparty.io.netty.handler.logging.LoggingHandler; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.NetUtils; @@ -33,21 +32,23 @@ import java.net.InetSocketAddress; public class NettyClient implements Closeable { - private final LifeCycle lifeCycle = new LifeCycle(JavaUtils.getClassSimpleName(getClass())); - + private final LifeCycle lifeCycle; + private final String serverAddress; private Channel channel; + NettyClient(String serverAddress) { + this.lifeCycle = new LifeCycle(JavaUtils.getClassSimpleName(getClass()) + "-" + serverAddress); + this.serverAddress = serverAddress; + } + /** Connects to the given server address. 
*/ - public void connect(String serverAddress, EventLoopGroup group, - ChannelInitializer initializer) + public void connect(EventLoopGroup group, ChannelInitializer initializer) throws InterruptedException { final InetSocketAddress address = NetUtils.createSocketAddr(serverAddress); - lifeCycle.startAndTransition( () -> channel = new Bootstrap() .group(group) .channel(NettyUtils.getSocketChannelClass(group)) - .handler(new LoggingHandler(LogLevel.INFO)) .handler(initializer) .connect(address) .sync() @@ -57,13 +58,26 @@ public void connect(String serverAddress, EventLoopGroup group, @Override public void close() { - lifeCycle.checkStateAndClose(() -> { - channel.close().syncUninterruptibly(); - }); + lifeCycle.checkStateAndClose(() -> NettyUtils.closeChannel(channel, serverAddress)); } - public ChannelFuture writeAndFlush(Object msg) { - lifeCycle.assertCurrentState(LifeCycle.States.RUNNING); - return channel.writeAndFlush(msg); + public ChannelFuture writeAndFlush(Object msg) throws AlreadyClosedException { + final LifeCycle.State state = lifeCycle.getCurrentState(); + if (state.isRunning()) { + return channel.writeAndFlush(msg); + } + // For CLOSING, CLOSED, and EXCEPTION states, throw AlreadyClosedException to trigger reconnection + if (state.isClosingOrClosed() || state == LifeCycle.State.EXCEPTION) { + throw new AlreadyClosedException( + "Client is closed or failed: state=" + state + ", channel=" + channel); + } + // For other states (NEW, STARTING, PAUSING, PAUSED), this is a programming error + throw new IllegalStateException("Client is in unexpected state for writeAndFlush: " + + "state=" + state + ", channel=" + channel); + } + + @Override + public String toString() { + return lifeCycle.toString(); } } diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyConfigKeys.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyConfigKeys.java index be3ad8ee67..e84cb4eb24 100644 --- 
a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyConfigKeys.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyConfigKeys.java @@ -176,6 +176,18 @@ static TimeDuration replyQueueGracePeriod(RaftProperties properties) { static void setReplyQueueGracePeriod(RaftProperties properties, TimeDuration timeoutDuration) { setTimeDuration(properties::setTimeDuration, REPLY_QUEUE_GRACE_PERIOD_KEY, timeoutDuration); } + + /** A retry policy specified in comma separated format. */ + String RECONNECT_POLICY_KEY = PREFIX + ".reconnect.policy"; + /** ExponentialBackoffRetry with base sleep 100ms, max sleep 5s and max attempt 100. */ + String RECONNECT_POLICY_DEFAULT = "ExponentialBackoffRetry,100ms,5s,100"; + static String reconnectPolicy(RaftProperties properties) { + return properties.get(RECONNECT_POLICY_KEY, RECONNECT_POLICY_DEFAULT); + } + static void setReconnectPolicy(RaftProperties properties, String retryPolicy) { + properties.set(RECONNECT_POLICY_KEY, retryPolicy); + } + } interface Server { diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java index bd03fefcc3..583d6e3e94 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java @@ -23,7 +23,7 @@ import org.apache.ratis.io.FilePositionCount; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.io.WriteOption; -import org.apache.ratis.netty.server.DataStreamRequestByteBuf; +import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.proto.RaftProtos.DataStreamReplyHeaderProto; import org.apache.ratis.proto.RaftProtos.DataStreamRequestHeaderProto; import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto; @@ -103,11 +103,11 @@ static void encodeDataStreamRequestHeader(DataStreamRequest request, Consumer out) { 
out.accept(Unpooled.wrappedBuffer(buffer)); } + static void encodeDataStreamRequestByteBuf(DataStreamRequestByteBuf request, Consumer out, + ByteBufAllocator allocator) { + encodeDataStreamRequestHeader(request, out, allocator); + encodeByteBuf(request.slice(), out); + } + + static void encodeByteBuf(ByteBuf buffer, Consumer out) { + if (buffer.readableBytes() == 0) { + out.accept(Unpooled.EMPTY_BUFFER); // to avoid EncoderException: must produce at least one message + return; + } + out.accept(buffer); + } + static void encodeDataStreamRequestFilePositionCount( DataStreamRequestFilePositionCount request, Consumer out, ByteBufAllocator allocator) { encodeDataStreamRequestHeader(request, out, allocator); @@ -138,7 +152,7 @@ static void encodeDataStreamRequestFilePositionCount( static void encodeDataStreamReplyByteBuffer(DataStreamReplyByteBuffer reply, Consumer out, ByteBufAllocator allocator) { ByteBuffer headerBuf = getDataStreamReplyHeaderProtoByteBuf(reply); - final ByteBuf headerLenBuf = allocator.directBuffer(DataStreamPacketHeader.getSizeOfHeaderLen()); + final ByteBuf headerLenBuf = allocator.ioBuffer(DataStreamPacketHeader.getSizeOfHeaderLen()); headerLenBuf.writeInt(headerBuf.remaining()); out.accept(headerLenBuf); out.accept(Unpooled.wrappedBuffer(headerBuf)); diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyRpcProxy.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyRpcProxy.java index b7a04b050b..e72d6c6772 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyRpcProxy.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyRpcProxy.java @@ -30,16 +30,23 @@ import org.apache.ratis.proto.RaftProtos.RaftRpcRequestProto; import org.apache.ratis.proto.netty.NettyProtos.RaftNettyServerReplyProto; import org.apache.ratis.proto.netty.NettyProtos.RaftNettyServerRequestProto; +import org.apache.ratis.protocol.exceptions.AlreadyClosedException; +import org.apache.ratis.thirdparty.io.netty.handler.logging.LogLevel; 
+import org.apache.ratis.thirdparty.io.netty.handler.logging.LoggingHandler; import org.apache.ratis.util.IOUtils; +import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.PeerProxyMap; +import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.ProtoUtils; import org.apache.ratis.util.TimeDuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.Closeable; import java.io.IOException; -import java.util.LinkedList; -import java.util.Queue; +import java.util.Map; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -47,6 +54,7 @@ import static org.apache.ratis.proto.netty.NettyProtos.RaftNettyServerReplyProto.RaftNettyServerReplyCase.EXCEPTIONREPLY; public class NettyRpcProxy implements Closeable { + public static final Logger LOG = LoggerFactory.getLogger(NettyRpcProxy.class); public static class PeerMap extends PeerProxyMap { private final EventLoopGroup group; @@ -74,6 +82,41 @@ public void close() { } } + static RaftRpcRequestProto getRequest(RaftNettyServerRequestProto proto) { + final RaftNettyServerRequestProto.RaftNettyServerRequestCase requestCase = proto.getRaftNettyServerRequestCase(); + switch (requestCase) { + case REQUESTVOTEREQUEST: + return proto.getRequestVoteRequest().getServerRequest(); + case APPENDENTRIESREQUEST: + return proto.getAppendEntriesRequest().getServerRequest(); + case INSTALLSNAPSHOTREQUEST: + return proto.getInstallSnapshotRequest().getServerRequest(); + case RAFTCLIENTREQUEST: + return proto.getRaftClientRequest().getRpcRequest(); + case SETCONFIGURATIONREQUEST: + return proto.getSetConfigurationRequest().getRpcRequest(); + case GROUPMANAGEMENTREQUEST: + return proto.getGroupManagementRequest().getRpcRequest(); + case GROUPLISTREQUEST: + return proto.getGroupListRequest().getRpcRequest(); + case 
GROUPINFOREQUEST: + return proto.getGroupInfoRequest().getRpcRequest(); + case TRANSFERLEADERSHIPREQUEST: + return proto.getTransferLeadershipRequest().getRpcRequest(); + case STARTLEADERELECTIONREQUEST: + return proto.getStartLeaderElectionRequest().getServerRequest(); + case SNAPSHOTMANAGEMENTREQUEST: + return proto.getSnapshotManagementRequest().getRpcRequest(); + case LEADERELECTIONMANAGEMENTREQUEST: + return proto.getLeaderElectionManagementRequest().getRpcRequest(); + + case RAFTNETTYSERVERREQUEST_NOT_SET: + throw new IllegalArgumentException("Request case not set in proto: " + requestCase); + default: + throw new UnsupportedOperationException("Request case not supported: " + requestCase); + } + } + public static long getCallId(RaftNettyServerReplyProto proto) { switch (proto.getRaftNettyServerReplyCase()) { case REQUESTVOTEREPLY: @@ -86,6 +129,10 @@ public static long getCallId(RaftNettyServerReplyProto proto) { return proto.getInstallSnapshotReply().getServerReply().getCallId(); case RAFTCLIENTREPLY: return proto.getRaftClientReply().getRpcReply().getCallId(); + case GROUPLISTREPLY: + return proto.getGroupListReply().getRpcReply().getCallId(); + case GROUPINFOREPLY: + return proto.getGroupInfoReply().getRpcReply().getCallId(); case EXCEPTIONREPLY: return proto.getExceptionReply().getRpcReply().getCallId(); case RAFTNETTYSERVERREPLY_NOT_SET: @@ -99,9 +146,8 @@ public static long getCallId(RaftNettyServerReplyProto proto) { class Connection implements Closeable { - private final NettyClient client = new NettyClient(); - private final Queue> replies - = new LinkedList<>(); + private final NettyClient client = new NettyClient(peer.getAddress()); + private final Map> replies = new ConcurrentHashMap<>(); Connection(EventLoopGroup group) throws InterruptedException { final ChannelInboundHandler inboundHandler @@ -109,10 +155,14 @@ class Connection implements Closeable { @Override protected void channelRead0(ChannelHandlerContext ctx, RaftNettyServerReplyProto 
proto) { - final CompletableFuture future = pollReply(); + final long callId = getCallId(proto); + final CompletableFuture future = getReplyFuture(callId, null, "reply"); if (future == null) { - throw new IllegalStateException("Request #" + getCallId(proto) - + " not found"); + if (LOG.isDebugEnabled()) { + LOG.debug("Ignoring reply for callId={} from {} (no outstanding request, outstanding={})", + callId, peer, replies.size()); + } + return; } if (proto.getRaftNettyServerReplyCase() == EXCEPTIONREPLY) { final Object ioe = ProtoUtils.toObject(proto.getExceptionReply().getException()); @@ -121,13 +171,27 @@ protected void channelRead0(ChannelHandlerContext ctx, future.complete(proto); } } + + @Override + public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { + client.close(); + failOutstandingRequests(new IOException("Caught an exception for the connection to " + peer, cause)); + } + + @Override + public void channelInactive(ChannelHandlerContext ctx) throws Exception { + failOutstandingRequests(new AlreadyClosedException("Channel to " + peer + " is inactive.")); + super.channelInactive(ctx); + } }; final ChannelInitializer initializer = new ChannelInitializer() { @Override - protected void initChannel(SocketChannel ch) throws Exception { + protected void initChannel(SocketChannel ch) { final ChannelPipeline p = ch.pipeline(); + // LoggingHandler emits all events at the chosen level; use DEBUG to reduce noise by default. 
+ p.addLast(new LoggingHandler(LogLevel.DEBUG)); p.addLast(new ProtobufVarint32FrameDecoder()); p.addLast(new ProtobufDecoder(RaftNettyServerReplyProto.getDefaultInstance())); p.addLast(new ProtobufVarint32LengthFieldPrepender()); @@ -137,25 +201,67 @@ protected void initChannel(SocketChannel ch) throws Exception { } }; - client.connect(peer.getAddress(), group, initializer); + client.connect(group, initializer); } - synchronized ChannelFuture offer(RaftNettyServerRequestProto request, - CompletableFuture reply) { - replies.offer(reply); - return client.writeAndFlush(request); + private CompletableFuture getReplyFuture(long callId, + CompletableFuture expected, String reason) { + final CompletableFuture removed = replies.remove(callId); + if (removed == null && LOG.isDebugEnabled()) { + LOG.debug("Request {} not found for callId={} from {} (reason={}, outstanding={})", + expected == null ? "future" : "reply", callId, peer, reason, replies.size()); + } + if (expected != null) { + Preconditions.assertSame(expected, removed, "removed"); + } + return removed; } - synchronized CompletableFuture pollReply() { - return replies.poll(); + synchronized CompletableFuture offer(RaftNettyServerRequestProto request) { + final CompletableFuture reply = new CompletableFuture<>(); + final long callId = getRequest(request).getCallId(); + final CompletableFuture previous = replies.put(callId, reply); + Preconditions.assertNull(previous, "previous"); + + final ChannelFuture future; + try { + future = client.writeAndFlush(request); + } catch (AlreadyClosedException e) { + replies.remove(callId, reply); + return JavaUtils.completeExceptionally(e); + } + + future.addListener(cf -> { + if (!cf.isSuccess()) { + // Remove from queue on async write failure to prevent reply mismatch. + // Only complete exceptionally if removal succeeds (not already polled). 
+ final CompletableFuture removed = + getReplyFuture(callId, reply, "write-failure"); + if (removed != null) { + removed.completeExceptionally(cf.cause()); + } else if (LOG.isDebugEnabled()) { + LOG.debug("Write failed for callId={} to {} after request removed", callId, peer, cf.cause()); + } + client.close(); + } + }); + return reply; } @Override public synchronized void close() { client.close(); + failOutstandingRequests(new AlreadyClosedException("Closing connection to " + peer)); + } + + private synchronized void failOutstandingRequests(Throwable cause) { if (!replies.isEmpty()) { - final IOException e = new IOException("Connection to " + peer + " is closed."); - replies.stream().forEach(f -> f.completeExceptionally(e)); + LOG.warn("Still have {} requests outstanding from {} connection: {}", + replies.size(), peer, cause.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("Outstanding request ids from {}: {}", peer, replies.keySet()); + } + replies.values().forEach(f -> f.completeExceptionally(cause)); replies.clear(); } } @@ -176,14 +282,15 @@ public void close() { connection.close(); } + public CompletableFuture sendAsync(RaftNettyServerRequestProto proto) { + return connection.offer(proto); + } + public RaftNettyServerReplyProto send( RaftRpcRequestProto request, RaftNettyServerRequestProto proto) throws IOException { - final CompletableFuture reply = new CompletableFuture<>(); - final ChannelFuture channelFuture = connection.offer(proto, reply); - + final CompletableFuture reply = sendAsync(proto); try { - channelFuture.sync(); TimeDuration newDuration = requestTimeoutDuration.add(request.getTimeoutMs(), TimeUnit.MILLISECONDS); return reply.get(newDuration.getDuration(), newDuration.getUnit()); } catch (InterruptedException e) { diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyUtils.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyUtils.java index 8cce291af9..37666bf189 100644 --- 
a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyUtils.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyUtils.java @@ -22,6 +22,8 @@ import org.apache.ratis.security.TlsConf.KeyManagerConf; import org.apache.ratis.security.TlsConf.PrivateKeyConf; import org.apache.ratis.security.TlsConf.TrustManagerConf; +import org.apache.ratis.thirdparty.io.netty.channel.Channel; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelFuture; import org.apache.ratis.thirdparty.io.netty.channel.EventLoopGroup; import org.apache.ratis.thirdparty.io.netty.channel.ServerChannel; import org.apache.ratis.thirdparty.io.netty.channel.epoll.Epoll; @@ -35,16 +37,19 @@ import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContext; import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContextBuilder; import org.apache.ratis.util.ConcurrentUtils; +import org.apache.ratis.util.TimeDuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.net.ssl.KeyManager; import javax.net.ssl.TrustManager; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; public interface NettyUtils { Logger LOG = LoggerFactory.getLogger(NettyUtils.class); + TimeDuration CLOSE_TIMEOUT = TimeDuration.valueOf(5, TimeUnit.SECONDS); class Print { private static final AtomicBoolean PRINTED_EPOLL_UNAVAILABILITY_CAUSE = new AtomicBoolean(); @@ -176,4 +181,19 @@ static Class getServerChannelClass(EventLoopGroup event return eventLoopGroup instanceof EpollEventLoopGroup ? 
EpollServerSocketChannel.class : NioServerSocketChannel.class; } + + static void closeChannel(Channel channel, String name) { + final ChannelFuture f = channel.close(); + final boolean completed; + try { + completed = f.await(CLOSE_TIMEOUT.getDuration(), CLOSE_TIMEOUT.getUnit()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.info("Interrupted closeChannel {} ", name, e); + return; + } + if (!completed) { + LOG.warn("closeChannel {} is not yet completed in {}", name, CLOSE_TIMEOUT); + } + } } \ No newline at end of file diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientReplies.java b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientReplies.java index fc97b6fe34..695177262c 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientReplies.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientReplies.java @@ -30,22 +30,25 @@ import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; public class NettyClientReplies { public static final Logger LOG = LoggerFactory.getLogger(NettyClientReplies.class); private final ConcurrentMap replies = new ConcurrentHashMap<>(); - ReplyMap getReplyMap(ClientInvocationId clientInvocationId) { + ReplyMap getOrCreateReplyMap(ClientInvocationId clientInvocationId) { final MemoizedSupplier q = MemoizedSupplier.valueOf(() -> new ReplyMap(clientInvocationId)); return replies.computeIfAbsent(clientInvocationId, key -> q.get()); } + ReplyMap getReplyMap(ClientInvocationId clientInvocationId) { + return replies.get(clientInvocationId); + } + class ReplyMap { private final ClientInvocationId clientInvocationId; private final Map map = new ConcurrentHashMap<>(); @@ -56,8 +59,8 @@ 
class ReplyMap { ReplyEntry submitRequest(RequestEntry requestEntry, boolean isClose, CompletableFuture f) { LOG.debug("put {} to the map for {}", requestEntry, clientInvocationId); - final MemoizedSupplier replySupplier = MemoizedSupplier.valueOf(() -> new ReplyEntry(isClose, f)); - return map.computeIfAbsent(requestEntry, r -> replySupplier.get()); + // ConcurrentHashMap.computeIfAbsent javadoc: the function is applied at most once per key. + return map.computeIfAbsent(requestEntry, r -> new ReplyEntry(isClose, f)); } void receiveReply(DataStreamReply reply) { @@ -147,7 +150,7 @@ public String toString() { static class ReplyEntry { private final boolean isClosed; private final CompletableFuture replyFuture; - private final AtomicReference> timeoutFuture = new AtomicReference<>(); + private ScheduledFuture timeoutFuture; // for reply timeout ReplyEntry(boolean isClosed, CompletableFuture replyFuture) { this.isClosed = isClosed; @@ -158,22 +161,26 @@ boolean isClosed() { return isClosed; } - void complete(DataStreamReply reply) { - cancelTimeoutFuture(); + synchronized void complete(DataStreamReply reply) { + cancel(timeoutFuture); replyFuture.complete(reply); } - void completeExceptionally(Throwable t) { - cancelTimeoutFuture(); + synchronized void completeExceptionally(Throwable t) { + cancel(timeoutFuture); replyFuture.completeExceptionally(t); } - private void cancelTimeoutFuture() { - Optional.ofNullable(timeoutFuture.get()).ifPresent(f -> f.cancel(false)); + static void cancel(ScheduledFuture future) { + if (future != null) { + future.cancel(true); + } } - void setTimeoutFuture(ScheduledFuture timeoutFuture) { - this.timeoutFuture.compareAndSet(null, timeoutFuture); + synchronized void scheduleTimeout(Supplier> scheduleMethod) { + if (!replyFuture.isDone()) { + timeoutFuture = scheduleMethod.get(); + } } } } diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientRpc.java 
b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientRpc.java index c816e29ee8..ef34caf17d 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientRpc.java @@ -17,6 +17,7 @@ */ package org.apache.ratis.netty.client; +import org.apache.ratis.client.RaftClientConfigKeys; import org.apache.ratis.client.impl.ClientProtoUtils; import org.apache.ratis.client.impl.RaftClientRpcWithProxy; import org.apache.ratis.conf.RaftProperties; @@ -28,12 +29,79 @@ import org.apache.ratis.proto.RaftProtos.GroupManagementRequestProto; import org.apache.ratis.proto.RaftProtos.SetConfigurationRequestProto; import org.apache.ratis.proto.netty.NettyProtos.RaftNettyServerRequestProto; +import org.apache.ratis.protocol.exceptions.TimeoutIOException; +import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.TimeDuration; +import org.apache.ratis.util.TimeoutExecutor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.concurrent.CompletableFuture; public class NettyClientRpc extends RaftClientRpcWithProxy { + + public static final Logger LOG = LoggerFactory.getLogger(NettyClientRpc.class); + + private ClientId clientId; + private final TimeDuration requestTimeout; + private final TimeoutExecutor scheduler = TimeoutExecutor.getInstance(); + public NettyClientRpc(ClientId clientId, RaftProperties properties) { super(new NettyRpcProxy.PeerMap(clientId.toString(), properties)); + this.clientId = clientId; + this.requestTimeout = RaftClientConfigKeys.Rpc.requestTimeout(properties); + } + + @Override + public CompletableFuture sendRequestAsync(RaftClientRequest request) { + final RaftPeerId serverId = request.getServerId(); + long callId = request.getCallId(); + try { + final NettyRpcProxy proxy = getProxies().getProxy(serverId); + final RaftNettyServerRequestProto serverRequestProto = buildRequestProto(request); + 
final CompletableFuture replyFuture = new CompletableFuture<>(); + + proxy.sendAsync(serverRequestProto).thenApply(replyProto -> { + if (request instanceof GroupListRequest) { + return ClientProtoUtils.toGroupListReply(replyProto.getGroupListReply()); + } else if (request instanceof GroupInfoRequest) { + return ClientProtoUtils.toGroupInfoReply(replyProto.getGroupInfoReply()); + } else { + return ClientProtoUtils.toRaftClientReply(replyProto.getRaftClientReply()); + } + }).whenComplete((reply, e) -> { + if (e == null) { + if (reply == null) { + e = new NullPointerException("Both reply==null && e==null"); + } + if (e == null) { + e = reply.getNotLeaderException(); + } + if (e == null) { + e = reply.getLeaderNotReadyException(); + } + } + + if (e != null) { + replyFuture.completeExceptionally(e); + } else { + replyFuture.complete(reply); + } + }); + + scheduler.onTimeout(requestTimeout, () -> { + if (!replyFuture.isDone()) { + final String s = clientId + "->" + serverId + " request #" + + callId + " timeout " + requestTimeout.getDuration(); + replyFuture.completeExceptionally(new TimeoutIOException(s)); + } + }, LOG, () -> "Timeout check for client request #" + callId); + + return replyFuture; + } catch (Throwable e) { + return JavaUtils.completeExceptionally(e); + } } @Override @@ -41,58 +109,69 @@ public RaftClientReply sendRequest(RaftClientRequest request) throws IOException final RaftPeerId serverId = request.getServerId(); final NettyRpcProxy proxy = getProxies().getProxy(serverId); + final RaftNettyServerRequestProto serverRequestProto = buildRequestProto(request); + final RaftRpcRequestProto rpcRequest = getRpcRequestProto(serverRequestProto); + if (request instanceof GroupListRequest) { + return ClientProtoUtils.toGroupListReply( + proxy.send(rpcRequest, serverRequestProto).getGroupListReply()); + } else if (request instanceof GroupInfoRequest) { + return ClientProtoUtils.toGroupInfoReply( + proxy.send(rpcRequest, serverRequestProto).getGroupInfoReply()); + 
} else { + return ClientProtoUtils.toRaftClientReply( + proxy.send(rpcRequest, serverRequestProto).getRaftClientReply()); + } + } + + private RaftNettyServerRequestProto buildRequestProto(RaftClientRequest request) { final RaftNettyServerRequestProto.Builder b = RaftNettyServerRequestProto.newBuilder(); - final RaftRpcRequestProto rpcRequest; if (request instanceof GroupManagementRequest) { final GroupManagementRequestProto proto = ClientProtoUtils.toGroupManagementRequestProto( (GroupManagementRequest)request); b.setGroupManagementRequest(proto); - rpcRequest = proto.getRpcRequest(); } else if (request instanceof SetConfigurationRequest) { final SetConfigurationRequestProto proto = ClientProtoUtils.toSetConfigurationRequestProto( (SetConfigurationRequest)request); b.setSetConfigurationRequest(proto); - rpcRequest = proto.getRpcRequest(); } else if (request instanceof GroupListRequest) { final RaftProtos.GroupListRequestProto proto = ClientProtoUtils.toGroupListRequestProto( (GroupListRequest)request); b.setGroupListRequest(proto); - rpcRequest = proto.getRpcRequest(); } else if (request instanceof GroupInfoRequest) { final RaftProtos.GroupInfoRequestProto proto = ClientProtoUtils.toGroupInfoRequestProto( (GroupInfoRequest)request); b.setGroupInfoRequest(proto); - rpcRequest = proto.getRpcRequest(); } else if (request instanceof TransferLeadershipRequest) { final RaftProtos.TransferLeadershipRequestProto proto = ClientProtoUtils.toTransferLeadershipRequestProto( (TransferLeadershipRequest)request); b.setTransferLeadershipRequest(proto); - rpcRequest = proto.getRpcRequest(); } else if (request instanceof SnapshotManagementRequest) { final RaftProtos.SnapshotManagementRequestProto proto = ClientProtoUtils.toSnapshotManagementRequestProto( (SnapshotManagementRequest) request); b.setSnapshotManagementRequest(proto); - rpcRequest = proto.getRpcRequest(); } else if (request instanceof LeaderElectionManagementRequest) { final 
RaftProtos.LeaderElectionManagementRequestProto proto = ClientProtoUtils.toLeaderElectionManagementRequestProto( (LeaderElectionManagementRequest) request); b.setLeaderElectionManagementRequest(proto); - rpcRequest = proto.getRpcRequest(); } else { final RaftClientRequestProto proto = ClientProtoUtils.toRaftClientRequestProto(request); b.setRaftClientRequest(proto); - rpcRequest = proto.getRpcRequest(); } - if (request instanceof GroupListRequest) { - return ClientProtoUtils.toGroupListReply( - proxy.send(rpcRequest, b.build()).getGroupListReply()); - } else if (request instanceof GroupInfoRequest) { - return ClientProtoUtils.toGroupInfoReply( - proxy.send(rpcRequest, b.build()).getGroupInfoReply()); + return b.build(); + } + + private RaftRpcRequestProto getRpcRequestProto(RaftNettyServerRequestProto serverRequestProto) { + if (serverRequestProto.hasGroupManagementRequest()) { + return serverRequestProto.getGroupManagementRequest().getRpcRequest(); + } else if (serverRequestProto.hasSetConfigurationRequest()) { + return serverRequestProto.getSetConfigurationRequest().getRpcRequest(); + } else if (serverRequestProto.hasGroupListRequest()) { + return serverRequestProto.getGroupListRequest().getRpcRequest(); + } else if (serverRequestProto.hasGroupInfoRequest()) { + return serverRequestProto.getGroupInfoRequest().getRpcRequest(); } else { - return ClientProtoUtils.toRaftClientReply( - proxy.send(rpcRequest, b.build()).getRaftClientReply()); + return serverRequestProto.getRaftClientRequest().getRpcRequest(); } } } diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java index 020acc2fdb..4970d244a1 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java @@ -21,6 +21,7 @@ import org.apache.ratis.client.DataStreamClientRpc; 
import org.apache.ratis.client.RaftClientConfigKeys; import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; import org.apache.ratis.datastream.impl.DataStreamRequestFilePositionCount; import org.apache.ratis.io.StandardWriteOption; @@ -34,7 +35,9 @@ import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.exceptions.AlreadyClosedException; import org.apache.ratis.protocol.exceptions.TimeoutIOException; +import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.security.TlsConf; +import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.ratis.thirdparty.io.netty.bootstrap.Bootstrap; import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.thirdparty.io.netty.channel.Channel; @@ -52,7 +55,6 @@ import org.apache.ratis.thirdparty.io.netty.handler.codec.ByteToMessageDecoder; import org.apache.ratis.thirdparty.io.netty.handler.codec.MessageToMessageEncoder; import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContext; -import org.apache.ratis.thirdparty.io.netty.util.concurrent.ScheduledFuture; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.NetUtils; @@ -71,6 +73,8 @@ import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Supplier; @@ -136,20 +140,24 @@ void shutdownGracefully() { } static class Connection { - static final TimeDuration RECONNECT = TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); + static final TimeDuration FIVE_HUNDRED_MS = TimeDuration.valueOf(500, TimeUnit.MILLISECONDS); private final 
InetSocketAddress address; private final WorkerGroupGetter workerGroup; private final Supplier> channelInitializerSupplier; + private final RetryPolicy reconnectPolicy; /** The {@link ChannelFuture} is null when this connection is closed. */ private final AtomicReference> ref; + private final AtomicBoolean reconnectScheduled = new AtomicBoolean(false); + private final AtomicInteger reconnectAttempts = new AtomicInteger(); Connection(InetSocketAddress address, WorkerGroupGetter workerGroup, - Supplier> channelInitializerSupplier) { + Supplier> channelInitializerSupplier, RetryPolicy reconnectPolicy) { this.address = address; this.workerGroup = workerGroup; this.channelInitializerSupplier = channelInitializerSupplier; + this.reconnectPolicy = reconnectPolicy; this.ref = new AtomicReference<>(MemoizedSupplier.valueOf(this::connect)); } @@ -191,21 +199,47 @@ public void operationComplete(ChannelFuture future) { if (!future.isSuccess()) { scheduleReconnect(Connection.this + " failed", future.cause()); } else { + reconnectAttempts.set(0); LOG.trace("{} succeed.", Connection.this); } } }); } + /** + * Schedules a reconnection attempt with exponential backoff and jitter. + * + * @param message description of the failure + * @param cause the exception that triggered reconnection (may be null) + */ void scheduleReconnect(String message, Throwable cause) { if (isClosed()) { return; } - LOG.warn("{}: {}; schedule reconnecting to {} in {}", this, message, address, RECONNECT); + if (!reconnectScheduled.compareAndSet(false, true)) { + return; + } + // Use retry index starting at 0 so the first delay equals base sleep time. 
+ final int attempt = reconnectAttempts.getAndIncrement(); + final RetryPolicy.Action action = reconnectPolicy.handleAttemptFailure(() -> attempt); + if (!action.shouldRetry()) { + reconnectScheduled.set(false); + LOG.warn("{}: {}; no more retries to {} after attempt {}", this, message, address, attempt); + return; + } + final TimeDuration delay = action.getSleepTime(); if (cause != null) { - LOG.warn("", cause); + LOG.warn("{}: {}; reconnect to {} in {} for attempt {}", + this, message, address, delay, attempt, cause); + } else if (delay.compareTo(FIVE_HUNDRED_MS) < 0) { + LOG.info("{}: {}; reconnect to {} in {} for attempt {}", this, message, address, delay, attempt); + } else { + LOG.warn("{}: {}; reconnect to {} in {} for attempt {}", this, message, address, delay, attempt); } - getWorkerGroup().schedule(this::reconnect, RECONNECT.getDuration(), RECONNECT.getUnit()); + getWorkerGroup().schedule(() -> { + reconnectScheduled.set(false); + reconnect(); + }, delay.getDuration(), delay.getUnit()); } private synchronized ChannelFuture reconnect() { @@ -313,8 +347,11 @@ public NettyClientStreamRpc(RaftPeer server, TlsConf tlsConf, RaftProperties pro final InetSocketAddress address = NetUtils.createSocketAddr(server.getDataStreamAddress()); final SslContext sslContext = NettyUtils.buildSslContextForClient(tlsConf); + final RetryPolicy reconnectPolicy = RetryPolicy.parse( + NettyConfigKeys.DataStream.Client.reconnectPolicy(properties), + NettyConfigKeys.DataStream.Client.RECONNECT_POLICY_KEY); this.connection = new Connection(address, WorkerGroupGetter.newInstance(properties), - () -> newChannelInitializer(address, sslContext, getClientHandler())); + () -> newChannelInitializer(address, sslContext, getClientHandler()), reconnectPolicy); } private ChannelInboundHandler getClientHandler(){ @@ -339,15 +376,14 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) { try { replyMap.receiveReply(reply); } catch (Throwable cause) { - LOG.warn(name + ": channelRead 
error:", cause); + LOG.warn("{} : channelRead error:", name, cause); replyMap.completeExceptionally(cause); } } @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { - LOG.warn(name + ": exceptionCaught", cause); - + LOG.warn("{} : exceptionCaught", name, cause); ctx.close(); } @@ -370,6 +406,7 @@ public void initChannel(SocketChannel ch) { p.addLast(ENCODER); p.addLast(ENCODER_FILE_POSITION_COUNT); p.addLast(ENCODER_BYTE_BUFFER); + p.addLast(ENCODER_BYTE_BUF); p.addLast(newDecoder()); p.addLast(handler); } @@ -386,6 +423,16 @@ protected void encode(ChannelHandlerContext context, DataStreamRequestByteBuffer } } + static final MessageToMessageEncoder ENCODER_BYTE_BUF = new EncoderByteBuf(); + + @ChannelHandler.Sharable + static class EncoderByteBuf extends MessageToMessageEncoder { + @Override + protected void encode(ChannelHandlerContext context, DataStreamRequestByteBuf request, List out) { + NettyDataStreamUtils.encodeDataStreamRequestByteBuf(request, out::add, context.alloc()); + } + } + static final MessageToMessageEncoder ENCODER_FILE_POSITION_COUNT = new EncoderFilePositionCount(); @@ -426,7 +473,7 @@ public CompletableFuture streamAsync(DataStreamRequest request) ClientInvocationId clientInvocationId = ClientInvocationId.valueOf(request.getClientId(), request.getStreamId()); final boolean isClose = request.getWriteOptionList().contains(StandardWriteOption.CLOSE); - final NettyClientReplies.ReplyMap replyMap = replies.getReplyMap(clientInvocationId); + final NettyClientReplies.ReplyMap replyMap = replies.getOrCreateReplyMap(clientInvocationId); final ChannelFuture channelFuture; final Channel channel; final NettyClientReplies.RequestEntry requestEntry = new NettyClientReplies.RequestEntry(request); @@ -454,15 +501,13 @@ public CompletableFuture streamAsync(DataStreamRequest request) LOG.debug("{}: write after {}", this, request); final TimeDuration timeout = isClose ? 
closeTimeout : requestTimeout; - // if reply success cancel this future - final ScheduledFuture timeoutFuture = channel.eventLoop().schedule(() -> { + replyEntry.scheduleTimeout(() -> channel.eventLoop().schedule(() -> { if (!f.isDone()) { f.completeExceptionally(new TimeoutIOException( - "Timeout " + timeout + ": Failed to send " + request + " channel: " + channel)); + "Timeout " + timeout + ": Failed to send " + request + " via channel " + channel)); replyMap.fail(requestEntry); } - }, timeout.toLong(timeout.getUnit()), timeout.getUnit()); - replyEntry.setTimeoutFuture(timeoutFuture); + }, timeout.getDuration(), timeout.getUnit())); } }); return f; @@ -484,4 +529,28 @@ public void close() { public String toString() { return name; } + + // Visible for tests. + @VisibleForTesting + RetryPolicy getReconnectPolicy() { + return connection.reconnectPolicy; + } + + // Visible for tests. + boolean waitForChannelActive(TimeDuration timeout) { + final long deadline = System.nanoTime() + timeout.toLong(TimeUnit.NANOSECONDS); + while (System.nanoTime() < deadline) { + final Channel channel = connection.getChannelUninterruptibly(); + if (channel != null && channel.isActive()) { + return true; + } + try { + Thread.sleep(100L); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return false; + } + } + return false; + } } diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/ChannelMap.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/ChannelMap.java new file mode 100644 index 0000000000..4edf878dfa --- /dev/null +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/ChannelMap.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.netty.server; + +import org.apache.ratis.protocol.ClientInvocationId; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelId; + +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** Map: {@link ChannelId} -> {@link ClientInvocationId}s. */ +class ChannelMap { + private final Map> map = new ConcurrentHashMap<>(); + + void add(ChannelId channelId, ClientInvocationId clientInvocationId) { + map.computeIfAbsent(channelId, (e) -> new ConcurrentHashMap<>()) + .put(clientInvocationId, clientInvocationId); + } + + void remove(ChannelId channelId, ClientInvocationId clientInvocationId) { + Optional.ofNullable(map.get(channelId)) + .ifPresent((ids) -> ids.remove(clientInvocationId)); + } + + int size(ChannelId channelId) { + return Optional.ofNullable(map.get(channelId)) + .map(Map::size) + .orElse(0); + } + + Set remove(ChannelId channelId) { + return Optional.ofNullable(map.remove(channelId)) + .map(Map::keySet) + .orElse(Collections.emptySet()); + } +} diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java index 276a365cea..0e10b0f4dc 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java +++ 
b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java @@ -18,10 +18,12 @@ package org.apache.ratis.netty.server; -import org.apache.ratis.client.DataStreamOutputRpc; +import org.apache.ratis.client.RaftClientConfigKeys; import org.apache.ratis.client.impl.ClientProtoUtils; +import org.apache.ratis.client.impl.DataStreamClientImpl.DataStreamOutputImpl; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.io.WriteOption; import org.apache.ratis.metrics.Timekeeper; @@ -49,6 +51,7 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.StateMachine.DataStream; import org.apache.ratis.statemachine.StateMachine.DataChannel; +import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.thirdparty.io.netty.channel.ChannelHandlerContext; import org.apache.ratis.thirdparty.io.netty.channel.ChannelId; @@ -69,13 +72,10 @@ import java.util.Collection; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicReference; @@ -111,12 +111,12 @@ void cleanUp() { } static class RemoteStream { - private final DataStreamOutputRpc out; + private final DataStreamOutputImpl out; private final AtomicReference> sendFuture = new AtomicReference<>(CompletableFuture.completedFuture(null)); private final RequestMetrics metrics; - 
RemoteStream(DataStreamOutputRpc out, RequestMetrics metrics) { + RemoteStream(DataStreamOutputImpl out, RequestMetrics metrics) { this.metrics = metrics; this.out = out; } @@ -132,7 +132,7 @@ static Iterable addFlush(List original) { CompletableFuture write(DataStreamRequestByteBuf request, Executor executor) { final Timekeeper.Context context = metrics.start(); return composeAsync(sendFuture, executor, - n -> out.writeAsync(request.slice().nioBuffer(), addFlush(request.getWriteOptionList())) + n -> out.writeAsync(request.slice().retain(), addFlush(request.getWriteOptionList())) .whenComplete((l, e) -> metrics.stop(context, e == null))); } } @@ -142,20 +142,20 @@ static class StreamInfo { private final boolean primary; private final LocalStream local; private final Set remotes; - private final RaftServer server; + private final Division division; private final AtomicReference> previous = new AtomicReference<>(CompletableFuture.completedFuture(null)); - StreamInfo(RaftClientRequest request, boolean primary, CompletableFuture stream, RaftServer server, - CheckedBiFunction, Set, IOException> getStreams, + StreamInfo(RaftClientRequest request, boolean primary, CompletableFuture stream, Division division, + CheckedBiFunction, Set, IOException> getStreams, Function metricsConstructor) throws IOException { this.request = request; this.primary = primary; this.local = new LocalStream(stream, metricsConstructor.apply(RequestType.LOCAL_WRITE)); - this.server = server; - final Set successors = getSuccessors(server.getId()); - final Set outs = getStreams.apply(request, successors); + this.division = division; + final Set successors = getSuccessors(division.getId()); + final Set outs = getStreams.apply(request, successors); this.remotes = outs.stream() .map(o -> new RemoteStream(o, metricsConstructor.apply(RequestType.REMOTE_WRITE))) .collect(Collectors.toSet()); @@ -169,16 +169,12 @@ RaftClientRequest getRequest() { return request; } - Division getDivision() throws IOException 
{ - return server.getDivision(request.getRaftGroupId()); + Division getDivision() { + return division; } Collection getCommitInfos() { - try { - return getDivision().getCommitInfos(); - } catch (IOException e) { - throw new IllegalStateException(e); - } + return getDivision().getCommitInfos(); } boolean isPrimary() { @@ -198,7 +194,7 @@ public String toString() { return JavaUtils.getClassSimpleName(getClass()) + ":" + request; } - private Set getSuccessors(RaftPeerId peerId) throws IOException { + private Set getSuccessors(RaftPeerId peerId) { final RaftConfiguration conf = getDivision().getRaftConf(); final RoutingTable routingTable = request.getRoutingTable(); @@ -207,66 +203,31 @@ private Set getSuccessors(RaftPeerId peerId) throws IOException { } if (isPrimary()) { - // Default start topology + // Default star topology // get the other peers from the current configuration return conf.getCurrentPeers().stream() - .filter(p -> !p.getId().equals(server.getId())) + .filter(p -> !p.getId().equals(division.getId())) .collect(Collectors.toSet()); } return Collections.emptySet(); } - } - - static class StreamMap { - private final ConcurrentMap map = new ConcurrentHashMap<>(); - - StreamInfo computeIfAbsent(ClientInvocationId key, Function function) { - final StreamInfo info = map.computeIfAbsent(key, function); - LOG.debug("computeIfAbsent({}) returns {}", key, info); - return info; - } - StreamInfo get(ClientInvocationId key) { - final StreamInfo info = map.get(key); - LOG.debug("get({}) returns {}", key, info); - return info; - } - - StreamInfo remove(ClientInvocationId key) { - final StreamInfo info = map.remove(key); - LOG.debug("remove({}) returns {}", key, info); - return info; - } - } - - public static class ChannelMap { - private final Map> map = new ConcurrentHashMap<>(); - - public void add(ChannelId channelId, - ClientInvocationId clientInvocationId) { - map.computeIfAbsent(channelId, (e) -> new ConcurrentHashMap<>()).put(clientInvocationId, 
clientInvocationId); - } - - public void remove(ChannelId channelId, - ClientInvocationId clientInvocationId) { - Optional.ofNullable(map.get(channelId)).ifPresent((ids) -> ids.remove(clientInvocationId)); - } - - public Set remove(ChannelId channelId) { - return Optional.ofNullable(map.remove(channelId)) - .map(Map::keySet) - .orElse(Collections.emptySet()); + void cleanUp(ClientInvocationId invocationId) { + getDivision().getDataStreamMap().remove(invocationId); + getLocal().cleanUp(); + applyToRemotes(remote -> remote.out.closeAsync()); } } private final RaftServer server; private final String name; - private final StreamMap streams = new StreamMap(); + private final StreamMap streams = new StreamMap<>(); private final ChannelMap channels; private final ExecutorService requestExecutor; private final ExecutorService writeExecutor; + private final TimeDuration requestTimeout; private final NettyServerStreamRpcMetrics nettyServerStreamRpcMetrics; @@ -283,6 +244,7 @@ public Set remove(ChannelId channelId) { this.writeExecutor = ConcurrentUtils.newThreadPoolWithMax(useCachedThreadPool, RaftServerConfigKeys.DataStream.asyncWriteThreadPoolSize(properties), name + "-write-"); + this.requestTimeout = RaftClientConfigKeys.DataStream.requestTimeout(server.getProperties()); this.nettyServerStreamRpcMetrics = metrics; } @@ -315,12 +277,13 @@ private CompletableFuture computeDataStreamIfAbsent(RaftClientReques } private StreamInfo newStreamInfo(ByteBuf buf, - CheckedBiFunction, Set, IOException> getStreams) { + CheckedBiFunction, Set, IOException> getStreams) { try { final RaftClientRequest request = ClientProtoUtils.toRaftClientRequest( RaftClientRequestProto.parseFrom(buf.nioBuffer())); final boolean isPrimary = server.getId().equals(request.getServerId()); - return new StreamInfo(request, isPrimary, computeDataStreamIfAbsent(request), server, getStreams, + final Division division = server.getDivision(request.getRaftGroupId()); + return new StreamInfo(request, isPrimary, 
computeDataStreamIfAbsent(request), division, getStreams, getMetrics()::newRequestMetrics); } catch (Throwable e) { throw new CompletionException(e); @@ -345,6 +308,9 @@ static long writeTo(ByteBuf buf, Iterable options, final DataChannel channel = stream.getDataChannel(); long byteWritten = 0; for (ByteBuffer buffer : buf.nioBuffers()) { + if (buffer.remaining() == 0) { + continue; + } final ReferenceCountedObject wrapped = ReferenceCountedObject.wrap( buffer, buf::retain, ignored -> buf.release()); try(UncheckedAutoCloseable ignore = wrapped.retainAndReleaseOnClose()) { @@ -386,7 +352,7 @@ static DataStreamReplyByteBuffer newDataStreamReplyByteBuffer(DataStreamRequestB .build(); } - static void sendReply(List> remoteWrites, + private void sendReply(List> remoteWrites, DataStreamRequestByteBuf request, long bytesWritten, Collection commitInfos, ChannelHandlerContext ctx) { final boolean success = checkSuccessRemoteWrite(remoteWrites, bytesWritten, request); @@ -433,9 +399,7 @@ static void sendDataStreamException(Throwable throwable, DataStreamRequestByteBu void cleanUp(Set ids) { for (ClientInvocationId clientInvocationId : ids) { - Optional.ofNullable(streams.remove(clientInvocationId)) - .map(StreamInfo::getLocal) - .ifPresent(LocalStream::cleanUp); + removeDataStream(clientInvocationId); } } @@ -448,23 +412,39 @@ void cleanUpOnChannelInactive(ChannelId channelId, TimeDuration channelInactiveG }); } + @VisibleForTesting + int getChannelInvocationCount(ChannelId channelId) { + return channels.size(channelId); + } + void read(DataStreamRequestByteBuf request, ChannelHandlerContext ctx, - CheckedBiFunction, Set, IOException> getStreams) { + CheckedBiFunction, Set, IOException> getStreams) { LOG.debug("{}: read {}", this, request); + final ClientInvocationId key = ClientInvocationId.valueOf(request.getClientId(), request.getStreamId()); + final ChannelId channelId = ctx.channel().id(); try { - readImpl(request, ctx, getStreams); + readImpl(request, ctx, getStreams, 
key, channelId); } catch (Throwable t) { replyDataStreamException(t, request, ctx); + removeDataStream(key); + channels.remove(channelId, key); + } + } + + private StreamInfo removeDataStream(ClientInvocationId invocationId) { + final StreamInfo removed = streams.remove(invocationId); + if (removed != null) { + removed.cleanUp(invocationId); } + return removed; } private void readImpl(DataStreamRequestByteBuf request, ChannelHandlerContext ctx, - CheckedBiFunction, Set, IOException> getStreams) { + CheckedBiFunction, Set, IOException> getStreams, + ClientInvocationId key, ChannelId channelId) { final boolean close = request.getWriteOptionList().contains(StandardWriteOption.CLOSE); - ClientInvocationId key = ClientInvocationId.valueOf(request.getClientId(), request.getStreamId()); // add to ChannelMap - final ChannelId channelId = ctx.channel().id(); channels.add(channelId, key); final StreamInfo info; @@ -473,8 +453,6 @@ private void readImpl(DataStreamRequestByteBuf request, ChannelHandlerContext ct () -> newStreamInfo(request.slice(), getStreams)); info = streams.computeIfAbsent(key, id -> supplier.get()); if (!supplier.isInitialized()) { - final StreamInfo removed = streams.remove(key); - removed.getLocal().cleanUp(); throw new IllegalStateException("Failed to create a new stream for " + request + " since a stream already exists Key: " + key + " StreamInfo:" + info); } @@ -512,9 +490,15 @@ private void readImpl(DataStreamRequestByteBuf request, ChannelHandlerContext ct }, requestExecutor)).whenComplete((v, exception) -> { try { if (exception != null) { - final StreamInfo removed = streams.remove(key); replyDataStreamException(server, exception, info.getRequest(), request, ctx); - removed.getLocal().cleanUp(); + final StreamInfo removed = removeDataStream(key); + if (removed != null) { + Preconditions.assertSame(info, removed, "removed"); + } else { + info.cleanUp(key); + } + } else if (close) { + info.applyToRemotes(remote -> remote.out.closeAsync()); } } 
finally { request.release(); @@ -531,10 +515,15 @@ static void assertReplyCorrespondingToRequest( Preconditions.assertTrue(request.getStreamOffset() == reply.getStreamOffset()); } - static boolean checkSuccessRemoteWrite(List> replyFutures, long bytesWritten, + private boolean checkSuccessRemoteWrite(List> replyFutures, long bytesWritten, final DataStreamRequestByteBuf request) { for (CompletableFuture replyFuture : replyFutures) { - final DataStreamReply reply = replyFuture.join(); + final DataStreamReply reply; + try { + reply = replyFuture.get(requestTimeout.getDuration(), requestTimeout.getUnit()); + } catch (Exception e) { + throw new CompletionException("Failed to get reply for bytesWritten=" + bytesWritten + ", " + request, e); + } assertReplyCorrespondingToRequest(request, reply); if (!reply.isSuccess()) { LOG.warn("reply is not success, request: {}", request); diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java index c5f24b0587..451040bb62 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java @@ -20,9 +20,11 @@ import org.apache.ratis.client.DataStreamClient; import org.apache.ratis.client.DataStreamOutputRpc; +import org.apache.ratis.client.impl.DataStreamClientImpl.DataStreamOutputImpl; import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.netty.NettyConfigKeys; import org.apache.ratis.netty.NettyDataStreamUtils; import org.apache.ratis.netty.NettyUtils; @@ -90,8 +92,8 @@ void addPeers(Collection newPeers) { map.addRaftPeers(newPeers); } - Set getDataStreamOutput(RaftClientRequest request, Set peers) throws 
IOException { - final Set outs = new HashSet<>(); + Set getDataStreamOutput(RaftClientRequest request, Set peers) throws IOException { + final Set outs = new HashSet<>(); try { getDataStreamOutput(request, peers, outs); } catch (IOException e) { @@ -101,11 +103,11 @@ Set getDataStreamOutput(RaftClientRequest request, Set peers, Set outs) + private void getDataStreamOutput(RaftClientRequest request, Set peers, Set outs) throws IOException { for (RaftPeer peer : peers) { try { - outs.add((DataStreamOutputRpc) map.computeIfAbsent(peer).get().stream(request)); + outs.add((DataStreamOutputImpl) map.computeIfAbsent(peer).get().stream(request)); } catch (IOException e) { map.handleException(peer.getId(), e, true); throw new IOException(map.getName() + ": Failed to getDataStreamOutput for " + peer, e); @@ -238,7 +240,7 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) { } @Override - public void channelInactive(ChannelHandlerContext ctx) throws Exception { + public void channelInactive(ChannelHandlerContext ctx) { requests.cleanUpOnChannelInactive(ctx.channel().id(), channelInactiveGracePeriod); } diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/StreamMap.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/StreamMap.java new file mode 100644 index 0000000000..073698cb80 --- /dev/null +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/StreamMap.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.netty.server; + +import org.apache.ratis.protocol.ClientInvocationId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.function.Function; + +/** + * Map: {@link ClientInvocationId} -> {@link STREAM}. + * + * @param the stream type. + */ +class StreamMap { + public static final Logger LOG = LoggerFactory.getLogger(StreamMap.class); + + private final ConcurrentMap map = new ConcurrentHashMap<>(); + + STREAM computeIfAbsent(ClientInvocationId key, Function function) { + final STREAM info = map.computeIfAbsent(key, function); + LOG.debug("computeIfAbsent({}) returns {}", key, info); + return info; + } + + STREAM get(ClientInvocationId key) { + final STREAM info = map.get(key); + LOG.debug("get({}) returns {}", key, info); + return info; + } + + STREAM remove(ClientInvocationId key) { + final STREAM info = map.remove(key); + LOG.debug("remove({}) returns {}", key, info); + return info; + } +} diff --git a/ratis-netty/src/test/java/org/apache/ratis/netty/MiniRaftClusterWithNetty.java b/ratis-netty/src/test/java/org/apache/ratis/netty/MiniRaftClusterWithNetty.java index 990b63d90d..b912e14927 100644 --- a/ratis-netty/src/test/java/org/apache/ratis/netty/MiniRaftClusterWithNetty.java +++ b/ratis-netty/src/test/java/org/apache/ratis/netty/MiniRaftClusterWithNetty.java @@ -48,7 +48,7 @@ default Factory getFactory() { } } - public static final DelayLocalExecutionInjection sendServerRequest + public static final 
DelayLocalExecutionInjection SEND_SERVER_REQUEST = new DelayLocalExecutionInjection(NettyRpcService.SEND_SERVER_REQUEST); protected MiniRaftClusterWithNetty(String[] ids, String[] listenerIds, RaftProperties properties) { @@ -64,7 +64,7 @@ protected Parameters setPropertiesAndInitParameters(RaftPeerId id, RaftGroup gro @Override protected void blockQueueAndSetDelay(String leaderId, int delayMs) throws InterruptedException { - RaftTestUtil.blockQueueAndSetDelay(getServers(), sendServerRequest, + RaftTestUtil.blockQueueAndSetDelay(getServers(), SEND_SERVER_REQUEST, leaderId, delayMs, getTimeoutMax()); } } diff --git a/ratis-proto/pom.xml b/ratis-proto/pom.xml index e0cf2c8587..322bf56778 100644 --- a/ratis-proto/pom.xml +++ b/ratis-proto/pom.xml @@ -17,7 +17,7 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-proto @@ -25,6 +25,8 @@ true + + true true @@ -34,7 +36,7 @@ kr.motd.maven os-maven-plugin - 1.5.0.Final + 1.7.1 @@ -50,11 +52,6 @@ false - - org.apache.maven.plugins - maven-checkstyle-plugin - - org.apache.maven.plugins @@ -174,8 +171,14 @@ ratis-thirdparty-misc - org.apache.tomcat - annotations-api + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test diff --git a/ratis-proto/src/main/proto/RMap.proto b/ratis-proto/src/main/proto/RMap.proto deleted file mode 100644 index 43c9377d24..0000000000 --- a/ratis-proto/src/main/proto/RMap.proto +++ /dev/null @@ -1,189 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -syntax = "proto3"; -option java_package = "org.apache.ratis.proto.rmap"; -option java_outer_classname = "RMapProtos"; -option java_generate_equals_and_hash = true; -package ratis.rmap; - -// TODO: This .proto file should go to the ratis-replicated-map module, but we need it here -// due to shading. - -// Metadata about a replicated map -message RMapInfo { - int64 rmap_id = 1; - string name = 2; - string key_class = 3; - string value_class = 4; - string key_serde_class = 5; - string value_serde_class = 6; - string key_comparator_class = 7; -} - -// An entry in a replicated map.` -message Entry { - bytes key = 1; - bytes value = 2; -} - -// TODO: raft client should allow a Service decleration, and calling a method from the service -// similar to how coprocessor calls work in HBase. 
-message Request { - oneof RequestType { - MultiActionRequest multi_action_request = 1; - ScanRequest scan_request = 2; - CreateRMapRequest create_rmap_request = 3; - DeleteRMapRequest delete_rmap_request = 4; - ListRMapInfosRequest list_rmap_infos_request = 5; - } -} - -message Response { - ExceptionResponse exception = 1; - oneof ResponseType { - MultiActionResponse multi_action_response = 2; - ScanResponse scan_response = 3; - CreateRMapResponse create_rmap_response = 4; - DeleteRMapResponse delete_rmap_response = 5; - ListRMapInfosResponse list_rmap_infos_response = 6; - } -} - -message MultiActionRequest { - int64 rmap_id = 1; - repeated Action action = 2; -} - -message Action { - oneof ActionType { - GetRequest get_request = 1; - PutRequest put_request = 2; - } -} - -message ActionResponse { - oneof ActionType { - GetResponse get_response = 1; - PutResponse put_response = 2; - } -} - -message MultiActionResponse { - repeated ActionResponse action_response = 1; -} - -message CreateRMapRequest { - RMapInfo rmap_info = 1; -} - -message CreateRMapResponse { - RMapInfo rmap_info = 1; -} - -message DeleteRMapRequest { - int64 rmap_id = 1; -} - -message DeleteRMapResponse { -} - -message ListRMapInfosRequest { - oneof ListRMapInfosType { - // if set, we only care about a particular RMapInfo - int64 rmap_id = 1; - - // only return infos whose names match this pattern - string name_pattern = 2; - } -} - -message ListRMapInfosResponse { - repeated RMapInfo rmap_info = 1; -} - -message GetRequest { - bytes key = 1; -} - -message GetResponse { - bool found = 1; - bytes key = 2; - bytes value = 3; -} - -message PutRequest { - bytes key = 1; - bytes value = 2; -} - -message PutResponse { -} - -message Scan { - bytes start_Key = 1; - bytes end_key = 2; - bool start_key_inclusive = 3; - bool end_key_inclusive = 4; - bool keys_only = 5; - int32 limit = 6; -} - -message ScanRequest { - int64 rmap_id = 1; - Scan scan = 2; -} - -message ScanResponse { - repeated Entry entry = 1; 
-} - -message ExceptionResponse { - // Class name of the exception thrown from the server - string exception_class_name = 1; - // Exception stack trace from the server side - string stack_trace = 2; - // Optional hostname. Filled in for some exceptions such as region moved - // where exception gives clue on where the region may have moved. - string hostname = 3; - int32 port = 4; - // Set if we are NOT to retry on receipt of this exception - bool do_not_retry = 5; -} - -message Id { - int64 id = 1; -} - -message WALEntry { - int64 rmap_id = 1; // these are shared by all WALEntry types - repeated Entry entry = 2; - oneof WALEntryType { - // Multi is not here, because we do not want to create one more object unnecessarily - CreateRMapWALEntry create_rmap_entry = 3; - DeleteRMapWALEntry delete_rmap_entry = 4; - } -} - -message CreateRMapWALEntry { - RMapInfo rmap_info = 1; - Id id = 2; -} - -message DeleteRMapWALEntry { - int64 id = 1; -} diff --git a/ratis-proto/src/main/proto/Raft.proto b/ratis-proto/src/main/proto/Raft.proto index 586ec1b052..eba5de3b74 100644 --- a/ratis-proto/src/main/proto/Raft.proto +++ b/ratis-proto/src/main/proto/Raft.proto @@ -117,6 +117,7 @@ message RaftRpcRequestProto { uint64 callId = 4; bool toLeader = 5; + SpanContextProto spanContext = 11; repeated uint64 repliedCallIds = 12; // The call ids of the replied requests uint64 timeoutMs = 13; RoutingTableProto routingTable = 14; @@ -155,6 +156,7 @@ enum InstallSnapshotResult { CONF_MISMATCH = 4; SNAPSHOT_INSTALLED = 5; SNAPSHOT_UNAVAILABLE = 6; + SNAPSHOT_EXPIRED = 7; } message RequestVoteRequestProto { @@ -168,6 +170,7 @@ message RequestVoteReplyProto { RaftRpcReplyProto serverReply = 1; uint64 term = 2; bool shouldShutdown = 3; + TermIndexProto lastEntry = 4; // to determine if the voter log is empty. 
} message CommitInfoProto { @@ -470,7 +473,7 @@ message SnapshotManagementRequestProto { } message SnapshotCreateRequestProto { - + uint64 creationGap = 1; } message StartLeaderElectionRequestProto { @@ -556,4 +559,19 @@ message GroupInfoReplyProto { bool isRaftStorageHealthy = 4; repeated CommitInfoProto commitInfos = 5; RaftConfigurationProto conf = 6; + LogInfoProto logInfo = 7; +} + +/** Add new LogInfoProto for RATIS-2030, allow GroupInfoCommand to show each server's last committed log, + last applied log, last snapshot log, last entry log.*/ +message LogInfoProto { + TermIndexProto lastSnapshot = 1; + TermIndexProto applied = 2; + TermIndexProto committed = 3; + TermIndexProto lastEntry = 4; +} + +// The attribute map for opentelemetry trace +message SpanContextProto { + map context = 1; } diff --git a/ratis-replicated-map/README.md b/ratis-replicated-map/README.md deleted file mode 100644 index ec4d3c554a..0000000000 --- a/ratis-replicated-map/README.md +++ /dev/null @@ -1,36 +0,0 @@ - - -Overview -======== -Ratis replicated map is an implementation of a sorted map (think TreeMap) as -a replicated state machine. This is not under examples because it is intended -to be used in production where a simple in-memory map is sufficient to hold the -data. The data is fully cached in memory, but it is still durable since raft -log is used as a replicated log, and data is snapshotted periodically. - - -The replicated map (RMap) is not only the state machine implementation, but -all of the remaining code, including the client and querying capabilities which -is built on top of the other modules. In that sense, it is dog-fooding the ratis -library to implement an end-to-end solution for a replicated in-memory data store. - -Replicated maps are conceptually similar to ZooKeeper/Etcd/LogCabin where the data -is hosted in a known cluster configuration and is not sharded. All the servers -in the cluster participate in a single RAFT ring. 
- -The data model is that users can create independent RMap instances in the cluster -and read / write or scan the data as key value pairs in those replicated maps. A -replicated map named the meta map contains information about all of the other maps -in the cluster. diff --git a/ratis-replicated-map/pom.xml b/ratis-replicated-map/pom.xml index 5b4b8f21c1..e6144fd103 100644 --- a/ratis-replicated-map/pom.xml +++ b/ratis-replicated-map/pom.xml @@ -17,12 +17,17 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ratis-replicated-map Apache Ratis Replicated Map + + + true + + org.apache.ratis diff --git a/ratis-replicated-map/src/test/resources/log4j.properties b/ratis-replicated-map/src/test/resources/log4j.properties deleted file mode 100644 index ced0687caa..0000000000 --- a/ratis-replicated-map/src/test/resources/log4j.properties +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# log4j configuration used during build and unit tests - -log4j.rootLogger=info,stdout -log4j.threshold=ALL -log4j.appender.stdout=org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n diff --git a/ratis-resource-bundle/pom.xml b/ratis-resource-bundle/pom.xml index a879a74238..edecf1b633 100644 --- a/ratis-resource-bundle/pom.xml +++ b/ratis-resource-bundle/pom.xml @@ -23,7 +23,7 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT .. @@ -36,7 +36,18 @@ true - + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test + + diff --git a/ratis-resource-bundle/src/main/resources/META-INF/LICENSE.vm b/ratis-resource-bundle/src/main/resources/META-INF/LICENSE.vm index 3d9f76a119..874d716c8b 100644 --- a/ratis-resource-bundle/src/main/resources/META-INF/LICENSE.vm +++ b/ratis-resource-bundle/src/main/resources/META-INF/LICENSE.vm @@ -221,10 +221,10 @@ under the License. limitations under the License. ## Special cases, for e.g. 
ASL2.0 licensed works that bundle additional third party works -#set($bundled-jquery = ${bundled-jquery.equalsIgnoreCase("true")}) -#set($bundled-logo = ${bundled-logo.equalsIgnoreCase("true")}) -#set($bundled-dependencies = ${bundled-dependencies.equalsIgnoreCase("true")}) -#if($bundled-jquery || $bundled-logo || $bundled-dependencies) +#set($bundled_jquery = ${bundled_jquery.equalsIgnoreCase("true")}) +#set($bundled_logo = ${bundled_logo.equalsIgnoreCase("true")}) +#set($bundled_dependencies = ${bundled_dependencies.equalsIgnoreCase("true")}) +#if($bundled_jquery || $bundled_logo || $bundled_dependencies) ==== ${project.name} contained works @@ -884,12 +884,12 @@ facade for Java, which can be obtained at: * HOMEPAGE: * http://www.slf4j.org/ #end -## Supplemental from commons-math +## Supplemental from commons_math #macro(commons_math_license) ---- APACHE COMMONS MATH DERIVATIVE WORKS: -The Apache commons-math library includes a number of subcomponents +The Apache commons_math library includes a number of subcomponents whose implementation is derived from original sources written in C or Fortran. License terms of the original sources are reproduced below. @@ -1178,7 +1178,7 @@ For the org.apache.hadoop.util.bloom.* classes: ## skip jquery ## skip backbone ## relocated jackson 2.4.0 is ASLv2 with no notice -## relocated commons-logging 1.1.1 is in NOTICE.vm +## relocated commons_logging 1.1.1 is in NOTICE.vm #end #macro (thrift_license) ## Thrift supplemental for libthrift is a no-op. @@ -1295,15 +1295,15 @@ You can redistribute it and/or modify it under either the terms of the PURPOSE. #end ## modules with bundled works in source -#if(${bundled-jquery}) +#if(${bundled_jquery}) #jquery_license() #end -#if(${bundled-logo}) +#if(${bundled_logo}) #orca_logo_license() #end ## when true, we're in a module that makes a binary dist with ## bundled works. 
-#if(${bundled-dependencies}) +#if(${bundled_dependencies}) ==== ## gather up CDDL licensed works #set($cddl_1_0 = []) @@ -1316,8 +1316,8 @@ You can redistribute it and/or modify it under either the terms of the #set($cpl = []) ## gather up EPL 1.0 works #set($epl = []) -## track commons-math -#set($commons-math = false) +## track commons_math +#set($commons_math = false) ## track if we need jruby additionals. #set($jruby = false) ## track hadoops @@ -1342,7 +1342,7 @@ g:${dep.groupId} AND a:${dep.artifactId} AND v:${dep.version} Until ratis-resource-bundle/src/main/resources/supplemental-models.xml is updated, the build should fail. #end -#if(${debug-print-included-work-info.equalsIgnoreCase("true")}) +#if(${debug_print_included_work_info.equalsIgnoreCase("true")}) ===== Check license for included work @@ -1364,8 +1364,8 @@ ${dep.scm.url} #end #set($aggregated=false) ## Check for our set of known dependencies that require manual LICENSE additions. -#if($dep.artifactId.equals("commons-math3")) -#set($commons-math=true) +#if($dep.artifactId.equals("commons_math3")) +#set($commons_math=true) #end #if($dep.artifactId.equals("jruby-complete")) #set($jruby=true) @@ -1465,7 +1465,7 @@ ${dep.scm.url} #if($hadoop) #hadoop_license() #end -#if($commons-math) +#if($commons_math) #commons_math_license() #end #if(!(${mit.isEmpty()})) diff --git a/ratis-resource-bundle/src/main/resources/META-INF/NOTICE.vm b/ratis-resource-bundle/src/main/resources/META-INF/NOTICE.vm index f3fca903b3..7d473b60ac 100644 --- a/ratis-resource-bundle/src/main/resources/META-INF/NOTICE.vm +++ b/ratis-resource-bundle/src/main/resources/META-INF/NOTICE.vm @@ -23,10 +23,10 @@ Copyright 2017-2019 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). ## Specially handled included deps. e.g. 
ASL2.0 works that have additional bundled works but -#set($bundled-boostrap = ${bundled-boostrap.equalsIgnoreCase("true")}) -#set($bundled-logo = ${bundled-logo.equalsIgnoreCase("true")}) -#set($bundled-dependencies = ${bundled-dependencies.equalsIgnoreCase("true")}) -#if($bundled-bootstrap || $bundled-logo || $bundled-dependencies) +#set($bundled_boostrap = ${bundled_boostrap.equalsIgnoreCase("true")}) +#set($bundled_logo = ${bundled_logo.equalsIgnoreCase("true")}) +#set($bundled_dependencies = ${bundled_dependencies.equalsIgnoreCase("true")}) +#if($bundled_bootstrap || $bundled_logo || $bundled_dependencies) ==== ${project.name} contained works @@ -102,13 +102,13 @@ noted in the LICENSE file. #end -#macro(jetty_notice $jetty-include-unixcrypt) +#macro(jetty_notice $jetty_include_unixcrypt) -- This product includes portions of 'The Jetty Web Container' Copyright 1995-2016 Mort Bay Consulting Pty Ltd. -#if(${jetty-include-unixcrypt}) +#if(${jetty_include_unixcrypt}) ## UnixCrypt.java paragraph only in server The UnixCrypt.java code ~Implements the one way cryptography used by Unix systems for simple password protection. Copyright 1996 Aki Yoshida, @@ -207,13 +207,13 @@ under the Apache License 2.0 (see: StringUtils.containsWhitespace()) #end ## first bundled source -#if(${bundled-logo}) +#if(${bundled_logo}) #orca_logo_notice() #end -#if(${bundled-bootstrap}) +#if(${bundled_bootstrap}) #bootstrap_notice() #end -#if(${bundled-dependencies}) +#if(${bundled_dependencies}) #** Note that this will fail the build if we don't have a license. 
update supplemental-models via setting '-Dlicense.debug.print.included' and looking in the generated LICENSE file @@ -229,7 +229,7 @@ under the Apache License 2.0 (see: StringUtils.containsWhitespace()) #set($mpl_1_1=[]) ## track jettys #set($jetty=false) -#set($jetty-with-crypt=false) +#set($jetty_with_crypt=false) ## track jruby #set($jruby=false) #foreach( ${dep} in ${projects} ) @@ -244,7 +244,7 @@ under the Apache License 2.0 (see: StringUtils.containsWhitespace()) #if(${dep.artifactId.startsWith("jetty")}) #set($jetty=true) #if(${dep.artifactId.equals("jetty")}) -#set($jetty-with-crypt=true) +#set($jetty_with_crypt=true) #end #end #if(${dep.artifactId.equals("log4j")}) @@ -317,7 +317,7 @@ For source see '${dep.url}'. #end ## Print out jetty #if(${jetty}) -#jetty_notice(${jetty-with-crypt}) +#jetty_notice(${jetty_with_crypt}) #end ## Now go through all the lists of Category-B licensed works and make sure we ## name them and give a URL for the project's home page. diff --git a/ratis-server-api/dev-support/findbugsExcludeFile.xml b/ratis-server-api/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..33c6d02a47 --- /dev/null +++ b/ratis-server-api/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ratis-server-api/pom.xml b/ratis-server-api/pom.xml index f3c4b617a0..feed49190c 100644 --- a/ratis-server-api/pom.xml +++ b/ratis-server-api/pom.xml @@ -17,12 +17,17 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-server-api Apache Ratis Server API + + + true + + org.apache.ratis @@ -47,5 +52,27 @@ org.slf4j slf4j-api + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test + + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/DataStreamServerRpc.java 
b/ratis-server-api/src/main/java/org/apache/ratis/server/DataStreamServerRpc.java index 4e948c6f81..6316ef6073 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/DataStreamServerRpc.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/DataStreamServerRpc.java @@ -20,18 +20,10 @@ import org.apache.ratis.protocol.RaftPeer; import java.io.Closeable; -import java.net.InetSocketAddress; /** * A server interface handling incoming streams * Relays those streams to other servers after persisting */ -public interface DataStreamServerRpc extends RaftPeer.Add, Closeable { - /** - * start server - */ - void start(); - - /** @return the address where this RPC server is listening to. */ - InetSocketAddress getInetSocketAddress(); +public interface DataStreamServerRpc extends ServerRpc, RaftPeer.Add, Closeable { } diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/DivisionInfo.java b/ratis-server-api/src/main/java/org/apache/ratis/server/DivisionInfo.java index c07317bd69..78073a21c5 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/DivisionInfo.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/DivisionInfo.java @@ -80,4 +80,10 @@ default boolean isAlive() { * otherwise, return null. */ long[] getFollowerNextIndices(); + + /** + * @return an array of match indices of the followers if this server division is the leader; + * otherwise, return null. 
+ */ + long[] getFollowerMatchIndices(); } diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java index 565e881269..2d55594782 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerConfigKeys.java @@ -77,9 +77,14 @@ static TimeDuration sleepDeviationThreshold(RaftProperties properties) { return getTimeDuration(properties.getTimeDuration(SLEEP_DEVIATION_THRESHOLD_DEFAULT.getUnit()), SLEEP_DEVIATION_THRESHOLD_KEY, SLEEP_DEVIATION_THRESHOLD_DEFAULT, getDefaultLog()); } + /** @deprecated use {@link #setSleepDeviationThreshold(RaftProperties, TimeDuration)}. */ + @Deprecated static void setSleepDeviationThreshold(RaftProperties properties, int thresholdMs) { setInt(properties::setInt, SLEEP_DEVIATION_THRESHOLD_KEY, thresholdMs); } + static void setSleepDeviationThreshold(RaftProperties properties, TimeDuration threshold) { + setTimeDuration(properties::setTimeDuration, SLEEP_DEVIATION_THRESHOLD_KEY, threshold); + } String CLOSE_THRESHOLD_KEY = PREFIX + ".close.threshold"; TimeDuration CLOSE_THRESHOLD_DEFAULT = TimeDuration.valueOf(60, TimeUnit.SECONDS); @@ -87,8 +92,13 @@ static TimeDuration closeThreshold(RaftProperties properties) { return getTimeDuration(properties.getTimeDuration(CLOSE_THRESHOLD_DEFAULT.getUnit()), CLOSE_THRESHOLD_KEY, CLOSE_THRESHOLD_DEFAULT, getDefaultLog()); } - static void setCloseThreshold(RaftProperties properties, int thresholdMs) { - setInt(properties::setInt, CLOSE_THRESHOLD_KEY, thresholdMs); + /** @deprecated use {@link #setCloseThreshold(RaftProperties, TimeDuration)}. 
*/ + @Deprecated + static void setCloseThreshold(RaftProperties properties, int thresholdSec) { + setInt(properties::setInt, CLOSE_THRESHOLD_KEY, thresholdSec); + } + static void setCloseThreshold(RaftProperties properties, TimeDuration threshold) { + setTimeDuration(properties::setTimeDuration, CLOSE_THRESHOLD_KEY, threshold); } /** @@ -106,6 +116,21 @@ static void setStagingCatchupGap(RaftProperties properties, int stagingCatchupGa setInt(properties::setInt, STAGING_CATCHUP_GAP_KEY, stagingCatchupGap); } + String STAGING_TIMEOUT_KEY = PREFIX + ".staging.timeout"; + + TimeDuration STAGING_TIMEOUT_DEFAULT = null; + + static TimeDuration stagingTimeout(RaftProperties properties) { + final TimeDuration fallbackStagingTimeout = Rpc.timeoutMax(properties, null).multiply(3); + return getTimeDuration(properties.getTimeDuration(fallbackStagingTimeout.getUnit()), + STAGING_TIMEOUT_KEY, STAGING_TIMEOUT_DEFAULT, + Rpc.TIMEOUT_MAX_KEY, fallbackStagingTimeout, getDefaultLog()); + } + static void setStagingTimeout(RaftProperties properties, TimeDuration stagingTimeout) { + setTimeDuration(properties::setTimeDuration, STAGING_TIMEOUT_KEY, stagingTimeout); + } + + interface ThreadPool { String PREFIX = RaftServerConfigKeys.PREFIX + ".threadpool"; @@ -224,6 +249,17 @@ static void setLeaderLeaseTimeoutRatio(RaftProperties properties, double ratio) setDouble(properties::setDouble, LEADER_LEASE_TIMEOUT_RATIO_KEY, ratio); } + String LEADER_HEARTBEAT_CHECK_ENABLED_KEY = PREFIX + ".leader.heartbeat-check.enabled"; + boolean LEADER_HEARTBEAT_CHECK_ENABLED_DEFAULT = true; + static boolean leaderHeartbeatCheckEnabled(RaftProperties properties) { + return getBoolean(properties::getBoolean, LEADER_HEARTBEAT_CHECK_ENABLED_KEY, + LEADER_HEARTBEAT_CHECK_ENABLED_DEFAULT, getDefaultLog()); + } + + static void setLeaderHeartbeatCheckEnabled(RaftProperties properties, boolean enabled) { + setBoolean(properties::setBoolean, LEADER_HEARTBEAT_CHECK_ENABLED_KEY, enabled); + } + interface 
ReadAfterWriteConsistent { String PREFIX = Read.PREFIX + ".read-after-write-consistent"; @@ -240,6 +276,40 @@ static void setWriteIndexCacheExpiryTime(RaftProperties properties, TimeDuration setTimeDuration(properties::setTimeDuration, WRITE_INDEX_CACHE_EXPIRY_TIME_KEY, expiryTime); } } + + interface ReadIndex { + String PREFIX = Read.PREFIX + ".read-index"; + + enum Type { + /** ReadIndex returns leader's commitIndex (see Raft Paper section 6.4). */ + COMMIT_INDEX, + + /** ReadIndex returns leader's appliedIndex to reduce the ReadIndex latency. */ + APPLIED_INDEX, + + /** ReadIndex returns leader's repliedIndex, the index of the last replied request. */ + REPLIED_INDEX + } + + String TYPE_KEY = PREFIX + ".type"; + Type TYPE_DEFAULT = Type.COMMIT_INDEX; + static Type type(RaftProperties properties) { + return get(properties::getEnum, TYPE_KEY, TYPE_DEFAULT, getDefaultLog()); + } + static void setType(RaftProperties properties, Type type) { + set(properties::setEnum, TYPE_KEY, type); + } + + String REPLIED_INDEX_BATCH_INTERVAL_KEY = PREFIX + ".replied-index.batch-interval"; + TimeDuration REPLIED_INDEX_BATCH_INTERVAL_DEFAULT = TimeDuration.valueOf(10, TimeUnit.MILLISECONDS); + static TimeDuration repliedIndexBatchInterval(RaftProperties properties) { + return getTimeDuration(properties.getTimeDuration(REPLIED_INDEX_BATCH_INTERVAL_DEFAULT.getUnit()), + REPLIED_INDEX_BATCH_INTERVAL_KEY, REPLIED_INDEX_BATCH_INTERVAL_DEFAULT, getDefaultLog()); + } + static void setRepliedIndexBatchInterval(RaftProperties properties, TimeDuration interval) { + setTimeDuration(properties::setTimeDuration, REPLIED_INDEX_BATCH_INTERVAL_KEY, interval); + } + } } interface Write { @@ -390,6 +460,26 @@ static void setSegmentSizeMax(RaftProperties properties, SizeInBytes segmentSize setSizeInBytes(properties::set, SEGMENT_SIZE_MAX_KEY, segmentSizeMax); } + String READ_LOCK_ENABLED_KEY = PREFIX + ".read.lock.enabled"; + boolean READ_LOCK_ENABLED_DEFAULT = true; + static boolean 
readLockEnabled(RaftProperties properties) { + return getBoolean(properties::getBoolean, + READ_LOCK_ENABLED_KEY, READ_LOCK_ENABLED_DEFAULT, getDefaultLog()); + } + static void setReadLockEnabled(RaftProperties properties, boolean readLockEnabled) { + setBoolean(properties::setBoolean, READ_LOCK_ENABLED_KEY, readLockEnabled); + } + + String APPEND_ENTRIES_COMPOSE_ENABLED_KEY = PREFIX + ".append-entries.compose.enabled"; + boolean APPEND_ENTRIES_COMPOSE_ENABLED_DEFAULT = true; + static boolean appendEntriesComposeEnabled(RaftProperties properties) { + return getBoolean(properties::getBoolean, + APPEND_ENTRIES_COMPOSE_ENABLED_KEY, APPEND_ENTRIES_COMPOSE_ENABLED_DEFAULT, getDefaultLog()); + } + static void setAppendEntriesComposeEnabled(RaftProperties properties, boolean enabled) { + setBoolean(properties::setBoolean, APPEND_ENTRIES_COMPOSE_ENABLED_KEY, enabled); + } + /** * Besides the open segment, the max number of segments caching log entries. */ @@ -465,6 +555,18 @@ static void setAsyncFlushEnabled(RaftProperties properties, boolean asyncFlush) setBoolean(properties::setBoolean, ASYNC_FLUSH_ENABLED_KEY, asyncFlush); } + /** Log metadata can guarantee that a server can recover commit index and state machine + * even if a majority of servers are dead by consuming a little extra space. */ + String LOG_METADATA_ENABLED_KEY = PREFIX + ".log-metadata.enabled"; + boolean LOG_METADATA_ENABLED_DEFAULT = true; + static boolean logMetadataEnabled(RaftProperties properties) { + return getBoolean(properties::getBoolean, + LOG_METADATA_ENABLED_KEY, LOG_METADATA_ENABLED_DEFAULT, getDefaultLog()); + } + static void setLogMetadataEnabled(RaftProperties properties, boolean logMetadata) { + setBoolean(properties::setBoolean, LOG_METADATA_ENABLED_KEY, logMetadata); + } + /** The policy to handle corrupted raft log. */ enum CorruptionPolicy { /** Rethrow the exception. 
*/ @@ -647,6 +749,18 @@ static void setTriggerWhenStopEnabled(RaftProperties properties, boolean trigger setBoolean(properties::setBoolean, TRIGGER_WHEN_STOP_ENABLED_KEY, triggerWhenStopEnabled); } + /** whether trigger snapshot when remove raft server */ + String TRIGGER_WHEN_REMOVE_ENABLED_KEY = PREFIX + ".trigger-when-remove.enabled"; + /** by default let the state machine to trigger snapshot when remove */ + boolean TRIGGER_WHEN_REMOVE_ENABLED_DEFAULT = true; + static boolean triggerWhenRemoveEnabled(RaftProperties properties) { + return getBoolean(properties::getBoolean, + TRIGGER_WHEN_REMOVE_ENABLED_KEY, TRIGGER_WHEN_REMOVE_ENABLED_DEFAULT, getDefaultLog()); + } + static void setTriggerWhenRemoveEnabled(RaftProperties properties, boolean triggerWhenRemoveEnabled) { + setBoolean(properties::setBoolean, TRIGGER_WHEN_REMOVE_ENABLED_KEY, triggerWhenRemoveEnabled); + } + /** The log index gap between to two snapshot creations. */ String CREATION_GAP_KEY = PREFIX + ".creation.gap"; long CREATION_GAP_DEFAULT = 1024; diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerRpc.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerRpc.java index d81f9cc8bc..76bd817f5e 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerRpc.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServerRpc.java @@ -26,20 +26,13 @@ import org.apache.ratis.util.JavaUtils; import java.io.Closeable; -import java.io.IOException; import java.net.InetSocketAddress; /** * An server-side interface for supporting different RPC implementations * such as Netty, gRPC and Hadoop. */ -public interface RaftServerRpc extends RaftServerProtocol, RpcType.Get, RaftPeer.Add, Closeable { - /** Start the RPC service. 
*/ - void start() throws IOException; - - /** @return the address where this RPC server is listening */ - InetSocketAddress getInetSocketAddress(); - +public interface RaftServerRpc extends RaftServerProtocol, ServerRpc, RpcType.Get, RaftPeer.Add, Closeable { /** @return the address where this RPC server is listening for client requests */ default InetSocketAddress getClientServerAddress() { return getInetSocketAddress(); diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/ServerRpc.java b/ratis-server-api/src/main/java/org/apache/ratis/server/ServerRpc.java new file mode 100644 index 0000000000..6ad5eacf14 --- /dev/null +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/ServerRpc.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.server; + +import java.io.Closeable; +import java.io.IOException; +import java.net.InetSocketAddress; + +/** + * A general server interface. + */ +public interface ServerRpc extends Closeable { + /** Start the RPC service. */ + void start() throws IOException; + + /** @return the address where this RPC server is listening to. 
*/ + InetSocketAddress getInetSocketAddress(); +} diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/leader/LogAppender.java b/ratis-server-api/src/main/java/org/apache/ratis/server/leader/LogAppender.java index 020a352c05..a333b8393a 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/leader/LogAppender.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/leader/LogAppender.java @@ -87,10 +87,11 @@ default void stop() { default CompletableFuture stopAsync() { stop(); return CompletableFuture.supplyAsync(() -> { - for (; isRunning(); ) { + while (isRunning()) { try { Thread.sleep(10); } catch (InterruptedException e) { + Thread.currentThread().interrupt(); throw new CompletionException("stopAsync interrupted", e); } } @@ -124,7 +125,9 @@ default RaftPeerId getFollowerId() { * @param heartbeat the returned request must be a heartbeat. * * @return a new {@link AppendEntriesRequestProto} object. + * @deprecated this is no longer a public API. */ + @Deprecated AppendEntriesRequestProto newAppendEntriesRequest(long callId, boolean heartbeat) throws RaftLogIOException; /** @return a new {@link InstallSnapshotRequestProto} object. */ @@ -142,7 +145,7 @@ default SnapshotInfo shouldInstallSnapshot() { // we should install snapshot if the follower needs to catch up and: // 1. there is no local log entry but there is snapshot // 2. or the follower's next index is smaller than the log start index - // 3. or the follower is bootstrapping and has not installed any snapshot yet + // 3. or the follower is bootstrapping (i.e. 
not yet caught up) and has not installed any snapshot yet final FollowerInfo follower = getFollower(); final boolean isFollowerBootstrapping = getLeaderState().isFollowerBootstrapping(follower); final SnapshotInfo snapshot = getServer().getStateMachine().getLatestSnapshot(); diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/protocol/RaftServerAsynchronousProtocol.java b/ratis-server-api/src/main/java/org/apache/ratis/server/protocol/RaftServerAsynchronousProtocol.java index 8a904069ba..035e0a815f 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/protocol/RaftServerAsynchronousProtocol.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/protocol/RaftServerAsynchronousProtocol.java @@ -22,14 +22,39 @@ import org.apache.ratis.proto.RaftProtos.ReadIndexReplyProto; import org.apache.ratis.proto.RaftProtos.AppendEntriesReplyProto; import org.apache.ratis.proto.RaftProtos.AppendEntriesRequestProto; +import org.apache.ratis.util.ReferenceCountedObject; import java.io.IOException; import java.util.concurrent.CompletableFuture; public interface RaftServerAsynchronousProtocol { - CompletableFuture appendEntriesAsync(AppendEntriesRequestProto request) - throws IOException; + /** + * It is recommended to override {@link #appendEntriesAsync(ReferenceCountedObject)} instead. + * Then, it does not have to override this method. + */ + default CompletableFuture appendEntriesAsync(AppendEntriesRequestProto request) + throws IOException { + throw new UnsupportedOperationException(); + } + + /** + * A referenced counted request is submitted from a client for processing. + * Implementations of this method should retain the request, process it and then release it. + * The request may be retained even after the future returned by this method has completed. 
+ * + * @return a future of the reply + * @see ReferenceCountedObject + */ + default CompletableFuture appendEntriesAsync( + ReferenceCountedObject requestRef) throws IOException { + // Default implementation for backward compatibility. + try { + return appendEntriesAsync(requestRef.retain()); + } finally { + requestRef.release(); + } + } CompletableFuture readIndexAsync(ReadIndexRequestProto request) throws IOException; diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/protocol/TermIndex.java b/ratis-server-api/src/main/java/org/apache/ratis/server/protocol/TermIndex.java index 7def686bb5..369aefc85f 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/protocol/TermIndex.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/protocol/TermIndex.java @@ -19,12 +19,29 @@ import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.proto.RaftProtos.TermIndexProto; +import org.apache.ratis.server.raftlog.RaftLog; +import org.apache.ratis.util.BiWeakValueCache; +import org.apache.ratis.util.MemoizedSupplier; import java.util.Comparator; import java.util.Optional; +import java.util.function.Supplier; /** The term and the log index defined in the Raft consensus algorithm. */ public interface TermIndex extends Comparable { + /** + * The initial value. + * When a new Raft group starts, + * all the servers has term 0 and index -1 (= {@link RaftLog#INVALID_LOG_INDEX}). + * Note that term is incremented during leader election + * and index is incremented when writing to the {@link RaftLog}. + * The least term and index possibly written to the {@link RaftLog} + * are respectively 1 and 0 (= {@link RaftLog#LEAST_VALID_LOG_INDEX}). + */ + TermIndex INITIAL_VALUE = valueOf(0, RaftLog.INVALID_LOG_INDEX); + TermIndex PROTO_DEFAULT = valueOf(TermIndexProto.getDefaultInstance()); + + /** An empty {@link TermIndex} array. */ TermIndex[] EMPTY_ARRAY = {}; /** @return the term. 
*/ @@ -60,43 +77,74 @@ static TermIndex valueOf(LogEntryProto proto) { /** @return a {@link TermIndex} object. */ static TermIndex valueOf(long term, long index) { - return new TermIndex() { - @Override - public long getTerm() { - return term; - } - - @Override - public long getIndex() { - return index; - } - - @Override - public boolean equals(Object obj) { - if (obj == this) { - return true; - } else if (!(obj instanceof TermIndex)) { - return false; + return Impl.getCache().getOrCreate(term, index); + } + + /** + * An implementation for private use. + * Note that this is not a public API, although this is public class. + */ + final class Impl { + private Impl() { } + + private static final BiWeakValueCache CACHE + = new BiWeakValueCache<>("term", "index", Impl::newTermIndex); + + static BiWeakValueCache getCache() { + return CACHE; + } + + private static TermIndex newTermIndex(long term, long index) { + return new TermIndex() { + private final Supplier protoSupplier = MemoizedSupplier.valueOf(TermIndex.super::toProto); + + @Override + public long getTerm() { + return term; + } + + @Override + public long getIndex() { + return index; } - final TermIndex that = (TermIndex) obj; - return this.getTerm() == that.getTerm() - && this.getIndex() == that.getIndex(); - } - - @Override - public int hashCode() { - return Long.hashCode(term) ^ Long.hashCode(index); - } - - private String longToString(long n) { - return n >= 0L? 
String.valueOf(n) : "~"; - } - - @Override - public String toString() { - return String.format("(t:%s, i:%s)", longToString(term), longToString(index)); - } - }; + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } else if (!(obj instanceof TermIndex)) { + return false; + } + + final TermIndex that = (TermIndex) obj; + return this.getTerm() == that.getTerm() + && this.getIndex() == that.getIndex(); + } + + @Override + public int hashCode() { + return Long.hashCode(term) ^ Long.hashCode(index); + } + + @Override + public TermIndexProto toProto() { + return protoSupplier.get(); + } + + private String longToString(long n) { + return n >= 0L ? String.valueOf(n) : "~"; + } + + @Override + public String toString() { + if (this.equals(INITIAL_VALUE)) { + return ""; + } else if (this.equals(PROTO_DEFAULT)) { + return ""; + } + return String.format("(t:%s, i:%s)", longToString(term), longToString(index)); + } + }; + } } } \ No newline at end of file diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/raftlog/RaftLog.java b/ratis-server-api/src/main/java/org/apache/ratis/server/raftlog/RaftLog.java index e504462b80..ca785a4a6d 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/raftlog/RaftLog.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/raftlog/RaftLog.java @@ -21,6 +21,7 @@ import org.apache.ratis.server.metrics.RaftLogMetrics; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.storage.RaftStorageMetadata; +import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.TimeDuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,16 +58,44 @@ default boolean contains(TermIndex ti) { /** * @return null if the log entry is not found in this log; - * otherwise, return the log entry corresponding to the given index. + * otherwise, return a copy of the log entry corresponding to the given index. 
+ * @deprecated use {@link RaftLog#retainLog(long)} instead in order to avoid copying. */ + @Deprecated LogEntryProto get(long index) throws RaftLogIOException; + /** + * @return a retained {@link ReferenceCountedObject} to the log entry corresponding to the given index if it exists; + * otherwise, return null. + * Since the returned reference is retained, the caller must call {@link ReferenceCountedObject#release()}} + * after use. + */ + default ReferenceCountedObject retainLog(long index) throws RaftLogIOException { + ReferenceCountedObject wrap = ReferenceCountedObject.wrap(get(index)); + wrap.retain(); + return wrap; + } + /** * @return null if the log entry is not found in this log; * otherwise, return the {@link EntryWithData} corresponding to the given index. + * @deprecated use {@link #retainEntryWithData(long)}. */ + @Deprecated EntryWithData getEntryWithData(long index) throws RaftLogIOException; + /** + * @return null if the log entry is not found in this log; + * otherwise, return a retained reference of the {@link EntryWithData} corresponding to the given index. + * Since the returned reference is retained, the caller must call {@link ReferenceCountedObject#release()}} + * after use. + */ + default ReferenceCountedObject retainEntryWithData(long index) throws RaftLogIOException { + final ReferenceCountedObject wrap = ReferenceCountedObject.wrap(getEntryWithData(index)); + wrap.retain(); + return wrap; +} + /** * @param startIndex the starting log index (inclusive) * @param endIndex the ending log index (exclusive) @@ -157,6 +186,15 @@ default long getNextIndex() { * containing both the log entry and the state machine data. */ interface EntryWithData { + /** @return the index of this entry. 
*/ + default long getIndex() { + try { + return getEntry(TimeDuration.ONE_MINUTE).getIndex(); + } catch (Exception e) { + throw new IllegalStateException("Failed to getIndex", e); + } + } + /** @return the serialized size including both log entry and state machine data. */ int getSerializedSize(); diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/raftlog/RaftLogSequentialOps.java b/ratis-server-api/src/main/java/org/apache/ratis/server/raftlog/RaftLogSequentialOps.java index 7b9f42b6bd..5a25728830 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/raftlog/RaftLogSequentialOps.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/raftlog/RaftLogSequentialOps.java @@ -22,8 +22,10 @@ import org.apache.ratis.server.RaftConfiguration; import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.StringUtils; import org.apache.ratis.util.function.CheckedSupplier; +import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import java.util.Arrays; import java.util.List; @@ -77,8 +79,10 @@ OUTPUT runSequentially( // The current thread is already the runner. return operation.get(); } else { + final Throwable cause = new Throwable("The thread already running: " + previous); + cause.setStackTrace(previous.getStackTrace()); throw new IllegalStateException( - name + ": Already running a method by " + previous + ", current=" + current); + name + ": Already running a method by " + previous + ", current=" + current, cause); } } } @@ -122,31 +126,56 @@ OUTPUT runSequentially( CompletableFuture appendEntry(LogEntryProto entry); /** - * Append asynchronously an entry. - * Used by the leader. + * @deprecated use {@link #appendEntry(ReferenceCountedObject, TransactionContext)}}. 
*/ + @Deprecated default CompletableFuture appendEntry(LogEntryProto entry, TransactionContext context) { - return appendEntry(entry); + throw new UnsupportedOperationException(); + } + + /** + * Append asynchronously an entry. + * Used for scenarios that there is a ReferenceCountedObject context for resource cleanup when the given entry + * is no longer used/referenced by this log. + */ + default CompletableFuture appendEntry(ReferenceCountedObject entryRef, + TransactionContext context) { + return appendEntry(entryRef.get(), context); } /** * The same as append(Arrays.asList(entries)). * - * @deprecated use {@link #append(List)} + * @deprecated use {@link #append(ReferenceCountedObject)}. */ @Deprecated default List> append(LogEntryProto... entries) { return append(Arrays.asList(entries)); } + /** + * @deprecated use {@link #append(ReferenceCountedObject)}. + */ + @Deprecated + default List> append(List entries) { + throw new UnsupportedOperationException(); + } + /** * Append asynchronously all the given log entries. * Used by the followers. * * If an existing entry conflicts with a new one (same index but different terms), * delete the existing entry and all entries that follow it (§5.3). + * + * A reference counter is also submitted. + * For each entry, implementations of this method should retain the counter, process it and then release. */ - List> append(List entries); + default List> append(ReferenceCountedObject> entriesRef) { + try(UncheckedAutoCloseableSupplier> entries = entriesRef.retainAndReleaseOnClose()) { + return append(entries.get()); + } + } /** * Truncate asynchronously the log entries till the given index (inclusively). 
diff --git a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java index b1fc5addae..3960ab8287 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java @@ -88,11 +88,35 @@ default CompletableFuture read(LogEntryProto entry, TransactionConte return read(entry); } + /** + * Read asynchronously the state machine data from this state machine. + * StateMachines implement this method when the read result contains retained resources that should be released + * after use. + * + * @return a future for the read task. The result of the future is a {@link ReferenceCountedObject} wrapping the + * read result. Client code of this method must call {@link ReferenceCountedObject#release()} after + * use. + */ + default CompletableFuture> retainRead(LogEntryProto entry, + TransactionContext context) { + return read(entry, context).thenApply(r -> { + if (r == null) { + return null; + } + ReferenceCountedObject ref = ReferenceCountedObject.wrap(r); + ref.retain(); + return ref; + + }); + } + /** * Write asynchronously the state machine data in the given log entry to this state machine. * * @return a future for the write task + * @deprecated Applications should implement {@link #write(ReferenceCountedObject, TransactionContext)} instead. */ + @Deprecated default CompletableFuture write(LogEntryProto entry) { return CompletableFuture.completedFuture(null); } @@ -101,11 +125,36 @@ default CompletableFuture write(LogEntryProto entry) { * Write asynchronously the state machine data in the given log entry to this state machine. * * @return a future for the write task + * @deprecated Applications should implement {@link #write(ReferenceCountedObject, TransactionContext)} instead. 
*/ + @Deprecated default CompletableFuture write(LogEntryProto entry, TransactionContext context) { return write(entry); } + /** + * Write asynchronously the state machine data in the given log entry to this state machine. + * + * @param entryRef Reference to a log entry. + * Implementations of this method may call {@link ReferenceCountedObject#get()} + * to access the log entry before this method returns. + * If the log entry is needed after this method returns, + * e.g. for asynchronous computation or caching, + * the implementation must invoke {@link ReferenceCountedObject#retain()} + * and {@link ReferenceCountedObject#release()}. + * @return a future for the write task + */ + default CompletableFuture write(ReferenceCountedObject entryRef, TransactionContext context) { + final LogEntryProto entry = entryRef.get(); + try { + final LogEntryProto copy = LogEntryProto.parseFrom(entry.toByteString()); + return write(copy, context); + } catch (InvalidProtocolBufferException e) { + return JavaUtils.completeExceptionally(new IllegalStateException( + "Failed to copy log entry " + TermIndex.valueOf(entry), e)); + } + } + /** * Create asynchronously a {@link DataStream} to stream state machine data. * The state machine may use the first message (i.e. request.getMessage()) as the header to create the stream. @@ -214,9 +263,20 @@ default void notifyLogFailed(Throwable cause, LogEntryProto failedEntry) {} default void notifySnapshotInstalled(InstallSnapshotResult result, long snapshotIndex, RaftPeer peer) {} /** - * Notify the {@link StateMachine} that a raft server has step down. + * Notify the {@link StateMachine} that the server for this division has been shut down. 
+ * @Deprecated please use/override {@link #notifyServerShutdown(RoleInfoProto, boolean)} instead + */ + @Deprecated + default void notifyServerShutdown(RoleInfoProto roleInfo) { + notifyServerShutdown(roleInfo, false); + } + + /** + * Notify the {@link StateMachine} that either the server for this division or all the servers have been shut down. + * @param roleInfo roleInfo this server + * @param allServer whether all raft servers will be shutdown at this time */ - default void notifyServerShutdown(RoleInfoProto roleInfo) {} + default void notifyServerShutdown(RoleInfoProto roleInfo, boolean allServer) {} } /** diff --git a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/TransactionContext.java b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/TransactionContext.java index e0190747fc..d6ff9a0833 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/TransactionContext.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/TransactionContext.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.util.Objects; +import java.util.Optional; /** * Context for a transaction. @@ -59,7 +60,10 @@ public interface TransactionContext { /** * Returns the data from the {@link StateMachine} * @return the data from the {@link StateMachine} + * @deprecated access StateMachineLogEntry via {@link TransactionContext#getLogEntryRef()} or + * {@link TransactionContext#getLogEntryUnsafe()} */ + @Deprecated StateMachineLogEntryProto getStateMachineLogEntry(); /** Set exception in case of failure. */ @@ -94,11 +98,40 @@ public interface TransactionContext { LogEntryProto initLogEntry(long term, long index); /** - * Returns the committed log entry - * @return the committed log entry + * @return a copy of the committed log entry if it exists; otherwise, returns null + * + * @deprecated Use {@link #getLogEntryRef()} or {@link #getLogEntryUnsafe()} to avoid copying. 
*/ + @Deprecated LogEntryProto getLogEntry(); + /** + * @return the committed log entry if it exists; otherwise, returns null. + * The returned value is safe to use only before {@link StateMachine#applyTransaction} returns. + * Once {@link StateMachine#applyTransaction} has returned, it is unsafe to use the log entry + * since the underlying buffers can possiby be released. + */ + default LogEntryProto getLogEntryUnsafe() { + return getLogEntryRef().get(); + } + + /** + * Get a {@link ReferenceCountedObject} to the committed log entry. + * + * It is safe to access the log entry by calling {@link ReferenceCountedObject#get()} + * (without {@link ReferenceCountedObject#retain()}) + * inside the scope of {@link StateMachine#applyTransaction}. + * + * If the log entry is needed after {@link StateMachine#applyTransaction} returns, + * e.g. for asynchronous computation or caching, + * the caller must invoke {@link ReferenceCountedObject#retain()} and {@link ReferenceCountedObject#release()}. + * + * @return a reference to the committed log entry if it exists; otherwise, returns null. + */ + default ReferenceCountedObject getLogEntryRef() { + return Optional.ofNullable(getLogEntryUnsafe()).map(this::wrap).orElse(null); + } + /** Wrap the given log entry as a {@link ReferenceCountedObject} for retaining it for later use. 
*/ default ReferenceCountedObject wrap(LogEntryProto entry) { Preconditions.assertSame(getLogEntry().getTerm(), entry.getTerm(), "entry.term"); @@ -202,7 +235,7 @@ public TransactionContext build() { return newTransactionContext(stateMachine, clientRequest, stateMachineLogEntry, logData, stateMachineData, stateMachineContext); } else { - Objects.requireNonNull(logEntry, "logEntry MUST NOT be null since clientRequest == null"); + Objects.requireNonNull(logEntry, "logEntry must not be null since clientRequest == null"); Preconditions.assertTrue(logEntry.hasStateMachineLogEntry(), () -> "Unexpected logEntry: stateMachineLogEntry not found, logEntry=" + logEntry); return newTransactionContext(serverRole, stateMachine, logEntry); diff --git a/ratis-server/dev-support/findbugsExcludeFile.xml b/ratis-server/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..0161c226bb --- /dev/null +++ b/ratis-server/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ratis-server/pom.xml b/ratis-server/pom.xml index 38af72d9db..2c0bc93a25 100644 --- a/ratis-server/pom.xml +++ b/ratis-server/pom.xml @@ -17,7 +17,7 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-server @@ -48,12 +48,6 @@ ratis-client org.apache.ratis - - ratis-client - org.apache.ratis - test - test-jar - ratis-server-api @@ -65,11 +59,6 @@ slf4j-api - - junit - junit - test - org.junit.jupiter junit-jupiter-api @@ -80,11 +69,6 @@ junit-jupiter-engine test - - org.junit.vintage - junit-vintage-engine - test - org.junit.platform junit-platform-launcher @@ -110,4 +94,15 @@ test + + + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + + + diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/ConfigurationManager.java 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/ConfigurationManager.java index 0e020b7e36..2ba8107837 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/ConfigurationManager.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/ConfigurationManager.java @@ -25,7 +25,10 @@ import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.StringUtils; -import java.util.*; +import java.util.NavigableMap; +import java.util.SortedMap; +import java.util.TreeMap; + /** * Maintain the mappings between log index and corresponding raft configuration. @@ -41,9 +44,9 @@ public class ConfigurationManager { * The current raft configuration. If configurations is not empty, should be * the last entry of the map. Otherwise is initialConf. */ - private volatile RaftConfigurationImpl currentConf; + private RaftConfigurationImpl currentConf; /** Cache the peer corresponding to {@link #id}. */ - private volatile RaftPeer currentPeer; + private RaftPeer currentPeer; ConfigurationManager(RaftPeerId id, RaftConfigurationImpl initialConf) { this.id = id; @@ -76,11 +79,11 @@ private void addRaftConfigurationImpl(long logIndex, RaftConfigurationImpl conf) } } - RaftConfigurationImpl getCurrent() { + synchronized RaftConfigurationImpl getCurrent() { return currentConf; } - RaftPeer getCurrentPeer() { + synchronized RaftPeer getCurrentPeer() { return currentPeer; } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/FollowerState.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/FollowerState.java index 3911e39a5c..b01abcddc0 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/FollowerState.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/FollowerState.java @@ -19,13 +19,14 @@ import org.apache.ratis.server.DivisionInfo; import org.apache.ratis.server.leader.LeaderState; +import org.apache.ratis.server.util.ServerStringUtils; import org.apache.ratis.util.Daemon; -import 
org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.Timestamp; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.ToIntFunction; @@ -59,13 +60,15 @@ int update(AtomicInteger outstanding) { private final RaftServerImpl server; private final Timestamp creationTime = Timestamp.currentTime(); + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile Timestamp lastRpcTime = creationTime; private volatile boolean isRunning = true; + private final CompletableFuture stopped = new CompletableFuture<>(); private final AtomicInteger outstandingOp = new AtomicInteger(); FollowerState(RaftServerImpl server, Object reason) { super(newBuilder() - .setName(server.getMemberId() + "-" + JavaUtils.getClassSimpleName(FollowerState.class)) + .setName(ServerStringUtils.generateUnifiedName(server.getMemberId(), FollowerState.class)) .setThreadGroup(server.getThreadGroup())); this.server = server; this.reason = reason; @@ -92,8 +95,10 @@ boolean isCurrentLeaderValid() { return lastRpcTime.elapsedTime().compareTo(server.properties().minRpcTimeout()) < 0; } - void stopRunning() { + CompletableFuture stopRunning() { this.isRunning = false; + interrupt(); + return stopped; } boolean lostMajorityHeartbeatsRecently() { @@ -121,6 +126,22 @@ private boolean shouldRun() { @Override public void run() { + try { + runImpl(); + } finally { + stopped.complete(null); + } + } + + private boolean roleChangeChecking(TimeDuration electionTimeout) { + return outstandingOp.get() == 0 + && isRunning && server.getInfo().isFollower() + && lastRpcTime.elapsedTime().compareTo(electionTimeout) >= 0 + && !lostMajorityHeartbeatsRecently() + && server.isRunning(); + } + + private void runImpl() { final TimeDuration sleepDeviationThreshold = 
server.getSleepDeviationThreshold(); while (shouldRun()) { final TimeDuration electionTimeout = server.getRandomElectionTimeout(); @@ -136,10 +157,7 @@ public void run() { break; } synchronized (server) { - if (outstandingOp.get() == 0 - && isRunning && server.getInfo().isFollower() - && lastRpcTime.elapsedTime().compareTo(electionTimeout) >= 0 - && !lostMajorityHeartbeatsRecently()) { + if (roleChangeChecking(electionTimeout)) { LOG.info("{}: change to CANDIDATE, lastRpcElapsedTime:{}, electionTimeout:{}", this, lastRpcTime.elapsedTime(), electionTimeout); server.getLeaderElectionMetrics().onLeaderElectionTimeout(); // Update timeout metric counters. diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderElection.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderElection.java index 53e83d866e..6ee55b59cf 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderElection.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderElection.java @@ -20,16 +20,17 @@ import org.apache.ratis.metrics.Timekeeper; import org.apache.ratis.proto.RaftProtos.RequestVoteReplyProto; import org.apache.ratis.proto.RaftProtos.RequestVoteRequestProto; +import org.apache.ratis.proto.RaftProtos.TermIndexProto; +import org.apache.ratis.protocol.RaftGroupMemberId; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.server.DivisionInfo; import org.apache.ratis.server.RaftConfiguration; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.server.util.ServerStringUtils; import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.ratis.util.Daemon; -import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.LogUtils; import 
org.apache.ratis.util.Preconditions; @@ -46,6 +47,7 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.Callable; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; @@ -75,9 +77,124 @@ * Ongaro, D. Consensus: Bridging Theory and Practice. PhD thesis, Stanford University, 2014. * Available at https://github.com/ongardie/dissertation */ -class LeaderElection implements Runnable { +final class LeaderElection implements Runnable { public static final Logger LOG = LoggerFactory.getLogger(LeaderElection.class); + interface ServerInterface { + default RaftPeerId getId() { + return getMemberId().getPeerId(); + } + + RaftGroupMemberId getMemberId(); + boolean isAlive(); + boolean isCandidate(); + + long getCurrentTerm(); + long getLastCommittedIndex(); + TermIndex getLastEntry(); + + boolean isPreVoteEnabled(); + ConfAndTerm initElection(Phase phase) throws IOException; + RequestVoteReplyProto requestVote(RequestVoteRequestProto r) throws IOException; + + void changeToLeader(); + void rejected(long term, ResultAndTerm result) throws IOException; + void shutdown(); + + Timekeeper getLeaderElectionTimer(); + void onNewLeaderElectionCompletion(); + + TimeDuration getRandomElectionTimeout(); + ThreadGroup getThreadGroup(); + + static ServerInterface get(RaftServerImpl server) { + final boolean preVote = RaftServerConfigKeys.LeaderElection.preVote(server.getRaftServer().getProperties()); + + return new ServerInterface() { + @Override + public RaftGroupMemberId getMemberId() { + return server.getMemberId(); + } + + @Override + public boolean isAlive() { + return server.getInfo().isAlive(); + } + + @Override + public boolean isCandidate() { + return server.getInfo().isCandidate(); + } + + @Override + public long getCurrentTerm() { + return server.getState().getCurrentTerm(); + } + + @Override + public long 
getLastCommittedIndex() { + return server.getRaftLog().getLastCommittedIndex(); + } + + @Override + public TermIndex getLastEntry() { + return server.getState().getLastEntry(); + } + + @Override + public boolean isPreVoteEnabled() { + return preVote; + } + + @Override + public ConfAndTerm initElection(Phase phase) throws IOException { + return server.getState().initElection(phase); + } + + @Override + public RequestVoteReplyProto requestVote(RequestVoteRequestProto r) throws IOException { + return server.getServerRpc().requestVote(r); + } + + @Override + public void changeToLeader() { + server.changeToLeader(); + } + + @Override + public void rejected(long term, ResultAndTerm result) throws IOException { + server.changeToFollowerAndPersistMetadata(term, false, result); + } + + @Override + public void shutdown() { + server.close(); + server.getStateMachine().event().notifyServerShutdown(server.getRoleInfoProto(), false); + } + + @Override + public Timekeeper getLeaderElectionTimer() { + return server.getLeaderElectionMetrics().getLeaderElectionTimer(); + } + + @Override + public void onNewLeaderElectionCompletion() { + server.getLeaderElectionMetrics().onNewLeaderElectionCompletion(); + } + + @Override + public TimeDuration getRandomElectionTimeout() { + return server.getRandomElectionTimeout(); + } + + @Override + public ThreadGroup getThreadGroup() { + return server.getThreadGroup(); + } + }; + } + } + private ResultAndTerm logAndReturn(Phase phase, Result result, Map responses, List exceptions) { return logAndReturn(phase, result, responses, exceptions, null); @@ -106,7 +223,7 @@ enum Phase { enum Result {PASSED, SINGLE_MODE_PASSED, REJECTED, TIMEOUT, DISCOVERED_A_NEW_TERM, SHUTDOWN, NOT_IN_CONF} - private static class ResultAndTerm { + static class ResultAndTerm { private final Result result; private final Long term; @@ -143,7 +260,7 @@ static class Executor { } void shutdown() { - executor.shutdown(); + executor.shutdownNow(); } void submit(Callable task) { @@ 
-183,28 +300,39 @@ public String toString() { private final String name; private final LifeCycle lifeCycle; private final Daemon daemon; + private final CompletableFuture stopped = new CompletableFuture<>(); - private final RaftServerImpl server; + private final ServerInterface server; private final boolean skipPreVote; private final ConfAndTerm round0; - LeaderElection(RaftServerImpl server, boolean force) { - this.name = server.getMemberId() + "-" + JavaUtils.getClassSimpleName(getClass()) + COUNT.incrementAndGet(); - this.lifeCycle = new LifeCycle(this); - this.daemon = Daemon.newBuilder().setName(name).setRunnable(this) - .setThreadGroup(server.getThreadGroup()).build(); - this.server = server; - this.skipPreVote = force || - !RaftServerConfigKeys.LeaderElection.preVote( - server.getRaftServer().getProperties()); + static LeaderElection newInstance(RaftServerImpl server, boolean force) { + return newInstance(ServerInterface.get(server), force); + } + + static LeaderElection newInstance(ServerInterface server, boolean force) { + String name = ServerStringUtils.generateUnifiedName(server.getMemberId(), LeaderElection.class) + + COUNT.incrementAndGet(); try { // increase term of the candidate in advance if it's forced to election - this.round0 = force ? server.getState().initElection(Phase.ELECTION) : null; + final ConfAndTerm round0 = force ? 
server.initElection(Phase.ELECTION) : null; + return new LeaderElection(name, server, force, round0); } catch (IOException e) { throw new IllegalStateException(name + ": Failed to initialize election", e); } } + + private LeaderElection(String name, ServerInterface server, boolean force, ConfAndTerm round0) { + this.name = name; + this.lifeCycle = new LifeCycle(this); + this.daemon = Daemon.newBuilder().setName(name).setRunnable(this) + .setThreadGroup(server.getThreadGroup()).build(); + this.server = server; + this.skipPreVote = force || !server.isPreVoteEnabled(); + this.round0 = round0; + } + void start() { startIfNew(daemon::start); } @@ -223,8 +351,9 @@ private void startIfNew(Runnable starter) { } } - void shutdown() { + CompletableFuture shutdown() { lifeCycle.checkStateAndClose(); + return stopped; } @VisibleForTesting @@ -234,13 +363,22 @@ LifeCycle.State getCurrentState() { @Override public void run() { + try { + runImpl(); + } finally { + stopped.complete(null); + } + } + + @SuppressWarnings("try") + private void runImpl() { if (!lifeCycle.compareAndTransition(STARTING, RUNNING)) { final LifeCycle.State state = lifeCycle.getCurrentState(); LOG.info("{}: skip running since this is already {}", this, state); return; } - try (AutoCloseable ignored = Timekeeper.start(server.getLeaderElectionMetrics().getLeaderElectionTimer())) { + try (AutoCloseable ignored = Timekeeper.start(server.getLeaderElectionTimer())) { for (int round = 0; shouldRun(); round++) { if (skipPreVote || askForVotes(Phase.PRE_VOTE, round)) { if (askForVotes(Phase.ELECTION, round)) { @@ -254,10 +392,10 @@ public void run() { } final LifeCycle.State state = lifeCycle.getCurrentState(); if (state.isClosingOrClosed()) { - LOG.info(this + ": since this is already " + state + ", safely ignore " + e); + LOG.info("{}: since this is already {}, safely ignore {}", this, state, e.toString()); } else { - if (!server.getInfo().isAlive()) { - LOG.info(this + ": since the server is not alive, safely 
ignore " + e); + if (!server.isAlive()) { + LOG.info("{}: since the server is not alive, safely ignore {}", this, e.toString()); } else { LOG.error("{}: Failed, state={}", this, state, e); } @@ -265,18 +403,17 @@ public void run() { } } finally { // Update leader election completion metric(s). - server.getLeaderElectionMetrics().onNewLeaderElectionCompletion(); + server.onNewLeaderElectionCompletion(); lifeCycle.checkStateAndClose(() -> {}); } } private boolean shouldRun() { - final DivisionInfo info = server.getInfo(); - return lifeCycle.getCurrentState().isRunning() && info.isCandidate() && info.isAlive(); + return lifeCycle.getCurrentState().isRunning() && server.isCandidate() && server.isAlive(); } private boolean shouldRun(long electionTerm) { - return shouldRun() && server.getState().getCurrentTerm() == electionTerm; + return shouldRun() && server.getCurrentTerm() == electionTerm; } private ResultAndTerm submitRequestAndWaitResult(Phase phase, RaftConfigurationImpl conf, long electionTerm) @@ -289,7 +426,7 @@ private ResultAndTerm submitRequestAndWaitResult(Phase phase, RaftConfigurationI if (others.isEmpty()) { r = new ResultAndTerm(Result.PASSED, electionTerm); } else { - final TermIndex lastEntry = server.getState().getLastEntry(); + final TermIndex lastEntry = server.getLastEntry(); final Executor voteExecutor = new Executor(this, others.size()); try { final int submitted = submitRequests(phase, electionTerm, lastEntry, others, voteExecutor); @@ -312,8 +449,7 @@ private boolean askForVotes(Phase phase, int round) throws InterruptedException, } // If round0 is non-null, we have already called initElection in the constructor, // reuse round0 to avoid initElection again for the first round - final ConfAndTerm confAndTerm = (round == 0 && round0 != null) ? - round0 : server.getState().initElection(phase); + final ConfAndTerm confAndTerm = (round == 0 && round0 != null) ? 
round0 : server.initElection(phase); electionTerm = confAndTerm.getTerm(); conf = confAndTerm.getConf(); } @@ -333,15 +469,14 @@ private boolean askForVotes(Phase phase, int round) throws InterruptedException, return true; case NOT_IN_CONF: case SHUTDOWN: - server.getRaftServer().close(); - server.getStateMachine().event().notifyServerShutdown(server.getRoleInfoProto()); + server.shutdown(); return false; case TIMEOUT: return false; // should retry case REJECTED: case DISCOVERED_A_NEW_TERM: - final long term = r.maxTerm(server.getState().getCurrentTerm()); - server.changeToFollowerAndPersistMetadata(term, false, r); + final long term = r.maxTerm(server.getCurrentTerm()); + server.rejected(term, r); return false; default: throw new IllegalArgumentException("Unable to process result " + r.result); } @@ -354,7 +489,7 @@ private int submitRequests(Phase phase, long electionTerm, TermIndex lastEntry, for (final RaftPeer peer : others) { final RequestVoteRequestProto r = ServerProtoUtils.toRequestVoteRequestProto( server.getMemberId(), peer.getId(), electionTerm, lastEntry, phase == Phase.PRE_VOTE); - voteExecutor.submit(() -> server.getServerRpc().requestVote(r)); + voteExecutor.submit(() -> server.requestVote(r)); submitted++; } return submitted; @@ -380,6 +515,9 @@ private ResultAndTerm waitForResults(Phase phase, long electionTerm, int submitt Set higherPriorityPeers = getHigherPriorityPeers(conf); final boolean singleMode = conf.isSingleMode(server.getId()); + // true iff this server does not have any commits + final boolean emptyCommit = server.getLastCommittedIndex() < RaftLog.LEAST_VALID_LOG_INDEX; + while (waitForNum > 0 && shouldRun(electionTerm)) { final TimeDuration waitTime = timeout.elapsedTime().apply(n -> -n); if (waitTime.isNonPositive()) { @@ -429,7 +567,10 @@ private ResultAndTerm waitForResults(Phase phase, long electionTerm, int submitt // all higher priority peers have replied higherPriorityPeers.remove(replierId); - if 
(r.getServerReply().getSuccess()) { + final boolean acceptVote = r.getServerReply().getSuccess() + // When the commits are non-empty, do not accept votes from empty log voters. + && (emptyCommit || nonEmptyLog(r)); + if (acceptVote) { votedPeers.add(replierId); // If majority and all peers with higher priority have voted, candidate pass vote if (higherPriorityPeers.isEmpty() && conf.hasMajority(votedPeers, server.getId())) { @@ -438,6 +579,7 @@ private ResultAndTerm waitForResults(Phase phase, long electionTerm, int submitt } else { rejectedPeers.add(replierId); if (conf.majorityRejectVotes(rejectedPeers)) { + LOG.info("rejectedPeers: {}, emptyCommit? {}", rejectedPeers, emptyCommit); return logAndReturn(phase, Result.REJECTED, responses, exceptions); } } @@ -457,6 +599,26 @@ private ResultAndTerm waitForResults(Phase phase, long electionTerm, int submitt } } + /** + * @return true if the given reply indicates that the voter has a non-empty raft log. + * Note that a voter running with an old version may not include the lastEntry in the reply. + * For compatibility, this method returns true for such case. + */ + static boolean nonEmptyLog(RequestVoteReplyProto reply) { + final TermIndexProto lastEntry = reply.getLastEntry(); + // valid term >= 1 and valid index >= 0; therefore, (0, 0) can only be the proto default + if (lastEntry.equals(TermIndexProto.getDefaultInstance())) { // default: (0,0) + LOG.info("Reply missing lastEntry: {} ", ServerStringUtils.toRequestVoteReplyString(reply)); + return true; // accept voters with an older version + } + if (lastEntry.getTerm() > 0) { // when log is empty, lastEntry is (0,-1). 
+ return true; // accept voters with a non-empty log + } + + LOG.info("Replier log is empty: {} ", ServerStringUtils.toRequestVoteReplyString(reply)); + return false; // reject voters with an empty log + } + @Override public String toString() { return name; diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderLease.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderLease.java index 315cc9f143..3b8f53da14 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderLease.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderLease.java @@ -92,7 +92,7 @@ private Timestamp getMaxTimestampWithMajorityAck(List followers) { return Timestamp.currentTime(); } - final int mid = followers.size() / 2; + final long mid = followers.size() / 2; return followers.stream() .map(FollowerInfo::getLastRespondedAppendEntriesSendTime) .sorted() diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java index ea25c8a315..dab660fc05 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/LeaderStateImpl.java @@ -49,6 +49,7 @@ import org.apache.ratis.server.raftlog.LogEntryHeader; import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.raftlog.RaftLog; +import org.apache.ratis.server.util.ServerStringUtils; import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.util.CodeInjectionForTesting; import org.apache.ratis.util.CollectionUtils; @@ -56,6 +57,7 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.Timestamp; @@ -81,6 +83,7 @@ import 
java.util.concurrent.atomic.AtomicBoolean; import java.util.function.LongSupplier; import java.util.function.Predicate; +import java.util.function.Supplier; import java.util.function.ToLongFunction; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -110,12 +113,10 @@ private enum Type { } private final Type type; - private final long newTerm; private final Runnable handler; - StateUpdateEvent(Type type, long newTerm, Runnable handler) { + StateUpdateEvent(Type type, Runnable handler) { this.type = type; - this.newTerm = newTerm; this.handler = handler; } @@ -131,25 +132,30 @@ public boolean equals(Object obj) { return false; } final StateUpdateEvent that = (StateUpdateEvent)obj; - return this.type == that.type && this.newTerm == that.newTerm; + return this.type == that.type; } @Override public int hashCode() { - return Objects.hash(type, newTerm); + return type.hashCode(); } @Override public String toString() { - return type + (newTerm >= 0? ":" + newTerm: ""); + return type.name(); } } private class EventQueue { - private final String name = server.getMemberId() + "-" + JavaUtils.getClassSimpleName(getClass()); - private final BlockingQueue queue = new ArrayBlockingQueue<>(4096); + private final String name = ServerStringUtils.generateUnifiedName(server.getMemberId(), getClass()); + private final BlockingQueue queue = new ArrayBlockingQueue<>( + StateUpdateEvent.Type.values().length);; - void submit(StateUpdateEvent event) { + // submit can be invoked by different threads -- need to be synchronized + synchronized void submit(StateUpdateEvent event) { + if (queue.contains(event)) { // avoid duplicated events + return; + } try { queue.put(event); } catch (InterruptedException e) { @@ -158,10 +164,10 @@ void submit(StateUpdateEvent event) { } } + // poll is invoked only by the EventProcessor thread -- synchronized is not needed StateUpdateEvent poll() { - final StateUpdateEvent e; try { - e = queue.poll(server.getMaxTimeoutMs(), 
TimeUnit.MILLISECONDS); + return queue.poll(server.getMaxTimeoutMs(), TimeUnit.MILLISECONDS); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); String s = this + ": poll() is interrupted"; @@ -172,14 +178,6 @@ StateUpdateEvent poll() { throw new IllegalStateException(s + " UNEXPECTEDLY", ie); } } - - if (e != null) { - // remove duplicated events from the head. - while(e.equals(queue.peek())) { - queue.poll(); - } - } - return e; } @Override @@ -223,11 +221,8 @@ boolean removeAll(Collection c) { } CompletableFuture stopAll() { - final CompletableFuture[] futures = new CompletableFuture[senders.size()]; - for(int i = 0; i < futures.length; i++) { - futures[i] = senders.get(i).stopAsync(); - } - return CompletableFuture.allOf(futures); + return CompletableFuture.allOf(senders.stream(). + map(LogAppender::stopAsync).toArray(CompletableFuture[]::new)); } } @@ -268,7 +263,7 @@ static boolean isSameConf(CurrentOldFollowerInfos cached, RaftConfigurationImpl static class FollowerInfoMap { private final Map map = new ConcurrentHashMap<>(); - + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile CurrentOldFollowerInfos followerInfos; void put(RaftPeerId id, FollowerInfo info) { @@ -324,14 +319,15 @@ boolean isApplied() { } private final StateUpdateEvent updateCommitEvent = - new StateUpdateEvent(StateUpdateEvent.Type.UPDATE_COMMIT, -1, this::updateCommit); + new StateUpdateEvent(StateUpdateEvent.Type.UPDATE_COMMIT, this::updateCommit); private final StateUpdateEvent checkStagingEvent = - new StateUpdateEvent(StateUpdateEvent.Type.CHECK_STAGING, -1, this::checkStaging); + new StateUpdateEvent(StateUpdateEvent.Type.CHECK_STAGING, this::checkStaging); private final String name; private final RaftServerImpl server; private final RaftLog raftLog; private final long currentTerm; + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile ConfigurationStagingState stagingState; private final 
FollowerInfoMap followerInfoMap = new FollowerInfoMap(); @@ -350,21 +346,29 @@ boolean isApplied() { private final MemoizedSupplier startupLogEntry = MemoizedSupplier.valueOf(StartupLogEntry::new); private final AtomicBoolean isStopped = new AtomicBoolean(); + private final boolean logMetadataEnabled; private final int stagingCatchupGap; + private final TimeDuration stagingTimeout; private final RaftServerMetricsImpl raftServerMetrics; private final LogAppenderMetrics logAppenderMetrics; private final long followerMaxGapThreshold; private final PendingStepDown pendingStepDown; private final ReadIndexHeartbeats readIndexHeartbeats; + private final RaftServerConfigKeys.Read.ReadIndex.Type readIndexType; + private final Supplier readIndexSupplier; + private final boolean leaderHeartbeatCheckEnabled; private final LeaderLease lease; + private ReplyFlusher replyFlusher; + LeaderStateImpl(RaftServerImpl server) { - this.name = server.getMemberId() + "-" + JavaUtils.getClassSimpleName(getClass()); + this.name = ServerStringUtils.generateUnifiedName(server.getMemberId(), getClass()); this.server = server; final RaftProperties properties = server.getRaftServer().getProperties(); stagingCatchupGap = RaftServerConfigKeys.stagingCatchupGap(properties); + stagingTimeout = RaftServerConfigKeys.stagingTimeout(properties); final ServerState state = server.getState(); this.raftLog = state.getLog(); @@ -380,6 +384,7 @@ boolean isApplied() { this.pendingStepDown = new PendingStepDown(this); this.readIndexHeartbeats = new ReadIndexHeartbeats(); this.lease = new LeaderLease(properties); + this.logMetadataEnabled = RaftServerConfigKeys.Log.logMetadataEnabled(properties); long maxPendingRequests = RaftServerConfigKeys.Write.elementLimit(properties); double followerGapRatioMax = RaftServerConfigKeys.Write.followerGapRatioMax(properties); @@ -392,6 +397,23 @@ boolean isApplied() { this.followerMaxGapThreshold = (long) (followerGapRatioMax * maxPendingRequests); } + this.readIndexType = 
RaftServerConfigKeys.Read.ReadIndex.type(properties); + switch (readIndexType) { + case REPLIED_INDEX: + this.replyFlusher = new ReplyFlusher(server.getId(), state.getLastAppliedIndex(), + RaftServerConfigKeys.Read.ReadIndex.repliedIndexBatchInterval(properties)); + readIndexSupplier = replyFlusher::getRepliedIndex; + break; + case APPLIED_INDEX: + readIndexSupplier = () -> server.getState().getLastAppliedIndex(); + break; + case COMMIT_INDEX: + default: + readIndexSupplier = () -> server.getRaftLog().getLastCommittedIndex(); + } + this.leaderHeartbeatCheckEnabled = RaftServerConfigKeys.Read + .leaderHeartbeatCheckEnabled(properties); + final RaftConfigurationImpl conf = state.getRaftConf(); Collection others = conf.getOtherPeers(server.getId()); @@ -414,6 +436,11 @@ void start() { // Initialize startup log entry and append it to the RaftLog startupLogEntry.get(); processor.start(); + + if (replyFlusher != null) { + replyFlusher.start(startupLogEntry.get().startIndex); + } + senders.forEach(LogAppender::start); } @@ -449,6 +476,9 @@ CompletableFuture stop() { startupLogEntry.get().getAppliedIndexFuture().completeExceptionally( new ReadIndexException("failed to obtain read index since: ", nle)); server.getServerRpc().notifyNotLeader(server.getMemberId().getGroupId()); + if (replyFlusher != null) { + replyFlusher.stop(); + } logAppenderMetrics.unregister(); raftServerMetrics.unregister(); pendingRequests.close(); @@ -526,20 +556,21 @@ PendingRequests.Permit tryAcquirePendingRequest(Message message) { PendingRequest addPendingRequest(PendingRequests.Permit permit, RaftClientRequest request, TransactionContext entry) { if (LOG.isDebugEnabled()) { LOG.debug("{}: addPendingRequest at {}, entry={}", this, request, - LogProtoUtils.toLogEntryString(entry.getLogEntry())); + LogProtoUtils.toLogEntryString(entry.getLogEntryUnsafe())); } return pendingRequests.add(permit, request, entry); } - CompletableFuture streamAsync(RaftClientRequest request) { - return 
messageStreamRequests.streamAsync(request) + CompletableFuture streamAsync(ReferenceCountedObject requestRef) { + RaftClientRequest request = requestRef.get(); + return messageStreamRequests.streamAsync(requestRef) .thenApply(dummy -> server.newSuccessReply(request)) .exceptionally(e -> exception2RaftClientReply(request, e)); } - CompletableFuture streamEndOfRequestAsync(RaftClientRequest request) { - return messageStreamRequests.streamEndOfRequestAsync(request) - .thenApply(bytes -> RaftClientRequest.toWriteRequest(request, Message.valueOf(bytes))); + CompletableFuture> streamEndOfRequestAsync( + ReferenceCountedObject requestRef) { + return messageStreamRequests.streamEndOfRequestAsync(requestRef); } CompletableFuture addWatchRequest(RaftClientRequest request) { @@ -615,8 +646,8 @@ public AppendEntriesRequestProto newAppendEntriesRequestProto(FollowerInfo follo List entries, TermIndex previous, long callId) { final boolean initializing = !isCaughtUp(follower); final RaftPeerId targetId = follower.getId(); - return ServerProtoUtils.toAppendEntriesRequestProto(server.getMemberId(), targetId, currentTerm, entries, - ServerImplUtils.effectiveCommitIndex(raftLog.getLastCommittedIndex(), previous, entries.size()), + return ServerProtoUtils.toAppendEntriesRequestProto(server.getMemberId(), targetId, getCurrentTerm(), entries, + ServerImplUtils.effectiveCommitIndex(readIndexSupplier.get(), previous, entries.size()), initializing, previous, server.getCommitInfos(), callId); } @@ -695,20 +726,22 @@ private void updateSenders(RaftConfigurationImpl conf) { } void submitStepDownEvent(StepDownReason reason) { - submitStepDownEvent(getCurrentTerm(), reason); + submitStepDownEvent(currentTerm, reason); } void submitStepDownEvent(long term, StepDownReason reason) { - eventQueue.submit(new StateUpdateEvent(StateUpdateEvent.Type.STEP_DOWN, term, () -> stepDown(term, reason))); + eventQueue.submit(new StateUpdateEvent(StateUpdateEvent.Type.STEP_DOWN, () -> stepDown(term, reason))); 
} private void stepDown(long term, StepDownReason reason) { try { lease.getAndSetEnabled(false); - server.changeToFollowerAndPersistMetadata(term, false, reason); + server.changeToFollowerAndPersistMetadata(term, false, reason) + .get(5, TimeUnit.SECONDS); pendingStepDown.complete(server::newSuccessReply); - } catch(IOException e) { - final String s = this + ": Failed to persist metadata for term " + term; + } catch(Exception e) { + pendingStepDown.completeExceptionally(e); + final String s = this + ": Failed to step down for term " + term; LOG.warn(s, e); // the failure should happen while changing the state to follower // thus the in-memory state should have been updated @@ -789,12 +822,13 @@ public void run() { private BootStrapProgress checkProgress(FollowerInfo follower, long committed) { Preconditions.assertTrue(!isCaughtUp(follower)); final Timestamp progressTime = Timestamp.currentTime().addTimeMs(-server.getMaxTimeoutMs()); - final Timestamp timeoutTime = Timestamp.currentTime().addTimeMs(-3L * server.getMaxTimeoutMs()); + final Timestamp timeoutTime = Timestamp.currentTime().addTimeMs(-stagingTimeout.toLong(TimeUnit.MILLISECONDS)); if (follower.getLastRpcResponseTime().compareTo(timeoutTime) < 0) { LOG.debug("{} detects a follower {} timeout ({}ms) for bootstrapping", this, follower, follower.getLastRpcResponseTime().elapsedTimeMs()); return BootStrapProgress.NOPROGRESS; } else if (follower.getMatchIndex() + stagingCatchupGap > committed + && follower.getMatchIndex() >= server.getRaftConf().getLogEntryIndex() && follower.getLastRpcResponseTime().compareTo(progressTime) > 0 && follower.hasAttemptedToInstallSnapshot()) { return BootStrapProgress.CAUGHTUP; @@ -815,7 +849,9 @@ public void onFollowerSuccessAppendEntries(FollowerInfo follower) { @Override public boolean isFollowerBootstrapping(FollowerInfo follower) { - return isBootStrappingPeer(follower.getId()); + // It is better to check caught up than staging state + // since a follower may have already 
caught up but still in the staging state. + return !isCaughtUp(follower); } private void checkStaging() { @@ -847,7 +883,12 @@ private void checkStaging() { } boolean isBootStrappingPeer(RaftPeerId peerId) { - return Optional.ofNullable(stagingState).map(s -> s.contains(peerId)).orElse(false); + final Optional info = getLogAppender(peerId); + if (info.isPresent()) { + return !isCaughtUp(info.get().getFollower()); + } + final ConfigurationStagingState staging = stagingState; + return staging != null && staging.contains(peerId); } void submitUpdateCommitEvent() { @@ -943,8 +984,7 @@ private boolean hasMajority(Predicate isAcked) { private void updateCommit(LogEntryHeader[] entriesToCommit) { final long newCommitIndex = raftLog.getLastCommittedIndex(); - logMetadata(newCommitIndex); - commitIndexChanged(); + long lastCommitIndex = RaftLog.INVALID_LOG_INDEX; boolean hasConfiguration = false; for (LogEntryHeader entry : entriesToCommit) { @@ -953,7 +993,14 @@ private void updateCommit(LogEntryHeader[] entriesToCommit) { } hasConfiguration |= entry.getLogEntryBodyCase() == LogEntryBodyCase.CONFIGURATIONENTRY; raftLog.getRaftLogMetrics().onLogEntryCommitted(entry); + if (entry.getLogEntryBodyCase() != LogEntryBodyCase.METADATAENTRY) { + lastCommitIndex = entry.getIndex(); + } + } + if (logMetadataEnabled && lastCommitIndex != RaftLog.INVALID_LOG_INDEX) { + logMetadata(lastCommitIndex); } + commitIndexChanged(); if (hasConfiguration) { checkAndUpdateConfiguration(); } @@ -973,8 +1020,9 @@ private void updateCommit(long majority, long min) { } private void logMetadata(long commitIndex) { - raftLog.appendMetadata(currentTerm, commitIndex); - notifySenders(); + if (raftLog.appendMetadata(currentTerm, commitIndex) != RaftLog.INVALID_LOG_INDEX) { + notifySenders(); + } } private void checkAndUpdateConfiguration() { @@ -1119,19 +1167,21 @@ public boolean checkLeadership() { /** * Obtain the current readIndex for read only requests. See Raft paper section 6.4. * 1. 
Leader makes sure at least one log from current term is committed. - * 2. Leader record last committed index as readIndex. + * 2. Leader record last committed index or applied index or replied index (depending on configuration) as readIndex. * 3. Leader broadcast heartbeats to followers and waits for acknowledgements. * 4. If majority respond success, returns readIndex. * @return current readIndex. */ CompletableFuture getReadIndex(Long readAfterWriteConsistentIndex) { + final long index = readIndexSupplier.get(); final long readIndex; - if (readAfterWriteConsistentIndex != null) { + if (readAfterWriteConsistentIndex != null && readAfterWriteConsistentIndex > index) { readIndex = readAfterWriteConsistentIndex; } else { - readIndex = server.getRaftLog().getLastCommittedIndex(); + readIndex = index; } - LOG.debug("readIndex={}, readAfterWriteConsistentIndex={}", readIndex, readAfterWriteConsistentIndex); + LOG.debug("readIndex={} ({}={}, readAfterWriteConsistentIndex={})", + readIndex, readIndexType, index, readAfterWriteConsistentIndex); // if group contains only one member, fast path if (server.getRaftConf().isSingleton()) { @@ -1144,7 +1194,8 @@ CompletableFuture getReadIndex(Long readAfterWriteConsistentIndex) { } // if lease is enabled, check lease first - if (hasLease()) { + // if we allow leader to skip the leadership check heartbeat, we can return immediately + if (!leaderHeartbeatCheckEnabled || hasLease()) { return CompletableFuture.completedFuture(readIndex); } @@ -1203,6 +1254,10 @@ long[] getFollowerNextIndices() { return getLogAppenders().mapToLong(s -> s.getFollower().getNextIndex()).toArray(); } + long[] getFollowerMatchIndices() { + return getLogAppenders().mapToLong(s -> s.getFollower().getMatchIndex()).toArray(); + } + static Map newMap(Collection peers, String str) { Objects.requireNonNull(peers, () -> str + " == null"); final Map map = new HashMap<>(); @@ -1213,7 +1268,7 @@ static Map newMap(Collection peers, String str) } private class 
ConfigurationStagingState { - private final String name = server.getMemberId() + "-" + JavaUtils.getClassSimpleName(getClass()); + private final String name = ServerStringUtils.generateUnifiedName(server.getMemberId(), getClass()); private final Map newPeers; private final Map newListeners; private final PeerConfiguration newConf; @@ -1283,6 +1338,7 @@ private static boolean isCaughtUp(FollowerInfo follower) { } @Override + @SuppressWarnings("deprecation") public void checkHealth(FollowerInfo follower) { final TimeDuration elapsedTime = follower.getLastRpcResponseTime().elapsedTime(); if (elapsedTime.compareTo(server.properties().rpcSlownessTimeout()) > 0) { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/MessageStreamRequests.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/MessageStreamRequests.java index ac81b348bb..c00c57b364 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/MessageStreamRequests.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/MessageStreamRequests.java @@ -25,12 +25,15 @@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.ReferenceCountedObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; class MessageStreamRequests { public static final Logger LOG = LoggerFactory.getLogger(MessageStreamRequests.class); @@ -39,12 +42,14 @@ private static class PendingStream { private final ClientInvocationId key; private long nextId = -1; private ByteString bytes = ByteString.EMPTY; + private final List> pendingRefs = new LinkedList<>(); PendingStream(ClientInvocationId key) { this.key = key; } - synchronized 
CompletableFuture append(long messageId, Message message) { + synchronized CompletableFuture append(long messageId, + ReferenceCountedObject requestRef) { if (nextId == -1) { nextId = messageId; } else if (messageId != nextId) { @@ -52,27 +57,38 @@ synchronized CompletableFuture append(long messageId, Message messag "Unexpected message id in " + key + ": messageId = " + messageId + " != nextId = " + nextId)); } nextId++; + final Message message = requestRef.retain().getMessage(); + pendingRefs.add(requestRef); bytes = bytes.concat(message.getContent()); return CompletableFuture.completedFuture(bytes); } - synchronized CompletableFuture getBytes(long messageId, Message message) { - return append(messageId, message); + synchronized CompletableFuture> getWriteRequest(long messageId, + ReferenceCountedObject requestRef) { + return append(messageId, requestRef) + .thenApply(appended -> RaftClientRequest.toWriteRequest(requestRef.get(), () -> appended)) + .thenApply(request -> ReferenceCountedObject.delegateFrom(pendingRefs, request)); + } + + synchronized void clear() { + pendingRefs.forEach(ReferenceCountedObject::release); + pendingRefs.clear(); } } static class StreamMap { - private final ConcurrentMap map = new ConcurrentHashMap<>(); + private final Map map = new HashMap<>(); - PendingStream computeIfAbsent(ClientInvocationId key) { + synchronized PendingStream computeIfAbsent(ClientInvocationId key) { return map.computeIfAbsent(key, PendingStream::new); } - PendingStream remove(ClientInvocationId key) { + synchronized PendingStream remove(ClientInvocationId key) { return map.remove(key); } - void clear() { + synchronized void clear() { + map.values().forEach(PendingStream::clear); map.clear(); } } @@ -84,15 +100,18 @@ void clear() { this.name = name + "-" + JavaUtils.getClassSimpleName(getClass()); } - CompletableFuture streamAsync(RaftClientRequest request) { + CompletableFuture streamAsync(ReferenceCountedObject requestRef) { + final RaftClientRequest request = 
requestRef.get(); final MessageStreamRequestTypeProto stream = request.getType().getMessageStream(); Preconditions.assertTrue(!stream.getEndOfRequest()); final ClientInvocationId key = ClientInvocationId.valueOf(request.getClientId(), stream.getStreamId()); final PendingStream pending = streams.computeIfAbsent(key); - return pending.append(stream.getMessageId(), request.getMessage()); + return pending.append(stream.getMessageId(), requestRef); } - CompletableFuture streamEndOfRequestAsync(RaftClientRequest request) { + CompletableFuture> streamEndOfRequestAsync( + ReferenceCountedObject requestRef) { + final RaftClientRequest request = requestRef.get(); final MessageStreamRequestTypeProto stream = request.getType().getMessageStream(); Preconditions.assertTrue(stream.getEndOfRequest()); final ClientInvocationId key = ClientInvocationId.valueOf(request.getClientId(), stream.getStreamId()); @@ -101,7 +120,7 @@ CompletableFuture streamEndOfRequestAsync(RaftClientRequest request) if (pending == null) { return JavaUtils.completeExceptionally(new StreamException(name + ": " + key + " not found")); } - return pending.getBytes(stream.getMessageId(), request.getMessage()); + return pending.getWriteRequest(stream.getMessageId(), requestRef); } void clear() { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingRequest.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingRequest.java index 06a3a7b3ce..d72fcde90b 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingRequest.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingRequest.java @@ -38,7 +38,7 @@ class PendingRequest { private final CompletableFuture futureToReturn; PendingRequest(RaftClientRequest request, TransactionContext entry) { - this.termIndex = entry == null? null: TermIndex.valueOf(entry.getLogEntry()); + this.termIndex = entry == null? 
null: TermIndex.valueOf(entry.getLogEntryUnsafe()); this.request = request; this.entry = entry; if (request.is(TypeCase.FORWARD)) { @@ -64,7 +64,7 @@ RaftClientReply convert(RaftClientRequest q, RaftClientReply p) { } TermIndex getTermIndex() { - return Objects.requireNonNull(termIndex, "termIndex"); + return Objects.requireNonNull(termIndex, "termIndex == null"); } RaftClientRequest getRequest() { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingStepDown.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingStepDown.java index b7bfde3f6e..c1e5cc5f53 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingStepDown.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/PendingStepDown.java @@ -56,8 +56,12 @@ void complete(Function newSuccessRep replyFuture.complete(newSuccessReply.apply(request)); } + void completeExceptionally(Exception e) { + replyFuture.completeExceptionally(e); + } + void timeout() { - replyFuture.completeExceptionally(new TimeoutIOException( + completeExceptionally(new TimeoutIOException( ": Failed to step down leader on " + leader + "request " + request.getTimeoutMs() + "ms")); } @@ -105,6 +109,10 @@ void complete(Function newSuccessRep pending.getAndSetNull().ifPresent(p -> p.complete(newSuccessReply)); } + void completeExceptionally(Exception e) { + pending.getAndSetNull().ifPresent(p -> p.completeExceptionally(e)); + } + void timeout() { pending.getAndSetNull().ifPresent(PendingRequest::timeout); } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftConfigurationImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftConfigurationImpl.java index d609264af5..2e5cd58120 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftConfigurationImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftConfigurationImpl.java @@ -58,7 +58,7 @@ static final class Builder { private Builder() {} Builder 
setConf(PeerConfiguration conf) { - Objects.requireNonNull(conf); + Objects.requireNonNull(conf, "PeerConfiguration == null"); Preconditions.assertTrue(this.conf == null, "conf is already set."); this.conf = conf; return this; @@ -73,7 +73,7 @@ Builder setConf(Iterable peers, Iterable listeners) { } Builder setConf(RaftConfigurationImpl transitionalConf) { - Objects.requireNonNull(transitionalConf); + Objects.requireNonNull(transitionalConf, "transitionalConf == null"); Preconditions.assertTrue(transitionalConf.isTransitional()); Preconditions.assertTrue(!forceTransitional); @@ -83,7 +83,7 @@ Builder setConf(RaftConfigurationImpl transitionalConf) { Builder setOldConf(PeerConfiguration oldConf) { - Objects.requireNonNull(oldConf); + Objects.requireNonNull(oldConf, "oldConf == null"); Preconditions.assertTrue(this.oldConf == null, "oldConf is already set."); this.oldConf = oldConf; return this; @@ -94,7 +94,7 @@ Builder setOldConf(Iterable oldPeers, Iterable oldListeners) } Builder setOldConf(RaftConfigurationImpl stableConf) { - Objects.requireNonNull(stableConf); + Objects.requireNonNull(stableConf, "stableConf == null"); Preconditions.assertTrue(stableConf.isStable()); Preconditions.assertTrue(!forceStable); @@ -133,7 +133,7 @@ RaftConfigurationImpl build() { private RaftConfigurationImpl(PeerConfiguration conf, PeerConfiguration oldConf, long logEntryIndex) { - this.conf = Objects.requireNonNull(conf); + this.conf = Objects.requireNonNull(conf, "PeerConfiguration == null"); this.oldConf = oldConf; this.logEntryIndex = logEntryIndex; } @@ -148,6 +148,7 @@ boolean isStable() { return oldConf == null; } + @SuppressWarnings({"squid:S6466"}) // Suppress ArrayIndexOutOfBoundsException warning boolean containsInConf(RaftPeerId peerId, RaftPeerRole... 
roles) { if (roles == null || roles.length == 0) { return conf.contains(peerId); @@ -298,7 +299,7 @@ boolean isSingleton() { @Override public String toString() { - return logEntryIndex + ": " + conf + ", old=" + oldConf; + return "conf: {index: " + logEntryIndex + ", cur=" + conf + ", old=" + oldConf + "}"; } boolean hasNoChange(Collection newMembers, Collection newListeners) { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index 88f4f76406..043ba1ee71 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -17,36 +17,31 @@ */ package org.apache.ratis.server.impl; -import java.io.File; -import java.io.IOException; -import java.nio.file.NoSuchFileException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Function; -import java.util.function.Supplier; -import java.util.stream.Collectors; -import java.util.stream.Stream; - import org.apache.ratis.client.impl.ClientProtoUtils; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.metrics.Timekeeper; -import org.apache.ratis.proto.RaftProtos.*; +import org.apache.ratis.proto.RaftProtos.AppendEntriesReplyProto; import org.apache.ratis.proto.RaftProtos.AppendEntriesReplyProto.AppendResult; +import 
org.apache.ratis.proto.RaftProtos.AppendEntriesRequestProto; +import org.apache.ratis.proto.RaftProtos.CommitInfoProto; +import org.apache.ratis.proto.RaftProtos.InstallSnapshotReplyProto; +import org.apache.ratis.proto.RaftProtos.InstallSnapshotRequestProto; +import org.apache.ratis.proto.RaftProtos.InstallSnapshotResult; +import org.apache.ratis.proto.RaftProtos.LogEntryProto; +import org.apache.ratis.proto.RaftProtos.LogInfoProto; +import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto; import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto.TypeCase; +import org.apache.ratis.proto.RaftProtos.RaftConfigurationProto; +import org.apache.ratis.proto.RaftProtos.RaftPeerRole; +import org.apache.ratis.proto.RaftProtos.RaftRpcRequestProto; +import org.apache.ratis.proto.RaftProtos.ReadIndexReplyProto; +import org.apache.ratis.proto.RaftProtos.ReadIndexRequestProto; +import org.apache.ratis.proto.RaftProtos.ReplicationLevel; +import org.apache.ratis.proto.RaftProtos.RequestVoteReplyProto; +import org.apache.ratis.proto.RaftProtos.RequestVoteRequestProto; +import org.apache.ratis.proto.RaftProtos.RoleInfoProto; +import org.apache.ratis.proto.RaftProtos.StartLeaderElectionReplyProto; +import org.apache.ratis.proto.RaftProtos.StartLeaderElectionRequestProto; import org.apache.ratis.protocol.ClientInvocationId; import org.apache.ratis.protocol.GroupInfoReply; import org.apache.ratis.protocol.GroupInfoRequest; @@ -64,7 +59,20 @@ import org.apache.ratis.protocol.SetConfigurationRequest; import org.apache.ratis.protocol.SnapshotManagementRequest; import org.apache.ratis.protocol.TransferLeadershipRequest; -import org.apache.ratis.protocol.exceptions.*; +import org.apache.ratis.protocol.exceptions.GroupMismatchException; +import org.apache.ratis.protocol.exceptions.LeaderNotReadyException; +import org.apache.ratis.protocol.exceptions.LeaderSteppingDownException; +import org.apache.ratis.protocol.exceptions.NotLeaderException; +import 
org.apache.ratis.protocol.exceptions.RaftException; +import org.apache.ratis.protocol.exceptions.ReadException; +import org.apache.ratis.protocol.exceptions.ReadIndexException; +import org.apache.ratis.protocol.exceptions.ReconfigurationInProgressException; +import org.apache.ratis.protocol.exceptions.ResourceUnavailableException; +import org.apache.ratis.protocol.exceptions.ServerNotReadyException; +import org.apache.ratis.protocol.exceptions.SetConfigurationException; +import org.apache.ratis.protocol.exceptions.StaleReadException; +import org.apache.ratis.protocol.exceptions.StateMachineException; +import org.apache.ratis.protocol.exceptions.TransferLeadershipException; import org.apache.ratis.server.DataStreamMap; import org.apache.ratis.server.DivisionInfo; import org.apache.ratis.server.DivisionProperties; @@ -73,8 +81,9 @@ import org.apache.ratis.server.RaftServerRpc; import org.apache.ratis.server.impl.LeaderElection.Phase; import org.apache.ratis.server.impl.RetryCacheImpl.CacheEntry; +import org.apache.ratis.server.impl.ServerImplUtils.ConsecutiveIndices; +import org.apache.ratis.server.impl.ServerImplUtils.NavigableIndices; import org.apache.ratis.server.leader.LeaderState; -import org.apache.ratis.server.leader.LogAppender; import org.apache.ratis.server.metrics.LeaderElectionMetrics; import org.apache.ratis.server.metrics.RaftServerMetricsImpl; import org.apache.ratis.server.protocol.RaftServerAsynchronousProtocol; @@ -85,7 +94,6 @@ import org.apache.ratis.server.raftlog.RaftLogIOException; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.server.storage.RaftStorageDirectory; -import org.apache.ratis.server.util.ServerStringUtils; import org.apache.ratis.statemachine.SnapshotInfo; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.TransactionContext; @@ -105,8 +113,46 @@ import org.apache.ratis.util.ProtoUtils; import org.apache.ratis.util.ReferenceCountedObject; import 
org.apache.ratis.util.TimeDuration; -import org.apache.ratis.util.Timestamp; import org.apache.ratis.util.function.CheckedSupplier; +import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; + +import java.io.File; +import java.io.IOException; +import java.nio.file.NoSuchFileException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.apache.ratis.server.impl.ServerImplUtils.assertEntries; +import static org.apache.ratis.server.impl.ServerImplUtils.assertGroup; +import static org.apache.ratis.server.impl.ServerImplUtils.effectiveCommitIndex; +import static org.apache.ratis.server.impl.ServerProtoUtils.toAppendEntriesReplyProto; +import static org.apache.ratis.server.impl.ServerProtoUtils.toReadIndexReplyProto; +import static org.apache.ratis.server.impl.ServerProtoUtils.toReadIndexRequestProto; +import static org.apache.ratis.server.impl.ServerProtoUtils.toRequestVoteReplyProto; +import static org.apache.ratis.server.impl.ServerProtoUtils.toStartLeaderElectionReplyProto; +import static org.apache.ratis.server.util.ServerStringUtils.toAppendEntriesReplyString; +import static org.apache.ratis.server.util.ServerStringUtils.toAppendEntriesRequestString; +import static 
org.apache.ratis.server.util.ServerStringUtils.toRequestVoteReplyString; class RaftServerImpl implements RaftServer.Division, RaftServerProtocol, RaftServerAsynchronousProtocol, @@ -118,6 +164,7 @@ class RaftServerImpl implements RaftServer.Division, static final String APPEND_TRANSACTION = CLASS_NAME + ".appendTransaction"; static final String LOG_SYNC = APPEND_ENTRIES + ".logComplete"; static final String START_LEADER_ELECTION = CLASS_NAME + ".startLeaderElection"; + static final String START_COMPLETE = CLASS_NAME + ".startComplete"; class Info implements DivisionInfo { @Override @@ -162,6 +209,14 @@ public long[] getFollowerNextIndices() { .map(LeaderStateImpl::getFollowerNextIndices) .orElse(null); } + + @Override + public long[] getFollowerMatchIndices() { + return role.getLeaderState() + .filter(leader -> isLeader()) + .map(LeaderStateImpl::getFollowerMatchIndices) + .orElse(null); + } } private final RaftServerProxy proxy; @@ -188,6 +243,7 @@ public long[] getFollowerNextIndices() { private final RaftServerJmxAdapter jmxAdapter = new RaftServerJmxAdapter(this); private final LeaderElectionMetrics leaderElectionMetrics; private final RaftServerMetricsImpl raftServerMetrics; + private final CountDownLatch closeFinishedLatch = new CountDownLatch(1); // To avoid append entry before complete start() method // For example, if thread1 start(), but before thread1 startAsFollower(), thread2 receive append entry @@ -206,6 +262,9 @@ public long[] getFollowerNextIndices() { private final AtomicBoolean firstElectionSinceStartup = new AtomicBoolean(true); private final ThreadGroup threadGroup; + private final AtomicReference> appendLogFuture; + private final NavigableIndices appendLogTermIndices = new NavigableIndices(); + RaftServerImpl(RaftGroup group, StateMachine stateMachine, RaftServerProxy proxy, RaftStorage.StartupOption option) throws IOException { final RaftPeerId id = proxy.getId(); @@ -239,6 +298,7 @@ public long[] getFollowerNextIndices() { 
this.transferLeadership = new TransferLeadership(this, properties); this.snapshotRequestHandler = new SnapshotManagementRequestHandler(this); this.snapshotInstallationHandler = new SnapshotInstallationHandler(this, properties); + this.appendLogFuture = new AtomicReference<>(CompletableFuture.completedFuture(null)); this.serverExecutor = ConcurrentUtils.newThreadPoolWithMax( RaftServerConfigKeys.ThreadPool.serverCached(properties), @@ -357,7 +417,10 @@ boolean start() throws IOException { jmxAdapter.registerMBean(); state.start(); - startComplete.compareAndSet(false, true); + CodeInjectionForTesting.execute(START_COMPLETE, getId(), null, role); + if (startComplete.compareAndSet(false, true)) { + LOG.info("{}: Successfully started.", getMemberId()); + } return true; } @@ -420,7 +483,15 @@ void groupRemove(boolean deleteDirectory, boolean renameDirectory) { final RaftStorageDirectory dir = state.getStorage().getStorageDir(); /* Shutdown is triggered here inorder to avoid any locked files. */ + state.getStateMachineUpdater().setRemoving(); close(); + try { + closeFinishedLatch.await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("{}: Waiting closing interrupted, will not continue to remove group locally", getMemberId()); + return; + } getStateMachine().event().notifyGroupRemove(); if (deleteDirectory) { for (int i = 0; i < FileUtils.NUM_ATTEMPTS; i ++) { @@ -499,6 +570,7 @@ public void close() { } catch (Exception e) { LOG.warn(getMemberId() + ": Failed to shutdown serverExecutor", e); } + closeFinishedLatch.countDown(); }); } @@ -514,21 +586,17 @@ void setFirstElection(Object reason) { * @param force Force to start a new {@link FollowerState} even if this server is already a follower. 
* @return if the term/votedFor should be updated to the new term */ - private synchronized boolean changeToFollower( - long newTerm, - boolean force, - boolean allowListener, - Object reason) { + private synchronized CompletableFuture changeToFollower( + long newTerm, boolean force, boolean allowListener, Object reason, AtomicBoolean metadataUpdated) { final RaftPeerRole old = role.getCurrentRole(); - final boolean metadataUpdated = state.updateCurrentTerm(newTerm); if (old == RaftPeerRole.LISTENER && !allowListener) { throw new IllegalStateException("Unexpected role " + old); } - + CompletableFuture future = CompletableFuture.completedFuture(null); if ((old != RaftPeerRole.FOLLOWER || force) && old != RaftPeerRole.LISTENER) { setRole(RaftPeerRole.FOLLOWER, reason); if (old == RaftPeerRole.LEADER) { - role.shutdownLeaderState(false) + future = role.shutdownLeaderState(false) .exceptionally(e -> { if (e != null) { if (!getInfo().isAlive()) { @@ -537,27 +605,33 @@ private synchronized boolean changeToFollower( } } throw new CompletionException("Failed to shutdownLeaderState: " + this, e); - }) - .join(); + }); state.setLeader(null, reason); } else if (old == RaftPeerRole.CANDIDATE) { - role.shutdownLeaderElection(); + future = role.shutdownLeaderElection(); } else if (old == RaftPeerRole.FOLLOWER) { - role.shutdownFollowerState(); + future = role.shutdownFollowerState(); } + + metadataUpdated.set(state.updateCurrentTerm(newTerm)); role.startFollowerState(this, reason); setFirstElection(reason); + } else { + metadataUpdated.set(state.updateCurrentTerm(newTerm)); } - return metadataUpdated; + return future; } - synchronized void changeToFollowerAndPersistMetadata( + synchronized CompletableFuture changeToFollowerAndPersistMetadata( long newTerm, boolean allowListener, Object reason) throws IOException { - if (changeToFollower(newTerm, false, allowListener, reason)) { + final AtomicBoolean metadataUpdated = new AtomicBoolean(); + final CompletableFuture future = 
changeToFollower(newTerm, false, allowListener, reason, metadataUpdated); + if (metadataUpdated.get()) { state.persistMetadata(); } + return future; } synchronized void changeToLeader() { @@ -573,6 +647,15 @@ synchronized void changeToLeader() { @Override public Collection getCommitInfos() { + try { + return getCommitInfosImpl(); + } catch (Throwable t) { + LOG.warn("{} Failed to getCommitInfos", getMemberId(), t); + return Collections.emptyList(); + } + } + + private Collection getCommitInfosImpl() { final List infos = new ArrayList<>(); // add the commit info of this server final long commitIndex = updateCommitInfoCache(); @@ -602,50 +685,33 @@ GroupInfoReply getGroupInfo(GroupInfoRequest request) { final RaftConfigurationProto conf = LogProtoUtils.toRaftConfigurationProtoBuilder(getRaftConf()).build(); return new GroupInfoReply(request, getCommitInfos(), getGroup(), getRoleInfoProto(), - dir.isHealthy(), conf); + dir.isHealthy(), conf, getLogInfo()); } - RoleInfoProto getRoleInfoProto() { - RaftPeerRole currentRole = role.getCurrentRole(); - RoleInfoProto.Builder roleInfo = RoleInfoProto.newBuilder() - .setSelf(getPeer().getRaftPeerProto()) - .setRole(currentRole) - .setRoleElapsedTimeMs(role.getRoleElapsedTimeMs()); - switch (currentRole) { - case CANDIDATE: - CandidateInfoProto.Builder candidate = CandidateInfoProto.newBuilder() - .setLastLeaderElapsedTimeMs(state.getLastLeaderElapsedTimeMs()); - roleInfo.setCandidateInfo(candidate); - break; - - case LISTENER: - case FOLLOWER: - final Optional fs = role.getFollowerState(); - final ServerRpcProto leaderInfo = ServerProtoUtils.toServerRpcProto( - getRaftConf().getPeer(state.getLeaderId()), - fs.map(FollowerState::getLastRpcTime).map(Timestamp::elapsedTimeMs).orElse(0L)); - // FollowerState can be null while adding a new peer as it is not - // a voting member yet - roleInfo.setFollowerInfo(FollowerInfoProto.newBuilder() - .setLeaderInfo(leaderInfo) - 
.setOutstandingOp(fs.map(FollowerState::getOutstandingOp).orElse(0))); - break; - - case LEADER: - role.getLeaderState().ifPresent(ls -> { - final LeaderInfoProto.Builder leader = LeaderInfoProto.newBuilder(); - ls.getLogAppenders().map(LogAppender::getFollower).forEach(f -> - leader.addFollowerInfo(ServerProtoUtils.toServerRpcProto( - f.getPeer(), f.getLastRpcResponseTime().elapsedTimeMs()))); - leader.setTerm(ls.getCurrentTerm()); - roleInfo.setLeaderInfo(leader); - }); - break; - - default: - throw new IllegalStateException("incorrect role of server " + currentRole); + LogInfoProto getLogInfo(){ + final RaftLog log = getRaftLog(); + LogInfoProto.Builder logInfoBuilder = LogInfoProto.newBuilder(); + final TermIndex applied = getStateMachine().getLastAppliedTermIndex(); + if (applied != null) { + logInfoBuilder.setApplied(applied.toProto()); } - return roleInfo.build(); + final TermIndex committed = log.getTermIndex(log.getLastCommittedIndex()); + if (committed != null) { + logInfoBuilder.setCommitted(committed.toProto()); + } + final TermIndex entry = log.getLastEntryTermIndex(); + if (entry != null) { + logInfoBuilder.setLastEntry(entry.toProto()); + } + final SnapshotInfo snapshot = getStateMachine().getLatestSnapshot(); + if (snapshot != null) { + logInfoBuilder.setLastSnapshot(snapshot.getTermIndex().toProto()); + } + return logInfoBuilder.build(); + } + + RoleInfoProto getRoleInfoProto() { + return role.buildRoleInfoProto(this); } synchronized void changeToCandidate(boolean forceStartLeaderElection) { @@ -661,7 +727,7 @@ synchronized void changeToCandidate(boolean forceStartLeaderElection) { @Override public String toString() { - return role + " " + state + " " + lifeCycle.getCurrentState(); + return role + " (" + lifeCycle.getCurrentState() + "): " + state; } RaftClientReply.Builder newReplyBuilder(RaftClientRequest request) { @@ -706,7 +772,7 @@ private CompletableFuture checkLeaderState(RaftClientRequest re */ private CompletableFuture 
checkLeaderState(RaftClientRequest request, CacheEntry entry) { try { - assertGroup(request.getRequestorId(), request.getRaftGroupId()); + assertGroup(getMemberId(), request); } catch (GroupMismatchException e) { return RetryCacheImpl.failWithException(e, entry); } @@ -755,15 +821,6 @@ void assertLifeCycleState(Set expected) throws ServerNotReadyEx getMemberId() + " is not in " + expected + ": current state is " + c), expected); } - void assertGroup(Object requestorId, RaftGroupId requestorGroupId) throws GroupMismatchException { - final RaftGroupId groupId = getMemberId().getGroupId(); - if (!groupId.equals(requestorGroupId)) { - throw new GroupMismatchException(getMemberId() - + ": The group (" + requestorGroupId + ") of " + requestorId - + " does not match the group (" + groupId + ") of the server " + getId()); - } - } - /** * Append a transaction to the log for processing a client request. * Note that the given request could be different from {@link TransactionContext#getClientRequest()} @@ -825,20 +882,14 @@ private CompletableFuture appendTransaction( leaderState.notifySenders(); } - final CompletableFuture future = pending.getFuture(); - if (request.is(TypeCase.WRITE)) { - // check replication - final ReplicationLevel replication = request.getType().getWrite().getReplication(); - if (replication != ReplicationLevel.MAJORITY) { - return future.thenCompose(reply -> waitForReplication(reply, replication)); - } - } - - return future; + return pending.getFuture(); } /** Wait until the given replication requirement is satisfied. 
*/ private CompletableFuture waitForReplication(RaftClientReply reply, ReplicationLevel replication) { + if (!reply.isSuccess()) { + return CompletableFuture.completedFuture(reply); + } final RaftClientRequest.Type type = RaftClientRequest.watchRequestType(reply.getLogIndex(), replication); final RaftClientRequest watch = RaftClientRequest.newBuilder() .setServerId(reply.getServerId()) @@ -847,7 +898,24 @@ private CompletableFuture waitForReplication(RaftClientReply re .setCallId(reply.getCallId()) .setType(type) .build(); - return watchAsync(watch).thenApply(r -> reply); + return watchAsync(watch).thenApply(watchReply -> combineReplies(reply, watchReply)); + } + + private RaftClientReply combineReplies(RaftClientReply reply, RaftClientReply watchReply) { + final RaftClientReply combinedReply = RaftClientReply.newBuilder() + .setServerId(getMemberId()) + // from write reply + .setClientId(reply.getClientId()) + .setCallId(reply.getCallId()) + .setMessage(reply.getMessage()) + .setLogIndex(reply.getLogIndex()) + // from watchReply + .setSuccess(watchReply.isSuccess()) + .setException(watchReply.getException()) + .setCommitInfos(watchReply.getCommitInfos()) + .build(); + LOG.debug("combinedReply={}", combinedReply); + return combinedReply; } void stepDownOnJvmPause() { @@ -879,25 +947,27 @@ CompletableFuture executeSubmitClientRequestAsync( public CompletableFuture submitClientRequestAsync( ReferenceCountedObject requestRef) { final RaftClientRequest request = requestRef.retain(); - LOG.debug("{}: receive client request({})", getMemberId(), request); - try { + LOG.debug("{}: receive client request({})", getMemberId(), request); assertLifeCycleState(LifeCycle.States.RUNNING); - } catch (ServerNotReadyException e) { - final RaftClientReply reply = newExceptionReply(request, e); - requestRef.release(); - return CompletableFuture.completedFuture(reply); - } - final Timekeeper timer = raftServerMetrics.getClientRequestTimer(request.getType()); - final Optional 
timerContext = Optional.ofNullable(timer).map(Timekeeper::time); - return replyFuture(requestRef).whenComplete((clientReply, exception) -> { + RaftClientRequest.Type type = request.getType(); + final Timekeeper timer = raftServerMetrics.getClientRequestTimer(type); + final Optional timerContext = Optional.ofNullable(timer).map(Timekeeper::time); + return replyFuture(requestRef).whenComplete((clientReply, exception) -> { + timerContext.ifPresent(Timekeeper.Context::stop); + if (exception != null || clientReply.getException() != null) { + raftServerMetrics.incFailedRequestCount(type); + } + }); + } catch (RaftException e) { + return CompletableFuture.completedFuture(newExceptionReply(request, e)); + } catch (Throwable t) { + LOG.error("{} Failed to submitClientRequestAsync for {}", getMemberId(), request, t); + return CompletableFuture.completedFuture(newExceptionReply(request, new RaftException(t))); + } finally { requestRef.release(); - timerContext.ifPresent(Timekeeper.Context::stop); - if (exception != null || clientReply.getException() != null) { - raftServerMetrics.incFailedRequestCount(request.getType()); - } - }); + } } private CompletableFuture replyFuture(ReferenceCountedObject requestRef) { @@ -923,6 +993,19 @@ private CompletableFuture replyFuture(ReferenceCountedObject writeAsync(ReferenceCountedObject requestRef) { + final RaftClientRequest request = requestRef.get(); + final CompletableFuture future = writeAsyncImpl(requestRef); + if (request.is(TypeCase.WRITE)) { + // check replication + final ReplicationLevel replication = request.getType().getWrite().getReplication(); + if (replication != ReplicationLevel.MAJORITY) { + return future.thenCompose(r -> waitForReplication(r, replication)); + } + } + return future; + } + + private CompletableFuture writeAsyncImpl(ReferenceCountedObject requestRef) { final RaftClientRequest request = requestRef.get(); final CompletableFuture reply = checkLeaderState(request); if (reply != null) { @@ -993,8 +1076,7 @@ 
private CompletableFuture sendReadIndexAsync(RaftClientRequ if (leaderId == null) { return JavaUtils.completeExceptionally(new ReadIndexException(getMemberId() + ": Leader is unknown.")); } - final ReadIndexRequestProto request = - ServerProtoUtils.toReadIndexRequestProto(clientRequest, getMemberId(), leaderId); + final ReadIndexRequestProto request = toReadIndexRequestProto(clientRequest, getMemberId(), leaderId); try { return getServerRpc().async().readIndexAsync(request); } catch (IOException e) { @@ -1067,21 +1149,28 @@ private CompletableFuture messageStreamAsync(ReferenceCountedOb } if (request.getType().getMessageStream().getEndOfRequest()) { - final CompletableFuture f = streamEndOfRequestAsync(request); + final CompletableFuture> f = streamEndOfRequestAsync(requestRef); if (f.isCompletedExceptionally()) { return f.thenApply(r -> null); } // the message stream has ended and the request become a WRITE request - return replyFuture(requestRef.delegate(f.join())); + ReferenceCountedObject joinedRequest = f.join(); + try { + return replyFuture(joinedRequest); + } finally { + // Released pending streaming requests. 
+ joinedRequest.release(); + } } return role.getLeaderState() - .map(ls -> ls.streamAsync(request)) + .map(ls -> ls.streamAsync(requestRef)) .orElseGet(() -> CompletableFuture.completedFuture( newExceptionReply(request, generateNotLeaderException()))); } - private CompletableFuture streamEndOfRequestAsync(RaftClientRequest request) { + private CompletableFuture> streamEndOfRequestAsync( + ReferenceCountedObject request) { return role.getLeaderState() .map(ls -> ls.streamEndOfRequestAsync(request)) .orElse(null); @@ -1164,7 +1253,7 @@ CompletableFuture transferLeadershipAsync(TransferLeadershipReq LOG.info("{}: receive transferLeadership {}", getMemberId(), request); assertLifeCycleState(LifeCycle.States.RUNNING); - assertGroup(request.getRequestorId(), request.getRaftGroupId()); + assertGroup(getMemberId(), request); synchronized (this) { CompletableFuture reply = checkLeaderState(request); @@ -1194,7 +1283,7 @@ CompletableFuture transferLeadershipAsync(TransferLeadershipReq if (!conf.isHighestPriority(request.getNewLeader())) { String msg = getMemberId() + " refused to transfer leadership to peer " + request.getNewLeader() + - " as it does not has highest priority " + conf; + " as it does not has highest priority in " + conf; return logAndReturnTransferLeadershipFail(request, msg); } @@ -1205,10 +1294,11 @@ CompletableFuture transferLeadershipAsync(TransferLeadershipReq CompletableFuture takeSnapshotAsync(SnapshotManagementRequest request) throws IOException { LOG.info("{}: takeSnapshotAsync {}", getMemberId(), request); assertLifeCycleState(LifeCycle.States.RUNNING); - assertGroup(request.getRequestorId(), request.getRaftGroupId()); + assertGroup(getMemberId(), request); + Objects.requireNonNull(request.getCreate(), "create == null"); - //TODO(liuyaolong): get the gap value from shell command - long minGapValue = RaftServerConfigKeys.Snapshot.creationGap(proxy.getProperties()); + final long creationGap = request.getCreate().getCreationGap(); + long minGapValue = 
creationGap > 0? creationGap : RaftServerConfigKeys.Snapshot.creationGap(proxy.getProperties()); final long lastSnapshotIndex = Optional.ofNullable(stateMachine.getLatestSnapshot()) .map(SnapshotInfo::getIndex) .orElse(0L); @@ -1237,7 +1327,7 @@ CompletableFuture leaderElectionManagementAsync(LeaderElectionM throws IOException { LOG.info("{} receive leaderElectionManagement request {}", getMemberId(), request); assertLifeCycleState(LifeCycle.States.RUNNING); - assertGroup(request.getRequestorId(), request.getRaftGroupId()); + assertGroup(getMemberId(), request); final LeaderElectionManagementRequest.Pause pause = request.getPause(); if (pause != null) { @@ -1256,7 +1346,7 @@ CompletableFuture leaderElectionManagementAsync(LeaderElectionM CompletableFuture stepDownLeaderAsync(TransferLeadershipRequest request) throws IOException { LOG.info("{} receive stepDown leader request {}", getMemberId(), request); assertLifeCycleState(LifeCycle.States.RUNNING); - assertGroup(request.getRequestorId(), request.getRaftGroupId()); + assertGroup(getMemberId(), request); return role.getLeaderState().map(leader -> leader.submitStepDownRequestAsync(request)) .orElseGet(() -> CompletableFuture.completedFuture( @@ -1273,7 +1363,7 @@ public RaftClientReply setConfiguration(SetConfigurationRequest request) throws public CompletableFuture setConfigurationAsync(SetConfigurationRequest request) throws IOException { LOG.info("{}: receive setConfiguration {}", getMemberId(), request); assertLifeCycleState(LifeCycle.States.RUNNING); - assertGroup(request.getRequestorId(), request.getRaftGroupId()); + assertGroup(getMemberId(), request); CompletableFuture reply = checkLeaderState(request); if (reply != null) { @@ -1352,15 +1442,13 @@ static List add(RaftPeerRole role, RaftConfigurationImpl conf, SetConf } /** - * check if the remote peer is not included in the current conf - * and should shutdown. should shutdown if all the following stands: - * 1. 
this is a leader + * The remote peer should shut down if all the following are true. + * 1. this is the current leader * 2. current conf is stable and has been committed - * 3. candidate id is not included in conf - * 4. candidate's last entry's index < conf's index + * 3. candidate is not in the current conf + * 4. candidate last entry index < conf index (the candidate was removed) */ - private boolean shouldSendShutdown(RaftPeerId candidateId, - TermIndex candidateLastEntry) { + private boolean shouldSendShutdown(RaftPeerId candidateId, TermIndex candidateLastEntry) { return getInfo().isLeader() && getRaftConf().isStable() && getState().isConfCommitted() @@ -1387,10 +1475,11 @@ private RequestVoteReplyProto requestVote(Phase phase, LOG.info("{}: receive requestVote({}, {}, {}, {}, {})", getMemberId(), phase, candidateId, candidateGroupId, candidateTerm, candidateLastEntry); assertLifeCycleState(LifeCycle.States.RUNNING); - assertGroup(candidateId, candidateGroupId); + assertGroup(getMemberId(), candidateId, candidateGroupId); boolean shouldShutdown = false; final RequestVoteReplyProto reply; + CompletableFuture future = null; synchronized (this) { // Check life cycle state again to avoid the PAUSING/PAUSED state. 
assertLifeCycleState(LifeCycle.States.RUNNING); @@ -1400,12 +1489,12 @@ private RequestVoteReplyProto requestVote(Phase phase, final boolean voteGranted = context.decideVote(candidate, candidateLastEntry); if (candidate != null && phase == Phase.ELECTION) { // change server state in the ELECTION phase - final boolean termUpdated = - changeToFollower(candidateTerm, true, false, "candidate:" + candidateId); + final AtomicBoolean termUpdated = new AtomicBoolean(); + future = changeToFollower(candidateTerm, true, false, "candidate:" + candidateId, termUpdated); if (voteGranted) { state.grantVote(candidate.getId()); } - if (termUpdated || voteGranted) { + if (termUpdated.get() || voteGranted) { state.persistMetadata(); // sync metafile } } @@ -1414,75 +1503,55 @@ private RequestVoteReplyProto requestVote(Phase phase, } else if(shouldSendShutdown(candidateId, candidateLastEntry)) { shouldShutdown = true; } - reply = ServerProtoUtils.toRequestVoteReplyProto(candidateId, getMemberId(), - voteGranted, state.getCurrentTerm(), shouldShutdown); + reply = toRequestVoteReplyProto(candidateId, getMemberId(), + voteGranted, state.getCurrentTerm(), shouldShutdown, state.getLastEntry()); if (LOG.isInfoEnabled()) { LOG.info("{} replies to {} vote request: {}. Peer's state: {}", - getMemberId(), phase, ServerStringUtils.toRequestVoteReplyString(reply), state); + getMemberId(), phase, toRequestVoteReplyString(reply), state); } } - return reply; - } - - private void validateEntries(long expectedTerm, TermIndex previous, - List entries) { - if (entries != null && !entries.isEmpty()) { - final long index0 = entries.get(0).getIndex(); - // Check if next entry's index is 1 greater than the snapshotIndex. If yes, then - // we do not have to check for the existence of previous. 
- if (index0 != state.getSnapshotIndex() + 1) { - if (previous == null || previous.getTerm() == 0) { - Preconditions.assertTrue(index0 == 0, - "Unexpected Index: previous is null but entries[%s].getIndex()=%s", - 0, index0); - } else { - Preconditions.assertTrue(previous.getIndex() == index0 - 1, - "Unexpected Index: previous is %s but entries[%s].getIndex()=%s", - previous, 0, index0); - } - } - - for (int i = 0; i < entries.size(); i++) { - LogEntryProto entry = entries.get(i); - final long t = entry.getTerm(); - Preconditions.assertTrue(expectedTerm >= t, - "Unexpected Term: entries[%s].getTerm()=%s but expectedTerm=%s", - i, t, expectedTerm); - - final long indexi = entry.getIndex(); - Preconditions.assertTrue(indexi == index0 + i, - "Unexpected Index: entries[%s].getIndex()=%s but entries[0].getIndex()=%s", - i, indexi, index0); - } + if (future != null) { + future.join(); } + return reply; } @Override public AppendEntriesReplyProto appendEntries(AppendEntriesRequestProto r) throws IOException { try { - return appendEntriesAsync(r).join(); + return appendEntriesAsync(ReferenceCountedObject.wrap(r)).join(); } catch (CompletionException e) { throw IOUtils.asIOException(JavaUtils.unwrapCompletionException(e)); } } @Override - public CompletableFuture appendEntriesAsync(AppendEntriesRequestProto r) - throws IOException { + public CompletableFuture appendEntriesAsync( + ReferenceCountedObject requestRef) throws IOException { + final AppendEntriesRequestProto r = requestRef.retain(); final RaftRpcRequestProto request = r.getServerRequest(); - final List entries = r.getEntriesList(); final TermIndex previous = r.hasPreviousLog()? 
TermIndex.valueOf(r.getPreviousLog()) : null; - final RaftPeerId requestorId = RaftPeerId.valueOf(request.getRequestorId()); - - preAppendEntriesAsync(requestorId, ProtoUtils.toRaftGroupId(request.getRaftGroupId()), r.getLeaderTerm(), - previous, r.getLeaderCommit(), r.getInitializing(), entries); try { - return appendEntriesAsync(requestorId, r.getLeaderTerm(), previous, r.getLeaderCommit(), - request.getCallId(), r.getInitializing(), r.getCommitInfosList(), entries); + final RaftPeerId leaderId = RaftPeerId.valueOf(request.getRequestorId()); + final RaftGroupId leaderGroupId = ProtoUtils.toRaftGroupId(request.getRaftGroupId()); + + CodeInjectionForTesting.execute(APPEND_ENTRIES, getId(), leaderId, previous, r); + + assertLifeCycleState(LifeCycle.States.STARTING_OR_RUNNING); + if (!startComplete.get()) { + throw new ServerNotReadyException(getMemberId() + ": The server role is not yet initialized."); + } + assertGroup(getMemberId(), leaderId, leaderGroupId); + assertEntries(r, previous, state); + + return appendEntriesAsync(leaderId, request.getCallId(), previous, requestRef); } catch(Exception t) { - LOG.error("{}: Failed appendEntriesAsync {}", getMemberId(), r, t); - throw t; + LOG.error("{}: Failed appendEntries* {}", getMemberId(), + toAppendEntriesRequestString(r, stateMachine::toStateMachineLogEntryString), t); + throw IOUtils.asIOException(t); + } finally { + requestRef.release(); } } @@ -1494,14 +1563,12 @@ public CompletableFuture readIndexAsync(ReadIndexRequestPro final LeaderStateImpl leader = role.getLeaderState().orElse(null); if (leader == null) { - return CompletableFuture.completedFuture( - ServerProtoUtils.toReadIndexReplyProto(peerId, getMemberId(), false, RaftLog.INVALID_LOG_INDEX)); + return CompletableFuture.completedFuture(toReadIndexReplyProto(peerId, getMemberId())); } return getReadIndex(ClientProtoUtils.toRaftClientRequest(request.getClientRequest()), leader) - .thenApply(index -> ServerProtoUtils.toReadIndexReplyProto(peerId, 
getMemberId(), true, index)) - .exceptionally(throwable -> - ServerProtoUtils.toReadIndexReplyProto(peerId, getMemberId(), false, RaftLog.INVALID_LOG_INDEX)); + .thenApply(index -> toReadIndexReplyProto(peerId, getMemberId(), true, index)) + .exceptionally(throwable -> toReadIndexReplyProto(peerId, getMemberId())); } static void logAppendEntries(boolean isHeartbeat, Supplier message) { @@ -1526,24 +1593,6 @@ Optional updateLastRpcTime(FollowerState.UpdateType updateType) { } } - private void preAppendEntriesAsync(RaftPeerId leaderId, RaftGroupId leaderGroupId, long leaderTerm, - TermIndex previous, long leaderCommit, boolean initializing, List entries) throws IOException { - CodeInjectionForTesting.execute(APPEND_ENTRIES, getId(), - leaderId, leaderTerm, previous, leaderCommit, initializing, entries); - - assertLifeCycleState(LifeCycle.States.STARTING_OR_RUNNING); - if (!startComplete.get()) { - throw new ServerNotReadyException(getMemberId() + ": The server role is not yet initialized."); - } - assertGroup(leaderId, leaderGroupId); - - try { - validateEntries(leaderTerm, previous, entries); - } catch (IllegalArgumentException e) { - throw new IOException(e); - } - } - private long updateCommitInfoCache() { return commitInfoCache.update(getId(), state.getLog().getLastCommittedIndex()); } @@ -1552,45 +1601,38 @@ ExecutorService getServerExecutor() { return serverExecutor; } - @SuppressWarnings("checkstyle:parameternumber") - private CompletableFuture appendEntriesAsync( - RaftPeerId leaderId, long leaderTerm, TermIndex previous, long leaderCommit, long callId, boolean initializing, - List commitInfos, List entries) throws IOException { + private CompletableFuture appendEntriesAsync(RaftPeerId leaderId, long callId, + TermIndex previous, ReferenceCountedObject requestRef) throws IOException { + final AppendEntriesRequestProto proto = requestRef.get(); + final List entries = proto.getEntriesList(); final boolean isHeartbeat = entries.isEmpty(); - 
logAppendEntries(isHeartbeat, - () -> getMemberId() + ": receive appendEntries(" + leaderId + ", " + leaderTerm + ", " - + previous + ", " + leaderCommit + ", " + initializing - + ", commits:" + ProtoUtils.toString(commitInfos) - + ", cId:" + callId - + ", entries: " + LogProtoUtils.toLogEntriesString(entries)); + logAppendEntries(isHeartbeat, () -> getMemberId() + ": appendEntries* " + + toAppendEntriesRequestString(proto, stateMachine::toStateMachineLogEntryString)); + final long leaderTerm = proto.getLeaderTerm(); final long currentTerm; final long followerCommit = state.getLog().getLastCommittedIndex(); final Optional followerState; final Timekeeper.Context timer = raftServerMetrics.getFollowerAppendEntryTimer(isHeartbeat).time(); + final CompletableFuture future; synchronized (this) { // Check life cycle state again to avoid the PAUSING/PAUSED state. assertLifeCycleState(LifeCycle.States.STARTING_OR_RUNNING); - final boolean recognized = state.recognizeLeader(leaderId, leaderTerm); currentTerm = state.getCurrentTerm(); + final boolean recognized = state.recognizeLeader(Op.APPEND_ENTRIES, leaderId, leaderTerm); if (!recognized) { - final AppendEntriesReplyProto reply = ServerProtoUtils.toAppendEntriesReplyProto( + return CompletableFuture.completedFuture(toAppendEntriesReplyProto( leaderId, getMemberId(), currentTerm, followerCommit, state.getNextIndex(), - AppendResult.NOT_LEADER, callId, RaftLog.INVALID_LOG_INDEX, isHeartbeat); - if (LOG.isDebugEnabled()) { - LOG.debug("{}: Not recognize {} (term={}) as leader, state: {} reply: {}", - getMemberId(), leaderId, leaderTerm, state, ServerStringUtils.toAppendEntriesReplyString(reply)); - } - return CompletableFuture.completedFuture(reply); + AppendResult.NOT_LEADER, callId, RaftLog.INVALID_LOG_INDEX, isHeartbeat)); } try { - changeToFollowerAndPersistMetadata(leaderTerm, true, "appendEntries"); + future = changeToFollowerAndPersistMetadata(leaderTerm, true, "appendEntries"); } catch (IOException e) { return 
JavaUtils.completeExceptionally(e); } state.setLeader(leaderId, "appendEntries"); - if (!initializing && lifeCycle.compareAndTransition(State.STARTING, State.RUNNING)) { + if (!proto.getInitializing() && lifeCycle.compareAndTransition(State.STARTING, State.RUNNING)) { role.startFollowerState(this, Op.APPEND_ENTRIES); } followerState = updateLastRpcTime(FollowerState.UpdateType.APPEND_START); @@ -1602,20 +1644,23 @@ leaderId, getMemberId(), currentTerm, followerCommit, state.getNextIndex(), // 3. There is a gap between the local log and the entries // In any of these scenarios, we should return an INCONSISTENCY reply // back to leader so that the leader can update this follower's next index. - - AppendEntriesReplyProto inconsistencyReply = checkInconsistentAppendEntries( - leaderId, currentTerm, followerCommit, previous, callId, isHeartbeat, entries); - if (inconsistencyReply != null) { + final long inconsistencyReplyNextIndex = checkInconsistentAppendEntries(previous, entries); + if (inconsistencyReplyNextIndex > RaftLog.INVALID_LOG_INDEX) { + final AppendEntriesReplyProto reply = toAppendEntriesReplyProto( + leaderId, getMemberId(), currentTerm, followerCommit, inconsistencyReplyNextIndex, + AppendResult.INCONSISTENCY, callId, RaftLog.INVALID_LOG_INDEX, isHeartbeat); + LOG.info("{}: appendEntries* reply {}", getMemberId(), toAppendEntriesReplyString(reply)); followerState.ifPresent(fs -> fs.updateLastRpcTime(FollowerState.UpdateType.APPEND_COMPLETE)); - return CompletableFuture.completedFuture(inconsistencyReply); + return future.thenApply(dummy -> reply); } state.updateConfiguration(entries); } + future.join(); + final CompletableFuture appendLog = entries.isEmpty()? CompletableFuture.completedFuture(null) + : appendLog(requestRef.delegate(entries)); - final List> futures = entries.isEmpty() ? 
Collections.emptyList() - : state.getLog().append(entries); - commitInfos.forEach(commitInfoCache::update); + proto.getCommitInfosList().forEach(commitInfoCache::update); CodeInjectionForTesting.execute(LOG_SYNC, getId(), null); if (!isHeartbeat) { @@ -1625,47 +1670,43 @@ leaderId, getMemberId(), currentTerm, followerCommit, state.getNextIndex(), stateMachine.event().notifySnapshotInstalled(InstallSnapshotResult.SUCCESS, installedIndex, getPeer()); } } - return JavaUtils.allOf(futures).whenCompleteAsync( - (r, t) -> followerState.ifPresent(fs -> fs.updateLastRpcTime(FollowerState.UpdateType.APPEND_COMPLETE)), - serverExecutor - ).thenApply(v -> { - final AppendEntriesReplyProto reply; - synchronized(this) { - final long commitIndex = ServerImplUtils.effectiveCommitIndex(leaderCommit, previous, entries.size()); - state.updateCommitIndex(commitIndex, currentTerm, false); + + final long commitIndex = effectiveCommitIndex(proto.getLeaderCommit(), previous, entries.size()); + final long matchIndex = isHeartbeat? 
RaftLog.INVALID_LOG_INDEX: entries.get(entries.size() - 1).getIndex(); + return appendLog.whenCompleteAsync((r, t) -> { + followerState.ifPresent(fs -> fs.updateLastRpcTime(FollowerState.UpdateType.APPEND_COMPLETE)); + timer.stop(); + }, getServerExecutor()).thenApply(v -> { + final boolean updated = state.updateCommitIndex(commitIndex, currentTerm, false); + if (updated) { updateCommitInfoCache(); - final long n; - final long matchIndex; - if (!isHeartbeat) { - LogEntryProto requestLastEntry = entries.get(entries.size() - 1); - n = requestLastEntry.getIndex() + 1; - matchIndex = requestLastEntry.getIndex(); - } else { - n = state.getLog().getNextIndex(); - matchIndex = RaftLog.INVALID_LOG_INDEX; - } - reply = ServerProtoUtils.toAppendEntriesReplyProto(leaderId, getMemberId(), currentTerm, - state.getLog().getLastCommittedIndex(), n, AppendResult.SUCCESS, callId, matchIndex, isHeartbeat); } - logAppendEntries(isHeartbeat, () -> getMemberId() + ": succeeded to handle AppendEntries. Reply: " - + ServerStringUtils.toAppendEntriesReplyString(reply)); - timer.stop(); // TODO: future never completes exceptionally? + final long nextIndex = isHeartbeat? state.getNextIndex(): matchIndex + 1; + final AppendEntriesReplyProto reply = toAppendEntriesReplyProto(leaderId, getMemberId(), + currentTerm, updated? 
commitIndex : state.getLog().getLastCommittedIndex(), + nextIndex, AppendResult.SUCCESS, callId, matchIndex, isHeartbeat); + logAppendEntries(isHeartbeat, () -> getMemberId() + + ": appendEntries* reply " + toAppendEntriesReplyString(reply)); return reply; }); } - - private AppendEntriesReplyProto checkInconsistentAppendEntries(RaftPeerId leaderId, long currentTerm, - long followerCommit, TermIndex previous, long callId, boolean isHeartbeat, List entries) { - final long replyNextIndex = checkInconsistentAppendEntries(previous, entries); - if (replyNextIndex == -1) { - return null; + private CompletableFuture appendLog(ReferenceCountedObject> entriesRef) { + final List entriesTermIndices; + try(UncheckedAutoCloseableSupplier> entries = entriesRef.retainAndReleaseOnClose()) { + entriesTermIndices = ConsecutiveIndices.convert(entries.get()); + if (!appendLogTermIndices.append(entriesTermIndices)) { + // index already exists, return the last future + return appendLogFuture.get(); + } } - final AppendEntriesReplyProto reply = ServerProtoUtils.toAppendEntriesReplyProto( - leaderId, getMemberId(), currentTerm, followerCommit, replyNextIndex, - AppendResult.INCONSISTENCY, callId, RaftLog.INVALID_LOG_INDEX, isHeartbeat); - LOG.info("{}: inconsistency entries. 
Reply:{}", getMemberId(), ServerStringUtils.toAppendEntriesReplyString(reply)); - return reply; + entriesRef.retain(); + return appendLogFuture.updateAndGet(f -> f.thenCompose( + ignored -> JavaUtils.allOf(state.getLog().append(entriesRef)))) + .whenComplete((v, e) -> { + entriesRef.release(); + appendLogTermIndices.removeExisting(entriesTermIndices); + }); } private long checkInconsistentAppendEntries(TermIndex previous, List entries) { @@ -1692,13 +1733,13 @@ private long checkInconsistentAppendEntries(TermIndex previous, List stateMachine.startTransaction(entry, getInfo().getCurrentRole()))); } - CompletableFuture applyLogToStateMachine(LogEntryProto next) throws RaftLogIOException { - if (!next.hasStateMachineLogEntry()) { - stateMachine.event().notifyTermIndexUpdated(next.getTerm(), next.getIndex()); - } + CompletableFuture applyLogToStateMachine(ReferenceCountedObject nextRef) + throws RaftLogIOException { + LogEntryProto next = nextRef.get(); + CompletableFuture messageFuture = null; - if (next.hasConfigurationEntry()) { + switch (next.getLogEntryBodyCase()) { + case CONFIGURATIONENTRY: // the reply should have already been set. only need to record // the new conf in the metadata file and notify the StateMachine. 
state.writeRaftConfiguration(next); - stateMachine.event().notifyConfigurationChanged(next.getTerm(), next.getIndex(), next.getConfigurationEntry()); + stateMachine.event().notifyConfigurationChanged(next.getTerm(), next.getIndex(), + next.getConfigurationEntry()); role.getLeaderState().ifPresent(leader -> leader.checkReady(next)); - } else if (next.hasStateMachineLogEntry()) { + break; + case STATEMACHINELOGENTRY: TransactionContext trx = getTransactionContext(next, true); + Objects.requireNonNull(trx, "trx == null"); final ClientInvocationId invocationId = ClientInvocationId.valueOf(next.getStateMachineLogEntry()); writeIndexCache.add(invocationId.getClientId(), ((TransactionContextImpl) trx).getLogIndexFuture()); - + ((TransactionContextImpl) trx).setDelegatedRef(nextRef); try { // Let the StateMachine inject logic for committed transactions in sequential order. trx = stateMachine.applyTransactionSerial(trx); final CompletableFuture stateMachineFuture = stateMachine.applyTransaction(trx); - return replyPendingRequest(invocationId, TermIndex.valueOf(next), stateMachineFuture); + messageFuture = replyPendingRequest(invocationId, TermIndex.valueOf(next), stateMachineFuture); } catch (Exception e) { throw new RaftLogIOException(e); } + break; + case METADATAENTRY: + break; + default: + throw new IllegalStateException("Unexpected LogEntryBodyCase " + next.getLogEntryBodyCase() + ", next=" + next); } - return null; + + if (next.getLogEntryBodyCase() != LogEntryProto.LogEntryBodyCase.STATEMACHINELOGENTRY) { + stateMachine.event().notifyTermIndexUpdated(next.getTerm(), next.getIndex()); + } + return messageFuture; } /** @@ -1898,6 +1947,7 @@ CompletableFuture applyLogToStateMachine(LogEntryProto next) throws Raf * @param logEntry the log entry being truncated */ void notifyTruncatedLogEntry(LogEntryProto logEntry) { + Optional.ofNullable(getState()).ifPresent(s -> s.truncate(logEntry.getIndex())); if (logEntry.hasStateMachineLogEntry()) { 
getTransactionManager().remove(TermIndex.valueOf(logEntry)); @@ -1923,4 +1973,8 @@ public RaftServerMetricsImpl getRaftServerMetrics() { void onGroupLeaderElected() { transferLeadership.complete(TransferLeadership.Result.SUCCESS); } + + boolean isRunning() { + return startComplete.get() && lifeCycle.getCurrentState() == State.RUNNING; + } } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java index cb7918e51d..2914c434f1 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java @@ -67,7 +67,12 @@ import java.util.Objects; import java.util.Optional; import java.util.UUID; -import java.util.concurrent.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; import java.util.function.Predicate; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -138,6 +143,7 @@ private void close(RaftGroupId groupId, CompletableFuture future } catch (Throwable t) { LOG.warn("{}: Failed to close the division for {}", getId(), groupId, t); } + impl.getStateMachine().event().notifyServerShutdown(impl.getRoleInfoProto(), true); } synchronized List getGroupIds() { @@ -415,6 +421,13 @@ private void startImpl() throws IOException { public void close() { lifeCycle.checkStateAndClose(() -> { LOG.info("{}: close", getId()); + + try { + ConcurrentUtils.shutdownAndWait(implExecutor.get()); + } catch (Exception ignored) { + LOG.warn(getId() + ": Failed to shutdown implExecutor", ignored); + } + impls.close(); try { @@ -429,12 +442,6 @@ public void close() { LOG.warn(getId() + ": Failed to close " + SupportedDataStreamType.NETTY + 
" server", ignored); } - try { - ConcurrentUtils.shutdownAndWait(implExecutor.get()); - } catch (Exception ignored) { - LOG.warn(getId() + ": Failed to shutdown implExecutor", ignored); - } - try { ConcurrentUtils.shutdownAndWait(executor.get()); } catch (Exception ignored) { @@ -645,10 +652,17 @@ public StartLeaderElectionReplyProto startLeaderElection(StartLeaderElectionRequ } @Override - public CompletableFuture appendEntriesAsync(AppendEntriesRequestProto request) { - final RaftGroupId groupId = ProtoUtils.toRaftGroupId(request.getServerRequest().getRaftGroupId()); - return getImplFuture(groupId) - .thenCompose(impl -> impl.executeSubmitServerRequestAsync(() -> impl.appendEntriesAsync(request))); + public CompletableFuture appendEntriesAsync( + ReferenceCountedObject requestRef) { + AppendEntriesRequestProto request = requestRef.retain(); + try { + final RaftGroupId groupId = ProtoUtils.toRaftGroupId(request.getServerRequest().getRaftGroupId()); + return getImplFuture(groupId) + .thenCompose(impl -> JavaUtils.callAsUnchecked( + () -> impl.appendEntriesAsync(requestRef), CompletionException::new)); + } finally { + requestRef.release(); + } } @Override diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/ReadIndexHeartbeats.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/ReadIndexHeartbeats.java index d08a1ea406..4ff1460d7d 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/ReadIndexHeartbeats.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/ReadIndexHeartbeats.java @@ -23,6 +23,7 @@ import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.server.raftlog.RaftLogIndex; import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -123,8 +124,15 @@ boolean isAcknowledged(RaftPeerId id) { class AppendEntriesListeners { private final NavigableMap sorted = new TreeMap<>(); + private Exception exception = 
null; synchronized AppendEntriesListener add(long commitIndex, Function constructor) { + if (exception != null) { + Preconditions.assertTrue(sorted.isEmpty()); + final AppendEntriesListener listener = constructor.apply(commitIndex); + listener.getFuture().completeExceptionally(exception); + return listener; + } return sorted.computeIfAbsent(commitIndex, constructor); } @@ -152,6 +160,10 @@ synchronized void onAppendEntriesReply(LogAppender appender, AppendEntriesReplyP } synchronized void failAll(Exception e) { + if (exception != null) { + return; + } + exception = e; sorted.forEach((index, listener) -> listener.getFuture().completeExceptionally(e)); sorted.clear(); } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/ReplyFlusher.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/ReplyFlusher.java new file mode 100644 index 0000000000..47e9967c11 --- /dev/null +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/ReplyFlusher.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.server.impl; + +import org.apache.ratis.server.raftlog.RaftLogIndex; +import org.apache.ratis.util.CodeInjectionForTesting; +import org.apache.ratis.util.Daemon; +import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.LifeCycle; +import org.apache.ratis.util.TimeDuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.LinkedList; +import java.util.concurrent.TimeUnit; +import java.util.function.LongSupplier; + +/** + * Implements the reply flush logic as part of the leader batch write when RepliedIndex is used. + */ +public class ReplyFlusher { + static final Logger LOG = LoggerFactory.getLogger(ReplyFlusher.class); + + private static final String CLASS_NAME = JavaUtils.getClassSimpleName(RaftServerImpl.class); + public static final String FLUSH = CLASS_NAME + ".flush"; + + static class Replies { + /** When a {@link LongSupplier} is invoked, it completes a write reply and return the log index. */ + private LinkedList list = new LinkedList<>(); + + synchronized void add(LongSupplier replyMethod) { + list.add(replyMethod); + } + + synchronized LinkedList getAndSetNewList() { + final LinkedList old = list; + list = new LinkedList<>(); + return old; + } + } + + private final Object id; + private final LifeCycle lifeCycle; + private final Daemon daemon; + private final Replies replies = new Replies(); + private final RaftLogIndex repliedIndex; + /** The interval at which held write replies are flushed. 
*/ + private final TimeDuration batchInterval; + + ReplyFlusher(Object id, long repliedIndex, TimeDuration batchInterval) { + this.id = id; + final String name = id + "-ReplyFlusher"; + this.lifeCycle = new LifeCycle(name); + this.daemon = Daemon.newBuilder() + .setName(name) + .setRunnable(this::run) + .build(); + this.repliedIndex = new RaftLogIndex("repliedIndex", repliedIndex); + this.batchInterval = batchInterval; + } + + long getRepliedIndex() { + return repliedIndex.get(); + } + + /** Hold a write reply for later batch flushing */ + void hold(LongSupplier replyMethod) { + replies.add(replyMethod); + } + + void start(long startIndex) { + repliedIndex.updateToMax(startIndex, s -> LOG.debug("{}: {}", id, s)); + lifeCycle.transition(LifeCycle.State.STARTING); + // We need to transition to RUNNING first so that ReplyFlusher#run always + // see that the lifecycle state is in RUNNING state. + lifeCycle.transition(LifeCycle.State.RUNNING); + daemon.start(); + } + + /** The reply flusher daemon loop. */ + private void run() { + try { + while (lifeCycle.getCurrentState() == LifeCycle.State.RUNNING) { + batchInterval.sleep(); + flush(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.warn("{}: Interrupted ", daemon.getName(), e); + } finally { + // Flush remaining on exit + flush(); + } + } + + /** Flush all held replies and advance {@link #repliedIndex}. */ + private void flush() { + CodeInjectionForTesting.execute(FLUSH, id, null); + + final LinkedList toFlush = replies.getAndSetNewList(); + if (toFlush.isEmpty()) { + return; + } + long maxIndex = toFlush.removeLast().getAsLong(); + for (LongSupplier held : toFlush) { + maxIndex = Math.max(maxIndex, held.getAsLong()); + } + repliedIndex.updateToMax(maxIndex, s -> + LOG.debug("{}: flushed {} replies, {}", id, toFlush.size(), s)); + } + + /** Stop the reply flusher daemon. 
*/ + void stop() { + lifeCycle.checkStateAndClose(); + daemon.interrupt(); + try { + daemon.join(batchInterval.toLong(TimeUnit.MILLISECONDS )* 2); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +} diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RoleInfo.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RoleInfo.java index fe2bc963b1..409d7a06bd 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RoleInfo.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RoleInfo.java @@ -18,8 +18,14 @@ package org.apache.ratis.server.impl; +import org.apache.ratis.proto.RaftProtos.CandidateInfoProto; +import org.apache.ratis.proto.RaftProtos.FollowerInfoProto; +import org.apache.ratis.proto.RaftProtos.LeaderInfoProto; import org.apache.ratis.proto.RaftProtos.RaftPeerRole; +import org.apache.ratis.proto.RaftProtos.RoleInfoProto; +import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.server.leader.LogAppender; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.Timestamp; @@ -32,6 +38,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import static org.apache.ratis.server.impl.ServerProtoUtils.toServerRpcProto; + /** * Maintain the Role of a Raft Peer. 
*/ @@ -39,7 +47,7 @@ class RoleInfo { public static final Logger LOG = LoggerFactory.getLogger(RoleInfo.class); private final RaftPeerId id; - private volatile RaftPeerRole role; + private final AtomicReference role = new AtomicReference<>(); /** Used when the peer is leader */ private final AtomicReference leaderState = new AtomicReference<>(); /** Used when the peer is follower, to monitor election timeout */ @@ -56,7 +64,7 @@ class RoleInfo { } void transitionRole(RaftPeerRole newRole) { - this.role = newRole; + this.role.set(newRole); this.transitionTime.set(Timestamp.currentTime()); } @@ -65,7 +73,7 @@ long getRoleElapsedTimeMs() { } RaftPeerRole getCurrentRole() { - return role; + return role.get(); } boolean isLeaderReady() { @@ -105,33 +113,33 @@ void startFollowerState(RaftServerImpl server, Object reason) { updateAndGet(followerState, new FollowerState(server, reason)).start(); } - void shutdownFollowerState() { + CompletableFuture shutdownFollowerState() { final FollowerState follower = followerState.getAndSet(null); - if (follower != null) { - LOG.info("{}: shutdown {}", id, follower); - follower.stopRunning(); - follower.interrupt(); + if (follower == null) { + return CompletableFuture.completedFuture(null); } + LOG.info("{}: shutdown {}", id, follower); + return follower.stopRunning(); } void startLeaderElection(RaftServerImpl server, boolean force) { if (pauseLeaderElection.get()) { return; } - updateAndGet(leaderElection, new LeaderElection(server, force)).start(); + updateAndGet(leaderElection, LeaderElection.newInstance(server, force)).start(); } void setLeaderElectionPause(boolean pause) { pauseLeaderElection.set(pause); } - void shutdownLeaderElection() { + CompletableFuture shutdownLeaderElection() { final LeaderElection election = leaderElection.getAndSet(null); - if (election != null) { - LOG.info("{}: shutdown {}", id, election); - election.shutdown(); - // no need to interrupt the election thread + if (election == null) { + return 
CompletableFuture.completedFuture(null); } + LOG.info("{}: shutdown {}", id, election); + return election.shutdown(); } private T updateAndGet(AtomicReference ref, T current) { @@ -141,6 +149,55 @@ private T updateAndGet(AtomicReference ref, T current) { return updated; } + RoleInfoProto buildRoleInfoProto(RaftServerImpl server) { + final RaftPeerRole currentRole = getCurrentRole(); + final RoleInfoProto.Builder proto = RoleInfoProto.newBuilder() + .setSelf(server.getPeer().getRaftPeerProto()) + .setRole(currentRole) + .setRoleElapsedTimeMs(getRoleElapsedTimeMs()); + + switch (currentRole) { + case LEADER: + getLeaderState().ifPresent(leader -> { + final LeaderInfoProto.Builder b = LeaderInfoProto.newBuilder() + .setTerm(leader.getCurrentTerm()); + leader.getLogAppenders() + .map(LogAppender::getFollower) + .map(f -> toServerRpcProto(f.getPeer(), f.getLastRpcResponseTime().elapsedTimeMs())) + .forEach(b::addFollowerInfo); + proto.setLeaderInfo(b); + }); + return proto.build(); + + case CANDIDATE: + return proto.setCandidateInfo(CandidateInfoProto.newBuilder() + .setLastLeaderElapsedTimeMs(server.getState().getLastLeaderElapsedTimeMs())) + .build(); + + case LISTENER: + case FOLLOWER: + // FollowerState can be null while adding a new peer as it is not a voting member yet + final FollowerState follower = getFollowerState().orElse(null); + final long rpcElapsed; + final int outstandingOp; + if (follower != null) { + rpcElapsed = follower.getLastRpcTime().elapsedTimeMs(); + outstandingOp = follower.getOutstandingOp(); + } else { + rpcElapsed = 0; + outstandingOp = 0; + } + final RaftPeer leader = server.getRaftConf().getPeer(server.getState().getLeaderId()); + return proto.setFollowerInfo(FollowerInfoProto.newBuilder() + .setLeaderInfo(toServerRpcProto(leader, rpcElapsed)) + .setOutstandingOp(outstandingOp)) + .build(); + + default: + throw new IllegalStateException("Unexpected role " + currentRole); + } + } + @Override public String toString() { return 
String.format("%9s", role); diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerImplUtils.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerImplUtils.java index e4fe8f232f..864b402a23 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerImplUtils.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerImplUtils.java @@ -19,9 +19,15 @@ import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.proto.RaftProtos.AppendEntriesRequestProto; +import org.apache.ratis.proto.RaftProtos.LogEntryProto; +import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftGroupMemberId; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.exceptions.GroupMismatchException; import org.apache.ratis.server.RaftConfiguration; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.protocol.TermIndex; @@ -34,12 +40,127 @@ import org.apache.ratis.util.TimeDuration; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; -import java.util.Optional; +import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeMap; +import java.util.Objects; import java.util.concurrent.TimeUnit; /** Server utilities for internal use. */ public final class ServerImplUtils { + /** The consecutive indices within the same term. 
*/ + static class ConsecutiveIndices { + /** Convert the given entries to a list of {@link ConsecutiveIndices} */ + static List convert(List entries) { + if (entries == null || entries.isEmpty()) { + return Collections.emptyList(); + } + + List indices = null; + + LogEntryProto previous = entries.get(0); + long startIndex = previous.getIndex(); + int count = 1; + + for (int i = 1; i < entries.size(); i++) { + final LogEntryProto current = entries.get(i); + // validate if the indices are consecutive + Preconditions.assertSame(previous.getIndex() + 1, current.getIndex(), "index"); + + if (current.getTerm() == previous.getTerm()) { + count++; + } else { + // validate if the terms are increasing + Preconditions.assertTrue(previous.getTerm() < current.getTerm(), "term"); + if (indices == null) { + indices = new ArrayList<>(); + } + indices.add(new ConsecutiveIndices(previous.getTerm(), startIndex, count)); + + startIndex = current.getIndex(); + count = 1; + } + previous = current; + } + + final ConsecutiveIndices last = new ConsecutiveIndices(previous.getTerm(), startIndex, count); + if (indices == null) { + return Collections.singletonList(last); + } else { + indices.add(last); + return indices; + } + } + + private final long term; + private final long startIndex; + private final int count; + + ConsecutiveIndices(long term, long startIndex, int count) { + Preconditions.assertTrue(count > 0, () -> "count = " + count + " <= 0 "); + this.term = term; + this.startIndex = startIndex; + this.count = count; + } + + long getNextIndex() { + return startIndex + count; + } + + Long getTerm(long index) { + final long diff = index - startIndex; + return diff < 0 || diff >= count ? null: term; + } + } + + /** A data structure to support the {@link #contains(TermIndex)} method. 
*/ + static class NavigableIndices { + private final NavigableMap map = new TreeMap<>(); + + boolean contains(TermIndex ti) { + final Long term = getTerm(ti.getIndex()); + return term != null && term == ti.getTerm(); + } + + synchronized Long getTerm(long index) { + if (map.isEmpty()) { + return null; + } + + final Map.Entry floorEntry = map.floorEntry(index); + if (floorEntry == null) { + return null; + } + return floorEntry.getValue().getTerm(index); + } + + synchronized boolean append(List entriesTermIndices) { + for(int i = 0; i < entriesTermIndices.size(); i++) { + final ConsecutiveIndices indices = entriesTermIndices.get(i); + final ConsecutiveIndices previous = map.put(indices.startIndex, indices); + if (previous != null) { + // index already exists, revert this append + map.put(previous.startIndex, previous); + for(int j = 0; j < i; j++) { + map.remove(entriesTermIndices.get(j).startIndex); + } + return false; + } + } + return true; + } + + synchronized void removeExisting(List entriesTermIndices) { + for(ConsecutiveIndices indices : entriesTermIndices) { + final ConsecutiveIndices removed = map.remove(indices.startIndex); + Preconditions.assertSame(indices, removed, "removed"); + } + } + } + private ServerImplUtils() { //Never constructed } @@ -50,8 +171,8 @@ public static RaftServerProxy newRaftServer( ThreadGroup threadGroup, RaftProperties properties, Parameters parameters) throws IOException { RaftServer.LOG.debug("newRaftServer: {}, {}", id, group); if (group != null && !group.getPeers().isEmpty()) { - Preconditions.assertNotNull(id, "RaftPeerId %s is not in RaftGroup %s", id, group); - Preconditions.assertNotNull(group.getPeer(id), "RaftPeerId %s is not in RaftGroup %s", id, group); + Objects.requireNonNull(id, () -> "RaftPeerId " + id + " is not in RaftGroup " + group); + Objects.requireNonNull(group.getPeer(id), () -> "RaftPeerId " + id + " is not in RaftGroup " + group); } final RaftServerProxy proxy = newRaftServer(id, stateMachineRegistry, 
threadGroup, properties, parameters); proxy.initGroups(group, option); @@ -88,7 +209,51 @@ public static RaftConfiguration newRaftConfiguration(List conf, List entries = proto.getEntriesList(); + if (entries != null && !entries.isEmpty()) { + final long index0 = entries.get(0).getIndex(); + // Check if next entry's index is 1 greater than the snapshotIndex. If yes, then + // we do not have to check for the existence of previous. + if (index0 != state.getSnapshotIndex() + 1) { + final long expected = previous == null || previous.getTerm() == 0 ? 0 : previous.getIndex() + 1; + Preconditions.assertTrue(index0 == expected, + "Unexpected Index: previous is %s but entries[%s].getIndex() == %s != %s", + previous, 0, index0, expected); + } + + final long leaderTerm = proto.getLeaderTerm(); + for (int i = 0; i < entries.size(); i++) { + final LogEntryProto entry = entries.get(i); + final long entryTerm = entry.getTerm(); + Preconditions.assertTrue(entryTerm <= leaderTerm , + "Unexpected Term: entries[%s].getTerm() == %s > leaderTerm == %s", + i, entryTerm, leaderTerm); + + final long indexI = entry.getIndex(); + final long expected = index0 + i; + Preconditions.assertTrue(indexI == expected, + "Unexpected Index: entries[0].getIndex() == %s but entries[%s].getIndex() == %s != %s", + index0, i, indexI, expected); + } + } } } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerProtoUtils.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerProtoUtils.java index f2be8c61c6..494037f373 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerProtoUtils.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerProtoUtils.java @@ -43,11 +43,24 @@ private static RaftRpcReplyProto.Builder toRaftRpcReplyProtoBuilder( } static RequestVoteReplyProto toRequestVoteReplyProto( - RaftPeerId requestorId, RaftGroupMemberId replyId, boolean success, long term, boolean shouldShutdown) { + RaftPeerId requestorId, 
RaftGroupMemberId replyId, boolean success, long term, boolean shouldShutdown, + TermIndex lastEntry) { return RequestVoteReplyProto.newBuilder() .setServerReply(toRaftRpcReplyProtoBuilder(requestorId, replyId, success)) .setTerm(term) .setShouldShutdown(shouldShutdown) + .setLastEntry((lastEntry != null? lastEntry : TermIndex.INITIAL_VALUE).toProto()) + .build(); + } + + static RequestVoteReplyProto toRequestVoteReplyProto( + RaftPeerId requestorId, RaftGroupMemberId replyId, boolean success, long term, boolean shouldShutdown, + TermIndex lastEntry, long callId) { + return RequestVoteReplyProto.newBuilder() + .setServerReply(toRaftRpcReplyProtoBuilder(requestorId, replyId, success).setCallId(callId)) + .setTerm(term) + .setShouldShutdown(shouldShutdown) + .setLastEntry((lastEntry != null? lastEntry : TermIndex.INITIAL_VALUE).toProto()) .build(); } @@ -114,7 +127,7 @@ static ReadIndexRequestProto toReadIndexRequestProto( RaftClientRequest clientRequest, RaftGroupMemberId requestorId, RaftPeerId replyId) { return ReadIndexRequestProto.newBuilder() .setServerRequest(ClientProtoUtils.toRaftRpcRequestProtoBuilder(requestorId, replyId)) - .setClientRequest(ClientProtoUtils.toRaftClientRequestProto(clientRequest)) + .setClientRequest(ClientProtoUtils.toRaftClientRequestProto(clientRequest, false)) .build(); } @@ -126,6 +139,10 @@ static ReadIndexReplyProto toReadIndexReplyProto( .build(); } + static ReadIndexReplyProto toReadIndexReplyProto(RaftPeerId requestorId, RaftGroupMemberId replyId) { + return toReadIndexReplyProto(requestorId, replyId, false, RaftLog.INVALID_LOG_INDEX); + } + @SuppressWarnings("parameternumber") static AppendEntriesReplyProto toAppendEntriesReplyProto( RaftPeerId requestorId, RaftGroupMemberId replyId, long term, diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerState.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerState.java index e21f63caa6..c49e9554f0 100644 --- 
a/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerState.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/ServerState.java @@ -87,6 +87,7 @@ class ServerState { /** * Candidate that this peer granted vote for in current term (or null if none) */ + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile RaftPeerId votedFor; /** @@ -158,6 +159,7 @@ void writeRaftConfiguration(LogEntryProto conf) { } void start() { + // initialize stateMachineUpdater stateMachineUpdater.get().start(); } @@ -170,6 +172,7 @@ private RaftLog initRaftLog(LongSupplier getSnapshotIndexFromStateMachine, RaftP } } + @SuppressWarnings({"squid:S2095"}) // Suppress closeable warning private static RaftLog initRaftLog(RaftGroupMemberId memberId, RaftServerImpl server, RaftStorage storage, Consumer logConsumer, LongSupplier getSnapshotIndexFromStateMachine, RaftProperties prop) throws IOException { @@ -318,27 +321,24 @@ TermIndex getLastEntry() { void appendLog(TransactionContext operation) throws StateMachineException { getLog().append(currentTerm.get(), operation); - Objects.requireNonNull(operation.getLogEntry()); + Objects.requireNonNull(operation.getLogEntryUnsafe(), "transaction-logEntry"); } - /** - * Check if accept the leader selfId and term from the incoming AppendEntries rpc. - * If accept, update the current state. - * @return true if the check passes - */ - boolean recognizeLeader(RaftPeerId peerLeaderId, long leaderTerm) { + /** @return true iff the given peer id is recognized as the leader. 
*/ + boolean recognizeLeader(Object op, RaftPeerId peerId, long peerTerm) { final long current = currentTerm.get(); - if (leaderTerm < current) { + if (peerTerm < current) { + LOG.warn("{}: Failed to recognize {} as leader for {} since peerTerm = {} < currentTerm = {}", + getMemberId(), peerId, op, peerTerm, current); return false; } final RaftPeerId curLeaderId = getLeaderId(); - if (leaderTerm > current || curLeaderId == null) { - // If the request indicates a term that is greater than the current term - // or no leader has been set for the current term, make sure to update - // leader and term later - return true; + if (peerTerm == current && curLeaderId != null && !curLeaderId.equals(peerId)) { + LOG.warn("{}: Failed to recognize {} as leader for {} since current leader is {} (peerTerm = currentTerm = {})", + getMemberId(), peerId, op, curLeaderId, current); + return false; } - return curLeaderId.equals(peerLeaderId); + return true; } static int compareLog(TermIndex lastEntry, TermIndex candidateLastEntry) { @@ -387,6 +387,10 @@ void setRaftConf(RaftConfiguration conf) { LOG.trace("{}: {}", getMemberId(), configurationManager); } + void truncate(long logIndex) { + configurationManager.removeConfigurations(logIndex); + } + void updateConfiguration(List entries) { if (entries != null && !entries.isEmpty()) { configurationManager.removeConfigurations(entries.get(0).getIndex()); @@ -424,7 +428,6 @@ void close() { } LOG.warn(getMemberId() + ": Failed to join " + getStateMachineUpdater(), e); } - LOG.info("{}: applyIndex: {}", getMemberId(), getLastAppliedIndex()); try { if (log.isInitialized()) { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/SnapshotInstallationHandler.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/SnapshotInstallationHandler.java index 9794314b83..4f1ac4177f 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/SnapshotInstallationHandler.java +++ 
b/ratis-server/src/main/java/org/apache/ratis/server/impl/SnapshotInstallationHandler.java @@ -32,9 +32,11 @@ import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.server.protocol.RaftServerProtocol; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.util.ServerStringUtils; +import org.apache.ratis.util.BatchLogger; import org.apache.ratis.util.CodeInjectionForTesting; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.Preconditions; @@ -45,15 +47,24 @@ import java.io.IOException; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; +import static org.apache.ratis.server.impl.ServerProtoUtils.toInstallSnapshotReplyProto; +import static org.apache.ratis.server.impl.ServerProtoUtils.toServerRpcProto; import static org.apache.ratis.server.raftlog.RaftLog.INVALID_LOG_INDEX; class SnapshotInstallationHandler { static final Logger LOG = LoggerFactory.getLogger(SnapshotInstallationHandler.class); + private enum BatchLogKey implements BatchLogger.Key { + INSTALL_SNAPSHOT_REQUEST, + INSTALL_SNAPSHOT_REPLY + } + static final TermIndex INVALID_TERM_INDEX = TermIndex.valueOf(0, INVALID_LOG_INDEX); private final RaftServerImpl server; @@ -65,6 +76,9 @@ class SnapshotInstallationHandler { new AtomicReference<>(INVALID_TERM_INDEX); private final AtomicBoolean isSnapshotNull = new AtomicBoolean(); private final AtomicLong installedIndex = new AtomicLong(INVALID_LOG_INDEX); + private final AtomicInteger nextChunkIndex = new AtomicInteger(-1); + /** The callId of the chunk with index 0. 
*/ + private final AtomicLong chunk0CallId = new AtomicLong(-1); SnapshotInstallationHandler(RaftServerImpl server, RaftProperties properties) { this.server = server; @@ -85,10 +99,9 @@ long getInProgressInstallSnapshotIndex() { } InstallSnapshotReplyProto installSnapshot(InstallSnapshotRequestProto request) throws IOException { - if (LOG.isInfoEnabled()) { - LOG.info("{}: receive installSnapshot: {}", getMemberId(), - ServerStringUtils.toInstallSnapshotRequestString(request)); - } + BatchLogger.print(BatchLogKey.INSTALL_SNAPSHOT_REQUEST, getMemberId(), + suffix -> LOG.info("{}: receive installSnapshot: {} {}", + getMemberId(), ServerStringUtils.toInstallSnapshotRequestString(request), suffix)); final InstallSnapshotReplyProto reply; try { reply = installSnapshotImpl(request); @@ -96,10 +109,9 @@ InstallSnapshotReplyProto installSnapshot(InstallSnapshotRequestProto request) t LOG.error("{}: installSnapshot failed", getMemberId(), e); throw e; } - if (LOG.isInfoEnabled()) { - LOG.info("{}: reply installSnapshot: {}", getMemberId(), - ServerStringUtils.toInstallSnapshotReplyString(reply)); - } + BatchLogger.print(BatchLogKey.INSTALL_SNAPSHOT_REPLY, getMemberId(), + suffix -> LOG.info("{}: reply installSnapshot: {} {}", + getMemberId(), ServerStringUtils.toInstallSnapshotReplyString(reply), suffix)); return reply; } @@ -110,19 +122,19 @@ private InstallSnapshotReplyProto installSnapshotImpl(InstallSnapshotRequestProt CodeInjectionForTesting.execute(RaftServerImpl.INSTALL_SNAPSHOT, server.getId(), leaderId, request); server.assertLifeCycleState(LifeCycle.States.STARTING_OR_RUNNING); - server.assertGroup(leaderId, leaderGroupId); + ServerImplUtils.assertGroup(getMemberId(), leaderId, leaderGroupId); InstallSnapshotReplyProto reply = null; // Check if install snapshot from Leader is enabled if (installSnapshotEnabled) { // Leader has sent InstallSnapshot request with SnapshotInfo. Install the snapshot. 
if (request.hasSnapshotChunk()) { - reply = checkAndInstallSnapshot(request, leaderId); + reply = checkAndInstallSnapshot(request, leaderId).join(); } } else { // Leader has only sent a notification to install snapshot. Inform State Machine to install snapshot. if (request.hasNotification()) { - reply = notifyStateMachineToInstallSnapshot(request, leaderId); + reply = notifyStateMachineToInstallSnapshot(request, leaderId).join(); } } @@ -130,6 +142,7 @@ private InstallSnapshotReplyProto installSnapshotImpl(InstallSnapshotRequestProt if (request.hasLastRaftConfigurationLogEntryProto()) { // Set the configuration included in the snapshot final LogEntryProto proto = request.getLastRaftConfigurationLogEntryProto(); + state.truncate(proto.getIndex()); if (!state.getRaftConf().equals(LogProtoUtils.toRaftConfiguration(proto))) { LOG.info("{}: set new configuration {} from snapshot", getMemberId(), proto); state.setRaftConf(proto); @@ -142,7 +155,7 @@ private InstallSnapshotReplyProto installSnapshotImpl(InstallSnapshotRequestProt } // There is a mismatch between configurations on leader and follower. 
- final InstallSnapshotReplyProto failedReply = ServerProtoUtils.toInstallSnapshotReplyProto( + final InstallSnapshotReplyProto failedReply = toInstallSnapshotReplyProto( leaderId, getMemberId(), state.getCurrentTerm(), InstallSnapshotResult.CONF_MISMATCH); LOG.error("{}: Configuration Mismatch ({}): Leader {} has it set to {} but follower {} has it set to {}", getMemberId(), RaftServerConfigKeys.Log.Appender.INSTALL_SNAPSHOT_ENABLED_KEY, @@ -150,41 +163,69 @@ private InstallSnapshotReplyProto installSnapshotImpl(InstallSnapshotRequestProt return failedReply; } - private InstallSnapshotReplyProto checkAndInstallSnapshot(InstallSnapshotRequestProto request, + private CompletableFuture checkAndInstallSnapshot(InstallSnapshotRequestProto request, RaftPeerId leaderId) throws IOException { final long currentTerm; final long leaderTerm = request.getLeaderTerm(); final InstallSnapshotRequestProto.SnapshotChunkProto snapshotChunkRequest = request.getSnapshotChunk(); final TermIndex lastIncluded = TermIndex.valueOf(snapshotChunkRequest.getTermIndex()); final long lastIncludedIndex = lastIncluded.getIndex(); + final CompletableFuture future; synchronized (server) { - final boolean recognized = state.recognizeLeader(leaderId, leaderTerm); + final boolean recognized = state.recognizeLeader(RaftServerProtocol.Op.INSTALL_SNAPSHOT, leaderId, leaderTerm); currentTerm = state.getCurrentTerm(); if (!recognized) { - final InstallSnapshotReplyProto reply = ServerProtoUtils.toInstallSnapshotReplyProto(leaderId, getMemberId(), - currentTerm, snapshotChunkRequest.getRequestIndex(), InstallSnapshotResult.NOT_LEADER); - LOG.warn("{}: Failed to recognize leader for installSnapshot chunk.", getMemberId()); - return reply; + return CompletableFuture.completedFuture(toInstallSnapshotReplyProto(leaderId, getMemberId(), + currentTerm, snapshotChunkRequest.getRequestIndex(), InstallSnapshotResult.NOT_LEADER)); } - server.changeToFollowerAndPersistMetadata(leaderTerm, true, "installSnapshot"); + 
future = server.changeToFollowerAndPersistMetadata(leaderTerm, true, "installSnapshot"); state.setLeader(leaderId, "installSnapshot"); server.updateLastRpcTime(FollowerState.UpdateType.INSTALL_SNAPSHOT_START); + long callId = chunk0CallId.get(); + // 1. leaderTerm < currentTerm will never come here + // 2. leaderTerm == currentTerm && callId == request.getCallId() + // means the snapshotRequest is staled with the same leader + // 3. leaderTerm > currentTerm means this is a new snapshot request from a new leader, + // chunk0CallId will be reset when a snapshot request with requestIndex == 0 is received . + if (callId > request.getServerRequest().getCallId() && currentTerm == leaderTerm) { + LOG.warn("{}: Snapshot Request Staled: chunk 0 callId is {} but {}", getMemberId(), callId, + ServerStringUtils.toInstallSnapshotRequestString(request)); + InstallSnapshotReplyProto reply = toInstallSnapshotReplyProto(leaderId, getMemberId(), + currentTerm, snapshotChunkRequest.getRequestIndex(), InstallSnapshotResult.SNAPSHOT_EXPIRED); + return future.thenApply(dummy -> reply); + } + if (snapshotChunkRequest.getRequestIndex() == 0) { + nextChunkIndex.set(0); + chunk0CallId.set(request.getServerRequest().getCallId()); + } else if (nextChunkIndex.get() != snapshotChunkRequest.getRequestIndex()) { + throw new IOException("Snapshot request already failed at chunk index " + nextChunkIndex.get() + + "; ignoring request with chunk index " + snapshotChunkRequest.getRequestIndex()); + } try { // Check and append the snapshot chunk. 
We simply put this in lock // considering a follower peer requiring a snapshot installation does not // have a lot of requests - Preconditions.assertTrue(state.getLog().getLastCommittedIndex() < lastIncludedIndex, - "%s log's commit index is %s, last included index in snapshot is %s", - getMemberId(), state.getLog().getLastCommittedIndex(), lastIncludedIndex); + if (state.getLog().getLastCommittedIndex() >= lastIncludedIndex) { + nextChunkIndex.set(snapshotChunkRequest.getRequestIndex() + 1); + final InstallSnapshotReplyProto reply = toInstallSnapshotReplyProto(leaderId, getMemberId(), + currentTerm, snapshotChunkRequest.getRequestIndex(), InstallSnapshotResult.ALREADY_INSTALLED); + return future.thenApply(dummy -> reply); + } //TODO: We should only update State with installed snapshot once the request is done. state.installSnapshot(request); + final int expectedChunkIndex = nextChunkIndex.getAndIncrement(); + if (expectedChunkIndex != snapshotChunkRequest.getRequestIndex()) { + throw new IOException("Unexpected request chunk index: " + snapshotChunkRequest.getRequestIndex() + + " (the expected index is " + expectedChunkIndex + ")"); + } // update the committed index // re-load the state machine if this is the last chunk if (snapshotChunkRequest.getDone()) { state.reloadStateMachine(lastIncluded); + chunk0CallId.set(-1); } } finally { server.updateLastRpcTime(FollowerState.UpdateType.INSTALL_SNAPSHOT_COMPLETE); @@ -193,27 +234,27 @@ private InstallSnapshotReplyProto checkAndInstallSnapshot(InstallSnapshotRequest if (snapshotChunkRequest.getDone()) { LOG.info("{}: successfully install the entire snapshot-{}", getMemberId(), lastIncludedIndex); } - return ServerProtoUtils.toInstallSnapshotReplyProto(leaderId, getMemberId(), + final InstallSnapshotReplyProto reply = toInstallSnapshotReplyProto(leaderId, getMemberId(), currentTerm, snapshotChunkRequest.getRequestIndex(), InstallSnapshotResult.SUCCESS); + return future.thenApply(dummy -> reply); } - private 
InstallSnapshotReplyProto notifyStateMachineToInstallSnapshot( + private CompletableFuture notifyStateMachineToInstallSnapshot( InstallSnapshotRequestProto request, RaftPeerId leaderId) throws IOException { final long currentTerm; final long leaderTerm = request.getLeaderTerm(); final TermIndex firstAvailableLogTermIndex = TermIndex.valueOf( request.getNotification().getFirstAvailableTermIndex()); final long firstAvailableLogIndex = firstAvailableLogTermIndex.getIndex(); + final CompletableFuture future; synchronized (server) { - final boolean recognized = state.recognizeLeader(leaderId, leaderTerm); + final boolean recognized = state.recognizeLeader("notifyInstallSnapshot", leaderId, leaderTerm); currentTerm = state.getCurrentTerm(); if (!recognized) { - final InstallSnapshotReplyProto reply = ServerProtoUtils.toInstallSnapshotReplyProto(leaderId, getMemberId(), - currentTerm, InstallSnapshotResult.NOT_LEADER); - LOG.warn("{}: Failed to recognize leader for installSnapshot notification.", getMemberId()); - return reply; + return CompletableFuture.completedFuture(toInstallSnapshotReplyProto(leaderId, getMemberId(), + currentTerm, InstallSnapshotResult.NOT_LEADER)); } - server.changeToFollowerAndPersistMetadata(leaderTerm, true, "installSnapshot"); + future = server.changeToFollowerAndPersistMetadata(leaderTerm, true, "installSnapshot"); state.setLeader(leaderId, "installSnapshot"); server.updateLastRpcTime(FollowerState.UpdateType.INSTALL_SNAPSHOT_NOTIFICATION); @@ -222,15 +263,17 @@ private InstallSnapshotReplyProto notifyStateMachineToInstallSnapshot( // Check if snapshot index is already at par or ahead of the first // available log index of the Leader. 
final long snapshotIndex = state.getLog().getSnapshotIndex(); - if (snapshotIndex + 1 >= firstAvailableLogIndex && firstAvailableLogIndex > INVALID_LOG_INDEX) { + if (snapshotIndex != INVALID_LOG_INDEX && snapshotIndex + 1 >= firstAvailableLogIndex && + firstAvailableLogIndex > INVALID_LOG_INDEX) { // State Machine has already installed the snapshot. Return the // latest snapshot index to the Leader. inProgressInstallSnapshotIndex.compareAndSet(firstAvailableLogIndex, INVALID_LOG_INDEX); LOG.info("{}: InstallSnapshot notification result: {}, current snapshot index: {}", getMemberId(), InstallSnapshotResult.ALREADY_INSTALLED, snapshotIndex); - return ServerProtoUtils.toInstallSnapshotReplyProto(leaderId, getMemberId(), currentTerm, + final InstallSnapshotReplyProto reply = toInstallSnapshotReplyProto(leaderId, getMemberId(), currentTerm, InstallSnapshotResult.ALREADY_INSTALLED, snapshotIndex); + return future.thenApply(dummy -> reply); } final RaftPeerProto leaderProto; @@ -307,8 +350,9 @@ private InstallSnapshotReplyProto notifyStateMachineToInstallSnapshot( inProgressInstallSnapshotIndex.set(INVALID_LOG_INDEX); server.getStateMachine().event().notifySnapshotInstalled( InstallSnapshotResult.SNAPSHOT_UNAVAILABLE, INVALID_LOG_INDEX, server.getPeer()); - return ServerProtoUtils.toInstallSnapshotReplyProto(leaderId, getMemberId(), + final InstallSnapshotReplyProto reply = toInstallSnapshotReplyProto(leaderId, getMemberId(), currentTerm, InstallSnapshotResult.SNAPSHOT_UNAVAILABLE); + return future.thenApply(dummy -> reply); } // If a snapshot has been installed, return SNAPSHOT_INSTALLED with the installed snapshot index and reset @@ -325,8 +369,9 @@ private InstallSnapshotReplyProto notifyStateMachineToInstallSnapshot( server.getStateMachine().event().notifySnapshotInstalled( InstallSnapshotResult.SNAPSHOT_INSTALLED, latestInstalledIndex, server.getPeer()); installedIndex.set(latestInstalledIndex); - return ServerProtoUtils.toInstallSnapshotReplyProto(leaderId, 
getMemberId(), + final InstallSnapshotReplyProto reply = toInstallSnapshotReplyProto(leaderId, getMemberId(), currentTerm, InstallSnapshotResult.SNAPSHOT_INSTALLED, latestInstalledSnapshotTermIndex.getIndex()); + return future.thenApply(dummy -> reply); } // Otherwise, Snapshot installation is in progress. @@ -334,15 +379,16 @@ private InstallSnapshotReplyProto notifyStateMachineToInstallSnapshot( LOG.debug("{}: InstallSnapshot notification result: {}", getMemberId(), InstallSnapshotResult.IN_PROGRESS); } - return ServerProtoUtils.toInstallSnapshotReplyProto(leaderId, getMemberId(), + final InstallSnapshotReplyProto reply = toInstallSnapshotReplyProto(leaderId, getMemberId(), currentTerm, InstallSnapshotResult.IN_PROGRESS); + return future.thenApply(dummy -> reply); } } private RoleInfoProto getRoleInfoProto(RaftPeer leader) { final RoleInfo role = server.getRole(); final Optional fs = role.getFollowerState(); - final ServerRpcProto leaderInfo = ServerProtoUtils.toServerRpcProto(leader, + final ServerRpcProto leaderInfo = toServerRpcProto(leader, fs.map(FollowerState::getLastRpcTime).map(Timestamp::elapsedTimeMs).orElse(0L)); final FollowerInfoProto.Builder followerInfo = FollowerInfoProto.newBuilder() .setLeaderInfo(leaderInfo) diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/StateMachineUpdater.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/StateMachineUpdater.java index 43fbdd8843..9c5290efe4 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/StateMachineUpdater.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/StateMachineUpdater.java @@ -29,6 +29,7 @@ import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.server.raftlog.RaftLogIOException; import org.apache.ratis.server.raftlog.RaftLogIndex; +import org.apache.ratis.server.util.ServerStringUtils; import org.apache.ratis.statemachine.SnapshotInfo; import org.apache.ratis.statemachine.StateMachine; import 
org.apache.ratis.statemachine.SnapshotRetentionPolicy; @@ -37,8 +38,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Objects; import java.util.Optional; import java.util.concurrent.CompletableFuture; @@ -74,6 +73,8 @@ enum State { private final boolean triggerSnapshotWhenStopEnabled; + private final boolean triggerSnapshotWhenRemoveEnabled; + private final Long autoSnapshotThreshold; private final boolean purgeUptoSnapshotIndex; @@ -91,9 +92,11 @@ enum State { private final Consumer appliedIndexConsumer; + private volatile boolean isRemoving; + StateMachineUpdater(StateMachine stateMachine, RaftServerImpl server, ServerState serverState, long lastAppliedIndex, RaftProperties properties, Consumer appliedIndexConsumer) { - this.name = serverState.getMemberId() + "-" + JavaUtils.getClassSimpleName(getClass()); + this.name = ServerStringUtils.generateUnifiedName(serverState.getMemberId(), getClass()); this.appliedIndexConsumer = appliedIndexConsumer; this.infoIndexChange = s -> LOG.info("{}: {}", name, s); this.debugIndexChange = s -> LOG.debug("{}: {}", name, s); @@ -106,12 +109,14 @@ enum State { this.snapshotIndex = new RaftLogIndex("snapshotIndex", lastAppliedIndex); this.triggerSnapshotWhenStopEnabled = RaftServerConfigKeys.Snapshot.triggerWhenStopEnabled(properties); + this.triggerSnapshotWhenRemoveEnabled = RaftServerConfigKeys.Snapshot.triggerWhenRemoveEnabled(properties); final boolean autoSnapshot = RaftServerConfigKeys.Snapshot.autoTriggerEnabled(properties); this.autoSnapshotThreshold = autoSnapshot? 
RaftServerConfigKeys.Snapshot.autoTriggerThreshold(properties): null; final int numSnapshotFilesRetained = RaftServerConfigKeys.Snapshot.retentionFileNum(properties); this.snapshotRetentionPolicy = new SnapshotRetentionPolicy() { @Override - public int getNumSnapshotsRetained() { + @SuppressWarnings({"deprecation", "try"}) +public int getNumSnapshotsRetained() { return numSnapshotFilesRetained; } }; @@ -133,6 +138,8 @@ void start() { private void stop() { state = State.STOP; try { + LOG.info("{}: closing {}, lastApplied={}", name, + JavaUtils.getClassSimpleName(stateMachine.getClass()), stateMachine.getLastAppliedTermIndex()); stateMachine.close(); if (stateMachineMetrics.isInitialized()) { stateMachineMetrics.get().unregister(); @@ -175,23 +182,25 @@ public String toString() { @Override public void run() { + CompletableFuture applyLogFutures = CompletableFuture.completedFuture(null); for(; state != State.STOP; ) { try { - waitForCommit(); + waitForCommit(applyLogFutures); if (state == State.RELOAD) { reload(); } - final MemoizedSupplier>> futures = applyLog(); - checkAndTakeSnapshot(futures); + applyLogFutures = applyLog(applyLogFutures); + checkAndTakeSnapshot(applyLogFutures); if (shouldStop()) { - checkAndTakeSnapshot(futures); + applyLogFutures.get(); stop(); } } catch (Throwable t) { if (t instanceof InterruptedException && state == State.STOP) { + Thread.currentThread().interrupt(); LOG.info("{} was interrupted. Exiting ...", this); } else { state = State.EXCEPTION; @@ -202,12 +211,15 @@ public void run() { } } - private void waitForCommit() throws InterruptedException { + private void waitForCommit(CompletableFuture applyLogFutures) throws InterruptedException, ExecutionException { // When a peer starts, the committed is initialized to 0. // It will be updated only after the leader contacts other peers. // Thus it is possible to have applied > committed initially. 
final long applied = getLastAppliedIndex(); for(; applied >= raftLog.getLastCommittedIndex() && state == State.RUNNING && !shouldStop(); ) { + if (server.getSnapshotRequestHandler().shouldTriggerTakingSnapshot()) { + takeSnapshot(applyLogFutures); + } if (awaitForSignal.await(100, TimeUnit.MILLISECONDS)) { return; } @@ -228,15 +240,21 @@ private void reload() throws IOException { state = State.RUNNING; } - private MemoizedSupplier>> applyLog() throws RaftLogIOException { - final MemoizedSupplier>> futures = MemoizedSupplier.valueOf(ArrayList::new); + private CompletableFuture applyLog(CompletableFuture applyLogFutures) throws RaftLogIOException { final long committed = raftLog.getLastCommittedIndex(); for(long applied; (applied = getLastAppliedIndex()) < committed && state == State.RUNNING && !shouldStop(); ) { final long nextIndex = applied + 1; - final LogEntryProto next = raftLog.get(nextIndex); - if (next != null) { + final ReferenceCountedObject next = raftLog.retainLog(nextIndex); + if (next == null) { + LOG.debug("{}: logEntry {} is null. There may be snapshot to load. 
state:{}", + this, nextIndex, state); + break; + } + + try { + final LogEntryProto entry = next.get(); if (LOG.isTraceEnabled()) { - LOG.trace("{}: applying nextIndex={}, nextLog={}", this, nextIndex, LogProtoUtils.toLogEntryString(next)); + LOG.trace("{}: applying nextIndex={}, nextLog={}", this, nextIndex, LogProtoUtils.toLogEntryString(entry)); } else { LOG.debug("{}: applying nextIndex={}", this, nextIndex); } @@ -245,34 +263,35 @@ private MemoizedSupplier>> applyLog() throws Raf final long incremented = appliedIndex.incrementAndGet(debugIndexChange); Preconditions.assertTrue(incremented == nextIndex); if (f != null) { - futures.get().add(f); + CompletableFuture exceptionHandledFuture = f.exceptionally(ex -> { + LOG.error("Exception while {}: applying txn index={}, nextLog={}", this, nextIndex, + LogProtoUtils.toLogEntryString(entry), ex); + return null; + }); + applyLogFutures = applyLogFutures.thenCombine(exceptionHandledFuture, (v, message) -> null); f.thenAccept(m -> notifyAppliedIndex(incremented)); } else { notifyAppliedIndex(incremented); } - } else { - LOG.debug("{}: logEntry {} is null. There may be snapshot to load. 
state:{}", - this, nextIndex, state); - break; + } finally { + next.release(); } } - return futures; + return applyLogFutures; } - private void checkAndTakeSnapshot(MemoizedSupplier>> futures) + private void checkAndTakeSnapshot(CompletableFuture futures) throws ExecutionException, InterruptedException { // check if need to trigger a snapshot if (shouldTakeSnapshot()) { - if (futures.isInitialized()) { - JavaUtils.allOf(futures.get()).get(); - } - - takeSnapshot(); + takeSnapshot(futures); } } - private void takeSnapshot() { + @SuppressWarnings("try") + private void takeSnapshot(CompletableFuture applyLogFutures) throws ExecutionException, InterruptedException { final long i; + applyLogFutures.get(); try { try(UncheckedAutoCloseable ignored = Timekeeper.start(stateMachineMetrics.get().getTakeSnapshotTimer())) { i = stateMachine.takeSnapshot(); @@ -320,12 +339,33 @@ private boolean shouldTakeSnapshot() { if (autoSnapshotThreshold == null) { return false; } else if (shouldStop()) { - return triggerSnapshotWhenStopEnabled && getLastAppliedIndex() - snapshotIndex.get() > 0; + return shouldTakeSnapshotAtStop() && getLastAppliedIndex() - snapshotIndex.get() > 0; } return state == State.RUNNING && getStateMachineLastAppliedIndex() - snapshotIndex.get() >= autoSnapshotThreshold; } + /** + * In view of the three variables triggerSnapshotWhenStopEnabled, triggerSnapshotWhenRemoveEnabled and isRemoving, + * we can draw the following 8 combination: + * true true true => true + * true true false => true + * true false true => false + * true false false => true + * false true true => true + * false true false => false + * false false true => false + * false false false => false + * @return result + */ + private boolean shouldTakeSnapshotAtStop() { + return isRemoving ? 
triggerSnapshotWhenRemoveEnabled : triggerSnapshotWhenStopEnabled; + } + + void setRemoving() { + this.isRemoving = true; + } + private long getLastAppliedIndex() { return appliedIndex.get(); } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/TransactionManager.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/TransactionManager.java index c33bc26bce..cba310b34d 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/TransactionManager.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/TransactionManager.java @@ -71,12 +71,6 @@ void remove(TermIndex termIndex) { @Override public String toString() { - if (contexts.isEmpty()) { - return name + " "; - } - - final StringBuilder b = new StringBuilder(name); - contexts.forEach((k, v) -> b.append("\n ").append(k).append(": initialized? ").append(v.isInitialized())); - return b.toString(); + return name + ":size=" + contexts.size(); } } \ No newline at end of file diff --git a/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderBase.java b/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderBase.java index 958cc6fa81..d7e82f4a3f 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderBase.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderBase.java @@ -33,27 +33,109 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; +import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.LongUnaryOperator; /** * An 
abstract implementation of {@link LogAppender}. */ +@SuppressWarnings({"deprecation", "try"}) public abstract class LogAppenderBase implements LogAppender { + /** For buffering log entries to create an {@link EntryList}. */ + private static class EntryBuffer { + /** A queue for limiting the byte size, number of elements and poll time. */ + private final DataQueue queue; + /** A map for releasing {@link ReferenceCountedObject}s. */ + private final Map> references = new HashMap<>(); + + EntryBuffer(Object name, RaftProperties properties) { + final SizeInBytes bufferByteLimit = RaftServerConfigKeys.Log.Appender.bufferByteLimit(properties); + final int bufferElementLimit = RaftServerConfigKeys.Log.Appender.bufferElementLimit(properties); + this.queue = new DataQueue<>(name, bufferByteLimit, bufferElementLimit, EntryWithData::getSerializedSize); + } + + boolean putNew(long index, ReferenceCountedObject retained) { + if (!queue.offer(retained.get())) { + retained.release(); + return false; + } + final ReferenceCountedObject previous = references.put(index, retained); + Preconditions.assertNull(previous, () -> "previous with index " + index); + return true; + } + + void releaseAllAndClear() { + for (ReferenceCountedObject ref : references.values()) { + ref.release(); + } + references.clear(); + queue.clear(); + } + + EntryList pollList(long heartbeatWaitTimeMs) throws RaftLogIOException { + final List protos; + try { + protos = queue.pollList(heartbeatWaitTimeMs, EntryWithData::getEntry, null); + } catch (Exception e) { + releaseAllAndClear(); + throw e; + } finally { + for (EntryWithData entry : queue) { + // Remove and release remaining entries. + final ReferenceCountedObject removed = references.remove(entry.getIndex()); + Objects.requireNonNull(removed, "removed == null"); + removed.release(); + } + queue.clear(); + } + return new EntryList(protos, references); + } + } + + /** Storing log entries and their references. 
*/ + private static class EntryList { + private final List protos; + private final Collection> references; + + EntryList(List protos, Map> references) { + Preconditions.assertSame(references.size(), protos.size(), "#entries"); + this.protos = Collections.unmodifiableList(protos); + this.references = Collections.unmodifiableCollection(references.values()); + } + + List getProtos() { + return protos; + } + + void retain() { + for (ReferenceCountedObject ref : references) { + ref.retain(); + } + } + + void release() { + for (ReferenceCountedObject ref : references) { + ref.release(); + } + } + } + private final String name; private final RaftServer.Division server; private final LeaderState leaderState; private final FollowerInfo follower; - private final DataQueue buffer; private final int snapshotChunkMaxSize; private final LogAppenderDaemon daemon; @@ -71,9 +153,6 @@ protected LogAppenderBase(RaftServer.Division server, LeaderState leaderState, F final RaftProperties properties = server.getRaftServer().getProperties(); this.snapshotChunkMaxSize = RaftServerConfigKeys.Log.Appender.snapshotChunkSizeMax(properties).getSizeInt(); - final SizeInBytes bufferByteLimit = RaftServerConfigKeys.Log.Appender.bufferByteLimit(properties); - final int bufferElementLimit = RaftServerConfigKeys.Log.Appender.bufferElementLimit(properties); - this.buffer = new DataQueue<>(this, bufferByteLimit, bufferElementLimit, EntryWithData::getSerializedSize); this.daemon = new LogAppenderDaemon(this); this.eventAwaitForSignal = new AwaitForSignal(name); @@ -124,7 +203,7 @@ public void start() { @Override public boolean isRunning() { - return daemon.isWorking(); + return daemon.isWorking() && server.getInfo().isLeader(); } @Override @@ -206,51 +285,66 @@ protected LongUnaryOperator getNextIndexForError(long newNextIndex) { final long n = oldNextIndex <= 0L ? 
oldNextIndex : Math.min(oldNextIndex - 1, newNextIndex); if (m > n) { if (m > newNextIndex) { - LOG.info("Set nextIndex to matchIndex + 1 (= " + m + ")"); + LOG.info("{}: Set nextIndex to matchIndex + 1 (= {})", name, m); } return m; } else if (oldNextIndex <= 0L) { return oldNextIndex; // no change. } else { - LOG.info("Decrease nextIndex to " + n); + LOG.info("{}: Decrease nextIndex to {}", name, n); return n; } }; } - @Override - public AppendEntriesRequestProto newAppendEntriesRequest(long callId, boolean heartbeat) + public AppendEntriesRequestProto newAppendEntriesRequest(long callId, boolean heartbeat) { + throw new UnsupportedOperationException("Use nextAppendEntriesRequest(" + callId + ", " + heartbeat +") instead."); + } + + /** + * Create a {@link AppendEntriesRequestProto} object using the {@link FollowerInfo} of this {@link LogAppender}. + * The {@link AppendEntriesRequestProto} object may contain zero or more log entries. + * When there is zero log entries, the {@link AppendEntriesRequestProto} object is a heartbeat. + * + * @param callId The call id of the returned request. + * @param heartbeat the returned request must be a heartbeat. + * + * @return a retained reference of {@link AppendEntriesRequestProto} object. + * Since the returned reference is retained, + * the caller must call {@link ReferenceCountedObject#release()}} after use. + */ + protected ReferenceCountedObject nextAppendEntriesRequest(long callId, boolean heartbeat) throws RaftLogIOException { final long heartbeatWaitTimeMs = getHeartbeatWaitTimeMs(); final TermIndex previous = getPrevious(follower.getNextIndex()); if (heartbeatWaitTimeMs <= 0L || heartbeat) { // heartbeat - return leaderState.newAppendEntriesRequestProto(follower, Collections.emptyList(), - hasPendingDataRequests()? null : previous, callId); + AppendEntriesRequestProto heartbeatRequest = + leaderState.newAppendEntriesRequestProto(follower, Collections.emptyList(), + hasPendingDataRequests() ? 
null : previous, callId); + ReferenceCountedObject ref = ReferenceCountedObject.wrap(heartbeatRequest); + ref.retain(); + return ref; } - Preconditions.assertTrue(buffer.isEmpty(), () -> "buffer has " + buffer.getNumElements() + " elements."); - final long snapshotIndex = follower.getSnapshotIndex(); - final long leaderNext = getRaftLog().getNextIndex(); final long followerNext = follower.getNextIndex(); - final long halfMs = heartbeatWaitTimeMs/2; - for (long next = followerNext; leaderNext > next && getHeartbeatWaitTimeMs() - halfMs > 0; ) { - if (!buffer.offer(getRaftLog().getEntryWithData(next++))) { - break; - } - } - if (buffer.isEmpty()) { + final EntryBuffer entryBuffer = readLogEntries(followerNext, heartbeatWaitTimeMs); + if (entryBuffer == null) { return null; } - final List protos = buffer.pollList(getHeartbeatWaitTimeMs(), EntryWithData::getEntry, - (entry, time, exception) -> LOG.warn("Failed to get " + entry - + " in " + time.toString(TimeUnit.MILLISECONDS, 3), exception)); - buffer.clear(); + final EntryList entryList = entryBuffer.pollList(heartbeatWaitTimeMs); + final List protos = entryList.getProtos(); assertProtos(protos, followerNext, previous, snapshotIndex); - return leaderState.newAppendEntriesRequestProto(follower, protos, previous, callId); + AppendEntriesRequestProto appendEntriesProto = + leaderState.newAppendEntriesRequestProto(follower, protos, previous, callId); + final ReferenceCountedObject ref = ReferenceCountedObject.wrap( + appendEntriesProto, entryList::retain, entryList::release); + ref.retain(); + entryList.release(); + return ref; } private void assertProtos(List protos, long nextIndex, TermIndex previous, long snapshotIndex) { @@ -272,6 +366,31 @@ private void assertProtos(List protos, long nextIndex, TermIndex } } + private EntryBuffer readLogEntries(long followerNext, long heartbeatWaitTimeMs) throws RaftLogIOException { + final RaftLog raftLog = getRaftLog(); + final long leaderNext = raftLog.getNextIndex(); + final long 
halfMs = heartbeatWaitTimeMs/2; + EntryBuffer entryBuffer = null; + for (long next = followerNext; leaderNext > next && getHeartbeatWaitTimeMs() - halfMs > 0; next++) { + final ReferenceCountedObject retained; + try { + retained = raftLog.retainEntryWithData(next); + if (entryBuffer == null) { + entryBuffer = new EntryBuffer(name, server.getRaftServer().getProperties()); + } + if (!entryBuffer.putNew(next, retained)) { + break; + } + } catch (Exception e) { + if (entryBuffer != null) { + entryBuffer.releaseAllAndClear(); + } + throw e; + } + } + return entryBuffer; + } + @Override public InstallSnapshotRequestProto newInstallSnapshotNotificationRequest(TermIndex firstAvailableLogTermIndex) { Preconditions.assertTrue(firstAvailableLogTermIndex.getIndex() >= 0); diff --git a/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderDaemon.java b/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderDaemon.java index 847617426f..5de3f3b4da 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderDaemon.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderDaemon.java @@ -108,8 +108,11 @@ private void run() { }; public CompletableFuture tryToClose() { - if (lifeCycle.transition(TRY_TO_CLOSE) == CLOSING) { + final State state = lifeCycle.transition(TRY_TO_CLOSE); + if (state == CLOSING) { daemon.interrupt(); + } else if (state == CLOSED) { + closeFuture.complete(state); } return closeFuture; } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderDefault.java b/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderDefault.java index 21ef70d4df..8c1675c7c3 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderDefault.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/leader/LogAppenderDefault.java @@ -23,9 +23,11 @@ import org.apache.ratis.proto.RaftProtos.InstallSnapshotRequestProto; import 
org.apache.ratis.rpc.CallId; import org.apache.ratis.server.RaftServer; +import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.server.raftlog.RaftLogIOException; import org.apache.ratis.server.util.ServerStringUtils; import org.apache.ratis.statemachine.SnapshotInfo; +import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.Timestamp; import java.io.IOException; @@ -33,6 +35,7 @@ import java.util.Comparator; import java.util.UUID; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; /** * The default implementation of {@link LogAppender} @@ -54,43 +57,38 @@ public Comparator getCallIdComparator() { } /** Send an appendEntries RPC; retry indefinitely. */ - private AppendEntriesReplyProto sendAppendEntriesWithRetries() + private AppendEntriesReplyProto sendAppendEntriesWithRetries(AtomicLong requestFirstIndex) throws InterruptedException, InterruptedIOException, RaftLogIOException { - int retry = 0; - - AppendEntriesRequestProto request = newAppendEntriesRequest(CallId.getAndIncrement(), false); - while (isRunning()) { // keep retrying for IOException + for(int retry = 0; isRunning(); retry++) { + final ReferenceCountedObject request = nextAppendEntriesRequest( + CallId.getAndIncrement(), false); + if (request == null) { + LOG.trace("{} no entries to send now, wait ...", this); + return null; + } try { - if (request == null || request.getEntriesCount() == 0) { - request = newAppendEntriesRequest(CallId.getAndIncrement(), false); - } - - if (request == null) { - LOG.trace("{} no entries to send now, wait ...", this); - return null; - } else if (!isRunning()) { + if (!isRunning()) { LOG.info("{} is stopped. 
Skip appendEntries.", this); return null; } - resetHeartbeatTrigger(); - final Timestamp sendTime = Timestamp.currentTime(); - getFollower().updateLastRpcSendTime(request.getEntriesCount() == 0); - final AppendEntriesReplyProto r = getServerRpc().appendEntries(request); - getFollower().updateLastRpcResponseTime(); - getFollower().updateLastRespondedAppendEntriesSendTime(sendTime); - - getLeaderState().onFollowerCommitIndex(getFollower(), r.getFollowerCommit()); - return r; + final AppendEntriesRequestProto proto = request.get(); + final AppendEntriesReplyProto reply = sendAppendEntries(proto); + final long first = proto.getEntriesCount() > 0 ? proto.getEntries(0).getIndex() : RaftLog.INVALID_LOG_INDEX; + requestFirstIndex.set(first); + return reply; } catch (InterruptedIOException | RaftLogIOException e) { throw e; } catch (IOException ioe) { // TODO should have more detailed retry policy here. - if (retry++ % 10 == 0) { // to reduce the number of messages + if (retry % 10 == 0) { // to reduce the number of messages LOG.warn("{}: Failed to appendEntries (retry={})", this, retry, ioe); } handleException(ioe); + } finally { + request.release(); } + if (isRunning()) { getServer().properties().rpcSleepTime().sleep(); } @@ -98,6 +96,18 @@ private AppendEntriesReplyProto sendAppendEntriesWithRetries() return null; } + private AppendEntriesReplyProto sendAppendEntries(AppendEntriesRequestProto request) throws IOException { + resetHeartbeatTrigger(); + final Timestamp sendTime = Timestamp.currentTime(); + getFollower().updateLastRpcSendTime(request.getEntriesCount() == 0); + final AppendEntriesReplyProto r = getServerRpc().appendEntries(request); + getFollower().updateLastRpcResponseTime(); + getFollower().updateLastRespondedAppendEntriesSendTime(sendTime); + + getLeaderState().onFollowerCommitIndex(getFollower(), r.getFollowerCommit()); + return r; + } + private InstallSnapshotReplyProto installSnapshot(SnapshotInfo snapshot) throws InterruptedIOException { String 
requestId = UUID.randomUUID().toString(); InstallSnapshotReplyProto reply = null; @@ -145,6 +155,7 @@ public void run() throws InterruptedException, IOException { case SUCCESS: case SNAPSHOT_UNAVAILABLE: case ALREADY_INSTALLED: + case SNAPSHOT_EXPIRED: getFollower().setAttemptedToInstallSnapshot(); break; default: @@ -153,9 +164,10 @@ public void run() throws InterruptedException, IOException { } // otherwise if r is null, retry the snapshot installation } else { - final AppendEntriesReplyProto r = sendAppendEntriesWithRetries(); + final AtomicLong requestFirstIndex = new AtomicLong(RaftLog.INVALID_LOG_INDEX); + final AppendEntriesReplyProto r = sendAppendEntriesWithRetries(requestFirstIndex); if (r != null) { - handleReply(r); + handleReply(r, requestFirstIndex.get()); } } } @@ -166,7 +178,8 @@ public void run() throws InterruptedException, IOException { } } - private void handleReply(AppendEntriesReplyProto reply) throws IllegalArgumentException { + private void handleReply(AppendEntriesReplyProto reply, long requestFirstIndex) + throws IllegalArgumentException { if (reply != null) { switch (reply.getResult()) { case SUCCESS: @@ -189,7 +202,7 @@ private void handleReply(AppendEntriesReplyProto reply) throws IllegalArgumentEx onFollowerTerm(reply.getTerm()); break; case INCONSISTENCY: - getFollower().decreaseNextIndex(reply.getNextIndex()); + getFollower().setNextIndex(getNextIndexForInconsistency(requestFirstIndex, reply.getNextIndex())); break; case UNRECOGNIZED: LOG.warn("{}: received {}", this, reply.getResult()); diff --git a/ratis-server/src/main/java/org/apache/ratis/server/metrics/LeaderElectionMetrics.java b/ratis-server/src/main/java/org/apache/ratis/server/metrics/LeaderElectionMetrics.java index 7447498d42..1d044bb6fe 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/metrics/LeaderElectionMetrics.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/metrics/LeaderElectionMetrics.java @@ -51,6 +51,7 @@ public final class 
LeaderElectionMetrics extends RatisMetrics { private final Timekeeper electionTime = getRegistry().timer(LEADER_ELECTION_TIME_TAKEN); + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile Timestamp lastElectionTime; private LeaderElectionMetrics(RaftGroupMemberId serverId, LongSupplier getLastLeaderElapsedTimeMs) { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/LogProtoUtils.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/LogProtoUtils.java index de06faf63e..e969eaa484 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/LogProtoUtils.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/LogProtoUtils.java @@ -19,7 +19,6 @@ import org.apache.ratis.proto.RaftProtos.*; import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.ClientInvocationId; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.server.RaftConfiguration; @@ -27,6 +26,7 @@ import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.thirdparty.com.google.protobuf.AbstractMessage; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.ProtoUtils; @@ -45,9 +45,10 @@ public static String toLogEntryString(LogEntryProto entry, Function proto -> "" + ClientInvocationId.valueOf(proto)) - .apply(entry.getStateMachineLogEntry()); + if (function == null) { + function = LogProtoUtils::stateMachineLogEntryProtoToString; + } + s = ", " + function.apply(entry.getStateMachineLogEntry()); } else if (entry.hasMetadataEntry()) { final MetadataProto metadata = entry.getMetadataEntry(); s = "(c:" + metadata.getCommitIndex() + ")"; @@ -69,7 +70,11 @@ static String peersToString(List peers) { } static String 
stateMachineLogEntryProtoToString(StateMachineLogEntryProto p) { - return "logData:" + p.getLogData() + ", stateMachineEntry:" + p.getType() + ":" + p.getStateMachineEntry(); + final StateMachineEntryProto stateMachineEntry = p.getStateMachineEntry(); + return p.getType() + + ": logData.size=" + p.getLogData().size() + + ", stateMachineData.size=" + stateMachineEntry.getStateMachineData().size() + + ", logEntryProtoSerializedSize=" + stateMachineEntry.getLogEntryProtoSerializedSize(); } public static String toLogEntryString(LogEntryProto entry) { @@ -81,10 +86,11 @@ public static String toLogEntriesString(List entries) { : entries.stream().map(LogProtoUtils::toLogEntryString).collect(Collectors.toList()).toString(); } - public static String toLogEntriesShortString(List entries) { + public static String toLogEntriesShortString(List entries, + Function stateMachineToString) { return entries == null ? null : entries.isEmpty()? "" - : "size=" + entries.size() + ", first=" + LogProtoUtils.toLogEntryString(entries.get(0)); + : "size=" + entries.size() + ", first=" + toLogEntryString(entries.get(0), stateMachineToString); } public static LogEntryProto toLogEntryProto(RaftConfiguration conf, Long term, long index) { @@ -136,8 +142,9 @@ public static LogEntryProto removeStateMachineData(LogEntryProto entry) { } private static LogEntryProto replaceStateMachineDataWithSerializedSize(LogEntryProto entry) { - return replaceStateMachineEntry(entry, + LogEntryProto replaced = replaceStateMachineEntry(entry, StateMachineEntryProto.newBuilder().setLogEntryProtoSerializedSize(entry.getSerializedSize())); + return copy(replaced); } private static LogEntryProto replaceStateMachineEntry(LogEntryProto proto, StateMachineEntryProto.Builder newEntry) { @@ -159,6 +166,13 @@ static LogEntryProto addStateMachineData(ByteString stateMachineData, LogEntryPr return replaceStateMachineEntry(entry, StateMachineEntryProto.newBuilder().setStateMachineData(stateMachineData)); } + public static 
boolean hasStateMachineData(LogEntryProto entry) { + return getStateMachineEntry(entry) + .map(StateMachineEntryProto::getStateMachineData) + .map(data -> !data.isEmpty()) + .orElse(false); + } + public static boolean isStateMachineDataEmpty(LogEntryProto entry) { return getStateMachineEntry(entry) .map(StateMachineEntryProto::getStateMachineData) @@ -221,4 +235,21 @@ public static RaftConfiguration toRaftConfiguration(LogEntryProto entry) { final List oldListener = ProtoUtils.toRaftPeers(proto.getOldListenersList()); return ServerImplUtils.newRaftConfiguration(conf, listener, entry.getIndex(), oldConf, oldListener); } + + public static LogEntryProto copy(LogEntryProto proto) { + if (proto == null) { + return null; + } + + if (!proto.hasStateMachineLogEntry() && !proto.hasMetadataEntry() && !proto.hasConfigurationEntry()) { + // empty entry, just return as is. + return proto; + } + + try { + return LogEntryProto.parseFrom(proto.toByteString()); + } catch (InvalidProtocolBufferException e) { + throw new IllegalArgumentException("Failed to copy log entry " + TermIndex.valueOf(proto), e); + } + } } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/RaftLogBase.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/RaftLogBase.java index be2ebcb45f..8042f5e21c 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/RaftLogBase.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/RaftLogBase.java @@ -17,6 +17,7 @@ */ package org.apache.ratis.server.raftlog; +import java.util.Objects; import java.util.concurrent.atomic.AtomicReference; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.proto.RaftProtos.LogEntryProto; @@ -31,7 +32,9 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.OpenCloseState; import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.TimeDuration; +import 
org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; import java.io.IOException; import java.util.List; @@ -118,32 +121,41 @@ public boolean isOpened() { } @Override + @SuppressWarnings("try") public boolean updateCommitIndex(long majorityIndex, long currentTerm, boolean isLeader) { - try(AutoCloseableLock writeLock = writeLock()) { + try(AutoCloseableLock writeLock = tryWriteLock(TimeDuration.ONE_SECOND)) { final long oldCommittedIndex = getLastCommittedIndex(); final long newCommitIndex = Math.min(majorityIndex, getFlushIndex()); if (oldCommittedIndex < newCommitIndex) { if (!isLeader) { - commitIndex.updateIncreasingly(newCommitIndex, traceIndexChange); - return true; + return commitIndex.updateIncreasingly(newCommitIndex, traceIndexChange); } // Only update last committed index for current term. See §5.4.2 in paper for details. final TermIndex entry = getTermIndex(newCommitIndex); if (entry != null && entry.getTerm() == currentTerm) { - commitIndex.updateIncreasingly(newCommitIndex, traceIndexChange); - return true; + return commitIndex.updateIncreasingly(newCommitIndex, traceIndexChange); } } + } catch (InterruptedException e) { + LOG.warn("{}: Interrupted to updateCommitIndex: majorityIndex={}, currentTerm={}, isLeader={}", + getName(), majorityIndex, currentTerm, isLeader, e); } return false; } + protected void updatePurgeIndex(Long purged) { + if (purged != null) { + purgeIndex.updateToMax(purged, infoIndexChange); + } + } + protected void updateSnapshotIndexFromStateMachine() { updateSnapshotIndex(getSnapshotIndexFromStateMachine.getAsLong()); } @Override + @SuppressWarnings("try") public void updateSnapshotIndex(long newSnapshotIndex) { try(AutoCloseableLock writeLock = writeLock()) { final long oldSnapshotIndex = getSnapshotIndex(); @@ -162,6 +174,7 @@ public final long append(long term, TransactionContext transaction) throws State return runner.runSequentially(() -> appendImpl(term, transaction)); } + @SuppressWarnings("try") private long 
appendImpl(long term, TransactionContext operation) throws StateMachineException { checkLogState(); try(AutoCloseableLock writeLock = writeLock()) { @@ -185,12 +198,13 @@ private long appendImpl(long term, TransactionContext operation) throws StateMac throw new StateMachineException(memberId, new RaftLogIOException( "Log entry size " + entrySize + " exceeds the max buffer limit of " + maxBufferSize)); } - appendEntry(e, operation).whenComplete((returned, t) -> { + + appendEntry(operation.wrap(e), operation).whenComplete((returned, t) -> { if (t != null) { - LOG.error(name + ": Failed to write log entry " + LogProtoUtils.toLogEntryString(e), t); + LOG.error(name + ": Failed to write log entry " + toLogEntryString(e), t); } else if (returned != nextIndex) { LOG.error("{}: Indices mismatched: returned index={} but nextIndex={} for log entry {}", - name, returned, nextIndex, LogProtoUtils.toLogEntryString(e)); + name, returned, nextIndex, toLogEntryString(e)); } else { return; // no error } @@ -210,6 +224,7 @@ public final long appendMetadata(long term, long newCommitIndex) { return runner.runSequentially(() -> appendMetadataImpl(term, newCommitIndex)); } + @SuppressWarnings("try") private long appendMetadataImpl(long term, long newCommitIndex) { checkLogState(); if (!shouldAppendMetadata(newCommitIndex)) { @@ -231,21 +246,10 @@ private boolean shouldAppendMetadata(long newCommitIndex) { if (newCommitIndex <= 0) { // do not log the first conf entry return false; - } else if (Optional.ofNullable(lastMetadataEntry.get()) - .filter(e -> e.getIndex() == newCommitIndex || e.getMetadataEntry().getCommitIndex() >= newCommitIndex) - .isPresent()) { - //log neither lastMetadataEntry, nor entries with a smaller commit index. 
- return false; - } - try { - if (get(newCommitIndex).hasMetadataEntry()) { - // do not log the metadata entry - return false; - } - } catch(RaftLogIOException e) { - LOG.error("Failed to get log entry for index " + newCommitIndex, e); } - return true; + final LogEntryProto last = lastMetadataEntry.get(); + // do not log entries with a smaller commit index. + return last == null || newCommitIndex > last.getMetadataEntry().getCommitIndex(); } @Override @@ -253,6 +257,7 @@ public final long append(long term, RaftConfiguration configuration) { return runner.runSequentially(() -> appendImpl(term, configuration)); } + @SuppressWarnings("try") private long appendImpl(long term, RaftConfiguration newConf) { checkLogState(); try(AutoCloseableLock writeLock = writeLock()) { @@ -277,7 +282,7 @@ public final void open(long lastIndexInSnapshot, Consumer consume final long startIndex = getStartIndex(); if (startIndex > LEAST_VALID_LOG_INDEX) { - purgeIndex.updateIncreasingly(startIndex - 1, infoIndexChange); + purgeIndex.updateToMax(startIndex - 1, infoIndexChange); } } @@ -319,22 +324,28 @@ public final CompletableFuture truncate(long index) { @Override public final CompletableFuture purge(long suggestedIndex) { + final long adjustedIndex; if (purgePreservation > 0) { final long currentIndex = getNextIndex() - 1; - suggestedIndex = Math.min(suggestedIndex, currentIndex - purgePreservation); + adjustedIndex = Math.min(suggestedIndex, currentIndex - purgePreservation); + } else { + adjustedIndex = suggestedIndex; } final long lastPurge = purgeIndex.get(); - if (suggestedIndex - lastPurge < purgeGap) { + if (adjustedIndex - lastPurge < purgeGap) { return CompletableFuture.completedFuture(lastPurge); } - LOG.info("{}: purge {}", getName(), suggestedIndex); - final long finalSuggestedIndex = suggestedIndex; - return purgeImpl(suggestedIndex).whenComplete((purged, e) -> { - if (purged != null) { - purgeIndex.updateToMax(purged, infoIndexChange); - } + final long startIndex = 
getStartIndex(); + if (adjustedIndex < startIndex) { + LOG.info("{}: purge({}) is skipped: adjustedIndex = {} < startIndex = {}, purgePreservation = {}", + getName(), suggestedIndex, adjustedIndex, startIndex, purgePreservation); + return CompletableFuture.completedFuture(lastPurge); + } + LOG.info("{}: purge {}", getName(), adjustedIndex ); + return purgeImpl(adjustedIndex).whenComplete((purged, e) -> { + updatePurgeIndex(purged); if (e != null) { - LOG.warn(getName() + ": Failed to purge " + finalSuggestedIndex, e); + LOG.warn(getName() + ": Failed to purge " + adjustedIndex, e); } }); } @@ -343,22 +354,32 @@ public final CompletableFuture purge(long suggestedIndex) { @Override public final CompletableFuture appendEntry(LogEntryProto entry) { - return appendEntry(entry, null); + return appendEntry(ReferenceCountedObject.wrap(entry), null); } @Override - public final CompletableFuture appendEntry(LogEntryProto entry, TransactionContext context) { + public final CompletableFuture appendEntry(ReferenceCountedObject entry, + TransactionContext context) { return runner.runSequentially(() -> appendEntryImpl(entry, context)); } - protected abstract CompletableFuture appendEntryImpl(LogEntryProto entry, TransactionContext context); + protected abstract CompletableFuture appendEntryImpl(ReferenceCountedObject entry, + TransactionContext context); @Override - public final List> append(List entries) { + public final List> append(ReferenceCountedObject> entries) { return runner.runSequentially(() -> appendImpl(entries)); } - protected abstract List> appendImpl(List entries); + protected List> appendImpl(List entries) { + throw new UnsupportedOperationException(); + } + + protected List> appendImpl(ReferenceCountedObject> entriesRef) { + try(UncheckedAutoCloseableSupplier> entries = entriesRef.retainAndReleaseOnClose()) { + return appendImpl(entries.get()); + } + } @Override public String toString() { @@ -374,6 +395,10 @@ public AutoCloseableLock writeLock() { return 
AutoCloseableLock.acquire(lock.writeLock()); } + public AutoCloseableLock tryWriteLock(TimeDuration timeout) throws InterruptedException { + return AutoCloseableLock.tryAcquire(lock.writeLock(), null, timeout); + } + public boolean hasWriteLock() { return this.lock.isWriteLockedByCurrentThread(); } @@ -391,8 +416,43 @@ public String getName() { return name; } - protected EntryWithData newEntryWithData(LogEntryProto logEntry, CompletableFuture future) { - return new EntryWithDataImpl(logEntry, future); + protected ReferenceCountedObject newEntryWithData(ReferenceCountedObject retained) { + return retained.delegate(new EntryWithDataImpl(retained.get(), null)); + } + + protected ReferenceCountedObject newEntryWithData(ReferenceCountedObject retained, + CompletableFuture> stateMachineDataFuture) { + final EntryWithDataImpl impl = new EntryWithDataImpl(retained.get(), stateMachineDataFuture); + return new ReferenceCountedObject() { + private CompletableFuture> future + = Objects.requireNonNull(stateMachineDataFuture, "stateMachineDataFuture == null"); + + @Override + public EntryWithData get() { + return impl; + } + + synchronized void updateFuture(Consumer> action) { + future = future.whenComplete((ref, e) -> { + if (ref != null) { + action.accept(ref); + } + }); + } + + @Override + public EntryWithData retain() { + retained.retain(); + updateFuture(ReferenceCountedObject::retain); + return impl; + } + + @Override + public boolean release() { + updateFuture(ReferenceCountedObject::release); + return retained.release(); + } + }; } /** @@ -400,20 +460,25 @@ protected EntryWithData newEntryWithData(LogEntryProto logEntry, CompletableFutu */ class EntryWithDataImpl implements EntryWithData { private final LogEntryProto logEntry; - private final CompletableFuture future; + private final CompletableFuture> future; - EntryWithDataImpl(LogEntryProto logEntry, CompletableFuture future) { + EntryWithDataImpl(LogEntryProto logEntry, CompletableFuture> future) { this.logEntry = 
logEntry; this.future = future == null? null: future.thenApply(this::checkStateMachineData); } - private ByteString checkStateMachineData(ByteString data) { + private ReferenceCountedObject checkStateMachineData(ReferenceCountedObject data) { if (data == null) { - throw new IllegalStateException("State machine data is null for log entry " + logEntry); + throw new IllegalStateException("State machine data is null for log entry " + this); } return data; } + @Override + public long getIndex() { + return logEntry.getIndex(); + } + @Override public int getSerializedSize() { return LogProtoUtils.getSerializedSize(logEntry); @@ -421,14 +486,15 @@ public int getSerializedSize() { @Override public LogEntryProto getEntry(TimeDuration timeout) throws RaftLogIOException, TimeoutException { - LogEntryProto entryProto; if (future == null) { return logEntry; } + final LogEntryProto entryProto; + ReferenceCountedObject data; try { - entryProto = future.thenApply(data -> LogProtoUtils.addStateMachineData(data, logEntry)) - .get(timeout.getDuration(), timeout.getUnit()); + data = future.get(timeout.getDuration(), timeout.getUnit()); + entryProto = LogProtoUtils.addStateMachineData(data.get(), logEntry); } catch (TimeoutException t) { if (timeout.compareTo(stateMachineDataReadTimeout) > 0) { getRaftLogMetrics().onStateMachineDataReadTimeout(); @@ -438,14 +504,14 @@ public LogEntryProto getEntry(TimeDuration timeout) throws RaftLogIOException, T if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } - final String err = getName() + ": Failed readStateMachineData for " + toLogEntryString(logEntry); + final String err = getName() + ": Failed readStateMachineData for " + this; LOG.error(err, e); throw new RaftLogIOException(err, JavaUtils.unwrapCompletionException(e)); } // by this time we have already read the state machine data, // so the log entry data should be set now if (LogProtoUtils.isStateMachineDataEmpty(entryProto)) { - final String err = getName() + ": 
State machine data not set for " + toLogEntryString(logEntry); + final String err = getName() + ": State machine data not set for " + this; LOG.error(err); throw new RaftLogIOException(err); } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/memory/MemoryRaftLog.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/memory/MemoryRaftLog.java index ebb1e27d77..3579bb1f37 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/memory/MemoryRaftLog.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/memory/MemoryRaftLog.java @@ -22,12 +22,15 @@ import org.apache.ratis.server.metrics.RaftLogMetricsBase; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.proto.RaftProtos.LogEntryProto; +import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.raftlog.RaftLogBase; import org.apache.ratis.server.raftlog.LogEntryHeader; +import org.apache.ratis.server.raftlog.RaftLogIOException; import org.apache.ratis.server.storage.RaftStorageMetadata; import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.util.AutoCloseableLock; import org.apache.ratis.util.Preconditions; +import org.apache.ratis.util.ReferenceCountedObject; import java.io.IOException; import java.util.ArrayList; @@ -40,6 +43,7 @@ /** * A simple RaftLog implementation in memory. Used only for testing. 
*/ +@SuppressWarnings({"deprecation", "try"}) public class MemoryRaftLog extends RaftLogBase { static class EntryList { private final List entries = new ArrayList<>(); @@ -62,18 +66,22 @@ int size() { void truncate(int index) { if (entries.size() > index) { - entries.subList(index, entries.size()).clear(); + clear(index, entries.size()); } } void purge(int index) { if (entries.size() > index) { - entries.subList(0, index).clear(); + clear(0, index); } } - void add(LogEntryProto entry) { - entries.add(entry); + void clear(int from, int to) { + entries.subList(from, to).clear(); + } + + void add(LogEntryProto entryRef) { + entries.add(entryRef); } } @@ -100,16 +108,35 @@ public RaftLogMetricsBase getRaftLogMetrics() { } @Override - public LogEntryProto get(long index) { + public LogEntryProto get(long index) throws RaftLogIOException { + final ReferenceCountedObject ref = retainLog(index); + try { + return LogProtoUtils.copy(ref.get()); + } finally { + ref.release(); + } + } + + @Override + public ReferenceCountedObject retainLog(long index) { checkLogState(); - try(AutoCloseableLock readLock = readLock()) { - return entries.get(Math.toIntExact(index)); + try (AutoCloseableLock readLock = readLock()) { + final LogEntryProto entry = entries.get(Math.toIntExact(index)); + final ReferenceCountedObject ref = ReferenceCountedObject.wrap(entry); + ref.retain(); + return ref; } } @Override - public EntryWithData getEntryWithData(long index) { - return newEntryWithData(get(index), null); + public EntryWithData getEntryWithData(long index) throws RaftLogIOException { + throw new UnsupportedOperationException("Use retainEntryWithData(" + index + ") instead."); + } + + @Override + public ReferenceCountedObject retainEntryWithData(long index) { + final ReferenceCountedObject ref = retainLog(index); + return newEntryWithData(ref); } @Override @@ -166,11 +193,15 @@ public TermIndex getLastEntryTermIndex() { } @Override - protected CompletableFuture appendEntryImpl(LogEntryProto 
entry, TransactionContext context) { + protected CompletableFuture appendEntryImpl(ReferenceCountedObject entryRef, + TransactionContext context) { checkLogState(); - try(AutoCloseableLock writeLock = writeLock()) { + LogEntryProto entry = entryRef.retain(); + try (AutoCloseableLock writeLock = writeLock()) { validateLogEntry(entry); entries.add(entry); + } finally { + entryRef.release(); } return CompletableFuture.completedFuture(entry.getIndex()); } @@ -181,12 +212,14 @@ public long getStartIndex() { } @Override - public List> appendImpl(List logEntryProtos) { + public List> appendImpl(ReferenceCountedObject> entriesRef) { checkLogState(); + final List logEntryProtos = entriesRef.retain(); if (logEntryProtos == null || logEntryProtos.isEmpty()) { + entriesRef.release(); return Collections.emptyList(); } - try(AutoCloseableLock writeLock = writeLock()) { + try (AutoCloseableLock writeLock = writeLock()) { // Before truncating the entries, we first need to check if some // entries are duplicated. 
If the leader sends entry 6, entry 7, then // entry 6 again, without this check the follower may truncate entry 7 @@ -214,10 +247,12 @@ public List> appendImpl(List logEntryProt } for (int i = index; i < logEntryProtos.size(); i++) { LogEntryProto logEntryProto = logEntryProtos.get(i); - this.entries.add(logEntryProto); + entries.add(LogProtoUtils.copy(logEntryProto)); futures.add(CompletableFuture.completedFuture(logEntryProto.getIndex())); } return futures; + } finally { + entriesRef.release(); } } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegment.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegment.java index 0750d2cc8c..6b1696b960 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegment.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegment.java @@ -26,9 +26,10 @@ import org.apache.ratis.server.raftlog.RaftLogIOException; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; -import org.apache.ratis.thirdparty.com.google.common.cache.CacheLoader; import org.apache.ratis.thirdparty.com.google.protobuf.CodedOutputStream; +import org.apache.ratis.util.CodeInjectionForTesting; import org.apache.ratis.util.FileUtils; +import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.SizeInBytes; @@ -37,13 +38,14 @@ import java.io.File; import java.io.IOException; -import java.util.ArrayList; +import java.nio.file.Path; import java.util.Comparator; -import java.util.List; +import java.util.HashMap; +import java.util.Iterator; import java.util.Map; import java.util.Objects; -import java.util.Optional; -import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentNavigableMap; +import 
java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -60,6 +62,8 @@ public final class LogSegment { static final Logger LOG = LoggerFactory.getLogger(LogSegment.class); + public static final String APPEND_RECORD = LogSegment.class.getSimpleName() + ".append"; + enum Op { LOAD_SEGMENT_FILE, REMOVE_CACHE, @@ -73,7 +77,7 @@ static long getEntrySize(LogEntryProto entry, Op op) { case CHECK_SEGMENT_FILE_FULL: case LOAD_SEGMENT_FILE: case WRITE_CACHE_WITH_STATE_MACHINE_CACHE: - Preconditions.assertTrue(entry == LogProtoUtils.removeStateMachineData(entry), + Preconditions.assertTrue(!LogProtoUtils.hasStateMachineData(entry), () -> "Unexpected LogEntryProto with StateMachine data: op=" + op + ", entry=" + entry); break; case WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE: @@ -109,6 +113,44 @@ long getOffset() { } } + private static class Records { + private final ConcurrentNavigableMap map = new ConcurrentSkipListMap<>(); + + int size() { + return map.size(); + } + + LogRecord getFirst() { + final Map.Entry first = map.firstEntry(); + return first != null? first.getValue() : null; + } + + LogRecord getLast() { + final Map.Entry last = map.lastEntry(); + return last != null? 
last.getValue() : null; + } + + LogRecord get(long i) { + return map.get(i); + } + + long append(LogRecord record) { + final long index = record.getTermIndex().getIndex(); + final LogRecord previous = map.put(index, record); + Preconditions.assertNull(previous, "previous"); + return index; + } + + LogRecord removeLast() { + final Map.Entry last = map.pollLastEntry(); + return Objects.requireNonNull(last, "last == null").getValue(); + } + + void clear() { + map.clear(); + } + } + static LogSegment newOpenSegment(RaftStorage storage, long start, SizeInBytes maxOpSize, SegmentedRaftLogMetrics raftLogMetrics) { Preconditions.assertTrue(start >= 0); @@ -133,8 +175,7 @@ public static int readSegmentFile(File file, LogSegmentStartEnd startEnd, SizeIn Consumer> entryConsumer) throws IOException { int count = 0; - try (SegmentedRaftLogInputStream in = new SegmentedRaftLogInputStream( - file, startEnd.getStartIndex(), startEnd.getEndIndex(), startEnd.isOpen(), maxOpSize, raftLogMetrics)) { + try(SegmentedRaftLogInputStream in = new SegmentedRaftLogInputStream(file, startEnd, maxOpSize, raftLogMetrics)) { for(LogEntryProto prev = null, next; (next = in.nextEntry()) != null; prev = next) { if (prev != null) { Preconditions.assertTrue(next.getIndex() == prev.getIndex() + 1, @@ -185,7 +226,8 @@ static LogSegment loadSegment(RaftStorage storage, File file, LogSegmentStartEnd if (entryCount == 0) { // The segment does not have any entries, delete the file. - FileUtils.deleteFile(file); + final Path deleted = FileUtils.deleteFile(file); + LOG.info("Deleted RaftLog segment since entry count is zero: startEnd={}, path={}", startEnd, deleted); return null; } else if (file.length() > segment.getTotalFileSize()) { // The segment has extra padding, truncate it. 
@@ -207,10 +249,12 @@ private void assertSegment(long expectedStart, int expectedEntryCount, boolean c final long expectedLastIndex = expectedStart + expectedEntryCount - 1; Preconditions.assertSame(expectedLastIndex, getEndIndex(), "Segment end index"); - final LogRecord last = getLastRecord(); + final LogRecord last = records.getLast(); if (last != null) { Preconditions.assertSame(expectedLastIndex, last.getTermIndex().getIndex(), "Index at the last record"); - Preconditions.assertSame(expectedStart, records.get(0).getTermIndex().getIndex(), "Index at the first record"); + final LogRecord first = records.getFirst(); + Objects.requireNonNull(first, "first record"); + Preconditions.assertSame(expectedStart, first.getTermIndex().getIndex(), "Index at the first record"); } if (!corrupted) { Preconditions.assertSame(expectedEnd, expectedLastIndex, "End/last Index"); @@ -224,66 +268,128 @@ private void assertSegment(long expectedStart, int expectedEntryCount, boolean c * * In the future we can make the cache loader configurable if necessary. */ - class LogEntryLoader extends CacheLoader { + class LogEntryLoader { private final SegmentedRaftLogMetrics raftLogMetrics; LogEntryLoader(SegmentedRaftLogMetrics raftLogMetrics) { this.raftLogMetrics = raftLogMetrics; } - @Override - public LogEntryProto load(LogRecord key) throws IOException { + ReferenceCountedObject load(TermIndex key) throws IOException { final File file = getFile(); // note the loading should not exceed the endIndex: it is possible that // the on-disk log file should be truncated but has not been done yet. 
- final AtomicReference toReturn = new AtomicReference<>(); + final AtomicReference> toReturn = new AtomicReference<>(); final LogSegmentStartEnd startEnd = LogSegmentStartEnd.valueOf(startIndex, endIndex, isOpen); readSegmentFile(file, startEnd, maxOpSize, getLogCorruptionPolicy(), raftLogMetrics, entryRef -> { final LogEntryProto entry = entryRef.retain(); - final TermIndex ti = TermIndex.valueOf(entry); - putEntryCache(ti, entryRef, Op.LOAD_SEGMENT_FILE); - if (ti.equals(key.getTermIndex())) { - toReturn.set(entry); + try { + final TermIndex ti = TermIndex.valueOf(entry); + putEntryCache(ti, entryRef, Op.LOAD_SEGMENT_FILE); + if (ti.equals(key)) { + entryRef.retain(); + toReturn.set(entryRef); + } + } finally { + entryRef.release(); } }); loadingTimes.incrementAndGet(); - return Objects.requireNonNull(toReturn.get()); + final ReferenceCountedObject proto = toReturn.get(); + if (proto == null) { + throw new RaftLogIOException("Failed to load log entry " + key); + } + return proto; } } - static class EntryCache { - private final Map> map = new ConcurrentHashMap<>(); + private static class Item { + private final AtomicReference> ref; + private final long serializedSize; + + Item(ReferenceCountedObject obj, long serializedSize) { + this.ref = new AtomicReference<>(obj); + this.serializedSize = serializedSize; + } + + ReferenceCountedObject get() { + return ref.get(); + } + + long release() { + final ReferenceCountedObject entry = ref.getAndSet(null); + if (entry == null) { + return 0; + } + entry.release(); + return serializedSize; + } + } + + class EntryCache { + private Map map = new HashMap<>(); private final AtomicLong size = new AtomicLong(); + @Override + public String toString() { + return JavaUtils.getClassSimpleName(getClass()) + "-" + LogSegment.this; + } + long size() { return size.get(); } - LogEntryProto get(TermIndex ti) { - return Optional.ofNullable(map.get(ti)) - .map(ReferenceCountedObject::get) - .orElse(null); + synchronized 
ReferenceCountedObject get(TermIndex ti) { + if (map == null) { + return null; + } + final Item ref = map.get(ti); + return ref == null? null: ref.get(); } - void clear() { - map.values().forEach(ReferenceCountedObject::release); - map.clear(); - size.set(0); + /** After close(), the cache CANNOT be used again. */ + synchronized void close() { + if (map == null) { + return; + } + evict(); + map = null; + LOG.info("Successfully closed {}", this); + } + + /** After evict(), the cache can be used again. */ + synchronized void evict() { + if (map == null) { + return; + } + for (Iterator> i = map.entrySet().iterator(); i.hasNext(); i.remove()) { + release(i.next().getValue()); + } } - void put(TermIndex key, ReferenceCountedObject valueRef, Op op) { + synchronized void put(TermIndex key, ReferenceCountedObject valueRef, Op op) { + if (map == null) { + return; + } valueRef.retain(); - Optional.ofNullable(map.put(key, valueRef)).ifPresent(this::release); - size.getAndAdd(getEntrySize(valueRef.get(), op)); + final long serializedSize = getEntrySize(valueRef.get(), op); + release(map.put(key, new Item(valueRef, serializedSize))); + size.getAndAdd(serializedSize); } - private void release(ReferenceCountedObject entry) { - size.getAndAdd(-getEntrySize(entry.get(), Op.REMOVE_CACHE)); - entry.release(); + private void release(Item ref) { + if (ref == null) { + return; + } + final long serializedSize = ref.release(); + size.getAndAdd(-serializedSize); } - void remove(TermIndex key) { - Optional.ofNullable(map.remove(key)).ifPresent(this::release); + synchronized void remove(TermIndex key) { + if (map == null) { + return; + } + release(map.remove(key)); } } @@ -294,10 +400,10 @@ File getFile() { private volatile boolean isOpen; private long totalFileSize = SegmentedRaftLogFormat.getHeaderLength(); /** Segment start index, inclusive. */ - private long startIndex; + private final long startIndex; /** Segment end index, inclusive. 
*/ private volatile long endIndex; - private RaftStorage storage; + private final RaftStorage storage; private final SizeInBytes maxOpSize; private final LogEntryLoader cacheLoader; /** later replace it with a metric */ @@ -306,7 +412,7 @@ File getFile() { /** * the list of records is more like the index of a segment */ - private final List records = new ArrayList<>(); + private final Records records = new Records(); /** * the entryCache caches the content of log entries. */ @@ -351,10 +457,14 @@ private void append(Op op, ReferenceCountedObject entryRef, boolean keepEntryInCache, Consumer logConsumer) { final LogEntryProto entry = entryRef.retain(); try { - final LogRecord record = appendLogRecord(op, entry); + final LogRecord record = new LogRecord(totalFileSize, entry); if (keepEntryInCache) { putEntryCache(record.getTermIndex(), entryRef, op); + CodeInjectionForTesting.execute(APPEND_RECORD, this, record.getTermIndex()); } + appendLogRecord(op, record); + totalFileSize += getEntrySize(entry, op); + if (logConsumer != null) { logConsumer.accept(entry); } @@ -363,60 +473,57 @@ private void append(Op op, ReferenceCountedObject entryRef, } } + private void appendLogRecord(Op op, LogRecord record) { + Objects.requireNonNull(record, "record == null"); + final LogRecord currentLast = records.getLast(); - private LogRecord appendLogRecord(Op op, LogEntryProto entry) { - Objects.requireNonNull(entry, "entry == null"); - if (records.isEmpty()) { - Preconditions.assertTrue(entry.getIndex() == startIndex, - "gap between start index %s and first entry to append %s", - startIndex, entry.getIndex()); + final long index = record.getTermIndex().getIndex(); + if (currentLast == null) { + Preconditions.assertTrue(index == startIndex, + "%s: gap between start index %s and the entry to append %s", op, startIndex, index); + } else { + final long currentLastIndex = currentLast.getTermIndex().getIndex(); + Preconditions.assertTrue(index == currentLastIndex + 1, + "%s: gap between last 
entry %s and the entry to append %s", op, currentLastIndex, index); } - final LogRecord currentLast = getLastRecord(); - if (currentLast != null) { - Preconditions.assertTrue(entry.getIndex() == currentLast.getTermIndex().getIndex() + 1, - "gap between entries %s and %s", entry.getIndex(), currentLast.getTermIndex().getIndex()); - } - - final LogRecord record = new LogRecord(totalFileSize, entry); - records.add(record); - totalFileSize += getEntrySize(entry, op); - endIndex = entry.getIndex(); - return record; + records.append(record); + endIndex = index; } - LogEntryProto getEntryFromCache(TermIndex ti) { + ReferenceCountedObject getEntryFromCache(TermIndex ti) { return entryCache.get(ti); } /** * Acquire LogSegment's monitor so that there is no concurrent loading. */ - synchronized LogEntryProto loadCache(LogRecord record) throws RaftLogIOException { - LogEntryProto entry = entryCache.get(record.getTermIndex()); + synchronized ReferenceCountedObject loadCache(TermIndex ti) throws RaftLogIOException { + final ReferenceCountedObject entry = entryCache.get(ti); if (entry != null) { - return entry; + try { + entry.retain(); + return entry; + } catch (IllegalStateException ignored) { + // The entry could be removed from the cache and released. + // The exception can be safely ignored since it is the same as cache miss. + } } try { - return cacheLoader.load(record); + return cacheLoader.load(ti); + } catch (RaftLogIOException e) { + throw e; } catch (Exception e) { - throw new RaftLogIOException(e); + throw new RaftLogIOException("Failed to loadCache for log entry " + ti, e); } } LogRecord getLogRecord(long index) { - if (index >= startIndex && index <= endIndex) { - return records.get(Math.toIntExact(index - startIndex)); - } - return null; - } - - private LogRecord getLastRecord() { - return records.isEmpty() ? 
null : records.get(records.size() - 1); + return records.get(index); } TermIndex getLastTermIndex() { - LogRecord last = getLastRecord(); + final LogRecord last = records.getLast(); return last == null ? null : last.getTermIndex(); } @@ -434,7 +541,8 @@ long getTotalCacheSize() { synchronized void truncate(long fromIndex) { Preconditions.assertTrue(fromIndex >= startIndex && fromIndex <= endIndex); for (long index = endIndex; index >= fromIndex; index--) { - LogRecord removed = records.remove(Math.toIntExact(index - startIndex)); + final LogRecord removed = records.removeLast(); + Preconditions.assertSame(index, removed.getTermIndex().getIndex(), "removedIndex"); removeEntryCache(removed.getTermIndex()); totalFileSize = removed.offset; } @@ -470,7 +578,7 @@ private int compareTo(Long l) { synchronized void clear() { records.clear(); - evictCache(); + entryCache.close(); endIndex = startIndex - 1; } @@ -479,7 +587,7 @@ int getLoadingTimes() { } void evictCache() { - entryCache.clear(); + entryCache.evict(); } void putEntryCache(TermIndex key, ReferenceCountedObject valueRef, Op op) { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegmentStartEnd.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegmentStartEnd.java index 6fbaeab62a..4f0734ca9e 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegmentStartEnd.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/LogSegmentStartEnd.java @@ -22,14 +22,12 @@ import org.apache.ratis.util.Preconditions; import java.io.File; -import java.util.Comparator; import java.util.Objects; -import java.util.Optional; import java.util.regex.Pattern; /** * The start index and an end index of a log segment. - * + *

* This is a value-based class. */ public final class LogSegmentStartEnd implements Comparable { @@ -76,21 +74,24 @@ static LogSegmentStartEnd valueOf(long startIndex, long endIndex, boolean isOpen private final Long endIndex; private LogSegmentStartEnd(long startIndex, Long endIndex) { - Preconditions.assertTrue(startIndex >= RaftLog.LEAST_VALID_LOG_INDEX); - Preconditions.assertTrue(endIndex == null || endIndex >= startIndex); this.startIndex = startIndex; this.endIndex = endIndex; + + Preconditions.assertTrue(startIndex >= RaftLog.LEAST_VALID_LOG_INDEX, this); + if (endIndex != null) { + Preconditions.assertTrue(endIndex >= startIndex, this); + } } - public long getStartIndex() { + long getStartIndex() { return startIndex; } - public long getEndIndex() { - return Optional.ofNullable(endIndex).orElse(RaftLog.INVALID_LOG_INDEX); + long getEndIndex() { + return Objects.requireNonNull(endIndex, "endIndex"); } - public boolean isOpen() { + boolean isOpen() { return endIndex == null; } @@ -108,9 +109,21 @@ File getFile(RaftStorage storage) { @Override public int compareTo(LogSegmentStartEnd that) { - return Comparator.comparingLong(LogSegmentStartEnd::getStartIndex) - .thenComparingLong(LogSegmentStartEnd::getEndIndex) - .compare(this, that); + if (this == that) { + return 0; + } + // startIndex always non-null + final int diff = Long.compare(this.getStartIndex(), that.getStartIndex()); + if (diff != 0) { + return diff; + } + + // same startIndex, compare endIndex + if (this.isOpen()) { + return that.isOpen()? 0 : -1; //open first + } else { + return that.isOpen() ? 1 : Long.compare(this.endIndex, that.endIndex); + } } @Override @@ -131,6 +144,6 @@ public int hashCode() { @Override public String toString() { - return startIndex + "-" + Optional.ofNullable(endIndex).map(Object::toString).orElse(""); + return startIndex + "-" + (endIndex != null? 
endIndex : ""); } } \ No newline at end of file diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLog.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLog.java index 7010189671..895531c897 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLog.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLog.java @@ -36,6 +36,7 @@ import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.TransactionContext; +import org.apache.ratis.statemachine.impl.TransactionContextImpl; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.AutoCloseableLock; import org.apache.ratis.util.AwaitToRun; @@ -54,7 +55,6 @@ import java.util.concurrent.CompletionException; import java.util.function.BiFunction; import java.util.function.Consumer; -import java.util.function.Function; import java.util.function.LongSupplier; import org.apache.ratis.util.UncheckedAutoCloseable; @@ -101,6 +101,9 @@ void done() { completeFuture(); } + void discard() { + } + final void completeFuture() { final boolean completed = future.complete(getEndIndex()); Preconditions.assertTrue(completed, @@ -180,11 +183,17 @@ public long getLastAppliedIndex() { @Override public void notifyTruncatedLogEntry(TermIndex ti) { + ReferenceCountedObject ref = null; try { - final LogEntryProto entry = get(ti.getIndex()); + ref = retainLog(ti.getIndex()); + final LogEntryProto entry = ref != null ? 
ref.get() : null; notifyTruncatedLogEntry.accept(entry); } catch (RaftLogIOException e) { LOG.error("{}: Failed to read log {}", getName(), ti, e); + } finally { + if (ref != null) { + ref.release(); + } } } @@ -205,6 +214,7 @@ public TransactionContext getTransactionContext(LogEntryProto entry, boolean cre private final boolean stateMachineCachingEnabled; private final SegmentedRaftLogMetrics metrics; + @SuppressWarnings({"squid:S2095"}) // Suppress closeable warning private SegmentedRaftLog(Builder b) { super(b.memberId, b.snapshotIndexSupplier, b.properties); this.metrics = new SegmentedRaftLogMetrics(b.memberId); @@ -265,61 +275,86 @@ private void loadLogSegments(long lastIndexInSnapshot, if (!cache.isEmpty() && cache.getEndIndex() < lastIndexInSnapshot) { LOG.warn("End log index {} is smaller than last index in snapshot {}", cache.getEndIndex(), lastIndexInSnapshot); - purgeImpl(lastIndexInSnapshot); + purgeImpl(lastIndexInSnapshot).whenComplete((purged, e) -> updatePurgeIndex(purged)); } } } @Override + @SuppressWarnings("deprecation") public LogEntryProto get(long index) throws RaftLogIOException { + final ReferenceCountedObject ref = retainLog(index); + if (ref == null) { + return null; + } + try { + return LogProtoUtils.copy(ref.get()); + } finally { + ref.release(); + } + } + + @Override + public ReferenceCountedObject retainLog(long index) throws RaftLogIOException { checkLogState(); - final LogSegment segment; - final LogRecord record; - try (AutoCloseableLock readLock = readLock()) { - segment = cache.getSegment(index); - if (segment == null) { - return null; - } - record = segment.getLogRecord(index); - if (record == null) { - return null; - } - final LogEntryProto entry = segment.getEntryFromCache(record.getTermIndex()); - if (entry != null) { + final LogSegment segment = cache.getSegment(index); + if (segment == null) { + return null; + } + final LogRecord record = segment.getLogRecord(index); + if (record == null) { + return null; + } + final 
TermIndex ti = record.getTermIndex(); + final ReferenceCountedObject entry = segment.getEntryFromCache(ti); + if (entry != null) { + try { + entry.retain(); getRaftLogMetrics().onRaftLogCacheHit(); return entry; + } catch (IllegalStateException ignored) { + // The entry could be removed from the cache and released. + // The exception can be safely ignored since it is the same as cache miss. } } // the entry is not in the segment's cache. Load the cache without holding the lock. getRaftLogMetrics().onRaftLogCacheMiss(); cacheEviction.signal(); - return segment.loadCache(record); + return segment.loadCache(ti); } @Override + @SuppressWarnings("deprecation") public EntryWithData getEntryWithData(long index) throws RaftLogIOException { - final LogEntryProto entry = get(index); - if (entry == null) { + throw new UnsupportedOperationException("Use retainEntryWithData(" + index + ") instead."); + } + + @Override + public ReferenceCountedObject retainEntryWithData(long index) throws RaftLogIOException { + final ReferenceCountedObject entryRef = retainLog(index); + if (entryRef == null) { throw new RaftLogIOException("Log entry not found: index = " + index); } + + final LogEntryProto entry = entryRef.get(); if (!LogProtoUtils.isStateMachineDataEmpty(entry)) { - return newEntryWithData(entry, null); + return newEntryWithData(entryRef); } try { - CompletableFuture future = null; + CompletableFuture> future = null; if (stateMachine != null) { - future = stateMachine.data().read(entry, server.getTransactionContext(entry, false)).exceptionally(ex -> { + future = stateMachine.data().retainRead(entry, server.getTransactionContext(entry, false)).exceptionally(ex -> { stateMachine.event().notifyLogFailed(ex, entry); throw new CompletionException("Failed to read state machine data for log entry " + entry, ex); }); } - return newEntryWithData(entry, future); + return future != null? 
newEntryWithData(entryRef, future): newEntryWithData(entryRef); } catch (Exception e) { - final String err = getName() + ": Failed readStateMachineData for " + - LogProtoUtils.toLogEntryString(entry); + final String err = getName() + ": Failed readStateMachineData for " + toLogEntryString(entry); LOG.error(err, e); + entryRef.release(); throw new RaftLogIOException(err, JavaUtils.unwrapCompletionException(e)); } } @@ -339,26 +374,19 @@ private void checkAndEvictCache() { @Override public TermIndex getTermIndex(long index) { checkLogState(); - try(AutoCloseableLock readLock = readLock()) { - LogRecord record = cache.getLogRecord(index); - return record != null ? record.getTermIndex() : null; - } + return cache.getTermIndex(index); } @Override public LogEntryHeader[] getEntries(long startIndex, long endIndex) { checkLogState(); - try(AutoCloseableLock readLock = readLock()) { - return cache.getTermIndices(startIndex, endIndex); - } + return cache.getTermIndices(startIndex, endIndex); } @Override public TermIndex getLastEntryTermIndex() { checkLogState(); - try(AutoCloseableLock readLock = readLock()) { - return cache.getLastTermIndex(); - } + return cache.getLastTermIndex(); } @Override @@ -380,18 +408,21 @@ protected CompletableFuture purgeImpl(long index) { try (AutoCloseableLock writeLock = writeLock()) { SegmentedRaftLogCache.TruncationSegments ts = cache.purge(index); updateSnapshotIndexFromStateMachine(); - LOG.debug("purging segments:{}", ts); if (ts != null) { + LOG.info("{}: {}", getName(), ts); Task task = fileLogWorker.purge(ts); return task.getFuture(); } } + LOG.debug("{}: purge({}) found nothing to purge.", getName(), index); return CompletableFuture.completedFuture(index); } @Override - protected CompletableFuture appendEntryImpl(LogEntryProto entry, TransactionContext context) { + protected CompletableFuture appendEntryImpl(ReferenceCountedObject entryRef, + TransactionContext context) { checkLogState(); + LogEntryProto entry = entryRef.retain(); if 
(LOG.isTraceEnabled()) { LOG.trace("{}: appendEntry {}", getName(), LogProtoUtils.toLogEntryString(entry)); } @@ -426,20 +457,25 @@ protected CompletableFuture appendEntryImpl(LogEntryProto entry, Transacti // If the entry has state machine data, then the entry should be inserted // to statemachine first and then to the cache. Not following the order // will leave a spurious entry in the cache. - final Task write = fileLogWorker.writeLogEntry(entry, removedStateMachineData, context); - final Function> wrap = context != null ? - context::wrap : ReferenceCountedObject::wrap; + final Task write = fileLogWorker.writeLogEntry(entryRef, removedStateMachineData, context); if (stateMachineCachingEnabled) { // The stateMachineData will be cached inside the StateMachine itself. - cache.appendEntry(LogSegment.Op.WRITE_CACHE_WITH_STATE_MACHINE_CACHE, wrap.apply(removedStateMachineData)); + if (removedStateMachineData != entry) { + cache.appendEntry(LogSegment.Op.WRITE_CACHE_WITH_STATE_MACHINE_CACHE, + ReferenceCountedObject.wrap(removedStateMachineData)); + } else { + cache.appendEntry(LogSegment.Op.WRITE_CACHE_WITH_STATE_MACHINE_CACHE, + ReferenceCountedObject.wrap(LogProtoUtils.copy(removedStateMachineData))); + } } else { - cache.appendEntry(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, wrap.apply(entry) - ); + cache.appendEntry(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, entryRef); } return write.getFuture().whenComplete((clientReply, exception) -> appendEntryTimerContext.stop()); } catch (Exception e) { - LOG.error("{}: Failed to append {}", getName(), LogProtoUtils.toLogEntryString(entry), e); + LOG.error("{}: Failed to append {}", getName(), toLogEntryString(entry), e); throw e; + } finally { + entryRef.release(); } } @@ -456,12 +492,14 @@ private boolean isSegmentFull(LogSegment segment, LogEntryProto entry) { } @Override - public List> appendImpl(List entries) { + protected List> appendImpl(ReferenceCountedObject> entriesRef) { checkLogState(); + 
final List entries = entriesRef.retain(); if (entries == null || entries.isEmpty()) { + entriesRef.release(); return Collections.emptyList(); } - try(AutoCloseableLock writeLock = writeLock()) { + try (AutoCloseableLock writeLock = writeLock()) { final TruncateIndices ti = cache.computeTruncateIndices(server::notifyTruncatedLogEntry, entries); final long truncateIndex = ti.getTruncateIndex(); final int index = ti.getArrayIndex(); @@ -476,9 +514,12 @@ public List> appendImpl(List entries) { } for (int i = index; i < entries.size(); i++) { final LogEntryProto entry = entries.get(i); - futures.add(appendEntry(entry, server.getTransactionContext(entry, true))); + TransactionContextImpl transactionContext = (TransactionContextImpl) server.getTransactionContext(entry, true); + futures.add(appendEntry(entriesRef.delegate(entry), transactionContext)); } return futures; + } finally { + entriesRef.release(); } } @@ -521,12 +562,13 @@ public CompletableFuture onSnapshotInstalled(long lastSnapshotIndex) { cacheEviction.signal(); } } - return purgeImpl(lastSnapshotIndex); + return purgeImpl(lastSnapshotIndex).whenComplete((purged, e) -> updatePurgeIndex(purged)); } @Override public void close() throws IOException { try(AutoCloseableLock writeLock = writeLock()) { + LOG.info("Start closing {}", this); super.close(); cacheEviction.close(); cache.close(); @@ -534,6 +576,7 @@ public void close() throws IOException { fileLogWorker.close(); storage.close(); getRaftLogMetrics().unregister(); + LOG.info("Successfully closed {}", this); } SegmentedRaftLogCache getRaftLogCache() { @@ -542,7 +585,8 @@ SegmentedRaftLogCache getRaftLogCache() { @Override public String toLogEntryString(LogEntryProto logEntry) { - return LogProtoUtils.toLogEntryString(logEntry, stateMachine::toStateMachineLogEntryString); + return LogProtoUtils.toLogEntryString(logEntry, stateMachine != null ? 
+ stateMachine::toStateMachineLogEntryString : null); } public static Builder newBuilder() { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogCache.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogCache.java index 1d08316fda..8d79c58d37 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogCache.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogCache.java @@ -39,9 +39,19 @@ import java.io.File; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.Optional; import java.util.concurrent.CopyOnWriteArrayList; import java.util.function.Consumer; +import java.util.stream.Collectors; /** * In-memory RaftLog Cache. 
Currently we provide a simple implementation that @@ -123,9 +133,14 @@ public String toString() { } static class TruncationSegments { + private final String reason; private final SegmentFileInfo toTruncate; // name of the file to be truncated private final SegmentFileInfo[] toDelete; // names of the files to be deleted + String getReason() { + return reason; + } + public SegmentFileInfo getToTruncate() { return toTruncate; } @@ -134,10 +149,10 @@ public SegmentFileInfo[] getToDelete() { return toDelete; } - TruncationSegments(SegmentFileInfo toTruncate, - List toDelete) { - this.toDelete = SegmentFileInfo.toSortedArray(toDelete); + TruncationSegments(String reason, SegmentFileInfo toTruncate, List toDelete) { + this.reason = reason; this.toTruncate = toTruncate; + this.toDelete = SegmentFileInfo.toSortedArray(toDelete); } long maxEndIndex() { @@ -153,8 +168,9 @@ long maxEndIndex() { @Override public String toString() { - return "toTruncate: " + toTruncate - + "\n toDelete: " + Arrays.toString(toDelete); + return reason + + "\n toTruncate: " + toTruncate + + "\n toDelete: " + Arrays.toString(toDelete); } } @@ -298,6 +314,7 @@ void clear() { } TruncationSegments truncate(long index, LogSegment openSegment, Runnable clearOpenSegment) { + final String reason = "truncate(" + index + ")"; try(AutoCloseableLock writeLock = writeLock()) { final int segmentIndex = binarySearch(index); if (segmentIndex == -segments.size() - 1) { @@ -306,7 +323,7 @@ TruncationSegments truncate(long index, LogSegment openSegment, Runnable clearOp if (index == openSegment.getStartIndex()) { // the open segment should be deleted final SegmentFileInfo deleted = deleteOpenSegment(openSegment, clearOpenSegment); - return new TruncationSegments(null, Collections.singletonList(deleted)); + return new TruncationSegments(reason, null, Collections.singletonList(deleted)); } else { openSegment.truncate(index); Preconditions.assertTrue(!openSegment.isOpen(), @@ -316,7 +333,7 @@ TruncationSegments 
truncate(long index, LogSegment openSegment, Runnable clearOp segments.add(openSegment); sizeInBytes += openSegment.getTotalFileSize(); clearOpenSegment.run(); - return new TruncationSegments(info, Collections.emptyList()); + return new TruncationSegments(reason, info, Collections.emptyList()); } } } else if (segmentIndex >= 0) { @@ -341,7 +358,7 @@ TruncationSegments truncate(long index, LogSegment openSegment, Runnable clearOp } SegmentFileInfo t = ts.numOfEntries() == 0? null: new SegmentFileInfo(ts.getStartIndex(), oldEnd, false, ts.getTotalFileSize(), ts.getEndIndex()); - return new TruncationSegments(t, list); + return new TruncationSegments(reason, t, list); } return null; } @@ -350,12 +367,14 @@ TruncationSegments truncate(long index, LogSegment openSegment, Runnable clearOp TruncationSegments purge(long index) { try (AutoCloseableLock writeLock = writeLock()) { int segmentIndex = binarySearch(index); - List list = new ArrayList<>(); + if (segmentIndex == -1) { + // nothing to purge + return null; + } + List list = new LinkedList<>(); if (segmentIndex == -segments.size() - 1) { - for (LogSegment ls : segments) { - list.add(SegmentFileInfo.newClosedSegmentFileInfo(ls)); - } + list.addAll(segments); segments.clear(); sizeInBytes = 0; } else if (segmentIndex >= 0) { @@ -368,13 +387,16 @@ TruncationSegments purge(long index) { for (int i = 0; i <= startIndex; i++) { LogSegment segment = segments.remove(0); // must remove the first segment to avoid gaps. sizeInBytes -= segment.getTotalFileSize(); - list.add(SegmentFileInfo.newClosedSegmentFileInfo(segment)); + list.add(segment); } } else { throw new IllegalStateException("Unexpected gap in segments: binarySearch(" + index + ") returns " + segmentIndex + ", segments=" + segments); } - return list.isEmpty() ? 
null : new TruncationSegments(null, list); + list.forEach(LogSegment::evictCache); + List toDelete = list.stream().map(SegmentFileInfo::newClosedSegmentFileInfo) + .collect(Collectors.toList()); + return list.isEmpty() ? null : new TruncationSegments("purge(" + index + ")", null, toDelete); } } @@ -394,6 +416,7 @@ public String toString() { } private final String name; + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile LogSegment openSegment; private final LogSegmentList closedSegments; private final RaftStorage storage; @@ -520,16 +543,21 @@ void rollOpenSegment(boolean createNewOpen) { } LogSegment getSegment(long index) { - if (openSegment != null && index >= openSegment.getStartIndex()) { - return openSegment; + final LogSegment open = this.openSegment; + if (open != null && index >= open.getStartIndex()) { + return open; } else { return closedSegments.search(index); } } - LogRecord getLogRecord(long index) { + TermIndex getTermIndex(long index) { LogSegment segment = getSegment(index); - return segment == null ? null : segment.getLogRecord(index); + if (segment == null) { + return null; + } + final LogRecord record = segment.getLogRecord(index); + return record != null ? record.getTermIndex() : null; } /** @@ -600,7 +628,7 @@ TermIndex getLastTermIndex() { void appendEntry(LogSegment.Op op, ReferenceCountedObject entry) { // SegmentedRaftLog does the segment creation/rolling work. Here we just // simply append the entry into the open segment. 
- Preconditions.assertNotNull(openSegment, "openSegment"); + Objects.requireNonNull(openSegment, "openSegment == null"); openSegment.appendToOpenSegment(op, entry); } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogInputStream.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogInputStream.java index 481f837f5b..3cc8767fa4 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogInputStream.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogInputStream.java @@ -21,6 +21,7 @@ import java.io.EOFException; import java.io.File; import java.io.IOException; +import java.nio.channels.ClosedByInterruptException; import java.util.Optional; import org.apache.ratis.proto.RaftProtos.LogEntryProto; @@ -33,6 +34,7 @@ import org.slf4j.LoggerFactory; import static org.apache.ratis.server.raftlog.RaftLog.INVALID_LOG_INDEX; +import static org.apache.ratis.server.raftlog.RaftLog.LEAST_VALID_LOG_INDEX; public class SegmentedRaftLogInputStream implements Closeable { static final Logger LOG = LoggerFactory.getLogger(SegmentedRaftLogInputStream.class); @@ -62,27 +64,17 @@ boolean hasCorruptHeader() { } private final File logFile; - private final long startIndex; - private final long endIndex; - private final boolean isOpen; + private final LogSegmentStartEnd startEnd; private final OpenCloseState state; private SegmentedRaftLogReader reader; private final SizeInBytes maxOpSize; private final SegmentedRaftLogMetrics raftLogMetrics; - SegmentedRaftLogInputStream(File log, long startIndex, long endIndex, boolean isOpen, + SegmentedRaftLogInputStream(File log, LogSegmentStartEnd startEnd, SizeInBytes maxOpSize, SegmentedRaftLogMetrics raftLogMetrics) { this.maxOpSize = maxOpSize; - if (isOpen) { - Preconditions.assertTrue(endIndex == INVALID_LOG_INDEX); - } else { - Preconditions.assertTrue(endIndex >= startIndex); - } 
- this.logFile = log; - this.startIndex = startIndex; - this.endIndex = endIndex; - this.isOpen = isOpen; + this.startEnd = startEnd; this.state = new OpenCloseState(getName()); this.raftLogMetrics = raftLogMetrics; } @@ -104,14 +96,6 @@ private void init() throws IOException { } } - long getStartIndex() { - return startIndex; - } - - long getEndIndex() { - return endIndex; - } - String getName() { return logFile.getName(); } @@ -121,7 +105,11 @@ public LogEntryProto nextEntry() throws IOException { try { init(); } catch (Exception e) { - LOG.error("caught exception initializing " + this, e); + if (e.getCause() instanceof ClosedByInterruptException) { + LOG.warn("Initialization is interrupted: {}", this, e); + } else { + LOG.error("Failed to initialize {}", this, e); + } throw IOUtils.asIOException(e); } } @@ -131,7 +119,7 @@ public LogEntryProto nextEntry() throws IOException { final LogEntryProto entry = reader.readEntry(); if (entry != null) { long index = entry.getIndex(); - if (!isOpen() && index >= endIndex) { + if (!startEnd.isOpen() && index >= startEnd.getEndIndex()) { /* * The end index may be derived from the segment recovery * process. It is possible that we still have some uncleaned garbage @@ -139,8 +127,8 @@ public LogEntryProto nextEntry() throws IOException { */ long skipAmt = logFile.length() - reader.getPos(); if (skipAmt > 0) { - LOG.debug("skipping {} bytes at the end of log '{}': reached" + - " entry {} out of {}", skipAmt, getName(), index, endIndex); + LOG.info("Skipping {} bytes at the end of log '{}': reached entry {} out of [{}]", + skipAmt, getName(), index, startEnd); reader.skipFully(skipAmt); } } @@ -172,10 +160,6 @@ public void close() throws IOException { } } - boolean isOpen() { - return isOpen; - } - @Override public String toString() { return getName(); @@ -188,24 +172,18 @@ public String toString() { * ID. The file portion beyond this ID is * potentially being updated. 
* @return Result of the validation - * @throws IOException */ static LogValidation scanEditLog(File file, long maxTxIdToScan, SizeInBytes maxOpSize) throws IOException { - SegmentedRaftLogInputStream in; - try { - in = new SegmentedRaftLogInputStream(file, INVALID_LOG_INDEX, INVALID_LOG_INDEX, false, maxOpSize, null); - // read the header, initialize the inputstream - in.init(); - } catch (EOFException e) { - LOG.warn("Log file " + file + " has no valid header", e); - return new LogValidation(0, INVALID_LOG_INDEX, true); - } - - try { + final LogSegmentStartEnd startEnd = LogSegmentStartEnd.valueOf(LEAST_VALID_LOG_INDEX); + try(SegmentedRaftLogInputStream in = new SegmentedRaftLogInputStream(file, startEnd, maxOpSize, null)) { + try { + in.init(); + } catch (EOFException e) { + LOG.warn("Invalid header for RaftLog segment {}", file, e); + return new LogValidation(0, INVALID_LOG_INDEX, true); + } return scanEditLog(in, maxTxIdToScan); - } finally { - IOUtils.cleanup(LOG, in); } } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogReader.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogReader.java index 7d03105b95..57baffb2fd 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogReader.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogReader.java @@ -42,6 +42,7 @@ import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.channels.ClosedByInterruptException; import java.util.Optional; import java.util.zip.Checksum; @@ -169,7 +170,14 @@ public long skip(long amt) throws IOException { */ boolean verifyHeader() throws IOException { final int headerLength = SegmentedRaftLogFormat.getHeaderLength(); - final int readLength = in.read(temp, 0, headerLength); + final int readLength; + try{ + readLength = in.read(temp, 0, headerLength); + } catch 
(ClosedByInterruptException e) { + Thread.currentThread().interrupt(); + throw new IOException("Interrupted while reading the header of " + file, e); + } + Preconditions.assertTrue(readLength <= headerLength); final int matchLength = SegmentedRaftLogFormat.matchHeader(temp, 0, readLength); Preconditions.assertTrue(matchLength <= readLength); diff --git a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogWorker.java b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogWorker.java index 0d1ea763b6..3382eb9fae 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogWorker.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogWorker.java @@ -38,6 +38,7 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.StateMachine.DataStream; import org.apache.ratis.statemachine.TransactionContext; +import org.apache.ratis.thirdparty.io.netty.util.internal.PlatformDependent; import org.apache.ratis.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,11 +46,15 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.file.Path; import java.util.LinkedList; import java.util.Objects; import java.util.Optional; import java.util.Queue; -import java.util.concurrent.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.function.Supplier; @@ -62,6 +67,9 @@ class SegmentedRaftLogWorker { static final TimeDuration ONE_SECOND = TimeDuration.valueOf(1, TimeUnit.SECONDS); + private static final String CLASS_NAME = JavaUtils.getClassSimpleName(SegmentedRaftLogWorker.class); + static final String RUN_WORKER = CLASS_NAME + ".runWorker"; + static class 
StateMachineDataPolicy { private final boolean sync; private final TimeDuration syncTimeout; @@ -143,6 +151,7 @@ synchronized void updateIndex(long i) { private volatile boolean running = true; private final ExecutorService workerThreadExecutor; private final RaftStorage storage; + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile SegmentedRaftLogOutputStream out; private final Runnable submitUpdateCommitEvent; private final StateMachine stateMachine; @@ -237,11 +246,13 @@ void start(long latestIndex, long evictIndex, File openSegmentFile) throws IOExc } void close() { + queue.close(); this.running = false; + ConcurrentUtils.shutdownAndWait(TimeDuration.ONE_MINUTE, workerThreadExecutor, + timeout -> LOG.warn("{}: shutdown timeout in {}", name, timeout)); Optional.ofNullable(flushExecutor).ifPresent(ExecutorService::shutdown); - ConcurrentUtils.shutdownAndWait(TimeDuration.ONE_SECOND.multiply(3), - workerThreadExecutor, timeout -> LOG.warn("{}: shutdown timeout in " + timeout, name)); IOUtils.cleanup(LOG, out); + PlatformDependent.freeDirectBuffer(writeBuffer); LOG.info("{} close()", name); } @@ -281,6 +292,7 @@ private Task addIOTask(Task task) { LOG.error("Failed to add IO task {}", task, e); Optional.ofNullable(server).ifPresent(RaftServer.Division::close); } + task.discard(); } task.startTimerOnEnqueue(raftLogMetrics.getEnqueuedTimer()); return task; @@ -294,6 +306,7 @@ private void run() { // if and when a log task encounters an exception RaftLogIOException logIOException = null; + CodeInjectionForTesting.execute(RUN_WORKER, server == null ? null : server.getId(), null, queue); while (running) { try { Task task = queue.poll(ONE_SECOND); @@ -333,7 +346,7 @@ private void run() { LOG.info(Thread.currentThread().getName() + " was interrupted, exiting. 
There are " + queue.getNumElements() + " tasks remaining in the queue."); - return; + break; } catch (Exception e) { if (!running) { LOG.info("{} got closed and hit exception", @@ -344,6 +357,8 @@ private void run() { } } } + + queue.clear(Task::discard); } private boolean shouldFlush() { @@ -352,7 +367,7 @@ private boolean shouldFlush() { } else if (pendingFlushNum >= forceSyncNum) { return true; } - return pendingFlushNum > 0 && queue.isEmpty(); + return pendingFlushNum > 0 && !(queue.peek() instanceof WriteLog); } private void flushIfNecessary() throws IOException { @@ -438,7 +453,8 @@ void rollLogSegment(LogSegment segmentToClose) { addIOTask(new StartLogSegment(segmentToClose.getEndIndex() + 1)); } - Task writeLogEntry(LogEntryProto entry, LogEntryProto removedStateMachineData, TransactionContext context) { + Task writeLogEntry(ReferenceCountedObject entry, + LogEntryProto removedStateMachineData, TransactionContext context) { return addIOTask(new WriteLog(entry, removedStateMachineData, context)); } @@ -468,8 +484,10 @@ private PurgeLog(TruncationSegments segments) { void execute() throws IOException { if (segments.getToDelete() != null) { try(UncheckedAutoCloseable ignored = raftLogMetrics.startPurgeTimer()) { - for (SegmentFileInfo fileInfo : segments.getToDelete()) { - FileUtils.deleteFile(fileInfo.getFile(storage)); + SegmentFileInfo[] toDeletes = segments.getToDelete(); + for (int i = toDeletes.length - 1; i >= 0; i--) { + final Path deleted = FileUtils.deleteFile(toDeletes[i].getFile(storage)); + LOG.info("{}: Purged RaftLog segment: info={}, path={}", name, toDeletes[i], deleted); } } } @@ -485,26 +503,32 @@ private class WriteLog extends Task { private final LogEntryProto entry; private final CompletableFuture stateMachineFuture; private final CompletableFuture combined; + private final AtomicReference> ref = new AtomicReference<>(); - WriteLog(LogEntryProto entry, LogEntryProto removedStateMachineData, TransactionContext context) { + 
WriteLog(ReferenceCountedObject entryRef, LogEntryProto removedStateMachineData, + TransactionContext context) { + LogEntryProto origEntry = entryRef.get(); this.entry = removedStateMachineData; - if (this.entry == entry) { - final StateMachineLogEntryProto proto = entry.hasStateMachineLogEntry()? entry.getStateMachineLogEntry(): null; + if (this.entry == origEntry) { + final StateMachineLogEntryProto proto = origEntry.hasStateMachineLogEntry() ? + origEntry.getStateMachineLogEntry(): null; if (stateMachine != null && proto != null && proto.getType() == StateMachineLogEntryProto.Type.DATASTREAM) { final ClientInvocationId invocationId = ClientInvocationId.valueOf(proto); final CompletableFuture removed = server.getDataStreamMap().remove(invocationId); - this.stateMachineFuture = removed == null? stateMachine.data().link(null, entry) - : removed.thenApply(stream -> stateMachine.data().link(stream, entry)); + this.stateMachineFuture = removed == null? stateMachine.data().link(null, origEntry) + : removed.thenApply(stream -> stateMachine.data().link(stream, origEntry)); } else { this.stateMachineFuture = null; } + entryRef.retain(); + this.ref.set(entryRef); } else { try { - // this.entry != entry iff the entry has state machine data - this.stateMachineFuture = stateMachine.data().write(entry, context); + // this.entry != origEntry if it has state machine data + this.stateMachineFuture = stateMachine.data().write(entryRef, context); } catch (Exception e) { - LOG.error(name + ": writeStateMachineData failed for index " + entry.getIndex() - + ", entry=" + LogProtoUtils.toLogEntryString(entry, stateMachine::toStateMachineLogEntryString), e); + LOG.error(name + ": writeStateMachineData failed for index " + origEntry.getIndex() + + ", entry=" + LogProtoUtils.toLogEntryString(origEntry, stateMachine::toStateMachineLogEntryString), e); throw e; } } @@ -516,6 +540,7 @@ private class WriteLog extends Task { void failed(IOException e) { stateMachine.event().notifyLogFailed(e, 
entry); super.failed(e); + discard(); } @Override @@ -531,6 +556,15 @@ CompletableFuture getFuture() { @Override void done() { writeTasks.offerOrCompleteFuture(this); + discard(); + } + + @Override + void discard() { + final ReferenceCountedObject entryRef = ref.getAndSet(null); + if (entryRef != null) { + entryRef.release(); + } } @Override @@ -561,8 +595,8 @@ public String toString() { } } - File getFile(long startIndex, Long endIndex) { - return LogSegmentStartEnd.valueOf(startIndex, endIndex).getFile(storage); + private File getFile(LogSegmentStartEnd startEnd) { + return startEnd.getFile(storage); } private class FinalizeLogSegment extends Task { @@ -579,19 +613,20 @@ private class FinalizeLogSegment extends Task { public void execute() throws IOException { freeSegmentedRaftLogOutputStream(); - final File openFile = getFile(startIndex, null); + final LogSegmentStartEnd openStartEnd = LogSegmentStartEnd.valueOf(startIndex); + final File openFile = getFile(openStartEnd); Preconditions.assertTrue(openFile.exists(), () -> name + ": File " + openFile + " to be rolled does not exist"); if (endIndex - startIndex + 1 > 0) { // finalize the current open segment - final File dstFile = getFile(startIndex, endIndex); + final File dstFile = getFile(LogSegmentStartEnd.valueOf(startIndex, endIndex)); Preconditions.assertTrue(!dstFile.exists()); FileUtils.move(openFile, dstFile); LOG.info("{}: Rolled log segment from {} to {}", name, openFile, dstFile); } else { // delete the file of the empty segment - FileUtils.deleteFile(openFile); - LOG.info("{}: Deleted empty log segment {}", name, openFile); + final Path deleted = FileUtils.deleteFile(openFile); + LOG.info("{}: Deleted empty RaftLog segment: startEnd={}, path={}", name, openStartEnd, deleted); } updateFlushedIndexIncreasingly(); safeCacheEvictIndex.updateToMax(endIndex, traceIndexChange); @@ -624,7 +659,7 @@ private class StartLogSegment extends Task { @Override void execute() throws IOException { - final File openFile 
= getFile(newStartIndex, null); + final File openFile = getFile(LogSegmentStartEnd.valueOf(newStartIndex)); Preconditions.assertTrue(!openFile.exists(), "open file %s exists for %s", openFile, name); Preconditions.assertTrue(pendingFlushNum == 0); @@ -666,8 +701,8 @@ void execute() throws IOException { final File delFile = del.getFile(storage); Preconditions.assertTrue(delFile.exists(), "File %s to be deleted does not exist", delFile); - FileUtils.deleteFile(delFile); - LOG.info("{}: Deleted log file {}", name, delFile); + final Path deleted = FileUtils.deleteFile(delFile); + LOG.info("{}: Deleted RaftLog segment for {}: path={}", name, segments.getReason(), deleted); minStart = Math.min(minStart, del.getStartIndex()); } if (segments.getToTruncate() == null) { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/storage/FileChunkReader.java b/ratis-server/src/main/java/org/apache/ratis/server/storage/FileChunkReader.java index 65bfc8b809..b80924eef1 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/storage/FileChunkReader.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/storage/FileChunkReader.java @@ -17,13 +17,13 @@ */ package org.apache.ratis.server.storage; -import org.apache.ratis.io.MD5Hash; import org.apache.ratis.proto.RaftProtos.FileChunkProto; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.UnsafeByteOperations; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.IOUtils; import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.MD5FileUtil; import java.io.Closeable; import java.io.File; @@ -56,7 +56,7 @@ public FileChunkReader(FileInfo info, Path relativePath) throws IOException { this.relativePath = relativePath; final File f = info.getPath().toFile(); if (info.getFileDigest() == null) { - digester = MD5Hash.getDigester(); + digester = MD5FileUtil.newMD5(); this.in = new 
DigestInputStream(FileUtils.newInputStream(f), digester); } else { digester = null; diff --git a/ratis-server/src/main/java/org/apache/ratis/server/storage/RaftStorageDirectoryImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/storage/RaftStorageDirectoryImpl.java index e7f69d1e24..119f7922db 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/storage/RaftStorageDirectoryImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/storage/RaftStorageDirectoryImpl.java @@ -210,6 +210,7 @@ void lock() throws IOException { * null if storage is already locked. * @throws IOException if locking fails. */ + @SuppressWarnings({"squid:S2095"}) // Suppress closeable warning private FileLock tryLock(File lockF) throws IOException { boolean deletionHookAdded = false; if (!lockF.exists()) { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/storage/RaftStorageImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/storage/RaftStorageImpl.java index fbb7bf7d46..ce809cad8f 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/storage/RaftStorageImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/storage/RaftStorageImpl.java @@ -19,7 +19,6 @@ import java.io.InputStream; import java.io.OutputStream; -import java.nio.file.NoSuchFileException; import java.util.concurrent.atomic.AtomicReference; import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.server.RaftConfiguration; @@ -153,14 +152,16 @@ public void writeRaftConfiguration(LogEntryProto conf) { public RaftConfiguration readRaftConfiguration() { File confFile = storageDir.getMetaConfFile(); - try (InputStream fio = FileUtils.newInputStream(confFile)) { - LogEntryProto confProto = LogEntryProto.newBuilder().mergeFrom(fio).build(); - return LogProtoUtils.toRaftConfiguration(confProto); - } catch (FileNotFoundException | NoSuchFileException e) { - return null; - } catch (Exception e) { - LOG.error("Failed reading configuration from 
file:" + confFile, e); + if (!confFile.exists()) { return null; + } else { + try (InputStream fio = FileUtils.newInputStream(confFile)) { + LogEntryProto confProto = LogEntryProto.newBuilder().mergeFrom(fio).build(); + return LogProtoUtils.toRaftConfiguration(confProto); + } catch (Exception e) { + LOG.error("Failed reading configuration from file:" + confFile, e); + return null; + } } } diff --git a/ratis-server/src/main/java/org/apache/ratis/server/storage/SnapshotManager.java b/ratis-server/src/main/java/org/apache/ratis/server/storage/SnapshotManager.java index c49a86ec59..e4afdff03e 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/storage/SnapshotManager.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/storage/SnapshotManager.java @@ -27,11 +27,11 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.StateMachineStorage; import org.apache.ratis.util.FileUtils; -import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.MD5FileUtil; import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.StringUtils; +import org.apache.ratis.util.function.CheckedFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,7 +44,6 @@ import java.nio.file.StandardOpenOption; import java.security.MessageDigest; import java.util.Optional; -import java.util.function.Function; import java.util.function.Supplier; /** @@ -62,8 +61,8 @@ public class SnapshotManager { private final Supplier snapshotDir; private final Supplier snapshotTmpDir; - private final Function getRelativePath; - private final Supplier digester = JavaUtils.memoize(MD5Hash::getDigester); + private final CheckedFunction getRelativePath; + private MessageDigest digester; SnapshotManager(RaftPeerId selfId, Supplier dir, StateMachineStorage smStorage) { this.selfId = selfId; @@ -74,9 +73,10 @@ public class SnapshotManager { final Supplier smDir = 
MemoizedSupplier.valueOf(() -> dir.get().getStateMachineDir().toPath()); this.getRelativePath = c -> smDir.get().relativize( - new File(dir.get().getRoot(), c.getFilename()).toPath()).toString(); + FileUtils.resolveFullPath(dir.get().getRoot(), c.getFilename()).toPath()).toString(); } + @SuppressWarnings({"squid:S2095"}) // Suppress closeable warning private FileChannel open(FileChunkProto chunk, File tmpSnapshotFile) throws IOException { final FileChannel out; final boolean exists = tmpSnapshotFile.exists(); @@ -87,7 +87,7 @@ private FileChannel open(FileChunkProto chunk, File tmpSnapshotFile) throws IOEx } // create the temp snapshot file and put padding inside out = FileUtils.newFileChannel(tmpSnapshotFile, StandardOpenOption.WRITE, StandardOpenOption.CREATE); - digester.get().reset(); + digester = MD5FileUtil.newMD5(); } else { if (!exists) { throw new FileNotFoundException("Chunk offset is non-zero but file is not found: " + tmpSnapshotFile @@ -113,7 +113,6 @@ public void installSnapshot(InstallSnapshotRequestProto request, StateMachine st // TODO: Make sure that subsequent requests for the same installSnapshot are coming in order, // and are not lost when whole request cycle is done. 
Check requestId and requestIndex here - for (FileChunkProto chunk : snapshotChunkRequest.getFileChunksList()) { SnapshotInfo pi = stateMachine.getLatestSnapshot(); if (pi != null && pi.getTermIndex().getIndex() >= lastIncludedIndex) { @@ -122,12 +121,12 @@ public void installSnapshot(InstallSnapshotRequestProto request, StateMachine st + " with endIndex >= lastIncludedIndex " + lastIncludedIndex); } - final File tmpSnapshotFile = new File(tmpDir, getRelativePath.apply(chunk)); + final File tmpSnapshotFile = FileUtils.resolveFullPath(tmpDir, getRelativePath.apply(chunk)); FileUtils.createDirectoriesDeleteExistingNonDirectory(tmpSnapshotFile.getParentFile()); try (FileChannel out = open(chunk, tmpSnapshotFile)) { final ByteBuffer data = chunk.getData().asReadOnlyByteBuffer(); - digester.get().update(data.duplicate()); + digester.update(data.duplicate()); int written = 0; for(; data.remaining() > 0; ) { @@ -139,11 +138,10 @@ public void installSnapshot(InstallSnapshotRequestProto request, StateMachine st // rename the temp snapshot file if this is the last chunk. also verify // the md5 digest and create the md5 meta-file. 
if (chunk.getDone()) { - final MD5Hash expectedDigest = - new MD5Hash(chunk.getFileDigest().toByteArray()); + final MD5Hash expectedDigest = MD5Hash.newInstance(chunk.getFileDigest().toByteArray()); // calculate the checksum of the snapshot file and compare it with the // file digest in the request - final MD5Hash digest = new MD5Hash(digester.get().digest()); + final MD5Hash digest = MD5Hash.newInstance(digester.digest()); if (!digest.equals(expectedDigest)) { LOG.warn("The snapshot md5 digest {} does not match expected {}", digest, expectedDigest); diff --git a/ratis-server/src/main/java/org/apache/ratis/server/util/ServerStringUtils.java b/ratis-server/src/main/java/org/apache/ratis/server/util/ServerStringUtils.java index 25223c0f4d..3a5db62859 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/util/ServerStringUtils.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/util/ServerStringUtils.java @@ -21,16 +21,23 @@ import org.apache.ratis.proto.RaftProtos.AppendEntriesRequestProto; import org.apache.ratis.proto.RaftProtos.InstallSnapshotReplyProto; import org.apache.ratis.proto.RaftProtos.InstallSnapshotRequestProto; +import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.proto.RaftProtos.RequestVoteReplyProto; +import org.apache.ratis.proto.RaftProtos.StateMachineLogEntryProto; +import org.apache.ratis.protocol.RaftGroupMemberId; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.raftlog.LogProtoUtils; +import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.ProtoUtils; +import java.util.List; +import java.util.function.Function; + /** * This class provides convenient utilities for converting Protocol Buffers messages to strings. * The output strings are for information purpose only. * They are concise and compact compared to the Protocol Buffers implementations of {@link Object#toString()}. - * + *

* The output messages or the output formats may be changed without notice. * Callers of this class should not try to parse the output strings for any purposes. * Instead, they should use the public APIs provided by Protocol Buffers. @@ -38,16 +45,19 @@ public final class ServerStringUtils { private ServerStringUtils() {} - public static String toAppendEntriesRequestString(AppendEntriesRequestProto request) { + public static String toAppendEntriesRequestString(AppendEntriesRequestProto request, + Function stateMachineToString) { if (request == null) { return null; } + final List entries = request.getEntriesList(); return ProtoUtils.toString(request.getServerRequest()) + "-t" + request.getLeaderTerm() + ",previous=" + TermIndex.valueOf(request.getPreviousLog()) + ",leaderCommit=" + request.getLeaderCommit() + ",initializing? " + request.getInitializing() - + ",entries: " + LogProtoUtils.toLogEntriesShortString(request.getEntriesList()); + + "," + (entries.isEmpty()? "HEARTBEAT" : "entries: " + + LogProtoUtils.toLogEntriesShortString(entries, stateMachineToString)); } public static String toAppendEntriesReplyString(AppendEntriesReplyProto reply) { @@ -108,6 +118,16 @@ public static String toRequestVoteReplyString(RequestVoteReplyProto proto) { if (proto == null) { return null; } - return ProtoUtils.toString(proto.getServerReply()) + "-t" + proto.getTerm(); + return ProtoUtils.toString(proto.getServerReply()) + "-t" + proto.getTerm() + + "-last:" + TermIndex.valueOf(proto.getLastEntry()); + } + + /** + * Used to generate the necessary unified name in the submodules under + * {@link org.apache.ratis.server.impl.RaftServerImpl}, which consists + * of {@link org.apache.ratis.server.impl.ServerState#memberId} and the specific class. 
+ */ + public static String generateUnifiedName(RaftGroupMemberId memberId, Class clazz) { + return memberId + "-" + JavaUtils.getClassSimpleName(clazz); } } diff --git a/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/BaseStateMachine.java b/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/BaseStateMachine.java index c987c53ddb..7ed1124295 100644 --- a/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/BaseStateMachine.java +++ b/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/BaseStateMachine.java @@ -18,7 +18,7 @@ package org.apache.ratis.statemachine.impl; -import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftGroupId; @@ -50,6 +50,7 @@ public class BaseStateMachine implements StateMachine, StateMachine.DataApi, StateMachine.EventApi, StateMachine.LeaderEventApi, StateMachine.FollowerEventApi { private final CompletableFuture server = new CompletableFuture<>(); + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile RaftGroupId groupId; private final LifeCycle lifeCycle = new LifeCycle(JavaUtils.getClassSimpleName(getClass())); @@ -58,7 +59,7 @@ public class BaseStateMachine implements StateMachine, StateMachine.DataApi, private final SortedMap> transactionFutures = new TreeMap<>(); public BaseStateMachine() { - setLastAppliedTermIndex(TermIndex.valueOf(0, -1)); + setLastAppliedTermIndex(TermIndex.INITIAL_VALUE); } public RaftPeerId getId() { @@ -110,10 +111,10 @@ public TransactionContext applyTransactionSerial(TransactionContext trx) throws @Override public CompletableFuture applyTransaction(TransactionContext trx) { // return the same message contained in the entry - RaftProtos.LogEntryProto entry = Objects.requireNonNull(trx.getLogEntry()); + final LogEntryProto entry = 
Objects.requireNonNull(trx.getLogEntryUnsafe()); updateLastAppliedTermIndex(entry.getTerm(), entry.getIndex()); return CompletableFuture.completedFuture( - Message.valueOf(trx.getLogEntry().getStateMachineLogEntry().getLogData())); + Message.valueOf(entry.getStateMachineLogEntry().getLogData())); } @Override diff --git a/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/SimpleStateMachineStorage.java b/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/SimpleStateMachineStorage.java index 37e94a8a0a..7e8afbaa85 100644 --- a/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/SimpleStateMachineStorage.java +++ b/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/SimpleStateMachineStorage.java @@ -116,7 +116,7 @@ public void cleanupOldSnapshots(SnapshotRetentionPolicy snapshotRetentionPolicy) final List allSnapshotFiles = getSingleFileSnapshotInfos(stateMachineDir.toPath()); - if (allSnapshotFiles.size() > snapshotRetentionPolicy.getNumSnapshotsRetained()) { + if (allSnapshotFiles.size() > numSnapshotsRetained) { allSnapshotFiles.sort(Comparator.comparing(SingleFileSnapshotInfo::getIndex).reversed()); allSnapshotFiles.subList(numSnapshotsRetained, allSnapshotFiles.size()) .stream() @@ -217,6 +217,10 @@ public SingleFileSnapshotInfo getLatestSnapshot() { if (s != null) { return s; } + return loadLatestSnapshot(); + } + + public SingleFileSnapshotInfo loadLatestSnapshot() { final File dir = stateMachineDir; if (dir == null) { return null; diff --git a/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/TransactionContextImpl.java b/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/TransactionContextImpl.java index 7c4f1782de..58869f5edc 100644 --- a/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/TransactionContextImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/statemachine/impl/TransactionContextImpl.java @@ -25,12 +25,14 @@ import org.apache.ratis.statemachine.StateMachine; 
import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.ReferenceCountedObject; import java.io.IOException; import java.util.Objects; import java.util.concurrent.CompletableFuture; +import java.util.function.Supplier; /** * Implementation of {@link TransactionContext} @@ -47,6 +49,7 @@ public class TransactionContextImpl implements TransactionContext { private final RaftClientRequest clientRequest; /** Exception from the {@link StateMachine} or from the log */ + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile Exception exception; /** Data from the {@link StateMachine} */ @@ -58,6 +61,7 @@ public class TransactionContextImpl implements TransactionContext { * {@link StateMachine#startTransaction(RaftClientRequest)} and * {@link StateMachine#applyTransaction(TransactionContext)}. */ + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile Object stateMachineContext; /** @@ -68,8 +72,14 @@ public class TransactionContextImpl implements TransactionContext { private boolean shouldCommit = true; /** Committed LogEntry. */ + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile LogEntryProto logEntry; + /** Committed LogEntry copy. */ + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type + private volatile Supplier logEntryCopy; + /** For wrapping {@link #logEntry} in order to release the underlying buffer. 
*/ + @SuppressWarnings({"squid:S3077"}) // Suppress volatile for generic type private volatile ReferenceCountedObject delegatedRef; private final CompletableFuture logIndexFuture = new CompletableFuture<>(); @@ -112,7 +122,7 @@ private static StateMachineLogEntryProto get(StateMachineLogEntryProto stateMach */ TransactionContextImpl(RaftPeerRole serverRole, StateMachine stateMachine, LogEntryProto logEntry) { this(serverRole, null, stateMachine, logEntry.getStateMachineLogEntry()); - this.logEntry = logEntry; + setLogEntry(logEntry); this.logIndexFuture.complete(logEntry.getIndex()); } @@ -135,12 +145,15 @@ public ReferenceCountedObject wrap(LogEntryProto entry) { if (delegatedRef == null) { return TransactionContext.super.wrap(entry); } - Preconditions.assertSame(getLogEntry().getTerm(), entry.getTerm(), "entry.term"); - Preconditions.assertSame(getLogEntry().getIndex(), entry.getIndex(), "entry.index"); + final LogEntryProto expected = getLogEntryUnsafe(); + Objects.requireNonNull(expected, "logEntry == null"); + Preconditions.assertSame(expected.getTerm(), entry.getTerm(), "entry.term"); + Preconditions.assertSame(expected.getIndex(), entry.getIndex(), "entry.index"); return delegatedRef.delegate(entry); } @Override + @SuppressWarnings("deprecation") public StateMachineLogEntryProto getStateMachineLogEntry() { return stateMachineLogEntry; } @@ -168,18 +181,32 @@ public LogEntryProto initLogEntry(long term, long index) { Objects.requireNonNull(stateMachineLogEntry, "stateMachineLogEntry == null"); logIndexFuture.complete(index); - return logEntry = LogProtoUtils.toLogEntryProto(stateMachineLogEntry, term, index); + return setLogEntry(LogProtoUtils.toLogEntryProto(stateMachineLogEntry, term, index)); } public CompletableFuture getLogIndexFuture() { return logIndexFuture; } + private LogEntryProto setLogEntry(LogEntryProto entry) { + this.logEntry = entry; + this.logEntryCopy = MemoizedSupplier.valueOf(() -> LogProtoUtils.copy(entry)); + return entry; + } + + 
@Override + @SuppressWarnings("deprecation") public LogEntryProto getLogEntry() { + return logEntryCopy == null ? null : logEntryCopy.get(); + } + + @Override + public LogEntryProto getLogEntryUnsafe() { return logEntry; } + @Override public TransactionContext setException(Exception ioe) { this.exception = ioe; @@ -209,4 +236,8 @@ public TransactionContext cancelTransaction() throws IOException { // call this to let the SM know that Transaction cannot be synced return stateMachine.cancelTransaction(this); } + + public static LogEntryProto getLogEntry(TransactionContext context) { + return ((TransactionContextImpl) context).logEntry; + } } diff --git a/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotFromLeaderTests.java b/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotFromLeaderTests.java index 15dafb88c8..e7683a3991 100644 --- a/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotFromLeaderTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotFromLeaderTests.java @@ -21,10 +21,16 @@ import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.exceptions.RaftException; +import org.apache.ratis.protocol.exceptions.RaftRetryFailureException; +import org.apache.ratis.protocol.exceptions.ReconfigurationTimeoutException; +import org.apache.ratis.retry.RetryPolicies; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.impl.MiniRaftCluster; +import org.apache.ratis.server.impl.PeerChanges; import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.raftlog.RaftLog; @@ -39,8 +45,7 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LifeCycle; import 
org.apache.ratis.util.SizeInBytes; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,59 +79,116 @@ public abstract class InstallSnapshotFromLeaderTests { + for (RaftServer.Division follower : cluster.getFollowers()) { + final SnapshotInfo info = follower.getStateMachine().getLatestSnapshot(); + Assertions.assertNotNull(info); + Assertions.assertEquals(3, info.getFiles().size()); } + }, 10, ONE_SECOND, "check snapshot", LOG); + } - final SnapshotInfo snapshot = cluster.getLeader().getStateMachine().getLatestSnapshot(); - Assert.assertEquals(3, snapshot.getFiles().size()); - - // add two more peers - final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true, - true); - // trigger setConfiguration - cluster.setConfiguration(change.allPeersInNewConf); - - RaftServerTestUtil - .waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null); - - // Check the installed snapshot file number on each Follower matches with the - // leader snapshot. 
- JavaUtils.attempt(() -> { - for (RaftServer.Division follower : cluster.getFollowers()) { - final SnapshotInfo info = follower.getStateMachine().getLatestSnapshot(); - Assert.assertNotNull(info); - Assert.assertEquals(3, info.getFiles().size()); - } - }, 10, ONE_SECOND, "check snapshot", LOG); - } finally { - cluster.shutdown(); + private void testInstallSnapshotDuringLeaderSwitch(CLUSTER cluster) throws Exception { + RaftTestUtil.waitForLeader(cluster); + final RaftPeerId leaderId = cluster.getLeader().getId(); + + // perform operations and force all peers to take snapshot + try (final RaftClient client = cluster.createClient(leaderId)) { + for (int i = 0; i < SNAPSHOT_TRIGGER_THRESHOLD * 2; i++) { + final RaftClientReply + reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); + Assertions.assertTrue(reply.isSuccess()); + } + + for (final RaftPeer peer: cluster.getPeers()) { + final RaftClientReply snapshotReply = client.getSnapshotManagementApi(leaderId).create(3000); + Assertions.assertTrue(snapshotReply.isSuccess()); + } + } + final SnapshotInfo snapshot = cluster.getLeader().getStateMachine().getLatestSnapshot(); + Assertions.assertNotNull(snapshot); + + // isolate two followers (majority) in old configuration + final List oldFollowers = cluster.getFollowers(); + for (RaftServer.Division f: oldFollowers) { + RaftTestUtil.isolate(cluster, f.getId()); + } + + // add two more peers and install snapshot from leaders + final PeerChanges change = cluster.addNewPeers(2, true); + try (final RaftClient client = cluster.createClient(leaderId, RetryPolicies.noRetry())) { + final RaftException e = Assertions.assertThrows(RaftException.class, + () -> client.admin().setConfiguration(change.getPeersInNewConf())); + Assertions.assertTrue( e instanceof RaftRetryFailureException + || e instanceof ReconfigurationTimeoutException, + () -> "Unexpected exception: " + e); + } + + final SnapshotInfo snapshotInfo = 
cluster.getDivision(change.getAddedPeers().get(0).getId()) + .getStateMachine().getLatestSnapshot(); + Assertions.assertNotNull(snapshotInfo); + + // recover the old followers and isolate the leader to force leader switch + RaftTestUtil.isolate(cluster, leaderId); + for (RaftServer.Division f: oldFollowers) { + RaftTestUtil.deIsolate(cluster, f.getId()); + } + RaftTestUtil.waitForLeader(cluster); + + try (final RaftClient client = cluster.createClient(cluster.getLeader().getId())) { + // successfully setConfiguration during leader switch + final RaftClientReply setConf = client.admin().setConfiguration(change.getPeersInNewConf()); + Assertions.assertTrue(setConf.isSuccess()); + + RaftTestUtil.deIsolate(cluster, leaderId); + final RaftClientReply + reply = client.io().send(new RaftTestUtil.SimpleMessage("final")); + Assertions.assertTrue(reply.isSuccess()); } } @@ -137,7 +199,8 @@ private static class StateMachineWithMultiNestedSnapshotFile extends SimpleState File file2; @Override - public synchronized void initialize(RaftServer server, RaftGroupId groupId, RaftStorage raftStorage) throws IOException { + public synchronized void initialize(RaftServer server, RaftGroupId groupId, RaftStorage raftStorage) + throws IOException { super.initialize(server, groupId, raftStorage); // contains two snapshot files @@ -182,8 +245,8 @@ public long takeSnapshot() { return RaftLog.INVALID_LOG_INDEX; } - Assert.assertTrue(file1.exists()); - Assert.assertTrue(file2.exists()); + Assertions.assertTrue(file1.exists()); + Assertions.assertTrue(file2.exists()); return super.takeSnapshot(); } @@ -199,7 +262,7 @@ public SnapshotInfo getLatestSnapshot() { files.add(new FileInfo( file2.toPath(), null)); - Assert.assertEquals(2, files.size()); + Assertions.assertEquals(2, files.size()); SnapshotInfo info = super.getLatestSnapshot(); if (info == null) { @@ -218,14 +281,15 @@ private static class StateMachineWithSeparatedSnapshotPath extends SimpleStateMa private File tmpDir; @Override - 
public synchronized void initialize(RaftServer server, RaftGroupId groupId, RaftStorage raftStorage) throws IOException { + public synchronized void initialize(RaftServer server, RaftGroupId groupId, RaftStorage raftStorage) + throws IOException { super.initialize(server, groupId, raftStorage); this.root = new File("/tmp/ratis-tests/statemachine/" + getId().toString()); this.snapshotDir = new File(root, "snapshot"); this.tmpDir = new File(root, "tmp"); FileUtils.deleteFully(root); - Assert.assertTrue(this.snapshotDir.mkdirs()); - Assert.assertTrue(this.tmpDir.mkdirs()); + Assertions.assertTrue(this.snapshotDir.mkdirs()); + Assertions.assertTrue(this.tmpDir.mkdirs()); this.root.deleteOnExit(); } @@ -241,18 +305,19 @@ public synchronized void pause() { public long takeSnapshot() { final TermIndex lastApplied = getLastAppliedTermIndex(); final File snapshotTmpDir = new File(tmpDir, UUID.randomUUID().toString()); - final File snapshotRealDir = new File(snapshotDir, String.format("%d_%d", lastApplied.getTerm(), lastApplied.getIndex())); + final File snapshotRealDir = new File(snapshotDir, + String.format("%d_%d", lastApplied.getTerm(), lastApplied.getIndex())); try { FileUtils.deleteFully(snapshotRealDir); FileUtils.deleteFully(snapshotTmpDir); - Assert.assertTrue(snapshotTmpDir.mkdirs()); + Assertions.assertTrue(snapshotTmpDir.mkdirs()); final File snapshotFile1 = new File(snapshotTmpDir, "deer"); final File snapshotFile2 = new File(snapshotTmpDir, "loves"); final File snapshotFile3 = new File(snapshotTmpDir, "vegetable"); - Assert.assertTrue(snapshotFile1.createNewFile()); - Assert.assertTrue(snapshotFile2.createNewFile()); - Assert.assertTrue(snapshotFile3.createNewFile()); + Assertions.assertTrue(snapshotFile1.createNewFile()); + Assertions.assertTrue(snapshotFile2.createNewFile()); + Assertions.assertTrue(snapshotFile3.createNewFile()); FileUtils.move(snapshotTmpDir, snapshotRealDir); } catch (IOException ioe) { LOG.error("create snapshot data file failed", ioe); 
diff --git a/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotNotificationTests.java b/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotNotificationTests.java index 72ddd06f26..931bf6317f 100644 --- a/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotNotificationTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/InstallSnapshotNotificationTests.java @@ -26,6 +26,7 @@ import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.impl.MiniRaftCluster; +import org.apache.ratis.server.impl.PeerChanges; import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.raftlog.RaftLog; @@ -39,8 +40,8 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.event.Level; @@ -49,7 +50,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; @@ -79,11 +79,12 @@ public abstract class InstallSnapshotNotificationTests leaderSnapshotInfoRef = new AtomicReference<>(); + private static final AtomicReference LEADER_SNAPSHOT_INFO_REF = new AtomicReference<>(); private static final AtomicInteger numSnapshotRequests = new AtomicInteger(); private static final AtomicInteger numNotifyInstallSnapshotFinished = new AtomicInteger(); @@ -102,7 +103,7 @@ public CompletableFuture notifyInstallSnapshotFromLeader( } numSnapshotRequests.incrementAndGet(); - final SingleFileSnapshotInfo leaderSnapshotInfo = (SingleFileSnapshotInfo) leaderSnapshotInfoRef.get(); + final SingleFileSnapshotInfo 
leaderSnapshotInfo = (SingleFileSnapshotInfo) LEADER_SNAPSHOT_INFO_REF.get(); LOG.info("{}: leaderSnapshotInfo = {}", getId(), leaderSnapshotInfo); if (leaderSnapshotInfo == null) { return super.notifyInstallSnapshotFromLeader(roleInfoProto, termIndex); @@ -137,7 +138,7 @@ public void notifySnapshotInstalled(RaftProtos.InstallSnapshotResult result, lon return; } numNotifyInstallSnapshotFinished.incrementAndGet(); - final SingleFileSnapshotInfo leaderSnapshotInfo = (SingleFileSnapshotInfo) leaderSnapshotInfoRef.get(); + final SingleFileSnapshotInfo leaderSnapshotInfo = (SingleFileSnapshotInfo) LEADER_SNAPSHOT_INFO_REF.get(); File leaderSnapshotFile = leaderSnapshotInfo.getFile().getPath().toFile(); synchronized (this) { try { @@ -184,7 +185,7 @@ public void testAddNewFollowersNoSnapshot() throws Exception { private void testAddNewFollowers(CLUSTER cluster, int numRequests) throws Exception { final boolean shouldInstallSnapshot = numRequests >= SNAPSHOT_TRIGGER_THRESHOLD; - leaderSnapshotInfoRef.set(null); + LEADER_SNAPSHOT_INFO_REF.set(null); final List logs; int i = 0; try { @@ -194,7 +195,7 @@ private void testAddNewFollowers(CLUSTER cluster, int numRequests) throws Except try(final RaftClient client = cluster.createClient(leaderId)) { for (; i < numRequests; i++) { final RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -206,7 +207,7 @@ private void testAddNewFollowers(CLUSTER cluster, int numRequests) throws Except final List snapshotFiles = RaftSnapshotBaseTest.getSnapshotFiles(cluster, nextIndex - SNAPSHOT_TRIGGER_THRESHOLD, nextIndex); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); + Assertions.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); return null; }, 10, ONE_SECOND, "snapshotFile.exist", LOG); logs = 
LogSegmentPath.getLogSegmentPaths(leader.getRaftStorage()); @@ -231,29 +232,26 @@ private void testAddNewFollowers(CLUSTER cluster, int numRequests) throws Except // generate some more traffic try(final RaftClient client = cluster.createClient(cluster.getLeader().getId())) { - Assert.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("m" + i)).isSuccess()); + Assertions.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("m" + i)).isSuccess()); } final SnapshotInfo leaderSnapshotInfo = cluster.getLeader().getStateMachine().getLatestSnapshot(); LOG.info("LeaderSnapshotInfo: {}", leaderSnapshotInfo.getTermIndex()); - final boolean set = leaderSnapshotInfoRef.compareAndSet(null, leaderSnapshotInfo); - Assert.assertTrue(set); + final boolean set = LEADER_SNAPSHOT_INFO_REF.compareAndSet(null, leaderSnapshotInfo); + Assertions.assertTrue(set); - // add two more peers - final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true, - true); + // Add new peer(s) + final PeerChanges change = cluster.addNewPeers(1, true); // trigger setConfiguration - RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf), - peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); + RaftServerTestUtil.runWithMinorityPeers(cluster, change.getPeersInNewConf(), cluster::setConfiguration); - RaftServerTestUtil - .waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null); + RaftServerTestUtil.waitAndCheckNewConf(cluster, change.getPeersInNewConf(), 0, null); // Check the installed snapshot index on each Follower matches with the // leader snapshot. for (RaftServer.Division follower : cluster.getFollowers()) { - final long expected = shouldInstallSnapshot ? 
leaderSnapshotInfo.getIndex() : RaftLog.INVALID_LOG_INDEX; - Assert.assertEquals(expected, RaftServerTestUtil.getLatestInstalledSnapshotIndex(follower)); + final long expected = leaderSnapshotInfo.getIndex(); + Assertions.assertEquals(expected, RaftServerTestUtil.getLatestInstalledSnapshotIndex(follower)); RaftSnapshotBaseTest.assertLogContent(follower, false); } @@ -271,7 +269,7 @@ public void testRestartFollower() throws Exception { } private void testRestartFollower(CLUSTER cluster) throws Exception { - leaderSnapshotInfoRef.set(null); + LEADER_SNAPSHOT_INFO_REF.set(null); int i = 0; final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster); final RaftPeerId leaderId = leader.getId(); @@ -279,7 +277,7 @@ private void testRestartFollower(CLUSTER cluster) throws Exception { try (final RaftClient client = cluster.createClient(leaderId)) { for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) { final RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -290,7 +288,7 @@ private void testRestartFollower(CLUSTER cluster) throws Exception { final List snapshotFiles = RaftSnapshotBaseTest.getSnapshotFiles(cluster, oldLeaderNextIndex - SNAPSHOT_TRIGGER_THRESHOLD, oldLeaderNextIndex); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); + Assertions.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); return null; }, 10, ONE_SECOND, "snapshotFile.exist", LOG); } @@ -300,7 +298,7 @@ private void testRestartFollower(CLUSTER cluster) throws Exception { // generate some more traffic try (final RaftClient client = cluster.createClient(leader.getId())) { - Assert.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("m" + i)).isSuccess()); + Assertions.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("m" + i)).isSuccess()); } FIVE_SECONDS.sleep(); 
@@ -309,8 +307,8 @@ private void testRestartFollower(CLUSTER cluster) throws Exception { JavaUtils.attempt(() -> { final long newLeaderNextIndex = leader.getRaftLog().getNextIndex(); LOG.info("{}: newLeaderNextIndex = {}", leaderId, newLeaderNextIndex); - Assert.assertTrue(newLeaderNextIndex > oldLeaderNextIndex); - Assert.assertEquals(newLeaderNextIndex, follower.getRaftLog().getNextIndex()); + Assertions.assertTrue(newLeaderNextIndex > oldLeaderNextIndex); + Assertions.assertEquals(newLeaderNextIndex, follower.getRaftLog().getNextIndex()); }, 10, ONE_SECOND, "followerNextIndex", LOG); } @@ -321,7 +319,7 @@ public void testInstallSnapshotNotificationCount() throws Exception { private void testInstallSnapshotNotificationCount(CLUSTER cluster) throws Exception { - leaderSnapshotInfoRef.set(null); + LEADER_SNAPSHOT_INFO_REF.set(null); numSnapshotRequests.set(0); int i = 0; @@ -331,14 +329,14 @@ private void testInstallSnapshotNotificationCount(CLUSTER cluster) throws Except // Let a few heartbeats pass. ONE_SECOND.sleep(); - Assert.assertEquals(0, numSnapshotRequests.get()); + Assertions.assertEquals(0, numSnapshotRequests.get()); // Generate data. try(final RaftClient client = cluster.createClient(leaderId)) { for (; i < 10; i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -349,58 +347,56 @@ private void testInstallSnapshotNotificationCount(CLUSTER cluster) throws Except // Take snapshot and check result. 
long snapshotIndex = cluster.getLeader().getStateMachine().takeSnapshot(); - Assert.assertEquals(20, snapshotIndex); + Assertions.assertEquals(20, snapshotIndex); final SnapshotInfo leaderSnapshotInfo = cluster.getLeader().getStateMachine().getLatestSnapshot(); - Assert.assertEquals(20, leaderSnapshotInfo.getIndex()); - final boolean set = leaderSnapshotInfoRef.compareAndSet(null, leaderSnapshotInfo); - Assert.assertTrue(set); + Assertions.assertEquals(20, leaderSnapshotInfo.getIndex()); + final boolean set = LEADER_SNAPSHOT_INFO_REF.compareAndSet(null, leaderSnapshotInfo); + Assertions.assertTrue(set); // Wait for the snapshot to be done. final RaftServer.Division leader = cluster.getLeader(); final long nextIndex = leader.getRaftLog().getNextIndex(); - Assert.assertEquals(21, nextIndex); + Assertions.assertEquals(21, nextIndex); // End index is exclusive. final List snapshotFiles = RaftSnapshotBaseTest.getSnapshotFiles(cluster, 0, nextIndex); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); + Assertions.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); return null; }, 10, ONE_SECOND, "snapshotFile.exist", LOG); // Clear all log files and reset cached log start index. long snapshotInstallIndex = leader.getRaftLog().onSnapshotInstalled(leader.getRaftLog().getLastCommittedIndex()).get(); - Assert.assertEquals(20, snapshotInstallIndex); + Assertions.assertEquals(20, snapshotInstallIndex); // Check that logs are gone. - Assert.assertEquals(0, + Assertions.assertEquals(0, LogSegmentPath.getLogSegmentPaths(leader.getRaftStorage()).size()); - Assert.assertEquals(RaftLog.INVALID_LOG_INDEX, leader.getRaftLog().getStartIndex()); + Assertions.assertEquals(RaftLog.INVALID_LOG_INDEX, leader.getRaftLog().getStartIndex()); // Allow some heartbeats to go through, then make sure none of them had // snapshot requests. 
ONE_SECOND.sleep(); - Assert.assertEquals(0, numSnapshotRequests.get()); + Assertions.assertEquals(0, numSnapshotRequests.get()); // Make sure leader and followers are still up to date. for (RaftServer.Division follower : cluster.getFollowers()) { - Assert.assertEquals( + Assertions.assertEquals( leader.getRaftLog().getNextIndex(), follower.getRaftLog().getNextIndex()); } - // Add two more peers who will need snapshots from the leader. - final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true, - true); + // Add new peer(s) who will need snapshots from the leader. + final int numNewPeers = 1; + final PeerChanges change = cluster.addNewPeers(numNewPeers, true); // trigger setConfiguration - RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf), - peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); - RaftServerTestUtil - .waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null); + RaftServerTestUtil.runWithMinorityPeers(cluster, change.getPeersInNewConf(), cluster::setConfiguration); + RaftServerTestUtil.waitAndCheckNewConf(cluster, change.getPeersInNewConf(), 0, null); // Generate more data. try (final RaftClient client = cluster.createClient(leader.getId())) { - Assert.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("m" + i)).isSuccess()); + Assertions.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("m" + i)).isSuccess()); } // Make sure leader and followers are still up to date. @@ -412,7 +408,7 @@ private void testInstallSnapshotNotificationCount(CLUSTER cluster) throws Except } // Make sure each new peer got one snapshot notification. 
- Assert.assertEquals(2, numSnapshotRequests.get()); + Assertions.assertEquals(numNewPeers, numSnapshotRequests.get()); } finally { cluster.shutdown(); @@ -425,7 +421,7 @@ public void testInstallSnapshotInstalledEvent() throws Exception{ } private void testInstallSnapshotInstalledEvent(CLUSTER cluster) throws Exception{ - leaderSnapshotInfoRef.set(null); + LEADER_SNAPSHOT_INFO_REF.set(null); numNotifyInstallSnapshotFinished.set(0); final List logs; int i = 0; @@ -437,7 +433,7 @@ private void testInstallSnapshotInstalledEvent(CLUSTER cluster) throws Exception for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -448,7 +444,7 @@ private void testInstallSnapshotInstalledEvent(CLUSTER cluster) throws Exception final List snapshotFiles = RaftSnapshotBaseTest.getSnapshotFiles(cluster, nextIndex - SNAPSHOT_TRIGGER_THRESHOLD, nextIndex); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); + Assertions.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); return null; }, 10, ONE_SECOND, "snapshotFile.exist", LOG); logs = LogSegmentPath.getLogSegmentPaths(leader.getRaftStorage()); @@ -470,27 +466,24 @@ private void testInstallSnapshotInstalledEvent(CLUSTER cluster) throws Exception // generate some more traffic try(final RaftClient client = cluster.createClient(cluster.getLeader().getId())) { - Assert.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("m" + i)).isSuccess()); + Assertions.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("m" + i)).isSuccess()); } final SnapshotInfo leaderSnapshotInfo = cluster.getLeader().getStateMachine().getLatestSnapshot(); LOG.info("LeaderSnapshotInfo: {}", leaderSnapshotInfo.getTermIndex()); - final boolean set = leaderSnapshotInfoRef.compareAndSet(null, 
leaderSnapshotInfo); - Assert.assertTrue(set); + final boolean set = LEADER_SNAPSHOT_INFO_REF.compareAndSet(null, leaderSnapshotInfo); + Assertions.assertTrue(set); // add one new peer - final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(1, true, true); + final PeerChanges change = cluster.addNewPeers(1, true); // trigger setConfiguration - RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf), - peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); - - RaftServerTestUtil - .waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null); + RaftServerTestUtil.runWithMinorityPeers(cluster, change.getPeersInNewConf(), cluster::setConfiguration); + RaftServerTestUtil.waitAndCheckNewConf(cluster, change.getPeersInNewConf(), 0, null); // Check the installed snapshot index on each Follower matches with the // leader snapshot. for (RaftServer.Division follower : cluster.getFollowers()) { - Assert.assertEquals(leaderSnapshotInfo.getIndex(), + Assertions.assertEquals(leaderSnapshotInfo.getIndex(), RaftServerTestUtil.getLatestInstalledSnapshotIndex(follower)); } @@ -500,9 +493,9 @@ private void testInstallSnapshotInstalledEvent(CLUSTER cluster) throws Exception (SimpleStateMachine4Testing) cluster.getFollowers().get(0).getStateMachine(); final File followerSnapshotFile = new File(followerStateMachine.getStateMachineDir(), leaderSnapshotFile.getName()); - Assert.assertEquals(numNotifyInstallSnapshotFinished.get(), 2); - Assert.assertTrue(leaderSnapshotFile.exists()); - Assert.assertFalse(followerSnapshotFile.exists()); + Assertions.assertEquals(numNotifyInstallSnapshotFinished.get(), 2); + Assertions.assertTrue(leaderSnapshotFile.exists()); + Assertions.assertFalse(followerSnapshotFile.exists()); // restart the peer and check if it can correctly handle conf change cluster.restartServer(cluster.getLeader().getId(), false); @@ -524,7 +517,7 @@ public void testInstallSnapshotDuringBootstrap() throws Exception { } 
private void testInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exception { - leaderSnapshotInfoRef.set(null); + LEADER_SNAPSHOT_INFO_REF.set(null); numSnapshotRequests.set(0); int i = 0; try { @@ -535,7 +528,7 @@ private void testInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exceptio for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -546,34 +539,33 @@ private void testInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exceptio final List snapshotFiles = RaftSnapshotBaseTest.getSnapshotFiles(cluster, nextIndex - SNAPSHOT_TRIGGER_THRESHOLD, nextIndex); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); + Assertions.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); return null; }, 10, ONE_SECOND, "snapshotFile.exist", LOG); RaftSnapshotBaseTest.assertLeaderContent(cluster); final SnapshotInfo leaderSnapshotInfo = cluster.getLeader().getStateMachine().getLatestSnapshot(); - final boolean set = leaderSnapshotInfoRef.compareAndSet(null, leaderSnapshotInfo); - Assert.assertTrue(set); + final boolean set = LEADER_SNAPSHOT_INFO_REF.compareAndSet(null, leaderSnapshotInfo); + Assertions.assertTrue(set); - // add two more peers - final MiniRaftCluster.PeerChanges change = cluster.addNewPeers(2, true, - true); + // Add new peer(s) + final int numNewPeers = 1; + final PeerChanges change = cluster.addNewPeers(numNewPeers, true); // trigger setConfiguration - RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf), - peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); + RaftServerTestUtil.runWithMinorityPeers(cluster, change.getPeersInNewConf(), cluster::setConfiguration); - RaftServerTestUtil.waitAndCheckNewConf(cluster, 
change.allPeersInNewConf, 0, null); + RaftServerTestUtil.waitAndCheckNewConf(cluster, change.getPeersInNewConf(), 0, null); // Check the installed snapshot index on each Follower matches with the // leader snapshot. for (RaftServer.Division follower : cluster.getFollowers()) { - Assert.assertEquals(leaderSnapshotInfo.getIndex(), + Assertions.assertEquals(leaderSnapshotInfo.getIndex(), RaftServerTestUtil.getLatestInstalledSnapshotIndex(follower)); } // Make sure each new peer got at least one snapshot notification. - Assert.assertTrue(2 <= numSnapshotRequests.get()); + Assertions.assertTrue(numNewPeers <= numSnapshotRequests.get()); } finally { cluster.shutdown(); } diff --git a/ratis-server/src/test/java/org/apache/ratis/LinearizableReadTests.java b/ratis-server/src/test/java/org/apache/ratis/LinearizableReadTests.java new file mode 100644 index 0000000000..09781b546e --- /dev/null +++ b/ratis-server/src/test/java/org/apache/ratis/LinearizableReadTests.java @@ -0,0 +1,288 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis; + +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.client.RaftClientConfigKeys; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.exceptions.ReadIndexException; +import org.apache.ratis.retry.ExceptionDependentRetry; +import org.apache.ratis.retry.RetryPolicies; +import org.apache.ratis.retry.RetryPolicy; +import org.apache.ratis.server.RaftServer; +import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.server.RaftServerConfigKeys.Read.ReadIndex.Type; +import org.apache.ratis.server.impl.MiniRaftCluster; +import org.apache.ratis.util.Slf4jUtils; +import org.apache.ratis.util.TimeDuration; +import org.apache.ratis.util.function.CheckedConsumer; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.event.Level; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; + +import static org.apache.ratis.ReadOnlyRequestTests.CounterStateMachine; +import static org.apache.ratis.ReadOnlyRequestTests.INCREMENT; +import static org.apache.ratis.ReadOnlyRequestTests.QUERY; +import static org.apache.ratis.ReadOnlyRequestTests.WAIT_AND_INCREMENT; +import static org.apache.ratis.ReadOnlyRequestTests.assertOption; +import static org.apache.ratis.ReadOnlyRequestTests.assertReplyAtLeast; +import static org.apache.ratis.ReadOnlyRequestTests.assertReplyExact; +import static org.apache.ratis.server.RaftServerConfigKeys.Read.Option.LINEARIZABLE; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; + +/** Test for the {@link RaftServerConfigKeys.Read.Option#LINEARIZABLE} feature. 
*/ +public abstract class LinearizableReadTests + extends BaseTest + implements MiniRaftCluster.Factory.Get { + + { + Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG); + Slf4jUtils.setLogLevel(RaftClient.LOG, Level.DEBUG); + } + + public abstract boolean isLeaderLeaseEnabled(); + + public abstract Type readIndexType(); + + public final void assertRaftProperties(RaftProperties p) { + assertOption(LINEARIZABLE, p); + assertEquals(isLeaderLeaseEnabled(), RaftServerConfigKeys.Read.leaderLeaseEnabled(p)); + assertSame(readIndexType(), RaftServerConfigKeys.Read.ReadIndex.type(p)); + } + + protected void runWithNewCluster(CheckedConsumer testCase) throws Exception { + runWithNewCluster(3, 0, true, cluster -> { + assertRaftProperties(cluster.getProperties()); + testCase.accept(cluster); + }); + } + + @BeforeEach + public void setup() { + final RaftProperties p = getProperties(); + CounterStateMachine.setProperties(p); + RaftServerConfigKeys.Read.setOption(p, LINEARIZABLE); + RaftServerConfigKeys.Read.setLeaderLeaseEnabled(p, isLeaderLeaseEnabled()); + RaftServerConfigKeys.Read.ReadIndex.setType(p, readIndexType()); + + // Enable dummy request so linearizable-read tests exercise the default ordered-async bootstrap path. 
+ RaftClientConfigKeys.Async.Experimental.setSendDummyRequest(p, true); + } + + @Test + public void testLinearizableRead() throws Exception { + runWithNewCluster(ReadOnlyRequestTests::runTestReadOnly); + } + + @Test + public void testLinearizableReadTimeout() throws Exception { + runWithNewCluster(cluster -> ReadOnlyRequestTests.runTestReadTimeout(ReadIndexException.class, cluster)); + } + + @Test + public void testFollowerLinearizableRead() throws Exception { + runWithNewCluster(LinearizableReadTests::runTestFollowerLinearizableRead); + } + + public static class Reply { + private final int count; + private final CompletableFuture future; + + public Reply(int count, CompletableFuture future) { + this.count = count; + this.future = future; + } + + public boolean isDone() { + return future.isDone(); + } + + public void assertExact() { + assertReplyExact(count, future.join()); + } + + public void assertAtLeast() { + assertReplyAtLeast(count, future.join()); + } + + @Override + public String toString() { + return "Reply{" + + "count=" + count + + ", reply=" + (isDone() ? 
future.join() : "pending") + + '}'; + } + } + + static void runTestFollowerLinearizableRead(C cluster) throws Exception { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); + + final List followers = cluster.getFollowers(); + Assertions.assertEquals(2, followers.size()); + + final RaftPeerId f0 = followers.get(0).getId(); + final RaftPeerId f1 = followers.get(1).getId(); + + final int n = 100; + final List f0Replies = new ArrayList<>(n); + final List f1Replies = new ArrayList<>(n); + try (RaftClient client = cluster.createClient(leaderId); + RaftClient c0 = cluster.createClient(f0); + RaftClient c1 = cluster.createClient(f1); + ) { + for (int i = 0; i < n; i++) { + final int count = i + 1; + assertReplyExact(count, client.io().send(INCREMENT)); + + f0Replies.add(new Reply(count, c0.async().sendReadOnly(QUERY, f0))); + f1Replies.add(new Reply(count, c1.async().sendReadOnly(QUERY, f1))); + } + + for (int i = 0; i < n; i++) { + f0Replies.get(i).assertAtLeast(); + f1Replies.get(i).assertAtLeast(); + } + } + } + + @Test + public void testFollowerLinearizableReadParallel() throws Exception { + runWithNewCluster(LinearizableReadTests::runTestFollowerReadOnlyParallel); + } + + static void runTestFollowerReadOnlyParallel(C cluster) throws Exception { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); + + final List followers = cluster.getFollowers(); + Assertions.assertEquals(2, followers.size()); + final RaftPeerId f0 = followers.get(0).getId(); + final RaftPeerId f1 = followers.get(1).getId(); + + try (RaftClient leaderClient = cluster.createClient(leaderId); + RaftClient f0Client = cluster.createClient(f0); + RaftClient f1Client = cluster.createClient(f1)) { + + final int n = 10; + final List writeReplies = new ArrayList<>(n); + final List f1Replies = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + int count = 2*i + 1; + assertReplyExact(count, leaderClient.io().send(INCREMENT)); + + count++; + writeReplies.add(new 
Reply(count, leaderClient.async().send(WAIT_AND_INCREMENT))); + // sleep to let the commitIndex/appliedIndex get updated. + Thread.sleep(100); + // WAIT_AND_INCREMENT will delay 500ms to update the count, the read must wait for it. + assertReplyExact(count, f0Client.io().sendReadOnly(QUERY, f0)); + f1Replies.add(new Reply(count, f1Client.async().sendReadOnly(QUERY, f1))); + } + + for (int i = 0; i < n; i++) { + writeReplies.get(i).assertExact(); + f1Replies.get(i).assertAtLeast(); + } + } + } + + @Test + public void testLinearizableReadFailWhenLeaderDown() throws Exception { + runWithNewCluster(LinearizableReadTests::runTestLinearizableReadFailWhenLeaderDown); + } + + static void runTestLinearizableReadFailWhenLeaderDown(C cluster) throws Exception { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); + + final List followers = cluster.getFollowers(); + assertEquals(2, followers.size()); + final RaftPeerId f0 = followers.get(0).getId(); + + try (RaftClient leaderClient = cluster.createClient(leaderId); + RaftClient f0Client = cluster.createClient(f0, RetryPolicies.noRetry())) { + assertReplyExact(1, leaderClient.io().send(INCREMENT)); + assertReplyExact(1, f0Client.io().sendReadOnly(QUERY)); + + // kill the leader + // read timeout quicker than election timeout + final RaftClientReply reply = leaderClient.admin().transferLeadership(null, 200); + Assertions.assertTrue(reply.isSuccess()); + + // client should fail and won't retry + Assertions.assertThrows(ReadIndexException.class, () -> f0Client.io().sendReadOnly(QUERY, f0)); + } + } + + @Test + public void testFollowerReadOnlyRetryWhenLeaderDown() throws Exception { + // only retry on ReadIndexException + final RetryPolicy retryPolicy = ExceptionDependentRetry + .newBuilder() + .setDefaultPolicy(RetryPolicies.noRetry()) + .setExceptionToPolicy(ReadIndexException.class, + RetryPolicies.retryForeverWithSleep(TimeDuration.valueOf(500, TimeUnit.MILLISECONDS))) + .build(); + + 
runWithNewCluster(cluster -> ReadOnlyRequestTests.runTestReadOnlyRetryWhenLeaderDown(retryPolicy, cluster)); + } + + + @Test + public void testReadAfterWrite() throws Exception { + runWithNewCluster(LinearizableReadTests::runTestReadAfterWrite); + } + + static void runTestReadAfterWrite(C cluster) throws Exception { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); + + try (RaftClient client = cluster.createClient(leaderId)) { + // test blocking read-after-write + assertReplyExact(1, client.io().send(INCREMENT)); + assertReplyExact(1, client.io().sendReadAfterWrite(QUERY)); + + // test asynchronous read-after-write + final CompletableFuture writeReply = client.async().send(INCREMENT); + final CompletableFuture asyncReply = client.async().sendReadAfterWrite(QUERY); + + final int n = 100; + final List writeReplies = new ArrayList<>(n); + final List readAfterWriteReplies = new ArrayList<>(n); + for (int i = 0; i < n; i++) { + final int count = i + 3; + writeReplies.add(new Reply(count, client.async().send(INCREMENT))); + readAfterWriteReplies.add(new Reply(count, client.async().sendReadAfterWrite(QUERY))); + } + + for (int i = 0; i < n; i++) { + writeReplies.get(i).assertExact(); + readAfterWriteReplies.get(i).assertAtLeast(); + } + + assertReplyAtLeast(2, writeReply.join()); + assertReplyAtLeast(2, asyncReply.join()); + } + } +} \ No newline at end of file diff --git a/ratis-server/src/test/java/org/apache/ratis/LogAppenderTests.java b/ratis-server/src/test/java/org/apache/ratis/LogAppenderTests.java index ce08e4a74f..8a8731daf4 100644 --- a/ratis-server/src/test/java/org/apache/ratis/LogAppenderTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/LogAppenderTests.java @@ -18,7 +18,7 @@ package org.apache.ratis; import static org.apache.ratis.RaftTestUtil.waitForLeader; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.ratis.RaftTestUtil.SimpleMessage; import 
org.apache.ratis.client.RaftClient; @@ -32,6 +32,7 @@ import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.leader.LogAppender; import org.apache.ratis.server.impl.MiniRaftCluster; +import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.server.metrics.RaftServerMetricsImpl; import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.raftlog.RaftLog; @@ -40,8 +41,8 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.ArrayList; @@ -53,6 +54,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Stream; import org.apache.ratis.thirdparty.com.codahale.metrics.Gauge; import org.slf4j.event.Level; @@ -169,6 +171,7 @@ public void testFollowerHeartbeatMetric() throws IOException, InterruptedExcepti assertTrue(t.getTimer().getCount() > 0L); } } + cluster.shutdown(); } void runTest(CLUSTER cluster) throws Exception { @@ -199,7 +202,7 @@ void runTest(CLUSTER cluster) throws Exception { if (e != null) { throw e; } - Assert.assertTrue(s.succeed.get()); + Assertions.assertTrue(s.succeed.get()); } } finally { for (int i = 0; i < clients.size(); i ++) { @@ -215,11 +218,101 @@ void runTest(CLUSTER cluster) throws Exception { final RaftLog leaderLog = cluster.getLeader().getRaftLog(); final EnumMap counts = RaftTestUtil.countEntries(leaderLog); LOG.info("counts = " + counts); - Assert.assertEquals(6 * numMsgs * numClients, counts.get(LogEntryBodyCase.STATEMACHINELOGENTRY).get()); + Assertions.assertEquals(6 * numMsgs * numClients, counts.get(LogEntryBodyCase.STATEMACHINELOGENTRY).get()); final LogEntryProto last = 
RaftTestUtil.getLastEntry(LogEntryBodyCase.STATEMACHINELOGENTRY, leaderLog); LOG.info("last = {}", LogProtoUtils.toLogEntryString(last)); - Assert.assertNotNull(last); - Assert.assertTrue(last.getIndex() <= leader.getInfo().getLastAppliedIndex()); + Assertions.assertNotNull(last); + Assertions.assertTrue(last.getIndex() <= leader.getInfo().getLastAppliedIndex()); + } + + @Test + public void testNewAppendEntriesRequestAfterPurgeFollowerBehindStartIndex() throws Exception { + final RaftProperties prop = getProperties(); + RaftServerConfigKeys.Log.setPurgeGap(prop, 1); + RaftServerConfigKeys.Log.setSegmentSizeMax(prop, SizeInBytes.valueOf("1KB")); + runWithNewCluster(3, cluster -> { + final long startIndexAfterPurge = setupPurgedLeaderLog(cluster); + // Test when followerNextIndex < leader's logStartIndex + runTestNewAppendEntriesRequestAfterPurge(cluster, startIndexAfterPurge - 1); + }); + } + + @Test + public void testNewAppendEntriesRequestAfterPurgeFollowerAtStartIndex() throws Exception { + final RaftProperties prop = getProperties(); + RaftServerConfigKeys.Log.setPurgeGap(prop, 1); + RaftServerConfigKeys.Log.setSegmentSizeMax(prop, SizeInBytes.valueOf("1KB")); + runWithNewCluster(3, cluster -> { + final long startIndexAfterPurge = setupPurgedLeaderLog(cluster); + // Test when followerNextIndex == leader's logStartIndex, but the previous index is already purged + runTestNewAppendEntriesRequestAfterPurge(cluster, startIndexAfterPurge); + }); + } + + private long setupPurgedLeaderLog(CLUSTER cluster) throws Exception { + final RaftServer.Division leader = waitForLeader(cluster); + final RaftLog leaderLog = leader.getRaftLog(); + + try (RaftClient client = cluster.createClient(leader.getId())) { + for (SimpleMessage msg : generateMsgs(5)) { + client.io().send(msg); + } + } + + final long lastLogIndex = leaderLog.getLastEntryTermIndex().getIndex(); + LOG.info("Leader log lastIndex={}, startIndex={}", lastLogIndex, leaderLog.getStartIndex()); + 
Assertions.assertTrue(lastLogIndex > 5, "Need enough log entries for the test"); + + // Take a snapshot so that shouldInstallSnapshot() can return it + final long snapshotIndex = SimpleStateMachine4Testing.get(leader).takeSnapshot(); + LOG.info("Snapshot taken at index {}", snapshotIndex); + Assertions.assertTrue(snapshotIndex > 0, "Snapshot should have been taken"); + + final long purgeUpTo = lastLogIndex - 2; + LOG.info("Purging leader log up to index {}", purgeUpTo); + leaderLog.purge(purgeUpTo).get(); + + final long startIndexAfterPurge = leaderLog.getStartIndex(); + LOG.info("Leader log after purge: startIndex={}", startIndexAfterPurge); + Assertions.assertTrue(startIndexAfterPurge > 1, + "Purge should have advanced startIndex, but got " + startIndexAfterPurge); + + return startIndexAfterPurge; + } + + void runTestNewAppendEntriesRequestAfterPurge(CLUSTER cluster, + long targetNextIndex) throws Exception { + final RaftServer.Division leader = waitForLeader(cluster); + final RaftLog leaderLog = leader.getRaftLog(); + final long startIndexAfterPurge = leaderLog.getStartIndex(); + + final Stream appenders = RaftServerTestUtil.getLogAppenders(leader); + Assertions.assertNotNull(appenders, "Leader should have log appenders"); + final LogAppender appender = appenders.findFirst().orElseThrow( + () -> new AssertionError("No log appender found")); + + Assertions.assertTrue(targetNextIndex > RaftLog.LEAST_VALID_LOG_INDEX, + "targetNextIndex should be > LEAST_VALID_LOG_INDEX"); + appender.getFollower().setNextIndex(targetNextIndex); + + LOG.info("Set follower nextIndex={}, startIndexAfterPurge={}, snapshotIndex={}", + targetNextIndex, startIndexAfterPurge, appender.getFollower().getSnapshotIndex()); + Assertions.assertEquals(0, appender.getFollower().getSnapshotIndex(), + "Follower snapshotIndex should be 0 (default, never installed snapshot)"); + + Assertions.assertNull(leaderLog.getTermIndex(targetNextIndex - 1), + "Entry at previousIndex=" + (targetNextIndex - 1) + " 
should have been purged"); + + // Should return null instead of throwing NPE + Assertions.assertNull(appender.newAppendEntriesRequest(0, false), + "newAppendEntriesRequest should return null when previous TermIndex is not found"); + + Assertions.assertEquals(targetNextIndex, appender.getFollower().getNextIndex(), + "Follower nextIndex should remain unchanged"); + + Assertions.assertNotNull(appender.shouldInstallSnapshot(), + "shouldInstallSnapshot should return non-null when followerNextIndex (" + + targetNextIndex + ") and previous entry has been purged"); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/MessageStreamApiTests.java b/ratis-server/src/test/java/org/apache/ratis/MessageStreamApiTests.java index 8166115738..50f59b2d20 100644 --- a/ratis-server/src/test/java/org/apache/ratis/MessageStreamApiTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/MessageStreamApiTests.java @@ -31,8 +31,8 @@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.nio.charset.StandardCharsets; @@ -75,11 +75,11 @@ void runTestStream(CLUSTER cluster) throws Exception { try(RaftClient client = cluster.createClient()) { final String k1 = k.substring(0, endOfRequest); final RaftClientReply r1= client.io().sendReadOnly(new SimpleMessage(k1)); - Assert.assertTrue(r1.isSuccess()); + Assertions.assertTrue(r1.isSuccess()); final String k2 = k.substring(endOfRequest); final RaftClientReply r2 = client.io().sendReadOnly(new SimpleMessage(k2)); - Assert.assertTrue(r2.isSuccess()); + Assertions.assertTrue(r2.isSuccess()); } } @@ -104,24 +104,24 @@ void runTestStreamAsync(CLUSTER cluster) throws Exception { final String s = (char)('A' + i) + "1234567"; LOG.info("s=" + s); final ByteString b = 
ByteString.copyFrom(s, StandardCharsets.UTF_8); - Assert.assertEquals(8, b.size()); + Assertions.assertEquals(8, b.size()); for(int j = 0; j < 128; j++) { bytes = bytes.concat(b); } i++; - Assert.assertEquals(i*SUBMESSAGE_SIZE.getSizeInt(), bytes.size()); + Assertions.assertEquals(i*SUBMESSAGE_SIZE.getSizeInt(), bytes.size()); } try(RaftClient client = cluster.createClient()) { final RaftClientReply reply = client.getMessageStreamApi().streamAsync(Message.valueOf(bytes)).get(); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } // check if all the parts are streamed as a single message. try(RaftClient client = cluster.createClient()) { final RaftClientReply reply = client.io().sendReadOnly(new SimpleMessage(bytes.toString(StandardCharsets.UTF_8))); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } } diff --git a/ratis-server/src/test/java/org/apache/ratis/OutputStreamBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/OutputStreamBaseTest.java index d86170d0b4..9821126ce6 100644 --- a/ratis-server/src/test/java/org/apache/ratis/OutputStreamBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/OutputStreamBaseTest.java @@ -22,14 +22,12 @@ import org.apache.ratis.proto.RaftProtos.LogEntryProto.LogEntryBodyCase; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.impl.MiniRaftCluster; -import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.raftlog.LogEntryHeader; import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.StringUtils; -import org.junit.Assert; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.io.OutputStream; @@ -49,7 +47,7 @@ import java.util.function.Supplier; import static org.apache.ratis.RaftTestUtil.waitForLeader; -import static 
org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; public abstract class OutputStreamBaseTest extends BaseTest @@ -94,7 +92,7 @@ private void runTestSimpleWrite(CLUSTER cluster) throws Exception { private void checkLog(RaftLog raftLog, long expectedCommittedIndex, Supplier s) throws IOException { long committedIndex = raftLog.getLastCommittedIndex(); - Assert.assertTrue(committedIndex >= expectedCommittedIndex); + Assertions.assertTrue(committedIndex >= expectedCommittedIndex); // check the log content final LogEntryHeader[] entries = raftLog.getEntries(0, Long.MAX_VALUE); int count = 0; @@ -108,10 +106,10 @@ private void checkLog(RaftLog raftLog, long expectedCommittedIndex, final String message = "log " + entry + " " + log.getLogEntryBodyCase() + " " + StringUtils.bytes2HexString(logData) + ", expected=" + StringUtils.bytes2HexString(expected); - Assert.assertArrayEquals(message, expected, logData); + Assertions.assertArrayEquals(expected, logData, message); count++; } - Assert.assertEquals(expectedCommittedIndex, count); + Assertions.assertEquals(expectedCommittedIndex, count); } @Test @@ -157,12 +155,12 @@ private void runTestWriteAndFlush(CLUSTER cluster) throws Exception { private RaftLog assertRaftLog(int expectedEntries, RaftServer.Division server) throws Exception { final RaftLog raftLog = server.getRaftLog(); final EnumMap counts = RaftTestUtil.countEntries(raftLog); - Assert.assertEquals(expectedEntries, counts.get(LogEntryBodyCase.STATEMACHINELOGENTRY).get()); + Assertions.assertEquals(expectedEntries, counts.get(LogEntryBodyCase.STATEMACHINELOGENTRY).get()); final LogEntryProto last = RaftTestUtil.getLastEntry(LogEntryBodyCase.STATEMACHINELOGENTRY, raftLog); - Assert.assertNotNull(last); - Assert.assertTrue(raftLog.getLastCommittedIndex() >= last.getIndex()); - Assert.assertTrue(server.getInfo().getLastAppliedIndex() >= last.getIndex()); + Assertions.assertNotNull(last); + Assertions.assertTrue(raftLog.getLastCommittedIndex() 
>= last.getIndex()); + Assertions.assertTrue(server.getInfo().getLastAppliedIndex() >= last.getIndex()); return raftLog; } @@ -251,12 +249,12 @@ private void runTestWriteWithOffset(CLUSTER cluster) throws Exception { final LogEntryProto e = raftLog.get(ti.getIndex()); if (e.hasStateMachineLogEntry()) { final byte[] eValue = e.getStateMachineLogEntry().getLogData().toByteArray(); - Assert.assertEquals(ByteValue.BUFFERSIZE, eValue.length); + Assertions.assertEquals(ByteValue.BUFFERSIZE, eValue.length); System.arraycopy(eValue, 0, actual, totalSize, eValue.length); totalSize += eValue.length; } } - Assert.assertArrayEquals(expected, actual); + Assertions.assertArrayEquals(expected, actual); } /** @@ -298,18 +296,20 @@ private void runTestKillLeader(CLUSTER cluster) throws Exception { Thread.sleep(500); RaftTestUtil.waitAndKillLeader(cluster); final RaftServer.Division newLeader = waitForLeader(cluster); - Assert.assertNotEquals(leader.getId(), newLeader.getId()); + Assertions.assertNotEquals(leader.getId(), newLeader.getId()); Thread.sleep(500); running.set(false); - latch.await(5, TimeUnit.SECONDS); + final boolean latchCompleted = latch.await(5, TimeUnit.SECONDS); + Assertions.assertTrue(latchCompleted, "Writer thread did not finish within the timeout"); LOG.info("Writer success? " + success.get()); - Assert.assertTrue(success.get()); + Assertions.assertNotNull(success.get(), "Writer thread completed but success was not set"); + Assertions.assertTrue(success.get()); // total number of tx should be >= result + 2, where 2 means two NoOp from // leaders. It may be larger than result+2 because the client may resend // requests and we do not have retry cache on servers yet. LOG.info("last applied index: {}. 
total number of requests: {}", newLeader.getInfo().getLastAppliedIndex(), result.get()); - Assert.assertTrue(newLeader.getInfo().getLastAppliedIndex() >= result.get() + 1); + Assertions.assertTrue(newLeader.getInfo().getLastAppliedIndex() >= result.get() + 1); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/RaftAsyncExceptionTests.java b/ratis-server/src/test/java/org/apache/ratis/RaftAsyncExceptionTests.java index 1ad53a8381..044ddc3422 100644 --- a/ratis-server/src/test/java/org/apache/ratis/RaftAsyncExceptionTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/RaftAsyncExceptionTests.java @@ -30,8 +30,8 @@ import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.util.ArrayList; @@ -62,13 +62,13 @@ private void runTestGroupMismatchException(CLUSTER cluster) throws Exception { // send a message to make sure the cluster is working try(RaftClient client = cluster.createClient()) { final RaftClientReply reply = client.async().send(new SimpleMessage("first")).get(); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } // create another group final RaftGroup clusterGroup = cluster.getGroup(); final RaftGroup anotherGroup = RaftGroup.valueOf(RaftGroupId.randomId(), clusterGroup.getPeers()); - Assert.assertNotEquals(clusterGroup.getGroupId(), anotherGroup.getGroupId()); + Assertions.assertNotEquals(clusterGroup.getGroupId(), anotherGroup.getGroupId()); // create another client using another group final SimpleMessage[] messages = SimpleMessage.create(5); @@ -78,7 +78,7 @@ private void runTestGroupMismatchException(CLUSTER cluster) throws Exception { for(SimpleMessage m : messages) { futures.add(client.async().send(m)); } - 
Assert.assertEquals(messages.length, futures.size()); + Assertions.assertEquals(messages.length, futures.size()); // check replies final Iterator> i = futures.iterator(); @@ -102,9 +102,9 @@ private void runTestTimeoutException(CLUSTER cluster) throws Exception { // send a message to make sure the cluster is working try(RaftClient client = cluster.createClient()) { final RaftClientReply reply = client.io().send(new SimpleMessage("m0")); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); - RaftClientConfigKeys.Rpc.setRequestTimeout(properties.get(), ONE_SECOND); + RaftClientConfigKeys.Rpc.setRequestTimeout(PROPERTIES.get(), ONE_SECOND); // Block StartTransaction StreamSupport.stream(cluster.getServers().spliterator(), false) .map(cluster::getDivision) @@ -118,7 +118,7 @@ private void runTestTimeoutException(CLUSTER cluster) throws Exception { .map(SimpleStateMachine4Testing::get) .forEach(SimpleStateMachine4Testing::unblockStartTransaction); // The request should succeed after start transaction is unblocked - Assert.assertTrue(replyFuture.get(FIVE_SECONDS.getDuration(), FIVE_SECONDS.getUnit()).isSuccess()); + Assertions.assertTrue(replyFuture.get(FIVE_SECONDS.getDuration(), FIVE_SECONDS.getUnit()).isSuccess()); } } } diff --git a/ratis-server/src/test/java/org/apache/ratis/RaftAsyncTests.java b/ratis-server/src/test/java/org/apache/ratis/RaftAsyncTests.java index 71c5c5ef06..3c765d7171 100644 --- a/ratis-server/src/test/java/org/apache/ratis/RaftAsyncTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/RaftAsyncTests.java @@ -24,6 +24,7 @@ import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.proto.RaftProtos.CommitInfoProto; import org.apache.ratis.proto.RaftProtos.LogEntryProto; +import org.apache.ratis.proto.RaftProtos.ReplicationLevel; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.exceptions.AlreadyClosedException; @@ -46,11 +47,13 
@@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.PlatformUtils; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedRunnable; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.event.Level; import java.io.IOException; @@ -68,6 +71,7 @@ import static org.apache.ratis.RaftTestUtil.waitForLeader; +@Timeout(100) public abstract class RaftAsyncTests extends BaseTest implements MiniRaftCluster.Factory.Get { { @@ -77,11 +81,15 @@ public abstract class RaftAsyncTests extends Ba public static final int NUM_SERVERS = 3; - private static final DelayLocalExecutionInjection logSyncDelay = RaftServerTestUtil.getLogSyncDelay(); + private static final DelayLocalExecutionInjection LOG_SYNC_DELAY = RaftServerTestUtil.getLogSyncDelay(); { getProperties().setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, SimpleStateMachine4Testing.class, StateMachine.class); + if (!PlatformUtils.LINUX) { + getProperties().setBoolean("raft.netty.server.use-epoll", false); + getProperties().setBoolean("raft.netty.client.use-epoll", false); + } } @Test @@ -106,9 +114,9 @@ public void testAsyncConfiguration() throws IOException { } static void assertRaftRetryFailureException(RaftRetryFailureException rfe, RetryPolicy retryPolicy, String name) { - Assert.assertNotNull(name + " does not have RaftRetryFailureException", rfe); - Assert.assertTrue(name + ": unexpected error message, rfe=" + rfe + ", retryPolicy=" + retryPolicy, - rfe.getMessage().contains(retryPolicy.toString())); + Assertions.assertNotNull(rfe, name + " does not have RaftRetryFailureException"); + 
Assertions.assertTrue(rfe.getMessage().contains(retryPolicy.toString()), + name + ": unexpected error message, rfe=" + rfe + ", retryPolicy=" + retryPolicy); } @Test @@ -166,19 +174,15 @@ void runTestRequestAsyncWithRetryFailure(boolean initialMessages, CLUSTER cluste for (; i < messages.length; i++) { replies.add(client.async().send(messages[i])); } - Assert.assertEquals(messages.length, replies.size()); + Assertions.assertEquals(messages.length, replies.size()); } // sleep again so that the first half calls will fail retries. // the second half still have retry time remaining. sleepTime.apply(t -> t*2).sleep(); - if (leader != null) { - cluster.restartServer(leader, false); - } else { - cluster.start(); - } - + // The client will try to reconnect, but the server is + // not started at this time and the retry will fail anyway. // all the calls should fail for ordering guarantee for(int i = 0; i < replies.size(); i++) { final CheckedRunnable getReply = replies.get(i)::get; @@ -195,6 +199,12 @@ void runTestRequestAsyncWithRetryFailure(boolean initialMessages, CLUSTER cluste testFailureCaseAsync("last-request", () -> client.async().send(new SimpleMessage("last")), AlreadyClosedException.class, RaftRetryFailureException.class); + + if (leader != null) { + cluster.restartServer(leader, false); + } else { + cluster.start(); + } } } @@ -223,7 +233,7 @@ void runTestAsyncRequestSemaphore(CLUSTER cluster) throws Exception { futures[i] = client.async().send(messages[i]); blockedRequestsCount.decrementAndGet(); } - Assert.assertEquals(0, blockedRequestsCount.get()); + Assertions.assertEquals(0, blockedRequestsCount.get()); futures[numMessages] = CompletableFuture.supplyAsync(() -> { blockedRequestsCount.incrementAndGet(); @@ -236,7 +246,7 @@ void runTestAsyncRequestSemaphore(CLUSTER cluster) throws Exception { while (blockedRequestsCount.get() != 1) { Thread.sleep(1000); } - Assert.assertEquals(1, blockedRequestsCount.get()); + Assertions.assertEquals(1, 
blockedRequestsCount.get()); //Since all semaphore permits are acquired the last message sent is in queue RaftClientTestUtil.assertAsyncRequestSemaphore(client, 0, 1); @@ -249,7 +259,7 @@ void runTestAsyncRequestSemaphore(CLUSTER cluster) throws Exception { for (int i = 0; i <= numMessages; i++) { futures[i].join(); } - Assert.assertEquals(0, blockedRequestsCount.get()); + Assertions.assertEquals(0, blockedRequestsCount.get()); } } @@ -280,22 +290,22 @@ public void testStaleReadAsync() throws Exception { } void runTestStaleReadAsync(CLUSTER cluster) throws Exception { - final int numMesssages = 10; - try (RaftClient client = cluster.createClient()) { - RaftTestUtil.waitForLeader(cluster); + final int numMessages = 10; + RaftServer.Division division = waitForLeader(cluster); + try (RaftClient client = cluster.createClient(division.getId())) { // submit some messages final List> futures = new ArrayList<>(); - for (int i = 0; i < numMesssages; i++) { + for (int i = 0; i < numMessages; i++) { final String s = "" + i; LOG.info("sendAsync " + s); futures.add(client.async().send(new SimpleMessage(s))); } - Assert.assertEquals(numMesssages, futures.size()); + Assertions.assertEquals(numMessages, futures.size()); final List replies = new ArrayList<>(); for (CompletableFuture f : futures) { final RaftClientReply r = f.join(); - Assert.assertTrue(r.isSuccess()); + Assertions.assertTrue(r.isSuccess()); replies.add(r); } futures.clear(); @@ -303,6 +313,7 @@ void runTestStaleReadAsync(CLUSTER cluster) throws Exception { // Use a follower with the max commit index final RaftClientReply lastWriteReply = replies.get(replies.size() - 1); final RaftPeerId leader = lastWriteReply.getServerId(); + Assertions.assertEquals(leader, lastWriteReply.getServerId()); LOG.info("leader = " + leader); final Collection commitInfos = lastWriteReply.getCommitInfos(); LOG.info("commitInfos = " + commitInfos); @@ -321,7 +332,7 @@ void runTestStaleReadAsync(CLUSTER cluster) throws Exception { 
StateMachineException.class, IndexOutOfBoundsException.class); // test sendStaleReadAsync - for (int i = 0; i < numMesssages; i++) { + for (int i = 0; i < numMessages; i++) { final RaftClientReply reply = replies.get(i); final String query = "" + i; LOG.info("query=" + query + ", reply=" + reply); @@ -349,7 +360,7 @@ void runTestStaleReadAsync(CLUSTER cluster) throws Exception { throw new CompletionException(e); } - Assert.assertEquals("log entry mismatch for query=" + query, expected, computed); + Assertions.assertEquals(expected, computed, "log entry mismatch for query=" + query); return null; })); } @@ -357,6 +368,38 @@ void runTestStaleReadAsync(CLUSTER cluster) throws Exception { } } + @Test + public void testWriteAsyncCustomReplicationLevel() throws Exception { + // verify that send(msg, ALL_COMMITTED) would reply with all servers committed past the log index + runWithNewCluster(NUM_SERVERS, this::runTestWriteAsyncCustomReplicationLevel); + } + + void runTestWriteAsyncCustomReplicationLevel(CLUSTER cluster) throws Exception { + final int numMessages = 20; + final RaftPeerId leader = waitForLeader(cluster).getId(); + try (RaftClient client = cluster.createClient(leader)) { + + // submit some messages + for (int i = 0; i < numMessages; i++) { + final String s = "" + i; + LOG.info("sendAsync with ALL_COMMITTED " + s); + client.async().send(new SimpleMessage(s), ReplicationLevel.ALL_COMMITTED).whenComplete((reply, exception) -> { + if (exception != null) { + LOG.error("Failed to send message " + s, exception); + // reply should be null in case of exception + Assertions.assertNull(reply); + return; + } + Assertions.assertTrue(reply.isSuccess()); + Assertions.assertNull(reply.getException()); + // verify that all servers have caught up to log index when the reply is returned + reply.getCommitInfos().forEach(commitInfoProto -> + Assertions.assertTrue(commitInfoProto.getCommitIndex() >= reply.getLogIndex())); + }); + } + } + } + @Test public void testRequestTimeout() 
throws Exception { final TimeDuration oldExpiryTime = RaftServerConfigKeys.RetryCache.expiryTime(getProperties()); @@ -384,28 +427,28 @@ void runTestAppendEntriesTimeout(CLUSTER cluster) throws Exception { LOG.info("Running testAppendEntriesTimeout"); final TimeDuration oldExpiryTime = RaftServerConfigKeys.RetryCache.expiryTime(getProperties()); RaftServerConfigKeys.RetryCache.setExpiryTime(getProperties(), TimeDuration.valueOf(20, TimeUnit.SECONDS)); - waitForLeader(cluster); + final RaftPeerId leader = waitForLeader(cluster).getId(); long time = System.currentTimeMillis(); long waitTime = 5000; try (final RaftClient client = cluster.createClient()) { // block append requests cluster.getServerAliveStream() - .filter(impl -> !impl.getInfo().isLeader()) + .filter(impl -> !impl.getInfo().isLeader() && !impl.getPeer().getId().equals(leader)) .map(SimpleStateMachine4Testing::get) .forEach(SimpleStateMachine4Testing::blockWriteStateMachineData); CompletableFuture replyFuture = client.async().send(new SimpleMessage("abc")); Thread.sleep(waitTime); // replyFuture should not be completed until append request is unblocked. - Assert.assertFalse(replyFuture.isDone()); + Assertions.assertFalse(replyFuture.isDone()); // unblock append request. 
cluster.getServerAliveStream() - .filter(impl -> !impl.getInfo().isLeader()) + .filter(impl -> !impl.getInfo().isLeader() && !impl.getPeer().getId().equals(leader)) .map(SimpleStateMachine4Testing::get) .forEach(SimpleStateMachine4Testing::unblockWriteStateMachineData); - Assert.assertTrue(replyFuture.get().isSuccess()); - Assert.assertTrue(System.currentTimeMillis() - time > waitTime); + Assertions.assertTrue(replyFuture.get().isSuccess()); + Assertions.assertTrue(System.currentTimeMillis() - time > waitTime); } //reset for the other tests @@ -430,7 +473,7 @@ void runTestCheckLeadershipFailure(CLUSTER cluster) throws Exception { cluster.getServerAliveStream() .filter(impl -> !impl.getInfo().isLeader()) .map(SimpleStateMachine4Testing::get) - .forEach(peer -> logSyncDelay.setDelayMs(peer.getId().toString(), 1000)); + .forEach(peer -> LOG_SYNC_DELAY.setDelayMs(peer.getId().toString(), 1000)); // trigger append entries request client.async().send(new SimpleMessage("abc")); @@ -442,11 +485,11 @@ void runTestCheckLeadershipFailure(CLUSTER cluster) throws Exception { // previous leader should not there. cluster.getServerAliveStream() .map(RaftServer.Division::getInfo) - .forEach(info -> Assert.assertTrue(!info.isLeader() || info.getCurrentTerm() > termOfPrevLeader)); + .forEach(info -> Assertions.assertTrue(!info.isLeader() || info.getCurrentTerm() > termOfPrevLeader)); } finally { // unblock append entries request - logSyncDelay.clear(); + LOG_SYNC_DELAY.clear(); } waitForLeader(cluster); @@ -455,7 +498,7 @@ void runTestCheckLeadershipFailure(CLUSTER cluster) throws Exception { LOG.info("Current Leader is elected on term {}", termOfCurrLeader); // leader on termOfPrevLeader should step-down. 
- Assert.assertTrue(termOfPrevLeader < termOfCurrLeader); + Assertions.assertTrue(termOfPrevLeader < termOfCurrLeader); } @Test @@ -468,10 +511,10 @@ public void testNoRetryWaitOnNotLeaderException() throws Exception { private void runTestNoRetryWaitOnNotLeaderException(MiniRaftCluster cluster) throws Exception { final RaftServer.Division leader = waitForLeader(cluster); final List followers = cluster.getFollowers(); - Assert.assertNotNull(followers); - Assert.assertEquals(2, followers.size()); - Assert.assertNotSame(leader, followers.get(0)); - Assert.assertNotSame(leader, followers.get(1)); + Assertions.assertNotNull(followers); + Assertions.assertEquals(2, followers.size()); + Assertions.assertNotSame(leader, followers.get(0)); + Assertions.assertNotSame(leader, followers.get(1)); // send a message to make sure that the leader is ready try (final RaftClient client = cluster.createClient(leader.getId())) { diff --git a/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java b/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java index 4ff9681f0a..c71b57e826 100644 --- a/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java @@ -17,6 +17,7 @@ */ package org.apache.ratis; +import org.apache.ratis.test.tag.Flaky; import org.apache.ratis.thirdparty.com.codahale.metrics.Gauge; import org.apache.ratis.RaftTestUtil.SimpleMessage; import org.apache.ratis.client.RaftClient; @@ -42,8 +43,10 @@ import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.Timestamp; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.Logger; import org.slf4j.event.Level; @@ -94,27 +97,27 @@ public void testBasicAppendEntriesKillLeader() throws Exception { } static 
CompletableFuture killAndRestartServer( - RaftPeerId id, long killSleepMs, long restartSleepMs, MiniRaftCluster cluster, Logger LOG) { + RaftPeerId id, long killSleepMs, long restartSleepMs, MiniRaftCluster cluster, Logger log) { final CompletableFuture future = new CompletableFuture<>(); new Thread(() -> { try { Thread.sleep(killSleepMs); cluster.killServer(id); Thread.sleep(restartSleepMs); - LOG.info("restart server: " + id); + log.info("restart server: " + id); cluster.restartServer(id, false); future.complete(null); } catch (Exception e) { - ExitUtils.terminate(-1, "Failed to kill/restart server: " + id, e, LOG); + ExitUtils.terminate(-1, "Failed to kill/restart server: " + id, e, log); } }).start(); return future; } static void runTestBasicAppendEntries( - boolean async, boolean killLeader, int numMessages, MiniRaftCluster cluster, Logger LOG) + boolean async, boolean killLeader, int numMessages, MiniRaftCluster cluster, Logger log) throws Exception { - LOG.info("runTestBasicAppendEntries: async? {}, killLeader={}, numMessages={}", + log.info("runTestBasicAppendEntries: async? 
{}, killLeader={}, numMessages={}", async, killLeader, numMessages); for (RaftServer s : cluster.getServers()) { cluster.restartServer(s.getId(), false); @@ -123,16 +126,16 @@ static void runTestBasicAppendEntries( final long term = leader.getInfo().getCurrentTerm(); final CompletableFuture killAndRestartFollower = killAndRestartServer( - cluster.getFollowers().get(0).getId(), 0, 1000, cluster, LOG); + cluster.getFollowers().get(0).getId(), 0, 1000, cluster, log); final CompletableFuture killAndRestartLeader; if (killLeader) { - LOG.info("killAndRestart leader " + leader.getId()); - killAndRestartLeader = killAndRestartServer(leader.getId(), 2000, 4000, cluster, LOG); + log.info("killAndRestart leader " + leader.getId()); + killAndRestartLeader = killAndRestartServer(leader.getId(), 2000, 4000, cluster, log); } else { killAndRestartLeader = CompletableFuture.completedFuture(null); } - LOG.info(cluster.printServers()); + log.info(cluster.printServers()); final SimpleMessage[] messages = SimpleMessage.create(numMessages); @@ -152,23 +155,23 @@ static void runTestBasicAppendEntries( }); } else { final RaftClientReply reply = client.io().send(message); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } if (async) { f.join(); - Assert.assertEquals(messages.length, asyncReplyCount.get()); + Assertions.assertEquals(messages.length, asyncReplyCount.get()); } } Thread.sleep(cluster.getTimeoutMax().toIntExact(TimeUnit.MILLISECONDS) + 100); - LOG.info(cluster.printAllLogs()); + log.info(cluster.printAllLogs()); killAndRestartFollower.join(); killAndRestartLeader.join(); final List divisions = cluster.getServerAliveStream().collect(Collectors.toList()); for(RaftServer.Division impl: divisions) { - RaftTestUtil.assertLogEntries(impl, term, messages, 50, LOG); + RaftTestUtil.assertLogEntries(impl, term, messages, 50, log); } } @@ -194,7 +197,7 @@ void runTestOldLeaderCommit(CLUSTER cluster) throws Exception { 
Thread.sleep(cluster.getTimeoutMax().toLong(TimeUnit.MILLISECONDS) + 100); for (RaftServer.Division followerToSendLog : followersToSendLog) { RaftLog followerLog = followerToSendLog.getRaftLog(); - Assert.assertTrue(RaftTestUtil.logEntriesContains(followerLog, messages)); + Assertions.assertTrue(RaftTestUtil.logEntriesContains(followerLog, messages)); } LOG.info(String.format("killing old leader: %s", leaderId.toString())); @@ -212,7 +215,7 @@ void runTestOldLeaderCommit(CLUSTER cluster) throws Exception { Set followersToSendLogIds = followersToSendLog.stream().map(f -> f.getId()).collect(Collectors.toSet()); - Assert.assertTrue(followersToSendLogIds.contains(newLeaderId)); + Assertions.assertTrue(followersToSendLogIds.contains(newLeaderId)); cluster.getServerAliveStream() .map(RaftServer.Division::getRaftLog) @@ -234,8 +237,8 @@ void runTestOldLeaderNotCommit(CLUSTER cluster) throws Exception { cluster.killServer(followers.get(i).getId()); } } catch (IndexOutOfBoundsException e) { - throw new org.junit.AssumptionViolatedException("The assumption is follower.size() = NUM_SERVERS - 1, " - + "actual NUM_SERVERS is " + NUM_SERVERS + ", and actual follower.size() is " + followers.size(), e); + Assumptions.abort("The assumption is follower.size() = NUM_SERVERS - 1, " + + "actual NUM_SERVERS is " + NUM_SERVERS + ", and actual follower.size() is " + followers.size()); } SimpleMessage[] messages = SimpleMessage.create(1); @@ -269,16 +272,16 @@ static class Client4TestWithLoad extends Thread { final AtomicReference exceptionInClientThread = new AtomicReference<>(); final MiniRaftCluster cluster; - final Logger LOG; + final Logger log; Client4TestWithLoad(int index, int numMessages, boolean useAsync, - MiniRaftCluster cluster, Logger LOG) { + MiniRaftCluster cluster, Logger log) { super("client-" + index); this.index = index; this.messages = SimpleMessage.create(numMessages, index + "-"); this.useAsync = useAsync; this.cluster = cluster; - this.LOG = LOG; + this.log = log; } 
boolean isRunning() { @@ -293,7 +296,7 @@ public void run() { if (!useAsync) { final RaftClientReply reply = client.io().send(messages[step.getAndIncrement()]); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } else { final CompletableFuture replyFuture = client.async().send(messages[i]); @@ -305,20 +308,20 @@ public void run() { if (step.incrementAndGet() == messages.length) { f.complete(null); } - Assert.assertTrue(r.isSuccess()); + Assertions.assertTrue(r.isSuccess()); }); } } if (useAsync) { f.join(); - Assert.assertTrue(step.get() == messages.length); + Assertions.assertEquals(step.get(), messages.length); } } catch(Exception t) { if (exceptionInClientThread.compareAndSet(null, t)) { - LOG.error(this + " failed", t); + log.error("{} failed", this, t); } else { exceptionInClientThread.get().addSuppressed(t); - LOG.error(this + " failed again!", t); + log.error("{} failed again!", this, t); } } finally { isRunning.set(false); @@ -337,47 +340,49 @@ public String toString() { } @Test + @Timeout(value = 300) public void testWithLoad() throws Exception { runWithNewCluster(NUM_SERVERS, cluster -> testWithLoad(10, 300, false, cluster, LOG)); } static void testWithLoad(final int numClients, final int numMessages, - boolean useAsync, MiniRaftCluster cluster, Logger LOG) throws Exception { - LOG.info("Running testWithLoad: numClients=" + numClients - + ", numMessages=" + numMessages + ", async=" + useAsync); + boolean useAsync, MiniRaftCluster cluster, Logger log) throws Exception { + log.info("Running testWithLoad: numClients={}, numMessages={}, async={}", + numClients, numMessages, useAsync); waitForLeader(cluster); final List clients = Stream.iterate(0, i -> i+1).limit(numClients) - .map(i -> new Client4TestWithLoad(i, numMessages, useAsync, cluster, LOG)) + .map(i -> new Client4TestWithLoad(i, numMessages, useAsync, cluster, log)) .collect(Collectors.toList()); final AtomicInteger lastStep = new AtomicInteger(); final Timer timer = 
new Timer(); timer.schedule(new TimerTask() { - private int previousLastStep = lastStep.get(); + private final AtomicInteger previousLastStep = new AtomicInteger(lastStep.get()); @Override public void run() { - LOG.info(cluster.printServers()); - LOG.info(BlockRequestHandlingInjection.getInstance().toString()); - LOG.info(cluster.toString()); - clients.forEach(c -> LOG.info(" " + c)); - JavaUtils.dumpAllThreads(s -> LOG.info(s)); - final int last = lastStep.get(); - if (last != previousLastStep) { - previousLastStep = last; + if (last != previousLastStep.get()) { + previousLastStep.set(last); } else { + // Only dump cluster/client state when no progress is detected to reduce log noise. + log.info(cluster.printServers()); + log.info(BlockRequestHandlingInjection.getInstance().toString()); + log.info(cluster.toString()); + clients.forEach(c -> log.info(" {}", c)); + JavaUtils.dumpAllThreads(s -> log.info(s)); + final RaftServer.Division leader = cluster.getLeader(); - LOG.info("NO PROGRESS at " + last + ", try to restart leader=" + leader); + log.info("NO PROGRESS at {}, try to restart leader={}", last, leader); if (leader != null) { try { cluster.restartServer(leader.getId(), false); - LOG.info("Restarted leader=" + leader); + log.info("Restarted leader={}", leader); } catch (IOException e) { - LOG.error("Failed to restart leader=" + leader); + log.error("Failed to restart leader={}", leader); } } } @@ -393,7 +398,7 @@ public void run() { } final int n = clients.stream().mapToInt(c -> c.step.get()).sum(); - Assert.assertTrue(n >= lastStep.get()); + Assertions.assertTrue(n >= lastStep.get()); if (n - lastStep.get() < 50 * numClients) { // Change leader at least 50 steps. 
Thread.sleep(10); @@ -408,10 +413,10 @@ public void run() { RaftTestUtil.changeLeader(cluster, leader.getId()); } } catch (IllegalStateException e) { - LOG.error("Failed to change leader ", e); + log.error("Failed to change leader ", e); } } - LOG.info("Leader change count=" + count); + log.info("Leader change count={}", count); timer.cancel(); for(Client4TestWithLoad c : clients) { @@ -422,7 +427,7 @@ public void run() { } } - public static void testRequestTimeout(boolean async, MiniRaftCluster cluster, Logger LOG) throws Exception { + public static void testRequestTimeout(boolean async, MiniRaftCluster cluster, Logger log) throws Exception { waitForLeader(cluster); final Timestamp startTime = Timestamp.currentTime(); try (final RaftClient client = cluster.createClient()) { @@ -447,10 +452,11 @@ public static void testRequestTimeout(boolean async, MiniRaftCluster cluster, Lo // The duration for which the client waits should be more than the retryCacheExpiryDuration. final TimeDuration duration = startTime.elapsedTime(); TimeDuration retryCacheExpiryDuration = RaftServerConfigKeys.RetryCache.expiryTime(cluster.getProperties()); - Assert.assertTrue(duration.compareTo(retryCacheExpiryDuration) >= 0); + Assertions.assertTrue(duration.compareTo(retryCacheExpiryDuration) >= 0); } } + @Flaky("RATIS-2262") @Test public void testStateMachineMetrics() throws Exception { runWithNewCluster(NUM_SERVERS, cluster -> runTestStateMachineMetrics(false, cluster)); @@ -458,7 +464,7 @@ public void testStateMachineMetrics() throws Exception { static void runTestStateMachineMetrics(boolean async, MiniRaftCluster cluster) throws Exception { RaftServer.Division leader = waitForLeader(cluster); - try (final RaftClient client = cluster.createClient()) { + try (final RaftClient client = cluster.createClient(leader.getId())) { Gauge appliedIndexGauge = getStatemachineGaugeWithName(leader, STATEMACHINE_APPLIED_INDEX_GAUGE); Gauge smAppliedIndexGauge = getStatemachineGaugeWithName(leader, @@ 
-479,10 +485,10 @@ static void runTestStateMachineMetrics(boolean async, MiniRaftCluster cluster) t long smAppliedIndexAfter = (Long) smAppliedIndexGauge.getValue(); checkFollowerCommitLagsLeader(cluster); - Assert.assertTrue("StateMachine Applied Index not incremented", - appliedIndexAfter > appliedIndexBefore); - Assert.assertTrue("StateMachine Apply completed Index not incremented", - smAppliedIndexAfter > smAppliedIndexBefore); + Assertions.assertTrue(appliedIndexAfter > appliedIndexBefore, + "StateMachine Applied Index not incremented"); + Assertions.assertTrue(smAppliedIndexAfter > smAppliedIndexBefore, + "StateMachine Apply completed Index not incremented"); } } @@ -495,12 +501,12 @@ private static void checkFollowerCommitLagsLeader(MiniRaftCluster cluster) { for (RaftServer.Division f : followers) { final RaftGroupMemberId follower = f.getMemberId(); Gauge followerCommitGauge = ServerMetricsTestUtils.getPeerCommitIndexGauge(leader, follower.getPeerId()); - Assert.assertTrue((Long)leaderCommitGauge.getValue() >= + Assertions.assertTrue((Long)leaderCommitGauge.getValue() >= (Long)followerCommitGauge.getValue()); Gauge followerMetric = ServerMetricsTestUtils.getPeerCommitIndexGauge(follower, follower.getPeerId()); System.out.println(followerCommitGauge.getValue()); System.out.println(followerMetric.getValue()); - Assert.assertTrue((Long)followerCommitGauge.getValue() <= (Long)followerMetric.getValue()); + Assertions.assertTrue((Long)followerCommitGauge.getValue() <= (Long)followerMetric.getValue()); } } @@ -511,7 +517,7 @@ private static Gauge getStatemachineGaugeWithName(RaftServer.Division server, St RATIS_STATEMACHINE_METRICS, RATIS_STATEMACHINE_METRICS_DESC); Optional metricRegistry = MetricRegistries.global().get(info); - Assert.assertTrue(metricRegistry.isPresent()); + Assertions.assertTrue(metricRegistry.isPresent()); return ServerMetricsTestUtils.getGaugeWithName(gaugeName, metricRegistry::get); } diff --git 
a/ratis-server/src/test/java/org/apache/ratis/RaftExceptionBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/RaftExceptionBaseTest.java index 9b6d811211..b06d6e904a 100644 --- a/ratis-server/src/test/java/org/apache/ratis/RaftExceptionBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/RaftExceptionBaseTest.java @@ -28,14 +28,15 @@ import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.impl.MiniRaftCluster; +import org.apache.ratis.server.impl.PeerChanges; import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.server.raftlog.RaftLogIOException; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.io.IOException; @@ -83,17 +84,17 @@ RaftClientReply assertNotLeaderException(RaftPeerId expectedSuggestedLeader, String messageId, RaftPeerId server, RaftClientRpc rpc, CLUSTER cluster) throws IOException { final SimpleMessage message = new SimpleMessage(messageId); final RaftClientReply reply = rpc.sendRequest(cluster.newRaftClientRequest(ClientId.randomId(), server, message)); - Assert.assertNotNull(reply); - Assume.assumeFalse(reply.isSuccess()); + Assertions.assertNotNull(reply); + Assumptions.assumeFalse(reply.isSuccess()); final NotLeaderException nle = reply.getNotLeaderException(); Objects.requireNonNull(nle); - Assert.assertEquals(expectedSuggestedLeader, nle.getSuggestedLeader().getId()); + Assertions.assertEquals(expectedSuggestedLeader, nle.getSuggestedLeader().getId()); return reply; } static void sendMessage(String message, RaftClient client) throws IOException { final RaftClientReply reply = client.io().send(new SimpleMessage(message)); - 
Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } @Test @@ -109,13 +110,12 @@ void runTestNotLeaderExceptionWithReconf(CLUSTER cluster) throws Exception { final RaftPeerId newLeader = RaftTestUtil.changeLeader(cluster, oldLeader); // add two more peers - MiniRaftCluster.PeerChanges change = cluster.addNewPeers(new String[]{ - "ss1", "ss2"}, true, false); + PeerChanges change = cluster.addNewPeers(2, true); // trigger setConfiguration - LOG.info("Start changing the configuration: {}", Arrays.asList(change.allPeersInNewConf)); + LOG.info("Start changing the configuration: {}", change.getPeersInNewConf()); try (final RaftClient c2 = cluster.createClient(newLeader)) { - RaftClientReply reply = c2.admin().setConfiguration(change.allPeersInNewConf); - Assert.assertTrue(reply.isSuccess()); + RaftClientReply reply = c2.admin().setConfiguration(change.getPeersInNewConf()); + Assertions.assertTrue(reply.isSuccess()); } LOG.info(cluster.printServers()); @@ -127,9 +127,9 @@ void runTestNotLeaderExceptionWithReconf(CLUSTER cluster) throws Exception { final Collection peers = cluster.getPeers(); final Collection peersFromReply = reply.getNotLeaderException().getPeers(); - Assert.assertEquals(peers.size(), peersFromReply.size()); + Assertions.assertEquals(peers.size(), peersFromReply.size()); for (RaftPeer p : peersFromReply) { - Assert.assertTrue(peers.contains(p)); + Assertions.assertTrue(peers.contains(p)); } sendMessage("m2", client); @@ -143,10 +143,10 @@ public void testGroupMismatchException() throws Exception { void runTestGroupMismatchException(CLUSTER cluster) throws Exception { final RaftGroup clusterGroup = cluster.getGroup(); - Assert.assertEquals(NUM_PEERS, clusterGroup.getPeers().size()); + Assertions.assertEquals(NUM_PEERS, clusterGroup.getPeers().size()); final RaftGroup anotherGroup = RaftGroup.valueOf(RaftGroupId.randomId(), clusterGroup.getPeers()); - Assert.assertNotEquals(clusterGroup.getGroupId(), 
anotherGroup.getGroupId()); + Assertions.assertNotEquals(clusterGroup.getGroupId(), anotherGroup.getGroupId()); // Create client using another group try(RaftClient client = cluster.createClient(anotherGroup)) { diff --git a/ratis-server/src/test/java/org/apache/ratis/RaftTestUtil.java b/ratis-server/src/test/java/org/apache/ratis/RaftTestUtil.java index ec149f2f45..40a93df17f 100644 --- a/ratis-server/src/test/java/org/apache/ratis/RaftTestUtil.java +++ b/ratis-server/src/test/java/org/apache/ratis/RaftTestUtil.java @@ -25,6 +25,7 @@ import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; @@ -35,20 +36,21 @@ import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.server.raftlog.RaftLogBase; -import org.apache.ratis.thirdparty.com.google.common.base.Preconditions; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.AutoCloseableLock; import org.apache.ratis.util.CollectionUtils; import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.ProtoUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.AssumptionViolatedException; +import org.apache.ratis.util.function.CheckedConsumer; +import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.lang.ref.WeakReference; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Arrays; @@ -140,7 +142,7 @@ static RaftServer.Division waitForLeader(MiniRaftCluster cluster, RaftGroupId gr static RaftPeerId 
waitAndKillLeader(MiniRaftCluster cluster) throws InterruptedException { final RaftServer.Division leader = waitForLeader(cluster); - Assert.assertNotNull(leader); + Assertions.assertNotNull(leader); LOG.info("killing leader = " + leader); cluster.killServer(leader.getId()); @@ -149,8 +151,9 @@ static RaftPeerId waitAndKillLeader(MiniRaftCluster cluster) throws InterruptedE static void waitFor(Supplier check, int checkEveryMillis, int waitForMillis) throws TimeoutException, InterruptedException { - Preconditions.checkNotNull(check); - Preconditions.checkArgument(waitForMillis >= checkEveryMillis); + Objects.requireNonNull(check, "check == null"); + Preconditions.assertTrue(waitForMillis >= checkEveryMillis, + () -> "waitFor: " + waitForMillis + " < checkEvery: " + checkEveryMillis); long st = System.currentTimeMillis(); boolean result = check.get(); @@ -226,7 +229,7 @@ static void checkLogEntries(RaftLog log, SimpleMessage[] expectedMessages, e = log.get(termIndices[i].getIndex()); if (Arrays.equals(expectedMessages[j].getContent().toByteArray(), e.getStateMachineLogEntry().getLogData().toByteArray())) { - Assert.assertTrue(predicate.test(e)); + Assertions.assertTrue(predicate.test(e)); } } catch (IOException exception) { exception.printStackTrace(); @@ -326,7 +329,8 @@ static List getStateMachineLogEntries(RaftLog log, Consumer print) { + static Void assertLogEntries(RaftLog log, long expectedTerm, SimpleMessage[] expectedMessages, + Consumer print) { final List entries = getStateMachineLogEntries(log, print); try { assertLogEntries(entries, expectedTerm, expectedMessages); @@ -338,16 +342,16 @@ static Void assertLogEntries(RaftLog log, long expectedTerm, SimpleMessage[] exp static void assertLogEntries(List entries, long expectedTerm, SimpleMessage... 
expectedMessages) { long logIndex = 0; - Assert.assertEquals(expectedMessages.length, entries.size()); + Assertions.assertEquals(expectedMessages.length, entries.size()); for (int i = 0; i < expectedMessages.length; i++) { final LogEntryProto e = entries.get(i); - Assert.assertTrue(e.getTerm() >= expectedTerm); + Assertions.assertTrue(e.getTerm() >= expectedTerm); if (e.getTerm() > expectedTerm) { expectedTerm = e.getTerm(); } - Assert.assertTrue(e.getIndex() > logIndex); + Assertions.assertTrue(e.getIndex() > logIndex); logIndex = e.getIndex(); - Assert.assertEquals(expectedMessages[i].getContent(), e.getStateMachineLogEntry().getLogData()); + Assertions.assertEquals(expectedMessages[i].getContent(), e.getStateMachineLogEntry().getLogData()); } } @@ -405,8 +409,8 @@ public ByteString getContent() { } class SimpleOperation { - private static final ClientId clientId = ClientId.randomId(); - private static final AtomicLong callId = new AtomicLong(); + private static final ClientId CLIENT_ID = ClientId.randomId(); + private static final AtomicLong CALL_ID = new AtomicLong(); private final String op; private final StateMachineLogEntryProto smLogEntryProto; @@ -416,7 +420,7 @@ public SimpleOperation(String op) { } public SimpleOperation(String op, boolean hasStateMachineData) { - this(clientId, callId.incrementAndGet(), op, hasStateMachineData); + this(CLIENT_ID, CALL_ID.incrementAndGet(), op, hasStateMachineData); } private SimpleOperation(ClientId clientId, long callId, String op, boolean hasStateMachineData) { @@ -461,20 +465,29 @@ static void delay(IntSupplier getDelayMs) throws InterruptedException { } } + static List getPeersWithPriority(List peers, RaftPeer suggestedLeader) { + List peersWithPriority = new ArrayList<>(); + for (RaftPeer peer : peers) { + final int priority = peer.equals(suggestedLeader) ? 
2 : 1; + peersWithPriority.add(RaftPeer.newBuilder(peer).setPriority(priority).build()); + } + return peersWithPriority; + } + static RaftPeerId changeLeader(MiniRaftCluster cluster, RaftPeerId oldLeader) throws Exception { - return changeLeader(cluster, oldLeader, AssumptionViolatedException::new); + return changeLeader(cluster, oldLeader, Assumptions::abort); } - static RaftPeerId changeLeader(MiniRaftCluster cluster, RaftPeerId oldLeader, Function constructor) - throws Exception { + static RaftPeerId changeLeader(MiniRaftCluster cluster, RaftPeerId oldLeader, + CheckedConsumer failToChangeLeaderHandler) throws Exception { final String name = JavaUtils.getCallerStackTraceElement().getMethodName() + "-changeLeader"; cluster.setBlockRequestsFrom(oldLeader.toString(), true); try { return JavaUtils.attemptRepeatedly(() -> { final RaftPeerId newLeader = waitForLeader(cluster).getId(); if (newLeader.equals(oldLeader)) { - throw constructor.apply("Failed to change leader: newLeader == oldLeader == " + oldLeader); + failToChangeLeaderHandler.accept("Failed to change leader: newLeader == oldLeader == " + oldLeader); } LOG.info("Changed leader from " + oldLeader + " to " + newLeader); return newLeader; @@ -511,6 +524,20 @@ static void blockQueueAndSetDelay(Iterable servers, Thread.sleep(3 * maxTimeout.toLong(TimeUnit.MILLISECONDS)); } + static void isolate(MiniRaftCluster cluster, RaftPeerId id) { + try { + BlockRequestHandlingInjection.getInstance().blockReplier(id.toString()); + cluster.setBlockRequestsFrom(id.toString(), true); + } catch (Exception e) { + e.printStackTrace(); + } + } + + static void deIsolate(MiniRaftCluster cluster, RaftPeerId id) { + BlockRequestHandlingInjection.getInstance().unblockReplier(id.toString()); + cluster.setBlockRequestsFrom(id.toString(), false); + } + static Thread sendMessageInNewThread(MiniRaftCluster cluster, RaftPeerId leaderId, SimpleMessage... 
messages) { Thread t = new Thread(() -> { try (final RaftClient client = cluster.createClient(leaderId)) { @@ -526,11 +553,11 @@ static Thread sendMessageInNewThread(MiniRaftCluster cluster, RaftPeerId leaderI } static void assertSameLog(RaftLog expected, RaftLog computed) throws Exception { - Assert.assertEquals(expected.getLastEntryTermIndex(), computed.getLastEntryTermIndex()); + Assertions.assertEquals(expected.getLastEntryTermIndex(), computed.getLastEntryTermIndex()); final long lastIndex = expected.getNextIndex() - 1; - Assert.assertEquals(expected.getLastEntryTermIndex().getIndex(), lastIndex); + Assertions.assertEquals(expected.getLastEntryTermIndex().getIndex(), lastIndex); for(long i = 0; i < lastIndex; i++) { - Assert.assertEquals(expected.get(i), computed.get(i)); + Assertions.assertEquals(expected.get(i), computed.get(i)); } } @@ -561,7 +588,21 @@ static void assertSuccessReply(CompletableFuture reply) throws } static void assertSuccessReply(RaftClientReply reply) { - Assert.assertNotNull("reply == null", reply); - Assert.assertTrue("reply is not success: " + reply, reply.isSuccess()); + Assertions.assertNotNull(reply, "reply == null"); + Assertions.assertTrue(reply.isSuccess(), "reply is not success: " + reply); + } + + static void gc() throws InterruptedException { + // use WeakReference to detect gc + Object obj = new Object(); + final WeakReference weakRef = new WeakReference<>(obj); + obj = null; + + // loop until gc has completed. 
+ for (int i = 0; weakRef.get() != null; i++) { + LOG.info("gc {}", i); + System.gc(); + Thread.sleep(100); + } } } diff --git a/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestTests.java b/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestTests.java index eea75592ef..94e9433b15 100644 --- a/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestTests.java @@ -19,31 +19,29 @@ import org.apache.ratis.client.RaftClient; import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.exceptions.RaftRetryFailureException; import org.apache.ratis.protocol.exceptions.ReadException; -import org.apache.ratis.protocol.exceptions.ReadIndexException; -import org.apache.ratis.retry.ExceptionDependentRetry; import org.apache.ratis.retry.RetryPolicies; import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.impl.MiniRaftCluster; +import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.util.Slf4jUtils; -import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.nio.charset.StandardCharsets; -import java.util.List; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; public abstract 
class ReadOnlyRequestTests @@ -56,283 +54,129 @@ public abstract class ReadOnlyRequestTests static final int NUM_SERVERS = 3; - static final String INCREMENT = "INCREMENT"; - static final String WAIT_AND_INCREMENT = "WAIT_AND_INCREMENT"; - static final String QUERY = "QUERY"; - final Message incrementMessage = new RaftTestUtil.SimpleMessage(INCREMENT); - final Message waitAndIncrementMessage = new RaftTestUtil.SimpleMessage(WAIT_AND_INCREMENT); - final Message queryMessage = new RaftTestUtil.SimpleMessage(QUERY); + static final String INCREMENT_STRING = "INCREMENT"; + static final String WAIT_AND_INCREMENT_STRING = "WAIT_AND_INCREMENT"; + static final String QUERY_STRING = "QUERY"; - @Before + public static final Message INCREMENT = new RaftTestUtil.SimpleMessage(INCREMENT_STRING); + public static final Message WAIT_AND_INCREMENT = new RaftTestUtil.SimpleMessage(WAIT_AND_INCREMENT_STRING); + public static final Message QUERY = new RaftTestUtil.SimpleMessage(QUERY_STRING); + + @BeforeEach public void setup() { final RaftProperties p = getProperties(); - p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, - CounterStateMachine.class, StateMachine.class); + CounterStateMachine.setProperties(p); } - @Test - public void testLinearizableRead() throws Exception { - getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, RaftServerConfigKeys.Read.Option.LINEARIZABLE); - runWithNewCluster(NUM_SERVERS, this::testReadOnlyImpl); + public static void assertOption(RaftServerConfigKeys.Read.Option expected, RaftProperties properties) { + final RaftServerConfigKeys.Read.Option computed = RaftServerConfigKeys.Read.option(properties); + Assertions.assertEquals(expected, computed); } @Test - public void testLeaseRead() throws Exception { - getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, true); - runWithNewCluster(NUM_SERVERS, this::testReadOnlyImpl); + public void testReadOnly() throws Exception { + 
assertOption(RaftServerConfigKeys.Read.Option.DEFAULT, getProperties()); + runWithNewCluster(NUM_SERVERS, ReadOnlyRequestTests::runTestReadOnly); } - private void testReadOnlyImpl(CLUSTER cluster) throws Exception { + static void runTestReadOnly(C cluster) throws Exception { try { RaftTestUtil.waitForLeader(cluster); final RaftPeerId leaderId = cluster.getLeader().getId(); try (final RaftClient client = cluster.createClient(leaderId)) { for (int i = 1; i <= 10; i++) { - RaftClientReply reply = client.io().send(incrementMessage); - Assert.assertTrue(reply.isSuccess()); - reply = client.io().sendReadOnly(queryMessage); - Assert.assertEquals(i, retrieve(reply)); - } - } - } finally { - cluster.shutdown(); - } - } - - @Test - public void testLinearizableReadTimeout() throws Exception { - getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, RaftServerConfigKeys.Read.Option.LINEARIZABLE); - runWithNewCluster(NUM_SERVERS, this::testReadOnlyTimeoutImpl); - } - - @Test - public void testLeaseReadTimeout() throws Exception { - getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, true); - runWithNewCluster(NUM_SERVERS, this::testReadOnlyTimeoutImpl); - } - - private void testReadOnlyTimeoutImpl(CLUSTER cluster) throws Exception { - try { - RaftTestUtil.waitForLeader(cluster); - final RaftPeerId leaderId = cluster.getLeader().getId(); - - try (final RaftClient client = cluster.createClient(leaderId); - final RaftClient noRetry = cluster.createClient(leaderId, RetryPolicies.noRetry())) { - - CompletableFuture result = client.async().send(incrementMessage); - client.admin().transferLeadership(null, 200); - - Assert.assertThrows(ReadIndexException.class, () -> { - RaftClientReply timeoutReply = noRetry.io().sendReadOnly(queryMessage); - Assert.assertNotNull(timeoutReply.getException()); - Assert.assertTrue(timeoutReply.getException() instanceof ReadException); - }); - } - - } finally { - cluster.shutdown(); - } - } - - @Test - public void 
testFollowerLinearizableRead() throws Exception { - getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, RaftServerConfigKeys.Read.Option.LINEARIZABLE); - runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyImpl); - } - - @Test - public void testFollowerLeaseRead() throws Exception { - getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, true); - runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyImpl); - } - - private void testFollowerReadOnlyImpl(CLUSTER cluster) throws Exception { - try { - RaftTestUtil.waitForLeader(cluster); - - List followers = cluster.getFollowers(); - Assert.assertEquals(2, followers.size()); - - final RaftPeerId f0 = followers.get(0).getId(); - final RaftPeerId f1 = followers.get(1).getId(); - try (RaftClient client = cluster.createClient(cluster.getLeader().getId())) { - for (int i = 1; i <= 10; i++) { - final RaftClientReply reply = client.io().send(incrementMessage); - Assert.assertTrue(reply.isSuccess()); - final RaftClientReply read1 = client.io().sendReadOnly(queryMessage, f0); - Assert.assertEquals(i, retrieve(read1)); - final CompletableFuture read2 = client.async().sendReadOnly(queryMessage, f1); - Assert.assertEquals(i, retrieve(read2.get(1, TimeUnit.SECONDS))); + assertReplyExact(i, client.io().send(INCREMENT)); + assertReplyExact(i, client.io().sendReadOnly(QUERY)); } } } finally { cluster.shutdown(); } } - - @Test - public void testFollowerLinearizableReadParallel() throws Exception { - getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, RaftServerConfigKeys.Read.Option.LINEARIZABLE); - runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyParallelImpl); - } - - @Test - public void testFollowerLeaseReadParallel() throws Exception { - getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, true); - runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyParallelImpl); - } - - private void testFollowerReadOnlyParallelImpl(CLUSTER cluster) throws 
Exception { - try { - RaftTestUtil.waitForLeader(cluster); - - List followers = cluster.getFollowers(); - Assert.assertEquals(2, followers.size()); - - try (RaftClient leaderClient = cluster.createClient(cluster.getLeader().getId()); - RaftClient followerClient1 = cluster.createClient(followers.get(0).getId())) { - - leaderClient.io().send(incrementMessage); - leaderClient.async().send(waitAndIncrementMessage); - Thread.sleep(100); - - RaftClientReply clientReply = followerClient1.io().sendReadOnly(queryMessage, followers.get(0).getId()); - Assert.assertEquals(2, retrieve(clientReply)); - } - - } finally { - cluster.shutdown(); - } - } - - @Test - public void testFollowerLinearizableReadFailWhenLeaderDown() throws Exception { - getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, RaftServerConfigKeys.Read.Option.LINEARIZABLE); - runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyFailWhenLeaderDownImpl); - } - @Test - public void testFollowerLeaseReadWhenLeaderDown() throws Exception { - getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, true); - runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyFailWhenLeaderDownImpl); + public void testReadTimeout() throws Exception { + runWithNewCluster(NUM_SERVERS, cluster -> runTestReadTimeout(RaftRetryFailureException.class, cluster)); } - private void testFollowerReadOnlyFailWhenLeaderDownImpl(CLUSTER cluster) throws Exception { - try { - RaftTestUtil.waitForLeader(cluster); - - List followers = cluster.getFollowers(); - Assert.assertEquals(2, followers.size()); - - try (RaftClient leaderClient = cluster.createClient(cluster.getLeader().getId()); - RaftClient followerClient1 = cluster.createClient(followers.get(0).getId(), RetryPolicies.noRetry())) { - leaderClient.io().send(incrementMessage); + static void runTestReadTimeout(Class exceptionClass, C cluster) + throws Exception { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); - RaftClientReply 
clientReply = followerClient1.io().sendReadOnly(queryMessage); - Assert.assertEquals(1, retrieve(clientReply)); + try (final RaftClient client = cluster.createClient(leaderId); + final RaftClient noRetry = cluster.createClient(leaderId, RetryPolicies.noRetry())) { - // kill the leader - // read timeout quicker than election timeout - leaderClient.admin().transferLeadership(null, 200); - - Assert.assertThrows(ReadIndexException.class, () -> { - followerClient1.io().sendReadOnly(queryMessage, followers.get(0).getId()); - }); - } + assertReplyExact(1, client.io().send(INCREMENT)); + client.admin().transferLeadership(null, 200); - } finally { - cluster.shutdown(); + Assertions.assertThrows(exceptionClass, () -> { + final RaftClientReply timeoutReply = noRetry.io().sendReadOnly(QUERY); + Assertions.assertFalse(timeoutReply.isSuccess()); + Assertions.assertNotNull(timeoutReply.getException()); + Assertions.assertInstanceOf(ReadException.class, timeoutReply.getException()); + }); } } @Test - public void testFollowerReadOnlyRetryWhenLeaderDown() throws Exception { - getProperties().setEnum(RaftServerConfigKeys.Read.OPTION_KEY, RaftServerConfigKeys.Read.Option.LINEARIZABLE); - runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyRetryWhenLeaderDown); + public void testReadOnlyRetryWhenLeaderDown() throws Exception { + runWithNewCluster(NUM_SERVERS, cluster -> runTestReadOnlyRetryWhenLeaderDown(null, cluster)); } - @Test - public void testFollowerLeaseReadRetryWhenLeaderDown() throws Exception { - getProperties().setBoolean(RaftServerConfigKeys.Read.LEADER_LEASE_ENABLED_KEY, true); - runWithNewCluster(NUM_SERVERS, this::testFollowerReadOnlyRetryWhenLeaderDown); - } - - private void testFollowerReadOnlyRetryWhenLeaderDown(CLUSTER cluster) throws Exception { - // only retry on readIndexException - final RetryPolicy retryPolicy = ExceptionDependentRetry - .newBuilder() - .setDefaultPolicy(RetryPolicies.noRetry()) - .setExceptionToPolicy(ReadIndexException.class, - 
RetryPolicies.retryForeverWithSleep(TimeDuration.valueOf(500, TimeUnit.MILLISECONDS))) - .build(); + static void runTestReadOnlyRetryWhenLeaderDown(RetryPolicy retryPolicy, C cluster) + throws Exception { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); - RaftTestUtil.waitForLeader(cluster); - - try (RaftClient client = cluster.createClient(cluster.getLeader().getId(), retryPolicy)) { - client.io().send(incrementMessage); - - final RaftClientReply clientReply = client.io().sendReadOnly(queryMessage); - Assert.assertEquals(1, retrieve(clientReply)); + try (RaftClient client = cluster.createClient(leaderId, retryPolicy)) { + assertReplyExact(1, client.io().send(INCREMENT)); + assertReplyExact(1, client.io().sendReadOnly(QUERY)); // kill the leader client.admin().transferLeadership(null, 200); // readOnly will success after re-election - final RaftClientReply replySuccess = client.io().sendReadOnly(queryMessage); - Assert.assertEquals(1, retrieve(clientReply)); + assertReplyExact(1, client.io().sendReadOnly(QUERY)); } } - @Test - public void testReadAfterWrite() throws Exception { - runWithNewCluster(NUM_SERVERS, this::testReadAfterWriteImpl); + static int retrieve(RaftClientReply reply) { + Assertions.assertTrue(reply.isSuccess()); + return Integer.parseInt(reply.getMessage().getContent().toString(StandardCharsets.UTF_8)); } - private void testReadAfterWriteImpl(CLUSTER cluster) throws Exception { - RaftTestUtil.waitForLeader(cluster); - try (RaftClient client = cluster.createClient()) { - // test blocking read-after-write - client.io().send(incrementMessage); - final RaftClientReply blockReply = client.io().sendReadAfterWrite(queryMessage); - Assert.assertEquals(1, retrieve(blockReply)); - - // test asynchronous read-after-write - client.async().send(incrementMessage); - client.async().sendReadAfterWrite(queryMessage).thenAccept(reply -> { - Assert.assertEquals(2, retrieve(reply)); - }); - - for (int i = 0; i < 20; i++) { - 
client.async().send(incrementMessage); - } - final CompletableFuture linearizable = client.async().sendReadOnly(queryMessage); - final CompletableFuture readAfterWrite = client.async().sendReadAfterWrite(queryMessage); - - CompletableFuture.allOf(linearizable, readAfterWrite).get(); - // read-after-write is more consistent than linearizable read - Assert.assertTrue(retrieve(readAfterWrite.get()) >= retrieve(linearizable.get())); - } + public static void assertReplyExact(int expectedCount, RaftClientReply reply) { + Assertions.assertTrue(reply.isSuccess()); + final int retrieved = retrieve(reply); + Assertions.assertEquals(expectedCount, retrieved, () -> "reply=" + reply); } - static int retrieve(RaftClientReply reply) { - return Integer.parseInt(reply.getMessage().getContent().toString(StandardCharsets.UTF_8)); + static void assertReplyAtLeast(int minCount, RaftClientReply reply) { + Assertions.assertTrue(reply.isSuccess()); + final int retrieved = retrieve(reply); + Assertions.assertTrue(retrieved >= minCount, + () -> "retrieved = " + retrieved + " < minCount = " + minCount + ", reply=" + reply); } - /** * CounterStateMachine support 3 operations * 1. increment * 2. get * 3. 
waitAndIncrement */ - static class CounterStateMachine extends BaseStateMachine { + public static class CounterStateMachine extends BaseStateMachine { + static void setProperties(RaftProperties properties) { + properties.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, CounterStateMachine.class, StateMachine.class); + } + private final AtomicLong counter = new AtomicLong(0L); @Override public CompletableFuture query(Message request) { - return CompletableFuture.completedFuture( - Message.valueOf(String.valueOf(counter.get()))); + return toMessageFuture(counter.get()); + } + + static CompletableFuture toMessageFuture(long count) { + return CompletableFuture.completedFuture(Message.valueOf(String.valueOf(count))); } @Override @@ -349,39 +193,45 @@ private void sleepQuietly(int millis) { } } - private void increment() { - counter.incrementAndGet(); + public long getCount() { + return counter.get(); + } + + private long increment() { + return counter.incrementAndGet(); } - private void waitAndIncrement() { + private long waitAndIncrement() { sleepQuietly(500); - increment(); + return increment(); } - private void timeoutIncrement() { + private long timeoutIncrement() { sleepQuietly(5000); - increment(); + return increment(); } @Override public CompletableFuture applyTransaction(TransactionContext trx) { - LOG.debug("apply trx with index=" + trx.getLogEntry().getIndex()); - updateLastAppliedTermIndex(trx.getLogEntry().getTerm(), trx.getLogEntry().getIndex()); - - String command = trx.getLogEntry().getStateMachineLogEntry() - .getLogData().toString(StandardCharsets.UTF_8); - - LOG.info("receive command: {}", command); - if (command.equals(INCREMENT)) { - increment(); - } else if (command.equals(WAIT_AND_INCREMENT)) { - waitAndIncrement(); + final LogEntryProto logEntry = trx.getLogEntry(); + final TermIndex ti = TermIndex.valueOf(logEntry); + updateLastAppliedTermIndex(ti); + LOG.info("{}: updateLastAppliedTermIndex {}", getId(), ti); + + final String command = 
logEntry.getStateMachineLogEntry().getLogData().toString(StandardCharsets.UTF_8); + + final long updatedCount; + if (command.equals(INCREMENT_STRING)) { + updatedCount = increment(); + } else if (command.equals(WAIT_AND_INCREMENT_STRING)) { + updatedCount = waitAndIncrement(); } else { - timeoutIncrement(); + updatedCount = timeoutIncrement(); } + LOG.info("{}: Applied {} command {}, updatedCount={}", getId(), ti, command, updatedCount); - return CompletableFuture.completedFuture(Message.valueOf("OK")); + return toMessageFuture(updatedCount); } } } diff --git a/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestWithLongTimeoutTests.java b/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestWithLongTimeoutTests.java index 14aa1bca26..4a6498f939 100644 --- a/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestWithLongTimeoutTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/ReadOnlyRequestWithLongTimeoutTests.java @@ -32,9 +32,9 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.util.concurrent.CompletableFuture; @@ -59,7 +59,7 @@ public abstract class ReadOnlyRequestWithLongTimeoutTests asyncTimeoutReply = client.async().send(timeoutMessage); Thread.sleep(100); - Assert.assertThrows(ReadException.class, () -> { + Assertions.assertThrows(ReadException.class, () -> { final RaftClientReply timeoutReply = client.io().sendReadOnly(queryMessage); - Assert.assertTrue(timeoutReply.getException().getCause() instanceof TimeoutIOException); + Assertions.assertTrue(timeoutReply.getException().getCause() instanceof TimeoutIOException); }); asyncTimeoutReply.join(); diff --git 
a/ratis-server/src/test/java/org/apache/ratis/RequestLimitAsyncBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/RequestLimitAsyncBaseTest.java index 19a46a0652..ce339faff9 100644 --- a/ratis-server/src/test/java/org/apache/ratis/RequestLimitAsyncBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/RequestLimitAsyncBaseTest.java @@ -31,8 +31,8 @@ import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.util.ArrayList; @@ -74,7 +74,7 @@ void runTestWriteElementLimit(CLUSTER cluster) throws Exception { final SimpleMessage message = new SimpleMessage("first"); final CompletableFuture future = c1.async().send(message); final RaftClientReply reply = getWithDefaultTimeout(future); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } // collecting futures returned from StateMachine.applyTransaction @@ -118,7 +118,7 @@ void runTestWriteElementLimit(CLUSTER cluster) throws Exception { // check replies for(CompletableFuture f : writeFutures) { final RaftClientReply reply = getWithDefaultTimeout(f); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } } diff --git a/ratis-server/src/test/java/org/apache/ratis/RetryCacheTests.java b/ratis-server/src/test/java/org/apache/ratis/RetryCacheTests.java index 18561ee65c..0674c3ed94 100644 --- a/ratis-server/src/test/java/org/apache/ratis/RetryCacheTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/RetryCacheTests.java @@ -18,7 +18,7 @@ package org.apache.ratis; import org.apache.ratis.server.impl.MiniRaftCluster; -import org.apache.ratis.server.impl.MiniRaftCluster.PeerChanges; +import org.apache.ratis.server.impl.PeerChanges; import 
org.apache.ratis.RaftTestUtil.SimpleMessage; import org.apache.ratis.client.RaftClient; import org.apache.ratis.client.RaftClientRpc; @@ -35,15 +35,14 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; -import java.util.Arrays; +import java.util.Collections; +import java.util.List; import java.util.concurrent.TimeUnit; -import static java.util.Arrays.asList; - public abstract class RetryCacheTests extends BaseTest implements MiniRaftCluster.Factory.Get { @@ -84,12 +83,13 @@ void runTestBasicRetry(CLUSTER cluster) throws Exception { } public static void assertReply(RaftClientReply reply, RaftClient client, long callId) { - Assert.assertEquals(client.getId(), reply.getClientId()); - Assert.assertEquals(callId, reply.getCallId()); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertEquals(client.getId(), reply.getClientId()); + Assertions.assertEquals(callId, reply.getCallId()); + Assertions.assertTrue(reply.isSuccess()); } - public void assertServer(MiniRaftCluster cluster, ClientId clientId, long callId, long oldLastApplied) throws Exception { + public void assertServer(MiniRaftCluster cluster, ClientId clientId, long callId, long oldLastApplied) + throws Exception { final long leaderApplied = cluster.getLeader().getInfo().getLastAppliedIndex(); // make sure retry cache has the entry for (RaftServer.Division server : cluster.iterateDivisions()) { @@ -97,10 +97,10 @@ public void assertServer(MiniRaftCluster cluster, ClientId clientId, long callId if (server.getInfo().getLastAppliedIndex() < leaderApplied) { Thread.sleep(1000); } - Assert.assertEquals(2, server.getRetryCache().getStatistics().size()); - Assert.assertNotNull(RetryCacheTestUtil.get(server, clientId, callId)); + Assertions.assertEquals(2, 
server.getRetryCache().getStatistics().size()); + Assertions.assertNotNull(RetryCacheTestUtil.get(server, clientId, callId)); // make sure there is only one log entry committed - Assert.assertEquals(1, count(server.getRaftLog(), oldLastApplied + 1)); + Assertions.assertEquals(1, count(server.getRaftLog(), oldLastApplied + 1)); } } @@ -136,23 +136,21 @@ void runTestRetryOnNewLeader(CLUSTER cluster) throws Exception { final long oldLastApplied = cluster.getLeader().getInfo().getLastAppliedIndex(); // trigger the reconfiguration, make sure the original leader is kicked out - PeerChanges change = cluster.addNewPeers(2, true); - RaftPeer[] allPeers = cluster.removePeers(2, true, - asList(change.newPeers)).allPeersInNewConf; + final PeerChanges change = cluster.removePeers(2, true, Collections.emptyList()); + final List allPeers = change.getPeersInNewConf(); // trigger setConfiguration - RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(allPeers), - peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); + RaftServerTestUtil.runWithMinorityPeers(cluster, allPeers, cluster::setConfiguration); final RaftPeerId newLeaderId = JavaUtils.attemptRepeatedly(() -> { final RaftPeerId id = RaftTestUtil.waitForLeader(cluster).getId(); - Assert.assertNotEquals(leaderId, id); + Assertions.assertNotEquals(leaderId, id); return id; }, 10, TimeDuration.valueOf(100, TimeUnit.MILLISECONDS), "wait for a leader different than " + leaderId, LOG); - Assert.assertNotEquals(leaderId, newLeaderId); + Assertions.assertNotEquals(leaderId, newLeaderId); // same clientId and callId in the request r = cluster.newRaftClientRequest(client.getId(), newLeaderId, callId, new SimpleMessage("message")); - rpc.addRaftPeers(Arrays.asList(change.newPeers)); + rpc.addRaftPeers(change.getAddedPeers()); for (int i = 0; i < 10; i++) { try { assertReply(rpc.sendRequest(r), client, callId); @@ -164,7 +162,7 @@ void runTestRetryOnNewLeader(CLUSTER cluster) throws Exception { } // check 
the new leader and make sure the retry did not get committed - Assert.assertEquals(0, count(cluster.getLeader().getRaftLog(), oldLastApplied + 1)); + Assertions.assertEquals(0, count(cluster.getLeader().getRaftLog(), oldLastApplied + 1)); } } } diff --git a/ratis-server/src/test/java/org/apache/ratis/TestReConfigProperty.java b/ratis-server/src/test/java/org/apache/ratis/TestReConfigProperty.java index 4535406a77..7d1aec901f 100644 --- a/ratis-server/src/test/java/org/apache/ratis/TestReConfigProperty.java +++ b/ratis-server/src/test/java/org/apache/ratis/TestReConfigProperty.java @@ -27,9 +27,10 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.event.Level; import java.io.IOException; @@ -59,7 +60,7 @@ public abstract class TestReConfigProperty exte private static final String VAL2 = "val2"; private static final String DEFAULT = "default"; - @Before + @BeforeEach public void setup () { conf1 = new RaftProperties(); conf2 = new RaftProperties(); @@ -81,8 +82,7 @@ public void testGetChangedProperty() { Collection changes = ReconfigurationBase.getChangedProperties(conf2, conf1); - Assert.assertTrue("expected 3 changed properties but got " + changes.size(), - changes.size() == 3); + Assertions.assertEquals(3, changes.size(), "Unexpected changed size"); boolean changeFound = false; boolean unsetFound = false; @@ -100,8 +100,7 @@ public void testGetChangedProperty() { setFound = true; } } - Assert.assertTrue("not all changes have been applied", - changeFound && unsetFound && setFound); + Assertions.assertTrue(changeFound && unsetFound && setFound, "not all changes have been applied"); } /** @@ -157,161 
+156,133 @@ public void run() { public void testReconfigure() { ReconfigurableDummy dummy = new ReconfigurableDummy(conf1); - Assert.assertEquals(PROP1 + " set to wrong value ", VAL1, dummy.getProperties().get(PROP1)); - Assert.assertEquals(PROP2 + " set to wrong value ", VAL1, dummy.getProperties().get(PROP2)); - Assert.assertEquals(PROP3 + " set to wrong value ", VAL1, dummy.getProperties().get(PROP3)); - Assert.assertNull(PROP4 + " set to wrong value ", dummy.getProperties().get(PROP4)); - Assert.assertNull(PROP5 + " set to wrong value ", dummy.getProperties().get(PROP5)); - - Assert.assertTrue(PROP1 + " should be reconfigurable ", - dummy.isPropertyReconfigurable(PROP1)); - Assert.assertTrue(PROP2 + " should be reconfigurable ", - dummy.isPropertyReconfigurable(PROP2)); - Assert.assertFalse(PROP3 + " should not be reconfigurable ", - dummy.isPropertyReconfigurable(PROP3)); - Assert.assertTrue(PROP4 + " should be reconfigurable ", - dummy.isPropertyReconfigurable(PROP4)); - Assert.assertFalse(PROP5 + " should not be reconfigurable ", - dummy.isPropertyReconfigurable(PROP5)); + Assertions.assertEquals(VAL1, dummy.getProperties().get(PROP1), PROP1 + " set to wrong value "); + Assertions.assertEquals(VAL1, dummy.getProperties().get(PROP2), PROP2 + " set to wrong value "); + Assertions.assertEquals(VAL1, dummy.getProperties().get(PROP3), PROP3 + " set to wrong value "); + Assertions.assertNull(dummy.getProperties().get(PROP4), PROP4 + " set to wrong value "); + Assertions.assertNull(dummy.getProperties().get(PROP5), PROP5 + " set to wrong value "); + + Assertions.assertTrue(dummy.isPropertyReconfigurable(PROP1), PROP1 + " should be reconfigurable "); + Assertions.assertTrue(dummy.isPropertyReconfigurable(PROP2), PROP2 + " should be reconfigurable "); + Assertions.assertFalse(dummy.isPropertyReconfigurable(PROP3), PROP3 + " should not be reconfigurable "); + Assertions.assertTrue(dummy.isPropertyReconfigurable(PROP4), PROP4 + " should be reconfigurable "); + 
Assertions.assertFalse(dummy.isPropertyReconfigurable(PROP5), PROP5 + " should not be reconfigurable "); // change something to the same value as before { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP1, VAL1); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertEquals(PROP1 + " set to wrong value ", VAL1, dummy.getProperties().get(PROP1)); + Assertions.assertEquals(VAL1, dummy.getProperties().get(PROP1), PROP1 + " set to wrong value "); } catch (ReconfigurationException | IOException | TimeoutException | InterruptedException e) { - exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertFalse("received unexpected exception", - exceptionCaught); } // change something to null { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP1, null); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertEquals(PROP1 + "set to wrong value ", DEFAULT, - dummy.getProperties().get(PROP1)); + Assertions.assertEquals(DEFAULT +, dummy.getProperties().get(PROP1), PROP1 + "set to wrong value "); } catch (ReconfigurationException | IOException | InterruptedException | TimeoutException e) { - exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertFalse("received unexpected exception", - exceptionCaught); } // change something to a different value than before { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP1, VAL2); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertEquals(PROP1 + "set to wrong value ", VAL2, dummy.getProperties().get(PROP1)); + Assertions.assertEquals(VAL2, dummy.getProperties().get(PROP1), PROP1 + "set to wrong value "); } catch (ReconfigurationException | IOException | InterruptedException | TimeoutException e) { - 
exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertFalse("received unexpected exception", - exceptionCaught); } // set unset property to null { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP4, null); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertSame(PROP4 + "set to wrong value ", DEFAULT, dummy.getProperties().get(PROP4)); + Assertions.assertSame(DEFAULT, dummy.getProperties().get(PROP4), PROP4 + "set to wrong value "); } catch (ReconfigurationException | IOException | InterruptedException | TimeoutException e) { - exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertFalse("received unexpected exception", - exceptionCaught); } // set unset property { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP4, VAL1); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertEquals(PROP4 + "set to wrong value ", VAL1, dummy.getProperties().get(PROP4)); + Assertions.assertEquals(VAL1, dummy.getProperties().get(PROP4), PROP4 + "set to wrong value "); } catch (ReconfigurationException | IOException | InterruptedException | TimeoutException e) { - exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertFalse("received unexpected exception", - exceptionCaught); } // try to set unset property to null (not reconfigurable) { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP5, null); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); } catch (ReconfigurationException | IOException | InterruptedException | TimeoutException e) { - exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertTrue("did not receive expected exception", - dummy.getReconfigurationStatus().getChanges() + 
Assertions.assertTrue(dummy.getReconfigurationStatus().getChanges() .get(new PropertyChange(PROP5, DEFAULT, null)) - .getMessage().contains("Property is not reconfigurable.") && !exceptionCaught); + .getMessage().contains("Property is not reconfigurable."), "did not receive expected exception"); } // try to set unset property to value (not reconfigurable) { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP5, VAL1); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); } catch (ReconfigurationException | IOException | InterruptedException | TimeoutException e) { - exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertTrue("did not receive expected exception", - dummy.getReconfigurationStatus().getChanges() + Assertions.assertTrue(dummy.getReconfigurationStatus().getChanges() .get(new PropertyChange(PROP5, VAL1, null)) - .getMessage().contains("Property is not reconfigurable.") && !exceptionCaught); + .getMessage().contains("Property is not reconfigurable."), "did not receive expected exception"); } // try to change property to value (not reconfigurable) { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP3, VAL2); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); } catch (ReconfigurationException | IOException | InterruptedException | TimeoutException e) { - exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertTrue("did not receive expected exception", - dummy.getReconfigurationStatus().getChanges() + Assertions.assertTrue(dummy.getReconfigurationStatus().getChanges() .get(new PropertyChange(PROP3, VAL2, VAL1)) - .getMessage().contains("Property is not reconfigurable.") && !exceptionCaught); + .getMessage().contains("Property is not reconfigurable."), "did not receive expected exception"); } // try to change property to null (not 
reconfigurable) { - boolean exceptionCaught = false; try { dummy.reconfigureProperty(PROP3, null); dummy.startReconfiguration(); RaftTestUtil.waitFor(() -> dummy.getReconfigurationStatus().ended(), 100, 60000); } catch (ReconfigurationException | IOException | InterruptedException | TimeoutException e) { - exceptionCaught = true; + Assertions.fail("Unexpected exception", e); } - Assert.assertTrue("did not receive expected exception", - dummy.getReconfigurationStatus().getChanges() + Assertions.assertTrue(dummy.getReconfigurationStatus().getChanges() .get(new PropertyChange(PROP3, DEFAULT, VAL1)) - .getMessage().contains("Property is not reconfigurable.") && !exceptionCaught); + .getMessage().contains("Property is not reconfigurable."), "did not receive expected exception"); } } @@ -321,7 +292,7 @@ public void testReconfigure() { @Test public void testThread() throws ReconfigurationException, IOException { ReconfigurableDummy dummy = new ReconfigurableDummy(conf1); - Assert.assertEquals(VAL1, dummy.getProperties().get(PROP1)); + Assertions.assertEquals(VAL1, dummy.getProperties().get(PROP1)); Thread dummyThread = new Thread(dummy); dummyThread.start(); try { @@ -341,25 +312,23 @@ public void testThread() throws ReconfigurationException, IOException { } } - Assert.assertFalse("dummy thread should not be alive", - dummyThread.isAlive()); + Assertions.assertFalse(dummyThread.isAlive(), "dummy thread should not be alive"); dummy.running = false; try { dummyThread.join(); } catch (InterruptedException ignore) { // do nothing } - Assert.assertTrue(PROP1 + " is set to wrong value", - dummy.getProperties().get(PROP1).equals(VAL2)); + Assertions.assertEquals(VAL2, dummy.getProperties().get(PROP1), PROP1 + " is set to wrong value"); } /** * Ensure that {@link ReconfigurationBase#reconfigureProperty} updates the * parent's cached configuration on success. 
- * @throws IOException */ - @Test (timeout=300000) + @Test + @Timeout(value = 300) public void testConfIsUpdatedOnSuccess() throws ReconfigurationException, IOException, InterruptedException, TimeoutException { final String property = "FOO"; @@ -377,15 +346,15 @@ public void testConfIsUpdatedOnSuccess() reconfigurable.reconfigureProperty(property, value2); reconfigurable.startReconfiguration(); RaftTestUtil.waitFor(() -> reconfigurable.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertEquals(value2, reconfigurable.getProperties().get(property)); + Assertions.assertEquals(value2, reconfigurable.getProperties().get(property)); } /** * Ensure that {@link ReconfigurationBase#startReconfiguration} updates * its parent's cached configuration on success. - * @throws IOException */ - @Test (timeout=300000) + @Test + @Timeout(value = 300) public void testConfIsUpdatedOnSuccessAsync() throws InterruptedException, IOException, TimeoutException { final String property = "FOO"; @@ -404,7 +373,7 @@ public void testConfIsUpdatedOnSuccessAsync() reconfigurable.startReconfiguration(); RaftTestUtil.waitFor(() -> reconfigurable.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertEquals(value2, reconfigurable.getProperties().get(property)); + Assertions.assertEquals(value2, reconfigurable.getProperties().get(property)); } /** @@ -412,7 +381,8 @@ public void testConfIsUpdatedOnSuccessAsync() * property in its parent's configuration when the new value is null. 
* @throws IOException */ - @Test (timeout=300000) + @Test + @Timeout(value = 300) public void testConfIsUnset() throws InterruptedException, TimeoutException, IOException { final String property = "FOO"; @@ -427,15 +397,15 @@ public void testConfIsUnset() reconfigurable.startReconfiguration(); RaftTestUtil.waitFor(() -> reconfigurable.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertNull(reconfigurable.getProperties().get(property)); + Assertions.assertNull(reconfigurable.getProperties().get(property)); } /** * Ensure that {@link ReconfigurationBase#startReconfiguration} unsets the * property in its parent's configuration when the new value is null. - * @throws IOException */ - @Test (timeout=300000) + @Test + @Timeout(value = 300) public void testConfIsUnsetAsync() throws ReconfigurationException, IOException, TimeoutException, InterruptedException { final String property = "FOO"; @@ -451,7 +421,7 @@ public void testConfIsUnsetAsync() throws ReconfigurationException, // Kick off a reconfiguration task and wait until it completes. 
reconfigurable.startReconfiguration(); RaftTestUtil.waitFor(() -> reconfigurable.getReconfigurationStatus().ended(), 100, 60000); - Assert.assertNull(reconfigurable.getProperties().get(property)); + Assertions.assertNull(reconfigurable.getProperties().get(property)); } private ReconfigurationBase makeReconfigurable( diff --git a/ratis-server/src/test/java/org/apache/ratis/WatchRequestTests.java b/ratis-server/src/test/java/org/apache/ratis/WatchRequestTests.java index a9bdd1a3a9..a2c0cd2e10 100644 --- a/ratis-server/src/test/java/org/apache/ratis/WatchRequestTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/WatchRequestTests.java @@ -42,24 +42,22 @@ import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedConsumer; import org.apache.ratis.util.function.CheckedSupplier; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.event.Level; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; public abstract class WatchRequestTests extends BaseTest @@ -72,7 +70,7 @@ public abstract class WatchRequestTests static final int NUM_SERVERS = 3; static final int GET_TIMEOUT_SECOND = 10; - @Before + @BeforeEach public void setup() { final RaftProperties p = getProperties(); p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, @@ -130,7 +128,7 @@ CompletableFuture sendWatchRequest(long logIndex, RetryPolicy p CompletableFuture reply = watchClient.async().send(new 
RaftTestUtil.SimpleMessage("message")); long writeIndex = reply.get().getLogIndex(); - Assert.assertTrue(writeIndex > 0); + Assertions.assertTrue(writeIndex > 0); watchClient.async().watch(writeIndex, ReplicationLevel.MAJORITY_COMMITTED); return watchClient.async().watch(logIndex, ReplicationLevel.MAJORITY); } @@ -143,26 +141,26 @@ public String toString() { } } - static void runTest(CheckedConsumer testCase, MiniRaftCluster cluster, Logger LOG) + static void runTest(CheckedConsumer testCase, MiniRaftCluster cluster, Logger log) throws Exception { try(final RaftClient client = cluster.createClient(RaftTestUtil.waitForLeader(cluster).getId())) { final int[] numMessages = {1, 10, 20}; for(int n : numMessages) { - final TestParameters p = new TestParameters(n, client, cluster, LOG); - LOG.info("{}) {}, {}", n, p, cluster.printServers()); + final TestParameters p = new TestParameters(n, client, cluster, log); + log.info("{}) {}, {}", n, p, cluster.printServers()); testCase.accept(p); } } } static void runSingleTest(CheckedConsumer testCase, - MiniRaftCluster cluster, Logger LOG) + MiniRaftCluster cluster, Logger log) throws Exception { try(final RaftClient client = cluster.createClient(RaftTestUtil.waitForLeader(cluster).getId())) { final int[] numMessages = {1}; for(int n : numMessages) { - final TestParameters p = new TestParameters(n, client, cluster, LOG); - LOG.info("{}) {}, {}", n, p, cluster.printServers()); + final TestParameters p = new TestParameters(n, client, cluster, log); + log.info("{}) {}, {}", n, p, cluster.printServers()); testCase.accept(p); } } @@ -178,7 +176,8 @@ static class WatchReplies { WatchReplies(long logIndex, CompletableFuture majority, CompletableFuture all, - CompletableFuture majorityCommitted, CompletableFuture allCommitted, Logger log) { + CompletableFuture majorityCommitted, CompletableFuture allCommitted, + Logger log) { this.logIndex = logIndex; this.majority = majority; this.all = all; @@ -213,26 +212,26 @@ RaftClientReply 
get(CompletableFuture f, String name) throws Ex } log.info("{}-Watch({}) returns {}", name, logIndex, reply); - Assert.assertTrue(reply.isSuccess()); - Assert.assertTrue(reply.getLogIndex() >= logIndex); + Assertions.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.getLogIndex() >= logIndex); return reply; } } static void runTestWatchRequestAsync(TestParameters p) throws Exception { - final Logger LOG = p.log; + final Logger log = p.log; final MiniRaftCluster cluster = p.cluster; final int numMessages = p.numMessages; // blockStartTransaction of the leader so that no transaction can be committed MAJORITY final RaftServer.Division leader = cluster.getLeader(); - LOG.info("block leader {}", leader.getId()); + log.info("block leader {}", leader.getId()); SimpleStateMachine4Testing.get(leader).blockStartTransaction(); // blockFlushStateMachineData a follower so that no transaction can be ALL_COMMITTED final List followers = cluster.getFollowers(); final RaftServer.Division blockedFollower = followers.get(ThreadLocalRandom.current().nextInt(followers.size())); - LOG.info("block follower {}", blockedFollower.getId()); + log.info("block follower {}", blockedFollower.getId()); SimpleStateMachine4Testing.get(blockedFollower).blockFlushStateMachineData(); // send a message @@ -241,8 +240,8 @@ static void runTestWatchRequestAsync(TestParameters p) throws Exception { p.sendRequests(replies, watches); - Assert.assertEquals(numMessages, replies.size()); - Assert.assertEquals(numMessages, watches.size()); + Assertions.assertEquals(numMessages, replies.size()); + Assertions.assertEquals(numMessages, watches.size()); // since leader is blocked, nothing can be done. TimeUnit.SECONDS.sleep(1); @@ -251,11 +250,11 @@ static void runTestWatchRequestAsync(TestParameters p) throws Exception { // unblock leader so that the transaction can be committed. 
SimpleStateMachine4Testing.get(leader).unblockStartTransaction(); - LOG.info("unblock leader {}", leader.getId()); + log.info("unblock leader {}", leader.getId()); - checkMajority(replies, watches, LOG); + checkMajority(replies, watches, log); - Assert.assertEquals(numMessages, watches.size()); + Assertions.assertEquals(numMessages, watches.size()); // but not replicated/committed to all. TimeUnit.SECONDS.sleep(1); @@ -263,54 +262,54 @@ static void runTestWatchRequestAsync(TestParameters p) throws Exception { assertNotDone(watches.stream().map(CompletableFuture::join).map(w -> w.allCommitted)); // unblock follower so that the transaction can be replicated and committed to all. - LOG.info("unblock follower {}", blockedFollower.getId()); + log.info("unblock follower {}", blockedFollower.getId()); SimpleStateMachine4Testing.get(blockedFollower).unblockFlushStateMachineData(); - checkAll(watches, LOG); + checkAll(watches, log); } static void checkMajority(List> replies, - List> watches, Logger LOG) throws Exception { + List> watches, Logger log) throws Exception { for(int i = 0; i < replies.size(); i++) { final RaftClientReply reply = replies.get(i).get(GET_TIMEOUT_SECOND, TimeUnit.SECONDS); - LOG.info("checkMajority {}: receive {}", i, reply); + log.info("checkMajority {}: receive {}", i, reply); final long logIndex = reply.getLogIndex(); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); final WatchReplies watchReplies = watches.get(i).get(GET_TIMEOUT_SECOND, TimeUnit.SECONDS); - Assert.assertEquals(logIndex, watchReplies.logIndex); + Assertions.assertEquals(logIndex, watchReplies.logIndex); final RaftClientReply watchMajorityReply = watchReplies.getMajority(); final RaftClientReply watchMajorityCommittedReply = watchReplies.getMajorityCommitted(); { // check commit infos final Collection commitInfos = watchMajorityCommittedReply.getCommitInfos(); final String message = "logIndex=" + logIndex + ", " + ProtoUtils.toString(commitInfos); 
- Assert.assertEquals(NUM_SERVERS, commitInfos.size()); + Assertions.assertEquals(NUM_SERVERS, commitInfos.size()); // One follower has not committed, so min must be less than logIndex final long min = commitInfos.stream().map(CommitInfoProto::getCommitIndex).min(Long::compare).get(); - Assert.assertTrue(message, logIndex > min); + Assertions.assertTrue(logIndex > min, message); // All other followers have committed commitInfos.stream() .map(CommitInfoProto::getCommitIndex).sorted(Long::compare) - .skip(1).forEach(ci -> Assert.assertTrue(message, logIndex <= ci)); + .skip(1).forEach(ci -> Assertions.assertTrue(logIndex <= ci, message)); } } } - static void checkAll(List> watches, Logger LOG) throws Exception { + static void checkAll(List> watches, Logger log) throws Exception { for(int i = 0; i < watches.size(); i++) { final WatchReplies watchReplies = watches.get(i).get(GET_TIMEOUT_SECOND, TimeUnit.SECONDS); final long logIndex = watchReplies.logIndex; - LOG.info("checkAll {}: logIndex={}", i, logIndex); + log.info("checkAll {}: logIndex={}", i, logIndex); final RaftClientReply watchAllReply = watchReplies.getAll(); final RaftClientReply watchAllCommittedReply = watchReplies.getAllCommitted(); { // check commit infos final Collection commitInfos = watchAllCommittedReply.getCommitInfos(); final String message = "logIndex=" + logIndex + ", " + ProtoUtils.toString(commitInfos); - Assert.assertEquals(NUM_SERVERS, commitInfos.size()); - commitInfos.forEach(info -> Assert.assertTrue(message, logIndex <= info.getCommitIndex())); + Assertions.assertEquals(NUM_SERVERS, commitInfos.size()); + commitInfos.forEach(info -> Assertions.assertTrue(logIndex <= info.getCommitIndex(), message)); } } } @@ -338,14 +337,14 @@ public void testWatchRequestAsyncChangeLeader() throws Exception { } static void runTestWatchRequestAsyncChangeLeader(TestParameters p) throws Exception { - final Logger LOG = p.log; + final Logger log = p.log; final MiniRaftCluster cluster = p.cluster; final int 
numMessages = p.numMessages; // blockFlushStateMachineData a follower so that no transaction can be ALL_COMMITTED final List followers = cluster.getFollowers(); final RaftServer.Division blockedFollower = followers.get(ThreadLocalRandom.current().nextInt(followers.size())); - LOG.info("block follower {}", blockedFollower.getId()); + log.info("block follower {}", blockedFollower.getId()); SimpleStateMachine4Testing.get(blockedFollower).blockFlushStateMachineData(); final List> replies = new ArrayList<>(); @@ -353,11 +352,11 @@ static void runTestWatchRequestAsyncChangeLeader(TestParameters p) throws Except p.sendRequests(replies, watches); - Assert.assertEquals(numMessages, replies.size()); - Assert.assertEquals(numMessages, watches.size()); + Assertions.assertEquals(numMessages, replies.size()); + Assertions.assertEquals(numMessages, watches.size()); // since only one follower is blocked commit, requests can be committed MAJORITY and ALL but not ALL_COMMITTED. - checkMajority(replies, watches, LOG); + checkMajority(replies, watches, log); TimeUnit.SECONDS.sleep(1); assertNotDone(watches.stream().map(CompletableFuture::join).map(w -> w.allCommitted)); @@ -367,8 +366,8 @@ static void runTestWatchRequestAsyncChangeLeader(TestParameters p) throws Except // unblock follower so that the transaction can be replicated and committed to all. 
SimpleStateMachine4Testing.get(blockedFollower).unblockFlushStateMachineData(); - LOG.info("unblock follower {}", blockedFollower.getId()); - checkAll(watches, LOG); + log.info("unblock follower {}", blockedFollower.getId()); + checkAll(watches, log); } @Test @@ -386,7 +385,7 @@ public void testWatchRequestTimeout() throws Exception { } static void runTestWatchRequestTimeout(TestParameters p) throws Exception { - final Logger LOG = p.log; + final Logger log = p.log; final MiniRaftCluster cluster = p.cluster; final int numMessages = p.numMessages; @@ -396,13 +395,13 @@ static void runTestWatchRequestTimeout(TestParameters p) throws Exception { // blockStartTransaction of the leader so that no transaction can be committed MAJORITY final RaftServer.Division leader = cluster.getLeader(); - LOG.info("block leader {}", leader.getId()); + log.info("block leader {}", leader.getId()); SimpleStateMachine4Testing.get(leader).blockStartTransaction(); // blockFlushStateMachineData a follower so that no transaction can be ALL_COMMITTED final List followers = cluster.getFollowers(); final RaftServer.Division blockedFollower = followers.get(ThreadLocalRandom.current().nextInt(followers.size())); - LOG.info("block follower {}", blockedFollower.getId()); + log.info("block follower {}", blockedFollower.getId()); SimpleStateMachine4Testing.get(blockedFollower).blockFlushStateMachineData(); // send a message @@ -411,8 +410,8 @@ static void runTestWatchRequestTimeout(TestParameters p) throws Exception { p.sendRequests(replies, watches); - Assert.assertEquals(numMessages, replies.size()); - Assert.assertEquals(numMessages, watches.size()); + Assertions.assertEquals(numMessages, replies.size()); + Assertions.assertEquals(numMessages, watches.size()); watchTimeout.sleep(); watchTimeoutDenomination.sleep(); // for roundup error @@ -421,13 +420,13 @@ static void runTestWatchRequestTimeout(TestParameters p) throws Exception { // unblock leader so that the transaction can be committed. 
SimpleStateMachine4Testing.get(leader).unblockStartTransaction(); - LOG.info("unblock leader {}", leader.getId()); + log.info("unblock leader {}", leader.getId()); - checkMajority(replies, watches, LOG); - checkTimeout(replies, watches, LOG); + checkMajority(replies, watches, log); + checkTimeout(replies, watches, log); SimpleStateMachine4Testing.get(blockedFollower).unblockFlushStateMachineData(); - LOG.info("unblock follower {}", blockedFollower.getId()); + log.info("unblock follower {}", blockedFollower.getId()); } @Test @@ -448,7 +447,7 @@ public void testWatchRequestClientTimeout() throws Exception { } static void runTestWatchRequestClientTimeout(TestParameters p) throws Exception { - final Logger LOG = p.log; + final Logger log = p.log; CompletableFuture watchReply; // watch 1000 which will never be committed @@ -461,12 +460,12 @@ static void runTestWatchRequestClientTimeout(TestParameters p) throws Exception watchReply.get(); fail("runTestWatchRequestClientTimeout failed"); } catch (Exception ex) { - LOG.error("error occurred", ex); - Assert.assertTrue(ex.getCause().getClass() == AlreadyClosedException.class || + log.error("error occurred", ex); + Assertions.assertTrue(ex.getCause().getClass() == AlreadyClosedException.class || ex.getCause().getClass() == RaftRetryFailureException.class); if (ex.getCause() != null) { if (ex.getCause().getCause() != null) { - Assert.assertEquals(TimeoutIOException.class, + Assertions.assertEquals(TimeoutIOException.class, ex.getCause().getCause().getClass()); } } @@ -527,15 +526,15 @@ static void runTestWatchMetrics(TestParameters p) throws Exception { } static void checkTimeout(List> replies, - List> watches, Logger LOG) throws Exception { + List> watches, Logger log) throws Exception { for(int i = 0; i < replies.size(); i++) { final RaftClientReply reply = replies.get(i).get(GET_TIMEOUT_SECOND, TimeUnit.SECONDS); - LOG.info("checkTimeout {}: receive {}", i, reply); + log.info("checkTimeout {}: receive {}", i, reply); final 
long logIndex = reply.getLogIndex(); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); final WatchReplies watchReplies = watches.get(i).get(GET_TIMEOUT_SECOND, TimeUnit.SECONDS); - Assert.assertEquals(logIndex, watchReplies.logIndex); + Assertions.assertEquals(logIndex, watchReplies.logIndex); assertNotReplicatedException(logIndex, ReplicationLevel.ALL, watchReplies::getAll); @@ -555,10 +554,11 @@ static void assertNotReplicatedException(long logIndex, ReplicationLevel replica } static void assertNotReplicatedException(long logIndex, ReplicationLevel replication, Throwable t) { - Assert.assertSame(NotReplicatedException.class, t.getClass()); + Assertions.assertSame(NotReplicatedException.class, t.getClass()); final NotReplicatedException nre = (NotReplicatedException) t; - Assert.assertNotNull(nre); - Assert.assertEquals(logIndex, nre.getLogIndex()); - Assert.assertEquals(replication, nre.getRequiredReplication()); + Assertions.assertNotNull(nre); + Assertions.assertEquals(logIndex, nre.getLogIndex()); + Assertions.assertEquals(replication, nre.getRequiredReplication()); + Assertions.assertNotNull(nre.getCommitInfos()); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/client/impl/RaftClientTestUtil.java b/ratis-server/src/test/java/org/apache/ratis/client/impl/RaftClientTestUtil.java index d90b0cc53f..886879a472 100644 --- a/ratis-server/src/test/java/org/apache/ratis/client/impl/RaftClientTestUtil.java +++ b/ratis-server/src/test/java/org/apache/ratis/client/impl/RaftClientTestUtil.java @@ -29,7 +29,8 @@ /** Interface for testing raft client. 
*/ public interface RaftClientTestUtil { static void assertAsyncRequestSemaphore(RaftClient client, int expectedAvailablePermits, int expectedQueueLength) { - ((RaftClientImpl) client).getOrderedAsync().assertRequestSemaphore(expectedAvailablePermits, expectedQueueLength); + ((RaftClientImpl) client).getOrderedAsync(null) + .assertRequestSemaphore(expectedAvailablePermits, expectedQueueLength); } static ClientInvocationId getClientInvocationId(RaftClient client) { diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/BlockRequestHandlingInjection.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/BlockRequestHandlingInjection.java index 3487aa472a..9af62bc430 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/BlockRequestHandlingInjection.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/BlockRequestHandlingInjection.java @@ -25,7 +25,7 @@ import java.util.concurrent.ConcurrentHashMap; /** Inject code to block a server from handling incoming requests. 
*/ -public class BlockRequestHandlingInjection implements CodeInjectionForTesting.Code { +public final class BlockRequestHandlingInjection implements CodeInjectionForTesting.Code { private static final BlockRequestHandlingInjection INSTANCE = new BlockRequestHandlingInjection(); diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/GroupInfoBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/GroupInfoBaseTest.java index 72eedf0f32..218b6e067d 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/GroupInfoBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/GroupInfoBaseTest.java @@ -24,8 +24,8 @@ import org.apache.ratis.proto.RaftProtos.CommitInfoProto; import org.apache.ratis.server.RaftServer; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.util.Collection; @@ -92,7 +92,7 @@ private void runTest(CLUSTER cluster) throws Exception { final RaftClientReply reply = sendMessages(numMessages, cluster); for(CommitInfoProto i : reply.getCommitInfos()) { if (!RaftPeerId.valueOf(i.getServer().getId()).equals(killedFollower)) { - Assert.assertTrue(i.getCommitIndex() > maxCommit); + Assertions.assertTrue(i.getCommitIndex() > maxCommit); } } } @@ -104,12 +104,12 @@ private void runTest(CLUSTER cluster) throws Exception { } try(final RaftClient client = cluster.createClient(peer.getId())) { final GroupListReply info = client.getGroupManagementApi(peer.getId()).list(); - Assert.assertEquals(1, info.getGroupIds().stream().filter(id -> group.getGroupId().equals(id)).count()); + Assertions.assertEquals(1, info.getGroupIds().stream().filter(id -> group.getGroupId().equals(id)).count()); for(CommitInfoProto i : info.getCommitInfos()) { if (RaftPeerId.valueOf(i.getServer().getId()).equals(killedFollower)) { - Assert.assertTrue(i.getCommitIndex() <= 
maxCommit); + Assertions.assertTrue(i.getCommitIndex() <= maxCommit); } else { - Assert.assertTrue(i.getCommitIndex() > maxCommit); + Assertions.assertTrue(i.getCommitIndex() > maxCommit); } } } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/GroupManagementBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/GroupManagementBaseTest.java index 311a2150d6..a86ab2a515 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/GroupManagementBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/GroupManagementBaseTest.java @@ -36,8 +36,8 @@ import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedBiConsumer; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.event.Level; @@ -54,6 +54,8 @@ import java.util.Random; import java.util.stream.Collectors; +import static org.apache.ratis.RaftTestUtil.getPeersWithPriority; + public abstract class GroupManagementBaseTest extends BaseTest { static final Logger LOG = LoggerFactory.getLogger(GroupManagementBaseTest.class); @@ -63,20 +65,20 @@ public abstract class GroupManagementBaseTest extends BaseTest { Slf4jUtils.setLogLevel(RaftClient.LOG, Level.DEBUG); } - static final RaftProperties prop = new RaftProperties(); + static final RaftProperties PROP = new RaftProperties(); static { // avoid flaky behaviour in CI environment - RaftServerConfigKeys.Rpc.setTimeoutMin(prop, TimeDuration.valueOf(1500, TimeUnit.MILLISECONDS)); - RaftServerConfigKeys.Rpc.setTimeoutMax(prop, TimeDuration.valueOf(2000, TimeUnit.MILLISECONDS)); + RaftServerConfigKeys.Rpc.setTimeoutMin(PROP, TimeDuration.valueOf(1500, TimeUnit.MILLISECONDS)); + RaftServerConfigKeys.Rpc.setTimeoutMax(PROP, TimeDuration.valueOf(2000, TimeUnit.MILLISECONDS)); // it takes 5s+ to finish 
the blocking group add call - RaftClientConfigKeys.Rpc.setRequestTimeout(prop, TimeDuration.valueOf(12, TimeUnit.SECONDS)); + RaftClientConfigKeys.Rpc.setRequestTimeout(PROP, TimeDuration.valueOf(12, TimeUnit.SECONDS)); } public abstract MiniRaftCluster.Factory getClusterFactory(); public MiniRaftCluster getCluster(int peerNum) { - return getClusterFactory().newCluster(peerNum, prop); + return getClusterFactory().newCluster(peerNum, PROP); } @Test @@ -95,7 +97,7 @@ public void testGroupWithPriority() throws Exception { // Make sure that there are no leaders. TimeUnit.SECONDS.sleep(1); LOG.info("start: " + cluster.printServers()); - Assert.assertNull(cluster.getLeader()); + Assertions.assertNull(cluster.getLeader()); // Add groups List peers = cluster.getPeers(); @@ -106,7 +108,7 @@ public void testGroupWithPriority() throws Exception { LOG.info("add new group: " + newGroup); try (final RaftClient client = cluster.createClient(newGroup)) { // Before request, client try leader with the highest priority - Assert.assertTrue(client.getLeaderId() == peersWithPriority.get(suggestedLeaderIndex).getId()); + Assertions.assertTrue(client.getLeaderId() == peersWithPriority.get(suggestedLeaderIndex).getId()); for (RaftPeer p : newGroup.getPeers()) { client.getGroupManagementApi(p.getId()).add(newGroup); } @@ -114,7 +116,7 @@ public void testGroupWithPriority() throws Exception { JavaUtils.attempt(() -> { final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster, newGroup.getGroupId()); - Assert.assertTrue(leader.getId() == peers.get(suggestedLeaderIndex).getId()); + Assertions.assertTrue(leader.getId() == peers.get(suggestedLeaderIndex).getId()); }, 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testMultiGroupWithPriority", LOG); String suggestedLeader = peers.get(suggestedLeaderIndex).getId().toString(); @@ -128,7 +130,7 @@ public void testGroupWithPriority() throws Exception { JavaUtils.attempt(() -> { final RaftServer.Division leader = 
RaftTestUtil.waitForLeader(cluster, newGroup.getGroupId()); - Assert.assertTrue(leader.getId() != peers.get(suggestedLeaderIndex).getId()); + Assertions.assertTrue(leader.getId() != peers.get(suggestedLeaderIndex).getId()); }, 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testMultiGroupWithPriority", LOG); // send request so that suggested leader's log lag behind new leader's, @@ -136,7 +138,7 @@ public void testGroupWithPriority() throws Exception { try (final RaftClient client = cluster.createClient(newGroup)) { for (int i = 0; i < 10; i ++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -148,7 +150,7 @@ public void testGroupWithPriority() throws Exception { // leadership to suggested leader when suggested leader catch up the log. JavaUtils.attempt(() -> { final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster, newGroup.getGroupId()); - Assert.assertTrue(leader.getId() == peers.get(suggestedLeaderIndex).getId()); + Assertions.assertTrue(leader.getId() == peers.get(suggestedLeaderIndex).getId()); }, 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testMultiGroupWithPriority", LOG); // change the suggest leader @@ -156,18 +158,18 @@ public void testGroupWithPriority() throws Exception { List peersWithNewPriority = getPeersWithPriority(peers, peers.get(newSuggestedLeaderIndex)); try (final RaftClient client = cluster.createClient(newGroup)) { RaftClientReply reply = client.admin().setConfiguration(peersWithNewPriority.toArray(new RaftPeer[0])); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } JavaUtils.attempt(() -> { final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster, newGroup.getGroupId()); - Assert.assertTrue(leader.getId() == peers.get(newSuggestedLeaderIndex).getId()); + Assertions.assertTrue(leader.getId() == peers.get(newSuggestedLeaderIndex).getId()); }, 10, 
TimeDuration.valueOf(1, TimeUnit.SECONDS), "testMultiGroupWithPriority", LOG); cluster.killServer(peers.get(newSuggestedLeaderIndex).getId()); JavaUtils.attempt(() -> { final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster, newGroup.getGroupId()); - Assert.assertTrue(leader.getId() != peers.get(newSuggestedLeaderIndex).getId()); + Assertions.assertTrue(leader.getId() != peers.get(newSuggestedLeaderIndex).getId()); }, 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testMultiGroupWithPriority", LOG); cluster.shutdown(); @@ -189,7 +191,7 @@ public void testSingleGroupRestart() throws Exception { // Make sure that there are no leaders. TimeUnit.SECONDS.sleep(1); LOG.info("start: " + cluster.printServers()); - Assert.assertNull(cluster.getLeader()); + Assertions.assertNull(cluster.getLeader()); // Add groups final RaftGroup newGroup = RaftGroup.valueOf(RaftGroupId.randomId(), cluster.getPeers()); @@ -199,7 +201,7 @@ public void testSingleGroupRestart() throws Exception { client.getGroupManagementApi(p.getId()).add(newGroup); } } - Assert.assertNotNull(RaftTestUtil.waitForLeader(cluster)); + Assertions.assertNotNull(RaftTestUtil.waitForLeader(cluster)); TimeUnit.SECONDS.sleep(1); // restart the servers with null group @@ -209,7 +211,7 @@ public void testSingleGroupRestart() throws Exception { } // the servers should retrieve the conf from the log. - Assert.assertNotNull(RaftTestUtil.waitForLeader(cluster)); + Assertions.assertNotNull(RaftTestUtil.waitForLeader(cluster)); cluster.shutdown(); } @@ -265,7 +267,7 @@ public static void runMultiGroupTest( // Make sure that there are no leaders. 
TimeUnit.SECONDS.sleep(1); LOG.info("start: " + cluster.printServers()); - Assert.assertNull(cluster.getLeader()); + Assertions.assertNull(cluster.getLeader()); try { // Reinitialize servers to three groups @@ -284,7 +286,7 @@ public static void runMultiGroupTest( client.getGroupManagementApi(p.getId()).add(groups[i]); } } - Assert.assertNotNull(RaftTestUtil.waitForLeader(cluster, gid)); + Assertions.assertNotNull(RaftTestUtil.waitForLeader(cluster, gid)); checker.accept(cluster, groups[i]); } printThreadCount(type, "start groups"); @@ -300,15 +302,15 @@ public static void runMultiGroupTest( for (RaftPeer p : g.getPeers()) { final RaftServer.Division d = cluster.getDivision(p.getId(), g.getGroupId()); final File root = d.getRaftStorage().getStorageDir().getRoot(); - Assert.assertTrue(root.exists()); - Assert.assertTrue(root.isDirectory()); + Assertions.assertTrue(root.exists()); + Assertions.assertTrue(root.isDirectory()); final RaftClientReply r; try (final RaftClient client = cluster.createClient(p.getId(), g)) { r = client.getGroupManagementApi(p.getId()).remove(g.getGroupId(), true, false); } - Assert.assertTrue(r.isSuccess()); - Assert.assertFalse(root.exists()); + Assertions.assertTrue(r.isSuccess()); + Assertions.assertFalse(root.exists()); } } } @@ -333,7 +335,7 @@ public static void runMultiGroupTest( peers -> client.admin().setConfiguration(peers.toArray(RaftPeer.emptyArray()))); } - Assert.assertNotNull(RaftTestUtil.waitForLeader(cluster)); + Assertions.assertNotNull(RaftTestUtil.waitForLeader(cluster)); checker.accept(cluster, groups[chosen]); LOG.info("update groups: " + cluster.printServers()); printThreadCount(type, "update groups"); @@ -356,15 +358,15 @@ public void testGroupAlreadyExists() throws Exception { final RaftPeerId peerId = peer.getId(); final RaftGroup group = RaftGroup.valueOf(cluster.getGroupId(), peer); try (final RaftClient client = cluster.createClient()) { - Assert.assertEquals(group, cluster.getDivision(peerId).getGroup()); + 
Assertions.assertEquals(group, cluster.getDivision(peerId).getGroup()); try { client.getGroupManagementApi(peer.getId()).add(group); } catch (IOException ex) { // HadoopRPC throws RemoteException, which makes it hard to check if // the exception is instance of AlreadyExistsException - Assert.assertTrue(ex.toString().contains(AlreadyExistsException.class.getCanonicalName())); + Assertions.assertTrue(ex.toString().contains(AlreadyExistsException.class.getCanonicalName())); } - Assert.assertEquals(group, cluster.getDivision(peerId).getGroup()); + Assertions.assertEquals(group, cluster.getDivision(peerId).getGroup()); cluster.shutdown(); } } @@ -381,32 +383,32 @@ public void testGroupRemoveWhenRename() throws Exception { final RaftGroup group1 = RaftGroup.valueOf(cluster1.getGroupId(), peer1); final RaftGroup group2 = RaftGroup.valueOf(cluster2.getGroupId(), peer1); try (final RaftClient client = cluster1.createClient()) { - Assert.assertEquals(group1, cluster1.getDivision(peerId1).getGroup()); + Assertions.assertEquals(group1, cluster1.getDivision(peerId1).getGroup()); try { // Group2 is added to one of the peers in Group1 final GroupManagementApi api1 = client.getGroupManagementApi(peerId1); api1.add(group2); List groupIds1 = cluster1.getServer(peerId1).getGroupIds(); - Assert.assertEquals(groupIds1.size(), 2); + Assertions.assertEquals(groupIds1.size(), 2); // Group2 is renamed from the peer1 of Group1 api1.remove(group2.getGroupId(), false, true); groupIds1 = cluster1.getServer(peerId1).getGroupIds(); - Assert.assertEquals(groupIds1.size(), 1); + Assertions.assertEquals(groupIds1.size(), 1); cluster1.restart(false); List groupIdsAfterRestart = cluster1.getServer(peerId1).getGroupIds(); - Assert.assertEquals(groupIds1.size(), groupIdsAfterRestart.size()); + Assertions.assertEquals(groupIds1.size(), groupIdsAfterRestart.size()); File renamedGroup = new File(RaftServerConfigKeys.removedGroupsDir( cluster1.getProperties()), group2.getGroupId().getUuid().toString()); - 
Assert.assertTrue(renamedGroup.isDirectory()); + Assertions.assertTrue(renamedGroup.isDirectory()); } catch (IOException ex) { - Assert.fail(); + Assertions.fail(); } finally { cluster1.shutdown(); // Clean up @@ -428,7 +430,7 @@ public void testGroupRemoveWhenDelete() throws Exception { final RaftGroup group1 = RaftGroup.valueOf(cluster1.getGroupId(), peer1); final RaftGroup group2 = RaftGroup.valueOf(cluster2.getGroupId(), peer1); try (final RaftClient client = cluster1.createClient()) { - Assert.assertEquals(group1, + Assertions.assertEquals(group1, cluster1.getDivision(peerId1).getGroup()); try { @@ -436,20 +438,20 @@ public void testGroupRemoveWhenDelete() throws Exception { final GroupManagementApi api1 = client.getGroupManagementApi(peerId1); api1.add(group2); List groupIds1 = cluster1.getServer(peerId1).getGroupIds(); - Assert.assertEquals(groupIds1.size(), 2); + Assertions.assertEquals(groupIds1.size(), 2); // Group2 is deleted from the peer1 of Group1 api1.remove(group2.getGroupId(), true, false); groupIds1 = cluster1.getServer(peerId1).getGroupIds(); - Assert.assertEquals(groupIds1.size(), 1); + Assertions.assertEquals(groupIds1.size(), 1); cluster1.restart(false); List groupIdsAfterRestart = cluster1.getServer(peerId1).getGroupIds(); - Assert.assertEquals(groupIds1.size(), groupIdsAfterRestart.size()); + Assertions.assertEquals(groupIds1.size(), groupIdsAfterRestart.size()); } catch (IOException ex) { - Assert.fail(); + Assertions.fail(); } finally { cluster1.shutdown(); FileUtils.deleteFully(RaftServerConfigKeys.removedGroupsDir( diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/LeaderElectionTests.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/LeaderElectionTests.java index 6453e8e944..3a47d127c5 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/LeaderElectionTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/LeaderElectionTests.java @@ -29,6 +29,7 @@ import 
org.apache.ratis.protocol.RaftGroupMemberId; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.SetConfigurationRequest; import org.apache.ratis.protocol.exceptions.LeaderSteppingDownException; import org.apache.ratis.protocol.exceptions.TransferLeadershipException; import org.apache.ratis.server.DivisionInfo; @@ -37,20 +38,21 @@ import org.apache.ratis.server.metrics.LeaderElectionMetrics; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogTestUtils; -import org.apache.ratis.util.ExitUtils; +import org.apache.ratis.test.tag.Flaky; +import org.apache.ratis.thirdparty.com.codahale.metrics.Timer; +import org.apache.ratis.util.CodeInjectionForTesting; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.Timestamp; import org.apache.ratis.util.function.CheckedBiConsumer; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.List; @@ -60,19 +62,19 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import static org.apache.ratis.RaftTestUtil.getPeersWithPriority; import static org.apache.ratis.RaftTestUtil.waitForLeader; import static org.apache.ratis.server.metrics.LeaderElectionMetrics.LAST_LEADER_ELECTION_ELAPSED_TIME; import static org.apache.ratis.server.metrics.LeaderElectionMetrics.LEADER_ELECTION_COUNT_METRIC; import static org.apache.ratis.server.metrics.LeaderElectionMetrics.LEADER_ELECTION_TIME_TAKEN; import static org.apache.ratis.server.metrics.LeaderElectionMetrics.LEADER_ELECTION_TIMEOUT_COUNT_METRIC; -import 
static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; -import static org.junit.Assert.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import org.apache.ratis.thirdparty.com.codahale.metrics.Timer; import org.slf4j.event.Level; public abstract class LeaderElectionTests @@ -97,6 +99,80 @@ public void testBasicLeaderElection() throws Exception { cluster.shutdown(); } + static class SleepCode implements CodeInjectionForTesting.Code { + private final long sleepMs; + + SleepCode(long sleepMs) { + this.sleepMs = sleepMs; + } + + @Override + public boolean execute(Object localId, Object remoteId, Object... args) { + try { + LOG.info("{}: Simulate RaftServer startup blocking", localId); + Thread.sleep(sleepMs); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + return true; + } + } + + @Test + public void testWaitServerReady() throws Exception { + final int sleepMs = 1000 + ThreadLocalRandom.current().nextInt(1000); + LOG.info("Running testWaitServerReady, sleep = {}ms", sleepMs); + CodeInjectionForTesting.put(RaftServerImpl.START_COMPLETE, new SleepCode(sleepMs)); + final MiniRaftCluster cluster = newCluster(1); + final Timestamp startTime = Timestamp.currentTime(); + cluster.start(); + LOG.info("Cluster started at {}ms", startTime.elapsedTimeMs()); + final RaftGroupId groupId = cluster.getGroupId(); + final RaftServerImpl server = (RaftServerImpl) cluster.getServers().iterator().next().getDivision(groupId); + final boolean isRunning = server.isRunning(); + LOG.info("{} isRunning at {}ms? 
{}", server.getId(), startTime.elapsedTimeMs(), isRunning); + + // Leader will be elected if the server is ready + assertNotNull(waitForLeader(cluster), "No leader is elected."); + final long elapsedMs = startTime.elapsedTimeMs(); + // allow a small difference to tolerate system timer inaccuracy + assertTrue(elapsedMs > sleepMs - 10, () -> "elapseMs = " + elapsedMs + " but sleepMs = " + sleepMs); + cluster.shutdown(); + CodeInjectionForTesting.remove(RaftServerImpl.START_COMPLETE); + } + + @Test + public void testAddServerForWaitReady() throws IOException, InterruptedException { + LOG.info("Running testAddServerForWaitReady"); + // normal startup cluster with 3 server + final MiniRaftCluster cluster = newCluster(3); + cluster.start(); + RaftTestUtil.waitForLeader(cluster); + try (RaftClient client = cluster.createClient()) { + for (int i = 0; i < 10; ++i) { + RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("message_" + i)); + assertTrue(reply.isSuccess()); + } + // add 3 new servers and wait longer time + CodeInjectionForTesting.put(RaftServerImpl.START_COMPLETE, new SleepCode(2000)); + final PeerChanges peerChanges = cluster.addNewPeers(2, true); + LOG.info("add new 3 servers"); + LOG.info(cluster.printServers()); + RaftClientReply reply = client.admin().setConfiguration(SetConfigurationRequest.Arguments.newBuilder() + .setServersInNewConf(peerChanges.getAddedPeers()) + .setMode(SetConfigurationRequest.Mode.ADD).build()); + assertTrue(reply.isSuccess()); + for (RaftServer server : cluster.getServers()) { + RaftServerProxy proxy = (RaftServerProxy) server; + proxy.getImpls().forEach(s -> { + assertTrue(s.isRunning()); + }); + } + } + cluster.shutdown();; + CodeInjectionForTesting.remove(RaftServerImpl.START_COMPLETE); + } + @Test public void testChangeLeader() throws Exception { SegmentedRaftLogTestUtils.setRaftLogWorkerLogLevel(Level.TRACE); @@ -106,8 +182,7 @@ public void testChangeLeader() throws Exception { RaftPeerId leader = 
RaftTestUtil.waitForLeader(cluster).getId(); for(int i = 0; i < 10; i++) { - leader = RaftTestUtil.changeLeader(cluster, leader, IllegalStateException::new); - ExitUtils.assertNotTerminated(); + leader = RaftTestUtil.changeLeader(cluster, leader, Assertions::fail); } SegmentedRaftLogTestUtils.setRaftLogWorkerLogLevel(Level.INFO); cluster.shutdown(); @@ -122,12 +197,12 @@ void runTestLostMajorityHeartbeats(CLUSTER cluster) throws Exception { final TimeDuration maxTimeout = RaftServerConfigKeys.Rpc.timeoutMax(getProperties()); final RaftServer.Division leader = waitForLeader(cluster); try { - isolate(cluster, leader.getId()); + RaftTestUtil.isolate(cluster, leader.getId()); maxTimeout.sleep(); maxTimeout.sleep(); RaftServerTestUtil.assertLostMajorityHeartbeatsRecently(leader); } finally { - deIsolate(cluster, leader.getId()); + RaftTestUtil.deIsolate(cluster, leader.getId()); } } @@ -138,18 +213,18 @@ public void testLeaderNotCountListenerForMajority() throws Exception { void runTestLeaderNotCountListenerForMajority(CLUSTER cluster) throws Exception { final RaftServer.Division leader = waitForLeader(cluster); - Assert.assertEquals(2, ((RaftConfigurationImpl)cluster.getLeader().getRaftConf()).getMajorityCount()); + assertEquals(2, ((RaftConfigurationImpl)cluster.getLeader().getRaftConf()).getMajorityCount()); try (RaftClient client = cluster.createClient(leader.getId())) { client.io().send(new RaftTestUtil.SimpleMessage("message")); List listeners = cluster.getListeners() .stream().map(RaftServer.Division::getPeer).collect(Collectors.toList()); - Assert.assertEquals(2, listeners.size()); + assertEquals(2, listeners.size()); RaftClientReply reply = client.admin().setConfiguration(cluster.getPeers()); - Assert.assertTrue(reply.isSuccess()); + assertTrue(reply.isSuccess()); Collection peer = leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.LISTENER); - Assert.assertEquals(0, peer.size()); + assertEquals(0, peer.size()); } - Assert.assertEquals(3, 
((RaftConfigurationImpl)cluster.getLeader().getRaftConf()).getMajorityCount()); + assertEquals(3, ((RaftConfigurationImpl)cluster.getLeader().getRaftConf()).getMajorityCount()); } @Test @@ -164,12 +239,12 @@ void runTestListenerNotStartLeaderElection(CLUSTER cluster) throws Exception { final RaftServer.Division listener = cluster.getListeners().get(0); final RaftPeerId listenerId = listener.getId(); try { - isolate(cluster, listenerId); + RaftTestUtil.isolate(cluster, listenerId); maxTimeout.sleep(); maxTimeout.sleep(); - Assert.assertEquals(RaftProtos.RaftPeerRole.LISTENER, listener.getInfo().getCurrentRole()); + assertEquals(RaftProtos.RaftPeerRole.LISTENER, listener.getInfo().getCurrentRole()); } finally { - deIsolate(cluster, listener.getId()); + RaftTestUtil.deIsolate(cluster, listener.getId()); } } @@ -183,18 +258,18 @@ public void testTransferLeader() throws Exception { client.io().send(new RaftTestUtil.SimpleMessage("message")); List followers = cluster.getFollowers(); - Assert.assertEquals(2, followers.size()); + assertEquals(2, followers.size()); RaftServer.Division newLeader = followers.get(0); RaftClientReply reply = client.admin().transferLeadership(newLeader.getId(), 20000); - Assert.assertTrue(reply.isSuccess()); + assertTrue(reply.isSuccess()); final RaftServer.Division currLeader = waitForLeader(cluster); - Assert.assertEquals(newLeader.getId(), currLeader.getId()); + assertEquals(newLeader.getId(), currLeader.getId()); reply = client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertEquals(newLeader.getId().toString(), reply.getReplierId()); - Assert.assertTrue(reply.isSuccess()); + assertEquals(newLeader.getId().toString(), reply.getReplierId()); + assertTrue(reply.isSuccess()); } cluster.shutdown(); @@ -211,24 +286,24 @@ public void testYieldLeaderToHigherPriority() throws Exception { client.io().send(new RaftTestUtil.SimpleMessage("message")); List followers = cluster.getFollowers(); - Assert.assertEquals(2, 
followers.size()); + assertEquals(2, followers.size()); RaftServer.Division newLeader = followers.get(0); List peers = cluster.getPeers(); List peersWithNewPriority = getPeersWithPriority(peers, newLeader.getPeer()); RaftClientReply reply = client.admin().setConfiguration(peersWithNewPriority.toArray(new RaftPeer[0])); - Assert.assertTrue(reply.isSuccess()); + assertTrue(reply.isSuccess()); // Wait the old leader to step down. // TODO: make it more deterministic. TimeDuration.valueOf(1, TimeUnit.SECONDS).sleep(); final RaftServer.Division currLeader = waitForLeader(cluster); - Assert.assertEquals(newLeader.getId(), currLeader.getId()); + assertEquals(newLeader.getId(), currLeader.getId()); reply = client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertEquals(newLeader.getId().toString(), reply.getReplierId()); - Assert.assertTrue(reply.isSuccess()); + assertEquals(newLeader.getId().toString(), reply.getReplierId()); + assertTrue(reply.isSuccess()); } cluster.shutdown(); @@ -243,11 +318,11 @@ public void testTransferLeaderTimeout() throws Exception { final RaftServer.Division leader = waitForLeader(cluster); try (RaftClient client = cluster.createClient(leader.getId())) { List followers = cluster.getFollowers(); - Assert.assertEquals(followers.size(), 2); + assertEquals(followers.size(), 2); RaftServer.Division newLeader = followers.get(0); // isolate new leader, so that transfer leadership will timeout - isolate(cluster, newLeader.getId()); + RaftTestUtil.isolate(cluster, newLeader.getId()); List peers = cluster.getPeers(); @@ -259,9 +334,9 @@ public void testTransferLeaderTimeout() throws Exception { client.admin().transferLeadership(newLeader.getId(), timeoutMs); } catch (TransferLeadershipException e) { long cost = System.currentTimeMillis() - start; - Assert.assertTrue(cost > timeoutMs); - Assert.assertTrue(e.getMessage().contains("Failed to transfer leadership to")); - 
Assert.assertTrue(e.getMessage().contains(TransferLeadership.Result.Type.TIMED_OUT.toString())); + assertTrue(cost > timeoutMs); + assertTrue(e.getMessage().contains("Failed to transfer leadership to")); + assertTrue(e.getMessage().contains(TransferLeadership.Result.Type.TIMED_OUT.toString())); } return true; @@ -275,19 +350,19 @@ public void testTransferLeaderTimeout() throws Exception { try { client.io().send(new RaftTestUtil.SimpleMessage("message")); } catch (LeaderSteppingDownException e) { - Assert.assertTrue(e.getMessage().contains("is stepping down")); + assertTrue(e.getMessage().contains("is stepping down")); } return null; }, 5, TimeDuration.ONE_SECOND, "check leader steppingDown", RaftServer.LOG); - Assert.assertTrue(transferTimeoutFuture.get()); + assertTrue(transferTimeoutFuture.get()); // after transfer timeout, leader should accept request RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertEquals(leader.getId().toString(), reply.getReplierId()); - Assert.assertTrue(reply.isSuccess()); + assertEquals(leader.getId().toString(), reply.getReplierId()); + assertTrue(reply.isSuccess()); - deIsolate(cluster, newLeader.getId()); + RaftTestUtil.deIsolate(cluster, newLeader.getId()); } cluster.shutdown(); @@ -312,18 +387,18 @@ public void testEnforceLeader() throws Exception { } } - static void enforceLeader(MiniRaftCluster cluster, final String newLeader, Logger LOG) throws InterruptedException { - LOG.info(cluster.printServers()); + static void enforceLeader(MiniRaftCluster cluster, final String newLeader, Logger log) throws InterruptedException { + log.info(cluster.printServers()); for(int i = 0; !cluster.tryEnforceLeader(newLeader) && i < 10; i++) { final RaftServer.Division currLeader = cluster.getLeader(); - LOG.info("try enforcing leader to " + newLeader + " but " + + log.info("try enforcing leader to " + newLeader + " but " + (currLeader == null ? 
"no leader for round " + i : "new leader is " + currLeader.getId())); TimeDuration.ONE_SECOND.sleep(); } - LOG.info(cluster.printServers()); + log.info(cluster.printServers()); final RaftServer.Division leader = cluster.getLeader(); - Assert.assertEquals(newLeader, leader.getId().toString()); + assertEquals(newLeader, leader.getId().toString()); } @Test @@ -352,7 +427,8 @@ public void testLateServerStart() throws Exception { .orElseThrow(() -> new IllegalStateException("No leader yet")), 10, ONE_SECOND, "getLeaderId", LOG); LOG.info(cluster.printServers()); - Assert.assertEquals(leader.getId(), lastServerLeaderId); + assertEquals(leader.getId(), lastServerLeaderId); + cluster.shutdown(); } protected void testDisconnectLeader() throws Exception { @@ -363,32 +439,18 @@ protected void testDisconnectLeader() throws Exception { try (RaftClient client = cluster.createClient(leader.getId())) { client.io().send(new RaftTestUtil.SimpleMessage("message")); Thread.sleep(1000); - isolate(cluster, leader.getId()); + RaftTestUtil.isolate(cluster, leader.getId()); RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertNotEquals(reply.getReplierId(), leader.getId().toString()); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertNotEquals(reply.getReplierId(), leader.getId().toString()); + assertTrue(reply.isSuccess()); } finally { - deIsolate(cluster, leader.getId()); + RaftTestUtil.deIsolate(cluster, leader.getId()); } cluster.shutdown(); } } - private void isolate(MiniRaftCluster cluster, RaftPeerId id) { - try { - BlockRequestHandlingInjection.getInstance().blockReplier(id.toString()); - cluster.setBlockRequestsFrom(id.toString(), true); - } catch (Exception e) { - e.printStackTrace(); - } - } - - private void deIsolate(MiniRaftCluster cluster, RaftPeerId id) { - BlockRequestHandlingInjection.getInstance().unblockReplier(id.toString()); - cluster.setBlockRequestsFrom(id.toString(), false); - } - @Test public void 
testAddListener() throws Exception { try (final MiniRaftCluster cluster = newCluster(3)) { @@ -397,15 +459,15 @@ public void testAddListener() throws Exception { try (RaftClient client = cluster.createClient(leader.getId())) { client.io().send(new RaftTestUtil.SimpleMessage("message")); List servers = cluster.getPeers(); - Assert.assertEquals(servers.size(), 3); - MiniRaftCluster.PeerChanges changes = cluster.addNewPeers(1, - true, false, RaftProtos.RaftPeerRole.LISTENER); - RaftClientReply reply = client.admin().setConfiguration(servers, Arrays.asList(changes.newPeers)); - Assert.assertTrue(reply.isSuccess()); + assertEquals(servers.size(), 3); + final PeerChanges changes = cluster.addNewPeers(1, true); + final List added = changes.getAddedPeers(); + final RaftClientReply reply = client.admin().setConfiguration(servers, added); + assertTrue(reply.isSuccess()); Collection listener = leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.LISTENER); - Assert.assertEquals(1, listener.size()); - Assert.assertEquals(changes.newPeers[0].getId(), new ArrayList<>(listener).get(0).getId()); + assertEquals(1, listener.size()); + assertEquals(added.get(0).getId(), listener.iterator().next().getId()); } cluster.shutdown(); } @@ -419,18 +481,18 @@ public void testAddFollowerWhenExistsListener() throws Exception { try (RaftClient client = cluster.createClient(leader.getId())) { client.io().send(new RaftTestUtil.SimpleMessage("message")); List servers = cluster.getPeers(); - Assert.assertEquals(4, servers.size()); + assertEquals(4, servers.size()); List listener = new ArrayList<>( leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.LISTENER)); - Assert.assertEquals(1, listener.size()); - MiniRaftCluster.PeerChanges changes = cluster.addNewPeers(1, true, false); - ArrayList newPeers = new ArrayList<>(Arrays.asList(changes.newPeers)); + assertEquals(1, listener.size()); + final PeerChanges changes = cluster.addNewPeers(1, true); + final List newPeers = new 
ArrayList<>(changes.getAddedPeers()); newPeers.addAll(leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.FOLLOWER)); RaftClientReply reply = client.admin().setConfiguration(newPeers, listener); - Assert.assertTrue(reply.isSuccess()); - Assert.assertEquals(4, + assertTrue(reply.isSuccess()); + assertEquals(4, leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.FOLLOWER).size()); - Assert.assertEquals(1, + assertEquals(1, leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.LISTENER).size()); } cluster.shutdown(); @@ -444,13 +506,13 @@ public void testRemoveListener() throws Exception { final RaftServer.Division leader = waitForLeader(cluster); try (RaftClient client = cluster.createClient(leader.getId())) { client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertEquals(1, cluster.getListeners().size()); + assertEquals(1, cluster.getListeners().size()); List servers = cluster.getFollowers().stream().map(RaftServer.Division::getPeer).collect( Collectors.toList()); servers.add(leader.getPeer()); RaftClientReply reply = client.admin().setConfiguration(servers); - Assert.assertTrue(reply.isSuccess()); - Assert.assertEquals(0, leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.LISTENER).size()); + assertTrue(reply.isSuccess()); + assertEquals(0, leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.LISTENER).size()); } cluster.shutdown(); } @@ -465,15 +527,15 @@ public void testChangeFollowerToListener() throws Exception { client.io().send(new RaftTestUtil.SimpleMessage("message")); List followers = cluster.getFollowers().stream().map( RaftServer.Division::getPeer).collect(Collectors.toList()); - Assert.assertEquals(2, followers.size()); + assertEquals(2, followers.size()); List listeners = new ArrayList<>(); listeners.add(followers.get(1)); followers.remove(1); RaftClientReply reply = client.admin().setConfiguration(followers, listeners); - Assert.assertTrue(reply.isSuccess()); + assertTrue(reply.isSuccess()); Collection peer = 
leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.LISTENER); - Assert.assertEquals(1, peer.size()); - Assert.assertEquals(listeners.get(0).getId(), new ArrayList<>(peer).get(0).getId()); + assertEquals(1, peer.size()); + assertEquals(listeners.get(0).getId(), new ArrayList<>(peer).get(0).getId()); } cluster.shutdown(); } @@ -488,11 +550,17 @@ public void testChangeListenerToFollower() throws Exception { client.io().send(new RaftTestUtil.SimpleMessage("message")); List listeners = cluster.getListeners() .stream().map(RaftServer.Division::getPeer).collect(Collectors.toList()); - Assert.assertEquals(listeners.size(), 1); + assertEquals(listeners.size(), 1); RaftClientReply reply = client.admin().setConfiguration(cluster.getPeers()); - Assert.assertTrue(reply.isSuccess()); + assertTrue(reply.isSuccess()); Collection peer = leader.getRaftConf().getAllPeers(RaftProtos.RaftPeerRole.LISTENER); - Assert.assertEquals(0, peer.size()); + assertEquals(0, peer.size()); + + RaftServerTestUtil.waitAndCheckNewConf(cluster, cluster.getPeers(), 0, null); + + listeners = cluster.getListeners() + .stream().map(RaftServer.Division::getPeer).collect(Collectors.toList()); + assertEquals(0, listeners.size()); } cluster.shutdown(); } @@ -515,7 +583,8 @@ public void testLeaderElectionMetrics() throws IOException, InterruptedException long numLeaderElectionTimeout = ratisMetricRegistry.counter(LEADER_ELECTION_TIMEOUT_COUNT_METRIC).getCount(); assertTrue(numLeaderElectionTimeout > 0); - final DefaultTimekeeperImpl timekeeper = (DefaultTimekeeperImpl) ratisMetricRegistry.timer(LEADER_ELECTION_TIME_TAKEN); + final DefaultTimekeeperImpl timekeeper = + (DefaultTimekeeperImpl) ratisMetricRegistry.timer(LEADER_ELECTION_TIME_TAKEN); final Timer timer = timekeeper.getTimer(); double meanTimeNs = timer.getSnapshot().getMean(); long elapsedNs = timestamp.elapsedTime().toLong(TimeUnit.NANOSECONDS); @@ -528,7 +597,7 @@ public void testLeaderElectionMetrics() throws IOException, InterruptedException 
@Test public void testImmediatelyRevertedToFollower() { RaftServerImpl server = createMockServer(true); - LeaderElection subject = new LeaderElection(server, false); + LeaderElection subject = LeaderElection.newInstance(server, false); try { subject.startInForeground(); @@ -542,7 +611,7 @@ public void testImmediatelyRevertedToFollower() { @Test public void testShutdownBeforeStart() { RaftServerImpl server = createMockServer(false); - LeaderElection subject = new LeaderElection(server, false); + LeaderElection subject = LeaderElection.newInstance(server, false); try { subject.shutdown(); @@ -568,15 +637,15 @@ public void testPreVote() { assertEquals(followers.size(), 2); RaftServer.Division follower = followers.get(0); - isolate(cluster, follower.getId()); + RaftTestUtil.isolate(cluster, follower.getId()); // send message so that the isolated follower's log lag the others RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertTrue(reply.isSuccess()); + assertTrue(reply.isSuccess()); final long savedTerm = leader.getInfo().getCurrentTerm(); LOG.info("Wait follower {} timeout and trigger pre-vote", follower.getId()); Thread.sleep(2000); - deIsolate(cluster, follower.getId()); + RaftTestUtil.deIsolate(cluster, follower.getId()); Thread.sleep(2000); // with pre-vote leader will not step down RaftServer.Division newleader = waitForLeader(cluster); @@ -586,7 +655,7 @@ public void testPreVote() { assertEquals(savedTerm, leader.getInfo().getCurrentTerm()); reply = client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertTrue(reply.isSuccess()); + assertTrue(reply.isSuccess()); } cluster.shutdown(); @@ -604,9 +673,10 @@ void runTestListenerRejectRequestVote(CLUSTER cluster) throws IOException, Inter final TermIndex lastEntry = leader.getRaftLog().getLastEntryTermIndex(); RaftServer.Division listener = cluster.getListeners().get(0); final RaftProtos.RequestVoteRequestProto r = 
ServerProtoUtils.toRequestVoteRequestProto( - leader.getMemberId(), listener.getId(), leader.getRaftLog().getLastEntryTermIndex().getTerm() + 1, lastEntry, true); + leader.getMemberId(), listener.getId(), + leader.getRaftLog().getLastEntryTermIndex().getTerm() + 1, lastEntry, true); RaftProtos.RequestVoteReplyProto listenerReply = listener.getRaftServer().requestVote(r); - Assert.assertFalse(listenerReply.getServerReply().getSuccess()); + Assertions.assertFalse(listenerReply.getServerReply().getSuccess()); } @@ -620,23 +690,23 @@ void runTestPauseResumeLeaderElection(CLUSTER cluster) throws IOException, Inter final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster); final RaftPeerId leaderId = leader.getId(); final List followers = cluster.getFollowers(); - Assert.assertTrue(followers.size() >= 1); + assertTrue(followers.size() >= 1); final RaftServerImpl f1 = (RaftServerImpl)followers.get(0); try (final RaftClient client = cluster.createClient()) { pauseLeaderReply = client.getLeaderElectionManagementApi(f1.getId()).pause(); - Assert.assertTrue(pauseLeaderReply.isSuccess()); + assertTrue(pauseLeaderReply.isSuccess()); client.io().send(new RaftTestUtil.SimpleMessage("message")); RaftServer.Division newLeader = followers.get(0); List peers = cluster.getPeers(); List peersWithNewPriority = getPeersWithPriority(peers, newLeader.getPeer()); RaftClientReply reply = client.admin().setConfiguration(peersWithNewPriority.toArray(new RaftPeer[0])); - Assert.assertTrue(reply.isSuccess()); - JavaUtils.attempt(() -> Assert.assertEquals(leaderId, leader.getId()), + assertTrue(reply.isSuccess()); + JavaUtils.attempt(() -> assertEquals(leaderId, leader.getId()), 20, HUNDRED_MILLIS, "check leader id", LOG); final RaftClientReply resumeLeaderReply = client.getLeaderElectionManagementApi(f1.getId()).resume(); - Assert.assertTrue(resumeLeaderReply.isSuccess()); - JavaUtils.attempt(() -> Assert.assertEquals(f1.getId(), cluster.getLeader().getId()), + 
assertTrue(resumeLeaderReply.isSuccess()); + JavaUtils.attempt(() -> assertEquals(f1.getId(), cluster.getLeader().getId()), 20, HUNDRED_MILLIS, "check new leader", LOG); } } @@ -662,21 +732,22 @@ void runTestLeaderLease(CLUSTER cluster, long leaseTimeoutMs) throws Exception { try (final RaftClient client = cluster.createClient(leader.getId())) { client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertTrue(leader.getInfo().isLeader()); - Assert.assertTrue(leader.getInfo().isLeaderReady()); + assertTrue(leader.getInfo().isLeader()); + assertTrue(leader.getInfo().isLeaderReady()); RaftServerTestUtil.assertLeaderLease(leader, true); - isolate(cluster, leader.getId()); + RaftTestUtil.isolate(cluster, leader.getId()); Thread.sleep(leaseTimeoutMs); - Assert.assertTrue(leader.getInfo().isLeader()); - Assert.assertTrue(leader.getInfo().isLeaderReady()); + assertTrue(leader.getInfo().isLeader()); + assertTrue(leader.getInfo().isLeaderReady()); RaftServerTestUtil.assertLeaderLease(leader, false); } finally { - deIsolate(cluster, leader.getId()); + RaftTestUtil.deIsolate(cluster, leader.getId()); } } + @Flaky("RATIS-2108") @Test public void testLeaderLeaseDuringReconfiguration() throws Exception { // use a strict lease @@ -690,12 +761,12 @@ void runTestLeaderLeaseDuringReconfiguration(CLUSTER cluster, long leaseTimeoutM try (final RaftClient client = cluster.createClient(leader.getId())) { client.io().send(new RaftTestUtil.SimpleMessage("message")); - Assert.assertTrue(leader.getInfo().isLeader()); - Assert.assertTrue(leader.getInfo().isLeaderReady()); + assertTrue(leader.getInfo().isLeader()); + assertTrue(leader.getInfo().isLeaderReady()); RaftServerTestUtil.assertLeaderLease(leader, true); final List followers = cluster.getFollowers(); - final MiniRaftCluster.PeerChanges changes = cluster.addNewPeers(2, true); + final PeerChanges changes = cluster.addNewPeers(2, true); // blocking the original 2 followers 
BlockRequestHandlingInjection.getInstance().blockReplier(followers.get(0).getId().toString()); @@ -704,7 +775,7 @@ void runTestLeaderLeaseDuringReconfiguration(CLUSTER cluster, long leaseTimeoutM // start reconfiguration in another thread, shall fail eventually new Thread(() -> { try { - client.admin().setConfiguration(changes.allPeersInNewConf); + client.admin().setConfiguration(changes.getPeersInNewConf()); } catch (IOException e) { System.out.println("as expected: " + e.getMessage()); } @@ -712,8 +783,8 @@ void runTestLeaderLeaseDuringReconfiguration(CLUSTER cluster, long leaseTimeoutM Thread.sleep(leaseTimeoutMs); - Assert.assertTrue(leader.getInfo().isLeader()); - Assert.assertTrue(leader.getInfo().isLeaderReady()); + assertTrue(leader.getInfo().isLeader()); + assertTrue(leader.getInfo().isLeaderReady()); RaftServerTestUtil.assertLeaderLease(leader, false); } finally { diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/LogMetadataTests.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/LogMetadataTests.java new file mode 100644 index 0000000000..537ece931d --- /dev/null +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/LogMetadataTests.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.ratis.server.impl; + +import java.util.function.Predicate; +import java.util.stream.Collectors; +import org.apache.ratis.BaseTest; +import org.apache.ratis.RaftTestUtil; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.server.RaftServer; +import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.server.raftlog.RaftLog; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public abstract class LogMetadataTests + extends BaseTest + implements MiniRaftCluster.Factory.Get { + + @Test + public void testLogMetadataEnabled() throws Exception { + testLogMetadataBasicTest(true, x -> x > RaftLog.INVALID_LOG_INDEX); + } + + @Test + public void testLogMetadataDisabled() throws Exception { + testLogMetadataBasicTest(false, x -> x == RaftLog.INVALID_LOG_INDEX); + } + + public void testLogMetadataBasicTest(boolean logMetadata, Predicate checker) + throws Exception { + final RaftProperties prop = getProperties(); + RaftServerConfigKeys.Log.setLogMetadataEnabled(prop, logMetadata); + + final MiniRaftCluster cluster = newCluster(3); + try { + cluster.start(); + RaftTestUtil.waitForLeader(cluster); + final RaftServer.Division leader = cluster.getLeader(); + RaftPeerId leaderId = leader.getId(); + + cluster.getLeaderAndSendFirstMessage(true); + + // kill majority servers + for (RaftPeerId id : cluster.getGroup().getPeers().stream().map(RaftPeer::getId) + .filter(x -> !x.equals(leaderId)).collect(Collectors.toList())) { + cluster.killServer(id); + } + + // only restart one server + cluster.restartServer(leaderId, false); + + long commitIndex = cluster.getServer(leaderId).getDivision(cluster.getGroupId()).getRaftLog() + .getLastCommittedIndex(); + + Assertions.assertTrue(checker.test(commitIndex)); 
+ } finally { + cluster.shutdown(); + } + } +} diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/MiniRaftCluster.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/MiniRaftCluster.java index f6dd6121cb..825ae8f55b 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/MiniRaftCluster.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/MiniRaftCluster.java @@ -22,7 +22,6 @@ import org.apache.ratis.client.RaftClient; import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; -import org.apache.ratis.proto.RaftProtos; import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientReply; @@ -68,6 +67,7 @@ import java.util.Map; import java.util.Optional; import java.util.Timer; +import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -82,6 +82,8 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import static org.apache.ratis.proto.RaftProtos.RaftPeerRole.LISTENER; + public abstract class MiniRaftCluster implements Closeable { public static final Logger LOG = LoggerFactory.getLogger(MiniRaftCluster.class); @@ -92,14 +94,16 @@ public abstract class MiniRaftCluster implements Closeable { TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); static final AtomicInteger THREAD_COUNT = new AtomicInteger(0); - public static abstract class Factory { + static final AtomicInteger CLIENT_ID = new AtomicInteger(0); + + public abstract static class Factory { public interface Get { - Supplier properties = JavaUtils.memoize(RaftProperties::new); + Supplier PROPERTIES = JavaUtils.memoize(RaftProperties::new); Factory getFactory(); default RaftProperties getProperties() { - return properties.get(); + return PROPERTIES.get(); } default RaftProperties setStateMachine(Class stateMachineClass) { @@ -120,18 +124,20 @@ 
default void runWithNewCluster(int numServers, CheckedConsumer testCase) throws Exception { + default void runWithNewCluster(int numServers, boolean startCluster, CheckedConsumer testCase) + throws Exception { runWithNewCluster(numServers, 0, startCluster, testCase); } - default void runWithNewCluster(int numServers, int numListeners, CheckedConsumer testCase) throws Exception { + default void runWithNewCluster(int numServers, int numListeners, CheckedConsumer testCase) + throws Exception { runWithNewCluster(numServers, numListeners, true, testCase); } - default void runWithNewCluster(int numServers, int numListeners, boolean startCluster, CheckedConsumer testCase) - throws Exception { + default void runWithNewCluster(int numServers, int numListeners, boolean startCluster, + CheckedConsumer testCase) throws Exception { final StackTraceElement caller = JavaUtils.getCallerStackTraceElement(); - LOG.info("Running " + caller.getMethodName()); + LOG.info("Running {}", caller.getMethodName()); final CLUSTER cluster = newCluster(numServers, numListeners); try { if (startCluster) { @@ -140,7 +146,7 @@ default void runWithNewCluster(int numServers, int numListeners, boolean startCl testCase.accept(cluster); } catch(Exception t) { LOG.info(cluster.printServers()); - LOG.error("Failed " + caller, t); + LOG.error("Failed {}", caller, t); throw t; } finally { cluster.shutdown(); @@ -151,9 +157,10 @@ default void runWithSameCluster(int numServers, CheckedConsumer testCase) throws Exception { + default void runWithSameCluster(int numServers, int numListeners, CheckedConsumer testCase) + throws Exception { final StackTraceElement caller = JavaUtils.getCallerStackTraceElement(); - LOG.info("Running " + caller.getMethodName()); + LOG.info("Running {}", caller.getMethodName()); CLUSTER cluster = null; try { cluster = getFactory().reuseCluster(numServers, numListeners, getProperties()); @@ -162,7 +169,7 @@ default void runWithSameCluster(int numServers, int numListeners, 
CheckedConsume if (cluster != null) { LOG.info(cluster.printServers()); } - LOG.error("Failed " + caller, t); + LOG.error("Failed {}", caller, t); throw t; } } @@ -198,7 +205,7 @@ public CLUSTER newCluster(int numServer, int numListeners, RaftProperties prop) } } - public static abstract class RpcBase extends MiniRaftCluster { + public abstract static class RpcBase extends MiniRaftCluster { public RpcBase(String[] ids, String[] listenerIds, RaftProperties properties, Parameters parameters) { super(ids, listenerIds, properties, parameters); } @@ -235,18 +242,6 @@ private int getPort(String address) { } } - public static class PeerChanges { - public final RaftPeer[] allPeersInNewConf; - public final RaftPeer[] newPeers; - public final RaftPeer[] removedPeers; - - public PeerChanges(RaftPeer[] all, RaftPeer[] newPeers, RaftPeer[] removed) { - this.allPeersInNewConf = all; - this.newPeers = newPeers; - this.removedPeers = removed; - } - } - public static RaftGroup initRaftGroup(Collection ids, Collection listenerIds) { Stream peer = ids.stream() .map(id -> RaftPeer.newBuilder().setId(id)) @@ -255,7 +250,7 @@ public static RaftGroup initRaftGroup(Collection ids, Collection Stream listener = listenerIds.stream() .map(id -> RaftPeer.newBuilder().setId(id)) .map(MiniRaftCluster::assignAddresses) - .map(p -> p.setStartupRole(RaftProtos.RaftPeerRole.LISTENER)) + .map(p -> p.setStartupRole(LISTENER)) .map(RaftPeer.Builder::build); final RaftPeer[] peers = Stream.concat(peer, listener).toArray(RaftPeer[]::new); @@ -314,22 +309,22 @@ public RaftProperties getProperties() { } public MiniRaftCluster initServers() { - LOG.info("servers = " + servers); + LOG.info("servers = {}", servers); if (servers.isEmpty()) { putNewServers(CollectionUtils.as(group.getPeers(), RaftPeer::getId), true, group); } return this; } - public RaftServerProxy putNewServer(RaftPeerId id, RaftGroup group, boolean format) { - final RaftServerProxy s = newRaftServer(id, group, format); + public 
RaftServerProxy putNewServer(RaftPeerId id, RaftGroup raftGroup, boolean format) { + final RaftServerProxy s = newRaftServer(id, raftGroup, format); peers.put(s.getId(), s.getPeer()); Preconditions.assertTrue(servers.put(id, s) == null); return s; } - private Collection putNewServers(Iterable peers, boolean format, RaftGroup raftGroup) { - return StreamSupport.stream(peers.spliterator(), false) + private Collection putNewServers(Iterable peerIds, boolean format, RaftGroup raftGroup) { + return StreamSupport.stream(peerIds.spliterator(), false) .map(id -> putNewServer(id, raftGroup, format)) .collect(Collectors.toList()); } @@ -345,23 +340,26 @@ public void start() throws IOException { startServers(servers.values()); this.timer.updateAndGet(t -> t != null? t - : JavaUtils.runRepeatedly(() -> LOG.info("TIMED-PRINT: " + printServers()), 10, 10, TimeUnit.SECONDS)); + : JavaUtils.runRepeatedly(() -> LOG.info("TIMED-PRINT: {}.", printServers()), 10, 10, TimeUnit.SECONDS)); } - /** - * start a stopped server again. - */ + public void removeServer(RaftPeerId serverId) { + servers.remove(serverId); + } + + /** Restart the server with the given id. */ public RaftServer.Division restartServer(RaftPeerId serverId, boolean format) throws IOException { return restartServer(serverId, group, format); } - public RaftServer.Division restartServer(RaftPeerId serverId, RaftGroup group, boolean format) throws IOException { + public RaftServer.Division restartServer(RaftPeerId serverId, RaftGroup raftGroup, boolean format) + throws IOException { killServer(serverId); - servers.remove(serverId); + removeServer(serverId); - final RaftServer proxy = putNewServer(serverId, group, format); + final RaftServer proxy = putNewServer(serverId, raftGroup, format); proxy.start(); - return group == null? null: proxy.getDivision(group.getGroupId()); + return raftGroup == null? 
null: proxy.getDivision(raftGroup.getGroupId()); } public void restart(boolean format) throws IOException { @@ -377,8 +375,8 @@ public TimeDuration getTimeoutMax() { return RaftServerConfigKeys.Rpc.timeoutMax(properties); } - private RaftServerProxy newRaftServer(RaftPeerId id, RaftGroup group, boolean format) { - LOG.info("newRaftServer: {}, {}, format? {}", id, group, format); + private RaftServerProxy newRaftServer(RaftPeerId id, RaftGroup raftGroup, boolean format) { + LOG.info("newRaftServer: {}, {}, format? {}", id, raftGroup, format); try { final File dir = getStorageDir(id); if (format) { @@ -387,27 +385,27 @@ private RaftServerProxy newRaftServer(RaftPeerId id, RaftGroup group, boolean fo } final RaftProperties prop = new RaftProperties(properties); RaftServerConfigKeys.setStorageDir(prop, Collections.singletonList(dir)); - return ServerImplUtils.newRaftServer(id, group, + return ServerImplUtils.newRaftServer(id, raftGroup, format? RaftStorage.StartupOption.FORMAT: RaftStorage.StartupOption.RECOVER, - getStateMachineRegistry(prop), null, prop, setPropertiesAndInitParameters(id, group, prop)); + getStateMachineRegistry(prop), null, prop, setPropertiesAndInitParameters(id, raftGroup, prop)); } catch (IOException e) { throw new RuntimeException(e); } } protected abstract Parameters setPropertiesAndInitParameters( - RaftPeerId id, RaftGroup group, RaftProperties properties); + RaftPeerId id, RaftGroup raftGroup, RaftProperties raftProperties); public void setStateMachineRegistry(StateMachine.Registry stateMachineRegistry) { this.stateMachineRegistry = stateMachineRegistry; } - StateMachine.Registry getStateMachineRegistry(RaftProperties properties) { + StateMachine.Registry getStateMachineRegistry(RaftProperties raftProperties) { if (stateMachineRegistry != null) { return stateMachineRegistry; } - final Class smClass = properties.getClass( + final Class smClass = raftProperties.getClass( STATEMACHINE_CLASS_KEY, null, StateMachine.class); if (smClass == null) { 
return STATEMACHINE_REGISTRY_DEFAULT; @@ -423,7 +421,7 @@ StateMachine.Registry getStateMachineRegistry(RaftProperties properties) { try { final Class[] argClasses = {RaftProperties.class}; - return ReflectionUtils.newInstance(smClass, argClasses, properties); + return ReflectionUtils.newInstance(smClass, argClasses, raftProperties); } catch(RuntimeException e) { exception.addSuppressed(e); } @@ -439,43 +437,11 @@ private static List toRaftPeers(Iterable servers) { public PeerChanges addNewPeers(int number, boolean startNewPeer) throws IOException { - return addNewPeers(generateIds(number, servers.size()), startNewPeer, false); - } - - public PeerChanges addNewPeers(int number, boolean startNewPeer, - boolean emptyPeer) throws IOException { - return addNewPeers(generateIds(number, servers.size()), startNewPeer, emptyPeer, - RaftProtos.RaftPeerRole.FOLLOWER); - } - - public PeerChanges addNewPeers(String[] ids, boolean startNewPeer, - boolean emptyPeer) throws IOException { - return addNewPeers(ids, startNewPeer, emptyPeer, RaftProtos.RaftPeerRole.FOLLOWER); - } - - public PeerChanges addNewPeers(int number, boolean startNewPeer, - boolean emptyPeer, RaftProtos.RaftPeerRole startRole) throws IOException { - return addNewPeers(generateIds(number, servers.size()), startNewPeer, emptyPeer, startRole); - } - - public PeerChanges addNewPeers(String[] ids, boolean startNewPeer, - boolean emptyPeer, RaftProtos.RaftPeerRole startRole) throws IOException { + final String[] ids = generateIds(number, servers.size()); LOG.info("Add new peers {}", Arrays.asList(ids)); final Iterable peerIds = CollectionUtils.as(Arrays.asList(ids), RaftPeerId::valueOf); - final RaftGroup raftGroup; - if (emptyPeer) { - raftGroup = RaftGroup.valueOf(group.getGroupId(), Collections.emptyList()); - } else { - final Collection newPeers = StreamSupport.stream(peerIds.spliterator(), false) - .map(id -> RaftPeer.newBuilder().setId(id) - .setStartupRole(startRole)) - 
.map(MiniRaftCluster::assignAddresses) - .map(RaftPeer.Builder::build) - .collect(Collectors.toSet()); - newPeers.addAll(group.getPeers()); - raftGroup = RaftGroup.valueOf(group.getGroupId(), newPeers); - } + final RaftGroup raftGroup = RaftGroup.valueOf(group.getGroupId(), Collections.emptyList()); // create and add new RaftServers final Collection newServers = putNewServers(peerIds, true, raftGroup); @@ -487,16 +453,16 @@ public PeerChanges addNewPeers(String[] ids, boolean startNewPeer, } } - final Collection newPeers = toRaftPeers(newServers); - final RaftPeer[] np = newPeers.toArray(RaftPeer.emptyArray()); - newPeers.addAll(group.getPeers()); - RaftPeer[] p = newPeers.toArray(RaftPeer.emptyArray()); - group = RaftGroup.valueOf(group.getGroupId(), p); - return new PeerChanges(p, np, RaftPeer.emptyArray()); + final List newPeers = toRaftPeers(newServers); + final List allPeers = new ArrayList<>(newPeers.size() + group.getPeers().size()); + allPeers.addAll(newPeers); + allPeers.addAll(group.getPeers()); + group = RaftGroup.valueOf(group.getGroupId(), allPeers); + return new PeerChanges(allPeers, newPeers, Collections.emptyList()); } - void startServers(Iterable servers) throws IOException { - for(RaftServer s : servers) { + void startServers(Iterable raftServers) throws IOException { + for(RaftServer s : raftServers) { s.start(); peers.put(s.getId(), s.getPeer()); } @@ -507,12 +473,12 @@ void startServers(Iterable servers) throws IOException { */ public PeerChanges removePeers(int number, boolean removeLeader, Collection excluded) throws InterruptedException { - Collection peers = new ArrayList<>(group.getPeers()); + final List raftPeers = new ArrayList<>(group.getPeers()); List removedPeers = new ArrayList<>(number); if (removeLeader) { final RaftPeer leader = RaftTestUtil.waitForLeader(this).getPeer(); Preconditions.assertTrue(!excluded.contains(leader)); - peers.remove(leader); + raftPeers.remove(leader); removedPeers.add(leader); } final List followers = 
getFollowers(); @@ -520,18 +486,17 @@ public PeerChanges removePeers(int number, boolean removeLeader, removed < (removeLeader ? number - 1 : number); i++) { RaftPeer toRemove = followers.get(i).getPeer(); if (!excluded.contains(toRemove)) { - peers.remove(toRemove); + raftPeers.remove(toRemove); removedPeers.add(toRemove); removed++; } } - final RaftPeer[] p = peers.toArray(RaftPeer.emptyArray()); - group = RaftGroup.valueOf(group.getGroupId(), p); - return new PeerChanges(p, RaftPeer.emptyArray(), removedPeers.toArray(RaftPeer.emptyArray())); + group = RaftGroup.valueOf(group.getGroupId(), raftPeers); + return new PeerChanges(raftPeers, Collections.emptyList(), removedPeers); } public void killServer(RaftPeerId id) { - LOG.info("killServer " + id); + LOG.info("killServer {}", id); servers.get(id).close(); } @@ -727,13 +692,13 @@ public List getPeers() { return toRaftPeers(getServers()); } - RaftPeer getPeer(RaftPeerId id, RaftGroup group) { + RaftPeer getPeer(RaftPeerId id, RaftGroup raftGroup) { RaftPeer p = peers.get(id); if (p != null) { return p; } - if (group != null) { - p = group.getPeer(id); + if (raftGroup != null) { + p = raftGroup.getPeer(id); } if (p == null) { p = Optional.ofNullable(servers.get(id)).map(RaftServerProxy::getPeer).orElse(null); @@ -765,24 +730,28 @@ public RaftClient createClient(RetryPolicy retryPolicy) { } public RaftClient createClient(RaftPeerId leaderId, RetryPolicy retryPolicy) { + if (retryPolicy == null) { + retryPolicy = getDefaultRetryPolicy(); + } return createClient(leaderId, group, retryPolicy); } - public RaftClient createClient(RaftPeerId leaderId, RaftGroup group) { - return createClient(leaderId, group, getDefaultRetryPolicy()); + public RaftClient createClient(RaftPeerId leaderId, RaftGroup raftGroup) { + return createClient(leaderId, raftGroup, getDefaultRetryPolicy()); } public RaftClient createClient(RaftPeer primaryServer) { return createClient(null, group, getDefaultRetryPolicy(), primaryServer); } - public 
RaftClient createClient(RaftPeerId leaderId, RaftGroup group, RetryPolicy retryPolicy) { - return createClient(leaderId, group, retryPolicy, null); + public RaftClient createClient(RaftPeerId leaderId, RaftGroup raftGroup, RetryPolicy retryPolicy) { + return createClient(leaderId, raftGroup, retryPolicy, null); } - public RaftClient createClient(RaftPeerId leaderId, RaftGroup group, RetryPolicy retryPolicy, RaftPeer primaryServer) { + public RaftClient createClient(RaftPeerId leaderId, RaftGroup raftGroup, RetryPolicy retryPolicy, RaftPeer primaryServer) { RaftClient.Builder builder = RaftClient.newBuilder() - .setRaftGroup(group) + .setClientId(ClientId.valueOf(new UUID(0, CLIENT_ID.incrementAndGet()))) + .setRaftGroup(raftGroup) .setLeaderId(leaderId) .setProperties(properties) .setParameters(parameters) @@ -809,16 +778,15 @@ public RaftClientRequest newRaftClientRequest( } public SetConfigurationRequest newSetConfigurationRequest( - ClientId clientId, RaftPeerId leaderId, - RaftPeer... peers) { + ClientId clientId, RaftPeerId leaderId, List raftPeers) { return new SetConfigurationRequest(clientId, leaderId, getGroupId(), CallId.getDefault(), - SetConfigurationRequest.Arguments.newBuilder().setServersInNewConf(peers).build()); + SetConfigurationRequest.Arguments.newBuilder().setServersInNewConf(raftPeers).build()); } - public void setConfiguration(RaftPeer... 
peers) throws IOException { + public void setConfiguration(List raftPeers) throws IOException { try(RaftClient client = createClient()) { - LOG.info("Start changing the configuration: {}", Arrays.asList(peers)); - final RaftClientReply reply = client.admin().setConfiguration(peers); + LOG.info("Start changing the configuration: {}", raftPeers); + final RaftClientReply reply = client.admin().setConfiguration(raftPeers); Preconditions.assertTrue(reply.isSuccess()); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/PeerChanges.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/PeerChanges.java new file mode 100644 index 0000000000..edb8e1acca --- /dev/null +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/PeerChanges.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.server.impl; + +import org.apache.ratis.protocol.RaftPeer; + +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +public class PeerChanges { + private final List peersInNewConf; + private final List addedPeers; + private final List removedPeers; + + PeerChanges(List all, List addedPeers, List removed) { + this.peersInNewConf = Collections.unmodifiableList(all); + this.addedPeers = Collections.unmodifiableList(addedPeers); + this.removedPeers = Collections.unmodifiableList(removed); + } + + public List getPeersInNewConf() { + return peersInNewConf; + } + + public List getAddedPeers() { + return addedPeers; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } else if (!(obj instanceof PeerChanges)) { + return false; + } + final PeerChanges that = (PeerChanges) obj; + return Objects.equals(this.peersInNewConf, that.peersInNewConf) + && Objects.equals(this.addedPeers, that.addedPeers) + && Objects.equals(this.removedPeers, that.removedPeers); + } + + @Override + public int hashCode() { + return Objects.hashCode(peersInNewConf); + } + + @Override + public String toString() { + return "PeerChanges{" + + "\n newConf=" + peersInNewConf + + "\n added=" + addedPeers + + "\n removed=" + removedPeers + + "\n}"; + } +} diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/PreAppendLeaderStepDownTest.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/PreAppendLeaderStepDownTest.java index 4a62844c0a..0211bfed00 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/PreAppendLeaderStepDownTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/PreAppendLeaderStepDownTest.java @@ -34,8 +34,8 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import 
org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.io.IOException; @@ -105,9 +105,9 @@ private void runTestLeaderStepDown(CLUSTER cluster) throws Exception { RaftTestUtil.waitForLeader(cluster).getRaftLog().getLastEntryTermIndex().getTerm(); if (leaderShouldStepDown) { - Assert.assertTrue(newTerm > oldTerm); + Assertions.assertTrue(newTerm > oldTerm); } else { - Assert.assertEquals(newTerm, oldTerm); + Assertions.assertEquals(newTerm, oldTerm); } cluster.shutdown(); @@ -124,11 +124,11 @@ void runTestLeaderStepDownAsync(CLUSTER cluster) throws IOException, Interrupted RaftPeerId leaderId = leader.getId(); RaftServerImpl l = (RaftServerImpl) leader; try (RaftClient client = cluster.createClient(leader.getId())) { - JavaUtils.attempt(() -> Assert.assertEquals(leaderId, leader.getId()), + JavaUtils.attempt(() -> Assertions.assertEquals(leaderId, leader.getId()), 20, ONE_SECOND, "check leader id", LOG); RaftClientReply reply = client.admin().transferLeadership(null, 3000); - Assert.assertTrue(reply.isSuccess()); - Assert.assertEquals(2, ((RaftServerImpl) leader).getRole().getCurrentRole().getNumber()); + Assertions.assertTrue(reply.isSuccess()); + Assertions.assertEquals(2, ((RaftServerImpl) leader).getRole().getCurrentRole().getNumber()); } } } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftReconfigurationBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftReconfigurationBaseTest.java index 3b8e206deb..7a2f1a24ee 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftReconfigurationBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftReconfigurationBaseTest.java @@ -37,21 +37,25 @@ import org.apache.ratis.server.RaftConfiguration; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; -import org.apache.ratis.server.impl.MiniRaftCluster.PeerChanges; import org.apache.ratis.server.raftlog.LogProtoUtils; import 
org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.server.raftlog.RaftLogBase; import org.apache.ratis.server.storage.RaftStorageTestUtils; +import org.apache.ratis.test.tag.Flaky; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.event.Level; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -59,9 +63,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import static java.util.Arrays.asList; import static org.apache.ratis.server.impl.RaftServerTestUtil.waitAndCheckNewConf; -import static org.junit.Assert.assertThrows; +import static org.junit.jupiter.api.Assertions.assertThrows; public abstract class RaftReconfigurationBaseTest extends BaseTest @@ -70,8 +73,8 @@ public abstract class RaftReconfigurationBaseTest for (int j = 0; j < peersWithPriority.size(); j ++) { int priorityInConf = conf.getPeer(peersWithPriority.get(j).getId()).getPriority(); - Assert.assertEquals(priorityInConf, peersWithPriority.get(j).getPriority()); + Assertions.assertEquals(priorityInConf, peersWithPriority.get(j).getPriority()); } } } @@ -136,7 +139,7 @@ public void testAddPeers() throws Exception { RaftTestUtil.waitForLeader(cluster); // add new peers - RaftPeer[] allPeers = cluster.addNewPeers(2, true).allPeersInNewConf; + final List allPeers = cluster.addNewPeers(2, true).getPeersInNewConf(); // trigger setConfiguration cluster.setConfiguration(allPeers); @@ -156,25 +159,25 @@ public void 
testLeaderElectionWhenChangeFromSingleToHA() throws Exception { RaftGroupId groupId = cluster.getGroup().getGroupId(); RaftPeer curPeer = cluster.getGroup().getPeers().iterator().next(); - RaftPeer newPeer = cluster.addNewPeers(1, true, true).newPeers[0]; + RaftPeer newPeer = cluster.addNewPeers(1, true).getAddedPeers().get(0); RaftServerProxy leaderServer = cluster.getServer(curPeer.getId()); // Update leader conf to transitional single mode. RaftConfigurationImpl oldNewConf = RaftConfigurationImpl.newBuilder() - .setOldConf(new PeerConfiguration(Arrays.asList(curPeer))) + .setOldConf(new PeerConfiguration(Collections.singletonList(curPeer))) .setConf(new PeerConfiguration(Arrays.asList(curPeer, newPeer))) .setLogEntryIndex(Long.MAX_VALUE / 2) .build(); - Assert.assertTrue(oldNewConf.isSingleMode(curPeer.getId())); + Assertions.assertTrue(oldNewConf.isSingleMode(curPeer.getId())); RaftServerTestUtil.setRaftConf(leaderServer, groupId, oldNewConf); try(RaftClient client = cluster.createClient()) { client.admin().transferLeadership(null, leaderServer.getId(), 1000); } final RaftServer.Division newLeader = RaftTestUtil.waitForLeader(cluster); - Assert.assertEquals(leaderServer.getId(), newLeader.getId()); - Assert.assertEquals(oldNewConf, newLeader.getRaftConf()); + Assertions.assertEquals(leaderServer.getId(), newLeader.getId()); + Assertions.assertEquals(oldNewConf, newLeader.getRaftConf()); }); } @@ -189,11 +192,11 @@ public void testChangeMajority() throws Exception { SetConfigurationRequest.Arguments arguments = SetConfigurationRequest.Arguments.newBuilder() .setServersInCurrentConf(cluster.getPeers()) - .setServersInNewConf(c1.allPeersInNewConf) + .setServersInNewConf(c1.getPeersInNewConf()) .setMode(SetConfigurationRequest.Mode.COMPARE_AND_SET) .build(); - assertThrows("Expect change majority error.", SetConfigurationException.class, - () -> client.admin().setConfiguration(arguments)); + assertThrows(SetConfigurationException.class, () -> 
client.admin().setConfiguration(arguments), + "Expect change majority error."); } }); } @@ -207,8 +210,8 @@ public void testRemovePeers() throws Exception { RaftTestUtil.waitForLeader(cluster); // remove peers, leader still included in the new conf - RaftPeer[] allPeers = cluster - .removePeers(2, false, Collections.emptyList()).allPeersInNewConf; + final List allPeers = cluster.removePeers(2, false, Collections.emptyList()) + .getPeersInNewConf(); // trigger setConfiguration cluster.setConfiguration(allPeers); @@ -235,8 +238,8 @@ private void runTestAddRemovePeers(boolean leaderStepdown, CLUSTER cluster) thro RaftTestUtil.waitForLeader(cluster); PeerChanges change = cluster.addNewPeers(2, true); - RaftPeer[] allPeers = cluster.removePeers(2, leaderStepdown, - asList(change.newPeers)).allPeersInNewConf; + final List allPeers = cluster.removePeers(2, leaderStepdown, change.getAddedPeers()) + .getPeersInNewConf(); // trigger setConfiguration cluster.setConfiguration(allPeers); @@ -254,19 +257,19 @@ private void runTestSetConfigurationInAddMode(CLUSTER cluster) throws Exception final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster); PeerChanges change = cluster.addNewPeers(1, true); - List peers = Arrays.asList(change.newPeers); + List peers = change.getAddedPeers(); try (final RaftClient client = cluster.createClient(leader.getId())) { for (int i = 0; i < 10; i++) { RaftClientReply reply = client.io().send(new SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } RaftClientReply reply = client.admin().setConfiguration( SetConfigurationRequest.Arguments.newBuilder() .setServersInNewConf(peers) .setMode(SetConfigurationRequest.Mode.ADD).build()); - Assert.assertTrue(reply.isSuccess()); - waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null); + Assertions.assertTrue(reply.isSuccess()); + waitAndCheckNewConf(cluster, change.getPeersInNewConf(), 0, null); } cluster.close(); } @@ -281,12 
+284,12 @@ private void runTestSetConfigurationInCasMode(CLUSTER cluster) throws Exception List oldPeers = cluster.getPeers(); PeerChanges change = cluster.addNewPeers(1, true); - List peers = Arrays.asList(change.allPeersInNewConf); + List peers = change.getPeersInNewConf(); try (final RaftClient client = cluster.createClient(leader.getId())) { for (int i = 0; i < 10; i++) { RaftClientReply reply = client.io().send(new SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } testFailureCase("Can't set configuration in CAS mode ", @@ -303,15 +306,16 @@ private void runTestSetConfigurationInCasMode(CLUSTER cluster) throws Exception .setServersInCurrentConf(oldPeers) .setMode(SetConfigurationRequest.Mode.COMPARE_AND_SET) .build()); - Assert.assertTrue(reply.isSuccess()); - waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null); + Assertions.assertTrue(reply.isSuccess()); + waitAndCheckNewConf(cluster, change.getPeersInNewConf(), 0, null); } cluster.close(); } - @Test(timeout = 30000) + @Test + @Timeout(value = 30) public void testReconfTwice() throws Exception { runWithNewCluster(3, this::runTestReconfTwice); } @@ -323,30 +327,26 @@ void runTestReconfTwice(CLUSTER cluster) throws Exception { // submit some msgs before reconf for (int i = 0; i < STAGING_CATCHUP_GAP * 2; i++) { RaftClientReply reply = client.io().send(new SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } final AtomicBoolean reconf1 = new AtomicBoolean(false); final AtomicBoolean reconf2 = new AtomicBoolean(false); - final AtomicReference finalPeers = new AtomicReference<>(null); - final AtomicReference deadPeers = new AtomicReference<>(null); + final AtomicReference> finalPeers = new AtomicReference<>(null); CountDownLatch latch = new CountDownLatch(1); Thread clientThread = new Thread(() -> { try { - PeerChanges c1 = cluster.addNewPeers(2, true, true); - LOG.info("Start changing 
the configuration: {}", - asList(c1.allPeersInNewConf)); + PeerChanges c1 = cluster.addNewPeers(2, true); + LOG.info("Start changing the configuration: {}", c1.getPeersInNewConf()); - RaftClientReply reply = client.admin().setConfiguration(c1.allPeersInNewConf); + RaftClientReply reply = client.admin().setConfiguration(c1.getPeersInNewConf()); reconf1.set(reply.isSuccess()); - PeerChanges c2 = cluster.removePeers(2, true, asList(c1.newPeers)); - finalPeers.set(c2.allPeersInNewConf); - deadPeers.set(c2.removedPeers); + PeerChanges c2 = cluster.removePeers(2, true, c1.getAddedPeers()); + finalPeers.set(c2.getPeersInNewConf()); - LOG.info("Start changing the configuration again: {}", - asList(c2.allPeersInNewConf)); - reply = client.admin().setConfiguration(c2.allPeersInNewConf); + LOG.info("Start changing the configuration again: {}", c2.getPeersInNewConf()); + reply = client.admin().setConfiguration(c2.getPeersInNewConf()); reconf2.set(reply.isSuccess()); latch.countDown(); @@ -357,8 +357,8 @@ void runTestReconfTwice(CLUSTER cluster) throws Exception { clientThread.start(); latch.await(); - Assert.assertTrue(reconf1.get()); - Assert.assertTrue(reconf2.get()); + Assertions.assertTrue(reconf1.get()); + Assertions.assertTrue(reconf2.get()); waitAndCheckNewConf(cluster, finalPeers.get(), 2, null); final RaftPeerId leader2 = RaftTestUtil.waitForLeader(cluster).getId(); @@ -370,7 +370,7 @@ void runTestReconfTwice(CLUSTER cluster) throws Exception { // each leader change generates one configuration. // expectedConf = 1 (init) + 2*2 (two conf changes) + #leader final int expectedConf = leader2.equals(leaderId) ? 
6 : 7; - Assert.assertEquals(server.getId() + ": " + confManager, expectedConf, confManager.numOfConf()); + Assertions.assertEquals(expectedConf, confManager.numOfConf(), server.getId() + ": " + confManager); }); } } @@ -387,19 +387,18 @@ void runTestReconfTimeout(CLUSTER cluster) throws Exception { try (final RaftClient client = cluster.createClient(leaderId)) { PeerChanges c1 = cluster.addNewPeers(2, false); - LOG.info("Start changing the configuration: {}", - asList(c1.allPeersInNewConf)); - Assert.assertFalse(((RaftConfigurationImpl)cluster.getLeader().getRaftConf()).isTransitional()); + LOG.info("Start changing the configuration: {}", c1.getPeersInNewConf()); + Assertions.assertFalse(((RaftConfigurationImpl)cluster.getLeader().getRaftConf()).isTransitional()); final RaftClientRpc sender = client.getClientRpc(); final SetConfigurationRequest request = cluster.newSetConfigurationRequest( - client.getId(), leaderId, c1.allPeersInNewConf); + client.getId(), leaderId, c1.getPeersInNewConf()); try { RaftClientReply reply = sender.sendRequest(request); - Assert.fail("did not get expected exception " + reply.toString()); + Assertions.fail("did not get expected exception " + reply.toString()); } catch (IOException e) { - Assert.assertTrue("Got exception " + e, - e instanceof ReconfigurationTimeoutException); + Assertions.assertTrue( + e instanceof ReconfigurationTimeoutException, "Got exception " + e); } // the two new peers have not started yet, the bootstrapping must timeout @@ -409,18 +408,18 @@ void runTestReconfTimeout(CLUSTER cluster) throws Exception { // state so that we still get timeout instead of in-progress exception try { sender.sendRequest(request); - Assert.fail("did not get expected exception"); + Assertions.fail("did not get expected exception"); } catch (IOException e) { - Assert.assertTrue("Got exception " + e, - e instanceof ReconfigurationTimeoutException); + Assertions.assertTrue( + e instanceof ReconfigurationTimeoutException, "Got exception " + 
e); } // start the two new peers LOG.info("Start new peers"); - for (RaftPeer np : c1.newPeers) { + for (RaftPeer np : c1.getAddedPeers()) { cluster.restartServer(np.getId(), false); } - Assert.assertTrue(client.admin().setConfiguration(c1.allPeersInNewConf).isSuccess()); + Assertions.assertTrue(client.admin().setConfiguration(c1.getPeersInNewConf()).isSuccess()); } } @@ -440,8 +439,8 @@ public void testBootstrapReconfWithSingleNodeAddTwo() throws Exception { try (final RaftClient client = cluster.createClient(leaderId)) { final PeerChanges c1 = cluster.addNewPeers(2, true); - assertThrows("Expect change majority error.", SetConfigurationException.class, - () -> client.admin().setConfiguration(c1.allPeersInNewConf)); + assertThrows(SetConfigurationException.class, () -> client.admin().setConfiguration(c1.getPeersInNewConf()), + "Expect change majority error."); } }); } @@ -462,16 +461,15 @@ void runTestBootstrapReconf(int numNewPeer, boolean startNewPeer, CLUSTER cluste // submit some msgs before reconf for (int i = 0; i < STAGING_CATCHUP_GAP * 2; i++) { RaftClientReply reply = client.io().send(new SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } final PeerChanges c1 = cluster.addNewPeers(numNewPeer, startNewPeer); - LOG.info("Start changing the configuration: {}", - asList(c1.allPeersInNewConf)); + LOG.info("Start changing the configuration: {}", c1.getPeersInNewConf()); final AtomicReference success = new AtomicReference<>(); Thread clientThread = new Thread(() -> { try { - RaftClientReply reply = client.admin().setConfiguration(c1.allPeersInNewConf); + RaftClientReply reply = client.admin().setConfiguration(c1.getPeersInNewConf()); success.set(reply.isSuccess()); } catch (IOException ioe) { LOG.error("FAILED", ioe); @@ -483,8 +481,8 @@ void runTestBootstrapReconf(int numNewPeer, boolean startNewPeer, CLUSTER cluste // Make sure that set configuration is run inside the thread RaftTestUtil.waitFor(() -> 
clientThread.isAlive(), 300, 5000); ONE_SECOND.sleep(); - LOG.info("start new peer(s): {}", c1.newPeers); - for(RaftPeer p : c1.newPeers) { + LOG.info("start new peer(s): {}", c1.getAddedPeers()); + for(RaftPeer p : c1.getAddedPeers()) { cluster.restartServer(p.getId(), false); } } @@ -494,11 +492,11 @@ void runTestBootstrapReconf(int numNewPeer, boolean startNewPeer, CLUSTER cluste RaftTestUtil.waitFor(() -> cluster.getLeader() != null, 300, 5000); final RaftLog leaderLog = cluster.getLeader().getRaftLog(); - for (RaftPeer newPeer : c1.newPeers) { + for (RaftPeer newPeer : c1.getAddedPeers()) { final RaftServer.Division d = cluster.getDivision(newPeer.getId()); RaftTestUtil.waitFor(() -> leaderLog.getEntries(0, Long.MAX_VALUE).length == d.getRaftLog().getEntries(0, Long.MAX_VALUE).length, 300, 15000); - Assert.assertArrayEquals(leaderLog.getEntries(0, Long.MAX_VALUE), + Assertions.assertArrayEquals(leaderLog.getEntries(0, Long.MAX_VALUE), d.getRaftLog().getEntries(0, Long.MAX_VALUE)); } } @@ -509,6 +507,7 @@ void runTestBootstrapReconf(int numNewPeer, boolean startNewPeer, CLUSTER cluste * retrying. 
*/ @Test + @Flaky("RATIS-2251") public void testKillLeaderDuringReconf() throws Exception { // originally 3 peers runWithNewCluster(3, this::runTestKillLeaderDuringReconf); @@ -521,16 +520,16 @@ void runTestKillLeaderDuringReconf(CLUSTER cluster) throws Exception { final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); PeerChanges c1 = cluster.addNewPeers(1, false); - PeerChanges c2 = cluster.removePeers(1, false, asList(c1.newPeers)); + PeerChanges c2 = cluster.removePeers(1, false, c1.getAddedPeers()); - LOG.info("Start setConf: {}", asList(c2.allPeersInNewConf)); + LOG.info("Start setConf: {}", c2.getPeersInNewConf()); LOG.info(cluster.printServers()); final CompletableFuture setConf = new CompletableFuture<>(); clientThread = new Thread(() -> { try(final RaftClient client = cluster.createClient(leaderId)) { for(int i = 0; clientRunning.get() && !setConf.isDone(); i++) { - final RaftClientReply reply = client.admin().setConfiguration(c2.allPeersInNewConf); + final RaftClientReply reply = client.admin().setConfiguration(c2.getPeersInNewConf()); if (reply.isSuccess()) { setConf.complete(null); return; @@ -546,7 +545,7 @@ void runTestKillLeaderDuringReconf(CLUSTER cluster) throws Exception { // the leader cannot generate the (old, new) conf, and it will keep // bootstrapping the 1 new peer since it has not started yet. 
- Assert.assertFalse(((RaftConfigurationImpl)cluster.getLeader().getRaftConf()).isTransitional()); + Assertions.assertFalse(((RaftConfigurationImpl)cluster.getLeader().getRaftConf()).isTransitional()); // (0) the first conf entry, (1) the 1st setConf entry, (2) a metadata entry // (3) new current conf entry (4) a metadata entry @@ -556,17 +555,17 @@ void runTestKillLeaderDuringReconf(CLUSTER cluster) throws Exception { LOG.info("{}", LogProtoUtils.toLogEntryString(e)); } final long commitIndex = leaderLog.getLastCommittedIndex(); - Assert.assertTrue("commitIndex = " + commitIndex + " > 2", commitIndex <= 2); + Assertions.assertTrue(commitIndex <= 2, "commitIndex = " + commitIndex + " > 2"); } final RaftPeerId killed = RaftTestUtil.waitAndKillLeader(cluster); - Assert.assertEquals(leaderId, killed); + Assertions.assertEquals(leaderId, killed); final RaftPeerId newLeaderId = RaftTestUtil.waitForLeader(cluster).getId(); LOG.info("newLeaderId: {}", newLeaderId); TimeDuration.valueOf(1500, TimeUnit.MILLISECONDS).sleep(); - LOG.info("start new peers: {}", Arrays.asList(c1.newPeers)); - for (RaftPeer np : c1.newPeers) { + LOG.info("start new peers: {}", c1.getAddedPeers()); + for (RaftPeer np : c1.getAddedPeers()) { cluster.restartServer(np.getId(), false); } @@ -575,12 +574,12 @@ void runTestKillLeaderDuringReconf(CLUSTER cluster) throws Exception { } catch(TimeoutException ignored) { } - RaftServerProxy newServer = cluster.getServer(c1.newPeers[0].getId()); + RaftServerProxy newServer = cluster.getServer(c1.getAddedPeers().get(0).getId()); if (newServer.getLifeCycleState() == LifeCycle.State.CLOSED) { - LOG.info("New peer {} is shutdown. Skip the check", c1.newPeers[0].getId()); + LOG.info("New peer {} is shutdown. 
Skip the check", c1.getAddedPeers().get(0).getId()); } else { // the client fails with the first leader, and then retry the same setConfiguration request - waitAndCheckNewConf(cluster, c2.allPeersInNewConf, 1, Collections.singletonList(leaderId)); + waitAndCheckNewConf(cluster, c2.getPeersInNewConf(), 1, Collections.singletonList(leaderId)); setConf.get(2, TimeUnit.SECONDS); } } finally { @@ -612,13 +611,13 @@ void runTestNoChangeRequest(CLUSTER cluster) throws Exception { // no real configuration change in the request final RaftClientReply reply = client.admin().setConfiguration(cluster.getPeers().toArray(RaftPeer.emptyArray())); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); final long newCommittedIndex = leaderLog.getLastCommittedIndex(); for(long i = committedIndex + 1; i <= newCommittedIndex; i++) { final LogEntryProto e = leaderLog.get(i); - Assert.assertTrue(e.hasMetadataEntry()); + Assertions.assertTrue(e.hasMetadataEntry()); } - Assert.assertSame(confBefore, cluster.getLeader().getRaftConf()); + Assertions.assertSame(confBefore, cluster.getLeader().getRaftConf()); } } @@ -650,14 +649,14 @@ void runTestOverlappedSetConfRequests(CLUSTER cluster) throws Exception { final RaftPeerId leaderId = cluster.getLeader().getId(); - RaftPeer[] newPeers = cluster.addNewPeers(2, true).allPeersInNewConf; + final List newPeers = cluster.addNewPeers(2, true).getPeersInNewConf(); // delay every peer's logSync so that the setConf request is delayed cluster.getPeers() - .forEach(peer -> logSyncDelay.setDelayMs(peer.getId().toString(), 1000)); + .forEach(peer -> LOG_SYNC_DELAY.setDelayMs(peer.getId().toString(), 1000)); final CountDownLatch latch = new CountDownLatch(1); - final RaftPeer[] peersInRequest2 = cluster.getPeers().toArray(new RaftPeer[0]); + final List peersInRequest2 = cluster.getPeers(); AtomicBoolean caughtException = new AtomicBoolean(false); new Thread(() -> { try(final RaftClient client2 = cluster.createClient(leaderId)) { 
@@ -688,10 +687,10 @@ void runTestOverlappedSetConfRequests(CLUSTER cluster) throws Exception { for (int i = 0; i < 10 && !confChanged.get(); i++) { Thread.sleep(1000); } - Assert.assertTrue(confChanged.get()); - Assert.assertTrue(caughtException.get()); + Assertions.assertTrue(confChanged.get()); + Assertions.assertTrue(caughtException.get()); } finally { - logSyncDelay.clear(); + LOG_SYNC_DELAY.clear(); } } @@ -723,7 +722,7 @@ void runTestRevertConfigurationChange(CLUSTER cluster) throws Exception { BlockRequestHandlingInjection.getInstance().blockReplier(leaderId.toString()); cluster.setBlockRequestsFrom(leaderId.toString(), true); - PeerChanges change = cluster.removePeers(1, false, new ArrayList<>()); + PeerChanges change = cluster.removePeers(1, false, Collections.emptyList()); AtomicBoolean gotNotLeader = new AtomicBoolean(false); final Thread clientThread = new Thread(() -> { @@ -731,7 +730,7 @@ void runTestRevertConfigurationChange(CLUSTER cluster) throws Exception { LOG.info("client starts to change conf"); final RaftClientRpc sender = client.getClientRpc(); RaftClientReply reply = sender.sendRequest(cluster.newSetConfigurationRequest( - client.getId(), leaderId, change.allPeersInNewConf)); + client.getId(), leaderId, change.getPeersInNewConf())); if (reply.getNotLeaderException() != null) { gotNotLeader.set(true); } @@ -755,11 +754,11 @@ void runTestRevertConfigurationChange(CLUSTER cluster) throws Exception { // wait till the old leader persist the new conf JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(log.getFlushIndex() >= confIndex); + Assertions.assertTrue(log.getFlushIndex() >= confIndex); return null; }, 10, sleepTime, "FLUSH", LOG); final long committed = log.getLastCommittedIndex(); - Assert.assertTrue(committed < confIndex); + Assertions.assertTrue(committed < confIndex); // unblock the old leader BlockRequestHandlingInjection.getInstance().unblockReplier(leaderId.toString()); @@ -767,14 +766,14 @@ void 
runTestRevertConfigurationChange(CLUSTER cluster) throws Exception { // the client should get NotLeaderException clientThread.join(5000); - Assert.assertTrue(gotNotLeader.get()); + Assertions.assertTrue(gotNotLeader.get()); // the old leader should have truncated the setConf from the log JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(log.getLastCommittedIndex() >= confIndex); + Assertions.assertTrue(log.getLastCommittedIndex() >= confIndex); return null; }, 10, ONE_SECOND, "COMMIT", LOG); - Assert.assertTrue(log.get(confIndex).hasConfigurationEntry()); + Assertions.assertTrue(log.get(confIndex).hasConfigurationEntry()); log2 = null; } finally { RaftStorageTestUtils.printLog(log2, s -> LOG.info(s)); @@ -792,7 +791,7 @@ public void testLeaderNotReadyException() throws Exception { try { // delay 1s for each logSync call cluster.getServers().forEach( - peer -> leaderPlaceHolderDelay.setDelayMs(peer.getId().toString(), 2000)); + peer -> LEADER_PLACE_HOLDER_DELAY.setDelayMs(peer.getId().toString(), 2000)); cluster.start(); AtomicBoolean caughtNotReady = new AtomicBoolean(false); @@ -830,10 +829,10 @@ public void testLeaderNotReadyException() throws Exception { for (int i = 0; !success.get() && i < 5; i++) { Thread.sleep(1000); } - Assert.assertTrue(success.get()); - Assert.assertTrue(caughtNotReady.get()); + Assertions.assertTrue(success.get()); + Assertions.assertTrue(caughtNotReady.get()); } finally { - leaderPlaceHolderDelay.clear(); + LEADER_PLACE_HOLDER_DELAY.clear(); cluster.shutdown(); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftServerImplTracingTests.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftServerImplTracingTests.java new file mode 100644 index 0000000000..300cf51cde --- /dev/null +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftServerImplTracingTests.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.server.impl; + +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.context.Context; +import io.opentelemetry.sdk.testing.junit5.OpenTelemetryExtension; +import io.opentelemetry.sdk.trace.data.SpanData; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.RaftClientRequest; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.exceptions.ServerNotReadyException; +import org.apache.ratis.server.storage.RaftStorage; +import org.apache.ratis.statemachine.StateMachine; +import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; +import org.apache.ratis.trace.TraceConfigKeys; +import org.apache.ratis.trace.TraceUtils; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.when; +import static org.mockito.Mockito.mock; + +public class 
RaftServerImplTracingTests { + + @RegisterExtension + private static final OpenTelemetryExtension openTelemetryExtension = + OpenTelemetryExtension.create(); + + @Test + public void testSubmitClientRequestAsync() throws Exception { + final List spans = submitClientRequestAndCollectNewSpans(true); + assertEquals(2, spans.size()); + assertTrue( + spans.stream().anyMatch(s -> s.getKind() == SpanKind.CLIENT && s.getName().equals("client-span")), + "Expected at least one span with SpanKind.CLIENT" + ); + assertTrue( + spans.stream().anyMatch(s -> s.getKind() == SpanKind.SERVER + && s.getName().equals("raft.server.submitClientRequestAsync")), + "Expected at least one span with SpanKind.SERVER" + ); + + } + + @Test + public void testSubmitClientRequestAsyncTracingDisabled() throws Exception { + final List spans = submitClientRequestAndCollectNewSpans(false); + // Even when server-side tracing is disabled, we still emit the client span used to + // generate the propagated context. + assertEquals(1, spans.size()); + assertTrue( + spans.stream().noneMatch(s -> s.getKind() == SpanKind.SERVER + && s.getName().equals("raft.server.submitClientRequestAsync")), + "Expected no SERVER span when tracing is disabled" + ); + assertTrue( + spans.stream().anyMatch(s -> s.getKind() == SpanKind.CLIENT && s.getName().equals("client-span")), + "Expected at least one span with SpanKind.CLIENT" + ); + } + + private static List submitClientRequestAndCollectNewSpans(boolean enableTracing) + throws Exception { + final int before = openTelemetryExtension.getSpans().size(); + + final RaftServerImpl server = newRaftServerImpl(enableTracing); + try { + final RaftClientRequest request = newRaftClientRequest(RaftClientRequest.writeRequestType()); + + try { + server.submitClientRequestAsync(request); + } catch (ServerNotReadyException ignored) { + // server is not running; only verifying span emission + } + } finally { + server.close(); + } + + final List after = openTelemetryExtension.getSpans(); + 
return new ArrayList<>(after.subList(before, after.size())); + } + + private static RaftServerImpl newRaftServerImpl(boolean enableTracing) throws Exception { + final RaftGroup group = RaftGroup.emptyGroup(); + final StateMachine sm = new SimpleStateMachine4Testing(); + final RaftServerProxy proxy = mock(RaftServerProxy.class); + when(proxy.getId()).thenReturn(RaftPeerId.valueOf("peer1")); + final RaftProperties properties = new RaftProperties(); + TraceConfigKeys.setEnabled(properties, enableTracing); + when(proxy.getProperties()).thenReturn(properties); + when(proxy.getThreadGroup()).thenReturn(new ThreadGroup("test")); + return new RaftServerImpl(group, sm, proxy, RaftStorage.StartupOption.FORMAT); + } + + private static RaftClientRequest newRaftClientRequest(RaftClientRequest.Type type) { + final Span clientSpan = + openTelemetryExtension.getOpenTelemetry().getTracer("test") + .spanBuilder("client-span") + .setSpanKind(SpanKind.CLIENT) + .startSpan(); + try { + final Context clientContext = Context.current().with(clientSpan); + return RaftClientRequest.newBuilder() + .setClientId(ClientId.randomId()) + .setServerId(RaftPeerId.valueOf("s0")) + .setGroupId(RaftGroupId.randomId()) + .setCallId(1L) + .setType(type) + .setSpanContext(TraceUtils.injectContextToProto(clientContext)) + .build(); + } finally { + clientSpan.end(); + } + } +} + diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftServerTestUtil.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftServerTestUtil.java index 58a51e0514..c225730d37 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftServerTestUtil.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftServerTestUtil.java @@ -40,14 +40,14 @@ import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedConsumer; -import org.junit.Assert; +import org.junit.jupiter.api.Assertions; import org.mockito.Mockito; 
import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.event.Level; import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -58,9 +58,12 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -public class RaftServerTestUtil { +public final class RaftServerTestUtil { static final Logger LOG = LoggerFactory.getLogger(RaftServerTestUtil.class); + private RaftServerTestUtil() { + } + public static final RaftGroupMemberId TEST_MEMBER_ID = RaftGroupMemberId.valueOf( RaftPeerId.valueOf("test"), RaftGroupId.emptyGroupId()); @@ -79,24 +82,24 @@ public static void setPendingRequestsLogLevel(Level level) { } public static void waitAndCheckNewConf(MiniRaftCluster cluster, - RaftPeer[] peers, int numOfNewPeers, int numOfRemovedPeers, Collection deadPeers) + List peers, int numOfNewPeers, int numOfRemovedPeers, Collection deadPeers) throws Exception { final TimeDuration sleepTime = cluster.getTimeoutMax().apply(n -> n * (numOfRemovedPeers + numOfNewPeers + 2)); - JavaUtils.attempt(() -> waitAndCheckNewConf(cluster, Arrays.asList(peers), deadPeers), + JavaUtils.attempt(() -> waitAndCheckNewConf(cluster, peers, deadPeers), 10, sleepTime, "waitAndCheckNewConf", LOG); } public static void waitAndCheckNewConf(MiniRaftCluster cluster, - RaftPeer[] peers, int numOfRemovedPeers, Collection deadPeers) + List peers, int numOfRemovedPeers, Collection deadPeers) throws Exception { final TimeDuration sleepTime = cluster.getTimeoutMax().apply(n -> n * (numOfRemovedPeers + 2)); - JavaUtils.attempt(() -> waitAndCheckNewConf(cluster, Arrays.asList(peers), deadPeers), + JavaUtils.attempt(() -> waitAndCheckNewConf(cluster, peers, deadPeers), 10, sleepTime, "waitAndCheckNewConf", LOG); } private static void waitAndCheckNewConf(MiniRaftCluster cluster, Collection peers, Collection deadPeers) { LOG.info("waitAndCheckNewConf: peers={}, deadPeers={}, 
{}", peers, deadPeers, cluster.printServers()); - Assert.assertNotNull(cluster.getLeader()); + Assertions.assertNotNull(cluster.getLeader()); int numIncluded = 0; int deadIncluded = 0; @@ -114,16 +117,16 @@ private static void waitAndCheckNewConf(MiniRaftCluster cluster, final RaftConfigurationImpl conf = server.getState().getRaftConf(); if (current.containsInConf(server.getId())) { numIncluded++; - Assert.assertTrue(conf.isStable()); - Assert.assertTrue(conf.hasNoChange(peers, Collections.emptyList())); + Assertions.assertTrue(conf.isStable()); + Assertions.assertTrue(conf.hasNoChange(peers, Collections.emptyList())); } else if (server.getInfo().isAlive()) { // The server is successfully removed from the conf // It may not be shutdown since it may not be able to talk to the new leader (who is not in its conf). - Assert.assertTrue(conf.isStable()); - Assert.assertFalse(conf.containsInConf(server.getId())); + Assertions.assertTrue(conf.isStable()); + Assertions.assertFalse(conf.containsInConf(server.getId())); } } - Assert.assertEquals(peers.size(), numIncluded + deadIncluded); + Assertions.assertEquals(peers.size(), numIncluded + deadIncluded); } public static long getNextIndex(RaftServer.Division server) { @@ -173,8 +176,8 @@ public static Stream getLogAppenders(RaftServer.Division server) { public static void assertLeaderLease(RaftServer.Division leader, boolean hasLease) { final LeaderStateImpl l = getLeaderState(leader).orElse(null); - Assert.assertNotNull(l); - Assert.assertEquals(l.hasLease(), hasLease); + Assertions.assertNotNull(l); + Assertions.assertEquals(l.hasLease(), hasLease); } public static void restartLogAppenders(RaftServer.Division server) { @@ -197,8 +200,8 @@ public static DataStreamMap newDataStreamMap(Object name) { public static void assertLostMajorityHeartbeatsRecently(RaftServer.Division leader) { final FollowerState f = ((RaftServerImpl)leader).getRole().getFollowerState().orElse(null); - Assert.assertNotNull(f); - 
Assert.assertTrue(f.lostMajorityHeartbeatsRecently()); + Assertions.assertNotNull(f); + Assertions.assertTrue(f.lostMajorityHeartbeatsRecently()); } public static SegmentedRaftLog newSegmentedRaftLog(RaftGroupMemberId memberId, DivisionInfo info, @@ -221,16 +224,16 @@ public static boolean isHighestPriority(RaftConfiguration config, RaftPeerId pee return ((RaftConfigurationImpl)config).isHighestPriority(peerId); } - public static void runWithMinorityPeers(MiniRaftCluster cluster, Collection peersInNewConf, - CheckedConsumer, IOException> consumer) throws IOException { - Collection peers = parseMinorityPeers(cluster, peersInNewConf); + public static void runWithMinorityPeers(MiniRaftCluster cluster, List peersInNewConf, + CheckedConsumer, IOException> consumer) throws IOException { + List peers = parseMinorityPeers(cluster, peersInNewConf); while (peers != null) { consumer.accept(peers); peers = parseMinorityPeers(cluster, peersInNewConf); } } - private static Collection parseMinorityPeers(MiniRaftCluster cluster, Collection peersInNewConf) { + private static List parseMinorityPeers(MiniRaftCluster cluster, List peersInNewConf) { RaftConfigurationImpl conf = (RaftConfigurationImpl) cluster.getLeader().getRaftConf(); Set peers = new HashSet<>(conf.getCurrentPeers()); @@ -244,11 +247,12 @@ private static Collection parseMinorityPeers(MiniRaftCluster cluster, break; } } - return peers; + return new ArrayList<>(peers); } // All new peers has been added. Handle the removed peers. 
- List peersToRemove = peers.stream().filter(peer -> !peersInNewConf.contains(peer)).collect(Collectors.toList()); + List peersToRemove = peers.stream().filter(peer -> !peersInNewConf.contains(peer)) + .collect(Collectors.toList()); if (!peersToRemove.isEmpty()) { return peersInNewConf; } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftStateMachineExceptionTests.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftStateMachineExceptionTests.java index be857141cd..1e46907d10 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftStateMachineExceptionTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/RaftStateMachineExceptionTests.java @@ -33,15 +33,16 @@ import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.io.IOException; import java.util.Objects; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; public abstract class RaftStateMachineExceptionTests extends BaseTest implements MiniRaftCluster.Factory.Get { @@ -52,6 +53,7 @@ public abstract class RaftStateMachineExceptionTests { - Assert.assertNotNull(RetryCacheTestUtil.get(server, client.getId(), callId)); + Assertions.assertNotNull(RetryCacheTestUtil.get(server, client.getId(), callId)); return null; }, 5, BaseTest.ONE_SECOND, "GetRetryEntry", LOG); @@ -161,8 +169,8 @@ private void runTestRetryOnExceptionDuringReplication(CLUSTER cluster) throws Ex Objects.requireNonNull(reply.getStateMachineException()); final RetryCache.Entry oldEntry = RetryCacheTestUtil.get(oldLeader, client.getId(), callId); - Assert.assertNotNull(oldEntry); - 
Assert.assertTrue(RetryCacheTestUtil.isFailed(oldEntry)); + Assertions.assertNotNull(oldEntry); + Assertions.assertTrue(RetryCacheTestUtil.isFailed(oldEntry)); Thread.sleep(100); // At this point of time the old leader would have stepped down. wait for leader election to complete @@ -173,9 +181,36 @@ private void runTestRetryOnExceptionDuringReplication(CLUSTER cluster) throws Ex Objects.requireNonNull(reply.getStateMachineException()); final RetryCache.Entry currentEntry = RetryCacheTestUtil.get(leader, client.getId(), callId); - Assert.assertNotNull(currentEntry); - Assert.assertTrue(RetryCacheTestUtil.isFailed(currentEntry)); - Assert.assertNotEquals(oldEntry, currentEntry); + Assertions.assertNotNull(currentEntry); + Assertions.assertTrue(RetryCacheTestUtil.isFailed(currentEntry)); + Assertions.assertNotEquals(oldEntry, currentEntry); + failPreAppend = false; + } + } + + @Test + public void testNoCancelTransactionOnPreAppendFailure() throws Exception { + runWithNewCluster(3, this::runTestNoCancelTransactionOnPreAppendFailure); + } + + private void runTestNoCancelTransactionOnPreAppendFailure(CLUSTER cluster) throws Exception { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); + failPreAppend = true; + numCancelTransaction.set(0); + try (final RaftClient client = cluster.createClient(leaderId)) { + try { + client.io().send(new SimpleMessage("cancel-transaction")); + fail("Exception expected"); + } catch (StateMachineException e) { + Assertions.assertTrue(e.getCause().getMessage().contains("Fake Exception in preAppend")); + } + + JavaUtils.attemptRepeatedly(() -> { + Assertions.assertEquals(0, numCancelTransaction.get(), + () -> "Expected cancelTransaction() not to be called but got " + numCancelTransaction.get()); + return null; + }, 10, ONE_SECOND, "wait for cancelTransaction", LOG); + } finally { failPreAppend = false; } } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/RetryCacheTestUtil.java 
b/ratis-server/src/test/java/org/apache/ratis/server/impl/RetryCacheTestUtil.java index e5a55e49ca..f90f914179 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/RetryCacheTestUtil.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/RetryCacheTestUtil.java @@ -27,7 +27,7 @@ import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLog; import org.apache.ratis.server.storage.RaftStorage; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; +import org.junit.jupiter.api.Assertions; import java.util.concurrent.TimeUnit; @@ -36,7 +36,11 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -public class RetryCacheTestUtil { +public final class RetryCacheTestUtil { + + private RetryCacheTestUtil() { + } + public static RetryCache createRetryCache(){ return new RetryCacheImpl(TimeDuration.valueOf(60, TimeUnit.SECONDS), null); } @@ -55,7 +59,7 @@ public static boolean isFailed(RetryCache.Entry entry) { public static void assertFailure(RetryCache cache, LogEntryProto logEntry, boolean isFailed) { if(logEntry.hasStateMachineLogEntry()) { final ClientInvocationId invocationId = ClientInvocationId.valueOf(logEntry.getStateMachineLogEntry()); - Assert.assertEquals(isFailed, get(cache, invocationId).isFailed()); + Assertions.assertEquals(isFailed, get(cache, invocationId).isFailed()); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/ServerPauseResumeTest.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/ServerPauseResumeTest.java index d2584c6312..185b0e3cdb 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/ServerPauseResumeTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/ServerPauseResumeTest.java @@ -29,8 +29,8 @@ import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.raftlog.RaftLog; -import org.junit.Assert; -import org.junit.Test; +import 
org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; /** Test server pause and resume. */ public abstract class ServerPauseResumeTest @@ -50,7 +50,7 @@ void runTestPauseResume(CLUSTER cluster) throws InterruptedException, IOExceptio final RaftServer.Division leader = waitForLeader(cluster); RaftPeerId leaderId = leader.getId(); final List followers = cluster.getFollowers(); - Assert.assertTrue(followers.size() >= 1); + Assertions.assertTrue(followers.size() >= 1); final RaftServerImpl follower = (RaftServerImpl)followers.get(0); SimpleMessage[] batch1 = SimpleMessage.create(100, "batch1"); @@ -60,15 +60,15 @@ void runTestPauseResume(CLUSTER cluster) throws InterruptedException, IOExceptio Thread.sleep(cluster.getTimeoutMax().toLong(TimeUnit.MILLISECONDS) * 5); final RaftLog leaderLog = leader.getRaftLog(); // leader should contain all logs. - Assert.assertTrue(RaftTestUtil.logEntriesContains(leaderLog, batch1)); + Assertions.assertTrue(RaftTestUtil.logEntriesContains(leaderLog, batch1)); RaftLog followerLog = follower.getRaftLog(); // follower should contain all logs. - Assert.assertTrue(RaftTestUtil.logEntriesContains(followerLog, batch1)); + Assertions.assertTrue(RaftTestUtil.logEntriesContains(followerLog, batch1)); // pause follower. boolean isSuccess = follower.pause(); - Assert.assertTrue(isSuccess); - Assert.assertTrue(follower.getInfo().getLifeCycleState().isPausingOrPaused()); + Assertions.assertTrue(isSuccess); + Assertions.assertTrue(follower.getInfo().getLifeCycleState().isPausingOrPaused()); SimpleMessage[] batch2 = SimpleMessage.create(100, "batch2"); Thread writeThread2 = RaftTestUtil.sendMessageInNewThread(cluster, leaderId, batch2); @@ -76,15 +76,15 @@ void runTestPauseResume(CLUSTER cluster) throws InterruptedException, IOExceptio writeThread2.join(); Thread.sleep(cluster.getTimeoutMax().toLong(TimeUnit.MILLISECONDS) * 5); // paused follower should not have any batch2 message in its raftlog. 
- Assert.assertTrue(RaftTestUtil.logEntriesNotContains(followerLog, batch2)); + Assertions.assertTrue(RaftTestUtil.logEntriesNotContains(followerLog, batch2)); // resume follower. isSuccess = follower.resume(); - Assert.assertTrue(isSuccess); - Assert.assertFalse(follower.getInfo().getLifeCycleState().isPausingOrPaused()); + Assertions.assertTrue(isSuccess); + Assertions.assertFalse(follower.getInfo().getLifeCycleState().isPausingOrPaused()); Thread.sleep(cluster.getTimeoutMax().toLong(TimeUnit.MILLISECONDS) * 5); // follower should contain all logs. - Assert.assertTrue(RaftTestUtil.logEntriesContains(followerLog, batch2)); + Assertions.assertTrue(RaftTestUtil.logEntriesContains(followerLog, batch2)); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/StateMachineShutdownTests.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/StateMachineShutdownTests.java index 28f8e6ace9..fc00b70bab 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/StateMachineShutdownTests.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/StateMachineShutdownTests.java @@ -28,43 +28,93 @@ import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.TransactionContext; -import org.junit.Assert; -import org.junit.Test; - +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Comparator; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; import java.util.concurrent.CompletableFuture; - +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicLong; public abstract class StateMachineShutdownTests extends BaseTest implements MiniRaftCluster.Factory.Get { - + public static Logger LOG = 
LoggerFactory.getLogger(StateMachineUpdater.class); protected static class StateMachineWithConditionalWait extends SimpleStateMachine4Testing { + boolean unblockAllTxns = false; + final Set blockTxns = ConcurrentHashMap.newKeySet(); + private final ExecutorService executor = Executors.newFixedThreadPool(10); + public static Map>> futures = new ConcurrentHashMap<>(); + public static Map numTxns = new ConcurrentHashMap<>(); + private final Map appliedTxns = new ConcurrentHashMap<>(); + + private synchronized void updateTxns() { + long appliedIndex = this.getLastAppliedTermIndex().getIndex() + 1; + Long appliedTerm = null; + while (appliedTxns.containsKey(appliedIndex)) { + appliedTerm = appliedTxns.remove(appliedIndex); + appliedIndex += 1; + } + if (appliedTerm != null) { + updateLastAppliedTermIndex(appliedTerm, appliedIndex - 1); + } + } - private final Long objectToWait = 0L; - volatile boolean blockOnApply = true; + @Override + public void notifyTermIndexUpdated(long term, long index) { + appliedTxns.put(index, term); + updateTxns(); + } @Override public CompletableFuture applyTransaction(TransactionContext trx) { - if (blockOnApply) { - synchronized (objectToWait) { - try { - objectToWait.wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(); + final RaftProtos.LogEntryProto entry = trx.getLogEntry(); + + CompletableFuture future = new CompletableFuture<>(); + futures.computeIfAbsent(Thread.currentThread().getId(), k -> new HashSet<>()).add(future); + executor.submit(() -> { + synchronized (blockTxns) { + if (!unblockAllTxns) { + blockTxns.add(entry.getIndex()); + } + while (!unblockAllTxns && blockTxns.contains(entry.getIndex())) { + try { + blockTxns.wait(10000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } } + numTxns.computeIfAbsent(getId(), (k) -> new AtomicLong()).incrementAndGet(); + appliedTxns.put(entry.getIndex(), entry.getTerm()); + updateTxns(); + 
future.complete(new RaftTestUtil.SimpleMessage("done")); + }); + return future; + } + + public void unBlockApplyTxn(long txnId) { + synchronized (blockTxns) { + blockTxns.remove(txnId); + blockTxns.notifyAll(); } - RaftProtos.LogEntryProto entry = trx.getLogEntry(); - updateLastAppliedTermIndex(entry.getTerm(), entry.getIndex()); - return CompletableFuture.completedFuture(new RaftTestUtil.SimpleMessage("done")); } - public void unBlockApplyTxn() { - blockOnApply = false; - synchronized (objectToWait) { - objectToWait.notifyAll(); + public void unblockAllTxns() { + unblockAllTxns = true; + synchronized (blockTxns) { + for (Long txnId : blockTxns) { + blockTxns.remove(txnId); + } + blockTxns.notifyAll(); } } } @@ -82,10 +132,9 @@ public void testStateMachineShutdownWaitsForApplyTxn() throws Exception { //Unblock leader and one follower ((StateMachineWithConditionalWait)leader.getStateMachine()) - .unBlockApplyTxn(); + .unblockAllTxns(); ((StateMachineWithConditionalWait)cluster. - getFollowers().get(0).getStateMachine()).unBlockApplyTxn(); - + getFollowers().get(0).getStateMachine()).unblockAllTxns(); cluster.getLeaderAndSendFirstMessage(true); try (final RaftClient client = cluster.createClient(leaderId)) { @@ -98,25 +147,39 @@ public void testStateMachineShutdownWaitsForApplyTxn() throws Exception { RaftClientReply watchReply = client.io().watch( logIndex, RaftProtos.ReplicationLevel.ALL_COMMITTED); watchReply.getCommitInfos().forEach( - val -> Assert.assertTrue(val.getCommitIndex() >= logIndex)); + val -> Assertions.assertTrue(val.getCommitIndex() >= logIndex)); final RaftServer.Division secondFollower = cluster.getFollowers().get(1); // Second follower is blocked in apply transaction - Assert.assertTrue(secondFollower.getInfo().getLastAppliedIndex() < logIndex); + Assertions.assertTrue(secondFollower.getInfo().getLastAppliedIndex() < logIndex); // Now shutdown the follower in a separate thread final Thread t = new Thread(secondFollower::close); t.start(); - // 
The second follower should still be blocked in apply transaction - Assert.assertTrue(secondFollower.getInfo().getLastAppliedIndex() < logIndex); + // Now unblock the second follower - ((StateMachineWithConditionalWait) secondFollower.getStateMachine()) - .unBlockApplyTxn(); + long minIndex = ((StateMachineWithConditionalWait) secondFollower.getStateMachine()).blockTxns.stream() + .min(Comparator.naturalOrder()).get(); + Assertions.assertEquals(2, StateMachineWithConditionalWait.numTxns.values().stream() + .filter(val -> val.get() == 3).count()); + // The second follower should still be blocked in apply transaction + Assertions.assertTrue(secondFollower.getInfo().getLastAppliedIndex() < minIndex); + for (long index : ((StateMachineWithConditionalWait) secondFollower.getStateMachine()).blockTxns) { + if (minIndex != index) { + ((StateMachineWithConditionalWait) secondFollower.getStateMachine()).unBlockApplyTxn(index); + } + } + Assertions.assertEquals(2, StateMachineWithConditionalWait.numTxns.values().stream() + .filter(val -> val.get() == 3).count()); + Assertions.assertTrue(secondFollower.getInfo().getLastAppliedIndex() < minIndex); + ((StateMachineWithConditionalWait) secondFollower.getStateMachine()).unBlockApplyTxn(minIndex); // Now wait for the thread t.join(5000); - Assert.assertEquals(logIndex, secondFollower.getInfo().getLastAppliedIndex()); + Assertions.assertTrue(logIndex <= secondFollower.getInfo().getLastAppliedIndex()); + Assertions.assertEquals(3, StateMachineWithConditionalWait.numTxns.values().stream() + .filter(val -> val.get() == 3).count()); cluster.shutdown(); } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/TestLogAppenderMetrics.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/TestLogAppenderMetrics.java index 4bd075ef66..6a46734689 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/TestLogAppenderMetrics.java +++ 
b/ratis-server/src/test/java/org/apache/ratis/server/impl/TestLogAppenderMetrics.java @@ -83,16 +83,16 @@ long getNextIndex() { return nextIndex; } - void updateNextIndex(long nextIndex) { - this.nextIndex = nextIndex; + void updateNextIndex(long index) { + this.nextIndex = index; } long getMatchIndex() { return matchIndex; } - void updateMatchIndex(long matchIndex) { - this.matchIndex = matchIndex; + void updateMatchIndex(long index) { + this.matchIndex = index; } Timestamp getLastRpcTime() { diff --git a/ratis-server/src/test/java/org/apache/ratis/server/impl/TestRatisServerMetricsBase.java b/ratis-server/src/test/java/org/apache/ratis/server/impl/TestRatisServerMetricsBase.java index a465b3c4dc..2fe2ed0b6e 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/impl/TestRatisServerMetricsBase.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/impl/TestRatisServerMetricsBase.java @@ -17,7 +17,7 @@ */ package org.apache.ratis.server.impl; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; import java.util.concurrent.CompletableFuture; @@ -32,8 +32,8 @@ import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.metrics.RaftServerMetricsImpl; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; /** Tests on Ratis server metrics. 
*/ @@ -66,7 +66,7 @@ void runTestClientFailedRequest(CLUSTER cluster) .setType(RaftClientRequest.staleReadRequestType(Long.MAX_VALUE)) .build(); final CompletableFuture f = leaderImpl.getRaftServer().submitClientRequestAsync(r); - Assert.assertFalse(f.get().isSuccess()); + Assertions.assertFalse(f.get().isSuccess()); assertEquals(1L, ((RaftServerMetricsImpl)leaderImpl.getRaftServerMetrics()) .getNumFailedClientStaleRead().getCount()); } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogTestUtils.java b/ratis-server/src/test/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogTestUtils.java index e242eddf59..473aa0a46c 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogTestUtils.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/raftlog/segmented/SegmentedRaftLogTestUtils.java @@ -17,25 +17,33 @@ */ package org.apache.ratis.server.raftlog.segmented; +import org.apache.ratis.server.RaftServer; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.Slf4jUtils; import org.slf4j.event.Level; import java.io.File; +import java.nio.file.Path; +import java.util.List; +import java.util.stream.Collectors; public interface SegmentedRaftLogTestUtils { SizeInBytes MAX_OP_SIZE = SizeInBytes.valueOf("32MB"); static SegmentedRaftLogInputStream newSegmentedRaftLogInputStream(File log, long startIndex, long endIndex, boolean isOpen) { - return new SegmentedRaftLogInputStream(log, startIndex, endIndex, isOpen, MAX_OP_SIZE, null); + final LogSegmentStartEnd startEnd = LogSegmentStartEnd.valueOf(startIndex, endIndex, isOpen); + return new SegmentedRaftLogInputStream(log, startEnd, MAX_OP_SIZE, null); } static void setRaftLogWorkerLogLevel(Level level) { Slf4jUtils.setLogLevel(SegmentedRaftLogWorker.LOG, level); } - static String getLogFlushTimeMetric(String memberId) { - return SegmentedRaftLogWorker.class.getName() + "." 
+ memberId + ".flush-time"; + static List getOpenLogFiles(RaftServer.Division server) throws Exception { + return LogSegmentPath.getLogSegmentPaths(server.getRaftStorage()).stream() + .filter(p -> p.getStartEnd().isOpen()) + .map(LogSegmentPath::getPath) + .collect(Collectors.toList()); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/server/simulation/MiniRaftClusterWithSimulatedRpc.java b/ratis-server/src/test/java/org/apache/ratis/server/simulation/MiniRaftClusterWithSimulatedRpc.java index d0e7e9f5c8..922796cb80 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/simulation/MiniRaftClusterWithSimulatedRpc.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/simulation/MiniRaftClusterWithSimulatedRpc.java @@ -34,7 +34,7 @@ import static org.apache.ratis.conf.ConfUtils.requireMin; -public class MiniRaftClusterWithSimulatedRpc extends MiniRaftCluster { +public final class MiniRaftClusterWithSimulatedRpc extends MiniRaftCluster { static final Logger LOG = LoggerFactory.getLogger(MiniRaftClusterWithSimulatedRpc.class); public static final Factory FACTORY diff --git a/ratis-server/src/test/java/org/apache/ratis/server/simulation/RaftServerReply.java b/ratis-server/src/test/java/org/apache/ratis/server/simulation/RaftServerReply.java index 5d9d871f8c..8b7f88ed4e 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/simulation/RaftServerReply.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/simulation/RaftServerReply.java @@ -17,7 +17,6 @@ */ package org.apache.ratis.server.simulation; -import org.apache.ratis.proto.RaftProtos; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftRpcMessage; import org.apache.ratis.proto.RaftProtos.AppendEntriesReplyProto; diff --git a/ratis-server/src/test/java/org/apache/ratis/server/simulation/SimulatedRequestReply.java b/ratis-server/src/test/java/org/apache/ratis/server/simulation/SimulatedRequestReply.java index 0874e7466c..1b7da951c1 100644 --- 
a/ratis-server/src/test/java/org/apache/ratis/server/simulation/SimulatedRequestReply.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/simulation/SimulatedRequestReply.java @@ -27,7 +27,11 @@ import java.io.IOException; import java.util.Map; -import java.util.concurrent.*; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; diff --git a/ratis-server/src/test/java/org/apache/ratis/server/simulation/SimulatedRpc.java b/ratis-server/src/test/java/org/apache/ratis/server/simulation/SimulatedRpc.java index 0399b414bf..e570c35af0 100644 --- a/ratis-server/src/test/java/org/apache/ratis/server/simulation/SimulatedRpc.java +++ b/ratis-server/src/test/java/org/apache/ratis/server/simulation/SimulatedRpc.java @@ -28,9 +28,13 @@ import java.util.Objects; -class SimulatedRpc implements RpcType { +public class SimulatedRpc implements RpcType { static final SimulatedRpc INSTANCE = new SimulatedRpc(); + public static SimulatedRpc get() { + return INSTANCE; + } + @Override public String name() { return getClass().getName(); diff --git a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java index fe1a97ddca..09b5ee59a6 100644 --- a/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/statemachine/RaftSnapshotBaseTest.java @@ -38,6 +38,7 @@ import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; +import org.apache.ratis.server.impl.PeerChanges; import org.apache.ratis.server.impl.RaftServerTestUtil; import 
org.apache.ratis.server.metrics.RaftServerMetricsImpl; import org.apache.ratis.server.raftlog.RaftLog; @@ -49,16 +50,12 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.Slf4jUtils; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; -import java.io.IOException; -import java.util.Arrays; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -67,11 +64,18 @@ import org.apache.ratis.thirdparty.com.codahale.metrics.Timer; import org.slf4j.event.Level; -public abstract class RaftSnapshotBaseTest extends BaseTest { +public abstract class RaftSnapshotBaseTest + extends BaseTest + implements MiniRaftCluster.Factory.Get { { Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG); Slf4jUtils.setLogLevel(RaftLog.LOG, Level.DEBUG); - Slf4jUtils.setLogLevel(RaftClient.LOG, Level.DEBUG); + + final RaftProperties p = getProperties(); + p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, SimpleStateMachine4Testing.class, StateMachine.class); + RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold(p, SNAPSHOT_TRIGGER_THRESHOLD); + RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(p, true); + RaftServerConfigKeys.LeaderElection.setMemberMajorityAdd(p, true); } static final Logger LOG = LoggerFactory.getLogger(RaftSnapshotBaseTest.class); @@ -92,20 +96,20 @@ public static void assertLeaderContent(MiniRaftCluster cluster) throws Exception assertLogContent(leader, true); } - public static void assertLogContent(RaftServer.Division server, boolean isLeader) throws Exception { + public static void checkMetadataEntry(RaftServer.Division server) throws Exception { final RaftLog log = server.getRaftLog(); final long lastIndex = log.getLastEntryTermIndex().getIndex(); final LogEntryProto e 
= log.get(lastIndex); - Assert.assertTrue(e.hasMetadataEntry()); + Assertions.assertTrue(e.hasMetadataEntry()); + Assertions.assertEquals(log.getLastCommittedIndex() - 1, e.getMetadataEntry().getCommitIndex()); + } - JavaUtils.attemptRepeatedly(() -> { - Assert.assertEquals(log.getLastCommittedIndex() - 1, e.getMetadataEntry().getCommitIndex()); - return null; - }, 50, BaseTest.HUNDRED_MILLIS, "CheckMetadataEntry", LOG); + public static void assertLogContent(RaftServer.Division server, boolean isLeader) throws Exception { + JavaUtils.attempt(() -> checkMetadataEntry(server), 50, HUNDRED_MILLIS, "checkMetadataEntry", LOG); SimpleStateMachine4Testing simpleStateMachine = SimpleStateMachine4Testing.get(server); if (isLeader) { - Assert.assertTrue("Not notified as a leader", simpleStateMachine.isNotifiedAsLeader()); + Assertions.assertTrue(simpleStateMachine.isNotifiedAsLeader(), "Not notified as a leader"); } final LogEntryProto[] entries = simpleStateMachine.getContent(); long message = 0; @@ -113,35 +117,12 @@ public static void assertLogContent(RaftServer.Division server, boolean isLeader LOG.info("{}) {} {}", i, message, entries[i].toString().replace("\n", ", ")); if (entries[i].hasStateMachineLogEntry()) { final SimpleMessage m = new SimpleMessage("m" + message++); - Assert.assertArrayEquals(m.getContent().toByteArray(), + Assertions.assertArrayEquals(m.getContent().toByteArray(), entries[i].getStateMachineLogEntry().getLogData().toByteArray()); } } } - private MiniRaftCluster cluster; - - public abstract MiniRaftCluster.Factory getFactory(); - - @Before - public void setup() throws IOException { - final RaftProperties prop = new RaftProperties(); - prop.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, - SimpleStateMachine4Testing.class, StateMachine.class); - RaftServerConfigKeys.Snapshot.setAutoTriggerThreshold( - prop, SNAPSHOT_TRIGGER_THRESHOLD); - RaftServerConfigKeys.Snapshot.setAutoTriggerEnabled(prop, true); - this.cluster = getFactory().newCluster(1, 
prop); - cluster.start(); - } - - @After - public void tearDown() { - if (cluster != null) { - cluster.shutdown(); - } - } - /** * Keep generating writing traffic and make sure snapshots are taken. * We then restart the whole raft peer and check if it can correctly load @@ -149,13 +130,18 @@ public void tearDown() { */ @Test public void testRestartPeer() throws Exception { - RaftTestUtil.waitForLeader(cluster); - final RaftPeerId leaderId = cluster.getLeader().getId(); + runWithNewCluster(1, this::runTestRestartPeer); + + } + + void runTestRestartPeer(CLUSTER cluster) throws Exception { + LOG.info("runTestRestartPeer"); + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); int i = 0; try(final RaftClient client = cluster.createClient(leaderId)) { for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) { RaftClientReply reply = client.io().send(new SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -164,7 +150,7 @@ public void testRestartPeer() throws Exception { // wait for the snapshot to be done final List snapshotFiles = getSnapshotFiles(cluster, nextIndex - SNAPSHOT_TRIGGER_THRESHOLD, nextIndex); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); + Assertions.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); return null; }, 10, ONE_SECOND, "snapshotFile.exist", LOG); @@ -180,7 +166,7 @@ public void testRestartPeer() throws Exception { public static boolean exists(File f) { if (f.exists()) { - LOG.info("File exists: " + f); + LOG.info("File exists: {}", f); return true; } return false; @@ -193,16 +179,20 @@ public static boolean exists(File f) { */ @Test public void testBasicInstallSnapshot() throws Exception { + runWithNewCluster(1, this::runTestBasicInstallSnapshot); + } + + void runTestBasicInstallSnapshot(CLUSTER cluster) throws Exception { + LOG.info("runTestBasicInstallSnapshot"); 
final List logs; int i = 0; try { - RaftTestUtil.waitForLeader(cluster); - final RaftPeerId leaderId = cluster.getLeader().getId(); + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); try(final RaftClient client = cluster.createClient(leaderId)) { for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) { RaftClientReply reply = client.io().send(new SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -211,7 +201,7 @@ public void testBasicInstallSnapshot() throws Exception { LOG.info("nextIndex = {}", nextIndex); final List snapshotFiles = getSnapshotFiles(cluster, nextIndex - SNAPSHOT_TRIGGER_THRESHOLD, nextIndex); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); + Assertions.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); return null; }, 10, ONE_SECOND, "snapshotFile.exist", LOG); verifyTakeSnapshotMetric(cluster.getLeader()); @@ -233,26 +223,23 @@ public void testBasicInstallSnapshot() throws Exception { // generate some more traffic try(final RaftClient client = cluster.createClient(cluster.getLeader().getId())) { - Assert.assertTrue(client.io().send(new SimpleMessage("m" + i)).isSuccess()); + Assertions.assertTrue(client.io().send(new SimpleMessage("m" + i)).isSuccess()); } - // add two more peers - String[] newPeers = new String[]{"s3", "s4"}; - MiniRaftCluster.PeerChanges change = cluster.addNewPeers( - newPeers, true, false); + // add a new peer + final PeerChanges change = cluster.addNewPeers(1, true); // trigger setConfiguration - RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf), - peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); + RaftServerTestUtil.runWithMinorityPeers(cluster, change.getPeersInNewConf(), cluster::setConfiguration); - for (String newPeer : newPeers) { - final RaftServer.Division s = 
cluster.getDivision(RaftPeerId.valueOf(newPeer)); + for (RaftPeer newPeer : change.getAddedPeers()) { + final RaftServer.Division s = cluster.getDivision(newPeer.getId()); SimpleStateMachine4Testing simpleStateMachine = SimpleStateMachine4Testing.get(s); - Assert.assertSame(LifeCycle.State.RUNNING, simpleStateMachine.getLifeCycleState()); + Assertions.assertSame(LifeCycle.State.RUNNING, simpleStateMachine.getLifeCycleState()); } // Verify installSnapshot counter on leader before restart. verifyInstallSnapshotMetric(cluster.getLeader()); - RaftServerTestUtil.waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null); + RaftServerTestUtil.waitAndCheckNewConf(cluster, change.getPeersInNewConf(), 0, null); Timer timer = getTakeSnapshotTimer(cluster.getLeader()); long count = timer.getCount(); @@ -262,7 +249,7 @@ public void testBasicInstallSnapshot() throws Exception { assertLeaderContent(cluster); // verify that snapshot was taken when stopping the server - Assert.assertTrue(count < timer.getCount()); + Assertions.assertTrue(count < timer.getCount()); } finally { cluster.shutdown(); } @@ -275,6 +262,11 @@ public void testBasicInstallSnapshot() throws Exception { */ @Test public void testInstallSnapshotDuringBootstrap() throws Exception { + runWithNewCluster(1, this::runTestInstallSnapshotDuringBootstrap); + } + + void runTestInstallSnapshotDuringBootstrap(CLUSTER cluster) throws Exception { + LOG.info("runTestInstallSnapshotDuringBootstrap"); int i = 0; try { RaftTestUtil.waitForLeader(cluster); @@ -283,7 +275,7 @@ public void testInstallSnapshotDuringBootstrap() throws Exception { try(final RaftClient client = cluster.createClient(leaderId)) { for (; i < SNAPSHOT_TRIGGER_THRESHOLD * 2 - 1; i++) { RaftClientReply reply = client.io().send(new SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -292,30 +284,27 @@ public void testInstallSnapshotDuringBootstrap() throws Exception { LOG.info("nextIndex = 
{}", nextIndex); final List snapshotFiles = getSnapshotFiles(cluster, nextIndex - SNAPSHOT_TRIGGER_THRESHOLD, nextIndex); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); + Assertions.assertTrue(snapshotFiles.stream().anyMatch(RaftSnapshotBaseTest::exists)); return null; }, 10, ONE_SECOND, "snapshotFile.exist", LOG); verifyTakeSnapshotMetric(cluster.getLeader()); assertLeaderContent(cluster); - // add two more peers - String[] newPeers = new String[]{"s3", "s4"}; - MiniRaftCluster.PeerChanges change = cluster.addNewPeers( - newPeers, true, false); + // add a new peer + final PeerChanges change = cluster.addNewPeers(1, true); // trigger setConfiguration - RaftServerTestUtil.runWithMinorityPeers(cluster, Arrays.asList(change.allPeersInNewConf), - peers -> cluster.setConfiguration(peers.toArray(RaftPeer.emptyArray()))); + RaftServerTestUtil.runWithMinorityPeers(cluster, change.getPeersInNewConf(), cluster::setConfiguration); - for (String newPeer : newPeers) { - final RaftServer.Division s = cluster.getDivision(RaftPeerId.valueOf(newPeer)); + for (RaftPeer newPeer : change.getAddedPeers()) { + final RaftServer.Division s = cluster.getDivision(newPeer.getId()); SimpleStateMachine4Testing simpleStateMachine = SimpleStateMachine4Testing.get(s); - Assert.assertSame(LifeCycle.State.RUNNING, simpleStateMachine.getLifeCycleState()); + Assertions.assertSame(LifeCycle.State.RUNNING, simpleStateMachine.getLifeCycleState()); } // Verify installSnapshot counter on leader verifyInstallSnapshotMetric(cluster.getLeader()); - RaftServerTestUtil.waitAndCheckNewConf(cluster, change.allPeersInNewConf, 0, null); + RaftServerTestUtil.waitAndCheckNewConf(cluster, change.getPeersInNewConf(), 0, null); } finally { cluster.shutdown(); } @@ -324,13 +313,13 @@ public void testInstallSnapshotDuringBootstrap() throws Exception { protected void verifyInstallSnapshotMetric(RaftServer.Division leader) { final LongCounter 
installSnapshotCounter = ((RaftServerMetricsImpl)leader.getRaftServerMetrics()) .getNumInstallSnapshot(); - Assert.assertNotNull(installSnapshotCounter); - Assert.assertTrue(installSnapshotCounter.getCount() >= 1); + Assertions.assertNotNull(installSnapshotCounter); + Assertions.assertTrue(installSnapshotCounter.getCount() >= 1); } private static void verifyTakeSnapshotMetric(RaftServer.Division leader) { Timer timer = getTakeSnapshotTimer(leader); - Assert.assertTrue(timer.getCount() > 0); + Assertions.assertTrue(timer.getCount() > 0); } private static Timer getTakeSnapshotTimer(RaftServer.Division leader) { @@ -338,9 +327,9 @@ private static Timer getTakeSnapshotTimer(RaftServer.Division leader) { RATIS_APPLICATION_NAME_METRICS, RATIS_STATEMACHINE_METRICS, RATIS_STATEMACHINE_METRICS_DESC); Optional opt = MetricRegistries.global().get(info); - Assert.assertTrue(opt.isPresent()); + Assertions.assertTrue(opt.isPresent()); RatisMetricRegistry metricRegistry = opt.get(); - Assert.assertNotNull(metricRegistry); + Assertions.assertNotNull(metricRegistry); return ((DefaultTimekeeperImpl)metricRegistry.timer(STATEMACHINE_TAKE_SNAPSHOT_TIMER)).getTimer(); } } diff --git a/ratis-server/src/test/java/org/apache/ratis/statemachine/SnapshotManagementTest.java b/ratis-server/src/test/java/org/apache/ratis/statemachine/SnapshotManagementTest.java index dffeb42286..8be669bf12 100644 --- a/ratis-server/src/test/java/org/apache/ratis/statemachine/SnapshotManagementTest.java +++ b/ratis-server/src/test/java/org/apache/ratis/statemachine/SnapshotManagementTest.java @@ -31,9 +31,9 @@ import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; import org.slf4j.event.Level; @@ -51,7 +51,7 @@ public abstract class SnapshotManagementTest static final Logger LOG = LoggerFactory.getLogger(SnapshotManagementTest.class); - @Before + @BeforeEach public void setup() { final RaftProperties p = getProperties(); p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, @@ -74,18 +74,18 @@ void runTestTakeSnapshot(CLUSTER cluster) throws Exception { try (final RaftClient client = cluster.createClient(leaderId)) { for (int i = 0; i < RaftServerConfigKeys.Snapshot.creationGap(getProperties()); i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } snapshotReply = client.getSnapshotManagementApi().create(3000); } - Assert.assertTrue(snapshotReply.isSuccess()); + Assertions.assertTrue(snapshotReply.isSuccess()); final long snapshotIndex = snapshotReply.getLogIndex(); LOG.info("snapshotIndex = {}", snapshotIndex); final File snapshotFile = SimpleStateMachine4Testing.get(leader) .getStateMachineStorage().getSnapshotFile(leader.getInfo().getCurrentTerm(), snapshotIndex); - Assert.assertTrue(snapshotFile.exists()); + Assertions.assertTrue(snapshotFile.exists()); } void runTestTakeSnapshotWithConfigurableGap(CLUSTER cluster) throws Exception { @@ -95,29 +95,29 @@ void runTestTakeSnapshotWithConfigurableGap(CLUSTER cluster) throws Exception { try (final RaftClient client = cluster.createClient(leaderId)) { for (int i = 0; i < RaftServerConfigKeys.Snapshot.creationGap(getProperties())/2-1; i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } - Assert.assertTrue(leader.getStateMachine().getLastAppliedTermIndex().getIndex() + Assertions.assertTrue(leader.getStateMachine().getLastAppliedTermIndex().getIndex() < RaftServerConfigKeys.Snapshot.creationGap(getProperties())); 
snapshotReply = client.getSnapshotManagementApi(leaderId).create(3000); - Assert.assertTrue(snapshotReply.isSuccess()); - Assert.assertEquals(0,snapshotReply.getLogIndex()); + Assertions.assertTrue(snapshotReply.isSuccess()); + Assertions.assertEquals(0,snapshotReply.getLogIndex()); for (int i = 0; i < RaftServerConfigKeys.Snapshot.creationGap(getProperties())/2-1; i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } final SnapshotManagementRequest r1 = SnapshotManagementRequest.newCreate(client.getId(), leaderId, cluster.getGroupId(), CallId.getAndIncrement(), 3000); snapshotReply = client.getSnapshotManagementApi(leaderId).create(3000); } - Assert.assertTrue(snapshotReply.isSuccess()); + Assertions.assertTrue(snapshotReply.isSuccess()); final long snapshotIndex = snapshotReply.getLogIndex(); LOG.info("snapshotIndex = {}", snapshotIndex); final File snapshotFile = SimpleStateMachine4Testing.get(leader) .getStateMachineStorage() .getSnapshotFile(leader.getInfo().getCurrentTerm(), snapshotIndex); - Assert.assertTrue(snapshotFile.exists()); + Assertions.assertTrue(snapshotFile.exists()); } void runTestTakeSnapshotOnSpecificServer(CLUSTER cluster) throws Exception { @@ -125,23 +125,23 @@ void runTestTakeSnapshotOnSpecificServer(CLUSTER cluster) throws Exception { final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster); final RaftServer.Division follower = cluster.getFollowers().get(0); final RaftPeerId followerId = follower.getId(); - Assert.assertTrue(follower.getInfo().isFollower()); + Assertions.assertTrue(follower.getInfo().isFollower()); try (final RaftClient client = cluster.createClient(followerId)) { for (int i = 0; i < RaftServerConfigKeys.Snapshot.creationGap(getProperties()); i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + 
Assertions.assertTrue(reply.isSuccess()); } snapshotReply = client.getSnapshotManagementApi(followerId).create(3000); } - Assert.assertTrue(snapshotReply.isSuccess()); + Assertions.assertTrue(snapshotReply.isSuccess()); final long snapshotIndex = snapshotReply.getLogIndex(); LOG.info("snapshotIndex = {} on {} server {}", snapshotIndex, follower.getInfo().getCurrentRole(), follower.getId()); final File snapshotFile = SimpleStateMachine4Testing.get(follower) .getStateMachineStorage().getSnapshotFile(follower.getInfo().getCurrentTerm(), snapshotIndex); - Assert.assertTrue(snapshotFile.exists()); + Assertions.assertTrue(snapshotFile.exists()); } @@ -155,22 +155,22 @@ void runTestReceiveLogAndTakeSnapshotOnListener(CLUSTER cluster) throws Exceptio final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster); final RaftServer.Division listener = cluster.getListeners().get(0); final RaftPeerId listenerId = listener.getId(); - Assert.assertTrue(listener.getInfo().isListener()); + Assertions.assertTrue(listener.getInfo().isListener()); try (final RaftClient client = cluster.createClient(listenerId)) { for (int i = 0; i < RaftServerConfigKeys.Snapshot.creationGap(getProperties()); i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } snapshotReply = client.getSnapshotManagementApi(listenerId).create(3000); } - Assert.assertTrue(snapshotReply.isSuccess()); + Assertions.assertTrue(snapshotReply.isSuccess()); final long snapshotIndex = snapshotReply.getLogIndex(); LOG.info("snapshotIndex = {} on {} server {}", snapshotIndex, listener.getInfo().getCurrentRole(), listener.getId()); final File snapshotFile = SimpleStateMachine4Testing.get(listener) .getStateMachineStorage().getSnapshotFile(listener.getInfo().getCurrentTerm(), snapshotIndex); - Assert.assertTrue(snapshotFile.exists()); + Assertions.assertTrue(snapshotFile.exists()); } } diff --git 
a/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java b/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java index 312c9508d3..1ffbdbcb99 100644 --- a/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java +++ b/ratis-server/src/test/java/org/apache/ratis/statemachine/impl/SimpleStateMachine4Testing.java @@ -208,12 +208,12 @@ private void put(LogEntryProto entry) { } @Override - public synchronized void initialize(RaftServer server, RaftGroupId groupId, + public synchronized void initialize(RaftServer server, RaftGroupId raftGroupId, RaftStorage raftStorage) throws IOException { - LOG.info("Initializing " + this); - this.groupId = groupId; + LOG.info("Initializing {}", this); + this.groupId = raftGroupId; getLifeCycle().startAndTransition(() -> { - super.initialize(server, groupId, raftStorage); + super.initialize(server, raftGroupId, raftStorage); storage.init(raftStorage); loadSnapshot(storage.getLatestSnapshot()); @@ -233,7 +233,10 @@ public synchronized void pause() { @Override public synchronized void reinitialize() throws IOException { - LOG.info("Reinitializing " + this); + LOG.info("Reinitializing {}", this); + indexMap.clear(); + dataMap.clear(); + loadSnapshot(storage.getLatestSnapshot()); if (getLifeCycleState() == LifeCycle.State.PAUSED) { getLifeCycle().transition(LifeCycle.State.STARTING); @@ -328,14 +331,14 @@ public CompletableFuture query(Message request) { final String string = request.getContent().toStringUtf8(); Exception exception; try { - LOG.info("query " + string); + LOG.info("query {}", string); final LogEntryProto entry = dataMap.get(string); if (entry != null) { return CompletableFuture.completedFuture(Message.valueOf(entry.toByteString())); } exception = new IndexOutOfBoundsException(getId() + ": LogEntry not found for query " + string); } catch (Exception e) { - LOG.warn("Failed request " + request, e); + 
LOG.warn("Failed request {}", request, e); exception = e; } return JavaUtils.completeExceptionally(new StateMachineException( diff --git a/ratis-shell/dev-support/findbugsExcludeFile.xml b/ratis-shell/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000..c29ededa06 --- /dev/null +++ b/ratis-shell/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/ratis-shell/pom.xml b/ratis-shell/pom.xml index 78ab6679fd..495187b2a8 100644 --- a/ratis-shell/pom.xml +++ b/ratis-shell/pom.xml @@ -17,12 +17,17 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-shell Apache Ratis Shell + + + true + + ratis-client @@ -38,7 +43,7 @@ commons-cli commons-cli - 1.5.0 + 1.11.0 org.slf4j @@ -48,6 +53,17 @@ org.slf4j slf4j-simple + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test + @@ -69,6 +85,13 @@ + + com.github.spotbugs + spotbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/AbstractShell.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/AbstractShell.java index e2679045e5..a59e03cf8e 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/AbstractShell.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/AbstractShell.java @@ -19,12 +19,9 @@ import org.apache.commons.cli.CommandLine; import org.apache.ratis.shell.cli.sh.command.Context; -import org.apache.ratis.thirdparty.com.google.common.io.Closer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.Closeable; -import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.Map; @@ -34,20 +31,16 @@ /** * Abstract class for handling command line inputs. 
*/ -public abstract class AbstractShell implements Closeable { +public abstract class AbstractShell { private static final Logger LOG = LoggerFactory.getLogger(AbstractShell.class); private final Map mCommands; - private final Closer closer; /** * Creates a new instance of {@link AbstractShell}. */ protected AbstractShell(Context context) { - closer = Closer.create(); mCommands = loadCommands(context); - // Register all loaded commands under closer. - mCommands.values().forEach(closer::register); } /** @@ -116,11 +109,6 @@ public Collection getCommands() { return mCommands.values(); } - @Override - public void close() throws IOException { - closer.close(); - } - /** * @return name of the shell */ @@ -133,10 +121,6 @@ public void close() throws IOException { */ protected abstract Map loadCommands(Context context); - protected Closer getCloser() { - return closer; - } - /** * Prints usage for all commands. */ diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/CliUtils.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/CliUtils.java new file mode 100644 index 0000000000..1cecc665c6 --- /dev/null +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/CliUtils.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.shell.cli; + +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.protocol.GroupInfoReply; +import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.exceptions.RaftException; +import org.apache.ratis.util.function.CheckedFunction; + +import java.io.IOException; +import java.io.PrintStream; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * Utilities for command line interface. + */ +public final class CliUtils { + private CliUtils() { + // prevent instantiation + } + + /** @return {@link RaftPeerId} from the host string and port of the given address. */ + public static RaftPeerId getPeerId(InetSocketAddress address) { + return getPeerId(address.getHostString(), address.getPort()); + } + + /** @return {@link RaftPeerId} of the form "host_port" from the given host and port. */ + public static RaftPeerId getPeerId(String host, int port) { + return RaftPeerId.getRaftPeerId(host + "_" + port); + } + + /** + * Apply the given function to each element of the given parameter list. + * + * @param list the input parameter list + * @param function the function to be applied + * @param parameter type + * @param return value type + * @param the exception type thrown by the given function. + * @return the first non-null value returned by the given function applied to the given list.
+ */ + private static RETURN applyFunctionReturnFirstNonNull( + Collection list, CheckedFunction function, PrintStream out) { + for (PARAMETER parameter : list) { + try { + RETURN ret = function.apply(parameter); + if (ret != null) { + return ret; + } + } catch (Throwable e) { + e.printStackTrace(out); + } + } + return null; + } + + /** Parse the given string as a list of {@link RaftPeer}. */ + public static List parseRaftPeers(String peers) { + List addresses = new ArrayList<>(); + String[] peersArray = peers.split(","); + for (String peer : peersArray) { + addresses.add(parseInetSocketAddress(peer)); + } + + return addresses.stream() + .map(addr -> RaftPeer.newBuilder().setId(getPeerId(addr)).setAddress(addr).build()) + .collect(Collectors.toList()); + } + + /** Parse the given string as a {@link RaftGroupId}. */ + public static RaftGroupId parseRaftGroupId(String groupId) { + return groupId != null && groupId.isEmpty() ? RaftGroupId.valueOf(UUID.fromString(groupId)) : null; + } + + /** + * Get the group id from the given peers if the given group id is null. + * + * @param client for communicating to the peers. + * @param peers the peers of the group. + * @param groupId the given group id, if there is any. + * @param err for printing error messages. + * @return the group id from the given peers if the given group id is null; + * otherwise, return the given group id. 
+ */ + public static RaftGroupId getGroupId(RaftClient client, List peers, RaftGroupId groupId, + PrintStream err) throws IOException { + if (groupId != null) { + return groupId; + } + + final List groupIds = applyFunctionReturnFirstNonNull(peers, + p -> client.getGroupManagementApi(p.getId()).list().getGroupIds(), err); + + if (groupIds == null) { + final String message = "Failed to get group ID from " + peers; + err.println("Failed to get group ID from " + peers); + throw new IOException(message); + } else if (groupIds.size() == 1) { + return groupIds.get(0); + } else { + String message = "Unexpected multiple group IDs " + groupIds + + ". In such case, the target group ID must be specified."; + err.println(message); + throw new IOException(message); + } + } + + /** + * Get the group info from the given peers. + * + * @param client for communicating to the peers. + * @param peers the peers of the group. + * @param groupId the target group + * @param err for printing error messages. + * @return the group info + */ + public static GroupInfoReply getGroupInfo(RaftClient client, List peers, RaftGroupId groupId, + PrintStream err) throws IOException { + GroupInfoReply groupInfoReply = applyFunctionReturnFirstNonNull(peers, + p -> client.getGroupManagementApi((p.getId())).info(groupId), err); + checkReply(groupInfoReply, () -> "Failed to get group info for " + groupId.getUuid() + + " from " + peers, err); + return groupInfoReply; + } + + /** Check if the given reply is success. */ + public static void checkReply(RaftClientReply reply, Supplier message, PrintStream printStream) + throws IOException { + if (reply == null || !reply.isSuccess()) { + final RaftException e = Optional.ofNullable(reply) + .map(RaftClientReply::getException) + .orElseGet(() -> new RaftException("Reply: " + reply)); + printStream.println(message.get()); + throw new IOException(message.get(), e); + } + } + + /** Parse the given string as a {@link InetSocketAddress}. 
*/ + public static InetSocketAddress parseInetSocketAddress(String address) { + try { + final String[] hostPortPair = address.split(":"); + if (hostPortPair.length < 2) { + throw new IllegalArgumentException("Unexpected address format ."); + } + return new InetSocketAddress(hostPortPair[0], Integer.parseInt(hostPortPair[1])); + } catch (Exception e) { + throw new IllegalArgumentException("Failed to parse the server address parameter \"" + address + "\".", e); + } + } + +} diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/Command.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/Command.java index ae4e701077..bc2882bfef 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/Command.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/Command.java @@ -23,7 +23,6 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import java.io.Closeable; import java.io.IOException; import java.util.Collections; import java.util.Map; @@ -32,7 +31,7 @@ /** * An interface for all the commands that can be run from a shell. */ -public interface Command extends Comparable, Closeable { +public interface Command extends Comparable { /** * Gets the command name as input from the shell. @@ -119,10 +118,4 @@ default int run(CommandLine cl) throws IOException { */ String getDescription(); - /** - * Used to close resources created by commands. - * - * @throws IOException if closing resources fails - */ - default void close() throws IOException {} } diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/RaftUtils.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/RaftUtils.java deleted file mode 100644 index 1239fc56c4..0000000000 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/RaftUtils.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.ratis.shell.cli; - -import org.apache.ratis.client.RaftClient; -import org.apache.ratis.client.RaftClientConfigKeys; -import org.apache.ratis.conf.RaftProperties; -import org.apache.ratis.protocol.RaftGroup; -import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.retry.ExponentialBackoffRetry; -import org.apache.ratis.util.TimeDuration; - -import java.net.InetSocketAddress; -import java.util.Properties; -import java.util.concurrent.TimeUnit; - -/** - * Helper class for raft operations. - */ -public final class RaftUtils { - - private RaftUtils() { - // prevent instantiation - } - - /** - * Gets the raft peer id. - * - * @param address the address of the server - * @return the raft peer id - */ - public static RaftPeerId getPeerId(InetSocketAddress address) { - return getPeerId(address.getHostString(), address.getPort()); - } - - /** - * Gets the raft peer id. - * - * @param host the hostname of the server - * @param port the port of the server - * @return the raft peer id - */ - public static RaftPeerId getPeerId(String host, int port) { - return RaftPeerId.getRaftPeerId(host + "_" + port); - } - - /** - * Create a raft client to communicate to ratis server. 
- * @param raftGroup the raft group - * @return return a raft client - */ - public static RaftClient createClient(RaftGroup raftGroup) { - RaftProperties properties = new RaftProperties(); - RaftClientConfigKeys.Rpc.setRequestTimeout(properties, - TimeDuration.valueOf(15, TimeUnit.SECONDS)); - - // Since ratis-shell support GENERIC_COMMAND_OPTIONS, here we should - // merge these options to raft properties to make it work. - final Properties sys = System.getProperties(); - sys.stringPropertyNames().forEach(key -> properties.set(key, sys.getProperty(key))); - - ExponentialBackoffRetry retryPolicy = ExponentialBackoffRetry.newBuilder() - .setBaseSleepTime(TimeDuration.valueOf(1000, TimeUnit.MILLISECONDS)) - .setMaxAttempts(10) - .setMaxSleepTime( - TimeDuration.valueOf(100_000, TimeUnit.MILLISECONDS)) - .build(); - return RaftClient.newBuilder() - .setRaftGroup(raftGroup) - .setProperties(properties) - .setRetryPolicy(retryPolicy) - .build(); - } -} diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/RatisShell.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/RatisShell.java index 2e53e31912..cdad683011 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/RatisShell.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/RatisShell.java @@ -17,6 +17,9 @@ */ package org.apache.ratis.shell.cli.sh; +import org.apache.ratis.conf.Parameters; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.shell.cli.AbstractShell; import org.apache.ratis.shell.cli.Command; import org.apache.ratis.shell.cli.sh.command.AbstractParentCommand; @@ -60,7 +63,11 @@ public static void main(String[] args) { } public RatisShell(PrintStream out) { - super(new Context(out)); + this(new Context(out)); + } + + private RatisShell(Context context) { + super(context); } @Override @@ -73,4 +80,39 @@ protected Map loadCommands(Context context) { return 
allParentCommands(context).stream() .collect(Collectors.toMap(Command::getCommandName, Function.identity())); } + + public static Builder newBuilder() { + return new Builder(); + } + + public static class Builder { + private PrintStream printStream = System.out; + private RetryPolicy retryPolicy; + private RaftProperties properties; + private Parameters parameters; + + public Builder setPrintStream(PrintStream printStream) { + this.printStream = printStream; + return this; + } + + public Builder setRetryPolicy(RetryPolicy retryPolicy) { + this.retryPolicy = retryPolicy; + return this; + } + + public Builder setProperties(RaftProperties properties) { + this.properties = properties; + return this; + } + + public Builder setParameters(Parameters parameters) { + this.parameters = parameters; + return this; + } + + public RatisShell build() { + return new RatisShell(new Context(printStream, false, retryPolicy, properties, parameters)); + } + } } diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/AbstractCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/AbstractCommand.java index 20a52a80f8..e1d7f8e0b6 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/AbstractCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/AbstractCommand.java @@ -20,40 +20,31 @@ import org.apache.ratis.shell.cli.Command; import java.io.PrintStream; -import java.net.InetSocketAddress; /** * The base class for all the ratis shell {@link Command} classes. 
*/ public abstract class AbstractCommand implements Command { - private final PrintStream printStream; + private final Context context; protected AbstractCommand(Context context) { - printStream = context.getPrintStream(); + this.context = context; } - public static InetSocketAddress parseInetSocketAddress(String address) { - try { - final String[] hostPortPair = address.split(":"); - if (hostPortPair.length < 2) { - throw new IllegalArgumentException("Unexpected address format ."); - } - return new InetSocketAddress(hostPortPair[0], Integer.parseInt(hostPortPair[1])); - } catch (Exception e) { - throw new IllegalArgumentException("Failed to parse the server address parameter \"" + address + "\".", e); - } + protected Context getContext() { + return context; } protected PrintStream getPrintStream() { - return printStream; + return getContext().getPrintStream(); } protected void printf(String format, Object... args) { - printStream.printf(format, args); + getPrintStream().printf(format, args); } protected void println(Object message) { - printStream.println(message); + getPrintStream().println(message); } } diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/AbstractRatisCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/AbstractRatisCommand.java index 1888c0e0ea..aea1f7c4b3 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/AbstractRatisCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/AbstractRatisCommand.java @@ -18,9 +18,13 @@ package org.apache.ratis.shell.cli.sh.command; import org.apache.commons.cli.Option; -import org.apache.ratis.protocol.*; -import org.apache.ratis.protocol.exceptions.RaftException; -import org.apache.ratis.shell.cli.RaftUtils; +import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeer; +import 
org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.protocol.GroupInfoReply; +import org.apache.ratis.shell.cli.CliUtils; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.Options; import org.apache.ratis.client.RaftClient; @@ -30,11 +34,14 @@ import org.apache.ratis.proto.RaftProtos.RaftPeerRole; import org.apache.ratis.proto.RaftProtos.RoleInfoProto; import org.apache.ratis.util.ProtoUtils; -import org.apache.ratis.util.function.CheckedFunction; import java.io.IOException; +import java.io.PrintStream; import java.net.InetSocketAddress; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -46,32 +53,6 @@ public abstract class AbstractRatisCommand extends AbstractCommand { public static final String PEER_OPTION_NAME = "peers"; public static final String GROUPID_OPTION_NAME = "groupid"; - public static final RaftGroupId DEFAULT_RAFT_GROUP_ID = RaftGroupId.randomId(); - - /** - * Execute a given function with input parameter from the members of a list. - * - * @param list the input parameters - * @param function the function to be executed - * @param parameter type - * @param return value type - * @param the exception type thrown by the given function. - * @return the value returned by the given function. 
- */ - public static K run(Collection list, CheckedFunction function) { - for (T t : list) { - try { - K ret = function.apply(t); - if (ret != null) { - return ret; - } - } catch (Throwable e) { - e.printStackTrace(); - } - } - return null; - } - private RaftGroup raftGroup; private GroupInfoReply groupInfoReply; @@ -81,51 +62,22 @@ protected AbstractRatisCommand(Context context) { @Override public int run(CommandLine cl) throws IOException { - List addresses = new ArrayList<>(); - String peersStr = cl.getOptionValue(PEER_OPTION_NAME); - String[] peersArray = peersStr.split(","); - for (String peer : peersArray) { - addresses.add(parseInetSocketAddress(peer)); - } - - final RaftGroupId raftGroupIdFromConfig = cl.hasOption(GROUPID_OPTION_NAME)? - RaftGroupId.valueOf(UUID.fromString(cl.getOptionValue(GROUPID_OPTION_NAME))) - : DEFAULT_RAFT_GROUP_ID; - - List peers = addresses.stream() - .map(addr -> RaftPeer.newBuilder() - .setId(RaftUtils.getPeerId(addr)) - .setAddress(addr) - .build() - ).collect(Collectors.toList()); - raftGroup = RaftGroup.valueOf(raftGroupIdFromConfig, peers); - try (final RaftClient client = RaftUtils.createClient(raftGroup)) { - final RaftGroupId remoteGroupId; - if (raftGroupIdFromConfig != DEFAULT_RAFT_GROUP_ID) { - remoteGroupId = raftGroupIdFromConfig; - } else { - final List groupIds = run(peers, - p -> client.getGroupManagementApi((p.getId())).list().getGroupIds()); - - if (groupIds == null) { - println("Failed to get group ID from " + peers); - return -1; - } else if (groupIds.size() == 1) { - remoteGroupId = groupIds.get(0); - } else { - println("There are more than one groups, you should specific one. 
" + groupIds); - return -2; - } - } - - groupInfoReply = run(peers, p -> client.getGroupManagementApi((p.getId())).info(remoteGroupId)); - processReply(groupInfoReply, - () -> "Failed to get group info for group id " + remoteGroupId.getUuid() + " from " + peers); + final List peers = CliUtils.parseRaftPeers(cl.getOptionValue(PEER_OPTION_NAME)); + final RaftGroupId groupIdSpecified = CliUtils.parseRaftGroupId(cl.getOptionValue(GROUPID_OPTION_NAME)); + raftGroup = RaftGroup.valueOf(groupIdSpecified != null? groupIdSpecified: RaftGroupId.randomId(), peers); + PrintStream printStream = getPrintStream(); + try (final RaftClient client = newRaftClient()) { + final RaftGroupId remoteGroupId = CliUtils.getGroupId(client, peers, groupIdSpecified, printStream); + groupInfoReply = CliUtils.getGroupInfo(client, peers, remoteGroupId, printStream); raftGroup = groupInfoReply.getGroup(); } return 0; } + protected RaftClient newRaftClient() { + return getContext().newRaftClient(getRaftGroup()); + } + @Override public Options getOptions() { return new Options() @@ -168,14 +120,7 @@ protected RaftPeerProto getLeader(RoleInfoProto roleInfo) { } protected void processReply(RaftClientReply reply, Supplier messageSupplier) throws IOException { - if (reply == null || !reply.isSuccess()) { - final RaftException e = Optional.ofNullable(reply) - .map(RaftClientReply::getException) - .orElseGet(() -> new RaftException("Reply: " + reply)); - final String message = messageSupplier.get(); - printf("%s. 
Error: %s%n", message, e); - throw new IOException(message, e); - } + CliUtils.checkReply(reply, messageSupplier, getPrintStream()); } protected List getIds(String[] optionValues, BiConsumer consumer) { @@ -184,8 +129,8 @@ protected List getIds(String[] optionValues, BiConsumer ids = new ArrayList<>(); for (String address : optionValues) { - final InetSocketAddress serverAddress = parseInetSocketAddress(address); - final RaftPeerId peerId = RaftUtils.getPeerId(serverAddress); + final InetSocketAddress serverAddress = CliUtils.parseInetSocketAddress(address); + final RaftPeerId peerId = CliUtils.getPeerId(serverAddress); consumer.accept(peerId, serverAddress); ids.add(peerId); } diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/Context.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/Context.java index bae98dc0b5..a29cbd0265 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/Context.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/command/Context.java @@ -17,27 +17,54 @@ */ package org.apache.ratis.shell.cli.sh.command; -import org.apache.ratis.thirdparty.com.google.common.io.Closer; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.client.RaftClientConfigKeys; +import org.apache.ratis.conf.Parameters; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.retry.ExponentialBackoffRetry; +import org.apache.ratis.retry.RetryPolicy; +import org.apache.ratis.util.TimeDuration; -import java.io.Closeable; -import java.io.IOException; import java.io.PrintStream; import java.util.Objects; +import java.util.Properties; +import java.util.concurrent.TimeUnit; /** * A context for ratis-shell. 
*/ -public final class Context implements Closeable { +public final class Context { + private static final TimeDuration DEFAULT_REQUEST_TIMEOUT = TimeDuration.valueOf(15, TimeUnit.SECONDS); + private static final RetryPolicy DEFAULT_RETRY_POLICY = ExponentialBackoffRetry.newBuilder() + .setBaseSleepTime(TimeDuration.valueOf(1000, TimeUnit.MILLISECONDS)) + .setMaxAttempts(10) + .setMaxSleepTime(TimeDuration.valueOf(100_000, TimeUnit.MILLISECONDS)) + .build(); + private final PrintStream mPrintStream; - private final Closer mCloser; + + private final boolean cli; + private final RetryPolicy retryPolicy; + private final RaftProperties properties; + private final Parameters parameters; /** * Build a context. * @param printStream the print stream */ public Context(PrintStream printStream) { - mCloser = Closer.create(); - mPrintStream = mCloser.register(Objects.requireNonNull(printStream, "printStream == null")); + this(printStream, true, DEFAULT_RETRY_POLICY, new RaftProperties(), null); + } + + public Context(PrintStream printStream, boolean cli, RetryPolicy retryPolicy, + RaftProperties properties, Parameters parameters) { + mPrintStream = Objects.requireNonNull(printStream, "printStream == null"); + + this.cli = cli; + this.retryPolicy = retryPolicy != null? retryPolicy : DEFAULT_RETRY_POLICY; + this.properties = properties != null? properties : new RaftProperties(); + this.parameters = parameters; } /** @@ -47,8 +74,40 @@ public PrintStream getPrintStream() { return mPrintStream; } - @Override - public void close() throws IOException { - mCloser.close(); + /** Is this from CLI? */ + public boolean isCli() { + return cli; + } + + public RetryPolicy getRetryPolicy() { + return retryPolicy; + } + + public RaftProperties getProperties() { + return properties; + } + + public Parameters getParameters() { + return parameters; + } + + /** Create a new {@link RaftClient} from the given group. 
*/ + public RaftClient newRaftClient(RaftGroup group) { + final RaftProperties p = getProperties(); + if (isCli()) { + RaftClientConfigKeys.Rpc.setRequestTimeout(p, DEFAULT_REQUEST_TIMEOUT); + + // Since ratis-shell support GENERIC_COMMAND_OPTIONS, here we should + // merge these options to raft p to make it work. + final Properties sys = System.getProperties(); + sys.stringPropertyNames().forEach(key -> p.set(key, sys.getProperty(key))); + } + + return RaftClient.newBuilder() + .setRaftGroup(group) + .setProperties(p) + .setParameters(getParameters()) + .setRetryPolicy(getRetryPolicy()) + .build(); } } diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/PauseCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/PauseCommand.java index 4ea2969bac..f8a627a8fa 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/PauseCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/PauseCommand.java @@ -24,7 +24,6 @@ import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.shell.cli.RaftUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; @@ -61,7 +60,7 @@ public int run(CommandLine cl) throws IOException { printf("Peer not found: %s", strAddr); return -1; } - try(final RaftClient raftClient = RaftUtils.createClient(getRaftGroup())) { + try(final RaftClient raftClient = newRaftClient()) { RaftClientReply reply = raftClient.getLeaderElectionManagementApi(peerId).pause(); processReply(reply, () -> String.format("Failed to pause leader election on peer %s", strAddr)); printf(String.format("Successful pause leader election on peer %s", strAddr)); diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/ResumeCommand.java 
b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/ResumeCommand.java index 4b4dc225a0..1b5c80fac1 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/ResumeCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/ResumeCommand.java @@ -24,7 +24,6 @@ import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.shell.cli.RaftUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; @@ -61,7 +60,7 @@ public int run(CommandLine cl) throws IOException { printf("Can't find a sever with the address:%s", strAddr); return -1; } - try(final RaftClient raftClient = RaftUtils.createClient(getRaftGroup())) { + try(final RaftClient raftClient = newRaftClient()) { RaftClientReply reply = raftClient.getLeaderElectionManagementApi(peerId).resume(); processReply(reply, () -> String.format("Failed to resume leader election on peer %s", strAddr)); printf(String.format("Successful pause leader election on peer %s", strAddr)); diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/StepDownCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/StepDownCommand.java index 911a2bb26a..f18921e5ec 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/StepDownCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/StepDownCommand.java @@ -21,7 +21,6 @@ import org.apache.ratis.client.RaftClient; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.shell.cli.RaftUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; @@ -48,7 +47,7 @@ public String getCommandName() { public int run(CommandLine cl) throws IOException { 
super.run(cl); - try (RaftClient client = RaftUtils.createClient(getRaftGroup())) { + try (RaftClient client = newRaftClient()) { RaftPeerId leaderId = RaftPeerId.valueOf(getLeader(getGroupInfoReply().getRoleInfoProto()).getId()); final RaftClientReply transferLeadershipReply = client.admin().transferLeadership(null, leaderId, 60_000); processReply(transferLeadershipReply, () -> "Failed to step down leader"); diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/TransferCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/TransferCommand.java index c71d7f89f6..88cfec9148 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/TransferCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/election/TransferCommand.java @@ -25,7 +25,6 @@ import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.exceptions.TransferLeadershipException; -import org.apache.ratis.shell.cli.RaftUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; import org.apache.ratis.util.TimeDuration; @@ -74,7 +73,7 @@ public int run(CommandLine cl) throws IOException { printf("Peer with address %s not found.", strAddr); return -2; } - try (RaftClient client = RaftUtils.createClient(getRaftGroup())) { + try (RaftClient client = newRaftClient()) { // transfer leadership if (!tryTransfer(client, newLeader, highestPriority, timeout.orElse(timeoutDefault))) { // legacy mode, transfer leadership by setting priority. 
diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/group/GroupInfoCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/group/GroupInfoCommand.java index d2c4e65991..0125440e90 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/group/GroupInfoCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/group/GroupInfoCommand.java @@ -54,6 +54,7 @@ public int run(CommandLine cl) throws IOException { printf("leader info: %s(%s)%n%n", leader.getId().toStringUtf8(), leader.getAddress()); } println(reply.getCommitInfos()); + println(reply.getLogInfoProto()); return 0; } diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/group/GroupListCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/group/GroupListCommand.java index 5bbd1939ad..214ed15078 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/group/GroupListCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/group/GroupListCommand.java @@ -24,7 +24,7 @@ import org.apache.ratis.client.RaftClient; import org.apache.ratis.protocol.GroupListReply; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.shell.cli.RaftUtils; +import org.apache.ratis.shell.cli.CliUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; @@ -61,15 +61,15 @@ public int run(CommandLine cl) throws IOException { address = getRaftGroup().getPeer(peerId).getAddress(); } else if (cl.hasOption(SERVER_ADDRESS_OPTION_NAME)) { address = cl.getOptionValue(SERVER_ADDRESS_OPTION_NAME); - final InetSocketAddress serverAddress = parseInetSocketAddress(address); - peerId = RaftUtils.getPeerId(serverAddress); + final InetSocketAddress serverAddress = CliUtils.parseInetSocketAddress(address); + peerId = CliUtils.getPeerId(serverAddress); } else { throw new IllegalArgumentException( "Both " + PEER_ID_OPTION_NAME + " and " + 
SERVER_ADDRESS_OPTION_NAME + " options are missing."); } - try(final RaftClient raftClient = RaftUtils.createClient(getRaftGroup())) { + try(final RaftClient raftClient = newRaftClient()) { GroupListReply reply = raftClient.getGroupManagementApi(peerId).list(); processReply(reply, () -> String.format("Failed to get group information of peerId %s (server %s)", peerId, address)); diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/local/RaftMetaConfCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/local/RaftMetaConfCommand.java index 231c643ac3..a63b659375 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/local/RaftMetaConfCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/local/RaftMetaConfCommand.java @@ -24,7 +24,8 @@ import org.apache.ratis.proto.RaftProtos.RaftConfigurationProto; import org.apache.ratis.proto.RaftProtos.RaftPeerProto; import org.apache.ratis.proto.RaftProtos.RaftPeerRole; -import org.apache.ratis.shell.cli.RaftUtils; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.shell.cli.CliUtils; import org.apache.ratis.shell.cli.sh.command.AbstractCommand; import org.apache.ratis.shell.cli.sh.command.Context; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; @@ -32,11 +33,14 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.InetSocketAddress; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** * Command for generate a new raft-meta.conf file based on original raft-meta.conf and new peers, @@ -49,6 +53,7 @@ public class RaftMetaConfCommand extends AbstractCommand { private static final String RAFT_META_CONF = "raft-meta.conf"; private static final String NEW_RAFT_META_CONF = "new-raft-meta.conf"; + private static final String SEPARATOR = 
"\\|"; /** * @param context command context */ @@ -69,11 +74,49 @@ public int run(CommandLine cl) throws IOException { printf("peers or path can't be empty."); return -1; } + Set addresses = new HashSet<>(); + Set ids = new HashSet<>(); List raftPeerProtos = new ArrayList<>(); - for (String address : peersStr.split(",")) { - String peerId = RaftUtils.getPeerId(parseInetSocketAddress(address)).toString(); + for (String idWithAddress : peersStr.split(",")) { + String[] peerIdWithAddressArray = idWithAddress.split(SEPARATOR); + + if (peerIdWithAddressArray.length < 1 || peerIdWithAddressArray.length > 2) { + String message = + "Failed to parse peer's ID and address for: %s, " + + "from option: -peers %s. \n" + + "Please make sure to provide list of peers" + + " in format <[P0_ID|]P0_HOST:P0_PORT,[P1_ID|]P1_HOST:P1_PORT,[P2_ID|]P2_HOST:P2_PORT>"; + printf(message, idWithAddress, peersStr); + return -1; + } + InetSocketAddress inetSocketAddress = CliUtils.parseInetSocketAddress( + peerIdWithAddressArray[peerIdWithAddressArray.length - 1]); + String addressString = inetSocketAddress.getHostString() + ":" + inetSocketAddress.getPort(); + if (addresses.contains(addressString)) { + printf("Found duplicated address: %s. Please make sure the address of peer have no duplicated value.", + addressString); + return -1; + } + addresses.add(addressString); + + String peerId; + if (peerIdWithAddressArray.length == 2) { + // Peer ID is provided + peerId = RaftPeerId.getRaftPeerId(peerIdWithAddressArray[0]).toString(); + + if (ids.contains(peerId)) { + printf("Found duplicated ID: %s. 
Please make sure the ID of peer have no duplicated value.", peerId); + return -1; + } + ids.add(peerId); + } else { + // If peer ID is not provided, use host address as peerId value + peerId = CliUtils.getPeerId(inetSocketAddress).toString(); + } + raftPeerProtos.add(RaftPeerProto.newBuilder() - .setId(ByteString.copyFrom(peerId.getBytes(StandardCharsets.UTF_8))).setAddress(address) + .setId(ByteString.copyFrom(peerId.getBytes(StandardCharsets.UTF_8))) + .setAddress(addressString) .setStartupRole(RaftPeerRole.FOLLOWER).build()); } try (InputStream in = Files.newInputStream(Paths.get(path, RAFT_META_CONF)); @@ -93,7 +136,7 @@ public int run(CommandLine cl) throws IOException { @Override public String getUsage() { return String.format("%s" - + " -%s " + + " -%s <[P0_ID|]P0_HOST:P0_PORT,[P1_ID|]P1_HOST:P1_PORT,[P2_ID|]P2_HOST:P2_PORT>" + " -%s ", getCommandName(), PEER_OPTION_NAME, PATH_OPTION_NAME); } diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/AddCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/AddCommand.java index 3c65bb12de..c0d9f8dfe6 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/AddCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/AddCommand.java @@ -25,9 +25,10 @@ import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.shell.cli.RaftUtils; +import org.apache.ratis.shell.cli.CliUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; +import org.apache.ratis.util.Preconditions; import java.io.IOException; import java.net.InetSocketAddress; @@ -45,6 +46,9 @@ public class AddCommand extends AbstractRatisCommand { public static final String ADDRESS_OPTION_NAME = "address"; public static final String PEER_ID_OPTION_NAME = "peerId"; + public static final String 
CLIENT_ADDRESS_OPTION_NAME = "clientAddress"; + public static final String ADMIN_ADDRESS_OPTION_NAME = "adminAddress"; + /** * @param context command context */ @@ -61,32 +65,71 @@ public String getCommandName() { public int run(CommandLine cl) throws IOException { super.run(cl); final Map peersInfo = new HashMap<>(); + final Map clientAddressInfo = new HashMap<>(); + final Map adminAddressInfo = new HashMap<>(); List ids; if (cl.hasOption(ADDRESS_OPTION_NAME) && cl.hasOption(PEER_ID_OPTION_NAME)) { ids = Arrays.stream(cl.getOptionValue(PEER_ID_OPTION_NAME).split(",")) .map(RaftPeerId::getRaftPeerId).collect(Collectors.toList()); - List addresses = - Arrays.stream(cl.getOptionValue(ADDRESS_OPTION_NAME).split(",")) - .map(s -> parseInetSocketAddress(s)).collect(Collectors.toList()); - assert ids.size() == addresses.size(); + final List addresses = Arrays.stream(cl.getOptionValue(ADDRESS_OPTION_NAME).split(",")) + .map(CliUtils::parseInetSocketAddress) + .collect(Collectors.toList()); + Preconditions.assertSame(ids.size(), addresses.size(), "size"); for (int i = 0; i < ids.size(); i++) { peersInfo.put(ids.get(i), addresses.get(i)); } + + if (cl.hasOption(CLIENT_ADDRESS_OPTION_NAME)) { + final List clientAddresses = + Arrays.stream(cl.getOptionValue(CLIENT_ADDRESS_OPTION_NAME).split(",")) + .map(CliUtils::parseInetSocketAddress) + .collect(Collectors.toList()); + Preconditions.assertSame(ids.size(), clientAddresses.size(), "clientAddress size"); + for (int i = 0; i < ids.size(); i++) { + clientAddressInfo.put(ids.get(i), clientAddresses.get(i)); + } + } + + if (cl.hasOption(ADMIN_ADDRESS_OPTION_NAME)) { + final List adminAddresses = + Arrays.stream(cl.getOptionValue(ADMIN_ADDRESS_OPTION_NAME).split(",")) + .map(CliUtils::parseInetSocketAddress) + .collect(Collectors.toList()); + Preconditions.assertSame(ids.size(), adminAddresses.size(), "adminAddress size"); + for (int i = 0; i < ids.size(); i++) { + adminAddressInfo.put(ids.get(i), adminAddresses.get(i)); + } + } } 
else if (cl.hasOption(ADDRESS_OPTION_NAME)) { ids = getIds(cl.getOptionValue(ADDRESS_OPTION_NAME).split(","), peersInfo::put); + if (cl.hasOption(CLIENT_ADDRESS_OPTION_NAME) || cl.hasOption(ADMIN_ADDRESS_OPTION_NAME)) { + throw new IllegalArgumentException( + "When using auto-generated peer IDs, clientAddress and adminAddress are not supported."); + } } else { throw new IllegalArgumentException( "Both " + PEER_ID_OPTION_NAME + " and " + ADDRESS_OPTION_NAME + " options are missing."); } - try (RaftClient client = RaftUtils.createClient(getRaftGroup())) { + try (RaftClient client = newRaftClient()) { final Stream remaining = getPeerStream(RaftPeerRole.FOLLOWER); - final Stream adding = ids.stream().map(raftPeerId -> RaftPeer.newBuilder() - .setId(raftPeerId) - .setAddress(peersInfo.get(raftPeerId)) - .setPriority(0) - .build()); + final Stream adding = ids.stream().map(raftPeerId -> { + RaftPeer.Builder builder = RaftPeer.newBuilder() + .setId(raftPeerId) + .setAddress(peersInfo.get(raftPeerId)) + .setPriority(0); + + if (clientAddressInfo.containsKey(raftPeerId)) { + builder.setClientAddress(clientAddressInfo.get(raftPeerId)); + } + + if (adminAddressInfo.containsKey(raftPeerId)) { + builder.setAdminAddress(adminAddressInfo.get(raftPeerId)); + } + + return builder.build(); + }); final List peers = Stream.concat(remaining, adding).collect(Collectors.toList()); final List listeners = getPeerStream(RaftPeerRole.LISTENER) .collect(Collectors.toList()); @@ -103,9 +146,12 @@ public String getUsage() { return String.format("%s" + " -%s " + " [-%s ]" - + " <[-%s ]|[-%s ]>", + + " <[-%s ]|[-%s ]>" + + " [-%s ]" + + " [-%s ]", getCommandName(), PEER_OPTION_NAME, GROUPID_OPTION_NAME, - ADDRESS_OPTION_NAME, PEER_ID_OPTION_NAME); + ADDRESS_OPTION_NAME, PEER_ID_OPTION_NAME, + CLIENT_ADDRESS_OPTION_NAME, ADMIN_ADDRESS_OPTION_NAME); } @Override @@ -122,8 +168,19 @@ public Options getOptions() { .desc("The address information of ratis peers") .build()) .addOption(Option.builder() - 
.option(PEER_ID_OPTION_NAME).hasArg() + .option(PEER_ID_OPTION_NAME) + .hasArg() .desc("The peer id of ratis peers") + .build()) + .addOption(Option.builder() + .option(CLIENT_ADDRESS_OPTION_NAME) + .hasArg() + .desc("The client address information of ratis peers") + .build()) + .addOption(Option.builder() + .option(ADMIN_ADDRESS_OPTION_NAME) + .hasArg() + .desc("The admin address information of ratis peers") .build()); } diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/RemoveCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/RemoveCommand.java index 5918516070..904a897880 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/RemoveCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/RemoveCommand.java @@ -25,7 +25,6 @@ import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.shell.cli.RaftUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; @@ -66,7 +65,7 @@ public int run(CommandLine cl) throws IOException { throw new IllegalArgumentException( "Both " + PEER_ID_OPTION_NAME + " and " + ADDRESS_OPTION_NAME + " options are missing."); } - try (RaftClient client = RaftUtils.createClient(getRaftGroup())) { + try (RaftClient client = newRaftClient()) { final List peers = getPeerStream(RaftPeerRole.FOLLOWER) .filter(raftPeer -> !ids.contains(raftPeer.getId())).collect(Collectors.toList()); final List listeners = getPeerStream(RaftPeerRole.LISTENER) diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/SetPriorityCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/SetPriorityCommand.java index 01e81f3c34..2834ef5cc8 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/SetPriorityCommand.java +++ 
b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/peer/SetPriorityCommand.java @@ -24,7 +24,6 @@ import org.apache.ratis.proto.RaftProtos.RaftPeerRole; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeer; -import org.apache.ratis.shell.cli.RaftUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; @@ -63,7 +62,7 @@ public int run(CommandLine cl) throws IOException { addressPriorityMap.put(str[0], Integer.parseInt(str[1])); } - try (RaftClient client = RaftUtils.createClient(getRaftGroup())) { + try (RaftClient client = newRaftClient()) { final List peers = getPeerStream(RaftPeerRole.FOLLOWER).map(peer -> { final Integer newPriority = addressPriorityMap.get(peer.getAddress()); final int priority = newPriority != null ? newPriority : peer.getPriority(); diff --git a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/snapshot/TakeSnapshotCommand.java b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/snapshot/TakeSnapshotCommand.java index 10bac34975..521b22e4e3 100644 --- a/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/snapshot/TakeSnapshotCommand.java +++ b/ratis-shell/src/main/java/org/apache/ratis/shell/cli/sh/snapshot/TakeSnapshotCommand.java @@ -23,7 +23,6 @@ import org.apache.ratis.client.RaftClient; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftPeerId; -import org.apache.ratis.shell.cli.RaftUtils; import org.apache.ratis.shell.cli.sh.command.AbstractRatisCommand; import org.apache.ratis.shell.cli.sh.command.Context; @@ -58,7 +57,7 @@ public int run(CommandLine cl) throws IOException { } else { timeout = 3000; } - try(final RaftClient raftClient = RaftUtils.createClient(getRaftGroup())) { + try(final RaftClient raftClient = newRaftClient()) { if (cl.hasOption(PEER_ID_OPTION_NAME)) { peerId = RaftPeerId.getRaftPeerId(cl.getOptionValue(PEER_ID_OPTION_NAME)); } else { diff --git 
a/ratis-test/pom.xml b/ratis-test/pom.xml index 2021e06ad4..577262d84c 100644 --- a/ratis-test/pom.xml +++ b/ratis-test/pom.xml @@ -17,14 +17,14 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-test Apache Ratis Test - 1.76 + 1.84 @@ -132,8 +132,8 @@ - junit - junit + org.junit.jupiter + junit-jupiter-engine test @@ -141,6 +141,16 @@ junit-jupiter-api test + + org.junit.jupiter + junit-jupiter-params + test + + + org.junit.platform + junit-platform-launcher + test + org.mockito mockito-core diff --git a/ratis-test/src/test/java/org/apache/ratis/RaftLogTruncateTests.java b/ratis-test/src/test/java/org/apache/ratis/RaftLogTruncateTests.java index c21110ea0e..9ea78e47cd 100644 --- a/ratis-test/src/test/java/org/apache/ratis/RaftLogTruncateTests.java +++ b/ratis-test/src/test/java/org/apache/ratis/RaftLogTruncateTests.java @@ -36,11 +36,13 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; +import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.event.Level; import java.util.ArrayList; @@ -53,6 +55,7 @@ import static org.apache.ratis.RaftTestUtil.waitForLeader; +@Timeout(value = 200) public abstract class RaftLogTruncateTests extends BaseTest implements MiniRaftCluster.Factory.Get { public static final int NUM_SERVERS = 5; @@ -83,11 +86,6 @@ static SimpleMessage[] arraycopy(SimpleMessage[] src1, SimpleMessage[] src2) { RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax(p, TimeDuration.ONE_SECOND.multiply(2)); } - @Override - public int getGlobalTimeoutSeconds() { - return 200; - } - @Test public void testLogTruncate() throws Exception { runWithNewCluster(NUM_SERVERS, 
this::runTestLogTruncate); @@ -178,9 +176,11 @@ void runTestLogTruncate(MiniRaftCluster cluster, RaftServer.Division oldLeader, LOG.info("done"); } - // kill the old leader - LOG.info("Before killServer {}: {}", oldLeader.getId(), cluster.printServers()); - cluster.killServer(oldLeader.getId()); + // kill remaining peers + LOG.info("Before killServer {}: {}", remainingPeers, cluster.printServers()); + for (RaftPeerId p : remainingPeers) { + cluster.killServer(p); + } LOG.info("After killServer {}: {}", remainingPeers, cluster.printServers()); // restart the earlier followers @@ -198,8 +198,10 @@ void runTestLogTruncate(MiniRaftCluster cluster, RaftServer.Division oldLeader, newLeaderLog.getLastEntryTermIndex()); Assertions.assertTrue(killedPeers.contains(newLeader.getId())); - // restart the old leader - cluster.restartServer(oldLeader.getId(), false); + // restart remaining peers + for (RaftPeerId p : remainingPeers) { + cluster.restartServer(p, false); + } // check RaftLog truncate for (RaftPeerId f : remainingPeers) { @@ -219,8 +221,11 @@ void runTestLogTruncate(MiniRaftCluster cluster, RaftServer.Division oldLeader, for (RaftPeer peer : cluster.getGroup().getPeers()) { final RaftServer.Division division = cluster.getDivision(peer.getId()); assertLogEntries(division, oldLeaderTerm, expectedMessages); - assertEmptyTransactionContextMap(division); - } + final String name = "assertEmptyTransactionContextMap:" + division.getId(); + JavaUtils.attempt(() -> assertEmptyTransactionContextMap(division), + 10, HUNDRED_MILLIS, name, LOG); + + } if (!exceptions.isEmpty()) { LOG.info("{} exceptions", exceptions.size()); @@ -231,9 +236,9 @@ void runTestLogTruncate(MiniRaftCluster cluster, RaftServer.Division oldLeader, } } - static void assertEmptyTransactionContextMap(RaftServer.Division division) { - Assertions.assertTrue(RaftServerTestUtil.getTransactionContextMap(division).isEmpty(), - () -> division.getId() + " TransactionContextMap is non-empty"); + static void 
assertEmptyTransactionContextMap(RaftServer.Division d) { + final Map> map = RaftServerTestUtil.getTransactionContextMap(d); + Assertions.assertTrue(map.isEmpty(), () -> d.getId() + " TransactionContextMap is non-empty: " + map); } static void assertEntriesInTransactionContextMap(RaftServer.Division division, diff --git a/ratis-test/src/test/java/org/apache/ratis/TestRaftServerNoLeaderTimeout.java b/ratis-test/src/test/java/org/apache/ratis/TestRaftServerNoLeaderTimeout.java index 7f4da0e0fb..61bacc3cf9 100644 --- a/ratis-test/src/test/java/org/apache/ratis/TestRaftServerNoLeaderTimeout.java +++ b/ratis-test/src/test/java/org/apache/ratis/TestRaftServerNoLeaderTimeout.java @@ -28,10 +28,10 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.io.IOException; @@ -48,25 +48,25 @@ public class TestRaftServerNoLeaderTimeout extends BaseTest { private static final int NUM_SERVERS = 3; - private static final RaftProperties properties = new RaftProperties(); + private static final RaftProperties PROPERTIES = new RaftProperties(); private final MiniRaftClusterWithSimulatedRpc cluster = MiniRaftClusterWithSimulatedRpc .FACTORY.newCluster(NUM_SERVERS, getProperties()); private static RaftProperties getProperties() { - RaftServerConfigKeys.Notification.setNoLeaderTimeout(properties, TimeDuration.valueOf(1, TimeUnit.SECONDS)); - properties.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, + RaftServerConfigKeys.Notification.setNoLeaderTimeout(PROPERTIES, TimeDuration.valueOf(1, TimeUnit.SECONDS)); + PROPERTIES.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, SimpleStateMachine4Testing.class, 
StateMachine.class); - return properties; + return PROPERTIES; } - @Before + @BeforeEach public void setup() throws IOException { - Assert.assertNull(cluster.getLeader()); + Assertions.assertNull(cluster.getLeader()); cluster.start(); } - @After + @AfterEach public void tearDown() { if (cluster != null) { cluster.shutdown(); @@ -91,10 +91,10 @@ public void testLeaderElectionDetection() throws Exception { RaftProtos.RoleInfoProto roleInfoProto = SimpleStateMachine4Testing.get(healthyFollower).getLeaderElectionTimeoutInfo(); - Assert.assertNotNull(roleInfoProto); + Assertions.assertNotNull(roleInfoProto); - Assert.assertEquals(roleInfoProto.getRole(), RaftProtos.RaftPeerRole.CANDIDATE); + Assertions.assertEquals(roleInfoProto.getRole(), RaftProtos.RaftPeerRole.CANDIDATE); final long noLeaderTimeoutMs = noLeaderTimeout.toLong(TimeUnit.MILLISECONDS); - Assert.assertTrue(roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() > noLeaderTimeoutMs); + Assertions.assertTrue(roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() > noLeaderTimeoutMs); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/TestRaftServerSlownessDetection.java b/ratis-test/src/test/java/org/apache/ratis/TestRaftServerSlownessDetection.java index 0ea0fe9cef..895acccfbf 100644 --- a/ratis-test/src/test/java/org/apache/ratis/TestRaftServerSlownessDetection.java +++ b/ratis-test/src/test/java/org/apache/ratis/TestRaftServerSlownessDetection.java @@ -30,12 +30,6 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; - import java.io.IOException; import java.util.List; @@ -43,13 +37,19 @@ import java.util.concurrent.TimeUnit; import org.apache.ratis.thirdparty.com.codahale.metrics.Gauge; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; /** * Test Raft Server Slowness detection and notification to Leader's statemachine. */ //TODO: fix StateMachine.notifySlowness(..); see RATIS-370 -@Ignore +@Disabled +@SuppressWarnings({"deprecation", "rawtypes"}) public class TestRaftServerSlownessDetection extends BaseTest { static { Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG); @@ -57,26 +57,26 @@ public class TestRaftServerSlownessDetection extends BaseTest { public static final int NUM_SERVERS = 3; - protected static final RaftProperties properties = new RaftProperties(); + protected static final RaftProperties PROPERTIES = new RaftProperties(); private final MiniRaftClusterWithSimulatedRpc cluster = MiniRaftClusterWithSimulatedRpc .FACTORY.newCluster(NUM_SERVERS, getProperties()); public RaftProperties getProperties() { RaftServerConfigKeys.Rpc - .setSlownessTimeout(properties, TimeDuration.valueOf(1, TimeUnit.SECONDS)); - properties.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, + .setSlownessTimeout(PROPERTIES, TimeDuration.valueOf(1, TimeUnit.SECONDS)); + PROPERTIES.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, SimpleStateMachine4Testing.class, StateMachine.class); - return properties; + return PROPERTIES; } - @Before + @BeforeEach public void setup() throws IOException { - Assert.assertNull(cluster.getLeader()); + Assertions.assertNull(cluster.getLeader()); cluster.start(); } - @After + @AfterEach public void tearDown() { if (cluster != null) { cluster.shutdown(); @@ -107,23 +107,23 @@ public void testSlownessDetection() throws Exception { Thread.sleep( slownessTimeout * 2); long followerHeartBeatElapsedMetricNew = (long) metric.getValue(); - Assert.assertTrue(followerHeartBeatElapsedMetricNew > followerHeartBeatElapsedMetric); + Assertions.assertTrue(followerHeartBeatElapsedMetricNew > followerHeartBeatElapsedMetric); // Followers should not get 
any failed not notification for (RaftServer.Division followerServer : cluster.getFollowers()) { - Assert.assertNull(SimpleStateMachine4Testing.get(followerServer).getSlownessInfo()); + Assertions.assertNull(SimpleStateMachine4Testing.get(followerServer).getSlownessInfo()); } // the leader should get notification that the follower has failed now RaftProtos.RoleInfoProto roleInfoProto = SimpleStateMachine4Testing.get(cluster.getLeader()).getSlownessInfo(); - Assert.assertNotNull(roleInfoProto); + Assertions.assertNotNull(roleInfoProto); List followers = roleInfoProto.getLeaderInfo().getFollowerInfoList(); //Assert that the node shutdown is lagging behind for (RaftProtos.ServerRpcProto serverProto : followers) { if (RaftPeerId.valueOf(serverProto.getId().getId()).equals(failedFollower.getId())) { - Assert.assertTrue(serverProto.getLastRpcElapsedTimeMs() > slownessTimeout); + Assertions.assertTrue(serverProto.getLastRpcElapsedTimeMs() > slownessTimeout); } } } diff --git a/ratis-test/src/test/java/org/apache/ratis/client/TestClientProtoUtils.java b/ratis-test/src/test/java/org/apache/ratis/client/TestClientProtoUtils.java index 9d85320539..56bf94a84e 100644 --- a/ratis-test/src/test/java/org/apache/ratis/client/TestClientProtoUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/client/TestClientProtoUtils.java @@ -29,8 +29,8 @@ import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.Timestamp; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.concurrent.TimeUnit; @@ -71,7 +71,7 @@ void runTestToRaftClientRequestProto(int n, SizeInBytes messageSize) final RaftClientRequest computed = ClientProtoUtils.toRaftClientRequest(proto); final TimeDuration r = startTime.elapsedTime().subtract(p); - Assert.assertEquals(request.getMessage().getContent(), computed.getMessage().getContent()); + 
Assertions.assertEquals(request.getMessage().getContent(), computed.getMessage().getContent()); toProto = toProto.add(p); toRequest = toRequest.add(r); diff --git a/ratis-test/src/test/java/org/apache/ratis/conf/TestConfUtils.java b/ratis-test/src/test/java/org/apache/ratis/conf/TestConfUtils.java index 1600da18dc..859c597b52 100644 --- a/ratis-test/src/test/java/org/apache/ratis/conf/TestConfUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/conf/TestConfUtils.java @@ -22,10 +22,42 @@ import org.apache.ratis.client.RaftClientConfigKeys; import org.apache.ratis.grpc.GrpcConfigKeys; import org.apache.ratis.netty.NettyConfigKeys; +import org.apache.ratis.rpc.RpcType; import org.apache.ratis.server.RaftServerConfigKeys; -import org.junit.Test; +import org.apache.ratis.server.simulation.SimulatedRpc; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; public class TestConfUtils extends BaseTest { + @Test + public void testLogging() { + final AtomicInteger count = new AtomicInteger(); + final Consumer logger = s -> { + System.out.println("log: " + s); + count.incrementAndGet(); + }; + + final RaftProperties properties = new RaftProperties(); + final RpcType simulated = SimulatedRpc.get(); + + // get a value the first time + final RpcType defaultType = RaftConfigKeys.Rpc.type(properties, logger); + Assertions.assertEquals(1, count.get()); + Assertions.assertNotEquals(defaultType, simulated); + + // get the same value the second time + RaftConfigKeys.Rpc.type(properties, logger); + Assertions.assertEquals(1, count.get()); + + // get a different value + RaftConfigKeys.Rpc.setType(properties, SimulatedRpc.get()); + RaftConfigKeys.Rpc.type(properties, logger); + Assertions.assertEquals(2, count.get()); + } + @Test public void testRaftConfigKeys() { ConfUtils.printAll(RaftConfigKeys.class); diff --git 
a/ratis-test/src/test/java/org/apache/ratis/conf/TestRaftProperties.java b/ratis-test/src/test/java/org/apache/ratis/conf/TestRaftProperties.java index c65d00f43e..d17d6fb6f6 100644 --- a/ratis-test/src/test/java/org/apache/ratis/conf/TestRaftProperties.java +++ b/ratis-test/src/test/java/org/apache/ratis/conf/TestRaftProperties.java @@ -17,8 +17,10 @@ */ package org.apache.ratis.conf; -import org.junit.Assert; -import org.junit.Test; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; public class TestRaftProperties { enum Type {APPEND_ENTRIES} @@ -29,71 +31,72 @@ static class Request_Vote { static final String KEY = "key"; static void setUnderscoreValue(RaftProperties p, String valueWithUnderscore) { - Assert.assertTrue(valueWithUnderscore.contains("_")); + Assertions.assertTrue(valueWithUnderscore.contains("_")); p.set(KEY, valueWithUnderscore); } static void setNonUnderscoreValue(RaftProperties p, String valueWithoutUnderscore) { - Assert.assertFalse(valueWithoutUnderscore.contains("_")); + Assertions.assertFalse(valueWithoutUnderscore.contains("_")); p.set(KEY, valueWithoutUnderscore); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testUnderscore() { final RaftProperties p = new RaftProperties(); { // boolean - Assert.assertNull(p.getBoolean(KEY, null)); + Assertions.assertNull(p.getBoolean(KEY, null)); setNonUnderscoreValue(p, "true"); - Assert.assertTrue(p.getBoolean(KEY, null)); + Assertions.assertTrue(p.getBoolean(KEY, null)); setNonUnderscoreValue(p, "false"); - Assert.assertFalse(p.getBoolean(KEY, null)); + Assertions.assertFalse(p.getBoolean(KEY, null)); setUnderscoreValue(p, "fa_lse"); - Assert.assertNull(p.getBoolean(KEY, null)); + Assertions.assertNull(p.getBoolean(KEY, null)); p.unset(KEY); } { //int final Integer expected = 1000000; - Assert.assertNull(p.getInt(KEY, null)); + Assertions.assertNull(p.getInt(KEY, null)); setUnderscoreValue(p, "1_000_000"); - 
Assert.assertEquals(expected, p.getInt(KEY, null)); + Assertions.assertEquals(expected, p.getInt(KEY, null)); setNonUnderscoreValue(p, "1000000"); - Assert.assertEquals(expected, p.getInt(KEY, null)); + Assertions.assertEquals(expected, p.getInt(KEY, null)); p.unset(KEY); } { // long final Long expected = 1_000_000_000_000L; - Assert.assertNull(p.getLong(KEY, null)); + Assertions.assertNull(p.getLong(KEY, null)); setUnderscoreValue(p, "1_000_000_000_000"); - Assert.assertEquals(expected, p.getLong(KEY, null)); + Assertions.assertEquals(expected, p.getLong(KEY, null)); setNonUnderscoreValue(p, "1000000000000"); - Assert.assertEquals(expected, p.getLong(KEY, null)); + Assertions.assertEquals(expected, p.getLong(KEY, null)); p.unset(KEY); } { // File final String expected = "1_000_000"; - Assert.assertNull(p.getFile(KEY, null)); + Assertions.assertNull(p.getFile(KEY, null)); setUnderscoreValue(p, expected); - Assert.assertEquals(expected, p.getFile(KEY, null).getName()); + Assertions.assertEquals(expected, p.getFile(KEY, null).getName()); p.unset(KEY); } { // class final Type expected = Type.APPEND_ENTRIES; - Assert.assertNull(p.getEnum(KEY, Type.class, null)); + Assertions.assertNull(p.getEnum(KEY, Type.class, null)); setUnderscoreValue(p, expected.name()); - Assert.assertEquals(expected, p.getEnum(KEY, Type.class, null)); + Assertions.assertEquals(expected, p.getEnum(KEY, Type.class, null)); p.unset(KEY); } { // enum final Class expected = Request_Vote.class; - Assert.assertNull(p.getClass(KEY, null)); + Assertions.assertNull(p.getClass(KEY, null)); setUnderscoreValue(p, expected.getName()); - Assert.assertEquals(expected, p.getClass(KEY, null)); + Assertions.assertEquals(expected, p.getClass(KEY, null)); p.unset(KEY); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamAsyncClusterTests.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamAsyncClusterTests.java index 8c315070e5..eb25a369e6 100644 --- 
a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamAsyncClusterTests.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamAsyncClusterTests.java @@ -34,8 +34,9 @@ import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedBiFunction; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.event.Level; import java.io.IOException; @@ -47,15 +48,11 @@ import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +@Timeout(value = 300) public abstract class DataStreamAsyncClusterTests extends DataStreamClusterTests { final Executor executor = Executors.newFixedThreadPool(16); - @Override - public int getGlobalTimeoutSeconds() { - return 300; - } - @Test public void testSingleStreamsMultipleServers() throws Exception { Slf4jUtils.setLogLevel(NettyClientStreamRpc.LOG, Level.TRACE); @@ -103,15 +100,18 @@ void runTestDataStream(CLUSTER cluster) throws Exception { long runMultipleStreams(CLUSTER cluster, boolean stepDownLeader) { final List> futures = new ArrayList<>(); - futures.add(CompletableFuture.supplyAsync(() -> runTestDataStream(cluster, 5, 10, 100_000, 10, stepDownLeader), executor)); - futures.add(CompletableFuture.supplyAsync(() -> runTestDataStream(cluster, 2, 20, 1_000, 5_000, stepDownLeader), executor)); + futures.add(CompletableFuture.supplyAsync(() -> + runTestDataStream(cluster, 5, 10, 100_000, 10, stepDownLeader), executor)); + futures.add(CompletableFuture.supplyAsync(() -> + runTestDataStream(cluster, 2, 20, 1_000, 5_000, stepDownLeader), executor)); return futures.stream() .map(CompletableFuture::join) .max(Long::compareTo) .orElseThrow(IllegalStateException::new); } - void runTestDataStream(CLUSTER cluster, boolean stepDownLeader, CheckedBiFunction runMethod) throws Exception { + void 
runTestDataStream(CLUSTER cluster, boolean stepDownLeader, + CheckedBiFunction runMethod) throws Exception { RaftTestUtil.waitForLeader(cluster); final long maxIndex = runMethod.apply(cluster, stepDownLeader); @@ -130,14 +130,14 @@ void runTestDataStream(CLUSTER cluster, boolean stepDownLeader, CheckedBiFunctio // wait for all servers to catch up try (RaftClient client = cluster.createClient()) { RaftClientReply reply = client.async().watch(maxIndex, ReplicationLevel.ALL).join(); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } // assert all streams are linked for (RaftServer proxy : cluster.getServers()) { final RaftServer.Division impl = proxy.getDivision(cluster.getGroupId()); final MultiDataStreamStateMachine stateMachine = (MultiDataStreamStateMachine) impl.getStateMachine(); for (SingleDataStream s : stateMachine.getStreams()) { - Assert.assertFalse(s.getDataChannel().isOpen()); + Assertions.assertFalse(s.getDataChannel().isOpen()); DataStreamTestUtils.assertLogEntry(impl, s); } } @@ -150,7 +150,7 @@ Long runTestDataStream( futures.add(CompletableFuture.supplyAsync( () -> runTestDataStream(cluster, numStreams, bufferSize, bufferNum, stepDownLeader), executor)); } - Assert.assertEquals(numClients, futures.size()); + Assertions.assertEquals(numClients, futures.size()); return futures.stream() .map(CompletableFuture::join) .max(Long::compareTo) @@ -174,7 +174,7 @@ long runTestDataStream(CLUSTER cluster, int numStreams, int bufferSize, int buff futures.add(CompletableFuture.supplyAsync(() -> DataStreamTestUtils.writeAndCloseAndAssertReplies( servers, leader, out, bufferSize, bufferNum, client.getId(), stepDownLeader).join(), executor)); } - Assert.assertEquals(numStreams, futures.size()); + Assertions.assertEquals(numStreams, futures.size()); return futures.stream() .map(CompletableFuture::join) .map(RaftClientReply::getLogIndex) diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamBaseTest.java 
b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamBaseTest.java index 3f241a8b9c..ee56f334e1 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamBaseTest.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamBaseTest.java @@ -34,7 +34,7 @@ import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.util.CollectionUtils; -import org.junit.Assert; +import org.junit.jupiter.api.Assertions; import java.io.IOException; import java.util.ArrayList; @@ -44,8 +44,8 @@ abstract class DataStreamBaseTest extends BaseTest { RaftConfiguration getRaftConf() { - final List peers = servers.stream().map(Server::getPeer).collect(Collectors.toList()); - return RaftServerTestUtil.newRaftConfiguration(peers); + final List peerList = servers.stream().map(Server::getPeer).collect(Collectors.toList()); + return RaftServerTestUtil.newRaftConfiguration(peerList); } static class Server { @@ -67,7 +67,7 @@ RaftServer getRaftServer() { return raftServer; } - void start() { + void start() throws IOException { dataStreamServer.getServerRpc().start(); } @@ -90,21 +90,21 @@ Server getPrimaryServer() { return servers.get(0); } - void setup(RaftGroupId groupId, List peers, List raftServers) { - raftGroup = RaftGroup.valueOf(groupId, peers); - this.peers = peers; - servers = new ArrayList<>(peers.size()); + void setup(RaftGroupId groupId, List peerList, List raftServers) throws Exception { + raftGroup = RaftGroup.valueOf(groupId, peerList); + this.peers = peerList; + servers = new ArrayList<>(peerList.size()); // start stream servers on raft peers. 
- for (int i = 0; i < peers.size(); i++) { - final Server server = new Server(peers.get(i), raftServers.get(i)); - server.addRaftPeers(removePeerFromList(peers.get(i), peers)); + for (int i = 0; i < peerList.size(); i++) { + final Server server = new Server(peerList.get(i), raftServers.get(i)); + server.addRaftPeers(removePeerFromList(peerList.get(i), peerList)); server.start(); servers.add(server); } } - private Collection removePeerFromList(RaftPeer peer, List peers) { - List otherPeers = new ArrayList<>(peers); + private Collection removePeerFromList(RaftPeer peer, List peerList) { + List otherPeers = new ArrayList<>(peerList); otherPeers.remove(peer); return otherPeers; } @@ -132,10 +132,10 @@ void runTestMockCluster(ClientId clientId, int bufferSize, int bufferNum, .stream(null, DataStreamTestUtils.getRoutingTableChainTopology(peers, getPrimaryServer().getPeer())); if (headerException != null) { final DataStreamReply headerReply = out.getHeaderFuture().join(); - Assert.assertFalse(headerReply.isSuccess()); + Assertions.assertFalse(headerReply.isSuccess()); final RaftClientReply clientReply = ClientProtoUtils.toRaftClientReply( ((DataStreamReplyByteBuffer)headerReply).slice()); - Assert.assertTrue(clientReply.getException().getMessage().contains(headerException.getMessage())); + Assertions.assertTrue(clientReply.getException().getMessage().contains(headerException.getMessage())); return; } @@ -143,11 +143,11 @@ void runTestMockCluster(ClientId clientId, int bufferSize, int bufferNum, CollectionUtils.as(servers, Server::getRaftServer), null, out, bufferSize, bufferNum, client.getId(), false).join(); if (expectedException != null) { - Assert.assertFalse(clientReply.isSuccess()); - Assert.assertTrue(clientReply.getException().getMessage().contains( + Assertions.assertFalse(clientReply.isSuccess()); + Assertions.assertTrue(clientReply.getException().getMessage().contains( expectedException.getMessage())); } else { - Assert.assertTrue(clientReply.isSuccess()); + 
Assertions.assertTrue(clientReply.isSuccess()); } } } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java index 352d98e650..dabc93dda2 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java @@ -36,8 +36,8 @@ import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.Timestamp; import org.apache.ratis.util.function.CheckedConsumer; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.io.File; import java.nio.channels.FileChannel; @@ -52,6 +52,7 @@ public abstract class DataStreamClusterTests ex implements MiniRaftCluster.Factory.Get { { setStateMachine(MultiDataStreamStateMachine.class); + DataStreamTestUtils.enableResourceLeakDetector(); } public static final int NUM_SERVERS = 3; @@ -115,9 +116,9 @@ void runTestInvalidPrimaryInRoutingTable(CLUSTER cluster) throws Exception { } } - Assert.assertNotNull( - "Cannot find peer other than the primary", notPrimary); - Assert.assertNotEquals(primaryServer, notPrimary); + Assertions.assertNotNull(notPrimary, + "Cannot find peer other than the primary"); + Assertions.assertNotEquals(primaryServer, notPrimary); try (RaftClient client = cluster.createClient(primaryServer)) { RoutingTable routingTableWithWrongPrimary = @@ -156,7 +157,7 @@ static CheckedConsumer transferToWritableByteCh public void accept(DataStreamOutputImpl out) throws Exception { try (FileChannel in = FileUtils.newFileChannel(f, StandardOpenOption.READ)) { final long transferred = in.transferTo(0, size, out.getWritableByteChannel()); - Assert.assertEquals(size, transferred); + Assertions.assertEquals(size, transferred); } } @@ -196,7 +197,7 @@ void assertLogEntry(CLUSTER cluster, RaftClientRequest request) throws 
Exception final RaftServer.Division impl = proxy.getDivision(cluster.getGroupId()); final MultiDataStreamStateMachine stateMachine = (MultiDataStreamStateMachine) impl.getStateMachine(); final SingleDataStream s = stateMachine.getSingleDataStream(request); - Assert.assertFalse(s.getDataChannel().isOpen()); + Assertions.assertFalse(s.getDataChannel().isOpen()); DataStreamTestUtils.assertLogEntry(impl, s); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java index 738cb0359c..989b6cd2b2 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java @@ -46,10 +46,13 @@ import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.thirdparty.io.netty.buffer.ByteBufUtil; +import org.apache.ratis.thirdparty.io.netty.util.ResourceLeakDetector; +import org.apache.ratis.thirdparty.io.netty.util.ResourceLeakDetector.Level; import org.apache.ratis.util.CollectionUtils; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.JavaUtils; -import org.junit.Assert; +import org.junit.jupiter.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -87,7 +90,7 @@ static ByteBuffer initBuffer(int offset, int size) { buffer.put(pos2byte(offset + j)); } buffer.flip(); - Assert.assertEquals(length, buffer.remaining()); + Assertions.assertEquals(length, buffer.remaining()); return buffer; } @@ -117,7 +120,7 @@ public int read(ByteBuffer dst) { FileUtils.createDirectories(f.getParentFile()); try(FileChannel out = FileUtils.newFileChannel(f, StandardOpenOption.CREATE, StandardOpenOption.WRITE)) { final long transferred = out.transferFrom(source, 0, size); - Assert.assertEquals(size, 
transferred); + Assertions.assertEquals(size, transferred); } } @@ -149,6 +152,7 @@ class MultiDataStreamStateMachine extends BaseStateMachine { @Override public CompletableFuture stream(RaftClientRequest request) { final SingleDataStream s = new SingleDataStream(request); + LOG.info("{} put {}, {}", this, ClientInvocationId.valueOf(request), s); streams.put(ClientInvocationId.valueOf(request), s); return CompletableFuture.completedFuture(s); } @@ -177,7 +181,9 @@ SingleDataStream getSingleDataStream(RaftClientRequest request) { } SingleDataStream getSingleDataStream(ClientInvocationId invocationId) { - return streams.get(invocationId); + final SingleDataStream s = streams.get(invocationId); + LOG.info("XXX {}: get {} return {}", this, invocationId, s); + return s; } Collection getStreams() { @@ -253,7 +259,7 @@ public int write(ByteBuffer src) { } final int remaining = src.remaining(); for (; src.remaining() > 0; ) { - Assert.assertEquals(pos2byte(bytesWritten), src.get()); + Assertions.assertEquals(pos2byte(bytesWritten), src.get()); bytesWritten += 1; } return remaining; @@ -302,9 +308,9 @@ static int writeAndAssertReplies(DataStreamOutputImpl out, int bufferSize, int b } static void assertSuccessReply(Type expectedType, long expectedBytesWritten, DataStreamReply reply) { - Assert.assertTrue(reply.isSuccess()); - Assert.assertEquals(expectedBytesWritten, reply.getBytesWritten()); - Assert.assertEquals(expectedType, reply.getType()); + Assertions.assertTrue(reply.isSuccess()); + Assertions.assertEquals(expectedBytesWritten, reply.getBytesWritten()); + Assertions.assertEquals(expectedType, reply.getType()); } static CompletableFuture writeAndCloseAndAssertReplies( @@ -327,27 +333,30 @@ static CompletableFuture writeAndCloseAndAssertReplies( static void assertHeader(RaftServer server, RaftClientRequest header, int dataSize, boolean stepDownLeader) throws Exception { + LOG.info("XXX {}: dataSize={}, stepDownLeader={}, header={}", + server.getId(), dataSize, 
stepDownLeader, header); // check header - Assert.assertEquals(RaftClientRequest.dataStreamRequestType(), header.getType()); + Assertions.assertEquals(RaftClientRequest.dataStreamRequestType(), header.getType()); // check stream - final MultiDataStreamStateMachine stateMachine = (MultiDataStreamStateMachine) server.getDivision(header.getRaftGroupId()).getStateMachine(); + final MultiDataStreamStateMachine stateMachine = + (MultiDataStreamStateMachine) server.getDivision(header.getRaftGroupId()).getStateMachine(); final SingleDataStream stream = stateMachine.getSingleDataStream(header); final MyDataChannel channel = stream.getDataChannel(); - Assert.assertEquals(dataSize, channel.getBytesWritten()); - Assert.assertEquals(dataSize, channel.getForcedPosition()); + Assertions.assertEquals(dataSize, channel.getBytesWritten()); + Assertions.assertEquals(dataSize, channel.getForcedPosition()); // check writeRequest final RaftClientRequest writeRequest = stream.getWriteRequest(); - Assert.assertEquals(RaftClientRequest.dataStreamRequestType(), writeRequest.getType()); + Assertions.assertEquals(RaftClientRequest.dataStreamRequestType(), writeRequest.getType()); assertRaftClientMessage(header, null, writeRequest, header.getClientId(), stepDownLeader); } static CompletableFuture assertCloseReply(DataStreamOutputImpl out, DataStreamReply dataStreamReply, long bytesWritten, RaftPeerId leader, ClientId clientId, boolean stepDownLeader) { // Test close idempotent - Assert.assertSame(dataStreamReply, out.closeAsync().join()); - Assert.assertEquals(dataStreamReply.getClientId(), clientId); + Assertions.assertSame(dataStreamReply, out.closeAsync().join()); + Assertions.assertEquals(dataStreamReply.getClientId(), clientId); BaseTest.testFailureCase("writeAsync should fail", () -> out.writeAsync(DataStreamRequestByteBuffer.EMPTY_BYTE_BUFFER).join(), CompletionException.class, (Logger) null, AlreadyClosedException.class); @@ -359,7 +368,7 @@ static CompletableFuture 
assertCloseReply(DataStreamOutputImpl if (reply.isSuccess()) { final ByteString bytes = reply.getMessage().getContent(); if (!bytes.equals(MOCK)) { - Assert.assertEquals(bytesWritten2ByteString(bytesWritten), bytes); + Assertions.assertEquals(bytesWritten2ByteString(bytesWritten), bytes); } } @@ -372,13 +381,13 @@ static CompletableFuture assertCloseReply(DataStreamOutputImpl static void assertRaftClientMessage( RaftClientMessage expected, RaftPeerId expectedServerId, RaftClientMessage computed, ClientId expectedClientId, boolean stepDownLeader) { - Assert.assertNotNull(computed); - Assert.assertEquals(expectedClientId, computed.getClientId()); + Assertions.assertNotNull(computed); + Assertions.assertEquals(expectedClientId, computed.getClientId()); if (!stepDownLeader) { - Assert.assertEquals( + Assertions.assertEquals( Optional.ofNullable(expectedServerId).orElseGet(expected::getServerId), computed.getServerId()); } - Assert.assertEquals(expected.getRaftGroupId(), computed.getRaftGroupId()); + Assertions.assertEquals(expected.getRaftGroupId(), computed.getRaftGroupId()); } static LogEntryProto searchLogEntry(ClientInvocationId invocationId, RaftLog log) throws Exception { @@ -394,12 +403,12 @@ static LogEntryProto searchLogEntry(ClientInvocationId invocationId, RaftLog log } static void assertLogEntry(LogEntryProto logEntry, RaftClientRequest request) { - Assert.assertNotNull(logEntry); - Assert.assertTrue(logEntry.hasStateMachineLogEntry()); + Assertions.assertNotNull(logEntry); + Assertions.assertTrue(logEntry.hasStateMachineLogEntry()); final StateMachineLogEntryProto s = logEntry.getStateMachineLogEntry(); - Assert.assertEquals(StateMachineLogEntryProto.Type.DATASTREAM, s.getType()); - Assert.assertEquals(request.getCallId(), s.getCallId()); - Assert.assertEquals(request.getClientId().toByteString(), s.getClientId()); + Assertions.assertEquals(StateMachineLogEntryProto.Type.DATASTREAM, s.getType()); + Assertions.assertEquals(request.getCallId(), 
s.getCallId()); + Assertions.assertEquals(request.getClientId().toByteString(), s.getClientId()); } static void assertLogEntry(RaftServer.Division division, SingleDataStream stream) throws Exception { @@ -408,6 +417,15 @@ static void assertLogEntry(RaftServer.Division division, SingleDataStream stream assertLogEntry(entryFromStream, request); final LogEntryProto entryFromLog = searchLogEntry(ClientInvocationId.valueOf(request), division.getRaftLog()); - Assert.assertEquals(entryFromStream, entryFromLog); + Assertions.assertEquals(entryFromStream, entryFromLog); + } + + ResourceLeakDetector.LeakListener LEAK_LISTENER = (resourceType, records) -> { + throw new IllegalStateException("Leak detected for resource type: " + resourceType + records); + }; + + static void enableResourceLeakDetector() { + ResourceLeakDetector.setLevel(Level.PARANOID); + ByteBufUtil.setLeakListener(DataStreamTestUtils.LEAK_LISTENER); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.java b/ratis-test/src/test/java/org/apache/ratis/datastream/MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.java index 3396ada9b3..82fab6477a 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.java @@ -17,7 +17,6 @@ */ package org.apache.ratis.datastream; -import org.apache.ratis.security.TlsConf; import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.RaftConfigKeys; import org.apache.ratis.conf.Parameters; @@ -31,7 +30,7 @@ /** * A {@link MiniRaftCluster} with {{@link SupportedRpcType#GRPC}} and {@link SupportedDataStreamType#NETTY}. 
*/ -public class MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty extends MiniRaftClusterWithGrpc { +public final class MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty extends MiniRaftClusterWithGrpc { static class Factory extends MiniRaftCluster.Factory { private final Parameters parameters; @@ -50,15 +49,16 @@ public MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty newCluster(String[] public static final Factory FACTORY = new Factory(null); - public interface FactoryGet extends MiniRaftCluster.Factory.Get { + public interface FactoryGet + extends MiniRaftCluster.Factory.Get { @Override default MiniRaftCluster.Factory getFactory() { return FACTORY; } } - private MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty(String[] ids, String[] listenerIds, RaftProperties properties, - Parameters parameters) { + private MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty(String[] ids, String[] listenerIds, + RaftProperties properties, Parameters parameters) { super(ids, listenerIds, properties, parameters); } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty.java b/ratis-test/src/test/java/org/apache/ratis/datastream/MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty.java index 1e5149b431..9179fa0393 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty.java @@ -30,7 +30,7 @@ /** * A {@link MiniRaftCluster} with {{@link SupportedRpcType#NETTY}} and {@link SupportedDataStreamType#NETTY}. 
*/ -public class MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty extends MiniRaftClusterWithNetty { +public final class MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty extends MiniRaftClusterWithNetty { public static final Factory FACTORY = new Factory() { @Override @@ -49,7 +49,8 @@ default Factory getFactor } } - private MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty(String[] ids, String[] listenerIds, RaftProperties properties) { + private MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty(String[] ids, String[] listenerIds, + RaftProperties properties) { super(ids, listenerIds, properties); } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/TestDataStreamDisabled.java b/ratis-test/src/test/java/org/apache/ratis/datastream/TestDataStreamDisabled.java index 168a1b02dc..613bb69752 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/TestDataStreamDisabled.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/TestDataStreamDisabled.java @@ -26,14 +26,15 @@ import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftPeer; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +@SuppressWarnings({"try"}) public class TestDataStreamDisabled extends BaseTest { @Test public void testDataStreamDisabled() throws Exception { final RaftProperties properties = new RaftProperties(); - Assert.assertEquals(SupportedDataStreamType.DISABLED, RaftConfigKeys.DataStream.type(properties, LOG::info)); + Assertions.assertEquals(SupportedDataStreamType.DISABLED, RaftConfigKeys.DataStream.type(properties, LOG::info)); final RaftPeer server = RaftPeer.newBuilder().setId("s0").build(); @@ -44,9 +45,9 @@ public void testDataStreamDisabled() throws Exception { .setProperties(properties) .build(); DataStreamOutput out = client.getDataStreamApi().stream()) { - Assert.fail("Unexpected object: " + 
out); + Assertions.fail("Unexpected object: " + out); } catch (UnsupportedOperationException e) { - Assert.assertTrue(e.getMessage().contains( + Assertions.assertTrue(e.getMessage().contains( DisabledDataStreamClientFactory.class.getName() + "$1 does not support streamAsync")); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/TestDataStreamSslWithRpcTypeGrpcAndDataStreamTypeNetty.java b/ratis-test/src/test/java/org/apache/ratis/datastream/TestDataStreamSslWithRpcTypeGrpcAndDataStreamTypeNetty.java index 8e423ab293..247a7bcac1 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/TestDataStreamSslWithRpcTypeGrpcAndDataStreamTypeNetty.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/TestDataStreamSslWithRpcTypeGrpcAndDataStreamTypeNetty.java @@ -24,7 +24,7 @@ import org.apache.ratis.security.TlsConf; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Ignore; +import org.junit.jupiter.api.Disabled; import org.slf4j.event.Level; import java.util.function.Supplier; @@ -36,12 +36,12 @@ public class TestDataStreamSslWithRpcTypeGrpcAndDataStreamTypeNetty } Parameters newParameters() { - final Parameters parameters = new Parameters(); + final Parameters parameters1 = new Parameters(); final TlsConf serverTlsConfig = SecurityTestUtils.newServerTlsConfig(true); - NettyConfigKeys.DataStream.Server.setTlsConf(parameters, serverTlsConfig); + NettyConfigKeys.DataStream.Server.setTlsConf(parameters1, serverTlsConfig); final TlsConf clientTlsConfig = SecurityTestUtils.newClientTlsConfig(true); - NettyConfigKeys.DataStream.Client.setTlsConf(parameters, clientTlsConfig); - return parameters; + NettyConfigKeys.DataStream.Client.setTlsConf(parameters1, clientTlsConfig); + return parameters1; } private final Supplier parameters = JavaUtils.memoize(this::newParameters); @@ -55,22 +55,22 @@ public MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.Factory getFactory() return new 
MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.Factory(getParameters()); } - @Ignore + @Disabled @Override public void testStreamWrites() { } - @Ignore + @Disabled @Override public void testStreamWithInvalidRoutingTable() { } - @Ignore + @Disabled @Override public void testMultipleStreamsMultipleServers() { } - @Ignore + @Disabled @Override public void testMultipleStreamsMultipleServersStepDownLeader() { } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamChainTopologyWithGrpcCluster.java b/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamChainTopologyWithGrpcCluster.java index 31b28b4c2d..778ee8225c 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamChainTopologyWithGrpcCluster.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamChainTopologyWithGrpcCluster.java @@ -22,13 +22,13 @@ import org.apache.ratis.netty.NettyConfigKeys; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; -import org.junit.Before; +import org.junit.jupiter.api.BeforeEach; public class TestNettyDataStreamChainTopologyWithGrpcCluster extends DataStreamAsyncClusterTests implements MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.FactoryGet { - @Before + @BeforeEach public void setup() { final RaftProperties p = getProperties(); RaftClientConfigKeys.DataStream.setRequestTimeout(p, TimeDuration.ONE_MINUTE); diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamStarTopologyWithGrpcCluster.java b/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamStarTopologyWithGrpcCluster.java index 45247d489a..bd80d6b6b5 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamStarTopologyWithGrpcCluster.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamStarTopologyWithGrpcCluster.java @@ -25,7 +25,7 @@ import 
org.apache.ratis.protocol.RoutingTable; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; -import org.junit.Before; +import org.junit.jupiter.api.BeforeEach; import java.util.Collection; import java.util.List; @@ -35,7 +35,7 @@ public class TestNettyDataStreamStarTopologyWithGrpcCluster extends DataStreamAsyncClusterTests implements MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.FactoryGet { - @Before + @BeforeEach public void setup() { final RaftProperties p = getProperties(); RaftClientConfigKeys.DataStream.setRequestTimeout(p, TimeDuration.ONE_MINUTE); diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamWithMock.java b/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamWithMock.java index 27a1ee102f..8193f1d984 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamWithMock.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamWithMock.java @@ -36,14 +36,14 @@ import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.util.NetUtils; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; import org.mockito.Mockito; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -@Ignore +@Disabled public class TestNettyDataStreamWithMock extends DataStreamBaseTest { static RaftPeer newRaftPeer(RaftServer server) { return RaftPeer.newBuilder() @@ -53,18 +53,24 @@ static RaftPeer newRaftPeer(RaftServer server) { .build(); } - @Before + @BeforeEach public void setup() { properties = new RaftProperties(); RaftConfigKeys.DataStream.setType(properties, SupportedDataStreamType.NETTY); } - RaftServer.Division mockDivision(RaftServer server) { + + RaftServer.Division mockDivision(RaftServer server, 
RaftGroupId groupId) { final RaftServer.Division division = mock(RaftServer.Division.class); when(division.getRaftServer()).thenReturn(server); when(division.getRaftConf()).thenAnswer(i -> getRaftConf()); final MultiDataStreamStateMachine stateMachine = new MultiDataStreamStateMachine(); + try { + stateMachine.initialize(server, groupId, null); + } catch (IOException e) { + throw new IllegalStateException(e); + } when(division.getStateMachine()).thenReturn(stateMachine); final DataStreamMap streamMap = RaftServerTestUtil.newDataStreamMap(server.getId()); @@ -95,7 +101,7 @@ private void testMockCluster(int numServers, RaftException leaderException, when(raftServer.getId()).thenReturn(peerId); when(raftServer.getPeer()).thenReturn(RaftPeer.newBuilder().setId(peerId).build()); if (getStateMachineException == null) { - final RaftServer.Division myDivision = mockDivision(raftServer); + final RaftServer.Division myDivision = mockDivision(raftServer, groupId); when(raftServer.getDivision(Mockito.any(RaftGroupId.class))).thenReturn(myDivision); } else { when(raftServer.getDivision(Mockito.any(RaftGroupId.class))).thenThrow(getStateMachineException); @@ -108,8 +114,8 @@ private void testMockCluster(int numServers, RaftException leaderException, submitException != null ? 
submitException : leaderException, getStateMachineException); } - void runTestMockCluster(RaftGroupId groupId, List raftServers, ClientId clientId, int bufferSize, int bufferNum, - Exception expectedException, Exception headerException) throws Exception { + void runTestMockCluster(RaftGroupId groupId, List raftServers, ClientId clientId, int bufferSize, + int bufferNum, Exception expectedException, Exception headerException) throws Exception { try { final List peers = raftServers.stream() .map(TestNettyDataStreamWithMock::newRaftPeer) diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamWithNettyCluster.java b/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamWithNettyCluster.java index 90af31425f..1a29d014e4 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamWithNettyCluster.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/TestNettyDataStreamWithNettyCluster.java @@ -17,9 +17,10 @@ */ package org.apache.ratis.datastream; -import org.junit.Ignore; -@Ignore("Ignored by runzhiwang, because NettyClientRpc does not support sendRequestAsync") +import org.junit.jupiter.api.Disabled; + +@Disabled("Ignored by runzhiwang, because NettyClientRpc does not support sendRequestAsync") public class TestNettyDataStreamWithNettyCluster extends DataStreamClusterTests implements MiniRaftClusterWithRpcTypeNettyAndDataStreamTypeNetty.FactoryGet { diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestCustomGrpcServices.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestCustomGrpcServices.java new file mode 100644 index 0000000000..7784e4d3dd --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestCustomGrpcServices.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.grpc; + +import org.apache.ratis.BaseTest; +import org.apache.ratis.RaftTestUtil; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.conf.Parameters; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.grpc.server.GrpcServices; +import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.server.RaftServerRpc; +import org.apache.ratis.test.proto.GreeterGrpc; +import org.apache.ratis.test.proto.HelloReply; +import org.apache.ratis.test.proto.HelloRequest; +import org.apache.ratis.thirdparty.io.grpc.StatusRuntimeException; +import org.apache.ratis.thirdparty.io.grpc.ManagedChannel; +import org.apache.ratis.thirdparty.io.grpc.ManagedChannelBuilder; +import org.apache.ratis.thirdparty.io.grpc.netty.NettyServerBuilder; +import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver; +import org.apache.ratis.util.IOUtils; +import org.apache.ratis.util.NetUtils; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.Closeable; +import java.io.IOException; +import java.util.EnumSet; +import java.util.Objects; +import java.util.Queue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedQueue; +import 
java.util.concurrent.TimeUnit; + +import static org.apache.ratis.RaftTestUtil.waitForLeader; + +public class TestCustomGrpcServices extends BaseTest { + /** Add two different greeter services for client and admin. */ + class MyCustomizer implements GrpcServices.Customizer { + final GreeterImpl clientGreeter = new GreeterImpl("Hello"); + final GreeterImpl adminGreeter = new GreeterImpl("Hi"); + + @Override + public NettyServerBuilder customize(NettyServerBuilder builder, EnumSet types) { + if (types.contains(GrpcServices.Type.CLIENT)) { + return builder.addService(clientGreeter); + } + if (types.contains(GrpcServices.Type.ADMIN)) { + return builder.addService(adminGreeter); + } + return builder; + } + } + + class GreeterImpl extends GreeterGrpc.GreeterImplBase { + private final String prefix; + + GreeterImpl(String prefix) { + this.prefix = prefix; + } + + String toReply(String request) { + return prefix + " " + request; + } + + @Override + public StreamObserver hello(StreamObserver responseObserver) { + return new StreamObserver() { + @Override + public void onNext(HelloRequest helloRequest) { + final String reply = toReply(helloRequest.getName()); + responseObserver.onNext(HelloReply.newBuilder().setMessage(reply).build()); + } + + @Override + public void onError(Throwable throwable) { + LOG.error("onError", throwable); + } + + @Override + public void onCompleted() { + responseObserver.onCompleted(); + } + }; + } + } + + class GreeterClient implements Closeable { + private final ManagedChannel channel; + private final StreamObserver requestHandler; + private final Queue> replies = new ConcurrentLinkedQueue<>(); + + GreeterClient(int port) { + this.channel = ManagedChannelBuilder.forAddress(NetUtils.LOCALHOST, port) + .usePlaintext() + .build(); + + final StreamObserver responseHandler = new StreamObserver() { + @Override + public void onNext(HelloReply helloReply) { + Objects.requireNonNull(replies.poll(), "queue is empty") + .complete(helloReply.getMessage()); + 
} + + @Override + public void onError(Throwable throwable) { + LOG.info("onError", throwable); + completeExceptionally(throwable); + } + + @Override + public void onCompleted() { + LOG.info("onCompleted"); + completeExceptionally(new IllegalStateException("onCompleted")); + } + + void completeExceptionally(Throwable throwable) { + replies.forEach(f -> f.completeExceptionally(throwable)); + replies.clear(); + } + }; + this.requestHandler = GreeterGrpc.newStub(channel).hello(responseHandler); + } + + @Override + public void close() throws IOException { + try { + /* After the request handler is cancelled, no more life-cycle hooks are allowed, + * see {@link org.apache.ratis.thirdparty.io.grpc.ClientCall.Listener#cancel(String, Throwable)} */ + // requestHandler.onCompleted(); + channel.shutdown().awaitTermination(5, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw IOUtils.toInterruptedIOException("Failed to close", e); + } + } + + CompletableFuture send(String name) { + LOG.info("send: {}", name); + final HelloRequest request = HelloRequest.newBuilder().setName(name).build(); + final CompletableFuture f = new CompletableFuture<>(); + try { + requestHandler.onNext(request); + replies.offer(f); + } catch (IllegalStateException e) { + // already closed + f.completeExceptionally(e); + } + return f.whenComplete((r, e) -> LOG.info("reply: {}", r)); + } + } + + @Test + public void testCustomServices() throws Exception { + final String[] ids = {"s0"}; + final RaftProperties properties = new RaftProperties(); + + final Parameters parameters = new Parameters(); + final MyCustomizer customizer = new MyCustomizer(); + GrpcConfigKeys.Server.setServicesCustomizer(parameters, customizer); + + try(MiniRaftClusterWithGrpc cluster = new MiniRaftClusterWithGrpc(ids, properties, parameters)) { + cluster.start(); + final RaftServerRpc server = waitForLeader(cluster).getRaftServer().getServerRpc(); + + // test Raft service + try 
(RaftClient client = cluster.createClient()) { + final RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("abc")); + Assertions.assertTrue(reply.isSuccess()); + } + + // test custom client service + final int clientPort = server.getClientServerAddress().getPort(); + try (GreeterClient client = new GreeterClient(clientPort)) { + sendAndAssertReply("world", client, customizer.clientGreeter); + } + + // test custom admin service + final int adminPort = server.getAdminServerAddress().getPort(); + try (GreeterClient admin = new GreeterClient(adminPort)) { + sendAndAssertReply("admin", admin, customizer.adminGreeter); + } + } + } + + static void sendAndAssertReply(String name, GreeterClient client, GreeterImpl greeter) { + final String computed = client.send(name).join(); + final String expected = greeter.toReply(name); + Assertions.assertEquals(expected, computed); + } + + @Test + public void testGetCallIdWithLargeValue() { + long largeCallId = (long) Integer.MAX_VALUE + 1L; + StatusRuntimeException ex = GrpcUtil.wrapException(new IOException("test"), largeCallId); + Assertions.assertEquals(largeCallId, GrpcUtil.getCallId(ex)); + } + + @Test + public void testGetCallIdWithMissingValue() { + StatusRuntimeException ex = GrpcUtil.wrapException(new IOException("test")); + Assertions.assertEquals(-1L, GrpcUtil.getCallId(ex)); + } + + @Test + public void testGetCallIdWithZeroValue() { + StatusRuntimeException ex = GrpcUtil.wrapException(new IOException("test"), 0L); + Assertions.assertEquals(-1L, GrpcUtil.getCallId(ex)); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestGrpcFactory.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestGrpcFactory.java index 76fbcee5eb..99a395d8a1 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestGrpcFactory.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestGrpcFactory.java @@ -18,16 +18,14 @@ package org.apache.ratis.grpc; import org.apache.ratis.BaseTest; -import 
org.apache.ratis.util.JavaUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; public class TestGrpcFactory extends BaseTest { @Test public void testUseCacheForAllThreads() { // trigger GrpcFactory static initializer final boolean value = GrpcFactory.checkPooledByteBufAllocatorUseCacheForAllThreads(LOG::info); - Assert.assertFalse(value); - LOG.info("value is {}", value); + Assertions.assertFalse(value); } } \ No newline at end of file diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderElectionWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderElectionWithGrpc.java index 7730cb1166..797124bc16 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderElectionWithGrpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderElectionWithGrpc.java @@ -19,7 +19,7 @@ import org.apache.ratis.server.impl.BlockRequestHandlingInjection; import org.apache.ratis.server.impl.LeaderElectionTests; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestLeaderElectionWithGrpc extends LeaderElectionTests @@ -30,7 +30,7 @@ public class TestLeaderElectionWithGrpc public void testEnforceLeader() throws Exception { super.testEnforceLeader(); - MiniRaftClusterWithGrpc.sendServerRequestInjection.clear(); + MiniRaftClusterWithGrpc.SEND_SERVER_REQUEST_INJECTION.clear(); BlockRequestHandlingInjection.getInstance().unblockAll(); } diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderInstallSnapshot.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderInstallSnapshotWithGrpc.java similarity index 58% rename from ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderInstallSnapshot.java rename to ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderInstallSnapshotWithGrpc.java index 5f7a40f0f4..9b2c903849 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderInstallSnapshot.java 
+++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLeaderInstallSnapshotWithGrpc.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,23 +18,38 @@ package org.apache.ratis.grpc; import org.apache.ratis.InstallSnapshotFromLeaderTests; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import java.util.Arrays; import java.util.Collection; -@RunWith(Parameterized.class) -public class TestLeaderInstallSnapshot +public class TestLeaderInstallSnapshotWithGrpc extends InstallSnapshotFromLeaderTests implements MiniRaftClusterWithGrpc.FactoryGet { - public TestLeaderInstallSnapshot(Boolean separateHeartbeat) { + public static Collection data() { + return Arrays.asList((new Boolean[][] {{Boolean.FALSE}, {Boolean.TRUE}})); + } + + @ParameterizedTest + @MethodSource("data") + public void testMultiFileInstallSnapshot(Boolean separateHeartbeat) throws Exception { GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); + super.testMultiFileInstallSnapshot(); } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList((new Boolean[][] {{Boolean.FALSE}, {Boolean.TRUE}})); + @ParameterizedTest + @MethodSource("data") + public void testSeparateSnapshotInstallPath(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); + super.testSeparateSnapshotInstallPath(); + } + + @ParameterizedTest + @MethodSource("data") + public void testInstallSnapshotLeaderSwitch(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); + super.testInstallSnapshotLeaderSwitch(); } } diff --git 
a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableLeaderLeaseReadWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableLeaderLeaseReadWithGrpc.java new file mode 100644 index 0000000000..120cce48cc --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableLeaderLeaseReadWithGrpc.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.grpc; + +import org.apache.ratis.LinearizableReadTests; +import org.apache.ratis.server.RaftServerConfigKeys.Read.ReadIndex.Type; + +public class TestLinearizableLeaderLeaseReadWithGrpc + extends LinearizableReadTests + implements MiniRaftClusterWithGrpc.FactoryGet { + + @Override + public boolean isLeaderLeaseEnabled() { + return true; + } + + @Override + public Type readIndexType() { + return Type.COMMIT_INDEX; + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadAppliedIndexLeaderLeaseReadWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadAppliedIndexLeaderLeaseReadWithGrpc.java new file mode 100644 index 0000000000..3705fb3ffc --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadAppliedIndexLeaderLeaseReadWithGrpc.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.grpc; + +import org.apache.ratis.server.RaftServerConfigKeys.Read.ReadIndex.Type; + +public class TestLinearizableReadAppliedIndexLeaderLeaseReadWithGrpc + extends TestLinearizableLeaderLeaseReadWithGrpc { + + @Override + public Type readIndexType() { + return Type.APPLIED_INDEX; + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadAppliedIndexWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadAppliedIndexWithGrpc.java new file mode 100644 index 0000000000..b119f32a6f --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadAppliedIndexWithGrpc.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.grpc; + +import org.apache.ratis.server.RaftServerConfigKeys.Read.ReadIndex.Type; + +public class TestLinearizableReadAppliedIndexWithGrpc + extends TestLinearizableReadWithGrpc { + + @Override + public Type readIndexType() { + return Type.APPLIED_INDEX; + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadRepliedIndexLeaderLeaseWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadRepliedIndexLeaderLeaseWithGrpc.java new file mode 100644 index 0000000000..bb50eafbfc --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadRepliedIndexLeaderLeaseWithGrpc.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.ratis.grpc;
+
+public class TestLinearizableReadRepliedIndexLeaderLeaseWithGrpc
+    extends TestLinearizableReadRepliedIndexWithGrpc {
+
+  @Override
+  public boolean isLeaderLeaseEnabled() {
+    return true;
+  }
+}
diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadRepliedIndexWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadRepliedIndexWithGrpc.java
new file mode 100644
index 0000000000..f08346fc02
--- /dev/null
+++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadRepliedIndexWithGrpc.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ratis.grpc;
+
+import org.apache.ratis.RaftTestUtil;
+import org.apache.ratis.ReadOnlyRequestTests.CounterStateMachine;
+import org.apache.ratis.client.RaftClient;
+import org.apache.ratis.protocol.RaftPeerId;
+import org.apache.ratis.server.RaftServer;
+import org.apache.ratis.server.RaftServerConfigKeys.Read.ReadIndex.Type;
+import org.apache.ratis.server.impl.MiniRaftCluster;
+import org.apache.ratis.server.impl.ReplyFlusher;
+import org.apache.ratis.util.CodeInjectionForTesting;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+
+import static org.apache.ratis.ReadOnlyRequestTests.INCREMENT;
+import static org.apache.ratis.ReadOnlyRequestTests.QUERY;
+import static org.apache.ratis.ReadOnlyRequestTests.assertReplyExact;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Runs the linearizable-read tests over gRPC with {@link Type#REPLIED_INDEX}
+ * and replaces the parallel follower-read test with a variant that blocks the ReplyFlusher.
+ */
+public class TestLinearizableReadRepliedIndexWithGrpc
+    extends TestLinearizableReadWithGrpc {
+
+  @Override
+  public Type readIndexType() {
+    return Type.REPLIED_INDEX;
+  }
+
+  @Test
+  @Override
+  public void testFollowerLinearizableReadParallel() throws Exception {
+    runWithNewCluster(TestLinearizableReadRepliedIndexWithGrpc::runTestFollowerReadOnlyParallelRepliedIndex);
+  }
+
+  /**
+   * While the ReplyFlusher is blocked, follower reads must complete immediately with the
+   * pre-block value (0) and write replies must stay pending; once the flusher is unblocked,
+   * both followers must serve the final count and every write reply must carry its own count.
+   */
+  static <C extends MiniRaftCluster> void runTestFollowerReadOnlyParallelRepliedIndex(C cluster)
+      throws Exception {
+    final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster);
+    final CounterStateMachine leaderStateMachine = (CounterStateMachine)leader.getStateMachine();
+
+    final List<RaftServer.Division> followers = cluster.getFollowers();
+    Assertions.assertEquals(2, followers.size());
+    final RaftPeerId f0 = followers.get(0).getId();
+    final RaftPeerId f1 = followers.get(1).getId();
+
+    final BlockingCode blockingReplyFlusher = new BlockingCode();
+
+    try (RaftClient leaderClient = cluster.createClient(leader.getId());
+         RaftClient f0Client = cluster.createClient(f0);
+         RaftClient f1Client = cluster.createClient(f1)) {
+      // Warm up the clients first before blocking the reply flusher
+      assertReplyExact(0, leaderClient.async().sendReadOnly(QUERY).get());
+      assertReplyExact(0, f0Client.async().sendReadOnly(QUERY, f0).get());
+      assertReplyExact(0, f1Client.async().sendReadOnly(QUERY, f1).get());
+
+      CodeInjectionForTesting.put(ReplyFlusher.FLUSH, blockingReplyFlusher);
+      try {
+        final int n = 10;
+        final List<Reply> writeReplies = new ArrayList<>(n);
+        final List<Reply> f0Replies = new ArrayList<>(n);
+        final List<Reply> f1Replies = new ArrayList<>(n);
+        for (int i = 0; i < n; i++) {
+          final int count = i + 1;
+          writeReplies.add(new Reply(count, leaderClient.async().send(INCREMENT)));
+
+          // Read replies return immediately, but they all should return 0 since the repliedIndex has not been
+          // updated and write operations should not have been applied by the followers
+          f0Replies.add(new Reply(0, f0Client.async().sendReadOnly(QUERY, f0)));
+          f1Replies.add(new Reply(0, f1Client.async().sendReadOnly(QUERY, f1)));
+
+          // sleep in order to make sure
+          // (1) the count is incremented, and
+          // (2) the reads will wait for the repliedIndex.
+          Thread.sleep(100);
+          assertEquals(count, leaderStateMachine.getCount());
+        }
+
+        for (int i = 0; i < n; i++) {
+          // Write reply should not yet complete since ReplyFlusher remains blocked.
+          assertFalse(writeReplies.get(i).isDone(), "Received unexpected Write reply " + writeReplies.get(i));
+
+          // Follower reads should be immediately served, but the read value should return the value before the
+          // replyFlusher is blocked
+          assertTrue(f0Replies.get(i).isDone(), "Follower read should return immediately");
+          f0Replies.get(i).assertExact();
+          assertTrue(f1Replies.get(i).isDone(), "Follower read should return immediately");
+          f1Replies.get(i).assertExact();
+        }
+
+        // unblock ReplyFlusher
+        blockingReplyFlusher.complete();
+        assertReplyExact(n, f0Client.io().sendReadOnly(QUERY, f0));
+        // Each follower is queried through its own client: f1Client must read from f1, not f0 again.
+        assertReplyExact(n, f1Client.io().sendReadOnly(QUERY, f1));
+
+        for (int i = 0; i < n; i++) {
+          //write reply should get the exact count at the write time
+          writeReplies.get(i).assertExact();
+        }
+      } finally {
+        // Always unblock the flusher and unregister the hook, so a failed assertion above
+        // cannot leave the flusher blocked with the hook still registered.
+        blockingReplyFlusher.complete();
+        CodeInjectionForTesting.remove(ReplyFlusher.FLUSH);
+      }
+    }
+  }
+
+  /** Injected code that blocks every caller of {@link #execute} until {@link #complete()} has been called. */
+  static class BlockingCode implements CodeInjectionForTesting.Code {
+    private final CompletableFuture<Void> future = new CompletableFuture<>();
+
+    void complete() {
+      future.complete(null);
+    }
+
+    @Override
+    public boolean execute(Object localId, Object remoteId, Object... args) {
+      final boolean blocked = !future.isDone();
+      if (blocked) {
+        LOG.info("{}: ReplyFlusher is blocked", localId, new Throwable());
+      }
+      future.join();
+      if (blocked) {
+        LOG.info("{}: ReplyFlusher is unblocked", localId);
+      }
+      return true;
+    }
+  }
+}
diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadWithGrpc.java
new file mode 100644
index 0000000000..77593ff85e
--- /dev/null
+++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLinearizableReadWithGrpc.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.grpc; + +import org.apache.ratis.LinearizableReadTests; +import org.apache.ratis.server.RaftServerConfigKeys.Read.ReadIndex.Type; + +public class TestLinearizableReadWithGrpc + extends LinearizableReadTests + implements MiniRaftClusterWithGrpc.FactoryGet { + + @Override + public boolean isLeaderLeaseEnabled() { + return false; + } + + @Override + public Type readIndexType() { + return Type.COMMIT_INDEX; + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLogAppenderWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLogAppenderWithGrpc.java index c0d102f957..318ed5e6bc 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLogAppenderWithGrpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLogAppenderWithGrpc.java @@ -18,7 +18,10 @@ package org.apache.ratis.grpc; import org.apache.ratis.LogAppenderTests; +import org.apache.ratis.grpc.server.GrpcServicesImpl; import org.apache.ratis.proto.RaftProtos; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.RaftTestUtil; import org.apache.ratis.client.RaftClient; @@ -29,25 +32,29 @@ import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.leader.FollowerInfo; import org.apache.ratis.server.impl.RaftServerTestUtil; 
+import org.apache.ratis.server.leader.LogAppender; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; +import org.apache.ratis.util.CodeInjectionForTesting; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.event.Level; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import static org.apache.ratis.RaftTestUtil.waitForLeader; -@RunWith(Parameterized.class) public class TestLogAppenderWithGrpc extends LogAppenderTests implements MiniRaftClusterWithGrpc.FactoryGet { @@ -55,17 +62,14 @@ public class TestLogAppenderWithGrpc Slf4jUtils.setLogLevel(FollowerInfo.LOG, Level.DEBUG); } - public TestLogAppenderWithGrpc(Boolean separateHeartbeat) { - GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); - } - - @Parameterized.Parameters public static Collection data() { return Arrays.asList((new Boolean[][] {{Boolean.FALSE}, {Boolean.TRUE}})); } - @Test - public void testPendingLimits() throws IOException, InterruptedException { + @ParameterizedTest + @MethodSource("data") + public void testPendingLimits(Boolean separateHeartbeat) throws IOException, InterruptedException { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); int maxAppends = 10; RaftProperties properties = new RaftProperties(); properties.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, @@ -94,7 +98,7 @@ public void 
testPendingLimits() throws IOException, InterruptedException { JavaUtils.attempt(() -> { for (long nextIndex : leader.getInfo().getFollowerNextIndices()) { // Verify nextIndex does not progress due to pendingRequests limit - Assert.assertEquals(initialNextIndex + maxAppends, nextIndex); + Assertions.assertEquals(initialNextIndex + maxAppends, nextIndex); } }, 10, ONE_SECOND, "matching nextIndex", LOG); for (RaftServer.Division server : cluster.getFollowers()) { @@ -107,8 +111,10 @@ public void testPendingLimits() throws IOException, InterruptedException { } } - @Test - public void testRestartLogAppender() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testRestartLogAppender(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); runWithNewCluster(2, this::runTestRestartLogAppender); } @@ -120,7 +126,7 @@ private void runTestRestartLogAppender(MiniRaftClusterWithGrpc cluster) throws E try(RaftClient client = cluster.createClient(leader.getId())) { for(int i = 0; i < 10; i++) { final RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + ++messageCount)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -128,7 +134,7 @@ private void runTestRestartLogAppender(MiniRaftClusterWithGrpc cluster) throws E final GrpcServerMetrics leaderMetrics = new GrpcServerMetrics(leader.getMemberId().toString()); final String counter = String.format(GrpcServerMetrics.RATIS_GRPC_METRICS_LOG_APPENDER_INCONSISTENCY, cluster.getFollowers().iterator().next().getMemberId().getPeerId()); - Assert.assertEquals(0L, leaderMetrics.getRegistry().counter(counter).getCount()); + Assertions.assertEquals(0L, leaderMetrics.getRegistry().counter(counter).getCount()); // restart LogAppender RaftServerTestUtil.restartLogAppenders(leader); @@ -137,7 +143,7 @@ private void runTestRestartLogAppender(MiniRaftClusterWithGrpc cluster) throws E 
try(RaftClient client = cluster.createClient(leader.getId())) { for(int i = 0; i < 10; i++) { final RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + ++messageCount)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } @@ -148,7 +154,61 @@ private void runTestRestartLogAppender(MiniRaftClusterWithGrpc cluster) throws E // assert INCONSISTENCY counter >= 1 // If old LogAppender die before new LogAppender start, INCONSISTENCY equal to 1, // else INCONSISTENCY greater than 1 - Assert.assertTrue(newleaderMetrics.getRegistry().counter(counter).getCount() >= 1L); + Assertions.assertTrue(newleaderMetrics.getRegistry().counter(counter).getCount() >= 1L); + } + } + + @Test + public void testLogAppenderAutoRestartOnException() throws Exception { + runWithNewCluster(3, this::runTestAutoRestartOnException); + } + + private void runTestAutoRestartOnException(MiniRaftClusterWithGrpc cluster) throws Exception { + final RaftServer.Division leader = waitForLeader(cluster); + final RaftPeerId leaderId = leader.getId(); + + try (RaftClient client = cluster.createClient(leaderId)) { + for (int i = 0; i < 5; i++) { + Assertions.assertTrue(client.io().send(new RaftTestUtil.SimpleMessage("init-" + i)).isSuccess()); + } + } + + final Set before = RaftServerTestUtil.getLogAppenders(leader).collect(Collectors.toSet()); + Assertions.assertEquals(2, before.size()); + + // Inject a one-time IllegalStateException into the leader's AppendEntries send path. + // This causes the LogAppenderDaemon to enter EXCEPTION state and call restart(). 
+ final RaftGroupId groupId = cluster.getGroupId(); + final AtomicInteger failCount = new AtomicInteger(0); + try { + CodeInjectionForTesting.put(GrpcServicesImpl.GRPC_SEND_SERVER_REQUEST, (localId, remoteId, args) -> { + if (leaderId.equals(localId) + && args.length > 0 && args[0] instanceof RaftProtos.AppendEntriesRequestProto) { + final RaftProtos.AppendEntriesRequestProto proto = (RaftProtos.AppendEntriesRequestProto) args[0]; + if (RaftGroupId.valueOf(proto.getServerRequest().getRaftGroupId().getId()).equals(groupId) + && failCount.getAndIncrement() < 1) { + throw new IllegalStateException("Injected failure for restart test"); + } + } + return false; + }); + + JavaUtils.attempt(() -> { + final Set current = RaftServerTestUtil.getLogAppenders(leader) + .collect(Collectors.toSet()); + Assertions.assertEquals(2, current.size()); + Assertions.assertTrue(current.stream().anyMatch(a -> !before.contains(a)), + "Expected at least one new LogAppender instance after daemon exception restart"); + }, 30, ONE_SECOND, "LogAppender auto-restart after exception", LOG); + } finally { + CodeInjectionForTesting.remove(GrpcServicesImpl.GRPC_SEND_SERVER_REQUEST); + } + + try (RaftClient client = cluster.createClient(leaderId)) { + for (int i = 0; i < 5; i++) { + Assertions.assertTrue( + client.io().send(new RaftTestUtil.SimpleMessage("after-restart-" + i)).isSuccess()); + } } } } diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestLogMetadataTestsWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLogMetadataTestsWithGrpc.java new file mode 100644 index 0000000000..1c69c42bbb --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestLogMetadataTestsWithGrpc.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.ratis.grpc; + +import org.apache.ratis.server.impl.LogMetadataTests; + +public class TestLogMetadataTestsWithGrpc extends LogMetadataTests + implements MiniRaftClusterWithGrpc.FactoryGet{ + +} diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftAsyncWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftAsyncWithGrpc.java index a12c52f100..9f9cda701c 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftAsyncWithGrpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftAsyncWithGrpc.java @@ -18,7 +18,9 @@ package org.apache.ratis.grpc; import org.apache.ratis.RaftAsyncTests; +import org.junit.jupiter.api.Timeout; +@Timeout(100) public class TestRaftAsyncWithGrpc extends RaftAsyncTests implements MiniRaftClusterWithGrpc.FactoryGet { } \ No newline at end of file diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftAsyncWithGrpcTracing.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftAsyncWithGrpcTracing.java new file mode 100644 index 0000000000..d7c619cc95 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftAsyncWithGrpcTracing.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ratis.grpc;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import io.opentelemetry.api.trace.SpanKind;
+import io.opentelemetry.sdk.testing.junit5.OpenTelemetryExtension;
+import io.opentelemetry.sdk.trace.data.SpanData;
+import org.apache.ratis.RaftAsyncTests;
+import org.apache.ratis.trace.TraceConfigKeys;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
+import org.junit.jupiter.api.extension.RegisterExtension;
+
+import java.util.List;
+
+@Timeout(100)
+public class TestRaftAsyncWithGrpcTracing extends RaftAsyncTests
+    implements MiniRaftClusterWithGrpc.FactoryGet {
+  {
+    TraceConfigKeys.setEnabled(getProperties(), true);
+  }
+
+  @RegisterExtension
+  private static final OpenTelemetryExtension openTelemetryExtension =
+      OpenTelemetryExtension.create();
+
+  /**
+   * Verifies traceAsyncRpcSend creates CLIENT spans when tracing is enabled.
+   * testBasicAppendEntriesAsync uses client.async().send() which goes through AsyncImpl.send().
+   */
+  @Test
+  @Override
+  public void testBasicAppendEntriesAsync() throws Exception {
+    super.testBasicAppendEntriesAsync();
+    assertClientAndServerSpans();
+  }
+
+  /** Runs the inherited load test, then checks that CLIENT and SERVER spans were recorded. */
+  @Test
+  @Override
+  public void testWithLoadAsync() throws Exception {
+    super.testWithLoadAsync();
+    assertClientAndServerSpans();
+  }
+
+  /** Asserts that the recorded spans include at least one CLIENT span and at least one SERVER span. */
+  private static void assertClientAndServerSpans() {
+    final List<SpanData> spans = openTelemetryExtension.getSpans();
+    assertTrue(
+        spans.stream().anyMatch(s -> s.getKind() == SpanKind.CLIENT),
+        "Expected at least one span with SpanKind.CLIENT (from traceAsyncRpcSend)"
+    );
+    assertTrue(
+        spans.stream().anyMatch(s -> s.getKind() == SpanKind.SERVER),
+        "Expected at least one span with SpanKind.SERVER"
+    );
+  }
+}
diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftOutputStreamWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftOutputStreamWithGrpc.java index fb35d958ab..2f1ef3f124 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftOutputStreamWithGrpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftOutputStreamWithGrpc.java @@ -18,12 +18,10 @@ package org.apache.ratis.grpc; import org.apache.ratis.OutputStreamBaseTest; +import org.junit.jupiter.api.Timeout; +@Timeout(value = 100) public class TestRaftOutputStreamWithGrpc extends OutputStreamBaseTest implements MiniRaftClusterWithGrpc.FactoryGet { - @Override - public int getGlobalTimeoutSeconds() { - return 100; - } } diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftServerWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftServerWithGrpc.java index 0af1d87cce..b5247cf63d 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftServerWithGrpc.java +++
b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftServerWithGrpc.java @@ -26,7 +26,6 @@ import org.apache.ratis.protocol.ClientInvocationId; import org.apache.ratis.server.RetryCache; import org.apache.ratis.util.JavaUtils; -import org.slf4j.event.Level; import org.apache.ratis.conf.Parameters; import org.apache.ratis.security.SecurityTestUtils; import org.apache.ratis.server.storage.RaftStorage; @@ -62,40 +61,41 @@ import org.apache.ratis.util.ProtoUtils; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.event.Level; import javax.net.ssl.KeyManager; import javax.net.ssl.TrustManager; import java.io.IOException; +import java.io.InterruptedIOException; import java.nio.channels.OverlappingFileLockException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; -@RunWith(Parameterized.class) public class TestRaftServerWithGrpc extends BaseTest implements MiniRaftClusterWithGrpc.FactoryGet { { Slf4jUtils.setLogLevel(GrpcClientProtocolClient.LOG, Level.TRACE); } - public TestRaftServerWithGrpc(Boolean separateHeartbeat) { - 
GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); - } - - @Parameterized.Parameters public static Collection data() { return Arrays.asList((new Boolean[][] {{Boolean.FALSE}, {Boolean.TRUE}})); } - @Before + @BeforeEach public void setup() { final RaftProperties p = getProperties(); p.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, SimpleStateMachine4Testing.class, StateMachine.class); @@ -103,7 +103,42 @@ public void setup() { } @Test - public void testServerRestartOnException() throws Exception { + @Timeout(value = 30, unit = TimeUnit.SECONDS) + public void testAsyncRequestExceedsMaxMessageSize() throws Exception { + final RaftProperties properties = getProperties(); + final SizeInBytes originalMessageSize = GrpcConfigKeys.messageSizeMax(properties, s -> {}); + final SizeInBytes originalBufferLimit = RaftServerConfigKeys.Log.Appender.bufferByteLimit(properties); + final boolean originalSendDummyRequest = + RaftClientConfigKeys.Async.Experimental.sendDummyRequest(properties); + + RaftServerConfigKeys.Log.Appender.setBufferByteLimit(properties, SizeInBytes.valueOf("16KB")); + final SizeInBytes testMessageSizeMax = SizeInBytes.valueOf("1040KB"); + GrpcConfigKeys.setMessageSizeMax(properties, testMessageSizeMax); + RaftClientConfigKeys.Async.Experimental.setSendDummyRequest(properties, false); + + try { + runWithNewCluster(1, cluster -> { + try (RaftClient client = cluster.createClient(RetryPolicies.noRetry())) { + final int oversizedKb = 1200; + final byte[] bytes = new byte[oversizedKb * 1024]; // > 1040KB + final SimpleMessage message = new SimpleMessage("oversized", ByteString.copyFrom(bytes)); + + testFailureCaseAsync("async oversized request", + () -> client.async().send(message), + IllegalArgumentException.class); + } + }); + } finally { + GrpcConfigKeys.setMessageSizeMax(properties, originalMessageSize); + RaftServerConfigKeys.Log.Appender.setBufferByteLimit(properties, originalBufferLimit); + 
RaftClientConfigKeys.Async.Experimental.setSendDummyRequest(properties, originalSendDummyRequest); + } + } + + @ParameterizedTest + @MethodSource("data") + public void testServerRestartOnException(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); runWithNewCluster(1, this::runTestServerRestartOnException); } @@ -149,8 +184,10 @@ void runTestServerRestartOnException(MiniRaftClusterWithGrpc cluster) throws Exc cluster.getServerFactory(leaderId).newRaftServerRpc(cluster.getServer(leaderId)); } - @Test - public void testUnsupportedMethods() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testUnsupportedMethods(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); runWithNewCluster(1, this::runTestUnsupportedMethods); } @@ -167,8 +204,10 @@ void runTestUnsupportedMethods(MiniRaftClusterWithGrpc cluster) throws Exception UnsupportedOperationException.class); } - @Test - public void testLeaderRestart() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testLeaderRestart(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); runWithNewCluster(3, this::runTestLeaderRestart); } @@ -178,7 +217,7 @@ void runTestLeaderRestart(MiniRaftClusterWithGrpc cluster) throws Exception { try (final RaftClient client = cluster.createClient()) { // send a request to make sure leader is ready final CompletableFuture f = client.async().send(new SimpleMessage("testing")); - Assert.assertTrue(f.get().isSuccess()); + Assertions.assertTrue(f.get().isSuccess()); } try (final RaftClient client = cluster.createClient()) { @@ -189,14 +228,14 @@ void runTestLeaderRestart(MiniRaftClusterWithGrpc cluster) throws Exception { { // send a request using rpc directly final RaftClientRequest request = newRaftClientRequest(client, 
seqNum.incrementAndGet()); - Assert.assertEquals(client.getId(), request.getClientId()); + Assertions.assertEquals(client.getId(), request.getClientId()); final CompletableFuture f = rpc.sendRequestAsync(request); final RaftClientReply reply = f.get(); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); RaftClientTestUtil.handleReply(request, reply, client); invocationId = ClientInvocationId.valueOf(request.getClientId(), request.getCallId()); final RetryCache.Entry entry = leader.getRetryCache().getIfPresent(invocationId); - Assert.assertNotNull(entry); + Assertions.assertNotNull(entry); LOG.info("cache entry {}", entry); } @@ -206,13 +245,13 @@ void runTestLeaderRestart(MiniRaftClusterWithGrpc cluster) throws Exception { final RaftClientRequest requestBlocked = newRaftClientRequest(client, seqNum.incrementAndGet()); final CompletableFuture futureBlocked = rpc.sendRequestAsync(requestBlocked); - JavaUtils.attempt(() -> Assert.assertNull(leader.getRetryCache().getIfPresent(invocationId)), + JavaUtils.attempt(() -> Assertions.assertNull(leader.getRetryCache().getIfPresent(invocationId)), 10, HUNDRED_MILLIS, "invalidate cache entry", LOG); LOG.info("cache entry not found for {}", invocationId); // change leader RaftTestUtil.changeLeader(cluster, leader.getId()); - Assert.assertNotEquals(RaftPeerRole.LEADER, leader.getInfo().getCurrentRole()); + Assertions.assertNotEquals(RaftPeerRole.LEADER, leader.getInfo().getCurrentRole()); // the blocked request should fail testFailureCase("request should fail", futureBlocked::get, @@ -229,13 +268,69 @@ void runTestLeaderRestart(MiniRaftClusterWithGrpc cluster) throws Exception { } - @Test - public void testRaftClientMetrics() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testRaftClientMetrics(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); runWithNewCluster(3, 
this::testRaftClientRequestMetrics); } - @Test - public void testRaftServerMetrics() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testGrpcClientRpcSyncTimeout(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); + runWithNewCluster(3, cluster -> { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); + try (RaftClient client = cluster.createClient(leaderId, RetryPolicies.noRetry())) { + final SimpleStateMachine4Testing stateMachine = SimpleStateMachine4Testing.get(cluster.getLeader()); + stateMachine.blockStartTransaction(); + try { + Assertions.assertThrows(TimeoutIOException.class, + () -> client.io().send(new SimpleMessage("sync-timeout"))); + } finally { + stateMachine.unblockStartTransaction(); + } + } + }); + } + + @ParameterizedTest + @MethodSource("data") + public void testGrpcClientRpcSyncCancelOnInterrupt(Boolean separateHeartbeat) throws Exception { + RaftClientConfigKeys.Rpc.setRequestTimeout(getProperties(), TimeDuration.valueOf(10, TimeUnit.SECONDS)); + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); + runWithNewCluster(3, cluster -> { + final RaftPeerId leaderId = RaftTestUtil.waitForLeader(cluster).getId(); + try (RaftClient client = cluster.createClient(leaderId, RetryPolicies.noRetry())) { + final SimpleStateMachine4Testing stateMachine = SimpleStateMachine4Testing.get(cluster.getLeader()); + stateMachine.blockStartTransaction(); + try { + final AtomicReference error = new AtomicReference<>(); + final Thread t = new Thread(() -> { + try { + client.io().send(new SimpleMessage("sync-cancel")); + } catch (Throwable e) { + error.set(e); + } + }); + t.start(); + Thread.sleep(200); + t.interrupt(); + t.join(5000); + Assertions.assertFalse(t.isAlive(), "request thread should exit after interrupt"); + Assertions.assertTrue(error.get() instanceof InterruptedIOException, + "expected 
InterruptedIOException but got " + error.get()); + } finally { + stateMachine.unblockStartTransaction(); + } + } + }); + } + + @ParameterizedTest + @MethodSource("data") + public void testRaftServerMetrics(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); final RaftProperties p = getProperties(); RaftServerConfigKeys.Write.setElementLimit(p, 10); RaftServerConfigKeys.Write.setByteLimit(p, SizeInBytes.valueOf("1MB")); @@ -252,7 +347,7 @@ void testRequestMetrics(MiniRaftClusterWithGrpc cluster) throws Exception { try (RaftClient client = cluster.createClient()) { // send a request to make sure leader is ready final CompletableFuture< RaftClientReply > f = client.async().send(new SimpleMessage("testing")); - Assert.assertTrue(f.get().isSuccess()); + Assertions.assertTrue(f.get().isSuccess()); } SimpleStateMachine4Testing stateMachine = SimpleStateMachine4Testing.get(cluster.getLeader()); @@ -285,14 +380,14 @@ void testRequestMetrics(MiniRaftClusterWithGrpc cluster) throws Exception { client = cluster.createClient(cluster.getLeader().getId(), RetryPolicies.noRetry()); final SizeInBytes size = SizeInBytes.valueOf("1025kb"); final ByteString bytes = randomByteString(size.getSizeInt()); - Assert.assertEquals(size.getSizeInt(), bytes.size()); + Assertions.assertEquals(size.getSizeInt(), bytes.size()); client.async().send(new SimpleMessage(size + "-message", bytes)); clients.add(client); RaftTestUtil.waitFor(() -> getRaftServerMetrics(cluster.getLeader()) .getNumRequestsByteSizeLimitHits().getCount() == 1, 300, 5000); - Assert.assertEquals(2, getRaftServerMetrics(cluster.getLeader()) + Assertions.assertEquals(2, getRaftServerMetrics(cluster.getLeader()) .getNumResourceLimitHits().getCount()); } finally { for (RaftClient client : clients) { @@ -300,7 +395,7 @@ void testRequestMetrics(MiniRaftClusterWithGrpc cluster) throws Exception { } } } - + static ByteString randomByteString(int size) { final 
ByteString.Output out = ByteString.newOutput(size); final ThreadLocalRandom random = ThreadLocalRandom.current(); @@ -326,36 +421,36 @@ void testRaftClientRequestMetrics(MiniRaftClusterWithGrpc cluster) throws IOExce try (final RaftClient client = cluster.createClient()) { final CompletableFuture f1 = client.async().send(new SimpleMessage("testing")); - Assert.assertTrue(f1.get().isSuccess()); + Assertions.assertTrue(f1.get().isSuccess()); final DefaultTimekeeperImpl write = (DefaultTimekeeperImpl) registry.timer(RAFT_CLIENT_WRITE_REQUEST); - JavaUtils.attempt(() -> Assert.assertTrue(write.getTimer().getCount() > 0), + JavaUtils.attempt(() -> Assertions.assertTrue(write.getTimer().getCount() > 0), 3, TimeDuration.ONE_SECOND, "writeTimer metrics", LOG); final CompletableFuture f2 = client.async().sendReadOnly(new SimpleMessage("testing")); - Assert.assertTrue(f2.get().isSuccess()); + Assertions.assertTrue(f2.get().isSuccess()); final DefaultTimekeeperImpl read = (DefaultTimekeeperImpl) registry.timer(RAFT_CLIENT_READ_REQUEST); - JavaUtils.attempt(() -> Assert.assertTrue(read.getTimer().getCount() > 0), + JavaUtils.attempt(() -> Assertions.assertTrue(read.getTimer().getCount() > 0), 3, TimeDuration.ONE_SECOND, "readTimer metrics", LOG); final CompletableFuture f3 = client.async().sendStaleRead(new SimpleMessage("testing"), 0, leader.getId()); - Assert.assertTrue(f3.get().isSuccess()); + Assertions.assertTrue(f3.get().isSuccess()); final DefaultTimekeeperImpl staleRead = (DefaultTimekeeperImpl) registry.timer(RAFT_CLIENT_STALE_READ_REQUEST); - JavaUtils.attempt(() -> Assert.assertTrue(staleRead.getTimer().getCount() > 0), + JavaUtils.attempt(() -> Assertions.assertTrue(staleRead.getTimer().getCount() > 0), 3, TimeDuration.ONE_SECOND, "staleReadTimer metrics", LOG); final CompletableFuture f4 = client.async().watch(0, RaftProtos.ReplicationLevel.ALL); - Assert.assertTrue(f4.get().isSuccess()); + Assertions.assertTrue(f4.get().isSuccess()); final DefaultTimekeeperImpl 
watchAll = (DefaultTimekeeperImpl) registry.timer( String.format(RAFT_CLIENT_WATCH_REQUEST, "-ALL")); - JavaUtils.attempt(() -> Assert.assertTrue(watchAll.getTimer().getCount() > 0), + JavaUtils.attempt(() -> Assertions.assertTrue(watchAll.getTimer().getCount() > 0), 3, TimeDuration.ONE_SECOND, "watchAllTimer metrics", LOG); final CompletableFuture f5 = client.async().watch(0, RaftProtos.ReplicationLevel.MAJORITY); - Assert.assertTrue(f5.get().isSuccess()); + Assertions.assertTrue(f5.get().isSuccess()); final DefaultTimekeeperImpl watch = (DefaultTimekeeperImpl) registry.timer( String.format(RAFT_CLIENT_WATCH_REQUEST, "")); - JavaUtils.attempt(() -> Assert.assertTrue(watch.getTimer().getCount() > 0), + JavaUtils.attempt(() -> Assertions.assertTrue(watch.getTimer().getCount() > 0), 3, TimeDuration.ONE_SECOND, "watchTimer metrics", LOG); } } @@ -366,8 +461,10 @@ static RaftClientRequest newRaftClientRequest(RaftClient client, long seqNum) { RaftClientRequest.writeRequestType(), ProtoUtils.toSlidingWindowEntry(seqNum, seqNum == 1L)); } - @Test - public void testTlsWithKeyAndTrustManager() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testTlsWithKeyAndTrustManager(Boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); final RaftProperties p = getProperties(); RaftServerConfigKeys.Write.setElementLimit(p, 10); RaftServerConfigKeys.Write.setByteLimit(p, SizeInBytes.valueOf("1MB")); diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java index 2d8524f26c..7c94fb3bf5 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftSnapshotWithGrpc.java @@ -20,29 +20,25 @@ import java.util.Optional; import org.apache.ratis.metrics.LongCounter; -import 
org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.metrics.MetricRegistries; import org.apache.ratis.metrics.MetricRegistryInfo; import org.apache.ratis.metrics.RatisMetricRegistry; import org.apache.ratis.server.RaftServer; import org.apache.ratis.statemachine.RaftSnapshotBaseTest; -import org.junit.Assert; - -public class TestRaftSnapshotWithGrpc extends RaftSnapshotBaseTest { - @Override - public MiniRaftCluster.Factory getFactory() { - return MiniRaftClusterWithGrpc.FACTORY; - } +import org.junit.jupiter.api.Assertions; +public class TestRaftSnapshotWithGrpc + extends RaftSnapshotBaseTest + implements MiniRaftClusterWithGrpc.FactoryGet { @Override protected void verifyInstallSnapshotMetric(RaftServer.Division leader) { MetricRegistryInfo info = new MetricRegistryInfo(leader.getMemberId().toString(), "ratis_grpc", "log_appender", "Metrics for Ratis Grpc Log Appender"); Optional metricRegistry = MetricRegistries.global().get(info); - Assert.assertTrue(metricRegistry.isPresent()); + Assertions.assertTrue(metricRegistry.isPresent()); final LongCounter installSnapshotCounter = metricRegistry.get().counter("num_install_snapshot"); - Assert.assertNotNull(installSnapshotCounter); - Assert.assertTrue(installSnapshotCounter.getCount() >= 1); + Assertions.assertNotNull(installSnapshotCounter); + Assertions.assertTrue(installSnapshotCounter.getCount() >= 1); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftWithGrpc.java index 046453d582..42211cefc1 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftWithGrpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRaftWithGrpc.java @@ -28,21 +28,20 @@ import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; +import org.apache.ratis.test.tag.Flaky; import 
org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Timeout; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; -import java.util.Arrays; -import java.util.Collection; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import static org.apache.ratis.RaftTestUtil.waitForLeader; -@RunWith(Parameterized.class) public class TestRaftWithGrpc extends RaftBasicTests implements MiniRaftClusterWithGrpc.FactoryGet { @@ -52,29 +51,33 @@ public class TestRaftWithGrpc SimpleStateMachine4Testing.class, StateMachine.class); } - public TestRaftWithGrpc(Boolean separateHeartbeat) { - GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); - } - - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList((new Boolean[][] {{Boolean.FALSE}, {Boolean.TRUE}})); + @Disabled + @Override + public void testWithLoad() { + // skip testWithLoad() from parent, called from parameterized testWithLoad(boolean) } - @Override - @Test - public void testWithLoad() throws Exception { + @Flaky("RATIS-2253") + @Timeout(300) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testWithLoad(boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); super.testWithLoad(); BlockRequestHandlingInjection.getInstance().unblockAll(); } - @Test - public void testRequestTimeout() throws Exception { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testRequestTimeout(boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); 
runWithNewCluster(NUM_SERVERS, cluster -> testRequestTimeout(false, cluster, LOG)); } - @Test - public void testUpdateViaHeartbeat() throws Exception { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testUpdateViaHeartbeat(boolean separateHeartbeat) throws Exception { + GrpcConfigKeys.Server.setHeartbeatChannel(getProperties(), separateHeartbeat); runWithNewCluster(NUM_SERVERS, this::runTestUpdateViaHeartbeat); } @@ -91,7 +94,7 @@ void runTestUpdateViaHeartbeat(MiniRaftClusterWithGrpc cluster) throws Exception replyFuture = client.async().send(new RaftTestUtil.SimpleMessage("abc")); TimeDuration.valueOf(5 , TimeUnit.SECONDS).sleep(); // replyFuture should not be completed until append request is unblocked. - Assert.assertFalse(replyFuture.isDone()); + Assertions.assertFalse(replyFuture.isDone()); // unblock append request. cluster.getServerAliveStream() .filter(impl -> !impl.getInfo().isLeader()) @@ -107,9 +110,9 @@ void runTestUpdateViaHeartbeat(MiniRaftClusterWithGrpc cluster) throws Exception final LogEntryHeader[] leaderEntries = leaderLog.getEntries(0, Long.MAX_VALUE); final RaftLog followerLog = raftServer.getRaftLog(); - Assert.assertEquals(leaderNextIndex, followerLog.getNextIndex()); + Assertions.assertEquals(leaderNextIndex, followerLog.getNextIndex()); final LogEntryHeader[] serverEntries = followerLog.getEntries(0, Long.MAX_VALUE); - Assert.assertArrayEquals(serverEntries, leaderEntries); + Assertions.assertArrayEquals(serverEntries, leaderEntries); }, 10, HUNDRED_MILLIS, "assertRaftLog-" + raftServer.getId(), LOG))); // Wait for heartbeats from leader to be received by followers @@ -119,8 +122,8 @@ void runTestUpdateViaHeartbeat(MiniRaftClusterWithGrpc cluster) throws Exception final long leaderNextIndex = leaderLog.getNextIndex(); // FollowerInfo in the leader state should have updated next and match index. 
final long followerMatchIndex = logAppender.getFollower().getMatchIndex(); - Assert.assertTrue(followerMatchIndex >= leaderNextIndex - 1); - Assert.assertEquals(followerMatchIndex + 1, logAppender.getFollower().getNextIndex()); + Assertions.assertTrue(followerMatchIndex >= leaderNextIndex - 1); + Assertions.assertEquals(followerMatchIndex + 1, logAppender.getFollower().getNextIndex()); }, 10, HUNDRED_MILLIS, "assertRaftLog-" + logAppender.getFollower(), LOG))); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRetryCacheWithGrpc.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRetryCacheWithGrpc.java index a39a4d1ef4..5a704689da 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRetryCacheWithGrpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRetryCacheWithGrpc.java @@ -37,8 +37,9 @@ import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.event.Level; import java.io.IOException; @@ -66,7 +67,7 @@ public void testInvalidateRepliedCalls() throws Exception { } static long assertReply(RaftClientReply reply) { - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); return reply.getCallId(); } @@ -88,9 +89,10 @@ void assertRetryCacheEntry(RaftClient client, long callId, boolean exist) throws assertRetryCacheEntry(client, callId, exist, false); } - void assertRetryCacheEntry(RaftClient client, long callId, boolean exist, boolean eventually) throws InterruptedException { + void assertRetryCacheEntry(RaftClient client, long callId, boolean exist, boolean eventually) + throws InterruptedException { Supplier lookup = () -> RetryCacheTestUtil.get(leader, client.getId(), callId); - Consumer assertion = exist ? 
Assert::assertNotNull : Assert::assertNull; + Consumer assertion = exist ? Assertions::assertNotNull : Assertions::assertNull; if (eventually) { JavaUtils.attempt(() -> assertion.accept(lookup.get()), 100, TimeDuration.ONE_MILLISECOND, "retry cache entry", null); @@ -144,7 +146,7 @@ void run() throws Exception { ONE_SECOND.sleep(); // No calls can be completed. for (CompletableFuture f : asyncCalls) { - Assert.assertFalse(f.isDone()); + Assertions.assertFalse(f.isDone()); } stateMachine.unblockApplyTransaction(); // No calls can be invalidated. @@ -170,7 +172,8 @@ void run() throws Exception { } } - @Test(timeout = 10000) + @Test + @Timeout(value = 10) public void testRetryOnResourceUnavailableException() throws InterruptedException, IOException { RaftProperties properties = new RaftProperties(); diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestRetryCacheWithGrpcTracing.java b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRetryCacheWithGrpcTracing.java new file mode 100644 index 0000000000..722eae2cc5 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/TestRetryCacheWithGrpcTracing.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.grpc; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.sdk.testing.junit5.OpenTelemetryExtension; +import io.opentelemetry.sdk.trace.data.SpanData; +import org.apache.ratis.trace.TraceConfigKeys; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import java.util.List; + +public class TestRetryCacheWithGrpcTracing + extends TestRetryCacheWithGrpc { + @RegisterExtension + private static final OpenTelemetryExtension openTelemetryExtension = + OpenTelemetryExtension.create(); + { + TraceConfigKeys.setEnabled(getProperties(), true); + } + + private List spans; + + @BeforeEach + void setUpOpenTelemetry() { + GlobalOpenTelemetry.resetForTest(); + GlobalOpenTelemetry.set(openTelemetryExtension.getOpenTelemetry()); + } + + @AfterEach + void tearDownOpenTelemetry() { + GlobalOpenTelemetry.resetForTest(); + } + + /** + * Verifies traceAsyncRpcSend creates CLIENT spans when tracing is enabled. + * Uses testInvalidateRepliedCalls which exercises client.async().send() (traceAsyncRpcSend path). + * testBasicRetry uses rpc.sendRequest() (blocking) which bypasses the async tracing path. 
+ */ + @Test + public void testBasicRetry() throws Exception { + runWithNewCluster(3, cluster -> new InvalidateRepliedCallsTest(cluster).run()); + + long deadline = System.currentTimeMillis() + 10000; + do { + spans = openTelemetryExtension.getSpans(); + if (!spans.isEmpty()) break; + Thread.sleep(100); + } while (System.currentTimeMillis() < deadline); + + assertTrue( + spans.stream().anyMatch(s -> s.getKind() == SpanKind.CLIENT), + "Expected at least one span with SpanKind.CLIENT (from traceAsyncRpcSend)" + ); + assertTrue( + spans.stream().anyMatch(s -> s.getKind() == SpanKind.SERVER), + "Expected at least one span with SpanKind.SERVER" + ); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/TestGrpcMessageMetrics.java b/ratis-test/src/test/java/org/apache/ratis/grpc/server/TestGrpcMessageMetrics.java similarity index 81% rename from ratis-test/src/test/java/org/apache/ratis/grpc/TestGrpcMessageMetrics.java rename to ratis-test/src/test/java/org/apache/ratis/grpc/server/TestGrpcMessageMetrics.java index 2abba79300..737325d72f 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/TestGrpcMessageMetrics.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/server/TestGrpcMessageMetrics.java @@ -15,13 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.ratis.grpc; +package org.apache.ratis.grpc.server; import org.apache.ratis.BaseTest; +import org.apache.ratis.grpc.MiniRaftClusterWithGrpc; import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.RaftTestUtil; import org.apache.ratis.client.RaftClient; -import org.apache.ratis.grpc.server.GrpcService; import org.apache.ratis.metrics.impl.JvmMetrics; import org.apache.ratis.metrics.RatisMetricRegistry; import org.apache.ratis.protocol.RaftClientReply; @@ -29,8 +29,8 @@ import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; @@ -60,14 +60,15 @@ static void sendMessages(MiniRaftCluster cluster) throws Exception { client.async().send(new RaftTestUtil.SimpleMessage("abc")); } // Wait for commits to happen on leader - JavaUtils.attempt(() -> assertMessageCount(cluster.getLeader()), 100, HUNDRED_MILLIS, cluster.getLeader().getId() + "-assertMessageCount", null); + JavaUtils.attempt(() -> assertMessageCount(cluster.getLeader()), 100, HUNDRED_MILLIS, + cluster.getLeader().getId() + "-assertMessageCount", null); } static void assertMessageCount(RaftServer.Division server) { String serverId = server.getId().toString(); - GrpcService service = (GrpcService) RaftServerTestUtil.getServerRpc(server); - RatisMetricRegistry registry = service.getServerInterceptor().getMetrics().getRegistry(); + final GrpcServicesImpl services = (GrpcServicesImpl) RaftServerTestUtil.getServerRpc(server); + final RatisMetricRegistry registry = services.getMessageMetrics().getRegistry(); String counter_prefix = serverId + "_" + "ratis.grpc.RaftServerProtocolService"; - Assert.assertTrue(registry.counter(counter_prefix + "_" + "requestVote" + 
"_OK_completed_total").getCount() > 0); + Assertions.assertTrue(registry.counter(counter_prefix + "_" + "requestVote" + "_OK_completed_total").getCount() > 0); } -} \ No newline at end of file +} diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/server/TestGrpcServerMetrics.java b/ratis-test/src/test/java/org/apache/ratis/grpc/server/TestGrpcServerMetrics.java index 04f8ded95d..3e6257683c 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/server/TestGrpcServerMetrics.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/server/TestGrpcServerMetrics.java @@ -38,11 +38,12 @@ import org.apache.ratis.protocol.RaftGroupMemberId; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.thirdparty.com.codahale.metrics.Gauge; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import org.mockito.Mockito; +@SuppressWarnings({"rawtypes"}) public class TestGrpcServerMetrics { private static GrpcServerMetrics grpcServerMetrics; private static RatisMetricRegistry ratisMetricRegistry; @@ -50,7 +51,7 @@ public class TestGrpcServerMetrics { private static RaftPeerId raftPeerId; private static RaftPeerId followerId; - @BeforeClass + @BeforeAll public static void setUp() throws Exception { raftGroupId = RaftGroupId.randomId(); raftPeerId = RaftPeerId.valueOf("TestId"); @@ -74,11 +75,11 @@ public void testGrpcLogAppenderLatencyTimer() throws Exception { final String format = RATIS_GRPC_METRICS_LOG_APPENDER_LATENCY + GrpcServerMetrics.getHeartbeatSuffix(heartbeat); final String name = String.format(format, followerId); final DefaultTimekeeperImpl t = (DefaultTimekeeperImpl) ratisMetricRegistry.timer(name); - Assert.assertEquals(0L, t.getTimer().getSnapshot().getMax()); + Assertions.assertEquals(0L, t.getTimer().getSnapshot().getMax()); req.startRequestTimer(); Thread.sleep(1000L); req.stopRequestTimer(); 
- Assert.assertTrue(t.getTimer().getSnapshot().getMax() > 1000L); + Assertions.assertTrue(t.getTimer().getSnapshot().getMax() > 1000L); } } @@ -89,7 +90,7 @@ public void testGrpcLogRequestTotal() { RATIS_GRPC_METRICS_REQUESTS_COUNT + GrpcServerMetrics .getHeartbeatSuffix(heartbeat)).getCount(); grpcServerMetrics.onRequestCreate(heartbeat); - Assert.assertEquals(reqTotal + 1, ratisMetricRegistry.counter( + Assertions.assertEquals(reqTotal + 1, ratisMetricRegistry.counter( RATIS_GRPC_METRICS_REQUESTS_COUNT + GrpcServerMetrics .getHeartbeatSuffix(heartbeat)).getCount()); } @@ -97,9 +98,9 @@ public void testGrpcLogRequestTotal() { @Test public void testGrpcLogRequestRetry() { - Assert.assertEquals(0L, ratisMetricRegistry.counter(RATIS_GRPC_METRICS_REQUEST_RETRY_COUNT).getCount()); + Assertions.assertEquals(0L, ratisMetricRegistry.counter(RATIS_GRPC_METRICS_REQUEST_RETRY_COUNT).getCount()); grpcServerMetrics.onRequestRetry(); - Assert.assertEquals(1L, ratisMetricRegistry.counter(RATIS_GRPC_METRICS_REQUEST_RETRY_COUNT).getCount()); + Assertions.assertEquals(1L, ratisMetricRegistry.counter(RATIS_GRPC_METRICS_REQUEST_RETRY_COUNT).getCount()); } @Test @@ -110,9 +111,9 @@ public void testGrpcLogPendingRequestCount() { pendingRequest::logRequestsSize); final String name = String.format(RATIS_GRPC_METRICS_LOG_APPENDER_PENDING_COUNT, raftPeerId); final Gauge gauge = ServerMetricsTestUtils.getGaugeWithName(name, grpcServerMetrics::getRegistry); - Assert.assertEquals(0, gauge.getValue()); + Assertions.assertEquals(0, gauge.getValue()); when(pendingRequest.logRequestsSize()).thenReturn(10); - Assert.assertEquals(10, gauge.getValue()); + Assertions.assertEquals(10, gauge.getValue()); } @Test @@ -133,8 +134,8 @@ public void testGrpcLogAppenderRequestCounters() { private void assertCounterIncremented(String counterVar, Consumer incFunction) { String counter = String.format(counterVar, raftPeerId.toString()); - Assert.assertEquals(0L, ratisMetricRegistry.counter(counter).getCount()); 
+ Assertions.assertEquals(0L, ratisMetricRegistry.counter(counter).getCount()); incFunction.accept(raftPeerId.toString()); - Assert.assertEquals(1L, ratisMetricRegistry.counter(counter).getCount()); + Assertions.assertEquals(1L, ratisMetricRegistry.counter(counter).getCount()); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/util/GrpcZeroCopyTestServer.java b/ratis-test/src/test/java/org/apache/ratis/grpc/util/GrpcZeroCopyTestServer.java index e1bfe4e222..af7991a416 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/util/GrpcZeroCopyTestServer.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/util/GrpcZeroCopyTestServer.java @@ -32,7 +32,7 @@ import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver; import org.apache.ratis.util.IOUtils; import org.apache.ratis.util.TraditionalBinaryPrefix; -import org.junit.Assert; +import org.junit.jupiter.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,14 +75,23 @@ public synchronized String toString() { private final Count zeroCopyCount = new Count(); private final Count nonZeroCopyCount = new Count(); + private final Count releasedCount = new Count(); private final Server server; + // Allow tests to disable release to validate leak detection. 
+ private final boolean releaseRequests; private final ZeroCopyMessageMarshaller marshaller = new ZeroCopyMessageMarshaller<>( BinaryRequest.getDefaultInstance(), zeroCopyCount::inc, - nonZeroCopyCount::inc); + nonZeroCopyCount::inc, + releasedCount::inc); GrpcZeroCopyTestServer(int port) { + this(port, true); + } + + GrpcZeroCopyTestServer(int port, boolean releaseRequests) { + this.releaseRequests = releaseRequests; final GreeterImpl greeter = new GreeterImpl(); final MethodDescriptor binary = GreeterGrpc.getBinaryMethod(); final String binaryFullMethodName = binary.getFullMethodName(); @@ -115,10 +124,10 @@ Count getNonZeroCopyCount() { void assertCounts(int expectNumElements, long expectNumBytes) { LOG.info("ZeroCopyCount = {}", zeroCopyCount); LOG.info("nonZeroCopyCount = {}", nonZeroCopyCount); - Assert.assertEquals("zeroCopyCount.getNumElements()", expectNumElements, zeroCopyCount.getNumElements()); - Assert.assertEquals("zeroCopyCount.getNumBytes()", expectNumBytes, zeroCopyCount.getNumBytes()); - Assert.assertEquals("nonZeroCopyCount.getNumElements()", 0, nonZeroCopyCount.getNumElements()); - Assert.assertEquals("nonZeroCopyCount.getNumBytes()", 0, nonZeroCopyCount.getNumBytes()); + Assertions.assertEquals(expectNumElements, zeroCopyCount.getNumElements(), "zeroCopyCount.getNumElements()"); + Assertions.assertEquals(expectNumBytes, zeroCopyCount.getNumBytes()," zeroCopyCount.getNumBytes()"); + Assertions.assertEquals(0, nonZeroCopyCount.getNumElements(), "nonZeroCopyCount.getNumElements()"); + Assertions.assertEquals(0, nonZeroCopyCount.getNumBytes(), "nonZeroCopyCount.getNumBytes()"); } int start() throws IOException { @@ -128,11 +137,29 @@ int start() throws IOException { @Override public void close() throws IOException { + // Shutdown server first, then assert no leaked streams and cleanup if needed. 
+ IOException ioe = null; try { server.shutdown().awaitTermination(5, TimeUnit.SECONDS); } catch (InterruptedException e) { Thread.currentThread().interrupt(); - throw IOUtils.toInterruptedIOException("Failed to close", e); + ioe = IOUtils.toInterruptedIOException("Failed to close", e); + } + + try { + marshaller.assertNoUnclosedStreams(); + } catch (RuntimeException e) { + if (ioe != null) { + ioe.addSuppressed(e); + throw ioe; + } + throw e; + } finally { + marshaller.close(); + } + + if (ioe != null) { + throw ioe; } } @@ -179,7 +206,9 @@ public void onNext(BinaryRequest request) { ByteBuffer.wrap(bytes).putInt(data.size()); responseObserver.onNext(BinaryReply.newBuilder().setData(UnsafeByteOperations.unsafeWrap(bytes)).build()); } finally { - marshaller.release(request); + if (releaseRequests) { + marshaller.release(request); + } } } diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/util/TestGrpcZeroCopy.java b/ratis-test/src/test/java/org/apache/ratis/grpc/util/TestGrpcZeroCopy.java index 032a9c1db5..dde44e5799 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/util/TestGrpcZeroCopy.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/util/TestGrpcZeroCopy.java @@ -26,8 +26,12 @@ import org.apache.ratis.thirdparty.io.netty.buffer.PooledByteBufAllocator; import org.apache.ratis.util.NetUtils; import org.apache.ratis.util.TraditionalBinaryPrefix; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.util.ArrayList; import java.util.List; @@ -39,29 +43,29 @@ */ public final class TestGrpcZeroCopy extends BaseTest { static class RandomData { - private static final Random random = new Random(); - private static final byte[] array = new byte[4096]; + 
private static final Random RANDOM = new Random(); + private static final byte[] ARRAY = new byte[4096]; static void fill(long seed, int size, ByteBuf buf) { - random.setSeed(seed); + RANDOM.setSeed(seed); for(int offset = 0; offset < size; ) { - final int remaining = Math.min(size - offset, array.length); - random.nextBytes(array); - buf.writeBytes(array, 0, remaining); + final int remaining = Math.min(size - offset, ARRAY.length); + RANDOM.nextBytes(ARRAY); + buf.writeBytes(ARRAY, 0, remaining); offset += remaining; } } static void verify(long seed, ByteString b) { - random.setSeed(seed); + RANDOM.setSeed(seed); final int size = b.size(); for(int offset = 0; offset < size; ) { - final int remaining = Math.min(size - offset, array.length); - random.nextBytes(array); - final ByteString expected = UnsafeByteOperations.unsafeWrap(array, 0, remaining); + final int remaining = Math.min(size - offset, ARRAY.length); + RANDOM.nextBytes(ARRAY); + final ByteString expected = UnsafeByteOperations.unsafeWrap(ARRAY, 0, remaining); final ByteString computed = b.substring(offset, offset + remaining); - Assert.assertEquals(expected.size(), computed.size()); - Assert.assertEquals(expected, computed); + assertEquals(expected.size(), computed.size()); + assertEquals(expected, computed); offset += remaining; } } @@ -99,7 +103,7 @@ public static boolean isReady() { /** Test a zero-copy marshaller is available from the versions of gRPC and Protobuf. */ @Test public void testReadiness() { - Assert.assertTrue(isReady()); + assertTrue(isReady()); } @@ -108,6 +112,18 @@ public void testZeroCopy() throws Exception { runTestZeroCopy(); } + @Test + public void testLeakCheck() throws Exception { + // Verify leak detection by disabling release on the server side. 
+ assumeTrue(isReady()); + final GrpcZeroCopyTestServer server = new GrpcZeroCopyTestServer(NetUtils.getFreePort(), false); + final int port = server.start(); + try (GrpcZeroCopyTestClient client = new GrpcZeroCopyTestClient(NetUtils.LOCALHOST, port)) { + sendBinaries(1, client, server); + } + assertThrows(IllegalStateException.class, server::close); + } + void runTestZeroCopy() throws Exception { try (GrpcZeroCopyTestServer server = new GrpcZeroCopyTestServer(NetUtils.getFreePort())) { final int port = server.start(); @@ -134,7 +150,7 @@ void sendMessages(int n, GrpcZeroCopyTestClient client, GrpcZeroCopyTestServer s for (int i = 0; i < futures.size(); i++) { final String expected = GrpcZeroCopyTestServer.toReply(i, messages.get(i)); final String reply = futures.get(i).get(); - Assert.assertEquals("expected = " + expected + " != reply = " + reply, expected, reply); + assertEquals(expected, reply, "expected = " + expected + " != reply = " + reply); server.assertCounts(numElements, numBytes); } } @@ -159,8 +175,8 @@ void sendBinaries(int n, GrpcZeroCopyTestClient client, GrpcZeroCopyTestServer s } final ByteString reply = future.get(); - Assert.assertEquals(4, reply.size()); - Assert.assertEquals(size, reply.asReadOnlyByteBuffer().getInt()); + assertEquals(4, reply.size()); + assertEquals(size, reply.asReadOnlyByteBuffer().getInt()); numElements++; numBytes += size; diff --git a/ratis-test/src/test/java/org/apache/ratis/grpc/util/TestStreamObserverWithTimeout.java b/ratis-test/src/test/java/org/apache/ratis/grpc/util/TestStreamObserverWithTimeout.java index d0c936aa40..b279736f39 100644 --- a/ratis-test/src/test/java/org/apache/ratis/grpc/util/TestStreamObserverWithTimeout.java +++ b/ratis-test/src/test/java/org/apache/ratis/grpc/util/TestStreamObserverWithTimeout.java @@ -25,8 +25,8 @@ import org.apache.ratis.util.StringUtils; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.TimeoutTimer; -import org.junit.Assert; -import org.junit.Test; 
+import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.util.ArrayList; @@ -106,7 +106,7 @@ void runTestTimeout(int slow, Type type) throws Exception { for (; i < slow; i++) { final String expected = (i + warmup) + GrpcTestServer.GreeterImpl.toReplySuffix(messages.get(i)); final String reply = futures.get(i).get(); - Assert.assertEquals(expected, reply); + Assertions.assertEquals(expected, reply); LOG.info("{}) passed", (i + warmup)); } @@ -114,10 +114,10 @@ void runTestTimeout(int slow, Type type) throws Exception { final CompletableFuture f = futures.get(i); try { final String reply = f.get(); - Assert.fail((i + warmup) + ") reply = " + reply + ", " + Assertions.fail((i + warmup) + ") reply = " + reply + ", " + StringUtils.completableFuture2String(f, false)); } catch (ExecutionException e) { - LOG.info("GOOD! {}) {}, {}", (i + warmup), StringUtils.completableFuture2String(f, true), e); + LOG.info("GOOD! {}) {}", (i + warmup), StringUtils.completableFuture2String(f, true), e); } } } diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/TestLeaderElectionWithNetty.java b/ratis-test/src/test/java/org/apache/ratis/netty/TestLeaderElectionWithNetty.java index f84bbb7360..0b77b5beda 100644 --- a/ratis-test/src/test/java/org/apache/ratis/netty/TestLeaderElectionWithNetty.java +++ b/ratis-test/src/test/java/org/apache/ratis/netty/TestLeaderElectionWithNetty.java @@ -19,7 +19,7 @@ import org.apache.ratis.server.impl.BlockRequestHandlingInjection; import org.apache.ratis.server.impl.LeaderElectionTests; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestLeaderElectionWithNetty extends LeaderElectionTests @@ -29,7 +29,7 @@ public class TestLeaderElectionWithNetty public void testEnforceLeader() throws Exception { super.testEnforceLeader(); - MiniRaftClusterWithNetty.sendServerRequest.clear(); + MiniRaftClusterWithNetty.SEND_SERVER_REQUEST.clear(); 
BlockRequestHandlingInjection.getInstance().unblockAll(); } diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/TestNettyRpcProxy.java b/ratis-test/src/test/java/org/apache/ratis/netty/TestNettyRpcProxy.java new file mode 100644 index 0000000000..2507220a8e --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/netty/TestNettyRpcProxy.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.netty; + +import org.apache.ratis.BaseTest; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.protocol.exceptions.AlreadyClosedException; +import org.apache.ratis.proto.RaftProtos.GroupListRequestProto; +import org.apache.ratis.proto.RaftProtos.RaftRpcRequestProto; +import org.apache.ratis.proto.netty.NettyProtos.RaftNettyServerReplyProto; +import org.apache.ratis.proto.netty.NettyProtos.RaftNettyServerRequestProto; +import org.apache.ratis.thirdparty.io.netty.bootstrap.ServerBootstrap; +import org.apache.ratis.thirdparty.io.netty.channel.Channel; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelFuture; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelHandlerContext; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelInitializer; +import org.apache.ratis.thirdparty.io.netty.channel.EventLoopGroup; +import org.apache.ratis.thirdparty.io.netty.channel.SimpleChannelInboundHandler; +import org.apache.ratis.thirdparty.io.netty.channel.socket.SocketChannel; +import org.apache.ratis.util.JavaUtils; +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Field; +import java.net.InetSocketAddress; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; + +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestNettyRpcProxy extends BaseTest { + @Test + public void testOfferRollbackOnAlreadyClosed() throws Exception { + // Minimal netty server to allow client connect; we don't need to process requests. 
+ final EventLoopGroup bossGroup = NettyUtils.newEventLoopGroup("test-netty-boss", 1, false); + final EventLoopGroup workerGroup = NettyUtils.newEventLoopGroup("test-netty-worker", 1, false); + final EventLoopGroup clientGroup = NettyUtils.newEventLoopGroup("test-netty-client", 1, false); + Channel serverChannel = null; + NettyRpcProxy proxy = null; + try { + final ChannelFuture bindFuture = new ServerBootstrap() + .group(bossGroup, workerGroup) + .channel(NettyUtils.getServerChannelClass(workerGroup)) + .childHandler(new ChannelInitializer() { + @Override + protected void initChannel(SocketChannel ch) { + ch.pipeline().addLast(new SimpleChannelInboundHandler() { + @Override + protected void channelRead0(ChannelHandlerContext ctx, Object msg) { + } + }); + } + }) + .bind("localhost", 0) + .sync(); + serverChannel = bindFuture.channel(); + + final InetSocketAddress address = (InetSocketAddress) serverChannel.localAddress(); + final String peerAddress = address.getHostString() + ":" + address.getPort(); + final RaftPeer peer = RaftPeer.newBuilder().setId("s0").setAddress(peerAddress).build(); + proxy = new NettyRpcProxy(peer, new RaftProperties(), clientGroup); + + // Close to force AlreadyClosedException on write and trigger rollback logic. + proxy.close(); + final RaftRpcRequestProto rpcRequest = RaftRpcRequestProto.newBuilder() + .setCallId(1) + .build(); + final GroupListRequestProto groupListRequest = GroupListRequestProto.newBuilder() + .setRpcRequest(rpcRequest) + .build(); + final RaftNettyServerRequestProto request = RaftNettyServerRequestProto.newBuilder() + .setGroupListRequest(groupListRequest) + .build(); + final CompletableFuture reply = + proxy.sendAsync(request); + + // Ensure the future completes exceptionally with AlreadyClosedException. 
+ final Throwable thrown = assertThrows(CompletionException.class, reply::join); + final Throwable unwrapped = JavaUtils.unwrapCompletionException(thrown); + assertInstanceOf(AlreadyClosedException.class, unwrapped); + + // The replies queue must be empty after rollback; use reflection to reach it. + final Object connection = getField(proxy, "connection"); + final Map replies = getField(connection, "replies"); + assertTrue(replies.isEmpty()); + } finally { + if (proxy != null) { + proxy.close(); + } + if (serverChannel != null) { + serverChannel.close().sync(); + } + bossGroup.shutdownGracefully(); + workerGroup.shutdownGracefully(); + clientGroup.shutdownGracefully(); + } + } + + @SuppressWarnings("unchecked") + private static T getField(Object target, String name) throws Exception { + final Field field = target.getClass().getDeclaredField(name); + field.setAccessible(true); + return (T) field.get(target); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftAsyncWithNetty.java b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftAsyncWithNetty.java new file mode 100644 index 0000000000..c3e19a26e5 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftAsyncWithNetty.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.netty; + +import org.apache.ratis.RaftAsyncTests; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.server.RaftServer; +import org.apache.ratis.util.Slf4jUtils; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +import org.slf4j.event.Level; + +@Timeout(100) +public class TestRaftAsyncWithNetty + extends RaftAsyncTests + implements MiniRaftClusterWithNetty.FactoryGet { + { + Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.INFO); + Slf4jUtils.setLogLevel(RaftClient.LOG, Level.INFO); + } + + @Override + @Test + @Timeout(500) + public void testWithLoadAsync() throws Exception { + super.testWithLoadAsync(); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java index f1340efc74..ae16f41ed8 100644 --- a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java +++ b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftSnapshotWithNetty.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,12 +17,9 @@ */ package org.apache.ratis.netty; -import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.statemachine.RaftSnapshotBaseTest; -public class TestRaftSnapshotWithNetty extends RaftSnapshotBaseTest { - @Override - public MiniRaftCluster.Factory getFactory() { - return MiniRaftClusterWithNetty.FACTORY; - } +public class TestRaftSnapshotWithNetty + extends RaftSnapshotBaseTest + implements MiniRaftClusterWithNetty.FactoryGet { } diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftWithNetty.java b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftWithNetty.java index 28815d76ce..55b9c37286 100644 --- a/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftWithNetty.java +++ b/ratis-test/src/test/java/org/apache/ratis/netty/TestRaftWithNetty.java @@ -19,7 +19,7 @@ import org.apache.ratis.RaftBasicTests; import org.apache.ratis.server.impl.BlockRequestHandlingInjection; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestRaftWithNetty extends RaftBasicTests diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/TestTlsConfWithNetty.java b/ratis-test/src/test/java/org/apache/ratis/netty/TestTlsConfWithNetty.java index db967e391f..abbc56934d 100644 --- a/ratis-test/src/test/java/org/apache/ratis/netty/TestTlsConfWithNetty.java +++ b/ratis-test/src/test/java/org/apache/ratis/netty/TestTlsConfWithNetty.java @@ -38,8 +38,8 @@ import org.apache.ratis.thirdparty.io.netty.handler.logging.LoggingHandler; import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContext; import org.apache.ratis.util.JavaUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,8 +57,9 @@ /** * Testing {@link TlsConf} and the security related utility methods in {@link NettyUtils}. 
*/ +@SuppressWarnings({"try"}) public class TestTlsConfWithNetty { - private final static Logger LOG = LoggerFactory.getLogger(TestTlsConfWithNetty.class); + private static final Logger LOG = LoggerFactory.getLogger(TestTlsConfWithNetty.class); static String buffer2String(ByteBuf buf){ try { @@ -112,7 +113,7 @@ static void runTest(int port, TlsConf serverSslConf, TlsConf clientSslConf) thro final CompletableFuture future = replyFutures.get(i); final String reply = future.get(3, TimeUnit.SECONDS); LOG.info(reply); - Assert.assertEquals(NettyTestServer.toReply(words[i]), reply); + Assertions.assertEquals(NettyTestServer.toReply(words[i]), reply); } } } diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyClientReplies.java b/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyClientReplies.java new file mode 100644 index 0000000000..5e22761e24 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyClientReplies.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.netty.client; + +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.ClientInvocationId; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; + +public class TestNettyClientReplies { + @Test + public void testGetReplyMapDoesNotCreate() { + final NettyClientReplies replies = new NettyClientReplies(); + final ClientInvocationId clientInvocationId = + ClientInvocationId.valueOf(ClientId.randomId(), 1L); + + assertNull(replies.getReplyMap(clientInvocationId)); + + final NettyClientReplies.ReplyMap created = replies.getOrCreateReplyMap(clientInvocationId); + assertNotNull(created); + assertSame(created, replies.getReplyMap(clientInvocationId)); + + final ClientInvocationId other = + ClientInvocationId.valueOf(ClientId.randomId(), 2L); + assertNull(replies.getReplyMap(other)); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyClientStreamRpcReconnectBackoff.java b/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyClientStreamRpcReconnectBackoff.java new file mode 100644 index 0000000000..304c488fc1 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyClientStreamRpcReconnectBackoff.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.netty.client; + +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.netty.NettyConfigKeys; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.retry.ExponentialBackoffRetry; +import org.apache.ratis.retry.RetryPolicy; +import org.apache.ratis.util.TimeDuration; +import org.junit.jupiter.api.Test; + +import java.net.InetSocketAddress; +import java.util.concurrent.TimeUnit; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestNettyClientStreamRpcReconnectBackoff { + @Test + public void testReconnectPolicyBackoffRanges() throws Exception { + // Use a small base/max to keep the test fast and deterministic in range checks. + final RaftProperties properties = new RaftProperties(); + final TimeDuration base = TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); + final TimeDuration max = TimeDuration.valueOf(400, TimeUnit.MILLISECONDS); + final int maxAttempts = 5; + NettyConfigKeys.DataStream.Client.setReconnectPolicy(properties, + "ExponentialBackoffRetry," + base + "," + max + "," + maxAttempts); + + final RaftPeer peer = RaftPeer.newBuilder() + .setId("s1") + .setDataStreamAddress(new InetSocketAddress("127.0.0.1", 1)) + .build(); + + final NettyClientStreamRpc rpc = new NettyClientStreamRpc(peer, null, properties); + try { + // Verify the reconnect policy is exponential and uses the configured maxAttempts. 
+ final RetryPolicy policy = rpc.getReconnectPolicy(); + assertTrue(policy instanceof ExponentialBackoffRetry); + assertFalse(policy.handleAttemptFailure(() -> maxAttempts).shouldRetry()); + + // attempt=0 -> base delay; attempt=1 -> 2x base; attempt=3 -> capped by max. + assertSleepInRange(policy, 0, base, max); + assertSleepInRange(policy, 1, base, max); + // Attempt 3 should be capped by max sleep time. + assertSleepInRange(policy, 3, base, max); + } finally { + rpc.close(); + } + } + + private static void assertSleepInRange(RetryPolicy policy, int attempt, TimeDuration base, TimeDuration max) { + final RetryPolicy.Action action = policy.handleAttemptFailure(() -> attempt); + assertTrue(action.shouldRetry()); + + final long baseMillis = base.toLong(TimeUnit.MILLISECONDS); + final long maxMillis = max.toLong(TimeUnit.MILLISECONDS); + final long expected = Math.min(maxMillis, baseMillis * (1L << attempt)); + final long actual = action.getSleepTime().toLong(TimeUnit.MILLISECONDS); + + assertTrue(actual >= expected / 2, "delay too small: " + actual); + assertTrue(actual <= expected + expected / 2, "delay too large: " + actual); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyDataStreamReconnectWithGrpcCluster.java b/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyDataStreamReconnectWithGrpcCluster.java new file mode 100644 index 0000000000..2be0bc2607 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/netty/client/TestNettyDataStreamReconnectWithGrpcCluster.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ratis.netty.client; + +import org.apache.ratis.BaseTest; +import org.apache.ratis.RaftTestUtil; +import org.apache.ratis.client.DataStreamClient; +import org.apache.ratis.client.RaftClient; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.datastream.DataStreamTestUtils.MultiDataStreamStateMachine; +import org.apache.ratis.datastream.MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty; +import org.apache.ratis.netty.NettyConfigKeys; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.retry.ExponentialBackoffRetry; +import org.apache.ratis.retry.RetryPolicy; +import org.apache.ratis.util.IOUtils; +import org.apache.ratis.util.TimeDuration; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +import java.util.concurrent.TimeUnit; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +@Timeout(value = 120) +public class TestNettyDataStreamReconnectWithGrpcCluster extends BaseTest + implements MiniRaftClusterWithRpcTypeGrpcAndDataStreamTypeNetty.FactoryGet { + { + setStateMachine(MultiDataStreamStateMachine.class); + } + + @Test + public void testReconnectConfigApplied() throws Exception { + final RaftProperties properties = getProperties(); + final TimeDuration reconnectDelay = TimeDuration.valueOf(200, TimeUnit.MILLISECONDS); + final TimeDuration reconnectMaxDelay = TimeDuration.valueOf(400, TimeUnit.MILLISECONDS); + NettyConfigKeys.DataStream.Client.setReconnectPolicy(properties, + "ExponentialBackoffRetry," + reconnectDelay + "," + reconnectMaxDelay + 
",10"); + + runWithNewCluster(1, cluster -> { + RaftTestUtil.waitForLeader(cluster); + final RaftPeer primary = cluster.getLeader().getPeer(); + + final RaftClient client = cluster.createClient(primary); + try { + final DataStreamClient dataStreamClient = (DataStreamClient) client.getDataStreamApi(); + final NettyClientStreamRpc rpc = (NettyClientStreamRpc) dataStreamClient.getClientRpc(); + + // Verify reconnect configuration is applied. + final RetryPolicy policy = rpc.getReconnectPolicy(); + assertTrue(policy instanceof ExponentialBackoffRetry); + assertSleepInRange(policy, 0, reconnectDelay, reconnectMaxDelay); + assertSleepInRange(policy, 1, reconnectDelay, reconnectMaxDelay); + + // Verify the data stream channel can be established. + assertTrue(rpc.waitForChannelActive(TimeDuration.valueOf(5, TimeUnit.SECONDS)), + "Data stream channel should be active"); + } finally { + IOUtils.cleanup(LOG, client); + } + }); + } + + private static void assertSleepInRange(RetryPolicy policy, int attempt, TimeDuration base, TimeDuration max) { + final RetryPolicy.Action action = policy.handleAttemptFailure(() -> attempt); + assertTrue(action.shouldRetry()); + + final long baseMillis = base.toLong(TimeUnit.MILLISECONDS); + final long maxMillis = max.toLong(TimeUnit.MILLISECONDS); + final long expected = Math.min(maxMillis, baseMillis * (1L << attempt)); + final long actual = action.getSleepTime().toLong(TimeUnit.MILLISECONDS); + + assertTrue(actual >= expected / 2, "delay too small: " + actual); + assertTrue(actual <= expected + expected / 2, "delay too large: " + actual); + } + +} diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java b/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java new file mode 100644 index 0000000000..5c06ddd319 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.netty.server; + +import org.apache.ratis.client.impl.DataStreamClientImpl.DataStreamOutputImpl; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; +import org.apache.ratis.io.StandardWriteOption; +import org.apache.ratis.netty.metrics.NettyServerStreamRpcMetrics; +import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.RaftClientRequest; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.server.RaftServer; +import org.apache.ratis.thirdparty.io.netty.buffer.Unpooled; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelHandlerContext; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelId; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelInboundHandlerAdapter; +import org.apache.ratis.thirdparty.io.netty.channel.embedded.EmbeddedChannel; +import org.apache.ratis.util.function.CheckedBiFunction; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.lang.reflect.Proxy; +import java.util.Collections; +import 
java.util.Set; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +class TestDataStreamManagement { + @Test + void readCleansChannelMapOnEarlyException() throws Exception { + // Scenario: STREAM_DATA arrives without prior STREAM_HEADER, so readImpl fails early. + // Expectation: read(...) catch path must still remove channelId->invocationId mapping + // to avoid leaks when the channel remains active. + final RaftPeerId serverId = RaftPeerId.valueOf("s1"); + final RaftProperties properties = new RaftProperties(); + final RaftServer server = newRaftServer(serverId, properties); + + final NettyServerStreamRpcMetrics metrics = new NettyServerStreamRpcMetrics("s1"); + final DataStreamManagement management = new DataStreamManagement(server, metrics); + + // Use a real Netty pipeline to obtain a concrete ChannelHandlerContext. + final EmbeddedChannel embeddedChannel = new EmbeddedChannel(new ChannelInboundHandlerAdapter()); + final ChannelHandlerContext ctx = embeddedChannel.pipeline().firstContext(); + assertNotNull(ctx, "ChannelHandlerContext should be initialized"); + final ChannelId channelId = embeddedChannel.id(); + + final DataStreamRequestByteBuf request = new DataStreamRequestByteBuf( + ClientId.randomId(), + Type.STREAM_DATA, + 1L, + 0L, + Collections.singletonList(StandardWriteOption.CLOSE), + Unpooled.buffer(0)); + + final CheckedBiFunction, Set, IOException> getStreams = + (r, p) -> Collections.emptySet(); + + try { + // This read should fail early (missing stream info) and must clear ChannelMap entries. 
+ management.read(request, ctx, getStreams); + assertEquals(0, management.getChannelInvocationCount(channelId), + "channel map should be cleared on early read failure"); + } finally { + embeddedChannel.finishAndReleaseAll(); + management.shutdown(); + } + } + + private static RaftServer newRaftServer(RaftPeerId serverId, RaftProperties properties) { + return (RaftServer) Proxy.newProxyInstance(TestDataStreamManagement.class.getClassLoader(), + new Class[]{RaftServer.class}, + (proxy, method, args) -> { + if (method.getDeclaringClass() == Object.class) { + switch (method.getName()) { + case "toString": + return "RaftServerProxy(" + serverId + ")"; + case "hashCode": + return System.identityHashCode(proxy); + case "equals": + return proxy == args[0]; + default: + return null; + } + } + switch (method.getName()) { + case "getId": + return serverId; + case "getProperties": + return properties; + default: + throw new UnsupportedOperationException("Unexpected RaftServer call: " + method); + } + }); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/protocol/TestRaftGroup.java b/ratis-test/src/test/java/org/apache/ratis/protocol/TestRaftGroup.java index 5267b22385..f4a88e40fe 100644 --- a/ratis-test/src/test/java/org/apache/ratis/protocol/TestRaftGroup.java +++ b/ratis-test/src/test/java/org/apache/ratis/protocol/TestRaftGroup.java @@ -18,27 +18,25 @@ package org.apache.ratis.protocol; import org.apache.ratis.BaseTest; -import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.util.LinkedList; import java.util.List; import java.util.UUID; +@Timeout(value = 1) public class TestRaftGroup extends BaseTest { - @Override - public int getGlobalTimeoutSeconds() { - return 1; - } - @Test(expected = IllegalStateException.class) + @Test public void testDuplicatePeerId() throws Exception 
{ UUID groupId = UUID.fromString("02511d47-d67c-49a3-9011-abb3109a44c1"); List peers = new LinkedList<>(); peers.add(RaftPeer.newBuilder().setId("n0").build()); peers.add(RaftPeer.newBuilder().setId("n0").build()); - RaftGroup.valueOf(RaftGroupId.valueOf(groupId), peers); + Assertions.assertThrows(IllegalStateException.class, + () -> RaftGroup.valueOf(RaftGroupId.valueOf(groupId), peers)); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/protocol/TestRaftId.java b/ratis-test/src/test/java/org/apache/ratis/protocol/TestRaftId.java index 6610b3d049..907235e117 100644 --- a/ratis-test/src/test/java/org/apache/ratis/protocol/TestRaftId.java +++ b/ratis-test/src/test/java/org/apache/ratis/protocol/TestRaftId.java @@ -19,15 +19,21 @@ import org.apache.ratis.BaseTest; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; -import org.junit.Assert; -import org.junit.Test; +import org.apache.ratis.util.WeakValueCache; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.util.UUID; +@Timeout(value = 1) public class TestRaftId extends BaseTest { - @Override - public int getGlobalTimeoutSeconds() { - return 1; + public static WeakValueCache getClientIdCache() { + return ClientId.getCache(); + } + + public static WeakValueCache getRaftGroupIdCache() { + return RaftGroupId.getCache(); } @Test @@ -39,35 +45,35 @@ public void testRaftId() { static void assertRaftId(UUID original, ByteString expected) { final ByteString bytes = RaftId.toByteString(original); if (expected != null) { - Assert.assertEquals(expected, bytes); + Assertions.assertEquals(expected, bytes); } final UUID computed = RaftId.toUuid(bytes); - Assert.assertEquals(original, computed); - Assert.assertEquals(bytes, RaftId.toByteString(computed)); + Assertions.assertEquals(original, computed); + Assertions.assertEquals(bytes, RaftId.toByteString(computed)); } @Test public void testClientId() { final ClientId id = 
ClientId.randomId(); final ByteString bytes = id.toByteString(); - Assert.assertEquals(bytes, id.toByteString()); - Assert.assertEquals(id, ClientId.valueOf(bytes)); + Assertions.assertEquals(bytes, id.toByteString()); + Assertions.assertEquals(id, ClientId.valueOf(bytes)); } @Test public void testRaftGroupId() { final RaftGroupId id = RaftGroupId.randomId(); final ByteString bytes = id.toByteString(); - Assert.assertEquals(bytes, id.toByteString()); - Assert.assertEquals(id, RaftGroupId.valueOf(bytes)); + Assertions.assertEquals(bytes, id.toByteString()); + Assertions.assertEquals(id, RaftGroupId.valueOf(bytes)); } @Test public void testRaftPeerId() { final RaftPeerId id = RaftPeerId.valueOf("abc"); final ByteString bytes = id.toByteString(); - Assert.assertEquals(bytes, id.toByteString()); - Assert.assertEquals(id, RaftPeerId.valueOf(bytes)); + Assertions.assertEquals(bytes, id.toByteString()); + Assertions.assertEquals(id, RaftPeerId.valueOf(bytes)); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/protocol/TestRoutingTable.java b/ratis-test/src/test/java/org/apache/ratis/protocol/TestRoutingTable.java index 58bdf07fac..f3e08ece7a 100644 --- a/ratis-test/src/test/java/org/apache/ratis/protocol/TestRoutingTable.java +++ b/ratis-test/src/test/java/org/apache/ratis/protocol/TestRoutingTable.java @@ -18,14 +18,12 @@ package org.apache.ratis.protocol; import org.apache.ratis.BaseTest; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +@Timeout(value = 1) public class TestRoutingTable extends BaseTest { - @Override - public int getGlobalTimeoutSeconds() { - return 1; - } private final RaftPeerId[] peers = new RaftPeerId[10]; @@ -69,7 +67,7 @@ RoutingTable newRoutingTable(int... peerIndices) { } void testFailureCase(String name, int... 
peerIndices) { - Assert.assertEquals(0, peerIndices.length % 2); + Assertions.assertEquals(0, peerIndices.length % 2); testFailureCase(name + ": " + toString(peerIndices), () -> newRoutingTable(peerIndices), @@ -77,7 +75,7 @@ void testFailureCase(String name, int... peerIndices) { } String toString(int... peerIndices) { - Assert.assertEquals(0, peerIndices.length % 2); + Assertions.assertEquals(0, peerIndices.length % 2); if (peerIndices.length == 0) { return ""; } diff --git a/ratis-test/src/test/java/org/apache/ratis/retry/TestExceptionDependentRetry.java b/ratis-test/src/test/java/org/apache/ratis/retry/TestExceptionDependentRetry.java index 36e6dfbccf..560c0f9fdd 100644 --- a/ratis-test/src/test/java/org/apache/ratis/retry/TestExceptionDependentRetry.java +++ b/ratis-test/src/test/java/org/apache/ratis/retry/TestExceptionDependentRetry.java @@ -21,7 +21,6 @@ import org.apache.ratis.BaseTest; import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.RaftTestUtil; -import org.apache.ratis.client.retry.ClientRetryEvent; import org.apache.ratis.client.RaftClient; import org.apache.ratis.client.RaftClientConfigKeys; import org.apache.ratis.conf.RaftProperties; @@ -33,16 +32,14 @@ import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -import static org.junit.Assert.fail; - /** * Class to test {@link ExceptionDependentRetry}. 
*/ @@ -99,9 +96,9 @@ public void testExceptionDependentRetryFailureWithExceptionDuplicate() { builder.setExceptionToPolicy(IOException.class, RetryPolicies.retryUpToMaximumCountWithFixedSleep(1, TimeDuration.valueOf(1, TimeUnit.SECONDS))); - fail("testExceptionDependentRetryFailure failed"); + Assertions.fail("testExceptionDependentRetryFailure failed"); } catch (Exception ex) { - Assert.assertEquals(IllegalStateException.class, ex.getClass()); + Assertions.assertEquals(IllegalStateException.class, ex.getClass()); } } @@ -115,9 +112,9 @@ public void testExceptionDependentRetryFailureWithExceptionMappedToNull() { RetryPolicies.retryUpToMaximumCountWithFixedSleep(1, TimeDuration.valueOf(1, TimeUnit.SECONDS))); builder.setExceptionToPolicy(IOException.class, null); - fail("testExceptionDependentRetryFailure failed"); + Assertions.fail("testExceptionDependentRetryFailure failed"); } catch (Exception ex) { - Assert.assertEquals(IllegalStateException.class, ex.getClass()); + Assertions.assertEquals(IllegalStateException.class, ex.getClass()); } } @@ -131,9 +128,9 @@ public void testExceptionDependentRetryFailureWithNoDefault() { RetryPolicies.retryUpToMaximumCountWithFixedSleep(1, TimeDuration.valueOf(1, TimeUnit.SECONDS))); builder.build(); - fail("testExceptionDependentRetryFailureWithNoDefault failed"); + Assertions.fail("testExceptionDependentRetryFailureWithNoDefault failed"); } catch (Exception ex) { - Assert.assertEquals(IllegalStateException.class, ex.getClass()); + Assertions.assertEquals(IllegalStateException.class, ex.getClass()); } try { @@ -143,9 +140,9 @@ public void testExceptionDependentRetryFailureWithNoDefault() { RetryPolicies.retryUpToMaximumCountWithFixedSleep(1, TimeDuration.valueOf(1, TimeUnit.SECONDS))); builder.setDefaultPolicy(null); - fail("testExceptionDependentRetryFailureWithNoDefault failed"); + Assertions.fail("testExceptionDependentRetryFailureWithNoDefault failed"); } catch (Exception ex) { - 
Assert.assertEquals(IllegalStateException.class, ex.getClass()); + Assertions.assertEquals(IllegalStateException.class, ex.getClass()); } } @@ -157,11 +154,11 @@ private void testException(int retries, int maxAttempts, .handleAttemptFailure(TestRetryPolicy.newClientRetryEvent(i, null, exception)); final boolean expected = i < retries && i < maxAttempts; - Assert.assertEquals(expected, action.shouldRetry()); + Assertions.assertEquals(expected, action.shouldRetry()); if (expected) { - Assert.assertEquals(sleepTime, action.getSleepTime().getDuration()); + Assertions.assertEquals(sleepTime, action.getSleepTime().getDuration()); } else { - Assert.assertEquals(0L, action.getSleepTime().getDuration()); + Assertions.assertEquals(0L, action.getSleepTime().getDuration()); } } } @@ -195,11 +192,11 @@ void runTestExceptionRetryAttempts(MiniRaftClusterWithGrpc cluster) throws Excep SimpleStateMachine4Testing.get(leader).blockWriteStateMachineData(); client.async().send(new RaftTestUtil.SimpleMessage("2")).get(); - Assert.fail("Test should have failed."); + Assertions.fail("Test should have failed."); } catch (ExecutionException e) { RaftRetryFailureException rrfe = (RaftRetryFailureException) e.getCause(); final int expectedCount = 1 + retryCount; // new request attempt + retry attempts - Assert.assertEquals(expectedCount, rrfe.getAttemptCount()); + Assertions.assertEquals(expectedCount, rrfe.getAttemptCount()); } finally { SimpleStateMachine4Testing.get(leader).unblockWriteStateMachineData(); cluster.shutdown(); diff --git a/ratis-test/src/test/java/org/apache/ratis/retry/TestExponentialBackoffRetry.java b/ratis-test/src/test/java/org/apache/ratis/retry/TestExponentialBackoffRetry.java index 74f1807640..c4d1d3a9c0 100644 --- a/ratis-test/src/test/java/org/apache/ratis/retry/TestExponentialBackoffRetry.java +++ b/ratis-test/src/test/java/org/apache/ratis/retry/TestExponentialBackoffRetry.java @@ -19,8 +19,8 @@ import org.apache.ratis.BaseTest; import 
org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.util.concurrent.TimeUnit; @@ -36,12 +36,12 @@ public void testExponentialBackoffRetry() { // Test maxAttempts ExponentialBackoffRetry retryPolicy = createPolicy(baseSleep, null, 1); - Assert.assertFalse(retryPolicy.handleAttemptFailure(() -> 1).shouldRetry()); + Assertions.assertFalse(retryPolicy.handleAttemptFailure(() -> 1).shouldRetry()); try { // baseSleep should not be null createPolicy(null, null, 1); - Assert.fail("Policy creation should have failed"); + Assertions.fail("Policy creation should have failed"); } catch (Exception e) { } @@ -67,9 +67,9 @@ private void assertSleep(ExponentialBackoffRetry retryPolicy, // sleep time with randomness added long randomizedDuration = action.getSleepTime().toLong(TimeUnit.MILLISECONDS); - Assert.assertTrue(action.shouldRetry()); - Assert.assertTrue(randomizedDuration >= d * 0.5); - Assert.assertTrue(randomizedDuration <= d * 1.5); + Assertions.assertTrue(action.shouldRetry()); + Assertions.assertTrue(randomizedDuration >= d * 0.5); + Assertions.assertTrue(randomizedDuration <= d * 1.5); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/retry/TestMultipleLinearRandomRetry.java b/ratis-test/src/test/java/org/apache/ratis/retry/TestMultipleLinearRandomRetry.java index 09ae55f33d..621d46b5b3 100644 --- a/ratis-test/src/test/java/org/apache/ratis/retry/TestMultipleLinearRandomRetry.java +++ b/ratis-test/src/test/java/org/apache/ratis/retry/TestMultipleLinearRandomRetry.java @@ -19,15 +19,12 @@ import org.apache.ratis.BaseTest; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; +@Timeout(value = 1) public class TestMultipleLinearRandomRetry extends BaseTest { - @Override - public int 
getGlobalTimeoutSeconds() { - return 1; - } - @Test public void testParseCommaSeparated() { assertIllegalInput(""); @@ -47,14 +44,18 @@ public void testParseCommaSeparated() { assertLegalInput("[10x100ms, 20x1s, 30x5s]", "100,10, 1s,20, 5s,30"); } - private static void assertIllegalInput(String input) { - final MultipleLinearRandomRetry computed = MultipleLinearRandomRetry.parseCommaSeparated(input); - Assert.assertNull(computed); + private void assertIllegalInput(String input) { + try { + MultipleLinearRandomRetry.parseCommaSeparated(input); + } catch (IllegalArgumentException e) { + LOG.info("Expected to catch: {}", String.valueOf(e)); + } } + private static MultipleLinearRandomRetry assertLegalInput(String expected, String input) { final MultipleLinearRandomRetry computed = MultipleLinearRandomRetry.parseCommaSeparated(input); - Assert.assertNotNull(computed); - Assert.assertTrue(computed.toString().endsWith(expected)); + Assertions.assertNotNull(computed); + Assertions.assertTrue(computed.toString().endsWith(expected)); return computed; } @@ -69,18 +70,18 @@ public void testMultipleLinearRandomRetry() { for (int j = 1; j <= counts[i]; j++) { final int attempt = ++k; final RetryPolicy.Action action = r.handleAttemptFailure(() -> attempt); - Assert.assertTrue(action.shouldRetry()); + Assertions.assertTrue(action.shouldRetry()); final TimeDuration randomized = action.getSleepTime(); final TimeDuration expected = times[i].to(randomized.getUnit()); final long d = expected.getDuration(); LOG.info("times[{},{}] = {}, randomized={}", i, j, times[i], randomized); - Assert.assertTrue(randomized.getDuration() >= d*0.5); - Assert.assertTrue(randomized.getDuration() < (d*1.5 + precision)); + Assertions.assertTrue(randomized.getDuration() >= d*0.5); + Assertions.assertTrue(randomized.getDuration() < (d*1.5 + precision)); } } final int attempt = ++k; final RetryPolicy.Action action = r.handleAttemptFailure(() -> attempt); - Assert.assertFalse(action.shouldRetry()); + 
Assertions.assertFalse(action.shouldRetry()); } } \ No newline at end of file diff --git a/ratis-test/src/test/java/org/apache/ratis/retry/TestRetryPolicy.java b/ratis-test/src/test/java/org/apache/ratis/retry/TestRetryPolicy.java index 1b9536b4b6..43b2fedd1a 100644 --- a/ratis-test/src/test/java/org/apache/ratis/retry/TestRetryPolicy.java +++ b/ratis-test/src/test/java/org/apache/ratis/retry/TestRetryPolicy.java @@ -34,8 +34,9 @@ import org.apache.ratis.protocol.exceptions.ResourceUnavailableException; import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.Timestamp; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.io.IOException; import java.util.ArrayList; @@ -45,12 +46,8 @@ import java.util.concurrent.TimeUnit; /** Test {@link RetryPolicy}. */ +@Timeout(value = 1) public class TestRetryPolicy extends BaseTest { - @Override - public int getGlobalTimeoutSeconds() { - return 1; - } - @Test public void testRetryMultipleTimesWithFixedSleep() { final int n = 4; @@ -62,11 +59,11 @@ public void testRetryMultipleTimesWithFixedSleep() { final RetryPolicy.Action action = policy.handleAttemptFailure(event); final boolean expected = i < n; - Assert.assertEquals(expected, action.shouldRetry()); + Assertions.assertEquals(expected, action.shouldRetry()); if (expected) { - Assert.assertEquals(sleepTime, action.getSleepTime()); + Assertions.assertEquals(sleepTime, action.getSleepTime()); } else { - Assert.assertEquals(0L, action.getSleepTime().getDuration()); + Assertions.assertEquals(0L, action.getSleepTime().getDuration()); } } } @@ -97,33 +94,33 @@ public void testRequestTypeDependentRetry() { final RetryPolicy.Action action = policy.handleAttemptFailure(event); final boolean expected = i < n; - Assert.assertEquals(expected, action.shouldRetry()); + Assertions.assertEquals(expected, action.shouldRetry()); if (expected) { - 
Assert.assertEquals(writeSleep, action.getSleepTime()); + Assertions.assertEquals(writeSleep, action.getSleepTime()); } else { - Assert.assertEquals(0L, action.getSleepTime().getDuration()); + Assertions.assertEquals(0L, action.getSleepTime().getDuration()); } } { //read and stale read are using default final ClientRetryEvent event = newClientRetryEvent(i, readRequest, null); final RetryPolicy.Action action = policy.handleAttemptFailure(event); - Assert.assertTrue(action.shouldRetry()); - Assert.assertEquals(0L, action.getSleepTime().getDuration()); + Assertions.assertTrue(action.shouldRetry()); + Assertions.assertEquals(0L, action.getSleepTime().getDuration()); } { final ClientRetryEvent event = newClientRetryEvent(i, staleReadRequest, null); final RetryPolicy.Action action = policy.handleAttemptFailure(event); - Assert.assertTrue(action.shouldRetry()); - Assert.assertEquals(0L, action.getSleepTime().getDuration()); + Assertions.assertTrue(action.shouldRetry()); + Assertions.assertEquals(0L, action.getSleepTime().getDuration()); } { //watch has no retry final ClientRetryEvent event = newClientRetryEvent(i, watchRequest, null); final RetryPolicy.Action action = policy.handleAttemptFailure(event); - Assert.assertFalse(action.shouldRetry()); - Assert.assertEquals(0L, action.getSleepTime().getDuration()); + Assertions.assertFalse(action.shouldRetry()); + Assertions.assertEquals(0L, action.getSleepTime().getDuration()); } } @@ -155,15 +152,15 @@ public RaftClientRequest newRequestImpl() { for (RaftClientRequest request : requests) { final ClientRetryEvent event = pending.newClientRetryEvent(request, new Exception()); final RetryPolicy.Action action = policy.handleAttemptFailure(event); - Assert.assertTrue(action.shouldRetry()); - Assert.assertEquals(0L, action.getSleepTime().getDuration()); + Assertions.assertTrue(action.shouldRetry()); + Assertions.assertEquals(0L, action.getSleepTime().getDuration()); } timeout.sleep(); for (RaftClientRequest request : requests) { 
final ClientRetryEvent event = pending.newClientRetryEvent(request, new Exception()); final RetryPolicy.Action action = policy.handleAttemptFailure(event); - Assert.assertFalse(action.shouldRetry()); + Assertions.assertFalse(action.shouldRetry()); } } @@ -227,11 +224,11 @@ private void checkEvent(int exceptionAttemptCount, RetryPolicy retryPolicy, Raft final RetryPolicy.Action action = retryPolicy.handleAttemptFailure(event); final boolean expected = exceptionAttemptCount < exceptionPolicyPair.retries; - Assert.assertEquals(expected, action.shouldRetry()); + Assertions.assertEquals(expected, action.shouldRetry()); if (expected) { - Assert.assertEquals(exceptionPolicyPair.sleepTime, action.getSleepTime().getDuration()); + Assertions.assertEquals(exceptionPolicyPair.sleepTime, action.getSleepTime().getDuration()); } else { - Assert.assertEquals(0L, action.getSleepTime().getDuration()); + Assertions.assertEquals(0L, action.getSleepTime().getDuration()); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/security/SecurityTestUtils.java b/ratis-test/src/test/java/org/apache/ratis/security/SecurityTestUtils.java index d6222b2271..cbb8e1b799 100644 --- a/ratis-test/src/test/java/org/apache/ratis/security/SecurityTestUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/security/SecurityTestUtils.java @@ -23,7 +23,7 @@ import org.apache.ratis.util.FileUtils; import org.bouncycastle.util.io.pem.PemObject; import org.bouncycastle.util.io.pem.PemReader; -import org.junit.Assert; +import org.junit.jupiter.api.Assertions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,6 +54,23 @@ public interface SecurityTestUtils { ClassLoader CLASS_LOADER = SecurityTestUtils.class.getClassLoader(); + TrustManager EMPTY_TRUST_MANAGER = new X509TrustManager() { + @Override + public X509Certificate[] getAcceptedIssuers() { + return null; + } + + @Override + public void checkClientTrusted(X509Certificate[] certs, String authType) { } + + @Override + public void 
checkServerTrusted(X509Certificate[] certs, String authType) { } + }; + + static TrustManager emptyTrustManager() { + return EMPTY_TRUST_MANAGER; + } + static File getResource(String name) { final File file = Optional.ofNullable(CLASS_LOADER.getResource(name)) .map(URL::getFile) @@ -100,7 +117,7 @@ static PrivateKey getPrivateKey(String keyPath) { KeyFactory keyFactory = KeyFactory.getInstance("RSA"); return keyFactory.generatePrivate(privKeySpec); } catch (Exception e) { - Assert.fail("Failed to get private key from " + keyPath + ". Error: " + + Assertions.fail("Failed to get private key from " + keyPath + ". Error: " + e.getMessage()); } return null; @@ -116,7 +133,7 @@ static X509Certificate[] getCertificate(String certPath) { } return certificate; } catch (Exception e) { - Assert.fail("Failed to get certificate from " + certPath + ". Error: " + + Assertions.fail("Failed to get certificate from " + certPath + ". Error: " + e.getMessage()); } return null; @@ -133,7 +150,7 @@ static KeyStore getServerKeyStore() { keyStore.setKeyEntry("ratis-server-key", privateKey, new char[0], certificate); return keyStore; } catch (Exception e) { - Assert.fail("Failed to get sever key store " + e.getMessage()); + Assertions.fail("Failed to get sever key store " + e.getMessage()); } return null; } @@ -149,7 +166,7 @@ static KeyStore getClientKeyStore() { keyStore.setKeyEntry("ratis-client-key", privateKey, new char[0], certificate); return keyStore; } catch (Exception e) { - Assert.fail("Failed to get client key store " + e.getMessage()); + Assertions.fail("Failed to get client key store " + e.getMessage()); } return null; } @@ -167,7 +184,7 @@ static KeyStore getTrustStore() { } return trustStore; } catch (Exception e) { - Assert.fail("Failed to get sever key store " + e.getMessage()); + Assertions.fail("Failed to get sever key store " + e.getMessage()); } return null; } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/ServerBuilderTest.java 
b/ratis-test/src/test/java/org/apache/ratis/server/ServerBuilderTest.java index dd76a2ec1e..15040d3d2d 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/ServerBuilderTest.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/ServerBuilderTest.java @@ -26,8 +26,8 @@ import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.util.Preconditions; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; /** * Test {@link RaftServer.Builder}. @@ -58,9 +58,9 @@ public void testPeerIdNotInRaftGroup() { .setStateMachine(new BaseStateMachine()) .setProperties(new RaftProperties()) .build(); - Assert.fail("did not get expected exception"); + Assertions.fail("did not get expected exception"); } catch (IOException e) { - Preconditions.assertInstanceOf(e.getCause(), IllegalStateException.class); + Preconditions.assertInstanceOf(e.getCause(), NullPointerException.class); } } @@ -74,9 +74,9 @@ public void testNullPeerIdWithRaftGroup() { .setStateMachine(new BaseStateMachine()) .setProperties(new RaftProperties()) .build(); - Assert.fail("did not get expected exception"); + Assertions.fail("did not get expected exception"); } catch (IOException e) { - Preconditions.assertInstanceOf(e.getCause(), IllegalStateException.class); + Preconditions.assertInstanceOf(e.getCause(), NullPointerException.class); } } @@ -90,13 +90,4 @@ public void testPeerIdWithNullRaftGroup() throws Exception { .build(); server.close(); } - - @Test - public void testNullPeerIdWithNullRaftGroup() throws Exception { - RaftServer server = RaftServer.newBuilder() - .setStateMachine(new BaseStateMachine()) - .setProperties(new RaftProperties()) - .build(); - server.close(); - } } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java b/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java index 
2f3edf7815..c9495fdf79 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/ServerRestartTests.java @@ -35,8 +35,8 @@ import org.apache.ratis.server.raftlog.RaftLogIOException; import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogFormat; import org.apache.ratis.server.RaftServerConfigKeys.Log; +import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogTestUtils; import org.apache.ratis.server.raftlog.segmented.TestSegmentedRaftLog; -import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.FileUtils; @@ -46,8 +46,8 @@ import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.StringUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.event.Level; @@ -63,7 +63,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; -import java.util.stream.Collectors; /** * Test restarting raft peers. 
@@ -111,7 +110,7 @@ void runTestRestartFollower(MiniRaftCluster cluster) throws Exception { // make sure the restarted follower can catchup final RaftServer.Division followerState = cluster.getDivision(followerId); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(followerState.getInfo().getLastAppliedIndex() >= leaderLastIndex); + Assertions.assertTrue(followerState.getInfo().getLastAppliedIndex() >= leaderLastIndex); return null; }, 10, ONE_SECOND, "follower catchup", LOG); @@ -119,9 +118,9 @@ void runTestRestartFollower(MiniRaftCluster cluster) throws Exception { final RaftServer.Division follower = cluster.restartServer(followerId, false); final RaftLog followerLog = follower.getRaftLog(); final long followerLastIndex = followerLog.getLastEntryTermIndex().getIndex(); - Assert.assertTrue(followerLastIndex >= leaderLastIndex); + Assertions.assertTrue(followerLastIndex >= leaderLastIndex); final long leaderFinalIndex = cluster.getLeader().getRaftLog().getLastEntryTermIndex().getIndex(); - Assert.assertEquals(leaderFinalIndex, followerLastIndex); + Assertions.assertEquals(leaderFinalIndex, followerLastIndex); final File followerOpenLogFile = getOpenLogFile(follower); final File leaderOpenLogFile = getOpenLogFile(cluster.getDivision(leaderId)); @@ -148,7 +147,7 @@ void runTestRestartFollower(MiniRaftCluster cluster) throws Exception { if (i != truncatedMessageIndex) { final Message m = new SimpleMessage("m" + i); final RaftClientReply reply = client.io().sendReadOnly(m); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); LOG.info("query {}: {} {}", m, reply, LogEntryProto.parseFrom(reply.getMessage().getContent())); } } @@ -159,32 +158,26 @@ static void writeSomething(Supplier newMessage, MiniRaftCluster cluster try(final RaftClient client = cluster.createClient()) { // write some messages for(int i = 0; i < 10; i++) { - Assert.assertTrue(client.io().send(newMessage.get()).isSuccess()); + 
Assertions.assertTrue(client.io().send(newMessage.get()).isSuccess()); } } } - static void assertTruncatedLog(RaftPeerId id, File openLogFile, long lastIndex, MiniRaftCluster cluster) throws Exception { + static void assertTruncatedLog(RaftPeerId id, File openLogFile, long lastIndex, MiniRaftCluster cluster) + throws Exception { // truncate log if (openLogFile.length() > 0) { FileUtils.truncateFile(openLogFile, openLogFile.length() - 1); } final RaftServer.Division server = cluster.restartServer(id, false); // the last index should be one less than before - Assert.assertEquals(lastIndex - 1, server.getRaftLog().getLastEntryTermIndex().getIndex()); + Assertions.assertEquals(lastIndex - 1, server.getRaftLog().getLastEntryTermIndex().getIndex()); server.getRaftServer().close(); } - static List getOpenLogFiles(RaftServer.Division server) throws Exception { - return LogSegmentPath.getLogSegmentPaths(server.getRaftStorage()).stream() - .filter(p -> p.getStartEnd().isOpen()) - .map(LogSegmentPath::getPath) - .collect(Collectors.toList()); - } - static File getOpenLogFile(RaftServer.Division server) throws Exception { - final List openLogs = getOpenLogFiles(server); - Assert.assertEquals(1, openLogs.size()); + final List openLogs = SegmentedRaftLogTestUtils.getOpenLogFiles(server); + Assertions.assertEquals(1, openLogs.size()); return openLogs.get(0).toFile(); } @@ -210,20 +203,20 @@ void runTestRestartWithCorruptedLogHeader(MiniRaftCluster cluster) throws Except 10, HUNDRED_MILLIS, impl.getId() + "-getOpenLogFile", LOG); for(int i = 0; i < SegmentedRaftLogFormat.getHeaderLength(); i++) { assertCorruptedLogHeader(impl.getId(), openLogFile, i, cluster, LOG); - Assert.assertTrue(getOpenLogFiles(impl).isEmpty()); + Assertions.assertTrue(SegmentedRaftLogTestUtils.getOpenLogFiles(impl).isEmpty()); } } } static void assertCorruptedLogHeader(RaftPeerId id, File openLogFile, int partialLength, - MiniRaftCluster cluster, Logger LOG) throws Exception { + MiniRaftCluster cluster, 
Logger log) throws Exception { Preconditions.assertTrue(partialLength < SegmentedRaftLogFormat.getHeaderLength()); try(final RandomAccessFile raf = new RandomAccessFile(openLogFile, "rw")) { final ByteBuffer header = SegmentedRaftLogFormat.getHeaderBytebuffer(); - LOG.info("header = {}", StringUtils.bytes2HexString(header)); + log.info("header = {}", StringUtils.bytes2HexString(header)); final byte[] corrupted = new byte[header.remaining()]; header.get(corrupted, 0, partialLength); - LOG.info("corrupted = {}", StringUtils.bytes2HexString(corrupted)); + log.info("corrupted = {}", StringUtils.bytes2HexString(corrupted)); raf.write(corrupted); } final RaftServer.Division server = cluster.restartServer(id, false); @@ -245,7 +238,7 @@ void runTestRestartCommitIndex(MiniRaftCluster cluster) throws Exception { final SimpleMessage m = messages[i]; new Thread(() -> { try (final RaftClient client = cluster.createClient()) { - Assert.assertTrue(client.io().send(m).isSuccess()); + Assertions.assertTrue(client.io().send(m).isSuccess()); } catch (IOException e) { throw new IllegalStateException("Failed to send " + m, e); } @@ -301,11 +294,11 @@ void runTestRestartCommitIndex(MiniRaftCluster cluster) throws Exception { final RaftServer.Division server = cluster.getDivision(id); final RaftLog raftLog = server.getRaftLog(); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(raftLog.getLastCommittedIndex() >= loggedCommitIndex); + Assertions.assertTrue(raftLog.getLastCommittedIndex() >= loggedCommitIndex); return null; }, 10, HUNDRED_MILLIS, id + "(commitIndex >= loggedCommitIndex)", LOG); JavaUtils.attemptRepeatedly(() -> { - Assert.assertTrue(server.getInfo().getLastAppliedIndex() >= loggedCommitIndex); + Assertions.assertTrue(server.getInfo().getLastAppliedIndex() >= loggedCommitIndex); return null; }, 10, HUNDRED_MILLIS, id + "(lastAppliedIndex >= loggedCommitIndex)", LOG); LOG.info("{}: commitIndex={}, lastAppliedIndex={}", @@ -318,16 +311,16 @@ static void 
assertLastLogEntry(RaftServer.Division server) throws RaftLogIOExcep final RaftLog raftLog = server.getRaftLog(); final long lastIndex = raftLog.getLastEntryTermIndex().getIndex(); final LogEntryProto lastEntry = raftLog.get(lastIndex); - Assert.assertTrue(lastEntry.hasMetadataEntry()); + Assertions.assertTrue(lastEntry.hasMetadataEntry()); final long loggedCommitIndex = lastEntry.getMetadataEntry().getCommitIndex(); final LogEntryProto lastCommittedEntry = raftLog.get(loggedCommitIndex); - Assert.assertTrue(lastCommittedEntry.hasStateMachineLogEntry()); + Assertions.assertTrue(lastCommittedEntry.hasStateMachineLogEntry()); final SimpleStateMachine4Testing leaderStateMachine = SimpleStateMachine4Testing.get(server); final TermIndex lastAppliedTermIndex = leaderStateMachine.getLastAppliedTermIndex(); - Assert.assertEquals(lastCommittedEntry.getTerm(), lastAppliedTermIndex.getTerm()); - Assert.assertTrue(lastCommittedEntry.getIndex() <= lastAppliedTermIndex.getIndex()); + Assertions.assertEquals(lastCommittedEntry.getTerm(), lastAppliedTermIndex.getTerm()); + Assertions.assertTrue(lastCommittedEntry.getIndex() <= lastAppliedTermIndex.getIndex()); } @Test @@ -364,11 +357,11 @@ private void runTestRestartWithCorruptedLogEntry(CLUSTER cluster) throws Excepti final SimpleMessage lastMessage = messages[messages.length - 1]; try (final RaftClient client = cluster.createClient()) { for (SimpleMessage m : messages) { - Assert.assertTrue(client.io().send(m).isSuccess()); + Assertions.assertTrue(client.io().send(m).isSuccess()); } // assert that the last message exists - Assert.assertTrue(client.io().sendReadOnly(lastMessage).isSuccess()); + Assertions.assertTrue(client.io().sendReadOnly(lastMessage).isSuccess()); } final RaftLog log = leader.getRaftLog(); diff --git a/ratis-test/src/test/java/org/apache/ratis/server/TestRaftServerConfigKeys.java b/ratis-test/src/test/java/org/apache/ratis/server/TestRaftServerConfigKeys.java index bb386e8309..a9e509e7a8 100644 --- 
a/ratis-test/src/test/java/org/apache/ratis/server/TestRaftServerConfigKeys.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/TestRaftServerConfigKeys.java @@ -23,9 +23,9 @@ import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; @@ -46,14 +46,14 @@ */ public class TestRaftServerConfigKeys { - private static final Supplier rootTestDir = JavaUtils.memoize( + private static final Supplier ROOT_TEST_DIR = JavaUtils.memoize( () -> new File(BaseTest.getRootTestDir(), JavaUtils.getClassSimpleName(TestRaftServerConfigKeys.class) + Integer.toHexString(ThreadLocalRandom.current().nextInt()))); - @AfterClass + @AfterAll public static void tearDown() throws IOException { - FileUtils.deleteFully(rootTestDir.get()); + FileUtils.deleteFully(ROOT_TEST_DIR.get()); } /** @@ -63,7 +63,7 @@ public static void tearDown() throws IOException { @Test public void testStorageDirProperty() { final File testDir = new File( - rootTestDir.get(), UUID.randomUUID().toString()); + ROOT_TEST_DIR.get(), UUID.randomUUID().toString()); final List directories = new ArrayList<>(); final RaftProperties properties = new RaftProperties(); @@ -74,7 +74,7 @@ public void testStorageDirProperty() { final String expected = directories.stream().map(File::getAbsolutePath) .collect(Collectors.joining(",")); final String actual = properties.get(RaftServerConfigKeys.STORAGE_DIR_KEY); - Assert.assertEquals(expected, actual); + Assertions.assertEquals(expected, actual); } /** @@ -84,7 +84,7 @@ public void testStorageDirProperty() { @Test public void testStorageDir() { final File testDir = new File( - rootTestDir.get(), UUID.randomUUID().toString()); + ROOT_TEST_DIR.get(), 
UUID.randomUUID().toString()); final List directories = new ArrayList<>(); IntStream.range(0, 10).mapToObj((i) -> new File(testDir, Integer.toString(i))).forEach(directories::add); @@ -97,8 +97,8 @@ public void testStorageDir() { final List actualDirs = storageDirs.stream() .map(File::getAbsolutePath).collect(Collectors.toList()); actualDirs.removeAll(expectedDirs); - Assert.assertEquals(directories.size(), storageDirs.size()); - Assert.assertEquals(0, actualDirs.size()); + Assertions.assertEquals(directories.size(), storageDirs.size()); + Assertions.assertEquals(0, actualDirs.size()); } /** @@ -113,6 +113,6 @@ public void testStorageDir() { int pendingRequestMegabyteLimit = Math.toIntExact( RaftServerConfigKeys.Write.byteLimit(properties).getSize() / SizeInBytes.ONE_MB.getSize()); - Assert.assertEquals(4096, pendingRequestMegabyteLimit); + Assertions.assertEquals(4096, pendingRequestMegabyteLimit); } } \ No newline at end of file diff --git a/ratis-test/src/test/java/org/apache/ratis/server/impl/TestLeaderElectionServerInterface.java b/ratis-test/src/test/java/org/apache/ratis/server/impl/TestLeaderElectionServerInterface.java new file mode 100644 index 0000000000..92b7ad4203 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/server/impl/TestLeaderElectionServerInterface.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.server.impl; + +import org.apache.ratis.BaseTest; +import org.apache.ratis.metrics.Timekeeper; +import org.apache.ratis.proto.RaftProtos.RequestVoteReplyProto; +import org.apache.ratis.proto.RaftProtos.RequestVoteRequestProto; +import org.apache.ratis.protocol.RaftGroup; +import org.apache.ratis.protocol.RaftGroupId; +import org.apache.ratis.protocol.RaftGroupMemberId; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.protocol.RaftPeerId; +import org.apache.ratis.server.protocol.TermIndex; +import org.apache.ratis.util.TimeDuration; +import org.junit.jupiter.api.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +public class TestLeaderElectionServerInterface extends BaseTest { + private final List peers = IntStream.range(0, 3).boxed() + .map(i -> RaftPeer.newBuilder().setId("s" + i).build()) + .collect(Collectors.toList()); + private final RaftGroup group = RaftGroup.valueOf(RaftGroupId.randomId(), peers); + private final RaftConfigurationImpl conf = RaftConfigurationImpl.newBuilder().setLogEntryIndex(0).setConf(peers).build(); + private final ThreadGroup threadGroup = new ThreadGroup("ServerInterface"); + + private final RaftGroupMemberId 
candidate = RaftGroupMemberId.valueOf(peers.get(0).getId(), group.getGroupId()); + + LeaderElection.ServerInterface newServerInterface(boolean expectToPass, + Map lastEntries) { + return new LeaderElection.ServerInterface() { + private volatile boolean isAlive = true; + + @Override + public RaftGroupMemberId getMemberId() { + return candidate; + } + + @Override + public boolean isAlive() { + return isAlive; + } + + @Override + public boolean isCandidate() { + return true; + } + + @Override + public long getCurrentTerm() { + final TermIndex lastEntry = getLastEntry(); + return lastEntry != null? lastEntry.getTerm() : TermIndex.INITIAL_VALUE.getTerm(); + } + + @Override + public long getLastCommittedIndex() { + final TermIndex lastEntry = getLastEntry(); + return lastEntry != null? lastEntry.getIndex() : TermIndex.INITIAL_VALUE.getIndex(); + } + + @Override + public TermIndex getLastEntry() { + return lastEntries.get(getId()); + } + + @Override + public boolean isPreVoteEnabled() { + return false; + } + + @Override + public LeaderElection.ConfAndTerm initElection(LeaderElection.Phase phase) { + return new LeaderElection.ConfAndTerm(conf, getCurrentTerm()); + } + + @Override + public RequestVoteReplyProto requestVote(RequestVoteRequestProto r) { + final RaftPeerId voterPeerId = RaftPeerId.valueOf(r.getServerRequest().getReplyId()); + final RaftGroupMemberId voter = RaftGroupMemberId.valueOf(voterPeerId, group.getGroupId()); + final TermIndex lastEntry = lastEntries.get(voterPeerId); + final long term = (lastEntry != null? 
lastEntry : TermIndex.INITIAL_VALUE).getTerm(); + + // voter replies to candidate + return ServerProtoUtils.toRequestVoteReplyProto( + getId(), voter, true, term, false, lastEntry, r.getServerRequest().getCallId()); + } + + @Override + public void changeToLeader() { + assertTrue(expectToPass); + isAlive = false; + } + + @Override + public void rejected(long term, LeaderElection.ResultAndTerm result) { + assertFalse(expectToPass); + isAlive = false; + } + + @Override + public void shutdown() { + fail(); + } + + @Override + public Timekeeper getLeaderElectionTimer() { + final long start = System.nanoTime(); + final Timekeeper.Context context = () -> System.nanoTime() - start; + return () -> context; + } + + @Override + public void onNewLeaderElectionCompletion() { + // no op + } + + @Override + public TimeDuration getRandomElectionTimeout() { + final int millis = 100 + ThreadLocalRandom.current().nextInt(100); + return TimeDuration.valueOf(millis, TimeUnit.MILLISECONDS); + } + + @Override + public ThreadGroup getThreadGroup() { + return threadGroup; + } + }; + } + + @Test + public void testVoterWithEmptyLog() { + // all the candidate and the voters have an empty log + // expect to pass: empty-log-candidate will accept votes from empty-log-voters + runTestVoterWithEmptyLog(true); + + // candidate: non-empty commit + // voter 1 : empty log + // voter 2 : empty log + // expect to fail: non-empty-commit-candidate will NOT accept votes from empty-log-voters + final TermIndex candidateLastEntry = TermIndex.valueOf(2, 9); + runTestVoterWithEmptyLog(false, candidateLastEntry); + + // candidate: non-empty commit + // voter 1 : non-empty log + // voter 2 : empty log + // expect to pass: non-empty-commit-candidate will accept votes from non-empty-log-voters + final TermIndex voterLastEntry = TermIndex.valueOf(2, 7); + runTestVoterWithEmptyLog(true, candidateLastEntry, voterLastEntry); + + // candidate: non-empty log + // voter 1 : older version + // voter 2 : empty log + // 
expect to pass: non-empty-commit-candidate will accept votes from older-version-voters + runTestVoterWithEmptyLog(true, candidateLastEntry, TermIndex.PROTO_DEFAULT); + } + + void runTestVoterWithEmptyLog(boolean expectToPass, TermIndex... lastEntries) { + LOG.info("expectToPass? {}, lastEntries={}", + expectToPass, lastEntries); + final Map map = new HashMap<>(); + for(int i = 0; i < lastEntries.length; i++) { + map.put(peers.get(i).getId(), lastEntries[i]); + } + final LeaderElection election = LeaderElection.newInstance(newServerInterface(expectToPass, map), false); + election.startInForeground(); + } + +} diff --git a/ratis-test/src/test/java/org/apache/ratis/server/impl/TestPeerConfiguration.java b/ratis-test/src/test/java/org/apache/ratis/server/impl/TestPeerConfiguration.java index e1adcfa442..df4d53cf0b 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/impl/TestPeerConfiguration.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/impl/TestPeerConfiguration.java @@ -20,14 +20,14 @@ import org.apache.ratis.BaseTest; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.util.Arrays; import java.util.Collection; import java.util.stream.Collectors; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; public class TestPeerConfiguration extends BaseTest { @Test diff --git a/ratis-test/src/test/java/org/apache/ratis/server/impl/TestRaftConfiguration.java b/ratis-test/src/test/java/org/apache/ratis/server/impl/TestRaftConfiguration.java index 14e0030e6d..fa2c524e71 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/impl/TestRaftConfiguration.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/impl/TestRaftConfiguration.java @@ -22,14 +22,14 @@ import 
org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.RaftConfiguration; -import org.junit.Test; +import org.junit.jupiter.api.Test; import java.util.Arrays; import java.util.Collection; import java.util.stream.Collectors; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; public class TestRaftConfiguration extends BaseTest { @Test @@ -39,8 +39,8 @@ public void testIsHighestPriority() { Integer node3 = 2; PeerConfiguration peerConfig = new PeerConfiguration(raftPeersWithPriority(node1, node2, node3)); RaftConfiguration config = RaftConfigurationImpl.newBuilder().setConf(peerConfig).build(); - RaftPeer[] allRaftPeers = peerConfig.getPeers(RaftProtos.RaftPeerRole.FOLLOWER).toArray(new RaftPeer[peerConfig.getPeers( - RaftProtos.RaftPeerRole.FOLLOWER).size()]); + RaftPeer[] allRaftPeers = peerConfig.getPeers(RaftProtos.RaftPeerRole.FOLLOWER).toArray( + new RaftPeer[peerConfig.getPeers(RaftProtos.RaftPeerRole.FOLLOWER).size()]); // First member should not have highest priority assertFalse(RaftServerTestUtil.isHighestPriority(config, @@ -65,29 +65,29 @@ public void testSingleMode() { RaftConfigurationImpl config = RaftConfigurationImpl.newBuilder() .setConf(new PeerConfiguration(raftPeersWithPriority(1))) .build(); - assertTrue("Peer is in single mode.", config.isSingleMode(RaftPeerId.valueOf("1"))); + assertTrue(config.isSingleMode(RaftPeerId.valueOf("1")), "Peer is in single mode."); config = RaftConfigurationImpl.newBuilder() .setConf(new PeerConfiguration(raftPeersWithPriority(0, 1))) .setOldConf(new PeerConfiguration(raftPeersWithPriority(0))) .build(); - assertTrue("Peer is in single mode.", config.isSingleMode(RaftPeerId.valueOf("0"))); - assertFalse("Peer is a new peer.", config.isSingleMode(RaftPeerId.valueOf("1"))); + 
assertTrue(config.isSingleMode(RaftPeerId.valueOf("0")), "Peer is in single mode."); + assertFalse(config.isSingleMode(RaftPeerId.valueOf("1")), "Peer is a new peer"); config = RaftConfigurationImpl.newBuilder() .setConf(new PeerConfiguration(raftPeersWithPriority(0, 1))) .build(); - assertFalse("Peer is in ha mode.", config.isSingleMode(RaftPeerId.valueOf("0"))); - assertFalse("Peer is in ha mode.", config.isSingleMode(RaftPeerId.valueOf("1"))); + assertFalse(config.isSingleMode(RaftPeerId.valueOf("0")), "Peer is in ha mode."); + assertFalse(config.isSingleMode(RaftPeerId.valueOf("1")), "Peer is in ha mode."); config = RaftConfigurationImpl.newBuilder() .setConf(new PeerConfiguration(raftPeersWithPriority(0, 1))) .setOldConf(new PeerConfiguration(raftPeersWithPriority(2, 3))) .build(); - assertFalse("Peer is in ha mode.", config.isSingleMode(RaftPeerId.valueOf("0"))); - assertFalse("Peer is in ha mode.", config.isSingleMode(RaftPeerId.valueOf("1"))); - assertFalse("Peer is in ha mode.", config.isSingleMode(RaftPeerId.valueOf("3"))); - assertFalse("Peer is in ha mode.", config.isSingleMode(RaftPeerId.valueOf("4"))); + assertFalse(config.isSingleMode(RaftPeerId.valueOf("0")), "Peer is in ha mode."); + assertFalse(config.isSingleMode(RaftPeerId.valueOf("1")), "Peer is in ha mode."); + assertFalse(config.isSingleMode(RaftPeerId.valueOf("3")), "Peer is in ha mode."); + assertFalse(config.isSingleMode(RaftPeerId.valueOf("4")), "Peer is in ha mode."); } @Test @@ -96,8 +96,8 @@ public void testChangeMajority() { RaftConfigurationImpl config = RaftConfigurationImpl.newBuilder() .setConf(new PeerConfiguration(raftPeersWithPriority(1))) .build(); - assertFalse("Change from single mode to ha mode is not considered as changing majority.", - config.changeMajority(raftPeersWithPriority(1, 2))); + assertFalse(config.changeMajority(raftPeersWithPriority(1, 2)), + "Change from single mode to ha mode is not considered as changing majority."); // Case 2: {1} --> {2}. 
assertTrue(config.changeMajority(raftPeersWithPriority(2))); diff --git a/ratis-test/src/test/java/org/apache/ratis/server/impl/TestRaftServerJmx.java b/ratis-test/src/test/java/org/apache/ratis/server/impl/TestRaftServerJmx.java index 01d8392303..b24ec72c06 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/impl/TestRaftServerJmx.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/impl/TestRaftServerJmx.java @@ -24,8 +24,9 @@ import org.apache.ratis.server.RaftServerMXBean; import org.apache.ratis.server.simulation.MiniRaftClusterWithSimulatedRpc; import org.apache.ratis.util.JmxRegister; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import javax.management.JMException; import javax.management.MBeanServer; @@ -38,9 +39,10 @@ import static org.apache.ratis.RaftTestUtil.waitForLeader; public class TestRaftServerJmx extends BaseTest { - @Test(timeout = 30000) + @Test + @Timeout(value = 30) public void testJmxBeans() throws Exception { - final int NUM_SERVERS = 3; + final int numServers = 3; final MiniRaftClusterWithSimulatedRpc cluster = MiniRaftClusterWithSimulatedRpc.FACTORY.newCluster(3, new RaftProperties()); cluster.start(); @@ -48,16 +50,17 @@ public void testJmxBeans() throws Exception { MBeanServer platformMBeanServer = ManagementFactory.getPlatformMBeanServer(); Set objectInstances = platformMBeanServer.queryMBeans(new ObjectName("Ratis:*"), null); - Assert.assertEquals(NUM_SERVERS, objectInstances.size()); + Assertions.assertEquals(numServers, objectInstances.size()); for (ObjectInstance instance : objectInstances) { Object groupId = platformMBeanServer.getAttribute(instance.getObjectName(), "GroupId"); - Assert.assertEquals(cluster.getGroupId().toString(), groupId); + Assertions.assertEquals(cluster.getGroupId().toString(), groupId); } cluster.shutdown(); } - @Test(timeout = 30000) + @Test + @Timeout(value = 30) public 
void testRegister() throws JMException { { final JmxRegister jmx = new JmxRegister(); @@ -84,29 +87,42 @@ public void testRegister() throws JMException { static void runRegister(boolean expectToSucceed, String name, JmxRegister jmx) { final RaftServerMXBean mBean = new RaftServerMXBean() { @Override - public String getId() { return null; } + public String getId() { + return null; + } @Override - public String getLeaderId() { return null; } + public String getLeaderId() { + return null; + } @Override - public long getCurrentTerm() { return 0; } + public long getCurrentTerm() { + return 0; + } @Override - public String getGroupId() { return null; } + public String getGroupId() { + return null; + } @Override - public String getRole() { return null; } + public String getRole() { + return null; + } @Override - public List getFollowers() { return null; } + public List getFollowers() { + return null; + } @Override - public List getGroups() { return null; } - + public List getGroups() { + return null; + } }; final String id = RaftPeerId.valueOf(name).toString(); final String groupId = RaftGroupId.randomId().toString(); final boolean succeeded = RaftServerJmxAdapter.registerMBean(id, groupId, mBean, jmx); - Assert.assertEquals(expectToSucceed, succeeded); + Assertions.assertEquals(expectToSucceed, succeeded); } static void runUnregister(boolean expectToSucceed, JmxRegister jmx) throws JMException { final boolean succeeded = jmx.unregister(); - Assert.assertEquals(expectToSucceed, succeeded); + Assertions.assertEquals(expectToSucceed, succeeded); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/protocol/ProtocolTestUtils.java b/ratis-test/src/test/java/org/apache/ratis/server/protocol/ProtocolTestUtils.java new file mode 100644 index 0000000000..dee3f224c4 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/server/protocol/ProtocolTestUtils.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.server.protocol; + +import org.apache.ratis.util.BiWeakValueCache; + +public interface ProtocolTestUtils { + static BiWeakValueCache getTermIndexCache() { + return TermIndex.Impl.getCache(); + } +} \ No newline at end of file diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/TestRaftLogIndex.java b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/TestRaftLogIndex.java index ac655222ba..d3b4b5ccb0 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/TestRaftLogIndex.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/TestRaftLogIndex.java @@ -18,8 +18,8 @@ package org.apache.ratis.server.raftlog; import org.apache.ratis.BaseTest; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.util.function.BiFunction; import java.util.function.Consumer; @@ -34,10 +34,10 @@ static void assertUpdate(RaftLogIndex index, BiFunction update, long oldValue, LongUnaryOperator op, boolean expectUpdate) { - Assert.assertEquals(oldValue, index.get()); + Assertions.assertEquals(oldValue, index.get()); final boolean updated = update.apply(index, op); - Assert.assertEquals(expectUpdate, updated); 
- Assert.assertEquals(expectUpdate? op.applyAsLong(oldValue): oldValue, index.get()); + Assertions.assertEquals(expectUpdate, updated); + Assertions.assertEquals(expectUpdate? op.applyAsLong(oldValue): oldValue, index.get()); } @@ -45,7 +45,7 @@ static void assertUpdate(RaftLogIndex index, BiFunction log = System.out::println; { // test updateIncreasingly diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/TestRaftLogMetrics.java b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/TestRaftLogMetrics.java index 70b185e554..6f57d42a5d 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/TestRaftLogMetrics.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/TestRaftLogMetrics.java @@ -36,8 +36,8 @@ import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import javax.management.ObjectName; import java.lang.management.ManagementFactory; @@ -110,13 +110,14 @@ static void runTestRaftLogMetrics(MiniRaftCluster cluster) throws Exception { } // Wait for commits to happen on leader - JavaUtils.attempt(() -> assertCommitCount(cluster.getLeader(), numMsg), 10, HUNDRED_MILLIS, cluster.getLeader().getId() + "-assertCommitCount", null); + JavaUtils.attempt(() -> assertCommitCount(cluster.getLeader(), numMsg), 10, HUNDRED_MILLIS, + cluster.getLeader().getId() + "-assertCommitCount", null); } static void assertCommitCount(RaftServer.Division server, int expectedMsgs) { final RatisMetricRegistry rlm = ((RatisMetrics)server.getRaftLog().getRaftLogMetrics()).getRegistry(); long stmCount = rlm.counter(STATE_MACHINE_LOG_ENTRY_COUNT).getCount(); - Assert.assertEquals(expectedMsgs, stmCount); + Assertions.assertEquals(expectedMsgs, stmCount); } static RatisMetricRegistryImpl getRegistry(RaftGroupMemberId memberId) { @@ 
-127,67 +128,71 @@ static void assertFlushCount(RaftServer.Division server) throws Exception { final String flushTimeMetric = RaftStorageTestUtils.getLogFlushTimeMetric(server.getMemberId().toString()); final RatisMetricRegistryImpl ratisMetricRegistry = getRegistry(server.getMemberId()); Timer tm = (Timer) ratisMetricRegistry.get(RAFT_LOG_FLUSH_TIME); - Assert.assertNotNull(tm); + Assertions.assertNotNull(tm); final MetricsStateMachine stateMachine = MetricsStateMachine.get(server); final int expectedFlush = stateMachine.getFlushCount(); JavaUtils.attemptRepeatedly(() -> { - Assert.assertEquals(expectedFlush, tm.getCount()); + Assertions.assertEquals(expectedFlush, tm.getCount()); return null; }, 50, HUNDRED_MILLIS, "expectedFlush == tm.getCount()", null); - Assert.assertTrue(tm.getMeanRate() > 0); + Assertions.assertTrue(tm.getMeanRate() > 0); // Test jmx ObjectName oname = new ObjectName(RATIS_APPLICATION_NAME_METRICS, "name", flushTimeMetric); - Assert.assertEquals(expectedFlush, + Assertions.assertEquals(expectedFlush, ((Long) ManagementFactory.getPlatformMBeanServer().getAttribute(oname, "Count")) .intValue()); } static void assertRaftLogWritePathMetrics(RaftServer.Division server) throws Exception { - final String syncTimeMetric = RaftStorageTestUtils.getRaftLogFullMetric(server.getMemberId().toString(), RAFT_LOG_SYNC_TIME); + final String syncTimeMetric = RaftStorageTestUtils.getRaftLogFullMetric(server.getMemberId().toString(), + RAFT_LOG_SYNC_TIME); final RatisMetricRegistryImpl ratisMetricRegistry = getRegistry(server.getMemberId()); //Test sync count Timer tm = (Timer) ratisMetricRegistry.get(RAFT_LOG_SYNC_TIME); - Assert.assertNotNull(tm); + Assertions.assertNotNull(tm); final MetricsStateMachine stateMachine = MetricsStateMachine.get(server); final int expectedFlush = stateMachine.getFlushCount(); - Assert.assertEquals(expectedFlush, tm.getCount()); // Ideally, flushCount should be same as syncCount. 
- Assert.assertTrue(tm.getMeanRate() > 0); + Assertions.assertEquals(expectedFlush, tm.getCount()); // Ideally, flushCount should be same as syncCount. + Assertions.assertTrue(tm.getMeanRate() > 0); // Test jmx. Just testing one metric's JMX is good enough. ObjectName oname = new ObjectName(RATIS_APPLICATION_NAME_METRICS, "name", syncTimeMetric); - Assert.assertEquals(expectedFlush, + Assertions.assertEquals(expectedFlush, ((Long) ManagementFactory.getPlatformMBeanServer().getAttribute(oname, "Count")) .intValue()); long cacheMissCount = ratisMetricRegistry.counter(RAFT_LOG_CACHE_MISS_COUNT).getCount(); - Assert.assertEquals(0, cacheMissCount); + Assertions.assertEquals(0, cacheMissCount); long cacheHitsCount = ratisMetricRegistry.counter(RAFT_LOG_CACHE_HIT_COUNT).getCount(); - Assert.assertTrue(cacheHitsCount > 0); + Assertions.assertTrue(cacheHitsCount > 0); - Assert.assertTrue(ratisMetricRegistry.counter(RAFT_LOG_FLUSH_COUNT).getCount() > 0); - Assert.assertTrue(ratisMetricRegistry.counter(RAFT_LOG_APPEND_ENTRY_COUNT).getCount() > 0); + Assertions.assertTrue(ratisMetricRegistry.counter(RAFT_LOG_FLUSH_COUNT).getCount() > 0); + Assertions.assertTrue(ratisMetricRegistry.counter(RAFT_LOG_APPEND_ENTRY_COUNT).getCount() > 0); - final DefaultTimekeeperImpl appendEntry = (DefaultTimekeeperImpl) ratisMetricRegistry.timer(RAFT_LOG_APPEND_ENTRY_LATENCY); - Assert.assertTrue(appendEntry.getTimer().getMeanRate() > 0); + final DefaultTimekeeperImpl appendEntry = + (DefaultTimekeeperImpl) ratisMetricRegistry.timer(RAFT_LOG_APPEND_ENTRY_LATENCY); + Assertions.assertTrue(appendEntry.getTimer().getMeanRate() > 0); - final DefaultTimekeeperImpl taskQueue = (DefaultTimekeeperImpl) ratisMetricRegistry.timer(RAFT_LOG_TASK_QUEUE_TIME); - Assert.assertTrue(taskQueue.getTimer().getMeanRate() > 0); + final DefaultTimekeeperImpl taskQueue = + (DefaultTimekeeperImpl) ratisMetricRegistry.timer(RAFT_LOG_TASK_QUEUE_TIME); + Assertions.assertTrue(taskQueue.getTimer().getMeanRate() > 0); - 
final DefaultTimekeeperImpl enqueueDelay = (DefaultTimekeeperImpl) ratisMetricRegistry.timer(RAFT_LOG_TASK_ENQUEUE_DELAY); - Assert.assertTrue(enqueueDelay.getTimer().getMeanRate() > 0); + final DefaultTimekeeperImpl enqueueDelay = + (DefaultTimekeeperImpl) ratisMetricRegistry.timer(RAFT_LOG_TASK_ENQUEUE_DELAY); + Assertions.assertTrue(enqueueDelay.getTimer().getMeanRate() > 0); final DefaultTimekeeperImpl write = (DefaultTimekeeperImpl) ratisMetricRegistry.timer( String.format(RAFT_LOG_TASK_EXECUTION_TIME, "writelog")); - Assert.assertTrue(write.getTimer().getMeanRate() > 0); + Assertions.assertTrue(write.getTimer().getMeanRate() > 0); - Assert.assertNotNull(ratisMetricRegistry.get(RAFT_LOG_DATA_QUEUE_SIZE)); - Assert.assertNotNull(ratisMetricRegistry.get(RAFT_LOG_WORKER_QUEUE_SIZE)); - Assert.assertNotNull(ratisMetricRegistry.get(RAFT_LOG_SYNC_BATCH_SIZE)); + Assertions.assertNotNull(ratisMetricRegistry.get(RAFT_LOG_DATA_QUEUE_SIZE)); + Assertions.assertNotNull(ratisMetricRegistry.get(RAFT_LOG_WORKER_QUEUE_SIZE)); + Assertions.assertNotNull(ratisMetricRegistry.get(RAFT_LOG_SYNC_BATCH_SIZE)); } } \ No newline at end of file diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/memory/MemoryRaftLogTest.java b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/memory/MemoryRaftLogTest.java index 5d8d090a38..17c309f0bd 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/memory/MemoryRaftLogTest.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/memory/MemoryRaftLogTest.java @@ -17,7 +17,7 @@ */ package org.apache.ratis.server.raftlog.memory; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.util.ArrayList; import java.util.List; @@ -34,9 +34,10 @@ import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.Slf4jUtils; -import org.junit.Test; +import 
org.junit.jupiter.api.Test; import org.slf4j.event.Level; +@SuppressWarnings({"deprecation"}) public class MemoryRaftLogTest extends BaseTest { static { Slf4jUtils.setLogLevel(MemoryRaftLog.LOG, Level.DEBUG); diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestBufferedWriteChannel.java b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestBufferedWriteChannel.java index c9d7928555..4df5f4992c 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestBufferedWriteChannel.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestBufferedWriteChannel.java @@ -18,9 +18,8 @@ package org.apache.ratis.server.raftlog.segmented; import org.apache.ratis.BaseTest; -import org.apache.ratis.util.StringUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.io.IOException; import java.nio.ByteBuffer; @@ -40,8 +39,8 @@ class FakeFileChannel extends FileChannel { private long forcedPosition = 0; void assertValues(long expectedPosition, long expectedForcedPosition) { - Assert.assertEquals(expectedPosition, position); - Assert.assertEquals(expectedForcedPosition, forcedPosition); + Assertions.assertEquals(expectedPosition, position); + Assertions.assertEquals(expectedForcedPosition, forcedPosition); } @Override @@ -55,7 +54,7 @@ public long read(ByteBuffer[] dsts, int offset, int length) { } @Override - public int read(ByteBuffer dst, long position) { + public int read(ByteBuffer dst, long pos) { throw new UnsupportedOperationException(); } @@ -65,7 +64,7 @@ public long write(ByteBuffer[] srcs, int offset, int length) { } @Override - public int write(ByteBuffer src, long position) { + public int write(ByteBuffer src, long pos) { throw new UnsupportedOperationException(); } @@ -104,27 +103,27 @@ public void force(boolean metaData) { } @Override - public long transferTo(long position, long count, 
WritableByteChannel target) { + public long transferTo(long pos, long count, WritableByteChannel target) { throw new UnsupportedOperationException(); } @Override - public long transferFrom(ReadableByteChannel src, long position, long count) { + public long transferFrom(ReadableByteChannel src, long pos, long count) { throw new UnsupportedOperationException(); } @Override - public MappedByteBuffer map(MapMode mode, long position, long size) { + public MappedByteBuffer map(MapMode mode, long pos, long size) { throw new UnsupportedOperationException(); } @Override - public FileLock lock(long position, long size, boolean shared) { + public FileLock lock(long pos, long size, boolean shared) { throw new UnsupportedOperationException(); } @Override - public FileLock tryLock(long position, long size, boolean shared) { + public FileLock tryLock(long pos, long size, boolean shared) { throw new UnsupportedOperationException(); } @@ -200,7 +199,7 @@ static void flush(BufferedWriteChannel out, FakeFileChannel fake, AtomicInteger pos, AtomicInteger force) throws IOException { final int existing = out.writeBufferPosition(); out.flush(); - Assert.assertEquals(0, out.writeBufferPosition()); + Assertions.assertEquals(0, out.writeBufferPosition()); pos.addAndGet(existing); force.set(pos.get()); fake.assertValues(pos.get(), force.get()); @@ -213,9 +212,9 @@ static void writeToBuffer(BufferedWriteChannel out, FakeFileChannel fake, Atomic out.writeToBuffer(n, b -> b.put(buffer)); if (existing + n > bufferCapacity) { pos.addAndGet(existing); - Assert.assertEquals(n, out.writeBufferPosition()); + Assertions.assertEquals(n, out.writeBufferPosition()); } else { - Assert.assertEquals(existing + n, out.writeBufferPosition()); + Assertions.assertEquals(existing + n, out.writeBufferPosition()); } fake.assertValues(pos.get(), force.get()); } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestCacheEviction.java 
b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestCacheEviction.java index 996f7ef527..163c25da90 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestCacheEviction.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestCacheEviction.java @@ -39,8 +39,8 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.mockito.Mockito; import java.io.File; @@ -51,11 +51,12 @@ import static org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogTestUtils.MAX_OP_SIZE; +@SuppressWarnings({"deprecation"}) public class TestCacheEviction extends BaseTest { - private static final CacheInvalidationPolicy policy = new CacheInvalidationPolicyDefault(); + private static final CacheInvalidationPolicy POLICY = new CacheInvalidationPolicyDefault(); static LogSegmentList prepareSegments(int numSegments, boolean[] cached, long start, long size) { - Assert.assertEquals(numSegments, cached.length); + Assertions.assertEquals(numSegments, cached.length); final LogSegmentList segments = new LogSegmentList(JavaUtils.getClassSimpleName(TestCacheEviction.class)); for (int i = 0; i < numSegments; i++) { LogSegment s = LogSegment.newCloseSegment(null, start, start + size - 1, MAX_OP_SIZE, null); @@ -76,36 +77,36 @@ public void testBasicEviction() throws Exception { new boolean[]{true, true, true, true, true}, 0, 10); // case 1, make sure we do not evict cache for segments behind local flushed index - List evicted = policy.evict(null, 5, 15, segments, maxCached); - Assert.assertEquals(0, evicted.size()); + List evicted = POLICY.evict(null, 5, 15, segments, maxCached); + Assertions.assertEquals(0, evicted.size()); // case 2, suppose the local flushed index is in the 3rd segment, then we // can evict the 
first two segment - evicted = policy.evict(null, 25, 30, segments, maxCached); - Assert.assertEquals(2, evicted.size()); - Assert.assertSame(evicted.get(0), segments.get(0)); - Assert.assertSame(evicted.get(1), segments.get(1)); + evicted = POLICY.evict(null, 25, 30, segments, maxCached); + Assertions.assertEquals(2, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(0)); + Assertions.assertSame(evicted.get(1), segments.get(1)); // case 3, similar with case 2, but the local applied index is less than // the local flushed index. - evicted = policy.evict(null, 25, 15, segments, maxCached); - Assert.assertEquals(1, evicted.size()); - Assert.assertSame(evicted.get(0), segments.get(0)); + evicted = POLICY.evict(null, 25, 15, segments, maxCached); + Assertions.assertEquals(1, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(0)); // case 4, the local applied index is very small, then evict cache behind it // first and let the state machine load the segments later - evicted = policy.evict(null, 35, 5, segments, maxCached); - Assert.assertEquals(1, evicted.size()); - Assert.assertSame(evicted.get(0), segments.get(2)); + evicted = POLICY.evict(null, 35, 5, segments, maxCached); + Assertions.assertEquals(1, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(2)); Mockito.when(segments.get(2).hasCache()).thenReturn(false); - evicted = policy.evict(null, 35, 5, segments, maxCached); - Assert.assertEquals(1, evicted.size()); - Assert.assertSame(evicted.get(0), segments.get(1)); + evicted = POLICY.evict(null, 35, 5, segments, maxCached); + Assertions.assertEquals(1, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(1)); Mockito.when(segments.get(1).hasCache()).thenReturn(false); - evicted = policy.evict(null, 35, 5, segments, maxCached); - Assert.assertEquals(0, evicted.size()); + evicted = POLICY.evict(null, 35, 5, segments, maxCached); + Assertions.assertEquals(0, evicted.size()); } @Test @@ -116,39 
+117,39 @@ public void testEvictionWithFollowerIndices() throws Exception { // case 1, no matter where the followers are, we do not evict segments behind local // flushed index - List evicted = policy.evict(new long[]{20, 40, 40}, 5, 15, segments, + List evicted = POLICY.evict(new long[]{20, 40, 40}, 5, 15, segments, maxCached); - Assert.assertEquals(0, evicted.size()); + Assertions.assertEquals(0, evicted.size()); // case 2, the follower indices are behind the local flushed index - evicted = policy.evict(new long[]{30, 40, 45}, 25, 30, segments, maxCached); - Assert.assertEquals(2, evicted.size()); - Assert.assertSame(evicted.get(0), segments.get(0)); - Assert.assertSame(evicted.get(1), segments.get(1)); + evicted = POLICY.evict(new long[]{30, 40, 45}, 25, 30, segments, maxCached); + Assertions.assertEquals(2, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(0)); + Assertions.assertSame(evicted.get(1), segments.get(1)); // case 3, similar with case 3 in basic eviction test - evicted = policy.evict(new long[]{30, 40, 45}, 25, 15, segments, maxCached); - Assert.assertEquals(1, evicted.size()); - Assert.assertSame(evicted.get(0), segments.get(0)); + evicted = POLICY.evict(new long[]{30, 40, 45}, 25, 15, segments, maxCached); + Assertions.assertEquals(1, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(0)); // case 4, the followers are slower than local flush - evicted = policy.evict(new long[]{15, 45, 45}, 55, 50, segments, maxCached); - Assert.assertEquals(1, evicted.size()); - Assert.assertSame(evicted.get(0), segments.get(0)); + evicted = POLICY.evict(new long[]{15, 45, 45}, 55, 50, segments, maxCached); + Assertions.assertEquals(1, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(0)); Mockito.when(segments.get(0).hasCache()).thenReturn(false); - evicted = policy.evict(new long[]{15, 45, 45}, 55, 50, segments, maxCached); - Assert.assertEquals(1, evicted.size()); - Assert.assertSame(evicted.get(0), 
segments.get(2)); + evicted = POLICY.evict(new long[]{15, 45, 45}, 55, 50, segments, maxCached); + Assertions.assertEquals(1, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(2)); Mockito.when(segments.get(2).hasCache()).thenReturn(false); - evicted = policy.evict(new long[]{15, 45, 45}, 55, 50, segments, maxCached); - Assert.assertEquals(1, evicted.size()); - Assert.assertSame(evicted.get(0), segments.get(3)); + evicted = POLICY.evict(new long[]{15, 45, 45}, 55, 50, segments, maxCached); + Assertions.assertEquals(1, evicted.size()); + Assertions.assertSame(evicted.get(0), segments.get(3)); Mockito.when(segments.get(3).hasCache()).thenReturn(false); - evicted = policy.evict(new long[]{15, 45, 45}, 55, 50, segments, maxCached); - Assert.assertEquals(0, evicted.size()); + evicted = POLICY.evict(new long[]{15, 45, 45}, 55, 50, segments, maxCached); + Assertions.assertEquals(0, evicted.size()); } @Test @@ -177,7 +178,7 @@ public void testEvictionInSegmentedLog() throws Exception { raftLog.append(entries).forEach(CompletableFuture::join); // check the current cached segment number: the last segment is still open - Assert.assertEquals(maxCachedNum - 1, + Assertions.assertEquals(maxCachedNum - 1, raftLog.getRaftLogCache().getCachedSegmentNum()); Mockito.when(info.getLastAppliedIndex()).thenReturn(35L); @@ -188,7 +189,7 @@ public void testEvictionInSegmentedLog() throws Exception { // check the cached segment number again. 
since the slowest follower is on // index 21, the eviction should happen and evict 3 segments - Assert.assertEquals(maxCachedNum + 1 - 3, + Assertions.assertEquals(maxCachedNum + 1 - 3, raftLog.getRaftLogCache().getCachedSegmentNum()); } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestLogSegment.java b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestLogSegment.java index ece17a0524..259f163070 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestLogSegment.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestLogSegment.java @@ -21,15 +21,14 @@ import org.apache.ratis.RaftTestUtil.SimpleOperation; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.metrics.impl.DefaultTimekeeperImpl; -import org.apache.ratis.proto.RaftProtos.LogEntryProto; -import org.apache.ratis.proto.RaftProtos.StateMachineLogEntryProto; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.impl.RaftServerTestUtil; import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics; import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.server.raftlog.LogProtoUtils; -import org.apache.ratis.server.raftlog.segmented.LogSegment.Op; import org.apache.ratis.server.storage.RaftStorage; +import org.apache.ratis.proto.RaftProtos.LogEntryProto; +import org.apache.ratis.proto.RaftProtos.StateMachineLogEntryProto; import org.apache.ratis.server.storage.RaftStorageTestUtils; import org.apache.ratis.thirdparty.com.google.protobuf.CodedOutputStream; import org.apache.ratis.util.FileUtils; @@ -37,10 +36,10 @@ import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TraditionalBinaryPrefix; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import 
org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; @@ -59,13 +58,16 @@ /** * Test basic functionality of {@link LogSegment} */ +@SuppressWarnings({"try"}) public class TestLogSegment extends BaseTest { + public static final LogSegmentStartEnd ZERO_START_NULL_END = LogSegmentStartEnd.valueOf(0); + private File storageDir; private long segmentMaxSize; private long preallocatedSize; private int bufferSize; - @Before + @BeforeEach public void setup() { RaftProperties properties = new RaftProperties(); storageDir = getTestDir(); @@ -78,7 +80,7 @@ public void setup() { RaftServerConfigKeys.Log.writeBufferSize(properties).getSizeInt(); } - @After + @AfterEach public void tearDown() throws Exception { if (storageDir != null) { FileUtils.deleteFully(storageDir.getParentFile()); @@ -88,7 +90,8 @@ public void tearDown() throws Exception { File prepareLog(boolean isOpen, long startIndex, int numEntries, long term, boolean isLastEntryPartiallyWritten) throws IOException { if (!isOpen) { - Preconditions.assertTrue(!isLastEntryPartiallyWritten, "For closed log, the last entry cannot be partially written."); + Preconditions.assertTrue(!isLastEntryPartiallyWritten, + "For closed log, the last entry cannot be partially written."); } RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); final File file = LogSegmentStartEnd.valueOf(startIndex, startIndex + numEntries - 1, isOpen).getFile(storage); @@ -110,11 +113,11 @@ File prepareLog(boolean isOpen, long startIndex, int numEntries, long term, bool // 0 < truncatedEntrySize < entrySize final long fileLength = file.length(); final long truncatedFileLength = fileLength - (entrySize - truncatedEntrySize); - Assert.assertTrue(truncatedFileLength < fileLength); + Assertions.assertTrue(truncatedFileLength < fileLength); LOG.info("truncate last entry: entry(size={}, truncated={}), file(length={}, truncated={})", 
entrySize, truncatedEntrySize, fileLength, truncatedFileLength); FileUtils.truncateFile(file, truncatedFileLength); - Assert.assertEquals(truncatedFileLength, file.length()); + Assertions.assertEquals(truncatedFileLength, file.length()); } storage.close(); @@ -128,24 +131,24 @@ static int size(LogEntryProto entry) { static void checkLogSegment(LogSegment segment, long start, long end, boolean isOpen, long totalSize, long term) throws Exception { - Assert.assertEquals(start, segment.getStartIndex()); - Assert.assertEquals(end, segment.getEndIndex()); - Assert.assertEquals(isOpen, segment.isOpen()); - Assert.assertEquals(totalSize, segment.getTotalFileSize()); + Assertions.assertEquals(start, segment.getStartIndex()); + Assertions.assertEquals(end, segment.getEndIndex()); + Assertions.assertEquals(isOpen, segment.isOpen()); + Assertions.assertEquals(totalSize, segment.getTotalFileSize()); long offset = SegmentedRaftLogFormat.getHeaderLength(); for (long i = start; i <= end; i++) { LogSegment.LogRecord record = segment.getLogRecord(i); final TermIndex ti = record.getTermIndex(); - Assert.assertEquals(i, ti.getIndex()); - Assert.assertEquals(term, ti.getTerm()); - Assert.assertEquals(offset, record.getOffset()); + Assertions.assertEquals(i, ti.getIndex()); + Assertions.assertEquals(term, ti.getTerm()); + Assertions.assertEquals(offset, record.getOffset()); - LogEntryProto entry = segment.getEntryFromCache(ti); - if (entry == null) { - entry = segment.loadCache(record); + ReferenceCountedObject entryRef = segment.getEntryFromCache(ti); + if (entryRef == null) { + entryRef = segment.loadCache(ti); } - offset += getEntrySize(entry, Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE); + offset += getEntrySize(entryRef.get(), LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE); } } @@ -174,12 +177,12 @@ private void testLoadSegment(boolean loadInitial, boolean isLastEntryPartiallyWr final File openSegmentFile = prepareLog(true, 0, 100, 0, isLastEntryPartiallyWritten); 
RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); final LogSegment openSegment = LogSegment.loadSegment(storage, openSegmentFile, - LogSegmentStartEnd.valueOf(0), MAX_OP_SIZE, loadInitial, null, null); + ZERO_START_NULL_END, MAX_OP_SIZE, loadInitial, null, null); final int delta = isLastEntryPartiallyWritten? 1: 0; checkLogSegment(openSegment, 0, 99 - delta, true, openSegmentFile.length(), 0); storage.close(); // for open segment we currently always keep log entries in the memory - Assert.assertEquals(0, openSegment.getLoadingTimes()); + Assertions.assertEquals(0, openSegment.getLoadingTimes()); // load a closed segment (1000-1099) final File closedSegmentFile = prepareLog(false, 1000, 100, 1, false); @@ -187,7 +190,7 @@ private void testLoadSegment(boolean loadInitial, boolean isLastEntryPartiallyWr LogSegmentStartEnd.valueOf(1000, 1099L), MAX_OP_SIZE, loadInitial, null, null); checkLogSegment(closedSegment, 1000, 1099, false, closedSegment.getTotalFileSize(), 1); - Assert.assertEquals(loadInitial ? 0 : 1, closedSegment.getLoadingTimes()); + Assertions.assertEquals(loadInitial ? 
0 : 1, closedSegment.getLoadingTimes()); } @Test @@ -204,11 +207,11 @@ public void testAppendEntries() throws Exception { while (size < max) { SimpleOperation op = new SimpleOperation("m" + i); LogEntryProto entry = LogProtoUtils.toLogEntryProto(op.getLogEntryContent(), term, i++ + start); - size += getEntrySize(entry, Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE); - segment.appendToOpenSegment(Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); + size += getEntrySize(entry, LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE); + segment.appendToOpenSegment(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); } - Assert.assertTrue(segment.getTotalFileSize() >= max); + Assertions.assertTrue(segment.getTotalFileSize() >= max); checkLogSegment(segment, start, i - 1 + start, true, size, term); } @@ -219,14 +222,14 @@ public void testAppendEntryMetric() throws Exception { final File openSegmentFile = prepareLog(true, 0, 100, 0, true); RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); final LogSegment openSegment = LogSegment.loadSegment(storage, openSegmentFile, - LogSegmentStartEnd.valueOf(0), MAX_OP_SIZE, true, null, raftLogMetrics); + ZERO_START_NULL_END, MAX_OP_SIZE, true, null, raftLogMetrics); checkLogSegment(openSegment, 0, 98, true, openSegmentFile.length(), 0); storage.close(); final DefaultTimekeeperImpl readEntryTimer = (DefaultTimekeeperImpl) raftLogMetrics.getReadEntryTimer(); - Assert.assertNotNull(readEntryTimer); - Assert.assertEquals(100, readEntryTimer.getTimer().getCount()); - Assert.assertTrue(readEntryTimer.getTimer().getMeanRate() > 0); + Assertions.assertNotNull(readEntryTimer); + Assertions.assertEquals(100, readEntryTimer.getTimer().getCount()); + Assertions.assertTrue(readEntryTimer.getTimer().getMeanRate() > 0); } @@ -237,19 +240,19 @@ public void testAppendWithGap() throws Exception { final StateMachineLogEntryProto m = op.getLogEntryContent(); try { 
LogEntryProto entry = LogProtoUtils.toLogEntryProto(m, 0, 1001); - segment.appendToOpenSegment(Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); - Assert.fail("should fail since the entry's index needs to be 1000"); + segment.appendToOpenSegment(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); + Assertions.fail("should fail since the entry's index needs to be 1000"); } catch (IllegalStateException e) { // the exception is expected. } LogEntryProto entry = LogProtoUtils.toLogEntryProto(m, 0, 1000); - segment.appendToOpenSegment(Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); + segment.appendToOpenSegment(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); try { entry = LogProtoUtils.toLogEntryProto(m, 0, 1002); - segment.appendToOpenSegment(Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); - Assert.fail("should fail since the entry's index needs to be 1001"); + segment.appendToOpenSegment(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); + Assertions.fail("should fail since the entry's index needs to be 1001"); } catch (IllegalStateException e) { // the exception is expected. 
} @@ -263,24 +266,24 @@ public void testTruncate() throws Exception { for (int i = 0; i < 100; i++) { LogEntryProto entry = LogProtoUtils.toLogEntryProto( new SimpleOperation("m" + i).getLogEntryContent(), term, i + start); - segment.appendToOpenSegment(Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); + segment.appendToOpenSegment(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); } // truncate an open segment (remove 1080~1099) long newSize = segment.getLogRecord(start + 80).getOffset(); segment.truncate(start + 80); - Assert.assertEquals(80, segment.numOfEntries()); + Assertions.assertEquals(80, segment.numOfEntries()); checkLogSegment(segment, start, start + 79, false, newSize, term); // truncate a closed segment (remove 1050~1079) newSize = segment.getLogRecord(start + 50).getOffset(); segment.truncate(start + 50); - Assert.assertEquals(50, segment.numOfEntries()); + Assertions.assertEquals(50, segment.numOfEntries()); checkLogSegment(segment, start, start + 49, false, newSize, term); // truncate all the remaining entries segment.truncate(start); - Assert.assertEquals(0, segment.numOfEntries()); + Assertions.assertEquals(0, segment.numOfEntries()); checkLogSegment(segment, start, start - 1, false, SegmentedRaftLogFormat.getHeaderLength(), term); } @@ -288,7 +291,7 @@ public void testTruncate() throws Exception { @Test public void testPreallocateSegment() throws Exception { RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); - final File file = LogSegmentStartEnd.valueOf(0).getFile(storage); + final File file = ZERO_START_NULL_END.getFile(storage); final int[] maxSizes = new int[]{1024, 1025, 1024 * 1024 - 1, 1024 * 1024, 1024 * 1024 + 1, 2 * 1024 * 1024 - 1, 2 * 1024 * 1024, 2 * 1024 * 1024 + 1, 8 * 1024 * 1024}; @@ -300,12 +303,12 @@ public void testPreallocateSegment() throws Exception { for (int a : preallocated) { try(SegmentedRaftLogOutputStream ignored = new 
SegmentedRaftLogOutputStream(file, false, max, a, ByteBuffer.allocateDirect(bufferSize))) { - Assert.assertEquals("max=" + max + ", a=" + a, file.length(), Math.min(max, a)); + Assertions.assertEquals(file.length(), Math.min(max, a), "max=" + max + ", a=" + a); } try(SegmentedRaftLogInputStream in = SegmentedRaftLogTestUtils.newSegmentedRaftLogInputStream( file, 0, INVALID_LOG_INDEX, true)) { LogEntryProto entry = in.nextEntry(); - Assert.assertNull(entry); + Assertions.assertNull(entry); } } } @@ -318,17 +321,17 @@ public void testPreallocateSegment() throws Exception { 1024, 1024, ByteBuffer.allocateDirect(bufferSize))) { SimpleOperation op = new SimpleOperation(new String(content)); LogEntryProto entry = LogProtoUtils.toLogEntryProto(op.getLogEntryContent(), 0, 0); - size = LogSegment.getEntrySize(entry, Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE); + size = LogSegment.getEntrySize(entry, LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE); out.write(entry); } - Assert.assertEquals(file.length(), + Assertions.assertEquals(file.length(), size + SegmentedRaftLogFormat.getHeaderLength()); try (SegmentedRaftLogInputStream in = SegmentedRaftLogTestUtils.newSegmentedRaftLogInputStream( file, 0, INVALID_LOG_INDEX, true)) { LogEntryProto entry = in.nextEntry(); - Assert.assertArrayEquals(content, + Assertions.assertArrayEquals(content, entry.getStateMachineLogEntry().getLogData().toByteArray()); - Assert.assertNull(in.nextEntry()); + Assertions.assertNull(in.nextEntry()); } } @@ -339,49 +342,49 @@ public void testPreallocateSegment() throws Exception { public void testPreallocationAndAppend() throws Exception { final SizeInBytes max = SizeInBytes.valueOf(2, TraditionalBinaryPrefix.MEGA); RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); - final File file = LogSegmentStartEnd.valueOf(0).getFile(storage); + final File file = ZERO_START_NULL_END.getFile(storage); final byte[] content = new byte[1024]; Arrays.fill(content, (byte) 1); SimpleOperation 
op = new SimpleOperation(new String(content)); LogEntryProto entry = LogProtoUtils.toLogEntryProto(op.getLogEntryContent(), 0, 0); - final long entrySize = LogSegment.getEntrySize(entry, Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE); + final long entrySize = LogSegment.getEntrySize(entry, LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE); long totalSize = SegmentedRaftLogFormat.getHeaderLength(); long preallocated = 16 * 1024; try (SegmentedRaftLogOutputStream out = new SegmentedRaftLogOutputStream(file, false, max.getSize(), 16 * 1024, ByteBuffer.allocateDirect(10 * 1024))) { - Assert.assertEquals(preallocated, file.length()); + Assertions.assertEquals(preallocated, file.length()); while (totalSize + entrySize < max.getSize()) { totalSize += entrySize; out.write(entry); if (totalSize > preallocated) { - Assert.assertEquals("totalSize==" + totalSize, - preallocated + 16 * 1024, file.length()); + Assertions.assertEquals(preallocated + 16 * 1024, file.length(), + "totalSize==" + totalSize); preallocated += 16 * 1024; } } } - Assert.assertEquals(totalSize, file.length()); + Assertions.assertEquals(totalSize, file.length()); } @Test public void testZeroSizeInProgressFile() throws Exception { final RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); - final File file = LogSegmentStartEnd.valueOf(0).getFile(storage); + final File file = ZERO_START_NULL_END.getFile(storage); storage.close(); // create zero size in-progress file LOG.info("file: " + file); - Assert.assertTrue(file.createNewFile()); + Assertions.assertTrue(file.createNewFile()); final Path path = file.toPath(); - Assert.assertTrue(Files.exists(path)); - Assert.assertEquals(0, Files.size(path)); + Assertions.assertTrue(Files.exists(path)); + Assertions.assertEquals(0, Files.size(path)); // getLogSegmentPaths should remove it. 
final List logs = LogSegmentPath.getLogSegmentPaths(storage); - Assert.assertEquals(0, logs.size()); - Assert.assertFalse(Files.exists(path)); + Assertions.assertEquals(0, logs.size()); + Assertions.assertFalse(Files.exists(path)); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestRaftLogReadWrite.java b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestRaftLogReadWrite.java index a020b43bd8..5600914070 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestRaftLogReadWrite.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestRaftLogReadWrite.java @@ -29,10 +29,10 @@ import org.apache.ratis.thirdparty.com.google.protobuf.CodedOutputStream; import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.util.FileUtils; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; @@ -44,6 +44,8 @@ import java.util.Collections; import java.util.List; +import static org.apache.ratis.server.raftlog.segmented.TestLogSegment.ZERO_START_NULL_END; + /** * Test basic functionality of LogReader, SegmentedRaftLogInputStream, and SegmentedRaftLogOutputStream. 
*/ @@ -53,7 +55,7 @@ public class TestRaftLogReadWrite extends BaseTest { private long preallocatedSize; private int bufferSize; - @Before + @BeforeEach public void setup() { storageDir = getTestDir(); RaftProperties properties = new RaftProperties(); @@ -66,7 +68,7 @@ public void setup() { RaftServerConfigKeys.Log.writeBufferSize(properties).getSizeInt(); } - @After + @AfterEach public void tearDown() throws Exception { if (storageDir != null) { FileUtils.deleteFully(storageDir.getParentFile()); @@ -105,7 +107,7 @@ private long writeMessages(LogEntryProto[] entries, SegmentedRaftLogOutputStream @Test public void testReadWriteLog() throws IOException { final RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); - final File openSegment = LogSegmentStartEnd.valueOf(0).getFile(storage); + final File openSegment = ZERO_START_NULL_END.getFile(storage); long size = SegmentedRaftLogFormat.getHeaderLength(); final LogEntryProto[] entries = new LogEntryProto[100]; @@ -116,16 +118,16 @@ public void testReadWriteLog() throws IOException { storage.close(); } - Assert.assertEquals(size, openSegment.length()); + Assertions.assertEquals(size, openSegment.length()); final LogEntryProto[] readEntries = readLog(openSegment, 0, RaftLog.INVALID_LOG_INDEX, true); - Assert.assertArrayEquals(entries, readEntries); + Assertions.assertArrayEquals(entries, readEntries); } @Test public void testAppendLog() throws IOException { final RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); - final File openSegment = LogSegmentStartEnd.valueOf(0).getFile(storage); + final File openSegment = ZERO_START_NULL_END.getFile(storage); LogEntryProto[] entries = new LogEntryProto[200]; try (SegmentedRaftLogOutputStream out = new SegmentedRaftLogOutputStream(openSegment, false, segmentMaxSize, preallocatedSize, ByteBuffer.allocateDirect(bufferSize))) { @@ -146,7 +148,7 @@ public void testAppendLog() throws IOException { } final LogEntryProto[] readEntries = 
readLog(openSegment, 0, RaftLog.INVALID_LOG_INDEX, true); - Assert.assertArrayEquals(entries, readEntries); + Assertions.assertArrayEquals(entries, readEntries); storage.close(); } @@ -158,7 +160,7 @@ public void testAppendLog() throws IOException { @Test public void testReadWithPadding() throws IOException { final RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); - final File openSegment = LogSegmentStartEnd.valueOf(0).getFile(storage); + final File openSegment = ZERO_START_NULL_END.getFile(storage); long size = SegmentedRaftLogFormat.getHeaderLength(); LogEntryProto[] entries = new LogEntryProto[100]; @@ -168,16 +170,16 @@ public void testReadWithPadding() throws IOException { out.flush(); // make sure the file contains padding - Assert.assertEquals( + Assertions.assertEquals( RaftServerConfigKeys.Log.PREALLOCATED_SIZE_DEFAULT.getSize(), openSegment.length()); // check if the reader can correctly read the log file final LogEntryProto[] readEntries = readLog(openSegment, 0, RaftLog.INVALID_LOG_INDEX, true); - Assert.assertArrayEquals(entries, readEntries); + Assertions.assertArrayEquals(entries, readEntries); out.close(); - Assert.assertEquals(size, openSegment.length()); + Assertions.assertEquals(size, openSegment.length()); } /** @@ -187,7 +189,7 @@ public void testReadWithPadding() throws IOException { @Test public void testReadWithCorruptPadding() throws IOException { final RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); - final File openSegment = LogSegmentStartEnd.valueOf(0).getFile(storage); + final File openSegment = ZERO_START_NULL_END.getFile(storage); LogEntryProto[] entries = new LogEntryProto[10]; final SegmentedRaftLogOutputStream out = new SegmentedRaftLogOutputStream(openSegment, false, @@ -200,12 +202,12 @@ public void testReadWithCorruptPadding() throws IOException { out.flush(); // make sure the file contains padding - Assert.assertEquals(4 * 1024 * 1024, openSegment.length()); + 
Assertions.assertEquals(4 * 1024 * 1024, openSegment.length()); try (FileChannel fout = FileUtils.newFileChannel(openSegment, StandardOpenOption.WRITE)) { final byte[] array = {-1, 1}; final int written = fout.write(ByteBuffer.wrap(array), 16 * 1024 * 1024 - 10); - Assert.assertEquals(array.length, written); + Assertions.assertEquals(array.length, written); } List list = new ArrayList<>(); @@ -215,7 +217,7 @@ public void testReadWithCorruptPadding() throws IOException { while ((entry = in.nextEntry()) != null) { list.add(entry); } - Assert.fail("should fail since we corrupt the padding"); + Assertions.fail("should fail since we corrupt the padding"); } catch (IOException e) { boolean findVerifyTerminator = false; for (StackTraceElement s : e.getStackTrace()) { @@ -224,9 +226,9 @@ public void testReadWithCorruptPadding() throws IOException { break; } } - Assert.assertTrue(findVerifyTerminator); + Assertions.assertTrue(findVerifyTerminator); } - Assert.assertArrayEquals(entries, + Assertions.assertArrayEquals(entries, list.toArray(new LogEntryProto[list.size()])); } @@ -236,7 +238,7 @@ public void testReadWithCorruptPadding() throws IOException { @Test public void testReadWithEntryCorruption() throws IOException { RaftStorage storage = RaftStorageTestUtils.newRaftStorage(storageDir); - final File openSegment = LogSegmentStartEnd.valueOf(0).getFile(storage); + final File openSegment = ZERO_START_NULL_END.getFile(storage); try (SegmentedRaftLogOutputStream out = new SegmentedRaftLogOutputStream(openSegment, false, segmentMaxSize, preallocatedSize, ByteBuffer.allocateDirect(bufferSize))) { for (int i = 0; i < 100; i++) { @@ -259,7 +261,7 @@ public void testReadWithEntryCorruption() throws IOException { try { readLog(openSegment, 0, RaftLog.INVALID_LOG_INDEX, true); - Assert.fail("The read of corrupted log file should fail"); + Assertions.fail("The read of corrupted log file should fail"); } catch (ChecksumException e) { LOG.info("Caught ChecksumException as expected", 
e); } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestSegmentedRaftLog.java b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestSegmentedRaftLog.java index 38fa45e6fa..1f04982823 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestSegmentedRaftLog.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestSegmentedRaftLog.java @@ -17,8 +17,6 @@ */ package org.apache.ratis.server.raftlog.segmented; -import static org.junit.Assert.assertTrue; - import org.apache.ratis.BaseTest; import org.apache.ratis.RaftTestUtil.SimpleOperation; import org.apache.ratis.conf.RaftProperties; @@ -42,37 +40,54 @@ import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.impl.BaseStateMachine; +import org.apache.ratis.util.CodeInjectionForTesting; +import org.apache.ratis.util.DataBlockingQueue; import org.apache.ratis.util.LifeCycle; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import 
java.util.function.LongSupplier; import java.util.function.Supplier; - -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import java.util.stream.Stream; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.slf4j.event.Level; -@RunWith(Parameterized.class) +import static java.lang.Boolean.FALSE; +import static java.lang.Boolean.TRUE; +import static org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogWorker.RUN_WORKER; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +@SuppressWarnings({"deprecation"}) public class TestSegmentedRaftLog extends BaseTest { static { Slf4jUtils.setLogLevel(SegmentedRaftLogWorker.LOG, Level.INFO); @@ -80,27 +95,21 @@ public class TestSegmentedRaftLog extends BaseTest { Slf4jUtils.setLogLevel(SegmentedRaftLog.LOG, Level.INFO); } - private final Boolean smSyncFlush; - private final Boolean useAsyncFlush; - - public TestSegmentedRaftLog(Boolean raftLogAsync, Boolean smSync) { - this.useAsyncFlush = raftLogAsync; - this.smSyncFlush = smSync; - } - - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList((new Boolean[][] {{Boolean.FALSE, Boolean.FALSE}, {Boolean.FALSE, Boolean.TRUE}, - {Boolean.TRUE, Boolean.FALSE}, {Boolean.TRUE, Boolean.TRUE}})); + public static Stream data() { + return Stream.of( + arguments(FALSE, FALSE), + arguments(FALSE, TRUE), + arguments(TRUE, FALSE), + arguments(TRUE, TRUE)); } public static long 
getOpenSegmentSize(RaftLog raftLog) { return ((SegmentedRaftLog)raftLog).getRaftLogCache().getOpenSegment().getTotalFileSize(); } - private static final RaftPeerId peerId = RaftPeerId.valueOf("s0"); - private static final RaftGroupId groupId = RaftGroupId.randomId(); - private static final RaftGroupMemberId memberId = RaftGroupMemberId.valueOf(peerId, groupId); + private static final RaftPeerId PEER_ID = RaftPeerId.valueOf("s0"); + private static final RaftGroupId GROUP_ID = RaftGroupId.randomId(); + private static final RaftGroupMemberId MEMBER_ID = RaftGroupMemberId.valueOf(PEER_ID, GROUP_ID); static class SegmentRange { final long start; @@ -137,7 +146,7 @@ SegmentedRaftLog newSegmentedRaftLog(LongSupplier getSnapshotIndexFromStateMachi static SegmentedRaftLog newSegmentedRaftLog(RaftStorage storage, RaftProperties properties) { return SegmentedRaftLog.newBuilder() - .setMemberId(memberId) + .setMemberId(MEMBER_ID) .setStorage(storage) .setProperties(properties) .build(); @@ -146,20 +155,18 @@ static SegmentedRaftLog newSegmentedRaftLog(RaftStorage storage, RaftProperties private SegmentedRaftLog newSegmentedRaftLogWithSnapshotIndex(RaftStorage storage, RaftProperties properties, LongSupplier getSnapshotIndexFromStateMachine) { return SegmentedRaftLog.newBuilder() - .setMemberId(memberId) + .setMemberId(MEMBER_ID) .setStorage(storage) .setSnapshotIndexSupplier(getSnapshotIndexFromStateMachine) .setProperties(properties) .build(); } - @Before + @BeforeEach public void setup() throws Exception { storageDir = getTestDir(); properties = new RaftProperties(); RaftServerConfigKeys.setStorageDir(properties, Collections.singletonList(storageDir)); - RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); - RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); storage = RaftStorageTestUtils.newRaftStorage(storageDir); this.segmentMaxSize = RaftServerConfigKeys.Log.segmentSizeMax(properties).getSize(); @@ -169,7 +176,7 @@ public 
void setup() throws Exception { RaftServerConfigKeys.Log.writeBufferSize(properties).getSizeInt(); } - @After + @AfterEach public void tearDown() throws Exception { if (storageDir != null) { FileUtils.deleteFully(storageDir.getParentFile()); @@ -211,8 +218,11 @@ private LogEntryProto getLastEntry(SegmentedRaftLog raftLog) return raftLog.get(raftLog.getLastEntryTermIndex().getIndex()); } - @Test - public void testLoadLogSegments() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testLoadLogSegments(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); // first generate log files List ranges = prepareRanges(0, 5, 100, 0); LogEntryProto[] entries = prepareLog(ranges); @@ -223,7 +233,7 @@ public void testLoadLogSegments() throws Exception { // check if log entries are loaded correctly for (LogEntryProto e : entries) { LogEntryProto entry = raftLog.get(e.getIndex()); - Assert.assertEquals(e, entry); + assertEquals(e, entry); } final LogEntryHeader[] termIndices = raftLog.getEntries(0, 500); @@ -236,10 +246,10 @@ public void testLoadLogSegments() throws Exception { } }) .toArray(LogEntryProto[]::new); - Assert.assertArrayEquals(entries, entriesFromLog); - Assert.assertEquals(entries[entries.length - 1], getLastEntry(raftLog)); + Assertions.assertArrayEquals(entries, entriesFromLog); + assertEquals(entries[entries.length - 1], getLastEntry(raftLog)); - final RatisMetricRegistry metricRegistryForLogWorker = RaftLogMetricsBase.createRegistry(memberId); + final RatisMetricRegistry metricRegistryForLogWorker = RaftLogMetricsBase.createRegistry(MEMBER_ID); final DefaultTimekeeperImpl load = (DefaultTimekeeperImpl) metricRegistryForLogWorker.timer("segmentLoadLatency"); assertTrue(load.getTimer().getMeanRate() > 0); @@ -266,7 +276,8 @@ static List prepareLogEntries(SegmentRange range, 
return eList; } - static LogEntryProto prepareLogEntry(long term, long index, Supplier stringSupplier, boolean hasStataMachineData) { + static LogEntryProto prepareLogEntry(long term, long index, Supplier stringSupplier, + boolean hasStataMachineData) { final SimpleOperation m = stringSupplier == null? new SimpleOperation("m" + index, hasStataMachineData): new SimpleOperation(stringSupplier.get(), hasStataMachineData); @@ -276,8 +287,11 @@ static LogEntryProto prepareLogEntry(long term, long index, Supplier str /** * Append entry one by one and check if log state is correct. */ - @Test - public void testAppendEntry() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testAppendEntry(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); List ranges = prepareRanges(0, 5, 200, 0); List entries = prepareLogEntries(ranges, null); @@ -331,8 +345,11 @@ public void testAppendEntry() throws Exception { } } - @Test - public void testAppendEntryAfterPurge() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testAppendEntryAfterPurge(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); List ranges = prepareRanges(0, 5, 200, 0); List entries = prepareLogEntries(ranges, null); @@ -362,8 +379,11 @@ public long getAsLong() { /** * Keep appending entries, make sure the rolling is correct. 
*/ - @Test - public void testAppendAndRoll() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testAppendAndRoll(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); RaftServerConfigKeys.Log.setPreallocatedSize(properties, SizeInBytes.valueOf("16KB")); RaftServerConfigKeys.Log.setSegmentSizeMax(properties, SizeInBytes.valueOf("128KB")); @@ -382,12 +402,91 @@ public void testAppendAndRoll() throws Exception { raftLog.open(RaftLog.INVALID_LOG_INDEX, null); // check if the raft log is correct checkEntries(raftLog, entries, 0, entries.size()); - Assert.assertEquals(9, raftLog.getRaftLogCache().getNumOfSegments()); + assertEquals(9, raftLog.getRaftLogCache().getNumOfSegments()); } } - @Test - public void testTruncate() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testPurgeAfterAppendEntry(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); + RaftServerConfigKeys.Log.setPurgeGap(properties, 1); + RaftServerConfigKeys.Log.setForceSyncNum(properties, 128); + + int startTerm = 0; + int endTerm = 2; + int segmentSize = 10; + long endIndexOfClosedSegment = segmentSize * (endTerm - startTerm - 1); + long nextStartIndex = segmentSize * (endTerm - startTerm); + + // append entries and roll logSegment for later purge operation + List ranges0 = prepareRanges(startTerm, endTerm, segmentSize, 0); + List entries0 = prepareLogEntries(ranges0, null); + try (SegmentedRaftLog raftLog = newSegmentedRaftLog()) { + raftLog.open(RaftLog.INVALID_LOG_INDEX, null); + entries0.stream().map(raftLog::appendEntry).forEach(CompletableFuture::join); + } + + // test the pattern in the task queue of 
SegmentedRaftLogWorker: (WriteLog, ..., PurgeLog) + List ranges = prepareRanges(endTerm - 1, endTerm, 1, nextStartIndex); + List entries = prepareLogEntries(ranges, null); + + try (SegmentedRaftLog raftLog = newSegmentedRaftLog()) { + final CountDownLatch raftLogOpened = new CountDownLatch(1); + final CountDownLatch tasksAdded = new CountDownLatch(1); + + // inject test code to make the pattern (WriteLog, PurgeLog) + final ConcurrentLinkedQueue> appendFutures = new ConcurrentLinkedQueue<>(); + final AtomicReference> purgeFuture = new AtomicReference<>(); + final AtomicInteger tasksCount = new AtomicInteger(0); + CodeInjectionForTesting.put(RUN_WORKER, (localId, remoteId, args) -> { + // wait for raftLog to be opened + try { + if(!raftLogOpened.await(FIVE_SECONDS.getDuration(), FIVE_SECONDS.getUnit())) { + throw new TimeoutException(); + } + } catch (InterruptedException | TimeoutException e) { + LOG.error("an exception occurred", e); + throw new RuntimeException(e); + } + + // add WriteLog and PurgeLog tasks + entries.stream().map(raftLog::appendEntry).forEach(appendFutures::add); + purgeFuture.set(raftLog.purge(endIndexOfClosedSegment)); + + tasksCount.set(((DataBlockingQueue) args[0]).getNumElements()); + tasksAdded.countDown(); + return true; + }); + + // open raftLog + raftLog.open(RaftLog.INVALID_LOG_INDEX, null); + raftLogOpened.countDown(); + + // wait for all tasks to be added + if(!tasksAdded.await(FIVE_SECONDS.getDuration(), FIVE_SECONDS.getUnit())) { + throw new TimeoutException(); + } + assertEquals(entries.size() + 1, tasksCount.get()); + + // check if the purge task is executed + final Long purged = purgeFuture.get().get(); + LOG.info("purgeIndex = {}, purged = {}", endIndexOfClosedSegment, purged); + assertEquals(endIndexOfClosedSegment, raftLog.getRaftLogCache().getStartIndex()); + + // check if the appendEntry futures are done + JavaUtils.allOf(appendFutures).get(FIVE_SECONDS.getDuration(), FIVE_SECONDS.getUnit()); + } finally { + 
CodeInjectionForTesting.put(RUN_WORKER, (localId, remoteId, args) -> false); + } + } + + @ParameterizedTest + @MethodSource("data") + public void testTruncate(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); // prepare the log for truncation List ranges = prepareRanges(0, 5, 200, 0); List entries = prepareLogEntries(ranges, null); @@ -418,10 +517,10 @@ private void testTruncate(List entries, long fromIndex) raftLog.open(RaftLog.INVALID_LOG_INDEX, null); // check if the raft log is correct if (fromIndex > 0) { - Assert.assertEquals(entries.get((int) (fromIndex - 1)), + assertEquals(entries.get((int) (fromIndex - 1)), getLastEntry(raftLog)); } else { - Assert.assertNull(raftLog.getLastEntryTermIndex()); + Assertions.assertNull(raftLog.getLastEntryTermIndex()); } checkEntries(raftLog, entries, 0, (int) fromIndex); } @@ -432,7 +531,7 @@ private void checkEntries(RaftLog raftLog, List expected, if (size > 0) { for (int i = offset; i < size + offset; i++) { LogEntryProto entry = raftLog.get(expected.get(i).getIndex()); - Assert.assertEquals(expected.get(i), entry); + assertEquals(expected.get(i), entry); } final LogEntryHeader[] termIndices = raftLog.getEntries( expected.get(offset).getIndex(), @@ -448,7 +547,7 @@ private void checkEntries(RaftLog raftLog, List expected, .toArray(LogEntryProto[]::new); LogEntryProto[] expectedArray = expected.subList(offset, offset + size) .stream().toArray(LogEntryProto[]::new); - Assert.assertArrayEquals(expectedArray, entriesFromLog); + Assertions.assertArrayEquals(expectedArray, entriesFromLog); } } @@ -469,6 +568,7 @@ public void testPurgeOnOpenSegment() throws Exception { int segmentSize = 200; long beginIndexOfOpenSegment = segmentSize * (endTerm - startTerm - 1); long expectedIndex = segmentSize * (endTerm - startTerm - 1); + long purgePreservation = 0L; 
purgeAndVerify(startTerm, endTerm, segmentSize, 1, beginIndexOfOpenSegment, expectedIndex); } @@ -489,7 +589,7 @@ public void testPurgeLogMetric() throws Exception { int segmentSize = 200; long endIndexOfClosedSegment = segmentSize * (endTerm - startTerm - 1) - 1; long expectedIndex = segmentSize * (endTerm - startTerm - 1); - final RatisMetricRegistry metricRegistryForLogWorker = RaftLogMetricsBase.createRegistry(memberId); + final RatisMetricRegistry metricRegistryForLogWorker = RaftLogMetricsBase.createRegistry(MEMBER_ID); purgeAndVerify(startTerm, endTerm, segmentSize, 1, endIndexOfClosedSegment, expectedIndex); final DefaultTimekeeperImpl purge = (DefaultTimekeeperImpl) metricRegistryForLogWorker.timer("purgeLog"); assertTrue(purge.getTimer().getCount() > 0); @@ -505,33 +605,59 @@ public void testPurgeOnClosedSegmentsWithPurgeGap() throws Exception { purgeAndVerify(startTerm, endTerm, segmentSize, 1000, endIndexOfClosedSegment, expectedIndex); } - private void purgeAndVerify(int startTerm, int endTerm, int segmentSize, int purgeGap, long purgeIndex, long expectedIndex) throws Exception { - List ranges = prepareRanges(startTerm, endTerm, segmentSize, 0); + @Test + public void testPurgeWithLargePurgePreservationAndSmallPurgeGap() throws Exception { + int startTerm = 0; + int endTerm = 5; + int segmentSize = 200; + long endIndex = segmentSize * (endTerm - startTerm) - 1; + // start index is set so that the suggested index will not be negative, which will not trigger any purge + long startIndex = 200; + // purge preservation is larger than the total size of the log entries + // which causes suggested index to be lower than the start index + long purgePreservation = (segmentSize * (endTerm - startTerm )) + 100; + // if the suggested index is lower than the start index due to the purge preservation, we should not purge anything + purgeAndVerify(startTerm, endTerm, segmentSize, 1, endIndex, startIndex, startIndex, purgePreservation); + } + + private void 
purgeAndVerify(int startTerm, int endTerm, int segmentSize, int purgeGap, long purgeIndex, + long expectedIndex) throws Exception { + purgeAndVerify(startTerm, endTerm, segmentSize, purgeGap, purgeIndex, expectedIndex, 0, 0); + } + + private void purgeAndVerify(int startTerm, int endTerm, int segmentSize, int purgeGap, long purgeIndex, + long expectedIndex, long startIndex, long purgePreservation) throws Exception { + List ranges = prepareRanges(startTerm, endTerm, segmentSize, startIndex); List entries = prepareLogEntries(ranges, null); final RaftProperties p = new RaftProperties(); RaftServerConfigKeys.Log.setPurgeGap(p, purgeGap); - try (SegmentedRaftLog raftLog = newSegmentedRaftLog(storage, p)) { - raftLog.open(RaftLog.INVALID_LOG_INDEX, null); + RaftServerConfigKeys.Log.setPurgePreservationLogNum(p, purgePreservation); + try (SegmentedRaftLog raftLog = newSegmentedRaftLogWithSnapshotIndex(storage, p, () -> startIndex - 1)) { + raftLog.open(startIndex - 1, null); entries.stream().map(raftLog::appendEntry).forEach(CompletableFuture::join); final CompletableFuture f = raftLog.purge(purgeIndex); final Long purged = f.get(); LOG.info("purgeIndex = {}, purged = {}", purgeIndex, purged); - Assert.assertEquals(expectedIndex, raftLog.getRaftLogCache().getStartIndex()); + assertEquals(expectedIndex, raftLog.getRaftLogCache().getStartIndex()); } } /** * Test append with inconsistent entries */ - @Test - public void testAppendEntriesWithInconsistency() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testAppendEntriesWithInconsistency(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); // prepare the log for truncation List ranges = prepareRanges(0, 5, 200, 0); List entries = prepareLogEntries(ranges, null); final RetryCache retryCache = RetryCacheTestUtil.createRetryCache(); - 
try (SegmentedRaftLog raftLog = RetryCacheTestUtil.newSegmentedRaftLog(memberId, retryCache, storage, properties)) { + try (SegmentedRaftLog raftLog = + RetryCacheTestUtil.newSegmentedRaftLog(MEMBER_ID, retryCache, storage, properties)) { raftLog.open(RaftLog.INVALID_LOG_INDEX, null); entries.forEach(entry -> RetryCacheTestUtil.createEntry(retryCache, entry)); // append entries to the raftlog @@ -546,7 +672,8 @@ public void testAppendEntriesWithInconsistency() throws Exception { List newEntries = prepareLogEntries( Arrays.asList(r1, r2, r3), null); - try (SegmentedRaftLog raftLog = RetryCacheTestUtil.newSegmentedRaftLog(memberId, retryCache, storage, properties)) { + try (SegmentedRaftLog raftLog = + RetryCacheTestUtil.newSegmentedRaftLog(MEMBER_ID, retryCache, storage, properties)) { raftLog.open(RaftLog.INVALID_LOG_INDEX, null); LOG.info("newEntries[0] = {}", newEntries.get(0)); final int last = newEntries.size() - 1; @@ -556,35 +683,83 @@ public void testAppendEntriesWithInconsistency() throws Exception { checkFailedEntries(entries, 650, retryCache); checkEntries(raftLog, entries, 0, 650); checkEntries(raftLog, newEntries, 100, 100); - Assert.assertEquals(newEntries.get(newEntries.size() - 1), + assertEquals(newEntries.get(newEntries.size() - 1), getLastEntry(raftLog)); - Assert.assertEquals(newEntries.get(newEntries.size() - 1).getIndex(), + assertEquals(newEntries.get(newEntries.size() - 1).getIndex(), raftLog.getFlushIndex()); } // load the raftlog again and check - try (SegmentedRaftLog raftLog = RetryCacheTestUtil.newSegmentedRaftLog(memberId, retryCache, storage, properties)) { + try (SegmentedRaftLog raftLog = + RetryCacheTestUtil.newSegmentedRaftLog(MEMBER_ID, retryCache, storage, properties)) { raftLog.open(RaftLog.INVALID_LOG_INDEX, null); checkEntries(raftLog, entries, 0, 650); checkEntries(raftLog, newEntries, 100, 100); - Assert.assertEquals(newEntries.get(newEntries.size() - 1), + assertEquals(newEntries.get(newEntries.size() - 1), 
getLastEntry(raftLog)); - Assert.assertEquals(newEntries.get(newEntries.size() - 1).getIndex(), + assertEquals(newEntries.get(newEntries.size() - 1).getIndex(), raftLog.getFlushIndex()); SegmentedRaftLogCache cache = raftLog.getRaftLogCache(); - Assert.assertEquals(5, cache.getNumOfSegments()); + assertEquals(5, cache.getNumOfSegments()); } } - @Test - public void testSegmentedRaftLogStateMachineData() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testAppendEntriesWithGap(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); + // prepare the log for truncation + List ranges = prepareRanges(0, 5, 200, 0); + List entries = prepareLogEntries(ranges, null); + + final RetryCache retryCache = RetryCacheTestUtil.createRetryCache(); + try (SegmentedRaftLog raftLog = + RetryCacheTestUtil.newSegmentedRaftLog(MEMBER_ID, retryCache, storage, properties)) { + raftLog.open(RaftLog.INVALID_LOG_INDEX, null); + entries.forEach(entry -> RetryCacheTestUtil.createEntry(retryCache, entry)); + // append entries to the raftlog + entries.stream().map(raftLog::appendEntry).forEach(CompletableFuture::join); + } + + long lastIndex = ranges.get(ranges.size() - 1).end; + long snapshotIndex = lastIndex + 100; + LogEntryProto entryProto = prepareLogEntry(4, snapshotIndex + 1, null, false); + final LongSupplier getSnapshotIndexFromStateMachine = new LongSupplier() { + @Override + public long getAsLong() { + return snapshotIndex; + } + }; + try (SegmentedRaftLog raftLog = newSegmentedRaftLog(getSnapshotIndexFromStateMachine)) { + raftLog.open(RaftLog.INVALID_LOG_INDEX, null); + // Assert the wrapped exception + IllegalStateException exception = assertThrows(IllegalStateException.class, + () -> raftLog.appendEntry(entryProto)); + // Assert the original cause + assertTrue(exception.getMessage().contains("gap 
between entries")); + } + + // load the raftlog again and check + try (SegmentedRaftLog raftLog = + RetryCacheTestUtil.newSegmentedRaftLog(MEMBER_ID, retryCache, storage, properties)) { + raftLog.open(RaftLog.INVALID_LOG_INDEX, null); + Assertions.assertEquals(lastIndex, raftLog.getRaftLogCache().getEndIndex()); + } + } + + @ParameterizedTest + @MethodSource("data") + public void testSegmentedRaftLogStateMachineData(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); final SegmentRange range = new SegmentRange(0, 10, 1, true); final List entries = prepareLogEntries(range, null, true, new ArrayList<>()); final SimpleStateMachine4Testing sm = new SimpleStateMachine4Testing(); try (SegmentedRaftLog raftLog = SegmentedRaftLog.newBuilder() - .setMemberId(memberId) + .setMemberId(MEMBER_ID) .setStateMachine(sm) .setStorage(storage) .setProperties(properties) @@ -622,8 +797,11 @@ public void testSegmentedRaftLogStateMachineData() throws Exception { } } - @Test(expected = TimeoutIOException.class) - public void testServerShutdownOnTimeoutIOException() throws Throwable { + @ParameterizedTest + @MethodSource("data") + public void testServerShutdownOnTimeoutIOException(Boolean useAsyncFlush, Boolean smSyncFlush) throws Throwable { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); RaftServerConfigKeys.Log.StateMachineData.setSync(properties, true); final TimeDuration syncTimeout = TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); RaftServerConfigKeys.Log.StateMachineData.setSyncTimeout(properties, syncTimeout); @@ -645,16 +823,16 @@ public void notifyLogFailed(Throwable cause, LogEntryProto entry) { LOG.info("Test StateMachine: Ratis log failed notification received as expected.", cause); LOG.info("Test 
StateMachine: Transition to PAUSED state."); - Assert.assertNotNull(entry); + Assertions.assertNotNull(entry); getLifeCycle().transition(LifeCycle.State.PAUSING); getLifeCycle().transition(LifeCycle.State.PAUSED); } }; - Throwable ex = null; // TimeoutIOException + ExecutionException ex; try (SegmentedRaftLog raftLog = SegmentedRaftLog.newBuilder() - .setMemberId(memberId) + .setMemberId(MEMBER_ID) .setStateMachine(sm) .setStorage(storage) .setProperties(properties) @@ -663,15 +841,10 @@ public void notifyLogFailed(Throwable cause, LogEntryProto entry) { // SegmentedRaftLogWorker should catch TimeoutIOException CompletableFuture f = raftLog.appendEntry(entry); // Wait for async writeStateMachineData to finish - try { - f.get(); - } catch (ExecutionException e) { - ex = e.getCause(); - } + ex = assertThrows(ExecutionException.class, f::get); } - Assert.assertNotNull(ex); - Assert.assertSame(LifeCycle.State.PAUSED, sm.getLifeCycleState()); - throw ex; + Assertions.assertSame(LifeCycle.State.PAUSED, sm.getLifeCycleState()); + Assertions.assertInstanceOf(TimeoutIOException.class, ex.getCause()); } static Thread startAppendEntryThread(RaftLog raftLog, LogEntryProto entry) { @@ -688,18 +861,22 @@ static Thread startAppendEntryThread(RaftLog raftLog, LogEntryProto entry) { void assertIndices(RaftLog raftLog, long expectedFlushIndex, long expectedNextIndex) { LOG.info("assert expectedFlushIndex={}", expectedFlushIndex); - Assert.assertEquals(expectedFlushIndex, raftLog.getFlushIndex()); + assertEquals(expectedFlushIndex, raftLog.getFlushIndex()); LOG.info("assert expectedNextIndex={}", expectedNextIndex); - Assert.assertEquals(expectedNextIndex, raftLog.getNextIndex()); + assertEquals(expectedNextIndex, raftLog.getNextIndex()); } - void assertIndicesMultipleAttempts(RaftLog raftLog, long expectedFlushIndex, long expectedNextIndex) throws Exception { + void assertIndicesMultipleAttempts(RaftLog raftLog, long expectedFlushIndex, long expectedNextIndex) + throws Exception { 
JavaUtils.attempt(() -> assertIndices(raftLog, expectedFlushIndex, expectedNextIndex), 10, HUNDRED_MILLIS, "assertIndices", LOG); } - @Test - public void testAsyncFlushPerf1() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testAsyncFlushPerf1(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); List ranges = prepareRanges(0, 50, 20000, 0); List entries = prepareLogEntries(ranges, null); @@ -721,8 +898,11 @@ public void testAsyncFlushPerf1() throws Exception { } } - @Test - public void testAsyncFlushPerf2() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testAsyncFlushPerf2(Boolean useAsyncFlush, Boolean smSyncFlush) throws Exception { + RaftServerConfigKeys.Log.setAsyncFlushEnabled(properties, useAsyncFlush); + RaftServerConfigKeys.Log.StateMachineData.setSync(properties, smSyncFlush); List ranges = prepareRanges(0, 50, 20000, 0); List entries = prepareLogEntries(ranges, null); @@ -742,4 +922,74 @@ public void testAsyncFlushPerf2() throws Exception { " ns with asyncFlush " + useAsyncFlush); } } + + public static final Logger LOG = LoggerFactory.getLogger(TestSegmentedRaftLog.class); + @Test + public void testConcurrentGetDuringAppend() throws Exception { + RaftServerConfigKeys.Log.setReadLockEnabled(properties, false); + final CountDownLatch injectionPaused = new CountDownLatch(1); + final CountDownLatch readerCanProceed = new CountDownLatch(1); + final AtomicReference error = new AtomicReference<>(); + final AtomicReference readEntry = new AtomicReference<>(); + + final CodeInjectionForTesting.Code code = (localId, remoteId, args) -> { + // in log worker thread, holding write lock + injectionPaused.countDown(); + try { + if (!readerCanProceed.await(5, TimeUnit.SECONDS)) { + error.set(new TimeoutException("The reader thread did not start in 
time.")); + } + } catch (InterruptedException e) { + error.set(e); + } + return true; + }; + + try (SegmentedRaftLog raftLog = newSegmentedRaftLog()) { + CodeInjectionForTesting.put(LogSegment.APPEND_RECORD, code); + raftLog.open(RaftLog.INVALID_LOG_INDEX, null); + final LogEntryProto newEntry = prepareLogEntry(1, 0, () -> "newEntry", false); + + // Run appendEntry asynchronously. + final Thread appender = new Thread(() -> { + raftLog.appendEntry(newEntry).join(); + }); + appender.start(); + + // Wait until the append operation is paused at the injection point. + Assertions.assertTrue(injectionPaused.await(5, TimeUnit.SECONDS), "Injection point was not hit."); + + // Start a new reader thread to call get(). + // This thread will block until the write lock is released. + final Thread reader = new Thread(() -> { + try { + readEntry.set(raftLog.get(newEntry.getIndex())); + Assertions.assertNull(readEntry.get()); + // Unblock the writer thread. + readerCanProceed.countDown(); + } catch (Throwable t) { + error.set(t); + } + }); + reader.start(); + + // Wait for both the append and the read to complete. + reader.join(); + appender.join(); + + // Check for errors. + if (error.get() != null) { + throw new Exception("Test failed", error.get()); + } + + // When the reader's get() call completed, the append was fully finished, + // so it should have returned the correct entry. 
+ assertEquals(newEntry.getIndex(), raftLog.getLastEntryTermIndex().getIndex()); + readEntry.set(raftLog.get(newEntry.getIndex())); + Assertions.assertNotNull(readEntry.get()); + assertEquals(newEntry, readEntry.get()); + } finally { + CodeInjectionForTesting.remove(LogSegment.APPEND_RECORD); + } + } } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestSegmentedRaftLogCache.java b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestSegmentedRaftLogCache.java index 5be3c36578..d50b2d8a54 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestSegmentedRaftLogCache.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/raftlog/segmented/TestSegmentedRaftLogCache.java @@ -20,7 +20,6 @@ import static org.apache.ratis.server.metrics.SegmentedRaftLogMetrics.*; import static org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogTestUtils.MAX_OP_SIZE; -import java.io.IOException; import java.util.Iterator; import java.util.stream.IntStream; @@ -34,29 +33,28 @@ import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.raftlog.segmented.SegmentedRaftLogCache.TruncationSegments; import org.apache.ratis.server.raftlog.segmented.LogSegment.LogRecord; -import org.apache.ratis.server.raftlog.segmented.LogSegment.Op; import org.apache.ratis.proto.RaftProtos.LogEntryProto; import org.apache.ratis.util.ReferenceCountedObject; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestSegmentedRaftLogCache { - private static final RaftProperties prop = new RaftProperties(); + private static final RaftProperties PROP = new RaftProperties(); private SegmentedRaftLogCache cache; private SegmentedRaftLogMetrics raftLogMetrics; private RatisMetricRegistryImpl 
ratisMetricRegistry; - @Before + @BeforeEach public void setup() { raftLogMetrics = new SegmentedRaftLogMetrics(RaftServerTestUtil.TEST_MEMBER_ID); ratisMetricRegistry = (RatisMetricRegistryImpl) raftLogMetrics.getRegistry(); - cache = new SegmentedRaftLogCache(null, null, prop, raftLogMetrics); + cache = new SegmentedRaftLogCache(null, null, PROP, raftLogMetrics); } - @After + @AfterEach public void clear() { raftLogMetrics.unregister(); } @@ -66,7 +64,7 @@ private LogSegment prepareLogSegment(long start, long end, boolean isOpen) { for (long i = start; i <= end; i++) { SimpleOperation m = new SimpleOperation("m" + i); LogEntryProto entry = LogProtoUtils.toLogEntryProto(m.getLogEntryContent(), 0, i); - s.appendToOpenSegment(Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); + s.appendToOpenSegment(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); } if (!isOpen) { s.close(); @@ -75,14 +73,14 @@ private LogSegment prepareLogSegment(long start, long end, boolean isOpen) { } private void checkCache(long start, long end, int segmentSize) { - Assert.assertEquals(start, cache.getStartIndex()); - Assert.assertEquals(end, cache.getEndIndex()); + Assertions.assertEquals(start, cache.getStartIndex()); + Assertions.assertEquals(end, cache.getEndIndex()); for (long index = start; index <= end; index++) { final LogSegment segment = cache.getSegment(index); final LogRecord record = segment.getLogRecord(index); - final LogEntryProto entry = segment.getEntryFromCache(record.getTermIndex()); - Assert.assertEquals(index, entry.getIndex()); + final ReferenceCountedObject entry = segment.getEntryFromCache(record.getTermIndex()); + Assertions.assertEquals(index, entry.get().getIndex()); } long[] offsets = new long[]{start, start + 1, start + (end - start) / 2, @@ -99,9 +97,9 @@ private void checkCache(long start, long end, int segmentSize) { private void checkCacheEntries(long offset, int size, long end) { final 
LogEntryHeader[] entries = cache.getTermIndices(offset, offset + size); long realEnd = offset + size > end + 1 ? end + 1 : offset + size; - Assert.assertEquals(realEnd - offset, entries.length); + Assertions.assertEquals(realEnd - offset, entries.length); for (long i = offset; i < realEnd; i++) { - Assert.assertEquals(i, entries[(int) (i - offset)].getIndex()); + Assertions.assertEquals(i, entries[(int) (i - offset)].getIndex()); } } @@ -114,7 +112,7 @@ public void testAddSegments() throws Exception { try { LogSegment s = prepareLogSegment(102, 103, true); cache.addSegment(s); - Assert.fail("should fail since there is gap between two segments"); + Assertions.fail("should fail since there is gap between two segments"); } catch (IllegalStateException ignored) { } @@ -125,7 +123,7 @@ public void testAddSegments() throws Exception { try { LogSegment s = prepareLogSegment(201, 202, true); cache.addSegment(s); - Assert.fail("should fail since there is still an open segment in cache"); + Assertions.fail("should fail since there is still an open segment in cache"); } catch (IllegalStateException ignored) { } @@ -135,43 +133,41 @@ public void testAddSegments() throws Exception { try { LogSegment s = prepareLogSegment(202, 203, true); cache.addSegment(s); - Assert.fail("should fail since there is gap between two segments"); + Assertions.fail("should fail since there is gap between two segments"); } catch (IllegalStateException ignored) { } LogSegment s3 = prepareLogSegment(201, 300, true); cache.addSegment(s3); - Assert.assertNotNull(cache.getOpenSegment()); + Assertions.assertNotNull(cache.getOpenSegment()); checkCache(1, 300, 100); cache.rollOpenSegment(true); - Assert.assertNotNull(cache.getOpenSegment()); + Assertions.assertNotNull(cache.getOpenSegment()); checkCache(1, 300, 100); } @Test - public void testAppendEntry() { + public void testAppendEntry() throws Exception { LogSegment closedSegment = prepareLogSegment(0, 99, false); cache.addSegment(closedSegment); final 
SimpleOperation m = new SimpleOperation("m"); try { LogEntryProto entry = LogProtoUtils.toLogEntryProto(m.getLogEntryContent(), 0, 0); - cache.appendEntry(Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry) - ); - Assert.fail("the open segment is null"); - } catch (IllegalStateException ignored) { + cache.appendEntry(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); + Assertions.fail("the open segment is null"); + } catch (IllegalStateException | NullPointerException ignored) { } LogSegment openSegment = prepareLogSegment(100, 100, true); cache.addSegment(openSegment); for (long index = 101; index < 200; index++) { LogEntryProto entry = LogProtoUtils.toLogEntryProto(m.getLogEntryContent(), 0, index); - cache.appendEntry(Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry) - ); + cache.appendEntry(LogSegment.Op.WRITE_CACHE_WITHOUT_STATE_MACHINE_CACHE, ReferenceCountedObject.wrap(entry)); } - Assert.assertNotNull(cache.getOpenSegment()); + Assertions.assertNotNull(cache.getOpenSegment()); checkCache(0, 199, 100); } @@ -188,7 +184,7 @@ public void testTruncate() throws Exception { cache.addSegment(s); long end = cache.getEndIndex(); - Assert.assertEquals(599, end); + Assertions.assertEquals(599, end); int numOfSegments = 6; // start truncation for (int i = 0; i < 10; i++) { // truncate 10 times @@ -200,46 +196,46 @@ public void testTruncate() throws Exception { // check TruncationSegments int currentNum= (int) (end / 100 + 1); if (currentNum < numOfSegments) { - Assert.assertEquals(1, ts.getToDelete().length); + Assertions.assertEquals(1, ts.getToDelete().length); numOfSegments = currentNum; } else { - Assert.assertEquals(0, ts.getToDelete().length); + Assertions.assertEquals(0, ts.getToDelete().length); } } // 230 entries remaining. 
truncate at the segment boundary TruncationSegments ts = cache.truncate(200); checkCache(0, 199, 100); - Assert.assertEquals(1, ts.getToDelete().length); - Assert.assertEquals(200, ts.getToDelete()[0].getStartIndex()); - Assert.assertEquals(229, ts.getToDelete()[0].getEndIndex()); - Assert.assertEquals(0, ts.getToDelete()[0].getTargetLength()); - Assert.assertFalse(ts.getToDelete()[0].isOpen()); - Assert.assertNull(ts.getToTruncate()); + Assertions.assertEquals(1, ts.getToDelete().length); + Assertions.assertEquals(200, ts.getToDelete()[0].getStartIndex()); + Assertions.assertEquals(229, ts.getToDelete()[0].getEndIndex()); + Assertions.assertEquals(0, ts.getToDelete()[0].getTargetLength()); + Assertions.assertFalse(ts.getToDelete()[0].isOpen()); + Assertions.assertNull(ts.getToTruncate()); // add another open segment and truncate it as a whole LogSegment newOpen = prepareLogSegment(200, 249, true); cache.addSegment(newOpen); ts = cache.truncate(200); checkCache(0, 199, 100); - Assert.assertEquals(1, ts.getToDelete().length); - Assert.assertEquals(200, ts.getToDelete()[0].getStartIndex()); - Assert.assertEquals(249, ts.getToDelete()[0].getEndIndex()); - Assert.assertEquals(0, ts.getToDelete()[0].getTargetLength()); - Assert.assertTrue(ts.getToDelete()[0].isOpen()); - Assert.assertNull(ts.getToTruncate()); + Assertions.assertEquals(1, ts.getToDelete().length); + Assertions.assertEquals(200, ts.getToDelete()[0].getStartIndex()); + Assertions.assertEquals(249, ts.getToDelete()[0].getEndIndex()); + Assertions.assertEquals(0, ts.getToDelete()[0].getTargetLength()); + Assertions.assertTrue(ts.getToDelete()[0].isOpen()); + Assertions.assertNull(ts.getToTruncate()); // add another open segment and truncate part of it newOpen = prepareLogSegment(200, 249, true); cache.addSegment(newOpen); ts = cache.truncate(220); checkCache(0, 219, 100); - Assert.assertNull(cache.getOpenSegment()); - Assert.assertEquals(0, ts.getToDelete().length); - 
Assert.assertTrue(ts.getToTruncate().isOpen()); - Assert.assertEquals(219, ts.getToTruncate().getNewEndIndex()); - Assert.assertEquals(200, ts.getToTruncate().getStartIndex()); - Assert.assertEquals(249, ts.getToTruncate().getEndIndex()); + Assertions.assertNull(cache.getOpenSegment()); + Assertions.assertEquals(0, ts.getToDelete().length); + Assertions.assertTrue(ts.getToTruncate().isOpen()); + Assertions.assertEquals(219, ts.getToTruncate().getNewEndIndex()); + Assertions.assertEquals(200, ts.getToTruncate().getStartIndex()); + Assertions.assertEquals(249, ts.getToTruncate().getEndIndex()); } @Test @@ -255,9 +251,9 @@ public void testOpenSegmentPurge() { int purgeIndex = sIndex; // open segment should never be purged TruncationSegments ts = cache.purge(purgeIndex); - Assert.assertNull(ts.getToTruncate()); - Assert.assertEquals(end - start, ts.getToDelete().length); - Assert.assertEquals(sIndex, cache.getStartIndex()); + Assertions.assertNull(ts.getToTruncate()); + Assertions.assertEquals(end - start, ts.getToDelete().length); + Assertions.assertEquals(sIndex, cache.getStartIndex()); } @Test @@ -272,9 +268,9 @@ public void testCloseSegmentPurge() { // overlapped close segment will not purged. Passing in index - 1 since // we purge a closed segment when end index == passed in purge index. 
TruncationSegments ts = cache.purge(purgeIndex - 1); - Assert.assertNull(ts.getToTruncate()); - Assert.assertEquals(end - start - 1, ts.getToDelete().length); - Assert.assertEquals(1, cache.getNumOfSegments()); + Assertions.assertNull(ts.getToTruncate()); + Assertions.assertEquals(end - start - 1, ts.getToDelete().length); + Assertions.assertEquals(1, cache.getNumOfSegments()); } private void populatedSegment(int start, int end, int segmentSize, boolean isOpen) { @@ -286,20 +282,20 @@ private void populatedSegment(int start, int end, int segmentSize, boolean isOpe }); } - private void testIterator(long startIndex) throws IOException { + private void testIterator(long startIndex) { Iterator iterator = cache.iterator(startIndex); TermIndex prev = null; while (iterator.hasNext()) { TermIndex termIndex = iterator.next(); - Assert.assertEquals(cache.getLogRecord(termIndex.getIndex()).getTermIndex(), termIndex); + Assertions.assertEquals(cache.getTermIndex(termIndex.getIndex()), termIndex); if (prev != null) { - Assert.assertEquals(prev.getIndex() + 1, termIndex.getIndex()); + Assertions.assertEquals(prev.getIndex() + 1, termIndex.getIndex()); } prev = termIndex; } if (startIndex <= cache.getEndIndex()) { - Assert.assertNotNull(prev); - Assert.assertEquals(cache.getEndIndex(), prev.getIndex()); + Assertions.assertNotNull(prev); + Assertions.assertEquals(cache.getEndIndex(), prev.getIndex()); } } @@ -321,7 +317,7 @@ public void testIterator() throws Exception { testIterator(299); Iterator iterator = cache.iterator(300); - Assert.assertFalse(iterator.hasNext()); + Assertions.assertFalse(iterator.hasNext()); } @Test @@ -332,14 +328,14 @@ public void testCacheMetric() { Long closedSegmentsNum = (Long) ratisMetricRegistry.getGauges((s, metric) -> s.contains(RAFT_LOG_CACHE_CLOSED_SEGMENTS_NUM)).values().iterator().next().getValue(); - Assert.assertEquals(2L, closedSegmentsNum.longValue()); + Assertions.assertEquals(2L, closedSegmentsNum.longValue()); Long 
closedSegmentsSizeInBytes = (Long) ratisMetricRegistry.getGauges((s, metric) -> s.contains(RAFT_LOG_CACHE_CLOSED_SEGMENTS_SIZE_IN_BYTES)).values().iterator().next().getValue(); - Assert.assertEquals(closedSegmentsSizeInBytes.longValue(), cache.getClosedSegmentsSizeInBytes()); + Assertions.assertEquals(closedSegmentsSizeInBytes.longValue(), cache.getClosedSegmentsSizeInBytes()); Long openSegmentSizeInBytes = (Long) ratisMetricRegistry.getGauges((s, metric) -> s.contains(RAFT_LOG_CACHE_OPEN_SEGMENT_SIZE_IN_BYTES)).values().iterator().next().getValue(); - Assert.assertEquals(openSegmentSizeInBytes.longValue(), cache.getOpenSegmentSizeInBytes()); + Assertions.assertEquals(openSegmentSizeInBytes.longValue(), cache.getOpenSegmentSizeInBytes()); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java index 1c76f7b005..62ee387dec 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/simulation/TestRaftSnapshotWithSimulatedRpc.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -17,12 +17,9 @@ */ package org.apache.ratis.server.simulation; -import org.apache.ratis.server.impl.MiniRaftCluster; import org.apache.ratis.statemachine.RaftSnapshotBaseTest; -public class TestRaftSnapshotWithSimulatedRpc extends RaftSnapshotBaseTest { - @Override - public MiniRaftCluster.Factory getFactory() { - return MiniRaftClusterWithSimulatedRpc.FACTORY; - } +public class TestRaftSnapshotWithSimulatedRpc + extends RaftSnapshotBaseTest + implements MiniRaftClusterWithSimulatedRpc.FactoryGet { } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/storage/TestRaftStorage.java b/ratis-test/src/test/java/org/apache/ratis/server/storage/TestRaftStorage.java index 6e7555703f..093e9add7f 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/storage/TestRaftStorage.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/storage/TestRaftStorage.java @@ -18,6 +18,7 @@ package org.apache.ratis.server.storage; import static java.util.stream.Collectors.toList; +import static org.apache.ratis.statemachine.impl.SimpleStateMachineStorage.SNAPSHOT_MD5_REGEX; import static org.apache.ratis.statemachine.impl.SimpleStateMachineStorage.SNAPSHOT_REGEX; import static org.apache.ratis.util.MD5FileUtil.MD5_SUFFIX; @@ -29,21 +30,26 @@ import org.apache.ratis.server.storage.RaftStorageDirectoryImpl.StorageState; import org.apache.ratis.statemachine.impl.SimpleStateMachineStorage; import org.apache.ratis.statemachine.SnapshotRetentionPolicy; +import org.apache.ratis.statemachine.impl.SingleFileSnapshotInfo; import org.apache.ratis.util.FileUtils; +import org.apache.ratis.util.MD5FileUtil; import org.apache.ratis.util.Preconditions; import org.apache.ratis.util.SizeInBytes; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.mockito.Mockito; import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicReference; @@ -65,12 +71,12 @@ static RaftStorageImpl newRaftStorage(File dir) throws IOException { private File storageDir; - @Before + @BeforeEach public void setup() { storageDir = getTestDir(); } - @After + @AfterEach public void tearDown() throws Exception { if (storageDir != null) { FileUtils.deleteFully(storageDir.getParentFile()); @@ -87,28 +93,29 @@ static RaftStorageImpl formatRaftStorage(File dir) throws IOException { return impl; } + @SuppressWarnings({"squid:S5783"}) // Suppress same exception warning @Test public void testNotExistent() throws IOException { FileUtils.deleteFully(storageDir); // we will format the empty directory final RaftStorageImpl storage = newRaftStorage(storageDir); - Assert.assertEquals(StorageState.NORMAL, storage.getState()); + Assertions.assertEquals(StorageState.NORMAL, storage.getState()); try { formatRaftStorage(storageDir).close(); - Assert.fail("the format should fail since the storage is still locked"); + Assertions.fail("the format should fail since the storage is still locked"); } catch (IOException e) { - Assert.assertTrue(e.getMessage().contains("directory is already locked")); + Assertions.assertTrue(e.getMessage().contains("directory is already locked")); } storage.close(); FileUtils.deleteFully(storageDir); - Assert.assertTrue(storageDir.createNewFile()); + Assertions.assertTrue(storageDir.createNewFile()); try (RaftStorage ignored = newRaftStorage(storageDir)) { - Assert.fail(); + Assertions.fail(); } catch (IOException e) { - Assert.assertTrue( + 
Assertions.assertTrue( e.getMessage().contains(StorageState.NON_EXISTENT.name())); } } @@ -121,42 +128,42 @@ public void testStorage() throws Exception { final RaftStorageDirectoryImpl sd = new RaftStorageDirectoryImpl(storageDir, SizeInBytes.ZERO); try { StorageState state = sd.analyzeStorage(true); - Assert.assertEquals(StorageState.NOT_FORMATTED, state); - Assert.assertTrue(sd.isCurrentEmpty()); + Assertions.assertEquals(StorageState.NOT_FORMATTED, state); + Assertions.assertTrue(sd.isCurrentEmpty()); } finally { sd.unlock(); } RaftStorageImpl storage = newRaftStorage(storageDir); - Assert.assertEquals(StorageState.NORMAL, storage.getState()); + Assertions.assertEquals(StorageState.NORMAL, storage.getState()); storage.close(); - Assert.assertEquals(StorageState.NORMAL, sd.analyzeStorage(false)); + Assertions.assertEquals(StorageState.NORMAL, sd.analyzeStorage(false)); assertMetadataFile(sd.getMetaFile()); // test format storage = formatRaftStorage(storageDir); - Assert.assertEquals(StorageState.NORMAL, storage.getState()); + Assertions.assertEquals(StorageState.NORMAL, storage.getState()); final RaftStorageMetadataFile metaFile = new RaftStorageMetadataFileImpl(sd.getMetaFile()); - Assert.assertEquals(RaftStorageMetadata.getDefault(), metaFile.getMetadata()); + Assertions.assertEquals(RaftStorageMetadata.getDefault(), metaFile.getMetadata()); storage.close(); } static void assertMetadataFile(File m) throws Exception { - Assert.assertTrue(m.exists()); + Assertions.assertTrue(m.exists()); final RaftStorageMetadataFile metaFile = new RaftStorageMetadataFileImpl(m); - Assert.assertEquals(RaftStorageMetadata.getDefault(), metaFile.getMetadata()); + Assertions.assertEquals(RaftStorageMetadata.getDefault(), metaFile.getMetadata()); final RaftPeerId peer1 = RaftPeerId.valueOf("peer1"); final RaftStorageMetadata metadata = RaftStorageMetadata.valueOf(123, peer1); metaFile.persist(metadata); - Assert.assertEquals(metadata.getTerm(), 123); - 
Assert.assertEquals(metadata.getVotedFor(), peer1); - Assert.assertEquals(metadata, metaFile.getMetadata()); + Assertions.assertEquals(metadata.getTerm(), 123); + Assertions.assertEquals(metadata.getVotedFor(), peer1); + Assertions.assertEquals(metadata, metaFile.getMetadata()); final RaftStorageMetadataFile metaFile2 = new RaftStorageMetadataFileImpl(m); - Assert.assertNull(((AtomicReference) RaftTestUtil.getDeclaredField(metaFile2, "metadata")).get()); - Assert.assertEquals(metadata, metaFile2.getMetadata()); + Assertions.assertNull(((AtomicReference) RaftTestUtil.getDeclaredField(metaFile2, "metadata")).get()); + Assertions.assertEquals(metadata, metaFile2.getMetadata()); } @Test @@ -172,27 +179,27 @@ public void testMetaFile() throws Exception { @Test public void testCleanMetaTmpFile() throws Exception { RaftStorageImpl storage = newRaftStorage(storageDir); - Assert.assertEquals(StorageState.NORMAL, storage.getState()); + Assertions.assertEquals(StorageState.NORMAL, storage.getState()); storage.close(); final RaftStorageDirectoryImpl sd = new RaftStorageDirectoryImpl(storageDir, SizeInBytes.ZERO); File metaFile = sd.getMetaFile(); FileUtils.move(metaFile, sd.getMetaTmpFile()); - Assert.assertEquals(StorageState.NOT_FORMATTED, sd.analyzeStorage(false)); + Assertions.assertEquals(StorageState.NOT_FORMATTED, sd.analyzeStorage(false)); // RaftStorage initialization should succeed as the raft-meta.tmp is // always cleaned. 
newRaftStorage(storageDir).close(); - Assert.assertTrue(sd.getMetaFile().exists()); - Assert.assertTrue(sd.getMetaTmpFile().createNewFile()); - Assert.assertTrue(sd.getMetaTmpFile().exists()); + Assertions.assertTrue(sd.getMetaFile().exists()); + Assertions.assertTrue(sd.getMetaTmpFile().createNewFile()); + Assertions.assertTrue(sd.getMetaTmpFile().exists()); try { storage = newRaftStorage(storageDir); - Assert.assertEquals(StorageState.NORMAL, storage.getState()); - Assert.assertFalse(sd.getMetaTmpFile().exists()); - Assert.assertTrue(sd.getMetaFile().exists()); + Assertions.assertEquals(StorageState.NORMAL, storage.getState()); + Assertions.assertFalse(sd.getMetaTmpFile().exists()); + Assertions.assertTrue(sd.getMetaFile().exists()); } finally { storage.close(); } @@ -207,14 +214,14 @@ public void testSnapshotFileName() { final File file = new File(storageDir, name); final TermIndex ti = SimpleStateMachineStorage.getTermIndexFromSnapshotFile(file); System.out.println("file = " + file); - Assert.assertEquals(term, ti.getTerm()); - Assert.assertEquals(index, ti.getIndex()); + Assertions.assertEquals(term, ti.getTerm()); + Assertions.assertEquals(index, ti.getIndex()); System.out.println("ti = " + ti); final File foo = new File(storageDir, "foo"); try { SimpleStateMachineStorage.getTermIndexFromSnapshotFile(foo); - Assert.fail(); + Assertions.fail(); } catch(IllegalArgumentException iae) { System.out.println("Good " + iae); } @@ -227,7 +234,7 @@ public void testSnapshotCleanup() throws IOException { SnapshotRetentionPolicy snapshotRetentionPolicy = new SnapshotRetentionPolicy() { @Override public int getNumSnapshotsRetained() { - return 3; + return 2; } }; @@ -238,15 +245,24 @@ public int getNumSnapshotsRetained() { Set termIndexSet = new HashSet<>(); - //Create 5 snapshot files in storage dir. - while (termIndexSet.size() < 5) { + //Create 3 snapshot files in storage dir. 
+ while (termIndexSet.size() < 3) { final long term = ThreadLocalRandom.current().nextLong(1, 10L); - final long index = ThreadLocalRandom.current().nextLong(100, 1000L); + final long index = ThreadLocalRandom.current().nextLong(100, 500L); if (termIndexSet.add(TermIndex.valueOf(term, index))) { - File file = simpleStateMachineStorage.getSnapshotFile(term, index); - Assert.assertTrue(file.createNewFile()); + createSnapshot(simpleStateMachineStorage, term, index, true); + } + } + + // Create 2 more snapshot files in storage dir without MD5 files + while (termIndexSet.size() < 5) { + final long term = ThreadLocalRandom.current().nextLong(11, 20L); + final long index = ThreadLocalRandom.current().nextLong(501, 1000L); + if (termIndexSet.add(TermIndex.valueOf(term, index))) { + createSnapshot(simpleStateMachineStorage, term, index, false); } } + // create MD5 files that will not be deleted in older version while (termIndexSet.size() < 7) { final long term = 1; @@ -254,54 +270,316 @@ public int getNumSnapshotsRetained() { if (termIndexSet.add(TermIndex.valueOf(term, index))) { File file = simpleStateMachineStorage.getSnapshotFile(term, index); File snapshotFile = new File(file.getParent(), file.getName() + MD5_SUFFIX); - Assert.assertTrue(snapshotFile.createNewFile()); + Assertions.assertTrue(snapshotFile.createNewFile()); } } File stateMachineDir = storage.getStorageDir().getStateMachineDir(); - assertFileCount(stateMachineDir, 7); + assertFileCount(stateMachineDir, 10); simpleStateMachineStorage.cleanupOldSnapshots(snapshotRetentionPolicy); - File[] remainingFiles = assertFileCount(stateMachineDir, 3); + // Since the MD5 files are not matching the snapshot files they are cleaned up. + // So we still have 6 files - 4 snapshots and 2 MD5 files. 
+ File[] remainingFiles = assertFileCount(stateMachineDir, 6); List remainingIndices = termIndexSet.stream() .map(TermIndex::getIndex) .sorted(Collections.reverseOrder()) - .limit(3) + .limit(4) .collect(toList()); for (File file : remainingFiles) { System.out.println(file.getName()); Matcher matcher = SNAPSHOT_REGEX.matcher(file.getName()); if (matcher.matches()) { - Assert.assertTrue(remainingIndices.contains(Long.parseLong(matcher.group(2)))); + Assertions.assertTrue(remainingIndices.contains(Long.parseLong(matcher.group(2)))); } } // Attempt to clean up again should not delete any more files. simpleStateMachineStorage.cleanupOldSnapshots(snapshotRetentionPolicy); - assertFileCount(stateMachineDir, 3); + assertFileCount(stateMachineDir, 6); //Test with Retention disabled. //Create 2 snapshot files in storage dir. for (int i = 0; i < 2; i++) { - final long term = ThreadLocalRandom.current().nextLong(1, 10L); + final long term = ThreadLocalRandom.current().nextLong(21, 30L); final long index = ThreadLocalRandom.current().nextLong(1000L); - File file = simpleStateMachineStorage.getSnapshotFile(term, index); - Assert.assertTrue(file.createNewFile()); + createSnapshot(simpleStateMachineStorage, term, index, false); } simpleStateMachineStorage.cleanupOldSnapshots(new SnapshotRetentionPolicy() { }); + assertFileCount(stateMachineDir, 8); + } + + @Test + public void testSnapshotCleanupWithMissingMd5File() throws IOException { + + SnapshotRetentionPolicy snapshotRetentionPolicy = new SnapshotRetentionPolicy() { + @Override + public int getNumSnapshotsRetained() { + return 2; + } + }; + + + SimpleStateMachineStorage simpleStateMachineStorage = new SimpleStateMachineStorage(); + final RaftStorage storage = newRaftStorage(storageDir); + simpleStateMachineStorage.init(storage); + + Set termIndexSet = new HashSet<>(); + + // Create one snapshot file without MD5 file + if (termIndexSet.add(TermIndex.valueOf(1, 100))) { + createSnapshot(simpleStateMachineStorage, 1, 100, 
false); + } + + //Create 4 snapshot files in storage dir + while (termIndexSet.size() < 5) { + final long term = ThreadLocalRandom.current().nextLong(2, 10L); + final long index = ThreadLocalRandom.current().nextLong(100, 1000L); + if (termIndexSet.add(TermIndex.valueOf(term, index))) { + createSnapshot(simpleStateMachineStorage, term, index, true); + } + } + + // 1 snapshot file without MD5 hash, 4 snapshots + 4 md5 hash files = 9 files + File stateMachineDir = storage.getStorageDir().getStateMachineDir(); + assertFileCount(stateMachineDir, 9); + + simpleStateMachineStorage.cleanupOldSnapshots(snapshotRetentionPolicy); + + // We should have 4 files remaining, and 2 snapshots with MD5 hash + assertFileCount(stateMachineDir, 4); + } + + @Test + public void testSnapshotCleanupWithLatestSnapshotMissingMd5File() throws IOException { + + SnapshotRetentionPolicy snapshotRetentionPolicy = new SnapshotRetentionPolicy() { + @Override + public int getNumSnapshotsRetained() { + return 2; + } + }; + + + SimpleStateMachineStorage simpleStateMachineStorage = new SimpleStateMachineStorage(); + final RaftStorage storage = newRaftStorage(storageDir); + simpleStateMachineStorage.init(storage); + + Set termIndexSet = new HashSet<>(); + + //Create 4 snapshot files in storage dir + while (termIndexSet.size() < 4) { + final long term = ThreadLocalRandom.current().nextLong(1, 10L); + final long index = ThreadLocalRandom.current().nextLong(100, 1000L); + if (termIndexSet.add(TermIndex.valueOf(term, index))) { + createSnapshot(simpleStateMachineStorage, term, index, true); + } + } + + // Create a snapshot file with a missing MD5 file and having the highest term index + if (termIndexSet.add(TermIndex.valueOf(99, 1001))) { + createSnapshot(simpleStateMachineStorage, 99, 1001, false); + } + + // 1 snapshot file without MD5 hash, 4 snapshots + 4 md5 hash files = 9 files + File stateMachineDir = storage.getStorageDir().getStateMachineDir(); + assertFileCount(stateMachineDir, 9); + + 
simpleStateMachineStorage.cleanupOldSnapshots(snapshotRetentionPolicy); + + // We should have 5 files remaining, and 2 snapshots with MD5 hash and 1 snapshot file without MD5 hash assertFileCount(stateMachineDir, 5); } + @Test + public void testCleanupOldSnapshotsDeletesOlderSnapshotsWithMd5() throws Exception { + SnapshotRetentionPolicy snapshotRetentionPolicy = new SnapshotRetentionPolicy() { + @Override + public int getNumSnapshotsRetained() { + return 2; + } + }; + + SimpleStateMachineStorage simpleStateMachineStorage = new SimpleStateMachineStorage(); + final RaftStorage storage = newRaftStorage(storageDir); + simpleStateMachineStorage.init(storage); + try { + createSnapshot(simpleStateMachineStorage, 1, 100, true); + createSnapshot(simpleStateMachineStorage, 1, 200, true); + createSnapshot(simpleStateMachineStorage, 1, 300, true); + createSnapshot(simpleStateMachineStorage, 1, 400, true); + + File stateMachineDir = storage.getStorageDir().getStateMachineDir(); + simpleStateMachineStorage.cleanupOldSnapshots(snapshotRetentionPolicy); + + List snapshotNames = listMatchingFileNames(stateMachineDir, SNAPSHOT_REGEX); + Assertions.assertEquals(2, snapshotNames.size()); + Assertions.assertTrue(snapshotNames.contains(SimpleStateMachineStorage.getSnapshotFileName(1, 400))); + Assertions.assertTrue(snapshotNames.contains(SimpleStateMachineStorage.getSnapshotFileName(1, 300))); + Assertions.assertFalse(snapshotNames.contains(SimpleStateMachineStorage.getSnapshotFileName(1, 200))); + Assertions.assertFalse(snapshotNames.contains(SimpleStateMachineStorage.getSnapshotFileName(1, 100))); + + List md5Names = listMatchingFileNames(stateMachineDir, SNAPSHOT_MD5_REGEX); + Assertions.assertEquals(2, md5Names.size()); + Assertions.assertTrue(md5Names.contains(SimpleStateMachineStorage.getSnapshotFileName(1, 400) + MD5_SUFFIX)); + Assertions.assertTrue(md5Names.contains(SimpleStateMachineStorage.getSnapshotFileName(1, 300) + MD5_SUFFIX)); + 
Assertions.assertFalse(md5Names.contains(SimpleStateMachineStorage.getSnapshotFileName(1, 200) + MD5_SUFFIX)); + Assertions.assertFalse(md5Names.contains(SimpleStateMachineStorage.getSnapshotFileName(1, 100) + MD5_SUFFIX)); + } finally { + storage.close(); + } + } + + @Test + public void testCleanupOldSnapshotsWithoutAnyMd5() throws Exception { + SnapshotRetentionPolicy snapshotRetentionPolicy = new SnapshotRetentionPolicy() { + @Override + public int getNumSnapshotsRetained() { + return 2; + } + }; + + SimpleStateMachineStorage simpleStateMachineStorage = new SimpleStateMachineStorage(); + final RaftStorage storage = newRaftStorage(storageDir); + simpleStateMachineStorage.init(storage); + try { + createSnapshot(simpleStateMachineStorage, 1, 100, false); + createSnapshot(simpleStateMachineStorage, 1, 200, false); + createSnapshot(simpleStateMachineStorage, 1, 300, false); + + File stateMachineDir = storage.getStorageDir().getStateMachineDir(); + simpleStateMachineStorage.cleanupOldSnapshots(snapshotRetentionPolicy); + + List snapshotNames = listMatchingFileNames(stateMachineDir, SNAPSHOT_REGEX); + Assertions.assertEquals(3, snapshotNames.size()); + Assertions.assertTrue(listMatchingFileNames(stateMachineDir, SNAPSHOT_MD5_REGEX).isEmpty()); + } finally { + storage.close(); + } + } + + @Test + public void testGetLatestSnapshotReturnsNewest() throws Exception { + SimpleStateMachineStorage simpleStateMachineStorage = new SimpleStateMachineStorage(); + final RaftStorage storage = newRaftStorage(storageDir); + simpleStateMachineStorage.init(storage); + try { + Assertions.assertNull(simpleStateMachineStorage.getLatestSnapshot()); + + createSnapshot(simpleStateMachineStorage, 1, 100, true); + simpleStateMachineStorage.loadLatestSnapshot(); + SingleFileSnapshotInfo first = simpleStateMachineStorage.getLatestSnapshot(); + Assertions.assertNotNull(first); + Assertions.assertEquals(1, first.getTerm()); + Assertions.assertEquals(100, first.getIndex()); + 
Assertions.assertNotNull(first.getFile().getFileDigest()); + + createSnapshot(simpleStateMachineStorage, 1, 200, true); + simpleStateMachineStorage.loadLatestSnapshot(); + SingleFileSnapshotInfo second = simpleStateMachineStorage.getLatestSnapshot(); + Assertions.assertNotNull(second); + Assertions.assertEquals(1, second.getTerm()); + Assertions.assertEquals(200, second.getIndex()); + Assertions.assertNotNull(second.getFile().getFileDigest()); + } finally { + storage.close(); + } + } + + @Test + public void testGetLatestSnapshotIgnoresSnapshotsWithoutMd5() throws Exception { + SimpleStateMachineStorage simpleStateMachineStorage = new SimpleStateMachineStorage(); + final RaftStorage storage = newRaftStorage(storageDir); + simpleStateMachineStorage.init(storage); + try { + createSnapshot(simpleStateMachineStorage, 1, 100, true); + simpleStateMachineStorage.loadLatestSnapshot(); + + createSnapshot(simpleStateMachineStorage, 1, 200, false); + simpleStateMachineStorage.loadLatestSnapshot(); + + SingleFileSnapshotInfo latest = simpleStateMachineStorage.getLatestSnapshot(); + Assertions.assertNotNull(latest); + Assertions.assertEquals(100, latest.getIndex()); + Assertions.assertEquals(1, latest.getTerm()); + } finally { + storage.close(); + } + } + + @Test + public void testGetLatestSnapshotFallsBackToSnapshotWithoutMd5() throws Exception { + SimpleStateMachineStorage simpleStateMachineStorage = new SimpleStateMachineStorage(); + final RaftStorage storage = newRaftStorage(storageDir); + simpleStateMachineStorage.init(storage); + try { + createSnapshot(simpleStateMachineStorage, 1, 100, false); + simpleStateMachineStorage.loadLatestSnapshot(); + + SingleFileSnapshotInfo latest = simpleStateMachineStorage.getLatestSnapshot(); + Assertions.assertNotNull(latest); + Assertions.assertEquals(100, latest.getIndex()); + Assertions.assertEquals(1, latest.getTerm()); + Assertions.assertNull(latest.getFile().getFileDigest()); + } finally { + storage.close(); + } + } + + @Test + 
public void testGetLatestSnapshotFallsBackWhenNewestMd5IsInvalid() throws Exception { + SimpleStateMachineStorage simpleStateMachineStorage = new SimpleStateMachineStorage(); + final RaftStorage storage = newRaftStorage(storageDir); + simpleStateMachineStorage.init(storage); + try { + createSnapshot(simpleStateMachineStorage, 1, 100, true); + simpleStateMachineStorage.loadLatestSnapshot(); + + File latestSnapshot = createSnapshot(simpleStateMachineStorage, 1, 200, true); + final File latestMd5File = MD5FileUtil.getDigestFileForFile(latestSnapshot); + Files.write(latestMd5File.toPath(), "null".getBytes(StandardCharsets.UTF_8)); + + simpleStateMachineStorage.loadLatestSnapshot(); + + SingleFileSnapshotInfo latest = simpleStateMachineStorage.getLatestSnapshot(); + Assertions.assertNotNull(latest); + Assertions.assertEquals(100, latest.getIndex()); + Assertions.assertEquals(1, latest.getTerm()); + Assertions.assertNotNull(latest.getFile().getFileDigest()); + } finally { + storage.close(); + } + } + private static File[] assertFileCount(File dir, int expected) { File[] files = dir.listFiles(); - Assert.assertNotNull(files); - Assert.assertEquals(Arrays.toString(files), expected, files.length); + Assertions.assertNotNull(files); + Assertions.assertEquals(expected, files.length, Arrays.toString(files)); return files; } + private File createSnapshot(SimpleStateMachineStorage storage, + long term, long endIndex, + boolean withMd5) throws IOException { + File snapshotFile = storage.getSnapshotFile(term, endIndex); + Assertions.assertTrue(snapshotFile.createNewFile()); + + if (withMd5) { + MD5FileUtil.computeAndSaveMd5ForFile(snapshotFile); + } + + return snapshotFile; + } + + private static List listMatchingFileNames(File dir, java.util.regex.Pattern pattern) { + return Arrays.stream(Objects.requireNonNull(dir.list())) + .filter(name -> pattern.matcher(name).matches()) + .collect(toList()); + } + @Test public void testNotEnoughSpace() throws IOException { File mockStorageDir 
= Mockito.spy(storageDir); @@ -309,6 +587,6 @@ public void testNotEnoughSpace() throws IOException { final RaftStorageDirectoryImpl sd = new RaftStorageDirectoryImpl(mockStorageDir, SizeInBytes.valueOf("100M")); StorageState state = sd.analyzeStorage(false); - Assert.assertEquals(StorageState.NO_SPACE, state); + Assertions.assertEquals(StorageState.NO_SPACE, state); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/server/storage/TestStorageImplUtils.java b/ratis-test/src/test/java/org/apache/ratis/server/storage/TestStorageImplUtils.java index e8661e2689..905fc0345d 100644 --- a/ratis-test/src/test/java/org/apache/ratis/server/storage/TestStorageImplUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/server/storage/TestStorageImplUtils.java @@ -22,9 +22,9 @@ import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.JavaUtils; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import java.io.File; import java.io.IOException; @@ -43,7 +43,7 @@ */ public class TestStorageImplUtils extends BaseTest { - private static final Supplier rootTestDir = JavaUtils.memoize( + private static final Supplier ROOT_TEST_DIR = JavaUtils.memoize( () -> new File(BaseTest.getRootTestDir(), JavaUtils.getClassSimpleName(TestStorageImplUtils.class) + Integer.toHexString(ThreadLocalRandom.current().nextInt()))); @@ -55,9 +55,9 @@ static File chooseNewStorageDir(List volumes, String sub) throws IOExcepti return new File(vol, sub); } - @AfterClass + @AfterAll public static void tearDown() throws IOException { - FileUtils.deleteFully(rootTestDir.get()); + FileUtils.deleteFully(ROOT_TEST_DIR.get()); } /** @@ -67,7 +67,7 @@ public static void tearDown() throws IOException { */ @Test public void testChooseStorageDirWithOneVolume() throws IOException { - File testDir = new 
File(rootTestDir.get(), UUID.randomUUID().toString()); + File testDir = new File(ROOT_TEST_DIR.get(), UUID.randomUUID().toString()); List directories = Collections.singletonList(testDir); String subDirOne = UUID.randomUUID().toString(); String subDirTwo = UUID.randomUUID().toString(); @@ -75,9 +75,9 @@ public void testChooseStorageDirWithOneVolume() throws IOException { final File storageDirTwo = chooseNewStorageDir(directories, subDirTwo); File expectedOne = new File(testDir, subDirOne); File expectedTwo = new File(testDir, subDirTwo); - Assert.assertEquals(expectedOne.getCanonicalPath(), + Assertions.assertEquals(expectedOne.getCanonicalPath(), storageDirOne.getCanonicalPath()); - Assert.assertEquals(expectedTwo.getCanonicalPath(), + Assertions.assertEquals(expectedTwo.getCanonicalPath(), storageDirTwo.getCanonicalPath()); } @@ -88,7 +88,7 @@ public void testChooseStorageDirWithOneVolume() throws IOException { */ @Test public void testChooseStorageDirWithMultipleVolumes() throws IOException { - File testDir = new File(rootTestDir.get(), UUID.randomUUID().toString()); + File testDir = new File(ROOT_TEST_DIR.get(), UUID.randomUUID().toString()); List directories = new ArrayList<>(); IntStream.range(0, 10).mapToObj((i) -> new File(testDir, Integer.toString(i))).forEach((dir) -> { @@ -113,7 +113,7 @@ public void testChooseStorageDirWithMultipleVolumes() throws IOException { String subDir = UUID.randomUUID().toString(); final File storageDirectory = chooseNewStorageDir(directories, subDir); File expected = new File(directories.get(6), subDir); - Assert.assertEquals(expected.getCanonicalPath(), + Assertions.assertEquals(expected.getCanonicalPath(), storageDirectory.getCanonicalPath()); } @@ -124,10 +124,10 @@ public void testChooseStorageDirWithMultipleVolumes() throws IOException { public void testChooseStorageDirWithNoVolume() { try { chooseNewStorageDir(Collections.emptyList(), UUID.randomUUID().toString()); - Assert.fail(); + Assertions.fail(); } catch (IOException 
ex) { String expectedErrMsg = "No storage directory found."; - Assert.assertEquals(expectedErrMsg, ex.getMessage()); + Assertions.assertEquals(expectedErrMsg, ex.getMessage()); } } @@ -136,7 +136,7 @@ public void testChooseStorageDirWithNoVolume() { */ @Test public void testAutoFormatSingleDirectory() throws Exception { - final File testDir = new File(rootTestDir.get(), UUID.randomUUID().toString()); + final File testDir = new File(ROOT_TEST_DIR.get(), UUID.randomUUID().toString()); FileUtils.createDirectories(testDir); final RaftProperties properties = new RaftProperties(); @@ -144,7 +144,7 @@ public void testAutoFormatSingleDirectory() throws Exception { final RaftStorageImpl storage = StorageImplUtils.initRaftStorage( "group-1", RaftStorage.StartupOption.RECOVER, properties); - Assert.assertNotNull(storage); + Assertions.assertNotNull(storage); storage.close(); } @@ -153,7 +153,7 @@ public void testAutoFormatSingleDirectory() throws Exception { */ @Test public void testAutoFormatMultiDirectories() throws Exception { - final File testDir = new File(rootTestDir.get(), UUID.randomUUID().toString()); + final File testDir = new File(ROOT_TEST_DIR.get(), UUID.randomUUID().toString()); final List directories = new ArrayList<>(); IntStream.range(0, 3).mapToObj((i) -> new File(testDir, Integer.toString(i))).forEach((dir) -> { @@ -172,6 +172,6 @@ public void testAutoFormatMultiDirectories() throws Exception { () -> StorageImplUtils.initRaftStorage( "group-1", RaftStorage.StartupOption.RECOVER, properties), IOException.class); - Assert.assertTrue(ioe.getMessage().contains("Failed to RECOVER: Storage directory not found")); + Assertions.assertTrue(ioe.getMessage().contains("Failed to RECOVER: Storage directory not found")); } } \ No newline at end of file diff --git a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/ElectionCommandIntegrationTest.java b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/ElectionCommandIntegrationTest.java index 
e708153aa5..a61bb19f73 100644 --- a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/ElectionCommandIntegrationTest.java +++ b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/ElectionCommandIntegrationTest.java @@ -30,8 +30,8 @@ import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.util.concurrent.TimeUnit; @@ -63,13 +63,13 @@ void runTestElectionTransferCommand(MiniRaftCluster cluster) throws Exception { RaftServer.Division newLeader = cluster.getFollowers().get(0); final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); - Assert.assertNotEquals(cluster.getLeader().getId(), newLeader.getId()); + Assertions.assertNotEquals(cluster.getLeader().getId(), newLeader.getId()); int ret = shell.run("election", "transfer", "-peers", address, "-address", newLeader.getPeer().getAddress()); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); JavaUtils.attempt(() -> { - Assert.assertEquals(cluster.getLeader().getId(), newLeader.getId()); + Assertions.assertEquals(cluster.getLeader().getId(), newLeader.getId()); }, 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testElectionTransferCommand", LOG); } @@ -85,28 +85,28 @@ void runTestElectionTransferCommandToHigherPriority(MiniRaftCluster cluster) thr RaftServer.Division newLeader = cluster.getFollowers().get(0); final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); - Assert.assertTrue(cluster.getFollowers().contains(newLeader)); + Assertions.assertTrue(cluster.getFollowers().contains(newLeader)); // set current leader's priority to 2 int ret = shell.run("peer", "setPriority", "-peers", address, "-addressPriority", leader.getPeer().getAddress()+ "|" + 2); 
- Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); // transfer to new leader will set its priority to 2 (with timeout 1s) ret = shell.run("election", "transfer", "-peers", address, "-address", newLeader.getPeer().getAddress(), "-timeout", "1"); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); - JavaUtils.attempt(() -> Assert.assertEquals(cluster.getLeader().getId(), newLeader.getId()), + JavaUtils.attempt(() -> Assertions.assertEquals(cluster.getLeader().getId(), newLeader.getId()), 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testElectionTransferLeaderCommand", LOG); // verify that priorities of new leader and old leader are both 2 ret = shell.run("group", "info", "-peers", address); - Assert.assertEquals(0 , ret); + Assertions.assertEquals(0 , ret); String expected = String.format("\"%s\"%n priority: %d", newLeader.getPeer().getAddress(), 2); String expected2 = String.format("\"%s\"%n priority: %d", leader.getPeer().getAddress(), 2); - Assert.assertTrue(out.toString().contains(expected)); - Assert.assertTrue(out.toString().contains(expected2)); + Assertions.assertTrue(out.toString().contains(expected)); + Assertions.assertTrue(out.toString().contains(expected2)); } @Test @@ -120,25 +120,25 @@ void runTestElectionPauseResumeCommand(MiniRaftCluster cluster) throws Exception RaftServer.Division newLeader = cluster.getFollowers().get(0); final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); - Assert.assertNotEquals(cluster.getLeader().getId(), newLeader.getId()); + Assertions.assertNotEquals(cluster.getLeader().getId(), newLeader.getId()); int ret = shell.run("election", "pause", "-peers", sb.toString(), "-address", newLeader.getPeer().getAddress()); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); ret = shell.run("peer", "setPriority", "-peers", sb.toString(), "-addressPriority", newLeader.getPeer().getAddress() + "|" + 2); - Assert.assertEquals(0, ret); + 
Assertions.assertEquals(0, ret); JavaUtils.attempt(() -> { - Assert.assertNotEquals(cluster.getLeader().getId(), newLeader.getId()); + Assertions.assertNotEquals(cluster.getLeader().getId(), newLeader.getId()); }, 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testElectionPauseResumeCommand", LOG); ret = shell.run("election", "resume", "-peers", sb.toString(), "-address", newLeader.getPeer().getAddress()); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); JavaUtils.attempt(() -> { - Assert.assertEquals(cluster.getLeader().getId(), newLeader.getId()); + Assertions.assertEquals(cluster.getLeader().getId(), newLeader.getId()); }, 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testElectionPauseResumeCommand", LOG); } @@ -153,10 +153,10 @@ void runTestElectionStepDownCommand(MiniRaftCluster cluster) throws Exception { RaftServer.Division newLeader = cluster.getFollowers().get(0); final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); - Assert.assertNotEquals(cluster.getLeader().getId(), newLeader.getId()); - Assert.assertEquals(2, cluster.getFollowers().size()); + Assertions.assertNotEquals(cluster.getLeader().getId(), newLeader.getId()); + Assertions.assertEquals(2, cluster.getFollowers().size()); int ret = shell.run("election", "stepDown", "-peers", sb.toString()); - Assert.assertEquals(0, ret); - Assert.assertEquals(3, cluster.getFollowers().size()); + Assertions.assertEquals(0, ret); + Assertions.assertEquals(3, cluster.getFollowers().size()); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/GroupCommandIntegrationTest.java b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/GroupCommandIntegrationTest.java index c745307d2d..c020075e73 100644 --- a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/GroupCommandIntegrationTest.java +++ b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/GroupCommandIntegrationTest.java @@ -20,6 +20,7 @@ import 
org.apache.ratis.RaftTestUtil; import org.apache.ratis.client.RaftClient; import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.server.RaftServer; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.server.impl.MiniRaftCluster; @@ -29,8 +30,8 @@ import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; public abstract class GroupCommandIntegrationTest @@ -64,11 +65,11 @@ void runTestGroupListCommand(MiniRaftCluster cluster) throws Exception { RatisShell shell = new RatisShell(out.getPrintStream()); int ret = shell.run("group", "list", "-peers", address, "-peerId", leader.getPeer().getId().toString()); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); String info = out.toString().trim(); String expected = String.format("The peerId %s (server %s) is in 1 groups, and the groupIds is: [%s]", leader.getId(), leader.getPeer().getAddress(), leader.getGroup().getGroupId()); - Assert.assertEquals(expected, info); + Assertions.assertEquals(expected, info); } @Test @@ -82,12 +83,60 @@ void runTestGroupInfoCommand(MiniRaftCluster cluster) throws Exception { final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); int ret = shell.run("group", "info", "-peers", address); - Assert.assertEquals(0 , ret); + Assertions.assertEquals(0 , ret); String result = out.toString().trim(); String hearder = String.format("group id: %s%sleader info: %s(%s)%s%s", cluster.getGroupId().getUuid(), NEW_LINE, leader.getId(), cluster.getLeader().getPeer().getAddress(), NEW_LINE, NEW_LINE); String info = result.substring(0, hearder.length()); - Assert.assertEquals(hearder, info); + Assertions.assertEquals(hearder, info); } + + @Test + public void 
testGroupInfoCommandIncludesCorrectLogInfo() throws Exception { + // set number of server to 1 so that we can make sure which server returns the LogInfoProto + // since information of applied index, snapshot index, and last entry index are not shared between servers + runWithNewCluster(1, this::runTestGroupInfoCommandWithLogInfoVerification); + } + + void runTestGroupInfoCommandWithLogInfoVerification(MiniRaftCluster cluster) throws Exception { + RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster); + + try (final RaftClient client = cluster.createClient(leader.getId())) { + for (int i = 0; i < RaftServerConfigKeys.Snapshot.creationGap(getProperties()); i++) { + RaftClientReply + reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); + Assertions.assertTrue(reply.isSuccess()); + } + } + + leader.getStateMachine().takeSnapshot(); + + final String address = getClusterAddress(cluster); + final StringPrintStream out = new StringPrintStream(); + RatisShell shell = new RatisShell(out.getPrintStream()); + int ret = shell.run("group", "info", "-peers", address); + Assertions.assertEquals(0 , ret); + String result = out.toString().trim(); + String hearder = String.format("group id: %s%sleader info: %s(%s)%s%s", + cluster.getGroupId().getUuid(), NEW_LINE, leader.getId(), + cluster.getLeader().getPeer().getAddress(), NEW_LINE, NEW_LINE); + String info = result.substring(0, hearder.length()); + Assertions.assertEquals(hearder, info); + long currentTerm = leader.getInfo().getCurrentTerm(); + String logInfoProtoFormat = "%s {" + NEW_LINE + " term: " + currentTerm + NEW_LINE + " index: %s"; + Assertions.assertTrue(result.contains( + String.format(logInfoProtoFormat, "applied", + leader.getStateMachine().getLastAppliedTermIndex().getIndex()))); + Assertions.assertTrue(result.contains( + String.format(logInfoProtoFormat, "committed", + leader.getRaftLog().getLastCommittedIndex()))); + Assertions.assertTrue(result.contains( + String.format(logInfoProtoFormat, 
"lastSnapshot", + leader.getStateMachine().getLatestSnapshot().getIndex()))); + Assertions.assertTrue(result.contains( + String.format(logInfoProtoFormat, "lastEntry", + leader.getRaftLog().getLastCommittedIndex()))); + } + } diff --git a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/LocalCommandIntegrationTest.java b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/LocalCommandIntegrationTest.java new file mode 100644 index 0000000000..afc13837c5 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/LocalCommandIntegrationTest.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.shell.cli.sh; + +import org.apache.ratis.proto.RaftProtos.RaftPeerRole; +import org.apache.ratis.proto.RaftProtos.LogEntryProto; +import org.apache.ratis.proto.RaftProtos.RaftConfigurationProto; +import org.apache.ratis.proto.RaftProtos.RaftPeerProto; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +public class LocalCommandIntegrationTest { + + private static final String RAFT_META_CONF = "raft-meta.conf"; + private static final String NEW_RAFT_META_CONF = "new-raft-meta.conf"; + private static Pattern p = Pattern.compile("(?:\\w+\\|\\w+:\\d+,?)+"); + + + @Test + public void testDuplicatedPeerAddresses() throws Exception { + String[] duplicatedAddressesList = {"peer1_ID1|host1:9872,peer2_ID|host2:9872,peer1_ID2|host1:9872", + "host1:9872,host2:9872,host1:9872"}; + + testDuplicatedPeers(duplicatedAddressesList, "address", "host1:9872"); + } + + @Test + public void testDuplicatedPeerIds() throws Exception { + String[] duplicatedIdsList = {"peer1_ID1|host1:9872,peer2_ID|host2:9872,peer1_ID1|host3:9872"}; + + testDuplicatedPeers(duplicatedIdsList, "ID", "peer1_ID1"); + } + + private void testDuplicatedPeers(String[] peersList, String expectedErrorMessagePart, String expectedDuplicatedValue) + throws Exception { + for (String peersStr : peersList) { + StringPrintStream out = new StringPrintStream(); + RatisShell shell = new RatisShell(out.getPrintStream()); + int ret = shell.run("local", "raftMetaConf", "-peers", peersStr, "-path", "test"); + 
Assertions.assertEquals(-1, ret); + String message = out.toString().trim(); + Assertions.assertEquals( + String.format("Found duplicated %s: %s. Please make sure the %s of peer have no duplicated value.", + expectedErrorMessagePart, expectedDuplicatedValue, expectedErrorMessagePart), message); + } + } + + @Test + public void testRunMethod(@TempDir Path tempDir) throws Exception { + int index = 1; + generateRaftConf(tempDir.resolve(RAFT_META_CONF), index); + + String[] testPeersListArray = {"peer1_ID|localhost:9872,peer2_ID|host2:9872,peer3_ID|host3:9872", + "host1:9872,host2:9872,host3:9872"}; + + for (String peersListStr : testPeersListArray) { + generateRaftConf(tempDir.resolve(RAFT_META_CONF), index); + StringPrintStream out = new StringPrintStream(); + RatisShell shell = new RatisShell(out.getPrintStream()); + int ret = shell.run("local", "raftMetaConf", "-peers", peersListStr, "-path", tempDir.toString()); + Assertions.assertEquals(0, ret); + + // read & verify the contents of the new-raft-meta.conf file + long indexFromNewConf; + List peers; + try (InputStream in = Files.newInputStream(tempDir.resolve(NEW_RAFT_META_CONF))) { + LogEntryProto logEntry = LogEntryProto.newBuilder().mergeFrom(in).build(); + indexFromNewConf = logEntry.getIndex(); + peers = logEntry.getConfigurationEntry().getPeersList(); + } + + Assertions.assertEquals(index + 1, indexFromNewConf); + + String addressRegex = "^[a-zA-Z0-9.-]+:\\d+$"; + Pattern pattern = Pattern.compile(addressRegex); + peers.forEach(peerProto -> Assertions.assertTrue( + pattern.matcher(peerProto.getAddress()).matches())); + + String peersListStrFromNewMetaConf; + if (containsPeerId(peersListStr)) { + peersListStrFromNewMetaConf = peers.stream() + .map(peer -> peer.getId().toStringUtf8() + "|" + peer.getAddress()) + .collect(Collectors.joining(",")); + } else { + peersListStrFromNewMetaConf = peers.stream().map(RaftPeerProto::getAddress) + .collect(Collectors.joining(",")); + } + + 
Assertions.assertEquals(peersListStr, peersListStrFromNewMetaConf); + } + } + + + private void generateRaftConf(Path path, int index) throws IOException { + Map map = new HashMap<>(); + map.put("peer1_ID", "host1:9872"); + map.put("peer2_ID", "host2:9872"); + map.put("peer3_ID", "host3:9872"); + map.put("peer4_ID", "host4:9872"); + List raftPeerProtos = new ArrayList<>(); + for (Map.Entry en : map.entrySet()) { + raftPeerProtos.add(RaftPeerProto.newBuilder() + .setId(ByteString.copyFrom(en.getKey().getBytes(StandardCharsets.UTF_8))).setAddress(en.getValue()) + .setStartupRole(RaftPeerRole.FOLLOWER).build()); + } + + LogEntryProto generateLogEntryProto = LogEntryProto.newBuilder() + .setConfigurationEntry(RaftConfigurationProto.newBuilder().addAllPeers(raftPeerProtos).build()) + .setIndex(index).build(); + try (OutputStream out = Files.newOutputStream(path)) { + generateLogEntryProto.writeTo(out); + } + } + + private boolean containsPeerId(String str) { + return p.matcher(str).find(); + } + +} diff --git a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/PeerCommandIntegrationTest.java b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/PeerCommandIntegrationTest.java index 80400309ec..22f15e531a 100644 --- a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/PeerCommandIntegrationTest.java +++ b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/PeerCommandIntegrationTest.java @@ -28,15 +28,16 @@ import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.statemachine.StateMachine; -import org.apache.ratis.thirdparty.com.google.common.collect.ObjectArrays; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import 
org.slf4j.event.Level; +import java.util.Arrays; +import java.util.List; import java.util.concurrent.TimeUnit; public abstract class PeerCommandIntegrationTest @@ -65,14 +66,14 @@ void runTestPeerRemoveCommand(MiniRaftCluster cluster) throws Exception { final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster); final String address = getClusterAddress(cluster); RaftServer.Division toRemove = cluster.getFollowers().get(0); - RaftPeer[] peers = new RaftPeer[]{cluster.getFollowers().get(1).getPeer(), leader.getPeer()}; + final List peers = Arrays.asList(cluster.getFollowers().get(1).getPeer(), leader.getPeer()); final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); - Assert.assertTrue(cluster.getFollowers().contains(toRemove)); + Assertions.assertTrue(cluster.getFollowers().contains(toRemove)); int ret = shell.run("peer", "remove", "-peers", address, "-peerId", toRemove.getPeer().getId().toString()); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); RaftServerTestUtil.waitAndCheckNewConf(cluster, peers,1, null); } @@ -80,8 +81,8 @@ void runTestPeerAddCommand(MiniRaftCluster cluster) throws Exception { LOG.info("Start testMultiGroup" + cluster.printServers()); RaftTestUtil.waitForLeader(cluster); - RaftPeer[] peers = cluster.getPeers().toArray(new RaftPeer[0]); - RaftPeer[] newPeers = cluster.addNewPeers(1, true, true).newPeers; + final List peers = cluster.getPeers(); + final List newPeers = cluster.addNewPeers(1, true).getAddedPeers(); RaftServerTestUtil.waitAndCheckNewConf(cluster, peers, 0, null); StringBuilder sb = new StringBuilder(); @@ -92,11 +93,13 @@ void runTestPeerAddCommand(MiniRaftCluster cluster) throws Exception { final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); + final RaftPeer newPeer0 = newPeers.get(0); int ret = shell.run("peer", "add", "-peers", sb.toString(), "-address", - 
newPeers[0].getAdminAddress(), "-peerId", newPeers[0].getId().toString()); + newPeer0.getAdminAddress(), "-peerId", newPeer0.getId().toString()); - Assert.assertEquals(0, ret); - RaftServerTestUtil.waitAndCheckNewConf(cluster, ObjectArrays.concat(peers, newPeers[0]), 0, null); + Assertions.assertEquals(0, ret); + peers.add(newPeer0); + RaftServerTestUtil.waitAndCheckNewConf(cluster, peers, 0, null); } @@ -112,12 +115,12 @@ void runTestPeerSetPriorityCommand(MiniRaftCluster cluster) throws Exception { RaftServer.Division newLeader = cluster.getFollowers().get(0); final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); - Assert.assertTrue(cluster.getFollowers().contains(newLeader)); + Assertions.assertTrue(cluster.getFollowers().contains(newLeader)); int ret = shell.run("peer", "setPriority", "-peers", address, "-addressPriority", newLeader.getPeer().getAddress()+ "|" + 2); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); JavaUtils.attempt(() -> { - Assert.assertEquals(cluster.getLeader().getId(), newLeader.getId()); + Assertions.assertEquals(cluster.getLeader().getId(), newLeader.getId()); }, 10, TimeDuration.valueOf(1, TimeUnit.SECONDS), "testPeerSetPriorityCommand", LOG); } diff --git a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/SnapshotCommandIntegrationTest.java b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/SnapshotCommandIntegrationTest.java index 3c0ee9772c..f3f4e43077 100644 --- a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/SnapshotCommandIntegrationTest.java +++ b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/SnapshotCommandIntegrationTest.java @@ -30,8 +30,8 @@ import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.util.Slf4jUtils; import org.apache.ratis.util.SizeInBytes; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import 
org.slf4j.event.Level; import java.io.File; @@ -67,7 +67,7 @@ void runTestSnapshotCreateCommand(MiniRaftCluster cluster) throws Exception { try (final RaftClient client = cluster.createClient(leaderId)) { for (int i = 0; i < RaftServerConfigKeys.Snapshot.creationGap(getProperties()); i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } final String address = getClusterAddress(cluster); @@ -75,14 +75,14 @@ void runTestSnapshotCreateCommand(MiniRaftCluster cluster) throws Exception { RatisShell shell = new RatisShell(out.getPrintStream()); int ret = shell.run("snapshot", "create", "-peers", address, "-peerId", leader.getPeer().getId().toString()); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); String[] str = out.toString().trim().split(" "); int snapshotIndex = Integer.parseInt(str[str.length-1]); LOG.info("snapshotIndex = {}", snapshotIndex); final File snapshotFile = SimpleStateMachine4Testing.get(leader) .getStateMachineStorage().getSnapshotFile(leader.getInfo().getCurrentTerm(), snapshotIndex); - Assert.assertTrue(snapshotFile.exists()); + Assertions.assertTrue(snapshotFile.exists()); } void runTestSnapshotCreateCommandOnSpecificServer(MiniRaftCluster cluster) throws Exception { @@ -91,16 +91,16 @@ void runTestSnapshotCreateCommandOnSpecificServer(MiniRaftCluster cluster) throw try (final RaftClient client = cluster.createClient(leaderId)) { for (int i = 0; i < RaftServerConfigKeys.Snapshot.creationGap(getProperties()); i++) { RaftClientReply reply = client.io().send(new RaftTestUtil.SimpleMessage("m" + i)); - Assert.assertTrue(reply.isSuccess()); + Assertions.assertTrue(reply.isSuccess()); } } final String address = getClusterAddress(cluster); final StringPrintStream out = new StringPrintStream(); RatisShell shell = new RatisShell(out.getPrintStream()); - Assert.assertEquals(2, cluster.getFollowers().size()); + 
Assertions.assertEquals(2, cluster.getFollowers().size()); int ret = shell.run("snapshot", "create", "-peers", address, "-peerId", cluster.getFollowers().get(0).getId().toString()); - Assert.assertEquals(0, ret); + Assertions.assertEquals(0, ret); String[] str = out.toString().trim().split(" "); int snapshotIndex = Integer.parseInt(str[str.length-1]); LOG.info("snapshotIndex = {}", snapshotIndex); @@ -108,7 +108,7 @@ void runTestSnapshotCreateCommandOnSpecificServer(MiniRaftCluster cluster) throw final File snapshotFile = SimpleStateMachine4Testing.get(cluster.getFollowers().get(0)) .getStateMachineStorage() .getSnapshotFile(cluster.getFollowers().get(0).getInfo().getCurrentTerm(), snapshotIndex); - Assert.assertTrue(snapshotFile.exists()); + Assertions.assertTrue(snapshotFile.exists()); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/TestRatisShell.java b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/TestRatisShell.java index 6e22276475..ccf9702f7f 100644 --- a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/TestRatisShell.java +++ b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/TestRatisShell.java @@ -27,8 +27,8 @@ import org.apache.ratis.shell.cli.sh.command.PeerCommand; import org.apache.ratis.shell.cli.sh.command.SnapshotCommand; import org.apache.ratis.util.ReflectionUtils; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.reflections.Reflections; import java.io.PrintStream; @@ -44,17 +44,18 @@ /** * Test {@link RatisShell} */ +@SuppressWarnings({"rawtypes"}) public class TestRatisShell extends BaseTest { static final PrintStream OUT = System.out; static final Class[] ARG_CLASSES = new Class[] {Context.class}; static void assertCommand(String message, Command expected, Command computed) { - Assert.assertEquals(message, expected.getClass(), computed.getClass()); - Assert.assertEquals(message, expected.getCommandName(), 
computed.getCommandName()); + Assertions.assertEquals(expected.getClass(), computed.getClass(), message); + Assertions.assertEquals(expected.getCommandName(), computed.getCommandName(), message); } static void assertCommands(List expected, List computed) { - Assert.assertEquals(expected.size(), computed.size()); + Assertions.assertEquals(expected.size(), computed.size()); for(int i = 0; i < expected.size(); i++) { assertCommand("Command " + i, expected.get(i), computed.get(i)); } @@ -65,12 +66,11 @@ public void testFullParentCommandList() throws Exception { final List expected = new ArrayList<>(loadCommands(RatisShell.class.getPackage().getName() + ".command")); Collections.sort(expected); - try(RatisShell shell = new RatisShell(OUT)) { - final List computed = new ArrayList<>(shell.getCommands()); - Collections.sort(computed); + RatisShell shell = new RatisShell(OUT); + final List computed = new ArrayList<>(shell.getCommands()); + Collections.sort(computed); - assertCommands(expected, computed); - } + assertCommands(expected, computed); } @Test @@ -103,7 +103,7 @@ static void runTestFullCommandList(Function pare final List computed = new ArrayList<>(parent.getSubCommands().values()); Collections.sort(computed); - Assert.assertFalse(computed.isEmpty()); + Assertions.assertFalse(computed.isEmpty()); final Package pkg = computed.iterator().next().getClass().getPackage(); final List expected = new ArrayList<>(loadCommands(pkg)); diff --git a/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/TestSecureRatisShell.java b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/TestSecureRatisShell.java new file mode 100644 index 0000000000..21e9fe229a --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/shell/cli/sh/TestSecureRatisShell.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.shell.cli.sh; + +import org.apache.ratis.BaseTest; +import org.apache.ratis.client.RaftClientConfigKeys; +import org.apache.ratis.conf.Parameters; +import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.grpc.GrpcConfigKeys; +import org.apache.ratis.grpc.GrpcTlsConfig; +import org.apache.ratis.grpc.MiniRaftClusterWithGrpc; +import org.apache.ratis.netty.NettyUtils; +import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.security.SecurityTestUtils; +import org.apache.ratis.util.Slf4jUtils; +import org.apache.ratis.util.TimeDuration; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.slf4j.event.Level; + +import javax.net.ssl.KeyManager; +import javax.net.ssl.TrustManager; +import java.io.ByteArrayOutputStream; +import java.io.OutputStream; +import java.io.PrintStream; +import java.util.List; + +public class TestSecureRatisShell extends BaseTest { + { + Slf4jUtils.setLogLevel(NettyUtils.LOG, Level.DEBUG); + } + + private static final Parameters SERVER_PARAMETERS = new Parameters(); + private static final Parameters CLIENT_PARAMETERS = new Parameters(); + + static { + final TrustManager emptyTrustManager = SecurityTestUtils.emptyTrustManager(); + try { + final KeyManager 
serverKeyManager = SecurityTestUtils.getKeyManager(SecurityTestUtils::getServerKeyStore); + final GrpcTlsConfig serverConfig = new GrpcTlsConfig(serverKeyManager, emptyTrustManager, true); + GrpcConfigKeys.Server.setTlsConf(SERVER_PARAMETERS, serverConfig); + GrpcConfigKeys.Admin.setTlsConf(SERVER_PARAMETERS, serverConfig); + GrpcConfigKeys.Client.setTlsConf(SERVER_PARAMETERS, serverConfig); + } catch (Exception e) { + throw new IllegalStateException("Failed to init SERVER_PARAMETERS", e); + } + + try { + final KeyManager clientKeyManager = SecurityTestUtils.getKeyManager(SecurityTestUtils::getClientKeyStore); + final GrpcTlsConfig clientConfig = new GrpcTlsConfig(clientKeyManager, emptyTrustManager, true); + GrpcConfigKeys.Admin.setTlsConf(CLIENT_PARAMETERS, clientConfig); + GrpcConfigKeys.Client.setTlsConf(CLIENT_PARAMETERS, clientConfig); + } catch (Exception e) { + throw new IllegalStateException("Failed to init CLIENT_PARAMETERS", e); + } + } + + @Test + public void testRatisShell() throws Exception { + final String[] ids = {"s0"}; + final RaftProperties properties = new RaftProperties(); + RaftClientConfigKeys.Rpc.setRequestTimeout(properties, TimeDuration.ONE_MINUTE); + + try(MiniRaftClusterWithGrpc cluster = new MiniRaftClusterWithGrpc(ids, properties, SERVER_PARAMETERS)) { + cluster.start(); + + runTestRatisShell(cluster, true); + runTestRatisShell(cluster, false); + } + } + + void runTestRatisShell(MiniRaftClusterWithGrpc cluster, boolean secure) throws Exception { + try (ByteArrayOutputStream out = new ByteArrayOutputStream(1 << 16)) { + RatisShell shell = newRatisShell(out, cluster.getProperties(), secure); + shell.run("group", "info", "-peers", toCliArg(cluster.getPeers())); + final String output = out.toString(); + LOG.info("output (secure? 
{}):\n{}", secure, output); + final String gid = cluster.getGroup().getGroupId().getUuid().toString(); + if (secure) { + Assertions.assertTrue(output.contains(gid), () -> gid + " not found for secure shell"); + } else { + Assertions.assertTrue(output.contains("Failed to get group ID"), "Unexpected output for unsecure shell"); + } + } + } + + static RatisShell newRatisShell(OutputStream out, RaftProperties properties, boolean secure) { + final PrintStream printStream = new PrintStream(out, true); + if (!secure) { + return new RatisShell(printStream); + } + return RatisShell.newBuilder() + .setPrintStream(printStream) + .setProperties(properties) + .setParameters(CLIENT_PARAMETERS) + .build(); + } + + static String toCliArg(List peers) { + final StringBuilder b = new StringBuilder(); + for(RaftPeer peer : peers) { + b.append(peer.getAdminAddress()).append(","); + } + return b.substring(0, b.length() - 1); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/statemachine/TestStateMachine.java b/ratis-test/src/test/java/org/apache/ratis/statemachine/TestStateMachine.java index 8e37947d3b..79c204e292 100644 --- a/ratis-test/src/test/java/org/apache/ratis/statemachine/TestStateMachine.java +++ b/ratis-test/src/test/java/org/apache/ratis/statemachine/TestStateMachine.java @@ -35,7 +35,8 @@ import org.apache.ratis.server.simulation.MiniRaftClusterWithSimulatedRpc; import org.apache.ratis.statemachine.impl.SimpleStateMachine4Testing; import org.apache.ratis.util.Slf4jUtils; -import org.junit.*; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.slf4j.event.Level; import java.util.Collections; @@ -48,11 +49,15 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; 
+import static org.junit.jupiter.api.Assertions.assertTrue; /** * Test StateMachine related functionality */ +@SuppressWarnings({"deprecation"}) public class TestStateMachine extends BaseTest implements MiniRaftClusterWithSimulatedRpc.FactoryGet { static { Slf4jUtils.setLogLevel(RaftServer.Division.LOG, Level.DEBUG); @@ -162,9 +167,9 @@ static void runTestTransactionContextIsPassedBack(MiniRaftCluster cluster) throw final SMTransactionContext sm = SMTransactionContext.get(raftServer); final List ll = new ArrayList<>(sm.applied); Collections.sort(ll); - assertEquals(ll.toString(), ll.size(), numTrx); + assertEquals(ll.size(), numTrx, ll.toString()); for (int i=0; i < numTrx; i++) { - assertEquals(ll.toString(), Long.valueOf(i+1), ll.get(i)); + assertEquals(Long.valueOf(i+1), ll.get(i), ll.toString()); } } @@ -193,7 +198,7 @@ public void testStateMachineRegistry() throws Throwable { final RaftServer server = cluster.getServer(id); for(Map.Entry e: registry.entrySet()) { - Assert.assertSame(e.getValue(), server.getDivision(e.getKey()).getStateMachine()); + Assertions.assertSame(e.getValue(), server.getDivision(e.getKey()).getStateMachine()); } } } diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestDataBlockingQueue.java b/ratis-test/src/test/java/org/apache/ratis/util/TestDataBlockingQueue.java index df0dd2f8ed..3ad40d4ca7 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestDataBlockingQueue.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestDataBlockingQueue.java @@ -17,8 +17,9 @@ */ package org.apache.ratis.util; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,34 +37,38 @@ public class TestDataBlockingQueue { final TimeDuration slow = TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); final TimeDuration fast = TimeDuration.valueOf(10, 
TimeUnit.MILLISECONDS); - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testElementLimit() { TestDataQueue.runTestElementLimit(q); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testByteLimit() { TestDataQueue.runTestByteLimit(q); } - @Test(timeout = 10_000) + @Test + @Timeout(value = 10) public void testSlowOfferFastPoll() throws Exception { runTestBlockingCalls(slow, fast, q); } - @Test(timeout = 10_000) + @Test + @Timeout(value = 10) public void testFastOfferSlowPoll() throws Exception { runTestBlockingCalls(fast, slow, q); } static void assertOfferPull(long offering, long polled, long elementLimit) { - Assert.assertTrue(offering >= polled); - Assert.assertTrue(offering - polled <= elementLimit + 1); + Assertions.assertTrue(offering >= polled); + Assertions.assertTrue(offering - polled <= elementLimit + 1); } static void runTestBlockingCalls(TimeDuration offerSleepTime, TimeDuration pollSleepTime, DataBlockingQueue q) throws Exception { - Assert.assertTrue(q.isEmpty()); + Assertions.assertTrue(q.isEmpty()); ExitUtils.disableSystemExit(); final int elementLimit = q.getElementLimit(); final TimeDuration timeout = CollectionUtils.min(offerSleepTime, pollSleepTime); @@ -74,11 +79,11 @@ static void runTestBlockingCalls(TimeDuration offerSleepTime, TimeDuration pollS final Thread pollThread = new Thread(() -> { try { - for(; polledValue.get() < endValue;) { + while (polledValue.get() < endValue) { pollSleepTime.sleep(); final Long polled = q.poll(timeout); if (polled != null) { - Assert.assertEquals(polledValue.incrementAndGet(), polled.intValue()); + Assertions.assertEquals(polledValue.incrementAndGet(), polled.intValue()); LOG.info("polled {}", polled); } assertOfferPull(offeringValue.get(), polledValue.get(), elementLimit); @@ -109,10 +114,10 @@ static void runTestBlockingCalls(TimeDuration offerSleepTime, TimeDuration pollS offerThread.join(); pollThread.join(); - Assert.assertEquals(endValue + 1, offeringValue.get()); - 
Assert.assertEquals(endValue, polledValue.get()); + Assertions.assertEquals(endValue + 1, offeringValue.get()); + Assertions.assertEquals(endValue, polledValue.get()); - Assert.assertTrue(q.isEmpty()); + Assertions.assertTrue(q.isEmpty()); ExitUtils.assertNotTerminated(); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestDataQueue.java b/ratis-test/src/test/java/org/apache/ratis/util/TestDataQueue.java index fe8a7ac246..42a55d90ff 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestDataQueue.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestDataQueue.java @@ -18,8 +18,9 @@ package org.apache.ratis.util; import org.apache.ratis.util.function.TriConsumer; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.util.ArrayList; import java.util.Collections; @@ -37,15 +38,16 @@ static TriConsumer getTimeoutHandler(bool } static void assertSizes(long expectedNumElements, long expectedNumBytes, DataQueue q) { - Assert.assertEquals(expectedNumElements, q.getNumElements()); - Assert.assertEquals(expectedNumBytes, q.getNumBytes()); + Assertions.assertEquals(expectedNumElements, q.getNumElements()); + Assertions.assertEquals(expectedNumBytes, q.getNumBytes()); } final SizeInBytes byteLimit = SizeInBytes.valueOf(100); final int elementLimit = 5; final DataQueue q = new DataQueue<>(null, byteLimit, elementLimit, Long::longValue); - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testElementLimit() { runTestElementLimit(q); } @@ -56,30 +58,31 @@ static void runTestElementLimit(DataQueue q) { final int elementLimit = q.getElementLimit(); long numBytes = 0; for (long i = 0; i < elementLimit; i++) { - Assert.assertEquals(i, q.getNumElements()); - Assert.assertEquals(numBytes, q.getNumBytes()); + Assertions.assertEquals(i, q.getNumElements()); + Assertions.assertEquals(numBytes, q.getNumBytes()); final 
boolean offered = q.offer(i); - Assert.assertTrue(offered); + Assertions.assertTrue(offered); numBytes += i; assertSizes(i+1, numBytes, q); } { final boolean offered = q.offer(0L); - Assert.assertFalse(offered); + Assertions.assertFalse(offered); assertSizes(elementLimit, numBytes, q); } { // poll all elements final List polled = q.pollList(100, (i, timeout) -> i, getTimeoutHandler(false)); - Assert.assertEquals(elementLimit, polled.size()); + Assertions.assertEquals(elementLimit, polled.size()); for (int i = 0; i < polled.size(); i++) { - Assert.assertEquals(i, polled.get(i).intValue()); + Assertions.assertEquals(i, polled.get(i).intValue()); } } assertSizes(0, 0, q); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testByteLimit() { runTestByteLimit(q); } @@ -90,53 +93,54 @@ static void runTestByteLimit(DataQueue q) { final long byteLimit = q.getByteLimit(); try { q.offer(byteLimit + 1); - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException ignored) { } final long halfBytes = byteLimit / 2; { final boolean offered = q.offer(halfBytes); - Assert.assertTrue(offered); + Assertions.assertTrue(offered); assertSizes(1, halfBytes, q); } { final boolean offered = q.offer(halfBytes + 1); - Assert.assertFalse(offered); + Assertions.assertFalse(offered); assertSizes(1, halfBytes, q); } { final boolean offered = q.offer(halfBytes); - Assert.assertTrue(offered); + Assertions.assertTrue(offered); assertSizes(2, byteLimit, q); } { final boolean offered = q.offer(1L); - Assert.assertFalse(offered); + Assertions.assertFalse(offered); assertSizes(2, byteLimit, q); } { final boolean offered = q.offer(0L); - Assert.assertTrue(offered); + Assertions.assertTrue(offered); assertSizes(3, byteLimit, q); } { // poll all elements final List polled = q.pollList(100, (i, timeout) -> i, getTimeoutHandler(false)); - Assert.assertEquals(3, polled.size()); - Assert.assertEquals(halfBytes, polled.get(0).intValue()); - Assert.assertEquals(halfBytes, 
polled.get(1).intValue()); - Assert.assertEquals(0, polled.get(2).intValue()); + Assertions.assertEquals(3, polled.size()); + Assertions.assertEquals(halfBytes, polled.get(0).intValue()); + Assertions.assertEquals(halfBytes, polled.get(1).intValue()); + Assertions.assertEquals(0, polled.get(2).intValue()); } assertSizes(0, 0, q); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testIteratorAndRemove() { runTestIteratorAndRemove(q); } @@ -149,7 +153,7 @@ static void runTestIteratorAndRemove(DataQueue q) { long numBytes = 0; for(long i = 0; i < elementLimit; i++) { final boolean offered = q.offer(i); - Assert.assertTrue(offered); + Assertions.assertTrue(offered); numElements++; numBytes += i; assertSizes(numElements, numBytes, q); @@ -158,7 +162,7 @@ static void runTestIteratorAndRemove(DataQueue q) { { // test iterator() final Iterator i = q.iterator(); for (long expected = 0; expected < elementLimit; expected++) { - Assert.assertEquals(expected, i.next().longValue()); + Assertions.assertEquals(expected, i.next().longValue()); } } @@ -180,23 +184,24 @@ static void runTestIteratorAndRemove(DataQueue q) { assertSizes(0, 0, q); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testTimeout() { assertSizes(0, 0, q); long numBytes = 0; for (long i = 0; i < elementLimit; i++) { - Assert.assertEquals(i, q.getNumElements()); - Assert.assertEquals(numBytes, q.getNumBytes()); + Assertions.assertEquals(i, q.getNumElements()); + Assertions.assertEquals(numBytes, q.getNumBytes()); final boolean offered = q.offer(i); - Assert.assertTrue(offered); + Assertions.assertTrue(offered); numBytes += i; assertSizes(i+1, numBytes, q); } { // poll with zero time final List polled = q.pollList(0, (i, timeout) -> i, getTimeoutHandler(false)); - Assert.assertTrue(polled.isEmpty()); + Assertions.assertTrue(polled.isEmpty()); assertSizes(elementLimit, numBytes, q); } @@ -209,9 +214,9 @@ public void testTimeout() { } return i; }, getTimeoutHandler(true)); - 
Assert.assertEquals(halfElements, polled.size()); + Assertions.assertEquals(halfElements, polled.size()); for (int i = 0; i < polled.size(); i++) { - Assert.assertEquals(i, polled.get(i).intValue()); + Assertions.assertEquals(i, polled.get(i).intValue()); numBytes -= i; } assertSizes(elementLimit - halfElements, numBytes, q); @@ -219,9 +224,9 @@ public void testTimeout() { { // poll the remaining elements final List polled = q.pollList(100, (i, timeout) -> i, getTimeoutHandler(false)); - Assert.assertEquals(elementLimit - halfElements, polled.size()); + Assertions.assertEquals(elementLimit - halfElements, polled.size()); for (int i = 0; i < polled.size(); i++) { - Assert.assertEquals(halfElements + i, polled.get(i).intValue()); + Assertions.assertEquals(halfElements + i, polled.get(i).intValue()); } } assertSizes(0, 0, q); diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestExitUtils.java b/ratis-test/src/test/java/org/apache/ratis/util/TestExitUtils.java index 25b05fd0df..3c7cc025c9 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestExitUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestExitUtils.java @@ -19,15 +19,17 @@ import org.apache.ratis.BaseTest; import org.apache.ratis.util.ExitUtils.ExitException; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; public class TestExitUtils extends BaseTest { /** Test if {@link BaseTest} can handle uncaught exception. 
*/ - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testUncaughtException() throws Exception { - Assert.assertFalse(ExitUtils.isTerminated()); - Assert.assertFalse(ExitUtils.clear()); + Assertions.assertFalse(ExitUtils.isTerminated()); + Assertions.assertFalse(ExitUtils.clear()); final Thread t = new Thread(null, () -> { throw new AssertionError("Testing"); @@ -35,25 +37,26 @@ public void testUncaughtException() throws Exception { t.start(); t.join(); - Assert.assertTrue(ExitUtils.isTerminated()); - Assert.assertTrue(ExitUtils.clear()); + Assertions.assertTrue(ExitUtils.isTerminated()); + Assertions.assertTrue(ExitUtils.clear()); } /** Test if {@link BaseTest} can handle ExitUtils.terminate(..). */ - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testExitStatus() { - Assert.assertFalse(ExitUtils.isTerminated()); - Assert.assertFalse(ExitUtils.clear()); + Assertions.assertFalse(ExitUtils.isTerminated()); + Assertions.assertFalse(ExitUtils.clear()); final int status = -1; try { ExitUtils.terminate(status, "testExitStatus", LOG); - Assert.fail(); + Assertions.fail(); } catch (ExitException e) { - Assert.assertEquals(status, e.getStatus()); + Assertions.assertEquals(status, e.getStatus()); } - Assert.assertTrue(ExitUtils.isTerminated()); - Assert.assertTrue(ExitUtils.clear()); + Assertions.assertTrue(ExitUtils.isTerminated()); + Assertions.assertTrue(ExitUtils.clear()); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestLifeCycle.java b/ratis-test/src/test/java/org/apache/ratis/util/TestLifeCycle.java index 3faf2a4959..201b510571 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestLifeCycle.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestLifeCycle.java @@ -18,14 +18,19 @@ package org.apache.ratis.util; import org.apache.ratis.util.function.TriConsumer; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +import java.util.Arrays; +import 
java.util.EnumMap; +import java.util.List; +import java.util.Map; import static org.apache.ratis.util.LifeCycle.State.*; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.*; public class TestLifeCycle { /** @@ -33,7 +38,8 @@ public class TestLifeCycle { * {@link LifeCycle} uses predecessors to validate transitions * while this test uses successors. */ - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testIsValid() { final Map> successors = new EnumMap<>(LifeCycle.State.class); @@ -49,9 +55,8 @@ public void testIsValid() { final List states = Arrays.asList(LifeCycle.State.values()); states.forEach( from -> states.forEach( - to -> assertEquals(from + " -> " + to, - successors.get(from).contains(to), - isValid(from, to)))); + to -> assertEquals(successors.get(from).contains(to), + isValid(from, to), from + " -> " + to))); } @Test @@ -81,7 +86,8 @@ public void invalidTransitions() { testInvalidTransition((from, subject, to) -> subject.transitionAndGet(any -> to), true); } - private static void testInvalidTransition(TriConsumer op, boolean shouldThrow) { + private static void testInvalidTransition(TriConsumer op, + boolean shouldThrow) { LifeCycle subject = new LifeCycle("subject"); for (LifeCycle.State to : new LifeCycle.State[] { RUNNING, EXCEPTION, CLOSING }) { LifeCycle.State from = subject.getCurrentState(); @@ -90,7 +96,7 @@ private static void testInvalidTransition(TriConsumer n).limit(10)); @@ -41,13 +43,14 @@ public void testMinMax() { static void runTestMinMax(LongStream stream) { final List list = stream.collect(ArrayList::new, List::add, List::addAll); - final LongMinMax longMinMax = toLongStream(list).collect(LongMinMax::new, LongMinMax::accumulate, 
LongMinMax::combine); + final LongMinMax longMinMax = toLongStream(list) + .collect(LongMinMax::new, LongMinMax::accumulate, LongMinMax::combine); if (longMinMax.isInitialized()) { - Assert.assertEquals(toLongStream(list).min().getAsLong(), longMinMax.getMin()); - Assert.assertEquals(toLongStream(list).max().getAsLong(), longMinMax.getMax()); + Assertions.assertEquals(toLongStream(list).min().getAsLong(), longMinMax.getMin()); + Assertions.assertEquals(toLongStream(list).max().getAsLong(), longMinMax.getMax()); } else { - Assert.assertEquals(OptionalLong.empty(), toLongStream(list).min()); - Assert.assertEquals(OptionalLong.empty(), toLongStream(list).max()); + Assertions.assertEquals(OptionalLong.empty(), toLongStream(list).min()); + Assertions.assertEquals(OptionalLong.empty(), toLongStream(list).max()); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestPeerProxyMap.java b/ratis-test/src/test/java/org/apache/ratis/util/TestPeerProxyMap.java index 549fbc53f8..a2255041f8 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestPeerProxyMap.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestPeerProxyMap.java @@ -20,8 +20,9 @@ import org.apache.ratis.BaseTest; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.io.Closeable; import java.io.IOException; @@ -51,7 +52,8 @@ public String toString() { } } - @Test(timeout = 10_000) + @Test + @Timeout(value = 10) public void testCloseDeadLock() throws Exception { final PeerProxyMap map = new PeerProxyMap<>("test", DummyProxy::new); final RaftPeerId id = RaftPeerId.valueOf("s0"); @@ -68,7 +70,7 @@ public void testCloseDeadLock() throws Exception { HUNDRED_MILLIS.sleep(); LOG.info("Try getProxy"); final DummyProxy newProxy = map.getProxy(id); - Assert.assertNotSame(proxy, 
newProxy); + Assertions.assertNotSame(proxy, newProxy); } catch (Exception e) { setFirstException(e); } @@ -115,7 +117,8 @@ public String toString() { } } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testStackTrace() { final RaftPeerId id = RaftPeerId.valueOf("s0"); final RaftPeer peer = RaftPeer.newBuilder().setId(id).build(); @@ -123,7 +126,7 @@ public void testStackTrace() { final ExceptionProxy ignored = map.computeIfAbsent(peer).get()) { } catch (IOException e) { assertThrowable("closeProxy", e, AnnotatedConnectException.class, LOG, ConnectException.class); - Assert.assertEquals(0, e.getStackTrace().length); + Assertions.assertEquals(0, e.getStackTrace().length); } } } diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestPreconditions.java b/ratis-test/src/test/java/org/apache/ratis/util/TestPreconditions.java index 884c1e5d59..73cf4c1676 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestPreconditions.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestPreconditions.java @@ -18,7 +18,8 @@ package org.apache.ratis.util; import org.apache.ratis.BaseTest; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.util.Arrays; import java.util.Collections; @@ -28,7 +29,8 @@ import java.util.Set; public class TestPreconditions extends BaseTest { - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testAssertUnique() { final Set empty = Collections.emptySet(); Preconditions.assertUnique(empty); @@ -53,7 +55,8 @@ public void testAssertUnique() { Preconditions.assertUnique(three, Arrays.asList(4, 5, 6)); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testAssertNull() { final Map map = new HashMap<>(); final String key = "abc1234"; diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestRaftIdCache.java b/ratis-test/src/test/java/org/apache/ratis/util/TestRaftIdCache.java new file mode 100644 index 0000000000..16d5cd6526 
--- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestRaftIdCache.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.util; + +import org.apache.ratis.BaseTest; +import org.apache.ratis.RaftTestUtil; +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.TestRaftId; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; + +/** Testing {@link WeakValueCache}. 
*/ +public class TestRaftIdCache extends BaseTest { + static WeakValueCache CACHE = TestRaftId.getClientIdCache(); + + static String dumpCache() { + final List values = CACHE.getValues(); + values.sort(Comparator.comparing(ClientId::getUuid)); + String header = CACHE + ": " + values.size(); + System.out.println(header); + System.out.println(" " + values); + return header; + } + + static void assertCache(IDs expectedIDs) { + final List computed = CACHE.getValues(); + computed.sort(Comparator.comparing(ClientId::getUuid)); + + final List expected = expectedIDs.getIds(); + expected.sort(Comparator.comparing(ClientId::getUuid)); + + assertEquals(expected, computed, TestRaftIdCache::dumpCache); + } + + void assertCacheSizeWithGC(IDs expectedIDs) throws Exception{ + JavaUtils.attempt(() -> { + RaftTestUtil.gc(); + assertCache(expectedIDs); + }, 5, HUNDRED_MILLIS, "assertCacheSizeWithGC", LOG); + } + + class IDs { + private final List ids = new LinkedList<>(); + + List getIds() { + return new ArrayList<>(ids); + } + + int size() { + return ids.size(); + } + + ClientId allocate() { + final ClientId id = ClientId.randomId(); + LOG.info("allocate {}", id); + ids.add(id); + return id; + } + + void release() { + final int r = ThreadLocalRandom.current().nextInt(size()); + final ClientId removed = ids.remove(r); + LOG.info("release {}", removed); + } + } + + @Test + public void testCaching() throws Exception { + final int n = 100; + final IDs ids = new IDs(); + assertEquals(0, ids.size()); + assertCache(ids); + + for(int i = 0; i < n; i++) { + final ClientId id = ids.allocate(); + assertSame(id, ClientId.valueOf(id.getUuid())); + assertCache(ids); + } + + for(int i = 0; i < n/2; i++) { + ids.release(); + if (ThreadLocalRandom.current().nextInt(10) == 0) { + assertCacheSizeWithGC(ids); + } + } + assertCacheSizeWithGC(ids); + + for(int i = 0; i < n/2; i++) { + final ClientId id = ids.allocate(); + assertSame(id, ClientId.valueOf(id.getUuid())); + assertCache(ids); + } + + + 
for(int i = 0; i < n; i++) { + ids.release(); + if (ThreadLocalRandom.current().nextInt(10) == 0) { + assertCacheSizeWithGC(ids); + } + } + assertCacheSizeWithGC(ids); + + assertEquals(0, ids.size()); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestReferenceCountedObject.java b/ratis-test/src/test/java/org/apache/ratis/util/TestReferenceCountedObject.java index fe58b92da6..2436310141 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestReferenceCountedObject.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestReferenceCountedObject.java @@ -18,8 +18,9 @@ package org.apache.ratis.util; import org.apache.ratis.util.function.UncheckedAutoCloseableSupplier; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.util.concurrent.atomic.AtomicInteger; @@ -27,8 +28,8 @@ public class TestReferenceCountedObject { static void assertValues( AtomicInteger retained, int expectedRetained, AtomicInteger released, int expectedReleased) { - Assert.assertEquals("retained", expectedRetained, retained.get()); - Assert.assertEquals("released", expectedReleased, released.get()); + Assertions.assertEquals(expectedRetained, retained.get(), "retained"); + Assertions.assertEquals(expectedReleased, released.get(), "released"); } static void assertRelease(ReferenceCountedObject ref, @@ -36,10 +37,11 @@ static void assertRelease(ReferenceCountedObject ref, AtomicInteger released, int expectedReleased) { final boolean returned = ref.release(); assertValues(retained, expectedRetained, released, expectedReleased); - Assert.assertEquals(expectedRetained == expectedReleased, returned); + Assertions.assertEquals(expectedRetained == expectedReleased, returned); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testWrap() { final String value = "testWrap"; final AtomicInteger retained = new AtomicInteger(); @@ -50,19 
+52,19 @@ public void testWrap() { assertValues(retained, 0, released, 0); try { ref.get(); - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { e.printStackTrace(System.out); } assertValues(retained, 0, released, 0); - Assert.assertEquals(value, ref.retain()); + Assertions.assertEquals(value, ref.retain()); assertValues(retained, 1, released, 0); try(UncheckedAutoCloseableSupplier auto = ref.retainAndReleaseOnClose()) { final String got = auto.get(); - Assert.assertEquals(value, got); - Assert.assertSame(got, auto.get()); // it should return the same object. + Assertions.assertEquals(value, got); + Assertions.assertSame(got, auto.get()); // it should return the same object. assertValues(retained, 2, released, 0); } catch (IllegalStateException e) { e.printStackTrace(System.out); @@ -70,12 +72,12 @@ public void testWrap() { assertValues(retained, 2, released, 1); final UncheckedAutoCloseableSupplier notClosing = ref.retainAndReleaseOnClose(); - Assert.assertEquals(value, notClosing.get()); + Assertions.assertEquals(value, notClosing.get()); assertValues(retained, 3, released, 1); assertRelease(ref, retained, 3, released, 2); final UncheckedAutoCloseableSupplier auto = ref.retainAndReleaseOnClose(); - Assert.assertEquals(value, auto.get()); + Assertions.assertEquals(value, auto.get()); assertValues(retained, 4, released, 2); auto.close(); assertValues(retained, 4, released, 3); @@ -87,59 +89,60 @@ public void testWrap() { try { ref.get(); - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { e.printStackTrace(System.out); } try { ref.retain(); - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { e.printStackTrace(System.out); } try(UncheckedAutoCloseable ignore = ref.retainAndReleaseOnClose()) { - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { e.printStackTrace(System.out); } try { ref.release(); - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { 
e.printStackTrace(System.out); } } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testReleaseWithoutRetaining() { final ReferenceCountedObject ref = ReferenceCountedObject.wrap(""); try { ref.release(); - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { e.printStackTrace(System.out); } try { ref.get(); - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { e.printStackTrace(System.out); } try { ref.retain(); - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { e.printStackTrace(System.out); } try(UncheckedAutoCloseable ignore = ref.retainAndReleaseOnClose()) { - Assert.fail(); + Assertions.fail(); } catch (IllegalStateException e) { e.printStackTrace(System.out); } diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestResourceSemaphore.java b/ratis-test/src/test/java/org/apache/ratis/util/TestResourceSemaphore.java index 6fe1aed7ef..72c811d877 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestResourceSemaphore.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestResourceSemaphore.java @@ -19,15 +19,17 @@ import org.apache.ratis.BaseTest; import org.apache.ratis.RaftTestUtil; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import java.util.concurrent.TimeoutException; import static org.apache.ratis.util.ResourceSemaphore.Group.SUCCESS; public class TestResourceSemaphore extends BaseTest { - @Test(timeout = 5000) + @Test + @Timeout(value = 5) public void testGroup() throws InterruptedException, TimeoutException { final int FAILED_IN_ELEMENT_LIMIT = 0; final int FAILED_IN_BYTE_SIZE_LIMIT = 1; @@ -80,15 +82,15 @@ public void testGroup() throws InterruptedException, TimeoutException { } static void assertUsed(ResourceSemaphore.Group g, int... 
expected) { - Assert.assertEquals(expected.length, g.resourceSize()); + Assertions.assertEquals(expected.length, g.resourceSize()); for(int i = 0; i < expected.length; i++) { - Assert.assertEquals(expected[i], g.get(i).used()); + Assertions.assertEquals(expected[i], g.get(i).used()); } } static void assertAcquire(ResourceSemaphore.Group g, int expected, int... permits) { final int computed = g.tryAcquire(permits); - Assert.assertEquals(expected, computed); + Assertions.assertEquals(expected, computed); } static Runnable acquire(ResourceSemaphore.Group g, int... permits) { diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestTermIndex.java b/ratis-test/src/test/java/org/apache/ratis/util/TestTermIndex.java new file mode 100644 index 0000000000..678d7afe68 --- /dev/null +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestTermIndex.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.ratis.util; + +import org.apache.ratis.BaseTest; +import org.apache.ratis.RaftTestUtil; +import org.apache.ratis.server.protocol.ProtocolTestUtils; +import org.apache.ratis.server.protocol.TermIndex; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** Testing {@link BiWeakValueCache}. */ +public class TestTermIndex extends BaseTest { + static BiWeakValueCache CACHE = ProtocolTestUtils.getTermIndexCache(); + + static void dumpCache(Integer expectedEmptyCount) { + final int computed = CACHE.dump(System.out::print); + if (expectedEmptyCount != null) { + assertEquals(expectedEmptyCount, computed); + } + System.out.flush(); + } + + static void assertCacheSize(int expectedSize, long term) { + final int computed = CACHE.count(term); + if (computed != expectedSize) { + dumpCache(null); + } + assertEquals(expectedSize, computed); + } + + void assertCacheSizeWithGC(int expectedSize, long term) throws Exception{ + JavaUtils.attempt(() -> { + RaftTestUtil.gc(); + assertCacheSize(expectedSize, term); + }, 5, HUNDRED_MILLIS, "assertCacheSizeWithGC", LOG); + } + + static void initTermIndex(TermIndex[][] ti, int term, int index) { + ti[term][index] = TermIndex.valueOf(term, index); + } + + @Test + public void testCaching() throws Exception { + final int n = 9; + final TermIndex[][] ti = new TermIndex[n][n]; + final long[] terms = new long[n]; + final long[] indices = new long[n]; + for(int j = 0; j < n; j++) { + terms[j] = j; + indices[j] = j; + } + + assertCacheSize(0, terms[1]); + initTermIndex(ti, 1, 1); + assertSame(ti[1][1], TermIndex.valueOf(terms[1], indices[1])); + assertCacheSize(1, terms[1]); + + initTermIndex(ti, 1, 2); + assertSame(ti[1][1], TermIndex.valueOf(terms[1], indices[1])); + assertSame(ti[1][2], TermIndex.valueOf(terms[1], indices[2])); + assertCacheSize(2, terms[1]); + dumpCache(0); + + initTermIndex(ti, 2, 2); + assertSame(ti[1][1], TermIndex.valueOf(terms[1], indices[1])); + 
assertSame(ti[1][2], TermIndex.valueOf(terms[1], indices[2])); + assertSame(ti[2][2], TermIndex.valueOf(terms[2], indices[2])); + assertCacheSize(2, terms[1]); + assertCacheSize(1, terms[2]); + dumpCache(0); + + ti[1][1] = null; // release ti[1][1]; + assertCacheSizeWithGC(1, terms[1]); + dumpCache(0); + + ti[1][2] = null; // release ti[1][2]; + assertCacheSizeWithGC(0, terms[1]); + dumpCache(1); + + CACHE.cleanupEmptyInnerMaps(); + dumpCache(0); + } +} diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestTimeDuration.java b/ratis-test/src/test/java/org/apache/ratis/util/TestTimeDuration.java index a2c180d922..9ba5e57235 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestTimeDuration.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestTimeDuration.java @@ -17,8 +17,9 @@ */ package org.apache.ratis.util; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.event.Level; import java.util.Arrays; @@ -28,17 +29,18 @@ import static org.apache.ratis.util.TimeDuration.Abbreviation; import static org.apache.ratis.util.TimeDuration.parse; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; public class TestTimeDuration { { Slf4jUtils.setLogLevel(TimeDuration.LOG, Level.DEBUG); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testAbbreviation() { Arrays.asList(TimeUnit.values()) .forEach(a -> assertNotNull(Abbreviation.valueOf(a.name()))); @@ -51,10 +53,11 @@ public void testAbbreviation() { 
Arrays.asList(TimeUnit.values()).forEach(unit -> allSymbols.stream() .map(s -> "0" + s) - .forEach(s -> assertEquals(s, 0L, parse(s, unit)))); + .forEach(s -> assertEquals(0L, parse(s, unit), s))); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testParse() { assertEquals(1L, parse("1_000_000 ns", TimeUnit.MILLISECONDS)); assertEquals(10L, parse("10_000_000 nanos", TimeUnit.MILLISECONDS)); @@ -93,7 +96,8 @@ public void testParse() { assertEquals(2400, parse("100 days", TimeUnit.HOURS)); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testRoundUp() { final long nanosPerSecond = 1_000_000_000L; final TimeDuration oneSecond = TimeDuration.valueOf(1, TimeUnit.SECONDS); @@ -108,7 +112,8 @@ public void testRoundUp() { assertEquals(2*nanosPerSecond, oneSecond.roundUpNanos(nanosPerSecond + 1)); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testTo() { final TimeDuration oneSecond = TimeDuration.valueOf(1, TimeUnit.SECONDS); assertTo(1000, "1000ms", oneSecond, TimeUnit.MILLISECONDS); @@ -131,7 +136,8 @@ static TimeDuration assertTo(long expected, String expectedString, TimeDuration return computed; } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testAddAndSubtract() { final TimeDuration oneSecond = TimeDuration.valueOf(1, TimeUnit.SECONDS); final TimeDuration tenSecond = TimeDuration.valueOf(10, TimeUnit.SECONDS); @@ -184,28 +190,30 @@ public void testAddAndSubtract() { } } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testNegate() { assertNegate(0); assertNegate(1); assertNegate(-1); assertNegate(Long.MAX_VALUE); - Assert.assertEquals( + Assertions.assertEquals( TimeDuration.valueOf(Long.MAX_VALUE, TimeUnit.SECONDS), TimeDuration.valueOf(Long.MIN_VALUE, TimeUnit.SECONDS).negate()); } private static void assertNegate(long n) { - Assert.assertEquals( + Assertions.assertEquals( TimeDuration.valueOf(-n, TimeUnit.SECONDS), TimeDuration.valueOf(n, TimeUnit.SECONDS).negate()); - 
Assert.assertEquals( + Assertions.assertEquals( TimeDuration.valueOf(n, TimeUnit.SECONDS), TimeDuration.valueOf(-n, TimeUnit.SECONDS).negate()); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testMultiply() { assertMultiply(0, TimeDuration.ONE_SECOND, TimeDuration.valueOf(0, TimeUnit.SECONDS)); assertMultiply(0.001, TimeDuration.ONE_SECOND, TimeDuration.ONE_MILLISECOND); @@ -230,12 +238,13 @@ private static void assertMultiply(double multiplier, TimeDuration t, TimeDurati private static void assertMultiply(TimeDuration t, double multiplier, TimeDuration expected) { final TimeDuration computed = t.multiply(multiplier); TimeDuration.LOG.info("assertMultiply: {} x {} = {} ?= {}\n\n", t, multiplier, computed, expected); - Assert.assertEquals(expected.getUnit(), computed.getUnit()); + Assertions.assertEquals(expected.getUnit(), computed.getUnit()); final long d = Math.abs(computed.getDuration() - expected.getDuration()); - Assert.assertTrue(d <= Math.abs(expected.getDuration()) * TimeDuration.ERROR_THRESHOLD); + Assertions.assertTrue(d <= Math.abs(expected.getDuration()) * TimeDuration.ERROR_THRESHOLD); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testHigherLower() { final TimeUnit[] units = {TimeUnit.NANOSECONDS, TimeUnit.MICROSECONDS, TimeUnit.MILLISECONDS, TimeUnit.SECONDS, TimeUnit.MINUTES, TimeUnit.HOURS, TimeUnit.DAYS}; @@ -243,11 +252,12 @@ public void testHigherLower() { assertHigherLower(units[i-1], units[i]); } - Assert.assertSame(TimeUnit.NANOSECONDS, TimeDuration.lowerUnit(TimeUnit.NANOSECONDS)); - Assert.assertSame(TimeUnit.DAYS, TimeDuration.higherUnit(TimeUnit.DAYS)); + Assertions.assertSame(TimeUnit.NANOSECONDS, TimeDuration.lowerUnit(TimeUnit.NANOSECONDS)); + Assertions.assertSame(TimeUnit.DAYS, TimeDuration.higherUnit(TimeUnit.DAYS)); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testCompareTo() { assertTimeDurationCompareTo(TimeDuration.ONE_MINUTE, TimeDuration.ONE_SECOND); @@ -281,7 
+291,7 @@ static void assertTimeDurationCompareTo(TimeDuration larger, TimeDuration smalle } private static void assertHigherLower(TimeUnit lower, TimeUnit higher) { - Assert.assertSame(lower, TimeDuration.lowerUnit(higher)); - Assert.assertSame(higher, TimeDuration.higherUnit(lower)); + Assertions.assertSame(lower, TimeDuration.lowerUnit(higher)); + Assertions.assertSame(higher, TimeDuration.higherUnit(lower)); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/util/TestTimeoutScheduler.java b/ratis-test/src/test/java/org/apache/ratis/util/TestTimeoutScheduler.java index 848428f5b8..e8e9d6eb0c 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestTimeoutScheduler.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestTimeoutScheduler.java @@ -18,8 +18,9 @@ package org.apache.ratis.util; import org.apache.ratis.BaseTest; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.event.Level; import java.util.concurrent.TimeUnit; @@ -41,52 +42,54 @@ public void accept(RuntimeException e) { } void assertNoError() { - Assert.assertFalse(hasError.get()); + Assertions.assertFalse(hasError.get()); } } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testSingleTask() throws Exception { final TimeoutScheduler scheduler = TimeoutScheduler.newInstance(); final TimeDuration grace = TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); scheduler.setGracePeriod(grace); - Assert.assertFalse(scheduler.hasScheduler()); + Assertions.assertFalse(scheduler.hasScheduler()); final ErrorHandler errorHandler = new ErrorHandler(); final AtomicBoolean fired = new AtomicBoolean(false); scheduler.onTimeout(TimeDuration.valueOf(250, TimeUnit.MILLISECONDS), () -> { - Assert.assertFalse(fired.get()); + Assertions.assertFalse(fired.get()); fired.set(true); }, errorHandler); - Assert.assertTrue(scheduler.hasScheduler()); + 
Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertFalse(fired.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertFalse(fired.get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertFalse(fired.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertFalse(fired.get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(fired.get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired.get()); - Assert.assertFalse(scheduler.hasScheduler()); + Assertions.assertTrue(fired.get()); + Assertions.assertFalse(scheduler.hasScheduler()); errorHandler.assertNoError(); scheduler.setGracePeriod(grace); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testMultipleTasks() throws Exception { final TimeoutScheduler scheduler = TimeoutScheduler.newInstance(); final TimeDuration grace = TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); scheduler.setGracePeriod(grace); - Assert.assertFalse(scheduler.hasScheduler()); + Assertions.assertFalse(scheduler.hasScheduler()); final ErrorHandler errorHandler = new ErrorHandler(); @@ -94,126 +97,129 @@ public void testMultipleTasks() throws Exception { for(int i = 0; i < fired.length; i++) { final AtomicBoolean f = fired[i] = new AtomicBoolean(false); scheduler.onTimeout(TimeDuration.valueOf(100*i + 50, TimeUnit.MILLISECONDS), () -> { - Assert.assertFalse(f.get()); + Assertions.assertFalse(f.get()); f.set(true); }, errorHandler); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(scheduler.hasScheduler()); } Thread.sleep(100); - Assert.assertTrue(fired[0].get()); - Assert.assertFalse(fired[1].get()); - Assert.assertFalse(fired[2].get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(fired[0].get()); + 
Assertions.assertFalse(fired[1].get()); + Assertions.assertFalse(fired[2].get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired[0].get()); - Assert.assertTrue(fired[1].get()); - Assert.assertFalse(fired[2].get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(fired[0].get()); + Assertions.assertTrue(fired[1].get()); + Assertions.assertFalse(fired[2].get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired[0].get()); - Assert.assertTrue(fired[1].get()); - Assert.assertTrue(fired[2].get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(fired[0].get()); + Assertions.assertTrue(fired[1].get()); + Assertions.assertTrue(fired[2].get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired[0].get()); - Assert.assertTrue(fired[1].get()); - Assert.assertTrue(fired[2].get()); - Assert.assertFalse(scheduler.hasScheduler()); + Assertions.assertTrue(fired[0].get()); + Assertions.assertTrue(fired[1].get()); + Assertions.assertTrue(fired[2].get()); + Assertions.assertFalse(scheduler.hasScheduler()); errorHandler.assertNoError(); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testExtendingGracePeriod() throws Exception { final TimeoutScheduler scheduler = TimeoutScheduler.newInstance(); final TimeDuration grace = TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); scheduler.setGracePeriod(grace); - Assert.assertFalse(scheduler.hasScheduler()); + Assertions.assertFalse(scheduler.hasScheduler()); final ErrorHandler errorHandler = new ErrorHandler(); { final AtomicBoolean fired = new AtomicBoolean(false); scheduler.onTimeout(TimeDuration.valueOf(150, TimeUnit.MILLISECONDS), () -> { - Assert.assertFalse(fired.get()); + Assertions.assertFalse(fired.get()); fired.set(true); }, errorHandler); - Assert.assertTrue(scheduler.hasScheduler()); + 
Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertFalse(fired.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertFalse(fired.get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(fired.get()); + Assertions.assertTrue(scheduler.hasScheduler()); } { // submit another task during grace period final AtomicBoolean fired2 = new AtomicBoolean(false); scheduler.onTimeout(TimeDuration.valueOf(150, TimeUnit.MILLISECONDS), () -> { - Assert.assertFalse(fired2.get()); + Assertions.assertFalse(fired2.get()); fired2.set(true); }, errorHandler); Thread.sleep(100); - Assert.assertFalse(fired2.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertFalse(fired2.get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired2.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(fired2.get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired2.get()); - Assert.assertFalse(scheduler.hasScheduler()); + Assertions.assertTrue(fired2.get()); + Assertions.assertFalse(scheduler.hasScheduler()); } errorHandler.assertNoError(); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testRestartingScheduler() throws Exception { final TimeoutScheduler scheduler = TimeoutScheduler.newInstance(); final TimeDuration grace = TimeDuration.valueOf(100, TimeUnit.MILLISECONDS); scheduler.setGracePeriod(grace); - Assert.assertFalse(scheduler.hasScheduler()); + Assertions.assertFalse(scheduler.hasScheduler()); final ErrorHandler errorHandler = new ErrorHandler(); for(int i = 0; i < 2; i++) { final AtomicBoolean fired = new AtomicBoolean(false); scheduler.onTimeout(TimeDuration.valueOf(150, TimeUnit.MILLISECONDS), () -> { - Assert.assertFalse(fired.get()); + 
Assertions.assertFalse(fired.get()); fired.set(true); }, errorHandler); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertFalse(fired.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertFalse(fired.get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired.get()); - Assert.assertTrue(scheduler.hasScheduler()); + Assertions.assertTrue(fired.get()); + Assertions.assertTrue(scheduler.hasScheduler()); Thread.sleep(100); - Assert.assertTrue(fired.get()); - Assert.assertFalse(scheduler.hasScheduler()); + Assertions.assertTrue(fired.get()); + Assertions.assertFalse(scheduler.hasScheduler()); } errorHandler.assertNoError(); } - @Test(timeout = 10_000) + @Test + @Timeout(value = 10) public void testShutdown() throws Exception { final TimeoutScheduler scheduler = TimeoutScheduler.newInstance(); - Assert.assertEquals(TimeoutScheduler.DEFAULT_GRACE_PERIOD, scheduler.getGracePeriod()); + Assertions.assertEquals(TimeoutScheduler.DEFAULT_GRACE_PERIOD, scheduler.getGracePeriod()); final ErrorHandler errorHandler = new ErrorHandler(); final int numTasks = 100; @@ -223,7 +229,7 @@ public void testShutdown() throws Exception { } HUNDRED_MILLIS.sleep(); HUNDRED_MILLIS.sleep(); - JavaUtils.attempt(() -> Assert.assertEquals(1, scheduler.getTaskCount()), + JavaUtils.attempt(() -> Assertions.assertEquals(1, scheduler.getTaskCount()), 10, HUNDRED_MILLIS, "only 1 shutdown task is scheduled", LOG); final TimeDuration oneMillis = TimeDuration.valueOf(1, TimeUnit.MILLISECONDS); @@ -234,7 +240,7 @@ public void testShutdown() throws Exception { oneMillis.sleep(); } HUNDRED_MILLIS.sleep(); - JavaUtils.attempt(() -> Assert.assertEquals(1, scheduler.getTaskCount()), + JavaUtils.attempt(() -> Assertions.assertEquals(1, scheduler.getTaskCount()), 10, HUNDRED_MILLIS, "only 1 shutdown task is scheduled", LOG); errorHandler.assertNoError(); diff --git 
a/ratis-test/src/test/java/org/apache/ratis/util/TestTraditionalBinaryPrefix.java b/ratis-test/src/test/java/org/apache/ratis/util/TestTraditionalBinaryPrefix.java index 8b6b7a378f..a699b91c81 100644 --- a/ratis-test/src/test/java/org/apache/ratis/util/TestTraditionalBinaryPrefix.java +++ b/ratis-test/src/test/java/org/apache/ratis/util/TestTraditionalBinaryPrefix.java @@ -17,15 +17,18 @@ */ package org.apache.ratis.util; -import org.junit.Test; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import static org.apache.ratis.util.TraditionalBinaryPrefix.long2String; import static org.apache.ratis.util.TraditionalBinaryPrefix.string2long; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; public class TestTraditionalBinaryPrefix { - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testTraditionalBinaryPrefix() { //test string2long(..) 
String[] symbol = {"k", "m", "g", "t", "p", "e"}; @@ -108,19 +111,19 @@ public void testTraditionalBinaryPrefix() { { // n = 2^e final long n = 1L << e; final String expected = (n/p.getValue()) + " " + p.getSymbol(); - assertEquals("n=" + n, expected, long2String(n, null, 2)); + assertEquals(expected, long2String(n, null, 2), "n=" + n); } { // n = 2^e + 1 final long n = (1L << e) + 1; final String expected = (n/p.getValue()) + trailingZeros + p.getSymbol(); - assertEquals("n=" + n, expected, long2String(n, null, decimalPlace)); + assertEquals(expected, long2String(n, null, decimalPlace), "n=" + n); } { // n = 2^e - 1 final long n = (1L << e) - 1; final String expected = ((n+1)/p.getValue()) + trailingZeros + p.getSymbol(); - assertEquals("n=" + n, expected, long2String(n, null, decimalPlace)); + assertEquals(expected, long2String(n, null, decimalPlace), "n=" + n); } } } @@ -143,7 +146,8 @@ private static String byteDescription(long len) { return long2String(len, "B", 2); } - @Test(timeout = 1000) + @Test + @Timeout(value = 1) public void testUnderscore() { final SizeInBytes value = SizeInBytes.valueOf("1_000_000_000_000_000"); assertEquals(1_000_000_000_000_000L, value.getSize()); diff --git a/ratis-tools/pom.xml b/ratis-tools/pom.xml index c1971ced96..2d81f2467a 100644 --- a/ratis-tools/pom.xml +++ b/ratis-tools/pom.xml @@ -17,12 +17,17 @@ ratis org.apache.ratis - 3.1.0-SNAPSHOT + 3.3.0-SNAPSHOT ratis-tools Apache Ratis Tools + + + true + + org.apache.ratis @@ -36,5 +41,15 @@ org.apache.ratis ratis-common + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.platform + junit-platform-launcher + test + diff --git a/ratis-tools/src/main/java/org/apache/ratis/tools/ParseRatisLog.java b/ratis-tools/src/main/java/org/apache/ratis/tools/ParseRatisLog.java index ea512fa70c..7107977fbb 100644 --- a/ratis-tools/src/main/java/org/apache/ratis/tools/ParseRatisLog.java +++ b/ratis-tools/src/main/java/org/apache/ratis/tools/ParseRatisLog.java @@ -24,7 +24,6 @@ 
import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.raftlog.segmented.LogSegmentPath; import org.apache.ratis.server.raftlog.segmented.LogSegment; -import org.apache.ratis.util.ReferenceCountedObject; import org.apache.ratis.util.SizeInBytes; import java.io.File; @@ -61,7 +60,7 @@ public void dumpSegmentFile() throws IOException { System.out.println("Processing Raft Log file: " + file.getAbsolutePath() + " size:" + file.length()); final int entryCount = LogSegment.readSegmentFile(file, pi.getStartEnd(), maxOpSize, - RaftServerConfigKeys.Log.CorruptionPolicy.EXCEPTION, null, this::processLogEntry); + RaftServerConfigKeys.Log.CorruptionPolicy.EXCEPTION, null, entry -> processLogEntry(entry.get())); System.out.println("Num Total Entries: " + entryCount); System.out.println("Num Conf Entries: " + numConfEntries); System.out.println("Num Metadata Entries: " + numMetadataEntries); @@ -70,8 +69,7 @@ public void dumpSegmentFile() throws IOException { } - private void processLogEntry(ReferenceCountedObject ref) { - final LogEntryProto proto = ref.retain(); + private void processLogEntry(LogEntryProto proto) { if (proto.hasConfigurationEntry()) { numConfEntries++; } else if (proto.hasMetadataEntry()) { @@ -79,13 +77,12 @@ private void processLogEntry(ReferenceCountedObject ref) { } else if (proto.hasStateMachineLogEntry()) { numStateMachineEntries++; } else { - System.out.println("Found an invalid entry: " + proto); + System.out.println("Found invalid entry: " + proto); numInvalidEntries++; } String str = LogProtoUtils.toLogEntryString(proto, smLogToString); System.out.println(str); - ref.release(); } public static class Builder { diff --git a/src/main/resources/ratis-version.properties b/src/main/resources/ratis-version.properties new file mode 100644 index 0000000000..f34dc73dc7 --- /dev/null +++ b/src/main/resources/ratis-version.properties @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or 
more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +name=${project.name} +version=${project.version} +revision=${version-info.scm.commit}