-
Notifications
You must be signed in to change notification settings - Fork 824
[HLK] Modify Wave match test logic to support modifications in different lanes and vector position #7991
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[HLK] Modify Wave match test logic to support modifications in different lanes and vector position #7991
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1593,41 +1593,86 @@ template <typename T> T waveMultiPrefixProduct(T A, UINT) { | |
|
|
||
| template <typename T> struct Op<OpType::WaveMatch, T, 1> : StrictValidation {}; | ||
|
|
||
| // Helper struct to build the expected result for WaveMatch tests. | ||
| struct WaveMatchResultBuilder { | ||
|
|
||
| private: | ||
| uint64_t LowWaveMask; | ||
| uint64_t HighWaveMask; | ||
| uint64_t LowBits; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason you opted for two uint64 values instead of 4 uints or one 128-bit uint?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Standard C++ does not currently include a 128-bit uint. This requires a compiler extension, I was having trouble compiling using it, and I didn't want to modify the cmake definition to support it. So, I created my own. Using the 2 64 bit uint seemed the simplest implementation with built-in types. |
||
| uint64_t HighBits; | ||
|
|
||
| public: | ||
| WaveMatchResultBuilder(UINT NumWaves) : LowBits(0), HighBits(0) { | ||
| const UINT LowWaves = std::min(64U, NumWaves); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Take it or leave it. But I would consider defensively adding a 'VERIFY_IS_TRUE(NumWaves <= 128)' just to help identify this path as needing an update if/when we ever increase the max size. That or you could also do a std::max for the HighWaves logic. |
||
| const UINT HighWaves = NumWaves - LowWaves; | ||
| LowWaveMask = (LowWaves < 64) ? (1ULL << LowWaves) - 1 : ~0ULL; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you could help improve readability of this code (not that its bad) by using inline helpers or static constexp helpers for some of the bit math instead. LowWaveMask = lowNBitsSet(LowWaves); '~0ULL' is probably fine. It's simple and common. |
||
| HighWaveMask = (HighWaves < 64) ? (1ULL << HighWaves) - 1 : ~0ULL; | ||
| LowBits &= LowWaveMask; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The assignment to LowBits and HighBits seems redundant? They're initialized to zero so the result of these operations will still always just be 0? |
||
| HighBits &= HighWaveMask; | ||
| } | ||
|
|
||
| void SetLane(UINT LaneID) { | ||
| if (LaneID < 64) | ||
| LowBits |= (1ULL << LaneID) & LowWaveMask; | ||
| else | ||
| HighBits |= (1ULL << (LaneID - 64)) & HighWaveMask; | ||
| } | ||
|
|
||
| void ClearLane(UINT LaneID) { | ||
| if (LaneID < 64) | ||
| LowBits &= ~(1ULL << LaneID) & LowWaveMask; | ||
| else | ||
| HighBits &= ~(1ULL << (LaneID - 64)) & HighWaveMask; | ||
| } | ||
|
|
||
| void InvertLanes() { | ||
| LowBits = ~LowBits & LowWaveMask; | ||
| HighBits = ~HighBits & HighWaveMask; | ||
| } | ||
|
|
||
| void SetExpected(UINT *Dest) { | ||
| Dest[0] = static_cast<UINT>(LowBits); | ||
| Dest[1] = static_cast<UINT>(LowBits >> 32); | ||
| Dest[2] = static_cast<UINT>(HighBits); | ||
| Dest[3] = static_cast<UINT>(HighBits >> 32); | ||
| } | ||
| }; | ||
|
|
||
| template <typename T> struct ExpectedBuilder<OpType::WaveMatch, T> { | ||
| static std::vector<UINT> buildExpected(Op<OpType::WaveMatch, T, 1> &, | ||
| const InputSets<T> &, | ||
| const UINT WaveSize) { | ||
| // For this test, the shader arranges it so that lane 0 is different from | ||
| // all the other lanes. Besides that all other lines write their result of | ||
| // WaveMatch as well. | ||
| // For this test, the shader arranges it so that lanes 0, WAVE_SIZE/2 and | ||
| // WAVE_SIZE-1 are different from all the other lanes, also those | ||
| // lanes modify the vector at positions 0, WAVE_SIZE/2 and WAVE_SIZE-1 | ||
| // respectively, if the input vector has enough elements. Besides that all | ||
| // other lanes write their result of WaveMatch as well. | ||
|
|
||
| std::vector<UINT> Expected; | ||
| Expected.assign(WaveSize * 4, 0); | ||
|
|
||
| const UINT LowWaves = std::min(64U, WaveSize); | ||
| const UINT HighWaves = WaveSize - LowWaves; | ||
| const UINT MidBit = WaveSize / 2; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Naming on this should be 'MidLaneID' and 'LastLaneID' respectively. |
||
| const UINT LastBit = WaveSize - 1; | ||
|
|
||
| const uint64_t LowWaveMask = | ||
| (LowWaves < 64) ? (1ULL << LowWaves) - 1 : ~0ULL; | ||
| WaveMatchResultBuilder UnchangedLanes(WaveSize); | ||
| UnchangedLanes.InvertLanes(); | ||
| UnchangedLanes.ClearLane(0); | ||
| UnchangedLanes.ClearLane(MidBit); | ||
| UnchangedLanes.ClearLane(LastBit); | ||
|
|
||
| const uint64_t HighWaveMask = | ||
| (HighWaves < 64) ? (1ULL << HighWaves) - 1 : ~0ULL; | ||
| for (UINT I = 0; I < WaveSize; ++I) { | ||
| const UINT Index = I * 4; | ||
|
|
||
| const uint64_t LowExpected = ~1ULL & LowWaveMask; | ||
| const uint64_t HighExpected = ~0ULL & HighWaveMask; | ||
| if (I == 0 || MidBit == I || LastBit == I) { | ||
| WaveMatchResultBuilder ChangedLanes(WaveSize); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What were you intending to do with ChangedLanes? It's declared as a local in this for loop and not used outside of it.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
WaveMatchResultBuilder ChangedLanes(WaveSize);
for (UINT LaneID = 0; LaneID < WaveSize; ++LaneID) {
const UINT Index = LaneID * 4;
if (LaneID == 0 || LaneID == MidLaneID || LaneID == LastLaneID) {
ChangedLanes.SetLane(LaneID);
ChangedLanes.SetExpected(&Expected[Index]);
ChangedLanes.ClearLane(LaneID);
continue;
}
UnchangedLanes.SetExpected(&Expected[Index]);
}Is that better @alsepkow? |
||
| ChangedLanes.SetLane(I); | ||
|
|
||
| Expected[0] = 1; | ||
| Expected[1] = 0; | ||
| Expected[2] = 0; | ||
| Expected[3] = 0; | ||
| ChangedLanes.SetExpected(&Expected[Index]); | ||
| continue; | ||
| } | ||
|
|
||
| // all lanes other than the first one have the same result | ||
| for (UINT I = 1; I < WaveSize; ++I) { | ||
| const UINT Index = I * 4; | ||
| Expected[Index] = static_cast<UINT>(LowExpected); | ||
| Expected[Index + 1] = static_cast<UINT>(LowExpected >> 32); | ||
| Expected[Index + 2] = static_cast<UINT>(HighExpected); | ||
| Expected[Index + 3] = static_cast<UINT>(HighExpected >> 32); | ||
| UnchangedLanes.SetExpected(&Expected[Index]); | ||
| } | ||
|
|
||
| return Expected; | ||
|
|
@@ -1893,7 +1938,7 @@ class DxilConf_SM69_Vectorized { | |
| VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport( | ||
| D3D12_FEATURE_D3D12_OPTIONS1, &WaveOpts, sizeof(WaveOpts))); | ||
|
|
||
| WaveSize = WaveOpts.WaveLaneCountMin; | ||
| WaveSize = 128; // WaveOpts.WaveLaneCountMin; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think you intended to leave this commented out |
||
| } | ||
|
|
||
| DXASSERT_NOMSG(WaveSize > 0); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think LowBits/HighBits are actually intended to represent the currently active lanes for a given group?
Suggest updating the names to something like 'ActiveLanesLow' and 'ActiveLanesHigh' to better reflect that.