Update AVX

This commit is contained in:
GabsPuNs
2026-04-22 22:33:13 -04:00
parent 8e3abc5c3a
commit 85d36adf27
2 changed files with 223 additions and 1 deletions

View File

@@ -1965,7 +1965,121 @@ bool LevelRenderer::updateDirtyChunks()
// Scan the clip chunks stored at index p for dirty entries and select rebuild
// candidates. When built with AVX the chunks are first consumed in groups of
// eight (distances for a whole group are computed with 256-bit float
// arithmetic); any remaining chunks fall through to the scalar loop whose body
// follows this region. Without AVX the scalar loop handles every chunk.
int numClipChunks = static_cast<int>(chunks[p].length);
ClipChunk *pClipChunk = chunks[p].data;
#ifdef __AVX__
int i = 0;
// Broadcast the reference position (px, py, pz) into all eight lanes.
__m256 vPx = _mm256_set1_ps((float)px);
__m256 vPy = _mm256_set1_ps((float)py);
__m256 vPz = _mm256_set1_ps((float)pz);
// Process full groups of eight chunks; a shorter remainder is left for the
// scalar loop below.
for (; i <= numClipChunks - 8; i += 8)
{
// Cheap pre-check: skip the whole group when none of its chunks has a valid
// global index (>= 0) with CHUNK_FLAG_DIRTY set.
bool anyDirty = false;
for(int j=0; j<8; ++j) {
int gIdx = pClipChunk[i+j].globalIdx;
if (gIdx >= 0 && (globalChunkFlags[gIdx] & CHUNK_FLAG_DIRTY)) {
anyDirty = true; break;
}
}
if (!anyDirty) continue;
// Gather the xm/ym/zm fields of the eight chunks into 32-byte aligned float
// arrays so they can be read with aligned 256-bit loads.
// (presumably xm/ym/zm are the chunk's midpoint coordinates - TODO confirm)
alignas(32) float cx[8], cy[8], cz[8];
for (int j = 0; j < 8; ++j)
{
cx[j] = (float)pClipChunk[i+j].xm;
cy[j] = (float)pClipChunk[i+j].ym;
cz[j] = (float)pClipChunk[i+j].zm;
}
__m256 vCx = _mm256_load_ps(cx);
__m256 vCy = _mm256_load_ps(cy);
__m256 vCz = _mm256_load_ps(cz);
// Per-lane offset from the reference position.
__m256 vXd = _mm256_sub_ps(vCx, vPx);
__m256 vYd = _mm256_sub_ps(vCy, vPy);
__m256 vZd = _mm256_sub_ps(vCz, vPz);
// Plain squared distance: xd^2 + yd^2 + zd^2.
__m256 vDistSq = _mm256_add_ps(_mm256_mul_ps(vXd, vXd),
_mm256_add_ps(_mm256_mul_ps(vYd, vYd),
_mm256_mul_ps(vZd, vZd)));
// Weighted squared distance: adding 3*yd^2 makes the total
// xd^2 + 4*yd^2 + zd^2, i.e. the vertical offset counts four times.
__m256 vYd2_3 = _mm256_mul_ps(_mm256_set1_ps(3.0f), _mm256_mul_ps(vYd, vYd));
__m256 vDistSqWeighted = _mm256_add_ps(vDistSq, vYd2_3);
// Spill both distance vectors to memory for the per-chunk scalar logic below.
alignas(32) float distOut[8], distWOut[8];
_mm256_store_ps(distOut, vDistSq);
_mm256_store_ps(distWOut, vDistSqWeighted);
// Evaluate each chunk of the group individually.
for (int j = 0; j < 8; ++j)
{
ClipChunk* currentChunk = &pClipChunk[i+j];
const int gIdx = currentChunk->globalIdx;
// Ignore chunks without a valid global index and chunks that are not dirty.
if (gIdx < 0 || !(globalChunkFlags[gIdx] & CHUNK_FLAG_DIRTY)) continue;
Chunk *chunk = currentChunk->chunk;
if (chunk == nullptr) continue;
// Vertical slice index derived from ym.
// NOTE(review): the next call passes ySlice * 16 while this line uses
// CHUNK_SIZE - confirm the two are meant to be the same value.
const int ySlice = (currentChunk->ym - (CHUNK_SIZE / 2)) / CHUNK_SIZE;
LevelChunk *lc = level[p]->getChunkAt(chunk->x, chunk->z);
// No level chunk, or the slice reports itself empty: clear the dirty state,
// set CHUNK_FLAG_EMPTYBOTH and move on to the next chunk.
if (lc == nullptr || lc->isRenderChunkEmpty(ySlice * 16))
{
chunk->clearDirty();
globalChunkFlags[gIdx] |= CHUNK_FLAG_EMPTYBOTH;
continue;
}
// Truncate the float distances to int for the comparisons below.
// NOTE(review): float carries 24 mantissa bits, so squared distances above
// 2^24 are rounded before this cast - confirm that is acceptable relative to
// the scalar loop.
int dSq = (int)distOut[j];
int dSqW = (int)distWOut[j];
// Candidate when not in onlyRebuild mode, or the chunk already has
// CHUNK_FLAG_COMPILED, or it lies within 96 units (dSq < 96 * 96).
if ( (!onlyRebuild) || (globalChunkFlags[gIdx] & CHUNK_FLAG_COMPILED) || (dSq < 96 * 96) )
{
#ifdef _LARGE_WORLDS
// Walk nearestClipChunks until an entry with a larger stored distance is
// found; itNearest is left at that entry (or at end()) and is used as the
// insertion position below. An empty list, or one holding fewer than
// maxNearestChunks entries, also counts as "nearer".
// (presumably the list is kept in ascending distance order - TODO confirm)
bool isNearer = nearestClipChunks.empty();
auto itNearest = nearestClipChunks.begin();
for(; itNearest != nearestClipChunks.end(); ++itNearest)
{
isNearer = dSqW < itNearest->second;
if(isNearer) break;
}
isNearer = isNearer || (nearestClipChunks.size() < maxNearestChunks);
#else
bool isNearer = dSqW < minDistSq;
#endif
#ifdef _CRITICAL_CHUNKS
// Accept when nearer and no very-near chunk has been counted yet, or when
// the chunk has CHUNK_FLAG_CRITICAL and lies within 20 units.
if( (!veryNearCount && isNearer) || (dSq < 20 * 20 && (globalChunkFlags[gIdx] & CHUNK_FLAG_CRITICAL)) )
#else
if( isNearer )
#endif
{
// Record this chunk as the current nearest candidate.
nearChunk = currentChunk;
minDistSq = dSqW;
#ifdef _LARGE_WORLDS
// Insert before itNearest and drop the last entry once the list holds more
// than maxNearestChunks entries.
nearestClipChunks.insert(itNearest, std::make_pair(nearChunk, minDistSq));
if(nearestClipChunks.size() > maxNearestChunks)
nearestClipChunks.pop_back();
#endif
}
// Count chunks within 20 units (only those with CHUNK_FLAG_CRITICAL when
// _CRITICAL_CHUNKS is defined).
#ifdef _CRITICAL_CHUNKS
if( dSq < 20 * 20 && (globalChunkFlags[gIdx] & CHUNK_FLAG_CRITICAL) )
#else
if( dSq < 20 * 20 )
#endif
{
veryNearCount++;
}
}
}
}
// Point pClipChunk at the first chunk not covered by a full group of eight so
// the scalar loop below continues from index i.
pClipChunk = &chunks[p].data[i];
for( ; i < numClipChunks; i++, pClipChunk++ )
#else
// OG (original) scalar loop: without AVX every chunk is visited here.
for( int i = 0; i < numClipChunks; i++, pClipChunk++ )
#endif
{
// Fast reject: skip non-dirty chunks immediately before any distance work.
// globalIdx can be -1 for unassigned chunks.