Add arm64 NEON path to computeCoverage

This commit is contained in:
Ryan Oldenburg 2022-07-31 23:04:10 -05:00
parent b0910ed08a
commit 93e9ef79b0

View file

@@ -1410,13 +1410,21 @@ proc computeCoverage(
let fillLen = at.integer - fillStart let fillLen = at.integer - fillStart
if fillLen > 0: if fillLen > 0:
var i = fillStart var i = fillStart
when defined(amd64) and allowSimd: when allowSimd:
let sampleCoverageVec = mm_set1_epi8(sampleCoverage) when defined(amd64):
for _ in 0 ..< fillLen div 16: let sampleCoverageVec = mm_set1_epi8(sampleCoverage)
var coverageVec = mm_loadu_si128(coverages[i - startX].addr) for _ in 0 ..< fillLen div 16:
coverageVec = mm_add_epi8(coverageVec, sampleCoverageVec) var coverageVec = mm_loadu_si128(coverages[i - startX].addr)
mm_storeu_si128(coverages[i - startX].addr, coverageVec) coverageVec = mm_add_epi8(coverageVec, sampleCoverageVec)
i += 16 mm_storeu_si128(coverages[i - startX].addr, coverageVec)
i += 16
elif defined(arm64):
let sampleCoverageVec = vmovq_n_u8(sampleCoverage)
for _ in 0 ..< fillLen div 16:
var coverageVec = vld1q_u8(coverages[i - startX].addr)
coverageVec = vaddq_u8(coverageVec, sampleCoverageVec)
vst1q_u8(coverages[i - startX].addr, coverageVec)
i += 16
for j in i ..< fillStart + fillLen: for j in i ..< fillStart + fillLen:
coverages[j - startX] += sampleCoverage coverages[j - startX] += sampleCoverage