Skip to content

Commit 97a6d4d

Browse files
authored
Merge pull request #130 from davidanthoff/natoken-utf8
Add UTF8 optimized NAToken parser
2 parents 1b9878e + 6557a8b commit 97a6d4d

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

src/utf8optimizations.jl

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,3 +430,46 @@ end
430430
return unsafe_string(pointer(str, i), j-i+1)
431431
end
432432
end
433+
434+
"""
    tryparsenext(na::NAToken{T}, str, i, len, opts) where {T}

Code-unit based method of `tryparsenext` for `NAToken` fields, selected when `str` is a
`String` or `VectorBackedUTF8String` and all three type parameters of `opts` are `UInt8`
subtypes.

Starting at index `i` (after `eatwhitespaces` has advanced it), the field is handled as
follows:

1. If `i` is past `len`, or the code unit at `i` equals `opts.endchar` or satisfies
   `isnewline`, the field is treated as empty. With `na.emptyisna` set this yields
   `missing`. When `i` is past `len` and `na.emptyisna` is not set, a failure is returned.
2. Unless `na.inner` is an `Unknown` token, the inner token is tried; on success its value
   is converted to `T` and returned with the index reported by the inner parser.
3. Otherwise the field is re-read as a raw `WeakRefString{UInt8}` and looked up in
   `na.nastrings` with `searchsorted` (so `na.nastrings` is assumed to be sorted). A hit
   yields `missing` and the index after the string and any following whitespace.

# Returns
A tuple `(result, index)` where `result` is a `Nullable{T}`: it wraps the parsed value,
wraps `missing`, or is empty (`Nullable{T}()`) on failure. On failure `index` is the
position after the leading whitespace.
"""
function tryparsenext(na::NAToken{T}, str::Union{VectorBackedUTF8String, String}, i, len,
                      opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) where {T}
    R = Nullable{T}
    i = eatwhitespaces(str, i, len)
    if i > len
        # Nothing is left after the leading whitespace: the field is empty.
        if na.emptyisna
            @goto null
        else
            @goto error
        end
    end

    # Look at the raw code unit rather than decoding a Char.
    @inbounds b = codeunit(str, i)
    if (b == opts.endchar || isnewline(b)) && na.emptyisna
        @goto null
    end

    # With an `Unknown` inner token the inner parse is skipped entirely and only the
    # NA-string lookup is performed.
    if isa(na.inner, Unknown)
        @goto maybe_null
    end
    # NOTE(review): `@chk2` is defined outside this view; it presumably jumps to the given
    # label (here `maybe_null`) when the wrapped parse fails -- confirm against its
    # definition.
    @chk2 x,ii = tryparsenext(na.inner, str, i, len, opts) maybe_null

    return R(convert(T, x)), ii

    @label maybe_null
    # Same options as `opts` except that the fifth positional argument is forced to
    # `false`. NOTE(review): the meaning of that field is not visible here -- confirm
    # against the `LocalOpts` definition.
    naopts = LocalOpts(opts.endchar, opts.spacedelim, opts.quotechar,
                       opts.escapechar, false, opts.includenewlines)
    # No label is passed, so a failure here presumably goes to `@chk2`'s default label
    # (`error` in this function) -- confirm.
    @chk2 nastr, ii = tryparsenext(StringToken(WeakRefString{UInt8}), str, i, len, naopts)
    if !isempty(searchsorted(na.nastrings, nastr))
        # Recognised NA marker: step past it and any whitespace that follows.
        i = eatwhitespaces(str, ii, len)
        @goto null
    end
    return R(), i

    @label null
    return R(missing), i

    @label error
    return R(), i
end

0 commit comments

Comments
 (0)