Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
nitely committed Jan 4, 2024
1 parent 5ae92a6 commit 64ddc4c
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 20 deletions.
2 changes: 2 additions & 0 deletions src/regex.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1446,6 +1446,8 @@ when isMainModule:
let flags = {regexArbitraryBytes}
doAssert match("\xff", re2(r"\xff", flags))
doAssert replace("\xff", re2(r"\xff", flags), "abc") == "abc"
doAssert match("\xff\xff", re2(r"\xff\xff", flags))
doAssert replace("\xff\xff", re2(r"\xff\xff", flags), "abc") == "abc"

doAssert graph(toRegex(re2"^a+$")) == """digraph graphname {
0 [label="q0";color=blue];
Expand Down
3 changes: 2 additions & 1 deletion src/regex/compiler.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ func reImpl*(s: string, flags: RegexFlags = {}): Regex {.inline.} =
.parse
.transformExp(groups)
let nfa = rpn.nfa2()
let opt = rpn.litopt3()
let bytesMode = regexArbitraryBytes in flags
let opt = rpn.litopt3(bytesMode)
result = Regex(
nfa: nfa,
groupsCount: groups.count,
Expand Down
35 changes: 21 additions & 14 deletions src/regex/litopt.nim
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func addBytes(s: var string, cp: uint32) =
s.add (cp and 0xff'u32).char
cp = cp shr 8

func lits(exp: RpnExp): Lits =
func lits(exp: RpnExp, bytesMode: bool): Lits =
template state: untyped = litNfa.s[stateIdx]
result.idx = exp.delimiterLit()
if result.idx == -1:
Expand Down Expand Up @@ -226,16 +226,16 @@ func lits(exp: RpnExp): Lits =
litIdxEnd = i
doAssert litIdxEnd >= litIdxStart
var ss = ""
var ssb = ""
for i in litIdxStart .. litIdxEnd:
let cp = litNfa.s[lits[i]].cp
ss.add cp
ssb.addBytes cp.uint32
if bytesMode:
for i in litIdxStart .. litIdxEnd:
ss.addBytes litNfa.s[lits[i]].cp.uint32
else:
for i in litIdxStart .. litIdxEnd:
ss.add litNfa.s[lits[i]].cp
# true for literal sequences and, outside bytes mode, for non-ASCII chars (>127, multi-byte in UTF-8)
if ss.len > 1:
result.idx = find(exp.s, litNfa.s[lits[litIdxStart]].uid)
result.s.add ss
result.sb.add ssb

func prefix(eNfa: Enfa, uid: NodeUid): Enfa =
template state0: untyped = eNfa.s.len.int16-1
Expand Down Expand Up @@ -295,14 +295,13 @@ type
func canOpt*(litOpt: LitOpt): bool =
  ## Reports whether `litOpt` carries a usable literal optimization,
  ## which is the case exactly when its `nfa.s` is non-empty.
  result = litOpt.nfa.s.len > 0

func litopt3*(exp: RpnExp): LitOpt =
func litopt3*(exp: RpnExp, bytesMode = false): LitOpt =
template litNode: untyped = exp.s[lits2.idx]
let lits2 = exp.lits()
let lits2 = exp.lits(bytesMode)
if lits2.idx == -1:
return
result.lit = litNode.cp
result.lits = lits2.s
result.bytelits = lits2.sb
result.nfa = exp
.subExps
.eNfa
Expand Down Expand Up @@ -333,9 +332,14 @@ when isMainModule:
return opt.lits

func bytelits(s: string): string =
let opt = s.rpn.litopt3
let opt = s.rpn.litopt3(bytesMode = true)
if not opt.canOpt: return
return opt.bytelits
return opt.lits

func lit(s: string): Rune =
  ## Helper for the assertions below: returns the `lit` rune that
  ## `litopt3` selects for the pattern `s`.
  ## When the pattern cannot be optimized the default `Rune` is
  ## returned; beware Rune(0) is valid, so it does not by itself
  ## signal "no literal".
  let optimized = litopt3(rpn(s))
  if canOpt(optimized):
    result = optimized.lit

func prefix(s: string): Nfa =
let opt = s.rpn.litopt3
Expand Down Expand Up @@ -512,8 +516,11 @@ when isMainModule:
doAssert bytelits"\xff\x0f" == "\xff\x0f"
doAssert bytelits"\x80\x80" == "\x80\x80"
doAssert bytelits"\x00\x00" == "\x00\x00"
doAssert bytelits"\x80" == "\x80" # 128
doAssert bytelits"\x7F" == "" # 127
doAssert lit"\xff" == '\xff'.Rune
doAssert lit"\x80" == '\x80'.Rune # 128
doAssert lit"\x7F" == '\x7F'.Rune # 127
doAssert bytelits"\xff" == ""
doAssert bytelits"\x00" == ""

doAssert r"abc".prefix.toString == r"".toNfa.toString
doAssert r"\dabc".prefix.toString == r"\d".toNfa.toString
Expand Down
7 changes: 2 additions & 5 deletions src/regex/nfafindall2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ func findSomeOptImpl*(
initMaybeImpl(ms, regexSize, groupsLen)
ms.clear()
let flags = regex.flags.toMatchFlags + {mfFindMatchOpt}
let binFlag = mfBytesInput in flags
let hasLits = opt.lits.len > 0
let step = max(1, opt.lits.len)
var limit = start.int
Expand All @@ -291,12 +290,10 @@ func findSomeOptImpl*(
doAssert i > i2; i2 = i
#debugEcho "lit=", opt.lit
#debugEcho "i=", i
let litIdx = if not hasLits:
text.find(opt.lit.char, i)
elif not binFlag:
let litIdx = if hasLits:
text.find(opt.lits, i)
else:
text.find(opt.bytelits, i)
text.find(opt.lit.char, i)
if litIdx == -1:
return -1
#debugEcho "litIdx=", litIdx
Expand Down

0 comments on commit 64ddc4c

Please sign in to comment.