1
2
3
4
5 package syntax
6
7 import (
8 "sort"
9 "strings"
10 "sync"
11 "unicode"
12 "unicode/utf8"
13 )
14
15
16
17 type Error struct {
18 Code ErrorCode
19 Expr string
20 }
21
22 func (e *Error) Error() string {
23 return "error parsing regexp: " + e.Code.String() + ": `" + e.Expr + "`"
24 }
25
26
27 type ErrorCode string
28
29 const (
30
31 ErrInternalError ErrorCode = "regexp/syntax: internal error"
32
33
34 ErrInvalidCharClass ErrorCode = "invalid character class"
35 ErrInvalidCharRange ErrorCode = "invalid character class range"
36 ErrInvalidEscape ErrorCode = "invalid escape sequence"
37 ErrInvalidNamedCapture ErrorCode = "invalid named capture"
38 ErrInvalidPerlOp ErrorCode = "invalid or unsupported Perl syntax"
39 ErrInvalidRepeatOp ErrorCode = "invalid nested repetition operator"
40 ErrInvalidRepeatSize ErrorCode = "invalid repeat count"
41 ErrInvalidUTF8 ErrorCode = "invalid UTF-8"
42 ErrMissingBracket ErrorCode = "missing closing ]"
43 ErrMissingParen ErrorCode = "missing closing )"
44 ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
45 ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
46 ErrUnexpectedParen ErrorCode = "unexpected )"
47 ErrNestingDepth ErrorCode = "expression nests too deeply"
48 ErrLarge ErrorCode = "expression too large"
49 )
50
51 func (e ErrorCode) String() string {
52 return string(e)
53 }
54
55
56 type Flags uint16
57
58 const (
59 FoldCase Flags = 1 << iota
60 Literal
61 ClassNL
62 DotNL
63 OneLine
64 NonGreedy
65 PerlX
66 UnicodeGroups
67 WasDollar
68 Simple
69
70 MatchNL = ClassNL | DotNL
71
72 Perl = ClassNL | OneLine | PerlX | UnicodeGroups
73 POSIX Flags = 0
74 )
75
76
77 const (
78 opLeftParen = opPseudo + iota
79 opVerticalBar
80 )
81
82
83
84
85
86
87
88
89
90
91
92
93
94 const maxHeight = 1000
95
96
97
98
99
100
101
102 const (
103 maxSize = 128 << 20 / instSize
104 instSize = 5 * 8
105 )
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122 const (
123 maxRunes = 128 << 20 / runeSize
124 runeSize = 4
125 )
126
127 type parser struct {
128 flags Flags
129 stack []*Regexp
130 free *Regexp
131 numCap int
132 wholeRegexp string
133 tmpClass []rune
134 numRegexp int
135 numRunes int
136 repeats int64
137 height map[*Regexp]int
138 size map[*Regexp]int64
139 }
140
141 func (p *parser) newRegexp(op Op) *Regexp {
142 re := p.free
143 if re != nil {
144 p.free = re.Sub0[0]
145 *re = Regexp{}
146 } else {
147 re = new(Regexp)
148 p.numRegexp++
149 }
150 re.Op = op
151 return re
152 }
153
154 func (p *parser) reuse(re *Regexp) {
155 if p.height != nil {
156 delete(p.height, re)
157 }
158 re.Sub0[0] = p.free
159 p.free = re
160 }
161
162 func (p *parser) checkLimits(re *Regexp) {
163 if p.numRunes > maxRunes {
164 panic(ErrLarge)
165 }
166 p.checkSize(re)
167 p.checkHeight(re)
168 }
169
170 func (p *parser) checkSize(re *Regexp) {
171 if p.size == nil {
172
173
174
175
176
177 if p.repeats == 0 {
178 p.repeats = 1
179 }
180 if re.Op == OpRepeat {
181 n := re.Max
182 if n == -1 {
183 n = re.Min
184 }
185 if n <= 0 {
186 n = 1
187 }
188 if int64(n) > maxSize/p.repeats {
189 p.repeats = maxSize
190 } else {
191 p.repeats *= int64(n)
192 }
193 }
194 if int64(p.numRegexp) < maxSize/p.repeats {
195 return
196 }
197
198
199
200
201 p.size = make(map[*Regexp]int64)
202 for _, re := range p.stack {
203 p.checkSize(re)
204 }
205 }
206
207 if p.calcSize(re, true) > maxSize {
208 panic(ErrLarge)
209 }
210 }
211
212 func (p *parser) calcSize(re *Regexp, force bool) int64 {
213 if !force {
214 if size, ok := p.size[re]; ok {
215 return size
216 }
217 }
218
219 var size int64
220 switch re.Op {
221 case OpLiteral:
222 size = int64(len(re.Rune))
223 case OpCapture, OpStar:
224
225 size = 2 + p.calcSize(re.Sub[0], false)
226 case OpPlus, OpQuest:
227 size = 1 + p.calcSize(re.Sub[0], false)
228 case OpConcat:
229 for _, sub := range re.Sub {
230 size += p.calcSize(sub, false)
231 }
232 case OpAlternate:
233 for _, sub := range re.Sub {
234 size += p.calcSize(sub, false)
235 }
236 if len(re.Sub) > 1 {
237 size += int64(len(re.Sub)) - 1
238 }
239 case OpRepeat:
240 sub := p.calcSize(re.Sub[0], false)
241 if re.Max == -1 {
242 if re.Min == 0 {
243 size = 2 + sub
244 } else {
245 size = 1 + int64(re.Min)*sub
246 }
247 break
248 }
249
250 size = int64(re.Max)*sub + int64(re.Max-re.Min)
251 }
252
253 size = max(1, size)
254 p.size[re] = size
255 return size
256 }
257
258 func (p *parser) checkHeight(re *Regexp) {
259 if p.numRegexp < maxHeight {
260 return
261 }
262 if p.height == nil {
263 p.height = make(map[*Regexp]int)
264 for _, re := range p.stack {
265 p.checkHeight(re)
266 }
267 }
268 if p.calcHeight(re, true) > maxHeight {
269 panic(ErrNestingDepth)
270 }
271 }
272
273 func (p *parser) calcHeight(re *Regexp, force bool) int {
274 if !force {
275 if h, ok := p.height[re]; ok {
276 return h
277 }
278 }
279 h := 1
280 for _, sub := range re.Sub {
281 hsub := p.calcHeight(sub, false)
282 if h < 1+hsub {
283 h = 1 + hsub
284 }
285 }
286 p.height[re] = h
287 return h
288 }
289
290
291
292
293 func (p *parser) push(re *Regexp) *Regexp {
294 p.numRunes += len(re.Rune)
295 if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
296
297 if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
298 return nil
299 }
300 re.Op = OpLiteral
301 re.Rune = re.Rune[:1]
302 re.Flags = p.flags &^ FoldCase
303 } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
304 re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
305 unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
306 unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
307 re.Op == OpCharClass && len(re.Rune) == 2 &&
308 re.Rune[0]+1 == re.Rune[1] &&
309 unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
310 unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
311
312 if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
313 return nil
314 }
315
316
317 re.Op = OpLiteral
318 re.Rune = re.Rune[:1]
319 re.Flags = p.flags | FoldCase
320 } else {
321
322 p.maybeConcat(-1, 0)
323 }
324
325 p.stack = append(p.stack, re)
326 p.checkLimits(re)
327 return re
328 }
329
330
331
332
333
334
335
336
337
338
339 func (p *parser) maybeConcat(r rune, flags Flags) bool {
340 n := len(p.stack)
341 if n < 2 {
342 return false
343 }
344
345 re1 := p.stack[n-1]
346 re2 := p.stack[n-2]
347 if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {
348 return false
349 }
350
351
352 re2.Rune = append(re2.Rune, re1.Rune...)
353
354
355 if r >= 0 {
356 re1.Rune = re1.Rune0[:1]
357 re1.Rune[0] = r
358 re1.Flags = flags
359 return true
360 }
361
362 p.stack = p.stack[:n-1]
363 p.reuse(re1)
364 return false
365 }
366
367
368 func (p *parser) literal(r rune) {
369 re := p.newRegexp(OpLiteral)
370 re.Flags = p.flags
371 if p.flags&FoldCase != 0 {
372 r = minFoldRune(r)
373 }
374 re.Rune0[0] = r
375 re.Rune = re.Rune0[:1]
376 p.push(re)
377 }
378
379
380 func minFoldRune(r rune) rune {
381 if r < minFold || r > maxFold {
382 return r
383 }
384 m := r
385 r0 := r
386 for r = unicode.SimpleFold(r); r != r0; r = unicode.SimpleFold(r) {
387 m = min(m, r)
388 }
389 return m
390 }
391
392
393
394 func (p *parser) op(op Op) *Regexp {
395 re := p.newRegexp(op)
396 re.Flags = p.flags
397 return p.push(re)
398 }
399
400
401
402
403
404 func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (string, error) {
405 flags := p.flags
406 if p.flags&PerlX != 0 {
407 if len(after) > 0 && after[0] == '?' {
408 after = after[1:]
409 flags ^= NonGreedy
410 }
411 if lastRepeat != "" {
412
413
414
415 return "", &Error{ErrInvalidRepeatOp, lastRepeat[:len(lastRepeat)-len(after)]}
416 }
417 }
418 n := len(p.stack)
419 if n == 0 {
420 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
421 }
422 sub := p.stack[n-1]
423 if sub.Op >= opPseudo {
424 return "", &Error{ErrMissingRepeatArgument, before[:len(before)-len(after)]}
425 }
426
427 re := p.newRegexp(op)
428 re.Min = min
429 re.Max = max
430 re.Flags = flags
431 re.Sub = re.Sub0[:1]
432 re.Sub[0] = sub
433 p.stack[n-1] = re
434 p.checkLimits(re)
435
436 if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
437 return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
438 }
439
440 return after, nil
441 }
442
443
444
445
446
447
448
449
450
451
452 func repeatIsValid(re *Regexp, n int) bool {
453 if re.Op == OpRepeat {
454 m := re.Max
455 if m == 0 {
456 return true
457 }
458 if m < 0 {
459 m = re.Min
460 }
461 if m > n {
462 return false
463 }
464 if m > 0 {
465 n /= m
466 }
467 }
468 for _, sub := range re.Sub {
469 if !repeatIsValid(sub, n) {
470 return false
471 }
472 }
473 return true
474 }
475
476
477 func (p *parser) concat() *Regexp {
478 p.maybeConcat(-1, 0)
479
480
481 i := len(p.stack)
482 for i > 0 && p.stack[i-1].Op < opPseudo {
483 i--
484 }
485 subs := p.stack[i:]
486 p.stack = p.stack[:i]
487
488
489 if len(subs) == 0 {
490 return p.push(p.newRegexp(OpEmptyMatch))
491 }
492
493 return p.push(p.collapse(subs, OpConcat))
494 }
495
496
497 func (p *parser) alternate() *Regexp {
498
499
500 i := len(p.stack)
501 for i > 0 && p.stack[i-1].Op < opPseudo {
502 i--
503 }
504 subs := p.stack[i:]
505 p.stack = p.stack[:i]
506
507
508
509 if len(subs) > 0 {
510 cleanAlt(subs[len(subs)-1])
511 }
512
513
514
515 if len(subs) == 0 {
516 return p.push(p.newRegexp(OpNoMatch))
517 }
518
519 return p.push(p.collapse(subs, OpAlternate))
520 }
521
522
523 func cleanAlt(re *Regexp) {
524 switch re.Op {
525 case OpCharClass:
526 re.Rune = cleanClass(&re.Rune)
527 if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {
528 re.Rune = nil
529 re.Op = OpAnyChar
530 return
531 }
532 if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {
533 re.Rune = nil
534 re.Op = OpAnyCharNotNL
535 return
536 }
537 if cap(re.Rune)-len(re.Rune) > 100 {
538
539
540 re.Rune = append(re.Rune0[:0], re.Rune...)
541 }
542 }
543 }
544
545
546
547
548
549 func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
550 if len(subs) == 1 {
551 return subs[0]
552 }
553 re := p.newRegexp(op)
554 re.Sub = re.Sub0[:0]
555 for _, sub := range subs {
556 if sub.Op == op {
557 re.Sub = append(re.Sub, sub.Sub...)
558 p.reuse(sub)
559 } else {
560 re.Sub = append(re.Sub, sub)
561 }
562 }
563 if op == OpAlternate {
564 re.Sub = p.factor(re.Sub)
565 if len(re.Sub) == 1 {
566 old := re
567 re = re.Sub[0]
568 p.reuse(old)
569 }
570 }
571 return re
572 }
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589 func (p *parser) factor(sub []*Regexp) []*Regexp {
590 if len(sub) < 2 {
591 return sub
592 }
593
594
595 var str []rune
596 var strflags Flags
597 start := 0
598 out := sub[:0]
599 for i := 0; i <= len(sub); i++ {
600
601
602
603
604
605
606 var istr []rune
607 var iflags Flags
608 if i < len(sub) {
609 istr, iflags = p.leadingString(sub[i])
610 if iflags == strflags {
611 same := 0
612 for same < len(str) && same < len(istr) && str[same] == istr[same] {
613 same++
614 }
615 if same > 0 {
616
617
618 str = str[:same]
619 continue
620 }
621 }
622 }
623
624
625
626
627
628
629 if i == start {
630
631 } else if i == start+1 {
632
633 out = append(out, sub[start])
634 } else {
635
636 prefix := p.newRegexp(OpLiteral)
637 prefix.Flags = strflags
638 prefix.Rune = append(prefix.Rune[:0], str...)
639
640 for j := start; j < i; j++ {
641 sub[j] = p.removeLeadingString(sub[j], len(str))
642 p.checkLimits(sub[j])
643 }
644 suffix := p.collapse(sub[start:i], OpAlternate)
645
646 re := p.newRegexp(OpConcat)
647 re.Sub = append(re.Sub[:0], prefix, suffix)
648 out = append(out, re)
649 }
650
651
652 start = i
653 str = istr
654 strflags = iflags
655 }
656 sub = out
657
658
659
660
661
662
663
664
665
666 start = 0
667 out = sub[:0]
668 var first *Regexp
669 for i := 0; i <= len(sub); i++ {
670
671
672
673
674
675 var ifirst *Regexp
676 if i < len(sub) {
677 ifirst = p.leadingRegexp(sub[i])
678 if first != nil && first.Equal(ifirst) &&
679
680 (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
681 continue
682 }
683 }
684
685
686
687
688
689 if i == start {
690
691 } else if i == start+1 {
692
693 out = append(out, sub[start])
694 } else {
695
696 prefix := first
697 for j := start; j < i; j++ {
698 reuse := j != start
699 sub[j] = p.removeLeadingRegexp(sub[j], reuse)
700 p.checkLimits(sub[j])
701 }
702 suffix := p.collapse(sub[start:i], OpAlternate)
703
704 re := p.newRegexp(OpConcat)
705 re.Sub = append(re.Sub[:0], prefix, suffix)
706 out = append(out, re)
707 }
708
709
710 start = i
711 first = ifirst
712 }
713 sub = out
714
715
716 start = 0
717 out = sub[:0]
718 for i := 0; i <= len(sub); i++ {
719
720
721
722
723
724
725 if i < len(sub) && isCharClass(sub[i]) {
726 continue
727 }
728
729
730
731 if i == start {
732
733 } else if i == start+1 {
734 out = append(out, sub[start])
735 } else {
736
737
738 max := start
739 for j := start + 1; j < i; j++ {
740 if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
741 max = j
742 }
743 }
744 sub[start], sub[max] = sub[max], sub[start]
745
746 for j := start + 1; j < i; j++ {
747 mergeCharClass(sub[start], sub[j])
748 p.reuse(sub[j])
749 }
750 cleanAlt(sub[start])
751 out = append(out, sub[start])
752 }
753
754
755 if i < len(sub) {
756 out = append(out, sub[i])
757 }
758 start = i + 1
759 }
760 sub = out
761
762
763 start = 0
764 out = sub[:0]
765 for i := range sub {
766 if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch {
767 continue
768 }
769 out = append(out, sub[i])
770 }
771 sub = out
772
773 return sub
774 }
775
776
777
778 func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
779 if re.Op == OpConcat && len(re.Sub) > 0 {
780 re = re.Sub[0]
781 }
782 if re.Op != OpLiteral {
783 return nil, 0
784 }
785 return re.Rune, re.Flags & FoldCase
786 }
787
788
789
790 func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
791 if re.Op == OpConcat && len(re.Sub) > 0 {
792
793
794 sub := re.Sub[0]
795 sub = p.removeLeadingString(sub, n)
796 re.Sub[0] = sub
797 if sub.Op == OpEmptyMatch {
798 p.reuse(sub)
799 switch len(re.Sub) {
800 case 0, 1:
801
802 re.Op = OpEmptyMatch
803 re.Sub = nil
804 case 2:
805 old := re
806 re = re.Sub[1]
807 p.reuse(old)
808 default:
809 copy(re.Sub, re.Sub[1:])
810 re.Sub = re.Sub[:len(re.Sub)-1]
811 }
812 }
813 return re
814 }
815
816 if re.Op == OpLiteral {
817 re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]
818 if len(re.Rune) == 0 {
819 re.Op = OpEmptyMatch
820 }
821 }
822 return re
823 }
824
825
826
827 func (p *parser) leadingRegexp(re *Regexp) *Regexp {
828 if re.Op == OpEmptyMatch {
829 return nil
830 }
831 if re.Op == OpConcat && len(re.Sub) > 0 {
832 sub := re.Sub[0]
833 if sub.Op == OpEmptyMatch {
834 return nil
835 }
836 return sub
837 }
838 return re
839 }
840
841
842
843
844 func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
845 if re.Op == OpConcat && len(re.Sub) > 0 {
846 if reuse {
847 p.reuse(re.Sub[0])
848 }
849 re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]
850 switch len(re.Sub) {
851 case 0:
852 re.Op = OpEmptyMatch
853 re.Sub = nil
854 case 1:
855 old := re
856 re = re.Sub[0]
857 p.reuse(old)
858 }
859 return re
860 }
861 if reuse {
862 p.reuse(re)
863 }
864 return p.newRegexp(OpEmptyMatch)
865 }
866
867 func literalRegexp(s string, flags Flags) *Regexp {
868 re := &Regexp{Op: OpLiteral}
869 re.Flags = flags
870 re.Rune = re.Rune0[:0]
871 for _, c := range s {
872 if len(re.Rune) >= cap(re.Rune) {
873
874 re.Rune = []rune(s)
875 break
876 }
877 re.Rune = append(re.Rune, c)
878 }
879 return re
880 }
881
882
883
884
885
886
887 func Parse(s string, flags Flags) (*Regexp, error) {
888 return parse(s, flags)
889 }
890
891 func parse(s string, flags Flags) (_ *Regexp, err error) {
892 defer func() {
893 switch r := recover(); r {
894 default:
895 panic(r)
896 case nil:
897
898 case ErrLarge:
899 err = &Error{Code: ErrLarge, Expr: s}
900 case ErrNestingDepth:
901 err = &Error{Code: ErrNestingDepth, Expr: s}
902 }
903 }()
904
905 if flags&Literal != 0 {
906
907 if err := checkUTF8(s); err != nil {
908 return nil, err
909 }
910 return literalRegexp(s, flags), nil
911 }
912
913
914 var (
915 p parser
916 c rune
917 op Op
918 lastRepeat string
919 )
920 p.flags = flags
921 p.wholeRegexp = s
922 t := s
923 for t != "" {
924 repeat := ""
925 BigSwitch:
926 switch t[0] {
927 default:
928 if c, t, err = nextRune(t); err != nil {
929 return nil, err
930 }
931 p.literal(c)
932
933 case '(':
934 if p.flags&PerlX != 0 && len(t) >= 2 && t[1] == '?' {
935
936 if t, err = p.parsePerlFlags(t); err != nil {
937 return nil, err
938 }
939 break
940 }
941 p.numCap++
942 p.op(opLeftParen).Cap = p.numCap
943 t = t[1:]
944 case '|':
945 p.parseVerticalBar()
946 t = t[1:]
947 case ')':
948 if err = p.parseRightParen(); err != nil {
949 return nil, err
950 }
951 t = t[1:]
952 case '^':
953 if p.flags&OneLine != 0 {
954 p.op(OpBeginText)
955 } else {
956 p.op(OpBeginLine)
957 }
958 t = t[1:]
959 case '$':
960 if p.flags&OneLine != 0 {
961 p.op(OpEndText).Flags |= WasDollar
962 } else {
963 p.op(OpEndLine)
964 }
965 t = t[1:]
966 case '.':
967 if p.flags&DotNL != 0 {
968 p.op(OpAnyChar)
969 } else {
970 p.op(OpAnyCharNotNL)
971 }
972 t = t[1:]
973 case '[':
974 if t, err = p.parseClass(t); err != nil {
975 return nil, err
976 }
977 case '*', '+', '?':
978 before := t
979 switch t[0] {
980 case '*':
981 op = OpStar
982 case '+':
983 op = OpPlus
984 case '?':
985 op = OpQuest
986 }
987 after := t[1:]
988 if after, err = p.repeat(op, 0, 0, before, after, lastRepeat); err != nil {
989 return nil, err
990 }
991 repeat = before
992 t = after
993 case '{':
994 op = OpRepeat
995 before := t
996 min, max, after, ok := p.parseRepeat(t)
997 if !ok {
998
999 p.literal('{')
1000 t = t[1:]
1001 break
1002 }
1003 if min < 0 || min > 1000 || max > 1000 || max >= 0 && min > max {
1004
1005 return nil, &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
1006 }
1007 if after, err = p.repeat(op, min, max, before, after, lastRepeat); err != nil {
1008 return nil, err
1009 }
1010 repeat = before
1011 t = after
1012 case '\\':
1013 if p.flags&PerlX != 0 && len(t) >= 2 {
1014 switch t[1] {
1015 case 'A':
1016 p.op(OpBeginText)
1017 t = t[2:]
1018 break BigSwitch
1019 case 'b':
1020 p.op(OpWordBoundary)
1021 t = t[2:]
1022 break BigSwitch
1023 case 'B':
1024 p.op(OpNoWordBoundary)
1025 t = t[2:]
1026 break BigSwitch
1027 case 'C':
1028
1029 return nil, &Error{ErrInvalidEscape, t[:2]}
1030 case 'Q':
1031
1032 var lit string
1033 lit, t, _ = strings.Cut(t[2:], `\E`)
1034 for lit != "" {
1035 c, rest, err := nextRune(lit)
1036 if err != nil {
1037 return nil, err
1038 }
1039 p.literal(c)
1040 lit = rest
1041 }
1042 break BigSwitch
1043 case 'z':
1044 p.op(OpEndText)
1045 t = t[2:]
1046 break BigSwitch
1047 }
1048 }
1049
1050 re := p.newRegexp(OpCharClass)
1051 re.Flags = p.flags
1052
1053
1054 if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
1055 r, rest, err := p.parseUnicodeClass(t, re.Rune0[:0])
1056 if err != nil {
1057 return nil, err
1058 }
1059 if r != nil {
1060 re.Rune = r
1061 t = rest
1062 p.push(re)
1063 break BigSwitch
1064 }
1065 }
1066
1067
1068 if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
1069 re.Rune = r
1070 t = rest
1071 p.push(re)
1072 break BigSwitch
1073 }
1074 p.reuse(re)
1075
1076
1077 if c, t, err = p.parseEscape(t); err != nil {
1078 return nil, err
1079 }
1080 p.literal(c)
1081 }
1082 lastRepeat = repeat
1083 }
1084
1085 p.concat()
1086 if p.swapVerticalBar() {
1087
1088 p.stack = p.stack[:len(p.stack)-1]
1089 }
1090 p.alternate()
1091
1092 n := len(p.stack)
1093 if n != 1 {
1094 return nil, &Error{ErrMissingParen, s}
1095 }
1096 return p.stack[0], nil
1097 }
1098
1099
1100
1101
1102 func (p *parser) parseRepeat(s string) (min, max int, rest string, ok bool) {
1103 if s == "" || s[0] != '{' {
1104 return
1105 }
1106 s = s[1:]
1107 var ok1 bool
1108 if min, s, ok1 = p.parseInt(s); !ok1 {
1109 return
1110 }
1111 if s == "" {
1112 return
1113 }
1114 if s[0] != ',' {
1115 max = min
1116 } else {
1117 s = s[1:]
1118 if s == "" {
1119 return
1120 }
1121 if s[0] == '}' {
1122 max = -1
1123 } else if max, s, ok1 = p.parseInt(s); !ok1 {
1124 return
1125 } else if max < 0 {
1126
1127 min = -1
1128 }
1129 }
1130 if s == "" || s[0] != '}' {
1131 return
1132 }
1133 rest = s[1:]
1134 ok = true
1135 return
1136 }
1137
1138
1139
1140
1141 func (p *parser) parsePerlFlags(s string) (rest string, err error) {
1142 t := s
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159 startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<'
1160 startsWithName := len(t) > 3 && t[2] == '<'
1161
1162 if startsWithP || startsWithName {
1163
1164 exprStartPos := 4
1165 if startsWithName {
1166 exprStartPos = 3
1167 }
1168
1169
1170 end := strings.IndexRune(t, '>')
1171 if end < 0 {
1172 if err = checkUTF8(t); err != nil {
1173 return "", err
1174 }
1175 return "", &Error{ErrInvalidNamedCapture, s}
1176 }
1177
1178 capture := t[:end+1]
1179 name := t[exprStartPos:end]
1180 if err = checkUTF8(name); err != nil {
1181 return "", err
1182 }
1183 if !isValidCaptureName(name) {
1184 return "", &Error{ErrInvalidNamedCapture, capture}
1185 }
1186
1187
1188 p.numCap++
1189 re := p.op(opLeftParen)
1190 re.Cap = p.numCap
1191 re.Name = name
1192 return t[end+1:], nil
1193 }
1194
1195
1196 var c rune
1197 t = t[2:]
1198 flags := p.flags
1199 sign := +1
1200 sawFlag := false
1201 Loop:
1202 for t != "" {
1203 if c, t, err = nextRune(t); err != nil {
1204 return "", err
1205 }
1206 switch c {
1207 default:
1208 break Loop
1209
1210
1211 case 'i':
1212 flags |= FoldCase
1213 sawFlag = true
1214 case 'm':
1215 flags &^= OneLine
1216 sawFlag = true
1217 case 's':
1218 flags |= DotNL
1219 sawFlag = true
1220 case 'U':
1221 flags |= NonGreedy
1222 sawFlag = true
1223
1224
1225 case '-':
1226 if sign < 0 {
1227 break Loop
1228 }
1229 sign = -1
1230
1231
1232 flags = ^flags
1233 sawFlag = false
1234
1235
1236 case ':', ')':
1237 if sign < 0 {
1238 if !sawFlag {
1239 break Loop
1240 }
1241 flags = ^flags
1242 }
1243 if c == ':' {
1244
1245 p.op(opLeftParen)
1246 }
1247 p.flags = flags
1248 return t, nil
1249 }
1250 }
1251
1252 return "", &Error{ErrInvalidPerlOp, s[:len(s)-len(t)]}
1253 }
1254
1255
1256
1257
1258
1259
1260 func isValidCaptureName(name string) bool {
1261 if name == "" {
1262 return false
1263 }
1264 for _, c := range name {
1265 if c != '_' && !isalnum(c) {
1266 return false
1267 }
1268 }
1269 return true
1270 }
1271
1272
1273 func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
1274 if s == "" || s[0] < '0' || '9' < s[0] {
1275 return
1276 }
1277
1278 if len(s) >= 2 && s[0] == '0' && '0' <= s[1] && s[1] <= '9' {
1279 return
1280 }
1281 t := s
1282 for s != "" && '0' <= s[0] && s[0] <= '9' {
1283 s = s[1:]
1284 }
1285 rest = s
1286 ok = true
1287
1288 t = t[:len(t)-len(s)]
1289 for i := 0; i < len(t); i++ {
1290
1291 if n >= 1e8 {
1292 n = -1
1293 break
1294 }
1295 n = n*10 + int(t[i]) - '0'
1296 }
1297 return
1298 }
1299
1300
1301
1302 func isCharClass(re *Regexp) bool {
1303 return re.Op == OpLiteral && len(re.Rune) == 1 ||
1304 re.Op == OpCharClass ||
1305 re.Op == OpAnyCharNotNL ||
1306 re.Op == OpAnyChar
1307 }
1308
1309
1310 func matchRune(re *Regexp, r rune) bool {
1311 switch re.Op {
1312 case OpLiteral:
1313 return len(re.Rune) == 1 && re.Rune[0] == r
1314 case OpCharClass:
1315 for i := 0; i < len(re.Rune); i += 2 {
1316 if re.Rune[i] <= r && r <= re.Rune[i+1] {
1317 return true
1318 }
1319 }
1320 return false
1321 case OpAnyCharNotNL:
1322 return r != '\n'
1323 case OpAnyChar:
1324 return true
1325 }
1326 return false
1327 }
1328
1329
1330 func (p *parser) parseVerticalBar() {
1331 p.concat()
1332
1333
1334
1335
1336
1337 if !p.swapVerticalBar() {
1338 p.op(opVerticalBar)
1339 }
1340 }
1341
1342
1343
1344
1345 func mergeCharClass(dst, src *Regexp) {
1346 switch dst.Op {
1347 case OpAnyChar:
1348
1349 case OpAnyCharNotNL:
1350
1351 if matchRune(src, '\n') {
1352 dst.Op = OpAnyChar
1353 }
1354 case OpCharClass:
1355
1356 if src.Op == OpLiteral {
1357 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1358 } else {
1359 dst.Rune = appendClass(dst.Rune, src.Rune)
1360 }
1361 case OpLiteral:
1362
1363 if src.Rune[0] == dst.Rune[0] && src.Flags == dst.Flags {
1364 break
1365 }
1366 dst.Op = OpCharClass
1367 dst.Rune = appendLiteral(dst.Rune[:0], dst.Rune[0], dst.Flags)
1368 dst.Rune = appendLiteral(dst.Rune, src.Rune[0], src.Flags)
1369 }
1370 }
1371
1372
1373
1374
1375 func (p *parser) swapVerticalBar() bool {
1376
1377
1378 n := len(p.stack)
1379 if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {
1380 re1 := p.stack[n-1]
1381 re3 := p.stack[n-3]
1382
1383 if re1.Op > re3.Op {
1384 re1, re3 = re3, re1
1385 p.stack[n-3] = re3
1386 }
1387 mergeCharClass(re3, re1)
1388 p.reuse(re1)
1389 p.stack = p.stack[:n-1]
1390 return true
1391 }
1392
1393 if n >= 2 {
1394 re1 := p.stack[n-1]
1395 re2 := p.stack[n-2]
1396 if re2.Op == opVerticalBar {
1397 if n >= 3 {
1398
1399
1400 cleanAlt(p.stack[n-3])
1401 }
1402 p.stack[n-2] = re1
1403 p.stack[n-1] = re2
1404 return true
1405 }
1406 }
1407 return false
1408 }
1409
1410
1411 func (p *parser) parseRightParen() error {
1412 p.concat()
1413 if p.swapVerticalBar() {
1414
1415 p.stack = p.stack[:len(p.stack)-1]
1416 }
1417 p.alternate()
1418
1419 n := len(p.stack)
1420 if n < 2 {
1421 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1422 }
1423 re1 := p.stack[n-1]
1424 re2 := p.stack[n-2]
1425 p.stack = p.stack[:n-2]
1426 if re2.Op != opLeftParen {
1427 return &Error{ErrUnexpectedParen, p.wholeRegexp}
1428 }
1429
1430 p.flags = re2.Flags
1431 if re2.Cap == 0 {
1432
1433 p.push(re1)
1434 } else {
1435 re2.Op = OpCapture
1436 re2.Sub = re2.Sub0[:1]
1437 re2.Sub[0] = re1
1438 p.push(re2)
1439 }
1440 return nil
1441 }
1442
1443
1444
1445 func (p *parser) parseEscape(s string) (r rune, rest string, err error) {
1446 t := s[1:]
1447 if t == "" {
1448 return 0, "", &Error{ErrTrailingBackslash, ""}
1449 }
1450 c, t, err := nextRune(t)
1451 if err != nil {
1452 return 0, "", err
1453 }
1454
1455 Switch:
1456 switch c {
1457 default:
1458 if c < utf8.RuneSelf && !isalnum(c) {
1459
1460
1461
1462
1463 return c, t, nil
1464 }
1465
1466
1467 case '1', '2', '3', '4', '5', '6', '7':
1468
1469 if t == "" || t[0] < '0' || t[0] > '7' {
1470 break
1471 }
1472 fallthrough
1473 case '0':
1474
1475 r = c - '0'
1476 for i := 1; i < 3; i++ {
1477 if t == "" || t[0] < '0' || t[0] > '7' {
1478 break
1479 }
1480 r = r*8 + rune(t[0]) - '0'
1481 t = t[1:]
1482 }
1483 return r, t, nil
1484
1485
1486 case 'x':
1487 if t == "" {
1488 break
1489 }
1490 if c, t, err = nextRune(t); err != nil {
1491 return 0, "", err
1492 }
1493 if c == '{' {
1494
1495
1496
1497
1498 nhex := 0
1499 r = 0
1500 for {
1501 if t == "" {
1502 break Switch
1503 }
1504 if c, t, err = nextRune(t); err != nil {
1505 return 0, "", err
1506 }
1507 if c == '}' {
1508 break
1509 }
1510 v := unhex(c)
1511 if v < 0 {
1512 break Switch
1513 }
1514 r = r*16 + v
1515 if r > unicode.MaxRune {
1516 break Switch
1517 }
1518 nhex++
1519 }
1520 if nhex == 0 {
1521 break Switch
1522 }
1523 return r, t, nil
1524 }
1525
1526
1527 x := unhex(c)
1528 if c, t, err = nextRune(t); err != nil {
1529 return 0, "", err
1530 }
1531 y := unhex(c)
1532 if x < 0 || y < 0 {
1533 break
1534 }
1535 return x*16 + y, t, nil
1536
1537
1538
1539
1540
1541
1542
1543 case 'a':
1544 return '\a', t, err
1545 case 'f':
1546 return '\f', t, err
1547 case 'n':
1548 return '\n', t, err
1549 case 'r':
1550 return '\r', t, err
1551 case 't':
1552 return '\t', t, err
1553 case 'v':
1554 return '\v', t, err
1555 }
1556 return 0, "", &Error{ErrInvalidEscape, s[:len(s)-len(t)]}
1557 }
1558
1559
1560
1561 func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err error) {
1562 if s == "" {
1563 return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
1564 }
1565
1566
1567
1568 if s[0] == '\\' {
1569 return p.parseEscape(s)
1570 }
1571
1572 return nextRune(s)
1573 }
1574
1575 type charGroup struct {
1576 sign int
1577 class []rune
1578 }
1579
1580
1581
1582
1583
1584
1585 func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
1586 if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
1587 return
1588 }
1589 g := perlGroup[s[0:2]]
1590 if g.sign == 0 {
1591 return
1592 }
1593 return p.appendGroup(r, g), s[2:]
1594 }
1595
1596
1597
1598
1599 func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) {
1600 if len(s) < 2 || s[0] != '[' || s[1] != ':' {
1601 return
1602 }
1603
1604 i := strings.Index(s[2:], ":]")
1605 if i < 0 {
1606 return
1607 }
1608 i += 2
1609 name, s := s[0:i+2], s[i+2:]
1610 g := posixGroup[name]
1611 if g.sign == 0 {
1612 return nil, "", &Error{ErrInvalidCharRange, name}
1613 }
1614 return p.appendGroup(r, g), s, nil
1615 }
1616
1617 func (p *parser) appendGroup(r []rune, g charGroup) []rune {
1618 if p.flags&FoldCase == 0 {
1619 if g.sign < 0 {
1620 r = appendNegatedClass(r, g.class)
1621 } else {
1622 r = appendClass(r, g.class)
1623 }
1624 } else {
1625 tmp := p.tmpClass[:0]
1626 tmp = appendFoldedClass(tmp, g.class)
1627 p.tmpClass = tmp
1628 tmp = cleanClass(&p.tmpClass)
1629 if g.sign < 0 {
1630 r = appendNegatedClass(r, tmp)
1631 } else {
1632 r = appendClass(r, tmp)
1633 }
1634 }
1635 return r
1636 }
1637
1638 var anyTable = &unicode.RangeTable{
1639 R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
1640 R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
1641 }
1642
1643 var asciiTable = &unicode.RangeTable{
1644 R16: []unicode.Range16{{Lo: 0, Hi: 0x7F, Stride: 1}},
1645 }
1646
1647 var asciiFoldTable = &unicode.RangeTable{
1648 R16: []unicode.Range16{
1649 {Lo: 0, Hi: 0x7F, Stride: 1},
1650 {Lo: 0x017F, Hi: 0x017F, Stride: 1},
1651 {Lo: 0x212A, Hi: 0x212A, Stride: 1},
1652 },
1653 }
1654
1655
1656
1657 var categoryAliases struct {
1658 once sync.Once
1659 m map[string]string
1660 }
1661
1662
1663 func initCategoryAliases() {
1664 categoryAliases.m = make(map[string]string)
1665 for name, actual := range unicode.CategoryAliases {
1666 categoryAliases.m[canonicalName(name)] = actual
1667 }
1668 }
1669
1670
1671
1672
1673
1674
1675 func canonicalName(name string) string {
1676 var b []byte
1677 first := true
1678 for i := range len(name) {
1679 c := name[i]
1680 switch {
1681 case c == '_' || c == '-' || c == ' ':
1682 c = ' '
1683 case first:
1684 if 'a' <= c && c <= 'z' {
1685 c -= 'a' - 'A'
1686 }
1687 first = false
1688 default:
1689 if 'A' <= c && c <= 'Z' {
1690 c += 'a' - 'A'
1691 }
1692 }
1693 if b == nil {
1694 if c == name[i] && c != ' ' {
1695
1696 continue
1697 }
1698 b = make([]byte, i, len(name))
1699 copy(b, name[:i])
1700 }
1701 if c == ' ' {
1702 continue
1703 }
1704 b = append(b, c)
1705 }
1706 if b == nil {
1707 return name
1708 }
1709 return string(b)
1710 }
1711
1712
1713
1714
1715 func unicodeTable(name string) (tab, fold *unicode.RangeTable, sign int) {
1716 name = canonicalName(name)
1717
1718
1719
1720 switch name {
1721 case "Any":
1722 return anyTable, anyTable, +1
1723 case "Assigned":
1724 return unicode.Cn, unicode.Cn, -1
1725 case "Ascii":
1726 return asciiTable, asciiFoldTable, +1
1727 case "Lc":
1728 return unicode.Categories["LC"], unicode.FoldCategory["LC"], +1
1729 }
1730 if t := unicode.Categories[name]; t != nil {
1731 return t, unicode.FoldCategory[name], +1
1732 }
1733 if t := unicode.Scripts[name]; t != nil {
1734 return t, unicode.FoldScript[name], +1
1735 }
1736
1737
1738
1739
1740 categoryAliases.once.Do(initCategoryAliases)
1741 if actual := categoryAliases.m[name]; actual != "" {
1742 t := unicode.Categories[actual]
1743 return t, unicode.FoldCategory[actual], +1
1744 }
1745 return nil, nil, 0
1746 }
1747
1748
1749
1750
1751 func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) {
1752 if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
1753 return
1754 }
1755
1756
1757 sign := +1
1758 if s[1] == 'P' {
1759 sign = -1
1760 }
1761 t := s[2:]
1762 c, t, err := nextRune(t)
1763 if err != nil {
1764 return
1765 }
1766 var seq, name string
1767 if c != '{' {
1768
1769 seq = s[:len(s)-len(t)]
1770 name = seq[2:]
1771 } else {
1772
1773 end := strings.IndexRune(s, '}')
1774 if end < 0 {
1775 if err = checkUTF8(s); err != nil {
1776 return
1777 }
1778 return nil, "", &Error{ErrInvalidCharRange, s}
1779 }
1780 seq, t = s[:end+1], s[end+1:]
1781 name = s[3:end]
1782 if err = checkUTF8(name); err != nil {
1783 return
1784 }
1785 }
1786
1787
1788 if name != "" && name[0] == '^' {
1789 sign = -sign
1790 name = name[1:]
1791 }
1792
1793 tab, fold, tsign := unicodeTable(name)
1794 if tab == nil {
1795 return nil, "", &Error{ErrInvalidCharRange, seq}
1796 }
1797 if tsign < 0 {
1798 sign = -sign
1799 }
1800
1801 if p.flags&FoldCase == 0 || fold == nil {
1802 if sign > 0 {
1803 r = appendTable(r, tab)
1804 } else {
1805 r = appendNegatedTable(r, tab)
1806 }
1807 } else {
1808
1809
1810
1811 tmp := p.tmpClass[:0]
1812 tmp = appendTable(tmp, tab)
1813 tmp = appendTable(tmp, fold)
1814 p.tmpClass = tmp
1815 tmp = cleanClass(&p.tmpClass)
1816 if sign > 0 {
1817 r = appendClass(r, tmp)
1818 } else {
1819 r = appendNegatedClass(r, tmp)
1820 }
1821 }
1822 return r, t, nil
1823 }
1824
1825
1826
1827 func (p *parser) parseClass(s string) (rest string, err error) {
1828 t := s[1:]
1829 re := p.newRegexp(OpCharClass)
1830 re.Flags = p.flags
1831 re.Rune = re.Rune0[:0]
1832
1833 sign := +1
1834 if t != "" && t[0] == '^' {
1835 sign = -1
1836 t = t[1:]
1837
1838
1839
1840 if p.flags&ClassNL == 0 {
1841 re.Rune = append(re.Rune, '\n', '\n')
1842 }
1843 }
1844
1845 class := re.Rune
1846 first := true
1847 for t == "" || t[0] != ']' || first {
1848
1849
1850 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
1851 _, size := utf8.DecodeRuneInString(t[1:])
1852 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
1853 }
1854 first = false
1855
1856
1857 if len(t) > 2 && t[0] == '[' && t[1] == ':' {
1858 nclass, nt, err := p.parseNamedClass(t, class)
1859 if err != nil {
1860 return "", err
1861 }
1862 if nclass != nil {
1863 class, t = nclass, nt
1864 continue
1865 }
1866 }
1867
1868
1869 nclass, nt, err := p.parseUnicodeClass(t, class)
1870 if err != nil {
1871 return "", err
1872 }
1873 if nclass != nil {
1874 class, t = nclass, nt
1875 continue
1876 }
1877
1878
1879 if nclass, nt := p.parsePerlClassEscape(t, class); nclass != nil {
1880 class, t = nclass, nt
1881 continue
1882 }
1883
1884
1885 rng := t
1886 var lo, hi rune
1887 if lo, t, err = p.parseClassChar(t, s); err != nil {
1888 return "", err
1889 }
1890 hi = lo
1891
1892 if len(t) >= 2 && t[0] == '-' && t[1] != ']' {
1893 t = t[1:]
1894 if hi, t, err = p.parseClassChar(t, s); err != nil {
1895 return "", err
1896 }
1897 if hi < lo {
1898 rng = rng[:len(rng)-len(t)]
1899 return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
1900 }
1901 }
1902 if p.flags&FoldCase == 0 {
1903 class = appendRange(class, lo, hi)
1904 } else {
1905 class = appendFoldedRange(class, lo, hi)
1906 }
1907 }
1908 t = t[1:]
1909
1910
1911 re.Rune = class
1912 class = cleanClass(&re.Rune)
1913 if sign < 0 {
1914 class = negateClass(class)
1915 }
1916 re.Rune = class
1917 p.push(re)
1918 return t, nil
1919 }
1920
1921
1922
1923 func cleanClass(rp *[]rune) []rune {
1924
1925
1926 sort.Sort(ranges{rp})
1927
1928 r := *rp
1929 if len(r) < 2 {
1930 return r
1931 }
1932
1933
1934 w := 2
1935 for i := 2; i < len(r); i += 2 {
1936 lo, hi := r[i], r[i+1]
1937 if lo <= r[w-1]+1 {
1938
1939 if hi > r[w-1] {
1940 r[w-1] = hi
1941 }
1942 continue
1943 }
1944
1945 r[w] = lo
1946 r[w+1] = hi
1947 w += 2
1948 }
1949
1950 return r[:w]
1951 }
1952
1953
1954
1955 func inCharClass(r rune, class []rune) bool {
1956 _, ok := sort.Find(len(class)/2, func(i int) int {
1957 lo, hi := class[2*i], class[2*i+1]
1958 if r > hi {
1959 return +1
1960 }
1961 if r < lo {
1962 return -1
1963 }
1964 return 0
1965 })
1966 return ok
1967 }
1968
1969
1970 func appendLiteral(r []rune, x rune, flags Flags) []rune {
1971 if flags&FoldCase != 0 {
1972 return appendFoldedRange(r, x, x)
1973 }
1974 return appendRange(r, x, x)
1975 }
1976
1977
1978 func appendRange(r []rune, lo, hi rune) []rune {
1979
1980
1981
1982
1983 n := len(r)
1984 for i := 2; i <= 4; i += 2 {
1985 if n >= i {
1986 rlo, rhi := r[n-i], r[n-i+1]
1987 if lo <= rhi+1 && rlo <= hi+1 {
1988 if lo < rlo {
1989 r[n-i] = lo
1990 }
1991 if hi > rhi {
1992 r[n-i+1] = hi
1993 }
1994 return r
1995 }
1996 }
1997 }
1998
1999 return append(r, lo, hi)
2000 }
2001
2002 const (
2003
2004
2005 minFold = 0x0041
2006 maxFold = 0x1e943
2007 )
2008
2009
2010
2011 func appendFoldedRange(r []rune, lo, hi rune) []rune {
2012
2013 if lo <= minFold && hi >= maxFold {
2014
2015 return appendRange(r, lo, hi)
2016 }
2017 if hi < minFold || lo > maxFold {
2018
2019 return appendRange(r, lo, hi)
2020 }
2021 if lo < minFold {
2022
2023 r = appendRange(r, lo, minFold-1)
2024 lo = minFold
2025 }
2026 if hi > maxFold {
2027
2028 r = appendRange(r, maxFold+1, hi)
2029 hi = maxFold
2030 }
2031
2032
2033 for c := lo; c <= hi; c++ {
2034 r = appendRange(r, c, c)
2035 f := unicode.SimpleFold(c)
2036 for f != c {
2037 r = appendRange(r, f, f)
2038 f = unicode.SimpleFold(f)
2039 }
2040 }
2041 return r
2042 }
2043
2044
2045
2046 func appendClass(r []rune, x []rune) []rune {
2047 for i := 0; i < len(x); i += 2 {
2048 r = appendRange(r, x[i], x[i+1])
2049 }
2050 return r
2051 }
2052
2053
2054 func appendFoldedClass(r []rune, x []rune) []rune {
2055 for i := 0; i < len(x); i += 2 {
2056 r = appendFoldedRange(r, x[i], x[i+1])
2057 }
2058 return r
2059 }
2060
2061
2062
2063 func appendNegatedClass(r []rune, x []rune) []rune {
2064 nextLo := '\u0000'
2065 for i := 0; i < len(x); i += 2 {
2066 lo, hi := x[i], x[i+1]
2067 if nextLo <= lo-1 {
2068 r = appendRange(r, nextLo, lo-1)
2069 }
2070 nextLo = hi + 1
2071 }
2072 if nextLo <= unicode.MaxRune {
2073 r = appendRange(r, nextLo, unicode.MaxRune)
2074 }
2075 return r
2076 }
2077
2078
2079 func appendTable(r []rune, x *unicode.RangeTable) []rune {
2080 for _, xr := range x.R16 {
2081 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2082 if stride == 1 {
2083 r = appendRange(r, lo, hi)
2084 continue
2085 }
2086 for c := lo; c <= hi; c += stride {
2087 r = appendRange(r, c, c)
2088 }
2089 }
2090 for _, xr := range x.R32 {
2091 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2092 if stride == 1 {
2093 r = appendRange(r, lo, hi)
2094 continue
2095 }
2096 for c := lo; c <= hi; c += stride {
2097 r = appendRange(r, c, c)
2098 }
2099 }
2100 return r
2101 }
2102
2103
2104 func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
2105 nextLo := '\u0000'
2106 for _, xr := range x.R16 {
2107 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2108 if stride == 1 {
2109 if nextLo <= lo-1 {
2110 r = appendRange(r, nextLo, lo-1)
2111 }
2112 nextLo = hi + 1
2113 continue
2114 }
2115 for c := lo; c <= hi; c += stride {
2116 if nextLo <= c-1 {
2117 r = appendRange(r, nextLo, c-1)
2118 }
2119 nextLo = c + 1
2120 }
2121 }
2122 for _, xr := range x.R32 {
2123 lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
2124 if stride == 1 {
2125 if nextLo <= lo-1 {
2126 r = appendRange(r, nextLo, lo-1)
2127 }
2128 nextLo = hi + 1
2129 continue
2130 }
2131 for c := lo; c <= hi; c += stride {
2132 if nextLo <= c-1 {
2133 r = appendRange(r, nextLo, c-1)
2134 }
2135 nextLo = c + 1
2136 }
2137 }
2138 if nextLo <= unicode.MaxRune {
2139 r = appendRange(r, nextLo, unicode.MaxRune)
2140 }
2141 return r
2142 }
2143
2144
2145
2146 func negateClass(r []rune) []rune {
2147 nextLo := '\u0000'
2148 w := 0
2149 for i := 0; i < len(r); i += 2 {
2150 lo, hi := r[i], r[i+1]
2151 if nextLo <= lo-1 {
2152 r[w] = nextLo
2153 r[w+1] = lo - 1
2154 w += 2
2155 }
2156 nextLo = hi + 1
2157 }
2158 r = r[:w]
2159 if nextLo <= unicode.MaxRune {
2160
2161
2162 r = append(r, nextLo, unicode.MaxRune)
2163 }
2164 return r
2165 }
2166
2167
2168
2169
2170
2171 type ranges struct {
2172 p *[]rune
2173 }
2174
2175 func (ra ranges) Less(i, j int) bool {
2176 p := *ra.p
2177 i *= 2
2178 j *= 2
2179 return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]
2180 }
2181
2182 func (ra ranges) Len() int {
2183 return len(*ra.p) / 2
2184 }
2185
2186 func (ra ranges) Swap(i, j int) {
2187 p := *ra.p
2188 i *= 2
2189 j *= 2
2190 p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
2191 }
2192
2193 func checkUTF8(s string) error {
2194 for s != "" {
2195 rune, size := utf8.DecodeRuneInString(s)
2196 if rune == utf8.RuneError && size == 1 {
2197 return &Error{Code: ErrInvalidUTF8, Expr: s}
2198 }
2199 s = s[size:]
2200 }
2201 return nil
2202 }
2203
2204 func nextRune(s string) (c rune, t string, err error) {
2205 c, size := utf8.DecodeRuneInString(s)
2206 if c == utf8.RuneError && size == 1 {
2207 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
2208 }
2209 return c, s[size:], nil
2210 }
2211
2212 func isalnum(c rune) bool {
2213 return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
2214 }
2215
2216 func unhex(c rune) rune {
2217 if '0' <= c && c <= '9' {
2218 return c - '0'
2219 }
2220 if 'a' <= c && c <= 'f' {
2221 return c - 'a' + 10
2222 }
2223 if 'A' <= c && c <= 'F' {
2224 return c - 'A' + 10
2225 }
2226 return -1
2227 }
2228
View as plain text