RegExpPerl.cs
1/*
2
3 Copyright (c) 2004-2006 Pavel Novak and Tomas Matousek.
4
5 The use and distribution terms for this software are contained in the file named License.txt,
6 which can be found in the root of the Phalanger distribution. By using this software
7 in any fashion, you are agreeing to be bound by the terms of this license.
8
9 You must not remove this notice from this software.
10*/
11using System;
12using System.Text;
13using System.Collections;
14using System.Collections.Generic;
15using System.Globalization;
16using System.Text.RegularExpressions;
17
18namespace Udger.Parser
19{
/// <summary>
/// Perl regular expression modifiers that have no direct <see cref="RegexOptions"/> counterpart.
/// The values are bit flags and may be combined.
/// </summary>
[Flags]
public enum PerlRegexOptions
{
    /// <summary>No Perl-specific modifier is set.</summary>
    None = 0,

    /// <summary>Set by the 'e' modifier (evaluate the replacement as PHP code).</summary>
    Evaluate = 1,

    /// <summary>Set by the 'U' modifier (PCRE_UNGREEDY): quantifiers are lazy by default.</summary>
    Ungreedy = 2,

    /// <summary>Set by the 'A' modifier (PCRE_ANCHORED): the pattern matches only at the start of the subject.</summary>
    Anchored = 4,

    /// <summary>Set by the 'D' modifier (PCRE_DOLLAR_ENDONLY): '$' matches only at the very end of the subject.</summary>
    DollarMatchesEndOfStringOnly = 8,

    /// <summary>Set by the 'u' modifier (PCRE_UTF8): the pattern is treated as UTF-8.</summary>
    UTF8 = 16
}
34
39 #region PerlRegExpConverter
40
45 {
46 #region Properties
47
/// <summary>
/// Regular expression matching a single quantifier (<c>?</c>, <c>*</c>, <c>+</c> or <c>{n,m}</c>)
/// exactly at the position where matching starts. The instance is created on first use and
/// cached in <see cref="_quantifiers"/>.
/// </summary>
private static Regex quantifiers
{
    get
    {
        Regex cached = _quantifiers;
        if (cached != null)
            return cached;

        cached = new Regex(@"\G(?:\?|\*|\+|\{[0-9]+,[0-9]*\})");
        _quantifiers = cached;
        return cached;
    }
}

/// <summary>Backing field of <see cref="quantifiers"/>; null until first use.</summary>
private static Regex _quantifiers;
62
/// <summary>
/// Regular expression recognising a POSIX character class designator such as <c>[:alpha:]</c>
/// at the very beginning of the input. Group 1 captures an optional negation caret,
/// group 2 captures the class name. The instance is created on first use and cached.
/// </summary>
/// <remarks>
/// The caret is escaped (<c>\^</c>) so that it matches a literal '^'; an unescaped caret inside
/// the group would be a start-of-input anchor and could never capture a negation.
/// "blank" is listed because <c>CountCharacterClass</c> knows how to translate it.
/// </remarks>
private static Regex posixCharClasses
{
    get
    {
        if (_posixCharClasses == null)
            _posixCharClasses = new Regex("^\\[:(\\^)?(alpha|alnum|ascii|blank|cntrl|digit|graph|lower|print|punct|space|upper|word|xdigit):]", RegexOptions.Singleline);
        return _posixCharClasses;
    }
}

/// <summary>Backing field of <see cref="posixCharClasses"/>; null until first use.</summary>
private static Regex _posixCharClasses = null;
76
/// <summary>
/// Body of the Perl regular expression (the text between the delimiters).
/// </summary>
private string perlRegEx;

/// <summary>
/// Returns Regex class that can be used for matching.
/// </summary>
public Regex Regex
{
    get { return this.regex; }
}
private Regex regex;

/// <summary>
/// The Perl pattern body converted to .NET regular expression syntax.
/// </summary>
private string dotNetMatchExpression;

/// <summary>
/// Returns .NET replacement string.
/// </summary>
public string DotNetReplaceExpression
{
    get { return this.dotNetReplaceExpression; }
}
private string dotNetReplaceExpression;

/// <summary>
/// RegexOptions which should be set while matching the expression.
/// </summary>
public RegexOptions DotNetOptions
{
    get { return this.dotNetOptions; }
}
private RegexOptions dotNetOptions;

/// <summary>
/// Perl-specific options parsed from the pattern modifiers.
/// </summary>
public PerlRegexOptions PerlOptions
{
    get { return this.perlOptions; }
}
private PerlRegexOptions perlOptions = PerlRegexOptions.None;

/// <summary>
/// Encoding passed to the constructor.
/// </summary>
public Encoding Encoding
{
    get { return this.encoding; }
}
private readonly Encoding encoding;
111
112 #endregion
113
/// <summary>
/// Creates new PerlRegExpConverter and converts Perl regular expression to .NET.
/// </summary>
/// <param name="pattern">Perl regular expression including delimiters and modifiers.</param>
/// <param name="replacement">Perl replacement string, or null when no replacement is needed.</param>
/// <param name="encoding">Encoding stored in <see cref="Encoding"/> and used by the conversion.</param>
/// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
public PerlRegExpConverter(string pattern, string replacement, Encoding encoding)
{
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    this.encoding = encoding;

    // The pattern has to be converted first: converting the replacement
    // consults the group numbers of the compiled regex.
    ConvertPattern(pattern);

    if (replacement == null)
    {
        return;
    }

    dotNetReplaceExpression = ConvertReplacement(replacement);
}
132
/// <summary>
/// Splits the Perl pattern into body and modifiers, converts the body to .NET syntax and
/// compiles it into <see cref="regex"/>.
/// </summary>
/// <param name="pattern">Perl regular expression including delimiters and modifiers.</param>
/// <exception cref="ArgumentException">
/// The converted pattern was rejected by <see cref="Regex"/>. The message is the shortened form
/// produced by <see cref="ExtractExceptionalMessage"/>; the original exception is kept as
/// <see cref="Exception.InnerException"/>.
/// </exception>
private void ConvertPattern(string pattern)
{
    LoadPerlRegex(pattern);

    dotNetMatchExpression = ConvertRegex(perlRegEx, perlOptions, encoding);

    try
    {
        regex = new Regex(dotNetMatchExpression, dotNetOptions);
    }
    catch (ArgumentException e)
    {
        // Keep the original exception attached so the full parser diagnostics are not lost.
        throw new ArgumentException(ExtractExceptionalMessage(e.Message), e);
    }
}
153
/// <summary>
/// Shortens a message of an exception thrown by the <see cref="Regex"/> constructor:
/// occurrences of the converted pattern are replaced with <c>&lt;pattern&gt;</c>, only the first
/// line is kept, and everything up to and including the first "-" and the character
/// following it is removed.
/// </summary>
/// <param name="message">Exception message; may be null.</param>
/// <returns>The shortened message, or null if <paramref name="message"/> is null.</returns>
private string ExtractExceptionalMessage(string message)
{
    if (message == null)
        return null;

    // String.Replace throws for a null or empty search string, so skip it in that case.
    if (!String.IsNullOrEmpty(dotNetMatchExpression))
        message = message.Replace(dotNetMatchExpression, "<pattern>");

    // Ordinal search: the markers are fixed characters, not linguistic text.
    int i = message.IndexOf("\r\n", StringComparison.Ordinal);
    if (i >= 0)
        message = message.Substring(0, i);

    i = message.IndexOf('-');
    if (i >= 0)
    {
        // Skip the dash and the following character; a dash at the very end leaves nothing.
        message = (i + 2 <= message.Length) ? message.Substring(i + 2) : String.Empty;
    }

    return message;
}
174
/// <summary>
/// Returns the given part of <paramref name="str"/>. When the UTF8 option is set, the part is
/// first encoded to bytes with the converter's encoding and those bytes are decoded as UTF-8.
/// </summary>
/// <param name="str">Source string.</param>
/// <param name="start">Index of the first character of the part.</param>
/// <param name="length">Number of characters in the part.</param>
/// <returns>The extracted (and possibly re-decoded) part.</returns>
internal string ConvertString(string str, int start, int length)
{
    string slice = str.Substring(start, length);

    bool utf8 = (perlOptions & PerlRegexOptions.UTF8) != 0;
    if (!utf8)
        return slice;

    byte[] raw = encoding.GetBytes(slice);
    return Encoding.UTF8.GetString(raw);
}
182
/// <summary>
/// Decodes the given part of a byte array: as UTF-8 when the UTF8 option is set,
/// otherwise with the converter's encoding.
/// </summary>
/// <param name="bytes">Source bytes.</param>
/// <param name="start">Index of the first byte to decode.</param>
/// <param name="length">Number of bytes to decode.</param>
/// <returns>The decoded string.</returns>
internal string ConvertBytes(byte[] bytes, int start, int length)
{
    bool utf8 = (perlOptions & PerlRegexOptions.UTF8) != 0;
    Encoding decoder = utf8 ? Encoding.UTF8 : encoding;
    return decoder.GetString(bytes, start, length);
}
190
/// <summary>
/// Splits a binary Perl pattern into body and modifiers. Sets <see cref="dotNetOptions"/>,
/// <see cref="perlOptions"/> and <see cref="perlRegEx"/>.
/// </summary>
/// <param name="pattern">Perl regular expression including delimiters and modifiers; null is treated as empty.</param>
/// <exception cref="ArgumentException">The pattern is empty or its delimiters are malformed.</exception>
private void LoadPerlRegex(byte[] pattern)
{
    if (pattern == null) pattern = new byte[0];
    int regex_start, regex_end;

    // Map every byte to one character so that the positions found below are byte offsets.
    // (StringBuilder.Append(pattern) would bind to Append(object) and append "System.Byte[]".)
    StringBuilder upattern = new StringBuilder(pattern.Length);
    for (int i = 0; i < pattern.Length; i++)
        upattern.Append((char)pattern[i]);

    FindRegexDelimiters(upattern, out regex_start, out regex_end);
    ParseRegexOptions(upattern, regex_end + 2, out dotNetOptions, out perlOptions);

    // The body itself is decoded properly (UTF-8 or the converter's encoding).
    perlRegEx = ConvertBytes(pattern, regex_start, regex_end - regex_start + 1);
}
204
/// <summary>
/// Splits a textual Perl pattern into body and modifiers. Sets <see cref="dotNetOptions"/>,
/// <see cref="perlOptions"/> and <see cref="perlRegEx"/>.
/// </summary>
/// <param name="pattern">Perl regular expression including delimiters and modifiers; null is treated as empty.</param>
private void LoadPerlRegex(string pattern)
{
    string text = pattern ?? "";
    StringBuilder buffer = new StringBuilder(text);

    int bodyFirst, bodyLast;
    FindRegexDelimiters(buffer, out bodyFirst, out bodyLast);

    // bodyLast + 1 is the closing delimiter; the modifiers start right behind it.
    ParseRegexOptions(buffer, bodyLast + 2, out dotNetOptions, out perlOptions);

    perlRegEx = ConvertString(text, bodyFirst, bodyLast - bodyFirst + 1);
}
218
/// <summary>
/// Locates the body of a delimited Perl pattern.
/// </summary>
/// <param name="pattern">Whole pattern; leading white space is skipped.</param>
/// <param name="start">Receives the index of the first character behind the opening delimiter.</param>
/// <param name="end">Receives the index of the last character in front of the closing delimiter.</param>
/// <exception cref="ArgumentException">
/// The pattern contains only white space, the delimiter is a letter, a digit or a backslash,
/// or the closing delimiter is missing.
/// </exception>
private void FindRegexDelimiters(StringBuilder pattern, out int start, out int end)
{
    int length = pattern.Length;

    int pos = 0;
    while (pos < length && Char.IsWhiteSpace(pattern[pos]))
        pos++;

    if (pos == length)
        throw new ArgumentException("RegExp empty");

    char opening = pattern[pos];
    pos++;
    if (opening == '\\' || Char.IsLetterOrDigit(opening))
        throw new ArgumentException("Something bad with delimiter");

    start = pos;

    // Bracket-style delimiters are closed by their counterpart, any other one by itself.
    char closing;
    switch (opening)
    {
        case '[': closing = ']'; break;
        case '(': closing = ')'; break;
        case '{': closing = '}'; break;
        case '<': closing = '>'; break;
        default: closing = opening; break;
    }

    int nesting = 1;
    while (pos < length)
    {
        char current = pattern[pos];

        // A backslash hides the following character from delimiter detection.
        if (current == '\\' && pos + 1 < length)
        {
            pos += 2;
            continue;
        }

        // The closing test comes first so that identical opening and closing
        // delimiters terminate the pattern instead of nesting.
        if (current == closing)
        {
            nesting--;
            if (nesting == 0)
                break;
        }
        else if (current == opening)
        {
            nesting++;
        }

        pos++;
    }

    if (pos == length)
        throw new ArgumentException("No end delimiter");

    end = pos - 1;
}
264
/// <summary>
/// Parses the modifier letters that follow the closing delimiter of a Perl pattern.
/// </summary>
/// <param name="pattern">Whole pattern including delimiters and modifiers.</param>
/// <param name="start">Index of the first modifier character.</param>
/// <param name="dotNetOptions">Receives the modifiers that map to <see cref="RegexOptions"/>.</param>
/// <param name="extraOptions">Receives the modifiers that have no .NET counterpart.</param>
/// <exception cref="ArgumentException">
/// Both 'm' (multiline) and 'D' (dollar matches end of string only) are specified.
/// </exception>
private static void ParseRegexOptions(StringBuilder pattern, int start,
    out RegexOptions dotNetOptions, out PerlRegexOptions extraOptions)
{
    dotNetOptions = RegexOptions.None;
    extraOptions = PerlRegexOptions.None;

    for (int i = start; i < pattern.Length; i++)
    {
        switch (pattern[i])
        {
            case 'i': // PCRE_CASELESS
                dotNetOptions |= RegexOptions.IgnoreCase;
                break;

            case 'm': // PCRE_MULTILINE
                dotNetOptions |= RegexOptions.Multiline;
                break;

            case 's': // PCRE_DOTALL
                dotNetOptions |= RegexOptions.Singleline;
                break;

            case 'x': // PCRE_EXTENDED
                dotNetOptions |= RegexOptions.IgnorePatternWhitespace;
                break;

            case 'e': // evaluate as PHP code
                extraOptions |= PerlRegexOptions.Evaluate;
                break;

            case 'A': // PCRE_ANCHORED
                extraOptions |= PerlRegexOptions.Anchored;
                break;

            case 'D': // PCRE_DOLLAR_ENDONLY
                extraOptions |= PerlRegexOptions.DollarMatchesEndOfStringOnly;
                break;

            case 'S': // spend more time studying the pattern - ignored
                break;

            case 'U': // PCRE_UNGREEDY
                extraOptions |= PerlRegexOptions.Ungreedy;
                break;

            case 'u': // PCRE_UTF8
                extraOptions |= PerlRegexOptions.UTF8;
                break;

            // Any other character, including 'X' (PCRE_EXTRA), is silently ignored.
        }
    }

    // inconsistent options check:
    bool multiline = (dotNetOptions & RegexOptions.Multiline) != 0;
    bool dollarEndOnly = (extraOptions & PerlRegexOptions.DollarMatchesEndOfStringOnly) != 0;
    if (multiline && dollarEndOnly)
    {
        // ArgumentException keeps this in line with the other pattern errors raised by this
        // class; callers catching System.Exception are unaffected.
        throw new ArgumentException("Modifier inconsistent");
    }
}
336
/// <summary>
/// Converts a hexadecimal digit character to its numeric value.
/// </summary>
/// <param name="x">Character to convert; '0'-'9', 'A'-'F' and 'a'-'f' are recognised.</param>
/// <returns>
/// The value 0-15 of the digit, or 17 if <paramref name="x"/> is not a hexadecimal digit.
/// Callers test the result against their radix, so 17 is rejected for every supported base.
/// </returns>
private static int AlphaNumericToDigit(char x)
{
    if (x >= '0' && x <= '9') return x - '0';
    if (x >= 'A' && x <= 'F') return x - 'A' + 10;

    // Lowercase hexadecimal digits are valid in \xhh and \x{hhh} escapes as well.
    if (x >= 'a' && x <= 'f') return x - 'a' + 10;

    return 17;
}
378
/// <summary>
/// Tries to decode a numeric escape sequence starting at the backslash at <paramref name="pos"/>:
/// <c>\x{h...}</c> (Unicode code point), <c>\xhh</c> (single byte) or <c>\ooo</c> (three octal digits).
/// </summary>
/// <param name="encoding">Encoding used to turn a single byte value into a character.</param>
/// <param name="str">Pattern being converted; <c>str[pos]</c> is expected to be a backslash.</param>
/// <param name="pos">
/// Position of the backslash. On success it is moved to the last character of the sequence;
/// on failure it is left unchanged.
/// </param>
/// <param name="ch">On success receives the decoded character.</param>
/// <param name="escaped">On success receives whether the decoded character must be escaped in a .NET pattern.</param>
/// <returns>
/// True if a sequence was decoded. False for anything else, including <c>\p</c>, <c>\P</c> and
/// <c>\X</c>, which are left for the caller to pass through.
/// </returns>
private static bool ParseEscapeCode(Encoding encoding, string str, ref int pos, ref char ch, ref bool escaped)
{
    // Every sequence handled here spans at least four characters (backslash included).
    if (pos + 3 >= str.Length) return false;

    int number = 0;
    char kind = str[pos + 1];

    if (kind == 'x')
    {
        if (str[pos + 2] == '{')
        {
            // hexadecimal number encoding a Unicode character:
            int i = pos + 3;
            while (i < str.Length && str[i] != '}' && number < Char.MaxValue)
            {
                int digit = AlphaNumericToDigit(str[i]);
                if (digit > 15) return false;
                number = (number << 4) + digit;
                i++;
            }

            // The loop also stops when the value reaches Char.MaxValue, so make sure the
            // sequence really ends with the closing brace before accepting it.
            if (number > Char.MaxValue || i >= str.Length || str[i] != '}') return false;

            pos = i;
            ch = (char)number;
        }
        else
        {
            // hexadecimal number encoding single-byte character (exactly two digits):
            for (int i = pos + 2; i < pos + 4; i++)
            {
                int digit = AlphaNumericToDigit(str[i]);
                if (digit > 15) return false;
                number = (number << 4) + digit;
            }

            pos += 3;
            ch = DecodeEscapedByte(encoding, number);
        }

        escaped = IsCharRegexSpecial(ch);
        return true;
    }

    if (kind >= '0' && kind <= '7')
    {
        // octal number (exactly three digits, each in the range 0-7):
        for (int i = pos + 1; i < pos + 4; i++)
        {
            int digit = AlphaNumericToDigit(str[i]);
            if (digit > 7) return false;
            number = (number << 3) + digit;
        }

        pos += 3;
        ch = DecodeEscapedByte(encoding, number);
        escaped = IsCharRegexSpecial(ch);
        return true;
    }

    // Unicode categories (\p, \P), extended sequences (\X) and all other escapes are not decoded.
    return false;
}

/// <summary>
/// Turns a numeric escape value into a character. Values that fit into one byte are decoded
/// with <paramref name="encoding"/>; if that does not yield exactly one character, or the value
/// is larger than a byte, the value itself is used as the character code.
/// </summary>
private static char DecodeEscapedByte(Encoding encoding, int number)
{
    if (number <= Byte.MaxValue)
    {
        char[] chars = encoding.GetChars(new byte[] { (byte)number });
        if (chars.Length == 1)
            return chars[0];
    }

    return (char)number;
}
490
/// <summary>
/// Characters that have a special meaning in a regular expression.
/// </summary>
static char[] encodeChars = new char[] { '.', '$', '(', ')', '*', '+', '?', '[', ']', '{', '}', '\\', '^', '|' };

/// <summary>
/// Tells whether the character is one of the characters listed in <see cref="encodeChars"/>.
/// </summary>
/// <param name="ch">Character to test.</param>
/// <returns>True if <paramref name="ch"/> is listed in <see cref="encodeChars"/>.</returns>
private static bool IsCharRegexSpecial(char ch)
{
    foreach (char special in encodeChars)
    {
        if (special == ch)
            return true;
    }

    return false;
}
503
504
/// <summary>
/// Converts the body of a Perl regular expression (delimiters and modifiers already removed)
/// into .NET regular expression syntax.
/// </summary>
/// <remarks>
/// The conversion decodes numeric escapes, drops the 'P' of named groups, inverts quantifier
/// greediness when the Ungreedy option is set, replaces '$' with "\z" when the
/// DollarMatchesEndOfStringOnly option is set, translates POSIX character classes and expands
/// ranges inside bracket expressions into explicit character lists.
/// </remarks>
/// <param name="perlExpr">Perl pattern body.</param>
/// <param name="opt">Perl-specific options that influence the conversion.</param>
/// <param name="encoding">Encoding used to decode single-byte escapes and to expand ranges.</param>
/// <returns>The converted pattern.</returns>
/// <exception cref="ArgumentException">
/// The pattern ends with a backslash, contains two quantifiers in a row (Ungreedy mode only),
/// uses a negated POSIX character class, or contains a range whose first character is greater
/// than the second one.
/// </exception>
private static string ConvertRegex(string perlExpr, PerlRegexOptions opt, Encoding encoding)
{
    // Assume no conversion will be performed and size the builder to the exact length;
    // it only grows when a range is expanded. +1 is reserved for the Anchored '^'.
    StringBuilder result = new StringBuilder(perlExpr.Length + 1);

    // Anchored means that the expression should match only at the start of the string:
    // add '^' at the beginning if there is none.
    if ((opt & PerlRegexOptions.Anchored) != 0 && (perlExpr.Length == 0 || perlExpr[0] != '^'))
        result.Append('^');

    // Set after a quantifier has been consumed; a second quantifier right behind it is an error.
    bool last_quantifier = false;

    // Set right after a range has been expanded, i.e. when switching from state 3 back to
    // state 2 - in "a-b-c" the second '-' must not start another range.
    bool leaving_range = false;

    bool escaped = false;

    // 0 = outside of a character class, 1 = first character of a class,
    // 2 = inside of a class, 3 = inside of a class right behind a range '-'.
    int state = 0;

    // Tracks the "(?" prefix: 0 = nothing seen, 1 = '(' seen, 2 = "(?" seen.
    int group_state = 0;

    int i = 0;
    while (i < perlExpr.Length)
    {
        char ch = perlExpr[i];

        escaped = false;
        if (ch == '\\' && !ParseEscapeCode(encoding, perlExpr, ref i, ref ch, ref escaped))
        {
            i++;
            if (i >= perlExpr.Length)
                throw new ArgumentException("Regex cannot end with backslash.");
            ch = perlExpr[i];

            // some characters (like '_') don't need to be escaped in .net
            if (ch == '_') escaped = false; else escaped = true;
        }

        switch (state)
        {
            case 0: // outside of character class
                if (escaped)
                {
                    result.Append('\\');
                    result.Append(ch);
                    last_quantifier = false;
                    // an escaped character interrupts a potential "(?P" prefix
                    group_state = 0;
                    break;
                }

                // In Perl regexps, named groups are written like this: "(?P<name> ... )".
                // .NET expects "(?<name> ... )", so the 'P' directly behind "(?" is skipped.
                switch (group_state)
                {
                    case 0: group_state = (ch == '(') ? 1 : 0; break;
                    case 1: group_state = (ch == '?') ? 2 : 0; break;
                    case 2:
                        // The prefix is complete either way; without this reset every later
                        // unescaped 'P' in the pattern would be dropped as well.
                        if (ch == 'P') { group_state = 0; i++; continue; }
                        group_state = (ch == '(') ? 1 : 0;
                        break;
                }

                if ((opt & PerlRegexOptions.Ungreedy) != 0)
                {
                    // match quantifier ?,*,+,{n,m} at the position i:
                    Match m = quantifiers.Match(perlExpr, i);

                    // quantifier matched; a '?' preceded by '(' is the grouping construct "(?", not a quantifier
                    if (m.Success && (m.Value != "?" || i == 0 || perlExpr[i - 1] != '('))
                    {
                        // two quantifiers:
                        if (last_quantifier)
                            throw new ArgumentException("regexp_duplicate_quantifier");

                        // append quantifier:
                        result.Append(perlExpr, i, m.Length);
                        i += m.Length;

                        if (i < perlExpr.Length && perlExpr[i] == '?')
                        {
                            // skip question mark to make the quantifier greedy:
                            i++;
                        }
                        else if (i < perlExpr.Length && perlExpr[i] == '+')
                        {
                            // TODO: possessive quantifiers are not supported yet,
                            // the possessive marker is simply skipped.
                            i++;
                        }
                        else
                        {
                            // add question mark to make the quantifier lazy:
                            if (result.Length != 0 && result[result.Length - 1] == '?')
                            {
                                // HACK: Due to the issue in .NET regex we can't use "??" because it isn't interpreted correctly!!
                                // (for example "^(ab)??$" matches with "abab", but it shouldn't!!)
                            }
                            else
                                result.Append('?');
                        }

                        last_quantifier = true;
                        continue;
                    }
                }

                last_quantifier = false;

                if (ch == '$' && (opt & PerlRegexOptions.DollarMatchesEndOfStringOnly) != 0)
                {
                    // replaces '$' with '\z':
                    result.Append(@"\z");
                    break;
                }

                if (ch == '[')
                    state = 1;

                result.Append(ch);
                break;

            case 1: // first character of character class
                if (escaped)
                {
                    result.Append('\\');
                    result.Append(ch);
                    state = 2;
                    break;
                }

                // special characters:
                if (ch == '^' || ch == ']' || ch == '-')
                {
                    result.Append(ch);
                }
                else
                {
                    // other characters are not consumed here, for example [[:space:]abc] will not match if the first
                    // [ is appended here.
                    state = 2;
                    goto case 2;
                }
                break;

            case 2: // inside of character class
                if (escaped)
                {
                    result.Append('\\');
                    result.Append(ch);
                    leaving_range = false;
                    break;
                }

                if (ch == '-' && !leaving_range)
                {
                    state = 3;
                    break;
                }
                leaving_range = false;

                // posix character classes; the designator always starts with '[', so the
                // substring is only created when a match is possible at all
                if (perlExpr[i] == '[')
                {
                    Match match = posixCharClasses.Match(perlExpr.Substring(i), 0);
                    if (match.Success)
                    {
                        string chars = CountCharacterClass(match.Groups[2].Value);
                        if (chars == null)
                            throw new ArgumentException(/*TODO*/ String.Format("Unknown character class '{0}'", match.Groups[2].Value));

                        if (match.Groups[1].Value.Length > 0)
                            throw new ArgumentException(/*TODO*/ "POSIX character classes negation not supported.");

                        result.Append(chars);
                        i += match.Length - 1; // +1 is added just behind the switch
                        break;
                    }
                }

                if (ch == ']')
                    state = 0;
                if (ch == '-')
                    result.Append("\\x2d");
                else
                    result.Append(ch);
                break;

            case 3: // range; previous character was '-'
                if (!escaped && ch == ']')
                {
                    result.Append("-]");
                    state = 0;
                    break;
                }

                string range;
                int error;
                if (!CountRange(result[result.Length - 1], ch, out range, out error, encoding))
                {
                    // error 1: the boundaries are not single-byte characters - try a Unicode range
                    if ((error != 1) || (!CountUnicodeRange(result[result.Length - 1], ch, out range)))
                    {
                        throw new ArgumentException("range_first_character_greater");
                    }
                }
                result.Append(EscapeBracketExpressionSpecialChars(range)); // left boundary is duplicated, but doesn't matter...
                state = 2;
                leaving_range = true;
                break;
        }

        i++;
    }

    return result.ToString();
}
725
/// <summary>
/// Prefixes every backslash, closing bracket and dash in the given string with a backslash.
/// A caret is left as is.
/// </summary>
/// <param name="chars">Characters to escape.</param>
/// <returns>The escaped string.</returns>
internal static string EscapeBracketExpressionSpecialChars(string chars)
{
    StringBuilder escapedChars = new StringBuilder();

    foreach (char current in chars)
    {
        bool special = current == '\\' || current == ']' || current == '-';
        if (special)
            escapedChars.Append('\\');

        escapedChars.Append(current);
    }

    return escapedChars.ToString();
}
754
755
/// <summary>
/// Expands a range whose boundaries are both encoded as a single byte in the given encoding.
/// </summary>
/// <param name="firstCharacter">Lower boundary of the range.</param>
/// <param name="secondCharacter">Upper boundary of the range.</param>
/// <param name="characters">Receives all characters of the range, or null on failure.</param>
/// <param name="result">
/// Receives 0 on success, 1 if the two boundaries are not encoded into exactly two bytes,
/// 2 if the first boundary's byte is greater than the second one's.
/// </param>
/// <param name="encoding">Encoding used to convert between characters and bytes.</param>
/// <returns>True if the range was expanded.</returns>
internal static bool CountRange(char firstCharacter, char secondCharacter, out string characters, out int result, Encoding encoding)
{
    characters = null;
    result = 0;

    char[] endpoints = new char[] { firstCharacter, secondCharacter };
    byte[] encoded = new byte[encoding.GetMaxByteCount(2)];

    // both boundaries have to be "normal" characters, i.e. stored in one byte each
    int written = encoding.GetBytes(endpoints, 0, 2, encoded, 0);
    if (written != 2)
    {
        result = 1;
        return false;
    }

    int low = encoded[0];
    int high = encoded[1];
    if (low > high)
    {
        result = 2;
        return false;
    }

    // enumerate every byte value between the boundaries and decode them at once
    int count = high - low + 1;
    byte[] span = new byte[count];
    for (int offset = 0; offset < count; offset++)
    {
        span[offset] = (byte)(low + offset);
    }

    characters = encoding.GetString(span, 0, count);
    return true;
}
803
/// <summary>
/// Translates the name of a POSIX character class into text that can be placed inside
/// a .NET bracket expression.
/// </summary>
/// <param name="chClassName">Class name without the surrounding "[:" and ":]".</param>
/// <returns>The replacement text, or null if the name is not known.</returns>
internal static string CountCharacterClass(string chClassName)
{
    switch (chClassName)
    {
        case "alnum": return @"\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}";
        case "digit": return @"\p{Nd}";
        case "punct": return @"\p{P}\p{S}";
        case "alpha": return @"\p{Ll}\p{Lu}\p{Lt}\p{Lo}";
        case "graph": return @"\p{L}\p{M}\p{N}\p{P}\p{S}";
        case "space": return @"\s";
        case "blank": return @" \t";
        case "lower": return @"\p{Ll}";
        case "upper": return @"\p{Lu}";
        case "cntrl": return @"\p{Cc}";
        case "print": return @"\p{L}\p{M}\p{N}\p{P}\p{S}\p{Zs}";
        case "xdigit": return @"abcdefABCDEF\d";
        case "ascii": return @"\u0000-\u007F";
        case "word": return @"_\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}";
        default: return null;
    }
}
862
/// <summary>
/// Expands a range of UTF-16 code units into a string containing every code unit from
/// <paramref name="f"/> to <paramref name="t"/> inclusive.
/// </summary>
/// <param name="f">Lower boundary of the range.</param>
/// <param name="t">Upper boundary of the range.</param>
/// <param name="range">Receives the expanded range, or an empty string on failure.</param>
/// <returns>False if <paramref name="f"/> is greater than <paramref name="t"/>, true otherwise.</returns>
private static bool CountUnicodeRange(char f, char t, out string range)
{
    range = "";
    if (f > t) return false;

    StringBuilder sb = new StringBuilder(t - f + 1);

    // The counter is an int: a char counter would wrap around to 0 after '\uFFFF'
    // and the loop would never terminate when the upper boundary is Char.MaxValue.
    for (int c = f; c <= t; c++)
        sb.Append((char)c);

    range = sb.ToString();
    return true;
}
880
881
/// <summary>
/// Makes the expression anchored, i.e. matching only at the beginning of the string,
/// by prepending '^' unless the expression already starts with it.
/// </summary>
/// <param name="expr">Expression to modify in place.</param>
private static void ModifyRegExpAnchored(ref string expr)
{
    if (expr.Length == 0 || expr[0] != '^')
    {
        // Strings are immutable: String.Insert returns a new string, so the result
        // has to be assigned back for the change to take effect.
        expr = expr.Insert(0, "^");
    }
}
893
/// <summary>
/// Tells whether a group reference of the form "$n" or "\n" (n being a digit) starts at the given position.
/// </summary>
/// <param name="replacement">Replacement string.</param>
/// <param name="i">Position to examine.</param>
/// <returns>True if '$' or a backslash at <paramref name="i"/> is directly followed by a digit.</returns>
internal static bool IsDigitGroupReference(string replacement, int i)
{
    char marker = replacement[i];
    if (marker != '$' && marker != '\\')
        return false;

    int next = i + 1;
    return next < replacement.Length && Char.IsDigit(replacement, next);
}
899
/// <summary>
/// Tells whether a group reference of the form "${n}" or "${nn}" (n being a digit) starts at the given position.
/// </summary>
/// <param name="replacement">Replacement string.</param>
/// <param name="i">Position to examine.</param>
/// <returns>True if a braced one- or two-digit reference starts at <paramref name="i"/>.</returns>
internal static bool IsParenthesizedGroupReference(string replacement, int i)
{
    if (replacement[i] != '$')
        return false;

    // the shortest form "${n}" needs three more characters
    if (i + 3 >= replacement.Length)
        return false;

    if (replacement[i + 1] != '{')
        return false;

    if (!Char.IsDigit(replacement, i + 2))
        return false;

    // one digit: "${n}"
    if (replacement[i + 3] == '}')
        return true;

    // two digits: "${nn}"
    return i + 4 < replacement.Length
        && replacement[i + 4] == '}'
        && Char.IsDigit(replacement, i + 3);
}
909
/// <summary>
/// Converts a Perl replacement string into a .NET replacement string.
/// </summary>
/// <remarks>
/// Group references ("$n", "\n", "${n}", with one or two digits) are rewritten to "${n}";
/// references to groups with a number greater than the highest group number of
/// <see cref="regex"/> are removed. A '$' that does not start a reference is doubled,
/// a doubled backslash is reduced to one, and everything else is copied unchanged.
/// </remarks>
/// <param name="replacement">Perl replacement string.</param>
/// <returns>The converted replacement string.</returns>
private string ConvertReplacement(string replacement)
{
    StringBuilder result = new StringBuilder();
    int[] group_numbers = regex.GetGroupNumbers();
    int max_number = (group_numbers.Length > 0) ? group_numbers[group_numbers.Length - 1] : 0;

    int i = 0;
    while (i < replacement.Length)
    {
        if (IsDigitGroupReference(replacement, i) ||
            IsParenthesizedGroupReference(replacement, i))
        {
            // skip the '$' or backslash
            int add = 0;
            i++;

            // braced form: remember to skip the closing brace as well
            if (replacement[i] == '{') { i++; add = 1; }

            int number = replacement[i++] - '0';
            if (i < replacement.Length && Char.IsDigit(replacement, i))
            {
                // second digit: add its numeric value, not its character code
                number = number * 10 + (replacement[i] - '0');
                i++;
            }

            // insert only existing group references (others replaced with empty string):
            if (number <= max_number)
            {
                result.Append('$');
                result.Append('{');
                result.Append(number.ToString());
                result.Append('}');
            }

            i += add;
        }
        else if (replacement[i] == '$')
        {
            // there is $ and it is not a substitution - duplicate it:
            result.Append("$$");
            i++;
        }
        else if (replacement[i] == '\\' && i + 1 < replacement.Length)
        {
            if (replacement[i + 1] == '\\')
            {
                // two backslashes, replace with one:
                result.Append('\\');
                i += 2;
            }
            else
            {
                // backslash + some character, copy both and skip two characters
                result.Append(replacement, i, 2);
                i += 2;
            }
        }
        else
        {
            // no substitution, no backslash (or backslash at the end of string)
            result.Append(replacement, i++, 1);
        }
    }

    return result.ToString();
}
980
981 }
982
983 #endregion
984}
Implements PERL extended regular expressions as they are implemented in PHP.
Definition: RegExpPerl.cs:45
string DotNetReplaceExpression
Returns .NET replacement string.
Definition: RegExpPerl.cs:96
Regex Regex
Returns Regex class that can be used for matching.
Definition: RegExpPerl.cs:85
PerlRegExpConverter(string pattern, string replacement, Encoding encoding)
Creates new PerlRegExpConverter and converts Perl regular expression to .NET.
Definition: RegExpPerl.cs:120
RegexOptions DotNetOptions
RegexOptions which should be set while matching the expression. May be null if regex is already set.
Definition: RegExpPerl.cs:103
PerlRegexOptions
Perl regular expression specific options that are not captured by .NET RegexOptions or by transformat...
Definition: RegExpPerl.cs:26