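The Pattern.CANON_EQ flag enables
canonical equivalence. Different Unicode representations of the same
character are considered equivalent when matching: for example, the precomposed character U+00E9 (é) and the letter "e" followed by the combining acute accent U+0301.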
import java.util.regex.*;
import java.io.*;

/**
 * Small demonstration of {@link Pattern#CANON_EQ}.
 *
 * <p>A pattern containing the precomposed character U+00E9 is compiled with
 * canonical equivalence enabled and then run against two inputs: one spelled
 * with the precomposed character and one spelled with a plain "e" followed by
 * the combining acute accent U+0301. Every match found is reported on standard
 * output together with its start and end index.
 */
public class RegExHarness {

    /**
     * Runs the demonstration and prints each match to standard output.
     *
     * @param args command-line arguments; not used
     * @throws IOException not thrown by the current body; the clause is kept so
     *         the method signature stays the same for existing callers
     */
    public static void main(String[] args) throws IOException {
        // Banner is kept verbatim to preserve the program's output; the
        // expression below is hard-coded and nothing is read from the console.
        System.out.println("Enter Regular Expression");

        // Precomposed "e with acute" (U+00E9) followed by "gal".
        String regex = "\u00e9gal";
        Pattern pattern = Pattern.compile(regex, Pattern.CANON_EQ);

        String[] input = {
            "\u00e9gal",   // precomposed form: 4 chars
            "e\u0301gal",  // decomposed form (e + combining acute): 5 chars
        };

        for (String text : input) {
            Matcher matcher = pattern.matcher(text);
            while (matcher.find()) {
                System.out.print("I found the text " + matcher.group());
                System.out.print(" starting at index " + matcher.start());
                System.out.println(" Ending at index " + matcher.end());
            }
        }
    }
}
Output
Enter Regular Expression
I found the text égal starting at index 0 Ending at index 4
I found the text égal starting at index 0 Ending at index 5
No comments:
Post a Comment