Saturday, 1 November 2014

Pattern.CANON_EQ

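Enables canonical equivalence. With this flag, two characters are considered to match if and only if their full canonical decompositions match. For example, the precomposed character "é" (U+00E9) matches the letter "e" followed by the combining acute accent (U+0301).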


import java.util.regex.*;
import java.io.*;

/**
 * Small demonstration of {@link Pattern#CANON_EQ}.
 *
 * <p>A pattern written with the precomposed character U+00E9 is compiled with
 * canonical equivalence enabled and then matched against two inputs: one that
 * uses the same precomposed character and one that spells it as the letter
 * 'e' followed by U+0301 (combining acute accent). Every match found is
 * printed together with its start and end index.
 */
public class RegExHarness {

    /**
     * Runs the demonstration and writes the matches to standard output.
     *
     * @param args command-line arguments; not used
     * @throws IOException not thrown by the current body; the clause is kept
     *         so the method signature stays unchanged
     */
    public static void main(String[] args) throws IOException {
        // The banner is kept as-is for output compatibility. The expression
        // itself is hard-coded below; nothing is read from standard input.
        System.out.println("Enter Regular Expression");

        // Precomposed form: U+00E9 followed by "gal".
        String regex = "\u00e9gal";
        Pattern pattern = Pattern.compile(regex, Pattern.CANON_EQ);

        String[] inputs = {
            "\u00e9gal",   // precomposed: U+00E9
            "e\u0301gal",  // decomposed: 'e' followed by U+0301
        };

        for (String text : inputs) {
            Matcher matcher = pattern.matcher(text);

            // Report every occurrence; indices are in UTF-16 code units of
            // the input string, so the decomposed form is one unit longer.
            while (matcher.find()) {
                System.out.print("I found the text " + matcher.group());
                System.out.print(" starting at index " + matcher.start());
                System.out.println(" Ending at index " + matcher.end());
            }
        }
    }
}

Output
Enter Regular Expression
I found the text égal starting at index 0 Ending at index 4
I found the text égal starting at index 0 Ending at index 5



Previous                                                 Next                                                 Home

No comments:

Post a Comment