Friday 9 October 2015

Elasticsearch: Matching Phrases

There is a problem with the match query: it matches a document even if the document contains only one of the words in the search text.

Suppose I have the following employees in the type employees.

{
   "took": 1,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 4,
      "max_score": 1,
      "hits": [
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "4",
            "_score": 1,
            "_source": {
               "firstName": "Tushar",
               "lastName": "Goyal",
               "hobbies": [
                  "Reading Novels",
                  "Playing football"
               ],
               "age": 23
            }
         },
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "1",
            "_score": 1,
            "_source": {
               "firstName": "Phalgun",
               "lastName": "Garimella",
               "hobbies": [
                  "Watching movies",
                  "Stamp collection",
                  "Reading books",
                  "Playing Cricket"
               ],
               "age": 30
            }
         },
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "2",
            "_score": 1,
            "_source": {
               "firstName": "Sankalp",
               "lastName": "Dubey",
               "hobbies": [
                  "Shopping",
                  "Swimming",
                  "Reading books"
               ],
               "age": 32
            }
         },
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "3",
            "_score": 1,
            "_source": {
               "firstName": "Arpan",
               "lastName": "Debroy",
               "hobbies": [
                  "Tattoos",
                  "Fencing",
                  "Shopping"
               ],
               "age": 28
            }
         }
      ]
   }
}


If you are searching for employees whose hobby is “Reading books”, you might write a query like the one below.

GET /xyz/employees/_search
{
  "query":{
    "match":{
      "hobbies": "Reading books"
    }
  }
}


The above query returns 3 documents.

{
   "took": 2,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 3,
      "max_score": 0.2169777,
      "hits": [
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "2",
            "_score": 0.2169777,
            "_source": {
               "firstName": "Sankalp",
               "lastName": "Dubey",
               "hobbies": [
                  "Shopping",
                  "Swimming",
                  "Reading books"
               ],
               "age": 32
            }
         },
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "1",
            "_score": 0.13561106,
            "_source": {
               "firstName": "Phalgun",
               "lastName": "Garimella",
               "hobbies": [
                  "Watching movies",
                  "Stamp collection",
                  "Reading books",
                  "Playing Cricket"
               ],
               "age": 30
            }
         },
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "4",
            "_score": 0.02250402,
            "_source": {
               "firstName": "Tushar",
               "lastName": "Goyal",
               "hobbies": [
                  "Reading Novels",
                  "Playing football"
               ],
               "age": 23
            }
         }
      ]
   }
}


As you can see in the output, the document with id 4 does not contain the hobby “Reading books”, yet it appears in the result. This is because the match query, by default, matches documents that contain “Reading” (or) “books”. Document 4 contains the hobby “Reading Novels”, so it appears in the result.

Of course, it is an interesting feature, but sometimes you want to match the whole phrase exactly. To match the exact phrase, with all the words adjacent and in the same order, use the match_phrase query.

GET /xyz/employees/_search
{
  "query":{
    "match_phrase":{
      "hobbies": "Reading books"
    }
  }
}


The above query returns only 2 documents, those that contain the hobby “Reading books” exactly (not just “Reading”, not just “books”, but the exact phrase “Reading books”).

The following are the documents returned by the above query.

{
   "took": 4,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 2,
      "max_score": 0.30685282,
      "hits": [
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "2",
            "_score": 0.30685282,
            "_source": {
               "firstName": "Sankalp",
               "lastName": "Dubey",
               "hobbies": [
                  "Shopping",
                  "Swimming",
                  "Reading books"
               ],
               "age": 32
            }
         },
         {
            "_index": "xyz",
            "_type": "employees",
            "_id": "1",
            "_score": 0.19178301,
            "_source": {
               "firstName": "Phalgun",
               "lastName": "Garimella",
               "hobbies": [
                  "Watching movies",
                  "Stamp collection",
                  "Reading books",
                  "Playing Cricket"
               ],
               "age": 30
            }
         }
      ]
   }
}




Previous                                                 Next                                                 Home

No comments:

Post a Comment