Wednesday, 18 November 2015

Elasticsearch: Adding more than one analyzer to a field


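By using multi-fields, you can apply more than one analyzer to the same field. The approach is different from configuring a single analyzer, though: each additional analyzer is attached to a separate sub-field (for example, "title.english" or "title.raw"), not to the main field itself.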
PUT /blog/_mappings/posts
{
  "properties":{
    "title":{
      "type" : "string",
      "analyzer" : "english",
      "fields":{
        "english" : {
          "type":"string",
          "analyzer" : "english"
        },
        "raw" : {
          "type" : "string",
          "index" : "not_analyzed"
        }
      }
    }
  }
}


The main "title" field uses the english analyzer, the "title.english" field also uses the english analyzer, and the "title.raw" field uses no analyzer (it is not_analyzed).
GET /blog/_analyze?field=title
{
  "Age is an issue of mind over matter. If you don't mind, it doesn't matter."
}


For the above query, you will get the following response.

{
   "tokens": [
      {
         "token": "ag",
         "start_offset": 5,
         "end_offset": 8,
         "type": "<ALPHANUM>",
         "position": 1
      },
      {
         "token": "issu",
         "start_offset": 15,
         "end_offset": 20,
         "type": "<ALPHANUM>",
         "position": 4
      },
      {
         "token": "mind",
         "start_offset": 24,
         "end_offset": 28,
         "type": "<ALPHANUM>",
         "position": 6
      },
      {
         "token": "over",
         "start_offset": 29,
         "end_offset": 33,
         "type": "<ALPHANUM>",
         "position": 7
      },
      {
         "token": "matter",
         "start_offset": 34,
         "end_offset": 40,
         "type": "<ALPHANUM>",
         "position": 8
      },
      {
         "token": "you",
         "start_offset": 45,
         "end_offset": 48,
         "type": "<ALPHANUM>",
         "position": 10
      },
      {
         "token": "don't",
         "start_offset": 49,
         "end_offset": 54,
         "type": "<ALPHANUM>",
         "position": 11
      },
      {
         "token": "mind",
         "start_offset": 55,
         "end_offset": 59,
         "type": "<ALPHANUM>",
         "position": 12
      },
      {
         "token": "doesn't",
         "start_offset": 64,
         "end_offset": 71,
         "type": "<ALPHANUM>",
         "position": 14
      },
      {
         "token": "matter",
         "start_offset": 72,
         "end_offset": 78,
         "type": "<ALPHANUM>",
         "position": 15
      }
   ]
}

GET /blog/_analyze?field=title.raw
{
  "Age is an issue of mind over matter. If you don't mind, it doesn't matter."
}


For the above query, you will get the following response.

{
   "tokens": [
      {
         "token": "{\n  \"Age is an issue of mind over matter. If you don't mind, it doesn't matter.\"\n}\n",
         "start_offset": 0,
         "end_offset": 83,
         "type": "word",
         "position": 1
      }
   ]
}


Post some data to the type "posts" in the index "blog".

POST /blog/posts/1
{
  "title" : "Age is an issue of mind over matter. If you don't mind, it doesn't matter."
}

GET /blog/posts/_search
{
  "query":{
    "match":{
      "title.raw" : "age"
    }
  }
}


For the above query, you will get the following response.

{
   "took": 1,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 0,
      "max_score": null,
      "hits": []
   }
}


As you can observe, the response contains no documents. This is because title.raw is a not_analyzed field: the whole title is indexed as a single term, so the query "age" does not match it. If you run the same query against the title field, you will get document 1 in the response.

GET /blog/posts/_search
{
  "query":{
    "match":{
      "title" : "age"
    }
  }
}


For the above query, you will get the following response.

{
   "took": 1,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 1,
      "max_score": 0.095891505,
      "hits": [
         {
            "_index": "blog",
            "_type": "posts",
            "_id": "1",
            "_score": 0.095891505,
            "_source": {
               "title": "Age is an issue of mind over matter. If you don't mind, it doesn't matter."
            }
         }
      ]
   }
}

GET /blog/posts/_search
{
  "query":{
    "match":{
      "title" : "Age is an issue of mind over matter. If you don't mind, it doesn't matter."
    }
  }
}


For the above query, you will get the following response.

{
   "took": 2,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 1,
      "max_score": 0.35347727,
      "hits": [
         {
            "_index": "blog",
            "_type": "posts",
            "_id": "1",
            "_score": 0.35347727,
            "_source": {
               "title": "Age is an issue of mind over matter. If you don't mind, it doesn't matter."
            }
         }
      ]
   }
}






Previous                                                 Next                                                 Home

No comments:

Post a Comment