Elasticsearch Query 101

Back in my startup life at Tataatsu Idealabs, I was given an opportunity to design the Elasticsearch setup for one of our new products. Fortunately, Elasticsearch was not new to our company, though it was new to me. Since we had used it earlier for another product, I could pick up a basic idea of how it works, though not many of the details; still, that existing setup was pretty clean and served its purpose well.

My first step towards learning was, needless to say, the wonderful Elasticsearch documentation and its sample snippets. The second step, of course, was to analyze my end users' search patterns and my data.

Having done these exercises, I was confident enough to design a new schema and type for our new product, and I did. Ever since the day I implemented that, I have wanted to write a blog post about Elasticsearch query 101, but I only got the chance a few years later. That is too bad, but I still hope it will be useful to many. Most of the snippets here were used with Elasticsearch versions < 1.0, and the latest version is mostly the same. Note: all the keywords and type definitions mentioned here belong solely to Tataatsu Idealabs Pvt. Ltd. [code language="javascript"] 1)give bucket (count) by date field for every day { "query" : { "match" : { "_all":"u.s." } }, "facets" : { "volume_by_day" : { "date_histogram" : { "field" : "crdate", "interval" : "day" } } }, "sort":{"crdate":"desc"}, "fields":["crdate","content"] } 2) facets with multi match query for multiple fields { "query" : { "filtered":{ "query":{ "multi_match":{ "fields":["title^10","content"], "query":"u.s." } }, "filter":{ "range":{"crdate":{"from":"2013-01-01" } } } } }, "facets" : { "volume_by_day" : { "date_histogram" : { "field" : "crdate", "interval" : "day" } } }, "fields":["title","content"], "highlight":{ "fields":{ "content":{"fragment_size":50} } } } 3) query with facet and exclude search string with bool query, also highlight n chars using fragment_size { "query" : { "filtered":{ "query":{ "bool":{ "must":[ {"multi_match":{ "fields":["title^10","content"], "query":"u.s." }} ], "must_not":[ {"multi_match":{ "fields":["title^10","content"], "query":"US" }} ] } }, "filter":{ "range":{"crdate":{"from":"2013-01-01" } } } } }, "facets" : { "volume_by_day" : { "date_histogram" : { "field" : "crdate", "interval" : "day" } } }, "fields":["title","content"], "highlight":{ "fields":{ "content":{"fragment_size":50} } } } 4)return child records only if the parent type field matches. 
{ "query" : { "filtered":{ "query":{ "has_parent":{ "parent_type":"prj", "query":{"match":{"pid":"1"}} } } } } } 5)return parent record if its chile matches a search string. { "query" : { "filtered":{ "query":{ "bool":{ "must":[ {"multi_match":{ "fields":["title^10"], "query":"feed"}}, { "has_child" : { "child_type":"doc", "query":{"match":{"content":"U.S."}} } } ], "must_not":[ ] } } } } } 6) aggregated term count bucket { "query" : { "match_all" : { } }, "facets" : { "term frequency":{ "terms": {"fields":["tagl.tag","tago.tag","tagp.tag"]} } } } 7) how to query if you indexed using nested(array) type. { "query" : { "nested":{ "path": "docmeta", "query":{ "bool":{ "must":[ {"match":{ "docmeta.tagl.tag":"new york"}} ] } } } } } 8) with global settings, facets will search the all the documents does not consider the query string. _cache will cache the result. POST _search { "facets": { "0": { "date_histogram": { "field": "crdate", "interval": "1d" }, "global": true, "facet_filter": { "fquery": { "query": { "filtered": { "query": { "query_string": { "query": "u.s." } }, "filter": { "and": { "filters": [ { "terms": { "_type": [ "doc" ] } }, { "terms": { "pid": [ "1","6" ] } } ], "_cache": true } } } } } } } }, "size": 0 } 9) with decay and score function to boost the result which has numeric field POST _search { "query": { "function_score": { "functions": [ { "gauss": { "did": { "origin": "10", "scale": "20" } }, "gauss": { "id": { "origin": "10", "scale": "20" } } } ], "query": { "match": { "content": "U.S." 
} }, "score_mode": "multiply" } }, "fields": [ "did" ] } 10) custom score with filters, advanced POST _search { "query": { "custom_filters_score": { "query": { "match": { "content": "stocks" }}, "filters": [ { "filter": { "term": { "doc.did": "74" } }, "boost": 2 }, { "filter": { "term": { "doc.did": "68" } }, "boost": 3 } ] } } } 11) recent document (actual script [script": "(0.08 / ((3.16*pow(10,-11)) * abs(now - doc['date'].date.getMillis()) + 0.05)) + 1.0")]) POST _search { "query": { "custom_filters_score": { "query": { "match": { "content": "u.s." }}, "params": { "now": "20140301" }, "filters": [ { "filter": { "exists": { "field": "crdate" } }, "script": "(abs(now - doc['crdate'].value))" } ] } }, "fields": [ "crdate" ], "size": 40 } 12) sample parent child query from "posts" index and return if child type(rating) match term query. curl -XPOST localhost:9200/posts/post/_search -d '{ "query": { "filtered": { "query": { "text": {"title": "bolivia"} }, "filter":{ "has_child": { "type": "rating", "query" : { "filtered": { "query": { "match_all": {}}, "filter" : { "and": [ {"term": {"user_id": 1234}}, {"range": {"rating": {"gt" : 3}}} ] } } } } } } } }' 13) simple parent child query. POST _search { "query": { "has_child": { "type": "docmeta", "query" : { "filtered": { "query": { "match_all": {}}, "filter" : { "and": [ {"term": {"did": "1"}} ] } } } } } } 14)parent child type qury with minimum condition should meet. POST _search { "query": { "bool": { "must": [ { "text": { "content": "ibm" } } ], "should": [ { "prefix": { "title": "ibm" } }, { "has_child": { "type": "docmeta", "query": { "prefix": { "stype.type": "s" } } } } ], "minimum_number_should_match" : 1 } } } 15) phrase query which maintain the full text in order. 
POST _search { "query": { "bool": { "must": [ { "text": { "name": "smith" } } ], "should": [ { "has_child": { "type": "email", "query": { "match_phrase": { "stype.title": "rss bucket5" } } } } ], "minimum_number_should_match" : 1 } } } 16)more complex parent child query with various filters. POST _search { "query": { "filtered": { "query": { "bool": { "should": [ { "query_string": { "query": "ibm" } } ] } }, "filter": { "bool": { "must": [ { "terms": { "_type": [ "doc" ] } }, { "terms": { "pid": [ 1 ] } }, { "range": { "crdate": { "gte": null, "lte": null } } }, { "has_child": { "type": "docmeta", "query": { "match_phrase": { "tago.tag": "apple" } } } } ], "_cache": true } } } }, "size": 50, "fields": [ "title", "content" ] } 17) highlight wiht parent child query, so that only exact match would highlight. POST _search { "query": { "filtered": { "query": { "bool": { "should": [ { "query_string": { "query": "microsoft corp" } } ] } }, "filter": { "bool": { "must": [ { "terms": { "_type": [ "doc" ] } }, { "terms": { "pid": [ 1 ] } }, { "range": { "crdate": { "gte": null, "lte": null } } }, { "has_child": { "type": "docmeta", "query": { "match_phrase": { "tago.tag": "microsoft corp" } } } } ], "_cache": true } } } }, "size": 50, "fields": [ "title", "content" ], "highlight": { "fields": { "content": { "fragment_size": 100, "highlight_query": { "match_phrase": { "content": "microsoft corp" } } } } } } 18) more specific parent child query, with multiple or condition for entity, field shoudl be not_analyzed, POST _search { "query": { "filtered": { "query": { "bool": { "should": [ { "query_string": { "query": "microsoft corp" } } ] } }, "filter": { "bool": { "must": [ { "terms": { "_type": [ "doc" ] } }, { "terms": { "pid": [ 1 ] } }, { "range": { "crdate": { "gte": null, "lte": null } } }, { "has_child": { "type": "docmeta", "query": { "filtered": { "query": {"match_all": {}}, "filter": { "or": { "filters": [ { "term": { "tagp.tag": "anju gosalia" } }, { "term": { 
"tagkw.tag": "software" } } ], "_cache": true } } } } } } ], "_cache": true } } } }, "size": 50, "fields": [ "title", "content" ], "highlight": { "fields": { "content": { "fragment_size": 100, "highlight_query": { "match_phrase": { "content": "microsoft" } } },"title" :{} } } } 19) using multi match approach POST _search { "query": { "filtered": { "query": { "bool": { "should": [ { "query_string": { "query": "microsoft corp" } } ] } }, "filter": { "bool": { "must": [ { "terms": { "_type": [ "doc" ] } }, { "terms": { "pid": [ 1 ] } }, { "range": { "crdate": { "gte": null, "lte": null } } }, { "has_child": { "type": "docmeta", "query": { "multi_match": { "query": "rss bucket1", "fields": ["_all"], "operator" : "and" } } } } ], "_cache": true } } } }, "size": 50, "fields": [ "title", "content" ], "highlight": { "fields": { "content": { "fragment_size": 100, "highlight_query": { "match_phrase": { "content": "microsoft corp" } } } } } } [/code]