ES 7.0.0的简单使用操作
本文是基于ES 7.0.0的简单使用操作。
大部分是一些RESTful API的使用,内容比较基础且杂乱,仅仅是一个记录。
ES 7.0.0版本去除了类型type的概念,每个index下默认创建一个类型_doc
创建索引示例:
创建一个名为laws的索引,指定默认分词器为ik_max_word,并过滤掉文档中的html标签,同时定义文档结构mapping
PUT /laws
{
"settings": {
"number_of_shards" : 1,
"number_of_replicas" : 0,
"analysis.analyzer.default.type":"ik_max_word",
"analysis.char_filter":["html_strip"]
},
"mappings": {
"properties": {
"title":{
"type": "text"
},
"DocNo":{
"type": "text"
},
"unit":{
"type": "text"
},
"content":{
"type": "text"
}
}
}
}
为每个字段设置分词器analyzer以及搜索分词器search_analyzer
PUT /laws20190718
{
"settings": {
"number_of_shards" : 1,
"number_of_replicas" : 0,
"analysis.char_filter":["html_strip"]
},
"mappings": {
"properties": {
"id":{
"type":"keyword"
},
"title":{
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"strs":{
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"category":{
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"date":{
"type":"date"
}
}
}
}
写入单条数据,文档内容含有英文双引号的,需要使用三引号"""包围在文档内容两侧
PUT laws/_doc/1
{
"tid" : "1",
"content" : """<p align="center">2019</p>"""
}
批量写入文档数据,文档内容含有英文双引号的,需要使用三引号"""包围在文档内容两侧
PUT /laws/_bulk
{"index":{"_id" : 2}}
{"tid" : "1","content" : """<p align="center">2019</p>"""}
{"index":{"_id" : 3}}
{"tid" : "1","content" : """<p align="center">2020</p>"""}
{"index":{"_id" : 4}}
{"tid" : "1","content" : """<p align="center">2021</p>"""}
简单搜索match
参数:from 从指定的偏移量中提取搜索结果,默认为 0
参数:size 返回搜索结果条数,默认为 10
GET /laws/_search?from=0&size=10
{
"query": {
"match": {
"content": "采购"
}
}
}
ik分词模式
- ik_smart 最粗粒度的拆分
- ik_max_word 将文本做最细粒度的拆分,会穷尽各种可能的词语组合
结巴分词模式
- jieba_search 倾向于完整、顺序的切分,类似于ik_smart
- jieba_index 倾向于分出更多可能的词,类似于ik_max_word
测试分词引擎
GET _analyze
{
"text" : "我爱中华人民共和国",
"analyzer": "ik_smart"
}
ik_smart返回结果
{
"tokens" : [
{
"token" : "我",
"start_offset" : 0,
"end_offset" : 1,
"type" : "CN_CHAR",
"position" : 0
},
{
"token" : "爱",
"start_offset" : 1,
"end_offset" : 2,
"type" : "CN_CHAR",
"position" : 1
},
{
"token" : "中华人民共和国",
"start_offset" : 2,
"end_offset" : 9,
"type" : "CN_WORD",
"position" : 2
}
]
}
GET _analyze
{
"text" : "我爱中华人民共和国",
"analyzer": "ik_max_word"
}
ik_max_word返回结果
{
"tokens" : [
{
"token" : "我",
"start_offset" : 0,
"end_offset" : 1,
"type" : "CN_CHAR",
"position" : 0
},
{
"token" : "爱",
"start_offset" : 1,
"end_offset" : 2,
"type" : "CN_CHAR",
"position" : 1
},
{
"token" : "中华人民共和国",
"start_offset" : 2,
"end_offset" : 9,
"type" : "CN_WORD",
"position" : 2
},
{
"token" : "中华人民",
"start_offset" : 2,
"end_offset" : 6,
"type" : "CN_WORD",
"position" : 3
},
{
"token" : "中华",
"start_offset" : 2,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 4
},
{
"token" : "华人",
"start_offset" : 3,
"end_offset" : 5,
"type" : "CN_WORD",
"position" : 5
},
{
"token" : "人民共和国",
"start_offset" : 4,
"end_offset" : 9,
"type" : "CN_WORD",
"position" : 6
},
{
"token" : "人民",
"start_offset" : 4,
"end_offset" : 6,
"type" : "CN_WORD",
"position" : 7
},
{
"token" : "共和国",
"start_offset" : 6,
"end_offset" : 9,
"type" : "CN_WORD",
"position" : 8
},
{
"token" : "共和",
"start_offset" : 6,
"end_offset" : 8,
"type" : "CN_WORD",
"position" : 9
},
{
"token" : "国",
"start_offset" : 8,
"end_offset" : 9,
"type" : "CN_CHAR",
"position" : 10
}
]
}
GET _analyze
{
"text" : "我爱中华人民共和国",
"analyzer": "jieba_index"
}
jieba_index返回结果
{
"tokens" : [
{
"token" : "我爱",
"start_offset" : 0,
"end_offset" : 2,
"type" : "word",
"position" : 0
},
{
"token" : "中华",
"start_offset" : 2,
"end_offset" : 4,
"type" : "word",
"position" : 1
},
{
"token" : "中华人民共和国",
"start_offset" : 2,
"end_offset" : 9,
"type" : "word",
"position" : 1
},
{
"token" : "华人",
"start_offset" : 3,
"end_offset" : 5,
"type" : "word",
"position" : 1
},
{
"token" : "人民",
"start_offset" : 4,
"end_offset" : 6,
"type" : "word",
"position" : 2
},
{
"token" : "共和",
"start_offset" : 6,
"end_offset" : 8,
"type" : "word",
"position" : 3
},
{
"token" : "共和国",
"start_offset" : 6,
"end_offset" : 9,
"type" : "word",
"position" : 3
}
]
}
GET _analyze
{
"text" : "我爱中华人民共和国",
"analyzer": "jieba_search"
}
jieba_search返回结果
{
"tokens" : [
{
"token" : "我爱",
"start_offset" : 0,
"end_offset" : 2,
"type" : "word",
"position" : 0
},
{
"token" : "中华人民共和国",
"start_offset" : 2,
"end_offset" : 9,
"type" : "word",
"position" : 1
}
]
}
多字段检索multi_match
GET /lawss/_search
{
"query": {
"multi_match": {
"query": "spark",
"fields": ["title","strs"]
}
}
}
结果关键词高亮
GET /lawss/_search
{
"query": {
"multi_match": {
"query": "数据仓库",
"fields": ["title","strs"]
}
},
"highlight": {
"pre_tags": ["<b>"],
"post_tags": ["</b>"],
"fields": {
"title": {},
"strs": {}
}
}
}
简单SQL查询
POST /_sql
{
"query": "SELECT title,category FROM lawss WHERE date > '2018-01-01'"
}
# 加入format=txt参数可以将json结果转为表格形式
# 支持的返回格式:csv、json(默认)、tsv、txt、yaml、cbor(二进制)、smile(二进制)
# 设置“fetch_size”数值可控制返回记录数(可在SQL语句中添加LIMIT控制返回记录数)
# 默认每次请求提取1000条记录
POST /_sql?format=txt
{
"query": "SELECT title,category FROM lawss WHERE date > '2018-01-01'",
"fetch_size" : 5
}
索引别名,重建索引时可不影响现有业务正常运行。参见官方文档。
# 添加别名
POST /_aliases
{
"actions": [
{
"add": {
"index": "lawss",
"alias": "laws"
}
}
]
}
# 删除别名
POST /_aliases
{
"actions": [
{
"remove": {
"index": "lawss",
"alias": "laws"
}
}
]
}
重建索引reindex
需要预先创建好新索引的设置以及映射等。参见官方文档。
# 将索引"lawss"重建至索引"laws20190718"
POST _reindex
{
"source": {
"index": "lawss"
},
"dest": {
"index": "laws20190718"
}
}
自定义检索返回字段
如下所示,只返回ID、NAME、TYPE这三个字段的数据
GET /audit_law/_search
{
"query": {
"multi_match": {
"query": "项目",
"fields": []
}
},
"_source": ["ID","NAME","TYPE"]
}
多条件组合查询
GET /audit_law/_search
{
"query": {
"bool": {
"must": [
{"multi_match": {"query": "专项资金","fields": []}}
],
"filter": [
{"term": {"RELEASE_ORGAN": {"value": "财政"}}},
{"term": {"REGION_NAME": {"value": "全国"}}},
{"term": {"NAME": {"value": "财政"}}},
{"term": {"DOCUMENT_NO": {"value": "2018年12月28日"}}} ,
{"term": {"INDUSTRY_NAME": "教育"}},
{"terms": {"LAW_TYPE_CODE": ["10805","10303"]}}
]
}
},
"sort": [
{
"PUBLISH_TIME": {
"order": "desc"
}
}
],
"_source": ["ID","NAME","TYPE","INDUSTRY_NAME", "RELEASE_ORGAN","DOCUMENT_NO","REGION_NAME"]
}