Replaced troublesome pyes integration with direct calls to the Elasticsearch REST API
This commit is contained in:
36
common/djangoapps/search/analyzer.json
Normal file
36
common/djangoapps/search/analyzer.json
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
{
|
||||||
|
"analyzer": {
|
||||||
|
|
||||||
|
"transcript_analyzer": {
|
||||||
|
"type": "custom",
|
||||||
|
"tokenizer": "standard",
|
||||||
|
"filter": ["protected", "asciifolding", "custom_word_delimiter", "lowercase", "custom_stemmer", "shingle"],
|
||||||
|
"char_filter": ["custom_mapping"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
"filter" : {
|
||||||
|
|
||||||
|
"custom_word_delimiter":{
|
||||||
|
"type": "word_delimiter",
|
||||||
|
"preserve_original": "true"
|
||||||
|
},
|
||||||
|
|
||||||
|
"custom_stemmer": {
|
||||||
|
"type": "stemmer",
|
||||||
|
"name": "english"
|
||||||
|
},
|
||||||
|
|
||||||
|
"protected": {
|
||||||
|
"type": "keyword_marker",
|
||||||
|
"keywords_path": "protectedWords.txt"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
"char_filter": {
|
||||||
|
"custom_mapping": {
|
||||||
|
"type": "mapping",
|
||||||
|
"mappings": ["\n=>-"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
104
common/djangoapps/search/es_requests.py
Normal file
104
common/djangoapps/search/es_requests.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
class ElasticDatabase:
    """Small wrapper around the Elasticsearch REST API built on ``requests``.

    Every method builds a url below ``self.url`` and issues one HTTP request.
    """

    def __init__(self, url, index_settings_file, *args):
        """
        Will initialize elastic search object with any indices specified by args

        specifically the url should be something of the form `http://localhost:9200`
        A trailing slash on the url is tolerated (it is stripped when request
        urls are built), but it is cleaner not to include one.

        args should be a list of mappings, one per type. Each entry may be either
        a dictionary or a JSON string encoding such a dictionary.

        Example Dictionary:
            {"index": "transcript", "type": "6-002x", "mapping":
                {
                    "properties" : {
                        "searchable_text": {
                            "type": "string",
                            "store": "yes",
                            "index": "analyzed"
                        }
                    }
                }
            }

        Eventually we will support different configuration files for different indices, but
        since this is only indexing transcripts right now it seems excessive

        The content of index_settings_file is read once, as raw bytes, and is
        sent unchanged as the request body whenever an index is created.
        """

        self.url = url
        self.args = args
        # Use a context manager so the file handle is closed deterministically.
        with open(index_settings_file, 'rb') as settings_file:
            self.index_settings = settings_file.read()

    def _build_url(self, *parts):
        """Joins the base url and the given path segments with single slashes"""
        return "/".join([self.url.rstrip("/")] + list(parts))

    def parse_args(self):
        """
        Creates the index and the type mapping described by each entry of self.args.

        An entry that is not a dictionary (or JSON for one) stops processing of
        all remaining entries. An entry missing one of the keys `index`, `type`
        or `mapping` is reported and skipped.
        """

        for mapping in self.args:
            json_mapping = mapping
            if not isinstance(mapping, dict):
                try:
                    json_mapping = json.loads(mapping)
                except (TypeError, ValueError):
                    json_mapping = None

            if not isinstance(json_mapping, dict):
                print("Badly formed JSON args, please check your mappings file")
                break

            # Only the key lookups live in the try block, so a KeyError raised
            # while talking to the server is not mistaken for a missing key.
            try:
                index = json_mapping['index']
                type_ = json_mapping['type']
                type_mapping = json_mapping['mapping']
            except KeyError:
                print("Could not find needed keys. Keys found: ")
                print(list(json_mapping.keys()))
                continue

            self.setup_index(index)
            self.setup_type(index, type_, type_mapping)

    def setup_type(self, index, type_, json_mapping):
        """
        json_mapping should be a dictionary starting at the properties level of a mapping.

        The type level will be added, so if you include it things will break. The purpose of this
        is to encourage loose coupling between types and mappings for better code

        Returns the response to the PUT request.
        """

        full_url = self._build_url(index, type_, "_mapping")
        # Serialize explicitly: handing requests a dict would form-encode it
        # instead of sending a JSON document.
        json_put_body = json.dumps({type_: json_mapping})
        return requests.put(full_url, data=json_put_body)

    def has_index(self, index):
        """Checks to see if a given index exists in the database, returns existence boolean.

        Raises RuntimeError if the server answers with anything other than a 200 or a 404,
        since that means something is wrong."""

        status = requests.head(self._build_url(index)).status_code
        if status == 200:
            return True
        if status == 404:
            return False
        raise RuntimeError("Got an unexpected response code: " + str(status))

    def setup_index(self, index):
        """Creates a new elasticsearch index, returns the response it gets"""
        full_url = self._build_url(index) + "/"
        return requests.put(full_url, data=self.index_settings)

    def index_data(self, index, type_, id_, json_data):
        """Stores one document under the given index, type and id.

        json_data may be an already serialized JSON string, which is sent as is,
        or a dictionary, which is serialized first. It is assumed to match the
        mapping of the type. Returns the response to the PUT request."""

        if isinstance(json_data, dict):
            json_data = json.dumps(json_data)
        full_url = self._build_url(index, type_, id_)
        return requests.put(full_url, data=json_data)

    def get_index_settings(self, index):
        """Returns the current settings of the given index, parsed from JSON"""
        full_url = self._build_url(index, "_settings")
        return json.loads(requests.get(full_url).content)

    def get_type_mapping(self, index, type_):
        """Returns the current mapping of the given type, parsed from JSON"""
        full_url = self._build_url(index, type_, "_mapping")
        return json.loads(requests.get(full_url).content)
|
||||||
@@ -5,20 +5,7 @@
|
|||||||
"index": "analyzed",
|
"index": "analyzed",
|
||||||
"store": "yes",
|
"store": "yes",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"term_vector": "with_positions_offsets"
|
"term_vector": "with_positions_offsets",
|
||||||
},
|
"analyzer": "transcript_analyzer"
|
||||||
|
|
||||||
"phonetic_text": {
|
|
||||||
"boost": 1.0,
|
|
||||||
"index": "analyzed",
|
|
||||||
"store": "yes",
|
|
||||||
"type": "string",
|
|
||||||
"term_vector": "with_positions_offsets"
|
|
||||||
},
|
|
||||||
|
|
||||||
"uuid": {
|
|
||||||
"index": "not_analyzed",
|
|
||||||
"store": "yes",
|
|
||||||
"type": "string"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
15
common/djangoapps/search/protectedWords.txt
Normal file
15
common/djangoapps/search/protectedWords.txt
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
"gauss",
|
||||||
|
"stokes",
|
||||||
|
"navier",
|
||||||
|
"einstein",
|
||||||
|
"goddard",
|
||||||
|
"oppenheimer",
|
||||||
|
"bloch",
|
||||||
|
"hawking",
|
||||||
|
"newton",
|
||||||
|
"bohr",
|
||||||
|
"darwin",
|
||||||
|
"planck",
|
||||||
|
"rontgen",
|
||||||
|
"tesla",
|
||||||
|
"franklin"
|
||||||
8
common/djangoapps/search/settings.json
Normal file
8
common/djangoapps/search/settings.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"settings": {
|
||||||
|
"index": {
|
||||||
|
"number_of_replicas": 2,
|
||||||
|
"number_of_shards": 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user