Replaced the troublesome pyes integration with direct calls to the Elasticsearch REST API
This commit is contained in:
36
common/djangoapps/search/analyzer.json
Normal file
36
common/djangoapps/search/analyzer.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"analyzer": {
|
||||
|
||||
"transcript_analyzer": {
|
||||
"type": "custom",
|
||||
"tokenizer": "standard",
|
||||
"filter": ["protected", "asciifolding", "custom_word_delimiter", "lowercase", "custom_stemmer", "shingle"],
|
||||
"char_filter": ["custom_mapping"]
|
||||
}
|
||||
},
|
||||
|
||||
"filter" : {
|
||||
|
||||
"custom_word_delimiter":{
|
||||
"type": "word_delimiter",
|
||||
"preserve_original": "true"
|
||||
},
|
||||
|
||||
"custom_stemmer": {
|
||||
"type": "stemmer",
|
||||
"name": "english"
|
||||
},
|
||||
|
||||
"protected": {
|
||||
"type": "keyword_marker",
|
||||
"keywords_path": "protectedWords.txt"
|
||||
}
|
||||
},
|
||||
|
||||
"char_filter": {
|
||||
"custom_mapping": {
|
||||
"type": "mapping",
|
||||
"mappings": ["\n=>-"]
|
||||
}
|
||||
}
|
||||
}
|
||||
104
common/djangoapps/search/es_requests.py
Normal file
104
common/djangoapps/search/es_requests.py
Normal file
@@ -0,0 +1,104 @@
|
||||
import requests
|
||||
import json
|
||||
|
||||
|
||||
class ElasticDatabase(object):
    """Small wrapper around the Elasticsearch REST API, built on ``requests``.

    The instance stores the server base URL, the raw contents of an index
    settings file, and any type mappings handed to the constructor. No
    network traffic happens at construction time; call ``parse_args`` to
    create the indices and types described by the stored mappings.
    """

    def __init__(self, url, index_settings_file, *args):
        """
        Store the connection details and mappings for later use.

        url should be something of the form `http://localhost:9200`. A trailing
        slash is stripped so that joined URLs never contain an empty segment.

        index_settings_file is the path of a file whose raw contents are sent
        as the request body whenever an index is created (see setup_index).

        args should be a list of mappings, each one either a dictionary or a
        JSON string decoding to a dictionary, specifying the mapping to be used
        for a specific type.

        Example Dictionary:
            {"index": "transcript", "type": "6-002x", "mapping":
                {
                    "properties" : {
                        "searchable_text": {
                            "type": "string",
                            "store": "yes",
                            "index": "analyzed"
                        }
                    }
                }
            }

        Eventually we will support different configuration files for different indices, but
        since this is only indexing transcripts right now it seems excessive
        """
        self.url = url.rstrip("/")
        self.args = args
        # Read eagerly and close the handle right away instead of leaking it.
        with open(index_settings_file, 'rb') as settings_file:
            self.index_settings = settings_file.read()

    def _url(self, *parts):
        """Return the base url joined with the given path segments by "/"."""
        # "%s" formatting lets non-string segments (e.g. integer ids) be used
        # without raising inside str.join.
        return "/".join([self.url] + ["%s" % (part,) for part in parts])

    def parse_args(self):
        """
        Create the index and type described by every mapping in self.args.

        Each entry must provide the keys 'index', 'type' and 'mapping'.
        Processing stops at the first entry that is not a dictionary and cannot
        be decoded into one; entries that lack a required key are reported and
        skipped.
        """
        for mapping in self.args:
            json_mapping = mapping
            if not isinstance(json_mapping, dict):
                try:
                    json_mapping = json.loads(mapping)
                except (TypeError, ValueError):
                    json_mapping = None

            if not isinstance(json_mapping, dict):
                print("Badly formed JSON args, please check your mappings file")
                break

            try:
                index = json_mapping['index']
                type_ = json_mapping['type']
                type_mapping = json_mapping['mapping']
            except KeyError:
                print("Could not find needed keys. Keys found: ")
                print(list(json_mapping.keys()))
                continue

            # Only issue requests once every required key is known to exist.
            self.setup_index(index)
            self.setup_type(index, type_, type_mapping)

    def setup_type(self, index, type_, json_mapping):
        """
        PUT a mapping for type_ inside index and return the response.

        json_mapping should be a dictionary starting at the properties level of a mapping.

        The type level will be added, so if you include it things will break. The purpose of this
        is to encourage loose coupling between types and mappings for better code
        """
        full_url = self._url(index, type_, "_mapping")
        # Serialize explicitly: handing requests a dict makes it form-encode
        # the body rather than send JSON.
        json_put_body = json.dumps({type_: json_mapping})
        return requests.put(full_url, data=json_put_body)

    def has_index(self, index):
        """
        Check whether the given index exists, returning a boolean.

        A 200 response means the index exists and a 404 means it does not.
        Any other status code raises RuntimeError.
        """
        status = requests.head(self._url(index)).status_code
        if status == 200:
            return True
        if status == 404:
            return False
        raise RuntimeError("Got an unexpected response code: " + str(status))

    def setup_index(self, index):
        """Creates a new elasticsearch index, returns the response it gets"""
        full_url = self._url(index) + "/"
        return requests.put(full_url, data=self.index_settings)

    def index_data(self, index, type_, id_, json_data):
        """
        PUT a document at index/type_/id_ and return the response.

        json_data may be an already serialized JSON document, which is sent
        unchanged, or a dictionary/list, which is serialized first. The data is
        assumed to match the mapping of the given type.
        """
        full_url = self._url(index, type_, id_)
        if isinstance(json_data, (dict, list)):
            json_data = json.dumps(json_data)
        return requests.put(full_url, data=json_data)

    def get_index_settings(self, index):
        """Returns the decoded JSON body of a GET on the index's _settings url"""
        full_url = self._url(index, "_settings")
        return json.loads(requests.get(full_url).content)

    def get_type_mapping(self, index, type_):
        """Returns the decoded JSON body of a GET on the type's _mapping url"""
        full_url = self._url(index, type_, "_mapping")
        return json.loads(requests.get(full_url).content)
|
||||
@@ -5,20 +5,7 @@
|
||||
"index": "analyzed",
|
||||
"store": "yes",
|
||||
"type": "string",
|
||||
"term_vector": "with_positions_offsets"
|
||||
},
|
||||
|
||||
"phonetic_text": {
|
||||
"boost": 1.0,
|
||||
"index": "analyzed",
|
||||
"store": "yes",
|
||||
"type": "string",
|
||||
"term_vector": "with_positions_offsets"
|
||||
},
|
||||
|
||||
"uuid": {
|
||||
"index": "not_analyzed",
|
||||
"store": "yes",
|
||||
"type": "string"
|
||||
"term_vector": "with_positions_offsets",
|
||||
"analyzer": "transcript_analyzer"
|
||||
}
|
||||
}
|
||||
15
common/djangoapps/search/protectedWords.txt
Normal file
15
common/djangoapps/search/protectedWords.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
"gauss",
|
||||
"stokes",
|
||||
"navier",
|
||||
"einstein",
|
||||
"goddard",
|
||||
"oppenheimer",
|
||||
"bloch",
|
||||
"hawkings",
|
||||
"newton",
|
||||
"bohr",
|
||||
"darwin",
|
||||
"planck",
|
||||
"rontgen",
|
||||
"tesla",
|
||||
"franklin"
|
||||
8
common/djangoapps/search/settings.json
Normal file
8
common/djangoapps/search/settings.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"settings": {
|
||||
"index": {
|
||||
"number_of_replicas": 2,
|
||||
"number_of_shards": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user