diff options
Diffstat (limited to 'solr-8.1.1/example/files/conf/update-script.js')
-rw-r--r-- | solr-8.1.1/example/files/conf/update-script.js | 115 |
1 files changed, 115 insertions, 0 deletions
// Origin: diff --git a/solr-8.1.1/example/files/conf/update-script.js
//         b/solr-8.1.1/example/files/conf/update-script.js
//         (new file mode 100644, index 000000000..2589968b5)
//
// NOTE: this script is written in ES5 on purpose (var, no arrow functions);
// the comments in get_class name Nashorn and Rhino as the target engines.

/**
 * Look up a Java class object by its fully-qualified name.
 *
 * First tries the Nashorn-style `Java.type(name).class`; if that throws for
 * any reason, falls back to walking the Rhino-style `Packages` tree.
 *
 * @param {string} name fully-qualified Java class name, dot separated
 * @returns {*} whatever the script engine yields for that class
 */
function get_class(name) {
  var clazz;
  try {
    // Java8 Nashorn. Bracket access reads the "class" property without eval.
    clazz = Java.type(name)["class"];
  } catch (e) {
    // Java7 Rhino: equivalent of Packages.<name>, resolved segment by segment
    // instead of eval("Packages." + name).
    var segments = name.split(".");
    clazz = Packages;
    for (var i = 0; i < segments.length; i++) {
      clazz = clazz[segments[i]];
    }
  }

  return clazz;
}

/**
 * Map a content type (parameters already stripped) onto a user-friendly
 * document category. Rules are tried in order and the first match wins.
 *
 * @param {string} ct content type, e.g. "image/jpeg"
 * @returns {string|undefined} category name, or undefined when nothing matches
 */
function simplify_content_type_(ct) {
  var rules = [
    [/^application\/rtf|wordprocessing/, "doc"],
    [/html/, "html"],
    [/^image\//, "image"],
    [/presentation|powerpoint/, "presentation"],
    [/spreadsheet|excel/, "spreadsheet"],
    [/^application\/pdf/, "pdf"],
    [/^text\/plain/, "text"]
  ];

  for (var i = 0; i < rules.length; i++) {
    if (rules[i][0].test(ct)) {
      return rules[i][1];
    }
  }
  return undefined;
}

/**
 * Derive doc_type / content_type_type_s / content_type_subtype_s from the
 * document's content_type field and store them on the document.
 *
 * A value without a slash is stored whole as the type and no subtype field
 * is written (the original code left this case as a TODO).
 *
 * @param {*} doc document exposing getFieldValue / setField
 */
function add_content_type_fields_(doc) {
  var ct = doc.getFieldValue("content_type");
  if (!ct) {
    return;
  }

  // strip off semicolon onward (e.g. "; charset=UTF-8")
  var semicolon_index = ct.indexOf(';');
  if (semicolon_index != -1) {
    ct = ct.substring(0, semicolon_index);
  }

  var doc_type = simplify_content_type_(ct);
  if (doc_type) {
    doc.setField("doc_type", doc_type);
  }

  // and split type/subtype
  var slash_index = ct.indexOf('/');
  if (slash_index == -1) {
    // No slash: avoid substring(0, -1); keep the whole value as the type.
    doc.setField("content_type_type_s", ct);
    return;
  }
  doc.setField("content_type_type_s", ct.substring(0, slash_index));
  doc.setField("content_type_subtype_s", ct.substring(slash_index + 1));
}

/**
 * Update-processor hook for add commands.
 *
 * 1. Logs the document id.
 * 2. Simplifies content_type into facet-friendly fields, so that types such as
 *    image/jpeg and image/tiff both end up in an "image" doc_type.
 * 3. If the document has content, runs it through the index analyzer of the
 *    "text_email_url" field type and adds every token to a "<type>_ss" field,
 *    where <type> is the token type lower-cased with angle brackets removed.
 *
 * Relies on the globals `logger` and `req` being supplied by the host.
 *
 * @param {*} cmd add command; cmd.solrDoc is the document being indexed
 */
function processAdd(cmd) {
  // Declared with var: the original assigned an implicit global here.
  var doc = cmd.solrDoc;  // org.apache.solr.common.SolrInputDocument
  var id = doc.getFieldValue("id");
  logger.info("update-script#processAdd: id=" + id);

  add_content_type_fields_(doc);

  var content = doc.getFieldValue("content");
  if (!content) {
    return; // No content found, so we are done here
  }

  var analyzer =
       req.getCore().getLatestSchema()
       .getFieldTypeByName("text_email_url")
       .getIndexAnalyzer();

  var token_stream = analyzer.tokenStream("content", content);
  try {
    var term_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.CharTermAttribute"));
    var type_att = token_stream.getAttribute(get_class("org.apache.lucene.analysis.tokenattributes.TypeAttribute"));
    token_stream.reset();
    while (token_stream.incrementToken()) {
      doc.addField(type_att.type().replace(/[<>]/g, '').toLowerCase() + "_ss", term_att.toString());
    }
    token_stream.end();
  } finally {
    // Always release the stream, even when analysis fails part-way.
    token_stream.close();
  }
}

/** Update-processor hook for delete commands; intentionally a no-op. */
function processDelete(cmd) {
  // no-op
}

/** Update-processor hook for merge-indexes commands; intentionally a no-op. */
function processMergeIndexes(cmd) {
  // no-op
}

/** Update-processor hook for commit commands; intentionally a no-op. */
function processCommit(cmd) {
  // no-op
}

/** Update-processor hook for rollback commands; intentionally a no-op. */
function processRollback(cmd) {
  // no-op
}

/** Hook invoked when processing finishes; intentionally a no-op. */
function finish() {
  // no-op
}