diff options
author | Mohammad Amoush <47069173+mamoush34@users.noreply.github.com> | 2020-01-19 15:15:53 +0300 |
---|---|---|
committer | Mohammad Amoush <47069173+mamoush34@users.noreply.github.com> | 2020-01-19 15:15:53 +0300 |
commit | 7683e1fbb53fe683c0d04e537d89fb53d768e852 (patch) | |
tree | d81eebcd5a129550a49fdfc852b8bb6220907a1a /solr-8.3.1/example/example-DIH/solr/tika | |
parent | f4382d73eec75f7d7f4bfe6eae3fb1efa128a021 (diff) | |
parent | aff9cc02750eb032ade98d77cf9ff45677063fc8 (diff) |
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into webcam_mohammad
Diffstat (limited to 'solr-8.3.1/example/example-DIH/solr/tika')
4 files changed, 142 insertions, 0 deletions
diff --git a/solr-8.3.1/example/example-DIH/solr/tika/conf/managed-schema b/solr-8.3.1/example/example-DIH/solr/tika/conf/managed-schema new file mode 100644 index 000000000..b90f314ff --- /dev/null +++ b/solr-8.3.1/example/example-DIH/solr/tika/conf/managed-schema @@ -0,0 +1,54 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<schema name="example-DIH-tika" version="1.6"> + + <uniqueKey>id</uniqueKey> + + <field name="id" type="string" indexed="true" stored="true"/> + <field name="author" type="text_simple" indexed="true" stored="true"/> + <field name="title" type="text_simple" indexed="true" stored="true" multiValued="true"/> + <field name="format" type="string" indexed="true" stored="true"/> + + <!-- field "text" is searchable but, to save space, it is not stored --> + <field name="text" type="text_simple" indexed="true" stored="false" multiValued="true"/> + + + <!-- Uncomment the dynamicField definition to catch any other fields + that may have been declared in the DIH configuration. + This speeds up prototyping. 
+ --> + <!-- <dynamicField name="*" type="string" indexed="true" stored="true" multiValued="true"/> --> + + <!-- The StrField type is not analyzed, but is indexed/stored verbatim. --> + <fieldType name="string" class="solr.StrField" sortMissingLast="true"/> + + + <!-- A basic text field that has reasonable, generic + cross-language defaults: it tokenizes with StandardTokenizer, + and lowercases. It does not deal with stopwords or other issues. + See other examples for alternative definitions. + --> + <fieldType name="text_simple" class="solr.TextField" positionIncrementGap="100"> + <analyzer> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.LowerCaseFilterFactory"/> + </analyzer> + </fieldType> + +</schema>
\ No newline at end of file diff --git a/solr-8.3.1/example/example-DIH/solr/tika/conf/solrconfig.xml b/solr-8.3.1/example/example-DIH/solr/tika/conf/solrconfig.xml new file mode 100644 index 000000000..cc189e2fe --- /dev/null +++ b/solr-8.3.1/example/example-DIH/solr/tika/conf/solrconfig.xml @@ -0,0 +1,61 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- + This is a DEMO configuration highlighting elements + specifically needed to get this example running + such as libraries and request handler specifics. + + It uses defaults for, or does not define, most production-level settings + such as various caches or auto-commit policies. + + See Solr Reference Guide and other examples for + more details on a well-configured solrconfig.xml + https://lucene.apache.org/solr/guide/the-well-configured-solr-instance.html +--> + +<config> + <!-- Controls what version of Lucene various components of Solr + adhere to. Generally, you want to use the latest version to + get all bug fixes and improvements. It is highly recommended + that you fully re-index after changing this setting as it can + affect both how text is indexed and queried. 
+ --> + <luceneMatchVersion>8.3.1</luceneMatchVersion> + + <!-- Load Data Import Handler and Apache Tika (extraction) libraries --> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar"/> + <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar"/> + + <requestHandler name="/select" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <str name="df">text</str> + <!-- Change from JSON to XML format (the default prior to Solr 7.0) + <str name="wt">xml</str> + --> + </lst> + </requestHandler> + + <requestHandler name="/dataimport" class="solr.DataImportHandler"> + <lst name="defaults"> + <str name="config">tika-data-config.xml</str> + </lst> + </requestHandler> + +</config> diff --git a/solr-8.3.1/example/example-DIH/solr/tika/conf/tika-data-config.xml b/solr-8.3.1/example/example-DIH/solr/tika/conf/tika-data-config.xml new file mode 100644 index 000000000..5286fc418 --- /dev/null +++ b/solr-8.3.1/example/example-DIH/solr/tika/conf/tika-data-config.xml @@ -0,0 +1,26 @@ +<dataConfig> + <dataSource type="BinFileDataSource"/> + <document> + <entity name="file" processor="FileListEntityProcessor" dataSource="null" + baseDir="${solr.install.dir}/example/exampledocs" fileName=".*pdf" + rootEntity="false"> + + <field column="file" name="id"/> + + <entity name="pdf" processor="TikaEntityProcessor" + url="${file.fileAbsolutePath}" format="text"> + + <field column="Author" name="author" meta="true"/> + <!-- in the original PDF, the Author meta-field name is upper-cased, + but in Solr schema it is lower-cased + --> + + <field column="title" name="title" meta="true"/> + <field column="dc:format" name="format" meta="true"/> + + <field column="text" name="text"/> + + </entity> + </entity> + </document> +</dataConfig> diff --git a/solr-8.3.1/example/example-DIH/solr/tika/core.properties b/solr-8.3.1/example/example-DIH/solr/tika/core.properties new file mode 100644 index 
000000000..8b1378917 --- /dev/null +++ b/solr-8.3.1/example/example-DIH/solr/tika/core.properties @@ -0,0 +1 @@ + |