aboutsummaryrefslogtreecommitdiff
path: root/solr-8.3.1/example/example-DIH/solr/tika
diff options
context:
space:
mode:
authorMohammad Amoush <47069173+mamoush34@users.noreply.github.com>2020-01-19 15:15:53 +0300
committerMohammad Amoush <47069173+mamoush34@users.noreply.github.com>2020-01-19 15:15:53 +0300
commit7683e1fbb53fe683c0d04e537d89fb53d768e852 (patch)
treed81eebcd5a129550a49fdfc852b8bb6220907a1a /solr-8.3.1/example/example-DIH/solr/tika
parentf4382d73eec75f7d7f4bfe6eae3fb1efa128a021 (diff)
parentaff9cc02750eb032ade98d77cf9ff45677063fc8 (diff)
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into webcam_mohammad
Diffstat (limited to 'solr-8.3.1/example/example-DIH/solr/tika')
-rw-r--r--solr-8.3.1/example/example-DIH/solr/tika/conf/managed-schema54
-rw-r--r--solr-8.3.1/example/example-DIH/solr/tika/conf/solrconfig.xml61
-rw-r--r--solr-8.3.1/example/example-DIH/solr/tika/conf/tika-data-config.xml26
-rw-r--r--solr-8.3.1/example/example-DIH/solr/tika/core.properties1
4 files changed, 142 insertions, 0 deletions
diff --git a/solr-8.3.1/example/example-DIH/solr/tika/conf/managed-schema b/solr-8.3.1/example/example-DIH/solr/tika/conf/managed-schema
new file mode 100644
index 000000000..b90f314ff
--- /dev/null
+++ b/solr-8.3.1/example/example-DIH/solr/tika/conf/managed-schema
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="example-DIH-tika" version="1.6">
+
+ <uniqueKey>id</uniqueKey>
+
+ <field name="id" type="string" indexed="true" stored="true"/>
+ <field name="author" type="text_simple" indexed="true" stored="true"/>
+ <field name="title" type="text_simple" indexed="true" stored="true" multiValued="true"/>
+ <field name="format" type="string" indexed="true" stored="true"/>
+
+ <!-- Field "text" is indexed (searchable) but not stored, to save space. -->
+ <field name="text" type="text_simple" indexed="true" stored="false" multiValued="true"/>
+
+
+ <!-- Uncomment the dynamicField definition below to catch any other fields
+ that may have been declared in the DIH configuration.
+ This speeds up prototyping.
+ -->
+ <!-- <dynamicField name="*" type="string" indexed="true" stored="true" multiValued="true"/> -->
+
+ <!-- The StrField type is not analyzed, but is indexed/stored verbatim. -->
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
+
+
+ <!-- A basic text field type with reasonable, generic
+ cross-language defaults: it tokenizes with StandardTokenizer
+ and lowercases. It does not deal with stopwords or other issues.
+ See other examples for alternative definitions.
+ -->
+ <fieldType name="text_simple" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+</schema> \ No newline at end of file
diff --git a/solr-8.3.1/example/example-DIH/solr/tika/conf/solrconfig.xml b/solr-8.3.1/example/example-DIH/solr/tika/conf/solrconfig.xml
new file mode 100644
index 000000000..cc189e2fe
--- /dev/null
+++ b/solr-8.3.1/example/example-DIH/solr/tika/conf/solrconfig.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is a DEMO configuration highlighting elements
+ specifically needed to get this example running
+ such as libraries and request handler specifics.
+
+ It uses defaults for, or does not define, most production-level settings
+ such as the various caches and auto-commit policies.
+
+ See the Solr Reference Guide and other examples for
+ more details on a well-configured solrconfig.xml:
+ https://lucene.apache.org/solr/guide/the-well-configured-solr-instance.html
+-->
+
+<config>
+ <!-- Controls which version of Lucene the various components of Solr
+ adhere to. Generally, you want to use the latest version to
+ get all bug fixes and improvements. It is highly recommended
+ that you fully re-index after changing this setting, as it can
+ affect both how text is indexed and how it is queried.
+ -->
+ <luceneMatchVersion>8.3.1</luceneMatchVersion>
+
+ <!-- Load the Data Import Handler and Apache Tika (extraction) libraries; paths are relative to solr.install.dir (default: ../../../..) -->
+ <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar"/>
+ <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar"/>
+
+ <requestHandler name="/select" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="df">text</str>
+ <!-- Uncomment to change the response format from JSON to XML (the default prior to Solr 7.0)
+ <str name="wt">xml</str>
+ -->
+ </lst>
+ </requestHandler>
+
+ <requestHandler name="/dataimport" class="solr.DataImportHandler">
+ <lst name="defaults">
+ <str name="config">tika-data-config.xml</str>
+ </lst>
+ </requestHandler>
+
+</config>
diff --git a/solr-8.3.1/example/example-DIH/solr/tika/conf/tika-data-config.xml b/solr-8.3.1/example/example-DIH/solr/tika/conf/tika-data-config.xml
new file mode 100644
index 000000000..5286fc418
--- /dev/null
+++ b/solr-8.3.1/example/example-DIH/solr/tika/conf/tika-data-config.xml
@@ -0,0 +1,26 @@
+<dataConfig>
+ <dataSource type="BinFileDataSource"/>
+ <document>
+ <entity name="file" processor="FileListEntityProcessor" dataSource="null"
+ baseDir="${solr.install.dir}/example/exampledocs" fileName=".*\.pdf$"
+ rootEntity="false">
+ <!-- fileName is a regular expression; it is anchored to the ".pdf" extension so that only file names ending in .pdf are listed -->
+ <field column="file" name="id"/>
+
+ <entity name="pdf" processor="TikaEntityProcessor"
+ url="${file.fileAbsolutePath}" format="text">
+
+ <field column="Author" name="author" meta="true"/>
+ <!-- In the PDF metadata the Author field name is capitalized,
+ but the matching Solr schema field ("author") is lower-cased.
+ -->
+
+ <field column="title" name="title" meta="true"/>
+ <field column="dc:format" name="format" meta="true"/>
+
+ <field column="text" name="text"/>
+
+ </entity>
+ </entity>
+ </document>
+</dataConfig>
diff --git a/solr-8.3.1/example/example-DIH/solr/tika/core.properties b/solr-8.3.1/example/example-DIH/solr/tika/core.properties
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/solr-8.3.1/example/example-DIH/solr/tika/core.properties
@@ -0,0 +1 @@
+