aboutsummaryrefslogtreecommitdiff
path: root/solr-8.1.1/example/example-DIH/solr/atom
diff options
context:
space:
mode:
authorserver <brownptcdash@gmail.com>2019-12-10 18:12:37 -0500
committerserver <brownptcdash@gmail.com>2019-12-10 18:12:37 -0500
commit7478e610d99d1f2fb383ecbfa0b70d72eae27f81 (patch)
tree59486701838067351f00580315c05690f36a39ce /solr-8.1.1/example/example-DIH/solr/atom
parent096718ee546afbd7568bf8ec9e23dca0556d814d (diff)
solr changes
Diffstat (limited to 'solr-8.1.1/example/example-DIH/solr/atom')
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml35
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt54
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema106
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt17
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml64
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt29
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt1
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/core.properties1
8 files changed, 0 insertions, 307 deletions
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
deleted file mode 100644
index b7de812d0..000000000
--- a/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
+++ /dev/null
@@ -1,35 +0,0 @@
-<dataConfig>
- <dataSource type="URLDataSource"/>
- <document>
-
- <entity name="stackoverflow"
- url="https://stackoverflow.com/feeds/tag/solr"
- processor="XPathEntityProcessor"
- forEach="/feed|/feed/entry"
- transformer="HTMLStripTransformer,RegexTransformer">
-
- <!-- Pick this value up from the feed level and apply to all documents -->
- <field column="lastchecked_dt" xpath="/feed/updated" commonField="true"/>
-
- <!-- Keep only the final numeric part of the URL -->
- <field column="id" xpath="/feed/entry/id" regex=".*/" replaceWith=""/>
-
- <field column="title" xpath="/feed/entry/title"/>
- <field column="author" xpath="/feed/entry/author/name"/>
- <field column="category" xpath="/feed/entry/category/@term"/>
- <field column="link" xpath="/feed/entry/link[@rel='alternate']/@href"/>
-
- <!-- Use transformers to convert HTML into plain text.
- There is also an UpdateRequestProcess to trim remaining spaces.
- -->
- <field column="summary" xpath="/feed/entry/summary" stripHTML="true" regex="( |\n)+" replaceWith=" "/>
-
- <!-- Ignore namespaces when matching XPath -->
- <field column="rank" xpath="/feed/entry/rank"/>
-
- <field column="published_dt" xpath="/feed/entry/published"/>
- <field column="updated_dt" xpath="/feed/entry/updated"/>
- </entity>
-
- </document>
-</dataConfig>
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt
deleted file mode 100644
index 2c164c0b2..000000000
--- a/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema b/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema
deleted file mode 100644
index 58751520d..000000000
--- a/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema
+++ /dev/null
@@ -1,106 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<schema name="example-DIH-atom" version="1.6">
- <uniqueKey>id</uniqueKey>
-
- <field name="id" type="string" indexed="true" stored="true" required="true"/>
- <field name="title" type="text_en_splitting" indexed="true" stored="true"/>
- <field name="author" type="string" indexed="true" stored="true"/>
- <field name="category" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="link" type="string" indexed="true" stored="true"/>
- <field name="summary" type="text_en_splitting" indexed="true" stored="true"/>
- <field name="rank" type="pint" indexed="true" stored="true"/>
-
- <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
-
- <!-- Catch-all field, aggregating all "useful to search as text" fields via the copyField instructions -->
- <field name="text" type="text_en_splitting" indexed="true" stored="false" multiValued="true"/>
-
- <field name="urls" type="url_only" indexed="true" stored="false"/>
-
-
- <copyField source="id" dest="text"/>
- <copyField source="title" dest="text"/>
- <copyField source="author" dest="text"/>
- <copyField source="category" dest="text"/>
- <copyField source="summary" dest="text"/>
-
- <!-- extract URLs from summary for faceting -->
- <copyField source="summary" dest="urls"/>
-
- <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
- <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
- <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
-
-
- <!-- A text field with defaults appropriate for English, plus
- aggressive word-splitting and autophrase features enabled.
- This field is just like text_en, except it adds
- WordDelimiterFilter to enable splitting and matching of
- words on case-change, alpha numeric boundaries, and
- non-alphanumeric chars. This means certain compound word
- cases will work, for example query "wi fi" will match
- document "WiFi" or "wi-fi".
- -->
- <fieldType name="text_en_splitting" class="solr.TextField"
- positionIncrementGap="100" autoGeneratePhraseQueries="true">
- <analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
- <!-- Case insensitive stop word removal. -->
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
- <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
- catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.PorterStemFilterFactory"/>
- <filter class="solr.FlattenGraphFilterFactory"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
- <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
- catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
- <filter class="solr.PorterStemFilterFactory"/>
- </analyzer>
- </fieldType>
-
- <!-- Field type that extracts URLs from the text.
- As the stored representation is not changed, it is only useful for faceting.
- It is not terribly useful for searching URLs either, as there are too many special symbols.
- -->
- <fieldType name="url_only" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/>
- <filter class="solr.TypeTokenFilterFactory" types="url_types.txt" useWhitelist="true"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.KeywordTokenizerFactory"/>
- </analyzer>
- </fieldType>
-
-</schema>
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt
deleted file mode 100644
index 1303e42a0..000000000
--- a/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-lucene
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml b/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml
deleted file mode 100644
index f78511354..000000000
--- a/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml
+++ /dev/null
@@ -1,64 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!--
- This is a DEMO configuration, highlighting elements
- specifically needed to get this example running
- such as libraries and request handler specifics.
-
- It uses defaults or does not define most of production-level settings
- such as various caches or auto-commit policies.
-
- See Solr Reference Guide and other examples for
- more details on a well configured solrconfig.xml
- https://lucene.apache.org/solr/guide/the-well-configured-solr-instance.html
--->
-<config>
-
- <!-- Controls what version of Lucene various components of Solr
- adhere to. Generally, you want to use the latest version to
- get all bug fixes and improvements. It is highly recommended
- that you fully re-index after changing this setting as it can
- affect both how text is indexed and queried.
- -->
- <luceneMatchVersion>8.1.1</luceneMatchVersion>
-
- <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar"/>
-
- <requestHandler name="/select" class="solr.SearchHandler">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="df">text</str>
- <!-- Change from JSON to XML format (the default prior to Solr 7.0)
- <str name="wt">xml</str>
- -->
- </lst>
- </requestHandler>
-
- <requestHandler name="/dataimport" class="solr.DataImportHandler">
- <lst name="defaults">
- <str name="config">atom-data-config.xml</str>
- <str name="processor">trim_text</str>
- </lst>
- </requestHandler>
-
- <updateProcessor class="solr.processor.TrimFieldUpdateProcessorFactory" name="trim_text">
- <str name="typeName">text_en_splitting</str>
- </updateProcessor>
-
-</config>
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt
deleted file mode 100644
index eab4ee875..000000000
--- a/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-#some test synonym mappings unlikely to appear in real input text
-aaafoo => aaabar
-bbbfoo => bbbfoo bbbbar
-cccfoo => cccbar cccbaz
-fooaaa,baraaa,bazaaa
-
-# Some synonym groups specific to this example
-GB,gib,gigabyte,gigabytes
-MB,mib,megabyte,megabytes
-Television, Televisions, TV, TVs
-#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
-#after us won't split it into two words.
-
-# Synonym mappings can be used for spelling correction too
-pixima => pixma
-
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt
deleted file mode 100644
index 808f31384..000000000
--- a/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt
+++ /dev/null
@@ -1 +0,0 @@
-<URL>
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/core.properties b/solr-8.1.1/example/example-DIH/solr/atom/core.properties
deleted file mode 100644
index 8b1378917..000000000
--- a/solr-8.1.1/example/example-DIH/solr/atom/core.properties
+++ /dev/null
@@ -1 +0,0 @@
-