diff options
author | server <brownptcdash@gmail.com> | 2019-12-10 18:12:37 -0500 |
---|---|---|
committer | server <brownptcdash@gmail.com> | 2019-12-10 18:12:37 -0500 |
commit | 7478e610d99d1f2fb383ecbfa0b70d72eae27f81 (patch) | |
tree | 59486701838067351f00580315c05690f36a39ce /solr-8.1.1/example/example-DIH/solr/atom | |
parent | 096718ee546afbd7568bf8ec9e23dca0556d814d (diff) |
solr changes
Diffstat (limited to 'solr-8.1.1/example/example-DIH/solr/atom')
8 files changed, 0 insertions, 307 deletions
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml deleted file mode 100644 index b7de812d0..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml +++ /dev/null @@ -1,35 +0,0 @@ -<dataConfig> - <dataSource type="URLDataSource"/> - <document> - - <entity name="stackoverflow" - url="https://stackoverflow.com/feeds/tag/solr" - processor="XPathEntityProcessor" - forEach="/feed|/feed/entry" - transformer="HTMLStripTransformer,RegexTransformer"> - - <!-- Pick this value up from the feed level and apply to all documents --> - <field column="lastchecked_dt" xpath="/feed/updated" commonField="true"/> - - <!-- Keep only the final numeric part of the URL --> - <field column="id" xpath="/feed/entry/id" regex=".*/" replaceWith=""/> - - <field column="title" xpath="/feed/entry/title"/> - <field column="author" xpath="/feed/entry/author/name"/> - <field column="category" xpath="/feed/entry/category/@term"/> - <field column="link" xpath="/feed/entry/link[@rel='alternate']/@href"/> - - <!-- Use transformers to convert HTML into plain text. - There is also an UpdateRequestProcess to trim remaining spaces. - --> - <field column="summary" xpath="/feed/entry/summary" stripHTML="true" regex="( |\n)+" replaceWith=" "/> - - <!-- Ignore namespaces when matching XPath --> - <field column="rank" xpath="/feed/entry/rank"/> - - <field column="published_dt" xpath="/feed/entry/published"/> - <field column="updated_dt" xpath="/feed/entry/updated"/> - </entity> - - </document> -</dataConfig> diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt deleted file mode 100644 index 2c164c0b2..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -# Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -such -that -the -their -then -there -these -they -this -to -was -will -with diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema b/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema deleted file mode 100644 index 58751520d..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/managed-schema +++ /dev/null @@ -1,106 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<schema name="example-DIH-atom" version="1.6"> - <uniqueKey>id</uniqueKey> - - <field name="id" type="string" indexed="true" stored="true" required="true"/> - <field name="title" type="text_en_splitting" indexed="true" stored="true"/> - <field name="author" type="string" indexed="true" stored="true"/> - <field name="category" type="string" indexed="true" stored="true" multiValued="true"/> - <field name="link" type="string" indexed="true" stored="true"/> - <field name="summary" type="text_en_splitting" indexed="true" stored="true"/> - <field name="rank" type="pint" indexed="true" stored="true"/> - - <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> - - <!-- Catch-all field, aggregating all "useful to search as text" fields via the copyField instructions --> - <field name="text" type="text_en_splitting" indexed="true" stored="false" multiValued="true"/> - - <field name="urls" type="url_only" indexed="true" stored="false"/> - - - <copyField source="id" dest="text"/> - <copyField source="title" dest="text"/> - <copyField source="author" dest="text"/> - <copyField source="category" dest="text"/> - <copyField source="summary" dest="text"/> - - <!-- extract URLs from summary for faceting --> - <copyField source="summary" dest="urls"/> - - <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/> - <fieldType name="pint" class="solr.IntPointField" docValues="true"/> - <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> - - - <!-- A text field with defaults appropriate for English, plus - aggressive word-splitting and autophrase features enabled. - This field is just like text_en, except it adds - WordDelimiterFilter to enable splitting and matching of - words on case-change, alpha numeric boundaries, and - non-alphanumeric chars. This means certain compound word - cases will work, for example query "wi fi" will match - document "WiFi" or "wi-fi". - --> - <fieldType name="text_en_splitting" class="solr.TextField" - positionIncrementGap="100" autoGeneratePhraseQueries="true"> - <analyzer type="index"> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <!-- in this example, we will only use synonyms at query time - <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> - --> - <!-- Case insensitive stop word removal. --> - <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> - <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" - catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> - <filter class="solr.PorterStemFilterFactory"/> - <filter class="solr.FlattenGraphFilterFactory"/> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.WhitespaceTokenizerFactory"/> - <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> - <filter class="solr.StopFilterFactory" - ignoreCase="true" - words="lang/stopwords_en.txt" - /> - <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" - catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> - <filter class="solr.LowerCaseFilterFactory"/> - <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> - <filter class="solr.PorterStemFilterFactory"/> - </analyzer> - </fieldType> - - <!-- Field type that extracts URLs from the text. - As the stored representation is not changed, it is only useful for faceting. - It is not terribly useful for searching URLs either, as there are too many special symbols. - --> - <fieldType name="url_only" class="solr.TextField" positionIncrementGap="100"> - <analyzer type="index"> - <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/> - <filter class="solr.TypeTokenFilterFactory" types="url_types.txt" useWhitelist="true"/> - </analyzer> - <analyzer type="query"> - <tokenizer class="solr.KeywordTokenizerFactory"/> - </analyzer> - </fieldType> - -</schema> diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt deleted file mode 100644 index 1303e42a0..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/protwords.txt +++ /dev/null @@ -1,17 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# Use a protected word file to protect against the stemmer reducing two -# unrelated words to the same base word. - -lucene diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml b/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml deleted file mode 100644 index f78511354..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/solrconfig.xml +++ /dev/null @@ -1,64 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- - This is a DEMO configuration, highlighting elements - specifically needed to get this example running - such as libraries and request handler specifics. - - It uses defaults or does not define most of production-level settings - such as various caches or auto-commit policies. - - See Solr Reference Guide and other examples for - more details on a well configured solrconfig.xml - https://lucene.apache.org/solr/guide/the-well-configured-solr-instance.html ---> -<config> - - <!-- Controls what version of Lucene various components of Solr - adhere to. Generally, you want to use the latest version to - get all bug fixes and improvements. It is highly recommended - that you fully re-index after changing this setting as it can - affect both how text is indexed and queried. - --> - <luceneMatchVersion>8.1.1</luceneMatchVersion> - - <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar"/> - - <requestHandler name="/select" class="solr.SearchHandler"> - <lst name="defaults"> - <str name="echoParams">explicit</str> - <str name="df">text</str> - <!-- Change from JSON to XML format (the default prior to Solr 7.0) - <str name="wt">xml</str> - --> - </lst> - </requestHandler> - - <requestHandler name="/dataimport" class="solr.DataImportHandler"> - <lst name="defaults"> - <str name="config">atom-data-config.xml</str> - <str name="processor">trim_text</str> - </lst> - </requestHandler> - - <updateProcessor class="solr.processor.TrimFieldUpdateProcessorFactory" name="trim_text"> - <str name="typeName">text_en_splitting</str> - </updateProcessor> - -</config> diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt deleted file mode 100644 index eab4ee875..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/synonyms.txt +++ /dev/null @@ -1,29 +0,0 @@ -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -#some test synonym mappings unlikely to appear in real input text -aaafoo => aaabar -bbbfoo => bbbfoo bbbbar -cccfoo => cccbar cccbaz -fooaaa,baraaa,bazaaa - -# Some synonym groups specific to this example -GB,gib,gigabyte,gigabytes -MB,mib,megabyte,megabytes -Television, Televisions, TV, TVs -#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming -#after us won't split it into two words. - -# Synonym mappings can be used for spelling correction too -pixima => pixma - diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt b/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt deleted file mode 100644 index 808f31384..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/conf/url_types.txt +++ /dev/null @@ -1 +0,0 @@ -<URL> diff --git a/solr-8.1.1/example/example-DIH/solr/atom/core.properties b/solr-8.1.1/example/example-DIH/solr/atom/core.properties deleted file mode 100644 index 8b1378917..000000000 --- a/solr-8.1.1/example/example-DIH/solr/atom/core.properties +++ /dev/null @@ -1 +0,0 @@ - |