diff options
author | kimdahey <claire_kim1@brown.edu> | 2020-01-16 11:31:41 -0500 |
---|---|---|
committer | kimdahey <claire_kim1@brown.edu> | 2020-01-16 11:31:41 -0500 |
commit | 6be0e19ed0bd13f3796f542affa5a2e52674650c (patch) | |
tree | 1be222ea9341ecd8020fad3149035fa650a8a07f /solr-8.3.1/example/example-DIH/solr/atom/conf/managed-schema | |
parent | 5cde81d8c6b4dcd8d0796f8669b668763957f395 (diff) | |
parent | e410cde0e430553002d4e1a2f64364b57b65fdbc (diff) |
merged w master
Diffstat (limited to 'solr-8.3.1/example/example-DIH/solr/atom/conf/managed-schema')
-rw-r--r-- | solr-8.3.1/example/example-DIH/solr/atom/conf/managed-schema | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/solr-8.3.1/example/example-DIH/solr/atom/conf/managed-schema b/solr-8.3.1/example/example-DIH/solr/atom/conf/managed-schema new file mode 100644 index 000000000..58751520d --- /dev/null +++ b/solr-8.3.1/example/example-DIH/solr/atom/conf/managed-schema @@ -0,0 +1,106 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + +<schema name="example-DIH-atom" version="1.6"> + <uniqueKey>id</uniqueKey> + + <field name="id" type="string" indexed="true" stored="true" required="true"/> + <field name="title" type="text_en_splitting" indexed="true" stored="true"/> + <field name="author" type="string" indexed="true" stored="true"/> + <field name="category" type="string" indexed="true" stored="true" multiValued="true"/> + <field name="link" type="string" indexed="true" stored="true"/> + <field name="summary" type="text_en_splitting" indexed="true" stored="true"/> + <field name="rank" type="pint" indexed="true" stored="true"/> + + <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/> + + <!-- Catch-all field, aggregating all "useful to search as text" fields (id, title, author, category, summary) via the copyField instructions below; it is indexed but not stored --> + <field name="text" type="text_en_splitting" indexed="true" stored="false" multiValued="true"/> + + <field name="urls" type="url_only" indexed="true" stored="false"/> + + + <copyField source="id" dest="text"/> + <copyField source="title" dest="text"/> + <copyField source="author" dest="text"/> + <copyField source="category" dest="text"/> + <copyField source="summary" dest="text"/> + + <!-- Copy summary into the urls field for faceting; its url_only type keeps only the token types listed in url_types.txt at index time --> + <copyField source="summary" dest="urls"/> + + <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/> + <fieldType name="pint" class="solr.IntPointField" docValues="true"/> + <fieldType name="pdate" class="solr.DatePointField" docValues="true"/> + + + <!-- A text field with defaults appropriate for English, plus + aggressive word-splitting and autophrase features enabled. + This field type is modeled on text_en (not defined in this schema), except it adds + WordDelimiterGraphFilter to enable splitting and matching of + words on case-change, alphanumeric boundaries, and + non-alphanumeric chars. This means certain compound word + cases will work, for example query "wi fi" will match + document "WiFi" or "wi-fi". 
+ --> + <fieldType name="text_en_splitting" class="solr.TextField" + positionIncrementGap="100" autoGeneratePhraseQueries="true"> + <analyzer type="index"> + <tokenizer class="solr.WhitespaceTokenizerFactory"/> + <!-- in this example, we will only use synonyms at query time + <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> + --> + <!-- Case insensitive stop word removal. --> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/> + <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" + catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> + <filter class="solr.PorterStemFilterFactory"/> + <filter class="solr.FlattenGraphFilterFactory"/> + </analyzer> + <analyzer type="query"> + <tokenizer class="solr.WhitespaceTokenizerFactory"/> + <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> + <filter class="solr.StopFilterFactory" + ignoreCase="true" + words="lang/stopwords_en.txt" + /> + <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" + catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> + <filter class="solr.PorterStemFilterFactory"/> + </analyzer> + </fieldType> + + <!-- Field type that extracts URLs from the text. + As the stored representation is not changed, it is only useful for faceting. + It is not terribly useful for searching URLs either, as there are too many special symbols. At index time the text is split by UAX29URLEmailTokenizerFactory and only the token types listed in url_types.txt are kept; the query analyzer uses KeywordTokenizerFactory. 
+ --> + <fieldType name="url_only" class="solr.TextField" positionIncrementGap="100"> + <analyzer type="index"> + <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/> + <filter class="solr.TypeTokenFilterFactory" types="url_types.txt" useWhitelist="true"/> + </analyzer> + <analyzer type="query"> + <tokenizer class="solr.KeywordTokenizerFactory"/> + </analyzer> + </fieldType> + +</schema> |