diff options
author | Sam Wilkins <samwilkins333@gmail.com> | 2019-05-09 22:14:30 -0400 |
---|---|---|
committer | Sam Wilkins <samwilkins333@gmail.com> | 2019-05-09 22:14:30 -0400 |
commit | ea1bffb43f0533cde4cf8a3f34da8b0e25ef6268 (patch) | |
tree | 5016ac7d3bbb37e62a809d738ddc45c3797bab18 | |
parent | 11c6ecdebdfb84246169f1e573c0e47e5336eff6 (diff) | |
parent | 310d4b0f4bb56adaa71e4b42d09e483e4face0f7 (diff) |
merged with master and fixed linter issues
66 files changed, 9386 insertions, 68 deletions
diff --git a/.vscode/launch.json b/.vscode/launch.json index fb91a1080..e92a4949a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -15,6 +15,15 @@ }, { "type": "node", + "request": "attach", + "name": "Typescript Server", + "protocol": "inspector", + "port": 9229, + "localRoot": "${workspaceFolder}", + "remoteRoot": "." + }, + { + "type": "node", "request": "launch", "name": "Mocha Tests", "program": "${workspaceFolder}/node_modules/mocha/bin/_mocha", diff --git a/package.json b/package.json index 943451c3b..147f59c25 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "main": "index.js", "scripts": { "start": "ts-node-dev -- src/server/index.ts", + "debug": "ts-node-dev --inspect -- src/server/index.ts", "build": "webpack --env production", "test": "mocha -r ts-node/register test/**/*.ts", "tsc": "tsc" @@ -166,6 +167,7 @@ "serializr": "^1.5.1", "socket.io": "^2.2.0", "socket.io-client": "^2.2.0", + "solr-node": "^1.1.3", "typescript-collections": "^1.3.2", "url-loader": "^1.1.2", "uuid": "^3.3.2", diff --git a/solr/conf/lang/contractions_ca.txt b/solr/conf/lang/contractions_ca.txt new file mode 100644 index 000000000..307a85f91 --- /dev/null +++ b/solr/conf/lang/contractions_ca.txt @@ -0,0 +1,8 @@ +# Set of Catalan contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +l +m +n +s +t diff --git a/solr/conf/lang/contractions_fr.txt b/solr/conf/lang/contractions_fr.txt new file mode 100644 index 000000000..f1bba51b2 --- /dev/null +++ b/solr/conf/lang/contractions_fr.txt @@ -0,0 +1,15 @@ +# Set of French contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +l +m +t +qu +n +s +j +d +c +jusqu +quoiqu +lorsqu +puisqu diff --git a/solr/conf/lang/contractions_ga.txt b/solr/conf/lang/contractions_ga.txt new file mode 100644 index 000000000..9ebe7fa34 --- /dev/null +++ b/solr/conf/lang/contractions_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +d +m +b diff --git a/solr/conf/lang/contractions_it.txt b/solr/conf/lang/contractions_it.txt new file mode 100644 index 000000000..cac040953 --- /dev/null +++ b/solr/conf/lang/contractions_it.txt @@ -0,0 +1,23 @@ +# Set of Italian contractions for ElisionFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +c +l +all +dall +dell +nell +sull +coll +pell +gl +agl +dagl +degl +negl +sugl +un +m +t +s +v +d diff --git a/solr/conf/lang/hyphenations_ga.txt b/solr/conf/lang/hyphenations_ga.txt new file mode 100644 index 000000000..4d2642cc5 --- /dev/null +++ b/solr/conf/lang/hyphenations_ga.txt @@ -0,0 +1,5 @@ +# Set of Irish hyphenations for StopFilter +# TODO: load this as a resource from the analyzer and sync it in build.xml +h +n +t diff --git a/solr/conf/lang/stemdict_nl.txt b/solr/conf/lang/stemdict_nl.txt new file mode 100644 index 000000000..441072971 --- /dev/null +++ b/solr/conf/lang/stemdict_nl.txt @@ -0,0 +1,6 @@ +# Set of overrides for the dutch stemmer +# TODO: load this as a resource from the analyzer and sync it in build.xml +fiets fiets +bromfiets bromfiets +ei eier +kind kinder diff --git a/solr/conf/lang/stoptags_ja.txt b/solr/conf/lang/stoptags_ja.txt new file mode 100644 index 000000000..71b750845 --- /dev/null +++ b/solr/conf/lang/stoptags_ja.txt @@ -0,0 +1,420 @@ +# +# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. +# +# Any token with a part-of-speech tag that exactly matches those defined in this +# file are removed from the token stream. +# +# Set your own stoptags by uncommenting the lines below. Note that comments are +# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, +# etc. that can be useful for building you own stoptag set. +# +# The entire possible tagset is provided below for convenience. +# +##### +# noun: unclassified nouns +#名詞 +# +# noun-common: Common nouns or nouns where the sub-classification is undefined +#名詞-一般 +# +# noun-proper: Proper nouns where the sub-classification is undefined +#名詞-固有名詞 +# +# noun-proper-misc: miscellaneous proper nouns +#名詞-固有名詞-一般 +# +# noun-proper-person: Personal names where the sub-classification is undefined +#名詞-固有名詞-人名 +# +# noun-proper-person-misc: names that cannot be divided into surname and +# given name; foreign names; names where the surname or given name is unknown. +# e.g. お市の方 +#名詞-固有名詞-人名-一般 +# +# noun-proper-person-surname: Mainly Japanese surnames. +# e.g. 山田 +#名詞-固有名詞-人名-姓 +# +# noun-proper-person-given_name: Mainly Japanese given names. +# e.g. 太郎 +#名詞-固有名詞-人名-名 +# +# noun-proper-organization: Names representing organizations. +# e.g. 通産省, NHK +#名詞-固有名詞-組織 +# +# noun-proper-place: Place names where the sub-classification is undefined +#名詞-固有名詞-地域 +# +# noun-proper-place-misc: Place names excluding countries. +# e.g. アジア, バルセロナ, 京都 +#名詞-固有名詞-地域-一般 +# +# noun-proper-place-country: Country names. +# e.g. 日本, オーストラリア +#名詞-固有名詞-地域-国 +# +# noun-pronoun: Pronouns where the sub-classification is undefined +#名詞-代名詞 +# +# noun-pronoun-misc: miscellaneous pronouns: +# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ +#名詞-代名詞-一般 +# +# noun-pronoun-contraction: Spoken language contraction made by combining a +# pronoun and the particle 'wa'. +# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ +#名詞-代名詞-縮約 +# +# noun-adverbial: Temporal nouns such as names of days or months that behave +# like adverbs. Nouns that represent amount or ratios and can be used adverbially, +# e.g. 金曜, 一月, 午後, 少量 +#名詞-副詞可能 +# +# noun-verbal: Nouns that take arguments with case and can appear followed by +# 'suru' and related verbs (する, できる, なさる, くださる) +# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り +#名詞-サ変接続 +# +# noun-adjective-base: The base form of adjectives, words that appear before な ("na") +# e.g. 健康, 安易, 駄目, だめ +#名詞-形容動詞語幹 +# +# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. +# e.g. 0, 1, 2, 何, 数, 幾 +#名詞-数 +# +# noun-affix: noun affixes where the sub-classification is undefined +#名詞-非自立 +# +# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that +# attach to the base form of inflectional words, words that cannot be classified +# into any of the other categories below. This category includes indefinite nouns. +# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, +# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, +# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, +# わり, 割り, 割, ん-口語/, もん-口語/ +#名詞-非自立-一般 +# +# noun-affix-adverbial: noun affixes that that can behave as adverbs. +# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, +# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, +# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, +# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, +# 儘, 侭, みぎり, 矢先 +#名詞-非自立-副詞可能 +# +# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars +# with the stem よう(だ) ("you(da)"). +# e.g. よう, やう, 様 (よう) +#名詞-非自立-助動詞語幹 +# +# noun-affix-adjective-base: noun affixes that can connect to the indeclinable +# connection form な (aux "da"). +# e.g. みたい, ふう +#名詞-非自立-形容動詞語幹 +# +# noun-special: special nouns where the sub-classification is undefined. +#名詞-特殊 +# +# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is +# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base +# form of inflectional words. +# e.g. そう +#名詞-特殊-助動詞語幹 +# +# noun-suffix: noun suffixes where the sub-classification is undefined. +#名詞-接尾 +# +# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect +# to ガル or タイ and can combine into compound nouns, words that cannot be classified into +# any of the other categories below. In general, this category is more inclusive than +# 接尾語 ("suffix") and is usually the last element in a compound noun. +# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, +# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 +#名詞-接尾-一般 +# +# noun-suffix-person: Suffixes that form nouns and attach to person names more often +# than other nouns. +# e.g. 君, 様, 著 +#名詞-接尾-人名 +# +# noun-suffix-place: Suffixes that form nouns and attach to place names more often +# than other nouns. +# e.g. 町, 市, 県 +#名詞-接尾-地域 +# +# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that +# can appear before スル ("suru"). +# e.g. 化, 視, 分け, 入り, 落ち, 買い +#名詞-接尾-サ変接続 +# +# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, +# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the +# conjunctive form of inflectional words. +# e.g. そう +#名詞-接尾-助動詞語幹 +# +# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive +# form of inflectional words and appear before the copula だ ("da"). +# e.g. 的, げ, がち +#名詞-接尾-形容動詞語幹 +# +# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. +# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) +#名詞-接尾-副詞可能 +# +# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category +# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach +# to numbers. +# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 +#名詞-接尾-助数詞 +# +# noun-suffix-special: Special suffixes that mainly attach to inflecting words. +# e.g. (楽し) さ, (考え) 方 +#名詞-接尾-特殊 +# +# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words +# together. +# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) +#名詞-接続詞的 +# +# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are +# semantically verb-like. +# e.g. ごらん, ご覧, 御覧, 頂戴 +#名詞-動詞非自立的 +# +# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, +# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") +# is いわく ("iwaku"). +#名詞-引用文字列 +# +# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and +# behave like an adjective. +# e.g. 申し訳, 仕方, とんでも, 違い +#名詞-ナイ形容詞語幹 +# +##### +# prefix: unclassified prefixes +#接頭詞 +# +# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) +# excluding numerical expressions. +# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) +#接頭詞-名詞接続 +# +# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb +# in conjunctive form followed by なる/なさる/くださる. +# e.g. お (読みなさい), お (座り) +#接頭詞-動詞接続 +# +# prefix-adjectival: Prefixes that attach to adjectives. +# e.g. お (寒いですねえ), バカ (でかい) +#接頭詞-形容詞接続 +# +# prefix-numerical: Prefixes that attach to numerical expressions. +# e.g. 約, およそ, 毎時 +#接頭詞-数接続 +# +##### +# verb: unclassified verbs +#動詞 +# +# verb-main: +#動詞-自立 +# +# verb-auxiliary: +#動詞-非自立 +# +# verb-suffix: +#動詞-接尾 +# +##### +# adjective: unclassified adjectives +#形容詞 +# +# adjective-main: +#形容詞-自立 +# +# adjective-auxiliary: +#形容詞-非自立 +# +# adjective-suffix: +#形容詞-接尾 +# +##### +# adverb: unclassified adverbs +#副詞 +# +# adverb-misc: Words that can be segmented into one unit and where adnominal +# modification is not possible. +# e.g. あいかわらず, 多分 +#副詞-一般 +# +# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, +# な, する, だ, etc. +# e.g. こんなに, そんなに, あんなに, なにか, なんでも +#副詞-助詞類接続 +# +##### +# adnominal: Words that only have noun-modifying forms. +# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, +# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, +# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き +#連体詞 +# +##### +# conjunction: Conjunctions that can occur independently. +# e.g. が, けれども, そして, じゃあ, それどころか +接続詞 +# +##### +# particle: unclassified particles. +助詞 +# +# particle-case: case particles where the subclassification is undefined. +助詞-格助詞 +# +# particle-case-misc: Case particles. +# e.g. から, が, で, と, に, へ, より, を, の, にて +助詞-格助詞-一般 +# +# particle-case-quote: the "to" that appears after nouns, a person’s speech, +# quotation marks, expressions of decisions from a meeting, reasons, judgements, +# conjectures, etc. +# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) +助詞-格助詞-引用 +# +# particle-case-compound: Compounds of particles and verbs that mainly behave +# like case particles. +# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, +# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, +# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, +# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, +# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, +# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, +# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, +# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ +助詞-格助詞-連語 +# +# particle-conjunctive: +# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, +# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, +# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ +助詞-接続助詞 +# +# particle-dependency: +# e.g. こそ, さえ, しか, すら, は, も, ぞ +助詞-係助詞 +# +# particle-adverbial: +# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, +# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, +# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, +# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, +# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) +助詞-副助詞 +# +# particle-interjective: particles with interjective grammatical roles. +# e.g. (松島) や +助詞-間投助詞 +# +# particle-coordinate: +# e.g. と, たり, だの, だり, とか, なり, や, やら +助詞-並立助詞 +# +# particle-final: +# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, +# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ +助詞-終助詞 +# +# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is +# adverbial, conjunctive, or sentence final. For example: +# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 +# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 +# 「(祈りが届いたせい) か (, 試験に合格した.)」 +# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 +# e.g. か +助詞-副助詞/並立助詞/終助詞 +# +# particle-adnominalizer: The "no" that attaches to nouns and modifies +# non-inflectional words. +助詞-連体化 +# +# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs +# that are giongo, giseigo, or gitaigo. +# e.g. に, と +助詞-副詞化 +# +# particle-special: A particle that does not fit into one of the above classifications. +# This includes particles that are used in Tanka, Haiku, and other poetry. +# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) +助詞-特殊 +# +##### +# auxiliary-verb: +助動詞 +# +##### +# interjection: Greetings and other exclamations. +# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, +# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい +#感動詞 +# +##### +# symbol: unclassified Symbols. +記号 +# +# symbol-misc: A general symbol not in one of the categories below. +# e.g. [○◎@$〒→+] +記号-一般 +# +# symbol-comma: Commas +# e.g. [,、] +記号-読点 +# +# symbol-period: Periods and full stops. +# e.g. [..。] +記号-句点 +# +# symbol-space: Full-width whitespace. +記号-空白 +# +# symbol-open_bracket: +# e.g. [({‘“『【] +記号-括弧開 +# +# symbol-close_bracket: +# e.g. [)}’”』」】] +記号-括弧閉 +# +# symbol-alphabetic: +#記号-アルファベット +# +##### +# other: unclassified other +#その他 +# +# other-interjection: Words that are hard to classify as noun-suffixes or +# sentence-final particles. +# e.g. (だ)ァ +その他-間投 +# +##### +# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. +# e.g. あの, うんと, えと +フィラー +# +##### +# non-verbal: non-verbal sound. +非言語音 +# +##### +# fragment: +#語断片 +# +##### +# unknown: unknown part of speech. +#未知語 +# +##### End of file diff --git a/solr/conf/lang/stopwords_ar.txt b/solr/conf/lang/stopwords_ar.txt new file mode 100644 index 000000000..046829db6 --- /dev/null +++ b/solr/conf/lang/stopwords_ar.txt @@ -0,0 +1,125 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا +من +ومن +منها +منه +في +وفي +فيها +فيه +و +ف +ثم +او +أو +ب +بها +به +ا +أ +اى +اي +أي +أى +لا +ولا +الا +ألا +إلا +لكن +ما +وما +كما +فما +عن +مع +اذا +إذا +ان +أن +إن +انها +أنها +إنها +انه +أنه +إنه +بان +بأن +فان +فأن +وان +وأن +وإن +التى +التي +الذى +الذي +الذين +الى +الي +إلى +إلي +على +عليها +عليه +اما +أما +إما +ايضا +أيضا +كل +وكل +لم +ولم +لن +ولن +هى +هي +هو +وهى +وهي +وهو +فهى +فهي +فهو +انت +أنت +لك +لها +له +هذه +هذا +تلك +ذلك +هناك +كانت +كان +يكون +تكون +وكانت +وكان +غير +بعض +قد +نحو +بين +بينما +منذ +ضمن +حيث +الان +الآن +خلال +بعد +قبل +حتى +عند +عندما +لدى +جميع diff --git a/solr/conf/lang/stopwords_bg.txt b/solr/conf/lang/stopwords_bg.txt new file mode 100644 index 000000000..1ae4ba2ae --- /dev/null +++ b/solr/conf/lang/stopwords_bg.txt @@ -0,0 +1,193 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +а +аз +ако +ала +бе +без +беше +би +бил +била +били +било +близо +бъдат +бъде +бяха +в +вас +ваш +ваша +вероятно +вече +взема +ви +вие +винаги +все +всеки +всички +всичко +всяка +във +въпреки +върху +г +ги +главно +го +д +да +дали +до +докато +докога +дори +досега +доста +е +едва +един +ето +за +зад +заедно +заради +засега +затова +защо +защото +и +из +или +им +има +имат +иска +й +каза +как +каква +какво +както +какъв +като +кога +когато +което +които +кой +който +колко +която +къде +където +към +ли +м +ме +между +мен +ми +мнозина +мога +могат +може +моля +момента +му +н +на +над +назад +най +направи +напред +например +нас +не +него +нея +ни +ние +никой +нито +но +някои +някой +няма +обаче +около +освен +особено +от +отгоре +отново +още +пак +по +повече +повечето +под +поне +поради +после +почти +прави +пред +преди +през +при +пък +първо +с +са +само +се +сега +си +скоро +след +сме +според +сред +срещу +сте +съм +със +също +т +тази +така +такива +такъв +там +твой +те +тези +ти +тн +то +това +тогава +този +той +толкова +точно +трябва +тук +тъй +тя +тях +у +харесва +ч +че +често +чрез +ще +щом +я diff --git a/solr/conf/lang/stopwords_ca.txt b/solr/conf/lang/stopwords_ca.txt new file mode 100644 index 000000000..3da65deaf --- /dev/null +++ b/solr/conf/lang/stopwords_ca.txt @@ -0,0 +1,220 @@ +# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) +a +abans +ací +ah +així +això +al +als +aleshores +algun +alguna +algunes +alguns +alhora +allà +allí +allò +altra +altre +altres +amb +ambdós +ambdues +apa +aquell +aquella +aquelles +aquells +aquest +aquesta +aquestes +aquests +aquí +baix +cada +cadascú +cadascuna +cadascunes +cadascuns +com +contra +d'un +d'una +d'unes +d'uns +dalt +de +del +dels +des +després +dins +dintre +donat +doncs +durant +e +eh +el +els +em +en +encara +ens +entre +érem +eren +éreu +es +és +esta +està +estàvem +estaven +estàveu +esteu +et +etc +ets +fins +fora +gairebé +ha +han +has +havia +he +hem +heu +hi +ho +i +igual +iguals +ja +l'hi +la +les +li +li'n +llavors +m'he +ma +mal +malgrat +mateix +mateixa +mateixes +mateixos +me +mentre +més +meu +meus +meva +meves +molt +molta +moltes +molts +mon +mons +n'he +n'hi +ne +ni +no +nogensmenys +només +nosaltres +nostra +nostre +nostres +o +oh +oi +on +pas +pel +pels +per +però +perquè +poc +poca +pocs +poques +potser +propi +qual +quals +quan +quant +que +què +quelcom +qui +quin +quina +quines +quins +s'ha +s'han +sa +semblant +semblants +ses +seu +seus +seva +seva +seves +si +sobre +sobretot +sóc +solament +sols +son +són +sons +sota +sou +t'ha +t'han +t'he +ta +tal +també +tampoc +tan +tant +tanta +tantes +teu +teus +teva +teves +ton +tons +tot +tota +totes +tots +un +una +unes +uns +us +va +vaig +vam +van +vas +veu +vosaltres +vostra +vostre +vostres diff --git a/solr/conf/lang/stopwords_cz.txt b/solr/conf/lang/stopwords_cz.txt new file mode 100644 index 000000000..53c6097da --- /dev/null +++ b/solr/conf/lang/stopwords_cz.txt @@ -0,0 +1,172 @@ +a +s +k +o +i +u +v +z +dnes +cz +tímto +budeš +budem +byli +jseš +můj +svým +ta +tomto +tohle +tuto +tyto +jej +zda +proč +máte +tato +kam +tohoto +kdo +kteří +mi +nám +tom +tomuto +mít +nic +proto +kterou +byla +toho +protože +asi +ho +naši +napište +re +což +tím +takže +svých +její +svými +jste +aj +tu +tedy +teto +bylo +kde +ke +pravé +ji +nad +nejsou +či +pod +téma +mezi +přes +ty +pak +vám +ani +když +však +neg +jsem +tento +článku +články +aby +jsme +před +pta +jejich +byl +ještě +až +bez +také +pouze +první +vaše +která +nás +nový +tipy +pokud +může +strana +jeho +své +jiné +zprávy +nové +není +vás +jen +podle +zde +už +být +více +bude +již +než +který +by +které +co +nebo +ten +tak +má +při +od +po +jsou +jak +další +ale +si +se +ve +to +jako +za +zpět +ze +do +pro +je +na +atd +atp +jakmile +přičemž +já +on +ona +ono +oni +ony +my +vy +jí +ji +mě +mne +jemu +tomu +těm +těmu +němu +němuž +jehož +jíž +jelikož +jež +jakož +načež diff --git a/solr/conf/lang/stopwords_da.txt b/solr/conf/lang/stopwords_da.txt new file mode 100644 index 000000000..42e6145b9 --- /dev/null +++ b/solr/conf/lang/stopwords_da.txt @@ -0,0 +1,110 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Danish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + +og | and +i | in +jeg | I +det | that (dem. pronoun)/it (pers. pronoun) +at | that (in front of a sentence)/to (with infinitive) +en | a/an +den | it (pers. pronoun)/that (dem. pronoun) +til | to/at/for/until/against/by/of/into, more +er | present tense of "to be" +som | who, as +på | on/upon/in/on/at/to/after/of/with/for, on +de | they +med | with/by/in, along +han | he +af | of/by/from/off/for/in/with/on, off +for | at/for/to/from/by/of/ago, in front/before, because +ikke | not +der | who/which, there/those +var | past tense of "to be" +mig | me/myself +sig | oneself/himself/herself/itself/themselves +men | but +et | a/an/one, one (number), someone/somebody/one +har | present tense of "to have" +om | round/about/for/in/a, about/around/down, if +vi | we +min | my +havde | past tense of "to have" +ham | him +hun | she +nu | now +over | over/above/across/by/beyond/past/on/about, over/past +da | then, when/as/since +fra | from/off/since, off, since +du | you +ud | out +sin | his/her/its/one's +dem | them +os | us/ourselves +op | up +man | you/one +hans | his +hvor | where +eller | or +hvad | what +skal | must/shall etc. +selv | myself/youself/herself/ourselves etc., even +her | here +alle | all/everyone/everybody etc. +vil | will (verb) +blev | past tense of "to stay/to remain/to get/to become" +kunne | could +ind | in +når | when +være | present tense of "to be" +dog | however/yet/after all +noget | something +ville | would +jo | you know/you see (adv), yes +deres | their/theirs +efter | after/behind/according to/for/by/from, later/afterwards +ned | down +skulle | should +denne | this +end | than +dette | this +mit | my/mine +også | also +under | under/beneath/below/during, below/underneath +have | have +dig | you +anden | other +hende | her +mine | my +alt | everything +meget | much/very, plenty of +sit | his, her, its, one's +sine | his, her, its, one's +vor | our +mod | against +disse | these +hvis | if +din | your/yours +nogle | some +hos | by/at +blive | be/become +mange | many +ad | by/through +bliver | present tense of "to be/to become" +hendes | her/hers +været | be +thi | for (conj) +jer | you +sådan | such, like this/like that diff --git a/solr/conf/lang/stopwords_de.txt b/solr/conf/lang/stopwords_de.txt new file mode 100644 index 000000000..86525e7ae --- /dev/null +++ b/solr/conf/lang/stopwords_de.txt @@ -0,0 +1,294 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A German stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | The number of forms in this list is reduced significantly by passing it + | through the German stemmer. + + +aber | but + +alle | all +allem +allen +aller +alles + +als | than, as +also | so +am | an + dem +an | at + +ander | other +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders + +auch | also +auf | on +aus | out of +bei | by +bin | am +bis | until +bist | art +da | there +damit | with it +dann | then + +der | the +den +des +dem +die +das + +daß | that + +derselbe | the same +derselben +denselben +desselben +demselben +dieselbe +dieselben +dasselbe + +dazu | to that + +dein | thy +deine +deinem +deinen +deiner +deines + +denn | because + +derer | of those +dessen | of him + +dich | thee +dir | to thee +du | thou + +dies | this +diese +diesem +diesen +dieser +dieses + + +doch | (several meanings) +dort | (over) there + + +durch | through + +ein | a +eine +einem +einen +einer +eines + +einig | some +einige +einigem +einigen +einiger +einiges + +einmal | once + +er | he +ihn | him +ihm | to him + +es | it +etwas | something + +euer | your +eure +eurem +euren +eurer +eures + +für | for +gegen | towards +gewesen | p.p. of sein +hab | have +habe | have +haben | have +hat | has +hatte | had +hatten | had +hier | here +hin | there +hinter | behind + +ich | I +mich | me +mir | to me + + +ihr | you, to her +ihre +ihrem +ihren +ihrer +ihres +euch | to you + +im | in + dem +in | in +indem | while +ins | in + das +ist | is + +jede | each, every +jedem +jeden +jeder +jedes + +jene | that +jenem +jenen +jener +jenes + +jetzt | now +kann | can + +kein | no +keine +keinem +keinen +keiner +keines + +können | can +könnte | could +machen | do +man | one + +manche | some, many a +manchem +manchen +mancher +manches + +mein | my +meine +meinem +meinen +meiner +meines + +mit | with +muss | must +musste | had to +nach | to(wards) +nicht | not +nichts | nothing +noch | still, yet +nun | now +nur | only +ob | whether +oder | or +ohne | without +sehr | very + +sein | his +seine +seinem +seinen +seiner +seines + +selbst | self +sich | herself + +sie | they, she +ihnen | to them + +sind | are +so | so + +solche | such +solchem +solchen +solcher +solches + +soll | shall +sollte | should +sondern | but +sonst | else +über | over +um | about, around +und | and + +uns | us +unse +unsem +unsen +unser +unses + +unter | under +viel | much +vom | von + dem +von | from +vor | before +während | while +war | was +waren | were +warst | wast +was | what +weg | away, off +weil | because +weiter | further + +welche | which +welchem +welchen +welcher +welches + +wenn | when +werde | will +werden | will +wie | how +wieder | again +will | want +wir | we +wird | will +wirst | willst +wo | where +wollen | want +wollte | wanted +würde | would +würden | would +zu | to +zum | zu + dem +zur | zu + der +zwar | indeed +zwischen | between + diff --git a/solr/conf/lang/stopwords_el.txt b/solr/conf/lang/stopwords_el.txt new file mode 100644 index 000000000..232681f5b --- /dev/null +++ b/solr/conf/lang/stopwords_el.txt @@ -0,0 +1,78 @@ +# Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' +ο +η +το +οι +τα +του +τησ +των +τον +την +και +κι +κ +ειμαι +εισαι +ειναι +ειμαστε +ειστε +στο +στον +στη +στην +μα +αλλα +απο +για +προσ +με +σε +ωσ +παρα +αντι +κατα +μετα +θα +να +δε +δεν +μη +μην +επι +ενω +εαν +αν +τοτε +που +πωσ +ποιοσ +ποια +ποιο +ποιοι +ποιεσ +ποιων +ποιουσ +αυτοσ +αυτη +αυτο +αυτοι +αυτων +αυτουσ +αυτεσ +αυτα +εκεινοσ +εκεινη +εκεινο +εκεινοι +εκεινεσ +εκεινα +εκεινων +εκεινουσ +οπωσ +ομωσ +ισωσ +οσο +οτι diff --git a/solr/conf/lang/stopwords_en.txt b/solr/conf/lang/stopwords_en.txt new file mode 100644 index 000000000..2c164c0b2 --- /dev/null +++ b/solr/conf/lang/stopwords_en.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# a couple of test stopwords to test that the words are really being +# configured from this file: +stopworda +stopwordb + +# Standard english stop words taken from Lucene's StopAnalyzer +a +an +and +are +as +at +be +but +by +for +if +in +into +is +it +no +not +of +on +or +such +that +the +their +then +there +these +they +this +to +was +will +with diff --git a/solr/conf/lang/stopwords_es.txt b/solr/conf/lang/stopwords_es.txt new file mode 100644 index 000000000..487d78c8d --- /dev/null +++ b/solr/conf/lang/stopwords_es.txt @@ -0,0 +1,356 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Spanish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. + +de | from, of +la | the, her +que | who, that +el | the +en | in +y | and +a | to +los | the, them +del | de + el +se | himself, from him etc +las | the, them +por | for, by, etc +un | a +para | for +con | with +no | no +una | a +su | his, her +al | a + el + | es from SER +lo | him +como | how +más | more +pero | pero +sus | su plural +le | to him, her +ya | already +o | or + | fue from SER +este | this + | ha from HABER +sí | himself etc +porque | because +esta | this + | son from SER +entre | between + | está from ESTAR +cuando | when +muy | very +sin | without +sobre | on + | ser from SER + | tiene from TENER +también | also +me | me +hasta | until +hay | there is/are +donde | where + | han from HABER +quien | whom, that + | están from ESTAR + | estado from ESTAR +desde | from +todo | all +nos | us +durante | during + | estados from ESTAR +todos | all +uno | a +les | to them +ni | nor +contra | against +otros | other + | fueron from SER +ese | that +eso | that + | había from HABER +ante | before +ellos | they +e | and (variant of y) +esto | this +mí | me +antes | before +algunos | some +qué | what? +unos | a +yo | I +otro | other +otras | other +otra | other +él | he +tanto | so much, many +esa | that +estos | these +mucho | much, many +quienes | who +nada | nothing +muchos | many +cual | who + | sea from SER +poco | few +ella | she +estar | to be + | haber from HABER +estas | these + | estaba from ESTAR + | estamos from ESTAR +algunas | some +algo | something +nosotros | we + + | other forms + +mi | me +mis | mi plural +tú | thou +te | thee +ti | thee +tu | thy +tus | tu plural +ellas | they +nosotras | we +vosotros | you +vosotras | you +os | you +mío | mine +mía | +míos | +mías | +tuyo | thine +tuya | +tuyos | +tuyas | +suyo | his, hers, theirs +suya | +suyos | +suyas | +nuestro | ours +nuestra | +nuestros | +nuestras | +vuestro | yours +vuestra | +vuestros | +vuestras | +esos | those +esas | those + + | forms of estar, to be (not including the infinitive): +estoy +estás +está +estamos +estáis +están +esté +estés +estemos +estéis +estén +estaré +estarás +estará +estaremos +estaréis +estarán +estaría +estarías +estaríamos +estaríais +estarían +estaba +estabas +estábamos +estabais +estaban +estuve +estuviste +estuvo +estuvimos +estuvisteis +estuvieron +estuviera +estuvieras +estuviéramos +estuvierais +estuvieran +estuviese +estuvieses +estuviésemos +estuvieseis +estuviesen +estando +estado +estada +estados +estadas +estad + + | forms of haber, to have (not including the infinitive): +he +has +ha +hemos +habéis +han +haya +hayas +hayamos +hayáis +hayan +habré +habrás +habrá +habremos +habréis +habrán +habría +habrías +habríamos +habríais +habrían +había +habías +habíamos +habíais +habían +hube +hubiste +hubo +hubimos +hubisteis +hubieron +hubiera +hubieras +hubiéramos +hubierais +hubieran +hubiese +hubieses +hubiésemos +hubieseis +hubiesen +habiendo +habido +habida +habidos +habidas + + | forms of ser, to be (not including the infinitive): +soy +eres +es +somos +sois +son +sea +seas +seamos +seáis +sean +seré +serás +será +seremos +seréis +serán +sería +serías +seríamos +seríais +serían +era +eras +éramos +erais +eran +fui +fuiste +fue +fuimos +fuisteis +fueron +fuera +fueras +fuéramos +fuerais +fueran +fuese +fueses +fuésemos +fueseis +fuesen +siendo +sido + | sed also means 'thirst' + + | forms of tener, to have (not including the infinitive): +tengo +tienes +tiene +tenemos +tenéis +tienen +tenga +tengas +tengamos +tengáis +tengan +tendré +tendrás +tendrá +tendremos +tendréis +tendrán +tendría +tendrías +tendríamos +tendríais +tendrían +tenía +tenías +teníamos +teníais +tenían +tuve +tuviste +tuvo +tuvimos +tuvisteis +tuvieron +tuviera +tuvieras +tuviéramos +tuvierais +tuvieran +tuviese +tuvieses +tuviésemos +tuvieseis +tuviesen +teniendo +tenido +tenida +tenidos +tenidas +tened + diff --git a/solr/conf/lang/stopwords_eu.txt b/solr/conf/lang/stopwords_eu.txt new file mode 100644 index 000000000..25f1db934 --- /dev/null +++ b/solr/conf/lang/stopwords_eu.txt @@ -0,0 +1,99 @@ +# example set of basque stopwords +al +anitz +arabera +asko +baina +bat +batean +batek +bati +batzuei +batzuek +batzuetan +batzuk +bera +beraiek +berau +berauek +bere +berori +beroriek +beste +bezala +da +dago +dira +ditu +du +dute +edo +egin +ere +eta +eurak +ez +gainera +gu +gutxi +guzti +haiei +haiek +haietan +hainbeste +hala +han +handik +hango +hara +hari +hark +hartan +hau +hauei +hauek +hauetan +hemen +hemendik +hemengo +hi +hona +honek +honela +honetan +honi +hor +hori +horiei +horiek +horietan +horko +horra +horrek +horrela +horretan +horri +hortik +hura +izan +ni +noiz +nola +non +nondik +nongo +nor +nora +ze +zein +zen +zenbait +zenbat +zer +zergatik +ziren +zituen +zu +zuek +zuen +zuten diff --git a/solr/conf/lang/stopwords_fa.txt b/solr/conf/lang/stopwords_fa.txt new file mode 100644 index 000000000..723641c6d --- /dev/null +++ b/solr/conf/lang/stopwords_fa.txt @@ -0,0 +1,313 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the arabic 'ي' instead of 'ی' +انان +نداشته +سراسر +خياه +ايشان +وي +تاكنون +بيشتري +دوم +پس +ناشي +وگو +يا +داشتند +سپس +هنگام +هرگز +پنج +نشان +امسال +ديگر +گروهي +شدند +چطور +ده +و +دو +نخستين +ولي +چرا +چه +وسط +ه +كدام +قابل +يك +رفت +هفت +همچنين +در +هزار +بله +بلي +شايد +اما +شناسي +گرفته +دهد +داشته +دانست +داشتن +خواهيم +ميليارد +وقتيكه +امد +خواهد +جز +اورده +شده +بلكه +خدمات +شدن +برخي +نبود +بسياري +جلوگيري +حق +كردند +نوعي +بعري +نكرده +نظير +نبايد +بوده +بودن +داد +اورد +هست +جايي +شود +دنبال +داده +بايد +سابق +هيچ +همان +انجا +كمتر +كجاست +گردد +كسي +تر +مردم +تان +دادن +بودند +سري +جدا +ندارند +مگر +يكديگر +دارد +دهند +بنابراين +هنگامي +سمت +جا +انچه +خود +دادند +زياد +دارند +اثر +بدون +بهترين +بيشتر +البته +به +براساس +بيرون +كرد +بعضي +گرفت +توي +اي +ميليون +او +جريان +تول +بر +مانند +برابر +باشيم +مدتي +گويند +اكنون +تا +تنها +جديد +چند +بي +نشده +كردن +كردم +گويد +كرده +كنيم +نمي +نزد +روي +قصد +فقط +بالاي +ديگران +اين +ديروز +توسط +سوم +ايم +دانند +سوي +استفاده +شما +كنار +داريم +ساخته +طور +امده +رفته +نخست +بيست +نزديك +طي +كنيد +از +انها +تمامي +داشت +يكي +طريق +اش +چيست +روب +نمايد +گفت +چندين +چيزي +تواند +ام +ايا +با +ان +ايد +ترين +اينكه +ديگري +راه +هايي +بروز +همچنان +پاعين +كس +حدود +مختلف +مقابل +چيز +گيرد +ندارد +ضد +همچون +سازي +شان +مورد +باره +مرسي +خويش +برخوردار +چون +خارج +شش +هنوز +تحت +ضمن +هستيم +گفته +فكر +بسيار +پيش +براي +روزهاي +انكه +نخواهد +بالا +كل +وقتي +كي +چنين +كه +گيري +نيست +است +كجا +كند +نيز +يابد +بندي +حتي +توانند +عقب +خواست +كنند +بين +تمام +همه +ما +باشند +مثل +شد +اري +باشد +اره +طبق +بعد +اگر +صورت +غير +جاي +بيش +ريزي +اند +زيرا +چگونه +بار +لطفا +مي +درباره +من +ديده +همين +گذاري +برداري +علت +گذاشته +هم +فوق +نه +ها +شوند +اباد +همواره +هر +اول +خواهند +چهار +نام +امروز +مان +هاي +قبل +كنم +سعي +تازه +را +هستند +زير +جلوي +عنوان +بود diff --git a/solr/conf/lang/stopwords_fi.txt b/solr/conf/lang/stopwords_fi.txt new file mode 100644 index 000000000..4372c9a05 --- /dev/null +++ b/solr/conf/lang/stopwords_fi.txt @@ -0,0 +1,97 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| forms of BE + +olla +olen +olet +on +olemme +olette +ovat +ole | negative form + +oli +olisi +olisit +olisin +olisimme +olisitte +olisivat +olit +olin +olimme +olitte +olivat +ollut +olleet + +en | negation +et +ei +emme +ette +eivät + +|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans +minä minun minut minua minussa minusta minuun minulla minulta minulle | I +sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you +hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she +me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we +te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you +he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they + +tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this +tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that +se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it +nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these +nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those +ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they + +kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who +ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl) +mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what +mitkä | (pl) + +joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which +jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl) + +| conjunctions + +että | that +ja | and +jos | if +koska | because +kuin | than +mutta | but +niin | so +sekä | and +sillä | for +tai | or +vaan | but +vai | or +vaikka | although + + +| prepositions + +kanssa | with +mukaan | according to +noin | about +poikki | across +yli | over, across + +| other + +kun | when +niin | so +nyt | now +itse | self + diff --git a/solr/conf/lang/stopwords_fr.txt b/solr/conf/lang/stopwords_fr.txt new file mode 100644 index 000000000..749abae68 --- /dev/null +++ b/solr/conf/lang/stopwords_fr.txt @@ -0,0 +1,186 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A French stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +au | a + le +aux | a + les +avec | with +ce | this +ces | these +dans | with +de | of +des | de + les +du | de + le +elle | she +en | `of them' etc +et | and +eux | them +il | he +je | I +la | the +le | the +leur | their +lui | him +ma | my (fem) +mais | but +me | me +même | same; as in moi-même (myself) etc +mes | me (pl) +moi | me +mon | my (masc) +ne | not +nos | our (pl) +notre | our +nous | we +on | one +ou | where +par | by +pas | not +pour | for +qu | que before vowel +que | that +qui | who +sa | his, her (fem) +se | oneself +ses | his (pl) +son | his, her (masc) +sur | on +ta | thy (fem) +te | thee +tes | thy (pl) +toi | thee +ton | thy (masc) +tu | thou +un | a +une | a +vos | your (pl) +votre | your +vous | you + + | single letter forms + +c | c' +d | d' +j | j' +l | l' +à | to, at +m | m' +n | n' +s | s' +t | t' +y | there + + | forms of être (not including the infinitive): +été +étée +étées +étés +étant +suis +es +est +sommes +êtes +sont +serai +seras +sera +serons +serez +seront +serais +serait +serions +seriez +seraient +étais +était +étions +étiez +étaient +fus +fut +fûmes +fûtes +furent +sois +soit +soyons +soyez +soient +fusse +fusses +fût +fussions +fussiez +fussent + + | forms of avoir (not including the infinitive): +ayant +eu +eue +eues +eus +ai +as +avons +avez +ont +aurai +auras +aura +aurons +aurez +auront +aurais +aurait +aurions +auriez +auraient +avais +avait +avions +aviez +avaient +eut +eûmes +eûtes +eurent +aie +aies +ait +ayons +ayez +aient +eusse +eusses +eût +eussions +eussiez +eussent + + | Later additions (from Jean-Christophe Deschamps) +ceci | this +cela | that +celà | that +cet | this +cette | this +ici | here +ils | they +les | the (pl) +leurs | their (pl) +quel | which +quels | which +quelle | which +quelles | which +sans | without +soi | oneself + diff --git a/solr/conf/lang/stopwords_ga.txt b/solr/conf/lang/stopwords_ga.txt new file mode 100644 index 000000000..9ff88d747 --- /dev/null +++ b/solr/conf/lang/stopwords_ga.txt @@ -0,0 +1,110 @@ + +a +ach +ag +agus +an +aon +ar +arna +as +b' +ba +beirt +bhúr +caoga +ceathair +ceathrar +chomh +chtó +chuig +chun +cois +céad +cúig +cúigear +d' +daichead +dar +de +deich +deichniúr +den +dhá +do +don +dtí +dá +dár +dó +faoi +faoin +faoina +faoinár +fara +fiche +gach +gan +go +gur +haon +hocht +i +iad +idir +in +ina +ins +inár +is +le +leis +lena +lenár +m' +mar +mo +mé +na +nach +naoi +naonúr +ná +ní +níor +nó +nócha +ocht +ochtar +os +roimh +sa +seacht +seachtar +seachtó +seasca +seisear +siad +sibh +sinn +sna +sé +sí +tar +thar +thú +triúr +trí +trína +trínár +tríocha +tú +um +ár +é +éis +í +ó +ón +óna +ónár diff --git a/solr/conf/lang/stopwords_gl.txt b/solr/conf/lang/stopwords_gl.txt new file mode 100644 index 000000000..d8760b12c --- /dev/null +++ b/solr/conf/lang/stopwords_gl.txt @@ -0,0 +1,161 @@ +# galican stopwords +a +aínda +alí +aquel +aquela +aquelas +aqueles +aquilo +aquí +ao +aos +as +así +á +ben +cando +che +co +coa +comigo +con +connosco +contigo +convosco +coas +cos +cun +cuns +cunha +cunhas +da +dalgunha +dalgunhas +dalgún +dalgúns +das +de +del +dela +delas +deles +desde +deste +do +dos +dun +duns +dunha +dunhas +e +el +ela +elas +eles +en +era +eran +esa +esas +ese +eses +esta +estar +estaba +está +están +este +estes +estiven +estou +eu +é +facer +foi +foron +fun +había +hai +iso +isto +la +las +lle +lles +lo +los +mais +me +meu +meus +min +miña +miñas +moi +na +nas +neste +nin +no +non +nos +nosa +nosas +noso +nosos +nós +nun +nunha +nuns +nunhas +o +os +ou +ó +ós +para +pero +pode +pois +pola +polas +polo +polos +por +que +se +senón +ser +seu +seus +sexa +sido +sobre +súa +súas +tamén +tan +te +ten +teñen +teño +ter +teu +teus +ti +tido +tiña +tiven +túa +túas +un +unha +unhas +uns +vos +vosa +vosas +voso +vosos +vós diff --git a/solr/conf/lang/stopwords_hi.txt b/solr/conf/lang/stopwords_hi.txt new file mode 100644 index 000000000..86286bb08 --- /dev/null +++ b/solr/conf/lang/stopwords_hi.txt @@ -0,0 +1,235 @@ +# Also see http://www.opensource.org/licenses/bsd-license.html +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. +अंदर +अत +अपना +अपनी +अपने +अभी +आदि +आप +इत्यादि +इन +इनका +इन्हीं +इन्हें +इन्हों +इस +इसका +इसकी +इसके +इसमें +इसी +इसे +उन +उनका +उनकी +उनके +उनको +उन्हीं +उन्हें +उन्हों +उस +उसके +उसी +उसे +एक +एवं +एस +ऐसे +और +कई +कर +करता +करते +करना +करने +करें +कहते +कहा +का +काफ़ी +कि +कितना +किन्हें +किन्हों +किया +किर +किस +किसी +किसे +की +कुछ +कुल +के +को +कोई +कौन +कौनसा +गया +घर +जब +जहाँ +जा +जितना +जिन +जिन्हें +जिन्हों +जिस +जिसे +जीधर +जैसा +जैसे +जो +तक +तब +तरह +तिन +तिन्हें +तिन्हों +तिस +तिसे +तो +था +थी +थे +दबारा +दिया +दुसरा +दूसरे +दो +द्वारा +न +नहीं +ना +निहायत +नीचे +ने +पर +पर +पहले +पूरा +पे +फिर +बनी +बही +बहुत +बाद +बाला +बिलकुल +भी +भीतर +मगर +मानो +मे +में +यदि +यह +यहाँ +यही +या +यिह +ये +रखें +रहा +रहे +ऱ्वासा +लिए +लिये +लेकिन +व +वर्ग +वह +वह +वहाँ +वहीं +वाले +वुह +वे +वग़ैरह +संग +सकता +सकते +सबसे +सभी +साथ +साबुत +साभ +सारा +से +सो +ही +हुआ +हुई +हुए +है +हैं +हो +होता +होती +होते +होना +होने +# additional normalized forms of the above +अपनि +जेसे +होति +सभि +तिंहों +इंहों +दवारा +इसि +किंहें +थि +उंहों +ओर +जिंहें +वहिं +अभि +बनि +हि +उंहिं +उंहें +हें +वगेरह +एसे +रवासा +कोन +निचे +काफि +उसि +पुरा +भितर +हे +बहि +वहां +कोइ +यहां +जिंहों +तिंहें +किसि +कइ +यहि +इंहिं +जिधर +इंहें +अदि +इतयादि +हुइ +कोनसा +इसकि +दुसरे +जहां +अप +किंहों +उनकि +भि +वरग +हुअ +जेसा +नहिं diff --git a/solr/conf/lang/stopwords_hu.txt b/solr/conf/lang/stopwords_hu.txt new file mode 100644 index 000000000..37526da8a --- /dev/null +++ b/solr/conf/lang/stopwords_hu.txt @@ -0,0 +1,211 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + +| Hungarian stop word list +| prepared by Anna Tordai + +a +ahogy +ahol +aki +akik +akkor +alatt +által +általában +amely +amelyek +amelyekben +amelyeket +amelyet +amelynek +ami +amit +amolyan +amíg +amikor +át +abban +ahhoz +annak +arra +arról +az +azok +azon +azt +azzal +azért +aztán +azután +azonban +bár +be +belül +benne +cikk +cikkek +cikkeket +csak +de +e +eddig +egész +egy +egyes +egyetlen +egyéb +egyik +egyre +ekkor +el +elég +ellen +elő +először +előtt +első +én +éppen +ebben +ehhez +emilyen +ennek +erre +ez +ezt +ezek +ezen +ezzel +ezért +és +fel +felé +hanem +hiszen +hogy +hogyan +igen +így +illetve +ill. +ill +ilyen +ilyenkor +ison +ismét +itt +jó +jól +jobban +kell +kellett +keresztül +keressünk +ki +kívül +között +közül +legalább +lehet +lehetett +legyen +lenne +lenni +lesz +lett +maga +magát +majd +majd +már +más +másik +meg +még +mellett +mert +mely +melyek +mi +mit +míg +miért +milyen +mikor +minden +mindent +mindenki +mindig +mint +mintha +mivel +most +nagy +nagyobb +nagyon +ne +néha +nekem +neki +nem +néhány +nélkül +nincs +olyan +ott +össze +ő +ők +őket +pedig +persze +rá +s +saját +sem +semmi +sok +sokat +sokkal +számára +szemben +szerint +szinte +talán +tehát +teljes +tovább +továbbá +több +úgy +ugyanis +új +újabb +újra +után +utána +utolsó +vagy +vagyis +valaki +valami +valamint +való +vagyok +van +vannak +volt +voltam +voltak +voltunk +vissza +vele +viszont +volna diff --git a/solr/conf/lang/stopwords_hy.txt b/solr/conf/lang/stopwords_hy.txt new file mode 100644 index 000000000..60c1c50fb --- /dev/null +++ b/solr/conf/lang/stopwords_hy.txt @@ -0,0 +1,46 @@ +# example set of Armenian stopwords. +այդ +այլ +այն +այս +դու +դուք +եմ +են +ենք +ես +եք +է +էի +էին +էինք +էիր +էիք +էր +ըստ +թ +ի +ին +իսկ +իր +կամ +համար +հետ +հետո +մենք +մեջ +մի +ն +նա +նաև +նրա +նրանք +որ +որը +որոնք +որպես +ու +ում +պիտի +վրա +և diff --git a/solr/conf/lang/stopwords_id.txt b/solr/conf/lang/stopwords_id.txt new file mode 100644 index 000000000..4617f83a5 --- /dev/null +++ b/solr/conf/lang/stopwords_id.txt @@ -0,0 +1,359 @@ +# from appendix D of: A Study of Stemming Effects on Information +# Retrieval in Bahasa Indonesia +ada +adanya +adalah +adapun +agak +agaknya +agar +akan +akankah +akhirnya +aku +akulah +amat +amatlah +anda +andalah +antar +diantaranya +antara +antaranya +diantara +apa +apaan +mengapa +apabila +apakah +apalagi +apatah +atau +ataukah +ataupun +bagai +bagaikan +sebagai +sebagainya +bagaimana +bagaimanapun +sebagaimana +bagaimanakah +bagi +bahkan +bahwa +bahwasanya +sebaliknya +banyak +sebanyak +beberapa +seberapa +begini +beginian +beginikah +beginilah +sebegini +begitu +begitukah +begitulah +begitupun +sebegitu +belum +belumlah +sebelum +sebelumnya +sebenarnya +berapa +berapakah +berapalah +berapapun +betulkah +sebetulnya +biasa +biasanya +bila +bilakah +bisa +bisakah +sebisanya +boleh +bolehkah +bolehlah +buat +bukan +bukankah +bukanlah +bukannya +cuma +percuma +dahulu +dalam +dan +dapat +dari +daripada +dekat +demi +demikian +demikianlah +sedemikian +dengan +depan +di +dia +dialah +dini +diri +dirinya +terdiri +dong +dulu +enggak +enggaknya +entah +entahlah +terhadap +terhadapnya +hal +hampir +hanya +hanyalah +harus +haruslah +harusnya +seharusnya +hendak +hendaklah +hendaknya +hingga +sehingga +ia +ialah +ibarat +ingin +inginkah +inginkan +ini +inikah +inilah +itu +itukah +itulah +jangan +jangankan +janganlah +jika +jikalau +juga +justru +kala +kalau +kalaulah +kalaupun +kalian +kami +kamilah +kamu +kamulah +kan +kapan +kapankah +kapanpun +dikarenakan +karena +karenanya +ke +kecil +kemudian +kenapa +kepada +kepadanya +ketika +seketika +khususnya +kini +kinilah +kiranya +sekiranya +kita +kitalah +kok +lagi +lagian +selagi +lah +lain +lainnya +melainkan +selaku +lalu +melalui +terlalu +lama +lamanya +selama +selama +selamanya +lebih +terlebih +bermacam +macam +semacam +maka +makanya +makin +malah +malahan +mampu +mampukah +mana +manakala +manalagi +masih +masihkah +semasih +masing +mau +maupun +semaunya +memang +mereka +merekalah +meski +meskipun +semula +mungkin +mungkinkah +nah +namun +nanti +nantinya +nyaris +oleh +olehnya +seorang +seseorang +pada +padanya +padahal +paling +sepanjang +pantas +sepantasnya +sepantasnyalah +para +pasti +pastilah +per +pernah +pula +pun +merupakan +rupanya +serupa +saat +saatnya +sesaat +saja +sajalah +saling +bersama +sama +sesama +sambil +sampai +sana +sangat +sangatlah +saya +sayalah +se +sebab +sebabnya +sebuah +tersebut +tersebutlah +sedang +sedangkan +sedikit +sedikitnya +segala +segalanya +segera +sesegera +sejak +sejenak +sekali +sekalian +sekalipun +sesekali +sekaligus +sekarang +sekarang +sekitar +sekitarnya +sela +selain +selalu +seluruh +seluruhnya +semakin +sementara +sempat +semua +semuanya +sendiri +sendirinya +seolah +seperti +sepertinya +sering +seringnya +serta +siapa +siapakah +siapapun +disini +disinilah +sini +sinilah +sesuatu +sesuatunya +suatu +sesudah +sesudahnya +sudah +sudahkah +sudahlah +supaya +tadi +tadinya +tak +tanpa +setelah +telah +tentang +tentu +tentulah +tentunya +tertentu +seterusnya +tapi +tetapi +setiap +tiap +setidaknya +tidak +tidakkah +tidaklah +toh +waduh +wah +wahai +sewaktu +walau +walaupun +wong +yaitu +yakni +yang diff --git a/solr/conf/lang/stopwords_it.txt b/solr/conf/lang/stopwords_it.txt new file mode 100644 index 000000000..1219cc773 --- /dev/null +++ b/solr/conf/lang/stopwords_it.txt @@ -0,0 +1,303 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | An Italian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + +ad | a (to) before vowel +al | a + il +allo | a + lo +ai | a + i +agli | a + gli +all | a + l' +agl | a + gl' +alla | a + la +alle | a + le +con | with +col | con + il +coi | con + i (forms collo, cogli etc are now very rare) +da | from +dal | da + il +dallo | da + lo +dai | da + i +dagli | da + gli +dall | da + l' +dagl | da + gll' +dalla | da + la +dalle | da + le +di | of +del | di + il +dello | di + lo +dei | di + i +degli | di + gli +dell | di + l' +degl | di + gl' +della | di + la +delle | di + le +in | in +nel | in + el +nello | in + lo +nei | in + i +negli | in + gli +nell | in + l' +negl | in + gl' +nella | in + la +nelle | in + le +su | on +sul | su + il +sullo | su + lo +sui | su + i +sugli | su + gli +sull | su + l' +sugl | su + gl' +sulla | su + la +sulle | su + le +per | through, by +tra | among +contro | against +io | I +tu | thou +lui | he +lei | she +noi | we +voi | you +loro | they +mio | my +mia | +miei | +mie | +tuo | +tua | +tuoi | thy +tue | +suo | +sua | +suoi | his, her +sue | +nostro | our +nostra | +nostri | +nostre | +vostro | your +vostra | +vostri | +vostre | +mi | me +ti | thee +ci | us, there +vi | you, there +lo | him, the +la | her, the +li | them +le | them, the +gli | to him, the +ne | from there etc +il | the +un | a +uno | a +una | a +ma | but +ed | and +se | if +perché | why, because +anche | also +come | how +dov | where (as dov') +dove | where +che | who, that +chi | who +cui | whom +non | not +più | more +quale | who, that +quanto | how much +quanti | +quanta | +quante | +quello | that +quelli | +quella | +quelle | +questo | this +questi | +questa | +queste | +si | yes +tutto | all +tutti | all + + | single letter forms: + +a | at +c | as c' for ce or ci +e | and +i | the +l | as l' +o | or + + | forms of avere, to have (not including the infinitive): + +ho +hai +ha +abbiamo +avete +hanno +abbia +abbiate +abbiano +avrò +avrai +avrà +avremo +avrete +avranno +avrei +avresti +avrebbe +avremmo +avreste +avrebbero +avevo +avevi +aveva +avevamo +avevate +avevano +ebbi +avesti +ebbe +avemmo +aveste +ebbero +avessi +avesse +avessimo +avessero +avendo +avuto +avuta +avuti +avute + + | forms of essere, to be (not including the infinitive): +sono +sei +è +siamo +siete +sia +siate +siano +sarò +sarai +sarà +saremo +sarete +saranno +sarei +saresti +sarebbe +saremmo +sareste +sarebbero +ero +eri +era +eravamo +eravate +erano +fui +fosti +fu +fummo +foste +furono +fossi +fosse +fossimo +fossero +essendo + + | forms of fare, to do (not including the infinitive, fa, fat-): +faccio +fai +facciamo +fanno +faccia +facciate +facciano +farò +farai +farà +faremo +farete +faranno +farei +faresti +farebbe +faremmo +fareste +farebbero +facevo +facevi +faceva +facevamo +facevate +facevano +feci +facesti +fece +facemmo +faceste +fecero +facessi +facesse +facessimo +facessero +facendo + + | forms of stare, to be (not including the infinitive): +sto +stai +sta +stiamo +stanno +stia +stiate +stiano +starò +starai +starà +staremo +starete +staranno +starei +staresti +starebbe +staremmo +stareste +starebbero +stavo +stavi +stava +stavamo +stavate +stavano +stetti +stesti +stette +stemmo +steste +stettero +stessi +stesse +stessimo +stessero +stando diff --git a/solr/conf/lang/stopwords_ja.txt b/solr/conf/lang/stopwords_ja.txt new file mode 100644 index 000000000..d4321be6b --- /dev/null +++ b/solr/conf/lang/stopwords_ja.txt @@ -0,0 +1,127 @@ +# +# This file defines a stopword set for Japanese. +# +# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. +# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 +# for frequency lists, etc. that can be useful for making your own set (if desired) +# +# Note that there is an overlap between these stopwords and the terms stopped when used +# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note +# that comments are not allowed on the same line as stopwords. +# +# Also note that stopping is done in a case-insensitive manner. Change your StopFilter +# configuration if you need case-sensitive stopping. Lastly, note that stopping is done +# using the same character width as the entries in this file. Since this StopFilter is +# normally done after a CJKWidthFilter in your chain, you would usually want your romaji +# entries to be in half-width and your kana entries to be in full-width. +# +の +に +は +を +た +が +で +て +と +し +れ +さ +ある +いる +も +する +から +な +こと +として +い +や +れる +など +なっ +ない +この +ため +その +あっ +よう +また +もの +という +あり +まで +られ +なる +へ +か +だ +これ +によって +により +おり +より +による +ず +なり +られる +において +ば +なかっ +なく +しかし +について +せ +だっ +その後 +できる +それ +う +ので +なお +のみ +でき +き +つ +における +および +いう +さらに +でも +ら +たり +その他 +に関する +たち +ます +ん +なら +に対して +特に +せる +及び +これら +とき +では +にて +ほか +ながら +うち +そして +とともに +ただし +かつて +それぞれ +または +お +ほど +ものの +に対する +ほとんど +と共に +といった +です +とも +ところ +ここ +##### End of file diff --git a/solr/conf/lang/stopwords_lv.txt b/solr/conf/lang/stopwords_lv.txt new file mode 100644 index 000000000..e21a23c06 --- /dev/null +++ b/solr/conf/lang/stopwords_lv.txt @@ -0,0 +1,172 @@ +# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins +# the original list of over 800 forms was refined: +# pronouns, adverbs, interjections were removed +# +# prepositions +aiz +ap +ar +apakš +ārpus +augšpus +bez +caur +dēļ +gar +iekš +iz +kopš +labad +lejpus +līdz +no +otrpus +pa +par +pār +pēc +pie +pirms +pret +priekš +starp +šaipus +uz +viņpus +virs +virspus +zem +apakšpus +# Conjunctions +un +bet +jo +ja +ka +lai +tomēr +tikko +turpretī +arī +kaut +gan +tādēļ +tā +ne +tikvien +vien +kā +ir +te +vai +kamēr +# Particles +ar +diezin +droši +diemžēl +nebūt +ik +it +taču +nu +pat +tiklab +iekšpus +nedz +tik +nevis +turpretim +jeb +iekam +iekām +iekāms +kolīdz +līdzko +tiklīdz +jebšu +tālab +tāpēc +nekā +itin +jā +jau +jel +nē +nezin +tad +tikai +vis +tak +iekams +vien +# modal verbs +būt +biju +biji +bija +bijām +bijāt +esmu +esi +esam +esat +būšu +būsi +būs +būsim +būsiet +tikt +tiku +tiki +tika +tikām +tikāt +tieku +tiec +tiek +tiekam +tiekat +tikšu +tiks +tiksim +tiksiet +tapt +tapi +tapāt +topat +tapšu +tapsi +taps +tapsim +tapsiet +kļūt +kļuvu +kļuvi +kļuva +kļuvām +kļuvāt +kļūstu +kļūsti +kļūst +kļūstam +kļūstat +kļūšu +kļūsi +kļūs +kļūsim +kļūsiet +# verbs +varēt +varēju +varējām +varēšu +varēsim +var +varēji +varējāt +varēsi +varēsiet +varat +varēja +varēs diff --git a/solr/conf/lang/stopwords_nl.txt b/solr/conf/lang/stopwords_nl.txt new file mode 100644 index 000000000..47a2aeacf --- /dev/null +++ b/solr/conf/lang/stopwords_nl.txt @@ -0,0 +1,119 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Dutch stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large sample of Dutch text. + + | Dutch stop words frequently exhibit homonym clashes. These are indicated + | clearly below. + +de | the +en | and +van | of, from +ik | I, the ego +te | (1) chez, at etc, (2) to, (3) too +dat | that, which +die | that, those, who, which +in | in, inside +een | a, an, one +hij | he +het | the, it +niet | not, nothing, naught +zijn | (1) to be, being, (2) his, one's, its +is | is +was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river +op | on, upon, at, in, up, used up +aan | on, upon, to (as dative) +met | with, by +als | like, such as, when +voor | (1) before, in front of, (2) furrow +had | had, past tense all persons sing. of 'hebben' (have) +er | there +maar | but, only +om | round, about, for etc +hem | him +dan | then +zou | should/would, past tense all persons sing. of 'zullen' +of | or, whether, if +wat | what, something, anything +mijn | possessive and noun 'mine' +men | people, 'one' +dit | this +zo | so, thus, in this way +door | through by +over | over, across +ze | she, her, they, them +zich | oneself +bij | (1) a bee, (2) by, near, at +ook | also, too +tot | till, until +je | you +mij | me +uit | out of, from +der | Old Dutch form of 'van der' still found in surnames +daar | (1) there, (2) because +haar | (1) her, their, them, (2) hair +naar | (1) unpleasant, unwell etc, (2) towards, (3) as +heb | present first person sing. of 'to have' +hoe | how, why +heeft | present third person sing. of 'to have' +hebben | 'to have' and various parts thereof +deze | this +u | you +want | (1) for, (2) mitten, (3) rigging +nog | yet, still +zal | 'shall', first and third person sing. of verb 'zullen' (will) +me | me +zij | she, they +nu | now +ge | 'thou', still used in Belgium and south Netherlands +geen | none +omdat | because +iets | something, somewhat +worden | to become, grow, get +toch | yet, still +al | all, every, each +waren | (1) 'were' (2) to wander, (3) wares, (3) +veel | much, many +meer | (1) more, (2) lake +doen | to do, to make +toen | then, when +moet | noun 'spot/mote' and present form of 'to must' +ben | (1) am, (2) 'are' in interrogative second person singular of 'to be' +zonder | without +kan | noun 'can' and present form of 'to be able' +hun | their, them +dus | so, consequently +alles | all, everything, anything +onder | under, beneath +ja | yes, of course +eens | once, one day +hier | here +wie | who +werd | imperfect third person sing. of 'become' +altijd | always +doch | yet, but etc +wordt | present third person sing. of 'become' +wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans +kunnen | to be able +ons | us/our +zelf | self +tegen | against, towards, at +na | after, near +reeds | already +wil | (1) present tense of 'want', (2) 'will', noun, (3) fender +kon | could; past tense of 'to be able' +niets | nothing +uw | your +iemand | somebody +geweest | been; past participle of 'be' +andere | other diff --git a/solr/conf/lang/stopwords_no.txt b/solr/conf/lang/stopwords_no.txt new file mode 100644 index 000000000..a7a2c28ba --- /dev/null +++ b/solr/conf/lang/stopwords_no.txt @@ -0,0 +1,194 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Norwegian stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This stop word list is for the dominant bokmål dialect. Words unique + | to nynorsk are marked *. + + | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005 + +og | and +i | in +jeg | I +det | it/this/that +at | to (w. inf.) +en | a/an +et | a/an +den | it/this/that +til | to +er | is/am/are +som | who/that +på | on +de | they / you(formal) +med | with +han | he +av | of +ikke | not +ikkje | not * +der | there +så | so +var | was/were +meg | me +seg | you +men | but +ett | one +har | have +om | about +vi | we +min | my +mitt | my +ha | have +hadde | had +hun | she +nå | now +over | over +da | when/as +ved | by/know +fra | from +du | you +ut | out +sin | your +dem | them +oss | us +opp | up +man | you/one +kan | can +hans | his +hvor | where +eller | or +hva | what +skal | shall/must +selv | self (reflective) +sjøl | self (reflective) +her | here +alle | all +vil | will +bli | become +ble | became +blei | became * +blitt | have become +kunne | could +inn | in +når | when +være | be +kom | come +noen | some +noe | some +ville | would +dere | you +som | who/which/that +deres | their/theirs +kun | only/just +ja | yes +etter | after +ned | down +skulle | should +denne | this +for | for/because +deg | you +si | hers/his +sine | hers/his +sitt | hers/his +mot | against +å | to +meget | much +hvorfor | why +dette | this +disse | these/those +uten | without +hvordan | how +ingen | none +din | your +ditt | your +blir | become +samme | same +hvilken | which +hvilke | which (plural) +sånn | such a +inni | inside/within +mellom | between +vår | our +hver | each +hvem | who +vors | us/ours +hvis | whose +både | both +bare | only/just +enn | than +fordi | as/because +før | before +mange | many +også | also +slik | just +vært | been +være | to be +båe | both * +begge | both +siden | since +dykk | your * +dykkar | yours * +dei | they * +deira | them * +deires | theirs * +deim | them * +di | your (fem.) * +då | as/when * +eg | I * +ein | a/an * +eit | a/an * +eitt | a/an * +elles | or * +honom | he * +hjå | at * +ho | she * +hoe | she * +henne | her +hennar | her/hers +hennes | hers +hoss | how * +hossen | how * +ikkje | not * +ingi | noone * +inkje | noone * +korleis | how * +korso | how * +kva | what/which * +kvar | where * +kvarhelst | where * +kven | who/whom * +kvi | why * +kvifor | why * +me | we * +medan | while * +mi | my * +mine | my * +mykje | much * +no | now * +nokon | some (masc./neut.) * +noka | some (fem.) * +nokor | some * +noko | some * +nokre | some * +si | his/hers * +sia | since * +sidan | since * +so | so * +somt | some * +somme | some * +um | about* +upp | up * +vere | be * +vore | was * +verte | become * +vort | become * +varte | became * +vart | became * + diff --git a/solr/conf/lang/stopwords_pt.txt b/solr/conf/lang/stopwords_pt.txt new file mode 100644 index 000000000..acfeb01af --- /dev/null +++ b/solr/conf/lang/stopwords_pt.txt @@ -0,0 +1,253 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Portuguese stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + + | The following is a ranked list (commonest to rarest) of stopwords + | deriving from a large sample of text. + + | Extra words have been added at the end. + +de | of, from +a | the; to, at; her +o | the; him +que | who, that +e | and +do | de + o +da | de + a +em | in +um | a +para | for + | é from SER +com | with +não | not, no +uma | a +os | the; them +no | em + o +se | himself etc +na | em + a +por | for +mais | more +as | the; them +dos | de + os +como | as, like +mas | but + | foi from SER +ao | a + o +ele | he +das | de + as + | tem from TER +à | a + a +seu | his +sua | her +ou | or + | ser from SER +quando | when +muito | much + | há from HAV +nos | em + os; us +já | already, now + | está from EST +eu | I +também | also +só | only, just +pelo | per + o +pela | per + a +até | up to +isso | that +ela | he +entre | between + | era from SER +depois | after +sem | without +mesmo | same +aos | a + os + | ter from TER +seus | his +quem | whom +nas | em + as +me | me +esse | that +eles | they + | estão from EST +você | you + | tinha from TER + | foram from SER +essa | that +num | em + um +nem | nor +suas | her +meu | my +às | a + as +minha | my + | têm from TER +numa | em + uma +pelos | per + os +elas | they + | havia from HAV + | seja from SER +qual | which + | será from SER +nós | we + | tenho from TER +lhe | to him, her +deles | of them +essas | those +esses | those +pelas | per + as +este | this + | fosse from SER +dele | of him + + | other words. There are many contractions such as naquele = em+aquele, + | mo = me+o, but they are rare. + | Indefinite article plural forms are also rare. + +tu | thou +te | thee +vocês | you (plural) +vos | you +lhes | to them +meus | my +minhas +teu | thy +tua +teus +tuas +nosso | our +nossa +nossos +nossas + +dela | of her +delas | of them + +esta | this +estes | these +estas | these +aquele | that +aquela | that +aqueles | those +aquelas | those +isto | this +aquilo | that + + | forms of estar, to be (not including the infinitive): +estou +está +estamos +estão +estive +esteve +estivemos +estiveram +estava +estávamos +estavam +estivera +estivéramos +esteja +estejamos +estejam +estivesse +estivéssemos +estivessem +estiver +estivermos +estiverem + + | forms of haver, to have (not including the infinitive): +hei +há +havemos +hão +houve +houvemos +houveram +houvera +houvéramos +haja +hajamos +hajam +houvesse +houvéssemos +houvessem +houver +houvermos +houverem +houverei +houverá +houveremos +houverão +houveria +houveríamos +houveriam + + | forms of ser, to be (not including the infinitive): +sou +somos +são +era +éramos +eram +fui +foi +fomos +foram +fora +fôramos +seja +sejamos +sejam +fosse +fôssemos +fossem +for +formos +forem +serei +será +seremos +serão +seria +seríamos +seriam + + | forms of ter, to have (not including the infinitive): +tenho +tem +temos +tém +tinha +tínhamos +tinham +tive +teve +tivemos +tiveram +tivera +tivéramos +tenha +tenhamos +tenham +tivesse +tivéssemos +tivessem +tiver +tivermos +tiverem +terei +terá +teremos +terão +teria +teríamos +teriam diff --git a/solr/conf/lang/stopwords_ro.txt b/solr/conf/lang/stopwords_ro.txt new file mode 100644 index 000000000..4fdee90a5 --- /dev/null +++ b/solr/conf/lang/stopwords_ro.txt @@ -0,0 +1,233 @@ +# This file was created by Jacques Savoy and is distributed under the BSD license. +# See http://members.unine.ch/jacques.savoy/clef/index.html. +# Also see http://www.opensource.org/licenses/bsd-license.html +acea +aceasta +această +aceea +acei +aceia +acel +acela +acele +acelea +acest +acesta +aceste +acestea +aceşti +aceştia +acolo +acum +ai +aia +aibă +aici +al +ăla +ale +alea +ălea +altceva +altcineva +am +ar +are +aş +aşadar +asemenea +asta +ăsta +astăzi +astea +ăstea +ăştia +asupra +aţi +au +avea +avem +aveţi +azi +bine +bucur +bună +ca +că +căci +când +care +cărei +căror +cărui +cât +câte +câţi +către +câtva +ce +cel +ceva +chiar +cînd +cine +cineva +cît +cîte +cîţi +cîtva +contra +cu +cum +cumva +curând +curînd +da +dă +dacă +dar +datorită +de +deci +deja +deoarece +departe +deşi +din +dinaintea +dintr +dintre +drept +după +ea +ei +el +ele +eram +este +eşti +eu +face +fără +fi +fie +fiecare +fii +fim +fiţi +iar +ieri +îi +îl +îmi +împotriva +în +înainte +înaintea +încât +încît +încotro +între +întrucât +întrucît +îţi +la +lângă +le +li +lîngă +lor +lui +mă +mâine +mea +mei +mele +mereu +meu +mi +mine +mult +multă +mulţi +ne +nicăieri +nici +nimeni +nişte +noastră +noastre +noi +noştri +nostru +nu +ori +oricând +oricare +oricât +orice +oricînd +oricine +oricît +oricum +oriunde +până +pe +pentru +peste +pînă +poate +pot +prea +prima +primul +prin +printr +sa +să +săi +sale +sau +său +se +şi +sînt +sîntem +sînteţi +spre +sub +sunt +suntem +sunteţi +ta +tăi +tale +tău +te +ţi +ţie +tine +toată +toate +tot +toţi +totuşi +tu +un +una +unde +undeva +unei +unele +uneori +unor +vă +vi +voastră +voastre +voi +voştri +vostru +vouă +vreo +vreun diff --git a/solr/conf/lang/stopwords_ru.txt b/solr/conf/lang/stopwords_ru.txt new file mode 100644 index 000000000..55271400c --- /dev/null +++ b/solr/conf/lang/stopwords_ru.txt @@ -0,0 +1,243 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | a russian stop word list. comments begin with vertical bar. each stop + | word is at the start of a line. + + | this is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | letter `ё' is translated to `е'. + +и | and +в | in/into +во | alternative form +не | not +что | what/that +он | he +на | on/onto +я | i +с | from +со | alternative form +как | how +а | milder form of `no' (but) +то | conjunction and form of `that' +все | all +она | she +так | so, thus +его | him +но | but +да | yes/and +ты | thou +к | towards, by +у | around, chez +же | intensifier particle +вы | you +за | beyond, behind +бы | conditional/subj. particle +по | up to, along +только | only +ее | her +мне | to me +было | it was +вот | here is/are, particle +от | away from +меня | me +еще | still, yet, more +нет | no, there isnt/arent +о | about +из | out of +ему | to him +теперь | now +когда | when +даже | even +ну | so, well +вдруг | suddenly +ли | interrogative particle +если | if +уже | already, but homonym of `narrower' +или | or +ни | neither +быть | to be +был | he was +него | prepositional form of его +до | up to +вас | you accusative +нибудь | indef. suffix preceded by hyphen +опять | again +уж | already, but homonym of `adder' +вам | to you +сказал | he said +ведь | particle `after all' +там | there +потом | then +себя | oneself +ничего | nothing +ей | to her +может | usually with `быть' as `maybe' +они | they +тут | here +где | where +есть | there is/are +надо | got to, must +ней | prepositional form of ей +для | for +мы | we +тебя | thee +их | them, their +чем | than +была | she was +сам | self +чтоб | in order to +без | without +будто | as if +человек | man, person, one +чего | genitive form of `what' +раз | once +тоже | also +себе | to oneself +под | beneath +жизнь | life +будет | will be +ж | short form of intensifer particle `же' +тогда | then +кто | who +этот | this +говорил | was saying +того | genitive form of `that' +потому | for that reason +этого | genitive form of `this' +какой | which +совсем | altogether +ним | prepositional form of `его', `они' +здесь | here +этом | prepositional form of `этот' +один | one +почти | almost +мой | my +тем | instrumental/dative plural of `тот', `то' +чтобы | full form of `in order that' +нее | her (acc.) +кажется | it seems +сейчас | now +были | they were +куда | where to +зачем | why +сказать | to say +всех | all (acc., gen. preposn. plural) +никогда | never +сегодня | today +можно | possible, one can +при | by +наконец | finally +два | two +об | alternative form of `о', about +другой | another +хоть | even +после | after +над | above +больше | more +тот | that one (masc.) +через | across, in +эти | these +нас | us +про | about +всего | in all, only, of all +них | prepositional form of `они' (they) +какая | which, feminine +много | lots +разве | interrogative particle +сказала | she said +три | three +эту | this, acc. fem. sing. +моя | my, feminine +впрочем | moreover, besides +хорошо | good +свою | ones own, acc. fem. sing. +этой | oblique form of `эта', fem. `this' +перед | in front of +иногда | sometimes +лучше | better +чуть | a little +том | preposn. form of `that one' +нельзя | one must not +такой | such a one +им | to them +более | more +всегда | always +конечно | of course +всю | acc. fem. sing of `all' +между | between + + + | b: some paradigms + | + | personal pronouns + | + | я меня мне мной [мною] + | ты тебя тебе тобой [тобою] + | он его ему им [него, нему, ним] + | она ее эи ею [нее, нэи, нею] + | оно его ему им [него, нему, ним] + | + | мы нас нам нами + | вы вас вам вами + | они их им ими [них, ним, ними] + | + | себя себе собой [собою] + | + | demonstrative pronouns: этот (this), тот (that) + | + | этот эта это эти + | этого эты это эти + | этого этой этого этих + | этому этой этому этим + | этим этой этим [этою] этими + | этом этой этом этих + | + | тот та то те + | того ту то те + | того той того тех + | тому той тому тем + | тем той тем [тою] теми + | том той том тех + | + | determinative pronouns + | + | (a) весь (all) + | + | весь вся все все + | всего всю все все + | всего всей всего всех + | всему всей всему всем + | всем всей всем [всею] всеми + | всем всей всем всех + | + | (b) сам (himself etc) + | + | сам сама само сами + | самого саму само самих + | самого самой самого самих + | самому самой самому самим + | самим самой самим [самою] самими + | самом самой самом самих + | + | stems of verbs `to be', `to have', `to do' and modal + | + | быть бы буд быв есть суть + | име + | дел + | мог мож мочь + | уме + | хоч хот + | долж + | можн + | нужн + | нельзя + diff --git a/solr/conf/lang/stopwords_sv.txt b/solr/conf/lang/stopwords_sv.txt new file mode 100644 index 000000000..096f87f67 --- /dev/null +++ b/solr/conf/lang/stopwords_sv.txt @@ -0,0 +1,133 @@ + | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt + | This file is distributed under the BSD License. + | See http://snowball.tartarus.org/license.php + | Also see http://www.opensource.org/licenses/bsd-license.html + | - Encoding was converted to UTF-8. + | - This notice was added. + | + | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" + + | A Swedish stop word list. Comments begin with vertical bar. Each stop + | word is at the start of a line. + + | This is a ranked list (commonest to rarest) of stopwords derived from + | a large text sample. + + | Swedish stop words occasionally exhibit homonym clashes. For example + | så = so, but also seed. These are indicated clearly below. + +och | and +det | it, this/that +att | to (with infinitive) +i | in, at +en | a +jag | I +hon | she +som | who, that +han | he +på | on +den | it, this/that +med | with +var | where, each +sig | him(self) etc +för | for +så | so (also: seed) +till | to +är | is +men | but +ett | a +om | if; around, about +hade | had +de | they, these/those +av | of +icke | not, no +mig | me +du | you +henne | her +då | then, when +sin | his +nu | now +har | have +inte | inte någon = no one +hans | his +honom | him +skulle | 'sake' +hennes | her +där | there +min | my +man | one (pronoun) +ej | nor +vid | at, by, on (also: vast) +kunde | could +något | some etc +från | from, off +ut | out +när | when +efter | after, behind +upp | up +vi | we +dem | them +vara | be +vad | what +över | over +än | than +dig | you +kan | can +sina | his +här | here +ha | have +mot | towards +alla | all +under | under (also: wonder) +någon | some etc +eller | or (else) +allt | all +mycket | much +sedan | since +ju | why +denna | this/that +själv | myself, yourself etc +detta | this/that +åt | to +utan | without +varit | was +hur | how +ingen | no +mitt | my +ni | you +bli | to be, become +blev | from bli +oss | us +din | thy +dessa | these/those +några | some etc +deras | their +blir | from bli +mina | my +samma | (the) same +vilken | who, that +er | you, your +sådan | such a +vår | our +blivit | from bli +dess | its +inom | within +mellan | between +sådant | such a +varför | why +varje | each +vilka | who, that +ditt | thy +vem | who +vilket | who, that +sitta | his +sådana | such a +vart | each +dina | thy +vars | whose +vårt | our +våra | our +ert | your +era | your +vilkas | whose + diff --git a/solr/conf/lang/stopwords_th.txt b/solr/conf/lang/stopwords_th.txt new file mode 100644 index 000000000..07f0fabe6 --- /dev/null +++ b/solr/conf/lang/stopwords_th.txt @@ -0,0 +1,119 @@ +# Thai stopwords from: +# "Opinion Detection in Thai Political News Columns +# Based on Subjectivity Analysis" +# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak +ไว้ +ไม่ +ไป +ได้ +ให้ +ใน +โดย +แห่ง +แล้ว +และ +แรก +แบบ +แต่ +เอง +เห็น +เลย +เริ่ม +เรา +เมื่อ +เพื่อ +เพราะ +เป็นการ +เป็น +เปิดเผย +เปิด +เนื่องจาก +เดียวกัน +เดียว +เช่น +เฉพาะ +เคย +เข้า +เขา +อีก +อาจ +อะไร +ออก +อย่าง +อยู่ +อยาก +หาก +หลาย +หลังจาก +หลัง +หรือ +หนึ่ง +ส่วน +ส่ง +สุด +สําหรับ +ว่า +วัน +ลง +ร่วม +ราย +รับ +ระหว่าง +รวม +ยัง +มี +มาก +มา +พร้อม +พบ +ผ่าน +ผล +บาง +น่า +นี้ +นํา +นั้น +นัก +นอกจาก +ทุก +ที่สุด +ที่ +ทําให้ +ทํา +ทาง +ทั้งนี้ +ทั้ง +ถ้า +ถูก +ถึง +ต้อง +ต่างๆ +ต่าง +ต่อ +ตาม +ตั้งแต่ +ตั้ง +ด้าน +ด้วย +ดัง +ซึ่ง +ช่วง +จึง +จาก +จัด +จะ +คือ +ความ +ครั้ง +คง +ขึ้น +ของ +ขอ +ขณะ +ก่อน +ก็ +การ +กับ +กัน +กว่า +กล่าว diff --git a/solr/conf/lang/stopwords_tr.txt b/solr/conf/lang/stopwords_tr.txt new file mode 100644 index 000000000..84d9408d4 --- /dev/null +++ b/solr/conf/lang/stopwords_tr.txt @@ -0,0 +1,212 @@ +# Turkish stopwords from LUCENE-559 +# merged with the list from "Information Retrieval on Turkish Texts" +# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) +acaba +altmış +altı +ama +ancak +arada +aslında +ayrıca +bana +bazı +belki +ben +benden +beni +benim +beri +beş +bile +bin +bir +birçok +biri +birkaç +birkez +birşey +birşeyi +biz +bize +bizden +bizi +bizim +böyle +böylece +bu +buna +bunda +bundan +bunlar +bunları +bunların +bunu +bunun +burada +çok +çünkü +da +daha +dahi +de +defa +değil +diğer +diye +doksan +dokuz +dolayı +dolayısıyla +dört +edecek +eden +ederek +edilecek +ediliyor +edilmesi +ediyor +eğer +elli +en +etmesi +etti +ettiği +ettiğini +gibi +göre +halen +hangi +hatta +hem +henüz +hep +hepsi +her +herhangi +herkesin +hiç +hiçbir +için +iki +ile +ilgili +ise +işte +itibaren +itibariyle +kadar +karşın +katrilyon +kendi +kendilerine +kendini +kendisi +kendisine +kendisini +kez +ki +kim +kimden +kime +kimi +kimse +kırk +milyar +milyon +mu +mü +mı +nasıl +ne +neden +nedenle +nerde +nerede +nereye +niye +niçin +o +olan +olarak +oldu +olduğu +olduğunu +olduklarını +olmadı +olmadığı +olmak +olması +olmayan +olmaz +olsa +olsun +olup +olur +olursa +oluyor +on +ona +ondan +onlar +onlardan +onları +onların +onu +onun +otuz +oysa +öyle +pek +rağmen +sadece +sanki +sekiz +seksen +sen +senden +seni +senin +siz +sizden +sizi +sizin +şey +şeyden +şeyi +şeyler +şöyle +şu +şuna +şunda +şundan +şunları +şunu +tarafından +trilyon +tüm +üç +üzere +var +vardı +ve +veya +ya +yani +yapacak +yapılan +yapılması +yapıyor +yapmak +yaptı +yaptığı +yaptığını +yaptıkları +yedi +yerine +yetmiş +yine +yirmi +yoksa +yüz +zaten diff --git a/solr/conf/lang/userdict_ja.txt b/solr/conf/lang/userdict_ja.txt new file mode 100644 index 000000000..6f0368e4d --- /dev/null +++ b/solr/conf/lang/userdict_ja.txt @@ -0,0 +1,29 @@ +# +# This is a sample user dictionary for Kuromoji (JapaneseTokenizer) +# +# Add entries to this file in order to override the statistical model in terms +# of segmentation, readings and part-of-speech tags. Notice that entries do +# not have weights since they are always used when found. This is by-design +# in order to maximize ease-of-use. +# +# Entries are defined using the following CSV format: +# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> +# +# Notice that a single half-width space separates tokens and readings, and +# that the number tokens and readings must match exactly. +# +# Also notice that multiple entries with the same <text> is undefined. +# +# Whitespace only lines are ignored. Comments are not allowed on entry lines. +# + +# Custom segmentation for kanji compounds +日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 +関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 + +# Custom segmentation for compound katakana +トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 +ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 + +# Custom reading for former sumo wrestler +朝青龍,朝青龍,アサショウリュウ,カスタム人名 diff --git a/solr/conf/params.json b/solr/conf/params.json new file mode 100644 index 000000000..06114ef25 --- /dev/null +++ b/solr/conf/params.json @@ -0,0 +1,20 @@ +{"params":{ + "query":{ + "defType":"edismax", + "q.alt":"*:*", + "rows":"10", + "fl":"*,score", + "":{"v":0} + }, + "facets":{ + "facet":"on", + "facet.mincount": "1", + "":{"v":0} + }, + "velocity":{ + "wt": "velocity", + "v.template":"browse", + "v.layout": "layout", + "":{"v":0} + } +}}
\ No newline at end of file diff --git a/solr/conf/protwords.txt b/solr/conf/protwords.txt new file mode 100644 index 000000000..1dfc0abec --- /dev/null +++ b/solr/conf/protwords.txt @@ -0,0 +1,21 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. +dontstems +zwhacky + diff --git a/solr/conf/schema.xml b/solr/conf/schema.xml new file mode 100644 index 000000000..a568db14c --- /dev/null +++ b/solr/conf/schema.xml @@ -0,0 +1,60 @@ +<?xml version="1.0" encoding="UTF-8"?> +<schema name="default-config" version="1.6"> + <uniqueKey>id</uniqueKey> + <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/> + <fieldType name="text" class="solr.TextField"> + <analyzer type="index"> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.StopFilterFactory" words="stopwords.txt"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.PorterStemFilterFactory"/> + <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="12"/> + </analyzer> + <analyzer type="query"> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.StopFilterFactory" words="stopwords.txt"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.PorterStemFilterFactory"/> + </analyzer> + </fieldType> + <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true"> + <analyzer type="index"> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> + <!-- in this example, we will only use synonyms at query time + <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> + <filter class="solr.FlattenGraphFilterFactory"/> + --> + <filter class="solr.LowerCaseFilterFactory"/> + </analyzer> + <analyzer type="query"> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> + <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> + <filter class="solr.LowerCaseFilterFactory"/> + </analyzer> + </fieldType> + + <fieldType name="plong" class="solr.LongPointField"/> + <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/> + <fieldType name="pdate" class="solr.DatePointField"/> + <fieldType name="pdaterange" class="solr.DateRangeField"/> + <fieldType name="pdouble" class="solr.DoublePointField"/> + + <field name="text" type="text" indexed="true" stored="false" uninvertible="false" multiValued="true"/> + <field name="id" type="string" indexed="true" stored="true" uninvertible="false" required="true"/> + <field name="_version_" type="plong" indexed="true" stored="true"/> + + <dynamicField name="*_t" type="text" indexed="true" stored="true" uninvertible="false" docValues="false"/> + <dynamicField name="*_n" type="pdouble" indexed="true" stored="true" uninvertible="false" docValues="false"/> + <dynamicField name="*_d" type="pdaterange" indexed="true" stored="true" uninvertible="false" docValues="false"/> + <dynamicField name="*_l" type="text" indexed="true" stored="true" uninvertible="false" docValues="false" multiValued="true"/> + <dynamicField name="*_i" type="string" indexed="true" stored="true" uninvertible="false" docValues="false"/> + + <dynamicField name="*_a" type="text" indexed="true" stored="false" uninvertible="false" docValues="false"/> + + <copyField source="*_t" dest="text"/> + + <copyField source="*_t" dest="*_a"/> + <copyField source="*_n" dest="*_a"/> +</schema> diff --git a/solr/conf/schema.xml~ b/solr/conf/schema.xml~ new file mode 100644 index 000000000..3b2addf78 --- /dev/null +++ b/solr/conf/schema.xml~ @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="UTF-8"?> +<schema name="default-config" version="1.6"> + <uniqueKey>id</uniqueKey> + <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/> + <fieldType name="text" class="solr.TextField"> + <analyzer type="index"> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.StopFilterFactory" words="stopwords.txt"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.PorterStemFilterFactory"/> + <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="12"/> + </analyzer> + <analyzer type="query"> + <tokenizer class="solr.StandardTokenizerFactory"/> + <filter class="solr.StopFilterFactory" words="stopwords.txt"/> + <filter class="solr.LowerCaseFilterFactory"/> + <filter class="solr.PorterStemFilterFactory"/> + </analyzer> + </fieldType> + <field name="data" type="text" indexed="true" stored="true" uninvertible="false"/> + <field name="id" type="string" indexed="true" stored="true" uninvertible="false" required="true"/> + <field name="_version_" type="long" indexed="false" stored="false" multiValued="false"/> +</schema> diff --git a/solr/conf/solrconfig.xml b/solr/conf/solrconfig.xml new file mode 100644 index 000000000..90eff5363 --- /dev/null +++ b/solr/conf/solrconfig.xml @@ -0,0 +1,1328 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- + For more details about configurations options that may appear in + this file, see http://wiki.apache.org/solr/SolrConfigXml. +--> +<config> + <!-- In all configuration below, a prefix of "solr." for class names + is an alias that causes solr to search appropriate packages, + including org.apache.solr.(search|update|request|core|analysis) + + You may also specify a fully qualified Java classname if you + have your own custom plugins. + --> + + <!-- Controls what version of Lucene various components of Solr + adhere to. Generally, you want to use the latest version to + get all bug fixes and improvements. It is highly recommended + that you fully re-index after changing this setting as it can + affect both how text is indexed and queried. + --> + <luceneMatchVersion>8.0.0</luceneMatchVersion> + + <!-- <lib/> directives can be used to instruct Solr to load any Jars + identified and use them to resolve any "plugins" specified in + your solrconfig.xml or schema.xml (ie: Analyzers, Request + Handlers, etc...). + + All directories and paths are resolved relative to the + instanceDir. + + Please note that <lib/> directives are processed in the order + that they appear in your solrconfig.xml file, and are "stacked" + on top of each other when building a ClassLoader - so if you have + plugin jars with dependencies on other jars, the "lower level" + dependency jars should be loaded first. + + If a "./lib" directory exists in your instanceDir, all files + found in it are included as if you had used the following + syntax... + + <lib dir="./lib" /> + --> + + <!-- A 'dir' option by itself adds any files found in the directory + to the classpath, this is useful for including all jars in a + directory. + + When a 'regex' is specified in addition to a 'dir', only the + files in that directory which completely match the regex + (anchored on both ends) will be included. + + If a 'dir' option (with or without a regex) is used and nothing + is found that matches, a warning will be logged. + + The examples below can be used to load some solr-contribs along + with their external dependencies. + --> + <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" /> + + <lib dir="${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" /> + + <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" /> + + <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-ltr-\d.*\.jar" /> + + <!-- an exact 'path' can be used instead of a 'dir' to specify a + specific jar file. This will cause a serious error to be logged + if it can't be loaded. + --> + <!-- + <lib path="../a-jar-that-does-not-exist.jar" /> + --> + + <!-- Data Directory + + Used to specify an alternate directory to hold all index data + other than the default ./data under the Solr home. If + replication is in use, this should match the replication + configuration. + --> + <dataDir>${solr.data.dir:}</dataDir> + + + <!-- The DirectoryFactory to use for indexes. + + solr.StandardDirectoryFactory is filesystem + based and tries to pick the best implementation for the current + JVM and platform. solr.NRTCachingDirectoryFactory, the default, + wraps solr.StandardDirectoryFactory and caches small files in memory + for better NRT performance. + + One can force a particular implementation via solr.MMapDirectoryFactory, + solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. + + solr.RAMDirectoryFactory is memory based and not persistent. + --> + <directoryFactory name="DirectoryFactory" + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> + + <schemaFactory class="ClassicIndexSchemaFactory"/> + <!-- The CodecFactory for defining the format of the inverted index. + The default implementation is SchemaCodecFactory, which is the official Lucene + index format, but hooks into the schema to provide per-field customization of + the postings lists and per-document values in the fieldType element + (postingsFormat/docValuesFormat). Note that most of the alternative implementations + are experimental, so if you choose to customize the index format, it's a good + idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader) + before upgrading to a newer version to avoid unnecessary reindexing. + A "compressionMode" string element can be added to <codecFactory> to choose + between the existing compression modes in the default codec: "BEST_SPEED" (default) + or "BEST_COMPRESSION". + --> + <codecFactory class="solr.SchemaCodecFactory"/> + + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Index Config - These settings control low-level behavior of indexing + Most example settings here show the default value, but are commented + out, to more easily see where customizations have been made. + + Note: This replaces <indexDefaults> and <mainIndex> from older versions + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> + <indexConfig> + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a + LimitTokenCountFilterFactory in your fieldType definition. E.g. + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> + --> + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> + <!-- <writeLockTimeout>1000</writeLockTimeout> --> + + <!-- Expert: Enabling compound file will use less files for the index, + using fewer file descriptors on the expense of performance decrease. + Default in Lucene is "true". Default in Solr is "false" (since 3.6) --> + <!-- <useCompoundFile>false</useCompoundFile> --> + + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene + indexing for buffering added documents and deletions before they are + flushed to the Directory. + maxBufferedDocs sets a limit on the number of documents buffered + before flushing. + If both ramBufferSizeMB and maxBufferedDocs is set, then + Lucene will flush based on whichever limit is hit first. --> + <!-- <ramBufferSizeMB>100</ramBufferSizeMB> --> + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> + + <!-- Expert: Merge Policy + The Merge Policy in Lucene controls how merging of segments is done. + The default since Solr/Lucene 3.3 is TieredMergePolicy. + The default since Lucene 2.3 was the LogByteSizeMergePolicy, + Even older versions of Lucene used LogDocMergePolicy. + --> + <!-- + <mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory"> + <int name="maxMergeAtOnce">10</int> + <int name="segmentsPerTier">10</int> + <double name="noCFSRatio">0.1</double> + </mergePolicyFactory> + --> + + <!-- Expert: Merge Scheduler + The Merge Scheduler in Lucene controls how merges are + performed. The ConcurrentMergeScheduler (Lucene 2.3 default) + can perform merges in the background using separate threads. + The SerialMergeScheduler (Lucene 2.2 default) does not. + --> + <!-- + <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> + --> + + <!-- LockFactory + + This option specifies which Lucene LockFactory implementation + to use. + + single = SingleInstanceLockFactory - suggested for a + read-only index or when there is no possibility of + another process trying to modify the index. + native = NativeFSLockFactory - uses OS native file locking. + Do not use when multiple solr webapps in the same + JVM are attempting to share a single index. + simple = SimpleFSLockFactory - uses a plain file for locking + + Defaults: 'native' is default for Solr3.6 and later, otherwise + 'simple' is the default + + More details on the nuances of each LockFactory... + http://wiki.apache.org/lucene-java/AvailableLockFactories + --> + <lockType>${solr.lock.type:native}</lockType> + + <!-- Commit Deletion Policy + Custom deletion policies can be specified here. The class must + implement org.apache.lucene.index.IndexDeletionPolicy. + + The default Solr IndexDeletionPolicy implementation supports + deleting index commit points on number of commits, age of + commit point and optimized status. + + The latest commit point should always be preserved regardless + of the criteria. + --> + <!-- + <deletionPolicy class="solr.SolrDeletionPolicy"> + --> + <!-- The number of commit points to be kept --> + <!-- <str name="maxCommitsToKeep">1</str> --> + <!-- The number of optimized commit points to be kept --> + <!-- <str name="maxOptimizedCommitsToKeep">0</str> --> + <!-- + Delete all commit points once they have reached the given age. + Supports DateMathParser syntax e.g. + --> + <!-- + <str name="maxCommitAge">30MINUTES</str> + <str name="maxCommitAge">1DAY</str> + --> + <!-- + </deletionPolicy> + --> + + <!-- Lucene Infostream + + To aid in advanced debugging, Lucene provides an "InfoStream" + of detailed information when indexing. + + Setting The value to true will instruct the underlying Lucene + IndexWriter to write its debugging info the specified file + --> + <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> --> + </indexConfig> + + + <!-- JMX + + This example enables JMX if and only if an existing MBeanServer + is found, use this if you want to configure JMX through JVM + parameters. Remove this to disable exposing Solr configuration + and statistics to JMX. + + For more details see http://wiki.apache.org/solr/SolrJmx + --> + <jmx /> + <!-- If you want to connect to a particular server, specify the + agentId + --> + <!-- <jmx agentId="myAgent" /> --> + <!-- If you want to start a new MBeanServer, specify the serviceUrl --> + <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> + --> + + <!-- The default high-performance update handler --> + <updateHandler class="solr.DirectUpdateHandler2"> + + <!-- Enables a transaction log, used for real-time get, durability, and + and solr cloud replica recovery. The log can grow as big as + uncommitted changes to the index, so use of a hard autoCommit + is recommended (see below). + "dir" - the target directory for transaction logs, defaults to the + solr data directory. + "numVersionBuckets" - sets the number of buckets used to keep + track of max version values when checking for re-ordered + updates; increase this value to reduce the cost of + synchronizing access to version buckets during high-volume + indexing, this requires 8 bytes (long) * numVersionBuckets + of heap space per Solr core. + --> + <updateLog> + <str name="dir">${solr.ulog.dir:}</str> + <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int> + </updateLog> + + <!-- AutoCommit + + Perform a hard commit automatically under certain conditions. + Instead of enabling autoCommit, consider using "commitWithin" + when adding documents. + + http://wiki.apache.org/solr/UpdateXmlMessages + + maxDocs - Maximum number of documents to add since the last + commit before automatically triggering a new commit. + + maxTime - Maximum amount of time in ms that is allowed to pass + since a document was added before automatically + triggering a new commit. + openSearcher - if false, the commit causes recent index changes + to be flushed to stable storage, but does not cause a new + searcher to be opened to make those changes visible. + + If the updateLog is enabled, then it's highly recommended to + have some sort of hard autoCommit to limit the log size. + --> + <autoCommit> + <maxTime>600000</maxTime> + <openSearcher>false</openSearcher> + </autoCommit> + + <!-- softAutoCommit is like autoCommit except it causes a + 'soft' commit which only ensures that changes are visible + but does not ensure that data is synced to disk. This is + faster and more near-realtime friendly than a hard commit. + --> + + <autoSoftCommit> + <maxTime>1000</maxTime> + </autoSoftCommit> + + <!-- Update Related Event Listeners + + Various IndexWriter related events can trigger Listeners to + take actions. + + postCommit - fired after every commit or optimize command + postOptimize - fired after every optimize command + --> + + </updateHandler> + + <!-- IndexReaderFactory + + Use the following format to specify a custom IndexReaderFactory, + which allows for alternate IndexReader implementations. + + ** Experimental Feature ** + + Please note - Using a custom IndexReaderFactory may prevent + certain other features from working. The API to + IndexReaderFactory may change without warning or may even be + removed from future releases if the problems cannot be + resolved. + + + ** Features that may not work with custom IndexReaderFactory ** + + The ReplicationHandler assumes a disk-resident index. Using a + custom IndexReader implementation may cause incompatibility + with ReplicationHandler and may cause replication to not work + correctly. See SOLR-1366 for details. + + --> + <!-- + <indexReaderFactory name="IndexReaderFactory" class="package.class"> + <str name="someArg">Some Value</str> + </indexReaderFactory > + --> + + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Query section - these settings control query time things like caches + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> + <query> + + <!-- Maximum number of clauses in each BooleanQuery, an exception + is thrown if exceeded. It is safe to increase or remove this setting, + since it is purely an arbitrary limit to try and catch user errors where + large boolean queries may not be the best implementation choice. + --> + <maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses> + + <!-- Solr Internal Query Caches + + There are two implementations of cache available for Solr, + LRUCache, based on a synchronized LinkedHashMap, and + FastLRUCache, based on a ConcurrentHashMap. + + FastLRUCache has faster gets and slower puts in single + threaded operation and thus is generally faster than LRUCache + when the hit ratio of the cache is high (> 75%), and may be + faster under other scenarios on multi-cpu systems. + --> + + <!-- Filter Cache + + Cache used by SolrIndexSearcher for filters (DocSets), + unordered sets of *all* documents that match a query. When a + new searcher is opened, its caches may be prepopulated or + "autowarmed" using data from caches in the old searcher. + autowarmCount is the number of items to prepopulate. For + LRUCache, the autowarmed items will be the most recently + accessed items. + + Parameters: + class - the SolrCache implementation LRUCache or + (LRUCache or FastLRUCache) + size - the maximum number of entries in the cache + initialSize - the initial capacity (number of entries) of + the cache. (see java.util.HashMap) + autowarmCount - the number of entries to prepopulate from + and old cache. + maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed + to occupy. Note that when this option is specified, the size + and initialSize parameters are ignored. + --> + <filterCache class="solr.FastLRUCache" + size="512" + initialSize="512" + autowarmCount="0"/> + + <!-- Query Result Cache + + Caches results of searches - ordered lists of document ids + (DocList) based on a query, a sort, and the range of documents requested. + Additional supported parameter by LRUCache: + maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed + to occupy + --> + <queryResultCache class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="0"/> + + <!-- Document Cache + + Caches Lucene Document objects (the stored fields for each + document). Since Lucene internal document ids are transient, + this cache will not be autowarmed. + --> + <documentCache class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="0"/> + + <!-- custom cache currently used by block join --> + <cache name="perSegFilter" + class="solr.search.LRUCache" + size="10" + initialSize="0" + autowarmCount="10" + regenerator="solr.NoOpRegenerator" /> + + <!-- Field Value Cache + + Cache used to hold field values that are quickly accessible + by document id. The fieldValueCache is created by default + even if not configured here. + --> + <!-- + <fieldValueCache class="solr.FastLRUCache" + size="512" + autowarmCount="128" + showItems="32" /> + --> + + <!-- Custom Cache + + Example of a generic cache. These caches may be accessed by + name through SolrIndexSearcher.getCache(),cacheLookup(), and + cacheInsert(). The purpose is to enable easy caching of + user/application level data. The regenerator argument should + be specified as an implementation of solr.CacheRegenerator + if autowarming is desired. + --> + <!-- + <cache name="myUserCache" + class="solr.LRUCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="com.mycompany.MyRegenerator" + /> + --> + + + <!-- Lazy Field Loading + + If true, stored fields that are not requested will be loaded + lazily. This can result in a significant speed improvement + if the usual case is to not load all stored fields, + especially if the skipped fields are large compressed text + fields. + --> + <enableLazyFieldLoading>true</enableLazyFieldLoading> + + <!-- Use Filter For Sorted Query + + A possible optimization that attempts to use a filter to + satisfy a search. If the requested sort does not include + score, then the filterCache will be checked for a filter + matching the query. If found, the filter will be used as the + source of document ids, and then the sort will be applied to + that. + + For most situations, this will not be useful unless you + frequently get the same search repeatedly with different sort + options, and none of them ever use "score" + --> + <!-- + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <!-- Result Window Size + + An optimization for use with the queryResultCache. When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. + --> + <queryResultWindowSize>20</queryResultWindowSize> + + <!-- Maximum number of documents to cache for any entry in the + queryResultCache. + --> + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + + <!-- Query Related Event Listeners + + Various IndexSearcher related events can trigger Listeners to + take actions. + + newSearcher - fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka + registered). It can be used to prime certain caches to + prevent long request times for certain requests. + + firstSearcher - fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. + + + --> + <!-- QuerySenderListener takes an array of NamedList and executes a + local query request for each NamedList in sequence. + --> + <listener event="newSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <!-- + <lst><str name="q">solr</str><str name="sort">price asc</str></lst> + <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst> + --> + </arr> + </listener> + <listener event="firstSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <!-- + <lst> + <str name="q">static firstSearcher warming in solrconfig.xml</str> + </lst> + --> + </arr> + </listener> + + <!-- Use Cold Searcher + + If a search request comes in and there is no current + registered searcher, then immediately register the still + warming searcher and use it. If "false" then all requests + will block until the first searcher is done warming. + --> + <useColdSearcher>false</useColdSearcher> + + </query> + + + <!-- Request Dispatcher + + This section contains instructions for how the SolrDispatchFilter + should behave when processing requests for this SolrCore. + + --> + <requestDispatcher> + <!-- Request Parsing + + These settings indicate how Solr Requests may be parsed, and + what restrictions may be placed on the ContentStreams from + those requests + + enableRemoteStreaming - enables use of the stream.file + and stream.url parameters for specifying remote streams. + + multipartUploadLimitInKB - specifies the max size (in KiB) of + Multipart File Uploads that Solr will allow in a Request. + + formdataUploadLimitInKB - specifies the max size (in KiB) of + form data (application/x-www-form-urlencoded) sent via + POST. You can use POST to pass request parameters not + fitting into the URL. + + addHttpRequestToContext - if set to true, it will instruct + the requestParsers to include the original HttpServletRequest + object in the context map of the SolrQueryRequest under the + key "httpRequest". It will not be used by any of the existing + Solr components, but may be useful when developing custom + plugins. + + *** WARNING *** + Before enabling remote streaming, you should make sure your + system has authentication enabled. + + <requestParsers enableRemoteStreaming="false" + multipartUploadLimitInKB="-1" + formdataUploadLimitInKB="-1" + addHttpRequestToContext="false"/> + --> + + <!-- HTTP Caching + + Set HTTP caching related parameters (for proxy caches and clients). + + The options below instruct Solr not to output any HTTP Caching + related headers + --> + <httpCaching never304="true" /> + <!-- If you include a <cacheControl> directive, it will be used to + generate a Cache-Control header (as well as an Expires header + if the value contains "max-age=") + + By default, no Cache-Control header is generated. + + You can use the <cacheControl> option even if you have set + never304="true" + --> + <!-- + <httpCaching never304="true" > + <cacheControl>max-age=30, public</cacheControl> + </httpCaching> + --> + <!-- To enable Solr to respond with automatically generated HTTP + Caching headers, and to response to Cache Validation requests + correctly, set the value of never304="false" + + This will cause Solr to generate Last-Modified and ETag + headers based on the properties of the Index. + + The following options can also be specified to affect the + values of these headers... + + lastModFrom - the default value is "openTime" which means the + Last-Modified value (and validation against If-Modified-Since + requests) will all be relative to when the current Searcher + was opened. You can change it to lastModFrom="dirLastMod" if + you want the value to exactly correspond to when the physical + index was last modified. + + etagSeed="..." is an option you can change to force the ETag + header (and validation against If-None-Match requests) to be + different even if the index has not changed (ie: when making + significant changes to your config file) + + (lastModifiedFrom and etagSeed are both ignored if you use + the never304="true" option) + --> + <!-- + <httpCaching lastModifiedFrom="openTime" + etagSeed="Solr"> + <cacheControl>max-age=30, public</cacheControl> + </httpCaching> + --> + </requestDispatcher> + + <!-- Request Handlers + + http://wiki.apache.org/solr/SolrRequestHandler + + Incoming queries will be dispatched to a specific handler by name + based on the path specified in the request. + + If a Request Handler is declared with startup="lazy", then it will + not be initialized until the first request that uses it. + + --> + <!-- SearchHandler + + http://wiki.apache.org/solr/SearchHandler + + For processing Search Queries, the primary Request Handler + provided with Solr is "SearchHandler" It delegates to a sequent + of SearchComponents (see below) and supports distributed + queries across multiple shards + --> + <requestHandler name="/select" class="solr.SearchHandler"> + <!-- default values for query parameters can be specified, these + will be overridden by parameters in the request + --> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <int name="rows">10</int> + <str name="df">text</str> + <!-- Default search field + <str name="df">text</str> + --> + <!-- Change from JSON to XML format (the default prior to Solr 7.0) + <str name="wt">xml</str> + --> + </lst> + <!-- In addition to defaults, "appends" params can be specified + to identify values which should be appended to the list of + multi-val params from the query (or the existing "defaults"). + --> + <!-- In this example, the param "fq=instock:true" would be appended to + any query time fq params the user may specify, as a mechanism for + partitioning the index, independent of any user selected filtering + that may also be desired (perhaps as a result of faceted searching). + + NOTE: there is *absolutely* nothing a client can do to prevent these + "appends" values from being used, so don't use this mechanism + unless you are sure you always want it. + --> + <!-- + <lst name="appends"> + <str name="fq">inStock:true</str> + </lst> + --> + <!-- "invariants" are a way of letting the Solr maintainer lock down + the options available to Solr clients. Any params values + specified here are used regardless of what values may be specified + in either the query, the "defaults", or the "appends" params. + + In this example, the facet.field and facet.query params would + be fixed, limiting the facets clients can use. Faceting is + not turned on by default - but if the client does specify + facet=true in the request, these are the only facets they + will be able to see counts for; regardless of what other + facet.field or facet.query params they may specify. + + NOTE: there is *absolutely* nothing a client can do to prevent these + "invariants" values from being used, so don't use this mechanism + unless you are sure you always want it. + --> + <!-- + <lst name="invariants"> + <str name="facet.field">cat</str> + <str name="facet.field">manu_exact</str> + <str name="facet.query">price:[* TO 500]</str> + <str name="facet.query">price:[500 TO *]</str> + </lst> + --> + <!-- If the default list of SearchComponents is not desired, that + list can either be overridden completely, or components can be + prepended or appended to the default list. (see below) + --> + <!-- + <arr name="components"> + <str>nameOfCustomComponent1</str> + <str>nameOfCustomComponent2</str> + </arr> + --> + </requestHandler> + + <!-- A request handler that returns indented JSON by default --> + <requestHandler name="/query" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <str name="wt">json</str> + <str name="indent">true</str> + </lst> + </requestHandler> + + + <!-- A Robust Example + + This example SearchHandler declaration shows off usage of the + SearchHandler with many defaults declared + + Note that multiple instances of the same Request Handler + (SearchHandler) can be registered multiple times with different + names (and different init parameters) + --> + <requestHandler name="/browse" class="solr.SearchHandler" useParams="query,facets,velocity,browse"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + </requestHandler> + + <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse"> + <lst name="defaults"> + <str name="df">_text_</str> + </lst> + </initParams> + + <!-- Solr Cell Update Request Handler + + http://wiki.apache.org/solr/ExtractingRequestHandler + + --> + <requestHandler name="/update/extract" + startup="lazy" + class="solr.extraction.ExtractingRequestHandler" > + <lst name="defaults"> + <str name="lowernames">true</str> + <str name="fmap.content">_text_</str> + </lst> + </requestHandler> + + <!-- Search Components + + Search components are registered to SolrCore and used by + instances of SearchHandler (which can access them by name) + + By default, the following components are available: + + <searchComponent name="query" class="solr.QueryComponent" /> + <searchComponent name="facet" class="solr.FacetComponent" /> + <searchComponent name="mlt" class="solr.MoreLikeThisComponent" /> + <searchComponent name="highlight" class="solr.HighlightComponent" /> + <searchComponent name="stats" class="solr.StatsComponent" /> + <searchComponent name="debug" class="solr.DebugComponent" /> + + Default configuration in a requestHandler would look like: + + <arr name="components"> + <str>query</str> + <str>facet</str> + <str>mlt</str> + <str>highlight</str> + <str>stats</str> + <str>debug</str> + </arr> + + If you register a searchComponent to one of the standard names, + that will be used instead of the default. + + To insert components before or after the 'standard' components, use: + + <arr name="first-components"> + <str>myFirstComponentName</str> + </arr> + + <arr name="last-components"> + <str>myLastComponentName</str> + </arr> + + NOTE: The component registered with the name "debug" will + always be executed after the "last-components" + + --> + + <!-- Spell Check + + The spell check component can return a list of alternative spelling + suggestions. + + http://wiki.apache.org/solr/SpellCheckComponent + --> + <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> + + <str name="queryAnalyzerFieldType">text_general</str> + + <!-- Multiple "Spell Checkers" can be declared and used by this + component + --> + + <!-- a spellchecker built from a field of the main index --> + <lst name="spellchecker"> + <str name="name">default</str> + <str name="field">_text_</str> + <str name="classname">solr.DirectSolrSpellChecker</str> + <!-- the spellcheck distance measure used, the default is the internal levenshtein --> + <str name="distanceMeasure">internal</str> + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion --> + <float name="accuracy">0.5</float> + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 --> + <int name="maxEdits">2</int> + <!-- the minimum shared prefix when enumerating terms --> + <int name="minPrefix">1</int> + <!-- maximum number of inspections per result. --> + <int name="maxInspections">5</int> + <!-- minimum length of a query term to be considered for correction --> + <int name="minQueryLength">4</int> + <!-- maximum threshold of documents a query term can appear to be considered for correction --> + <float name="maxQueryFrequency">0.01</float> + <!-- uncomment this to require suggestions to occur in 1% of the documents + <float name="thresholdTokenFrequency">.01</float> + --> + </lst> + + <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage --> + <!-- + <lst name="spellchecker"> + <str name="name">wordbreak</str> + <str name="classname">solr.WordBreakSolrSpellChecker</str> + <str name="field">name</str> + <str name="combineWords">true</str> + <str name="breakWords">true</str> + <int name="maxChanges">10</int> + </lst> + --> + </searchComponent> + + <!-- A request handler for demonstrating the spellcheck component. + + NOTE: This is purely as an example. The whole purpose of the + SpellCheckComponent is to hook it into the request handler that + handles your normal user queries so that a separate request is + not needed to get suggestions. + + IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! + + See http://wiki.apache.org/solr/SpellCheckComponent for details + on the request parameters. + --> + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <!-- Solr will use suggestions from both the 'default' spellchecker + and from the 'wordbreak' spellchecker and combine them. + collations (re-written queries) can include a combination of + corrections from both spellcheckers --> + <str name="spellcheck.dictionary">default</str> + <str name="spellcheck">on</str> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.count">10</str> + <str name="spellcheck.alternativeTermCount">5</str> + <str name="spellcheck.maxResultsForSuggest">5</str> + <str name="spellcheck.collate">true</str> + <str name="spellcheck.collateExtendedResults">true</str> + <str name="spellcheck.maxCollationTries">10</str> + <str name="spellcheck.maxCollations">5</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <!-- Term Vector Component + + http://wiki.apache.org/solr/TermVectorComponent + --> + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> + + <!-- A request handler for demonstrating the term vector component + + This is purely as an example. + + In reality you will likely want to add the component to your + already specified request handlers. + --> + <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <bool name="tv">true</bool> + </lst> + <arr name="last-components"> + <str>tvComponent</str> + </arr> + </requestHandler> + + <!-- Clustering Component. (Omitted here. See the default Solr example for a typical configuration.) --> + + <!-- Terms Component + + http://wiki.apache.org/solr/TermsComponent + + A component to return terms and document frequency of those + terms + --> + <searchComponent name="terms" class="solr.TermsComponent"/> + + <!-- A request handler for demonstrating the terms component --> + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <bool name="terms">true</bool> + <bool name="distrib">false</bool> + </lst> + <arr name="components"> + <str>terms</str> + </arr> + </requestHandler> + + + <!-- Query Elevation Component + + http://wiki.apache.org/solr/QueryElevationComponent + + a search component that enables you to configure the top + results for a given query regardless of the normal lucene + scoring. + --> + <searchComponent name="elevator" class="solr.QueryElevationComponent" > + <!-- pick a fieldType to analyze queries --> + <str name="queryFieldType">string</str> + </searchComponent> + + <!-- A request handler for demonstrating the elevator component --> + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + <arr name="last-components"> + <str>elevator</str> + </arr> + </requestHandler> + + <!-- Highlighting Component + + http://wiki.apache.org/solr/HighlightingParameters + --> + <searchComponent class="solr.HighlightComponent" name="highlight"> + <highlighting> + <!-- Configure the standard fragmenter --> + <!-- This could most likely be commented out in the "default" case --> + <fragmenter name="gap" + default="true" + class="solr.highlight.GapFragmenter"> + <lst name="defaults"> + <int name="hl.fragsize">100</int> + </lst> + </fragmenter> + + <!-- A regular-expression-based fragmenter + (for sentence extraction) + --> + <fragmenter name="regex" + class="solr.highlight.RegexFragmenter"> + <lst name="defaults"> + <!-- slightly smaller fragsizes work better because of slop --> + <int name="hl.fragsize">70</int> + <!-- allow 50% slop on fragment sizes --> + <float name="hl.regex.slop">0.5</float> + <!-- a basic sentence pattern --> + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> + </lst> + </fragmenter> + + <!-- Configure the standard formatter --> + <formatter name="html" + default="true" + class="solr.highlight.HtmlFormatter"> + <lst name="defaults"> + <str name="hl.simple.pre"><![CDATA[<em>]]></str> + <str name="hl.simple.post"><![CDATA[</em>]]></str> + </lst> + </formatter> + + <!-- Configure the standard encoder --> + <encoder name="html" + class="solr.highlight.HtmlEncoder" /> + + <!-- Configure the standard fragListBuilder --> + <fragListBuilder name="simple" + class="solr.highlight.SimpleFragListBuilder"/> + + <!-- Configure the single fragListBuilder --> + <fragListBuilder name="single" + class="solr.highlight.SingleFragListBuilder"/> + + <!-- Configure the weighted fragListBuilder --> + <fragListBuilder name="weighted" + default="true" + class="solr.highlight.WeightedFragListBuilder"/> + + <!-- default tag FragmentsBuilder --> + <fragmentsBuilder name="default" + default="true" + class="solr.highlight.ScoreOrderFragmentsBuilder"> + <!-- + <lst name="defaults"> + <str name="hl.multiValuedSeparatorChar">/</str> + </lst> + --> + </fragmentsBuilder> + + <!-- multi-colored tag FragmentsBuilder --> + <fragmentsBuilder name="colored" + class="solr.highlight.ScoreOrderFragmentsBuilder"> + <lst name="defaults"> + <str name="hl.tag.pre"><![CDATA[ + <b style="background:yellow">,<b style="background:lawgreen">, + <b style="background:aquamarine">,<b style="background:magenta">, + <b style="background:palegreen">,<b style="background:coral">, + <b style="background:wheat">,<b style="background:khaki">, + <b style="background:lime">,<b style="background:deepskyblue">]]></str> + <str name="hl.tag.post"><![CDATA[</b>]]></str> + </lst> + </fragmentsBuilder> + + <boundaryScanner name="default" + default="true" + class="solr.highlight.SimpleBoundaryScanner"> + <lst name="defaults"> + <str name="hl.bs.maxScan">10</str> + <str name="hl.bs.chars">.,!? 	 </str> + </lst> + </boundaryScanner> + + <boundaryScanner name="breakIterator" + class="solr.highlight.BreakIteratorBoundaryScanner"> + <lst name="defaults"> + <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE --> + <str name="hl.bs.type">WORD</str> + <!-- language and country are used when constructing Locale object. --> + <!-- And the Locale object will be used when getting instance of BreakIterator --> + <str name="hl.bs.language">en</str> + <str name="hl.bs.country">US</str> + </lst> + </boundaryScanner> + </highlighting> + </searchComponent> + + <!-- Update Processors + + Chains of Update Processor Factories for dealing with Update + Requests can be declared, and then used by name in Update + Request Processors + + http://wiki.apache.org/solr/UpdateRequestProcessor + + --> + + <!-- Add unknown fields to the schema + + Field type guessing update processors that will + attempt to parse string-typed field values as Booleans, Longs, + Doubles, or Dates, and then add schema fields with the guessed + field types. Text content will be indexed as "text_general" as + well as a copy to a plain string version in *_str. + + These require that the schema is both managed and mutable, by + declaring schemaFactory as ManagedIndexSchemaFactory, with + mutable specified as true. + + See http://wiki.apache.org/solr/GuessingFieldTypes + --> + <updateProcessor class="solr.UUIDUpdateProcessorFactory" name="uuid"/> + <updateProcessor class="solr.RemoveBlankFieldUpdateProcessorFactory" name="remove-blank"/> + <updateProcessor class="solr.FieldNameMutatingUpdateProcessorFactory" name="field-name-mutating"> + <str name="pattern">[^\w-\.]</str> + <str name="replacement">_</str> + </updateProcessor> + <updateProcessor class="solr.ParseBooleanFieldUpdateProcessorFactory" name="parse-boolean"/> + <updateProcessor class="solr.ParseLongFieldUpdateProcessorFactory" name="parse-long"/> + <updateProcessor class="solr.ParseDoubleFieldUpdateProcessorFactory" name="parse-double"/> + <updateProcessor class="solr.ParseDateFieldUpdateProcessorFactory" name="parse-date"> + <arr name="format"> + <str>yyyy-MM-dd['T'[HH:mm[:ss[.SSS]][z</str> + <str>yyyy-MM-dd['T'[HH:mm[:ss[,SSS]][z</str> + <str>yyyy-MM-dd HH:mm[:ss[.SSS]][z</str> + <str>yyyy-MM-dd HH:mm[:ss[,SSS]][z</str> + <str>[EEE, ]dd MMM yyyy HH:mm[:ss] z</str> + <str>EEEE, dd-MMM-yy HH:mm:ss z</str> + <str>EEE MMM ppd HH:mm:ss [z ]yyyy</str> + </arr> + </updateProcessor> + + <!-- The update.autoCreateFields property can be turned to false to disable schemaless mode --> + <updateRequestProcessorChain name="add-unknown-fields-to-the-schema" default="${update.autoCreateFields:false}" + processor="uuid,remove-blank,field-name-mutating,parse-boolean,parse-long,parse-double,parse-date"> + <processor class="solr.LogUpdateProcessorFactory"/> + <processor class="solr.DistributedUpdateProcessorFactory"/> + <processor class="solr.RunUpdateProcessorFactory"/> + </updateRequestProcessorChain> + + <!-- Deduplication + + An example dedup update processor that creates the "id" field + on the fly based on the hash code of some other fields. This + example has overwriteDupes set to false since we are using the + id field as the signatureField and Solr will maintain + uniqueness based on that anyway. + + --> + <!-- + <updateRequestProcessorChain name="dedupe"> + <processor class="solr.processor.SignatureUpdateProcessorFactory"> + <bool name="enabled">true</bool> + <str name="signatureField">id</str> + <bool name="overwriteDupes">false</bool> + <str name="fields">name,features,cat</str> + <str name="signatureClass">solr.processor.Lookup3Signature</str> + </processor> + <processor class="solr.LogUpdateProcessorFactory" /> + <processor class="solr.RunUpdateProcessorFactory" /> + </updateRequestProcessorChain> + --> + + <!-- Language identification + + This example update chain identifies the language of the incoming + documents using the langid contrib. The detected language is + written to field language_s. No field name mapping is done. + The fields used for detection are text, title, subject and description, + making this example suitable for detecting languages form full-text + rich documents injected via ExtractingRequestHandler. + See more about langId at http://wiki.apache.org/solr/LanguageDetection + --> + <!-- + <updateRequestProcessorChain name="langid"> + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory"> + <str name="langid.fl">text,title,subject,description</str> + <str name="langid.langField">language_s</str> + <str name="langid.fallback">en</str> + </processor> + <processor class="solr.LogUpdateProcessorFactory" /> + <processor class="solr.RunUpdateProcessorFactory" /> + </updateRequestProcessorChain> + --> + + <!-- Script update processor + + This example hooks in an update processor implemented using JavaScript. + + See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor + --> + <!-- + <updateRequestProcessorChain name="script"> + <processor class="solr.StatelessScriptUpdateProcessorFactory"> + <str name="script">update-script.js</str> + <lst name="params"> + <str name="config_param">example config parameter</str> + </lst> + </processor> + <processor class="solr.RunUpdateProcessorFactory" /> + </updateRequestProcessorChain> + --> + + <!-- Response Writers + + http://wiki.apache.org/solr/QueryResponseWriter + + Request responses will be written using the writer specified by + the 'wt' request parameter matching the name of a registered + writer. + + The "default" writer is the default and will be used if 'wt' is + not specified in the request. + --> + <!-- The following response writers are implicitly configured unless + overridden... + --> + <!-- + <queryResponseWriter name="xml" + default="true" + class="solr.XMLResponseWriter" /> + <queryResponseWriter name="json" class="solr.JSONResponseWriter"/> + <queryResponseWriter name="python" class="solr.PythonResponseWriter"/> + <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> + <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> + <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> + <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> + <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/> + --> + + <queryResponseWriter name="json" class="solr.JSONResponseWriter"> + <!-- For the purposes of the tutorial, JSON responses are written as + plain text so that they are easy to read in *any* browser. + If you expect a MIME type of "application/json" just remove this override. + --> + <str name="content-type">text/plain; charset=UTF-8</str> + </queryResponseWriter> + + <!-- + Custom response writers can be declared as needed... + --> + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"> + <str name="template.base.dir">${velocity.template.base.dir:}</str> + <str name="solr.resource.loader.enabled">${velocity.solr.resource.loader.enabled:true}</str> + <str name="params.resource.loader.enabled">${velocity.params.resource.loader.enabled:false}</str> + </queryResponseWriter> + + <!-- XSLT response writer transforms the XML output by any xslt file found + in Solr's conf/xslt directory. Changes to xslt files are checked for + every xsltCacheLifetimeSeconds. + --> + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> + <int name="xsltCacheLifetimeSeconds">5</int> + </queryResponseWriter> + + <!-- Query Parsers + + https://lucene.apache.org/solr/guide/query-syntax-and-parsing.html + + Multiple QParserPlugins can be registered by name, and then + used in either the "defType" param for the QueryComponent (used + by SearchHandler) or in LocalParams + --> + <!-- example of registering a query parser --> + <!-- + <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/> + --> + + <!-- Function Parsers + + http://wiki.apache.org/solr/FunctionQuery + + Multiple ValueSourceParsers can be registered by name, and then + used as function names when using the "func" QParser. + --> + <!-- example of registering a custom function parser --> + <!-- + <valueSourceParser name="myfunc" + class="com.mycompany.MyValueSourceParser" /> + --> + + + <!-- Document Transformers + http://wiki.apache.org/solr/DocTransformers + --> + <!-- + Could be something like: + <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" > + <int name="connection">jdbc://....</int> + </transformer> + + To add a constant value to all docs, use: + <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" > + <int name="value">5</int> + </transformer> + + If you want the user to still be able to change it with _value:something_ use this: + <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" > + <double name="defaultValue">5</double> + </transformer> + + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The + EditorialMarkerFactory will do exactly that: + <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" /> + --> +</config> diff --git a/solr/conf/solrconfig.xml~ b/solr/conf/solrconfig.xml~ new file mode 100644 index 000000000..5854cf2d9 --- /dev/null +++ b/solr/conf/solrconfig.xml~ @@ -0,0 +1,1358 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- + For more details about configurations options that may appear in + this file, see http://wiki.apache.org/solr/SolrConfigXml. +--> +<config> + <!-- In all configuration below, a prefix of "solr." for class names + is an alias that causes solr to search appropriate packages, + including org.apache.solr.(search|update|request|core|analysis) + + You may also specify a fully qualified Java classname if you + have your own custom plugins. + --> + + <!-- Controls what version of Lucene various components of Solr + adhere to. Generally, you want to use the latest version to + get all bug fixes and improvements. It is highly recommended + that you fully re-index after changing this setting as it can + affect both how text is indexed and queried. + --> + <luceneMatchVersion>8.0.0</luceneMatchVersion> + + <!-- <lib/> directives can be used to instruct Solr to load any Jars + identified and use them to resolve any "plugins" specified in + your solrconfig.xml or schema.xml (ie: Analyzers, Request + Handlers, etc...). + + All directories and paths are resolved relative to the + instanceDir. + + Please note that <lib/> directives are processed in the order + that they appear in your solrconfig.xml file, and are "stacked" + on top of each other when building a ClassLoader - so if you have + plugin jars with dependencies on other jars, the "lower level" + dependency jars should be loaded first. + + If a "./lib" directory exists in your instanceDir, all files + found in it are included as if you had used the following + syntax... + + <lib dir="./lib" /> + --> + + <!-- A 'dir' option by itself adds any files found in the directory + to the classpath, this is useful for including all jars in a + directory. + + When a 'regex' is specified in addition to a 'dir', only the + files in that directory which completely match the regex + (anchored on both ends) will be included. + + If a 'dir' option (with or without a regex) is used and nothing + is found that matches, a warning will be logged. + + The examples below can be used to load some solr-contribs along + with their external dependencies. + --> + <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" /> + + <lib dir="${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" /> + + <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" /> + + <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" /> + <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-ltr-\d.*\.jar" /> + + <!-- an exact 'path' can be used instead of a 'dir' to specify a + specific jar file. This will cause a serious error to be logged + if it can't be loaded. + --> + <!-- + <lib path="../a-jar-that-does-not-exist.jar" /> + --> + + <!-- Data Directory + + Used to specify an alternate directory to hold all index data + other than the default ./data under the Solr home. If + replication is in use, this should match the replication + configuration. + --> + <dataDir>${solr.data.dir:}</dataDir> + + + <!-- The DirectoryFactory to use for indexes. + + solr.StandardDirectoryFactory is filesystem + based and tries to pick the best implementation for the current + JVM and platform. solr.NRTCachingDirectoryFactory, the default, + wraps solr.StandardDirectoryFactory and caches small files in memory + for better NRT performance. + + One can force a particular implementation via solr.MMapDirectoryFactory, + solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory. + + solr.RAMDirectoryFactory is memory based and not persistent. + --> + <directoryFactory name="DirectoryFactory" + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> + + <schemaFactory class="ClassicIndexSchemaFactory"/> + <!-- The CodecFactory for defining the format of the inverted index. + The default implementation is SchemaCodecFactory, which is the official Lucene + index format, but hooks into the schema to provide per-field customization of + the postings lists and per-document values in the fieldType element + (postingsFormat/docValuesFormat). Note that most of the alternative implementations + are experimental, so if you choose to customize the index format, it's a good + idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader) + before upgrading to a newer version to avoid unnecessary reindexing. + A "compressionMode" string element can be added to <codecFactory> to choose + between the existing compression modes in the default codec: "BEST_SPEED" (default) + or "BEST_COMPRESSION". + --> + <codecFactory class="solr.SchemaCodecFactory"/> + + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Index Config - These settings control low-level behavior of indexing + Most example settings here show the default value, but are commented + out, to more easily see where customizations have been made. + + Note: This replaces <indexDefaults> and <mainIndex> from older versions + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> + <indexConfig> + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a + LimitTokenCountFilterFactory in your fieldType definition. E.g. + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> + --> + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> + <!-- <writeLockTimeout>1000</writeLockTimeout> --> + + <!-- Expert: Enabling compound file will use less files for the index, + using fewer file descriptors on the expense of performance decrease. + Default in Lucene is "true". Default in Solr is "false" (since 3.6) --> + <!-- <useCompoundFile>false</useCompoundFile> --> + + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene + indexing for buffering added documents and deletions before they are + flushed to the Directory. + maxBufferedDocs sets a limit on the number of documents buffered + before flushing. + If both ramBufferSizeMB and maxBufferedDocs is set, then + Lucene will flush based on whichever limit is hit first. --> + <!-- <ramBufferSizeMB>100</ramBufferSizeMB> --> + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> + + <!-- Expert: Merge Policy + The Merge Policy in Lucene controls how merging of segments is done. + The default since Solr/Lucene 3.3 is TieredMergePolicy. + The default since Lucene 2.3 was the LogByteSizeMergePolicy, + Even older versions of Lucene used LogDocMergePolicy. + --> + <!-- + <mergePolicyFactory class="org.apache.solr.index.TieredMergePolicyFactory"> + <int name="maxMergeAtOnce">10</int> + <int name="segmentsPerTier">10</int> + <double name="noCFSRatio">0.1</double> + </mergePolicyFactory> + --> + + <!-- Expert: Merge Scheduler + The Merge Scheduler in Lucene controls how merges are + performed. The ConcurrentMergeScheduler (Lucene 2.3 default) + can perform merges in the background using separate threads. + The SerialMergeScheduler (Lucene 2.2 default) does not. + --> + <!-- + <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> + --> + + <!-- LockFactory + + This option specifies which Lucene LockFactory implementation + to use. + + single = SingleInstanceLockFactory - suggested for a + read-only index or when there is no possibility of + another process trying to modify the index. + native = NativeFSLockFactory - uses OS native file locking. + Do not use when multiple solr webapps in the same + JVM are attempting to share a single index. + simple = SimpleFSLockFactory - uses a plain file for locking + + Defaults: 'native' is default for Solr3.6 and later, otherwise + 'simple' is the default + + More details on the nuances of each LockFactory... + http://wiki.apache.org/lucene-java/AvailableLockFactories + --> + <lockType>${solr.lock.type:native}</lockType> + + <!-- Commit Deletion Policy + Custom deletion policies can be specified here. The class must + implement org.apache.lucene.index.IndexDeletionPolicy. + + The default Solr IndexDeletionPolicy implementation supports + deleting index commit points on number of commits, age of + commit point and optimized status. + + The latest commit point should always be preserved regardless + of the criteria. + --> + <!-- + <deletionPolicy class="solr.SolrDeletionPolicy"> + --> + <!-- The number of commit points to be kept --> + <!-- <str name="maxCommitsToKeep">1</str> --> + <!-- The number of optimized commit points to be kept --> + <!-- <str name="maxOptimizedCommitsToKeep">0</str> --> + <!-- + Delete all commit points once they have reached the given age. + Supports DateMathParser syntax e.g. + --> + <!-- + <str name="maxCommitAge">30MINUTES</str> + <str name="maxCommitAge">1DAY</str> + --> + <!-- + </deletionPolicy> + --> + + <!-- Lucene Infostream + + To aid in advanced debugging, Lucene provides an "InfoStream" + of detailed information when indexing. + + Setting The value to true will instruct the underlying Lucene + IndexWriter to write its debugging info the specified file + --> + <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> --> + </indexConfig> + + + <!-- JMX + + This example enables JMX if and only if an existing MBeanServer + is found, use this if you want to configure JMX through JVM + parameters. Remove this to disable exposing Solr configuration + and statistics to JMX. + + For more details see http://wiki.apache.org/solr/SolrJmx + --> + <jmx /> + <!-- If you want to connect to a particular server, specify the + agentId + --> + <!-- <jmx agentId="myAgent" /> --> + <!-- If you want to start a new MBeanServer, specify the serviceUrl --> + <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> + --> + + <!-- The default high-performance update handler --> + <updateHandler class="solr.DirectUpdateHandler2"> + + <!-- Enables a transaction log, used for real-time get, durability, and + and solr cloud replica recovery. The log can grow as big as + uncommitted changes to the index, so use of a hard autoCommit + is recommended (see below). + "dir" - the target directory for transaction logs, defaults to the + solr data directory. + "numVersionBuckets" - sets the number of buckets used to keep + track of max version values when checking for re-ordered + updates; increase this value to reduce the cost of + synchronizing access to version buckets during high-volume + indexing, this requires 8 bytes (long) * numVersionBuckets + of heap space per Solr core. + --> + <updateLog> + <str name="dir">${solr.ulog.dir:}</str> + <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int> + </updateLog> + + <!-- AutoCommit + + Perform a hard commit automatically under certain conditions. + Instead of enabling autoCommit, consider using "commitWithin" + when adding documents. + + http://wiki.apache.org/solr/UpdateXmlMessages + + maxDocs - Maximum number of documents to add since the last + commit before automatically triggering a new commit. + + maxTime - Maximum amount of time in ms that is allowed to pass + since a document was added before automatically + triggering a new commit. + openSearcher - if false, the commit causes recent index changes + to be flushed to stable storage, but does not cause a new + searcher to be opened to make those changes visible. + + If the updateLog is enabled, then it's highly recommended to + have some sort of hard autoCommit to limit the log size. + --> + <autoCommit> + <maxTime>600000</maxTime> + <openSearcher>false</openSearcher> + </autoCommit> + + <!-- softAutoCommit is like autoCommit except it causes a + 'soft' commit which only ensures that changes are visible + but does not ensure that data is synced to disk. This is + faster and more near-realtime friendly than a hard commit. + --> + + <autoSoftCommit> + <maxTime>1000</maxTime> + </autoSoftCommit> + + <!-- Update Related Event Listeners + + Various IndexWriter related events can trigger Listeners to + take actions. + + postCommit - fired after every commit or optimize command + postOptimize - fired after every optimize command + --> + + </updateHandler> + + <!-- IndexReaderFactory + + Use the following format to specify a custom IndexReaderFactory, + which allows for alternate IndexReader implementations. + + ** Experimental Feature ** + + Please note - Using a custom IndexReaderFactory may prevent + certain other features from working. The API to + IndexReaderFactory may change without warning or may even be + removed from future releases if the problems cannot be + resolved. + + + ** Features that may not work with custom IndexReaderFactory ** + + The ReplicationHandler assumes a disk-resident index. Using a + custom IndexReader implementation may cause incompatibility + with ReplicationHandler and may cause replication to not work + correctly. See SOLR-1366 for details. + + --> + <!-- + <indexReaderFactory name="IndexReaderFactory" class="package.class"> + <str name="someArg">Some Value</str> + </indexReaderFactory > + --> + + <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Query section - these settings control query time things like caches + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> + <query> + + <!-- Maximum number of clauses in each BooleanQuery, an exception + is thrown if exceeded. It is safe to increase or remove this setting, + since it is purely an arbitrary limit to try and catch user errors where + large boolean queries may not be the best implementation choice. + --> + <maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses> + + <!-- Solr Internal Query Caches + + There are two implementations of cache available for Solr, + LRUCache, based on a synchronized LinkedHashMap, and + FastLRUCache, based on a ConcurrentHashMap. + + FastLRUCache has faster gets and slower puts in single + threaded operation and thus is generally faster than LRUCache + when the hit ratio of the cache is high (> 75%), and may be + faster under other scenarios on multi-cpu systems. + --> + + <!-- Filter Cache + + Cache used by SolrIndexSearcher for filters (DocSets), + unordered sets of *all* documents that match a query. When a + new searcher is opened, its caches may be prepopulated or + "autowarmed" using data from caches in the old searcher. + autowarmCount is the number of items to prepopulate. For + LRUCache, the autowarmed items will be the most recently + accessed items. + + Parameters: + class - the SolrCache implementation LRUCache or + (LRUCache or FastLRUCache) + size - the maximum number of entries in the cache + initialSize - the initial capacity (number of entries) of + the cache. (see java.util.HashMap) + autowarmCount - the number of entries to prepopulate from + and old cache. + maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed + to occupy. Note that when this option is specified, the size + and initialSize parameters are ignored. + --> + <filterCache class="solr.FastLRUCache" + size="512" + initialSize="512" + autowarmCount="0"/> + + <!-- Query Result Cache + + Caches results of searches - ordered lists of document ids + (DocList) based on a query, a sort, and the range of documents requested. + Additional supported parameter by LRUCache: + maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed + to occupy + --> + <queryResultCache class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="0"/> + + <!-- Document Cache + + Caches Lucene Document objects (the stored fields for each + document). Since Lucene internal document ids are transient, + this cache will not be autowarmed. + --> + <documentCache class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="0"/> + + <!-- custom cache currently used by block join --> + <cache name="perSegFilter" + class="solr.search.LRUCache" + size="10" + initialSize="0" + autowarmCount="10" + regenerator="solr.NoOpRegenerator" /> + + <!-- Field Value Cache + + Cache used to hold field values that are quickly accessible + by document id. The fieldValueCache is created by default + even if not configured here. + --> + <!-- + <fieldValueCache class="solr.FastLRUCache" + size="512" + autowarmCount="128" + showItems="32" /> + --> + + <!-- Custom Cache + + Example of a generic cache. These caches may be accessed by + name through SolrIndexSearcher.getCache(),cacheLookup(), and + cacheInsert(). The purpose is to enable easy caching of + user/application level data. The regenerator argument should + be specified as an implementation of solr.CacheRegenerator + if autowarming is desired. + --> + <!-- + <cache name="myUserCache" + class="solr.LRUCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="com.mycompany.MyRegenerator" + /> + --> + + + <!-- Lazy Field Loading + + If true, stored fields that are not requested will be loaded + lazily. This can result in a significant speed improvement + if the usual case is to not load all stored fields, + especially if the skipped fields are large compressed text + fields. + --> + <enableLazyFieldLoading>true</enableLazyFieldLoading> + + <!-- Use Filter For Sorted Query + + A possible optimization that attempts to use a filter to + satisfy a search. If the requested sort does not include + score, then the filterCache will be checked for a filter + matching the query. If found, the filter will be used as the + source of document ids, and then the sort will be applied to + that. + + For most situations, this will not be useful unless you + frequently get the same search repeatedly with different sort + options, and none of them ever use "score" + --> + <!-- + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <!-- Result Window Size + + An optimization for use with the queryResultCache. When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. + --> + <queryResultWindowSize>20</queryResultWindowSize> + + <!-- Maximum number of documents to cache for any entry in the + queryResultCache. + --> + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + + <!-- Query Related Event Listeners + + Various IndexSearcher related events can trigger Listeners to + take actions. + + newSearcher - fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka + registered). It can be used to prime certain caches to + prevent long request times for certain requests. + + firstSearcher - fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. + + + --> + <!-- QuerySenderListener takes an array of NamedList and executes a + local query request for each NamedList in sequence. + --> + <listener event="newSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <!-- + <lst><str name="q">solr</str><str name="sort">price asc</str></lst> + <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst> + --> + </arr> + </listener> + <listener event="firstSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <!-- + <lst> + <str name="q">static firstSearcher warming in solrconfig.xml</str> + </lst> + --> + </arr> + </listener> + + <!-- Use Cold Searcher + + If a search request comes in and there is no current + registered searcher, then immediately register the still + warming searcher and use it. If "false" then all requests + will block until the first searcher is done warming. + --> + <useColdSearcher>false</useColdSearcher> + + </query> + + + <!-- Request Dispatcher + + This section contains instructions for how the SolrDispatchFilter + should behave when processing requests for this SolrCore. + + --> + <requestDispatcher> + <!-- Request Parsing + + These settings indicate how Solr Requests may be parsed, and + what restrictions may be placed on the ContentStreams from + those requests + + enableRemoteStreaming - enables use of the stream.file + and stream.url parameters for specifying remote streams. + + multipartUploadLimitInKB - specifies the max size (in KiB) of + Multipart File Uploads that Solr will allow in a Request. + + formdataUploadLimitInKB - specifies the max size (in KiB) of + form data (application/x-www-form-urlencoded) sent via + POST. You can use POST to pass request parameters not + fitting into the URL. + + addHttpRequestToContext - if set to true, it will instruct + the requestParsers to include the original HttpServletRequest + object in the context map of the SolrQueryRequest under the + key "httpRequest". It will not be used by any of the existing + Solr components, but may be useful when developing custom + plugins. + + *** WARNING *** + Before enabling remote streaming, you should make sure your + system has authentication enabled. + + <requestParsers enableRemoteStreaming="false" + multipartUploadLimitInKB="-1" + formdataUploadLimitInKB="-1" + addHttpRequestToContext="false"/> + --> + + <!-- HTTP Caching + + Set HTTP caching related parameters (for proxy caches and clients). + + The options below instruct Solr not to output any HTTP Caching + related headers + --> + <httpCaching never304="true" /> + <!-- If you include a <cacheControl> directive, it will be used to + generate a Cache-Control header (as well as an Expires header + if the value contains "max-age=") + + By default, no Cache-Control header is generated. + + You can use the <cacheControl> option even if you have set + never304="true" + --> + <!-- + <httpCaching never304="true" > + <cacheControl>max-age=30, public</cacheControl> + </httpCaching> + --> + <!-- To enable Solr to respond with automatically generated HTTP + Caching headers, and to response to Cache Validation requests + correctly, set the value of never304="false" + + This will cause Solr to generate Last-Modified and ETag + headers based on the properties of the Index. + + The following options can also be specified to affect the + values of these headers... + + lastModFrom - the default value is "openTime" which means the + Last-Modified value (and validation against If-Modified-Since + requests) will all be relative to when the current Searcher + was opened. You can change it to lastModFrom="dirLastMod" if + you want the value to exactly correspond to when the physical + index was last modified. + + etagSeed="..." is an option you can change to force the ETag + header (and validation against If-None-Match requests) to be + different even if the index has not changed (ie: when making + significant changes to your config file) + + (lastModifiedFrom and etagSeed are both ignored if you use + the never304="true" option) + --> + <!-- + <httpCaching lastModifiedFrom="openTime" + etagSeed="Solr"> + <cacheControl>max-age=30, public</cacheControl> + </httpCaching> + --> + </requestDispatcher> + + <!-- Request Handlers + + http://wiki.apache.org/solr/SolrRequestHandler + + Incoming queries will be dispatched to a specific handler by name + based on the path specified in the request. + + If a Request Handler is declared with startup="lazy", then it will + not be initialized until the first request that uses it. + + --> + <!-- SearchHandler + + http://wiki.apache.org/solr/SearchHandler + + For processing Search Queries, the primary Request Handler + provided with Solr is "SearchHandler" It delegates to a sequent + of SearchComponents (see below) and supports distributed + queries across multiple shards + --> + <requestHandler name="/select" class="solr.SearchHandler"> + <!-- default values for query parameters can be specified, these + will be overridden by parameters in the request + --> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <int name="rows">10</int> + <str name="df">text</str> + <!-- Default search field + <str name="df">text</str> + --> + <!-- Change from JSON to XML format (the default prior to Solr 7.0) + <str name="wt">xml</str> + --> + </lst> + <!-- In addition to defaults, "appends" params can be specified + to identify values which should be appended to the list of + multi-val params from the query (or the existing "defaults"). + --> + <!-- In this example, the param "fq=instock:true" would be appended to + any query time fq params the user may specify, as a mechanism for + partitioning the index, independent of any user selected filtering + that may also be desired (perhaps as a result of faceted searching). + + NOTE: there is *absolutely* nothing a client can do to prevent these + "appends" values from being used, so don't use this mechanism + unless you are sure you always want it. + --> + <!-- + <lst name="appends"> + <str name="fq">inStock:true</str> + </lst> + --> + <!-- "invariants" are a way of letting the Solr maintainer lock down + the options available to Solr clients. Any params values + specified here are used regardless of what values may be specified + in either the query, the "defaults", or the "appends" params. + + In this example, the facet.field and facet.query params would + be fixed, limiting the facets clients can use. Faceting is + not turned on by default - but if the client does specify + facet=true in the request, these are the only facets they + will be able to see counts for; regardless of what other + facet.field or facet.query params they may specify. + + NOTE: there is *absolutely* nothing a client can do to prevent these + "invariants" values from being used, so don't use this mechanism + unless you are sure you always want it. + --> + <!-- + <lst name="invariants"> + <str name="facet.field">cat</str> + <str name="facet.field">manu_exact</str> + <str name="facet.query">price:[* TO 500]</str> + <str name="facet.query">price:[500 TO *]</str> + </lst> + --> + <!-- If the default list of SearchComponents is not desired, that + list can either be overridden completely, or components can be + prepended or appended to the default list. (see below) + --> + <!-- + <arr name="components"> + <str>nameOfCustomComponent1</str> + <str>nameOfCustomComponent2</str> + </arr> + --> + </requestHandler> + + <!-- A request handler that returns indented JSON by default --> + <requestHandler name="/query" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <str name="wt">json</str> + <str name="indent">true</str> + </lst> + </requestHandler> + + + <!-- A Robust Example + + This example SearchHandler declaration shows off usage of the + SearchHandler with many defaults declared + + Note that multiple instances of the same Request Handler + (SearchHandler) can be registered multiple times with different + names (and different init parameters) + --> + <requestHandler name="/browse" class="solr.SearchHandler" useParams="query,facets,velocity,browse"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + </requestHandler> + + <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse"> + <lst name="defaults"> + <str name="df">_text_</str> + </lst> + </initParams> + + <!-- Solr Cell Update Request Handler + + http://wiki.apache.org/solr/ExtractingRequestHandler + + --> + <requestHandler name="/update/extract" + startup="lazy" + class="solr.extraction.ExtractingRequestHandler" > + <lst name="defaults"> + <str name="lowernames">true</str> + <str name="fmap.content">_text_</str> + </lst> + </requestHandler> + + <!-- Search Components + + Search components are registered to SolrCore and used by + instances of SearchHandler (which can access them by name) + + By default, the following components are available: + + <searchComponent name="query" class="solr.QueryComponent" /> + <searchComponent name="facet" class="solr.FacetComponent" /> + <searchComponent name="mlt" class="solr.MoreLikeThisComponent" /> + <searchComponent name="highlight" class="solr.HighlightComponent" /> + <searchComponent name="stats" class="solr.StatsComponent" /> + <searchComponent name="debug" class="solr.DebugComponent" /> + + Default configuration in a requestHandler would look like: + + <arr name="components"> + <str>query</str> + <str>facet</str> + <str>mlt</str> + <str>highlight</str> + <str>stats</str> + <str>debug</str> + </arr> + + If you register a searchComponent to one of the standard names, + that will be used instead of the default. + + To insert components before or after the 'standard' components, use: + + <arr name="first-components"> + <str>myFirstComponentName</str> + </arr> + + <arr name="last-components"> + <str>myLastComponentName</str> + </arr> + + NOTE: The component registered with the name "debug" will + always be executed after the "last-components" + + --> + + <!-- Spell Check + + The spell check component can return a list of alternative spelling + suggestions. + + http://wiki.apache.org/solr/SpellCheckComponent + --> + <!-- <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> + + <str name="queryAnalyzerFieldType">text_general</str> + + <!-- Multiple "Spell Checkers" can be declared and used by this + component + --> + + <!-- a spellchecker built from a field of the main index --> + <lst name="spellchecker"> + <str name="name">default</str> + <str name="field">_text_</str> + <str name="classname">solr.DirectSolrSpellChecker</str> + <!-- the spellcheck distance measure used, the default is the internal levenshtein --> + <str name="distanceMeasure">internal</str> + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion --> + <float name="accuracy">0.5</float> + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 --> + <int name="maxEdits">2</int> + <!-- the minimum shared prefix when enumerating terms --> + <int name="minPrefix">1</int> + <!-- maximum number of inspections per result. --> + <int name="maxInspections">5</int> + <!-- minimum length of a query term to be considered for correction --> + <int name="minQueryLength">4</int> + <!-- maximum threshold of documents a query term can appear to be considered for correction --> + <float name="maxQueryFrequency">0.01</float> + <!-- uncomment this to require suggestions to occur in 1% of the documents + <float name="thresholdTokenFrequency">.01</float> + --> + </lst> + + <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage --> + <!-- + <lst name="spellchecker"> + <str name="name">wordbreak</str> + <str name="classname">solr.WordBreakSolrSpellChecker</str> + <str name="field">name</str> + <str name="combineWords">true</str> + <str name="breakWords">true</str> + <int name="maxChanges">10</int> + </lst> + --> + </searchComponent> + --> + + <!-- A request handler for demonstrating the spellcheck component. + + NOTE: This is purely as an example. The whole purpose of the + SpellCheckComponent is to hook it into the request handler that + handles your normal user queries so that a separate request is + not needed to get suggestions. + + IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! + + See http://wiki.apache.org/solr/SpellCheckComponent for details + on the request parameters. + --> + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <!-- Solr will use suggestions from both the 'default' spellchecker + and from the 'wordbreak' spellchecker and combine them. + collations (re-written queries) can include a combination of + corrections from both spellcheckers --> + <str name="spellcheck.dictionary">default</str> + <str name="spellcheck">on</str> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.count">10</str> + <str name="spellcheck.alternativeTermCount">5</str> + <str name="spellcheck.maxResultsForSuggest">5</str> + <str name="spellcheck.collate">true</str> + <str name="spellcheck.collateExtendedResults">true</str> + <str name="spellcheck.maxCollationTries">10</str> + <str name="spellcheck.maxCollations">5</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <!-- Term Vector Component + + http://wiki.apache.org/solr/TermVectorComponent + --> + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> + + <!-- A request handler for demonstrating the term vector component + + This is purely as an example. + + In reality you will likely want to add the component to your + already specified request handlers. + --> + <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <bool name="tv">true</bool> + </lst> + <arr name="last-components"> + <str>tvComponent</str> + </arr> + </requestHandler> + + <!-- Clustering Component. (Omitted here. See the default Solr example for a typical configuration.) --> + + <!-- Terms Component + + http://wiki.apache.org/solr/TermsComponent + + A component to return terms and document frequency of those + terms + --> + <searchComponent name="terms" class="solr.TermsComponent"/> + + <!-- A request handler for demonstrating the terms component --> + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <bool name="terms">true</bool> + <bool name="distrib">false</bool> + </lst> + <arr name="components"> + <str>terms</str> + </arr> + </requestHandler> + + + <!-- Query Elevation Component + + http://wiki.apache.org/solr/QueryElevationComponent + + a search component that enables you to configure the top + results for a given query regardless of the normal lucene + scoring. + --> + <searchComponent name="elevator" class="solr.QueryElevationComponent" > + <!-- pick a fieldType to analyze queries --> + <str name="queryFieldType">string</str> + </searchComponent> + + <!-- A request handler for demonstrating the elevator component --> + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + <arr name="last-components"> + <str>elevator</str> + </arr> + </requestHandler> + + <!-- Highlighting Component + + http://wiki.apache.org/solr/HighlightingParameters + --> + <searchComponent class="solr.HighlightComponent" name="highlight"> + <highlighting> + <!-- Configure the standard fragmenter --> + <!-- This could most likely be commented out in the "default" case --> + <fragmenter name="gap" + default="true" + class="solr.highlight.GapFragmenter"> + <lst name="defaults"> + <int name="hl.fragsize">100</int> + </lst> + </fragmenter> + + <!-- A regular-expression-based fragmenter + (for sentence extraction) + --> + <fragmenter name="regex" + class="solr.highlight.RegexFragmenter"> + <lst name="defaults"> + <!-- slightly smaller fragsizes work better because of slop --> + <int name="hl.fragsize">70</int> + <!-- allow 50% slop on fragment sizes --> + <float name="hl.regex.slop">0.5</float> + <!-- a basic sentence pattern --> + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> + </lst> + </fragmenter> + + <!-- Configure the standard formatter --> + <formatter name="html" + default="true" + class="solr.highlight.HtmlFormatter"> + <lst name="defaults"> + <str name="hl.simple.pre"><![CDATA[<em>]]></str> + <str name="hl.simple.post"><![CDATA[</em>]]></str> + </lst> + </formatter> + + <!-- Configure the standard encoder --> + <encoder name="html" + class="solr.highlight.HtmlEncoder" /> + + <!-- Configure the standard fragListBuilder --> + <fragListBuilder name="simple" + class="solr.highlight.SimpleFragListBuilder"/> + + <!-- Configure the single fragListBuilder --> + <fragListBuilder name="single" + class="solr.highlight.SingleFragListBuilder"/> + + <!-- Configure the weighted fragListBuilder --> + <fragListBuilder name="weighted" + default="true" + class="solr.highlight.WeightedFragListBuilder"/> + + <!-- default tag FragmentsBuilder --> + <fragmentsBuilder name="default" + default="true" + class="solr.highlight.ScoreOrderFragmentsBuilder"> + <!-- + <lst name="defaults"> + <str name="hl.multiValuedSeparatorChar">/</str> + </lst> + --> + </fragmentsBuilder> + + <!-- multi-colored tag FragmentsBuilder --> + <fragmentsBuilder name="colored" + class="solr.highlight.ScoreOrderFragmentsBuilder"> + <lst name="defaults"> + <str name="hl.tag.pre"><![CDATA[ + <b style="background:yellow">,<b style="background:lawgreen">, + <b style="background:aquamarine">,<b style="background:magenta">, + <b style="background:palegreen">,<b style="background:coral">, + <b style="background:wheat">,<b style="background:khaki">, + <b style="background:lime">,<b style="background:deepskyblue">]]></str> + <str name="hl.tag.post"><![CDATA[</b>]]></str> + </lst> + </fragmentsBuilder> + + <boundaryScanner name="default" + default="true" + class="solr.highlight.SimpleBoundaryScanner"> + <lst name="defaults"> + <str name="hl.bs.maxScan">10</str> + <str name="hl.bs.chars">.,!? 	 </str> + </lst> + </boundaryScanner> + + <boundaryScanner name="breakIterator" + class="solr.highlight.BreakIteratorBoundaryScanner"> + <lst name="defaults"> + <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE --> + <str name="hl.bs.type">WORD</str> + <!-- language and country are used when constructing Locale object. --> + <!-- And the Locale object will be used when getting instance of BreakIterator --> + <str name="hl.bs.language">en</str> + <str name="hl.bs.country">US</str> + </lst> + </boundaryScanner> + </highlighting> + </searchComponent> + + <!-- Update Processors + + Chains of Update Processor Factories for dealing with Update + Requests can be declared, and then used by name in Update + Request Processors + + http://wiki.apache.org/solr/UpdateRequestProcessor + + --> + + <!-- Add unknown fields to the schema + + Field type guessing update processors that will + attempt to parse string-typed field values as Booleans, Longs, + Doubles, or Dates, and then add schema fields with the guessed + field types. Text content will be indexed as "text_general" as + well as a copy to a plain string version in *_str. + + These require that the schema is both managed and mutable, by + declaring schemaFactory as ManagedIndexSchemaFactory, with + mutable specified as true. + + See http://wiki.apache.org/solr/GuessingFieldTypes + --> + <updateProcessor class="solr.UUIDUpdateProcessorFactory" name="uuid"/> + <updateProcessor class="solr.RemoveBlankFieldUpdateProcessorFactory" name="remove-blank"/> + <updateProcessor class="solr.FieldNameMutatingUpdateProcessorFactory" name="field-name-mutating"> + <str name="pattern">[^\w-\.]</str> + <str name="replacement">_</str> + </updateProcessor> + <updateProcessor class="solr.ParseBooleanFieldUpdateProcessorFactory" name="parse-boolean"/> + <updateProcessor class="solr.ParseLongFieldUpdateProcessorFactory" name="parse-long"/> + <updateProcessor class="solr.ParseDoubleFieldUpdateProcessorFactory" name="parse-double"/> + <updateProcessor class="solr.ParseDateFieldUpdateProcessorFactory" name="parse-date"> + <arr name="format"> + <str>yyyy-MM-dd['T'[HH:mm[:ss[.SSS]][z</str> + <str>yyyy-MM-dd['T'[HH:mm[:ss[,SSS]][z</str> + <str>yyyy-MM-dd HH:mm[:ss[.SSS]][z</str> + <str>yyyy-MM-dd HH:mm[:ss[,SSS]][z</str> + <str>[EEE, ]dd MMM yyyy HH:mm[:ss] z</str> + <str>EEEE, dd-MMM-yy HH:mm:ss z</str> + <str>EEE MMM ppd HH:mm:ss [z ]yyyy</str> + </arr> + </updateProcessor> + <updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" name="add-schema-fields"> + <lst name="typeMapping"> + <str name="valueClass">java.lang.String</str> + <str name="fieldType">text_general</str> + <lst name="copyField"> + <str name="dest">*_str</str> + <int name="maxChars">256</int> + </lst> + <!-- Use as default mapping instead of defaultFieldType --> + <bool name="default">true</bool> + </lst> + <lst name="typeMapping"> + <str name="valueClass">java.lang.Boolean</str> + <str name="fieldType">booleans</str> + </lst> + <lst name="typeMapping"> + <str name="valueClass">java.util.Date</str> + <str name="fieldType">pdates</str> + </lst> + <lst name="typeMapping"> + <str name="valueClass">java.lang.Long</str> + <str name="valueClass">java.lang.Integer</str> + <str name="fieldType">plongs</str> + </lst> + <lst name="typeMapping"> + <str name="valueClass">java.lang.Number</str> + <str name="fieldType">pdoubles</str> + </lst> + </updateProcessor> + + <!-- The update.autoCreateFields property can be turned to false to disable schemaless mode --> + <updateRequestProcessorChain name="add-unknown-fields-to-the-schema" default="${update.autoCreateFields:true}" + processor="uuid,remove-blank,field-name-mutating,parse-boolean,parse-long,parse-double,parse-date,add-schema-fields"> + <processor class="solr.LogUpdateProcessorFactory"/> + <processor class="solr.DistributedUpdateProcessorFactory"/> + <processor class="solr.RunUpdateProcessorFactory"/> + </updateRequestProcessorChain> + + <!-- Deduplication + + An example dedup update processor that creates the "id" field + on the fly based on the hash code of some other fields. This + example has overwriteDupes set to false since we are using the + id field as the signatureField and Solr will maintain + uniqueness based on that anyway. + + --> + <!-- + <updateRequestProcessorChain name="dedupe"> + <processor class="solr.processor.SignatureUpdateProcessorFactory"> + <bool name="enabled">true</bool> + <str name="signatureField">id</str> + <bool name="overwriteDupes">false</bool> + <str name="fields">name,features,cat</str> + <str name="signatureClass">solr.processor.Lookup3Signature</str> + </processor> + <processor class="solr.LogUpdateProcessorFactory" /> + <processor class="solr.RunUpdateProcessorFactory" /> + </updateRequestProcessorChain> + --> + + <!-- Language identification + + This example update chain identifies the language of the incoming + documents using the langid contrib. The detected language is + written to field language_s. No field name mapping is done. + The fields used for detection are text, title, subject and description, + making this example suitable for detecting languages form full-text + rich documents injected via ExtractingRequestHandler. + See more about langId at http://wiki.apache.org/solr/LanguageDetection + --> + <!-- + <updateRequestProcessorChain name="langid"> + <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory"> + <str name="langid.fl">text,title,subject,description</str> + <str name="langid.langField">language_s</str> + <str name="langid.fallback">en</str> + </processor> + <processor class="solr.LogUpdateProcessorFactory" /> + <processor class="solr.RunUpdateProcessorFactory" /> + </updateRequestProcessorChain> + --> + + <!-- Script update processor + + This example hooks in an update processor implemented using JavaScript. + + See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor + --> + <!-- + <updateRequestProcessorChain name="script"> + <processor class="solr.StatelessScriptUpdateProcessorFactory"> + <str name="script">update-script.js</str> + <lst name="params"> + <str name="config_param">example config parameter</str> + </lst> + </processor> + <processor class="solr.RunUpdateProcessorFactory" /> + </updateRequestProcessorChain> + --> + + <!-- Response Writers + + http://wiki.apache.org/solr/QueryResponseWriter + + Request responses will be written using the writer specified by + the 'wt' request parameter matching the name of a registered + writer. + + The "default" writer is the default and will be used if 'wt' is + not specified in the request. + --> + <!-- The following response writers are implicitly configured unless + overridden... + --> + <!-- + <queryResponseWriter name="xml" + default="true" + class="solr.XMLResponseWriter" /> + <queryResponseWriter name="json" class="solr.JSONResponseWriter"/> + <queryResponseWriter name="python" class="solr.PythonResponseWriter"/> + <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/> + <queryResponseWriter name="php" class="solr.PHPResponseWriter"/> + <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/> + <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/> + <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/> + --> + + <queryResponseWriter name="json" class="solr.JSONResponseWriter"> + <!-- For the purposes of the tutorial, JSON responses are written as + plain text so that they are easy to read in *any* browser. + If you expect a MIME type of "application/json" just remove this override. + --> + <str name="content-type">text/plain; charset=UTF-8</str> + </queryResponseWriter> + + <!-- + Custom response writers can be declared as needed... + --> + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"> + <str name="template.base.dir">${velocity.template.base.dir:}</str> + <str name="solr.resource.loader.enabled">${velocity.solr.resource.loader.enabled:true}</str> + <str name="params.resource.loader.enabled">${velocity.params.resource.loader.enabled:false}</str> + </queryResponseWriter> + + <!-- XSLT response writer transforms the XML output by any xslt file found + in Solr's conf/xslt directory. Changes to xslt files are checked for + every xsltCacheLifetimeSeconds. + --> + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> + <int name="xsltCacheLifetimeSeconds">5</int> + </queryResponseWriter> + + <!-- Query Parsers + + https://lucene.apache.org/solr/guide/query-syntax-and-parsing.html + + Multiple QParserPlugins can be registered by name, and then + used in either the "defType" param for the QueryComponent (used + by SearchHandler) or in LocalParams + --> + <!-- example of registering a query parser --> + <!-- + <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/> + --> + + <!-- Function Parsers + + http://wiki.apache.org/solr/FunctionQuery + + Multiple ValueSourceParsers can be registered by name, and then + used as function names when using the "func" QParser. + --> + <!-- example of registering a custom function parser --> + <!-- + <valueSourceParser name="myfunc" + class="com.mycompany.MyValueSourceParser" /> + --> + + + <!-- Document Transformers + http://wiki.apache.org/solr/DocTransformers + --> + <!-- + Could be something like: + <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" > + <int name="connection">jdbc://....</int> + </transformer> + + To add a constant value to all docs, use: + <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" > + <int name="value">5</int> + </transformer> + + If you want the user to still be able to change it with _value:something_ use this: + <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" > + <double name="defaultValue">5</double> + </transformer> + + If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The + EditorialMarkerFactory will do exactly that: + <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" /> + --> +</config> diff --git a/solr/conf/stopwords.txt b/solr/conf/stopwords.txt new file mode 100644 index 000000000..ae1e83eeb --- /dev/null +++ b/solr/conf/stopwords.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/solr/conf/synonyms.txt b/solr/conf/synonyms.txt new file mode 100644 index 000000000..eab4ee875 --- /dev/null +++ b/solr/conf/synonyms.txt @@ -0,0 +1,29 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma + diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index 63ba01b6a..11929455c 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -174,6 +174,7 @@ export namespace Docs { if (!("creationDate" in protoProps)) { protoProps.creationDate = new DateField; } + protoProps.isPrototype = true; return SetDelegateOptions(SetInstanceOptions(proto, protoProps, data), delegateProps); } diff --git a/src/client/util/DocumentManager.ts b/src/client/util/DocumentManager.ts index 779b07ce5..a8b643d4d 100644 --- a/src/client/util/DocumentManager.ts +++ b/src/client/util/DocumentManager.ts @@ -1,9 +1,10 @@ import { computed, observable } from 'mobx'; import { DocumentView } from '../views/nodes/DocumentView'; import { Doc } from '../../new_fields/Doc'; -import { FieldValue, Cast, BoolCast } from '../../new_fields/Types'; +import { FieldValue, Cast, NumCast, BoolCast } from '../../new_fields/Types'; import { listSpec } from '../../new_fields/Schema'; -import { SelectionManager } from './SelectionManager'; +import { undoBatch } from './UndoManager'; +import { CollectionDockingView } from '../views/collections/CollectionDockingView'; export class DocumentManager { @@ -101,4 +102,30 @@ export class DocumentManager { return pairs; }, [] as { a: DocumentView, b: DocumentView, l: Doc }[]); } + + @undoBatch + public jumpToDocument = async (doc: Doc): Promise<void> => { + let docView = DocumentManager.Instance.getDocumentView(doc); + if (docView) { + docView.props.focus(docView.props.Document); + } else { + const contextDoc = await Cast(doc.annotationOn, Doc); + if (!contextDoc) { + CollectionDockingView.Instance.AddRightSplit(Doc.MakeDelegate(doc)); + } else { + const page = NumCast(doc.page, undefined); + const curPage = NumCast(contextDoc.curPage, undefined); + if (page !== curPage) { + contextDoc.curPage = page; + } + let contextView = DocumentManager.Instance.getDocumentView(contextDoc); + if (contextView) { + contextDoc.panTransformType = "Ease"; + contextView.props.focus(contextDoc); + } else { + CollectionDockingView.Instance.AddRightSplit(contextDoc); + } + } + } + } }
\ No newline at end of file diff --git a/src/client/util/TooltipTextMenu.tsx b/src/client/util/TooltipTextMenu.tsx index 358fdc81b..b860f6758 100644 --- a/src/client/util/TooltipTextMenu.tsx +++ b/src/client/util/TooltipTextMenu.tsx @@ -49,15 +49,14 @@ export class TooltipTextMenu { private fontSizeIndicator: HTMLSpanElement = document.createElement("span"); private link: HTMLAnchorElement; + private linkEditor?: HTMLDivElement; + private linkText?: HTMLDivElement; + private linkDrag?: HTMLImageElement; //dropdown doms private fontSizeDom?: Node; private fontStyleDom?: Node; private listTypeBtnDom?: Node; - private linkEditor?: HTMLDivElement; - private linkText?: HTMLDivElement; - private linkDrag?: HTMLImageElement; - constructor(view: EditorView, editorProps: FieldViewProps) { this.view = view; this.state = view.state; @@ -186,7 +185,7 @@ export class TooltipTextMenu { this.linkText.style.width = "150px"; this.linkText.style.overflow = "hidden"; this.linkText.style.color = "white"; - this.linkText.onpointerdown = (e: PointerEvent) => { e.stopPropagation(); } + this.linkText.onpointerdown = (e: PointerEvent) => { e.stopPropagation(); }; let linkBtn = document.createElement("div"); linkBtn.textContent = ">>"; linkBtn.style.width = "20px"; @@ -203,8 +202,9 @@ export class TooltipTextMenu { let docid = href.replace(DocServer.prepend("/doc/"), ""); DocServer.GetRefField(docid).then(action((f: Opt<Field>) => { if (f instanceof Doc) { - if (DocumentManager.Instance.getDocumentView(f)) + if (DocumentManager.Instance.getDocumentView(f)) { DocumentManager.Instance.getDocumentView(f)!.props.focus(f); + } else CollectionDockingView.Instance.AddRightSplit(f); } })); @@ -212,7 +212,7 @@ export class TooltipTextMenu { e.stopPropagation(); e.preventDefault(); } - } + }; this.linkDrag = document.createElement("img"); this.linkDrag.src = "https://seogurusnyc.com/wp-content/uploads/2016/12/link-1.png"; this.linkDrag.style.width = "20px"; @@ -232,7 +232,7 @@ export class TooltipTextMenu { }), }, hideSource: false - }) + }); }; this.linkEditor.appendChild(this.linkDrag); this.linkEditor.appendChild(this.linkText); @@ -250,7 +250,7 @@ export class TooltipTextMenu { e.stopPropagation(); e.preventDefault(); } - } + }; this.tooltip.appendChild(this.linkEditor); } @@ -351,7 +351,7 @@ export class TooltipTextMenu { icon: icons.link, css: "color:white;", class: "menuicon", - enable(state) { return !state.selection.empty }, + enable(state) { return !state.selection.empty; }, run: (state, dispatch, view) => { // to remove link if (this.markActive(state, markType)) { @@ -396,10 +396,10 @@ export class TooltipTextMenu { } markActive = function (state: EditorState<any>, type: MarkType<Schema<string, string>>) { - let { from, $from, to, empty } = state.selection - if (empty) return type.isInSet(state.storedMarks || $from.marks()) - else return state.doc.rangeHasMark(from, to, type) - } + let { from, $from, to, empty } = state.selection; + if (empty) return type.isInSet(state.storedMarks || $from.marks()); + else return state.doc.rangeHasMark(from, to, type); + }; // Helper function to create menu icons icon(text: string, name: string) { @@ -480,8 +480,8 @@ export class TooltipTextMenu { let attributes = this.getMarksInSelection(this.view.state, [schema.marks.link])[0].attrs; this.link.href = attributes.href; this.link.textContent = attributes.title; - this.link.style.visibility = "visible" - } else this.link.style.visibility = "hidden" + this.link.style.visibility = "visible"; + } else this.link.style.visibility = "hidden"; // Otherwise, reposition it and update its content this.tooltip.style.display = ""; diff --git a/src/client/views/Main.scss b/src/client/views/Main.scss index 5c5c252e9..2430e8f6c 100644 --- a/src/client/views/Main.scss +++ b/src/client/views/Main.scss @@ -47,7 +47,7 @@ h1 { } .jsx-parser { - width:100%; + width: 100%; pointer-events: none; border-radius: inherit; } @@ -184,6 +184,7 @@ button:hover { overflow: scroll; z-index: 1; } + #mainContent-div { width: 100%; height: 100%; diff --git a/src/client/views/Main.tsx b/src/client/views/Main.tsx index 617580332..c9d5c395c 100644 --- a/src/client/views/Main.tsx +++ b/src/client/views/Main.tsx @@ -31,6 +31,7 @@ import "./Main.scss"; import { MainOverlayTextBox } from './MainOverlayTextBox'; import { DocumentView } from './nodes/DocumentView'; import { PreviewCursor } from './PreviewCursor'; +import { SearchBox } from './SearchBox'; import { SelectionManager } from '../util/SelectionManager'; import { FieldResult, Field, Doc, Opt } from '../../new_fields/Doc'; import { Cast, FieldValue, StrCast } from '../../new_fields/Types'; @@ -38,7 +39,6 @@ import { DocServer } from '../DocServer'; import { listSpec } from '../../new_fields/Schema'; import { Id } from '../../new_fields/RefField'; - @observer export class Main extends React.Component { public static Instance: Main; @@ -266,9 +266,11 @@ export class Main extends React.Component { <button className="toolbar-button round-button" title="Redo" onClick={() => UndoManager.Redo()}><FontAwesomeIcon icon="redo-alt" size="sm" /></button> <button className="toolbar-button round-button" title="Ink" onClick={() => InkingControl.Instance.toggleDisplay()}><FontAwesomeIcon icon="pen-nib" size="sm" /></button> </div >, - <div className="main-buttonDiv" key="logout" style={{ top: '34px', right: '1px', position: 'absolute' }} ref={logoutRef}> + <div className="main-searchDiv" key="search" style={{ top: '34px', right: '1px', position: 'absolute' }} > <SearchBox /> </div>, + <div className="main-buttonDiv" key="logout" style={{ bottom: '0px', right: '1px', position: 'absolute' }} ref={logoutRef}> <button onClick={() => request.get(DocServer.prepend(RouteStore.logout), emptyFunction)}>Log Out</button></div> ]; + } render() { diff --git a/src/client/views/PresentationView.tsx b/src/client/views/PresentationView.tsx index 94867b813..2a456324c 100644 --- a/src/client/views/PresentationView.tsx +++ b/src/client/views/PresentationView.tsx @@ -1,7 +1,7 @@ import { observer } from "mobx-react"; -import React = require("react") +import React = require("react"); import { observable, action, runInAction, reaction } from "mobx"; -import "./PresentationView.scss" +import "./PresentationView.scss"; import "./Main.tsx"; import { DocumentManager } from "../util/DocumentManager"; import { Utils } from "../../Utils"; @@ -141,7 +141,7 @@ export class PresentationView extends React.Component<PresViewProps> { if (activeW && activeW instanceof Doc) { PromiseValue(Cast(activeW.presentationView, Doc)). then(pv => runInAction(() => - self.Document = pv ? pv : (activeW.presentationView = new Doc()))) + self.Document = pv ? pv : (activeW.presentationView = new Doc()))); } }, { fireImmediately: true }); @@ -165,8 +165,9 @@ export class PresentationView extends React.Component<PresViewProps> { } render() { - if (!this.Document) + if (!this.Document) { return (null); + } let titleStr = this.Document.Title; let width = NumCast(this.Document.width); diff --git a/src/client/views/SearchBox.scss b/src/client/views/SearchBox.scss new file mode 100644 index 000000000..792d6dd3c --- /dev/null +++ b/src/client/views/SearchBox.scss @@ -0,0 +1,99 @@ +@import "globalCssVariables"; + +.searchBox { + height: 32px; + //display: flex; + //padding: 4px; + -webkit-transition: width 0.4s ease-in-out; + transition: width 0.4s ease-in-out; + align-items: center; + + + + input[type=text] { + width: 130px; + -webkit-transition: width 0.4s; + transition: width 0.4s; + position: absolute; + right: 100px; + } + + input[type=text]:focus { + width: 500px; + outline: 3px solid lightblue; + } + + .filter-button { + position: absolute; + right: 30px; + } + + .submit-search { + text-align: right; + color: $dark-color; + -webkit-transition: right 0.4s; + transition: right 0.4s; + } + + .submit-search:hover { + color: $main-accent; + transform: scale(1.05); + cursor: pointer; + } +} + +.filter-form { + background: $dark-color; + height: 400px; + width: 400px; + position: relative; + right: 1px; + color: $light-color; + padding: 10px; + flex-direction: column; +} + +#header { + text-transform: uppercase; + letter-spacing: 2px; + font-size: 100%; + height: 40px; +} + +#option { + height: 20px; +} + +.results { + top: 300px; + display: flex; + flex-direction: column; + + .search-item { + width: 500px; + height: 50px; + background: $light-color-secondary; + display: flex; + justify-content: space-between; + align-items: center; + transition: all 0.1s; + border-width: 0.11px; + border-style: none; + border-color: $intermediate-color; + border-bottom-style: solid; + padding: 10px; + white-space: nowrap; + font-size: 13px; + } + + .search-item:hover { + transition: all 0.1s; + background: $lighter-alt-accent; + } + + .search-title { + text-transform: uppercase; + text-align: left; + width: 8vw; + } +}
\ No newline at end of file diff --git a/src/client/views/SearchBox.tsx b/src/client/views/SearchBox.tsx new file mode 100644 index 000000000..7dd1af4e7 --- /dev/null +++ b/src/client/views/SearchBox.tsx @@ -0,0 +1,133 @@ +import * as React from 'react'; +import { observer } from 'mobx-react'; +import { observable, action, runInAction } from 'mobx'; +import { Utils } from '../../Utils'; +import { MessageStore } from '../../server/Message'; +import "./SearchBox.scss"; +import { faSearch } from '@fortawesome/free-solid-svg-icons'; +import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; +import { library } from '@fortawesome/fontawesome-svg-core'; +// const app = express(); +// import * as express from 'express'; +import { Search } from '../../server/Search'; +import * as rp from 'request-promise'; +import { SearchItem } from './SearchItem'; +import { isString } from 'util'; +import { constant } from 'async'; +import { DocServer } from '../DocServer'; +import { Doc } from '../../new_fields/Doc'; +import { Id } from '../../new_fields/RefField'; +import { DocumentManager } from '../util/DocumentManager'; + + +library.add(faSearch); + +@observer +export class SearchBox extends React.Component { + @observable + searchString: string = ""; + + @observable private _open: boolean = false; + @observable private _resultsOpen: boolean = false; + + @observable + private _results: Doc[] = []; + + @action.bound + onChange(e: React.ChangeEvent<HTMLInputElement>) { + this.searchString = e.target.value; + } + + @action + submitSearch = async () => { + runInAction(() => this._results = []); + let query = this.searchString; + + let response = await rp.get('http://localhost:1050/search', { + qs: { + query + } + }); + let results = JSON.parse(response); + + //gets json result into a list of documents that can be used + this.getResults(results); + + runInAction(() => { this._resultsOpen = true; }); + } + + @action + getResults = async (res: string[]) => { + res.map(async result => { + const doc = await DocServer.GetRefField(result); + if (doc instanceof Doc) { + runInAction(() => this._results.push(doc)); + } + }); + } + + @action + handleClickFilter = (e: Event): void => { + var className = (e.target as any).className; + var id = (e.target as any).id; + if (className !== "filter-button" && className !== "filter-form") { + this._open = false; + } + + } + + @action + handleClickResults = (e: Event): void => { + var className = (e.target as any).className; + var id = (e.target as any).id; + if (id !== "result") { + this._resultsOpen = false; + } + + } + + componentWillMount() { + document.addEventListener('mousedown', this.handleClickFilter, false); + document.addEventListener('mousedown', this.handleClickResults, false); + } + + componentWillUnmount() { + document.removeEventListener('mousedown', this.handleClickFilter, false); + document.removeEventListener('mousedown', this.handleClickResults, false); + } + + @action + toggleFilterDisplay = () => { + this._open = !this._open; + } + + enter = (e: React.KeyboardEvent<HTMLInputElement>) => { + if (e.key === "Enter") { + this.submitSearch(); + } + } + + render() { + return ( + <div id="outer"> + <div className="searchBox" id="outer"> + + <input value={this.searchString} onChange={this.onChange} type="text" placeholder="Search.." className="search" id="input" onKeyPress={this.enter} /> + <button className="filter-button" onClick={this.toggleFilterDisplay}> Filter </button> + <div className="submit-search" id="submit" onClick={this.submitSearch}><FontAwesomeIcon style={{ height: "100%" }} icon="search" size="lg" /></div> + <div className="results" id="results" style={this._resultsOpen ? { display: "flex" } : { display: "none" }}> + {this._results.map(result => <SearchItem doc={result} key={result[Id]} />)} + </div> + </div> + <div className="filter-form" id="filter" style={this._open ? { display: "flex" } : { display: "none" }}> + <div className="filter-form" id="header">Filter Search Results</div> + <div className="filter-form" id="option"> + filter by collection, key, type of node + </div> + + </div> + </div> + + ); + } +}
\ No newline at end of file diff --git a/src/client/views/SearchItem.tsx b/src/client/views/SearchItem.tsx new file mode 100644 index 000000000..d30579907 --- /dev/null +++ b/src/client/views/SearchItem.tsx @@ -0,0 +1,52 @@ +import React = require("react"); +import { Doc } from "../../new_fields/Doc"; +import { DocumentManager } from "../util/DocumentManager"; +import { library } from '@fortawesome/fontawesome-svg-core'; +import { faCaretUp, faFilePdf, faFilm, faImage, faObjectGroup, faStickyNote } from '@fortawesome/free-solid-svg-icons'; +import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; +import { Cast } from "../../new_fields/Types"; +import { FieldView, FieldViewProps } from './nodes/FieldView'; +import { computed } from "mobx"; +import { IconField } from "../../new_fields/IconField"; + + +export interface SearchProps { + doc: Doc; +} + +library.add(faCaretUp); +library.add(faObjectGroup); +library.add(faStickyNote); +library.add(faFilePdf); +library.add(faFilm); + +export class SearchItem extends React.Component<SearchProps> { + + onClick = () => { + DocumentManager.Instance.jumpToDocument(this.props.doc); + } + + //needs help + // @computed get layout(): string { const field = Cast(this.props.doc[fieldKey], IconField); return field ? field.icon : "<p>Error loading icon data</p>"; } + + + public static DocumentIcon(layout: string) { + let button = layout.indexOf("PDFBox") !== -1 ? faFilePdf : + layout.indexOf("ImageBox") !== -1 ? faImage : + layout.indexOf("Formatted") !== -1 ? faStickyNote : + layout.indexOf("Video") !== -1 ? faFilm : + layout.indexOf("Collection") !== -1 ? faObjectGroup : + faCaretUp; + return <FontAwesomeIcon icon={button} className="documentView-minimizedIcon" />; + } + + render() { + return ( + <div className="search-item" id="result" onClick={this.onClick}> + <div className="search-title" id="result" >title: {this.props.doc.title}</div> + {/* <div className="search-type" id="result" >Type: {this.props.doc.layout}</div> */} + {/* <div className="search-type" >{SearchItem.DocumentIcon(this.layout)}</div> */} + </div> + ); + } +}
\ No newline at end of file diff --git a/src/client/views/Templates.tsx b/src/client/views/Templates.tsx index 02f9aa510..bbedc95f1 100644 --- a/src/client/views/Templates.tsx +++ b/src/client/views/Templates.tsx @@ -38,18 +38,9 @@ export class Template { export namespace Templates { // export const BasicLayout = new Template("Basic layout", "{layout}"); - export const OuterCaption = new Template("Outer caption", TemplatePosition.OutterBottom, - `<div id="screenSpace" style="margin-top: 100%; font-size:14px; background:yellow; width:100%; position:absolute"><FormattedTextBox {...props} fieldKey={"caption"} /></div>` + export const Caption = new Template("Caption", TemplatePosition.OutterBottom, + `<div id="screenSpace" style="top: 100%; font-size:14px; background:yellow; width:100%; position:absolute"><FormattedTextBox {...props} fieldKey={"caption"} /></div>` ); - - export const InnerCaption = new Template("Inner caption", TemplatePosition.InnerBottom, - `<div><div style="margin:auto; height:calc(100% - 50px); width:100%;">{layout}</div><div style="height:50px; width:100%; position:absolute"><FormattedTextBox {...props} fieldKey={"caption"}/></div></div>` - ); - - export const SideCaption = new Template("Side caption", TemplatePosition.OutterRight, - `<div><div style="margin:auto; height:100%; width:100%;">{layout}</div><div style="height:100%; width:300px; position:absolute; top: 0; right: -300px;"><FormattedTextBox {...props} fieldKey={"caption"}/></div> </div>` - ); - export const TitleOverlay = new Template("TitleOverlay", TemplatePosition.InnerTop, `<div><div style="height:100%; width:100%;position:absolute;">{layout}</div> <div style="height:25px; width:100%; position:absolute; top: 0; background-color: rgba(0, 0, 0, .4); color: white; "> @@ -72,7 +63,7 @@ export namespace Templates { </div>` ); - export const TemplateList: Template[] = [Title, TitleOverlay, OuterCaption, InnerCaption, SideCaption, Bullet]; + export const TemplateList: Template[] = [Title, TitleOverlay, Caption, Bullet]; export function sortTemplates(a: Template, b: Template) { if (a.Position < b.Position) { return -1; } diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormLinksView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormLinksView.tsx index 1d4584cfe..62b1456cf 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormLinksView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormLinksView.tsx @@ -67,8 +67,8 @@ export class CollectionFreeFormLinksView extends React.Component<CollectionViewP if (srcBrushDocs === undefined) srcTarg.brushingDocs = srcBrushDocs = new List<Doc>(); else brushAction(srcBrushDocs); } - }) - }) + }); + }); }); } componentWillUnmount() { diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 25a75904b..5aa74c703 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -176,7 +176,7 @@ export class DocumentView extends DocComponent<DocumentViewProps, Document>(Docu if (CollectionFreeFormView.RIGHT_BTN_DRAG && (e.button === 2 || (e.button === 0 && e.altKey)) && !this.isSelected()) { return; } - this._hitIsBullet = (e.target && (e.target as any).id === "isBullet"); + this._hitIsBullet = (e.target && (e.target as any).id === "isBullet") || Cast(this.props.Document.subBulletDocs, listSpec(Doc), []).filter(d => d).map(d => d as Doc).length > 0; if (e.shiftKey && e.buttons === 1) { if (this.props.isTopMost) { this.startDragging(e.pageX, e.pageY, e.altKey || e.ctrlKey ? "alias" : undefined, this._hitIsBullet); diff --git a/src/client/views/nodes/FormattedTextBox.tsx b/src/client/views/nodes/FormattedTextBox.tsx index fc58e513b..df3eb159b 100644 --- a/src/client/views/nodes/FormattedTextBox.tsx +++ b/src/client/views/nodes/FormattedTextBox.tsx @@ -184,7 +184,7 @@ export class FormattedTextBox extends DocComponent<(FieldViewProps & FormattedTe state: field && field.Data ? EditorState.fromJSON(config, JSON.parse(field.Data)) : EditorState.create(config), dispatchTransaction: this.dispatchTransaction, nodeViews: { - image(node, view, getPos) { return new ImageResizeView(node, view, getPos) } + image(node, view, getPos) { return new ImageResizeView(node, view, getPos); } } }); let text = StrCast(this.props.Document.documentText); @@ -232,8 +232,9 @@ export class FormattedTextBox extends DocComponent<(FieldViewProps & FormattedTe let docid = href.replace(DocServer.prepend("/doc/"), ""); DocServer.GetRefField(docid).then(action((f: Opt<Field>) => { if (f instanceof Doc) { - if (DocumentManager.Instance.getDocumentView(f)) + if (DocumentManager.Instance.getDocumentView(f)) { DocumentManager.Instance.getDocumentView(f)!.props.focus(f); + } else CollectionDockingView.Instance.AddRightSplit(f); } })); diff --git a/src/client/views/nodes/LinkBox.tsx b/src/client/views/nodes/LinkBox.tsx index 08cfa590b..611cb66b6 100644 --- a/src/client/views/nodes/LinkBox.tsx +++ b/src/client/views/nodes/LinkBox.tsx @@ -31,28 +31,7 @@ export class LinkBox extends React.Component<Props> { @undoBatch onViewButtonPressed = async (e: React.PointerEvent): Promise<void> => { e.stopPropagation(); - let docView = DocumentManager.Instance.getDocumentView(this.props.pairedDoc); - if (docView) { - docView.props.focus(docView.props.Document); - } else { - const contextDoc = await Cast(this.props.pairedDoc.annotationOn, Doc); - if (!contextDoc) { - CollectionDockingView.Instance.AddRightSplit(Doc.MakeDelegate(this.props.pairedDoc)); - } else { - const page = NumCast(this.props.pairedDoc.page, undefined); - const curPage = NumCast(contextDoc.curPage, undefined); - if (page !== curPage) { - contextDoc.curPage = page; - } - let contextView = DocumentManager.Instance.getDocumentView(contextDoc); - if (contextView) { - contextDoc.panTransformType = "Ease"; - contextView.props.focus(contextDoc); - } else { - CollectionDockingView.Instance.AddRightSplit(contextDoc); - } - } - } + DocumentManager.Instance.jumpToDocument(this.props.pairedDoc); } onEditButtonPressed = (e: React.PointerEvent): void => { diff --git a/src/debug/Test.tsx b/src/debug/Test.tsx index 04ef00722..0dca4b4b1 100644 --- a/src/debug/Test.tsx +++ b/src/debug/Test.tsx @@ -41,7 +41,7 @@ class Test extends React.Component { doc.fields = "test"; doc.test = "hello doc"; doc.url = url; - doc.testDoc = doc2; + //doc.testDoc = doc2; const test1: TestDoc = TestDoc(doc); @@ -70,7 +70,9 @@ class Test extends React.Component { } render() { - return <button onClick={this.onClick}>Click me</button>; + return <div><button onClick={this.onClick}>Click me</button> + {/* <input onKeyPress={this.onEnter}></input> */} + </div>; } } diff --git a/src/new_fields/util.ts b/src/new_fields/util.ts index bbd8157f6..a5f5e368b 100644 --- a/src/new_fields/util.ts +++ b/src/new_fields/util.ts @@ -56,6 +56,7 @@ export function getter(target: any, prop: string | symbol | number, receiver: an return getField(target, prop); } +//TODO The callback parameter is never being passed in currently, so we should be able to get rid of it. export function getField(target: any, prop: string | number, ignoreProto: boolean = false, callback?: (field: Field | undefined) => void): any { const field = target.__fields[prop]; if (field instanceof ProxyField) { diff --git a/src/server/Search.ts b/src/server/Search.ts new file mode 100644 index 000000000..c3cb3c3e6 --- /dev/null +++ b/src/server/Search.ts @@ -0,0 +1,45 @@ +import * as rp from 'request-promise'; +import { Database } from './database'; +import { thisExpression } from 'babel-types'; + +export class Search { + public static Instance = new Search(); + private url = 'http://localhost:8983/solr/'; + + public async updateDocument(document: any) { + try { + return await rp.post(this.url + "dash/update", { + headers: { 'content-type': 'application/json' }, + body: JSON.stringify([document]) + }); + } catch { } + } + + public async search(query: string) { + try { + const searchResults = JSON.parse(await rp.get(this.url + "dash/select", { + qs: { + q: query + } + })); + const fields = searchResults.response.docs; + const ids = fields.map((field: any) => field.id); + return ids; + } catch { + return []; + } + } + + public async clear() { + try { + return await rp.post(this.url + "dash/update", { + body: { + delete: { + query: "*:*" + } + }, + json: true + }); + } catch { } + } +}
\ No newline at end of file diff --git a/src/server/index.ts b/src/server/index.ts index 6801b3132..f816af48a 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -7,10 +7,10 @@ import * as expressValidator from 'express-validator'; import * as formidable from 'formidable'; import * as fs from 'fs'; import * as mobileDetect from 'mobile-detect'; -import { ObservableMap } from 'mobx'; import * as passport from 'passport'; import * as path from 'path'; import * as request from 'request'; +import * as rp from 'request-promise'; import * as io from 'socket.io'; import { Socket } from 'socket.io'; import * as webpack from 'webpack'; @@ -21,7 +21,7 @@ import { getForgot, getLogin, getLogout, getReset, getSignup, postForgot, postLo import { DashUserModel } from './authentication/models/user_model'; import { Client } from './Client'; import { Database } from './database'; -import { MessageStore, Transferable, Diff } from "./Message"; +import { MessageStore, Transferable, Types, Diff } from "./Message"; import { RouteStore } from './RouteStore'; const app = express(); const config = require('../../webpack.config'); @@ -31,6 +31,9 @@ const serverPort = 4321; import expressFlash = require('express-flash'); import flash = require('connect-flash'); import c = require("crypto"); +import { Search } from './Search'; +import { debug } from 'util'; +import _ = require('lodash'); const MongoStore = require('connect-mongo')(session); const mongoose = require('mongoose'); @@ -117,8 +120,16 @@ app.get("/pull", (req, res) => res.redirect("/"); })); +// SEARCH + // GETTERS +app.get("/search", async (req, res) => { + let query = req.query.query || "hello"; + let results = await Search.Instance.search(query); + res.send(results); +}); + // anyone attempting to navigate to localhost at this port will // first have to login addSecureRoute( @@ -240,6 +251,7 @@ server.on("connection", function (socket: Socket) { async function deleteFields() { await Database.Instance.deleteAll(); + await Search.Instance.clear(); await Database.Instance.deleteAll('newDocuments'); } @@ -248,6 +260,7 @@ async function deleteAll() { await Database.Instance.deleteAll('newDocuments'); await Database.Instance.deleteAll('sessions'); await Database.Instance.deleteAll('users'); + await Search.Instance.clear(); } function barReceived(guid: String) { @@ -266,6 +279,10 @@ function getFields([ids, callback]: [string[], (result: Transferable[]) => void] function setField(socket: Socket, newValue: Transferable) { Database.Instance.update(newValue.id, newValue, () => socket.broadcast.emit(MessageStore.SetField.Message, newValue)); + if (newValue.type === Types.Text) { + Search.Instance.updateDocument({ id: newValue.id, data: (newValue as any).data }); + console.log("set field"); + } } function GetRefField([id, callback]: [string, (result?: Transferable) => void]) { @@ -276,9 +293,73 @@ function GetRefFields([ids, callback]: [string[], (result?: Transferable[]) => v Database.Instance.getDocuments(ids, callback, "newDocuments"); } + +const suffixMap: { [type: string]: (string | [string, string | ((json: any) => any)]) } = { + "number": "_n", + "string": "_t", + "image": ["_t", "url"], + "video": ["_t", "url"], + "pdf": ["_t", "url"], + "audio": ["_t", "url"], + "web": ["_t", "url"], + "date": ["_d", value => new Date(value.date).toISOString()], + "proxy": ["_i", "fieldId"], + "list": ["_l", list => { + const results = []; + for (const value of list.fields) { + const term = ToSearchTerm(value); + if (term) { + results.push(term.value); + } + } + return results.length ? results : null; + }] +}; + +function ToSearchTerm(val: any): { suffix: string, value: any } | undefined { + const type = val.__type || typeof val; + let suffix = suffixMap[type]; + if (!suffix) { + return; + } + + if (Array.isArray(suffix)) { + const accessor = suffix[1]; + if (typeof accessor === "function") { + val = accessor(val); + } else { + val = val[accessor]; + } + suffix = suffix[0]; + } + + return { suffix, value: val }; +} + function UpdateField(socket: Socket, diff: Diff) { Database.Instance.update(diff.id, diff.diff, () => socket.broadcast.emit(MessageStore.UpdateField.Message, diff), false, "newDocuments"); + const docfield = diff.diff.$set; + if (!docfield) { + return; + } + const update: any = { id: diff.id }; + let dynfield = false; + for (let key in docfield) { + if (!key.startsWith("fields.")) continue; + let val = docfield[key]; + let term = ToSearchTerm(val); + if (term !== undefined) { + let { suffix, value } = term; + key = key.substring(7); + Object.values(suffixMap).forEach(suf => update[key + suf] = null); + update[key + suffix] = { set: value }; + dynfield = true; + } + } + if (dynfield) { + Search.Instance.updateDocument(update); + } } function CreateField(newValue: any) { |