aboutsummaryrefslogtreecommitdiff
path: root/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
diff options
context:
space:
mode:
authorEleanor Eng <eleanor_eng@brown.edu>2019-08-06 10:54:33 -0400
committerEleanor Eng <eleanor_eng@brown.edu>2019-08-06 10:54:33 -0400
commitb7194d88ba9733413063c7f371dedefd3a97e35f (patch)
tree418203fe1ee1f4aa0771c555ab672541cc775473 /solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
parent2c4440be2807b3b1da691ea04b061c35e50ecb72 (diff)
parent298d1c9b29d6ce2171fd9ac8274b64583b73f6f5 (diff)
merge with master
Diffstat (limited to 'solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml')
-rw-r--r--solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml35
1 files changed, 35 insertions, 0 deletions
diff --git a/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
new file mode 100644
index 000000000..b7de812d0
--- /dev/null
+++ b/solr-8.1.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
@@ -0,0 +1,35 @@
+<dataConfig>
+ <dataSource type="URLDataSource"/>
+ <document>
+
+ <entity name="stackoverflow"
+ url="https://stackoverflow.com/feeds/tag/solr"
+ processor="XPathEntityProcessor"
+ forEach="/feed|/feed/entry"
+ transformer="HTMLStripTransformer,RegexTransformer">
+
+ <!-- Pick this value up from the feed level and apply to all documents -->
+ <field column="lastchecked_dt" xpath="/feed/updated" commonField="true"/>
+
+ <!-- Keep only the final numeric part of the URL -->
+ <field column="id" xpath="/feed/entry/id" regex=".*/" replaceWith=""/>
+
+ <field column="title" xpath="/feed/entry/title"/>
+ <field column="author" xpath="/feed/entry/author/name"/>
+ <field column="category" xpath="/feed/entry/category/@term"/>
+ <field column="link" xpath="/feed/entry/link[@rel='alternate']/@href"/>
+
+ <!-- Use transformers to convert HTML into plain text.
+ There is also an UpdateRequestProcess to trim remaining spaces.
+ -->
+ <field column="summary" xpath="/feed/entry/summary" stripHTML="true" regex="( |\n)+" replaceWith=" "/>
+
+ <!-- Ignore namespaces when matching XPath -->
+ <field column="rank" xpath="/feed/entry/rank"/>
+
+ <field column="published_dt" xpath="/feed/entry/published"/>
+ <field column="updated_dt" xpath="/feed/entry/updated"/>
+ </entity>
+
+ </document>
+</dataConfig>