aboutsummaryrefslogtreecommitdiff
path: root/solr-8.3.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
diff options
context:
space:
mode:
authorStanley Yip <stanley_yip@brown.edu>2020-01-08 13:47:29 -0500
committerStanley Yip <stanley_yip@brown.edu>2020-01-08 13:47:29 -0500
commitabfa42b6f2cf863deee19aac19328a23687464cb (patch)
treeb481f23ffa7bccbde7a31de34f50d765b6b73162 /solr-8.3.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
parentd8fc218f3481728f221ceacc60ac4bc553f8e295 (diff)
parent19a71cb2788b9c1c8d8ced4af285bf91033ba626 (diff)
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into pen
Diffstat (limited to 'solr-8.3.1/example/example-DIH/solr/atom/conf/atom-data-config.xml')
-rw-r--r--solr-8.3.1/example/example-DIH/solr/atom/conf/atom-data-config.xml35
1 files changed, 35 insertions, 0 deletions
diff --git a/solr-8.3.1/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr-8.3.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
new file mode 100644
index 000000000..b7de812d0
--- /dev/null
+++ b/solr-8.3.1/example/example-DIH/solr/atom/conf/atom-data-config.xml
@@ -0,0 +1,35 @@
+<dataConfig>
+ <dataSource type="URLDataSource"/>
+ <document>
+
+ <entity name="stackoverflow"
+ url="https://stackoverflow.com/feeds/tag/solr"
+ processor="XPathEntityProcessor"
+ forEach="/feed|/feed/entry"
+ transformer="HTMLStripTransformer,RegexTransformer">
+
+ <!-- Pick this value up from the feed level and apply to all documents -->
+ <field column="lastchecked_dt" xpath="/feed/updated" commonField="true"/>
+
+ <!-- Keep only the final numeric part of the URL -->
+ <field column="id" xpath="/feed/entry/id" regex=".*/" replaceWith=""/>
+
+ <field column="title" xpath="/feed/entry/title"/>
+ <field column="author" xpath="/feed/entry/author/name"/>
+ <field column="category" xpath="/feed/entry/category/@term"/>
+ <field column="link" xpath="/feed/entry/link[@rel='alternate']/@href"/>
+
+ <!-- Use transformers to convert HTML into plain text.
+ There is also an UpdateRequestProcess to trim remaining spaces.
+ -->
+ <field column="summary" xpath="/feed/entry/summary" stripHTML="true" regex="( |\n)+" replaceWith=" "/>
+
+ <!-- Ignore namespaces when matching XPath -->
+ <field column="rank" xpath="/feed/entry/rank"/>
+
+ <field column="published_dt" xpath="/feed/entry/published"/>
+ <field column="updated_dt" xpath="/feed/entry/updated"/>
+ </entity>
+
+ </document>
+</dataConfig>