-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
80 changed files
with
1,539 additions
and
2,049 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# Use an OpenJDK runtime as a parent image - This is a Debian distro | ||
FROM openjdk:8-jre-stretch | ||
# Use an OpenJDK runtime as a parent image | ||
FROM openjdk:8-jdk | ||
|
||
MAINTAINER [email protected] | ||
|
||
|
@@ -9,14 +9,12 @@ WORKDIR /var/lib/metadig | |
# This file was created from the https://github.com/NCEAS/metadig-r repo | ||
# and contains R functions that assist in writing R based quality checks. | ||
COPY metadig_0.2.0.tar.gz metadig.tar.gz | ||
#COPY log4j.properties . | ||
# The most recently built jar file is copied from the maven build directory to this dir by maven, so that | ||
# it can be copied to the image. | ||
COPY metadig-engine.jar metadig-engine.jar | ||
# For some reason, the DataONE indexer software can't find these files unless they are put in | ||
# directory and included in the CLASSPATH (see java CMD). They are in the source tree under 'main/resources', | ||
# but maybe further action needs to be taken for java/dataone to be able to find them without having to | ||
# do this extra step. | ||
# The DataONE indexer software can't find these files unless they are put in the ./solr directory and included in the | ||
# CLASSPATH (see java CMD). They are in the source tree under 'main/resources', but maybe further action needs to be | ||
# taken for java/dataone to be able to find them without having to do this extra step. | ||
COPY solr solr | ||
|
||
# DataONE indexer prints copious error msgs if these files don't exist | ||
|
@@ -25,15 +23,12 @@ RUN mkdir -p /etc/dataone/index && touch /etc/dataone/index/d1client.properties | |
# Add R runtime and install packages required by the quality suites | ||
RUN apt update | ||
RUN apt -y install vim bash | ||
RUN apt -y install r-base r-cran-httr r-cran-xml2 r-cran-tidyr r-cran-scales r-cran-lubridate r-cran-ggplot2 r-cran-magrittr | ||
# Debian stretch doesn't have a pre-cooked package for readr, so install now. | ||
RUN Rscript --vanilla -e 'install.packages("readr", repos=c(CRAN = "http://cran.rstudio.com"))' | ||
RUN apt -y install r-base r-cran-httr r-cran-xml2 r-cran-tidyr r-cran-scales r-cran-lubridate r-cran-ggplot2 r-cran-magrittr r-cran-readr | ||
# Install the metadig-engine distribution | ||
RUN Rscript --vanilla -e 'install.packages("metadig.tar.gz", repos=NULL)' | ||
|
||
# Run the Scorer process | ||
# Note: docker --build-arg only allows one argument (one token only; multiple tokens inside quotes don't work), so we have | ||
# to specify java options directly on the command line. | ||
# Set classpath to include /opt/local/metadig/log4j.properties, if it exists, so that logging can be changed without | ||
# having to rebuild the container. Note that on k8s, this dir is mapped to the persistent volume, so will be /data/metadig/log4j.properties | ||
CMD java -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.scorer.Scorer | ||
CMD java -Dlog4j2.formatMsgNoLookups=true -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:+UseSerialGC -cp /opt/local/metadig/config:./metadig-engine.jar:./solr edu.ucsb.nceas.mdqengine.scorer.Scorer | ||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,207 @@ | ||
<beans xmlns="http://www.springframework.org/schema/beans" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd"> | ||
|
||
<bean id="mdqSubprocessor" class="org.dataone.cn.indexer.parser.ScienceMetadataDocumentSubprocessor"> | ||
|
||
<!-- match MDQ documents --> | ||
<property name="matchDocuments"> | ||
<list> | ||
<value>https://nceas.ucsb.edu/mdqe/v1</value> | ||
</list> | ||
</property> | ||
<property name="fieldList"> | ||
<list> | ||
<ref bean="mdq.runId"/> | ||
<ref bean="mdq.suiteId"/> | ||
<ref bean="mdq.timestamp"/> | ||
<ref bean="mdq.datasource"/> | ||
<ref bean="mdq.metadata.formatId"/> | ||
<ref bean="mdq.dateUploaded"/> | ||
<ref bean="mdq.obsoletes"/> | ||
<ref bean="mdq.obsoletedBy"/> | ||
<ref bean="mdq.sequenceId"/> | ||
<ref bean="mdq.seriesId"/> | ||
<ref bean="mdq.funder"/> | ||
<ref bean="mdq.funder.lookup"/> | ||
<ref bean="mdq.rightsHolder"/> | ||
<ref bean="mdq.group"/> | ||
<ref bean="mdq.checks.passed"/> | ||
<ref bean="mdq.checks.warned"/> | ||
<ref bean="mdq.checks.failed"/> | ||
<ref bean="mdq.checks.info"/> | ||
<ref bean="mdq.checks.errored"/> | ||
<ref bean="mdq.check.count"/> | ||
<ref bean="mdq.score.overall"/> | ||
</list> | ||
</property> | ||
</bean> | ||
|
||
<bean id="mdq.runId" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="runId"/> | ||
<constructor-arg name="xpath" | ||
value="/*[local-name() = 'run']/id"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.sequenceId" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="sequenceId"/> | ||
<constructor-arg name="xpath" | ||
value="/*[local-name() = 'run']/sequenceId"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.suiteId" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="suiteId"/> | ||
<constructor-arg name="xpath" | ||
value="/*[local-name() = 'run']/suiteId"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.timestamp" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="timestamp"/> | ||
<constructor-arg name="xpath" | ||
value="/*[local-name() = 'run']/timestamp"/> | ||
<property name="multivalue" value="false"/> | ||
<property name="converter" ref="dateConverter"/> | ||
</bean> | ||
|
||
<bean id="mdq.metadata.formatId" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="metadataFormatId"/> | ||
<constructor-arg name="xpath" | ||
value="normalize-space(/*/sysmeta/formatId)"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.obsoletes" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="obsoletes"/> | ||
<constructor-arg name="xpath" | ||
value="normalize-space(/*/sysmeta/obsoletes)"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.obsoletedBy" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="obsoletedBy"/> | ||
<constructor-arg name="xpath" | ||
value="normalize-space(/*/sysmeta/obsoletedBy)"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.seriesId" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="seriesId"/> | ||
<constructor-arg name="xpath" | ||
value="normalize-space(/*/sysmeta/seriesId)"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.datasource" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="datasource"/> | ||
<constructor-arg name="xpath" | ||
value="normalize-space(/*/sysmeta/originMemberNode)"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.dateUploaded" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="dateUploaded"/> | ||
<constructor-arg name="xpath" | ||
value="normalize-space(/*/sysmeta/dateUploaded)"/> | ||
<property name="multivalue" value="false"/> | ||
<!-- Note: The dateConverter doesn't work for this field. TODO: determine why. --> | ||
<property name="converter" ref="dateConverter"/> | ||
</bean> | ||
|
||
<bean id="mdq.funder" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="funder"/> | ||
<!-- Note: use 'contains' (xpath 1.0, which java supports) so that we can match check name without version number. --> | ||
<constructor-arg name="xpath" | ||
value="//result[check/id[contains(text(),'check.echo.funder.')]]/output/text()"/> | ||
<property name="multivalue" value="true"/> | ||
<property name="dedupe" value="true"/> | ||
</bean> | ||
|
||
<bean id="mdq.funder.lookup" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="funderInfo"/> | ||
<constructor-arg name="xpath" | ||
value="//result[check/id[contains(text(),'check.lookup.award.')]]/output/text()"/> | ||
<property name="multivalue" value="true"/> | ||
<property name="dedupe" value="true"/> | ||
</bean> | ||
|
||
<bean id="mdq.rightsHolder" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="rightsHolder"/> | ||
<constructor-arg name="xpath" | ||
value="normalize-space(/*/sysmeta/rightsHolder)"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<bean id="mdq.group" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="group"/> | ||
<constructor-arg name="xpath" | ||
value="/*/sysmeta/groups/group/text()"/> | ||
<property name="multivalue" value="true"/> | ||
<property name="dedupe" value="true"/> | ||
</bean> | ||
|
||
<!-- scoring by result status --> | ||
<!-- pass: (Status = SUCCESS) and (level != INFO & level != METADATA) --> | ||
<bean id="mdq.checks.passed" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="checksPassed"/> | ||
<constructor-arg name="xpath" | ||
value="count(//result[check/level[text() != 'INFO' and text() != 'METADATA']]/status[text() = 'SUCCESS'])"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
<!-- warned: (Status = FAILURE) & (level = OPTIONAL) --> | ||
<bean id="mdq.checks.warned" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="checksWarned"/> | ||
<constructor-arg name="xpath" | ||
value="count(//result[check/level[text() = 'OPTIONAL']]/status[text() = 'FAILURE'])"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
<!-- failed: (Status = FAILURE) & (level = REQUIRED) --> | ||
<bean id="mdq.checks.failed" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="checksFailed"/> | ||
<constructor-arg name="xpath" | ||
value="count(//result[check/level[text() = 'REQUIRED']]/status[text() = 'FAILURE'])"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
<!-- errored: (status = ERROR) --> | ||
<bean id="mdq.checks.errored" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="checksErrored"/> | ||
<constructor-arg name="xpath" | ||
value="count(//result/status[text() = 'ERROR'])"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
<!-- info: Level = INFO or status = SKIP --> | ||
<bean id="mdq.checks.info" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="checksInfo"/> | ||
<constructor-arg name="xpath" | ||
value="count(//result/status[text() = 'SKIP'] | //result[check/level[text() = 'INFO']])"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
<!-- checkCount: all checks except level = 'METADATA'. (Not used for overall score.) --> | ||
<bean id="mdq.check.count" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="checkCount"/> | ||
<constructor-arg name="xpath" | ||
value="count(//result) - count(//result[check/level[text() = 'METADATA']])"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
|
||
<!-- the composite score --> | ||
<!-- overallScore: count(pass) div (count(pass) + count(REQUIRED checks with status FAILURE or ERROR)) --> | ||
<bean id="mdq.score.overall" class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="scoreOverall"/> | ||
<constructor-arg name="xpath" | ||
value="(count(//result[check/level[text() != 'INFO' and text() != 'METADATA']]/status[text() = 'SUCCESS'])) div | ||
(count(//result[check/level[text() != 'INFO' and text() != 'METADATA']]/status[text() = 'SUCCESS']) + | ||
count(//result[check/level[text() = 'REQUIRED']]/status[text() = 'ERROR'] | //result[check/level[text() = 'REQUIRED']]/status[text() = 'FAILURE']))"/> | ||
<property name="multivalue" value="false"/> | ||
</bean> | ||
<!-- Calculate the scores for check 'types'. --> | ||
<bean id="mdqAddDynamicFieldsSubprocessor" class="edu.ucsb.nceas.mdqengine.solr.QualityReportSubprocessor"> | ||
<property name="matchDocuments"> | ||
<list> | ||
<value>https://nceas.ucsb.edu/mdqe/v1</value> | ||
</list> | ||
</property> | ||
</bean> | ||
</beans> |
22 changes: 22 additions & 0 deletions
22
Docker/metadig-scorer/solr/application-context-systemmeta-200.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<beans xmlns="http://www.springframework.org/schema/beans" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://www.springframework.org/schema/beans | ||
http://www.springframework.org/schema/beans/spring-beans.xsd"> | ||
|
||
<bean id="xpath_system_metadata_200" class="java.util.ArrayList"> | ||
<constructor-arg> | ||
<list> | ||
<bean class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="metadataId" /> | ||
<constructor-arg name="xpath" | ||
value="/d200:systemMetadata/identifier/text()" /> | ||
</bean> | ||
<bean class="org.dataone.cn.indexer.parser.SolrField"> | ||
<constructor-arg name="name" value="formatId" /> | ||
<constructor-arg name="xpath" | ||
value="/d200:systemMetadata/formatId/text()" /> | ||
</bean> | ||
</list> | ||
</constructor-arg> | ||
</bean> | ||
</beans> |
Oops, something went wrong.