diff --git a/.gitignore b/.gitignore index 922d62185..e9cd5d0d2 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,6 @@ migration/test* *.sublime-workspace # Rbenv artifacts -.ruby-version test/test_run.log # Debug output diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 000000000..9cec7165a --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +3.1.6 diff --git a/Dockerfile b/Dockerfile index bbc327ceb..b9529fd14 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,22 +1,36 @@ -ARG RUBY_VERSION -ARG DISTRO_NAME=bullseye +ARG RUBY_VERSION=3.1 +ARG DISTRO=bullseye -FROM ruby:$RUBY_VERSION-$DISTRO_NAME +FROM ruby:$RUBY_VERSION-$DISTRO -RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \ - openjdk-11-jre-headless \ - raptor2-utils \ - && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + git \ + libxml2 \ + libxslt-dev \ + openjdk-11-jre-headless \ + raptor2-utils \ + && rm -rf /var/lib/apt/lists/* -RUN mkdir -p /srv/ontoportal/ontologies_linked_data -RUN mkdir -p /srv/ontoportal/bundle -COPY Gemfile* /srv/ontoportal/ontologies_linked_data/ +WORKDIR /app -WORKDIR /srv/ontoportal/ontologies_linked_data +COPY Gemfile* *.gemspec ./ -RUN gem update --system -RUN gem install bundler -ENV BUNDLE_PATH=/srv/ontoportal/bundle -RUN bundle install +# Copy only the `version.rb` file to prevent missing file errors! +COPY lib/ontologies_linked_data/version.rb lib/ontologies_linked_data/ -COPY . /srv/ontoportal/ontologies_linked_data +#Install the exact Bundler version from Gemfile.lock (if it exists) +RUN gem update --system && \ + if [ -f Gemfile.lock ]; then \ + BUNDLER_VERSION=$(grep -A 1 "BUNDLED WITH" Gemfile.lock | tail -n 1 | tr -d ' '); \ + gem install bundler -v "$BUNDLER_VERSION"; \ + else \ + gem install bundler; \ + fi + +RUN bundle config set --global no-document 'true' +RUN bundle install --jobs 4 --retry 3 + +COPY . 
./ + +CMD ["bundle", "exec", "rake"] diff --git a/Gemfile b/Gemfile index c20d9ca0d..11561bdb7 100644 --- a/Gemfile +++ b/Gemfile @@ -1,5 +1,7 @@ source 'https://rubygems.org' +gemspec + gem 'activesupport', '~> 4' gem 'addressable', '~> 2.8' gem 'bcrypt', '~> 3.0' @@ -37,6 +39,5 @@ end gem 'goo', github: 'ncbo/goo', branch: 'master' gem 'sparql-client', github: 'ncbo/sparql-client', branch: 'master' -gem 'net-ftp' gem 'public_suffix', '~> 5.1.1' -gem 'net-imap', '~> 0.4.18' \ No newline at end of file +gem 'net-imap', '~> 0.4.18' diff --git a/Gemfile.lock b/Gemfile.lock index b5e13479a..fd4e234dd 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/ncbo/goo.git - revision: 39f67ab7fae7675b6ff417ace0ab923e40ffcbcd + revision: b9019ad9e1eb78c74105fc6c6a879085066da17d branch: master specs: goo (0.0.2) @@ -24,6 +24,25 @@ GIT net-http-persistent (= 2.9.4) rdf (>= 1.0) +PATH + remote: . + specs: + ontologies_linked_data (0.0.1) + activesupport + bcrypt + goo + json + libxml-ruby + multi_json + net-ftp + oj + omni_logger + pony + rack + rack-test + rsolr + rubyzip + GEM remote: https://rubygems.org/ specs: @@ -36,7 +55,7 @@ GEM addressable (2.8.7) public_suffix (>= 2.0.2, < 7.0) ansi (1.5.0) - ast (2.4.2) + ast (2.4.3) bcrypt (3.1.20) bigdecimal (3.1.9) builder (3.3.0) @@ -44,7 +63,7 @@ GEM logger (~> 1.5) coderay (1.1.3) concurrent-ruby (1.3.5) - connection_pool (2.5.0) + connection_pool (2.5.3) cube-ruby (0.0.3) daemons (1.4.1) date (3.4.1) @@ -55,15 +74,15 @@ GEM launchy (>= 2.1, < 4.0) mail (~> 2.7) eventmachine (1.2.7) - faraday (2.12.2) + faraday (2.13.1) faraday-net_http (>= 2.0, < 3.5) json logger faraday-net_http (3.4.0) net-http (>= 0.5.0) - ffi (1.17.1-aarch64-linux-gnu) - ffi (1.17.1-arm64-darwin) - ffi (1.17.1-x86_64-linux-gnu) + ffi (1.17.2-aarch64-linux-gnu) + ffi (1.17.2-arm64-darwin) + ffi (1.17.2-x86_64-linux-gnu) hashie (5.0.0) htmlentities (4.3.4) http-accept (1.7.0) @@ -71,15 +90,16 @@ GEM domain_name (~> 0.5) 
i18n (0.9.5) concurrent-ruby (~> 1.0) - json (2.10.1) + json (2.11.3) json_pure (2.8.1) language_server-protocol (3.17.0.4) - launchy (3.1.0) + launchy (3.1.1) addressable (~> 2.8) childprocess (~> 5.0) logger (~> 1.6) libxml-ruby (5.0.3) - logger (1.6.5) + lint_roller (1.1.0) + logger (1.7.0) macaddr (1.7.2) systemu (~> 2.6.5) mail (2.8.1) @@ -88,10 +108,10 @@ GEM net-pop net-smtp method_source (1.1.0) - mime-types (3.6.0) + mime-types (3.6.2) logger mime-types-data (~> 3.2015) - mime-types-data (3.2025.0204) + mime-types-data (3.2025.0422) mini_mime (1.1.5) minitest (4.7.5) minitest-reporters (0.14.24) @@ -106,7 +126,7 @@ GEM net-http (0.6.0) uri net-http-persistent (2.9.4) - net-imap (0.4.19) + net-imap (0.4.21) date net-protocol net-pop (0.1.2) @@ -116,35 +136,36 @@ GEM net-smtp (0.5.1) net-protocol netrc (0.11.0) - oj (3.16.9) + oj (3.16.10) bigdecimal (>= 3.0) ostruct (>= 0.2) omni_logger (0.1.4) logger ostruct (0.6.1) - parallel (1.26.3) - parser (3.3.7.1) + parallel (1.27.0) + parser (3.3.8.0) ast (~> 2.4.1) racc pony (1.13.1) mail (>= 2.0) powerbar (2.0.1) hashie (>= 1.1.0) + prism (1.4.0) pry (0.15.2) coderay (~> 1.1) method_source (~> 1.0) public_suffix (5.1.1) racc (1.8.1) - rack (2.2.11) + rack (2.2.13) rack-test (0.8.3) rack (>= 1.0, < 3) rainbow (3.1.1) rake (10.5.0) rdf (1.0.8) addressable (>= 2.2) - redis (5.3.0) + redis (5.4.0) redis-client (>= 0.22.0) - redis-client (0.23.2) + redis-client (0.24.0) connection_pool regexp_parser (2.10.0) request_store (1.7.0) @@ -154,22 +175,24 @@ GEM http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) - rexml (3.4.0) + rexml (3.4.1) rsolr (2.6.0) builder (>= 2.1.2) faraday (>= 0.9, < 3, != 2.0.0) - rubocop (1.71.2) + rubocop (1.75.4) json (~> 2.3) - language_server-protocol (>= 3.17.0) + language_server-protocol (~> 3.17.0.2) + lint_roller (~> 1.1.0) parallel (~> 1.10) parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) regexp_parser (>= 2.9.3, < 3.0) - rubocop-ast (>= 1.38.0, < 2.0) + rubocop-ast 
(>= 1.44.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.38.0) - parser (>= 3.3.1.0) + rubocop-ast (1.44.1) + parser (>= 3.3.7.2) + prism (~> 1.4) ruby-progressbar (1.13.0) rubyzip (1.3.0) simplecov (0.22.0) @@ -194,7 +217,7 @@ GEM unicode-display_width (3.1.4) unicode-emoji (~> 4.0, >= 4.0.4) unicode-emoji (4.0.4) - uri (1.0.2) + uri (1.0.3) uuid (2.3.9) macaddr (~> 1.0) @@ -202,7 +225,6 @@ PLATFORMS aarch64-linux arm64-darwin-22 arm64-darwin-23 - arm64-darwin-24 x86_64-linux DEPENDENCIES @@ -217,10 +239,10 @@ DEPENDENCIES minitest (~> 4) minitest-reporters (>= 0.5.0) multi_json (~> 1.0) - net-ftp net-imap (~> 0.4.18) oj (~> 3.0) omni_logger + ontologies_linked_data! pony pry public_suffix (~> 5.1.1) @@ -238,4 +260,4 @@ DEPENDENCIES thin BUNDLED WITH - 2.4.22 + 2.6.3 diff --git a/bin/owlapi-wrapper-1.4.2.jar b/bin/owlapi-wrapper-1.4.2.jar old mode 100644 new mode 100755 diff --git a/bin/owlapi-wrapper-1.4.3.jar b/bin/owlapi-wrapper-1.4.3.jar new file mode 100644 index 000000000..1fa884c65 Binary files /dev/null and b/bin/owlapi-wrapper-1.4.3.jar differ diff --git a/config/schemes/ontology_submission.yml b/config/schemes/ontology_submission.yml index 6086bc693..6bc606291 100644 --- a/config/schemes/ontology_submission.yml +++ b/config/schemes/ontology_submission.yml @@ -21,10 +21,10 @@ #Acronym => Ontology object (omv:acronym) #Name => Ontology object (omv:name) -#URI -URI: +#uri +uri: display: "general" - label: "URI" + label: "uri" helpText: "The URI of the ontology which is described by these metadata." 
example: 'https://w3id.org/myontology' description: [ @@ -56,7 +56,7 @@ version: "PAV: The version number of a resource.", "DOAP: A project release", "SCHEMA: The version of the CreativeWork embodied by a specified resource."] - extractedMetadata: true + extractedMetadata: false metadataMappings: [ "omv:version", "mod:version", "owl:versionInfo", "pav:version", "doap:release", "schema:version", "oboInOwl:data-version", "oboInOwl:version" ] #Status @@ -175,7 +175,7 @@ hasOntologySyntax: naturalLanguage: display: "general" label: "Natural language" - helpText: "The language of the content of the ontology (with values in Lexvo/iso639-1)." + helpText: "The language of the content of the ontology (with values in ISO 639-1)." description: [ "DCTERMS: A language of the resource. Recommended practice is to use either a non-literal value representing a language from a controlled vocabulary such as ISO 639-2 or ISO 639-3, or a literal value consisting of an IETF Best Current Practice 47 language tag.", "OMV: The language of the content of the ontology, i.e. English, French, etc.", @@ -183,20 +183,57 @@ naturalLanguage: "SCHEMA: The language of the content or performance or used in an action. Please use one of the language codes from the IETF BCP 47 standard." 
] extractedMetadata: true enforcedValues: { - "http://lexvo.org/id/iso639-1/en": "English", - "http://lexvo.org/id/iso639-1/fr": "French", - "http://lexvo.org/id/iso639-1/es": "Spanish", - "http://lexvo.org/id/iso639-1/pt": "Portuguese", - "http://lexvo.org/id/iso639-1/it": "Italian", - "http://lexvo.org/id/iso639-1/de": "German", - "http://lexvo.org/id/iso639-1/ar": "Arabic", - "http://lexvo.org/id/iso639-1/zh": "Chinese", - "http://lexvo.org/id/iso639-1/hi": "Hindi", - "http://lexvo.org/id/iso639-1/nl": "Dutch", - "http://lexvo.org/id/iso639-1/fi": "Finnish", - "http://lexvo.org/id/iso639-1/el": "Greek", - "http://lexvo.org/id/iso639-1/ja": "Japanese", - "http://lexvo.org/id/iso639-1/pt-br": "Brazilian" + "ar": "Arabic", + "az": "Azerbaijani", + "be": "Belarusian", + "bn": "Bengali", + "cs": "Czech", + "da": "Danish", + "de": "German", + "el": "Greek", + "en": "English", + "es": "Spanish", + "et": "Estonian", + "eu": "Basque", + "fa": "Persian", + "fi": "Finnish", + "fr": "French", + "he": "Hebrew", + "hi": "Hindi", + "hr": "Croatian", + "hu": "Hungarian", + "id": "Indonesian", + "it": "Italian", + "ja": "Japanese", + "ka": "Georgian", + "kk": "Kazakh", + "ko": "Korean", + "lt": "Lithuanian", + "lv": "Latvian", + "mn": "Mongolian", + "ms": "Malay", + "my": "Burmese", + "nl": "Dutch", + "no": "Norwegian", + "pa": "Punjabi", + "pl": "Polish", + "pt": "Portuguese", + "pt-br": "Brazilian Portuguese", + "ro": "Romanian", + "ru": "Russian", + "sk": "Slovak", + "sl": "Slovenian", + "sr": "Serbian", + "sv": "Swedish", + "sw": "Swahili", + "ta": "Tamil", + "te": "Telugu", + "th": "Thai", + "tr": "Turkish", + "uk": "Ukrainian", + "uz": "Uzbek", + "vi": "Vietnamese", + "zh": "Chinese" } metadataMappings: [ "omv:naturalLanguage", "dc:language", "dcterms:language", "doap:language", "schema:inLanguage" ] diff --git a/docker-compose.yml b/docker-compose.yml index a63c268f1..257ed02d8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,12 +4,8 @@ x-app: &app args: 
RUBY_VERSION: '3.1' # Increase the version number in the image tag every time Dockerfile or its arguments is changed - image: ontologies_ld-dev:0.0.3 + image: ontologies_ld-dev:0.0.4 environment: &env - # default bundle config resolves to /usr/local/bundle/config inside of the container - # we are setting it to local app directory if we need to use 'bundle config local' - BUNDLE_APP_CONFIG: /srv/ontoportal/ontologies_api/.bundle - BUNDLE_PATH: /srv/ontoportal/bundle COVERAGE: 'true' # enable simplecov code coverage REDIS_HOST: redis-ut REDIS_PORT: 6379 @@ -20,8 +16,8 @@ x-app: &app command: /bin/bash volumes: # bundle volume for hosting gems installed by bundle; it speeds up gem install in local development - - bundle:/srv/ontoportal/bundle - - .:/srv/ontoportal/ontologies_linked_data + - bundle:/usr/local/bundle + - .:/app # mount directory containing development version of the gems if you need to use 'bundle config local' #- /Users/alexskr/ontoportal:/Users/alexskr/ontoportal depends_on: &depends_on @@ -117,7 +113,7 @@ services: retries: 5 agraph-ut: - image: franzinc/agraph:v8.1.0 + image: franzinc/agraph:v8.3.1 platform: linux/amd64 environment: - AGRAPH_SUPER_USER=test diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb index 229b46301..901178c30 100644 --- a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb +++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb @@ -3,21 +3,43 @@ module Concerns module OntologySubmission module MetadataExtractor - def extract_metadata + def extract_metadata(logger = nil, heavy_extraction = true, user_params = nil) + logger ||= Logger.new(STDOUT) + logger.info('Extracting metadata from the ontology submission.') + + @submission = self version_info = extract_version ontology_iri = extract_ontology_iri + 
@submission.version = version_info if version_info + @submission.uri = ontology_iri if ontology_iri + @submission.save - self.version = version_info if version_info - self.uri = RDF::URI.new(ontology_iri) if ontology_iri + if heavy_extraction + begin + # Extract metadata directly from the ontology + extract_ontology_metadata(logger, user_params) + logger.info('Additional metadata extracted.') + rescue StandardError => e + e.backtrace + logger.error("Error while extracting additional metadata: #{e}") + end + end + if @submission.valid? + @submission.save + else + logger.error("Error while extracting additional metadata: #{@submission.errors}") + @submission = LinkedData::Models::OntologySubmission.find(@submission.id).first.bring_remaining + end end def extract_version + query = Goo.sparql_query_client.select(:versionInfo).distinct - .from(self.id) - .where([RDF::URI.new('http://bioportal.bioontology.org/ontologies/versionSubject'), - RDF::URI.new('http://www.w3.org/2002/07/owl#versionInfo'), - :versionInfo]) + .from(@submission.id) + .where([RDF::URI.new('http://bioportal.bioontology.org/ontologies/versionSubject'), + RDF::URI.new('http://www.w3.org/2002/07/owl#versionInfo'), + :versionInfo]) sol = query.each_solution.first || {} sol[:versionInfo]&.to_s @@ -25,12 +47,238 @@ def extract_version def extract_ontology_iri query = Goo.sparql_query_client.select(:uri).distinct - .from(self.id) + .from(@submission.id) .where([:uri, RDF::URI.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), RDF::URI.new('http://www.w3.org/2002/07/owl#Ontology')]) sol = query.each_solution.first || {} - sol[:uri]&.to_s + RDF::URI.new(sol[:uri]) if sol[:uri] + end + + # Extract additional metadata about the ontology + # First it extracts the main metadata, then the mapped metadata + def extract_ontology_metadata(logger, user_params) + user_params = {} if user_params.nil? 
|| !user_params + ontology_uri = @submission.uri + logger.info("Extraction metadata from ontology #{ontology_uri}") + + # go through all OntologySubmission attributes. Returns symbols + LinkedData::Models::OntologySubmission.attributes(:all).each do |attr| + # for attribute with the :extractedMetadata setting on, and that have not been defined by the user + attr_settings = LinkedData::Models::OntologySubmission.attribute_settings(attr) + + attr_not_excluded = user_params && !(user_params.key?(attr) && !user_params[attr].nil? && !user_params[attr].empty?) + + next unless attr_settings[:extractedMetadata] && attr_not_excluded + + # a boolean to check if a value that should be single have already been extracted + single_extracted = false + type = enforce?(attr, :list) ? :list : :string + old_value = value(attr, type) + + unless attr_settings[:namespace].nil? + property_to_extract = "#{attr_settings[:namespace].to_s}:#{attr.to_s}" + hash_results = extract_each_metadata(ontology_uri, attr, property_to_extract, logger) + single_extracted = send_value(attr, hash_results, logger) unless hash_results.empty? + end + + # extracts attribute value from metadata mappings + attr_settings[:metadataMappings] ||= [] + + attr_settings[:metadataMappings].each do |mapping| + break if single_extracted + + hash_mapping_results = extract_each_metadata(ontology_uri, attr, mapping.to_s, logger) + single_extracted = send_value(attr, hash_mapping_results, logger) unless hash_mapping_results.empty? + end + + new_value = value(attr, type) + + send_value(attr, old_value, logger) if empty_value?(new_value) && !empty_value?(old_value) + end + end + + def empty_value?(value) + value.nil? || (value.is_a?(Array) && value.empty?) || value.to_s.strip.empty? + end + + def value(attr, type) + val = @submission.send(attr.to_s) + type.eql?(:list) ? 
Array(val) || [] : val || '' + end + + def send_value(attr, new_value, logger) + old_val = nil + single_extracted = false + + if enforce?(attr, :list) + old_val = value(attr, :list) + old_values = old_val.dup + new_values = new_value.values + new_values = new_values.map { |v| find_or_create_agent(attr, v, logger) }.compact if enforce?(attr, :Agent) + + old_values.push(*new_values) + + @submission.send("#{attr}=", old_values.uniq) + elsif enforce?(attr, :concatenate) + # if multiple value for this attribute, then we concatenate it + # Add the concat at the very end, to easily join the content of the array + old_val = value(attr, :string) + metadata_values = old_val.split(', ') + new_values = new_value.values.map { |x| x.to_s.split(', ') }.flatten + + @submission.send("#{attr}=", (metadata_values + new_values).uniq.join(', ')) + else + new_value = new_value.values.first + + new_value = find_or_create_agent(attr, nil, logger) if enforce?(attr, :Agent) + + @submission.send("#{attr}=", new_value) + single_extracted = true + end + + unless @submission.valid? + logger.error("Error while extracting metadata for the attribute #{attr}: #{@submission.errors[attr] || @submission.errors}") + new_value&.delete if enforce?(attr, :Agent) && new_value.respond_to?(:delete) + @submission.send("#{attr}=", old_val) + end + + single_extracted + end + + # Return a hash with the best literal value for an URI + # it selects the literal according to their language: no language > english > french > other languages + def select_metadata_literal(metadata_uri, metadata_literal, hash) + return unless metadata_literal.is_a?(RDF::Literal) + + if hash.key?(metadata_uri) + if metadata_literal.has_language? + if !hash[metadata_uri].has_language? 
+ return hash + else + case metadata_literal.language + when :en, :eng + # Take the value with english language over other languages + hash[metadata_uri] = metadata_literal + return hash + when :fr, :fre + # If no english, take french + if hash[metadata_uri].language == :en || hash[metadata_uri].language == :eng + return hash + else + hash[metadata_uri] = metadata_literal + return hash + end + else + return hash + end + end + else + # Take the value with no language in priority (considered as a default) + hash[metadata_uri] = metadata_literal + return hash + end + else + hash[metadata_uri] = metadata_literal + hash + end + end + + # A function to extract additional metadata + # Take the literal data if the property is pointing to a literal + # If pointing to an URI: first it takes the "omv:name" of the object pointed by the property, if nil it takes the "rdfs:label". + # If not found it check for "omv:firstName + omv:lastName" (for "omv:Person") of this object. And to finish it takes the "URI" + # The hash_results contains the metadataUri (objet pointed on by the metadata property) with the value we are using from it + def extract_each_metadata(ontology_uri, attr, prop_to_extract, logger) + + query_metadata = < #{prop_to_extract} ?extractedObject . + OPTIONAL { ?extractedObject omv:name ?omvname } . + OPTIONAL { ?extractedObject omv:firstName ?omvfirstname } . + OPTIONAL { ?extractedObject omv:lastName ?omvlastname } . + OPTIONAL { ?extractedObject rdfs:label ?rdfslabel } . 
+} +eos + Goo.namespaces.each do |prefix, uri| + query_metadata = "PREFIX #{prefix}: <#{uri}>\n" + query_metadata + end + + # logger.info(query_metadata) + # This hash will contain the "literal" metadata for each object (uri or literal) pointed by the metadata predicate + hash_results = {} + Goo.sparql_query_client.query(query_metadata).each_solution do |sol| + value = sol[:extractedObject] + if enforce?(attr, :uri) + # If the attr is enforced as URI then it directly takes the URI + uri_value = value ? RDF::URI.new(value.to_s.strip) : nil + hash_results[value] = uri_value if uri_value&.valid? + elsif enforce?(attr, :date_time) + begin + hash_results[value] = DateTime.iso8601(value.to_s) + rescue StandardError => e + logger.error("Impossible to extract DateTime metadata for #{attr}: #{value}. It should follow iso8601 standards. Error message: #{e}") + end + elsif enforce?(attr, :integer) + begin + hash_results[value] = value.to_s.to_i + rescue StandardError => e + logger.error("Impossible to extract integer metadata for #{attr}: #{value}. Error message: #{e}") + end + elsif enforce?(attr, :boolean) + case value.to_s.downcase + when 'true' + hash_results[value] = true + when 'false' + hash_results[value] = false + else + logger.error("Impossible to extract boolean metadata for #{attr}: #{value}. Error message: #{e}") + end + elsif value.is_a?(RDF::URI) + hash_results = find_object_label(hash_results, sol, value) + else + # If this is directly a literal + hash_results = select_metadata_literal(value, value, hash_results) + end + end + hash_results + end + + def find_object_label(hash_results, sol, value) + if !sol[:omvname].nil? + hash_results = select_metadata_literal(value, sol[:omvname], hash_results) + elsif !sol[:rdfslabel].nil? + hash_results = select_metadata_literal(value, sol[:rdfslabel], hash_results) + elsif !sol[:omvfirstname].nil? 
+ hash_results = select_metadata_literal(value, sol[:omvfirstname], hash_results) + # if first and last name are defined (for omv:Person) + hash_results[value] = "#{hash_results[value]} #{sol[:omvlastname]}" unless sol[:omvlastname].nil? + elsif !sol[:omvlastname].nil? + # if only last name is defined + hash_results = select_metadata_literal(value, sol[:omvlastname], hash_results) + else + # if the object is an URI but we are requesting a String + hash_results[value] = value.to_s + end + hash_results + end + + def enforce?(attr, type) + LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(type) + end + + def find_or_create_agent(attr, old_val, logger) + agent = LinkedData::Models::Agent.where(agentType: 'person', name: old_val).first + begin + agent ||= LinkedData::Models::Agent.new(name: old_val, agentType: 'person', creator: @submission.ontology.administeredBy.first).save + rescue + logger.error("Error while extracting metadata for the attribute #{attr}: Can't create Agent #{agent.errors} ") + agent = nil + end + agent end end end diff --git a/lib/ontologies_linked_data/mappings/mappings.rb b/lib/ontologies_linked_data/mappings/mappings.rb index 10abc75e1..f2dddc536 100644 --- a/lib/ontologies_linked_data/mappings/mappings.rb +++ b/lib/ontologies_linked_data/mappings/mappings.rb @@ -5,21 +5,32 @@ module LinkedData module Mappings OUTSTANDING_LIMIT = 30 - def self.mapping_predicates() - predicates = {} - predicates["CUI"] = ["http://bioportal.bioontology.org/ontologies/umls/cui"] - predicates["SAME_URI"] = - ["http://data.bioontology.org/metadata/def/mappingSameURI"] - predicates["LOOM"] = - ["http://data.bioontology.org/metadata/def/mappingLoom"] - predicates["REST"] = - ["http://data.bioontology.org/metadata/def/mappingRest"] - return predicates - end + def self.mapping_predicates + predicates = {} + predicates["CUI"] = ["http://bioportal.bioontology.org/ontologies/umls/cui"] + predicates["SAME_URI"] = + 
["http://data.bioontology.org/metadata/def/mappingSameURI"] + predicates["LOOM"] = + ["http://data.bioontology.org/metadata/def/mappingLoom"] + predicates["REST"] = + ["http://data.bioontology.org/metadata/def/mappingRest"] + return predicates + end - def self.handle_triple_store_downtime(logger=nil) - epr = Goo.sparql_query_client(:main) - status = epr.status + def self.internal_mapping_predicates + predicates = {} + predicates["SKOS:EXACT_MATCH"] = ["http://www.w3.org/2004/02/skos/core#exactMatch"] + predicates["SKOS:CLOSE_MATCH"] = ["http://www.w3.org/2004/02/skos/core#closeMatch"] + predicates["SKOS:BROAD_MATH"] = ["http://www.w3.org/2004/02/skos/core#broadMatch"] + predicates["SKOS:NARROW_MATH"] = ["http://www.w3.org/2004/02/skos/core#narrowMatch"] + predicates["SKOS:RELATED_MATH"] = ["http://www.w3.org/2004/02/skos/core#relatedMatch"] + + return predicates + end + + def self.handle_triple_store_downtime(logger = nil) + epr = Goo.sparql_query_client(:main) + status = epr.status if status[:exception] logger.info(status[:exception]) if logger @@ -145,142 +156,59 @@ def self.empty_page(page,size) return p end - def self.mappings_ontologies(sub1,sub2,page,size,classId=nil,reload_cache=false) - union_template = <<-eos -{ - GRAPH <#{sub1.id.to_s}> { - classId ?o . - } - GRAPH graph { - ?s2 ?o . - } - bind -} -eos - blocks = [] - mappings = [] - persistent_count = 0 - acr1 = sub1.id.to_s.split("/")[-3] - - if classId.nil? - acr2 = nil - acr2 = sub2.id.to_s.split("/")[-3] unless sub2.nil? - pcount = LinkedData::Models::MappingCount.where(ontologies: acr1) - pcount = pcount.and(ontologies: acr2) unless acr2.nil? - f = Goo::Filter.new(:pair_count) == (not acr2.nil?) - pcount = pcount.filter(f) - pcount = pcount.include(:count) - pcount_arr = pcount.all - persistent_count = pcount_arr.length == 0 ? 
0 : pcount_arr.first.count + def self.mappings_ontologies(sub1, sub2, page, size, classId = nil, reload_cache = false) + sub1, acr1 = extract_acronym(sub1) + sub2, acr2 = extract_acronym(sub2) - return LinkedData::Mappings.empty_page(page,size) if persistent_count == 0 - end + mappings = [] + persistent_count = 0 - if classId.nil? - union_template = union_template.gsub("classId", "?s1") - else - union_template = union_template.gsub("classId", "<#{classId.to_s}>") - end - # latest_sub_ids = self.retrieve_latest_submission_ids - - mapping_predicates().each do |_source,mapping_predicate| - union_block = union_template.gsub("predicate", mapping_predicate[0]) - union_block = union_block.gsub("bind","BIND ('#{_source}' AS ?source)") - - if sub2.nil? - union_block = union_block.gsub("graph","?g") - else - union_block = union_block.gsub("graph","<#{sub2.id.to_s}>") + if classId.nil? + persistent_count = count_mappings(acr1, acr2) + return LinkedData::Mappings.empty_page(page, size) if persistent_count == 0 end - blocks << union_block - end - unions = blocks.join("\nUNION\n") - - mappings_in_ontology = <<-eos -SELECT DISTINCT query_variables -WHERE { -unions -filter -} page_group -eos - query = mappings_in_ontology.gsub("unions", unions) - variables = "?s2 graph ?source ?o" - variables = "?s1 " + variables if classId.nil? - query = query.gsub("query_variables", variables) - filter = classId.nil? ? "FILTER ((?s1 != ?s2) || (?source = 'SAME_URI'))" : '' - if sub2.nil? - query = query.gsub("graph","?g") - ont_id = sub1.id.to_s.split("/")[0..-3].join("/") + query = mappings_ont_build_query(classId, page, size, sub1, sub2) + epr = Goo.sparql_query_client(:main) + graphs = [sub1] + unless sub2.nil? + graphs << sub2 + end + solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) + s1 = nil + s1 = RDF::URI.new(classId.to_s) unless classId.nil? + + solutions.each do |sol| + graph2 = sub2.nil? ? sol[:g] : sub2 + s1 = sol[:s1] if classId.nil? 
+ backup_mapping = nil + + if sol[:source].to_s == "REST" + backup_mapping = LinkedData::Models::RestBackupMapping + .find(sol[:o]).include(:process, :class_urns).first + backup_mapping.process.bring_remaining + end - # latest_sub_filter_arr = latest_sub_ids.map { |_, id| "?g = <#{id}>" } - # filter += "\nFILTER (#{latest_sub_filter_arr.join(' || ')}) " + classes = get_mapping_classes_instance(s1, sub1, sol[:s2], graph2) - #STRSTARTS is used to not count older graphs - #no need since now we delete older graphs - filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}'))" - else - query = query.gsub("graph", "") - end - query = query.gsub("filter", filter) + mapping = if backup_mapping.nil? + LinkedData::Models::Mapping.new(classes, sol[:source].to_s) + else + LinkedData::Models::Mapping.new( + classes, sol[:source].to_s, + backup_mapping.process, backup_mapping.id) + end - if size > 0 - pagination = "OFFSET offset LIMIT limit" - query = query.gsub("page_group",pagination) - limit = size - offset = (page-1) * size - query = query.gsub("limit", "#{limit}").gsub("offset", "#{offset}") - else - query = query.gsub("page_group","") - end - epr = Goo.sparql_query_client(:main) - graphs = [sub1.id] - unless sub2.nil? - graphs << sub2.id - end - solutions = epr.query(query, graphs: graphs, reload_cache: reload_cache) - s1 = nil - unless classId.nil? - s1 = RDF::URI.new(classId.to_s) - end - solutions.each do |sol| - graph2 = nil - if sub2.nil? - graph2 = sol[:g] - else - graph2 = sub2.id - end - if classId.nil? - s1 = sol[:s1] + mappings << mapping end - classes = [ read_only_class(s1.to_s,sub1.id.to_s), - read_only_class(sol[:s2].to_s,graph2.to_s) ] - - backup_mapping = nil - mapping = nil - if sol[:source].to_s == "REST" - backup_mapping = LinkedData::Models::RestBackupMapping - .find(sol[:o]).include(:process).first - backup_mapping.process.bring_remaining - end - if backup_mapping.nil? 
- mapping = LinkedData::Models::Mapping.new( - classes,sol[:source].to_s) - else - mapping = LinkedData::Models::Mapping.new( - classes,sol[:source].to_s, - backup_mapping.process,backup_mapping.id) + + if size == 0 + return mappings end - mappings << mapping - end - if size == 0 - return mappings + page = Goo::Base::Page.new(page, size, persistent_count, mappings) + return page end - page = Goo::Base::Page.new(page,size,nil,mappings) - page.aggregate = persistent_count - return page - end def self.mappings_ontology(sub,page,size,classId=nil,reload_cache=false) return self.mappings_ontologies(sub,nil,page,size,classId=classId, @@ -437,7 +365,7 @@ def self.create_rest_mapping(classes,process) graph_insert << [c.id, RDF::URI.new(rest_predicate), backup_mapping.id] Goo.sparql_update_client.insert_data(graph_insert, graph: sub.id) end - mapping = LinkedData::Models::Mapping.new(classes,"REST",process, backup_mapping.id) + mapping = LinkedData::Models::Mapping.new(classes,"REST", process, backup_mapping.id) return mapping end @@ -773,5 +701,115 @@ def self.create_mapping_count_pairs_for_ontologies(logger, arr_acronyms) # fsave.close end + private + + def self.get_mapping_classes_instance(s1, graph1, s2, graph2) + [read_only_class(s1.to_s, graph1.to_s), + read_only_class(s2.to_s, graph2.to_s)] + end + + def self.mappings_ont_build_query(class_id, page, size, sub1, sub2) + blocks = [] + mapping_predicates.each do |_source, mapping_predicate| + blocks << mappings_union_template(class_id, sub1, sub2, + mapping_predicate[0], + "BIND ('#{_source}' AS ?source)") + end + + + + + + + filter = class_id.nil? ? "FILTER ((?s1 != ?s2) || (?source = 'SAME_URI'))" : '' + if sub2.nil? + + class_id_subject = class_id.nil? ? '?s1' : "<#{class_id.to_s}>" + source_graph = sub1.nil? ? '?g' : "<#{sub1.to_s}>" + internal_mapping_predicates.each do |_source, predicate| + blocks << <<-eos + { + GRAPH #{source_graph} { + #{class_id_subject} <#{predicate[0]}> ?s2 . 
+ } + BIND( AS ?g) + BIND(?s2 AS ?o) + BIND ('#{_source}' AS ?source) + } + eos + end + + ont_id = sub1.to_s.split("/")[0..-3].join("/") + #STRSTARTS is used to not count older graphs + #no need since now we delete older graphs + + filter += "\nFILTER (!STRSTARTS(str(?g),'#{ont_id}')" + filter += " || " + internal_mapping_predicates.keys.map{|x| "(?source = '#{x}')"}.join('||') + filter += ")" + end + + variables = "?s2 #{sub2.nil? ? '?g' : ''} ?source ?o" + variables = "?s1 " + variables if class_id.nil? + + pagination = '' + if size > 0 + limit = size + offset = (page - 1) * size + pagination = "OFFSET #{offset} LIMIT #{limit}" + end + + query = <<-eos +SELECT DISTINCT #{variables} +WHERE { + #{blocks.join("\nUNION\n")} + #{filter} +} #{pagination} + eos + + query + end + + def self.mappings_union_template(class_id, sub1, sub2, predicate, bind) + class_id_subject = class_id.nil? ? '?s1' : "<#{class_id.to_s}>" + target_graph = sub2.nil? ? '?g' : "<#{sub2.to_s}>" + union_template = <<-eos +{ + GRAPH <#{sub1.to_s}> { + #{class_id_subject} <#{predicate}> ?o . + } + GRAPH #{target_graph} { + ?s2 <#{predicate}> ?o . + } + #{bind} +} + eos + end + + def self.count_mappings(acr1, acr2) + count = LinkedData::Models::MappingCount.where(ontologies: acr1) + count = count.and(ontologies: acr2) unless acr2.nil? + f = Goo::Filter.new(:pair_count) == (not acr2.nil?) + count = count.filter(f) + count = count.include(:count) + pcount_arr = count.all + pcount_arr.length == 0 ? 0 : pcount_arr.first.count + end + + def self.extract_acronym(submission) + sub = submission + if submission.nil? 
+ acr = nil + elsif submission.respond_to?(:id) + # Case where sub2 is a Submission + sub = submission.id + acr = sub.to_s.split("/")[-3] + else + acr = sub.to_s + end + + return sub, acr + end + + end end -end + diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index 305709dd9..e4bd7df6a 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -18,6 +18,9 @@ class OntologySubmission < LinkedData::Models::Base extend LinkedData::Concerns::OntologySubmission::DefaultCallbacks FLAT_ROOTS_LIMIT = 1000 + # default file permissions for files copied from tempdir + REPOSITORY_FILE_MODE = 0o660 # rw-rw---- + REPOSITORY_DIR_MODE = 0o2770 # rwxrws--- + set-GID model :ontology_submission, scheme: File.join(__dir__, '../../../config/schemes/ontology_submission.yml'), name_with: ->(s) { submission_id_generator(s) } @@ -39,8 +42,7 @@ class OntologySubmission < LinkedData::Models::Base # Ontology metadata # General metadata - attribute :uri, namespace: :omv, type: :uri, enforce: %i[distinct_of_identifier], fuzzy_search: true - attribute :versionIRI, namespace: :owl, type: :uri, enforce: [:distinct_of_URI] + attribute :versionIRI, namespace: :owl, type: :uri, enforce: [:distinct_of_uri] attribute :version, namespace: :omv attribute :status, namespace: :omv, default: ->(x) { 'production' } attribute :deprecated, namespace: :owl, type: :boolean, default: ->(x) { false } @@ -49,18 +51,25 @@ class OntologySubmission < LinkedData::Models::Base attribute :hasOntologySyntax, namespace: :omv, type: :uri, default: ->(s) { ontology_syntax_default(s) } attribute :naturalLanguage, namespace: :omv, type: %i[list] attribute :isOfType, namespace: :omv, type: :uri - attribute :identifier, namespace: :dct, type: %i[list uri], enforce: [:distinct_of_URI] + attribute :identifier, namespace: :dct, type: %i[list uri], enforce: [:distinct_of_uri] 
# Description metadata attribute :description, namespace: :omv, enforce: %i[concatenate], fuzzy_search: true + + # attribute :homepage + # attribute :documentation, namespace: :omv + # attribute :publication + # attribute :uri, namespace: :omv attribute :homepage, namespace: :foaf, type: :uri attribute :documentation, namespace: :omv, type: :uri + attribute :publication, type: %i[uri list] + attribute :uri, namespace: :omv, type: :uri, enforce: %i[distinct_of_identifier], fuzzy_search: true + attribute :notes, namespace: :omv, type: :list attribute :keywords, namespace: :omv, type: :list attribute :hiddenLabel, namespace: :skos, type: :list attribute :alternative, namespace: :dct, type: :list attribute :abstract, namespace: :dct - attribute :publication, type: %i[uri list] # Licensing metadata attribute :hasLicense, namespace: :omv, type: :uri @@ -236,6 +245,7 @@ def synchronize(&block) def URI=(value) self.uri = value end + def URI self.uri end @@ -274,23 +284,33 @@ def self.submission_id_generator(ss) ) end - def self.copy_file_repository(acronym, submissionId, src, filename = nil) - path_to_repo = File.join([LinkedData.settings.repository_folder, acronym.to_s, submissionId.to_s]) - name = filename || File.basename(File.new(src).path) - # THIS LOGGER IS JUST FOR DEBUG - remove after NCBO-795 is closed - # https://github.com/ncbo/bioportal-project/issues/323 - # logger = Logger.new(Dir.pwd + "/logs/create_permissions.log") - if not Dir.exist? path_to_repo - FileUtils.mkdir_p path_to_repo - # logger.debug("Dir created #{path_to_repo} | #{"%o" % File.stat(path_to_repo).mode} | umask: #{File.umask}") # NCBO-795 - end - dst = File.join([path_to_repo, name]) - FileUtils.copy(src, dst) - # logger.debug("File created #{dst} | #{"%o" % File.stat(dst).mode} | umask: #{File.umask}") # NCBO-795 - if not File.exist? 
dst - raise Exception, "Unable to copy #{src} to #{dst}" + def self.copy_file_repository(acronym, submission_id, src, filename = nil) + path_to_repo = File.join( + LinkedData.settings.repository_folder, + acronym.to_s, + submission_id.to_s + ) + + name = filename || File.basename(src) + dst = File.join(path_to_repo, name) + + begin + FileUtils.mkdir_p(path_to_repo) + FileUtils.chmod(REPOSITORY_DIR_MODE, path_to_repo) + + FileUtils.copy(src, dst) + # Uploaded files are initially written to a Tempfile in tmpdir with + # permissions 0600 (owner read/write only) for security. To ensure + # repository files are also accessible by the service group as intended, + # we explicitly chmod the destination file to REPOSITORY_FILE_MODE. + FileUtils.chmod(REPOSITORY_FILE_MODE, dst) + + raise "Unable to copy #{src} to #{dst}" unless File.exist?(dst) + + dst + rescue StandardError => e + raise "Failed to copy #{src} to #{dst}: [#{e.class}] #{e.message}" end - return dst end def valid? @@ -356,9 +376,10 @@ def sanity_check self.errors[:uploadFilePath] = ["In non-summary only submissions a data file or url must be provided."] return false elsif self.pullLocation - self.errors[:pullLocation] = ["File at #{self.pullLocation.to_s} does not exist"] if self.uploadFilePath.nil? 
- return remote_file_exists?(self.pullLocation.to_s) + remote_exists = remote_file_exists?(self.pullLocation.to_s) + self.errors[:pullLocation] = ["File at #{self.pullLocation.to_s} does not exist"] unless remote_exists + return remote_exists end return true end diff --git a/lib/ontologies_linked_data/parser/owlapi.rb b/lib/ontologies_linked_data/parser/owlapi.rb index 1a83239d7..f1cd93592 100644 --- a/lib/ontologies_linked_data/parser/owlapi.rb +++ b/lib/ontologies_linked_data/parser/owlapi.rb @@ -13,7 +13,7 @@ class RDFFileNotGeneratedException < Parser::ParserException class OWLAPICommand def initialize(input_file, output_repo, opts = {}) - @owlapi_wrapper_jar_path = LinkedData.bindir + "/owlapi-wrapper-1.4.2.jar" + @owlapi_wrapper_jar_path = LinkedData.bindir + "/owlapi-wrapper-1.4.3.jar" @input_file = input_file @output_repo = output_repo @master_file = opts[:master_file] diff --git a/lib/ontologies_linked_data/services/submission_process/submission_processor.rb b/lib/ontologies_linked_data/services/submission_process/submission_processor.rb index 319c20587..9e922494c 100644 --- a/lib/ontologies_linked_data/services/submission_process/submission_processor.rb +++ b/lib/ontologies_linked_data/services/submission_process/submission_processor.rb @@ -56,6 +56,8 @@ def process_submission(logger, options = {}) @submission.index_properties(logger, commit: index_commit) end + @submission.generate_diff(logger) if diff + if run_metrics unless parsed raise StandardError, "Metrics cannot be generated on the submission @@ -64,7 +66,6 @@ def process_submission(logger, options = {}) end @submission.generate_metrics(logger) end - @submission.generate_diff(logger) if diff end @submission.save diff --git a/ontologies_linked_data.gemspec b/ontologies_linked_data.gemspec index c3443aff6..cdc98518b 100644 --- a/ontologies_linked_data.gemspec +++ b/ontologies_linked_data.gemspec @@ -1,33 +1,37 @@ # -*- encoding: utf-8 -*- -require 
File.expand_path('../lib/ontologies_linked_data/version', __FILE__)
+require_relative 'lib/ontologies_linked_data/version'
 
 Gem::Specification.new do |gem|
+  gem.name = "ontologies_linked_data"
+  gem.version = LinkedData::VERSION
+  gem.description = "Models and serializers for ontologies and related artifacts backed by an RDF database"
+  gem.summary = "This library can be used for interacting with an AllegroGraph or 4store instance that stores " \
+                "BioPortal-based ontology information. Models in the library are based on Goo. Serializers " \
+                "support RDF serialization as Rack Middleware and automatic generation of hypermedia links."
   gem.authors = ["Paul R Alexander"]
-  gem.email = ["palexander@stanford.edu"]
-  gem.description = %q{Models and serializers for ontologies and related artifacts backed by 4store}
-  gem.summary = %q{This library can be used for interacting with a 4store instance that stores NCBO-based ontology information. Models in the library are based on Goo. Serializers support RDF serialization as Rack Middleware and automatic generation of hypermedia links.}
+  gem.email = ["support@bioontology.org"]
   gem.homepage = "https://github.com/ncbo/ontologies_linked_data"
 
-  gem.files = `git ls-files`.split($\)
-  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
-  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
-  gem.name = "ontologies_linked_data"
+  gem.files = %x(git ls-files).split("\n")
+  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
   gem.require_paths = ["lib"]
-  gem.version = LinkedData::VERSION
+  gem.required_ruby_version = ">= 3.1"
+
+  gem.add_dependency("activesupport")
+  gem.add_dependency("bcrypt")
   gem.add_dependency("goo")
   gem.add_dependency("json")
+  gem.add_dependency("libxml-ruby")
   gem.add_dependency("multi_json")
+  gem.add_dependency("net-ftp")
   gem.add_dependency("oj")
-  gem.add_dependency("bcrypt")
+  gem.add_dependency("omni_logger")
+  gem.add_dependency("pony")
gem.add_dependency("rack") gem.add_dependency("rack-test") - gem.add_dependency("rubyzip") - gem.add_dependency("libxml-ruby") - gem.add_dependency("activesupport") gem.add_dependency("rsolr") - gem.add_dependency("pony") - gem.add_dependency("omni_logger") + gem.add_dependency("rubyzip") gem.add_development_dependency("email_spec") diff --git a/test/models/test_mappings.rb b/test/models/test_mappings.rb index e53a6b80d..501baa18c 100644 --- a/test/models/test_mappings.rb +++ b/test/models/test_mappings.rb @@ -35,6 +35,11 @@ def self.ontologies_parse process_rdf: true, extract_metadata: false) end + def delete_all_rest_mappings + LinkedData::Models::RestBackupMapping.all.each do |m| + LinkedData::Mappings.delete_rest_mapping(m.id) + end + end def test_mapping_count_models LinkedData::Models::MappingCount.where.all(&:delete) diff --git a/test/models/test_ontology.rb b/test/models/test_ontology.rb index f0b4f3c64..68d687218 100644 --- a/test/models/test_ontology.rb +++ b/test/models/test_ontology.rb @@ -152,8 +152,8 @@ def test_ontology_properties ont = LinkedData::Models::Ontology.find('BRO35').first ont.bring(:submissions) sub = ont.submissions[0] - props = ont.properties() - assert_equal 85, props.length + props = ont.properties + assert_equal 86, props.length # verify sorting assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#AlgorithmPurpose", props[0].id.to_s @@ -192,7 +192,7 @@ def test_ontology_properties # test property roots pr = ont.property_roots(sub, extra_include=[:hasChildren, :children]) - assert_equal 64, pr.length + assert_equal 65, pr.length # verify sorting assert_equal "http://bioontology.org/ontologies/BiomedicalResourceOntology.owl#AlgorithmPurpose", pr[0].id.to_s @@ -206,7 +206,7 @@ def test_ontology_properties assert_equal 33, dpr.length # count annotation properties apr = pr.select { |p| p.class == LinkedData::Models::AnnotationProperty } - assert_equal 13, apr.length + assert_equal 14, apr.length # check for 
non-root properties assert_empty pr.select { |p| ["http://www.w3.org/2004/02/skos/core#broaderTransitive", "http://www.w3.org/2004/02/skos/core#topConceptOf", diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb index a233c74ec..4c2e458a7 100644 --- a/test/models/test_ontology_submission.rb +++ b/test/models/test_ontology_submission.rb @@ -289,13 +289,13 @@ def test_submission_parse unless ENV["BP_SKIP_HEAVY_TESTS"] == "1" submission_parse("MCCLTEST", "MCCLS TEST", "./test/data/ontology_files/CellLine_OWL_BioPortal_v1.0.owl", 11, - process_rdf: true, extract_metadata: false) + process_rdf: true, extract_metadata: true) sub = LinkedData::Models::OntologySubmission.where(ontology: [acronym: "MCCLTEST"], submissionId: 11) .include(:version) .first - assert sub.version == "3.0" + assert_equal sub.version, "3.0" end #This one has resources wih accents. @@ -448,7 +448,7 @@ def test_index_properties "./test/data/ontology_files/BRO_v3.5.owl", 1, process_rdf: true, extract_metadata: false, index_properties: true) res = LinkedData::Models::Class.search("*:*", {:fq => "submissionAcronym:\"BRO\"", :start => 0, :rows => 80}, :property) - assert_equal 83 , res["response"]["numFound"] + assert_equal 84 , res["response"]["numFound"] found = 0 res["response"]["docs"].each do |doc| @@ -1224,4 +1224,74 @@ def test_submission_delete_remove_files sub.delete assert !Dir.exist?(data_folder) end + + def test_copy_file_repository_from_tempfile + # Simulate a Rack Tempfile upload from tmpdir; + # tmpfile get 0600 permission and we need 660 for the copy to repository + fixture = "./test/data/ontology_files/BRO_v3.2.owl" + tmp = Tempfile.new(["upload", ".owl"]) + begin + FileUtils.cp(fixture, tmp.path) + tmp.close + + # Assert the source Tempfile has default 0600 permissions + # `& 0o777` is a bitwise AND that out all non-permission bits + # convers 0o100600 (regular file with owner rw) to 0600 + src_mode = File.stat(tmp.path).mode & 0o0777 + 
assert_equal 0o0600, src_mode + + dst = LinkedData::Models::OntologySubmission + .copy_file_repository("TMPTEST", 99, tmp.path) + + repo_root = LinkedData.settings.repository_folder + assert_match( + %r{\A#{Regexp.escape(repo_root)}/TMPTEST/99/}, + dst, + "Expected file to be copied into #{repo_root}/TMPTEST/99/" + ) + assert File.exist?(dst), "Destination file should exist" + + mode = File.stat(dst).mode & 0o0777 + assert_equal 0o0660, mode, format("Expected file mode 0660, got %o", mode) + ensure + tmp.unlink + end + end + + # To test extraction of metadata when parsing a submission (we extract the submission attributes that have the + # extractedMetadata on true) + def test_submission_extract_metadata + 2.times.each do |i| + submission_parse("AGROOE", "AGROOE Test extract metadata ontology", + "./test/data/ontology_files/agrooeMappings-05-05-2016.owl", i + 1, + process_rdf: true, extract_metadata: true, generate_missing_labels: false) + ont = LinkedData::Models::Ontology.find("AGROOE").first + sub = ont.latest_submission + refute_nil sub + + sub.bring_remaining + assert_equal false, sub.deprecated + assert_equal '2015-09-28', sub.creationDate.to_date.to_s + assert_equal '2015-10-01', sub.modificationDate.to_date.to_s + assert_equal "description example, AGROOE is an ontology used to test the metadata extraction, AGROOE is an ontology to illustrate how to describe their ontologies", sub.description + assert_equal [RDF::URI.new('http://agroportal.lirmm.fr')], sub.identifier + assert_equal ["http://lexvo.org/id/iso639-3/fra", "http://lexvo.org/id/iso639-3/eng"].sort, sub.naturalLanguage.sort + assert_equal [RDF::URI.new("http://lirmm.fr/2015/ontology/door-relation.owl"), RDF::URI.new("http://lirmm.fr/2015/ontology/dc-relation.owl"), + RDF::URI.new("http://lirmm.fr/2015/ontology/dcterms-relation.owl"), + RDF::URI.new("http://lirmm.fr/2015/ontology/voaf-relation.owl"), + RDF::URI.new("http://lirmm.fr/2015/ontology/void-import.owl") + ].sort, 
sub.ontologyRelatedTo.sort + + + + + # assert_equal ["Agence 007", "Éditions \"La Science en Marche\"", " LIRMM (default name) "].sort, sub.publisher.map { |x| x.bring_remaining.name }.sort + # assert_equal ["Alfred DC", "Clement Jonquet", "Gaston Dcterms", "Huguette Doap", "Mirabelle Prov", "Paul Foaf", "Vincent Emonet"].sort, sub.hasCreator.map { |x| x.bring_remaining.name }.sort + # assert_equal ["Léontine Dessaiterm", "Anne Toulet", "Benjamine Dessay", "Augustine Doap", "Vincent Emonet"].sort, sub.hasContributor.map { |x| x.bring_remaining.name }.sort + # assert_equal 1, LinkedData::Models::Agent.where(name: "Vincent Emonet").count + + sub.description = "test changed value" + sub.save + end + end end