diff --git a/.github/workflows/aot-test.yml b/.github/workflows/aot-test.yml index c2240d7..b43e6b3 100644 --- a/.github/workflows/aot-test.yml +++ b/.github/workflows/aot-test.yml @@ -32,11 +32,19 @@ jobs: - name: Test the binary run: | - target/graalvm-native-image/jelly-cli version && \ + set -eux + + # See if it runs at all + target/graalvm-native-image/jelly-cli version + + # Make sure reflection works + target/graalvm-native-image/jelly-cli version | grep "JVM reflection: supported" + + # Test RDF conversions echo '_:b _:b .' | target/graalvm-native-image/jelly-cli \ rdf to-jelly --in-format=nt > out.jelly && \ - [ -s out.jelly ] && - target/graalvm-native-image/jelly-cli \ + [ -s out.jelly ] + target/graalvm-native-image/jelly-cli \ rdf from-jelly --out-format=jelly-text out.jelly > out.txt && \ [ -s out.txt ] target/graalvm-native-image/jelly-cli \ @@ -45,6 +53,9 @@ jobs: echo '{"@graph":[{"@id":"http://e.org/r","http://e.org/p":{"@value":"v"}}]}' | \ target/graalvm-native-image/jelly-cli rdf to-jelly --in-format "jsonld" > jsonld.jelly && \ [ -s jsonld.jelly ] + echo '' | \ + target/graalvm-native-image/jelly-cli rdf to-jelly --in-format "rdfxml" > rdfxml.jelly && \ + [ -s rdfxml.jelly ] - name: Upload binary uses: actions/upload-artifact@v4 diff --git a/.github/workflows/scala.yml b/.github/workflows/scala.yml index 21028bf..6b04eb0 100644 --- a/.github/workflows/scala.yml +++ b/.github/workflows/scala.yml @@ -40,7 +40,7 @@ jobs: - name: Build and test shell: bash - run: sbt -v +test + run: sbt -v +test test-serial:test test-assembly: runs-on: ubuntu-latest diff --git a/build.sbt b/build.sbt index 16945c4..303c9f0 100644 --- a/build.sbt +++ b/build.sbt @@ -26,7 +26,8 @@ lazy val graalOptions = Seq( // For the release build, optimize for speed and make a build report if (isDevBuild) Seq("-Ob") else Seq("-O3", "--emit build-report"), ).flatten ++ Seq( - "--features=eu.neverblink.jelly.cli.graal.ProtobufFeature", + "--features=eu.neverblink.jelly.cli.graal.ProtobufFeature," + + "eu.neverblink.jelly.cli.graal.JenaInternalsFeature", "-H:ReflectionConfigurationFiles=" + file("graal.json").getAbsolutePath, // Needed to skip initializing all charsets. // See: https://github.com/Jelly-RDF/cli/issues/154 @@ -35,11 +36,14 @@ lazy val graalOptions = Seq( "-H:+UsePredicates", // SkipFlow optimization -- will be default in GraalVM 25 ) +lazy val TestSerial = config("test-serial") extend Test + lazy val root = (project in file(".")) .enablePlugins( BuildInfoPlugin, GraalVMNativeImagePlugin, ) + .configs(TestSerial) .settings( name := "jelly-cli", libraryDependencies ++= Seq( @@ -51,7 +55,7 @@ lazy val root = (project in file(".")) ("eu.neverblink.jelly" % "jelly-jena" % jellyV).excludeAll(ExclusionRule("org.apache.jena")), "eu.neverblink.jelly" % "jelly-core-protos-google" % jellyV, "com.github.alexarchambault" %% "case-app" % "2.1.0", - "org.scalatest" %% "scalatest" % "3.2.19" % Test, + "org.scalatest" %% "scalatest" % "3.2.19" % "test,test-serial", "org.yaml" % "snakeyaml" % "2.4" % Test, // For native-image reflection compatibility "org.graalvm.sdk" % "graal-sdk" % graalvmV % "provided", @@ -79,6 +83,11 @@ lazy val root = (project in file(".")) case _ => MergeStrategy.first }, + // Serial tests should not run in parallel. + // They are used for tests that manipulate global state, like system properties. + inConfig(TestSerial)(Defaults.testSettings), + TestSerial / parallelExecution := false, + // GraalVM settings Compile / mainClass := Some("eu.neverblink.jelly.cli.App"), // Do a fast build if it's a dev build diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/Version.scala b/src/main/scala/eu/neverblink/jelly/cli/command/Version.scala index 263fce5..0ba3549 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/Version.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/Version.scala @@ -2,6 +2,9 @@ package eu.neverblink.jelly.cli.command import caseapp.* import eu.neverblink.jelly.cli.* +import eu.neverblink.jelly.cli.util.jena.JenaSystemOptions + +import scala.util.{Failure, Success} @HelpMessage( "Prints the version of the jelly-cli utility and the Jelly-JVM library.", @@ -23,10 +26,21 @@ object Version extends JellyCommand[VersionOptions]: .find(_.startsWith("org.apache.jena:jena-core:")).get.split(":")(2) val jellyV = BuildInfo.libraryDependencies .find(_.startsWith("eu.neverblink.jelly:jelly-jena:")).get.split(":")(2) + val reflectionSupported = JenaSystemOptions.disableTermValidation() printLine(f""" |jelly-cli ${BuildInfo.version} |---------------------------------------------- |Jelly-JVM $jellyV |Apache Jena $jenaV |JVM ${System.getProperty("java.vm.name")} ${System.getProperty("java.vm.version")} + |---------------------------------------------- |""".stripMargin.trim) + reflectionSupported match { + case Failure(ex) => + printLine("[ ] JVM reflection: not supported. Parsing will be slower.") + if getOptions.common.debug then + printLine(" The exception was:") + ex.printStackTrace(out) + else printLine(" Run with --debug for details.") + case Success(_) => printLine("[X] JVM reflection: supported. Parsing optimizations enabled.") + } diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala index 9aac84a..9c43619 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJelly.scala @@ -5,7 +5,11 @@ import eu.neverblink.jelly.cli.* import eu.neverblink.jelly.cli.command.rdf.util.* import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat.* import eu.neverblink.jelly.cli.util.args.IndexRange -import eu.neverblink.jelly.cli.util.jena.StreamRdfBatchWriter +import eu.neverblink.jelly.cli.util.jena.{ + JenaSystemOptions, + StreamRdfBatchWriter, + StreamRdfCombiningBatchWriter, +} import eu.neverblink.jelly.convert.jena.JenaConverterFactory import eu.neverblink.jelly.core.JellyOptions import eu.neverblink.jelly.core.RdfHandler.AnyStatementHandler @@ -18,7 +22,6 @@ import org.apache.jena.sparql.core.Quad import java.io.{InputStream, OutputStream} import scala.jdk.CollectionConverters.* -import eu.neverblink.jelly.cli.util.jena.StreamRdfCombiningBatchWriter object RdfFromJellyPrint extends RdfCommandPrintUtil[RdfFormat.Writeable]: override val defaultFormat: RdfFormat = RdfFormat.NQuads @@ -57,6 +60,8 @@ case class RdfFromJellyOptions( "Ignored otherwise. Take care with input size, as this option will load everything into memory.", ) combine: Boolean = false, + @Recurse + rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(), ) extends HasJellyCommandOptions object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writeable]: @@ -73,6 +78,8 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ private def takeFrames: IndexRange = IndexRange(getOptions.takeFrames, "--take-frames") override def doRun(options: RdfFromJellyOptions, remainingArgs: RemainingArgs): Unit = + if !options.rdfPerformanceOptions.validateTerms.getOrElse(false) then + JenaSystemOptions.disableTermValidation() // Parse options now to make sure they are valid takeFrames val (inputStream, outputStream) = diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index 0eaa138..3366f2b 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -5,16 +5,16 @@ import com.google.protobuf.TextFormat import eu.neverblink.jelly.cli.* import eu.neverblink.jelly.cli.command.rdf.util.* import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat.* -import eu.neverblink.jelly.cli.util.jena.riot.JellyStreamWriterGraphs +import eu.neverblink.jelly.cli.util.jena.JenaSystemOptions +import eu.neverblink.jelly.cli.util.jena.riot.{JellyStreamWriterGraphs, RiotParserUtil} import eu.neverblink.jelly.convert.jena.JenaConverterFactory import eu.neverblink.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage, JellyStreamWriter} import eu.neverblink.jelly.core.{JellyOptions, RdfProtoDeserializationError} import eu.neverblink.jelly.core.proto.google.v1 as google import eu.neverblink.jelly.core.proto.v1.* import eu.neverblink.jelly.core.utils.IoUtils -import org.apache.jena.riot.lang.LabelToNode import org.apache.jena.riot.system.StreamRDFWriter -import org.apache.jena.riot.{Lang, RDFParser, RIOT} +import org.apache.jena.riot.{Lang, RIOT} import java.io.{BufferedReader, FileInputStream, InputStream, InputStreamReader, OutputStream} import scala.util.Using @@ -64,6 +64,8 @@ case class RdfToJellyOptions( "frame – make sure you know what you are doing. Default: true", ) delimited: Boolean = true, + @Recurse + rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(), ) extends HasJellyCommandOptions object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable]: @@ -87,6 +89,8 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable frame.get.getRows.iterator().next().getOptions override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit = + if !options.rdfPerformanceOptions.validateTerms.getOrElse(false) then + JenaSystemOptions.disableTermValidation() // Infer before touching options options.optionsFrom.map(loadOptionsFromFile).foreach( options.jellySerializationOptions.setOptions, @@ -184,10 +188,12 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable .build() JellyStreamWriter(JenaConverterFactory.getInstance(), variant, outputStream) - RDFParser.source(inputStream) - .lang(jenaLang) - .labelToNode(LabelToNode.createUseLabelAsGiven()) - .parse(jellyWriter) + RiotParserUtil.parse( + getOptions.rdfPerformanceOptions.validateTerms.getOrElse(false), + jenaLang, + inputStream, + jellyWriter, + ) jellyWriter.finish() /** Convert Jelly text to Jelly binary. diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala index bed8fbb..a99c85f 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala @@ -6,12 +6,12 @@ import eu.neverblink.jelly.cli.command.rdf.util.* import eu.neverblink.jelly.cli.util.args.IndexRange import eu.neverblink.jelly.cli.util.io.IoUtil import eu.neverblink.jelly.cli.util.jena.* +import eu.neverblink.jelly.cli.util.jena.riot.RiotParserUtil import eu.neverblink.jelly.convert.jena.JenaConverterFactory import eu.neverblink.jelly.core.JellyOptions import eu.neverblink.jelly.core.RdfHandler.AnyStatementHandler import eu.neverblink.jelly.core.proto.v1.{RdfStreamFrame, RdfStreamOptions} import org.apache.jena.graph.{Node, Triple} -import org.apache.jena.riot.RDFParser import org.apache.jena.riot.system.StreamRDFLib import org.apache.jena.sparql.core.Quad @@ -63,6 +63,8 @@ case class RdfValidateOptions( "Possible values: 'either', 'true', 'false'. Default: 'either'.", ) delimited: String = "either", + @Recurse + rdfPerformanceOptions: RdfPerformanceOptions = RdfPerformanceOptions(), ) extends HasJellyCommandOptions object RdfValidate extends JellyCommand[RdfValidateOptions]: @@ -90,6 +92,8 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]: options.compareToRdfFile.map(n => getRdfForComparison(n, options.compareToFormat)) val (inputStream, _) = getIoStreamsFromOptions(remainingArgs.remaining.headOption, None) val (delimited, frameIterator) = JellyUtil.iterateRdfStreamWithDelimitingInfo(inputStream) + if !options.rdfPerformanceOptions.validateTerms.getOrElse(true) then + JenaSystemOptions.disableTermValidation() // Step 1: Validate delimiting validateDelimiting(delimiting, delimited) @@ -245,8 +249,11 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]: } val output = StreamRdfCollector() Using.resource(IoUtil.inputStream(fileName)) { is => - RDFParser.source(is) - .lang(format.jenaLang) - .parse(output) + RiotParserUtil.parse( + getOptions.rdfPerformanceOptions.validateTerms.getOrElse(true), + format.jenaLang, + is, + output, + ) } output diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala new file mode 100644 index 0000000..166c09f --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfPerformanceOptions.scala @@ -0,0 +1,12 @@ +package eu.neverblink.jelly.cli.command.rdf.util + +import caseapp.HelpMessage + +/** Performance-related options for RDF processing. + */ +case class RdfPerformanceOptions( + @HelpMessage( + "Enable term validation and IRI resolution (slower). Default: false for all commands except 'rdf validate'.", + ) + validateTerms: Option[Boolean] = None, +) diff --git a/src/main/scala/eu/neverblink/jelly/cli/graal/JenaInternalsFeature.scala b/src/main/scala/eu/neverblink/jelly/cli/graal/JenaInternalsFeature.scala new file mode 100644 index 0000000..e3c9100 --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/graal/JenaInternalsFeature.scala @@ -0,0 +1,17 @@ +package eu.neverblink.jelly.cli.graal + +import org.apache.jena.graph.impl.LiteralLabel +import org.graalvm.nativeimage.hosted.{Feature, RuntimeReflection} + +class JenaInternalsFeature extends Feature: + import Feature.* + + override def getDescription: String = + "Registers Jena internals for reflection. Needed for JenaSystemOptions to disable a few " + + "checks during RDF parsing." + + override def beforeAnalysis(access: BeforeAnalysisAccess): Unit = + val classes = classOf[LiteralLabel].getDeclaredClasses + val valueModeClass = classes.find(_.getSimpleName == "ValueMode").get + RuntimeReflection.register(valueModeClass) + RuntimeReflection.register(valueModeClass.getDeclaredField("LAZY")) diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala new file mode 100644 index 0000000..2a7683b --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/util/jena/JenaSystemOptions.scala @@ -0,0 +1,42 @@ +package eu.neverblink.jelly.cli.util.jena + +import org.apache.jena.graph.impl.LiteralLabel +import org.apache.jena.irix.{IRIProviderAny, SystemIRIx} + +import scala.util.Try + +object JenaSystemOptions: + /** Enable faster parsing by disabling strict IRI and literal validation. + * @return + * A Success if the operation was successful, or a Failure with the exception if not. The + * operation may fail in environments where reflection is not supported. The failure can be + * ignored, but parsing will be slower. + */ + def disableTermValidation(): Try[Unit] = + toggle(false) + + /** For use only in tests. + */ + def resetTermValidation(): Try[Unit] = + toggle(true) + + private def toggle(enable: Boolean): Try[Unit] = + val valueMode = if enable then + SystemIRIx.reset() + "EAGER" + else + // Set the IRI provider to one that does no validation or resolving whatsoever + SystemIRIx.setProvider(IRIProviderAny.stringProvider()) + "LAZY" + + // Disable/enable eager computation of literal values, which does strict checking. + // This requires reflection as the field is private static final. + Try { + val f = classOf[LiteralLabel].getDeclaredField("valueMode") + val valueModeClass = + classOf[LiteralLabel].getDeclaredClasses.find(_.getSimpleName == "ValueMode").get + val valueModeLazy = valueModeClass.getDeclaredField(valueMode) + valueModeLazy.setAccessible(true) + f.setAccessible(true) + f.set(null, valueModeLazy.get(null)) + } diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/FastParserProfile.scala b/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/FastParserProfile.scala new file mode 100644 index 0000000..ef899d0 --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/FastParserProfile.scala @@ -0,0 +1,27 @@ +package eu.neverblink.jelly.cli.util.jena.riot + +import org.apache.jena.irix.IRIxResolver +import org.apache.jena.riot.RIOT +import org.apache.jena.riot.lang.LabelToNode +import org.apache.jena.riot.system.* + +/** Jena RIOT parser profile with optimizations for speed: + * - No IRI resolution + * - No error logging + * - Passing blank node labels as-is + * - No extra checks + */ +final class FastParserProfile + extends ParserProfileStd( + FactoryRDFCaching(FactoryRDFCaching.DftNodeCacheSize, LabelToNode.createUseLabelAsGiven()), + ErrorHandlerFactory.errorHandlerNoLogging, + IRIxResolver.create().noBase().resolve(false).allowRelative(true).build(), + PrefixMapStd(), + RIOT.getContext, + false, + false, + ): + + /** Skip IRI resolution for speed. + */ + override def resolveIRI(uriStr: String, line: Long, col: Long): String = uriStr diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala b/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala new file mode 100644 index 0000000..7004fa3 --- /dev/null +++ b/src/main/scala/eu/neverblink/jelly/cli/util/jena/riot/RiotParserUtil.scala @@ -0,0 +1,27 @@ +package eu.neverblink.jelly.cli.util.jena.riot + +import org.apache.jena.riot.{Lang, RDFParser, RDFParserRegistry, RIOT} +import org.apache.jena.riot.system.StreamRDF + +import java.io.InputStream + +/** Utility for creating Jena RDF parsers in jelly-cli. + */ +object RiotParserUtil: + def parse( + enableTermValidation: Boolean, + lang: Lang, + source: InputStream, + output: StreamRDF, + ): Unit = + if enableTermValidation then + // Standard parser with validation enabled + RDFParser.source(source) + .lang(lang) + .parse(output) + else + // Fast parser with validation disabled + RDFParserRegistry + .getFactory(lang) + .create(lang, FastParserProfile()) + .read(source, "", lang.getContentType, output, RIOT.getContext) diff --git a/src/test-serial/scala/eu/neverblink/jelly/cli/command/rdf/TermValidationSpec.scala b/src/test-serial/scala/eu/neverblink/jelly/cli/command/rdf/TermValidationSpec.scala new file mode 100644 index 0000000..dfd4a92 --- /dev/null +++ b/src/test-serial/scala/eu/neverblink/jelly/cli/command/rdf/TermValidationSpec.scala @@ -0,0 +1,204 @@ +package eu.neverblink.jelly.cli.command.rdf + +import eu.neverblink.jelly.cli.command.helpers.TestFixtureHelper +import eu.neverblink.jelly.cli.command.rdf.RdfToJellySpec.readJellyFile +import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat +import eu.neverblink.jelly.cli.util.jena.JenaSystemOptions +import eu.neverblink.jelly.cli.{ExitException, JellyDeserializationError} +import eu.neverblink.jelly.core.{JellyOptions, RdfProtoDeserializationError} +import eu.neverblink.jelly.core.proto.v1.PhysicalStreamType +import org.apache.jena.datatypes.DatatypeFormatException +import org.apache.jena.shared.impl.JenaParameters +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.io.ByteArrayInputStream +import scala.util.Try + +class TermValidationSpec extends AnyWordSpec, Matchers, TestFixtureHelper: + + protected val testCardinality: Int = 33 + + val frame = { + import eu.neverblink.jelly.cli.command.helpers.RdfAdapter.* + rdfStreamFrame( + Seq( + rdfStreamRow( + JellyOptions.BIG_GENERALIZED.clone() + .setPhysicalType(PhysicalStreamType.TRIPLES) + .setVersion(1), + ), + rdfStreamRow(rdfNameEntry(0, "notgood://malformed iri")), + rdfStreamRow(rdfDatatypeEntry(0, "http://www.w3.org/2001/XMLSchema#date")), + rdfStreamRow( + rdfTriple( + "b1", + rdfIri(0, 0), // malformed IRI + rdfLiteral("2025-02-31", 1), // invalid date + ), + ), + ), + ) + } + val frameBytes = frame.toByteArray + + "RdfFromJelly" should { + "warm up" in { + // Pre-test needed to properly initialize the command for testing + Try { + RdfFromJelly.runTestCommand(List("rdf", "from-jelly", "--help")) + } + } + + "term validation disabled (default)" in { + JenaSystemOptions.resetTermValidation() + JenaParameters.enableEagerLiteralValidation = true + RdfFromJelly.setStdIn(ByteArrayInputStream(frameBytes)) + val (out, err) = RdfFromJelly.runTestCommand( + List("rdf", "from-jelly", "--out-format", RdfFormat.NQuads.cliOptions.head), + ) + out.length should be > 0 + out should include(" 0 + out should include(" \"lalala\"^^ ." + .getBytes + + "warm up" in { + // Pre-test needed to properly initialize the command for testing + Try { + RdfToJelly.runTestCommand(List("rdf", "to-jelly", "--help")) + } + } + + "term validation disabled (default)" in { + JenaSystemOptions.resetTermValidation() + JenaParameters.enableEagerLiteralValidation = true + RdfToJelly.setStdIn(new ByteArrayInputStream(input)) + val (out, err) = RdfToJelly.runTestCommand( + List("rdf", "to-jelly", "--in-format=nt"), + ) + val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes) + val frame = readJellyFile(newIn) + frame.size should be(1) + frame.head.getRows.size() should be > 3 + err shouldBe empty + } + + "term validation disabled (explicit)" in { + JenaSystemOptions.resetTermValidation() + JenaParameters.enableEagerLiteralValidation = true + RdfToJelly.setStdIn(new ByteArrayInputStream(input)) + val (out, err) = RdfToJelly.runTestCommand( + List("rdf", "to-jelly", "--in-format=nt", "--validate-terms=false"), + ) + val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes) + val frame = readJellyFile(newIn) + frame.size should be(1) + frame.head.getRows.size() should be > 3 + err shouldBe empty + } + + "term validation enabled (explicit)" in { + JenaSystemOptions.resetTermValidation() + // This is normally not set. We use it to make sure the invalid date literal is actually detected. + JenaParameters.enableEagerLiteralValidation = true + RdfToJelly.setStdIn(new ByteArrayInputStream(input)) + val e = intercept[ExitException] { + RdfToJelly.runTestCommand( + List("rdf", "to-jelly", "--in-format=nt", "--validate-terms=true"), + ) + } + e.code should be(1) + e.cause.get shouldBe a[DatatypeFormatException] + } + } + + "RdfValidate" should { + "warm up" in { + // Pre-test needed to properly initialize the command for testing + Try { + RdfValidate.runTestCommand(List("rdf", "validate", "--help")) + } + } + + "term validation enabled (default)" in { + JenaSystemOptions.resetTermValidation() + JenaParameters.enableEagerLiteralValidation = true + RdfValidate.setStdIn(ByteArrayInputStream(frameBytes)) + val e = intercept[ExitException] { + RdfValidate.runTestCommand(List("rdf", "validate")) + } + e.cause.get shouldBe a[RdfProtoDeserializationError] + e.cause.get.getMessage should include("datatype") + } + + "term validation enabled (explicit)" in { + JenaSystemOptions.resetTermValidation() + JenaParameters.enableEagerLiteralValidation = true + RdfValidate.setStdIn(ByteArrayInputStream(frameBytes)) + val e = intercept[ExitException] { + RdfValidate.runTestCommand(List("rdf", "validate", "--validate-terms=true")) + } + e.cause.get shouldBe a[RdfProtoDeserializationError] + e.cause.get.getMessage should include("datatype") + } + + "term validation disabled" in { + JenaSystemOptions.resetTermValidation() + // This is normally not set. We use it to make sure the invalid date literal is actually detected. + JenaParameters.enableEagerLiteralValidation = true + RdfValidate.setStdIn(ByteArrayInputStream(frameBytes)) + val (out, err) = RdfValidate.runTestCommand( + List("rdf", "validate", "--validate-terms=false"), + ) + out shouldBe empty + err shouldBe empty + } + } diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/VersionSpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/VersionSpec.scala index bd3e9b3..584f2da 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/VersionSpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/VersionSpec.scala @@ -12,4 +12,9 @@ class VersionSpec extends AnyWordSpec, Matchers: out should include("Jelly-JVM") out should include("Apache Jena") } + + "report that reflection is supported" in { + val (out, err) = Version.runTestCommand(List(alias)) + out should include("[X] JVM reflection: supported.") + } } diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala index 2352bd2..0abb120 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala @@ -1,10 +1,12 @@ package eu.neverblink.jelly.cli.command.helpers +import eu.neverblink.jelly.cli.util.jena.JenaSystemOptions import eu.neverblink.jelly.cli.util.jena.riot.CliRiot import eu.neverblink.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage} import eu.neverblink.jelly.core.JellyOptions import org.apache.jena.graph.Triple import org.apache.jena.riot.{Lang, RDFDataMgr, RDFFormat, RDFLanguages, RDFWriter, RIOT} +import org.apache.jena.shared.impl.JenaParameters import org.apache.jena.sparql.graph.GraphFactory import org.apache.jena.sys.JenaSystem import org.scalatest.BeforeAndAfterAll @@ -130,3 +132,6 @@ trait TestFixtureHelper extends BeforeAndAfterAll: override def afterAll(): Unit = Files.deleteIfExists(tmpDir) + // Reset any Jena system options we might have changed during tests + JenaSystemOptions.resetTermValidation() + JenaParameters.enableEagerLiteralValidation = false diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala index 903c1ad..f108687 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfFromJellySpec.scala @@ -7,6 +7,8 @@ import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat import eu.neverblink.jelly.core.proto.v1.{PhysicalStreamType, RdfStreamFrame} import eu.neverblink.jelly.core.{JellyOptions, JellyTranscoderFactory} import org.apache.jena.query.DatasetFactory +import org.apache.jena.rdf.model.ModelFactory +import org.apache.jena.riot.RDFDataMgr import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec @@ -15,8 +17,6 @@ import java.nio.file.attribute.PosixFilePermissions import java.nio.file.{Files, Paths} import scala.io.Source import scala.util.Using -import org.apache.jena.riot.RDFDataMgr -import org.apache.jena.rdf.model.ModelFactory class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper: diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala index b7ed437..9507e00 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -18,10 +18,7 @@ import java.io.{ByteArrayInputStream, FileInputStream, InputStream} import scala.jdk.CollectionConverters.* import scala.util.Using -class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: - - protected val testCardinality: Int = 33 - +object RdfToJellySpec: def translateJellyBack(inputStream: InputStream): Model = Using(inputStream) { content => val newModel = ModelFactory.createDefaultModel() @@ -42,6 +39,11 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: case scala.util.Failure(exception) => throw exception } +class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: + import RdfToJellySpec.* + + protected val testCardinality: Int = 33 + "rdf to-jelly command" should { "handle conversion of NQuads to Jelly" when { "a file to output stream" in withFullJenaFile { f => diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidateSpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidateSpec.scala index a9eb319..597acf2 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidateSpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidateSpec.scala @@ -1,6 +1,6 @@ package eu.neverblink.jelly.cli.command.rdf -import eu.neverblink.jelly.cli.command.helpers.TestFixtureHelper +import eu.neverblink.jelly.cli.command.helpers.{RdfAdapter, TestFixtureHelper} import eu.neverblink.jelly.cli.command.helpers.RdfAdapter.* import eu.neverblink.jelly.cli.{CriticalException, ExitException} import eu.neverblink.jelly.convert.jena.JenaConverterFactory