From d01e05ab48a24f7844158531baa236a1fd698c47 Mon Sep 17 00:00:00 2001
From: rzmk <30333942+rzmk@users.noreply.github.com>
Date: Tue, 5 May 2026 16:08:44 -0400
Subject: [PATCH] feat: add/update schemas, config, README, and test

---
 README.md                | 12 ++++++++
 config.toml              |  2 --
 config/example.toml      |  9 ++++++
 src/lib.rs               | 66 ++++++++++++++++++++++++++++++++++++++++
 src/main.rs              | 18 +++--------
 src/utils.rs             | 14 ---------
 tests/validate_jsonld.rs | 59 +++++++++++++++++++++++++++++++++++
 7 files changed, 151 insertions(+), 29 deletions(-)
 delete mode 100644 config.toml
 create mode 100644 config/example.toml
 create mode 100644 src/lib.rs
 delete mode 100644 src/utils.rs
 create mode 100644 tests/validate_jsonld.rs

diff --git a/README.md b/README.md
index d10fc21..1920b8b 100644
--- a/README.md
+++ b/README.md
@@ -13,3 +13,15 @@ This runner is expected to be implemented for a water data hub with the relevant
 ```bash
 cargo run --release
 ```
+
+## Run tests
+
+```bash
+cargo test
+```
+
+To include print statements in test output, run:
+
+```bash
+cargo test -- --nocapture
+```
diff --git a/config.toml b/config.toml
deleted file mode 100644
index 0f779f7..0000000
--- a/config.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-ckan_url = "https://data.dathere.com"
-geoconnex_about_field_name = "geoconnex_about"
diff --git a/config/example.toml b/config/example.toml
new file mode 100644
index 0000000..5d92a24
--- /dev/null
+++ b/config/example.toml
@@ -0,0 +1,9 @@
+# Example configuration file. Customize for a specific CKAN instance when making a new Docker image.
+
+# This is the path after https://geoconnex.us/ and before /{id}.
+# Same as namespace location at https://github.com/internetofwater/geoconnex.us.
+namespace_path = "nmwdh/ckan-dataset"
+# The base URL for the CKAN instance to run API requests to.
+ckan_url = "http://localhost:5000"
+# The name of the key in /package_show for a given CKAN dataset where the Geoconnex `about` field is populated.
+about_field_name = "geoconnex_about"
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..ccec0e6
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,66 @@
+use serde_json::json;
+
+/// Builds the schema fragment for a JSON-LD `@type` that must include `name`.
+///
+/// `contains` only constrains array instances, so a plain-string `@type` gets its
+/// own `const` branch; without it any string would pass validation.
+fn type_includes(name: &str) -> serde_json::Value {
+    json!({
+        "anyOf": [
+            {"const": name},
+            {"type": "array", "contains": {"const": name}}
+        ]
+    })
+}
+
+/// Returns a JSON Schema describing a dataset JSON-LD document.
+///
+/// Requires `@context`, `@type`, `@id`, `name`, `provider`, and `about`; `@type` must
+/// be `"Dataset"` or an array containing it.
+pub fn get_dataset_schema() -> serde_json::Value {
+    json!({
+        "type": "object",
+        "properties": {
+            "@context": {"type": ["string", "object"]},
+            "@type": type_includes("Dataset"),
+            "@id": {"type": "string"},
+            "name": {"type": "string"},
+            "provider": {
+                "type": "object",
+                "properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
+            },
+            "about": {
+                "type": ["string", "array"],
+                "items": {
+                    "type": "object",
+                    "properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
+                },
+                "minItems": 1
+            },
+        },
+        "required": ["@context", "@type", "@id", "name", "provider", "about"]
+    })
+}
+
+/// Returns a JSON Schema describing a location JSON-LD document.
+///
+/// Requires `@context`, `@type`, `@id`, `name`, `provider`, `geo`, and
+/// `gsp:hasGeometry`; `@type` must be `"Place"` or an array containing it.
+pub fn get_location_schema() -> serde_json::Value {
+    json!({
+        "type": "object",
+        "properties": {
+            "@context": {"type": ["string", "object"]},
+            "@type": type_includes("Place"),
+            "@id": {"type": "string"},
+            "name": {"type": "string"},
+            "provider": {
+                "type": "object",
+                "properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
+            },
+            "geo": {"type": "object"},
+            "gsp:hasGeometry": {"type": "object"}
+        },
+        "required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
+    })
+}
diff --git a/src/main.rs b/src/main.rs
index c85a8c9..dec52a6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,29 +1,21 @@
-mod utils;
-
 use anyhow::Result;
 
 #[tokio::main]
 async fn main() -> Result<(), Box> {
     let ckan = ckanaction::CKAN::builder()
-        .url("https://data.dathere.com")
+        .url("http://localhost:5000")
         .build();
-    // TODO: Paginate through package_list and run package_show for each package
-    // If about exists then construct JSON-LD and validate then output JSON-LD to stdout on a new line
-    let response = ckan.package_show().id("".to_string()).call().await?;
+
+    // Paginate through /api/3/action/package_list until only an empty array is returned
+    let response = ckan.package_list().call().await?;
     let result = response
         .as_object()
         .unwrap()
         .get("result")
         .unwrap()
-        .as_object()
+        .as_array()
         .unwrap();
 
-    if let Some(geoconnex_about) = result.get("geoconnex_about") {
-        // Check if at least one valid reference feature exists in dataset metadata
-    }
-    // TODO: Construct JSON-LD if valid `about`
-    // TODO: Validate constructed JSON-LD against JSON schema
-    // TODO: Print JSON-LD to new line
 
     println!("{result:#?}");
 
diff --git a/src/utils.rs b/src/utils.rs
deleted file mode 100644
index 52a1012..0000000
--- a/src/utils.rs
+++ /dev/null
@@ -1,14 +0,0 @@
-use serde_json::json;
-
-async fn construct_jsonld(metadata: serde_json::Value) -> serde_json::Value {}
-
-async fn get_dataset_schema() -> serde_json::Value {
-    json!({
-        "type": "object",
-        "properties": {
-            "@context": {"type": ["string", "object"]},
-            "@type": {"const": "Dataset"}
-        },
-        "required": []
-    })
-}
diff --git a/tests/validate_jsonld.rs b/tests/validate_jsonld.rs
new file mode 100644
index 0000000..3cb2fa5
--- /dev/null
+++ b/tests/validate_jsonld.rs
@@ -0,0 +1,59 @@
+use anyhow::{Result, bail};
+use serde_json::json;
+
+/// Validates a sample USGS monitoring-location JSON-LD document against the location schema.
+#[test]
+fn validate_usgs_location_jsonld() -> Result<()> {
+    let usgs_location_jsonld = json!({
+        "@context": {
+            "@vocab": "https://schema.org/",
+            "gsp": "http://www.opengis.net/ont/geosparql#",
+            "hyf": "https://www.opengis.net/def/schema/hy_features/hyf/",
+            "locType": "https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items/"
+        },
+        "@type": [
+            "Place",
+            "hyf:HY_HydrometricFeature",
+            "hyf:HY_HydroLocation",
+            "locType:ST-CA"
+        ],
+        "@id": "https://geoconnex.us/usgs/monitoring-location/USGS-253937080285200",
+        "name": "BLACKCREEKCANALWESTOFSOUTHMIAMI FLA",
+        "identifier": {
+            "@type": "PropertyValue",
+            "propertyID": "USGS site identifier",
+            "value": "253937080285200"
+        },
+        "url": "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items/USGS-253937080285200",
+        "provider": {
+            "@type": "GovernmentOrganization",
+            "name": "U.S. Geological Survey"
+        },
+        "geo": {
+            "@type": "GeoCoordinates",
+            "latitude": 25.6606597832648,
+            "longitude": -80.4808896071386
+        },
+        "gsp:hasGeometry": {
+            "@type": "http://www.opengis.net/ont/sf#Point",
+            "gsp:asWKT": {
+                "@type": "gsp:wktLiteral",
+                "@value": "POINT (-80.4808896071386 25.6606597832648)"
+            },
+            "gsp:crs": {
+                "@id": "http://www.opengis.net/def/crs/OGC/1.3/CRS84"
+            }
+        }
+    });
+
+    let location_json_schema = ckan_geoconnex_bulk_runner::get_location_schema();
+
+    if let Err(e) = jsonschema::validate(&location_json_schema, &usgs_location_jsonld) {
+        println!("Error during validation:");
+        bail!("{e}");
+    } else {
+        println!("Successfully validated.");
+    }
+
+    Ok(())
+}