From baeb09acb7960838ad80e040f4fe47d3da2901aa Mon Sep 17 00:00:00 2001 From: rzmk <30333942+rzmk@users.noreply.github.com> Date: Tue, 5 May 2026 17:32:27 -0400 Subject: [PATCH] feat: location schema, architecture, ckanaction update, new test --- Cargo.lock | 5 ++-- Cargo.toml | 2 +- src/jsonld.rs | 3 ++ src/lib.rs | 47 ++---------------------------- src/main.rs | 62 +++++++++++++++++++++++++++++++--------- src/schema.rs | 45 +++++++++++++++++++++++++++++ tests/validate_jsonld.rs | 3 +- 7 files changed, 105 insertions(+), 62 deletions(-) create mode 100644 src/jsonld.rs create mode 100644 src/schema.rs diff --git a/Cargo.lock b/Cargo.lock index e72b1e7..0da0b4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -178,14 +178,15 @@ dependencies = [ [[package]] name = "ckanaction" -version = "0.1.4" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13ec17a3808b02c993f8fae3be6dc8a7f153b44b17cdebb2e2a9b0f836cb8e03" +checksum = "d2e5332c456e22de38ddf1c8ed0cdfaeba4ab432f5de8cd971b6450ef62f8add" dependencies = [ "bon", "reqwest 0.12.28", "serde", "serde_json", + "thiserror", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index bd3b6d6..9b08f04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ edition = "2024" [dependencies] anyhow = "1.0.102" -ckanaction = "0.1.4" +ckanaction = "0.2.0" jsonschema = "0.46.4" serde_json = "1.0.149" tokio = { version = "1.52.1", features = ["full"] } diff --git a/src/jsonld.rs b/src/jsonld.rs new file mode 100644 index 0000000..14eb40a --- /dev/null +++ b/src/jsonld.rs @@ -0,0 +1,3 @@ +pub fn construct_dataset_jsonld_from_metadata(metadata: serde_json::Value) -> serde_json::Value { + todo!() +} diff --git a/src/lib.rs b/src/lib.rs index ccec0e6..3600dba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,45 +1,2 @@ -use serde_json::json; - -pub fn get_dataset_schema() -> serde_json::Value { - json!({ - "type": "object", - "properties": { - "@context": {"type": ["string", "object"]}, - "@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}}, - "@id": {"type": "string"}, - "name": {"type": "string"}, - "provider": { - "type": "object", - "properties": {"@type": {"type": "string"}, "name": {"type": "string"}}, - }, - "about": { - "type": ["string", "array"], - "items": { - "type": "object", - "properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}}, - }, - "minItems": 1 - }, - }, - "required": ["@context", "@type", "@id", "name", "provider", "about"] - }) -} - -pub fn get_location_schema() -> serde_json::Value { - json!({ - "type": "object", - "properties": { - "@context": {"type": ["string", "object"]}, - "@type": {"type": ["string", "array"], "contains": {"const": "Place"}}, - "@id": {"type": "string"}, - "name": {"type": "string"}, - "provider": { - "type": "object", - "properties": {"@type": {"type": "string"}, "name": {"type": "string"}}, - }, - "geo": {"type": "object"}, - "gsp:hasGeometry": {"type": "object"} - }, - "required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"] - }) -} +pub mod jsonld; +pub mod schema; diff --git a/src/main.rs b/src/main.rs index dec52a6..52f13ed 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,23 +1,59 @@ -use anyhow::Result; +use anyhow::{Result, bail}; + +// TODO: Ensure error output is only streamed to stderr as per Geoconnex docs #[tokio::main] -async fn main() -> Result<(), Box> { +async fn main() -> Result<()> { let ckan = ckanaction::CKAN::builder() .url("http://localhost:5000") .build(); // Paginate through /api/3/action/package_list until only an empty array is returned - let response = ckan.package_list().call().await?; - - let result = response - .as_object() - .unwrap() - .get("result") - .unwrap() - .as_array() - .unwrap(); - - println!("{result:#?}"); + let mut offset = 0; + loop { + // TODO: Verify that only public datasets are returned, otherwise consider /package_search + let response = ckan.package_list().offset(offset).limit(100).call().await?; + // Verify successful response from CKAN API + let Some(success_opt) = response.get("success") else { + bail!("CKAN API did not return `success` key. Full response: {response}"); + }; + let Some(success) = success_opt.as_bool() else { + bail!( + "Could not parse success key as boolean from CKAN API. Full response: {response}" + ); + }; + if success { + let Some(result) = response.get("result") else { + bail!("CKAN API did not return `result` key. Full response: {response}"); + }; + // Retrieve dataset names from current pagination + let dataset_names = result.as_array().unwrap(); + if dataset_names.is_empty() { + break; + } else { + // For each dataset in current pagination: + for dataset_name in dataset_names { + // 0. Get the dataset name as a string + let dataset_name_str = dataset_name.as_str().unwrap(); + println!("{dataset_name_str}"); + // TODO: Identify if dataset names are unique + // 1. Get the dataset's metadata with /package_show by using the dataset name as the id + let dataset_metadata = ckan + .package_show() + .id(dataset_name_str.to_string()) + .call() + .await?; + println!("{dataset_metadata:#?}"); + // 2. Construct JSON-LD based on the data from /package_show + // 3. Validate the JSON-LD against the dataset JSON schema + // 4. Print the JSON-LD on a new line to stdout + } + } + } else { + bail!("CKAN API returned {{\"success\": false\"}}. Full response: {response}"); + } + offset = offset + 100; + } Ok(()) } diff --git a/src/schema.rs b/src/schema.rs new file mode 100644 index 0000000..ccec0e6 --- /dev/null +++ b/src/schema.rs @@ -0,0 +1,45 @@ +use serde_json::json; + +pub fn get_dataset_schema() -> serde_json::Value { + json!({ + "type": "object", + "properties": { + "@context": {"type": ["string", "object"]}, + "@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}}, + "@id": {"type": "string"}, + "name": {"type": "string"}, + "provider": { + "type": "object", + "properties": {"@type": {"type": "string"}, "name": {"type": "string"}}, + }, + "about": { + "type": ["string", "array"], + "items": { + "type": "object", + "properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}}, + }, + "minItems": 1 + }, + }, + "required": ["@context", "@type", "@id", "name", "provider", "about"] + }) +} + +pub fn get_location_schema() -> serde_json::Value { + json!({ + "type": "object", + "properties": { + "@context": {"type": ["string", "object"]}, + "@type": {"type": ["string", "array"], "contains": {"const": "Place"}}, + "@id": {"type": "string"}, + "name": {"type": "string"}, + "provider": { + "type": "object", + "properties": {"@type": {"type": "string"}, "name": {"type": "string"}}, + }, + "geo": {"type": "object"}, + "gsp:hasGeometry": {"type": "object"} + }, + "required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"] + }) +} diff --git a/tests/validate_jsonld.rs b/tests/validate_jsonld.rs index 3cb2fa5..596af2d 100644 --- a/tests/validate_jsonld.rs +++ b/tests/validate_jsonld.rs @@ -1,4 +1,5 @@ use anyhow::{Result, bail}; +use ckan_geoconnex_bulk_runner::schema::get_location_schema; use serde_json::json; #[test] @@ -45,7 +46,7 @@ fn validate_usgs_location_jsonld() -> Result<()> { } }); - let dataset_json_schema = ckan_geoconnex_bulk_runner::get_location_schema(); + let dataset_json_schema = get_location_schema(); if let Err(e) = jsonschema::validate(&dataset_json_schema, &usgs_location_jsonld) { println!("Error during validation:");