feat: add/update schemas, config, README, and test

This commit is contained in:
rzmk 2026-05-05 16:08:44 -04:00
parent f7caaaae8e
commit d01e05ab48
7 changed files with 129 additions and 29 deletions

View file

@ -13,3 +13,15 @@ This runner is expected to be implemented for a water data hub with the relevant
```bash
cargo run --release
```
## Run tests
```bash
cargo test
```
To include print statements in test output, run:
```bash
cargo test -- --nocapture
```

View file

@ -1,2 +0,0 @@
ckan_url = "https://data.dathere.com"
geoconnex_about_field_name = "geoconnex_about"

9
config/example.toml Normal file
View file

@ -0,0 +1,9 @@
# Example configuration file. Customize for a specific CKAN instance when making a new Docker image.
# The path segment that comes after https://geoconnex.us/ and before /{id}.
# It is the same as the namespace location in the https://github.com/internetofwater/geoconnex.us repository.
namespace_path = "nmwdh/ckan-dataset"
# The base URL for the CKAN instance to run API requests to.
ckan_url = "http://localhost:5000"
# The name of the key in the /package_show response for a given CKAN dataset that holds the Geoconnex `about` value.
about_field_name = "geoconnex_about"

45
src/lib.rs Normal file
View file

@ -0,0 +1,45 @@
use serde_json::json;
/// Returns the JSON Schema used to validate a `Dataset` JSON-LD document.
///
/// The schema describes an object that must carry `@context`, `@type`, `@id`,
/// `name`, `provider` and `about`:
///
/// * `@context` is a string or an object.
/// * `@type` is either the string `"Dataset"` or an array that contains
///   `"Dataset"`.
/// * `provider` is an object whose `@type` and `name`, when present, are strings.
/// * `about` is a string or a non-empty array of objects whose `@id`, when
///   present, is a string and whose `@type`, when present, is `"Place"`.
pub fn get_dataset_schema() -> serde_json::Value {
    json!({
        "type": "object",
        "properties": {
            "@context": {"type": ["string", "object"]},
            // `contains` is only evaluated for array instances, so the plain
            // string form needs its own `const` branch; otherwise any string
            // (for example "Place") would be accepted as the dataset type.
            "@type": {
                "anyOf": [
                    {"const": "Dataset"},
                    {"type": "array", "contains": {"const": "Dataset"}}
                ]
            },
            "@id": {"type": "string"},
            "name": {"type": "string"},
            "provider": {
                "type": "object",
                "properties": {"@type": {"type": "string"}, "name": {"type": "string"}}
            },
            "about": {
                "type": ["string", "array"],
                // `items` and `minItems` only take effect for the array form.
                "items": {
                    "type": "object",
                    "properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}}
                },
                "minItems": 1
            }
        },
        "required": ["@context", "@type", "@id", "name", "provider", "about"]
    })
}
/// Returns the JSON Schema used to validate a `Place` (location) JSON-LD document.
///
/// The schema describes an object that must carry `@context`, `@type`, `@id`,
/// `name`, `provider`, `geo` and `gsp:hasGeometry`:
///
/// * `@context` is a string or an object.
/// * `@type` is either the string `"Place"` or an array that contains `"Place"`.
/// * `provider` is an object whose `@type` and `name`, when present, are strings.
/// * `geo` and `gsp:hasGeometry` are objects; their contents are not constrained.
pub fn get_location_schema() -> serde_json::Value {
    json!({
        "type": "object",
        "properties": {
            "@context": {"type": ["string", "object"]},
            // `contains` is only evaluated for array instances, so the plain
            // string form needs its own `const` branch; otherwise any string
            // (for example "Dataset") would be accepted as the location type.
            "@type": {
                "anyOf": [
                    {"const": "Place"},
                    {"type": "array", "contains": {"const": "Place"}}
                ]
            },
            "@id": {"type": "string"},
            "name": {"type": "string"},
            "provider": {
                "type": "object",
                "properties": {"@type": {"type": "string"}, "name": {"type": "string"}}
            },
            "geo": {"type": "object"},
            "gsp:hasGeometry": {"type": "object"}
        },
        "required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
    })
}

View file

@ -1,29 +1,21 @@
mod utils;
use anyhow::Result;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let ckan = ckanaction::CKAN::builder()
.url("https://data.dathere.com")
.url("http://localhost:5000")
.build();
// TODO: Paginate through package_list and run package_show for each package
// If about exists then construct JSON-LD and validate then output JSON-LD to stdout on a new line
let response = ckan.package_show().id("".to_string()).call().await?;
// Paginate through /api/3/action/package_list until only an empty array is returned
let response = ckan.package_list().call().await?;
let result = response
.as_object()
.unwrap()
.get("result")
.unwrap()
.as_object()
.as_array()
.unwrap();
if let Some(geoconnex_about) = result.get("geoconnex_about") {
// Check if at least one valid reference feature exists in dataset metadata
}
// TODO: Construct JSON-LD if valid `about`
// TODO: Validate constructed JSON-LD against JSON schema
// TODO: Print JSON-LD to new line
println!("{result:#?}");

View file

@ -1,14 +0,0 @@
use serde_json::json;
/// Placeholder for turning dataset metadata into a JSON-LD document.
///
/// NOTE(review): the intended mapping from `metadata` to JSON-LD is not shown
/// anywhere in this file — confirm the expected shape before implementing.
///
/// # Panics
///
/// Always panics when the returned future is awaited, because the body is an
/// explicit `todo!` stub. (The previous empty body evaluated to `()` and did not
/// type-check against the declared `serde_json::Value` return type.)
async fn construct_jsonld(metadata: serde_json::Value) -> serde_json::Value {
    // Consume the argument so the stub compiles without an unused-variable warning.
    let _ = metadata;
    todo!("construct JSON-LD from dataset metadata")
}
/// Produces a minimal JSON Schema for a dataset document.
///
/// The schema describes an object whose `@context`, when present, is a string
/// or an object and whose `@type`, when present, equals `"Dataset"`. No
/// property is required.
async fn get_dataset_schema() -> serde_json::Value {
    // Build the per-property sub-schemas first, then assemble the object schema.
    let context_schema = json!({"type": ["string", "object"]});
    let type_schema = json!({"const": "Dataset"});

    json!({
        "type": "object",
        "properties": {
            "@context": context_schema,
            "@type": type_schema
        },
        "required": []
    })
}

58
tests/validate_jsonld.rs Normal file
View file

@ -0,0 +1,58 @@
use anyhow::{Result, bail};
use serde_json::json;
/// Checks that a sample USGS monitoring-location JSON-LD document validates
/// against the location schema exposed by the library crate.
///
/// Run with `cargo test -- --nocapture` to see the printed status lines.
#[test]
fn validate_usgs_location_jsonld() -> Result<()> {
    // JSON-LD context: default vocabulary plus the prefixes used in the fixture.
    let context = json!({
        "@vocab": "https://schema.org/",
        "gsp": "http://www.opengis.net/ont/geosparql#",
        "hyf": "https://www.opengis.net/def/schema/hy_features/hyf/",
        "locType": "https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items/"
    });

    // Point location expressed as schema.org coordinates.
    let geo = json!({
        "@type": "GeoCoordinates",
        "latitude": 25.6606597832648,
        "longitude": -80.4808896071386
    });

    // The same point expressed as a GeoSPARQL geometry.
    let geometry = json!({
        "@type": "http://www.opengis.net/ont/sf#Point",
        "gsp:asWKT": {
            "@type": "gsp:wktLiteral",
            "@value": "POINT (-80.4808896071386 25.6606597832648)"
        },
        "gsp:crs": {
            "@id": "http://www.opengis.net/def/crs/OGC/1.3/CRS84"
        }
    });

    let location_document = json!({
        "@context": context,
        "@type": [
            "Place",
            "hyf:HY_HydrometricFeature",
            "hyf:HY_HydroLocation",
            "locType:ST-CA"
        ],
        "@id": "https://geoconnex.us/usgs/monitoring-location/USGS-253937080285200",
        "name": "BLACKCREEKCANALWESTOFSOUTHMIAMI FLA",
        "identifier": {
            "@type": "PropertyValue",
            "propertyID": "USGS site identifier",
            "value": "253937080285200"
        },
        "url": "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items/USGS-253937080285200",
        "provider": {
            "@type": "GovernmentOrganization",
            "name": "U.S. Geological Survey"
        },
        "geo": geo,
        "gsp:hasGeometry": geometry
    });

    let location_schema = ckan_geoconnex_bulk_runner::get_location_schema();

    match jsonschema::validate(&location_schema, &location_document) {
        Ok(()) => {
            println!("Successfully validated.");
            Ok(())
        }
        Err(err) => {
            println!("Error during validation:");
            bail!("{err}")
        }
    }
}