mirror of
https://github.com/dathere/ckan_geoconnex_bulk_runner.git
synced 2026-07-05 15:12:20 +00:00
feat: add/update schemas, config, README, and test
This commit is contained in:
parent
f7caaaae8e
commit
d01e05ab48
7 changed files with 129 additions and 29 deletions
12
README.md
12
README.md
|
|
@ -13,3 +13,15 @@ This runner is expected to be implemented for a water data hub with the relevant
|
||||||
```bash
|
```bash
|
||||||
cargo run --release
|
cargo run --release
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Run tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo test
|
||||||
|
```
|
||||||
|
|
||||||
|
To include print statements in test output, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo test -- --nocapture
|
||||||
|
```
|
||||||
|
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
ckan_url = "https://data.dathere.com"
|
|
||||||
geoconnex_about_field_name = "geoconnex_about"
|
|
||||||
9
config/example.toml
Normal file
9
config/example.toml
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
# Example configuration file. Customize for a specific CKAN instance when making a new Docker image.
|
||||||
|
|
||||||
|
# This is the path after https://geoconnex.us/ and before /{id}.
|
||||||
|
# Same as namespace location at https://github.com/internetofwater/geoconnex.us.
|
||||||
|
namespace_path = "nmwdh/ckan-dataset"
|
||||||
|
# The base URL of the CKAN instance that API requests are sent to.
|
||||||
|
ckan_url = "http://localhost:5000"
|
||||||
|
# The name of the key in /package_show for a given CKAN dataset where the Geoconnex `about` field is populated.
|
||||||
|
about_field_name = "geoconnex_about"
|
||||||
45
src/lib.rs
Normal file
45
src/lib.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
pub fn get_dataset_schema() -> serde_json::Value {
|
||||||
|
json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"@context": {"type": ["string", "object"]},
|
||||||
|
"@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}},
|
||||||
|
"@id": {"type": "string"},
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"provider": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||||
|
},
|
||||||
|
"about": {
|
||||||
|
"type": ["string", "array"],
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
|
||||||
|
},
|
||||||
|
"minItems": 1
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["@context", "@type", "@id", "name", "provider", "about"]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_location_schema() -> serde_json::Value {
|
||||||
|
json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"@context": {"type": ["string", "object"]},
|
||||||
|
"@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
|
||||||
|
"@id": {"type": "string"},
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"provider": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||||
|
},
|
||||||
|
"geo": {"type": "object"},
|
||||||
|
"gsp:hasGeometry": {"type": "object"}
|
||||||
|
},
|
||||||
|
"required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
|
||||||
|
})
|
||||||
|
}
|
||||||
18
src/main.rs
18
src/main.rs
|
|
@ -1,29 +1,21 @@
|
||||||
mod utils;
|
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let ckan = ckanaction::CKAN::builder()
|
let ckan = ckanaction::CKAN::builder()
|
||||||
.url("https://data.dathere.com")
|
.url("http://localhost:5000")
|
||||||
.build();
|
.build();
|
||||||
// TODO: Paginate through package_list and run package_show for each package
|
|
||||||
// If about exists then construct JSON-LD and validate then output JSON-LD to stdout on a new line
|
// Paginate through /api/3/action/package_list until only an empty array is returned
|
||||||
let response = ckan.package_show().id("".to_string()).call().await?;
|
let response = ckan.package_list().call().await?;
|
||||||
|
|
||||||
let result = response
|
let result = response
|
||||||
.as_object()
|
.as_object()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.get("result")
|
.get("result")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.as_object()
|
.as_array()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
if let Some(geoconnex_about) = result.get("geoconnex_about") {
|
|
||||||
// Check if at least one valid reference feature exists in dataset metadata
|
|
||||||
}
|
|
||||||
// TODO: Construct JSON-LD if valid `about`
|
|
||||||
// TODO: Validate constructed JSON-LD against JSON schema
|
|
||||||
// TODO: Print JSON-LD to new line
|
|
||||||
|
|
||||||
println!("{result:#?}");
|
println!("{result:#?}");
|
||||||
|
|
||||||
|
|
|
||||||
14
src/utils.rs
14
src/utils.rs
|
|
@ -1,14 +0,0 @@
|
||||||
use serde_json::json;
|
|
||||||
|
|
||||||
async fn construct_jsonld(metadata: serde_json::Value) -> serde_json::Value {}
|
|
||||||
|
|
||||||
async fn get_dataset_schema() -> serde_json::Value {
|
|
||||||
json!({
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"@context": {"type": ["string", "object"]},
|
|
||||||
"@type": {"const": "Dataset"}
|
|
||||||
},
|
|
||||||
"required": []
|
|
||||||
})
|
|
||||||
}
|
|
||||||
58
tests/validate_jsonld.rs
Normal file
58
tests/validate_jsonld.rs
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
use anyhow::{Result, bail};
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
/// Checks that a sample USGS monitoring-location JSON-LD document is accepted
/// by the location schema exposed by the crate.
///
/// Fails (via `bail!`) with the validator's error message when the document
/// does not conform to the schema.
#[test]
fn validate_usgs_location_jsonld() -> Result<()> {
    // Sample JSON-LD document for a single USGS monitoring location.
    let document = json!({
        "@context": {
            "@vocab": "https://schema.org/",
            "gsp": "http://www.opengis.net/ont/geosparql#",
            "hyf": "https://www.opengis.net/def/schema/hy_features/hyf/",
            "locType": "https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items/"
        },
        "@type": [
            "Place",
            "hyf:HY_HydrometricFeature",
            "hyf:HY_HydroLocation",
            "locType:ST-CA"
        ],
        "@id": "https://geoconnex.us/usgs/monitoring-location/USGS-253937080285200",
        "name": "BLACKCREEKCANALWESTOFSOUTHMIAMI FLA",
        "identifier": {
            "@type": "PropertyValue",
            "propertyID": "USGS site identifier",
            "value": "253937080285200"
        },
        "url": "https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items/USGS-253937080285200",
        "provider": {
            "@type": "GovernmentOrganization",
            "name": "U.S. Geological Survey"
        },
        "geo": {
            "@type": "GeoCoordinates",
            "latitude": 25.6606597832648,
            "longitude": -80.4808896071386
        },
        "gsp:hasGeometry": {
            "@type": "http://www.opengis.net/ont/sf#Point",
            "gsp:asWKT": {
                "@type": "gsp:wktLiteral",
                "@value": "POINT (-80.4808896071386 25.6606597832648)"
            },
            "gsp:crs": {
                "@id": "http://www.opengis.net/def/crs/OGC/1.3/CRS84"
            }
        }
    });

    let location_schema = ckan_geoconnex_bulk_runner::get_location_schema();

    match jsonschema::validate(&location_schema, &document) {
        Err(e) => {
            println!("Error during validation:");
            bail!("{e}");
        }
        Ok(_) => println!("Successfully validated."),
    }

    Ok(())
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue