feat: location schema, architecture, ckanaction update, new test

This commit is contained in:
rzmk 2026-05-05 17:32:27 -04:00
parent d01e05ab48
commit baeb09acb7
7 changed files with 105 additions and 62 deletions

5
Cargo.lock generated
View file

@ -178,14 +178,15 @@ dependencies = [
[[package]]
name = "ckanaction"
version = "0.1.4"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13ec17a3808b02c993f8fae3be6dc8a7f153b44b17cdebb2e2a9b0f836cb8e03"
checksum = "d2e5332c456e22de38ddf1c8ed0cdfaeba4ab432f5de8cd971b6450ef62f8add"
dependencies = [
"bon",
"reqwest 0.12.28",
"serde",
"serde_json",
"thiserror",
]
[[package]]

View file

@ -5,7 +5,7 @@ edition = "2024"
[dependencies]
anyhow = "1.0.102"
ckanaction = "0.1.4"
ckanaction = "0.2.0"
jsonschema = "0.46.4"
serde_json = "1.0.149"
tokio = { version = "1.52.1", features = ["full"] }

3
src/jsonld.rs Normal file
View file

@ -0,0 +1,3 @@
/// Intended to build the JSON-LD document for a dataset from its metadata.
///
/// NOTE(review): presumably `metadata` is the CKAN `/package_show` response for a single
/// dataset and the return value is the JSON-LD to validate against the dataset schema —
/// confirm against the caller once this is wired in.
///
/// # Panics
///
/// Always panics for now: the body is still a `todo!()` placeholder.
pub fn construct_dataset_jsonld_from_metadata(metadata: serde_json::Value) -> serde_json::Value {
todo!()
}

View file

@ -1,45 +1,2 @@
use serde_json::json;
/// Returns the JSON Schema used to validate dataset JSON-LD documents.
///
/// The schema describes an object that must contain `@context`, `@type`, `@id`, `name`,
/// `provider` and `about`. `provider` is an object with string `@type`/`name` properties,
/// and `about` is either a string or a non-empty array of objects whose `@type` is `Place`.
pub fn get_dataset_schema() -> serde_json::Value {
json!({
"type": "object",
"properties": {
"@context": {"type": ["string", "object"]},
"@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}},
"@id": {"type": "string"},
"name": {"type": "string"},
"provider": {
"type": "object",
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
},
"about": {
"type": ["string", "array"],
"items": {
"type": "object",
"properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
},
"minItems": 1
},
},
"required": ["@context", "@type", "@id", "name", "provider", "about"]
})
}
/// Returns the JSON Schema used to validate location (`Place`) JSON-LD documents.
///
/// The schema describes an object that must contain `@context`, `@type`, `@id`, `name`,
/// `provider`, `geo` and `gsp:hasGeometry`. `provider`, `geo` and `gsp:hasGeometry` must
/// all be objects; only `provider` has its inner properties (`@type`, `name`) typed.
pub fn get_location_schema() -> serde_json::Value {
json!({
"type": "object",
"properties": {
"@context": {"type": ["string", "object"]},
"@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
"@id": {"type": "string"},
"name": {"type": "string"},
"provider": {
"type": "object",
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
},
"geo": {"type": "object"},
"gsp:hasGeometry": {"type": "object"}
},
"required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
})
}
pub mod jsonld;
pub mod schema;

View file

@ -1,23 +1,59 @@
use anyhow::Result;
use anyhow::{Result, bail};
// TODO: Ensure error output is only streamed to stderr as per Geoconnex docs
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
async fn main() -> Result<()> {
let ckan = ckanaction::CKAN::builder()
.url("http://localhost:5000")
.build();
// Paginate through /api/3/action/package_list until only an empty array is returned
let response = ckan.package_list().call().await?;
let result = response
.as_object()
.unwrap()
.get("result")
.unwrap()
.as_array()
.unwrap();
println!("{result:#?}");
let mut offset = 0;
loop {
// TODO: Verify that only public datasets are returned, otherwise consider /package_search
let response = ckan.package_list().offset(offset).limit(100).call().await?;
// Verify successful response from CKAN API
let Some(success_opt) = response.get("success") else {
bail!("CKAN API did not return `success` key. Full response: {response}");
};
let Some(success) = success_opt.as_bool() else {
bail!(
"Could not parse success key as boolean from CKAN API. Full response: {response}"
);
};
if success {
let Some(result) = response.get("result") else {
bail!("CKAN API did not return `result` key. Full response: {response}");
};
// Retrieve dataset names from current pagination
let dataset_names = result.as_array().unwrap();
if dataset_names.is_empty() {
break;
} else {
// For each dataset in current pagination:
for dataset_name in dataset_names {
// 0. Get the dataset name as a string
let dataset_name_str = dataset_name.as_str().unwrap();
println!("{dataset_name_str}");
// TODO: Identify if dataset names are unique
// 1. Get the dataset's metadata with /package_show by using the dataset name as the id
let dataset_metadata = ckan
.package_show()
.id(dataset_name_str.to_string())
.call()
.await?;
println!("{dataset_metadata:#?}");
// 2. Construct JSON-LD based on the data from /package_show
// 3. Validate the JSON-LD against the dataset JSON schema
// 4. Print the JSON-LD on a new line to stdout
}
}
} else {
bail!("CKAN API returned {{\"success\": false\"}}. Full response: {response}");
}
offset = offset + 100;
}
Ok(())
}

45
src/schema.rs Normal file
View file

@ -0,0 +1,45 @@
use serde_json::json;
/// Builds the JSON Schema that dataset JSON-LD documents are validated against.
///
/// A valid document is an object carrying `@context`, `@type`, `@id`, `name`, `provider`
/// and `about`. `about` may be a string or a non-empty array of `Place` references.
///
/// NOTE(review): under JSON Schema rules `contains` only constrains array values, so a
/// plain-string `@type` is not checked against `Dataset` — confirm this is intended.
pub fn get_dataset_schema() -> serde_json::Value {
    // Organisation that provides the dataset.
    let provider_schema = json!({
        "type": "object",
        "properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
    });

    // A single entry of the `about` array: a reference to a `Place`.
    let place_reference_schema = json!({
        "type": "object",
        "properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
    });

    let about_schema = json!({
        "type": ["string", "array"],
        "items": place_reference_schema,
        "minItems": 1
    });

    let property_schemas = json!({
        "@context": {"type": ["string", "object"]},
        "@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}},
        "@id": {"type": "string"},
        "name": {"type": "string"},
        "provider": provider_schema,
        "about": about_schema,
    });

    json!({
        "type": "object",
        "properties": property_schemas,
        "required": ["@context", "@type", "@id", "name", "provider", "about"]
    })
}
/// Builds the JSON Schema that location (`Place`) JSON-LD documents are validated against.
///
/// A valid document is an object carrying `@context`, `@type`, `@id`, `name`, `provider`,
/// `geo` and `gsp:hasGeometry`; the last three must themselves be objects.
///
/// NOTE(review): under JSON Schema rules `contains` only constrains array values, so a
/// plain-string `@type` is not checked against `Place` — confirm this is intended.
pub fn get_location_schema() -> serde_json::Value {
    // Organisation that provides the location record.
    let provider_schema = json!({
        "type": "object",
        "properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
    });

    let property_schemas = json!({
        "@context": {"type": ["string", "object"]},
        "@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
        "@id": {"type": "string"},
        "name": {"type": "string"},
        "provider": provider_schema,
        "geo": {"type": "object"},
        "gsp:hasGeometry": {"type": "object"}
    });

    json!({
        "type": "object",
        "properties": property_schemas,
        "required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
    })
}

View file

@ -1,4 +1,5 @@
use anyhow::{Result, bail};
use ckan_geoconnex_bulk_runner::schema::get_location_schema;
use serde_json::json;
#[test]
@ -45,7 +46,7 @@ fn validate_usgs_location_jsonld() -> Result<()> {
}
});
let dataset_json_schema = ckan_geoconnex_bulk_runner::get_location_schema();
let dataset_json_schema = get_location_schema();
if let Err(e) = jsonschema::validate(&dataset_json_schema, &usgs_location_jsonld) {
println!("Error during validation:");